Example #1
    def get_pipeline(self, pipeline_name=None, astype='lale'):
        """Retrieve one of the trials.

Parameters
----------
pipeline_name : union type, default None

    - string
        Key for table returned by summary(), return a trainable pipeline.

    - None
        When not specified, return the best trained pipeline found.

astype : 'lale' or 'sklearn', default 'lale'
    Type of resulting pipeline.

Returns
-------
result : Trained operator if best, trainable operator otherwise.
"""
        if pipeline_name is None:
            result = getattr(self, '_best_estimator', None)
        else:
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]['result']['params']
            result = create_instance_from_hyperopt_search_space(
                self.estimator, params)
        if result is None or astype == 'lale':
            return result
        assert astype == 'sklearn', astype
        return result.export_to_sklearn_pipeline()
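
A quick usage sketch for context, assuming this method is surfaced on lale.lib.lale.Hyperopt; the dataset and estimator here are illustrative, not taken from the excerpt above:

from lale.lib.lale import Hyperopt
from lale.lib.sklearn import LogisticRegression
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
# Run a few optimization trials over LogisticRegression's search space.
opt = Hyperopt(estimator=LogisticRegression, max_evals=3)
trained = opt.fit(X, y)
best = trained.get_pipeline()  # best trained pipeline found
# A specific trial by name, exported as a scikit-learn pipeline.
p0 = trained.get_pipeline("p0", astype="sklearn")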
Example #2
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            reg = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            try:
                cv_score, _, execution_time = cross_val_score_track_trials(
                    reg,
                    X_train,
                    y_train,
                    cv=KFold(self.cv),
                    scoring=self.scoring)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the score from a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    reg_trained = reg.fit(X_train_part, y_train_part)
                    scorer = check_scoring(reg, scoring=self.scoring)
                    cv_score = scorer(reg_trained, X_validation, y_validation)
                    execution_time = time.time() - start
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, reg.to_json()))
                    raise e

            return cv_score, execution_time
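
For orientation, a sketch of how an objective like hyperopt_train_test is typically wired into hyperopt's fmin; search_space, X_train, and y_train are assumed to be in scope, and this wiring is an illustration rather than the library's exact driver code:

from hyperopt import STATUS_OK, Trials, fmin, tpe

def objective(params):
    # fmin minimizes, so negate a score where higher is better.
    cv_score, execution_time = hyperopt_train_test(params, X_train, y_train)
    return {"loss": -cv_score, "time": execution_time, "status": STATUS_OK}

trials = Trials()
best_params = fmin(objective, space=search_space, algo=tpe.suggest,
                   max_evals=50, trials=trials)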
Example #3
 def get_final_trained_estimator(params, X_train, y_train):
     warnings.filterwarnings("ignore")
     trainable = create_instance_from_hyperopt_search_space(
         self.estimator, params
     )
     trained = trainable.fit(X_train, y_train)
     return trained
Example #4
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            clf = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    clf, X_train, y_train, cv=self.cv, scoring=self.scoring)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    clf_trained = clf.fit(X_train_part, y_train_part)
                    scorer = check_scoring(clf, scoring=self.scoring)
                    cv_score = scorer(clf_trained, X_validation, y_validation)
                    execution_time = time.time() - start
                    y_pred_proba = clf_trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation,
                                           y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, clf.to_json()))
                    raise e
            return cv_score, logloss, execution_time
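
The fallback computes log loss directly from predicted probabilities; a tiny self-contained check of that computation:

from sklearn.metrics import log_loss

# Probabilities for three samples over two classes; lower log loss is better.
y_true = [0, 1, 1]
y_pred_proba = [[0.9, 0.1], [0.2, 0.8], [0.3, 0.7]]
print(log_loss(y_true=y_true, y_pred=y_pred_proba))  # ≈ 0.228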
Example #5
 def proc_train_test(params, X_train, y_train, return_dict):
     return_dict["params"] = copy.deepcopy(params)
     try:
         score, logloss, execution_time = hyperopt_train_test(
             params, X_train=X_train, y_train=y_train
         )
         return_dict["loss"] = self.best_score - score
         return_dict["time"] = execution_time
         return_dict["log_loss"] = logloss
         return_dict["status"] = hyperopt.STATUS_OK
     except BaseException as e:
         exception_type = f"{type(e).__module__}.{type(e).__name__}"
         try:
             trainable = create_instance_from_hyperopt_search_space(
                 self.estimator, params
             )
             trial_info = (
                 f'pipeline: """{trainable.pretty_print(show_imports=False)}"""'
             )
         except BaseException:
             trial_info = f"hyperparams: {params}"
         error_msg = f"Exception caught in Hyperopt: {exception_type}, {traceback.format_exc()}with {trial_info}"
         logger.warning(error_msg + ", setting status to FAIL")
         return_dict["status"] = hyperopt.STATUS_FAIL
         return_dict["error_msg"] = error_msg
         if self.verbose:
             print(return_dict["error_msg"])
Example #6
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            clf = create_instance_from_hyperopt_search_space(
                self.model, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    clf, X_train, y_train, cv=self.cv)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the accuracy based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    clf_trained = clf.fit(X_train_part, y_train_part)
                    predictions = clf_trained.predict(X_validation)
                    execution_time = time.time() - start
                    y_pred_proba = clf_trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation,
                                           y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                    cv_score = accuracy_score(
                        y_validation, [round(pred) for pred in predictions])
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, clf.to_json()))
                    raise e
            #print("TRIALS")
            #print(json.dumps(self.get_trials().trials, default = myconverter, indent=4))
            return cv_score, logloss, execution_time
Example #7
    def get_pipeline(self, pipeline_name=None, astype="lale"):
        """Retrieve one of the trials.

        Parameters
        ----------
        pipeline_name : union type, default None

            - string
                Key for the table returned by summary(); returns a trainable pipeline.

            - None
                When not specified, returns the best trained pipeline found.

        astype : 'lale' or 'sklearn', default 'lale'
            Type of resulting pipeline.

        Returns
        -------
        result : Trained operator if best, trainable operator otherwise."""
        best_name = None
        if self._best_estimator is not None:
            best_name = f'p{self._trials.best_trial["tid"]}'
        if pipeline_name is None:
            pipeline_name = best_name
        if pipeline_name == best_name:
            result = getattr(self, "_best_estimator", None)
        else:
            assert pipeline_name is not None
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]["result"]["params"]
            result = create_instance_from_hyperopt_search_space(self.estimator, params)
        if result is None or astype == "lale":
            return result
        assert astype == "sklearn", astype
        return result.export_to_sklearn_pipeline()
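
Names like "p0" are the index of the table returned by summary(); a hedged lookup sketch, where the "loss" column name is an assumption about summary()'s schema:

trained = opt.fit(X, y)  # as in the usage sketch after Example #1
df = trained.summary()   # one row per trial, indexed by name ("p0", "p1", ...)
name = df["loss"].idxmin()            # pick the trial with the lowest loss
pipeline = trained.get_pipeline(name)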
Example #8
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            trainable = create_instance_from_hyperopt_search_space(
                self.estimator, params
            )
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable,
                    X_train,
                    y_train,
                    cv=self.cv,
                    scoring=self.scoring,
                    args_to_scorer=self.args_to_scorer,
                )
                logger.debug(
                    "Successful trial of hyperopt with hyperparameters:{}".format(
                        params
                    )
                )
            except BaseException as e:
                # If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure and trainable is not None:
                    (
                        X_train_part,
                        X_validation,
                        y_train_part,
                        y_validation,
                    ) = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part, **fit_params)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(
                        trained, X_validation, y_validation, **self.args_to_scorer
                    )
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    if trainable is None:
                        logger.debug(
                            "Error {} with uncreatable pipeline with parameters:{}".format(
                                e, lale.pretty_print.hyperparams_to_string(params)
                            )
                        )
                    else:
                        logger.debug(
                            "Error {} with pipeline:{}".format(e, trainable.to_json())
                        )
                    raise e
            return cv_score, logloss, execution_time
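
check_scoring, used in the fallback above, resolves a scoring argument into a callable scorer; a small standalone illustration:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000)
scorer = check_scoring(clf, scoring="accuracy")
trained = clf.fit(X, y)
print(scorer(trained, X, y))  # accuracy of the trained model on (X, y)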
Example #9
 def get_final_trained_reg(params, X_train, y_train):
     warnings.filterwarnings("ignore")
     reg = create_instance_from_hyperopt_search_space(
         self.estimator, params)
     reg = reg.fit(X_train, y_train)
     return reg
Example #10
 def get_final_trained_clf(params, X_train, y_train):
     warnings.filterwarnings("ignore")
     clf = create_instance_from_hyperopt_search_space(
         self.model, params)
     clf = clf.fit(X_train, y_train)
     return clf