Esempio n. 1
0
def _create_bankdata_experiment(predefined_kwargs, maker=None, need_test=False, user_kwargs=None):
    """Build a binary-classification experiment over the bank dataset.

    Loads the first 2000 rows of the bank dataset, label-encodes the target,
    splits 70/30 (fixed seed 9527) and delegates experiment construction to
    *maker* (defaults to a PlainModel-backed ``make_experiment`` wrapper).

    :param predefined_kwargs: dict of base kwargs; mutated in place and passed
        through to the maker.
    :param maker: optional callable ``maker(df_train, target=..., task=..., **kw)``;
        when None, a default PlainModel maker is used and a logistic-regression-only
        search space plus a MultiLabelEncoder transformer are injected.
    :param need_test: when True, the held-out split is passed as ``test_data``.
    :param user_kwargs: optional dict of caller overrides applied last
        (highest precedence).
    :return: whatever the maker returns (an experiment object).
    """
    target = 'y'
    df = dsutils.load_bank().head(2000)
    df[target] = LabelEncoder().fit_transform(df[target])
    df_train, df_test = train_test_split(df, test_size=0.3, random_state=9527)

    def maker_(*args, **kwargs):
        # Default maker: plain model experiment with pass-through arguments.
        return make_experiment(PlainModel, *args, **kwargs)

    default_kwargs = dict(log_level='info')
    predefined_kwargs.update(default_kwargs)

    if maker is None:
        maker = maker_
        predefined_kwargs['search_space'] = PlainSearchSpace(enable_lr=True,
                                                             enable_nn=False, enable_dt=False, enable_dtr=False)
        predefined_kwargs['hyper_model_options'] = {'transformer': MultiLabelEncoder}

    if need_test:
        predefined_kwargs['test_data'] = df_test

    # BUG FIX: the default user_kwargs=None made dict.update(None) raise
    # TypeError; only merge when the caller actually supplied overrides.
    if user_kwargs:
        predefined_kwargs.update(user_kwargs)

    return maker(df_train, target=target, task=const.TASK_BINARY, **predefined_kwargs)
Esempio n. 2
0
def test_experiment_with_blood_full_features():
    """Run an experiment on the blood dataset with every pipeline feature
    switched on, then verify that only data-clean, multicollinearity
    detection and the estimator survive as pipeline steps."""
    target = 'Class'
    df = dsutils.load_blood()
    df_train, df_test = train_test_split(df, train_size=0.8, random_state=335)
    df_test.pop(target)

    # NOTE(review): df_train is computed but the full df is handed to
    # make_experiment — looks intentional for this test, but confirm.
    experiment = make_experiment(
        PlainModel,
        df,
        target=target,
        search_space=PlainSearchSpace(),
        test_data=df_test,
        feature_generation=True,
        collinearity_detection=True,
        drift_detection=True,
        feature_selection=True,
        down_sample_search=True,
        down_sample_search_size=0.2,
        feature_reselection=True,
        pseudo_labeling=True,
        random_state=335,
        early_stopping_time_limit=1200,
        # log_level='info',
    )
    estimator = experiment.run(max_trials=3)
    print(estimator)
    assert estimator is not None

    expected_steps = [
        StepNames.DATA_CLEAN, StepNames.MULITICOLLINEARITY_DETECTION,
        'estimator'
    ]
    assert [name for name, _ in estimator.steps] == expected_steps
Esempio n. 3
0
def _create_experiment(predefined_kwargs, maker=None, need_test=False, user_kwargs=None):
    """Build a regression experiment over the boston dataset.

    Adds a constant column, an Id column and a deliberately drifted feature
    (train vs. test scaled by 100) before an 80/20 split (fixed seed 1234),
    then delegates construction to *maker*.

    :param predefined_kwargs: dict of base kwargs; mutated in place and passed
        through to the maker.
    :param maker: optional callable; when None, a default PlainModel maker is
        used (forcing random_state=1234) and a decision-tree-regressor-only
        search space is injected.
    :param need_test: when True, the held-out split (target removed) is passed
        as ``test_data``.
    :param user_kwargs: optional dict of caller overrides applied last
        (highest precedence).
    :return: whatever the maker returns (an experiment object).
    """
    df = dsutils.load_boston()
    df['Constant'] = [0 for i in range(df.shape[0])]
    df['Id'] = [i for i in range(df.shape[0])]
    target = 'target'
    df_train, df_test = train_test_split(df, test_size=0.2, random_state=1234)
    df_test.pop(target)
    # Drifted feature: test values are 100x the train range, so drift
    # detection has something to find.
    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_test['Drifted'] = np.random.random(df_test.shape[0]) * 100

    def maker_(*args, **kwargs):
        if 'random_state' not in kwargs.keys():
            kwargs['random_state'] = 1234
        return make_experiment(PlainModel, *args, **kwargs)

    default_kwargs = dict(
        log_level='info',
    )
    predefined_kwargs.update(default_kwargs)
    if maker is None:
        maker = maker_
        predefined_kwargs['search_space'] = PlainSearchSpace(enable_lr=False,
                                                             enable_nn=False, enable_dt=False, enable_dtr=True)
    if need_test:
        predefined_kwargs['test_data'] = df_test

    # BUG FIX: the default user_kwargs=None made dict.update(None) raise
    # TypeError; only merge when the caller actually supplied overrides.
    if user_kwargs:
        predefined_kwargs.update(user_kwargs)

    return maker(df_train, target=target, task=const.TASK_REGRESSION, **predefined_kwargs)
Esempio n. 4
0
    def test_creator(self):
        """End-to-end check: create an experiment via the job engine from an
        options dict, run it, and verify the excel report is written."""
        test_df = dsutils.load_blood()
        eval_df = dsutils.load_blood()
        make_options = dict(
            test_data=test_df,
            eval_data=eval_df,
            task=const.TASK_BINARY,
            target="Class",
            feature_selection=True,
            feature_selection_strategy="threshold",
            evaluation_metrics="auto",
            evaluation_persist_prediction=True,
            report_render='excel',
            search_space=PlainSearchSpace(),
        )
        work_dir = common_util.get_temp_dir_path(prefix="hyn_job_creator_test_")

        exp = BloodDatasetJobEngine().create_experiment_with_params(
            make_options, work_dir)
        assert exp
        assert isinstance(exp, CompeteExperiment)

        exp.run(max_trials=2)
        # The excel report renderer must have produced its output file.
        assert (Path(work_dir) / "report.xlsx").exists()
Esempio n. 5
0
def test_experiment_with_blood_simple():
    """Smoke test: a minimal PlainModel experiment on the blood dataset."""
    data = dsutils.load_blood()
    exp = make_experiment(
        PlainModel, data,
        target='Class',
        search_space=PlainSearchSpace(),
    )
    est = exp.run(max_trials=3)
    print(est)
    assert est is not None
Esempio n. 6
0
def create_hyper_model(reward_metric='auc', optimize_direction='max'):
    """Return a PlainModel wired to a random searcher over the default
    PlainSearchSpace, with no callbacks attached."""
    space = PlainSearchSpace()
    rnd_searcher = make_searcher(
        'random', search_space_fn=space, optimize_direction=optimize_direction)
    return PlainModel(
        searcher=rnd_searcher, reward_metric=reward_metric, callbacks=[])
Esempio n. 7
0
def create_plain_model(reward_metric='auc', optimize_direction='max',
                       with_encoder=False, with_dask=False):
    """Return a hyper model (Dask-backed when *with_dask*) over a DT+LR search
    space, optionally attaching a MultiLabelEncoder transformer."""
    space = PlainSearchSpace(enable_dt=True, enable_lr=True, enable_nn=False)
    rnd_searcher = make_searcher(
        'random', search_space_fn=space, optimize_direction=optimize_direction)

    model_cls = DaskPlainModel if with_dask else PlainModel
    return model_cls(
        searcher=rnd_searcher,
        reward_metric=reward_metric,
        callbacks=[SummaryCallback()],
        transformer=MultiLabelEncoder if with_encoder else None,
    )
Esempio n. 8
0
 def maker(df_train, target, df_eval, file_path):
     """Build a PlainModel experiment exercising drift detection, rendering
     the report to *file_path* via the 'excel' renderer name."""
     return make_experiment(
         PlainModel,
         df_train,
         target=target,
         test_data=df_eval.copy(),
         drift_detection_threshold=0.4,
         drift_detection_min_features=3,
         drift_detection_remove_size=0.5,
         search_space=PlainSearchSpace(enable_lr=False, enable_nn=False),
         report_render='excel',
         report_render_options={'file_path': file_path},
     )
Esempio n. 9
0
 def maker(df_train, target, df_eval, file_path):
     """Build a PlainModel experiment exercising drift detection, rendering
     the report through an explicit ExcelReportRender instance."""
     from hypernets.experiment.report import ExcelReportRender

     return make_experiment(
         PlainModel,
         df_train,
         target=target,
         eval_data=df_eval,
         test_data=df_eval.copy(),
         drift_detection_threshold=0.4,
         drift_detection_min_features=3,
         drift_detection_remove_size=0.5,
         search_space=PlainSearchSpace(enable_lr=False, enable_nn=False),
         report_render=ExcelReportRender(file_path),
     )
Esempio n. 10
0
def test_regression_task_report():
    """Regression experiment with drift detection and an excel report; checks
    the report/evaluate callbacks populated the experiment metadata."""
    target = 'target'
    df = dsutils.load_boston()
    n_rows = df.shape[0]
    df['Constant'] = [0] * n_rows
    df['Id'] = list(range(n_rows))

    train_df, eval_df = train_test_split(df, test_size=0.2)

    # Inject a feature whose distribution drifts between train and eval.
    train_df['Drifted'] = np.random.random(train_df.shape[0])
    eval_df['Drifted'] = np.random.random(eval_df.shape[0]) * 100
    report_path = common_util.get_temp_file_path(prefix="report_excel_",
                                                 suffix=".xlsx")
    print(report_path)
    experiment = make_experiment(
        PlainModel,
        train_df,
        target=target,
        eval_data=eval_df.copy(),
        test_data=eval_df.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=PlainSearchSpace(enable_lr=False,
                                      enable_nn=False,
                                      enable_dt=False,
                                      enable_dtr=True),
        report_render='excel',
        report_render_options={'file_path': report_path})
    estimator = experiment.run(max_trials=3)
    assert estimator is not None

    mlr_cb = next((cb for cb in experiment.callbacks
                   if isinstance(cb, MLReportCallback)), None)
    mle_cb = next((cb for cb in experiment.callbacks
                   if isinstance(cb, MLEvaluateCallback)), None)

    assert mlr_cb is not None
    meta: ExperimentMeta = mlr_cb.experiment_meta_

    assert len(meta.resource_usage) > 0
    assert len(meta.steps) == 5
    assert os.path.exists(report_path)

    assert mle_cb is not None
    assert meta.evaluation_metric is not None
    assert len(meta.prediction_stats) == 1
    assert len(meta.datasets) == 3
Esempio n. 11
0
def run_experiment(train_df, check_as_local=True, **kwargs):
    """Run a PlainModel experiment over *train_df* and, when *check_as_local*,
    verify the fitted estimator converts to a local (non-distributed) one."""
    exp = make_experiment(PlainModel, train_df,
                          search_space=PlainSearchSpace(), **kwargs)
    est = exp.run()
    print(exp.random_state, est)

    assert est is not None

    if not check_as_local:
        return

    assert hasattr(est, 'as_local')
    local_est = est.as_local()
    # A local estimator must not expose as_local again.
    assert not hasattr(local_est, 'as_local')
Esempio n. 12
0
def test_experiment_with_data_adaption():
    """Force data adaption by capping memory at half the dataframe's usage and
    check the pipeline starts with a data_adaption step."""
    data = MultiLabelEncoder().fit_transform(dsutils.load_bank())
    memory_bytes = int(data.memory_usage().sum())
    exp = make_experiment(
        PlainModel,
        data,
        target='y',
        search_space=PlainSearchSpace(),
        data_adaption_memory_limit=memory_bytes // 2,
        log_level='info',
    )
    est = exp.run(max_trials=3)
    assert est is not None
    first_step_name, _ = est.steps[0]
    assert first_step_name == 'data_adaption'
Esempio n. 13
0
def test_experiment_with_blood_down_sample():
    """Smoke test of down-sampled search on the blood dataset."""
    data = dsutils.load_blood()
    exp = make_experiment(
        PlainModel,
        data,
        target='Class',
        search_space=PlainSearchSpace(),
        down_sample_search=True,
        down_sample_search_size=0.1,
        down_sample_search_time_limit=300,
        down_sample_search_max_trials=10,
        # log_level='info',
    )
    est = exp.run(max_trials=3)
    print(est)
    assert est is not None
def main():
    """Run a regression experiment on boston data with a DT-regressor-only
    search space and an excel report, printing the fitted estimator."""
    data = dsutils.load_boston()
    train_df, _eval_df = train_test_split(data, test_size=0.2)

    space = PlainSearchSpace(enable_lr=False,
                             enable_nn=False,
                             enable_dt=False,
                             enable_dtr=True)

    exp = make_experiment(PlainModel,
                          train_df,
                          target='target',
                          search_space=space,
                          report_render='excel')
    est = exp.run(max_trials=3)
    print(est)