Code Example #1
File: job_test.py  Project: lixfz/Hypernets
    def test_read_supported_file(self):
        from hypernets.tabular.datasets.dsutils import basedir
        csv_file = f'{basedir}/heart-disease-uci.csv'
        df_csv = ExperimentJobCreator._read_file(csv_file)
        assert df_csv.shape[0] > 1

        file_path = common_util.get_temp_file_path(prefix="heart-disease-uci",
                                                   suffix=".parquet")
        df_csv.to_parquet(file_path)

        df_parquet = pd.read_parquet(file_path)
        assert df_parquet.shape == df_csv.shape
Code Example #2
File: common_test.py  Project: lixfz/Hypernets
def test_make_tempfile():

    temp_file_path: str = common_util.get_temp_file_path(prefix='prefix', suffix='.txt')
    assert not os.path.exists(temp_file_path)

    assert os.path.basename(temp_file_path).startswith('prefix')
    assert os.path.basename(temp_file_path).endswith('.txt')

    temp_file_dir_created = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=True)
    assert os.path.exists(temp_file_dir_created)

    temp_file_dir_not_created = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=False)
    assert not os.path.exists(temp_file_dir_not_created)
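
Taken together, the assertions above pin down the contract the other examples rely on: get_temp_file_path returns a fresh path without creating the file, and get_temp_dir_path creates the directory only when create=True. Below is a minimal usage sketch, assuming common_util is imported the same way as in these tests (the import path itself is not shown in the snippets):

import os

# get_temp_file_path: a path with the requested prefix/suffix; the file does not exist yet.
csv_path = common_util.get_temp_file_path(prefix='demo_', suffix='.csv')
assert not os.path.exists(csv_path)

# The caller creates the file itself, e.g. with pandas or plain open().
with open(csv_path, 'w') as f:
    f.write('a,b\n1,2\n')
assert os.path.exists(csv_path)

# get_temp_dir_path: the directory is created only when create=True.
work_dir = common_util.get_temp_dir_path(prefix='demo_', suffix='_dir', create=True)
assert os.path.isdir(work_dir)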
Code Example #3
def test_regression_task_report():
    df = dsutils.load_boston()
    df['Constant'] = [0 for i in range(df.shape[0])]
    df['Id'] = [i for i in range(df.shape[0])]

    target = 'target'

    df_train, df_eval = train_test_split(df, test_size=0.2)

    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_eval['Drifted'] = np.random.random(df_eval.shape[0]) * 100
    file_path = common_util.get_temp_file_path(prefix="report_excel_",
                                               suffix=".xlsx")
    print(file_path)
    experiment = make_experiment(
        PlainModel,
        df_train,
        target=target,
        eval_data=df_eval.copy(),
        test_data=df_eval.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=PlainSearchSpace(enable_lr=False,
                                      enable_nn=False,
                                      enable_dt=False,
                                      enable_dtr=True),
        report_render='excel',
        report_render_options={'file_path': file_path})
    estimator = experiment.run(max_trials=3)
    assert estimator is not None
    mlr_callback = None
    mle_callback = None
    for callback in experiment.callbacks:
        if isinstance(callback, MLReportCallback):
            mlr_callback = callback
        if isinstance(callback, MLEvaluateCallback):
            mle_callback = callback

    assert mlr_callback is not None
    _experiment_meta: ExperimentMeta = mlr_callback.experiment_meta_

    assert len(_experiment_meta.resource_usage) > 0
    assert len(_experiment_meta.steps) == 5
    assert os.path.exists(file_path)

    assert mle_callback is not None
    assert _experiment_meta.evaluation_metric is not None
    assert len(_experiment_meta.prediction_stats) == 1
    assert len(_experiment_meta.datasets) == 3
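
Because report_render='excel' together with report_render_options={'file_path': file_path} writes the report workbook to file_path, the output can be inspected after the run. A short follow-up sketch, assuming pandas with an Excel engine (e.g. openpyxl) is available; the report's sheet names are not documented here, so all sheets are loaded:

import pandas as pd

# Load every sheet of the generated report into a dict of DataFrames keyed by sheet name.
report_sheets = pd.read_excel(file_path, sheet_name=None)
for sheet_name, sheet_df in report_sheets.items():
    print(sheet_name, sheet_df.shape)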
Code Example #4
def run_export_excel_report(maker, has_eval_data=True, str_label=True):
    df = dsutils.load_blood()
    df['Constant'] = [0 for i in range(df.shape[0])]
    df['Id'] = [i for i in range(df.shape[0])]

    target = 'Class'
    labels = ["no", "yes"]
    if str_label:
        df[target] = df[target].map(lambda v: labels[v])

    df_train, df_eval = train_test_split(df, test_size=0.2)

    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_eval['Drifted'] = np.random.random(df_eval.shape[0]) * 100

    file_path = common_util.get_temp_file_path(prefix="report_excel_",
                                               suffix=".xlsx")
    print(file_path)
    experiment = maker(df_train, target, df_eval, file_path)
    estimator = experiment.run(max_trials=3)
    assert estimator is not None
    mlr_callback = None
    mle_callback = None
    for callback in experiment.callbacks:
        if isinstance(callback, MLReportCallback):
            mlr_callback = callback
        if isinstance(callback, MLEvaluateCallback):
            mle_callback = callback

    assert mlr_callback is not None
    _experiment_meta: ExperimentMeta = mlr_callback.experiment_meta_

    assert len(_experiment_meta.resource_usage) > 0
    assert os.path.exists(file_path)

    if has_eval_data:
        assert mle_callback is not None
        assert _experiment_meta.confusion_matrix is not None
        assert _experiment_meta.classification_report is not None
        assert len(_experiment_meta.prediction_elapsed) == 2
        assert _experiment_meta.confusion_matrix.data.shape == (
            2, 2)  # binary classification
        assert len(_experiment_meta.datasets) == 3
    else:
        assert len(_experiment_meta.datasets) == 2
    return _experiment_meta
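
run_export_excel_report is parameterized by a maker callable that builds the experiment from the train data, target, eval data and report path. Below is a hypothetical maker, modeled on Code Example #3 (the PlainSearchSpace flags are an assumption; load_blood is a binary classification task, so the decision tree classifier is enabled instead of the regressor):

def _excel_report_maker(df_train, target, df_eval, file_path):
    # Hypothetical maker modeled on Code Example #3; not part of the original tests.
    return make_experiment(
        PlainModel,
        df_train,
        target=target,
        eval_data=df_eval.copy(),
        test_data=df_eval.copy(),
        search_space=PlainSearchSpace(enable_lr=False,
                                      enable_nn=False,
                                      enable_dt=True,
                                      enable_dtr=False),
        report_render='excel',
        report_render_options={'file_path': file_path})


experiment_meta = run_export_excel_report(_excel_report_maker, has_eval_data=True)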
Code Example #5
File: report_test.py  Project: lixfz/Hypernets
    def test_render(self):
        steps_meta = [
            self.create_data_clean_step_meta(),
            self.create_ensemble_step_meta()
        ]
        experiment_meta = ExperimentMeta(
            task=const.TASK_BINARY,
            datasets=self.create_dataset_meta(),
            steps=steps_meta,
            evaluation_metric=self.create_binary_metric_data(),
            confusion_matrix=self.create_confusion_matrix_data(),
            resource_usage=self.create_resource_monitor_df(),
            prediction_stats=self.create_prediction_stats_df())
        p = common_util.get_temp_file_path(prefix="report_excel_",
                                           suffix=".xlsx")
        print(p)
        ExcelReportRender(file_path=p).render(experiment_meta)
        assert os.path.exists(p)