def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'

    ag_results_distilled_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_ag_leaderboard_1h_v15_distill.csv',
        framework_suffix='_1h')
    ag_results_distilled_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_ag_leaderboard_4h_v15_distill.csv',
        framework_suffix='_4h')

    results_list = [
        ag_results_distilled_1h,
        ag_results_distilled_4h,
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    frameworks_distilled = [
        'autogluon_compressed_1h',
        'autogluon_distilled_1h',
        'autogluon_ensemble_1h',
        'autogluon_compressed_4h',
        'autogluon_distilled_4h',
        'autogluon_ensemble_4h',
    ]

    results_ablation = results_raw[results_raw[FRAMEWORK].isin(
        frameworks_distilled)]
    save_pd.save(path=results_dir_output + 'openml_autogluon_distilled.csv',
                 df=results_ablation)
Example #2

def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = output_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"),
                         df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"),
                          object=ag_info)

        if 'models' in artifacts:
            utils.zip_path(models_dir, os.path.join(models_dir, "models.zip"))

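        # Prune the output directory: remove the per-model subdirectories and any stray
        # top-level .pkl files, keeping only the lightweight artifacts (leaderboard.csv
        # and, when requested, models.zip).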
        def delete(path, isdir):
            if isdir:
                shutil.rmtree(path, ignore_errors=True)
            elif os.path.splitext(path)[1] == '.pkl':
                os.remove(path)

        utils.walk_apply(models_dir, delete, max_depth=0)

    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
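
For context, which artifacts get written is controlled by the '_save_artifacts' entry of the framework parameters (see the first line of save_artifacts above). A minimal sketch of that switch, using a hypothetical stand-in for the harness-provided config object:

from types import SimpleNamespace

# Hypothetical config stub for illustration only; the real object comes from the benchmark runner.
config = SimpleNamespace(framework_params={'_save_artifacts': ['leaderboard', 'info']})

artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
# With this configuration, save_artifacts writes leaderboard.csv and info.pkl,
# but skips zipping the model directory because 'models' is not in the list.
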
Example #3

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'

    ag_results_ablation_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_ablation_1h.csv',
        framework_suffix='_1h')
    ag_results_ablation_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_ablation_4h.csv',
        framework_suffix='_4h')

    results_list = [
        ag_results_ablation_1h,
        ag_results_ablation_4h,
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    frameworks_ablation = [
        'autogluon_nostack_1h',
        'autogluon_nobag_1h',
        'autogluon_norepeatbag_1h',
        'autogluon_nonn_1h',
        'autogluon_noknn_1h',
        'autogluon_nostack_4h',
        'autogluon_nobag_4h',
        'autogluon_norepeatbag_4h',
        'autogluon_nonn_4h',
        'autogluon_noknn_4h',
    ]

    results_ablation = results_raw[results_raw[FRAMEWORK].isin(
        frameworks_ablation)]
    save_pd.save(path=results_dir_output + 'openml_autogluon_ablation.csv',
                 df=results_ablation)
Example #4

def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = make_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"),
                         df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = make_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"),
                          object=ag_info)

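        # Model artifacts were not requested: drop the models directory and any
        # top-level pickles so only the lightweight artifacts remain.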
        if 'models' not in artifacts:
            shutil.rmtree(os.path.join(models_dir, "models"),
                          ignore_errors=True)
            with os.scandir(models_dir) as it:
                for f in it:
                    if f.is_file() and os.path.splitext(f.name)[1] == '.pkl':
                        os.remove(f.path)
    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
Example #5

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/kaggle/'

    kaggle_results = preprocess_kaggle.preprocess_kaggle_input(
        path=results_dir_input + 'results_kaggle_wpercentile.csv',
        framework_suffix='')

    kaggle_results[FRAMEWORK] = kaggle_results[FRAMEWORK].str.replace(
        'GoogleAutoMLTables_', 'GCPTables_', regex=False)

    frameworks_core = [
        'autogluon_4h',
        'GCPTables_4h',
        'autosklearn_4h',
        'H2OAutoML_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
        'autogluon_8h',
        'GCPTables_8h',
        'H2OAutoML_8h',
        'autosklearn_8h',
        'TPOT_8h',
        'AutoWEKA_8h',
    ]

    results_list = [kaggle_results]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    results_ablation = results_raw[results_raw[FRAMEWORK].isin(
        frameworks_core)]
    save_pd.save(path=results_dir_output + 'kaggle_core.csv',
                 df=results_ablation)
Example #6

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/original/'
    results_dir_output = results_dir + 'input/prepared/openml/'

    other_results_large_4h = preprocess_openml.preprocess_openml_input(path=results_dir_input + 'results_large-8c4h.csv', framework_suffix='_4h')
    other_results_medium_4h = preprocess_openml.preprocess_openml_input(path=results_dir_input + 'results_medium-8c4h.csv', framework_suffix='_4h')
    other_results_small_4h = preprocess_openml.preprocess_openml_input(path=results_dir_input + 'results_small-8c4h.csv', framework_suffix='_4h')
    other_results_medium_1h = preprocess_openml.preprocess_openml_input(path=results_dir_input + 'results_medium-8c1h.csv', framework_suffix='_1h')
    other_results_small_1h = preprocess_openml.preprocess_openml_input(path=results_dir_input + 'results_small-8c1h.csv', framework_suffix='_1h')

    results_list = [other_results_large_4h, other_results_medium_4h, other_results_small_4h, other_results_medium_1h, other_results_small_1h]

    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    # Prefix framework names with 'orig_' so these rows are distinguishable downstream.
    results_raw[FRAMEWORK] = 'orig_' + results_raw[FRAMEWORK]

    frameworks_original = [
        'orig_H2OAutoML_1h',
        'orig_autosklearn_1h',
        'orig_TPOT_1h',
        'orig_AutoWEKA_1h',

        'orig_H2OAutoML_4h',
        'orig_autosklearn_4h',
        'orig_TPOT_4h',
        'orig_AutoWEKA_4h',
    ]

    results_original = results_raw[results_raw[FRAMEWORK].isin(frameworks_original)]
    save_pd.save(path=results_dir_output + 'openml_original.csv', df=results_original)
Example #7

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/core_1h_vs_4h/'

    results_raw = load_pd.load(path=results_dir_input + 'openml_core.csv')

    frameworks = [
        'autogluon',
        'GCPTables',
        'H2OAutoML',
        'autosklearn',
        'TPOT',
        'AutoWEKA',
    ]

    folds_to_keep = [0]
    banned_datasets = []
    full_results_pairs_merged_dict = {}
    for framework in frameworks:
        run_path_prefix = framework + '/'
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'

        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=[framework_1h, framework_4h],
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss'
            ],
            frameworks_compare_vs_all=[framework_4h],
            output_dir=results_dir_output + run_path_prefix,
        )
        full_results_pairs_merged_dict.update(results_pairs_merged_dict)

    dfs = []
    for framework in frameworks:
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'
        cur_df = full_results_pairs_merged_dict[framework_4h]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework_1h]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> 4h'
        cur_columns[2] = '< 4h'
        cur_columns[3] = '= 4h'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)
    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/1h_vs_4h.csv',
                 df=df_final)
Example #8

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'

    ag_results_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_1h.csv',
        framework_suffix='_1h')
    ag_results_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_4h.csv',
        framework_suffix='_4h')

    ag_results_1h[FRAMEWORK] = ag_results_1h[FRAMEWORK].str.replace(
        '_benchmark_', '_', regex=False)
    ag_results_4h[FRAMEWORK] = ag_results_4h[FRAMEWORK].str.replace(
        '_benchmark_', '_', regex=False)

    gcp_results_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_gcptables_1h.csv',
        framework_suffix='_1h')
    gcp_results_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_gcptables_4h.csv',
        framework_suffix='_4h')
    gcp_results_1h[FRAMEWORK] = gcp_results_1h[FRAMEWORK].str.replace(
        'GoogleAutoMLTables_benchmark_', 'GCPTables_', regex=False)
    gcp_results_4h[FRAMEWORK] = gcp_results_4h[FRAMEWORK].str.replace(
        'GoogleAutoMLTables_benchmark_', 'GCPTables_', regex=False)
    gcp_results_1h = gcp_results_1h[gcp_results_1h[FRAMEWORK] ==
                                    'GCPTables_1h']
    gcp_results_4h = gcp_results_4h[gcp_results_4h[FRAMEWORK] ==
                                    'GCPTables_4h']

    results_list = [
        gcp_results_1h, gcp_results_4h, ag_results_1h, ag_results_4h
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    frameworks_core = [
        'autogluon_1h',
        'GCPTables_1h',
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
        'autogluon_4h',
        'GCPTables_4h',
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]

    results_core = results_raw[results_raw[FRAMEWORK].isin(frameworks_core)]
    save_pd.save(path=results_dir_output + 'openml_core.csv', df=results_core)
Example #9

def aggregate_from_params(s3_bucket, s3_prefix, version_name, suffix,
                          contains):
    result_path = s3_prefix + version_name + '/'
    aggregated_results_name = 'results_automlbenchmark' + suffix + '_' + version_name + '.csv'

    df = aggregate(path_prefix='s3://' + s3_bucket + '/results/' + result_path,
                   contains=contains)

    save_pd.save(path='s3://' + s3_bucket + '/aggregated/' + result_path +
                 aggregated_results_name,
                 df=df)
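
A hypothetical call illustrating how aggregate_from_params composes its S3 paths (the bucket, prefix, and version name below are placeholders, not values from this project):

# Placeholder arguments for illustration only.
aggregate_from_params(
    s3_bucket='example-benchmark-bucket',
    s3_prefix='ec2/',
    version_name='2020_01_01',
    suffix='_1h',
    contains='_1h.',
)
# Reads result files under s3://example-benchmark-bucket/results/ec2/2020_01_01/
# and writes the aggregate to
# s3://example-benchmark-bucket/aggregated/ec2/2020_01_01/results_automlbenchmark_1h_2020_01_01.csv
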
Example #10

def prepare_data(config, dataset):
    print('#################')
    print('Config:')
    print(config.__json__())
    print()
    print('Dataset:')
    print(dataset.__dict__)
    print('#################')

    metrics_mapping = dict(acc=metrics.accuracy,
                           auc=metrics.roc_auc,
                           f1=metrics.f1,
                           logloss=metrics.log_loss,
                           mae=metrics.mean_absolute_error,
                           mse=metrics.mean_squared_error,
                           r2=metrics.r2)

    perf_metric = metrics_mapping.get(config.metric)
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    # un = dataset.train.path
    # print(un)
    # raw_data = loadarff(un)
    # df_data = pd.DataFrame(raw_data[0])

    X_train = dataset.train.X
    y_train = dataset.train.y
    X_test = dataset.test.X
    y_test = dataset.test.y

    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)

    # Save and reload the data to strip any pre-set dtypes; we want to observe performance in the worst-case scenario: raw CSV input
    save_pd.save(path='tmp/tmp_file_train.csv', df=X_train)
    X_train = load_pd.load(path='tmp/tmp_file_train.csv')
    save_pd.save(path='tmp/tmp_file_test.csv', df=X_test)
    X_test = load_pd.load(path='tmp/tmp_file_test.csv')

    is_classification = config.type == 'classification'
    if is_classification:
        unique_vals = np.unique(y_train)
        if len(unique_vals) == 2:
            problem_type = BINARY
        else:
            problem_type = MULTICLASS
    else:
        problem_type = REGRESSION

    return X_train, y_train, X_test, y_test, problem_type, perf_metric
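
A minimal usage sketch of the values returned by prepare_data, assuming the recent autogluon.tabular API and a hypothetical 'label' column name (the benchmark's actual fit call is not part of this snippet; config and dataset come from the harness):

from autogluon.tabular import TabularPredictor

X_train, y_train, X_test, y_test, problem_type, perf_metric = prepare_data(config, dataset)

train_data = X_train.copy()
train_data['label'] = y_train  # attach the target under a hypothetical column name
predictor = TabularPredictor(label='label', problem_type=problem_type).fit(train_data)
y_pred = predictor.predict(X_test)
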
Example #11

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'

    ag_results_autopilot_1h = preprocess_openml.preprocess_openml_input(path=results_dir_input + 'results_automlbenchmark_autopilot_1h.csv', framework_suffix='_1h')

    results_list = [ag_results_autopilot_1h]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    frameworks_autopilot = ['AutoPilot_1h']

    results_core = results_raw[results_raw[FRAMEWORK].isin(frameworks_autopilot)]
    save_pd.save(path=results_dir_output + 'openml_autopilot.csv', df=results_core)
Example #12

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'output/'
    results_dir_output = results_dir + 'output/combined/4h/tables/'

    input_openml = results_dir_input + 'openml/core/4h/results_ranked_by_dataset_all.csv'
    input_kaggle = results_dir_input + 'kaggle/4h/results_ranked_by_dataset_all.csv'

    results_ranked_by_dataset_all = load_pd.load([input_openml, input_kaggle])
    print(results_ranked_by_dataset_all)

    result = generate_charts.compute_dataset_framework_df(results_ranked_by_dataset_all)
    print(result)

    save_pd.save(path=results_dir_output + 'dataset_x_framework.csv', df=result)
Example #13

def evaluate(results_raw, frameworks=None, banned_datasets=None, folds_to_keep=None, columns_to_agg_extra=None, frameworks_compare_vs_all=None, output_dir=None):
    if frameworks is None:
        frameworks = sorted(list(results_raw[FRAMEWORK].unique()))
    if frameworks_compare_vs_all is None:
        frameworks_compare_vs_all = []
    if folds_to_keep is None:
        folds_to_keep = sorted(list(results_raw[FOLD].unique()))
    if banned_datasets is not None:
        results_raw = results_raw[~results_raw[DATASET].isin(banned_datasets)]

    total_datasets = sorted(results_raw[DATASET].unique())
    results_raw = preprocess_utils.clean_result(result_df=results_raw, folds_to_keep=folds_to_keep, remove_invalid=True)

    results_raw = results_raw[results_raw[FRAMEWORK].isin(frameworks)]

    # Calculate each framework's errored datasets
    total_frameworks = results_raw[FRAMEWORK].unique()
    total_folds = results_raw[FOLD].unique()
    num_frameworks = len(total_frameworks)
    num_datasets = len(total_datasets)
    num_folds = len(total_folds)
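    # Each (framework, dataset, fold) combination should contribute exactly one row;
    # the shortfall below counts runs that failed or are missing.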
    ideal_rows = num_folds * num_datasets * num_frameworks
    actual_rows = len(results_raw)
    errors = ideal_rows - actual_rows
    print('num_datasets:', num_datasets)
    print('num_folds:', num_folds)
    print('errors:', errors)

    for framework in total_frameworks:
        results_framework = results_raw[results_raw[FRAMEWORK] == framework]
        num_rows_framework = len(results_framework)
        datasets_framework = results_framework[DATASET].unique()
        datasets_framework_errors = [dataset for dataset in total_datasets if dataset not in datasets_framework]
        datasets_framework_errors_count = len(datasets_framework_errors)
        framework_fold_errors = num_datasets * num_folds - num_rows_framework
        print('################################################')
        print('framework:', framework)
        print('datasets_framework_errors:', datasets_framework_errors)
        print('datasets_framework_errors_count:', datasets_framework_errors_count)
        print('framework_fold_errors:', framework_fold_errors)
        print('################################################')

    all_results_pairs = {}
    for framework_2 in frameworks_compare_vs_all:
        results_list = []

        for framework_1 in total_frameworks:
            if framework_1 == framework_2:
                results_ranked, results_ranked_by_dataset = evaluate_utils.compare_frameworks(results_raw=results_raw, frameworks=[framework_2], banned_datasets=banned_datasets, folds_to_keep=folds_to_keep, columns_to_agg_extra=columns_to_agg_extra, datasets=total_datasets, verbose=False)
                ties = len(results_ranked_by_dataset)
                results_list.append([framework_1, 0, 0, ties])
                continue

            results_ranked, results_ranked_by_dataset = evaluate_utils.compare_frameworks(results_raw=results_raw, frameworks=[framework_1, framework_2], banned_datasets=banned_datasets, folds_to_keep=folds_to_keep, columns_to_agg_extra=columns_to_agg_extra, datasets=total_datasets, verbose=False)

            datasets_pair = results_ranked_by_dataset[DATASET].unique()
            framework_1_wins = 0
            framework_2_wins = 0
            ties = 0
            for dataset in datasets_pair:
                results_isolated = results_ranked_by_dataset[results_ranked_by_dataset[DATASET] == dataset]
                results_isolated = results_isolated[results_isolated[FRAMEWORK] == framework_1]
                results_isolated_rank = results_isolated[RANK].iloc[0]
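                # With exactly two frameworks, rank 1 means framework_1 won, rank 2 means
                # it lost, and rank 1.5 means a tie (average of ranks 1 and 2).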
                if results_isolated_rank == 1:
                    framework_1_wins += 1
                elif results_isolated_rank == 2:
                    framework_2_wins += 1
                elif results_isolated_rank == 1.5:
                    ties += 1
                else:
                    raise AssertionError('Rank not valid: %s' % results_isolated_rank)
            results_list.append([framework_1, framework_1_wins, framework_2_wins, ties])
        results_pairs = pd.DataFrame(data=results_list, columns=[FRAMEWORK, '> ' + framework_2, '< ' + framework_2, '= ' + framework_2])
        all_results_pairs[framework_2] = results_pairs

    print('################################################')
    print('%s VS %s' % ('all', 'all'))
    print('\tAll datasets regardless of failures')
    results_ranked_all, results_ranked_by_dataset_all = evaluate_utils.compare_frameworks(results_raw=results_raw, banned_datasets=banned_datasets, folds_to_keep=folds_to_keep, filter_errors=False, columns_to_agg_extra=columns_to_agg_extra, datasets=total_datasets)

    if output_dir:
        save_pd.save(path=output_dir + 'results_ranked_all.csv', df=results_ranked_all)
        save_pd.save(path=output_dir + 'results_ranked_by_dataset_all.csv', df=results_ranked_by_dataset_all)

    print('################################################')
    print('%s VS %s' % ('all', 'all'))
    print('\tOnly datasets where all frameworks succeeded')
    results_ranked_valid, results_ranked_by_dataset_valid = evaluate_utils.compare_frameworks(results_raw=results_raw, frameworks=frameworks, banned_datasets=banned_datasets, folds_to_keep=folds_to_keep, columns_to_agg_extra=columns_to_agg_extra, datasets=total_datasets)

    results_pairs_merged_dict = {}
    for framework in frameworks_compare_vs_all:
        columns_to_get_from_all = [RANK_1, 'rank=2_count', 'rank=3_count', 'rank>3_count', ERROR_COUNT]
        results_pairs = all_results_pairs[framework]
        results_pairs_merged = pd.merge(results_pairs, results_ranked_valid, on=FRAMEWORK, how='left')
        results_pairs_merged = results_pairs_merged.drop(columns_to_get_from_all, axis=1)
        results_pairs_merged = pd.merge(results_pairs_merged, results_ranked_all[[FRAMEWORK] + columns_to_get_from_all], on=FRAMEWORK, how='left')
        results_pairs_merged = results_pairs_merged.sort_values(by=RANK)
        print('################################################')
        print('%s VS %s' % (framework, 'all'))
        with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
            print(results_pairs_merged)
        if output_dir:
            save_pd.save(path=output_dir + 'pairwise/' + framework + '.csv', df=results_pairs_merged)
        results_pairs_merged_dict[framework] = results_pairs_merged

    if output_dir:
        save_pd.save(path=output_dir + 'results_ranked_valid.csv', df=results_ranked_valid)
        save_pd.save(path=output_dir + 'results_ranked_by_dataset_valid.csv', df=results_ranked_by_dataset_valid)

    return results_ranked_valid, results_ranked_by_dataset_valid, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict
Example #14

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/orig_vs_core10fold/'

    results_raw = load_pd.load(path=[
        results_dir_input + 'openml_core.csv',
        results_dir_input + 'openml_original.csv',
    ])

    frameworks_1h = [
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
    ]

    frameworks_4h = [
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]

    frameworks_run_list = [frameworks_1h, frameworks_4h]
    folds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    folds_to_keep_list = [folds, folds]
    banned_datasets_list = [DATASETS_LARGE, []]
    num_runs = len(frameworks_run_list)
    full_results_pairs_merged_dict = {}
    for i in range(num_runs):
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]
        banned_datasets = banned_datasets_list[i]

        for framework in frameworks_run:
            run_path_prefix = framework + '/'
            orig_framework = 'orig_' + framework

            results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
                results_raw=results_raw,
                frameworks=[framework, orig_framework],
                banned_datasets=banned_datasets,
                folds_to_keep=folds_to_keep,
                columns_to_agg_extra=[
                    # TIME_INFER_S,
                    'acc',
                    'auc',
                    'logloss'
                ],
                frameworks_compare_vs_all=[orig_framework],
                output_dir=results_dir_output + run_path_prefix,
            )
            full_results_pairs_merged_dict.update(results_pairs_merged_dict)

    dfs = []
    frameworks_full = frameworks_1h + frameworks_4h
    for framework in frameworks_full:
        orig_framework = 'orig_' + framework
        cur_df = full_results_pairs_merged_dict[orig_framework]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> Original'
        cur_columns[2] = '< Original'
        cur_columns[3] = '= Original'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)
    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/new_vs_old.csv', df=df_final)
Example #15

def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/kaggle/'
    output_prefix = 'output/kaggle/'
    raw_kaggle_file = 'results_kaggle_wpercentile.csv'

    results_raw = load_pd.load(path=[
        results_dir_input + 'kaggle_core.csv',
    ])
    # First generate datasets x frameworks raw data dumps:
    metrics = ['LEADER_PERCENTILE', METRIC_SCORE]
    dataset_order = [
        'house-prices-advanced-regression-techniques',
        'mercedes-benz-greener-manufacturing',
        'santander-value-prediction-challenge', 'allstate-claims-severity',
        'bnp-paribas-cardif-claims-management',
        'santander-customer-transaction-prediction',
        'santander-customer-satisfaction',
        'porto-seguro-safe-driver-prediction', 'ieee-fraud-detection',
        'walmart-recruiting-trip-type-classification',
        'otto-group-product-classification-challenge'
    ]
    dataset_order = [KAGGLE_ABBREVS[dat] for dat in dataset_order]
    method_order = [
        'AutoWEKA', 'autosklearn', 'TPOT', 'H2OAutoML', 'GCPTables',
        'autogluon'
    ]
    time_limits = ['4h', '8h']
    results_raw2 = results_raw.drop(METRIC_ERROR, axis=1).copy()
    results_raw2['LEADER_PERCENTILE'] = 1 - results_raw2[
        'LEADER_PERCENTILE']  # convert to actual percentile
    results_raw2.rename(columns={'LEADER_PERCENTILE': METRIC_ERROR},
                        inplace=True)

    # loss_df = generate_charts.compute_dataset_framework_df(results_raw) # values = losses
    percentile_df = generate_charts.compute_dataset_framework_df(results_raw2)
    for time_limit in time_limits:
        methods_t = [meth + "_" + time_limit for meth in method_order]
        df_time = percentile_df[[DATASET] + methods_t].copy()
        df_time[DATASET] = df_time[DATASET].map(KAGGLE_ABBREVS)
        df_ordered = df_time.set_index(DATASET)
        df_ordered = df_ordered.reindex(dataset_order)
        # df_ordered.reset_index(inplace=True)
        # df_ordered.rename(columns={'dataset': 'Dataset'},inplace=True)
        df_ordered.rename(columns=NOTIME_NAMES, inplace=True)
        save_pd.save(path=results_dir + output_prefix + time_limit +
                     "/datasetsXframeworks.csv",
                     df=df_ordered)
        textable_file = results_dir + output_prefix + time_limit + "/allpercentiles.tex"
        tex_table.tex_table(df_ordered,
                            textable_file,
                            bold='max',
                            nan_char=" x ",
                            max_digits=5)

    # Next do pairwise comparisons:
    num_frameworks = 6
    valid_frameworks = [
        'autogluon_4h',
        'GCPTables_4h',
        'autosklearn_4h',
        'H2OAutoML_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
        'autogluon_8h',
        'GCPTables_8h',
        'H2OAutoML_8h',
        'autosklearn_8h',
        'TPOT_8h',
        'AutoWEKA_8h',
    ]

    frameworks_compare_vs_all_list = [
        'autogluon_4h', 'autogluon_8h', 'autogluon_4h', 'autogluon_8h'
    ]
    results_dir_output_list = [
        '4h/', '8h/', 'allVautogluon_4h/', 'allVautogluon_8h/'
    ]
    results_dir_output_list = [
        results_dir + output_prefix + name for name in results_dir_output_list
    ]
    framework_compare_ind_list = [  # list of lists, each corresponding to indices of valid_frameworks that should be compared in a single table.
        list(range(num_frameworks)),
        list(range(num_frameworks, num_frameworks * 2)),
        range(num_frameworks * 2),
        range(num_frameworks * 2),
    ]

    for i in range(len(results_dir_output_list)):
        results_dir_output = results_dir_output_list[i]
        frameworks_to_compare = [
            valid_frameworks[j] for j in framework_compare_ind_list[i]
        ]
        framework_compare_vs_all = frameworks_compare_vs_all_list[i]
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_to_compare,
            banned_datasets=[],
            folds_to_keep=None,
            frameworks_compare_vs_all=[framework_compare_vs_all],
            output_dir=results_dir_output,
            columns_to_agg_extra=['LEADER_PERCENTILE'],
        )
        textab = tex_pairwise_table(results_dir_output,
                                    framework_compare_vs_all)

    # Generate plots:
    producePlots(time_limits, results_dir, raw_kaggle_file)