Python preprocess_openml_input Exemples, autogluon_utils.benchmarking.evaluation.preprocess.preprocess_openml.preprocess_openml_input Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : run_generate_clean_openml_distill.py Projet : yyht/autogluon-benchmarking

def run():
    """Prepare the OpenML results file for the AutoGluon distillation runs.

    Reads the raw 1h and 4h distillation leaderboard CSVs through
    ``preprocess_openml.preprocess_openml_input``, stacks them into a single
    frame, keeps only the rows whose ``FRAMEWORK`` value is one of the
    compressed / distilled / ensemble variants listed below, and writes the
    result to ``openml_autogluon_distilled.csv`` in the prepared directory.
    """
    base_dir = 'data/results/'
    raw_dir = base_dir + 'input/raw/'
    prepared_dir = base_dir + 'input/prepared/openml/'

    # (raw file name, framework suffix) for each time budget, in load order.
    sources = [
        ('results_ag_leaderboard_1h_v15_distill.csv', '_1h'),
        ('results_ag_leaderboard_4h_v15_distill.csv', '_4h'),
    ]
    frames = [
        preprocess_openml.preprocess_openml_input(
            path=raw_dir + file_name, framework_suffix=suffix)
        for file_name, suffix in sources
    ]
    combined = pd.concat(frames, ignore_index=True, sort=True)

    wanted_frameworks = [
        'autogluon_compressed_1h',
        'autogluon_distilled_1h',
        'autogluon_ensemble_1h',
        'autogluon_compressed_4h',
        'autogluon_distilled_4h',
        'autogluon_ensemble_4h',
    ]

    # Boolean mask selecting only the frameworks of interest.
    keep = combined[FRAMEWORK].isin(wanted_frameworks)
    results_distilled = combined[keep]
    save_pd.save(path=prepared_dir + 'openml_autogluon_distilled.csv',
                 df=results_distilled)

Exemple #2

0

Afficher le fichier

Fichier : run_generate_clean_openml_original.py Projet : yyht/autogluon-benchmarking

def run():
    """Prepare the OpenML results file for the original benchmark runs.

    Loads the five raw result CSVs (large/medium/small at 4h, medium/small at
    1h) through ``preprocess_openml.preprocess_openml_input``, stacks them,
    prefixes every framework name with ``'orig_'``, keeps only the frameworks
    listed in ``frameworks_original`` and saves the result to
    ``openml_original.csv`` in the prepared directory.
    """
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/original/'
    results_dir_output = results_dir + 'input/prepared/openml/'

    # (raw file name, framework suffix), kept in the original load order.
    sources = [
        ('results_large-8c4h.csv', '_4h'),
        ('results_medium-8c4h.csv', '_4h'),
        ('results_small-8c4h.csv', '_4h'),
        ('results_medium-8c1h.csv', '_1h'),
        ('results_small-8c1h.csv', '_1h'),
    ]
    results_list = [
        preprocess_openml.preprocess_openml_input(
            path=results_dir_input + file_name, framework_suffix=suffix)
        for file_name, suffix in sources
    ]

    results_raw = pd.concat(results_list, ignore_index=True, sort=True)

    # Prefix the names with a vectorised string concatenation; the previous
    # zip()/name[0] round trip wrapped each value in a 1-tuple only to unwrap
    # it again.
    results_raw[FRAMEWORK] = 'orig_' + results_raw[FRAMEWORK]

    frameworks_original = [
        'orig_H2OAutoML_1h',
        'orig_autosklearn_1h',
        'orig_TPOT_1h',
        'orig_AutoWEKA_1h',

        'orig_H2OAutoML_4h',
        'orig_autosklearn_4h',
        'orig_TPOT_4h',
        'orig_AutoWEKA_4h',
    ]

    results_original = results_raw[
        results_raw[FRAMEWORK].isin(frameworks_original)]
    save_pd.save(path=results_dir_output + 'openml_original.csv',
                 df=results_original)

Exemple #3

0

Afficher le fichier

Fichier : run_generate_clean_openml_ablation.py Projet : yyht/autogluon-benchmarking

def run():
    """Prepare the OpenML results file for the AutoGluon ablation runs.

    The raw 1h and 4h ablation CSVs are preprocessed, concatenated, filtered
    down to the ablation framework names listed below, and saved as
    ``openml_autogluon_ablation.csv`` in the prepared directory.
    """
    root = 'data/results/'
    input_root = root + 'input/raw/'
    output_root = root + 'input/prepared/openml/'

    # Framework suffix -> raw file; dict order fixes the load/concat order.
    raw_files = {
        '_1h': 'results_automlbenchmark_ablation_1h.csv',
        '_4h': 'results_automlbenchmark_ablation_4h.csv',
    }
    per_budget = []
    for suffix, file_name in raw_files.items():
        frame = preprocess_openml.preprocess_openml_input(
            path=input_root + file_name,
            framework_suffix=suffix,
        )
        per_budget.append(frame)

    stacked = pd.concat(per_budget, ignore_index=True, sort=True)

    ablation_names = [
        'autogluon_nostack_1h',
        'autogluon_nobag_1h',
        'autogluon_norepeatbag_1h',
        'autogluon_nonn_1h',
        'autogluon_noknn_1h',
        'autogluon_nostack_4h',
        'autogluon_nobag_4h',
        'autogluon_norepeatbag_4h',
        'autogluon_nonn_4h',
        'autogluon_noknn_4h',
    ]

    is_ablation = stacked[FRAMEWORK].isin(ablation_names)
    save_pd.save(
        path=output_root + 'openml_autogluon_ablation.csv',
        df=stacked[is_ablation],
    )

Exemple #4

0

Afficher le fichier

def run():
    """Prepare the OpenML results file for the core framework comparison.

    Two groups of raw results are loaded for the 1h and 4h budgets:

    * the automlbenchmark results, whose framework names have the
      ``'_benchmark_'`` infix collapsed to ``'_'``;
    * the GCP Tables results, whose ``'GoogleAutoMLTables_benchmark_'`` prefix
      is renamed to ``'GCPTables_'`` and which are then restricted to the
      ``GCPTables_1h`` / ``GCPTables_4h`` rows.

    Everything is concatenated (GCP frames first), filtered to the frameworks
    in ``core_names`` and saved as ``openml_core.csv``.
    """
    base = 'data/results/'
    raw_dir = base + 'input/raw/'
    out_dir = base + 'input/prepared/openml/'

    def _load(file_name, suffix):
        # Thin wrapper so each source below is a single (file, suffix) pair.
        return preprocess_openml.preprocess_openml_input(
            path=raw_dir + file_name, framework_suffix=suffix)

    # automlbenchmark results: load both budgets, then normalise the names.
    ag_frames = [
        _load('results_automlbenchmark_1h.csv', '_1h'),
        _load('results_automlbenchmark_4h.csv', '_4h'),
    ]
    for frame in ag_frames:
        frame[FRAMEWORK] = frame[FRAMEWORK].str.replace(
            '_benchmark_', '_', regex=False)

    # GCP Tables results: load, rename, then keep only the renamed framework.
    gcp_frames = [
        _load('results_automlbenchmark_gcptables_1h.csv', '_1h'),
        _load('results_automlbenchmark_gcptables_4h.csv', '_4h'),
    ]
    for frame in gcp_frames:
        frame[FRAMEWORK] = frame[FRAMEWORK].str.replace(
            'GoogleAutoMLTables_benchmark_', 'GCPTables_', regex=False)
    gcp_only = [
        frame[frame[FRAMEWORK] == target]
        for frame, target in zip(gcp_frames, ('GCPTables_1h', 'GCPTables_4h'))
    ]

    # GCP frames come first, matching the original concatenation order.
    merged = pd.concat(gcp_only + ag_frames, ignore_index=True, sort=True)

    core_names = [
        'autogluon_1h',
        'GCPTables_1h',
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
        'autogluon_4h',
        'GCPTables_4h',
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]

    core_rows = merged[merged[FRAMEWORK].isin(core_names)]
    save_pd.save(path=out_dir + 'openml_core.csv', df=core_rows)

Exemple #5

0

Afficher le fichier

def run():
    """Prepare the OpenML results file for the AutoPilot 1h run.

    Preprocesses the raw AutoPilot 1h CSV, keeps the rows whose ``FRAMEWORK``
    is ``'AutoPilot_1h'`` and saves them as ``openml_autopilot.csv`` in the
    prepared directory.
    """
    root_dir = 'data/results/'
    raw_dir = root_dir + 'input/raw/'
    prepared_dir = root_dir + 'input/prepared/openml/'

    autopilot_1h = preprocess_openml.preprocess_openml_input(
        path=raw_dir + 'results_automlbenchmark_autopilot_1h.csv',
        framework_suffix='_1h',
    )

    # The single frame still goes through concat so it receives the same
    # ignore_index / sort handling as before.
    stacked = pd.concat([autopilot_1h], ignore_index=True, sort=True)

    autopilot_names = ['AutoPilot_1h']
    selected = stacked[stacked[FRAMEWORK].isin(autopilot_names)]

    save_pd.save(path=prepared_dir + 'openml_autopilot.csv', df=selected)