def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'
    ag_results_distilled_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_ag_leaderboard_1h_v15_distill.csv',
        framework_suffix='_1h')
    ag_results_distilled_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_ag_leaderboard_4h_v15_distill.csv',
        framework_suffix='_4h')
    results_list = [
        ag_results_distilled_1h,
        ag_results_distilled_4h,
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)
    frameworks_distilled = [
        'autogluon_compressed_1h',
        'autogluon_distilled_1h',
        'autogluon_ensemble_1h',
        'autogluon_compressed_4h',
        'autogluon_distilled_4h',
        'autogluon_ensemble_4h',
    ]
    results_distilled = results_raw[results_raw[FRAMEWORK].isin(frameworks_distilled)]
    save_pd.save(path=results_dir_output + 'openml_autogluon_distilled.csv', df=results_distilled)
def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = output_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"), df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)

        if 'models' in artifacts:
            utils.zip_path(models_dir, os.path.join(models_dir, "models.zip"))

        def delete(path, isdir):
            if isdir:
                shutil.rmtree(path, ignore_errors=True)
            elif os.path.splitext(path)[1] == '.pkl':
                os.remove(path)

        utils.walk_apply(models_dir, delete, max_depth=0)
    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
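# Hypothetical illustration (not part of the benchmark code): '_save_artifacts'
# is read from the framework params with ['leaderboard'] as the default, so a
# config like the stand-in below would persist the leaderboard and info
# artifacts but skip zipping the models. SimpleNamespace is only a stand-in
# for the benchmark's real config object.
from types import SimpleNamespace

example_config = SimpleNamespace(framework_params={'_save_artifacts': ['leaderboard', 'info']})
example_artifacts = example_config.framework_params.get('_save_artifacts', ['leaderboard'])
assert 'leaderboard' in example_artifacts and 'models' not in example_artifacts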
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'
    ag_results_ablation_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_ablation_1h.csv',
        framework_suffix='_1h')
    ag_results_ablation_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_ablation_4h.csv',
        framework_suffix='_4h')
    results_list = [
        ag_results_ablation_1h,
        ag_results_ablation_4h,
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)
    frameworks_ablation = [
        'autogluon_nostack_1h',
        'autogluon_nobag_1h',
        'autogluon_norepeatbag_1h',
        'autogluon_nonn_1h',
        'autogluon_noknn_1h',
        'autogluon_nostack_4h',
        'autogluon_nobag_4h',
        'autogluon_norepeatbag_4h',
        'autogluon_nonn_4h',
        'autogluon_noknn_4h',
    ]
    results_ablation = results_raw[results_raw[FRAMEWORK].isin(frameworks_ablation)]
    save_pd.save(path=results_dir_output + 'openml_autogluon_ablation.csv', df=results_ablation)
def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = make_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"), df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = make_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)

        if 'models' not in artifacts:
            shutil.rmtree(os.path.join(models_dir, "models"), ignore_errors=True)

        with os.scandir(models_dir) as it:
            for f in it:
                if f.is_file() and os.path.splitext(f.name)[1] == '.pkl':
                    os.remove(f.path)
    except Exception:  # a bare 'except:' would also swallow SystemExit/KeyboardInterrupt
        log.warning("Error when saving artifacts.", exc_info=True)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/kaggle/'
    kaggle_results = preprocess_kaggle.preprocess_kaggle_input(
        path=results_dir_input + 'results_kaggle_wpercentile.csv',
        framework_suffix='')
    kaggle_results[FRAMEWORK] = kaggle_results[FRAMEWORK].str.replace(
        'GoogleAutoMLTables_', 'GCPTables_', regex=False)
    frameworks_core = [
        'autogluon_4h',
        'GCPTables_4h',
        'autosklearn_4h',
        'H2OAutoML_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
        'autogluon_8h',
        'GCPTables_8h',
        'H2OAutoML_8h',
        'autosklearn_8h',
        'TPOT_8h',
        'AutoWEKA_8h',
    ]
    results_list = [kaggle_results]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)
    results_core = results_raw[results_raw[FRAMEWORK].isin(frameworks_core)]
    save_pd.save(path=results_dir_output + 'kaggle_core.csv', df=results_core)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/original/'
    results_dir_output = results_dir + 'input/prepared/openml/'
    other_results_large_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_large-8c4h.csv', framework_suffix='_4h')
    other_results_medium_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_medium-8c4h.csv', framework_suffix='_4h')
    other_results_small_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_small-8c4h.csv', framework_suffix='_4h')
    other_results_medium_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_medium-8c1h.csv', framework_suffix='_1h')
    other_results_small_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_small-8c1h.csv', framework_suffix='_1h')
    results_list = [
        other_results_large_4h,
        other_results_medium_4h,
        other_results_small_4h,
        other_results_medium_1h,
        other_results_small_1h,
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)
    # Prefix with 'orig_' to distinguish the originally published results from the new runs
    results_raw[FRAMEWORK] = 'orig_' + results_raw[FRAMEWORK]
    frameworks_original = [
        'orig_H2OAutoML_1h',
        'orig_autosklearn_1h',
        'orig_TPOT_1h',
        'orig_AutoWEKA_1h',
        'orig_H2OAutoML_4h',
        'orig_autosklearn_4h',
        'orig_TPOT_4h',
        'orig_AutoWEKA_4h',
    ]
    results_original = results_raw[results_raw[FRAMEWORK].isin(frameworks_original)]
    save_pd.save(path=results_dir_output + 'openml_original.csv', df=results_original)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/core_1h_vs_4h/'
    results_raw = load_pd.load(path=results_dir_input + 'openml_core.csv')
    frameworks = [
        'autogluon',
        'GCPTables',
        'H2OAutoML',
        'autosklearn',
        'TPOT',
        'AutoWEKA',
    ]
    folds_to_keep = [0]
    banned_datasets = []

    full_results_pairs_merged_dict = {}
    for framework in frameworks:
        run_path_prefix = framework + '/'
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=[framework_1h, framework_4h],
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss',
            ],
            frameworks_compare_vs_all=[framework_4h],
            output_dir=results_dir_output + run_path_prefix,
        )
        full_results_pairs_merged_dict.update(results_pairs_merged_dict)

    dfs = []
    for framework in frameworks:
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'
        cur_df = full_results_pairs_merged_dict[framework_4h]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework_1h]
        cur_columns = list(cur_df.columns)
        # Columns 1-3 hold the pairwise '>', '<', '=' counts of the 1h variant vs the 4h variant
        cur_columns[1] = '> 4h'
        cur_columns[2] = '< 4h'
        cur_columns[3] = '= 4h'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)
    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/1h_vs_4h.csv', df=df_final)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'
    ag_results_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_1h.csv',
        framework_suffix='_1h')
    ag_results_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_4h.csv',
        framework_suffix='_4h')
    ag_results_1h[FRAMEWORK] = ag_results_1h[FRAMEWORK].str.replace('_benchmark_', '_', regex=False)
    ag_results_4h[FRAMEWORK] = ag_results_4h[FRAMEWORK].str.replace('_benchmark_', '_', regex=False)
    gcp_results_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_gcptables_1h.csv',
        framework_suffix='_1h')
    gcp_results_4h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_gcptables_4h.csv',
        framework_suffix='_4h')
    gcp_results_1h[FRAMEWORK] = gcp_results_1h[FRAMEWORK].str.replace('GoogleAutoMLTables_benchmark_', 'GCPTables_', regex=False)
    gcp_results_4h[FRAMEWORK] = gcp_results_4h[FRAMEWORK].str.replace('GoogleAutoMLTables_benchmark_', 'GCPTables_', regex=False)
    gcp_results_1h = gcp_results_1h[gcp_results_1h[FRAMEWORK] == 'GCPTables_1h']
    gcp_results_4h = gcp_results_4h[gcp_results_4h[FRAMEWORK] == 'GCPTables_4h']
    results_list = [
        gcp_results_1h,
        gcp_results_4h,
        ag_results_1h,
        ag_results_4h,
    ]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)
    frameworks_core = [
        'autogluon_1h',
        'GCPTables_1h',
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
        'autogluon_4h',
        'GCPTables_4h',
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]
    results_core = results_raw[results_raw[FRAMEWORK].isin(frameworks_core)]
    save_pd.save(path=results_dir_output + 'openml_core.csv', df=results_core)
def aggregate_from_params(s3_bucket, s3_prefix, version_name, suffix, contains):
    result_path = s3_prefix + version_name + '/'
    aggregated_results_name = 'results_automlbenchmark' + suffix + '_' + version_name + '.csv'
    df = aggregate(path_prefix='s3://' + s3_bucket + '/results/' + result_path, contains=contains)
    save_pd.save(path='s3://' + s3_bucket + '/aggregated/' + result_path + aggregated_results_name, df=df)
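# Illustrative call with placeholder bucket/prefix/version names; it needs S3
# access and the repo's aggregate/save_pd helpers, so treat it as a sketch of
# the path layout rather than a runnable test. With these arguments the
# function reads every result file under
#   s3://my-bucket/results/automlbenchmark/v15/
# whose path contains '1h8c' and writes the combined frame to
#   s3://my-bucket/aggregated/automlbenchmark/v15/results_automlbenchmark_1h_v15.csv
aggregate_from_params(
    s3_bucket='my-bucket',
    s3_prefix='automlbenchmark/',
    version_name='v15',
    suffix='_1h',
    contains='1h8c',
)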
def prepare_data(config, dataset):
    print('#################')
    print('Config:')
    print(config.__json__())
    print()
    print('Dataset:')
    print(dataset.__dict__)
    print('#################')

    metrics_mapping = dict(
        acc=metrics.accuracy,
        auc=metrics.roc_auc,
        f1=metrics.f1,
        logloss=metrics.log_loss,
        mae=metrics.mean_absolute_error,
        mse=metrics.mean_squared_error,
        r2=metrics.r2,
    )
    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    X_train = pd.DataFrame(dataset.train.X)
    y_train = dataset.train.y
    X_test = pd.DataFrame(dataset.test.X)
    y_test = dataset.test.y

    # Save and load the data to remove any pre-set dtypes; we want to observe performance in the worst-case scenario: raw csv
    save_pd.save(path='tmp/tmp_file_train.csv', df=X_train)
    X_train = load_pd.load(path='tmp/tmp_file_train.csv')
    save_pd.save(path='tmp/tmp_file_test.csv', df=X_test)
    X_test = load_pd.load(path='tmp/tmp_file_test.csv')

    is_classification = config.type == 'classification'
    if is_classification:
        unique_vals = np.unique(y_train)
        if len(unique_vals) == 2:
            problem_type = BINARY
        else:
            problem_type = MULTICLASS
    else:
        problem_type = REGRESSION

    return X_train, y_train, X_test, y_test, problem_type, perf_metric
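# Standalone demonstration (separate from the benchmark code) of why the CSV
# round-trip above discards pre-set dtypes: a categorical column is written as
# plain text and comes back as a generic object column, so the framework must
# infer types from raw values.
import pandas as pd

_df = pd.DataFrame({'a': pd.Categorical(['x', 'y', 'x']), 'b': [1, 2, 3]})
_df.to_csv('tmp_roundtrip.csv', index=False)
_reloaded = pd.read_csv('tmp_roundtrip.csv')
print(_df.dtypes.tolist())        # e.g. [CategoricalDtype(...), dtype('int64')]
print(_reloaded.dtypes.tolist())  # e.g. [dtype('O'), dtype('int64')]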
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/raw/'
    results_dir_output = results_dir + 'input/prepared/openml/'
    ag_results_autopilot_1h = preprocess_openml.preprocess_openml_input(
        path=results_dir_input + 'results_automlbenchmark_autopilot_1h.csv',
        framework_suffix='_1h')
    results_list = [ag_results_autopilot_1h]
    results_raw = pd.concat(results_list, ignore_index=True, sort=True)
    frameworks_autopilot = ['AutoPilot_1h']
    results_autopilot = results_raw[results_raw[FRAMEWORK].isin(frameworks_autopilot)]
    save_pd.save(path=results_dir_output + 'openml_autopilot.csv', df=results_autopilot)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'output/'
    results_dir_output = results_dir + 'output/combined/4h/tables/'
    input_openml = results_dir_input + 'openml/core/4h/results_ranked_by_dataset_all.csv'
    input_kaggle = results_dir_input + 'kaggle/4h/results_ranked_by_dataset_all.csv'
    results_ranked_by_dataset_all = load_pd.load([input_openml, input_kaggle])
    print(results_ranked_by_dataset_all)
    result = generate_charts.compute_dataset_framework_df(results_ranked_by_dataset_all)
    print(result)
    save_pd.save(path=results_dir_output + 'dataset_x_framework.csv', df=result)
def evaluate(results_raw, frameworks=None, banned_datasets=None, folds_to_keep=None, columns_to_agg_extra=None, frameworks_compare_vs_all=None, output_dir=None):
    if frameworks is None:
        frameworks = sorted(list(results_raw[FRAMEWORK].unique()))
    if frameworks_compare_vs_all is None:
        frameworks_compare_vs_all = []
    if folds_to_keep is None:
        folds_to_keep = sorted(list(results_raw[FOLD].unique()))
    if banned_datasets is not None:
        results_raw = results_raw[~results_raw[DATASET].isin(banned_datasets)]
    total_datasets = sorted(results_raw[DATASET].unique())

    results_raw = preprocess_utils.clean_result(result_df=results_raw, folds_to_keep=folds_to_keep, remove_invalid=True)
    results_raw = results_raw[results_raw[FRAMEWORK].isin(frameworks)]

    # Calculate each framework's errored datasets
    total_frameworks = results_raw[FRAMEWORK].unique()
    total_folds = results_raw[FOLD].unique()
    num_frameworks = len(total_frameworks)
    num_datasets = len(total_datasets)
    num_folds = len(total_folds)
    ideal_rows = num_folds * num_datasets * num_frameworks
    actual_rows = len(results_raw)
    errors = ideal_rows - actual_rows
    print('num_datasets:', num_datasets)
    print('num_folds:', num_folds)
    print('errors:', errors)

    for framework in total_frameworks:
        results_framework = results_raw[results_raw[FRAMEWORK] == framework]
        num_rows_framework = len(results_framework)
        datasets_framework = results_framework[DATASET].unique()
        datasets_framework_errors = [dataset for dataset in total_datasets if dataset not in datasets_framework]
        datasets_framework_errors_count = len(datasets_framework_errors)
        framework_fold_errors = num_datasets * num_folds - num_rows_framework
        print('################################################')
        print('framework:', framework)
        print('datasets_framework_errors:', datasets_framework_errors)
        print('datasets_framework_errors_count:', datasets_framework_errors_count)
        print('framework_fold_errors:', framework_fold_errors)
        print('################################################')

    all_results_pairs = {}
    for framework_2 in frameworks_compare_vs_all:
        results_list = []
        for framework_1 in total_frameworks:
            if framework_1 == framework_2:
                # A framework ties with itself on every dataset it completed
                results_ranked, results_ranked_by_dataset = evaluate_utils.compare_frameworks(
                    results_raw=results_raw, frameworks=[framework_2], banned_datasets=banned_datasets,
                    folds_to_keep=folds_to_keep, columns_to_agg_extra=columns_to_agg_extra,
                    datasets=total_datasets, verbose=False)
                ties = len(results_ranked_by_dataset)
                results_list.append([framework_1, 0, 0, ties])
                continue
            results_ranked, results_ranked_by_dataset = evaluate_utils.compare_frameworks(
                results_raw=results_raw, frameworks=[framework_1, framework_2], banned_datasets=banned_datasets,
                folds_to_keep=folds_to_keep, columns_to_agg_extra=columns_to_agg_extra,
                datasets=total_datasets, verbose=False)
            datasets_pair = results_ranked_by_dataset[DATASET].unique()
            framework_1_wins = 0
            framework_2_wins = 0
            ties = 0
            for dataset in datasets_pair:
                results_isolated = results_ranked_by_dataset[results_ranked_by_dataset[DATASET] == dataset]
                results_isolated = results_isolated[results_isolated[FRAMEWORK] == framework_1]
                results_isolated_rank = results_isolated[RANK].iloc[0]
                if results_isolated_rank == 1:
                    framework_1_wins += 1
                elif results_isolated_rank == 2:
                    framework_2_wins += 1
                elif results_isolated_rank == 1.5:
                    ties += 1
                else:
                    raise AssertionError('Rank not valid: %s' % results_isolated_rank)
            results_list.append([framework_1, framework_1_wins, framework_2_wins, ties])
        results_pairs = pd.DataFrame(data=results_list, columns=[FRAMEWORK, '> ' + framework_2, '< ' + framework_2, '= ' + framework_2])
        all_results_pairs[framework_2] = results_pairs

    print('################################################')
    print('%s VS %s' % ('all', 'all'))
    print('\tAll datasets regardless of failures')
    results_ranked_all, results_ranked_by_dataset_all = evaluate_utils.compare_frameworks(
        results_raw=results_raw, banned_datasets=banned_datasets, folds_to_keep=folds_to_keep,
        filter_errors=False, columns_to_agg_extra=columns_to_agg_extra, datasets=total_datasets)
    if output_dir:
        save_pd.save(path=output_dir + 'results_ranked_all.csv', df=results_ranked_all)
        save_pd.save(path=output_dir + 'results_ranked_by_dataset_all.csv', df=results_ranked_by_dataset_all)

    print('################################################')
    print('%s VS %s' % ('all', 'all'))
    print('\tOnly datasets where all frameworks succeeded')
    results_ranked_valid, results_ranked_by_dataset_valid = evaluate_utils.compare_frameworks(
        results_raw=results_raw, frameworks=frameworks, banned_datasets=banned_datasets,
        folds_to_keep=folds_to_keep, columns_to_agg_extra=columns_to_agg_extra, datasets=total_datasets)

    results_pairs_merged_dict = {}
    for framework in frameworks_compare_vs_all:
        columns_to_get_from_all = [RANK_1, 'rank=2_count', 'rank=3_count', 'rank>3_count', ERROR_COUNT]
        results_pairs = all_results_pairs[framework]
        results_pairs_merged = pd.merge(results_pairs, results_ranked_valid, on=FRAMEWORK, how='left')
        results_pairs_merged = results_pairs_merged.drop(columns_to_get_from_all, axis=1)
        results_pairs_merged = pd.merge(results_pairs_merged, results_ranked_all[[FRAMEWORK] + columns_to_get_from_all], on=FRAMEWORK, how='left')
        results_pairs_merged = results_pairs_merged.sort_values(by=RANK)
        print('################################################')
        print('%s VS %s' % (framework, 'all'))
        with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
            print(results_pairs_merged)
        if output_dir:
            save_pd.save(path=output_dir + 'pairwise/' + framework + '.csv', df=results_pairs_merged)
        results_pairs_merged_dict[framework] = results_pairs_merged

    if output_dir:
        save_pd.save(path=output_dir + 'results_ranked_valid.csv', df=results_ranked_valid)
        save_pd.save(path=output_dir + 'results_ranked_by_dataset_valid.csv', df=results_ranked_by_dataset_valid)

    return results_ranked_valid, results_ranked_by_dataset_valid, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict
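# Side note (illustrative, not part of the module): the rank == 1.5 tie check
# above is consistent with pandas' default 'average' ranking, which assigns two
# tied entries the mean of ranks 1 and 2. This assumes compare_frameworks ranks
# the two frameworks per dataset with that default.
import pandas as pd

_errors = pd.Series({'framework_1': 0.10, 'framework_2': 0.10})
print(_errors.rank(method='average'))  # both frameworks get rank 1.5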
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/orig_vs_core10fold/'
    results_raw = load_pd.load(path=[
        results_dir_input + 'openml_core.csv',
        results_dir_input + 'openml_original.csv',
    ])
    frameworks_1h = [
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
    ]
    frameworks_4h = [
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]
    frameworks_run_list = [frameworks_1h, frameworks_4h]
    folds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    folds_to_keep_list = [folds, folds]
    banned_datasets_list = [DATASETS_LARGE, []]
    num_runs = len(frameworks_run_list)

    full_results_pairs_merged_dict = {}
    for i in range(num_runs):
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]
        banned_datasets = banned_datasets_list[i]
        for framework in frameworks_run:
            run_path_prefix = framework + '/'
            orig_framework = 'orig_' + framework
            results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
                results_raw=results_raw,
                frameworks=[framework, orig_framework],
                banned_datasets=banned_datasets,
                folds_to_keep=folds_to_keep,
                columns_to_agg_extra=[
                    # TIME_INFER_S,
                    'acc',
                    'auc',
                    'logloss',
                ],
                frameworks_compare_vs_all=[orig_framework],
                output_dir=results_dir_output + run_path_prefix,
            )
            full_results_pairs_merged_dict.update(results_pairs_merged_dict)

    dfs = []
    frameworks_full = frameworks_1h + frameworks_4h
    for framework in frameworks_full:
        orig_framework = 'orig_' + framework
        cur_df = full_results_pairs_merged_dict[orig_framework]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> Original'
        cur_columns[2] = '< Original'
        cur_columns[3] = '= Original'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)
    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/new_vs_old.csv', df=df_final)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/kaggle/'
    output_prefix = 'output/kaggle/'
    raw_kaggle_file = 'results_kaggle_wpercentile.csv'
    results_raw = load_pd.load(path=[
        results_dir_input + 'kaggle_core.csv',
    ])

    # First generate datasets x frameworks raw data dumps:
    metrics = ['LEADER_PERCENTILE', METRIC_SCORE]
    dataset_order = [
        'house-prices-advanced-regression-techniques',
        'mercedes-benz-greener-manufacturing',
        'santander-value-prediction-challenge',
        'allstate-claims-severity',
        'bnp-paribas-cardif-claims-management',
        'santander-customer-transaction-prediction',
        'santander-customer-satisfaction',
        'porto-seguro-safe-driver-prediction',
        'ieee-fraud-detection',
        'walmart-recruiting-trip-type-classification',
        'otto-group-product-classification-challenge',
    ]
    dataset_order = [KAGGLE_ABBREVS[dat] for dat in dataset_order]
    method_order = [
        'AutoWEKA',
        'autosklearn',
        'TPOT',
        'H2OAutoML',
        'GCPTables',
        'autogluon',
    ]
    time_limits = ['4h', '8h']

    results_raw2 = results_raw.drop(METRIC_ERROR, axis=1).copy()
    results_raw2['LEADER_PERCENTILE'] = 1 - results_raw2['LEADER_PERCENTILE']  # convert to actual percentile
    results_raw2.rename(columns={'LEADER_PERCENTILE': METRIC_ERROR}, inplace=True)
    # loss_df = generate_charts.compute_dataset_framework_df(results_raw)  # values = losses
    percentile_df = generate_charts.compute_dataset_framework_df(results_raw2)
    for time_limit in time_limits:
        methods_t = [meth + "_" + time_limit for meth in method_order]
        df_time = percentile_df[[DATASET] + methods_t].copy()
        df_time[DATASET] = df_time[DATASET].map(KAGGLE_ABBREVS)
        df_ordered = df_time.set_index(DATASET)
        df_ordered = df_ordered.reindex(dataset_order)
        # df_ordered.reset_index(inplace=True)
        # df_ordered.rename(columns={'dataset': 'Dataset'}, inplace=True)
        df_ordered.rename(columns=NOTIME_NAMES, inplace=True)
        save_pd.save(path=results_dir + output_prefix + time_limit + "/datasetsXframeworks.csv", df=df_ordered)
        textable_file = results_dir + output_prefix + time_limit + "/allpercentiles.tex"
        tex_table.tex_table(df_ordered, textable_file, bold='max', nan_char=" x ", max_digits=5)

    # Next do pairwise comparisons:
    num_frameworks = 6
    valid_frameworks = [
        'autogluon_4h',
        'GCPTables_4h',
        'autosklearn_4h',
        'H2OAutoML_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
        'autogluon_8h',
        'GCPTables_8h',
        'H2OAutoML_8h',
        'autosklearn_8h',
        'TPOT_8h',
        'AutoWEKA_8h',
    ]
    frameworks_compare_vs_all_list = [
        'autogluon_4h',
        'autogluon_8h',
        'autogluon_4h',
        'autogluon_8h',
    ]
    results_dir_output_list = [
        '4h/',
        '8h/',
        'allVautogluon_4h/',
        'allVautogluon_8h/',
    ]
    results_dir_output_list = [results_dir + output_prefix + name for name in results_dir_output_list]
    # List of lists, each holding the indices of valid_frameworks that should be compared in a single table.
    framework_compare_ind_list = [
        list(range(num_frameworks)),
        list(range(num_frameworks, num_frameworks * 2)),
        range(num_frameworks * 2),
        range(num_frameworks * 2),
    ]
    for i in range(len(results_dir_output_list)):
        results_dir_output = results_dir_output_list[i]
        frameworks_to_compare = [valid_frameworks[j] for j in framework_compare_ind_list[i]]
        framework_compare_vs_all = frameworks_compare_vs_all_list[i]
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_to_compare,
            banned_datasets=[],
            folds_to_keep=None,
            frameworks_compare_vs_all=[framework_compare_vs_all],
            output_dir=results_dir_output,
            columns_to_agg_extra=['LEADER_PERCENTILE'],
        )
        textab = tex_pairwise_table(results_dir_output, framework_compare_vs_all)

    # Generate plots:
    producePlots(time_limits, results_dir, raw_kaggle_file)
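# Illustrative note (not part of the script): flipping LEADER_PERCENTILE with
# 1 - x and renaming it to METRIC_ERROR lets the error-based ranking machinery
# in evaluate_results treat it as a lower-is-better column. This assumes the
# raw column stores the fraction of leaderboard teams outperformed (higher is
# better), so e.g. a raw value of 0.99 (top 1% of the leaderboard) becomes 0.01.
raw_leader_percentile = 0.99
as_error = 1 - raw_leader_percentile
assert abs(as_error - 0.01) < 1e-12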