# NOTE(review): this line is a whitespace-mangled paste — an entire script chunk
# collapsed onto one physical line. Because the first character is '#', Python
# currently treats the WHOLE line as a comment; the code below is inert as-is.
#
# Reconstructed intent (from reading the collapsed text; verify against the
# original multi-line file before un-mangling):
#   * For each dataset name in `runs`, sort its run list, then build a random
#     train/holdout mask: 200 ones followed by zeros padded to len(runs[name]),
#     shuffled with a per-dataset seed (i * 37) so the split is reproducible.
#   * `training[name]` keeps only the runs selected by the mask;
#     `split_masks[name]` records the mask itself.
#   * A LearnedDistanceRF surrogate is fit/applied via
#     `rf._apply_surrogates(metafeatures, training)` to fill in the held-out
#     runs, and the filled runs are then aligned against the ground-truth
#     `runs[name]` by matching `.params`, collecting paired `.result` values
#     into a1 (surrogate) and a2 (ground truth).
#   * The fragment is TRUNCATED mid `while True:` loop — the branch that
#     advances `offset` (and the loop's `break`) is missing from this chunk,
#     so do not reconstruct the loop tail from this line alone.
#   * Uses Python-3-style `print(...)`, unlike the sibling fragment below that
#     uses the Python-2 print statement — presumably two versions of the same
#     script were pasted together; confirm which one is canonical.
# Assumes `runs`, `split_masks`, `training`, `metafeatures`, `params`, `np`,
# and `LearnedDistanceRF` are defined earlier in the full file — TODO confirm.
# for dataset in runs: # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0] for i, name in enumerate(runs): runs[name].sort() rs = np.random.RandomState(i * 37) ones = np.ones((200, )) zeros = np.zeros((len(runs[name]) - len(ones), )) numbers = np.append(ones, zeros) rs.shuffle(numbers) split_masks[name] = numbers training[name] = [ run for j, run in enumerate(runs[name]) if numbers[j] ] rf = LearnedDistanceRF(**params) filled_runs = rf._apply_surrogates(metafeatures, training) # Now sort the arrays so we can compare it to the ground truth in run for name in runs: filled_runs[name].sort() print(len(filled_runs[name]), len(runs[name])) offset = 0 a1 = [] a2 = [] for i in range(len(filled_runs[name])): while True: if filled_runs[name][i].params == runs[name][i + offset].params: a1.append(filled_runs[name][i].result) a2.append(runs[name][i + offset].result)
# NOTE(review): whitespace-mangled paste — a multi-line Python 2 script chunk
# (statement `print`, `cPickle`, `Queue.Queue`) collapsed onto one line; as
# written this is not valid Python and cannot run.
#
# Reconstructed intent (verify against the original multi-line file):
#   * Parse args['keep_configurations'] — a comma-separated list of
#     "key=value" strings — into a tuple of (key, value) tuples.
#   * The fragment contains a dangling `else:` (setting keep_configurations to
#     None); its matching `if` lies OUTSIDE this chunk — presumably
#     `if 'keep_configurations' in args:` or similar — TODO confirm before
#     restoring structure.
#   * Build a MetaBase from task/experiment file lists, pull training
#     metafeatures as a pandas frame and all runs, then have LearnedDistanceRF
#     build an (X, Y) dataset and pickle (X, Y, metafeatures) to "test.pkl"
#     with protocol -1 (highest). Python 2 text-mode "w" is used for a binary
#     pickle — works on POSIX py2 but would need "wb" under any port.
#   * Prints shapes plus np.isfinite sanity checks, then initialises a
#     Queue.Queue of metafeature sets and, when 'forward_selection' is in
#     args, starts a forward-selection bookkeeping section.
#   * TRUNCATED at the end (`print "Starting forward selection ",` — the
#     trailing comma is the py2 "no newline" idiom); the selection loop itself
#     is not in this chunk.
#   * Near-duplicate of the next line; only the first statement differs —
#     likely two pasted revisions of the same file.
keep_configurations = args['keep_configurations'] keep_configurations = keep_configurations.split(',') keep_configurations = tuple( [tuple(kc.split('=')) for kc in keep_configurations]) else: keep_configurations = None meta_base = MetaBase(task_files_list, experiments_list, keep_configurations) metafeatures = meta_base.get_all_train_metafeatures_as_pandas() runs = meta_base.get_all_runs() # This can print the best hyperparameters of every dataset # for dataset in runs: # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0] rf = LearnedDistanceRF(**params) X, Y = rf._create_dataset(metafeatures, runs) import cPickle with open("test.pkl", "w") as fh: cPickle.dump((X, Y, metafeatures), fh, -1) print "Metafeatures", metafeatures.shape print "X", X.shape, np.isfinite(X).all().all() print "Y", Y.shape, np.isfinite(Y).all() metafeature_sets = Queue.Queue() if 'forward_selection' in args: used_metafeatures = [] metafeature_performance = [] print "Starting forward selection ",
# NOTE(review): whitespace-mangled paste, Python 2, collapsed onto one line;
# not valid Python as written. This is a near-duplicate of the previous
# fragment, missing only its first statement
# (`keep_configurations = args['keep_configurations']`) — so here
# `keep_configurations.split(',')` is applied to a value that this chunk never
# assigns; the assignment must come from outside the fragment. Presumably one
# of the two duplicated fragments is stale and should be deleted after
# comparison with version control — TODO confirm which is canonical.
#
# Reconstructed intent, same as the fragment above (verify before un-mangling):
#   * Split a comma-separated "key=value" list into a tuple of (key, value)
#     tuples; a dangling `else:` (keep_configurations = None) belongs to an
#     `if` outside this chunk.
#   * Build MetaBase, fetch metafeatures (pandas) and runs, create the RF
#     distance dataset, cPickle (X, Y, metafeatures) to "test.pkl" at highest
#     protocol (note: text-mode "w" for a binary pickle — py2/POSIX only).
#   * Print shape / np.isfinite diagnostics, set up a Queue.Queue, and begin a
#     forward-selection section when 'forward_selection' is in args.
#   * TRUNCATED after the "Starting forward selection " print (trailing comma
#     = py2 suppress-newline); the selection loop is not in this chunk.
keep_configurations = keep_configurations.split(',') keep_configurations = tuple( [tuple(kc.split('=')) for kc in keep_configurations]) else: keep_configurations = None meta_base = MetaBase(task_files_list, experiments_list, keep_configurations) metafeatures = meta_base.get_all_train_metafeatures_as_pandas() runs = meta_base.get_all_runs() # This can print the best hyperparameters of every dataset # for dataset in runs: # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0] rf = LearnedDistanceRF(**params) X, Y = rf._create_dataset(metafeatures, runs) import cPickle with open("test.pkl", "w") as fh: cPickle.dump((X, Y, metafeatures), fh, -1) print "Metafeatures", metafeatures.shape print "X", X.shape, np.isfinite(X).all().all() print "Y", Y.shape, np.isfinite(Y).all() metafeature_sets = Queue.Queue() if 'forward_selection' in args: used_metafeatures = [] metafeature_performance = [] print "Starting forward selection ",
# NOTE(review): whitespace-mangled paste — the Python-2 counterpart of the
# first fragment in this file (statement `print` instead of `print(...)`,
# otherwise the same logic modulo spacing). Because the line starts with '#',
# Python treats the entire collapsed line as a comment; the code is inert
# as-is. Keep only one of the two variants once the file is un-mangled —
# TODO confirm which revision is canonical.
#
# Reconstructed intent (verify against the original multi-line file):
#   * Per dataset in `runs`: sort the run list, build a reproducible train
#     mask of 200 ones padded with zeros (seed i*37, shuffled), store it in
#     `split_masks[name]`, and keep the masked runs in `training[name]`.
#   * Apply LearnedDistanceRF surrogates to fill in held-out runs, sort the
#     filled lists, and align them to ground truth by matching `.params`,
#     pairing surrogate results (a1) with true results (a2).
#   * TRUNCATED mid `while True:` — the `offset` advance / `break` branch is
#     missing from this chunk; do not reconstruct the loop tail from this
#     line alone.
# Assumes `runs`, `split_masks`, `training`, `metafeatures`, `params`, `np`,
# and `LearnedDistanceRF` are defined earlier in the full file — TODO confirm.
# This can print the best hyperparameters of every dataset # for dataset in runs: # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0] for i, name in enumerate(runs): runs[name].sort() rs = np.random.RandomState(i*37) ones = np.ones((200,)) zeros = np.zeros((len(runs[name]) - len(ones),)) numbers = np.append(ones, zeros) rs.shuffle(numbers) split_masks[name] = numbers training[name] = [run for j, run in enumerate(runs[name]) if numbers[j]] rf = LearnedDistanceRF(**params) filled_runs = rf._apply_surrogates(metafeatures, training) # Now sort the arrays so we can compare it to the ground truth in run for name in runs: filled_runs[name].sort() print len(filled_runs[name]), len(runs[name]) offset = 0 a1 = [] a2 = [] for i in range(len(filled_runs[name])): while True: if filled_runs[name][i].params == runs[name][i+offset].params: a1.append(filled_runs[name][i].result) a2.append(runs[name][i+offset].result)