def read_data(bench_dir, datasets):
    """Collect configs and final validation accuracies for several datasets.

    For every (dataset, config) pair, query the benchmark for the config
    dict and the ``Train/val_accuracy`` curve; the learning target ``y`` is
    the last point of each curve.

    Returns:
        (configs, y, dataset_names) as numpy arrays, one entry per
        (dataset, config) pair, ordered dataset-major.
    """
    bench = Benchmark(bench_dir, cache=False)
    # NOTE(review): assumes every dataset has the same number of configs
    # as datasets[0] -- confirm against the benchmark contents.
    n_configs = bench.get_number_of_configs(datasets[0])

    curves = []
    configs = []
    names = []
    for ds in datasets:
        for cfg_id in range(n_configs):
            curves.append(
                bench.query(dataset_name=ds, tag="Train/val_accuracy",
                            config_id=cfg_id))
            configs.append(
                bench.query(dataset_name=ds, tag="config", config_id=cfg_id))
            names.append(ds)

    # Target is the final value of each validation-accuracy curve.
    y = np.array([curve[-1] for curve in curves])
    return np.array(configs), y, np.array(names)
def read_data(bench_dir, dataset_name='Fashion-MNIST'):
    """Load all configs with their ``Train/*`` curves for one dataset and
    split them into train/validation/test portions.

    Fix: the passed ``dataset_name`` was previously overwritten with a
    hard-coded ``'Fashion-MNIST'``, so the argument was silently ignored.
    It is now honored; ``'Fashion-MNIST'`` is kept as the default value for
    backward compatibility with callers that relied on the old behavior.

    Args:
        bench_dir: path passed to ``Benchmark`` (JSON benchmark file/dir).
        dataset_name: benchmark dataset to load (default: 'Fashion-MNIST').

    Returns:
        (train_data, val_data, test_data,
         train_targets, val_targets, test_targets)
        where the data arrays hold per-config dicts and the targets come
        from ``cut_data``.
    """
    bench = Benchmark(bench_dir, cache=False)
    n_configs = bench.get_number_of_configs(dataset_name)

    # Query API: one dict per config holding the hyperparameter config and
    # every "Train/" curve the benchmark exposes for that config.
    data = []
    for config_id in range(n_configs):
        data_point = dict()
        data_point["config"] = bench.query(dataset_name=dataset_name,
                                           tag="config",
                                           config_id=config_id)
        for tag in bench.get_queriable_tags(dataset_name=dataset_name,
                                            config_id=config_id):
            if tag.startswith("Train/"):
                data_point[tag] = bench.query(dataset_name=dataset_name,
                                              tag=tag,
                                              config_id=config_id)
        data.append(data_point)

    # Split: 50% train, 25% validation, 25% test (the data is already shuffled)
    indices = np.arange(n_configs)
    ind_train = indices[0:int(np.floor(0.5 * n_configs))]
    ind_val = indices[int(np.floor(0.5 * n_configs)):int(np.floor(0.75 * n_configs))]
    ind_test = indices[int(np.floor(0.75 * n_configs)):]

    array_data = np.array(data)
    train_data = array_data[ind_train]
    val_data = array_data[ind_val]
    test_data = array_data[ind_test]

    # Cut curves for validation and test at a fixed early position so the
    # model must extrapolate the remainder.
    cut_position = 11
    val_data, val_targets = cut_data(val_data, cut_position)
    test_data, test_targets = cut_data(test_data, cut_position)
    # Cut last value as it is repeated
    train_data, train_targets = cut_data(train_data, 51)

    return train_data, val_data, test_data, train_targets, val_targets, test_targets
# predictions and true values have to be multiplied by 100 first predictions = predictions * 100 trueVal = trueVal * 100 mse = torch.mean((predictions - trueVal)**2) rmse = torch.sqrt(torch.mean((predictions - trueVal)**2)) return mse, rmse if __name__ == "__main__": print("------------- Let us predict some learning curves --------------") data_path = '/home/sambit/PROGRAMMING/DL_PROJECT/TEAM_WORK_FREIBURG/Extrapolation-of-Learning-Curves/DATA/fashion_mnist.json' data_root = Benchmark(data_dir=data_path) train_data, val_data, test_data, train_targets, val_targets, test_targets = read_data( data_root) print("Train:", len(train_data)) print("Validation:", len(val_data)) print("Test:", len(test_data)) train_X, train_Y, val_X, val_Y, test_X, test_Y = get_data( train_data, val_data, test_data, train_targets, val_targets, test_targets) # get the prepared data print(train_X.shape, train_Y.shape, val_X.shape, val_Y.shape) # get model and send to GPU
import os

import pandas as pd

# git clone git@github.com:automl/LCBench.git
# and install all requirements before
from api import Benchmark

# Download from https://ndownloader.figshare.com/files/21188598 and unzip
bench_dir = "data_2k_lw.json"
bench = Benchmark(bench_dir, cache=True)

path = "data/runs/mlp_results/"
dataset_names = bench.get_dataset_names()
openml_task_ids = bench.get_openml_task_ids()

# Fix: os.mkdir fails when intermediate dirs are missing or the target
# already exists; makedirs with exist_ok makes the script re-runnable.
os.makedirs(path, exist_ok=True)

for task in dataset_names:
    # NOTE(review): nrun is currently unused -- only config_id=0 is
    # exported below; if all configs should be exported, loop over
    # range(nrun). Continuation of this loop is outside this view.
    nrun = bench.get_number_of_configs(task)
    df = pd.DataFrame(
        [bench.query(dataset_name=task, tag="config", config_id=0)])
    df['final_val_accuracy'] = bench.query(dataset_name=task,
                                           tag="final_val_accuracy",
                                           config_id=0)
    # Fix: these two columns were copy-pasted and both queried the
    # "final_val_accuracy" tag; query the tags matching the column names.
    df['final_test_accuracy'] = bench.query(dataset_name=task,
                                            tag="final_test_accuracy",
                                            config_id=0)
    df['final_val_balanced_accuracy'] = bench.query(dataset_name=task,
                                                    tag="final_val_balanced_accuracy",
                                                    config_id=0)