예제 #1
0
def read_data(bench_dir, datasets):
    """Gather configurations and final validation accuracies across datasets.

    The number of configurations is read once, from the first entry of
    ``datasets``, and that same count is applied to every dataset.

    Args:
        bench_dir: Location passed to ``Benchmark`` (opened with caching off).
        datasets: Indexable, non-empty collection of dataset names.

    Returns:
        A tuple ``(configs, y, dataset_names)`` of NumPy arrays holding one
        entry per (dataset, config id) pair in dataset-major order: the
        queried ``"config"`` value, the last element of the queried
        ``"Train/val_accuracy"`` result, and the dataset name of the pair.
    """
    benchmark = Benchmark(bench_dir, cache=False)
    config_count = benchmark.get_number_of_configs(datasets[0])

    # Every (dataset, config id) combination, dataset-major.
    pairs = [(name, idx) for name in datasets for idx in range(config_count)]

    curves = []
    for name, idx in pairs:
        curves.append(
            benchmark.query(dataset_name=name,
                            tag="Train/val_accuracy",
                            config_id=idx))

    settings = []
    for name, idx in pairs:
        settings.append(
            benchmark.query(dataset_name=name, tag="config", config_id=idx))

    owners = [name for name, _ in pairs]

    # The target of each pair is the final point of its accuracy curve.
    final_scores = np.array([curve[-1] for curve in curves])
    return np.array(settings), final_scores, np.array(owners)
def read_data(bench_dir, dataset_name="Fashion-MNIST"):
    """Load one dataset's training curves and split them into train/val/test.

    For every configuration of ``dataset_name`` a dict is built that holds
    the ``"config"`` entry plus every queriable tag starting with
    ``"Train/"``. The dicts are split 50% / 25% / 25% by position and each
    part is passed through ``cut_data``.

    Args:
        bench_dir: Location passed to ``Benchmark`` (opened with caching off).
        dataset_name: Name of the dataset to query. Defaults to
            ``"Fashion-MNIST"``, the value this function previously
            hard-coded regardless of the argument.

    Returns:
        The tuple ``(train_data, val_data, test_data, train_targets,
        val_targets, test_targets)`` as produced by ``cut_data``.
    """
    bench = Benchmark(bench_dir, cache=False)
    n_configs = bench.get_number_of_configs(dataset_name)

    # Query API: one dict per configuration.
    data = []
    for config_id in range(n_configs):
        data_point = {
            "config": bench.query(dataset_name=dataset_name,
                                  tag="config",
                                  config_id=config_id)
        }
        for tag in bench.get_queriable_tags(dataset_name=dataset_name,
                                            config_id=config_id):
            if tag.startswith("Train/"):
                data_point[tag] = bench.query(dataset_name=dataset_name,
                                              tag=tag,
                                              config_id=config_id)
        data.append(data_point)

    # Split: 50% train, 25% validation, 25% test (the data is already shuffled)
    train_end = int(np.floor(0.5 * n_configs))
    val_end = int(np.floor(0.75 * n_configs))
    indices = np.arange(n_configs)
    ind_train = indices[:train_end]
    ind_val = indices[train_end:val_end]
    ind_test = indices[val_end:]

    array_data = np.array(data)
    train_data = array_data[ind_train]
    val_data = array_data[ind_val]
    test_data = array_data[ind_test]

    # Cut curves for validation and test
    cut_position = 11
    val_data, val_targets = cut_data(val_data, cut_position)
    test_data, test_targets = cut_data(test_data, cut_position)
    train_data, train_targets = cut_data(
        train_data, 51)  # Cut last value as it is repeated

    return train_data, val_data, test_data, train_targets, val_targets, test_targets
# and install all requirements before
from api import Benchmark

# Download from https://ndownloader.figshare.com/files/21188598 and unzip
bench_dir = "data_2k_lw.json"
bench = Benchmark(bench_dir, cache=True)

# Directory created below.
path = "data/runs/mlp_results/"

dataset_names = bench.get_dataset_names()
openml_task_ids = bench.get_openml_task_ids()

# makedirs (unlike mkdir) also creates the missing parents "data/" and
# "data/runs/"; exist_ok=True lets the script be re-run without failing on
# a directory left over from a previous run.
os.makedirs(path, exist_ok=True)

# Per-dataset processing. Only the start of the loop body is visible here.
for task in dataset_names:
    # Number of configurations available for this dataset.
    nrun = bench.get_number_of_configs(task)
    # One-row frame built from the "config" entry of configuration 0.
    df = pd.DataFrame(
        [bench.query(dataset_name=task, tag="config", config_id=0)])
    # NOTE(review): all four columns below are filled from the same tag,
    # "final_val_accuracy", although the column names refer to test and
    # balanced accuracies. This looks like a copy-paste slip; confirm which
    # tags the benchmark exposes and whether these values are used later.
    df['final_val_accuracy'] = bench.query(dataset_name=task,
                                           tag="final_val_accuracy",
                                           config_id=0)
    df['final_test_accuracy'] = bench.query(dataset_name=task,
                                            tag="final_val_accuracy",
                                            config_id=0)
    df['final_val_balanced_accuracy'] = bench.query(dataset_name=task,
                                                    tag="final_val_accuracy",
                                                    config_id=0)
    df['final_test_balanced_accuracy'] = bench.query(dataset_name=task,
                                                     tag="final_val_accuracy",
                                                     config_id=0)
    # Fresh accumulator for this dataset.
    data = []