# for dataset in runs:
    # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0]

    # For every dataset, draw a reproducible training subset of 200 runs.
    for idx, dataset_name in enumerate(runs):
        runs[dataset_name].sort()
        # Per-dataset seed: reproducible, but distinct across datasets.
        rng = np.random.RandomState(idx * 37)
        n_runs = len(runs[dataset_name])
        # Mask with exactly 200 ones (selected) and zeros for the rest,
        # shuffled so the selection is random but seeded.
        mask = np.append(np.ones((200, )), np.zeros((n_runs - 200, )))
        rng.shuffle(mask)
        split_masks[dataset_name] = mask
        training[dataset_name] = [
            r for k, r in enumerate(runs[dataset_name]) if mask[k]
        ]

    # Fit the surrogate model on the training subset and fill in the
    # remaining runs.
    rf = LearnedDistanceRF(**params)
    filled_runs = rf._apply_surrogates(metafeatures, training)

    # Now sort the arrays so we can compare it to the ground truth in run
    for name in runs:
        filled_runs[name].sort()
        print(len(filled_runs[name]), len(runs[name]))
        offset = 0
        # a1/a2 collect paired result values (surrogate vs. ground truth)
        # for runs whose parameter settings match.
        a1 = []
        a2 = []
        for i in range(len(filled_runs[name])):
            # NOTE(review): this fragment is truncated in this chunk -- as
            # written `while True` has no `break` and `offset` is never
            # advanced, so it would spin forever once entered. The original
            # presumably had an `else`/`break` tail that is cut off here.
            while True:
                if filled_runs[name][i].params == runs[name][i +
                                                             offset].params:
                    a1.append(filled_runs[name][i].result)
                    a2.append(runs[name][i + offset].result)
        # NOTE(review): these lines appear spliced in from a different
        # example -- the `if` matching the `else:` below is not present in
        # this chunk. Parses "key=value,key=value" from the CLI args into a
        # tuple of (key, value) tuples.
        keep_configurations = args['keep_configurations']
        keep_configurations = keep_configurations.split(',')
        keep_configurations = tuple(
            [tuple(kc.split('=')) for kc in keep_configurations])
    else:
        # No restriction requested: keep every configuration.
        keep_configurations = None

    meta_base = MetaBase(task_files_list, experiments_list, keep_configurations)
    # Meta-features for all training datasets as a pandas DataFrame.
    metafeatures = meta_base.get_all_train_metafeatures_as_pandas()
    runs = meta_base.get_all_runs()

    # This can print the best hyperparameters of every dataset
    # for dataset in runs:
    # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0]

    # Build the training set (X, Y) for the learned-distance random forest.
    rf = LearnedDistanceRF(**params)
    X, Y = rf._create_dataset(metafeatures, runs)
    import cPickle

    # NOTE(review): Python 2 fragment (print statements, cPickle). Pickle
    # files should be opened in binary mode ("wb"); text mode "w" only
    # happens to work where text and binary mode coincide -- confirm on the
    # target platform.
    with open("test.pkl", "w") as fh:
        cPickle.dump((X, Y, metafeatures), fh, -1)

    print "Metafeatures", metafeatures.shape
    print "X", X.shape, np.isfinite(X).all().all()
    print "Y", Y.shape, np.isfinite(Y).all()

    metafeature_sets = Queue.Queue()
    if 'forward_selection' in args:
        used_metafeatures = []
        metafeature_performance = []
        # Trailing comma is the Python 2 idiom to suppress the newline; the
        # loop body continuing this fragment is cut off at this point.
        print "Starting forward selection ",
# --- Example #3 (scraper artifact: example header and vote count "0") ---
        # NOTE(review): this fragment begins mid-`if` -- the line reading the
        # raw value and the `if` matching the `else:` below are missing from
        # this chunk. Parses "key=value,key=value" into a tuple of
        # (key, value) tuples.
        keep_configurations = keep_configurations.split(',')
        keep_configurations = tuple(
            [tuple(kc.split('=')) for kc in keep_configurations])
    else:
        # No restriction requested: keep every configuration.
        keep_configurations = None

    # Load meta-data (datasets and experiment runs), restricted to the kept
    # configurations if any were given.
    meta_base = MetaBase(task_files_list, experiments_list,
                         keep_configurations)
    metafeatures = meta_base.get_all_train_metafeatures_as_pandas()
    runs = meta_base.get_all_runs()

    # This can print the best hyperparameters of every dataset
    # for dataset in runs:
    # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0]

    # Build the training set (X, Y) for the learned-distance random forest.
    rf = LearnedDistanceRF(**params)
    X, Y = rf._create_dataset(metafeatures, runs)
    import cPickle

    # NOTE(review): Python 2 fragment; pickle files should be opened in
    # binary mode ("wb") -- confirm on the target platform.
    with open("test.pkl", "w") as fh:
        cPickle.dump((X, Y, metafeatures), fh, -1)

    print "Metafeatures", metafeatures.shape
    print "X", X.shape, np.isfinite(X).all().all()
    print "Y", Y.shape, np.isfinite(Y).all()

    metafeature_sets = Queue.Queue()
    if 'forward_selection' in args:
        used_metafeatures = []
        metafeature_performance = []
        # Trailing comma suppresses the newline (Python 2); the continuation
        # of this branch is cut off here.
        print "Starting forward selection ",
    # This can print the best hyperparameters of every dataset
    # for dataset in runs:
    # print dataset, sorted(runs[dataset], key=lambda t: t.result)[0]

    # Draw a reproducible 200-run training subset per dataset.
    for dataset_idx, dataset in enumerate(runs):
        runs[dataset].sort()
        # Distinct, deterministic seed for every dataset.
        state = np.random.RandomState(dataset_idx * 37)
        selected = np.ones((200,))
        rest = np.zeros((len(runs[dataset]) - len(selected),))
        # Shuffled 0/1 mask: exactly 200 entries are selected.
        mask = np.append(selected, rest)
        state.shuffle(mask)
        split_masks[dataset] = mask
        training[dataset] = [run_ for pos, run_ in enumerate(runs[dataset])
                             if mask[pos]]

    # Fit surrogates on the training subset and fill in the held-out runs.
    rf = LearnedDistanceRF(**params)
    filled_runs = rf._apply_surrogates(metafeatures, training)


    # Now sort the arrays so we can compare it to the ground truth in run
    for name in runs:
        filled_runs[name].sort()
        print len(filled_runs[name]), len(runs[name])
        offset = 0
        # a1/a2 collect paired result values (surrogate vs. ground truth)
        # for runs with matching parameter settings.
        a1 = []
        a2 = []
        for i in range(len(filled_runs[name])):
            # NOTE(review): truncated fragment -- `while True` has no `break`
            # and `offset` never changes, so as written this would loop
            # forever; the remainder of the loop body is cut off here.
            while True:
                if filled_runs[name][i].params == runs[name][i+offset].params:
                    a1.append(filled_runs[name][i].result)
                    a2.append(runs[name][i+offset].result)