Ejemplo n.º 1
0
def data_from_config(config_file):
    """
    Load the trial history and search space described by a config file.

    A ``Config`` is built from ``config_file``; every ``Trial`` found in
    its trials session contributes one
    ``[parameters, test_scores, status, elapsed]`` record. The records
    and the search space are then handed to ``get_data``.

    Returns the tuple produced by ``get_data`` (features, scores,
    elapsed time in seconds etc.) with the search space appended as the
    final element.
    """
    cfg = Config(config_file)
    trials_session = cfg.trials()
    space = cfg.search_space()

    # One record per stored trial, in the order the query returns them.
    records = []
    for trial in trials_session.query(Trial).all():
        records.append([trial.parameters, trial.test_scores,
                        trial.status, trial.elapsed])

    extracted = get_data(records, space)
    return extracted + (space, )
Ejemplo n.º 2
0
    print('usage: sample_db.py config.yaml sample_size n_samples')
    exit(1)

# Positional command-line arguments: config file, total sample size and
# number of splits (see the usage message).
inp_file = argv[1]
num = int(argv[2]) # TOTAL number of samples to use, e.g. 100
iter = int(argv[3])  # Number of splits, e.g. 5
# With the example values above this gives 5 splits of 20 samples each.
# NOTE(review): `iter` shadows the builtin of the same name; it is left
# unchanged because the code below refers to it.

# The total must divide evenly by the number of splits so that every split
# has the same size (num / iter); otherwise abort with exit status 1.
if num % iter != 0:
    print('sample_size not strictly divisible by n_samples')
    exit(1)

# Get original database and history
config1 = Config(inp_file)
# Trial results, grouped by 'project_name' in the main loop below
# (presumably a pandas DataFrame -- TODO confirm).
df1 = config1.trial_results()
# Every Trial object stored in the original trials database.
hist1 = config1.trials().query(Trial).all()


# Main loop
for name, group in df1.groupby('project_name'):
    # Sample the group
    sample = group.sample(num, random_state=42)
    cv = KFold(n_splits=iter, random_state=42)
    all_keep = sample['id'].values
    for i, (_, test_idx) in enumerate(cv.split(all_keep)):

        keep = all_keep[test_idx]

        db2 = make_session('sqlite:///osprey-trials-{0}-{1}.db'.format(int(num/iter), i), project_name=name)

        # Get the relevant trial objects from original db