Beispiel #1
0
def test_trial_results():
    """Run a five-trial osprey search and check the resulting results table.

    A skeleton config is generated in a temporary directory, a worker is
    launched with ``-n 5``, and ``Config.trial_results()`` is then expected
    to have five rows and a column for every column of the ``Trial`` table.
    """
    assert OSPREY_BIN is not None
    start_dir = os.path.abspath(os.curdir)
    scratch_dir = tempfile.mkdtemp()

    try:
        # The relative paths below ('config.yaml', 'osprey-trials.db')
        # resolve inside the scratch directory.
        os.chdir(scratch_dir)

        skeleton_cmd = [
            OSPREY_BIN, 'skeleton', '-t', 'random_example', '-f', 'config.yaml'
        ]
        subprocess.check_call(skeleton_cmd)

        worker_cmd = [OSPREY_BIN, 'worker', 'config.yaml', '-n', '5']
        subprocess.check_call(worker_cmd)
        assert os.path.exists('osprey-trials.db')

        results = Config('config.yaml').trial_results()
        assert results.shape[0] == 5

        for column_name in Trial.__table__.columns.keys():
            assert column_name in results.columns

    finally:
        # Always restore the working directory before deleting the scratch dir.
        os.chdir(start_dir)
        shutil.rmtree(scratch_dir)
Beispiel #2
0
def test_trial_results():
    """Check that ``Config.trial_results()`` reflects a completed worker run.

    Inside a temporary working directory: create a skeleton config, run an
    osprey worker with ``-n 5``, then verify that the results table has five
    rows and exposes every column name of the ``Trial`` table.
    """
    assert OSPREY_BIN is not None
    previous_cwd = os.path.abspath(os.curdir)
    workdir = tempfile.mkdtemp()

    try:
        os.chdir(workdir)

        # Each command must succeed; check_call raises on a non-zero exit.
        commands = (
            [OSPREY_BIN, 'skeleton', '-t', 'random_example',
             '-f', 'config.yaml'],
            [OSPREY_BIN, 'worker', 'config.yaml', '-n', '5'],
        )
        for command in commands:
            subprocess.check_call(command)
        assert os.path.exists('osprey-trials.db')

        trial_table = Config('config.yaml').trial_results()
        row_count = trial_table.shape[0]
        assert row_count == 5

        expected_columns = Trial.__table__.columns.keys()
        for name in expected_columns:
            assert name in trial_table.columns

    finally:
        # Leave the temporary directory before removing it.
        os.chdir(previous_cwd)
        shutil.rmtree(workdir)
Beispiel #3
0
# In[4]:

root_dir = 'fs-peptide'
# Load configuration files: one config path per method label, all under
# root_dir.
databases = {
    'bayesian': '{0}/gp-m52-ei-tica-indv/config-all_tor.yaml'.format(root_dir),
    'random': '{0}/rand-tica-indv/config_random-all_tor.yaml'.format(root_dir),
    'sobol': '{0}/sobol-tica-indv/config-all_tor.yaml'.format(root_dir),
    'tpe': '{0}/tpe-s20-g25-tica-indv/config-all_tor.yaml'.format(root_dir),
}

# Read the trial results behind each config and tag every row with the
# method label it came from, so the tables can be told apart once stacked.
all_dfs = []
for k, v in databases.items():
    config = Config(v)
    df = config.trial_results()
    df['method'] = k
    all_dfs.append(df)

# Stack the per-method tables into a single frame.
df_all = pd.concat(all_dfs)

# In[7]:

df_all.head()

# # Drop unnecessary columns and rename

# In[67]:

df = df_all.loc[:, [
    'parameters', 'project_name', 'mean_test_score', 'mean_train_score',
Beispiel #4
0
# Imported here because this section calls sys.exit(); the `exit` builtin is
# injected by the `site` module and is not guaranteed to exist (e.g. under
# `python -S`), so it should not be relied on in a script.
import sys

# Command line: sample_db.py config.yaml sample_size n_samples
if len(argv) != 4:
    print('usage: sample_db.py config.yaml sample_size n_samples')
    sys.exit(1)

inp_file = argv[1]
try:
    num = int(argv[2])  # TOTAL number of samples to use, e.g. 100
    # NOTE: `iter` shadows the builtin, but the name is kept because the
    # code further down the script reads it.
    iter = int(argv[3])  # Number of splits, e.g. 5
    # With those example values this gives 5 splits of 20 samples each.
except ValueError:
    # Non-integer arguments: show the usage line instead of a traceback.
    print('usage: sample_db.py config.yaml sample_size n_samples')
    sys.exit(1)

# Reject zero/negative values up front; n_samples == 0 would otherwise make
# the divisibility check below fail with ZeroDivisionError.
if num <= 0 or iter <= 0:
    print('sample_size and n_samples must be positive integers')
    sys.exit(1)

if num % iter != 0:
    print('sample_size not strictly divisible by n_samples')
    sys.exit(1)

# Get original database and history
config1 = Config(inp_file)
df1 = config1.trial_results()
hist1 = config1.trials().query(Trial).all()


# Main loop
for name, group in df1.groupby('project_name'):
    # Sample the group
    sample = group.sample(num, random_state=42)
    cv = KFold(n_splits=iter, random_state=42)
    all_keep = sample['id'].values
    for i, (_, test_idx) in enumerate(cv.split(all_keep)):

        keep = all_keep[test_idx]

        db2 = make_session('sqlite:///osprey-trials-{0}-{1}.db'.format(int(num/iter), i), project_name=name)