def test_trial_results():
    """Check ``Config.trial_results`` after a five-trial CLI run.

    Inside a scratch directory the test runs
    ``skeleton -t random_example -f config.yaml`` followed by
    ``worker config.yaml -n 5`` through ``OSPREY_BIN``. It then verifies that
    ``osprey-trials.db`` exists, that the results frame has five rows, and
    that every column key of ``Trial.__table__`` appears among the frame's
    columns. The original working directory is restored and the scratch
    directory removed whether or not the checks pass.
    """
    assert OSPREY_BIN is not None

    original_dir = os.path.abspath(os.curdir)
    scratch_dir = tempfile.mkdtemp()
    try:
        os.chdir(scratch_dir)

        skeleton_cmd = [OSPREY_BIN, 'skeleton', '-t', 'random_example',
                        '-f', 'config.yaml']
        worker_cmd = [OSPREY_BIN, 'worker', 'config.yaml', '-n', '5']
        for command in (skeleton_cmd, worker_cmd):
            subprocess.check_call(command)
        assert os.path.exists('osprey-trials.db')

        results = Config('config.yaml').trial_results()
        n_rows = results.shape[0]
        assert n_rows == 5

        for column_key in Trial.__table__.columns.keys():
            assert column_key in results.columns
    finally:
        # Leave the scratch directory before deleting it.
        os.chdir(original_dir)
        shutil.rmtree(scratch_dir)
def test_trial_results():
    """Check ``Config.trial_results`` after a five-trial CLI run.

    Inside a scratch directory the test runs
    ``skeleton -t random_example -f config.yaml`` followed by
    ``worker config.yaml -n 5`` through ``OSPREY_BIN``. It then verifies that
    ``osprey-trials.db`` exists, that the results frame has five rows, and
    that every column key of ``Trial.__table__`` appears among the frame's
    columns. The original working directory is restored and the scratch
    directory removed whether or not the checks pass.
    """
    assert OSPREY_BIN is not None

    original_dir = os.path.abspath(os.curdir)
    scratch_dir = tempfile.mkdtemp()
    try:
        os.chdir(scratch_dir)

        skeleton_cmd = [OSPREY_BIN, 'skeleton', '-t', 'random_example',
                        '-f', 'config.yaml']
        worker_cmd = [OSPREY_BIN, 'worker', 'config.yaml', '-n', '5']
        for command in (skeleton_cmd, worker_cmd):
            subprocess.check_call(command)
        assert os.path.exists('osprey-trials.db')

        results = Config('config.yaml').trial_results()
        n_rows = results.shape[0]
        assert n_rows == 5

        for column_key in Trial.__table__.columns.keys():
            assert column_key in results.columns
    finally:
        # Leave the scratch directory before deleting it.
        os.chdir(original_dir)
        shutil.rmtree(scratch_dir)
# NOTE(review): this fragment is collapsed onto one physical line that starts
# with `#`, so Python treats all of it as a comment; the `# In[..]:` markers
# look like notebook-export cell markers (confirm against the original).
# What the text describes: for each of four config files (keyed 'bayesian',
# 'random', 'sobol', 'tpe') under root_dir, load Config(path).trial_results(),
# tag the frame with a 'method' column, and concatenate the frames into
# df_all. The final `df_all.loc[...]` column selection is cut off in this view.
# In[4]: root_dir = 'fs-peptide' # Load Configuration Files databases = { 'bayesian': root_dir + '/gp-m52-ei-tica-indv/config-all_tor.yaml', 'random': root_dir + '/rand-tica-indv/config_random-all_tor.yaml', 'sobol': root_dir + '/sobol-tica-indv/config-all_tor.yaml', 'tpe': root_dir + '/tpe-s20-g25-tica-indv/config-all_tor.yaml' } all_dfs = [] for k, v in databases.items(): config = Config(v) df = config.trial_results() df['method'] = k all_dfs.append(df) df_all = pd.concat(all_dfs) # In[7]: df_all.head() # # Drop unnecessary columns and rename # In[67]: df = df_all.loc[:, [ 'parameters', 'project_name', 'mean_test_score', 'mean_train_score',
# Command-line driver: for every project in an existing trials database, draw
# a fixed-seed random sample of trials, cut the sampled trial ids into equally
# sized folds, and call make_session on one SQLite URL per fold.
#
# usage: sample_db.py config.yaml sample_size n_samples
if len(argv) != 4:
    print('usage: sample_db.py config.yaml sample_size n_samples')
    # `exit` is injected by the `site` module and is not guaranteed to exist;
    # raising SystemExit yields the same exit status without relying on it.
    raise SystemExit(1)

inp_file = argv[1]
num = int(argv[2])  # TOTAL size of samples to use e.g. 100
# NOTE(review): `iter` shadows the builtin; the name is kept because code
# outside this view may still refer to it.
iter = int(argv[3])  # Number of splits e.g. 5
# With the example values this gives 5 splits of 20 samples each (100 total).
if num % iter != 0:
    print('sample_size not strictly divisible by n_samples')
    raise SystemExit(1)

# Get original database and history
config1 = Config(inp_file)
df1 = config1.trial_results()
hist1 = config1.trials().query(Trial).all()

# Size of each fold. Integer division is exact here because divisibility was
# checked above, and it avoids the float round-trip of int(num / iter).
fold_size = num // iter

# KFold only accepts random_state together with shuffle=True; current
# scikit-learn raises ValueError for KFold(n_splits=iter, random_state=42).
# The seed had no effect without shuffling, so dropping it keeps the same
# consecutive folds. The rows are already in random order from `sample` below.
cv = KFold(n_splits=iter)

# Main loop
for name, group in df1.groupby('project_name'):
    # Sample the group (fixed seed so reruns pick the same trials)
    sample = group.sample(num, random_state=42)
    all_keep = sample['id'].values
    for i, (_, test_idx) in enumerate(cv.split(all_keep)):
        # Trial ids that belong to fold i
        keep = all_keep[test_idx]
        db2 = make_session(
            'sqlite:///osprey-trials-{0}-{1}.db'.format(fold_size, i),
            project_name=name)