def test_trial_results():
    assert OSPREY_BIN is not None
    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()
    try:
        os.chdir(dirname)
        subprocess.check_call([OSPREY_BIN, 'skeleton', '-t', 'random_example',
                               '-f', 'config.yaml'])
        subprocess.check_call([OSPREY_BIN, 'worker', 'config.yaml', '-n', '5'])
        assert os.path.exists('osprey-trials.db')

        config = Config('config.yaml')
        df = config.trial_results()
        assert df.shape[0] == 5
        for key in Trial.__table__.columns.keys():
            assert key in df.columns
    finally:
        os.chdir(cwd)
        shutil.rmtree(dirname)
def data_from_config(config_file):
    """
    Returns features, scores, elapsed time in seconds, etc. and the
    search space from a config file.
    """
    config = Config(config_file)
    session = config.trials()
    searchspace = config.search_space()
    history = [[t.parameters, t.test_scores, t.status, t.elapsed]
               for t in session.query(Trial).all()]
    return get_data(history, searchspace) + (searchspace, )
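# Usage sketch for data_from_config (illustrative only; assumes a local
# 'config.yaml'). Because the parsed search space is appended as the final
# tuple element, it can be split off with extended unpacking:
#
#   *data, searchspace = data_from_config('config.yaml')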
def test_search_space():
    config = Config.fromdict({
        'search_space': {
            'intvar': {'type': 'int', 'min': 1, 'max': 2},
            'logivar': {'type': 'int', 'min': 1, 'max': 2, 'warp': 'log'},
            'fvar': {'type': 'float', 'min': 1, 'max': 3.5},
            'logfvar': {'type': 'float', 'min': 1, 'max': 2.5, 'warp': 'log'},
            'enumvar': {'type': 'enum', 'choices': [1, False]},
            'jumpivar': {'type': 'jump', 'min': 1, 'max': 3, 'num': 3,
                         'var_type': int},
            'jumpfvar': {'type': 'jump', 'min': 1, 'max': 3, 'num': 3,
                         'var_type': float},
            'logjumpivar': {'type': 'jump', 'min': 10, 'max': 1000, 'num': 3,
                            'warp': 'log', 'var_type': int},
            'logjumpfvar': {'type': 'jump', 'min': 10, 'max': 1000, 'num': 3,
                            'warp': 'log', 'var_type': float}
        }}, check_fields=False)

    searchspace = config.search_space()
    assert searchspace['intvar'] == IntVariable('intvar', 1, 2, warp=None)
    assert searchspace['logivar'] == IntVariable('logivar', 1, 2, warp='log')
    assert searchspace['fvar'] == FloatVariable('fvar', 1, 3.5, warp=None)
    assert searchspace['logfvar'] == FloatVariable('logfvar', 1, 2.5, warp='log')
    assert searchspace['enumvar'] == EnumVariable('enumvar', [1, False])
    assert searchspace['jumpivar'] == EnumVariable('jumpivar', [1, 2, 3])
    assert searchspace['jumpfvar'] == EnumVariable('jumpfvar', [1.0, 2.0, 3.0])
    assert searchspace['logjumpivar'] == EnumVariable('logjumpivar',
                                                      [10, 100, 1000])
    assert searchspace['logjumpfvar'] == EnumVariable('logjumpfvar',
                                                      [10.0, 100.0, 1000.0])
def test_search_engine_moe_2():
    config = Config.fromdict({
        'strategy': {'name': 'moe', 'params': {'url': 'abc'}}
    }, check_fields=False)
    strat = config.strategy()
    assert isinstance(strat, MOE)
    assert strat.url == 'abc'
def test_estimator_entry_point():
    config = Config.fromdict({
        'estimator': {'entry_point': 'sklearn.cluster.KMeans'}
    }, check_fields=False)
    assert isinstance(config.estimator(), KMeans)
def test_stratified_cv():
    from sklearn.cross_validation import StratifiedShuffleSplit
    config = Config.fromdict({
        'cv': {'name': 'stratifiedshufflesplit', 'params': {'n_iter': 10}}
    }, check_fields=False)
    cv = config.cv(range(100), np.random.randint(2, size=100))
    assert isinstance(cv, StratifiedShuffleSplit)
    assert cv.n_iter == 10
def test_estimator_eval_2():
    config = Config.fromdict({
        'estimator': {'eval': 'KMeans()', 'eval_scope': ['sklearn']}
    }, check_fields=False)
    assert isinstance(config.estimator(), KMeans)
def test_cv_1():
    from sklearn.cross_validation import ShuffleSplit
    for name in ['shufflesplit', 'ShuffleSplit']:
        config = Config.fromdict({
            'cv': {'name': name, 'params': {'n_iter': 10}}
        }, check_fields=False)
        cv = config.cv(range(100))
        assert isinstance(cv, ShuffleSplit)
        assert cv.n_iter == 10
def test_estimator_pickle():
    with tempfile.NamedTemporaryFile('w+b', 0) as f:
        cPickle.dump(KMeans(), f)
        config = Config.fromdict({
            'estimator': {'pickle': f.name}
        }, check_fields=False)
        assert isinstance(config.estimator(), KMeans)
def test_estimator_entry_point_params():
    config = Config.fromdict({
        'estimator': {
            'entry_point': 'sklearn.cluster.KMeans',
            'params': {'n_clusters': 15}
        }
    }, check_fields=False)
    assert isinstance(config.estimator(), KMeans)
    assert config.estimator().n_clusters == 15
def test_stratified_cv():
    from sklearn.model_selection import StratifiedShuffleSplit
    config = Config.fromdict({
        'cv': {'name': 'stratifiedshufflesplit', 'params': {'n_splits': 10}}
    }, check_fields=False)
    cv = config.cv(range(100), np.random.randint(2, size=100))
    assert isinstance(cv, StratifiedShuffleSplit)
    assert cv.n_splits == 10
def test_search_space():
    config = Config.fromdict({
        'search_space': {
            'intvar': {'type': 'int', 'min': 1, 'max': 2},
            'fvar': {'type': 'float', 'min': 1, 'max': 3.5},
            'logvar': {'type': 'float', 'min': 1, 'max': 2.5, 'warp': 'log'},
            'enumvar': {'type': 'enum', 'choices': [1, False]},
        }}, check_fields=False)

    searchspace = config.search_space()
    assert searchspace['intvar'] == IntVariable('intvar', 1, 2)
    assert searchspace['fvar'] == FloatVariable('fvar', 1, 3.5, warp=None)
    assert searchspace['logvar'] == FloatVariable('logvar', 1, 2.5, warp='log')
    assert searchspace['enumvar'] == EnumVariable('enumvar', [1, False])
def test_cv_1():
    from sklearn.model_selection import ShuffleSplit
    for name in ['shufflesplit', 'ShuffleSplit']:
        config = Config.fromdict({
            'cv': {'name': name, 'params': {'n_splits': 10}}
        }, check_fields=False)
        cv = config.cv(range(100))
        assert isinstance(cv, ShuffleSplit)
        assert cv.n_splits == 10
def test_1():
    assert OSPREY_BIN is not None
    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()
    try:
        os.chdir(dirname)
        subprocess.check_call([OSPREY_BIN, 'skeleton', '-t', 'msmbuilder',
                               '-f', 'config.yaml'])
        assert os.path.exists('config.yaml')
        with open('config.yaml', 'rb') as f:
            yaml.load(f)
        Config('config.yaml')
    finally:
        os.chdir(cwd)
        shutil.rmtree(dirname)
def test_search_engine_bayes():
    config = Config.fromdict({
        'strategy': {'name': 'bayes'}
    }, check_fields=False)
    assert isinstance(config.strategy(), Bayes)
def test_search_engine_hyperopt_tpe():
    config = Config.fromdict({
        'strategy': {'name': 'hyperopt_tpe'}
    }, check_fields=False)
    assert isinstance(config.strategy(), HyperoptTPE)
def test_search_engine_moe_1():
    config = Config.fromdict({
        'strategy': {'name': 'moe', 'params': {'url': 'sdfsdf'}}
    }, check_fields=False)
    assert isinstance(config.strategy(), MOE)
def test_strategy_random():
    config = Config.fromdict({
        'strategy': {'name': 'random'}
    }, check_fields=False)
    assert isinstance(config.strategy(), RandomSearch)
def test_scoring():
    config = Config.fromdict({'scoring': 'sdfsfsdf'}, check_fields=False)
    assert config.scoring() == 'sdfsfsdf'
def test_random_seed():
    config = Config.fromdict({'random_seed': 42}, check_fields=False)
    assert config.random_seed() == 42
from sys import argv

from sklearn.model_selection import KFold

from osprey.config import Config
from osprey.trials import Trial, make_session

if len(argv) != 4:
    print('usage: sample_db.py config.yaml sample_size n_samples')
    exit(1)

inp_file = argv[1]
num = int(argv[2])   # TOTAL size of samples to use, e.g. 100
iter = int(argv[3])  # Number of splits, e.g. 5
# This will give 5 splits of 20 samples each.

if num % iter != 0:
    print('sample_size not strictly divisible by n_samples')
    exit(1)

# Get original database and history
config1 = Config(inp_file)
df1 = config1.trial_results()
hist1 = config1.trials().query(Trial).all()

# Main loop
for name, group in df1.groupby('project_name'):
    # Sample the group
    sample = group.sample(num, random_state=42)
    cv = KFold(n_splits=iter, random_state=42)
    all_keep = sample['id'].values
    for i, (_, test_idx) in enumerate(cv.split(all_keep)):
        keep = all_keep[test_idx]
        db2 = make_session('sqlite:///osprey-trials-{0}-{1}.db'.format(int(num / iter), i),
                           project_name=name)
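# Example invocation (illustrative), matching the usage string above:
# sample 100 trials per project and write 5 databases of 20 trials each.
#
#   python sample_db.py config.yaml 100 5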
# Imports
import sys

from osprey.config import Config

if len(sys.argv) != 2:
    print('Usage: count_project_trials [config file]')
    sys.exit(1)

# Load Configuration File
my_config = sys.argv[1]
config = Config(my_config)

# Retrieve Trial Results
df = config.trial_results()
print(df['project_name'].value_counts())
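# Example invocation (illustrative; the script file name is assumed here):
#
#   python count_project_trials.py config.yaml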
    results = {
        'id': id_num,
        'cse_train_scores': train_scores,
        'cse_train_gaps': train_gaps,
        'cse_train_n_timescales': train_n_timescales,
        'cse_test_scores': test_scores
    }
    return results


if __name__ == "__main__":
    np.random.seed(42)

    config = Config(config_path)
    trials = config.trial_results()
    trials = trials.sort_values(by='mean_test_score', ascending=False)
    # Select the top ten percent
    trials = trials.iloc[160:, :]
    trial_configs = [get_parameters(irow) for irow in trials.iterrows()]

    n_cpu = int(os.environ['SLURM_JOB_CPUS_PER_NODE'])
    print('Number of cpus detected {}'.format(n_cpu))

    pool = Pool(n_cpu)
    results = pool.imap_unordered(run_trial, trial_configs)
    results = list(results)
    all_ids = [x['id'] for x in results]
# # Load the data

# In[4]:

root_dir = 'fs-peptide'

# Load Configuration Files
databases = {
    'bayesian': root_dir + '/gp-m52-ei-tica-indv/config-all_tor.yaml',
    'random': root_dir + '/rand-tica-indv/config_random-all_tor.yaml',
    'sobol': root_dir + '/sobol-tica-indv/config-all_tor.yaml',
    'tpe': root_dir + '/tpe-s20-g25-tica-indv/config-all_tor.yaml'
}

all_dfs = []
for k, v in databases.items():
    config = Config(v)
    df = config.trial_results()
    df['method'] = k
    all_dfs.append(df)
df_all = pd.concat(all_dfs)


# In[7]:

df_all.head()


# # Drop unnecessary columns and rename

# In[67]:

df = df_all.loc[:, [