Exemple #1
0
def test_trial_results():
    """Run the skeleton and worker commands, then check the results frame.

    Everything happens inside a temporary directory, which is removed (and
    the previous working directory restored) even when an assertion fails.
    """
    assert OSPREY_BIN is not None
    original_dir = os.path.abspath(os.curdir)
    workdir = tempfile.mkdtemp()

    try:
        os.chdir(workdir)
        skeleton_cmd = [OSPREY_BIN, 'skeleton', '-t', 'random_example',
                        '-f', 'config.yaml']
        worker_cmd = [OSPREY_BIN, 'worker', 'config.yaml', '-n', '5']
        subprocess.check_call(skeleton_cmd)
        subprocess.check_call(worker_cmd)
        assert os.path.exists('osprey-trials.db')

        results = Config('config.yaml').trial_results()

        # The worker was asked for 5 iterations ('-n 5').
        assert results.shape[0] == 5

        # Every column of the Trial table must be present in the frame.
        for column in Trial.__table__.columns.keys():
            assert column in results.columns

    finally:
        os.chdir(original_dir)
        shutil.rmtree(workdir)
Exemple #2
0
def data_from_config(config_file):
    """
    Build the trial history for ``config_file`` and pass it to ``get_data``.

    Each history entry is ``[parameters, test_scores, status, elapsed]``
    for one stored trial. The result is whatever tuple ``get_data``
    returns, with the search space appended as its last element.
    """
    config = Config(config_file)
    trials_session = config.trials()
    searchspace = config.search_space()

    history = []
    for trial in trials_session.query(Trial).all():
        history.append([trial.parameters, trial.test_scores,
                        trial.status, trial.elapsed])

    data = get_data(history, searchspace)
    return data + (searchspace, )
Exemple #3
0
def test_search_space():
    """Check that every 'search_space' entry maps to the expected variable."""
    space = {
        'intvar': {'type': 'int', 'min': 1, 'max': 2},
        'logivar': {'type': 'int', 'min': 1, 'max': 2, 'warp': 'log'},
        'fvar': {'type': 'float', 'min': 1, 'max': 3.5},
        'logfvar': {'type': 'float', 'min': 1, 'max': 2.5, 'warp': 'log'},
        'enumvar': {'type': 'enum', 'choices': [1, False]},
        'jumpivar': {'type': 'jump', 'min': 1, 'max': 3, 'num': 3,
                     'var_type': int},
        'jumpfvar': {'type': 'jump', 'min': 1, 'max': 3, 'num': 3,
                     'var_type': float},
        'logjumpivar': {'type': 'jump', 'min': 10, 'max': 1000, 'num': 3,
                        'warp': 'log', 'var_type': int},
        'logjumpfvar': {'type': 'jump', 'min': 10, 'max': 1000, 'num': 3,
                        'warp': 'log', 'var_type': float},
    }
    config = Config.fromdict({'search_space': space}, check_fields=False)
    searchspace = config.search_space()

    # (key, expected variable) pairs, compared in this order.
    expected = (
        ('intvar', IntVariable('intvar', 1, 2, warp=None)),
        ('logivar', IntVariable('logivar', 1, 2, warp='log')),
        ('fvar', FloatVariable('fvar', 1, 3.5, warp=None)),
        ('logfvar', FloatVariable('logfvar', 1, 2.5, warp='log')),
        ('enumvar', EnumVariable('enumvar', [1, False])),
        ('jumpivar', EnumVariable('jumpivar', [1, 2, 3])),
        ('jumpfvar', EnumVariable('jumpfvar', [1.0, 2.0, 3.0])),
        ('logjumpivar', EnumVariable('logjumpivar', [10, 100, 1000])),
        ('logjumpfvar', EnumVariable('logjumpfvar', [10.0, 100.0, 1000.0])),
    )
    for key, variable in expected:
        assert searchspace[key] == variable
Exemple #4
0
def test_search_engine_moe_2():
    """Check that a 'moe' strategy is an MOE instance carrying the given url."""
    strategy_spec = {'name': 'moe', 'params': {'url': 'abc'}}
    config = Config.fromdict({'strategy': strategy_spec}, check_fields=False)
    engine = config.strategy()
    assert isinstance(engine, MOE)
    assert engine.url == 'abc'
Exemple #5
0
def test_estimator_entry_point():
    """Check that an 'entry_point' estimator spec yields a KMeans instance."""
    estimator_spec = {'entry_point': 'sklearn.cluster.KMeans'}
    config = Config.fromdict({'estimator': estimator_spec},
                             check_fields=False)
    built = config.estimator()
    assert isinstance(built, KMeans)
Exemple #6
0
def test_estimator_entry_point():
    """An estimator named by its dotted entry point must come out as KMeans."""
    settings = {'estimator': {'entry_point': 'sklearn.cluster.KMeans'}}
    estimator = Config.fromdict(settings, check_fields=False).estimator()
    assert isinstance(estimator, KMeans)
Exemple #7
0
def test_stratified_cv():
    """Check that the 'stratifiedshufflesplit' cv spec builds the right splitter."""
    # NOTE(review): sklearn.cross_validation is the legacy module (removed in
    # later scikit-learn releases) -- confirm the pinned version provides it.
    from sklearn.cross_validation import StratifiedShuffleSplit
    cv_spec = {'name': 'stratifiedshufflesplit', 'params': {'n_iter': 10}}
    config = Config.fromdict({'cv': cv_spec}, check_fields=False)
    labels = np.random.randint(2, size=100)
    splitter = config.cv(range(100), labels)
    assert isinstance(splitter, StratifiedShuffleSplit)
    assert splitter.n_iter == 10
Exemple #8
0
def test_estimator_eval_2():
    """Check that an 'eval' / 'eval_scope' estimator spec yields a KMeans."""
    estimator_spec = {'eval': 'KMeans()', 'eval_scope': ['sklearn']}
    config = Config.fromdict({'estimator': estimator_spec},
                             check_fields=False)
    built = config.estimator()
    assert isinstance(built, KMeans)
Exemple #9
0
def test_estimator_eval_2():
    """An estimator given as an 'eval' string with a scope must be a KMeans."""
    settings = {'estimator': {'eval': 'KMeans()', 'eval_scope': ['sklearn']}}
    estimator = Config.fromdict(settings, check_fields=False).estimator()
    assert isinstance(estimator, KMeans)
Exemple #10
0
def test_cv_1():
    """Check that both spellings of the shuffle-split name build a ShuffleSplit."""
    # NOTE(review): sklearn.cross_validation is the legacy module (removed in
    # later scikit-learn releases) -- confirm the pinned version provides it.
    from sklearn.cross_validation import ShuffleSplit
    for spelling in ('shufflesplit', 'ShuffleSplit'):
        cv_spec = {'name': spelling, 'params': {'n_iter': 10}}
        config = Config.fromdict({'cv': cv_spec}, check_fields=False)
        splitter = config.cv(range(100))
        assert isinstance(splitter, ShuffleSplit)
        assert splitter.n_iter == 10
Exemple #11
0
def test_estimator_pickle():
    """Check that a 'pickle' estimator spec loads the KMeans dumped to a file."""
    # The second positional argument (0) requests an unbuffered file, so the
    # dump is written out without an explicit flush.
    with tempfile.NamedTemporaryFile('w+b', 0) as handle:
        cPickle.dump(KMeans(), handle)

        settings = {'estimator': {'pickle': handle.name}}
        config = Config.fromdict(settings, check_fields=False)
        assert isinstance(config.estimator(), KMeans)
Exemple #12
0
def test_estimator_pickle():
    """An estimator pickled to a temporary file must load back as a KMeans."""
    with tempfile.NamedTemporaryFile('w+b', 0) as pickle_file:
        # Written while the file is still open; it is deleted on exit.
        cPickle.dump(KMeans(), pickle_file)

        estimator_spec = {'pickle': pickle_file.name}
        config = Config.fromdict({'estimator': estimator_spec},
                                 check_fields=False)
        loaded = config.estimator()
        assert isinstance(loaded, KMeans)
Exemple #13
0
def test_estimator_entry_point_params():
    """Check that 'params' are applied to an entry-point estimator."""
    estimator_spec = {
        'entry_point': 'sklearn.cluster.KMeans',
        'params': {'n_clusters': 15},
    }
    config = Config.fromdict({'estimator': estimator_spec},
                             check_fields=False)
    # estimator() is called once per assertion, as in the original test.
    assert isinstance(config.estimator(), KMeans)
    assert config.estimator().n_clusters == 15
Exemple #14
0
def test_estimator_entry_point_params():
    """An entry-point estimator must be a KMeans with the configured n_clusters."""
    params = {'n_clusters': 15}
    settings = {'estimator': {'entry_point': 'sklearn.cluster.KMeans',
                              'params': params}}
    config = Config.fromdict(settings, check_fields=False)
    # Two separate estimator() calls, one per assertion.
    assert isinstance(config.estimator(), KMeans)
    assert config.estimator().n_clusters == 15
Exemple #15
0
def test_stratified_cv():
    """Check that the 'stratifiedshufflesplit' cv spec builds the right splitter."""
    from sklearn.model_selection import StratifiedShuffleSplit
    cv_spec = {'name': 'stratifiedshufflesplit', 'params': {'n_splits': 10}}
    config = Config.fromdict({'cv': cv_spec}, check_fields=False)
    labels = np.random.randint(2, size=100)
    splitter = config.cv(range(100), labels)
    assert isinstance(splitter, StratifiedShuffleSplit)
    assert splitter.n_splits == 10
Exemple #16
0
def test_search_space():
    """Check that each 'search_space' entry maps to the expected variable."""
    space = {
        'intvar': {'type': 'int', 'min': 1, 'max': 2},
        'fvar': {'type': 'float', 'min': 1, 'max': 3.5},
        'logvar': {'type': 'float', 'min': 1, 'max': 2.5, 'warp': 'log'},
        'enumvar': {'type': 'enum', 'choices': [1, False]},
    }
    config = Config.fromdict({'search_space': space}, check_fields=False)
    searchspace = config.search_space()

    # (key, expected variable) pairs, compared in this order.
    expected = (
        ('intvar', IntVariable('intvar', 1, 2)),
        ('fvar', FloatVariable('fvar', 1, 3.5, warp=None)),
        ('logvar', FloatVariable('logvar', 1, 2.5, warp='log')),
        ('enumvar', EnumVariable('enumvar', [1, False])),
    )
    for key, variable in expected:
        assert searchspace[key] == variable
Exemple #17
0
def test_cv_1():
    """Check that both spellings of the shuffle-split name build a ShuffleSplit."""
    from sklearn.model_selection import ShuffleSplit
    for spelling in ('shufflesplit', 'ShuffleSplit'):
        cv_spec = {'name': spelling, 'params': {'n_splits': 10}}
        config = Config.fromdict({'cv': cv_spec}, check_fields=False)
        splitter = config.cv(range(100))
        assert isinstance(splitter, ShuffleSplit)
        assert splitter.n_splits == 10
Exemple #18
0
def test_trial_results():
    """Generate a config, run five worker iterations and inspect the results.

    The commands run in a scratch directory; the ``finally`` clause restores
    the previous working directory and deletes the scratch directory.
    """
    assert OSPREY_BIN is not None
    start_dir = os.path.abspath(os.curdir)
    scratch = tempfile.mkdtemp()

    try:
        os.chdir(scratch)
        for arguments in (
                ['skeleton', '-t', 'random_example', '-f', 'config.yaml'],
                ['worker', 'config.yaml', '-n', '5']):
            subprocess.check_call([OSPREY_BIN] + arguments)
        assert os.path.exists('osprey-trials.db')

        config = Config('config.yaml')
        frame = config.trial_results()

        # One row per worker iteration requested with '-n 5'.
        assert frame.shape[0] == 5

        # The frame must expose every column of the Trial table.
        for column_name in Trial.__table__.columns.keys():
            assert column_name in frame.columns

    finally:
        os.chdir(start_dir)
        shutil.rmtree(scratch)
Exemple #19
0
def test_1():
    """Generate the 'msmbuilder' skeleton and check that it parses and loads.

    Runs the skeleton command inside a temporary directory, verifies that
    ``config.yaml`` was written, that it is parseable YAML, and that
    ``Config`` accepts it. The temporary directory is removed and the
    previous working directory restored even when a step fails.
    """
    assert OSPREY_BIN is not None
    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()

    try:
        os.chdir(dirname)
        subprocess.check_call([OSPREY_BIN, 'skeleton', '-t', 'msmbuilder',
                              '-f', 'config.yaml'])
        assert os.path.exists('config.yaml')
        with open('config.yaml', 'rb') as f:
            # safe_load instead of a bare yaml.load: calling load without a
            # Loader is deprecated and rejected by newer PyYAML releases,
            # and the test only needs to confirm the file parses.
            yaml.safe_load(f)
        Config('config.yaml')

    finally:
        os.chdir(cwd)
        shutil.rmtree(dirname)
Exemple #20
0
def test_search_engine_bayes():
    """Check that the 'bayes' strategy name yields a Bayes instance."""
    settings = {'strategy': {'name': 'bayes'}}
    engine = Config.fromdict(settings, check_fields=False).strategy()
    assert isinstance(engine, Bayes)
Exemple #21
0
def test_search_engine_hyperopt_tpe():
    """Check that the 'hyperopt_tpe' strategy name yields a HyperoptTPE."""
    settings = {'strategy': {'name': 'hyperopt_tpe'}}
    engine = Config.fromdict(settings, check_fields=False).strategy()
    assert isinstance(engine, HyperoptTPE)
Exemple #22
0
def test_search_engine_moe_1():
    """Check that a 'moe' strategy with a url parameter yields an MOE instance."""
    strategy_spec = {'name': 'moe', 'params': {'url': 'sdfsdf'}}
    config = Config.fromdict({'strategy': strategy_spec}, check_fields=False)
    engine = config.strategy()
    assert isinstance(engine, MOE)
Exemple #23
0
def test_search_engine_hyperopt_tpe():
    """A strategy named 'hyperopt_tpe' must be built as a HyperoptTPE."""
    strategy_spec = {'name': 'hyperopt_tpe'}
    config = Config.fromdict({'strategy': strategy_spec}, check_fields=False)
    built = config.strategy()
    assert isinstance(built, HyperoptTPE)
Exemple #24
0
def test_strategy_random():
    """Check that the 'random' strategy name yields a RandomSearch instance."""
    settings = {'strategy': {'name': 'random'}}
    engine = Config.fromdict(settings, check_fields=False).strategy()
    assert isinstance(engine, RandomSearch)
Exemple #25
0
def test_scoring():
    """Check that ``Config.scoring`` returns the configured 'scoring' value."""
    config = Config.fromdict({'scoring': 'sdfsfsdf'}, check_fields=False)
    # Compare by value: `is` against a string literal only passes when both
    # strings happen to be the same interned object, and Python 3.8+ emits a
    # SyntaxWarning for it.
    assert config.scoring() == 'sdfsfsdf'
Exemple #26
0
def test_search_engine_bayes():
    """A strategy named 'bayes' must be built as a Bayes instance."""
    strategy_spec = {'name': 'bayes'}
    config = Config.fromdict({'strategy': strategy_spec}, check_fields=False)
    built = config.strategy()
    assert isinstance(built, Bayes)
Exemple #27
0
def test_search_space():
    """Check that every 'search_space' entry maps to the expected variable.

    Covers int, float and enum entries, log-warped variants, and 'jump'
    entries, which are compared against EnumVariable instances.
    """
    space = {
        'intvar': {'type': 'int', 'min': 1, 'max': 2},
        'logivar': {'type': 'int', 'min': 1, 'max': 2, 'warp': 'log'},
        'fvar': {'type': 'float', 'min': 1, 'max': 3.5},
        'logfvar': {'type': 'float', 'min': 1, 'max': 2.5, 'warp': 'log'},
        'enumvar': {'type': 'enum', 'choices': [1, False]},
        'jumpivar': {'type': 'jump', 'min': 1, 'max': 3, 'num': 3,
                     'var_type': int},
        'jumpfvar': {'type': 'jump', 'min': 1, 'max': 3, 'num': 3,
                     'var_type': float},
        'logjumpivar': {'type': 'jump', 'min': 10, 'max': 1000, 'num': 3,
                        'warp': 'log', 'var_type': int},
        'logjumpfvar': {'type': 'jump', 'min': 10, 'max': 1000, 'num': 3,
                        'warp': 'log', 'var_type': float},
    }
    config = Config.fromdict({'search_space': space}, check_fields=False)
    searchspace = config.search_space()

    # (key, expected variable) pairs, compared in this order.
    expected = (
        ('intvar', IntVariable('intvar', 1, 2, warp=None)),
        ('logivar', IntVariable('logivar', 1, 2, warp='log')),
        ('fvar', FloatVariable('fvar', 1, 3.5, warp=None)),
        ('logfvar', FloatVariable('logfvar', 1, 2.5, warp='log')),
        ('enumvar', EnumVariable('enumvar', [1, False])),
        ('jumpivar', EnumVariable('jumpivar', [1, 2, 3])),
        ('jumpfvar', EnumVariable('jumpfvar', [1.0, 2.0, 3.0])),
        ('logjumpivar', EnumVariable('logjumpivar', [10, 100, 1000])),
        ('logjumpfvar', EnumVariable('logjumpfvar', [10.0, 100.0, 1000.0])),
    )
    for key, variable in expected:
        assert searchspace[key] == variable
Exemple #28
0
def test_random_seed():
    """Check that ``Config.random_seed`` returns the configured seed."""
    settings = {'random_seed': 42}
    seed = Config.fromdict(settings, check_fields=False).random_seed()
    assert seed == 42
Exemple #29
0
# Command-line handling: exactly three arguments are expected after the
# script name (config file, total sample size, number of splits).
if len(argv) != 4:
    print('usage: sample_db.py config.yaml sample_size n_samples')
    exit(1)

inp_file = argv[1]
num = int(argv[2]) # Total number of samples to use, e.g. 100
iter = int(argv[3])  # Number of splits, e.g. 5
# With the example values above this gives 5 splits of 20 samples each.
# NOTE(review): `iter` shadows the builtin of the same name for the rest of
# the module.

# The total must divide evenly into the requested number of splits.
if num % iter != 0:
    print('sample_size not strictly divisible by n_samples')
    exit(1)

# Get original database and history
config1 = Config(inp_file)
df1 = config1.trial_results()
hist1 = config1.trials().query(Trial).all()


# Main loop
for name, group in df1.groupby('project_name'):
    # Draw a reproducible sample of `num` rows from this project's trials.
    sample = group.sample(num, random_state=42)
    # No random_state here: the folds are not shuffled, so a seed has no
    # effect on the split, and (assuming KFold is scikit-learn's) newer
    # releases raise ValueError when a seed is passed without shuffle=True.
    cv = KFold(n_splits=iter)
    all_keep = sample['id'].values
    # Only the held-out indices of each fold are used; the training indices
    # are discarded.
    for i, (_, test_idx) in enumerate(cv.split(all_keep)):

        keep = all_keep[test_idx]

        # Database file name encodes the per-split sample count and the fold
        # index; `num` was checked above to be divisible by `iter`.
        db2 = make_session('sqlite:///osprey-trials-{0}-{1}.db'.format(num // iter, i), project_name=name)
Exemple #30
0
def test_scoring():
    """Check that ``Config.scoring`` returns the configured 'scoring' value."""
    config = Config.fromdict({
        'scoring': 'sdfsfsdf'
    }, check_fields=False)
    # Compare by value: `is` against a string literal only passes when both
    # strings happen to be the same interned object, and Python 3.8+ emits a
    # SyntaxWarning for it.
    assert config.scoring() == 'sdfsfsdf'
# Imports
from osprey.config import Config
import sys

# Expect exactly one argument: the path to the config file.
if len(sys.argv) != 2:
    print('Usage: count_project_trails [config file]')
    # Stop here; without this the script carried on after printing the usage
    # line and went on to index sys.argv[1] regardless.
    sys.exit(1)

# Load Configuration File
my_config = sys.argv[1]

config = Config(my_config)

# Retrieve Trial Results
df = config.trial_results()
# Print the number of trials recorded for each project name.
print(df['project_name'].value_counts())
Exemple #32
0
    results = {
        'id': id_num,
        'cse_train_scores': train_scores,
        'cse_train_gaps': train_gaps,
        'cse_train_n_timescales': train_n_timescales,
        'cse_test_scores': test_scores
    }

    return results


# Script entry point: load the trial results, pick a subset of trials and run
# `run_trial` on each of them in a worker pool.
if __name__ == "__main__":

    # Seed NumPy's global random state so np.random draws are repeatable.
    np.random.seed(42)

    config = Config(config_path)
    trials = config.trial_results()
    # Highest mean test score first.
    trials = trials.sort_values(by='mean_test_score', ascending=False)
    # Select the top ten percent
    # NOTE(review): after the descending sort, iloc[160:] keeps every row from
    # position 160 onward (the lower-scoring end), not the leading rows --
    # confirm this matches the "top ten percent" intent.
    trials = trials.iloc[160:, :]
    # iterrows() yields (index, row) pairs; each pair is handed to
    # get_parameters as a single argument.
    trial_configs = [get_parameters(irow) for irow in trials.iterrows()]

    # Raises KeyError when SLURM_JOB_CPUS_PER_NODE is not set in the environment.
    n_cpu = int(os.environ['SLURM_JOB_CPUS_PER_NODE'])
    print('Number of cpus detected {}'.format(n_cpu))

    # presumably multiprocessing.Pool -- TODO confirm against the imports.
    pool = Pool(n_cpu)
    results = pool.imap_unordered(run_trial, trial_configs)

    # Exhaust the iterator so every result is collected into a list.
    results = list(results)

    all_ids = [x['id'] for x in results]
Exemple #33
0
def test_strategy_random():
    """A strategy named 'random' must be built as a RandomSearch instance."""
    strategy_spec = {'name': 'random'}
    config = Config.fromdict({'strategy': strategy_spec}, check_fields=False)
    built = config.strategy()
    assert isinstance(built, RandomSearch)
Exemple #34
0
# # Load the data

# In[4]:

# Directory that all config paths below are built from.
root_dir = 'fs-peptide'
# Load Configuration Files
# Maps a method label to the config file path used for that method.
databases = {
    'bayesian': root_dir + '/gp-m52-ei-tica-indv/config-all_tor.yaml',
    'random': root_dir + '/rand-tica-indv/config_random-all_tor.yaml',
    'sobol': root_dir + '/sobol-tica-indv/config-all_tor.yaml',
    'tpe': root_dir + '/tpe-s20-g25-tica-indv/config-all_tor.yaml'
}

# Collect one trial-results frame per method, tagging every row with the
# method label it came from.
all_dfs = []
for k, v in databases.items():
    config = Config(v)
    df = config.trial_results()
    df['method'] = k
    all_dfs.append(df)

# Concatenate the per-method frames into a single frame.
df_all = pd.concat(all_dfs)

# In[7]:

# Notebook cell: display the first rows of the combined frame.
df_all.head()

# # Drop unnecessary columns and rename

# In[67]:

df = df_all.loc[:, [
Exemple #35
0
def test_random_seed():
    """The configured 'random_seed' must be returned by ``Config.random_seed``."""
    config = Config.fromdict({'random_seed': 42}, check_fields=False)
    seed = config.random_seed()
    assert seed == 42