Example #1
    def __iter__(self):
        """Search parameter distribution for unique states.

        As each state is defined using hp.choice, we don't explicitly know
        every unique state that our estimator can be set to. We sample the
        distribution of states up to max_tries times to collect these unique
        states and return an iterable of them. If max_tries is None (set in
        the constructor), we sample the search space once per iteration and
        add every sampled value, duplicates included.

        Returns:
            iterable of unique states.

        """
        # repeatedly sample the space, skipping duplicates, so that when
        # max_tries is set we approximately sample without replacement
        rng = check_random_state(self.random_state)
        prev_samples = []
        max_tries = self.max_tries if self.max_tries is not None else 1
        for _ in range(self.n_iter):
            sample = stoch.sample(self.param_distributions(), rng=rng)
            n_tries = 0
            while sample not in prev_samples or n_tries < max_tries:
                if sample not in prev_samples or self.max_tries is None:
                    prev_samples.append(sample)
                    break
                sample = stoch.sample(self.param_distributions(), rng=rng)
                n_tries += 1
        return iter(prev_samples)
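
The de-duplicating idea above can be sketched standalone; the space, seed, and max_tries below are illustrative assumptions, not from the source:

import numpy as np
from hyperopt import hp
from hyperopt.pyll import stochastic

space = {"kernel": hp.choice("kernel", ["linear", "rbf", "poly"])}
rng = np.random.RandomState(0)
seen, max_tries = [], 50
for _ in range(max_tries):
    s = stochastic.sample(space, rng=rng)
    if s not in seen:
        seen.append(s)  # keep only the first occurrence
print(seen)  # unique parameter settings, in sampling order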
Example #2
def add_trials(points):
    test_trials = Trials()

    for tid, row in enumerate(points):
        vals = {}
        for key in sample(space).keys():
            vals[key] = [row['params'][key]]

        hyperopt_trial = Trials().new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[row],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'workdir': None,
                'idxs': {
                    **{key: [tid]
                       for key in sample(space).keys()}
                },
                'vals': vals
            }])

        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE

        test_trials.insert_trial_docs(hyperopt_trial)
        test_trials.refresh()

    return test_trials
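
A common follow-up, sketched under the assumption that an `objective` function and the same `space` and `points` exist: warm-start fmin with the pre-populated Trials.

from hyperopt import fmin, tpe

trials = add_trials(points)
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            trials=trials, max_evals=len(points) + 20)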
Example #3
def test_repeatable():
    u = scope.uniform(0, 1)
    aa = as_apply(
        dict(u=u, n=scope.normal(5, 0.1), l=[0, 1, scope.one_of(2, 3), u]))
    dd1 = sample(aa, np.random.RandomState(3))
    dd2 = sample(aa, np.random.RandomState(3))
    dd3 = sample(aa, np.random.RandomState(4))
    assert dd1 == dd2
    assert dd1 != dd3
Example #4
def test_repeatable():
    u = scope.uniform(0, 1)
    aa = as_apply(dict(
        u=u,
        n=scope.normal(5, 0.1),
        l=[0, 1, scope.one_of(2, 3), u]))
    dd1 = sample(aa, np.random.RandomState(3))
    dd2 = sample(aa, np.random.RandomState(3))
    dd3 = sample(aa, np.random.RandomState(4))
    assert dd1 == dd2
    assert dd1 != dd3
Example #5
    def xgb_cv(self, space):
        """
        Function to perform XGBoost Cross-Validation with stochastic parameters
        for hyperparameter optimization.

        Parameters
        ----------
        space: dict
            The set of possible arguments to `fn` is the set of objects
            that could be created with non-zero probability by drawing
            randomly from this stochastic program involving hp.
        """

        # run cross-validation on a freshly sampled parameter set
        # (note: the `space` argument is unused; parameters are re-sampled
        # from self.space on every call)
        cvr = xgb.cv(
            params=sample(self.space),
            dtrain=self.dtrain_,
            num_boost_round=self.num_boost_round,
            nfold=self.n_splits,
            stratified=self.stratified,
            metrics=self.metrics,
            early_stopping_rounds=self.early_stopping_rounds,
            seed=self.random_state,
            shuffle=self.shuffle,
            verbose_eval=self.verbose,
        )

        # final-round mean test metric as a scalar loss
        # (iloc[-1:, 0] returns a one-row Series, not the scalar hyperopt expects)
        loss = cvr.iloc[-1, 0]

        return {"loss": loss, "status": STATUS_OK}
Example #6
def test():
    from hyperopt.pyll.stochastic import sample

    space = get_space(overfit=True)

    for _ in range(10):
        print(fix_args(**sample(space)))
Example #7
def PosteriorPlot(space_search, trials, Nsamples=1000):
    # map each parameter to an array of (sampled value, loss) pairs from the trials
    d = {}
    for k in space_search.keys():
        d[k] = np.array([ (t['misc']['vals'][k][0], t['result']['loss']) for t in trials.trials])

    # draw Nsamples values per parameter from the prior
    samples = {}
    for k in d.keys():
        samples[k] = [sample(space_search)[k] for x in range(Nsamples)]

    # plot prior and posterior
    # inspired by https://github.com/MBKraus/Hyperopt/blob/master/Hyperopt.ipynb
    # https://www.codementor.io/mikekraus/using-bayesian-optimisation-to-reduce-the-time-spent-on-hyperparameter-tuning-tgc3ikmp2
    for k in d.keys():
        f, ax = plt.subplots(figsize=(10,6))
        sns.set_palette("husl")
        sns.despine()
        ax = sns.kdeplot(np.array(samples[k]), label = 'Prior', linewidth = 3)
        ax = sns.kdeplot(d[k][:, 0], label = 'Posterior (as complete path)', linewidth = 3)
        ax.set_ylabel('Density', fontsize=12, fontweight='bold')
        ax2 = ax.twinx()
        ax2.scatter(d[k][:, 0], d[k][:, 1], c='blue', label='Loss ind. value')
        ax2.set_ylabel('loss', fontsize=12, fontweight='bold', color='blue')
        plt.title(k, fontsize=18, fontweight='bold')
        plt.xlabel(k, fontsize=12, fontweight='bold')
        plt.legend()
        # plt.setp(ax.get_legend().get_texts(), fontsize='12', fontweight='bold')
        plt.show()
Example #8
def RandomSample():
    space = {
        'DROPOUT': hp.choice('drop', (0.2, 0.5)),
        'DELTA': hp.choice('delta', (1e-04, 1e-06, 1e-08)),
        'MOMENT': hp.choice('moment', (0.9, 0.99, 0.999)),
    }
    params = sample(space)
    return params
Example #9
    def sample_space(self, space):
        value = stochastic.sample(space)

        if isinstance(value, str):
            value = value.strip()
            if " " in value:
                # surround with quotes so it is treated as a single entity
                value = '"' + value + '"'

        return value
Example #10
def get_params():
    """El objetivo de esta funcion es seleccionar aleatoriamente una configuracion
    determinada.

    :returns:
        Retorna una configuracion para el modelo correspondiente.
    """
    params = sample(space)
    params = {k: v for k, v in params.items() if v != "default"}
    return handle_integers(params)
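
`handle_integers` is not shown in this snippet; a minimal sketch (an assumption, but consistent with the float-to-int pattern in Examples #13 and #19 below):

def handle_integers(params):
    # cast whole-valued floats back to int, leave everything else alone
    return {k: int(v) if isinstance(v, float) and v.is_integer() else v
            for k, v in params.items()}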
Example #11
    def next(self):
        value = stochastic.sample(self.dist_func)

        if isinstance(value, str):
            value = value.strip()
            if " " in value:
                # surround with quotes so it is treated as a single entity
                value = '"' + value + '"'

        return value
Example #12
def test_sample():
    u = scope.uniform(0, 1)
    aa = as_apply(
        dict(u=u, n=scope.normal(5, 0.1), l=[0, 1, scope.one_of(2, 3), u]))
    print(aa)
    dd = sample(aa, np.random.RandomState(3))
    assert 0 < dd["u"] < 1
    assert 4 < dd["n"] < 6
    assert dd["u"] == dd["l"][3]
    assert dd["l"][:2] == (0, 1)
    assert dd["l"][2] in (2, 3)
Example #13
def get_params():

    params = sample(space)
    new_params = {}
    # cast floats that hold whole numbers back to int
    for k, v in params.items():
        if isinstance(v, float) and int(v) == v:
            new_params[k] = int(v)
        else:
            new_params[k] = v

    return new_params
Example #14
    def run(self):
        space = {k: hp.choice(k, v) for k, v in self.choices.items()}
        while True:
            values = sample(space)
            yield PredictMLPv2ThresholdVariable(
                mode='evaluation',
                num_orders_per_user=values['num_orders_per_user'],
                product_history=values['product_history'],
                product_embedding=values['product_embedding'],
                hidden_layers=values['hidden_layers'],
                dropout=values['dropout'],
                global_orders_ratio=0.25)
Example #15
def test_uniformint_arguments(arguments):
    """
    Test whether uniformint can accept both positional and keyword arguments.
    Related to PR #704.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    if isinstance(arguments, dict):
        space = hp.uniformint(**arguments)
    rng = np.random.default_rng(np.random.PCG64(123))
    values = [sample(space, rng=rng) for _ in range(10)]
    assert values == [7, 1, 2, 2, 2, 8, 9, 3, 8, 9]
Example #16
def test_uniformint_arguments(arguments):
    """
    Test whether uniformint can accept both positional and keyword arguments.
    Related to PR #704.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    if isinstance(arguments, dict):
        space = hp.uniformint(**arguments)
    rng = np.random.RandomState(123)
    values = [sample(space, rng=rng) for _ in range(10)]
    assert values == [7, 3, 2, 6, 7, 4, 10, 7, 5, 4]
Example #17
def get_params():
    space = {
        'learning_rate': hyperopt.hp.choice('lr', [hyperopt.hp.loguniform('lr_', -5., 0.)]),
        'subsample': hyperopt.hp.choice('ss', [hyperopt.hp.uniform('ss_', 0., 1.)]),
        'l2_leaf_reg': hyperopt.hp.choice('l2lr', [hyperopt.hp.loguniform('l2lr_', 0., np.log(10.))]),
        'random_strength': hyperopt.hp.choice('rs', [hyperopt.hp.choice('rs_', np.arange(1, 21))]),
        'leaf_estimation_iterations': hyperopt.hp.choice('lei', [hyperopt.hp.choice('lei_', np.arange(1, 11))])
    }

    params = sample(space)
    params = {k: v for k, v in params.items() if v != 'default'}
    return handle_integers(params)
Example #18
def test_sample():
    u = scope.uniform(0, 1)
    aa = as_apply(dict(
        u=u,
        n=scope.normal(5, 0.1),
        l=[0, 1, scope.one_of(2, 3), u]))
    print(aa)
    dd = sample(aa, np.random.RandomState(3))
    assert 0 < dd['u'] < 1
    assert 4 < dd['n'] < 6
    assert dd['u'] == dd['l'][3]
    assert dd['l'][:2] == (0, 1)
    assert dd['l'][2] in (2, 3)
Example #19
def get_params():

    params = sample(space)

    # handle floats which should be integers
    new_params = {}
    for k, v in params.items():
        if isinstance(v, float) and int(v) == v:
            new_params[k] = int(v)
        else:
            new_params[k] = v

    return new_params
Example #20
def sample_plot(self, sample_space, n_iter, chart_scale=15):
    """
    Documentation:

        ---
        Definition:
            Visualizes a single hyperopt theoretical distribution. Useful for helping to determine a
            distribution to use when setting up hyperopt distribution objects for actual parameter
            tuning.

        ---
        Parameters:
            sample_space : dictionary
                Dictionary of 'param name: hyperopt distribution object' key/value pairs. The name can
                be arbitrarily chosen, and the value is a defined hyperopt distribution.
            n_iter : int
                Number of draws from the theoretical distribution used to visualize it. A
                higher number gives a more faithful picture of the distribution but can
                take considerably longer to create.
            chart_scale : float, default=15
                Controls the proportions of the visualization. Larger values scale the
                visual up in size, smaller values scale it down.

    """
    # iterate through each parameter
    for param in sample_space.keys():

        # sample from theoretical distribution for n_iters
        theoretical_dist = []
        for _ in range(n_iter):
            theoretical_dist.append(sample(sample_space)[param])
        theoretical_dist = np.array(theoretical_dist)

        # create prettierplot object
        p = PrettierPlot(chart_scale=chart_scale)

        # add canvas to prettierplot object
        ax = p.make_canvas(
            title="actual vs. theoretical plot\n* {}".format(param),
            y_shift=0.8,
            position=111,
        )

        # add kernel density plot to canvas
        p.kde_plot(
            theoretical_dist,
            color=style.style_grey,
            y_units="p",
            x_units="fff" if np.nanmax(theoretical_dist) <= 5.0 else "ff",
            ax=ax,
        )
Example #21
    def sample_space(self) -> Any:
        """Sample from hyperparameter distributions.

        Parameters
        ----------
        None

        Returns
        -------
        dict
            Key/value pairs, where the key is the hyperparameter name and the value is a
            sample from the hyperparameter's statistical distribution.
        """
        hypers: Dict[str, Any] = {}
        for param, dist in self.space.items():
            hypers[param] = int(sample(dist)) \
                if param in ['n_estimators', 'max_delta_step', 'max_depth', 'min_child_weight'] \
                    else sample(dist)

        # Add seed
        hypers['random_state'] = self.seed

        return hypers
Example #22
def get_params_VAE(args):

    params = sample(space_VAE)
    params = handle_integers(params)
    params['train_folder'] = args.train_folder
    params['batchsize'] = args.batchsize
    params['seed'] = args.seed
    params['cuda'] = args.cuda
    params['load_model'] = args.load_model
    params['model_type'] = args.model_type
    params['time_gap'] = 1
    params['num_images'] = 1
    params['stat_data_file'] = args.stat_data_file
    return params
Example #23
    def run(self):
        space = {k: hp.choice(k, v) for k, v in self.choices.items()}
        while True:
            values = sample(space)
            yield PredictRNNv5ReorderSizeKnown(
                mode='evaluation',
                product_history=values['product_history'],
                embedding_dim=values['embedding_dim'],
                lstm_size=values['lstm_size'],
                lstm_layers=values['lstm_layers'],
                hidden_layers=values['hidden_layers'],
                hidden_nonlinearily=values['hidden_nonlinearily'],
                dropout=values['dropout'],
                global_orders_ratio=0.1)
Example #24
def RandomSample():
    space = {
        'DROPOUT': hp.choice('drop', (0.2, 0.5)),
        'DELTA': hp.choice('delta', (1e-04, 1e-06, 1e-08)),
        'MOMENT': hp.choice('moment', (0.9, 0.99, 0.999)),

        # 'DELTA1': hp.choice( 'delta1', ( 0.0001, 0.005)),
        # 'MOMENT1': hp.choice( 'moment1', (0.9, 0.99, 0.999 )),
        # # 'MOMENT2': hp.choice( 'moment11', (0.9, 0.99, 0.999 )),
        # 'DELTA2': hp.choice( 'delta1', ( 0.01, 0.001,0.1)),
        # 'MOMENT2': hp.choice( 'moment2', (0.99,0.9)),
    }
    params = sample(space)
    return params
Example #25
def build_hp_sample(num_trials: int) -> pd.DataFrame:
    from addict import Dict
    all_hps = [Dict(deepcopy(template)) for _ in range(num_trials)]
    for hp in all_hps:
        hp.update(flatten(dict(sample(SEARCH_SPACE))))
    df = pd.concat([
        pd.DataFrame.from_dict({k: [v]
                                for k, v in hp.items()}) for hp in all_hps
    ])
    for col in df.columns:
        if 'float' in str(df[col].dtype) and 'distance' not in col:
            df[col] = df[col].astype(int)
    return df
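
A hedged usage sketch, assuming `template`, `SEARCH_SPACE`, and `flatten` are importable from the source repo:

df = build_hp_sample(num_trials=8)
print(df.dtypes)  # float columns are cast to int unless 'distance' appears in the name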
Example #26
    def dry_run(
        B=None,
        nonfusibles_kvs=None,
        epochs=None,
        iters_per_epoch=None,
        env_vars=None,
    ):
        params = [{
            **handle_integers(sample(fusibles, rng=rng_state)),
            **nonfusibles_kvs
        } for _ in range(max(B, 1))]
        if B > 0:
            params = fuse_dicts(params)
        else:
            params = params[0]
        return _run(None, epochs, iters_per_epoch, params, env_vars=env_vars)
Example #27
def parameter_optimzation(space):
    # Sample one concrete point from the full space (note: passing this fixed
    # point to fmin below leaves the optimizer no free parameters to search)
    x = sample(space)
    x['num_leaves'] = int(x['num_leaves'])
    # Create the parameter optimization algorithm.
    tpe_algorithm = tpe.suggest
    # Record results
    trials = Trials()
    # Run optimization
    fmin(fn=boosting_cv,
         space=x,
         algo=tpe_algorithm,
         trials=trials,
         max_evals=MAX_EVALS)
    # Keep the single trial with the lowest loss
    trials = sorted(trials.results, key=lambda x: x['loss'])
    return trials[:1]
Example #28
    def run(self):
        space = {k: hp.choice(k, v) for k, v in self.choices.items()}
        while True:
            values = sample(space)
            yield PredictRNNv3ReorderSizeKnown(
                mode='evaluation',
                max_days=values['max_days'],
                max_products_per_day=values['max_products_per_day'],
                max_prior_orders=values['max_prior_orders'],
                embedding_dim=values['embedding_dim'],
                lstm_layers=values['lstm_layers'],
                lstm_units=values['lstm_units'],
                hidden_layers=values['hidden_layers'],
                dropout=values['dropout'],
                global_orders_ratio=0.25,
                validation_orders_ratio=0.1,
                batch_size=1024,
                epochs=10)
Example #29
def get_sampled_params_for_lm(space, index=1):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10**(sample['learning_rate'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['num_layers'] = int(sample['num_layers'])

    print("Sweep ", index, sample)

    output = 'lr_%.5f_do_%.1f_nl_%d_hs_%d_es_%d.out' % (
        sample['learning_rate'], sample['dropout'], sample['num_layers'],
        sample['hidden_size'], sample['embedding_size'])

    params = '-lr %.5f -do %.1f -nl %d -hs %d -es %d' % (
        sample['learning_rate'], sample['dropout'], sample['num_layers'],
        sample['hidden_size'], sample['embedding_size'])

    return params, output
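
A hedged usage sketch; the surrounding `space` definition and the training script name are assumptions, not from the source:

params, output = get_sampled_params_for_lm(space, index=1)
print("python train_lm.py %s > %s" % (params, output))  # train_lm.py is hypothetical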
Example #30
    def run(self):
        rng = RandomState(self.random_seed)
        space = {k: hp.choice(k, v) for k, v in self.choices.items()}
        while True:
            values = sample(space, rng)
            yield PredictRNNv1(
                stage=2,
                imputation=values['imputation'],
                sample_ratio=0.1,
                deploy_date=date(2017, 6, 18),
                from_date=date(2017, 7, 1),
                to_date=date(2017, 8, 31),
                num_days_before=values['num_days_before'],
                lstm_size_factor=values['lstm_size_factor'],
                hidden_layers=values['hidden_layers'],
                hidden_nonlinearily=values['hidden_nonlinearily'],
                hidden_dropout=values['hidden_dropout'],
                loss=values['loss'],
                learning_rate=values['learning_rate'],
                max_grad_norm=values['max_grad_norm'])
Example #31
def load_param(name, space, scope=None):
    """
    Loads a parameter specified by its `name` from the given `scope`.

    When no such parameter can be found in `scope`, a random sample will
    be drawn from the given parameter `space`.
    
    Finally, the parameter value is decoded using `load_model(value)`.
    """
    if scope and name in scope:
        # Use the given value:
        value = scope[name]
    else:
        # Draw a random sample from the parameter space using pyll:
        from hyperopt.pyll import stochastic
        hp_space = label_vars(space, name)
        value = stochastic.sample(hp_space)

    # Decode the value into an object tree:
    return load_model(value)
Example #32
    def run(self):
        from hyperopt import hp
        from hyperopt.pyll.stochastic import sample
        space = {k: hp.choice(k, v) for k, v in self.choices.items()}
        while True:
            values = sample(space)
            yield PredictRNNv2ReorderSizeKnown(
                mode='evaluation',
                max_days=values['max_days'],
                max_products_per_day=values['max_products_per_day'],
                product_embedding_dim=values['product_embedding_dim'],
                days_attention_layers=values['days_attention_layers'],
                days_attention_activation=values['days_attention_activation'],
                lstm_units=values['lstm_units'],
                hidden_layers=values['hidden_layers'],
                hidden_layers_activation=values['hidden_layers_activation'],
                optimizer=values['optimizer'],
                global_orders_ratio=0.25,
                validation_orders_ratio=0.1,
                users_per_batch=8,
                epochs=10)
Example #33
def get_sampled_params_for_classifier(args,
                                      space,
                                      index=1,
                                      has_pretrained_encoder=False):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10**(sample['learning_rate'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['num_layers'] = int(sample['num_layers'])
    sample['encoding_size'] = int(sample['encoding_size'])
    sample['encoder_num_layers'] = int(sample['encoder_num_layers'])

    output = 'lr_%.5f_nl_%d_hs_%d_do_%.1f' % (
        sample['learning_rate'], sample['num_layers'], sample['hidden_size'],
        sample['dropout'])

    params = '-lr %.5f -nl %d -hs %d -do %.1f' % (
        sample['learning_rate'], sample['num_layers'], sample['hidden_size'],
        sample['dropout'])

    embedding_size = (sample["embedding_size"]
                      if args.embedding_size is None else args.embedding_size)

    if has_pretrained_encoder:
        sample.pop('encoder_num_layers')
        sample.pop('encoding_size')
        sample.pop('embedding_size')
        output += '_ed_%d_es_%d_enl_%d.out' % (
            embedding_size, args.encoder_num_layers, args.encoding_size)
    else:
        output += '_ed_%d_es_%d_enl_%d.out' % (embedding_size,
                                               sample['encoding_size'],
                                               sample['encoder_num_layers'])
        params += ' -es %d --encoding_size %d --encoder_num_layers %d' % (
            embedding_size, sample['encoding_size'],
            sample['encoder_num_layers'])

    print("Sweep ", index, sample)

    return params, output
Example #34
def get_sample_experiment():
    from hyperopt.pyll.stochastic import sample
    from pylearn2.config import yaml_parse
    from os.path import join
    import sys
    sys.path.append('..')
    from hyperopt_api.parser import build
    from yaml_parser import yaml_parser as yp
    from hyperopt_api.search_space import get_search_space
    import configuration.model as config
    from utils.common import get_timestamp

    # prepare all variables that don't need to be updated with each iteration
    spa = get_search_space()    # define search space over possible models

    path = config.data_path

    # obtain the yaml skeleton
    with open(config.yaml_skelton_path) as f:
        default_string = f.read()

    samp = sample(spa)  # generate sample (will give a description of a model)
    mod = build(samp)   # based on the generated description, build an object that will fit into yaml_parser

    # define weight decay parameters; they depend on the number of layers (there is one parameter for each layer)
    weight_decay_coeffs = yp.parse_weight_decay(mod)

    # generate a filename to store the best model
    pkl_filename = join(config.path_for_storing, get_timestamp() + "_best.pkl")

    # create dictionary with hyperparameters
    hyper_params = {'model': yp.parse_to_yaml(mod), 'path': yp.parse_to_yaml(path),
                    'weight_decay_coeffs': weight_decay_coeffs, 'pkl_filename': pkl_filename}
    # fill the yaml skeleton with hyperparameters
    yaml_string = default_string % hyper_params

    network = yaml_parse.load(yaml_string)

    return network
Example #35
from hyperopt import hp, fmin, rand, tpe, space_eval
from hyperopt.pyll.stochastic import sample


def q(args):
    x, y = args
    return x ** 2 + y ** 2


if __name__ == '__main__':
    space = [hp.uniform('x', 0, 1), hp.normal('y', 0, 1)]
    for i in range(0, 100):
        print(sample(space))
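
    # The unused imports above (fmin, tpe, space_eval) suggest the natural
    # next step; a hedged sketch minimizing q over the same space:
    best = fmin(fn=q, space=space, algo=tpe.suggest, max_evals=100)
    print(space_eval(space, best))  # (x, y) near the minimum at (0, 0)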
Example #36
    scope.PCA(
        n_components=1 + hp.qlognormal(
            'pca_n_comp', np.log(10), np.log(10), 1),
        whiten=hp.choice(
            'pca_whiten', [False, True])),
    scope.GMM(
        n_components=1 + hp.qlognormal(
            'gmm_n_comp', np.log(100), np.log(10), 1),
        covariance_type=hp.choice(
            'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
    ])

sklearn_space = {'pre_processing': pre_processing,
                 'classifier': classifier}
from hyperopt.pyll.stochastic import sample
print(sample(sklearn_space))
print(sample(sklearn_space))


# -- (2) DEFINE AN OBJECTIVE FUNCTION
def objective(args):
    preprocessing = args['pre_processing']
    classifier = args['classifier']
    X, y = load_data()
    Xpp = preprocessing.transform(X)
    classifier.fit(Xpp, y)
    return {
            'loss': -classifier.score(Xpp, y),  # negate: fmin minimizes, but score is higher-is-better
            'status': 'ok',
            'foo': 123,           # -- can save more diagnostics
            'other-stuff': None,
Example #37
spa = get_search_space()    # define search space over possible models

# define data paths
path = config.data_path

# obtain the yaml skeleton
with open(config.yaml_skelton_path) as f:
    default_string = f.read()

# for each sample that will be generated from the search space
for i in range(20):
    timestamp = get_timestamp()

    print(t.bold_red('ITERATION:'), t.bold_red(str(i)), "started at:", timestamp)

    samp = sample(spa)  # generate sample (will give a description of a model)
    print(t.bold_cyan('SAMP'), samp)

    mod = build(samp)   # based on description generated build an object that will fit into yaml_parser
    print(t.bold_blue('MODEL'), mod)

    # define weight decay parameters; they depend on the number of layers (there is one parameter for each layer)
    weight_decay_coeffs = yp.parse_weight_decay(mod)

    # generate a filename to store the best model
    pkl_filename = join(config.path_for_storing, timestamp+"best_"+str(i)+'_'+".pkl")

    # create dictionary with hyperparameters
    hyper_params = {'model': yp.parse_to_yaml(mod), 'path': yp.parse_to_yaml(path),
                    'weight_decay_coeffs': weight_decay_coeffs, 'pkl_filename': pkl_filename}
Example #38
def main():
    parser = ArgumentParser()

    parser.add_argument('-p', '--space',
                        dest='spaceFile', help='Where is the space.py located?')
    parser.add_argument('--use_optimal_design',
                        dest='use_optimal_design', help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget',
                        dest='init_budget', help='How many evaluations for random burning period?')
    parser.add_argument('--ei_budget',
                        dest='ei_budget', help='How many evaluations for EI controlled online period?')
    parser.add_argument('--bopt_budget',
                        dest='bopt_budget', help='How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument('--ei_xi',
                        dest='ei_xi', help='What is the exploration parameter for computing EI?')
    parser.add_argument('--top_k_pipelines',
                        dest='top_k_pipelines', help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s', '--seed', default='1',
                        dest='seed', type=int, help='Seed for the algorithm')

    parser.add_argument('-a', '--algo', default='SMAC',
                        dest='algo', type=str, help='Specify the algorithm after LR, can be SMAC or TPE')

    parser.add_argument('-r', '--restore', action='store_true',
                        dest='restore', help='When this flag is set state.pkl is restored in ' +
                             'the current working directory')
    parser.add_argument('--random', default=False, action='store_true',
                        dest='random', help='Use a random search')
    parser.add_argument('--cwd', help='Change the working directory before '
                                      'optimizing.')

    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append('./')
    sys.path.append('')

    module = import_module(space)
    search_space = module.space
    ni = [len(d) for d in module.layer_dict_list]  # number of units in each layer
    cum_ni = np.cumsum(ni)

    log_filename = 'lr.pkl'

    # Random burning period as initialization
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d' % (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d' % (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename, filter_valid=True)

    # Train the first LR model before entering into EI controlled period
    fh = open(log_filename)
    log = cPickle.load(fh)
    trials = log['trials']
    fh.close()
    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        time = trial['duration']
        # make sure the logged result is a number (evaluations that return exactly 100.0 are accepted)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            x = [[0]*n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0]+x[1]+x[2]+x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(time))
    X = np.array(X)
    alpha = 1.0
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # Online period controlled by EI
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d' % (times, valid_times))
        logger.info('EI controlled period times: %d, valid times: %d' % (times - init_budget,
                                                                         valid_times - valid_times_in_random_period))
        ebeta = lr.coef_[:cum_ni[0]], \
                lr.coef_[cum_ni[0]:cum_ni[1]], \
                lr.coef_[cum_ni[1]:cum_ni[2]], \
                lr.coef_[cum_ni[2]:]
        logger.info('LR model estimated unit ranking: %s %s %s %s' % (str(ebeta[0].argsort()),
                                                                      str(ebeta[1].argsort()),
                                                                      str(ebeta[2].argsort()),
                                                                      str(ebeta[3].argsort())))
        ebeta_time = lr_time.coef_[:cum_ni[0]], \
                     lr_time.coef_[cum_ni[0]:cum_ni[1]], \
                     lr_time.coef_[cum_ni[1]:cum_ni[2]], \
                     lr_time.coef_[cum_ni[2]:]
        logger.info('LR Time model estimated unit ranking: %s %s %s %s' % (str(ebeta_time[0].argsort()),
                                                                           str(ebeta_time[1].argsort()),
                                                                           str(ebeta_time[2].argsort()),
                                                                           str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y, float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)

        result, time = get_last_run(log_filename)
        if result <= 100:
            x_next_flat = np.array(x_next[0]+x_next[1]+x_next[2]+x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(time))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(log_filename, filter_valid=True) - valid_times_in_random_period

    # Construct subspace based on LR prediction
    final_ebeta = lr.coef_[:cum_ni[0]], \
                  lr.coef_[cum_ni[0]:cum_ni[1]], \
                  lr.coef_[cum_ni[1]:cum_ni[2]], \
                  lr.coef_[cum_ni[2]:]
    final_ebeta_time = lr_time.coef_[:cum_ni[0]], \
                       lr_time.coef_[cum_ni[0]:cum_ni[1]], \
                       lr_time.coef_[cum_ni[1]:cum_ni[2]], \
                       lr_time.coef_[cum_ni[2]:]
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0, int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)

    logger.info('LR model estimated unit ranking: %s %s %s %s' % (str(final_ebeta[0].argsort()),
                                                                  str(final_ebeta[1].argsort()),
                                                                  str(final_ebeta[2].argsort()),
                                                                  str(final_ebeta[3].argsort())))
    logger.info('LR Time model estimated unit ranking: %s %s %s %s' % (str(final_ebeta_time[0].argsort()),
                                                                       str(final_ebeta_time[1].argsort()),
                                                                       str(final_ebeta_time[2].argsort()),
                                                                       str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s' % (final_pick[0],
                                                     final_pick[1],
                                                     final_pick[2],
                                                     final_pick[3]))

    # Phase 3 with SMAC
    if args.algo == 'SMAC':
        fh = open('pickup.txt', 'w')  # open() instead of the Python 2-only file()
        for layer_pick in final_pick:
            for i in layer_pick:
                fh.write('%d ' % i)
            fh.write('\n')
        fh.close()
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        fh = open('params.pcs', 'w')
        fh.write(new_space)
        fh.close()

    # Phase 3 with TPE
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d' % (times, valid_times))
            logger.info('TPE period times: %d, valid times: %d' %
                        (times - init_budget - ei_budget,
                         valid_times - valid_times_in_random_period - valid_times_in_ei_period))
            logger.info('LR model estimated unit ranking: %s %s %s %s' % (str(final_ebeta[0].argsort()),
                                                                          str(final_ebeta[1].argsort()),
                                                                          str(final_ebeta[2].argsort()),
                                                                          str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s' % (str(final_ebeta_time[0].argsort()),
                                                                               str(final_ebeta_time[1].argsort()),
                                                                               str(final_ebeta_time[2].argsort()),
                                                                               str(final_ebeta_time[3].argsort())))
            logger.info('Selected pipelines: %s %s %s %s' % (final_pick[0],
                                                             final_pick[1],
                                                             final_pick[2],
                                                             final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i)
            rval.exhaust()
Example #39
def test_sample_deterministic():
    aa = as_apply([0, 1])
    print(aa)
    dd = sample(aa, np.random.RandomState(3))
    assert dd == (0, 1)
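
All of the examples above reduce to the same core call; a minimal self-contained reference, with an illustrative output value:

import numpy as np
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

space = {"x": hp.uniform("x", 0, 1)}
print(sample(space, rng=np.random.RandomState(0)))  # e.g. {'x': 0.548...}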