Ejemplo n.º 1
0
def test_clone():
    """Check that ``clone`` builds a disjoint graph that evaluates identically.

    No node reachable from the clone may be a node of the original graph,
    and both graphs must evaluate to equal results once each has been given
    a ``scope.rng_from_seed(5)`` rng argument.
    """
    config = config0()
    config2 = clone(config)

    # -- the clone must not share any node with the original
    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    foo = recursive_set_rng_kwarg(config, scope.rng_from_seed(5))
    r = rec_eval(foo)
    # print() with one argument behaves the same on Python 2 and Python 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(r)
    r2 = rec_eval(recursive_set_rng_kwarg(config2, scope.rng_from_seed(5)))

    print(r2)
    assert r == r2
Ejemplo n.º 2
0
def get_random_idxs_vals(new_id, domain, rng):
    """Evaluate ``domain.s_idxs_vals`` for a single new trial id.

    The graph is evaluated with ``domain.s_new_ids`` bound to ``[new_id]``
    and ``domain.s_rng`` bound to ``rng``; whatever ``pyll.rec_eval``
    produces is returned unchanged.
    """
    bindings = {domain.s_new_ids: [new_id], domain.s_rng: rng}
    return pyll.rec_eval(domain.s_idxs_vals, memo=bindings)
Ejemplo n.º 3
0
def test_vectorize_simple():
    """Vectorize ``p0 ** 2`` over 15 indices and check each loss against its draw.

    After evaluation, every entry of ``losses`` must equal the square of the
    ``p0`` value recorded for the same index.
    """
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = p0**2
    # print() calls replace the Python 2 print statements, which are a
    # SyntaxError on Python 3.
    print(loss)
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)
    vloss = vh.v_expr

    full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()])
    fo2 = replace_repeat_stochastic(full_output)

    new_vc = recursive_set_rng_kwarg(
        fo2,
        as_apply(np.random.RandomState(1)),
    )

    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    # -- map each index to the p0 value drawn for it
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    for ii, li in enumerate(losses):
        assert p0dct[ii]**2 == li
Ejemplo n.º 4
0
        def work(self):
            """Run 10 random trials, then evaluate the TPE ``post_below`` graphs.

            The observed idxs/vals and losses of the finished trials are
            bound through a memo to the ``observed`` / ``observed_loss``
            nodes of a ``TreeParzenEstimator`` before
            ``post_below['idxs']`` and ``post_below['vals']`` are evaluated.
            The result is only printed, not asserted on.
            """
            bandit = self.bandit
            random_algo = Random(bandit)
            # build an experiment of 10 trials
            trials = Trials()
            exp = Experiment(trials, random_algo)
            exp.run(10)
            ids = trials.tids
            assert len(ids) == 10
            tpe_algo = TreeParzenEstimator(bandit)
            argmemo = {}

            # print() replaces the Python 2 print statement (SyntaxError on
            # Python 3).
            print(trials.miscs)
            idxs, vals = miscs_to_idxs_vals(trials.miscs)
            argmemo[tpe_algo.observed['idxs']] = idxs
            argmemo[tpe_algo.observed['vals']] = vals
            argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
            argmemo[tpe_algo.observed_loss['vals']] = trials.losses()
            stuff = pyll.rec_eval(
                [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']],
                memo=argmemo)
            print(stuff)
Ejemplo n.º 5
0
def test_pyll_list_tuple_nested():
    """Compare ``evaluate`` and ``rec_eval`` on a nested list/tuple literal."""
    nested = as_partialplus([[5, 3, (5, 3)], (4, 5)])
    converted = as_pyll(nested)
    # rec_eval always uses tuple, so the first element is turned back into a
    # list before comparing against the partialplus evaluation.
    via_pyll = rec_eval(converted)
    expected = [list(via_pyll[0]), via_pyll[1]]
    assert evaluate(nested) == expected
Ejemplo n.º 6
0
def test_vectorize_simple():
    """Vectorize ``p0 ** 2`` over 15 indices and compare losses with the draws."""
    n_samples = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = p0 ** 2
    print(loss)
    helper = VectorizeHelper(loss, scope.range(n_samples), build=True)

    outputs = as_apply(
        [helper.v_expr, helper.idxs_by_label(), helper.vals_by_label()])
    stochastic = replace_repeat_stochastic(outputs)
    seeded = recursive_set_rng_kwarg(
        stochastic, as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(seeded)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    # -- index -> sampled p0 value
    p0_by_idx = dict(zip(idxs['p0'], vals['p0']))
    for position, observed in enumerate(losses):
        assert p0_by_idx[position] ** 2 == observed
Ejemplo n.º 7
0
        def work(self):
            """Run 10 random trials, then evaluate the TPE ``post_below`` graphs.

            The finished trials' idxs/vals and losses are bound, via a memo,
            to the ``observed`` and ``observed_loss`` nodes of a
            ``TreeParzenEstimator``; the evaluated ``post_below`` idxs and
            vals are printed but not asserted on.
            """
            bandit = self.bandit
            random_algo = Random(bandit)
            # build an experiment of 10 trials
            trials = Trials()
            exp = Experiment(trials, random_algo)
            exp.run(10)
            ids = trials.tids
            assert len(ids) == 10
            tpe_algo = TreeParzenEstimator(bandit)
            argmemo = {}

            # print() replaces the Python 2 print statement, which does not
            # parse on Python 3.
            print(trials.miscs)
            idxs, vals = miscs_to_idxs_vals(trials.miscs)
            argmemo[tpe_algo.observed['idxs']] = idxs
            argmemo[tpe_algo.observed['vals']] = vals
            argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
            argmemo[tpe_algo.observed_loss['vals']] = trials.losses()
            stuff = pyll.rec_eval([tpe_algo.post_below['idxs'],
                                   tpe_algo.post_below['vals']],
                                  memo=argmemo)
            print(stuff)
Ejemplo n.º 8
0
def test_vectorize_multipath():
    """Vectorize a squared three-way choice and check every loss.

    A loss is expected to be 1 when the recorded ``p1`` value is 0, and the
    square of the recorded ``p0`` value otherwise.
    """
    n_draws = as_apply(15)

    p0 = hp_uniform("p0", 0, 1)
    loss = hp_choice("p1", [1, p0, -p0])**2
    helper = VectorizeHelper(loss, scope.range(n_draws), build=True)

    vectorized_loss = helper.v_expr
    print(vectorized_loss)

    outputs = as_apply(
        [vectorized_loss, helper.idxs_by_label(), helper.vals_by_label()])
    seeded = recursive_set_rng_kwarg(
        outputs, as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(seeded)
    print("losses", losses)
    for label in ("p0", "p1"):
        print("idxs " + label, idxs[label])
        print("vals " + label, vals[label])

    p0_by_idx = dict(zip(idxs["p0"], vals["p0"]))
    p1_by_idx = dict(zip(idxs["p1"], vals["p1"]))
    for ii, li in enumerate(losses):
        print(ii, li)
        expected = p0_by_idx[ii]**2 if p1_by_idx[ii] != 0 else 1
        assert li == expected
Ejemplo n.º 9
0
def test_vectorize_multipath():
    """Vectorize a squared three-way choice and check every loss.

    For each index the loss must be 1 when the recorded ``p1`` value is 0,
    and the square of the recorded ``p0`` value otherwise.
    """
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = hp_choice('p1', [1, p0, -p0])**2
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)

    vloss = vh.v_expr
    # print() calls replace the Python 2 print statements, which are a
    # SyntaxError on Python 3.
    print(vloss)

    full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()])

    new_vc = recursive_set_rng_kwarg(
        full_output,
        as_apply(np.random.RandomState(1)),
    )

    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    print('idxs p1', idxs['p1'])
    print('vals p1', vals['p1'])
    # -- index -> recorded value, for each of the two labels
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    p1dct = dict(zip(idxs['p1'], vals['p1']))
    for ii, li in enumerate(losses):
        print(ii, li)
        if p1dct[ii] != 0:
            assert li == p0dct[ii]**2
        else:
            assert li == 1
Ejemplo n.º 10
0
def test_vectorize_multipath():
    """Check the vectorized losses of a squared three-way choice.

    Indices whose recorded ``p1`` value is 0 must have loss 1; all others
    must have the square of the ``p0`` value recorded for that index.
    """
    count = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = hp_choice('p1', [1, p0, -p0]) ** 2
    index_expr = scope.range(count)
    vectorizer = VectorizeHelper(loss, index_expr, build=True)

    vloss = vectorizer.v_expr
    print(vloss)

    bundle = as_apply([
        vloss,
        vectorizer.idxs_by_label(),
        vectorizer.vals_by_label(),
    ])
    rng_node = as_apply(np.random.RandomState(1))
    with_rng = recursive_set_rng_kwarg(bundle, rng_node)

    losses, idxs, vals = rec_eval(with_rng)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    print('idxs p1', idxs['p1'])
    print('vals p1', vals['p1'])
    drawn_p0 = dict(zip(idxs['p0'], vals['p0']))
    drawn_p1 = dict(zip(idxs['p1'], vals['p1']))
    for ii, li in enumerate(losses):
        print(ii, li)
        if p1dct_is_zero(drawn_p1, ii) if False else drawn_p1[ii] != 0:
            assert li == drawn_p0[ii] ** 2
        else:
            assert li == 1
Ejemplo n.º 11
0
 def draw_n_feature_vecs(self, N, rng):
     """Sample ``N`` configurations and return their feature vectors.

     ``range(N)`` serves as placeholder ids: it is bound to the domain's
     ``s_new_ids`` node while ``rng`` is bound to ``s_rng``, and the
     resulting idxs/vals go through ``self.features_from_idxs_vals``.
     """
     fake_ids = range(N)
     domain = self.domain
     memo = {domain.s_new_ids: fake_ids, domain.s_rng: rng}
     idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=memo)
     return self.features_from_idxs_vals(fake_ids, idxs, vals)
Ejemplo n.º 12
0
 def __call__(self, expr, memo, ctrl):
     """Evaluate ``expr``, attach the trial's ``loss`` value, run the evaluator.

     The ``'loss'`` entry added to the evaluated result is the first item
     stored under ``'loss'`` in the current trial's ``misc['vals']``, or
     ``None`` when there is no such key.
     """
     evaluated = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
     trial_vals = ctrl.current_trial['misc']['vals']
     loss = trial_vals['loss'][0] if 'loss' in trial_vals else None
     evaluated.update({'loss': loss})
     return self.model_evaluator(evaluated)
Ejemplo n.º 13
0
def test_dict():
    """``evaluate`` and ``rec_eval`` must agree on a dict of nested partials."""
    halved = partial(float, partial(float, partial(int, 3.3))) / 2
    spec = as_partialplus({'x': halved, 'y': partial(float, 3)})
    converted = as_pyll(spec)
    assert evaluate(spec) == rec_eval(converted)
Ejemplo n.º 14
0
def test_recursive_set_rng_kwarg():
    """Two uniforms given an rng must evaluate inside their open intervals."""
    uniform = scope.uniform
    a = as_apply([uniform(0, 1), uniform(2, 3)])
    rng = np.random.RandomState(234)
    # NOTE(review): the return value is discarded, so this presumably relies
    # on `a` being updated in place -- confirm against recursive_set_rng_kwarg.
    recursive_set_rng_kwarg(a, rng)
    # print() replaces the Python 2 print statement (SyntaxError on Python 3).
    print(a)
    val_a = rec_eval(a)
    assert 0 < val_a[0] < 1
    assert 2 < val_a[1] < 3
Ejemplo n.º 15
0
 def draw_n_feature_vecs(self, N, rng):
     """Draw ``N`` samples from the domain and convert them to features.

     The domain's ``s_idxs_vals`` graph is evaluated with ``range(N)`` as
     the new ids and ``rng`` as the random state; the outcome is passed to
     ``self.features_from_idxs_vals`` together with those ids.
     """
     placeholder_ids = range(N)
     bindings = {
         self.domain.s_new_ids: placeholder_ids,
         self.domain.s_rng: rng,
     }
     sampled = pyll.rec_eval(self.domain.s_idxs_vals, memo=bindings)
     idxs, vals = sampled
     return self.features_from_idxs_vals(placeholder_ids, idxs, vals)
Ejemplo n.º 16
0
def test_recursive_set_rng_kwarg():
    """Evaluate two rng-seeded uniforms and check they fall in their ranges."""
    uniform = scope.uniform
    a = as_apply([uniform(0, 1), uniform(2, 3)])
    rng = np.random.RandomState(234)
    # NOTE(review): result of the call is not used; presumably `a` is
    # modified in place -- verify.
    recursive_set_rng_kwarg(a, rng)
    # The Python 2 print statement does not parse on Python 3; print(a)
    # works on both.
    print(a)
    val_a = rec_eval(a)
    assert 0 < val_a[0] < 1
    assert 2 < val_a[1] < 3
Ejemplo n.º 17
0
def test_clone():
    """A clone shares no nodes with its source yet evaluates to the same value."""
    original = config0()
    duplicate = clone(original)

    # -- every node of the clone must be absent from the original graph
    original_nodes = set(dfs(original))
    assert all(node not in original_nodes for node in dfs(duplicate))

    seeded_original = recursive_set_rng_kwarg(
        original, scope.rng_from_seed(5))
    first = rec_eval(seeded_original)
    print(first)

    seeded_duplicate = recursive_set_rng_kwarg(
        duplicate, scope.rng_from_seed(5))
    second = rec_eval(seeded_duplicate)

    print(second)
    assert first == second
Ejemplo n.º 18
0
def exhaustive_search(new_ids,
                      domain,
                      trials,
                      seed,
                      nbMaxSucessiveFailures=1000):
    r"""Suggest trials whose sampled values have not been seen before.

    This is for exhaustive search in HyperTuning.

    For each id in ``new_ids`` a configuration is drawn from
    ``domain.s_idxs_vals`` until its values differ from every trial already
    in ``trials.trials`` and from every sample accepted earlier in this
    call.

    Args:
        new_ids: ids for the trials to create.
        domain: object providing ``s_idxs_vals``, ``s_new_ids``, ``s_rng``,
            ``new_result()``, ``cmd`` and ``workdir``.
        trials: object providing ``trials`` (existing trial documents) and
            ``new_trial_docs``.
        seed: seed for the ``np.random.RandomState`` used for sampling.
        nbMaxSucessiveFailures: number of consecutive duplicate draws
            tolerated for one id before giving up.

    Returns:
        The list of new trial documents, or ``[]`` as soon as one id
        exceeds ``nbMaxSucessiveFailures`` duplicate draws (documents
        already built in this call are discarded in that case).
    """
    from hyperopt import pyll
    from hyperopt.base import miscs_update_idxs_vals

    def _fingerprint(vals):
        # Hash of (key, first value or None) pairs; identifies one sample.
        return hash(
            frozenset([(key, value[0]) if len(value) > 0 else ((key, None))
                       for key, value in vals.items()]))

    # Build a hash set for previous trials
    hashset = set(
        _fingerprint(trial['misc']['vals']) for trial in trials.trials)

    rng = np.random.RandomState(seed)
    rval = []
    for new_id in new_ids:
        nbSucessiveFailures = 0
        while True:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                                       memo={
                                           domain.s_new_ids: [new_id],
                                           domain.s_rng: rng,
                                       })
            new_result = domain.new_result()
            new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)

            # Compare with previous hashes
            h = _fingerprint(vals)
            if h not in hashset:
                break

            # Duplicated sample, ignore
            nbSucessiveFailures += 1
            if nbSucessiveFailures > nbMaxSucessiveFailures:
                # No more samples to produce
                return []

        # Remember the accepted sample so that a later id of this same
        # batch cannot be given an identical configuration.
        hashset.add(h)
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
Ejemplo n.º 19
0
def test_pyll_deeply_nested_func():
    """``evaluate`` and ``rec_eval`` must agree on a nested custom function."""
    # N.B. uses stuff that isn't in the SymbolTable yet, must remove.
    try:
        def my_add(x, y):
            return x + y

        x = as_partialplus(
            (partial(float, partial(my_add, 0, partial(int, 3.3))) / 2,
             partial(float, 3))
        )
        y = as_pyll(x)
        # The comparison used to be a bare expression whose result was
        # thrown away, so the test could never fail on a mismatch.
        assert evaluate(x) == rec_eval(y)
    finally:
        scope.undefine(my_add)
Ejemplo n.º 20
0
    def exception_thrower():
        """Evaluate ``expr`` and drive a ``PrimalVisitor`` through the protocol.

        The first protocol step must report ``'model validation complete'``;
        its result is checkpointed through ``argdict['ctrl']`` with
        ``in_progress`` set. The second step (``'model testing complete'``)
        runs only when the model is considered promising.

        Returns:
            tuple: ``(visitor, rdict)``.
        """
        argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
        visitor = PrimalVisitor(
            pipeline=argdict['pipeline'],
            ctrl=argdict['ctrl'],
            data_view=argdict['data_view'],
            max_n_features=argdict['max_n_features'],
            # TODO: just pass memmap_name directly
            memmap_name=memmap_name_template % (os.getpid(),
                                           np.random.randint(10000)),
            thresh_rank=1,
            optimize_l2_reg=True,
            batched_lmap_speed_thresh=argdict[
                'batched_lmap_speed_thresh'],
            badfit_thresh=None,
            batchsize=argdict['batchsize'],
            )

        protocol_iter = argdict['data_view'].protocol_iter(visitor)
        # The built-in next() works on Python 2.6+ and Python 3, whereas the
        # .next() method only exists on Python 2 iterators.
        msg, model = next(protocol_iter)
        assert msg == 'model validation complete'

        # -- save the loss, but don't save attachments yet.
        rdict = visitor.hyperopt_rval()
        rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction'])
        rdict['in_progress'] = True
        rdict['status'] = hyperopt.STATUS_OK
        argdict['ctrl'].checkpoint(rdict)

        if assume_promising:
            promising = True
        else:
            promising = view2_worth_calculating(
                loss=rdict['loss'],
                ctrl=argdict['ctrl'],
                thresh_loss=1.0,
                thresh_rank=1)

        logger.info('Promising: %s' % promising)
        if promising:
            # -- the model returned by the testing step is not used
            msg, _ = next(protocol_iter)
            assert msg == 'model testing complete'
            rdict = visitor.hyperopt_rval()
            rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction'])
            rdict['true_loss'] = true_loss_fn(visitor)
            visitor.attach_obj_results()
        else:
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning('Not testing unpromising model %s' % str(model))
            del rdict['in_progress']
        return visitor, rdict
Ejemplo n.º 21
0
    def foo():
        """Evaluate ``expr`` and drive an ``ESVC_SLM_Visitor`` through the protocol.

        After the validation step the intermediate result is checkpointed
        with ``in_progress`` set. The testing step runs only when
        ``maybe_test_view2`` is set and the model is considered promising.

        Returns:
            tuple: ``(visitor, rdict)``.
        """
        argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
        visitor = ESVC_SLM_Visitor(pipeline=argdict['pipeline'],
                    ctrl=argdict['ctrl'],
                    data_view=argdict['data_view'],
                    max_n_features=argdict['max_n_features'],
                    memmap_name='%s_%i' % (__name__, os.getpid()),
                    svm_crossvalid_max_evals=50,
                    optimize_l2_reg=True,
                    batched_lmap_speed_thresh=argdict[
                        'batched_lmap_speed_thresh'],
                    comparison_names=comparison_names,
                    batchsize=argdict['batchsize'],
                    )
        # -- drive the visitor according to the protocol of the data set
        protocol_iter = argdict['data_view'].protocol_iter(visitor)
        # The built-in next() works on Python 2.6+ and Python 3; the .next()
        # method only exists on Python 2 iterators.
        msg, model = next(protocol_iter)
        assert msg == 'model validation complete'

        # -- save the loss, but don't save attachments yet.
        rdict = visitor.hyperopt_rval(save_grams=False)
        rdict['in_progress'] = True
        loss_fn(visitor, rdict, argdict['bagging_fraction'])
        argdict['ctrl'].checkpoint(rdict)

        if assume_promising:
            promising = True
        else:
            promising = view2_worth_calculating(
                loss=rdict['loss'],
                ctrl=argdict['ctrl'],
                thresh_loss=1.0,
                thresh_rank=1)

        info('Promising: %s' % promising)

        if maybe_test_view2:
            if promising:
                info('Disabling trace verification for view2')
                foobar.trace_verify = False
                # NOTE(review): the first step unpacks a (msg, model) pair,
                # but here the whole yielded value is compared with a
                # string -- confirm what the protocol yields at this step.
                msg = next(protocol_iter)
                assert msg == 'model testing complete'
            else:
                warn('Not testing unpromising model %s' % str(model))
        else:
            warn('Skipping view2 stuff for model %s' % str(model))
        # -- rebuild the result; grams are saved only for promising models
        rdict = visitor.hyperopt_rval(save_grams=promising)
        loss_fn(visitor, rdict, argdict['bagging_fraction'])
        return visitor, rdict
Ejemplo n.º 22
0
def test_uniform_categorical():
    """Check the pyll graph built for a uniform categorical variable.

    The graph must be a ``getitem`` over ``pos_args`` indexed by a
    ``hyperopt_param`` named ``'foo'`` that wraps a ``randint``, and ten
    evaluations must all yield one of the declared values.
    """
    p = as_pyll(variable('foo', value_type=[-1, 1, 4]))
    assert p.name == 'getitem'
    assert p.pos_args[0].name == 'pos_args'
    assert p.pos_args[1].name == 'hyperopt_param'
    assert p.pos_args[1].pos_args[0].name == 'literal'
    assert p.pos_args[1].pos_args[0].obj == 'foo'
    assert p.pos_args[1].pos_args[1].name == 'randint'
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # An exception from rec_eval now propagates with its own traceback
    # instead of being replaced by a bare `assert False`; range() replaces
    # the Python 2 only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in [-1, 1, 4] for v in values)
Ejemplo n.º 23
0
    def exception_thrower():
        """Evaluate ``expr`` and run a ``PrimalVisitor`` through the protocol.

        The validation step's result is checkpointed via ``argdict['ctrl']``
        with ``in_progress`` set; the testing step only runs when the model
        is considered promising.

        Returns:
            tuple: ``(visitor, rdict)``.
        """
        argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
        visitor = PrimalVisitor(
            pipeline=argdict['pipeline'],
            ctrl=argdict['ctrl'],
            data_view=argdict['data_view'],
            max_n_features=argdict['max_n_features'],
            # TODO: just pass memmap_name directly
            memmap_name=memmap_name_template %
            (os.getpid(), np.random.randint(10000)),
            thresh_rank=1,
            optimize_l2_reg=True,
            batched_lmap_speed_thresh=argdict['batched_lmap_speed_thresh'],
            badfit_thresh=None,
            batchsize=argdict['batchsize'],
        )

        protocol_iter = argdict['data_view'].protocol_iter(visitor)
        # next(it) is portable across Python 2.6+ and 3; it.next() exists
        # only on Python 2.
        msg, model = next(protocol_iter)
        assert msg == 'model validation complete'

        # -- save the loss, but don't save attachments yet.
        rdict = visitor.hyperopt_rval()
        rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction'])
        rdict['in_progress'] = True
        rdict['status'] = hyperopt.STATUS_OK
        argdict['ctrl'].checkpoint(rdict)

        if assume_promising:
            promising = True
        else:
            promising = view2_worth_calculating(loss=rdict['loss'],
                                                ctrl=argdict['ctrl'],
                                                thresh_loss=1.0,
                                                thresh_rank=1)

        logger.info('Promising: %s' % promising)
        if promising:
            # -- the second element of the testing step is not needed
            msg, _ = next(protocol_iter)
            assert msg == 'model testing complete'
            rdict = visitor.hyperopt_rval()
            rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction'])
            rdict['true_loss'] = true_loss_fn(visitor)
            visitor.attach_obj_results()
        else:
            # Logger.warning replaces the deprecated Logger.warn alias
            logger.warning('Not testing unpromising model %s' % str(model))
            del rdict['in_progress']
        return visitor, rdict
Ejemplo n.º 24
0
def test_uniform_choice():
    """Check the pyll graph built for a choice over a uniform variable.

    The graph must be a ``switch`` on a ``hyperopt_param`` named ``'foo'``
    wrapping a ``randint`` with ``upper`` 3, and ten evaluations must all
    yield one of the three choice payloads.
    """
    p = as_pyll(choice(variable('foo', value_type=[7, 9, 11]),
                       (7, 'rst'),
                       (9, 'uvw'),
                       (11, 'xyz')))
    assert p.name == 'switch'
    assert p.pos_args[0].name == 'hyperopt_param'
    assert p.pos_args[0].pos_args[0].obj == 'foo'
    assert p.pos_args[0].pos_args[1].name == 'randint'
    assert p.pos_args[0].pos_args[1].arg['upper'].obj == 3
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Let evaluation errors surface with their real traceback rather than a
    # bare `assert False`; range() replaces the Python 2 only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in ['rst', 'uvw', 'xyz'] for v in values)
Ejemplo n.º 25
0
 def best_model_vector_classification(self, train, valid):
     """Evaluate ``self.expr`` for the given tasks and record the outcome.

     ``train``, ``valid`` and ``self.ctrl`` are bound into a copy of
     ``self.memo`` before evaluation. The ``(model, report)`` result is
     appended to ``self.results['best_model']`` and the model is returned.
     """
     # TODO: use validation set if not-None
     bindings = dict(self.memo)
     use_obj_for_literal_in_memo(
         self.expr, train, pyll_stubs.train_task, bindings)
     use_obj_for_literal_in_memo(
         self.expr, valid, pyll_stubs.valid_task, bindings)
     use_obj_for_literal_in_memo(
         self.expr, self.ctrl, pyll_stubs.ctrl, bindings)
     model, report = rec_eval(self.expr, memo=bindings)

     if model:
         model.trained_on = train.name
     # -- remember each validation set name once
     if valid and valid.name not in self.validation_sets:
         self.validation_sets.append(valid.name)

     record = {
         'train_name': train.name,
         'valid_name': valid.name if valid else None,
         'model': model,
         'report': report,
     }
     self.results['best_model'].append(record)
     return model
Ejemplo n.º 26
0
def test_nonuniform_categorical():
    """Check the pyll graph built for a weighted categorical variable.

    The graph must be a ``getitem`` indexed by a ``hyperopt_param`` named
    ``'baz'`` wrapping a ``categorical`` whose ``p`` argument carries the
    three weights, and ten evaluations must all yield a declared value.
    """
    p = as_pyll(variable('baz', value_type=[3, 5, 9],
                         distribution='categorical',
                         p=[0.1, 0.4, 0.5]))
    assert p.name == 'getitem'
    assert p.pos_args[0].name == 'pos_args'
    assert p.pos_args[1].name == 'hyperopt_param'
    assert p.pos_args[1].pos_args[0].name == 'literal'
    assert p.pos_args[1].pos_args[0].obj == 'baz'
    assert p.pos_args[1].pos_args[1].name == 'categorical'
    assert p.pos_args[1].pos_args[1].arg['p'].name == 'pos_args'
    assert p.pos_args[1].pos_args[1].arg['p'].pos_args[0].obj == 0.1
    assert p.pos_args[1].pos_args[1].arg['p'].pos_args[1].obj == 0.4
    assert p.pos_args[1].pos_args[1].arg['p'].pos_args[2].obj == 0.5
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Evaluation errors propagate with their own traceback instead of being
    # turned into a bare `assert False`; range() replaces the Python 2 only
    # xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in [3, 5, 9] for v in values)
Ejemplo n.º 27
0
    def evaluate(self, config, ctrl, attach_attachments=True):
        """Evaluate ``config`` and normalise the outcome into a result dict.

        A numeric return value becomes ``{'loss': float, 'status':
        STATUS_OK}``; anything else is copied with ``dict()`` and must carry
        a valid ``'status'``.

        Raises:
            InvalidResultStatus: the status is not in ``STATUS_STRINGS``.
            InvalidLoss: the status is ``STATUS_OK`` but ``'loss'`` is
                missing or cannot be converted to ``float``.
        """
        memo = self.memo_from_config(config)
        use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
        if not self.pass_expr_memo_ctrl:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            evaluated = pyll.rec_eval(
                self.expr,
                memo=memo,
                print_node_on_error=self.rec_eval_print_node_on_error)
            rval = self.fn(evaluated)
        else:
            rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)

        if isinstance(rval, (float, int, np.number)):
            result = {'loss': float(rval), 'status': STATUS_OK}
        else:
            result = dict(rval)
            status = result['status']
            if status not in STATUS_STRINGS:
                raise InvalidResultStatus(result)

            if status == STATUS_OK:
                # -- make sure that the loss is present and valid
                try:
                    result['loss'] = float(result['loss'])
                except (TypeError, KeyError):
                    raise InvalidLoss(result)

        if attach_attachments:
            # -- move attachments out of the result and onto the ctrl
            for key, val in result.pop('attachments', {}).items():
                ctrl.attachments[key] = val

        # -- the result is deliberately not SONified here because
        #    SON-compatibility is only a requirement for trials destined for
        #    a mongodb. In-memory rvals can contain anything.
        return result
Ejemplo n.º 28
0
def test_nonuniform_choice():
    """Check the pyll graph built for a choice over a weighted categorical.

    The graph must be a ``switch`` on a ``hyperopt_param`` named ``'blu'``
    wrapping a ``categorical`` whose ``p`` argument carries the three
    weights, and ten evaluations must all yield one of the choice payloads.
    """
    var = variable('blu', value_type=[2, 4, 8], distribution='categorical',
                   p=[0.2, 0.7, 0.1])
    p = as_pyll(choice(var,
                       (2, 'abc'),
                       (4, 'def'),
                       (8, 'ghi')))
    assert p.name == 'switch'
    assert p.pos_args[0].name == 'hyperopt_param'
    assert p.pos_args[0].pos_args[0].obj == 'blu'
    assert p.pos_args[0].pos_args[1].name == 'categorical'
    assert p.pos_args[0].pos_args[1].arg['p'].name == 'pos_args'
    assert p.pos_args[0].pos_args[1].arg['p'].pos_args[0].obj == 0.2
    assert p.pos_args[0].pos_args[1].arg['p'].pos_args[1].obj == 0.7
    assert p.pos_args[0].pos_args[1].arg['p'].pos_args[2].obj == 0.1
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Any evaluation error surfaces with its real traceback rather than a
    # bare `assert False`; range() replaces the Python 2 only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in ['abc', 'def', 'ghi'] for v in values)
Ejemplo n.º 29
0
def test_vectorize_trivial():
    """Vectorize a bare ``p0`` over 15 indices; each loss must equal its draw."""
    n_samples = as_apply(15)

    p0 = hp_uniform("p0", 0, 1)
    loss = p0
    print(loss)
    helper = VectorizeHelper(loss, scope.range(n_samples), build=True)

    outputs = as_apply(
        [helper.v_expr, helper.idxs_by_label(), helper.vals_by_label()])
    stochastic = replace_repeat_stochastic(outputs)
    seeded = recursive_set_rng_kwarg(
        stochastic, as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(seeded)
    print("losses", losses)
    print("idxs p0", idxs["p0"])
    print("vals p0", vals["p0"])
    # -- index -> sampled p0 value
    p0_by_idx = dict(zip(idxs["p0"], vals["p0"]))
    for position, observed in enumerate(losses):
        assert p0_by_idx[position] == observed
Ejemplo n.º 30
0
    def exception_thrower():
        """Evaluate one trial: extract features, run the learner, time both.

        Returns:
            The dict produced by the data object's evaluation function,
            extended with ``status``, ``feat_shape``, ``slm_time`` and
            ``eval_time``.

        Raises:
            SimpleHpStop: the number of ok trials has reached
                ``n_ok_trials``.
            AssertionError: fewer than ``min_features`` feature columns
                were extracted.
        """
        argdict = rec_eval(expr, memo=memo, print_node_on_error=False)

        # -- unpack the evaluated arguments in one go
        (dataset_info, learning_algo, hp_space, pipeline, n_startup_trials,
         n_ok_trials, batchsize, min_features, max_features,
         checkpoint_fname, batched_lmap_speed_thresh, ctrl) = [
            argdict[key] for key in (
                'dataset_info', 'learning_algo', 'hp_space', 'pipeline',
                'n_startup_trials', 'n_ok_trials', 'batchsize',
                'min_features', 'max_features', 'checkpoint_fname',
                'batched_lmap_speed_thresh', 'ctrl')]

        tid = ctrl.current_trial['tid']

        # -- checkpoint
        due = isinstance(ctrl.trials, Trials) and (
            tid > 0 and tid % checkpoint_every == 0)
        if due:
            save_hp(hp_space, ctrl.trials, n_startup_trials,
                    checkpoint_fname)

        # -- retrieve trials from database
        if isinstance(ctrl.trials, MongoTrials):
            ctrl.trials.refresh()

        # -- check and signal stopping to optimizer
        current_ok_trials = count_ok_trials(ctrl.trials)
        if current_ok_trials >= n_ok_trials:
            message = ('number of ok trials reached - '
                       'stopping process with %d ok trials out of '
                       '%d trials.' % (current_ok_trials, tid))
            raise SimpleHpStop(message, ctrl.trials)

        # -- feature extraction
        extraction_started = time()

        load_images = getattr(dataset_info['data_obj'],
                              dataset_info['fn_imgs'])
        limgs = lmap_ndarray(load_images())

        X = pyll_theano_batched_lmap(
            partial(callpipe1, pipeline),
            limgs,
            batchsize=batchsize,
            print_progress_every=10,
            speed_thresh=batched_lmap_speed_thresh,
            abort_on_rows_larger_than=max_features,
            x_dtype='uint8',
        )[:]

        feat_set = rec_eval(X, print_node_on_error=False)
        slm_time = time() - extraction_started

        # -- classification
        evaluation_started = time()

        # -- feat_set in 2-D; the original shape is kept for the report
        feat_shape = feat_set.shape
        feat_set.shape = feat_set.shape[0], -1

        assert feat_set.shape[1] >= min_features, 'min_features not satisfied'

        evaluate_learner = getattr(dataset_info['data_obj'],
                                   dataset_info['fn_eval'])
        r_dict = evaluate_learner(learning_algo, feat_set)
        eval_time = time() - evaluation_started

        r_dict['status'] = hyperopt.STATUS_OK
        r_dict['feat_shape'] = feat_shape
        r_dict['slm_time'] = slm_time
        r_dict['eval_time'] = eval_time

        return r_dict
Ejemplo n.º 31
0
    def exception_thrower():
        """Run feature extraction and classification for the current trial.

        The evaluated argument dict supplies the data set, the learner and
        the limits; the elapsed time of both phases is stored in the result.

        Returns:
            The evaluation function's dict with ``status``, ``feat_shape``,
            ``slm_time`` and ``eval_time`` added.

        Raises:
            SimpleHpStop: enough ok trials have already been collected.
            AssertionError: the flattened features have fewer than
                ``min_features`` columns.
        """
        args = rec_eval(expr, memo=memo, print_node_on_error=False)

        dataset_info = args['dataset_info']
        learning_algo = args['learning_algo']
        hp_space = args['hp_space']
        pipeline = args['pipeline']
        n_startup_trials = args['n_startup_trials']
        n_ok_trials = args['n_ok_trials']
        batchsize = args['batchsize']
        min_features = args['min_features']
        max_features = args['max_features']
        checkpoint_fname = args['checkpoint_fname']
        batched_lmap_speed_thresh = args['batched_lmap_speed_thresh']
        ctrl = args['ctrl']

        tid = ctrl.current_trial['tid']

        # -- checkpoint
        if (isinstance(ctrl.trials, Trials)
                and tid > 0 and tid % checkpoint_every == 0):
            save_hp(hp_space, ctrl.trials, n_startup_trials,
                    checkpoint_fname)

        # -- retrieve trials from database
        if isinstance(ctrl.trials, MongoTrials):
            ctrl.trials.refresh()

        # -- check and signal stopping to optimizer
        ok_so_far = count_ok_trials(ctrl.trials)
        if not ok_so_far < n_ok_trials:
            raise SimpleHpStop(
                'number of ok trials reached - '
                'stopping process with %d ok trials out of '
                '%d trials.' % (ok_so_far, tid),
                ctrl.trials)

        # -- feature extraction
        t_extract = time()

        data_obj_imgs = getattr(dataset_info['data_obj'],
                                dataset_info['fn_imgs'])
        imgs = data_obj_imgs()
        lazy_imgs = lmap_ndarray(imgs)

        lazy_features = pyll_theano_batched_lmap(
            partial(callpipe1, pipeline),
            lazy_imgs,
            batchsize=batchsize,
            print_progress_every=10,
            speed_thresh=batched_lmap_speed_thresh,
            abort_on_rows_larger_than=max_features,
            x_dtype='uint8',
        )[:]

        features = rec_eval(lazy_features, print_node_on_error=False)
        slm_time = time() - t_extract

        # -- classification
        t_eval = time()

        # -- features in 2-D; remember the original shape for the report
        original_shape = features.shape
        features.shape = features.shape[0], -1

        assert features.shape[1] >= min_features, 'min_features not satisfied'

        data_obj_eval = getattr(dataset_info['data_obj'],
                                dataset_info['fn_eval'])
        result = data_obj_eval(learning_algo, features)
        eval_time = time() - t_eval

        result['status'] = hyperopt.STATUS_OK
        result['feat_shape'] = original_shape
        result['slm_time'] = slm_time
        result['eval_time'] = eval_time

        return result
Ejemplo n.º 32
0
def test_vectorize_config0():
    """Vectorize a config dict with shared and conditional hyperparameters.

    The config is taken from ``locals()``, so the names ``p0`` .. ``p6``
    below become its keys; do not rename or add locals above that call.
    The vectorized graph is evaluated for ``Nval`` indices and the test
    checks that ``p1`` is sampled for fewer than ``Nval`` of them and that,
    wherever ``p3`` or ``p6`` picked their second option, the config value
    equals the ``p1`` value recorded for that index.
    """
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    # -- snapshot of the locals bound so far (p0..p6) used as the config
    d = locals()
    d['p1'] = None # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    # -- placeholder node; its value is supplied through `memo` at
    #    evaluation time below
    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    # -- `if 1:` / `if 0:` are manual switches for debug printing
    if 1:
        print('=' * 80)
        print('VECTORIZED')
        print(full_output)
        print('\n' * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC')
        print(fo2)
        print('\n' * 1)

    new_vc = recursive_set_rng_kwarg(
            fo2,
            as_apply(np.random.RandomState(1))
            )
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC WITH RNGS')
        print(new_vc)

    Nval = 10
    # -- bind the placeholder N to the concrete number of samples
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print('foo[0]', foo[0])
    print('foo[1]', foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994) }
    # -- two different indices must not produce the same config
    assert foo[1] != foo[2]

    print(idxs)
    print(vals['p3'])
    print(vals['p6'])
    print(idxs['p1'])
    print(vals['p1'])
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    # -- p1 is only reachable through p3 / p6, so it must not have been
    #    sampled for every index
    assert len(idxs['p1']) < Nval
    # -- index -> sampled p1 value
    p1d = dict(list(zip(idxs['p1'], vals['p1'])))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        if p3v == p6v == 0:
            # -- neither choice selected p1 for this index
            assert ii not in idxs['p1']
        if p3v:
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print('p6', foo[ii]['p6'], p1d[ii])
            assert foo[ii]['p6'] == p1d[ii]
Ejemplo n.º 33
0
def test_pyll_nested_func():
    """A partial nested inside another partial must evaluate identically
    whether it is evaluated directly or after conversion with as_pyll."""
    nested = partial(float, partial(int, 5.5))
    graph = as_pyll(nested)
    direct_value = evaluate(nested)
    assert direct_value == rec_eval(graph)
Ejemplo n.º 34
0
def test_vectorize_config0():
    """Vectorize a small conditional config and check idxs/vals bookkeeping.

    The config is vectorized over ``Nval`` indices, stochastic nodes are
    replaced and given an RNG seeded with 1, and the evaluated result is
    checked for: one config per index, distinct draws at different indices,
    and agreement between each sampled config and the per-label
    ``idxs`` / ``vals`` returned by the VectorizeHelper.
    """
    # -- NOTE: every name bound before the locals() call below becomes a key
    #    of the config dict, so do not introduce extra locals above it.
    p0 = hp_uniform("p0", 0, 1)
    p1 = hp_loguniform("p1", 2, 3)
    p2 = hp_choice("p2", [-1, p0])
    p3 = hp_choice("p3", [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice("p6", [-3, p1])
    d = locals()
    d["p1"] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    # -- N is a symbolic placeholder; its value is supplied through `memo`
    #    when the graph is evaluated.
    N = as_apply("N:TBA")
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    # -- the `if 1` / `if 0` blocks are manual debug-print toggles
    if 1:
        print("=" * 80)
        print("VECTORIZED")
        print(full_output)
        print("\n" * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print("=" * 80)
        print("VECTORIZED STOCHASTIC")
        print(fo2)
        print("\n" * 1)

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    if 0:
        print("=" * 80)
        print("VECTORIZED STOCHASTIC WITH RNGS")
        print(new_vc)

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print("foo[0]", foo[0])
    print("foo[1]", foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        # -- disabled: the literal below has no 'p6' entry and would need
        #    regenerating before this check can be switched on.
        assert foo[0] == {
            "p0": 0.39676747423066994,
            "p1": None,
            "p2": 0.39676747423066994,
            "p3": 2.1281244479293568,
            "p4": 1,
            "p5": (3, 4, 0.39676747423066994),
        }
    # -- Compare the sampled configs as mappings.  Comparing `.values()`
    #    views is vacuous in Python 3: two distinct dict_values objects never
    #    compare equal, so `a.values() != b.values()` is always True.
    assert foo[1] != foo[2]

    print(idxs)
    print(vals["p3"])
    print(vals["p6"])
    print(idxs["p1"])
    print(vals["p1"])
    assert len(vals["p3"]) == Nval
    assert len(vals["p6"]) == Nval
    # -- p1 is only drawn for the indices where p3 or p6 selected it
    assert len(idxs["p1"]) < Nval
    p1d = dict(zip(idxs["p1"], vals["p1"]))
    for ii, (p3v, p6v) in enumerate(zip(vals["p3"], vals["p6"])):
        if p3v == p6v == 0:
            # -- neither choice picked the p1 branch, so p1 was not sampled
            assert ii not in idxs["p1"]
        if p3v:
            assert foo[ii]["p3"] == p1d[ii]
        if p6v:
            print("p6", foo[ii]["p6"], p1d[ii])
            assert foo[ii]["p6"] == p1d[ii]
Ejemplo n.º 35
0
def test_vectorize_config0():
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    d = locals()
    d['p1'] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    if 1:
        print '=' * 80
        print 'VECTORIZED'
        print full_output
        print '\n' * 1

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print '=' * 80
        print 'VECTORIZED STOCHASTIC'
        print fo2
        print '\n' * 1

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    if 0:
        print '=' * 80
        print 'VECTORIZED STOCHASTIC WITH RNGS'
        print new_vc

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print 'foo[0]', foo[0]
    print 'foo[1]', foo[1]
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994)
        }
    assert foo[1] != foo[2]

    print idxs
    print vals['p3']
    print vals['p6']
    print idxs['p1']
    print vals['p1']
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval
    p1d = dict(zip(idxs['p1'], vals['p1']))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        if p3v == p6v == 0:
            assert ii not in idxs['p1']
        if p3v:
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print 'p6', foo[ii]['p6'], p1d[ii]
            assert foo[ii]['p6'] == p1d[ii]
Ejemplo n.º 36
0
def test_pyll_tuple():
    """A tuple wrapped by as_partialplus must evaluate identically whether
    evaluated directly or after conversion with as_pyll."""
    wrapped = as_partialplus((6, 9, 4))
    graph = as_pyll(wrapped)
    direct_value = evaluate(wrapped)
    assert direct_value == rec_eval(graph)
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = partial(tpe.suggest,
                       prior_weight=prior_weight,
                       n_startup_jobs=2,
                       n_EI_candidates=128,
                       gamma=gamma)
        #print algo.opt_idxs['x']
        #print algo.opt_vals['x']

        trials = Trials()
        fmin(passthrough,
             space=bandit.expr,
             algo=algo,
             trials=trials,
             max_evals=self.LEN)
        if self.show_vars:
            import hyperopt.plotting
            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs['x']
        vals = vals['x']

        losses = trials.losses()

        from hyperopt.tpe import ap_filter_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers['quniform']
        fn_kwargs = dict(size=(4, ), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        #print b_post
        #print a_post
        fn_lpdf = getattr(scope, a_post.name + '_lpdf')
        print fn_lpdf
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                         if n not in ('rng', 'size')])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                         if n not in ('rng', 'size')])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print '=' * 80

        do_show = self.show_steps

        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print '-' * 80
            print 'ROUND', ii
            print '-' * 80
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii],
                                            losses[:ii], gamma)
            below = below.astype('int')
            above = above.astype('int')
            print 'BB0', below
            print 'BB1', above
            #print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            #print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                                       memo=memo)
            #print bl - al
            print 'BB2', dict(zip(all_vals, bl - al))
            print 'BB3', dict(zip(all_vals, bl))
            print 'BB4', dict(zip(all_vals, al))
            print 'ORIG PICKED', vals[ii]
            print 'PROPER OPT PICKS:', nv

            #assert np.allclose(below, [3, 3, 9])
            #assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                #plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                #plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c='g')
                plt.scatter(all_vals, al, c='r')
        if do_show:
            plt.show()
Ejemplo n.º 38
0
def _format_metric_scores(scores):
    """Render the Spearman/Kendall/Pearson entries of *scores* as 'name=0.123  ' chunks."""
    return ''.join('{}={:.3f}  '.format(metric, scores[metric])
                   for metric in ['Spearman', 'Kendall', 'Pearson'])


def _format_statistics(title, statistics, names):
    """Render one 'Val:' / 'Test:' paragraph with one ' name -> scores' line per entry of *names*."""
    return title + ': \n' + ''.join(
        ' {} -> {} \n'.format(name, _format_metric_scores(statistics[name]))
        for name in names)


def hyper_optimization(year, mode, human_metric, best_params):
    """
    Execute a hyper optimization algorithm in order to obtain the best parameters for a specific model
    when we are testing on 'year' with mode='mode'.

    Trials are persisted with pickle under TRIALS_DIR after every single evaluation, so an
    interrupted search resumes from the trials file instead of starting over. When the search is
    complete, every trial is logged from lowest to highest loss.

    :param year: The year we are testing.
    :param mode: Depending on your choice : ['Single Task', 'Multi Task-1', 'Multi Task-5'].
    :param human_metric: The metric for which the model is trained. It is needed only on 'Single Task' mode.
    :param best_params: A dictionary where will be saved all the best parameters obtained by hyper-optimization.
        It is updated in place at best_params[year][human_metric][mode].
    """
    global TRIAL_NO, LOGGER

    # Close the config file deterministically instead of leaking the handle.
    with open(CONFIG_PATH) as config_file:
        search_space = json.load(config_file)['hyper_optimization']['search_space']

    # Clears the session for each run of the algorithm
    K.clear_session()

    TRIAL_NO = 0

    log_path = os.path.join(
        LOGS_DIR, 'hyper_opt_log_{}_{}_{}.txt'.format(human_metric, year,
                                                      mode))
    logger_name = 'LOGGER_{}_{}_{}'.format(year, human_metric, mode)

    setup_logger(logger_name=logger_name,
                 log_path=log_path,
                 level=logging.INFO)
    LOGGER = logging.getLogger(logger_name)

    train_x, train_y, val_x, val_y, val_ordered_ids = load_train_data(year)
    test_x, test_y, test_ordered_ids, test_empty_ids = load_test_data(year)

    if mode == 'Single Task':  # 1 Dense -> 1 predictions
        # human_metric is indexed by its second character (1-based digit).
        human_metric_index = int(human_metric[1]) - 1
        train_y = train_y[:, human_metric_index]
        val_y = val_y[:, human_metric_index]
        test_y = test_y[:, human_metric_index]

    train_samples = {'x': train_x, 'y': train_y}
    test_samples = {
        'x': test_x,
        'y': test_y,
        'ordered_ids': test_ordered_ids,
        'empty_ids': test_empty_ids
    }
    val_samples = {'x': val_x, 'y': val_y, 'ordered_ids': val_ordered_ids}

    search_space = {key: hp.choice(key, value)
                    for key, value in search_space.items()}
    # The initial network is compiled from the last positional argument of
    # each hp.choice node.
    space_item = pyll.rec_eval(
        {key: value.pos_args[-1]
         for key, value in search_space.items()})

    network = compile_bigrus_attention(
        shape=(300, 300),
        n_hidden_layers=space_item['n_hidden_layers'],
        hidden_units_size=space_item['hidden_units_size'],
        dropout_rate=space_item['dropout_rate'],
        word_dropout_rate=space_item['word_dropout_rate'],
        lr=space_item['learning_rate'],
        mode=mode)

    trials_path = os.path.join(TRIALS_DIR,
                               '{}_{}_{}'.format(year, human_metric, mode))

    # Start hyper-opt trials: one evaluation per iteration, saved each time.
    while True:
        try:
            # NOTE(review): pickle executes code from the file it loads; this
            # is only safe while the trials file is written by this program.
            with open(trials_path, 'rb') as trials_file:
                trials = pickle.load(trials_file)
            max_evaluations = len(trials.trials) + 1
            print("Found it")
        except FileNotFoundError:
            trials = Trials()
            max_evaluations = 1

        TRIAL_NO = max_evaluations

        if max_evaluations > HYPER_OPT_CONFIG['trials']:
            break

        fmin(fn=lambda space_item: optimization_function(
            network=network,
            train_samples=train_samples,
            test_samples=test_samples,
            val_samples=val_samples,
            current_space=space_item,
            year=year,
            mode=mode,
            metric=human_metric),
             space=search_space,
             algo=tpe.suggest,
             max_evals=max_evaluations,
             trials=trials)

        with open(trials_path, 'wb') as f:
            pickle.dump(trials, f)

    # Which statistics rows get reported depends on the mode; any other mode
    # logs the configuration line only.
    if mode == 'Multi Task-1' or mode == 'Multi Task-5':
        reported = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
    elif mode == 'Single Task':
        reported = [human_metric]
    else:
        reported = []

    LOGGER.info(
        '\n\n--------------------- Results Summary Best to Worst ------------------'
    )
    ranked_results = sorted(trials.results, key=lambda trial: trial['loss'])
    for rank, t in enumerate(ranked_results):
        conf = t['results']['configuration']
        average_statistics = t['results']['statistics']

        if rank == 0:
            # We want to write only the best parameters each time
            best_params[year][human_metric][mode] = {
                "HL": conf['n_hidden_layers'],
                "HU": conf['hidden_units_size'],
                "BS": conf['batch_size'],
                "D": conf['dropout_rate'],
                "WD": conf['word_dropout_rate'],
                "LR": conf['learning_rate']
            }

        log_msg = MSG_TEMPLATE.format(
            t['trial_no'], HYPER_OPT_CONFIG['trials'],
            str(conf['n_hidden_layers']), str(conf['hidden_units_size']),
            conf['batch_size'], conf['dropout_rate'],
            conf['word_dropout_rate'], conf['attention_mechanism'],
            conf['learning_rate'], year, human_metric, mode) + '\n'

        if reported:
            log_msg += _format_statistics(
                'Val', average_statistics['validation'], reported)
            log_msg += _format_statistics(
                'Test', average_statistics['test'], reported)

        LOGGER.info(log_msg)

    trials_training_time = sum(
        trial['results']['time'] for trial in trials.results)
    LOGGER.info('\n Hyper Optimization search took {} days {}\n\n'.format(
        int(trials_training_time / (24 * 60 * 60)),
        time.strftime("%H:%M:%S", time.gmtime(trials_training_time))))
Ejemplo n.º 39
0
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = partial(tpe.suggest,
                prior_weight=prior_weight,
                n_startup_jobs=2,
                n_EI_candidates=128,
                gamma=gamma)
        #print algo.opt_idxs['x']
        #print algo.opt_vals['x']

        trials = Trials()
        fmin(passthrough,
            space=bandit.expr,
            algo=algo,
            trials=trials,
            max_evals=self.LEN)
        if self.show_vars:
            import hyperopt.plotting
            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs['x']
        vals = vals['x']

        losses = trials.losses()

        from hyperopt.tpe import ap_filter_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers['quniform']
        fn_kwargs = dict(size=(4,), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        #print b_post
        #print a_post
        fn_lpdf = getattr(scope, a_post.name + '_lpdf')
        print fn_lpdf
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                    if n not in ('rng', 'size')])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                    if n not in ('rng', 'size')])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print '=' * 80

        do_show = self.show_steps

        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print '-' * 80
            print 'ROUND', ii
            print '-' * 80
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_filter_trials(idxs[:ii],
                    vals[:ii], idxs[:ii], losses[:ii], gamma)
            below = below.astype('int')
            above = above.astype('int')
            print 'BB0', below
            print 'BB1', above
            #print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            #print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                    memo=memo)
            #print bl - al
            print 'BB2', dict(zip(all_vals, bl - al))
            print 'BB3', dict(zip(all_vals, bl))
            print 'BB4', dict(zip(all_vals, al))
            print 'ORIG PICKED', vals[ii]
            print 'PROPER OPT PICKS:', nv

            #assert np.allclose(below, [3, 3, 9])
            #assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                #plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                #plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c='g')
                plt.scatter(all_vals, al, c='r')
        if do_show:
            plt.show()
Ejemplo n.º 40
0
def test_pyll_list():
    """A list wrapped by as_partialplus must evaluate to the same elements
    whether evaluated directly or after conversion with as_pyll."""
    wrapped = as_partialplus([5, 3, 9])
    graph = as_pyll(wrapped)
    direct_value = evaluate(wrapped)
    # rec_eval always uses tuple, so convert back to a list before comparing
    via_pyll = list(rec_eval(graph))
    assert direct_value == via_pyll
Ejemplo n.º 41
0
def pyll_theano_batched_lmap(pipeline, seq, batchsize,
        _debug_call_counts=None,
        print_progress_every=float('inf'),
        abort_on_rows_larger_than=None,
        speed_thresh=None,
        x_dtype='float32',
        ):
    """
    This function returns a skdata.larray.lmap object whose function
    is defined by a theano expression.

    The theano expression will be built and compiled specifically for the
    dimensions of the given `seq`. Therefore, in_rows, and out_rows should
    actually be a *pyll* graph, that evaluates to a theano graph.
    """

    in_shp = (batchsize,) + seq.shape[1:]
    batch = np.zeros(in_shp, dtype=x_dtype)
    s_ibatch = theano.shared(batch)
    s_xi = theano.tensor.as_tensor_variable(s_ibatch).type()
    s_N = s_xi.shape[0]
    s_X = theano.tensor.set_subtensor(s_ibatch[:s_N], s_xi)
    #print 'PIPELINE', pipeline
    thing = pipeline((s_X, in_shp))
    #print 'THING'
    #print thing
    #print '==='
    s_obatch, oshp = pyll.rec_eval(thing)
    assert oshp[0] == batchsize
    logger.info('batched_lmap oshp %s' % str(oshp))
    if abort_on_rows_larger_than:
        rowlen = np.prod(oshp[1:])
        if rowlen > abort_on_rows_larger_than:
            raise ValueError('rowlen %i exceeds limit %i' % (
                rowlen, abort_on_rows_larger_than))

    # Compile a function that takes a variable number of elements in,
    # returns the same number of processed elements out,
    # but does all internal computations using a fixed number of elements,
    # because convolutions are fastest when they're hard-coded to a certain
    # size.
    logger.debug('pyll_theano_batched_lmap compiling fn')
    _fn = theano.function([theano.Param(s_xi, strict=True)],
            s_obatch[:s_N],
            updates={
                s_ibatch: s_X, # this allows the inc_subtensor to be in-place
                })
    logger.debug('pyll_theano_batched_lmap compiling fn -> done')

    sums = {'elems': 0, 'times': 0.0}
    if speed_thresh is None:
        time_fn = _fn
    else:
        def time_fn(X):
            t0 = time.time()
            if str(X.dtype) != x_dtype:
                print 'time_fn dtype problem', X.dtype, x_dtype
            rval = _fn(X)
            dt = time.time() - t0
            #print 'DEBUG time_fn dt:', dt
            sums['elems'] += len(X)
            sums['times'] += dt
            return rval

        def raise_if_slow():
            exc = EvalTimeout(
                'batched_lmap failed to compute %i elements in %f secs'
                % (speed_thresh['elements'], speed_thresh['seconds']))
            if sums['elems'] >= speed_thresh['elements']:
                observed_ratio = sums['elems'] / sums['times']
                required_ratio = (speed_thresh['elements'] /
                        speed_thresh['seconds'])
                if observed_ratio < required_ratio:
                    raise exc
                else:
                    sums['elems'] = 0
                    sums['times'] = 0.0

    def fn_1(x):
        if _debug_call_counts:
            _debug_call_counts['fn_1'] += 1
        return time_fn(x[None, :, :, :])[0]

    attrs = {
            'shape': oshp[1:],
            'ndim': len(oshp) -1,
            'dtype': s_obatch.dtype }
    def rval_getattr(attr, objs):
        # -- objs don't matter to the structure of the return value
        try:
            return attrs[attr]
        except KeyError:
            raise AttributeError(attr)

    fn_1.rval_getattr = rval_getattr

    last_print_time = [time.time()]

    def check_for_print(offset, X):
        curtime = time.time()
        if (curtime - last_print_time[0]) > print_progress_every:
            logger.info('pyll_theano_batched_lmap.f_map %i %i' % (
                offset, len(X)))
            last_print_time[0] = curtime

        if speed_thresh is not None:
            raise_if_slow()

    def f_map(X):
        if _debug_call_counts:
            _debug_call_counts['f_map'] += 1

        if len(X) == batchsize:
            check_for_print(offset=0, X=X)
            return time_fn(X)

        rval = np.empty((len(X),) + oshp[1:], dtype=s_obatch.dtype)
        offset = 0
        while offset < len(X):
            check_for_print(offset, X)
            xi = X[offset: offset + batchsize]
            fn_i = time_fn(xi)
            if not np.all(np.isfinite(fn_i)):
                raise ValueError('non-finite features')
            rval[offset:offset + len(xi)] = fn_i
            offset += len(xi)
        return rval

    return larray.lmap(fn_1, seq, f_map=f_map)
Ejemplo n.º 42
0
def test_pyll_func():
    """A single partial call must evaluate identically whether evaluated
    directly or after conversion with as_pyll."""
    # N.B. Only uses stuff that's already in the SymbolTable.
    call = partial(float, 5)
    graph = as_pyll(call)
    direct_value = evaluate(call)
    assert direct_value == rec_eval(graph)
Ejemplo n.º 43
0
    def suggest(self, new_ids, domain, trials, seed):
        """Propose one not-yet-executed parameter combination per new id.

        For every id, samples are drawn from ``domain`` until one is found
        whose parameters are not in ``self.executed_params``; that trial is
        appended to the result.  Returns the list of new trial documents, or
        an empty list once all combinations were executed or too many
        consecutive draws had to be skipped.
        """
        rng = np.random.RandomState(seed)
        suggestions = []
        for new_id in new_ids:
            while self._cnt <= self.num_combinations:
                # -- sample new specs, idxs, vals
                idxs, vals = pyll.rec_eval(
                    domain.s_idxs_vals,
                    memo={domain.s_new_ids: [new_id], domain.s_rng: rng})
                result_doc = domain.new_result()
                misc_doc = dict(
                    tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
                miscs_update_idxs_vals([misc_doc], idxs, vals)
                candidate = trials.new_trial_docs(
                    [new_id], [None], [result_doc], [misc_doc])
                # Apart from the `while`, everything up to this point mirrors
                # rand.suggest; the de-duplication below is the new part.

                history = self._get_historical_params(trials)
                self.executed_params = self.executed_params.union(history)

                # fold -0.0 into +0.0 so that zero is not counted twice
                params = hyperopt_grid._convert_neg_zeros_to_zeros(
                    dict(misc_doc['vals']))
                # canonical string form used as the set member
                params_key = dict_to_sorted_str(params)

                if params_key not in self.executed_params:
                    # first time these params are seen: emit the trial and
                    # record it as executed to avoid duplication
                    suggestions.extend(candidate)
                    self._cnt += 1
                    self.executed_params = \
                        self.executed_params.union([params_key])
                    print(self._cnt, params)
                    break

                # params were already seen: skip this draw
                self._cnt_skip += 1

                # Stopping condition: every combination has been executed;
                # returning an empty list breaks the hyperopt loop
                if len(self.executed_params) >= self.num_combinations:
                    return []

                # "Emergency" stopping condition: the loop has been running
                # for too long without submitting any experiment
                if self._cnt_skip >= 100*self.num_combinations:
                    warnings.warn('Warning: Exited due to too many skips.'
                          ' This can happen if most of the param combinationa have '
                                  'been encountered, and drawing a new '
                                  'unseen combination, involves a very low probablity.')
                    # returning an empty list, breaks the hyperopt loop
                    return []

        return suggestions
Ejemplo n.º 44
0
def get_args(params):
    """Evaluate `expr` with every 'hyperopt_param' node replaced by the
    value stored in `params` under that node's label."""
    pinned = {}
    for node in pyll.dfs(expr):
        if node.name != 'hyperopt_param':
            continue
        label = node.arg['label'].obj
        pinned[node] = params[label]
    return pyll.rec_eval(expr, memo=pinned)
Ejemplo n.º 45
0
def pyll_theano_batched_lmap(
    pipeline,
    seq,
    batchsize,
    _debug_call_counts=None,
    print_progress_every=float('inf'),
    abort_on_rows_larger_than=None,
    speed_thresh=None,
    x_dtype='float32',
):
    """
    Return a skdata.larray.lmap object whose function is defined by a
    theano expression.

    The theano expression is built and compiled specifically for the
    dimensions of the given `seq` and for `batchsize`: all internal
    computation runs on a fixed-size batch buffer, while the compiled
    function accepts (and returns) a variable number of rows.

    :param pipeline: callable invoked as ``pipeline((s_X, in_shp))`` where
        `s_X` is the symbolic input batch and `in_shp` its shape tuple.
        The result is passed to ``pyll.rec_eval`` and must evaluate to a
        pair ``(s_obatch, oshp)``: the symbolic output batch and its shape.
    :param seq: array-like with a ``shape`` attribute; ``seq.shape[1:]`` is
        the shape of a single input row.
    :param batchsize: number of rows in the internal fixed-size batch.
    :param _debug_call_counts: optional mapping with ``'fn_1'`` and
        ``'f_map'`` entries that are incremented on each respective call.
        A falsy value (None or an empty mapping) disables counting.
    :param print_progress_every: minimum number of seconds between two
        progress log messages emitted while mapping.
    :param abort_on_rows_larger_than: if truthy, raise ValueError when the
        number of elements in one output row exceeds this limit.
    :param speed_thresh: optional dict with ``'elements'`` and ``'seconds'``
        keys. When given, calls are timed and EvalTimeout is raised if,
        after at least ``'elements'`` rows, the observed throughput is
        below ``elements / seconds``.
    :param x_dtype: dtype name of the input batch buffer.

    :returns: ``larray.lmap(fn_1, seq, f_map=f_map)``.
    :raises ValueError: if an output row is larger than
        `abort_on_rows_larger_than`, or if a partial-batch evaluation
        yields non-finite values.
    :raises EvalTimeout: if `speed_thresh` is given and throughput is too
        low.
    :raises AssertionError: if the pipeline's output batch dimension does
        not equal `batchsize`.
    """

    in_shp = (batchsize, ) + seq.shape[1:]
    batch = np.zeros(in_shp, dtype=x_dtype)
    # Persistent fixed-size buffer that holds the current input batch.
    s_ibatch = theano.shared(batch)
    # Fresh symbolic input of the same type as the buffer.
    s_xi = theano.tensor.as_tensor_variable(s_ibatch).type()
    s_N = s_xi.shape[0]
    # The buffer with its first s_N rows overwritten by the input.
    s_X = theano.tensor.set_subtensor(s_ibatch[:s_N], s_xi)
    thing = pipeline((s_X, in_shp))
    s_obatch, oshp = pyll.rec_eval(thing)
    assert oshp[0] == batchsize
    logger.info('batched_lmap oshp %s' % str(oshp))
    if abort_on_rows_larger_than:
        rowlen = np.prod(oshp[1:])
        if rowlen > abort_on_rows_larger_than:
            raise ValueError('rowlen %i exceeds limit %i' %
                             (rowlen, abort_on_rows_larger_than))

    # Compile a function that takes a variable number of elements in,
    # returns the same number of processed elements out,
    # but does all internal computations using a fixed number of elements,
    # because convolutions are fastest when they're hard-coded to a certain
    # size.
    logger.debug('pyll_theano_batched_lmap compiling fn')
    _fn = theano.function(
        [theano.Param(s_xi, strict=True)],
        s_obatch[:s_N],
        updates={
            s_ibatch: s_X,  # this allows the inc_subtensor to be in-place
        })
    logger.debug('pyll_theano_batched_lmap compiling fn -> done')

    # Running totals used by the throughput check; reset whenever the
    # check passes.
    sums = {'elems': 0, 'times': 0.0}
    if speed_thresh is None:
        time_fn = _fn
    else:

        def time_fn(X):
            """Call the compiled function and accumulate rows and time."""
            t0 = time.time()
            if str(X.dtype) != x_dtype:
                logger.warning('time_fn dtype problem %s %s',
                               X.dtype, x_dtype)
            rval = _fn(X)
            dt = time.time() - t0
            sums['elems'] += len(X)
            sums['times'] += dt
            return rval

        def raise_if_slow():
            """Raise EvalTimeout if throughput is below `speed_thresh`."""
            if sums['elems'] < speed_thresh['elements']:
                # Not enough rows processed yet to judge the speed.
                return
            elapsed = sums['times']
            if elapsed > 0:
                observed_ratio = sums['elems'] / elapsed
            else:
                # The clock did not advance: treat as infinitely fast
                # rather than dividing by zero.
                observed_ratio = float('inf')
            # float() forces true division even for integer thresholds
            # under Python 2.
            required_ratio = (float(speed_thresh['elements']) /
                              speed_thresh['seconds'])
            if observed_ratio < required_ratio:
                raise EvalTimeout(
                    'batched_lmap failed to compute %i elements in %f secs'
                    % (speed_thresh['elements'], speed_thresh['seconds']))
            sums['elems'] = 0
            sums['times'] = 0.0

    def fn_1(x):
        """Process a single row by wrapping it in a batch of length one."""
        if _debug_call_counts:
            _debug_call_counts['fn_1'] += 1
        # The three explicit slices mean a row is indexed along three axes.
        return time_fn(x[None, :, :, :])[0]

    attrs = {'shape': oshp[1:], 'ndim': len(oshp) - 1, 'dtype': s_obatch.dtype}

    def rval_getattr(attr, objs):
        """Report shape/ndim/dtype of one output row."""
        # -- objs don't matter to the structure of the return value
        try:
            return attrs[attr]
        except KeyError:
            raise AttributeError(attr)

    fn_1.rval_getattr = rval_getattr

    # One-element list so the nested function can rebind the timestamp.
    last_print_time = [time.time()]

    def check_for_print(offset, X):
        """Log progress if due, then run the throughput check if enabled."""
        curtime = time.time()
        if (curtime - last_print_time[0]) > print_progress_every:
            logger.info('pyll_theano_batched_lmap.f_map %i %i' %
                        (offset, len(X)))
            last_print_time[0] = curtime

        if speed_thresh is not None:
            raise_if_slow()

    def f_map(X):
        """Process all rows of X, `batchsize` rows at a time."""
        if _debug_call_counts:
            _debug_call_counts['f_map'] += 1

        if len(X) == batchsize:
            # Fast path: exactly one full batch, returned directly.
            # NOTE: unlike the loop below, this path does not check the
            # result for non-finite values.
            check_for_print(offset=0, X=X)
            return time_fn(X)

        rval = np.empty((len(X), ) + oshp[1:], dtype=s_obatch.dtype)
        offset = 0
        while offset < len(X):
            check_for_print(offset, X)
            xi = X[offset:offset + batchsize]
            fn_i = time_fn(xi)
            if not np.all(np.isfinite(fn_i)):
                raise ValueError('non-finite features')
            rval[offset:offset + len(xi)] = fn_i
            offset += len(xi)
        return rval

    return larray.lmap(fn_1, seq, f_map=f_map)