Example #1
def test_clone():
    config = config0()
    config2 = clone(config)

    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    foo = recursive_set_rng_kwarg(config, scope.rng_from_seed(5))
    r = rec_eval(foo)
    print(r)
    r2 = rec_eval(recursive_set_rng_kwarg(config2, scope.rng_from_seed(5)))

    print(r2)
    assert r == r2
Example #2
def test_qlognormal_never_0():
    rng = np.random.RandomState(234)
    s = scope.qlognormal(-5, 3, 0.1)
    recursive_set_rng_kwarg(s, rng)
    results = [rec_eval(s) for i in range(100)]
    assert min(results) == 0.1
    assert max(results) != 0.1
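The test above seeds the expression in place and then calls rec_eval once per draw. hyperopt.pyll.stochastic also provides a sample() helper that bundles the seed-and-evaluate steps into one call; a minimal sketch (the import path and the positional rng argument are assumptions based on hyperopt's usual usage, not something shown on this page):

import numpy as np
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

s = scope.qlognormal(-5, 3, 0.1)
# one draw; per the test above it is never 0 and is quantized to multiples of q=0.1
draw = sample(s, np.random.RandomState(234))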
Example #3
def test_vectorize_multipath():
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = hp_choice('p1', [1, p0, -p0]) ** 2
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)

    vloss = vh.v_expr
    print(vloss)

    full_output = as_apply([vloss,
        vh.idxs_by_label(),
        vh.vals_by_label()])

    new_vc = recursive_set_rng_kwarg(
            full_output,
            as_apply(np.random.RandomState(1)),
            )

    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    print('idxs p1', idxs['p1'])
    print('vals p1', vals['p1'])
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    p1dct = dict(zip(idxs['p1'], vals['p1']))
    for ii, li in enumerate(losses):
        print(ii, li)
        if p1dct[ii] != 0:
            assert li == p0dct[ii] ** 2
        else:
            assert li == 1
Example #4
 def evaluate(self, config, ctrl):
     memo = self.memo_from_config(config)
     memo[self.pyll_ctrl] = ctrl
     if self.init_pyll_memo:
         memo = self.init_pyll_memo(memo, config=config, ctrl=ctrl)
     if self.rng is not None and not self.installed_rng:
         # -- N.B. this modifies the expr graph in-place
         #    XXX this feels wrong
         self.expr = recursive_set_rng_kwarg(self.expr,
             pyll.as_apply(self.rng))
         self.installed_rng = True
     try:
         # -- the "work" of evaluating `config` can be written
         #    either into the pyll part (self.expr)
         #    or the normal Python part (self.fn)
         pyll_rval = pyll.rec_eval(self.expr, memo=memo)
         rval = self.fn(pyll_rval)
     except Exception as e:
         n_match = 0
         for match, match_pair in self.exceptions:
             if match(e):
                 rval = match_pair(e)
                 logger.info('Caught fn exception %s' % str(rval))
                 n_match += 1
                 break
         if n_match == 0:
             raise
Example #5
def test_vectorize_multipath():
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = hp_choice('p1', [1, p0, -p0])**2
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)

    vloss = vh.v_expr
    print(vloss)

    full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()])

    new_vc = recursive_set_rng_kwarg(
        full_output,
        as_apply(np.random.RandomState(1)),
    )

    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    print('idxs p1', idxs['p1'])
    print('vals p1', vals['p1'])
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    p1dct = dict(zip(idxs['p1'], vals['p1']))
    for ii, li in enumerate(losses):
        print(ii, li)
        if p1dct[ii] != 0:
            assert li == p0dct[ii]**2
        else:
            assert li == 1
Example #6
        def work(self):
            bandit = self.bandit
            random_algo = Random(bandit)
            # build an experiment of 10 trials
            trials = Trials()
            exp = Experiment(trials, random_algo)
            #print random_algo.s_specs_idxs_vals
            exp.run(10)
            ids = trials.tids
            assert len(ids) == 10
            tpe_algo = TreeParzenEstimator(bandit)
            #print pyll.as_apply(tpe_algo.post_idxs)
            #print pyll.as_apply(tpe_algo.post_vals)
            argmemo = {}

            print(trials.miscs)
            idxs, vals = miscs_to_idxs_vals(trials.miscs)
            argmemo[tpe_algo.observed['idxs']] = idxs
            argmemo[tpe_algo.observed['vals']] = vals
            argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
            argmemo[tpe_algo.observed_loss['vals']] = trials.losses()
            stuff = pyll.rec_eval(
                [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']],
                memo=argmemo)
            print(stuff)
Example #7
def test_vectorize_simple():
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = p0 ** 2
    print(loss)
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)
    vloss = vh.v_expr

    full_output = as_apply([vloss,
        vh.idxs_by_label(),
        vh.vals_by_label()])
    fo2 = replace_repeat_stochastic(full_output)

    new_vc = recursive_set_rng_kwarg(
            fo2,
            as_apply(np.random.RandomState(1)),
            )

    #print new_vc
    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    for ii, li in enumerate(losses):
        assert p0dct[ii] ** 2 == li
Example #8
        def work(self):
            bandit = self.bandit
            random_algo = Random(bandit)
            # build an experiment of 10 trials
            trials = Trials()
            exp = Experiment(trials, random_algo)
            #print random_algo.s_specs_idxs_vals
            exp.run(10)
            ids = trials.tids
            assert len(ids) == 10
            tpe_algo = TreeParzenEstimator(bandit)
            #print pyll.as_apply(tpe_algo.post_idxs)
            #print pyll.as_apply(tpe_algo.post_vals)
            argmemo = {}

            print(trials.miscs)
            idxs, vals = miscs_to_idxs_vals(trials.miscs)
            argmemo[tpe_algo.observed['idxs']] = idxs
            argmemo[tpe_algo.observed['vals']] = vals
            argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
            argmemo[tpe_algo.observed_loss['vals']] = trials.losses()
            stuff = pyll.rec_eval([tpe_algo.post_below['idxs'],
                        tpe_algo.post_below['vals']],
                        memo=argmemo)
            print(stuff)
Example #9
    def evaluate_async(
        self,
        config,
        ctrl,
        attach_attachments=True,
    ):
        '''
        this is the first part of async evaluation for ipython parallel engines (see ipy.py)

        This breaks evaluate into two parts to allow for the apply_async call
        to only pass the objective function and arguments.
        '''
        memo = self.memo_from_config(config)
        use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
        if self.pass_expr_memo_ctrl:
            rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
        else:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            pyll_rval = pyll.rec_eval(
                self.expr,
                memo=memo,
                print_node_on_error=self.rec_eval_print_node_on_error)
            return (self.fn, pyll_rval)
Example #10
def get_performance(slm, decisions, preproc, comparison,
                    namebase=None, progkey='result_w_cleanup',
                    return_multi=False, ctrl=None):
    if decisions is None:
        decisions = np.zeros((1, 3200))
    else:
        decisions = np.asarray(decisions)
    assert decisions.shape == (1, 3200)
    if namebase is None:
        namebase = 'memmap_' + str(np.random.randint(1e8))
    image_features = scope.slm_memmap(
            desc=slm,
            X=scope.get_images('float32', preproc=preproc),
            name=namebase + '_img_feat')
    if return_multi:
        comps = ['mult', 'sqrtabsdiff']
    else:
        comps = [comparison]
    cmp_progs = []
    for comp in comps:
        sresult = screening_program(
                    slm_desc=slm,
                    preproc=preproc,
                    comparison=comp,
                    namebase=namebase,
                    decisions=decisions,
                    image_features=image_features,
                    ctrl=ctrl)[1][progkey]
        cmp_progs.append([comp, sresult])
    cmp_results = pyll.rec_eval(cmp_progs)
    if return_multi:
        return cmp_results
    else:
        return cmp_results[0][1]
Example #11
def suggest(new_ids, domain, trials, seed):
    #logger.debug("in suggest with seed: %s" % (str(seed)))
    #logger.debug('generating trials for new_ids: %s' % str(new_ids))

    #print("\n\n")
    #print_apply_object(domain.s_idxs_vals, "")
    #print("\n\n")
    #import pdb; pdb.set_trace()
    rng = np.random.RandomState(seed)
    rval = []
    for ii, new_id in enumerate(new_ids):
        # -- sample new specs, idxs, vals
        idxs, vals = pyll.rec_eval(
            domain.s_idxs_vals,
            memo={
                domain.s_new_ids: [new_id],
                domain.s_rng: rng,
            })
        #print("new_ids: ", new_ids)
        #print("idxs: ", idxs)
        #print("vals: ", vals)
        #print("domain.s_idxs_vals: ", domain.s_idxs_vals)
        #print("domain.s_new_ids: ", domain.s_new_ids)
        #print("new_result: ", domain.new_result())
        #print("\nprinting domain.s_new_ids, an apply object:")
        #print_apply_object(domain.s_new_ids, "")
        #print("\nprinting domain.s_idxs_vals, an apply object:")
        #print_apply_object(domain.s_idxs_vals, "")
        #print("")
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(trials.new_trial_docs([new_id],
                    [None], [new_result], [new_misc]))
    return rval
Example #12
def test_vectorize_simple():
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = p0**2
    print(loss)
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)
    vloss = vh.v_expr

    full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()])
    fo2 = replace_repeat_stochastic(full_output)

    new_vc = recursive_set_rng_kwarg(
        fo2,
        as_apply(np.random.RandomState(1)),
    )

    #print new_vc
    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    for ii, li in enumerate(losses):
        assert p0dct[ii]**2 == li
Example #13
    def suggest(self, new_ids, trials):
        """
        new_ids - a list of unique identifiers (not necessarily ints!)
                  for the suggestions that this function should return.

        All lists have the same length.
        """
        # XXX: this used to be the implementation for the Random class and the
        # base class.  But then I was doing an experiment with Random() a
        # different seed every time and I was surprised to see it generating
        # the same thing all the time!  In response, I gave the Random
        # subclass its own simpler and more random implementation of suggest
        # that does not re-seed self.rng based on the new_ids. That leaves
        # this strange implementation here in the base class, and I'm not sure
        # whether to delete it. -JB June 19 2012
        #
        # -- install new_ids as program arguments
        rval = []
        for new_id in new_ids:
            # the results are not computed all at once so that we can
            # seed the generator based on each new_id
            sh1 = hashlib.sha1()
            sh1.update(str(new_id).encode())
            self.rng.seed(int(int(sh1.hexdigest(), base=16) % (2 ** 31)))

            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(self.s_idxs_vals, memo={self.s_new_ids: [new_id]})
            # print 'BandigAlgo.suggest IDXS', idxs
            # print 'BandigAlgo.suggest VALS', vals
            new_result = self.bandit.new_result()
            new_misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)
            rval.extend(trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
        return rval
Example #14
def test_recursive_set_rng_kwarg():
    uniform = scope.uniform
    a = as_apply([uniform(0, 1), uniform(2, 3)])
    rng = np.random.RandomState(234)
    recursive_set_rng_kwarg(a, rng)
    print(a)
    val_a = rec_eval(a)
    assert 0 < val_a[0] < 1
    assert 2 < val_a[1] < 3
Example #15
def test_recursive_set_rng_kwarg():
    uniform = scope.uniform
    a = as_apply([uniform(0, 1), uniform(2, 3)])
    rng = np.random.RandomState(234)
    recursive_set_rng_kwarg(a, rng)
    print(a)
    val_a = rec_eval(a)
    assert 0 < val_a[0] < 1
    assert 2 < val_a[1] < 3
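These two tests show the core pattern behind most of the examples on this page: build a pyll expression, attach a seeded RandomState to every stochastic node with recursive_set_rng_kwarg, then evaluate it with rec_eval. A minimal sketch of the same pattern on a dict-shaped expression (the hyperopt.pyll import paths are assumed):

import numpy as np
from hyperopt import pyll
from hyperopt.pyll.stochastic import recursive_set_rng_kwarg

# two stochastic nodes inside one expression graph
expr = pyll.as_apply({'x': pyll.scope.uniform(0, 1),
                      'y': pyll.scope.normal(0, 1)})
recursive_set_rng_kwarg(expr, np.random.RandomState(0))  # seed every stochastic node
point = pyll.rec_eval(expr)  # a plain dict, e.g. {'x': <float in (0, 1)>, 'y': <float>}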
Example #16
def suggest_batch(new_ids, domain, trials, seed):

    rng = np.random.RandomState(seed)
    # -- sample new specs, idxs, vals
    idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                               memo={
                                   domain.s_new_ids: new_ids,
                                   domain.s_rng: rng,
                               })
    return idxs, vals
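For orientation: the pair returned here is two dicts keyed by hyperparameter label. idxs records which trial ids drew a value for each label (conditional parameters appear only for some ids) and vals holds the matching draws. An illustrative, made-up shape for two new ids (not output from a real run):

idxs = {'p0': [7, 8], 'p1': [8]}        # trial ids that sampled each hyperparameter
vals = {'p0': [0.13, 0.62], 'p1': [2]}  # draws aligned element-wise with idxs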
Example #17
def test_clone():
    config = config0()
    config2 = clone(config)

    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    foo = recursive_set_rng_kwarg(
                config,
                scope.rng_from_seed(5))
    r = rec_eval(foo)
    print r
    r2 = rec_eval(
            recursive_set_rng_kwarg(
                config2,
                scope.rng_from_seed(5)))

    print r2
    assert r == r2
Example #18
def test_screening_prog_for_smoke():
    # smoke test
    prog = toyproblem.screening_prog(ctrl=Ctrl(None), **config_tiny)
    sprog = str(prog)
    #print sprog
    rval = pyll.rec_eval(prog)
    #print rval
    assert 'loss' in rval
    assert 'decisions' in rval
    assert len(rval['splits']) == 2
    assert rval['splits'][0] != rval['splits'][1]
Example #19
def suggest_batch(new_ids, domain, trials, seed):

    rng = np.random.RandomState(seed)
    # -- sample new specs, idxs, vals
    idxs, vals = pyll.rec_eval(
        domain.s_idxs_vals,
        memo={
            domain.s_new_ids: new_ids,
            domain.s_rng: rng,
        })
    return idxs, vals
Example #20
def suggest(new_ids, domain, trials, seed=123):

    rval = []
    for new_id in new_ids:
        # -- sample new specs, idxs, vals
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                memo={domain.s_new_ids: [new_id]})
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(trials.new_trial_docs([new_id],
                [None], [new_result], [new_misc]))
    return rval
Example #21
 def logp(apply_node):
     val = memo_cpy[apply_node]
     if val is pyll.base.GarbageCollected:
         # -- XXX: confirm this happens because the hyperparam is unused.
         return 0
     if 'uniform' in apply_node.name:
         low = apply_node.arg['low'].obj
         high = apply_node.arg['high'].obj
         if 'q' in apply_node.name:
             q = apply_node.arg['q'].obj
         if apply_node.name == 'uniform':
             return rdists.uniform_gen(a=low, b=high).logpdf(
                 val, loc=low, scale=(high - low))
         elif apply_node.name == 'quniform':
             return rdists.quniform_gen(
                 low=low, high=high, q=q).logpmf(val)
         elif apply_node.name == 'loguniform':
             return rdists.loguniform_gen(
                 low=low, high=high).logpdf(val)
         elif apply_node.name == 'qloguniform':
             return rdists.qloguniform_gen(
                 low=low, high=high, q=q).logpmf(val)
         else:
             raise NotImplementedError(apply_node.name) 
     elif 'normal' in apply_node.name:
         mu = apply_node.arg['mu'].obj
         sigma = apply_node.arg['sigma'].obj
         if 'q' in apply_node.name:
             q = apply_node.arg['q'].obj
         if apply_node.name == 'normal':
             return scipy.stats.norm(
                 loc=mu, scale=sigma).logpdf(val)
         elif apply_node.name == 'qnormal':
             return rdists.qnormal_gen(
                 mu=mu, sigma=sigma, q=q).logpmf(val)
         elif apply_node.name == 'lognormal':
             return rdists.lognorm_gen(
                 mu=mu, sigma=sigma).logpdf(val)
         elif apply_node.name == 'qlognormal':
             return rdists.qlognormal_gen(
                 mu=mu, sigma=sigma, q=q).logpmf(val)
         else:
             raise NotImplementedError(apply_node.name) 
     elif apply_node.name == 'randint':
         return -math.log(apply_node.arg['upper'].obj)
     elif apply_node.name == 'categorical':
         assert val == int(val), val
         p = pyll.rec_eval(apply_node.arg['p'])
         return math.log(p[int(val)])
     else:
         raise NotImplementedError(apply_node.name)
Example #22
    def evaluate(self, config, ctrl, attach_attachments=True):
        memo = self.memo_from_config(config)
        self.use_obj_for_literal_in_memo(ctrl, base.Ctrl, memo)
        if self.rng is not None and not self.installed_rng:
            # -- N.B. this modifies the expr graph in-place
            #    XXX this feels wrong
            self.expr = recursive_set_rng_kwarg(self.expr,
                pyll.as_apply(self.rng))
            self.installed_rng = True
        if self.pass_expr_memo_ctrl:
            rval = self.fn(
                    expr=self.expr,
                    memo=memo,
                    ctrl=ctrl,
                    *self.args)
        else:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            pyll_rval = pyll.rec_eval(self.expr, memo=memo,
                    print_node_on_error=self.rec_eval_print_node_on_error)
            rval = self.fn(pyll_rval, *self.args)

        if isinstance(rval, (float, int, np.number)):
            dict_rval = {'loss': rval}
        elif isinstance(rval, (dict,)):
            dict_rval = rval
            if 'loss' not in dict_rval:
                raise ValueError('dictionary must have "loss" key',
                        dict_rval.keys())
        else:
            raise TypeError('invalid return type (neither number nor dict)', rval)

        if dict_rval['loss'] is not None:
            # -- fail if cannot be cast to float
            dict_rval['loss'] = float(dict_rval['loss'])

        dict_rval.setdefault('status', base.STATUS_OK)
        if dict_rval['status'] not in base.STATUS_STRINGS:
            raise ValueError('invalid status string', dict_rval['status'])

        if attach_attachments:
            attachments = dict_rval.pop('attachments', {})
            for key, val in attachments.items():
                ctrl.attachments[key] = val

        # -- don't do this here because SON-compatibility is only a requirement
        #    for trials destined for a mongodb. In-memory rvals can contain
        #    anything.
        #return base.SONify(dict_rval)
        return dict_rval
Example #23
    def evaluate(self, config, ctrl, attach_attachments=True):
        memo = self.memo_from_config(config)
        self.use_obj_for_literal_in_memo(ctrl, base.Ctrl, memo)
        if self.rng is not None and not self.installed_rng:
            # -- N.B. this modifies the expr graph in-place
            #    XXX this feels wrong
            self.expr = recursive_set_rng_kwarg(self.expr,
                                                pyll.as_apply(self.rng))
            self.installed_rng = True
        if self.pass_expr_memo_ctrl:
            rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
        else:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            pyll_rval = pyll.rec_eval(
                self.expr,
                memo=memo,
                print_node_on_error=self.rec_eval_print_node_on_error)
            rval = self.fn(pyll_rval)

        if isinstance(rval, (float, int, np.number)):
            dict_rval = {'loss': rval}
        elif isinstance(rval, (dict, )):
            dict_rval = rval
            if 'loss' not in dict_rval:
                raise ValueError('dictionary must have "loss" key',
                                 dict_rval.keys())
        else:
            raise TypeError('invalid return type (neither number nor dict)',
                            rval)

        if dict_rval['loss'] is not None:
            # -- fail if cannot be cast to float
            dict_rval['loss'] = float(dict_rval['loss'])

        dict_rval.setdefault('status', base.STATUS_OK)
        if dict_rval['status'] not in base.STATUS_STRINGS:
            raise ValueError('invalid status string', dict_rval['status'])

        if attach_attachments:
            attachments = dict_rval.pop('attachments', {})
            for key, val in attachments.items():
                ctrl.attachments[key] = val

        # -- don't do this here because SON-compatibility is only a requirement
        #    for trials destined for a mongodb. In-memory rvals can contain
        #    anything.
        #return base.SONify(dict_rval)
        return dict_rval
Example #24
def suggest(new_ids, domain, trials, seed=123):
    logger.info('generating trials for new_ids: %s' % str(new_ids))

    rval = []
    for new_id in new_ids:
        # -- hack - domain should be read-only here :/
        #    in fact domain should not have its own seed or rng
        domain.rng.seed(seed + new_id)
        # -- sample new specs, idxs, vals
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                memo={domain.s_new_ids: [new_id]})
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(trials.new_trial_docs([new_id],
                [None], [new_result], [new_misc]))
    return rval
Example #25
def suggest(new_ids, domain, trials, seed=123):
    logger.info('generating trials for new_ids: %s' % str(new_ids))

    rval = []
    for new_id in new_ids:
        # -- hack - domain should be read-only here :/
        #    in fact domain should not have its own seed or rng
        domain.rng.seed(seed + new_id)
        # -- sample new specs, idxs, vals
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                                   memo={domain.s_new_ids: [new_id]})
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
Example #26
def space_eval(space, hp_assignment):
    """Compute a point in a search space from a hyperparameter assignment.

    Parameters:
    -----------
    space - a pyll graph involving hp nodes (see `pyll_utils`).

    hp_assignment - a dictionary mapping hp node labels to values.
    """
    nodes = pyll.toposort(space)
    memo = {}
    for node in nodes:
        if node.name == 'hyperopt_param':
            label = node.arg['label'].eval()
            if label in hp_assignment:
                memo[node] = hp_assignment[label]
    rval = pyll.rec_eval(space, memo=memo)
    return rval
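space_eval is hyperopt's public helper for mapping a flat label-to-value assignment (for example the dict returned by fmin) back onto the original search space. A hedged usage sketch, assuming the usual hp constructors and the top-level space_eval export:

from hyperopt import hp, space_eval

space = {'lr': hp.loguniform('lr', -5, 0),
         'units': hp.choice('units', [32, 64, 128])}
# for hp.choice the assignment carries the index of the chosen option
print(space_eval(space, {'lr': 0.01, 'units': 2}))  # {'lr': 0.01, 'units': 128}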
Example #27
    def suggest(self, new_ids, trials):
        """
        new_ids - a list of unique identifiers (not necessarily ints!)
                  for the suggestions that this function should return.

        All lists have the same length.
        """
        rval = []
        for new_id in new_ids:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(self.s_idxs_vals, memo={self.s_new_ids: [new_id]})
            # print 'BandigAlgo.suggest IDXS', idxs
            # print 'BandigAlgo.suggest VALS', vals
            new_result = self.bandit.new_result()
            new_misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)
            rval.extend(trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
        return rval
Example #28
def space_eval(space, hp_assignment):
    """Compute a point in a search space from a hyperparameter assignment.

    Parameters:
    -----------
    space - a pyll graph involving hp nodes (see `pyll_utils`).

    hp_assignment - a dictionary mapping hp node labels to values.
    """
    nodes = pyll.toposort(space)
    memo = {}
    for node in nodes:
        if node.name == 'hyperopt_param':
            label = node.arg['label'].eval()
            if label in hp_assignment:
                memo[node] = hp_assignment[label]
    rval = pyll.rec_eval(space, memo=memo)
    return rval
Example #29
def suggest(new_ids, domain, trials, seed):
    #logger.debug("in suggest with seed: %s" % (str(seed)))
    #logger.debug('generating trials for new_ids: %s' % str(new_ids))

    rng = np.random.RandomState(seed)
    rval = []
    for ii, new_id in enumerate(new_ids):
        # -- sample new specs, idxs, vals
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                                   memo={
                                       domain.s_new_ids: [new_id],
                                       domain.s_rng: rng,
                                   })
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
Example #30
    def evaluate_async(self, config, ctrl, attach_attachments=True,):
        '''
        this is the first part of async evaluation for ipython parallel engines (see ipy.py)

        This breaks evaluate into two parts to allow for the apply_async call
        to only pass the objective function and arguments.
        '''
        memo = self.memo_from_config(config)
        use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
        if self.pass_expr_memo_ctrl:
            rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
        else:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            pyll_rval = pyll.rec_eval(
                self.expr,
                memo=memo,
                print_node_on_error=self.rec_eval_print_node_on_error)
            return (self.fn,pyll_rval)
Example #31
def suggest(new_ids, domain, trials, seed):
    #logger.debug("in suggest with seed: %s" % (str(seed)))
    #logger.debug('generating trials for new_ids: %s' % str(new_ids))

    rng = np.random.RandomState(seed)
    rval = []
    for ii, new_id in enumerate(new_ids):
        # -- sample new specs, idxs, vals
        idxs, vals = pyll.rec_eval(
            domain.s_idxs_vals,
            memo={
                domain.s_new_ids: [new_id],
                domain.s_rng: rng,
            })
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(trials.new_trial_docs([new_id],
                    [None], [new_result], [new_misc]))
    return rval
Example #32
    def evaluate(self, config, ctrl, attach_attachments=True):
        memo = self.memo_from_config(config)
        use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
        if self.pass_expr_memo_ctrl:
            rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
        else:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            pyll_rval = pyll.rec_eval(
                self.expr,
                memo=memo,
                print_node_on_error=self.rec_eval_print_node_on_error)
            rval = self.fn(pyll_rval)

        if isinstance(rval, (float, int, np.number)):
            dict_rval = {'loss': float(rval), 'status': STATUS_OK}
        else:
            dict_rval = dict(rval)
            status = dict_rval['status']
            if status not in STATUS_STRINGS:
                print('status', status)
                raise InvalidResultStatus(dict_rval)

            if status == STATUS_OK:
                # -- make sure that the loss is present and valid
                try:
                    dict_rval['loss'] = float(dict_rval['loss'])
                except (TypeError, KeyError):
                    raise InvalidLoss(dict_rval)

        if attach_attachments:
            attachments = dict_rval.pop('attachments', {})
            for key, val in attachments.items():
                ctrl.attachments[key] = val

        # -- don't do this here because SON-compatibility is only a requirement
        #    for trials destined for a mongodb. In-memory rvals can contain
        #    anything.
        #return base.SONify(dict_rval)
        return dict_rval
    def suggest(self, new_ids, trials):
        """
        new_ids - a list of unique identifiers (not necessarily ints!)
                  for the suggestions that this function should return.

        All lists have the same length.
        """
        rval = []
        for new_id in new_ids:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(self.s_idxs_vals,
                                       memo={self.s_new_ids: [new_id]})
            #print 'BandigAlgo.suggest IDXS', idxs
            #print 'BandigAlgo.suggest VALS', vals
            new_result = self.bandit.new_result()
            new_misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)
            rval.extend(
                trials.new_trial_docs([new_id], [None], [new_result],
                                      [new_misc]))
        return rval
Example #34
 def evaluate(self, config, ctrl):
     """Return a result document
     """
     memo = self.memo_from_config(config)
     self.use_obj_for_literal_in_memo(ctrl, Ctrl, memo)
     if self.rng is not None and not self.installed_rng:
         # -- N.B. this modifies the expr graph in-place
         #    XXX this feels wrong
         self.expr = recursive_set_rng_kwarg(self.expr, pyll.as_apply(self.rng))
         self.installed_rng = True
     try:
         r_dct = pyll.rec_eval(self.expr, memo=memo)
      except Exception as e:
         n_match = 0
         for match, match_pair in self.exceptions:
             if match(e):
                 r_dct = match_pair(e)
                 n_match += 1
                 break
         if n_match == 0:
             raise
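The except block above walks self.exceptions, a list of (predicate, handler) pairs: the first predicate that matches the raised exception turns it into a result document instead of re-raising. A hypothetical entry, only to show the shape (the handler's result format here is an assumption, not hyperopt API):

exceptions = [
    # convert memory errors into a failed result document rather than a crash
    (lambda e: isinstance(e, MemoryError),
     lambda e: {'status': 'fail', 'failure': str(e)}),
]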
Example #35
    def evaluate(self, config, ctrl, attach_attachments=True):
        memo = self.memo_from_config(config)
        use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
        if self.pass_expr_memo_ctrl:
            rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
        else:
            # -- the "work" of evaluating `config` can be written
            #    either into the pyll part (self.expr)
            #    or the normal Python part (self.fn)
            pyll_rval = pyll.rec_eval(
                self.expr,
                memo=memo,
                print_node_on_error=self.rec_eval_print_node_on_error)
            rval = self.fn(pyll_rval)

        if isinstance(rval, (float, int, np.number)):
            dict_rval = {'loss': float(rval), 'status': STATUS_OK}
        else:
            dict_rval = dict(rval)
            status = dict_rval['status']
            if status not in STATUS_STRINGS:
                raise InvalidResultStatus(dict_rval)

            if status == STATUS_OK:
                # -- make sure that the loss is present and valid
                try:
                    dict_rval['loss'] = float(dict_rval['loss'])
                except (TypeError, KeyError):
                    raise InvalidLoss(dict_rval)

        if attach_attachments:
            attachments = dict_rval.pop('attachments', {})
            for key, val in attachments.items():
                ctrl.attachments[key] = val

        # -- don't do this here because SON-compatibility is only a requirement
        #    for trials destined for a mongodb. In-memory rvals can contain
        #    anything.
        #return base.SONify(dict_rval)
        return dict_rval
 def evaluate(self, config, ctrl):
     """Return a result document
     """
     memo = self.memo_from_config(config)
     self.use_obj_for_literal_in_memo(ctrl, Ctrl, memo)
     if self.rng is not None and not self.installed_rng:
         # -- N.B. this modifies the expr graph in-place
         #    XXX this feels wrong
         self.expr = recursive_set_rng_kwarg(self.expr,
                                             pyll.as_apply(self.rng))
         self.installed_rng = True
     try:
         r_dct = pyll.rec_eval(self.expr, memo=memo)
      except Exception as e:
         n_match = 0
         for match, match_pair in self.exceptions:
             if match(e):
                 r_dct = match_pair(e)
                 n_match += 1
                 break
         if n_match == 0:
             raise
    def suggest(self, new_ids, trials):
        """
        new_ids - a list of unique identifiers (not necessarily ints!)
                  for the suggestions that this function should return.

        All lists have the same length.
        """
        # XXX: this used to be the implementation for the Random class and the
        # base class.  But then I was doing an experiment with Random() a
        # different seed every time and I was surprised to see it generating
        # the same thing all the time!  In response, I gave the Random
        # subclass its own simpler and more random implementation of suggest
        # that does not re-seed self.rng based on the new_ids. That leaves
        # this strange implementation here in the base class, and I'm not sure
        # whether to delete it. -JB June 19 2012
        #
        # -- install new_ids as program arguments
        rval = []
        for new_id in new_ids:
            # the results are not computed all at once so that we can
            # seed the generator based on each new_id
            sh1 = hashlib.sha1()
            sh1.update(str(new_id).encode())
            self.rng.seed(int(int(sh1.hexdigest(), base=16) % (2**31)))

            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(self.s_idxs_vals,
                                       memo={self.s_new_ids: [new_id]})
            #print 'BandigAlgo.suggest IDXS', idxs
            #print 'BandigAlgo.suggest VALS', vals
            new_result = self.bandit.new_result()
            new_misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)
            rval.extend(
                trials.new_trial_docs([new_id], [None], [new_result],
                                      [new_misc]))
        return rval
Example #38
    def suggest(self, new_ids, trials):
        """
        new_ids - a list of unique identifiers (not necessarily ints!)
                  for the suggestions that this function should return.

        All lists have the same length.
        """
        # -- install new_ids as program arguments
        rval = []
        for new_id in new_ids:
            self.new_ids[:] = [new_id]

            sh1 = hashlib.sha1()
            sh1.update(str(new_id).encode())
            self.rng.seed(int(int(sh1.hexdigest(), base=16) % (2**31)))

            # -- sample new specs, idxs, vals
            new_specs, idxs, vals = pyll.rec_eval(self.s_specs_idxs_vals)
            new_result = self.bandit.new_result()
            new_misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)
            rval.extend(trials.new_trial_docs([new_id],
                    new_specs, [new_result], [new_misc]))
        return rval
Example #39
def sample_hparam_space(space, algo, max_evals, dpp_dist='l2',discretize_space=True, discretize_num=0, 
                        rstate=np.random.RandomState()):

    trials = base.Trials()
    trials.max_evals = max_evals
    trials.discretize_space=discretize_space
    trials.dpp_dist = dpp_dist
    trials.discretize_num = discretize_num

    # here we're using placeholders for fn and pass_expr_memo_ctrl, since
    # neither will actually be used.
    domain = base.Domain(False, space, pass_expr_memo_ctrl=False)
    algo([0], domain, trials,rstate.randint(2 ** 31 - 1))
    hparam_sets_to_return = []
    for hparam_set in trials.hparams_to_try:
        spec = base.spec_from_misc(hparam_set[0]['misc'])
        ctrl = base.Ctrl(trials, current_trial=hparam_set[0])
        
        memo = domain.memo_from_config(spec)
        # this doesn't seem to do anything, but it might
        domain.use_obj_for_literal_in_memo(ctrl, base.Ctrl, memo)
        
        hparam_sets_to_return.append(pyll.rec_eval(domain.expr, memo=memo))
    return hparam_sets_to_return
Example #40
def test_vectorize_config0():
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    d = locals()
    d['p1'] = None # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    if 1:
        print('=' * 80)
        print('VECTORIZED')
        print(full_output)
        print('\n' * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC')
        print(fo2)
        print('\n' * 1)

    new_vc = recursive_set_rng_kwarg(
            fo2,
            as_apply(np.random.RandomState(1))
            )
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC WITH RNGS')
        print(new_vc)

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print('foo[0]', foo[0])
    print('foo[1]', foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994) }
    assert foo[1] != foo[2]

    print(idxs)
    print(vals['p3'])
    print(vals['p6'])
    print(idxs['p1'])
    print(vals['p1'])
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval
    p1d = dict(zip(idxs['p1'], vals['p1']))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        if p3v == p6v == 0:
            assert ii not in idxs['p1']
        if p3v:
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print('p6', foo[ii]['p6'], p1d[ii])
            assert foo[ii]['p6'] == p1d[ii]
Example #41
def suggest(
    new_ids,
    domain,
    trials,
    seed=123,
    prior_weight=_default_prior_weight,
    n_startup_jobs=_default_n_startup_jobs,
    n_EI_candidates=_default_n_EI_candidates,
    gamma=_default_gamma,
    linear_forgetting=_default_linear_forgetting,
):

    if len(new_ids) > 1:
        # write a loop to draw new points sequentially
        # TODO: insert constant liar for tentative suggestions
        raise NotImplementedError("generates one at a time")
    else:
        new_id, = new_ids

    t0 = time.time()
    (s_prior_weight, observed, observed_loss, specs, opt_idxs, opt_vals) = tpe_transform(domain, prior_weight, gamma)
    tt = time.time() - t0
    logger.info("tpe_transform took %f seconds" % tt)

    docs_by_tid = dict([(d["tid"], d) for d in trials.trials])
    best_docs = dict()
    best_docs_loss = dict()
    for doc in trials.trials:
        # get either this docs own tid or the one that it's from
        tid = doc["misc"].get("from_tid", doc["tid"])
        loss = domain.loss(doc["result"], doc["spec"])
        if loss is None:
            # -- associate infinite loss to new/running/failed jobs
            loss = float("inf")
        else:
            loss = float(loss)
        best_docs_loss.setdefault(tid, loss)
        if loss <= best_docs_loss[tid]:
            best_docs_loss[tid] = loss
            best_docs[tid] = doc

    tid_docs = list(best_docs.items())
    # -- sort docs by order of suggestion
    #    so that linear_forgetting removes the oldest ones
    tid_docs.sort()
    losses = [best_docs_loss[k] for k, v in tid_docs]
    tids = [k for k, v in tid_docs]
    docs = [v for k, v in tid_docs]

    if docs:
        logger.info("TPE using %i/%i trials with best loss %f" % (len(docs), len(trials), min(best_docs_loss.values())))
    else:
        logger.info("TPE using 0 trials")

    if len(docs) < n_startup_jobs:
        # N.B. THIS SEEDS THE RNG BASED ON THE new_id
        return rand.suggest(new_ids, domain, trials, seed)

    #    Sample and compute log-probability.
    if tids:
        # -- the +2 co-ordinates with an assertion above
        #    to ensure that fake ids are used during sampling
        fake_id_0 = max(max(tids), new_id) + 2
    else:
        # -- weird - we're running the TPE algo from scratch
        assert n_startup_jobs <= 0
        fake_id_0 = new_id + 2

    fake_ids = range(fake_id_0, fake_id_0 + n_EI_candidates)

    # -- this dictionary will map pyll nodes to the values
    #    they should take during the evaluation of the pyll program
    memo = {domain.s_new_ids: fake_ids}

    o_idxs_d, o_vals_d = miscs_to_idxs_vals([d["misc"] for d in docs], keys=domain.params.keys())
    memo[observed["idxs"]] = o_idxs_d
    memo[observed["vals"]] = o_vals_d

    memo[observed_loss["idxs"]] = tids
    memo[observed_loss["vals"]] = losses

    idxs, vals = pyll.rec_eval([opt_idxs, opt_vals], memo=memo)

    # -- retrieve the best of the samples and form the return tuple
    # the build_posterior makes all specs the same

    rval_specs = [None]  # -- specs are deprecated
    rval_results = [domain.new_result()]
    rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]

    miscs_update_idxs_vals(rval_miscs, idxs, vals, idxs_map={fake_ids[0]: new_id}, assert_all_vals_used=False)
    rval_docs = trials.new_trial_docs([new_id], rval_specs, rval_results, rval_miscs)

    return rval_docs
Example #42
def suggest(new_ids, domain, trials,
        seed=123,
        prior_weight=_default_prior_weight,
        n_startup_jobs=_default_n_startup_jobs,
        n_EI_candidates=_default_n_EI_candidates,
        gamma=_default_gamma,
        linear_forgetting=_default_linear_forgetting,
        ):

    if len(new_ids) > 1:
        # -- greedy loop rolling forward
        trials_copy = Trials()
        trials_copy._dynamic_trials = trials.trials
        trials_copy.refresh()
        rval = []
        for new_id in new_ids:
            new_trials1 = suggest([new_id], domain, trials_copy,
                seed=seed,
                prior_weight=prior_weight,
                n_startup_jobs=n_startup_jobs,
                n_EI_candidates=n_EI_candidates,
                gamma=gamma,
                linear_forgetting=linear_forgetting)
            trials_copy.insert_trial_docs(new_trials1)
            trials_copy.refresh()
            rval.extend(new_trials1)
        return rval
    else:
        new_id, = new_ids

    t0 = time.time()
    (s_prior_weight, observed, observed_loss, specs, opt_idxs, opt_vals) \
            = tpe_transform(domain, prior_weight, gamma)
    tt = time.time() - t0
    logger.info('tpe_transform took %f seconds' % tt)

    docs_by_tid = dict([(d['tid'], d) for d in trials.trials])
    best_docs = dict()
    best_docs_loss = dict()
    for doc in trials.trials:
        # get either this docs own tid or the one that it's from
        tid = doc['misc'].get('from_tid', doc['tid'])
        loss = domain.loss(doc['result'], doc['spec'])
        if loss is None:
            # -- associate infinite loss to new/running/failed jobs
            loss = float('inf')
        else:
            loss = float(loss)
        best_docs_loss.setdefault(tid, loss)
        if loss <= best_docs_loss[tid]:
            best_docs_loss[tid] = loss
            best_docs[tid] = doc

    tid_docs = list(best_docs.items())
    # -- sort docs by order of suggestion
    #    so that linear_forgetting removes the oldest ones
    tid_docs.sort()
    losses = [best_docs_loss[k] for k, v in tid_docs]
    tids = [k for k, v in tid_docs]
    docs = [v for k, v in tid_docs]

    if docs:
        logger.info('TPE using %i/%i trials with best loss %f' % (
            len(docs), len(trials), min(best_docs_loss.values())))
    else:
        logger.info('TPE using 0 trials')

    if len(docs) < n_startup_jobs:
        # N.B. THIS SEEDS THE RNG BASED ON THE new_id
        return rand.suggest(new_ids, domain, trials, seed)

    #    Sample and compute log-probability.
    if tids:
        # -- the +2 co-ordinates with an assertion above
        #    to ensure that fake ids are used during sampling
        fake_id_0 = max(max(tids), new_id) + 2
    else:
        # -- weird - we're running the TPE algo from scratch
        assert n_startup_jobs <= 0
        fake_id_0 = new_id + 2

    fake_ids = range(fake_id_0, fake_id_0 + n_EI_candidates)

    # -- this dictionary will map pyll nodes to the values
    #    they should take during the evaluation of the pyll program
    memo = {domain.s_new_ids: fake_ids}

    o_idxs_d, o_vals_d = miscs_to_idxs_vals(
        [d['misc'] for d in docs], keys=domain.params.keys())
    memo[observed['idxs']] = o_idxs_d
    memo[observed['vals']] = o_vals_d

    memo[observed_loss['idxs']] = tids
    memo[observed_loss['vals']] = losses

    idxs, vals = pyll.rec_eval([opt_idxs, opt_vals], memo=memo,
            print_node_on_error=False)

    # -- retrieve the best of the samples and form the return tuple
    # the build_posterior makes all specs the same

    rval_specs = [None]  # -- specs are deprecated
    rval_results = [domain.new_result()]
    rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]

    miscs_update_idxs_vals(rval_miscs, idxs, vals,
            idxs_map={fake_ids[0]: new_id},
            assert_all_vals_used=False)
    rval_docs = trials.new_trial_docs([new_id],
            rval_specs, rval_results, rval_miscs)

    return rval_docs
Example #43
def test_vectorize_config0():
    config = config0()
    assert 'p3' == config.named_args[2][0]
    p1 = config.named_args[2][1].pos_args[1]
    assert p1.name == 'uniform'
    assert p1.pos_args[0]._obj == 2
    assert p1.pos_args[1]._obj == 3

    N = as_apply(5)
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs)
    vh.build_idxs()
    vh.build_vals()
    vconfig = vh.vals_memo[expr]

    full_output = as_apply([vconfig, vh.idxs_by_id(), vh.vals_by_id()])

    if 0:
        print('=' * 80)
        print('VECTORIZED')
        print(full_output)
        print('\n' * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC')
        print(fo2)
        print('\n' * 1)

    new_vc = recursive_set_rng_kwarg(
            fo2,
            as_apply(np.random.RandomState(1))
            )
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC WITH RNGS')
        print(new_vc)

    foo, idxs, vals = rec_eval(new_vc)

    print(foo)
    #print idxs
    #print vals
    assert len(foo) == 5
    assert foo[0] == {
            'p0': 0.39676747423066994,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994) }
    assert foo[1] != foo[2]

    if 0:
        print(idxs[vh.node_id[p1]])
        print(vals[vh.node_id[p1]])

    # - p1 is only used sometimes
    assert len(idxs[vh.node_id[p1]]) < 5
    for ii in range(5):
        if ii in idxs[vh.node_id[p1]]:
            assert foo[ii]['p3'] == vals[vh.node_id[p1]][list(idxs[vh.node_id[p1]]).index(ii)]
        else:
            assert foo[ii]['p3'] == -2, foo[ii]['p3']
Example #44
def test_vectorize_config0():
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    d = locals()
    d['p1'] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    if 1:
        print('=' * 80)
        print('VECTORIZED')
        print(full_output)
        print('\n' * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC')
        print(fo2)
        print('\n' * 1)

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC WITH RNGS')
        print(new_vc)

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print('foo[0]', foo[0])
    print('foo[1]', foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994)
        }
    assert foo[1] != foo[2]

    print(idxs)
    print(vals['p3'])
    print(vals['p6'])
    print(idxs['p1'])
    print(vals['p1'])
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval
    p1d = dict(zip(idxs['p1'], vals['p1']))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        if p3v == p6v == 0:
            assert ii not in idxs['p1']
        if p3v:
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print('p6', foo[ii]['p6'], p1d[ii])
            assert foo[ii]['p6'] == p1d[ii]
Example #45
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = TreeParzenEstimator(bandit,
                                   prior_weight=prior_weight,
                                   n_startup_jobs=2,
                                   n_EI_candidates=128,
                                   gamma=gamma)
        print(algo.opt_idxs['x'])
        print(algo.opt_vals['x'])

        trials = Trials()
        experiment = Experiment(trials, algo)
        experiment.run(self.LEN)
        if self.show_vars:
            import hyperopt.plotting
            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs['x']
        vals = vals['x']
        print "VALS", vals

        losses = trials.losses()

        from hyperopt.tpe import ap_filter_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers['quniform']
        fn_kwargs = dict(size=(4, ), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        #print b_post
        #print a_post
        fn_lpdf = getattr(scope, a_post.name + '_lpdf')
        print(fn_lpdf)
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                         if n not in ('rng', 'size')])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                         if n not in ('rng', 'size')])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print('=' * 80)

        do_show = self.show_steps

        import matplotlib.pyplot as plt
        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print('-' * 80)
            print('ROUND', ii)
            print('-' * 80)
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii],
                                            losses[:ii], gamma)
            below = below.astype('int')
            above = above.astype('int')
            print('BB0', below)
            print('BB1', above)
            #print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            #print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                                       memo=memo)
            #print bl - al
            print('BB2', dict(zip(all_vals, bl - al)))
            print('BB3', dict(zip(all_vals, bl)))
            print('BB4', dict(zip(all_vals, al)))
            print('ORIG PICKED', vals[ii])
            print('PROPER OPT PICKS:', nv)

            #assert np.allclose(below, [3, 3, 9])
            #assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                #plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                #plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c='g')
                plt.scatter(all_vals, al, c='r')
        if do_show:
            plt.show()
Example #46
 def evaluate(self, config, ctrl):
     prog = screening_prog(ctrl=ctrl, **config)
     rval = pyll.rec_eval(prog, deepcopy_inputs=False)
     return rval
Example #47
    def suggest1(self, new_id, trials):
        """Suggest a single new document"""
        #print self.post_llik

        bandit = self.bandit
        docs_by_tid = dict([(d['tid'], d) for d in trials.trials])
        if len(docs_by_tid) != len(trials.trials):
            import pickle
            pickle.dump(trials.trials, open('assert_fail_tpe_637.pkl', 'wb'))
            assert 0, 'non-unique docid, dumped to assert_fail_tpe_637.pkl'
        best_docs = dict()
        best_docs_loss = dict()
        for doc in trials.trials:
            # get either this docs own tid or the one that it's from
            tid = doc['misc'].get('from_tid', doc['tid'])
            loss = bandit.loss(doc['result'], doc['spec'])
            if loss is None:
                # -- associate infinite loss to new/running/failed jobs
                loss = float('inf')
            else:
                loss = float(loss)
            best_docs_loss.setdefault(tid, loss)
            if loss <= best_docs_loss[tid]:
                best_docs_loss[tid] = loss
                best_docs[tid] = doc

        tid_docs = list(best_docs.items())
        # -- sort docs by order of suggestion
        #    so that linear_forgetting removes the oldest ones
        tid_docs.sort()
        losses = [best_docs_loss[k] for k, v in tid_docs]
        tids = [k for k, v in tid_docs]
        docs = [v for k, v in tid_docs]

        n_ok = len([d for d in docs if d['result']['status'] == STATUS_OK])

        if docs:
            logger.info('TPE %i/%i w best loss %f' % (
                n_ok, len(docs), min(best_docs_loss.values())))
        else:
            logger.info('TPE using 0 trials')

        if n_ok < self.n_startup_jobs:
            # N.B. THIS SEEDS THE RNG BASED ON THE new_id
            return BanditAlgo.suggest(self, [new_id], trials)

        #    Sample and compute log-probability.
        if tids:
            # -- the +2 co-ordinates with an assertion above
            #    to ensure that fake ids are used during sampling
            fake_id_0 = max(max(tids), new_id) + 2
        else:
            # -- weird - we're running the TPE algo from scratch
            assert self.n_startup_jobs <= 0
            fake_id_0 = new_id + 2

        fake_ids = range(fake_id_0, fake_id_0 + self.n_EI_candidates)

        # -- this dictionary will map pyll nodes to the values
        #    they should take during the evaluation of the pyll program
        memo = {self.s_new_ids: fake_ids}

        o_idxs_d, o_vals_d = miscs_to_idxs_vals(
            [d['misc'] for d in docs], keys=bandit.params.keys())
        memo[self.observed['idxs']] = o_idxs_d
        memo[self.observed['vals']] = o_vals_d

        memo[self.observed_loss['idxs']] = tids
        memo[self.observed_loss['vals']] = losses

        idxs, vals = pyll.rec_eval(
                [self.opt_idxs, self.opt_vals],
                memo=memo)

        # -- retrieve the best of the samples and form the return tuple
        # the build_posterior makes all specs the same

        rval_specs = [None]  # -- specs are deprecated
        rval_results = [bandit.new_result()]
        rval_miscs = [dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)]

        miscs_update_idxs_vals(rval_miscs, idxs, vals,
                idxs_map={fake_ids[0]: new_id},
                assert_all_vals_used=False)
        rval_docs = trials.new_trial_docs([new_id],
                rval_specs, rval_results, rval_miscs)

        return rval_docs
Example #48
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = TreeParzenEstimator(bandit,
                prior_weight=prior_weight,
                n_startup_jobs=2,
                n_EI_candidates=128,
                gamma=gamma)
        print(algo.opt_idxs['x'])
        print(algo.opt_vals['x'])

        trials = Trials()
        experiment = Experiment(trials, algo)
        experiment.run(self.LEN)
        if self.show_vars:
            import hyperopt.plotting
            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs['x']
        vals = vals['x']
        print "VALS", vals

        losses = trials.losses()

        from hyperopt.tpe import ap_filter_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers['quniform']
        fn_kwargs = dict(size=(4,), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        #print b_post
        #print a_post
        fn_lpdf = getattr(scope, a_post.name + '_lpdf')
        print(fn_lpdf)
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                    if n not in ('rng', 'size')])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                    if n not in ('rng', 'size')])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print('=' * 80)

        do_show = self.show_steps

        import matplotlib.pyplot as plt
        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print('-' * 80)
            print('ROUND', ii)
            print('-' * 80)
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_filter_trials(idxs[:ii],
                    vals[:ii], idxs[:ii], losses[:ii], gamma)
            below = below.astype('int')
            above = above.astype('int')
            print('BB0', below)
            print('BB1', above)
            #print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            #print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                    memo=memo)
            #print bl - al
            print('BB2', dict(zip(all_vals, bl - al)))
            print('BB3', dict(zip(all_vals, bl)))
            print('BB4', dict(zip(all_vals, al)))
            print('ORIG PICKED', vals[ii])
            print('PROPER OPT PICKS:', nv)

            #assert np.allclose(below, [3, 3, 9])
            #assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                #plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                #plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c='g')
                plt.scatter(all_vals, al, c='r')
        if do_show:
            plt.show()