def test_clone():
    """Check that `clone` yields a disjoint graph that evaluates identically.

    No node reachable from the clone may belong to the original graph, and
    both graphs must evaluate to equal results once an RNG built by
    `scope.rng_from_seed(5)` has been installed on each.
    """
    original = config0()
    duplicate = clone(original)

    # -- the clone must not share a single node with the original
    original_nodes = set(dfs(original))
    for node in dfs(duplicate):
        assert node not in original_nodes

    seeded_original = recursive_set_rng_kwarg(
        original, scope.rng_from_seed(5))
    result = rec_eval(seeded_original)
    print(result)

    seeded_duplicate = recursive_set_rng_kwarg(
        duplicate, scope.rng_from_seed(5))
    result2 = rec_eval(seeded_duplicate)
    print(result2)

    assert result == result2
def test_qlognormal_never_0():
    """Draw 100 samples of qlognormal(-5, 3, 0.1) and check their extremes.

    The smallest draw must equal 0.1 and the largest must differ from 0.1.
    """
    generator = np.random.RandomState(234)
    node = scope.qlognormal(-5, 3, 0.1)
    recursive_set_rng_kwarg(node, generator)

    draws = []
    for _ in range(100):
        draws.append(rec_eval(node))

    assert min(draws) == 0.1
    assert max(draws) != 0.1
def test_vectorize_multipath(): N = as_apply(15) p0 = hp_uniform('p0', 0, 1) loss = hp_choice('p1', [1, p0, -p0]) ** 2 expr_idxs = scope.range(N) vh = VectorizeHelper(loss, expr_idxs, build=True) vloss = vh.v_expr print vloss full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()]) new_vc = recursive_set_rng_kwarg( full_output, as_apply(np.random.RandomState(1)), ) losses, idxs, vals = rec_eval(new_vc) print 'losses', losses print 'idxs p0', idxs['p0'] print 'vals p0', vals['p0'] print 'idxs p1', idxs['p1'] print 'vals p1', vals['p1'] p0dct = dict(zip(idxs['p0'], vals['p0'])) p1dct = dict(zip(idxs['p1'], vals['p1'])) for ii, li in enumerate(losses): print ii, li if p1dct[ii] != 0: assert li == p0dct[ii] ** 2 else: assert li == 1
def evaluate(self, config, ctrl):
    """Evaluate `config` through the pyll expression and then `self.fn`.

    An exception raised while evaluating is offered, in order, to the
    (match, match_pair) pairs of `self.exceptions`. The first pair whose
    `match` accepts it supplies the result via `match_pair`; if none
    accepts, the exception is re-raised.

    NOTE(review): the visible code computes `rval` but never returns it --
    confirm whether this definition continues beyond what is shown here.
    """
    memo = self.memo_from_config(config)
    memo[self.pyll_ctrl] = ctrl
    if self.init_pyll_memo:
        memo = self.init_pyll_memo(memo, config=config, ctrl=ctrl)

    if not (self.rng is None or self.installed_rng):
        # -- N.B. this modifies the expr graph in-place
        # XXX this feels wrong
        self.expr = recursive_set_rng_kwarg(
            self.expr, pyll.as_apply(self.rng))
        self.installed_rng = True

    try:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        rval = self.fn(pyll.rec_eval(self.expr, memo=memo))
    except Exception as e:
        for match, match_pair in self.exceptions:
            if match(e):
                rval = match_pair(e)
                logger.info('Caught fn exception %s' % str(rval))
                break
        else:
            # -- no registered handler recognised the exception
            raise
def test_vectorize_multipath(): N = as_apply(15) p0 = hp_uniform('p0', 0, 1) loss = hp_choice('p1', [1, p0, -p0])**2 expr_idxs = scope.range(N) vh = VectorizeHelper(loss, expr_idxs, build=True) vloss = vh.v_expr print vloss full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()]) new_vc = recursive_set_rng_kwarg( full_output, as_apply(np.random.RandomState(1)), ) losses, idxs, vals = rec_eval(new_vc) print 'losses', losses print 'idxs p0', idxs['p0'] print 'vals p0', vals['p0'] print 'idxs p1', idxs['p1'] print 'vals p1', vals['p1'] p0dct = dict(zip(idxs['p0'], vals['p0'])) p1dct = dict(zip(idxs['p1'], vals['p1'])) for ii, li in enumerate(losses): print ii, li if p1dct[ii] != 0: assert li == p0dct[ii]**2 else: assert li == 1
def work(self):
    """Run 10 trials with `Random`, then evaluate the TPE `post_below` graph.

    The idxs/vals and tids/losses collected from the 10 trials are bound to
    the `observed` and `observed_loss` nodes of a `TreeParzenEstimator`,
    and the resulting `post_below` idxs/vals are printed.
    """
    bandit = self.bandit

    # -- build an experiment of 10 trials
    trials = Trials()
    experiment = Experiment(trials, Random(bandit))
    experiment.run(10)
    assert len(trials.tids) == 10

    tpe_algo = TreeParzenEstimator(bandit)

    print(trials.miscs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)

    # -- bind the observation nodes of the TPE graph to the trial data
    argmemo = {}
    argmemo[tpe_algo.observed['idxs']] = idxs
    argmemo[tpe_algo.observed['vals']] = vals
    argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
    argmemo[tpe_algo.observed_loss['vals']] = trials.losses()

    targets = [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']]
    posterior = pyll.rec_eval(targets, memo=argmemo)
    print(posterior)
def test_vectorize_simple(): N = as_apply(15) p0 = hp_uniform('p0', 0, 1) loss = p0 ** 2 print loss expr_idxs = scope.range(N) vh = VectorizeHelper(loss, expr_idxs, build=True) vloss = vh.v_expr full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()]) fo2 = replace_repeat_stochastic(full_output) new_vc = recursive_set_rng_kwarg( fo2, as_apply(np.random.RandomState(1)), ) #print new_vc losses, idxs, vals = rec_eval(new_vc) print 'losses', losses print 'idxs p0', idxs['p0'] print 'vals p0', vals['p0'] p0dct = dict(zip(idxs['p0'], vals['p0'])) for ii, li in enumerate(losses): assert p0dct[ii] ** 2 == li
def work(self):
    """Feed 10 `Random` trials into a TPE graph and print `post_below`.

    After the experiment has run, the trial miscs are converted to
    idxs/vals and installed, with the tids and losses, as the memo used to
    evaluate the `post_below` nodes of a `TreeParzenEstimator`.
    """
    the_bandit = self.bandit
    sampler = Random(the_bandit)

    # -- build an experiment of 10 trials
    trials = Trials()
    Experiment(trials, sampler).run(10)
    trial_ids = trials.tids
    assert len(trial_ids) == 10

    tpe = TreeParzenEstimator(the_bandit)

    print(trials.miscs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)

    bindings = {}
    bindings[tpe.observed['idxs']] = idxs
    bindings[tpe.observed['vals']] = vals
    bindings[tpe.observed_loss['idxs']] = trials.tids
    bindings[tpe.observed_loss['vals']] = trials.losses()

    evaluated = pyll.rec_eval(
        [tpe.post_below['idxs'], tpe.post_below['vals']],
        memo=bindings)
    print(evaluated)
def evaluate_async(
    self,
    config,
    ctrl,
    attach_attachments=True,
):
    '''First part of async evaluation for ipython parallel engines.

    (see ipy.py)

    This breaks evaluate into two parts to allow for the apply_async call
    to only pass the objective function and arguments.

    Parameters
    ----------
    config : passed to `self.memo_from_config` to build the memo.
    ctrl : installed into the memo by `use_obj_for_literal_in_memo` and,
        when `self.pass_expr_memo_ctrl` is set, handed to `self.fn`.
    attach_attachments : accepted but not used by this method.

    Returns
    -------
    tuple
        `(self.fn, pyll_rval)`.
    '''
    memo = self.memo_from_config(config)
    use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
    if self.pass_expr_memo_ctrl:
        # -- bind the same name that is returned below; binding a different
        #    name here left `pyll_rval` undefined and made the return
        #    statement raise UnboundLocalError on this path.
        # NOTE(review): confirm callers expect the result of `self.fn` in
        # the second slot of the tuple when pass_expr_memo_ctrl is set.
        pyll_rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
    else:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        pyll_rval = pyll.rec_eval(
            self.expr,
            memo=memo,
            print_node_on_error=self.rec_eval_print_node_on_error)
    return (self.fn, pyll_rval)
def get_performance(slm, decisions, preproc, comparison,
                    namebase=None, progkey='result_w_cleanup',
                    return_multi=False, ctrl=None):
    """Build and evaluate screening programs for one or two comparisons.

    `decisions` defaults to a (1, 3200) array of zeros and must otherwise
    have that shape. `namebase` defaults to 'memmap_' followed by a random
    integer. With `return_multi` the comparisons 'mult' and 'sqrtabsdiff'
    are both evaluated and the full list of [comparison, result] pairs is
    returned; otherwise only the result for `comparison` is returned.
    """
    if decisions is not None:
        decisions = np.asarray(decisions)
    else:
        decisions = np.zeros((1, 3200))
    assert decisions.shape == (1, 3200)

    if namebase is None:
        namebase = 'memmap_' + str(np.random.randint(1e8))

    # -- the same feature node is handed to every screening program
    image_features = scope.slm_memmap(
        desc=slm,
        X=scope.get_images('float32', preproc=preproc),
        name=namebase + '_img_feat')

    comps = ['mult', 'sqrtabsdiff'] if return_multi else [comparison]

    cmp_progs = [
        [comp,
         screening_program(
             slm_desc=slm,
             preproc=preproc,
             comparison=comp,
             namebase=namebase,
             decisions=decisions,
             image_features=image_features,
             ctrl=ctrl)[1][progkey]]
        for comp in comps]
    cmp_results = pyll.rec_eval(cmp_progs)

    if not return_multi:
        return cmp_results[0][1]
    return cmp_results
def suggest(new_ids, domain, trials, seed):
    """Return new trial documents, one per id in `new_ids`.

    A single `np.random.RandomState(seed)` is bound to `domain.s_rng` for
    every draw, and each id is evaluated on its own through
    `domain.s_idxs_vals`.
    """
    rng = np.random.RandomState(seed)
    docs = []
    for new_id in new_ids:
        # -- sample new specs, idxs, vals
        memo = {
            domain.s_new_ids: [new_id],
            domain.s_rng: rng,
        }
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=memo)

        result = domain.new_result()
        misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs.extend(
            trials.new_trial_docs([new_id], [None], [result], [misc]))
    return docs
def test_vectorize_simple(): N = as_apply(15) p0 = hp_uniform('p0', 0, 1) loss = p0**2 print loss expr_idxs = scope.range(N) vh = VectorizeHelper(loss, expr_idxs, build=True) vloss = vh.v_expr full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()]) fo2 = replace_repeat_stochastic(full_output) new_vc = recursive_set_rng_kwarg( fo2, as_apply(np.random.RandomState(1)), ) #print new_vc losses, idxs, vals = rec_eval(new_vc) print 'losses', losses print 'idxs p0', idxs['p0'] print 'vals p0', vals['p0'] p0dct = dict(zip(idxs['p0'], vals['p0'])) for ii, li in enumerate(losses): assert p0dct[ii]**2 == li
def suggest(self, new_ids, trials):
    """Return new trial documents, re-seeding `self.rng` for every id.

    new_ids - a list of unique identifiers (not necessarily ints!)
              for the suggestions that this function should return.

    Each id is evaluated on its own, after `self.rng` has been seeded with
    the SHA-1 digest of `str(new_id)` reduced modulo 2 ** 31.
    """
    # XXX (JB, June 19 2012): this used to be the implementation for both
    # the Random class and the base class. Running Random() with a
    # different seed every time surprisingly generated the same thing all
    # the time, so Random was given its own simpler implementation that
    # does not re-seed self.rng from new_ids. That leaves this strange
    # implementation in the base class; it is unclear whether to delete it.
    docs = []
    for new_id in new_ids:
        # -- the results are not computed all at once so that the
        #    generator can be seeded based on each new_id
        digest = hashlib.sha1(str(new_id)).hexdigest()
        self.rng.seed(int(int(digest, 16) % (2 ** 31)))

        # -- sample new specs, idxs, vals
        memo = {self.s_new_ids: [new_id]}
        idxs, vals = pyll.rec_eval(self.s_idxs_vals, memo=memo)

        result = self.bandit.new_result()
        misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs.extend(
            trials.new_trial_docs([new_id], [None], [result], [misc]))
    return docs
def test_recursive_set_rng_kwarg():
    """Install an RNG on two uniform nodes and check each draw's bounds.

    The first draw must lie strictly inside (0, 1) and the second strictly
    inside (2, 3).
    """
    draw = scope.uniform
    pair = as_apply([draw(0, 1), draw(2, 3)])

    recursive_set_rng_kwarg(pair, np.random.RandomState(234))
    print(pair)

    sampled = rec_eval(pair)
    assert 0 < sampled[0] < 1
    assert 2 < sampled[1] < 3
def suggest_batch(new_ids, domain, trials, seed):
    """Sample idxs and vals for all of `new_ids` in one evaluation.

    `domain.s_idxs_vals` is evaluated once with `domain.s_new_ids` bound to
    `new_ids` and `domain.s_rng` bound to `np.random.RandomState(seed)`.
    `trials` is not used. Returns the (idxs, vals) pair.
    """
    memo = {
        domain.s_new_ids: new_ids,
        domain.s_rng: np.random.RandomState(seed),
    }
    # -- sample new specs, idxs, vals
    idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=memo)
    return idxs, vals
def test_clone():
    """Verify that a cloned config is node-disjoint yet evaluates equally.

    Both the original and its clone receive an RNG from
    `scope.rng_from_seed(5)` before evaluation, and the two results must
    compare equal.
    """
    source_graph = config0()
    cloned_graph = clone(source_graph)

    source_nodes = set(dfs(source_graph))
    shared = [n for n in dfs(cloned_graph) if n in source_nodes]
    assert not shared

    first = rec_eval(
        recursive_set_rng_kwarg(source_graph, scope.rng_from_seed(5)))
    print(first)

    second = rec_eval(
        recursive_set_rng_kwarg(cloned_graph, scope.rng_from_seed(5)))
    print(second)

    assert first == second
def test_screening_prog_for_smoke():
    """Smoke test: build, stringify and evaluate the toy screening program.

    The evaluated result must contain 'loss' and 'decisions', and its
    'splits' entry must hold exactly two elements that differ.
    """
    program = toyproblem.screening_prog(ctrl=Ctrl(None), **config_tiny)
    # -- the string itself is unused; this only exercises str() on the graph
    str(program)

    result = pyll.rec_eval(program)
    for required_key in ('loss', 'decisions'):
        assert required_key in result

    assert len(result['splits']) == 2
    assert result['splits'][0] != result['splits'][1]
def suggest_batch(new_ids, domain, trials, seed):
    """Evaluate `domain.s_idxs_vals` once for the whole batch of ids.

    The memo binds `domain.s_new_ids` to `new_ids` and `domain.s_rng` to a
    `RandomState` seeded with `seed`. `trials` is accepted but not
    referenced. The evaluated (idxs, vals) pair is returned unchanged.
    """
    generator = np.random.RandomState(seed)
    bindings = {}
    bindings[domain.s_new_ids] = new_ids
    bindings[domain.s_rng] = generator

    # -- sample new specs, idxs, vals
    sampled_idxs, sampled_vals = pyll.rec_eval(
        domain.s_idxs_vals, memo=bindings)
    return sampled_idxs, sampled_vals
def suggest(new_ids, domain, trials, seed=123):
    """Return new trial documents, one per id in `new_ids`.

    Each id is sampled separately by evaluating `domain.s_idxs_vals` with
    `domain.s_new_ids` bound to that single id. `seed` is accepted but not
    referenced in this body.
    """
    docs = []
    for new_id in new_ids:
        # -- sample new specs, idxs, vals
        memo = {domain.s_new_ids: [new_id]}
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=memo)

        result = domain.new_result()
        misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs.extend(
            trials.new_trial_docs([new_id], [None], [result], [misc]))
    return docs
def logp(apply_node):
    """Return the log-probability of the value memoized for `apply_node`.

    The value is read from `memo_cpy`. A garbage-collected value yields 0.
    Otherwise the node name selects the distribution: the uniform family
    (uniform, quniform, loguniform, qloguniform), the normal family
    (normal, qnormal, lognormal, qlognormal), randint or categorical.

    Raises NotImplementedError for any other node name.
    """
    val = memo_cpy[apply_node]
    if val is pyll.base.GarbageCollected:
        # -- XXX: confirm this happens because the hyperparam is unused.
        return 0

    name = apply_node.name

    if 'uniform' in name:
        low = apply_node.arg['low'].obj
        high = apply_node.arg['high'].obj
        if 'q' in name:
            q = apply_node.arg['q'].obj

        if name == 'uniform':
            dist = rdists.uniform_gen(a=low, b=high)
            return dist.logpdf(val, loc=low, scale=(high - low))
        if name == 'quniform':
            return rdists.quniform_gen(low=low, high=high, q=q).logpmf(val)
        if name == 'loguniform':
            return rdists.loguniform_gen(low=low, high=high).logpdf(val)
        if name == 'qloguniform':
            return rdists.qloguniform_gen(
                low=low, high=high, q=q).logpmf(val)
        raise NotImplementedError(name)

    if 'normal' in name:
        mu = apply_node.arg['mu'].obj
        sigma = apply_node.arg['sigma'].obj
        if 'q' in name:
            q = apply_node.arg['q'].obj

        if name == 'normal':
            return scipy.stats.norm(loc=mu, scale=sigma).logpdf(val)
        if name == 'qnormal':
            return rdists.qnormal_gen(mu=mu, sigma=sigma, q=q).logpmf(val)
        if name == 'lognormal':
            return rdists.lognorm_gen(mu=mu, sigma=sigma).logpdf(val)
        if name == 'qlognormal':
            return rdists.qlognormal_gen(
                mu=mu, sigma=sigma, q=q).logpmf(val)
        raise NotImplementedError(name)

    if name == 'randint':
        return -math.log(apply_node.arg['upper'].obj)

    if name == 'categorical':
        assert val == int(val), val
        p = pyll.rec_eval(apply_node.arg['p'])
        return math.log(p[int(val)])

    raise NotImplementedError(name)
def evaluate(self, config, ctrl, attach_attachments=True):
    """Evaluate one configuration and return its result dictionary.

    `self.fn` is called either with `expr`, `memo` and `ctrl` keywords
    (when `self.pass_expr_memo_ctrl` is set) or with the evaluated pyll
    expression; `self.args` is appended positionally in both cases. A
    numeric return value becomes `{'loss': value}`; a dict is used as is
    and must contain 'loss'.

    A non-None loss is cast to float, 'status' defaults to
    `base.STATUS_OK`, and, when `attach_attachments` is true, the
    'attachments' entry is moved into `ctrl.attachments`.

    Raises TypeError when `self.fn` returns neither a number nor a dict,
    and ValueError for a dict without 'loss' or with a status that is not
    in `base.STATUS_STRINGS`.
    """
    memo = self.memo_from_config(config)
    self.use_obj_for_literal_in_memo(ctrl, base.Ctrl, memo)

    if not (self.rng is None or self.installed_rng):
        # -- N.B. this modifies the expr graph in-place
        # XXX this feels wrong
        self.expr = recursive_set_rng_kwarg(
            self.expr, pyll.as_apply(self.rng))
        self.installed_rng = True

    if not self.pass_expr_memo_ctrl:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        pyll_rval = pyll.rec_eval(
            self.expr,
            memo=memo,
            print_node_on_error=self.rec_eval_print_node_on_error)
        rval = self.fn(pyll_rval, *self.args)
    else:
        rval = self.fn(
            expr=self.expr, memo=memo, ctrl=ctrl, *self.args)

    is_number = isinstance(rval, (float, int, np.number))
    if not is_number and not isinstance(rval, dict):
        raise TypeError('invalid return type (neither number nor dict)', rval)

    if is_number:
        dict_rval = {'loss': rval}
    else:
        dict_rval = rval
        if 'loss' not in dict_rval:
            raise ValueError('dictionary must have "loss" key',
                             dict_rval.keys())

    loss = dict_rval['loss']
    if loss is not None:
        # -- fail if cannot be cast to float
        dict_rval['loss'] = float(loss)

    dict_rval.setdefault('status', base.STATUS_OK)
    if dict_rval['status'] not in base.STATUS_STRINGS:
        raise ValueError('invalid status string', dict_rval['status'])

    if attach_attachments:
        for key, val in dict_rval.pop('attachments', {}).items():
            ctrl.attachments[key] = val

    # -- base.SONify(dict_rval) is deliberately not applied here:
    #    SON-compatibility is only a requirement for trials destined for a
    #    mongodb. In-memory rvals can contain anything.
    return dict_rval
def evaluate(self, config, ctrl, attach_attachments=True):
    """Run the objective for `config` and normalise what it returns.

    When `self.pass_expr_memo_ctrl` is set, `self.fn` receives `expr`,
    `memo` and `ctrl` keywords; otherwise it receives the value of the
    evaluated pyll expression. Numbers are wrapped as `{'loss': value}`,
    dicts are used directly and must carry a 'loss' key.

    The returned dict has its non-None loss cast to float and a 'status'
    that defaults to `base.STATUS_OK`. With `attach_attachments`, the
    'attachments' entry is popped and copied into `ctrl.attachments`.

    Raises TypeError for a return value that is neither number nor dict;
    raises ValueError for a missing 'loss' key or an unknown status.
    """
    memo = self.memo_from_config(config)
    self.use_obj_for_literal_in_memo(ctrl, base.Ctrl, memo)

    rng_pending = self.rng is not None and not self.installed_rng
    if rng_pending:
        # -- N.B. this modifies the expr graph in-place
        # XXX this feels wrong
        rng_node = pyll.as_apply(self.rng)
        self.expr = recursive_set_rng_kwarg(self.expr, rng_node)
        self.installed_rng = True

    if self.pass_expr_memo_ctrl:
        rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
    else:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        rval = self.fn(pyll.rec_eval(
            self.expr,
            memo=memo,
            print_node_on_error=self.rec_eval_print_node_on_error))

    if isinstance(rval, (float, int, np.number)):
        dict_rval = {'loss': rval}
    elif isinstance(rval, dict):
        if 'loss' not in rval:
            raise ValueError('dictionary must have "loss" key',
                             rval.keys())
        dict_rval = rval
    else:
        raise TypeError('invalid return type (neither number nor dict)', rval)

    if dict_rval['loss'] is not None:
        # -- fail if cannot be cast to float
        dict_rval['loss'] = float(dict_rval['loss'])

    dict_rval.setdefault('status', base.STATUS_OK)
    status = dict_rval['status']
    if status not in base.STATUS_STRINGS:
        raise ValueError('invalid status string', status)

    if attach_attachments:
        attachments = dict_rval.pop('attachments', {})
        for name, payload in attachments.items():
            ctrl.attachments[name] = payload

    # -- the result is not passed through base.SONify here because
    #    SON-compatibility is only a requirement for trials destined for a
    #    mongodb. In-memory rvals can contain anything.
    return dict_rval
def suggest(new_ids, domain, trials, seed=123):
    """Return new trial documents, re-seeding `domain.rng` for every id.

    Before each draw `domain.rng` is seeded with `seed + new_id`, then
    `domain.s_idxs_vals` is evaluated for that single id.
    """
    logger.info('generating trials for new_ids: %s' % str(new_ids))

    docs = []
    for new_id in new_ids:
        # -- hack - domain should be read-only here :/
        #    in fact domain should not have its own seed or rng
        domain.rng.seed(seed + new_id)

        # -- sample new specs, idxs, vals
        memo = {domain.s_new_ids: [new_id]}
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=memo)

        result = domain.new_result()
        misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs.extend(
            trials.new_trial_docs([new_id], [None], [result], [misc]))
    return docs
def suggest(new_ids, domain, trials, seed=123):
    """Build one trial document per new id, seeding the domain RNG each time.

    The RNG owned by `domain` is re-seeded with `seed + new_id` ahead of
    every evaluation of `domain.s_idxs_vals`, and the sampled idxs/vals are
    written into that id's misc document.
    """
    logger.info('generating trials for new_ids: %s' % str(new_ids))

    suggestions = []
    for trial_id in new_ids:
        # -- hack - domain should be read-only here :/
        #    in fact domain should not have its own seed or rng
        domain.rng.seed(seed + trial_id)

        # -- sample new specs, idxs, vals
        sampled_idxs, sampled_vals = pyll.rec_eval(
            domain.s_idxs_vals,
            memo={domain.s_new_ids: [trial_id]})

        fresh_result = domain.new_result()
        fresh_misc = dict(
            tid=trial_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([fresh_misc], sampled_idxs, sampled_vals)

        new_docs = trials.new_trial_docs(
            [trial_id], [None], [fresh_result], [fresh_misc])
        suggestions.extend(new_docs)
    return suggestions
def space_eval(space, hp_assignment):
    """Evaluate a search space at a given hyperparameter assignment.

    Parameters:
    -----------
    space - a pyll graph involving hp nodes (see `pyll_utils`).

    hp_assignment - a dictionary mapping hp node labels to values.

    Every 'hyperopt_param' node whose label appears in `hp_assignment` is
    pinned to the assigned value before the graph is evaluated.
    """
    memo = {}
    for node in pyll.toposort(space):
        if node.name != 'hyperopt_param':
            continue
        label = node.arg['label'].eval()
        if label in hp_assignment:
            memo[node] = hp_assignment[label]
    return pyll.rec_eval(space, memo=memo)
def suggest(self, new_ids, trials):
    """Return new trial documents, one per id in `new_ids`.

    new_ids - a list of unique identifiers (not necessarily ints!)
              for the suggestions that this function should return.

    Each id is sampled separately by evaluating `self.s_idxs_vals` with
    `self.s_new_ids` bound to that single id.
    """
    docs = []
    for new_id in new_ids:
        # -- sample new specs, idxs, vals
        memo = {self.s_new_ids: [new_id]}
        idxs, vals = pyll.rec_eval(self.s_idxs_vals, memo=memo)

        result = self.bandit.new_result()
        misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs.extend(
            trials.new_trial_docs([new_id], [None], [result], [misc]))
    return docs
def suggest(new_ids, domain, trials, seed):
    """Sample a trial document for each id using one seeded RandomState.

    The generator is created once from `seed` and bound to `domain.s_rng`
    in the memo of every per-id evaluation of `domain.s_idxs_vals`.
    """
    generator = np.random.RandomState(seed)
    suggestions = []
    for trial_id in new_ids:
        # -- sample new specs, idxs, vals
        sampled_idxs, sampled_vals = pyll.rec_eval(
            domain.s_idxs_vals,
            memo={
                domain.s_new_ids: [trial_id],
                domain.s_rng: generator,
            })

        fresh_result = domain.new_result()
        fresh_misc = dict(
            tid=trial_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([fresh_misc], sampled_idxs, sampled_vals)

        new_docs = trials.new_trial_docs(
            [trial_id], [None], [fresh_result], [fresh_misc])
        suggestions.extend(new_docs)
    return suggestions
def evaluate_async(self, config, ctrl, attach_attachments=True,):
    '''First part of async evaluation for ipython parallel engines.

    (see ipy.py)

    This breaks evaluate into two parts to allow for the apply_async call
    to only pass the objective function and arguments.

    Parameters
    ----------
    config : passed to `self.memo_from_config` to build the memo.
    ctrl : installed into the memo by `use_obj_for_literal_in_memo` and,
        when `self.pass_expr_memo_ctrl` is set, handed to `self.fn`.
    attach_attachments : accepted but not used by this method.

    Returns
    -------
    tuple
        `(self.fn, pyll_rval)`.
    '''
    memo = self.memo_from_config(config)
    use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)
    if self.pass_expr_memo_ctrl:
        # -- bind the same name that is returned below; binding a different
        #    name here left `pyll_rval` undefined and made the return
        #    statement raise UnboundLocalError on this path.
        # NOTE(review): confirm callers expect the result of `self.fn` in
        # the second slot of the tuple when pass_expr_memo_ctrl is set.
        pyll_rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
    else:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        pyll_rval = pyll.rec_eval(
            self.expr,
            memo=memo,
            print_node_on_error=self.rec_eval_print_node_on_error)
    return (self.fn, pyll_rval)
def suggest(new_ids, domain, trials, seed):
    """Create trial documents for `new_ids` from a single seeded generator.

    One `np.random.RandomState(seed)` is shared by all ids; each id gets
    its own evaluation of `domain.s_idxs_vals` and its own misc document.
    """
    shared_rng = np.random.RandomState(seed)

    collected = []
    for current_id in new_ids:
        # -- sample new specs, idxs, vals
        bindings = {}
        bindings[domain.s_new_ids] = [current_id]
        bindings[domain.s_rng] = shared_rng
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=bindings)

        result_doc = domain.new_result()
        misc_doc = dict(
            tid=current_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([misc_doc], idxs, vals)
        collected.extend(
            trials.new_trial_docs(
                [current_id], [None], [result_doc], [misc_doc]))
    return collected
def evaluate(self, config, ctrl, attach_attachments=True): memo = self.memo_from_config(config) use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo) if self.pass_expr_memo_ctrl: rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl) else: # -- the "work" of evaluating `config` can be written # either into the pyll part (self.expr) # or the normal Python part (self.fn) pyll_rval = pyll.rec_eval( self.expr, memo=memo, print_node_on_error=self.rec_eval_print_node_on_error) rval = self.fn(pyll_rval) if isinstance(rval, (float, int, np.number)): dict_rval = {'loss': float(rval), 'status': STATUS_OK} else: dict_rval = dict(rval) status = dict_rval['status'] if status not in STATUS_STRINGS: print 'status', status raise InvalidResultStatus(dict_rval) if status == STATUS_OK: # -- make sure that the loss is present and valid try: dict_rval['loss'] = float(dict_rval['loss']) except (TypeError, KeyError): raise InvalidLoss(dict_rval) if attach_attachments: attachments = dict_rval.pop('attachments', {}) for key, val in attachments.items(): ctrl.attachments[key] = val # -- don't do this here because SON-compatibility is only a requirement # for trials destined for a mongodb. In-memory rvals can contain # anything. #return base.SONify(dict_rval) return dict_rval
def suggest(self, new_ids, trials):
    """Build one trial document per entry of `new_ids`.

    new_ids - a list of unique identifiers (not necessarily ints!)
              for the suggestions that this function should return.

    For each id, `self.s_idxs_vals` is evaluated with `self.s_new_ids`
    bound to that id alone, and the sampled idxs/vals are written into the
    id's misc document.
    """
    suggestions = []
    for trial_id in new_ids:
        # -- sample new specs, idxs, vals
        sampled_idxs, sampled_vals = pyll.rec_eval(
            self.s_idxs_vals,
            memo={self.s_new_ids: [trial_id]})

        fresh_result = self.bandit.new_result()
        fresh_misc = dict(tid=trial_id, cmd=self.cmd, workdir=self.workdir)
        miscs_update_idxs_vals([fresh_misc], sampled_idxs, sampled_vals)

        new_docs = trials.new_trial_docs(
            [trial_id], [None], [fresh_result], [fresh_misc])
        suggestions.extend(new_docs)
    return suggestions
def evaluate(self, config, ctrl):
    """Evaluate `self.expr` for `config`, mapping known exceptions to results.

    An exception raised by the evaluation is offered, in order, to the
    (match, match_pair) pairs of `self.exceptions`; the first pair whose
    `match` accepts it supplies the result document via `match_pair`. An
    exception that no pair accepts is re-raised.

    NOTE(review): the visible code computes `r_dct` but never returns it --
    confirm whether this definition continues beyond what is shown here.
    """
    memo = self.memo_from_config(config)
    self.use_obj_for_literal_in_memo(ctrl, Ctrl, memo)

    if not (self.rng is None or self.installed_rng):
        # -- N.B. this modifies the expr graph in-place
        # XXX this feels wrong
        self.expr = recursive_set_rng_kwarg(
            self.expr, pyll.as_apply(self.rng))
        self.installed_rng = True

    try:
        r_dct = pyll.rec_eval(self.expr, memo=memo)
    except Exception as e:
        for match, match_pair in self.exceptions:
            if match(e):
                r_dct = match_pair(e)
                break
        else:
            # -- no registered handler recognised the exception
            raise
def evaluate(self, config, ctrl, attach_attachments=True):
    """Run the objective for `config` and validate what it returns.

    When `self.pass_expr_memo_ctrl` is set, `self.fn` receives `expr`,
    `memo` and `ctrl` keywords; otherwise it receives the value of the
    evaluated pyll expression. A number is wrapped as a dict holding a
    float 'loss' and status `STATUS_OK`; any other return value is copied
    with `dict(...)` and checked.

    Raises InvalidResultStatus for a status outside `STATUS_STRINGS`, and
    InvalidLoss when the status is `STATUS_OK` but the loss is absent or
    cannot be cast to float.
    """
    memo = self.memo_from_config(config)
    use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)

    if self.pass_expr_memo_ctrl:
        rval = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)
    else:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        rval = self.fn(pyll.rec_eval(
            self.expr,
            memo=memo,
            print_node_on_error=self.rec_eval_print_node_on_error))

    returned_number = isinstance(rval, (float, int, np.number))
    if returned_number:
        dict_rval = {'loss': float(rval), 'status': STATUS_OK}
    else:
        dict_rval = dict(rval)
        status = dict_rval['status']
        if status not in STATUS_STRINGS:
            raise InvalidResultStatus(dict_rval)

        if status == STATUS_OK:
            # -- make sure that the loss is present and valid
            try:
                dict_rval['loss'] = float(dict_rval['loss'])
            except (TypeError, KeyError):
                raise InvalidLoss(dict_rval)

    if attach_attachments:
        attachments = dict_rval.pop('attachments', {})
        for name, payload in attachments.items():
            ctrl.attachments[name] = payload

    # -- the result is not passed through base.SONify here because
    #    SON-compatibility is only a requirement for trials destined for a
    #    mongodb. In-memory rvals can contain anything.
    return dict_rval
def suggest(self, new_ids, trials):
    """Build one trial document per id, seeding `self.rng` from each id.

    new_ids - a list of unique identifiers (not necessarily ints!)
              for the suggestions that this function should return.

    The seed for an id is the SHA-1 digest of `str(new_id)`, read as a
    base-16 integer and reduced modulo 2**31.
    """
    # XXX (JB, June 19 2012): this used to be the implementation for both
    # the Random class and the base class. Running Random() with a
    # different seed every time surprisingly generated the same thing all
    # the time, so Random was given its own simpler implementation that
    # does not re-seed self.rng from new_ids. That leaves this strange
    # implementation in the base class; it is unclear whether to delete it.
    suggestions = []
    for trial_id in new_ids:
        # -- ids are handled one at a time so that the generator can be
        #    seeded based on each of them
        hasher = hashlib.sha1()
        hasher.update(str(trial_id))
        id_seed = int(int(hasher.hexdigest(), base=16) % (2**31))
        self.rng.seed(id_seed)

        # -- sample new specs, idxs, vals
        sampled_idxs, sampled_vals = pyll.rec_eval(
            self.s_idxs_vals,
            memo={self.s_new_ids: [trial_id]})

        fresh_result = self.bandit.new_result()
        fresh_misc = dict(tid=trial_id, cmd=self.cmd, workdir=self.workdir)
        miscs_update_idxs_vals([fresh_misc], sampled_idxs, sampled_vals)

        new_docs = trials.new_trial_docs(
            [trial_id], [None], [fresh_result], [fresh_misc])
        suggestions.extend(new_docs)
    return suggestions
def suggest(self, new_ids, trials):
    """Return new trial documents, sampling specs, idxs and vals per id.

    new_ids - a list of unique identifiers (not necessarily ints!)
              for the suggestions that this function should return.

    For every id, `self.new_ids` is overwritten in place with that single
    id and `self.rng` is seeded from the SHA-1 digest of `str(new_id)`
    (modulo 2**31) before `self.s_specs_idxs_vals` is evaluated.
    """
    docs = []
    for new_id in new_ids:
        # -- install the id as the program argument (in-place update)
        self.new_ids[:] = [new_id]

        digest = hashlib.sha1(str(new_id)).hexdigest()
        self.rng.seed(int(int(digest, 16) % (2**31)))

        # -- sample new specs, idxs, vals
        new_specs, idxs, vals = pyll.rec_eval(self.s_specs_idxs_vals)

        result = self.bandit.new_result()
        misc = dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs.extend(
            trials.new_trial_docs([new_id], new_specs, [result], [misc]))
    return docs
def sample_hparam_space(space, algo, max_evals, dpp_dist='l2',
                        discretize_space=True, discretize_num=0,
                        rstate=None):
    """Sample hyperparameter settings from `space` with `algo`.

    Parameters
    ----------
    space : search space handed to `base.Domain`.
    algo : callable invoked once as `algo([0], domain, trials, seed)`.
    max_evals, dpp_dist, discretize_space, discretize_num : stored as
        attributes of the same name on the `base.Trials` object given to
        `algo`.
    rstate : object with a `randint` method used to draw the seed for
        `algo`. When None (the default) a fresh `np.random.RandomState()`
        is created for this call, so no generator is shared between calls.

    Returns
    -------
    list
        One evaluated `domain.expr` per entry of `trials.hparams_to_try`.
        NOTE(review): `hparams_to_try` is not set in this function;
        presumably `algo` populates it -- confirm.
    """
    if rstate is None:
        # -- created per call: a default built in the signature would be
        #    evaluated once at definition time and shared by every call
        rstate = np.random.RandomState()

    trials = base.Trials()
    trials.max_evals = max_evals
    trials.discretize_space = discretize_space
    trials.dpp_dist = dpp_dist
    trials.discretize_num = discretize_num

    # here we're using placeholders for fn and pass_expr_memo_ctrl, since
    # neither will actually be used.
    domain = base.Domain(False, space, pass_expr_memo_ctrl=False)

    algo([0], domain, trials, rstate.randint(2 ** 31 - 1))

    hparam_sets_to_return = []
    for hparam_set in trials.hparams_to_try:
        spec = base.spec_from_misc(hparam_set[0]['misc'])
        ctrl = base.Ctrl(trials, current_trial=hparam_set[0])
        memo = domain.memo_from_config(spec)
        # this doesn't seem to do anything, but it might
        domain.use_obj_for_literal_in_memo(ctrl, base.Ctrl, memo)
        hparam_sets_to_return.append(pyll.rec_eval(domain.expr, memo=memo))
    return hparam_sets_to_return
def test_vectorize_config0():
    """Vectorize a dict-valued config and check samples against idxs/vals.

    The config is built from this function's own local variables through
    `locals()`, so the names p0..p6 become the keys of the sampled dicts.
    """
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    # -- snapshot of the locals defined so far (p0..p6); any extra local
    #    introduced above this line would become a config key as well
    d = locals()
    d['p1'] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    # -- N is a placeholder node; its value is supplied through the memo of
    #    rec_eval below
    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    # -- debugging output, enabled
    if 1:
        print '=' * 80
        print 'VECTORIZED'
        print full_output
        print '\n' * 1

    fo2 = replace_repeat_stochastic(full_output)
    # -- debugging output, disabled
    if 0:
        print '=' * 80
        print 'VECTORIZED STOCHASTIC'
        print fo2
        print '\n' * 1

    new_vc = recursive_set_rng_kwarg(
        fo2,
        as_apply(np.random.RandomState(1))
    )
    # -- debugging output, disabled
    if 0:
        print '=' * 80
        print 'VECTORIZED STOCHASTIC WITH RNGS'
        print new_vc

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print 'foo[0]', foo[0]
    print 'foo[1]', foo[1]
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994)
        }
    assert foo[1] != foo[2]

    print idxs
    print vals['p3']
    print vals['p6']
    print idxs['p1']
    print vals['p1']
    # -- p3 and p6 have one value per index; p1 must have fewer than Nval
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval
    p1d = dict(zip(idxs['p1'], vals['p1']))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        # -- when both choices took option 0, p1 must not have been sampled
        #    for this index
        if p3v == p6v == 0:
            assert ii not in idxs['p1']
        # -- a non-zero choice selected p1, so the config entry must equal
        #    the p1 value recorded for this index
        if p3v:
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print 'p6', foo[ii]['p6'], p1d[ii]
            assert foo[ii]['p6'] == p1d[ii]
def suggest(
    new_ids,
    domain,
    trials,
    seed=123,
    prior_weight=_default_prior_weight,
    n_startup_jobs=_default_n_startup_jobs,
    n_EI_candidates=_default_n_EI_candidates,
    gamma=_default_gamma,
    linear_forgetting=_default_linear_forgetting,
):
    """Suggest one new trial document using the TPE graph.

    Parameters
    ----------
    new_ids : sequence holding exactly one new trial id.
    domain : provides `loss`, `s_new_ids`, `params`, `new_result`, `cmd`
        and `workdir`, and is passed to `tpe_transform`.
    trials : source of existing trial documents (`trials.trials`) and
        factory for the returned documents (`new_trial_docs`).
    seed : forwarded to `rand.suggest` while fewer than `n_startup_jobs`
        documents are available; not otherwise used here.
    prior_weight, gamma : forwarded to `tpe_transform`.
    n_startup_jobs : below this many usable documents the suggestion is
        delegated to `rand.suggest`.
    n_EI_candidates : number of fake ids bound to `domain.s_new_ids`.
    linear_forgetting : accepted but not referenced in this body.

    Returns
    -------
    The documents produced by `trials.new_trial_docs` (or by
    `rand.suggest` during start-up).

    Raises
    ------
    NotImplementedError
        If more than one id is requested.
    """
    if len(new_ids) > 1:
        # write a loop to draw new points sequentially
        # TODO: insert constant liar for tentative suggestions
        raise NotImplementedError("generates one at a time")
    else:
        new_id, = new_ids

    t0 = time.time()
    (s_prior_weight, observed, observed_loss, specs, opt_idxs, opt_vals) = \
        tpe_transform(domain, prior_weight, gamma)
    tt = time.time() - t0
    logger.info("tpe_transform took %f seconds" % tt)

    # -- keep, for every tid, the document with the lowest loss
    best_docs = dict()
    best_docs_loss = dict()
    for doc in trials.trials:
        # get either this docs own tid or the one that it's from
        tid = doc["misc"].get("from_tid", doc["tid"])
        loss = domain.loss(doc["result"], doc["spec"])
        if loss is None:
            # -- associate infinite loss to new/running/failed jobs
            loss = float("inf")
        else:
            loss = float(loss)
        best_docs_loss.setdefault(tid, loss)
        if loss <= best_docs_loss[tid]:
            best_docs_loss[tid] = loss
            best_docs[tid] = doc

    # -- sort docs by order of suggestion
    #    so that linear_forgetting removes the oldest ones
    #    (tids are unique dict keys, so the documents are never compared)
    tid_docs = sorted(best_docs.items())
    losses = [best_docs_loss[k] for k, v in tid_docs]
    tids = [k for k, v in tid_docs]
    docs = [v for k, v in tid_docs]

    if docs:
        logger.info("TPE using %i/%i trials with best loss %f" %
                    (len(docs), len(trials), min(best_docs_loss.values())))
    else:
        logger.info("TPE using 0 trials")

    if len(docs) < n_startup_jobs:
        # N.B. THIS SEEDS THE RNG BASED ON THE new_id
        return rand.suggest(new_ids, domain, trials, seed)

    # Sample and compute log-probability.
    if tids:
        # -- the +2 co-ordinates with an assertion above
        #    to ensure that fake ids are used during sampling
        fake_id_0 = max(max(tids), new_id) + 2
    else:
        # -- weird - we're running the TPE algo from scratch
        assert n_startup_jobs <= 0
        fake_id_0 = new_id + 2

    fake_ids = range(fake_id_0, fake_id_0 + n_EI_candidates)

    # -- this dictionary will map pyll nodes to the values
    #    they should take during the evaluation of the pyll program
    memo = {domain.s_new_ids: fake_ids}

    o_idxs_d, o_vals_d = miscs_to_idxs_vals([d["misc"] for d in docs],
                                            keys=domain.params.keys())
    memo[observed["idxs"]] = o_idxs_d
    memo[observed["vals"]] = o_vals_d

    memo[observed_loss["idxs"]] = tids
    memo[observed_loss["vals"]] = losses

    idxs, vals = pyll.rec_eval([opt_idxs, opt_vals], memo=memo)

    # -- retrieve the best of the samples and form the return tuple
    # the build_posterior makes all specs the same

    rval_specs = [None]  # -- specs are deprecated
    rval_results = [domain.new_result()]
    rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]

    # -- results sampled under the first fake id are re-labelled as new_id
    miscs_update_idxs_vals(rval_miscs, idxs, vals,
                           idxs_map={fake_ids[0]: new_id},
                           assert_all_vals_used=False)
    rval_docs = trials.new_trial_docs([new_id], rval_specs, rval_results,
                                      rval_miscs)

    return rval_docs
def suggest(new_ids, domain, trials, seed=123,
            prior_weight=_default_prior_weight,
            n_startup_jobs=_default_n_startup_jobs,
            n_EI_candidates=_default_n_EI_candidates,
            gamma=_default_gamma,
            linear_forgetting=_default_linear_forgetting,
            ):
    """Suggest new trial documents with the TPE algorithm.

    With a single id, one document is sampled from the TPE program built by
    `tpe_transform`.  With several ids, suggestions are made greedily one at
    a time, each seeing the previously suggested documents through a scratch
    `Trials` object.

    Parameters
    ----------
    new_ids : sequence of trial ids to create documents for.
    domain : object supplying `loss`, `s_new_ids`, `params`, `new_result`,
        `cmd` and `workdir`.
    trials : trial store; `trials.trials`, `len(trials)` and
        `trials.new_trial_docs` are used.
    seed : only forwarded to `rand.suggest` during the startup phase.
    prior_weight, gamma : forwarded to `tpe_transform`.
    n_startup_jobs : while fewer than this many distinct trials exist,
        suggestions are delegated to `rand.suggest`.
    n_EI_candidates : number of fake ids evaluated in the pyll program.
    linear_forgetting : forwarded on recursion; otherwise not referenced in
        this function body.

    Returns
    -------
    A list of new trial documents (multi-id case) or the value of
    `trials.new_trial_docs(...)` (single-id case).
    """
    if len(new_ids) > 1:
        # -- greedy loop rolling forward
        trials_copy = Trials()
        # Copy the list so tentative docs inserted below cannot end up in
        # the caller's trial list.
        # NOTE(review): assumes insert_trial_docs mutates _dynamic_trials
        # in place -- confirm against the Trials implementation.
        trials_copy._dynamic_trials = list(trials.trials)
        trials_copy.refresh()
        rval = []
        for new_id in new_ids:
            new_trials1 = suggest([new_id], domain, trials_copy,
                                  seed=seed,
                                  prior_weight=prior_weight,
                                  n_startup_jobs=n_startup_jobs,
                                  n_EI_candidates=n_EI_candidates,
                                  gamma=gamma,
                                  linear_forgetting=linear_forgetting)
            trials_copy.insert_trial_docs(new_trials1)
            trials_copy.refresh()
            rval.extend(new_trials1)
        return rval
    new_id, = new_ids

    t0 = time.time()
    (s_prior_weight, observed, observed_loss, specs, opt_idxs, opt_vals) \
        = tpe_transform(domain, prior_weight, gamma)
    tt = time.time() - t0
    logger.info('tpe_transform took %f seconds' % tt)

    # -- keep, per originating tid, the document with the lowest loss
    best_docs = dict()
    best_docs_loss = dict()
    for doc in trials.trials:
        # get either this docs own tid or the one that it's from
        tid = doc['misc'].get('from_tid', doc['tid'])
        loss = domain.loss(doc['result'], doc['spec'])
        if loss is None:
            # -- associate infinite loss to new/running/failed jobs
            loss = float('inf')
        else:
            loss = float(loss)
        best_docs_loss.setdefault(tid, loss)
        if loss <= best_docs_loss[tid]:
            best_docs_loss[tid] = loss
            best_docs[tid] = doc

    # -- sort docs by order of suggestion
    #    so that linear_forgetting removes the oldest ones
    tids = sorted(best_docs)
    losses = [best_docs_loss[tid] for tid in tids]
    docs = [best_docs[tid] for tid in tids]

    if docs:
        logger.info('TPE using %i/%i trials with best loss %f' % (
            len(docs), len(trials), min(best_docs_loss.values())))
    else:
        logger.info('TPE using 0 trials')

    if len(docs) < n_startup_jobs:
        # N.B. THIS SEEDS THE RNG BASED ON THE new_id
        return rand.suggest(new_ids, domain, trials, seed)

    # Sample and compute log-probability.
    if tids:
        # -- the +2 co-ordinates with an assertion above
        #    to ensure that fake ids are used during sampling
        fake_id_0 = max(max(tids), new_id) + 2
    else:
        # -- weird - we're running the TPE algo from scratch
        assert n_startup_jobs <= 0
        fake_id_0 = new_id + 2

    fake_ids = range(fake_id_0, fake_id_0 + n_EI_candidates)

    # -- this dictionary will map pyll nodes to the values
    #    they should take during the evaluation of the pyll program
    memo = {domain.s_new_ids: fake_ids}

    o_idxs_d, o_vals_d = miscs_to_idxs_vals(
        [d['misc'] for d in docs], keys=domain.params.keys())
    memo[observed['idxs']] = o_idxs_d
    memo[observed['vals']] = o_vals_d

    memo[observed_loss['idxs']] = tids
    memo[observed_loss['vals']] = losses

    idxs, vals = pyll.rec_eval([opt_idxs, opt_vals], memo=memo,
                               print_node_on_error=False)

    # -- retrieve the best of the samples and form the return tuple
    #    the build_posterior makes all specs the same
    rval_specs = [None]  # -- specs are deprecated
    rval_results = [domain.new_result()]
    rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]

    # -- the values sampled under the first fake id are re-labelled
    #    with the real new_id
    miscs_update_idxs_vals(rval_miscs, idxs, vals,
                           idxs_map={fake_ids[0]: new_id},
                           assert_all_vals_used=False)
    rval_docs = trials.new_trial_docs([new_id],
                                      rval_specs, rval_results, rval_miscs)
    return rval_docs
def test_vectorize_config0(): config = config0() assert 'p3' == config.named_args[2][0] p1 = config.named_args[2][1].pos_args[1] assert p1.name == 'uniform' assert p1.pos_args[0]._obj == 2 assert p1.pos_args[1]._obj == 3 N = as_apply(5) expr = config expr_idxs = scope.range(N) vh = VectorizeHelper(expr, expr_idxs) vh.build_idxs() vh.build_vals() vconfig = vh.vals_memo[expr] full_output = as_apply([vconfig, vh.idxs_by_id(), vh.vals_by_id()]) if 0: print '=' * 80 print 'VECTORIZED' print full_output print '\n' * 1 fo2 = replace_repeat_stochastic(full_output) if 0: print '=' * 80 print 'VECTORIZED STOCHASTIC' print fo2 print '\n' * 1 new_vc = recursive_set_rng_kwarg( fo2, as_apply(np.random.RandomState(1)) ) if 0: print '=' * 80 print 'VECTORIZED STOCHASTIC WITH RNGS' print new_vc foo, idxs, vals = rec_eval(new_vc) print foo #print idxs #print vals assert len(foo) == 5 assert foo[0] == { 'p0': 0.39676747423066994, 'p2': 0.39676747423066994, 'p3': 2.1281244479293568, 'p4': 1, 'p5': (3, 4, 0.39676747423066994) } assert foo[1] != foo[2] if 0: print idxs[vh.node_id[p1]] print vals[vh.node_id[p1]] # - p1 is only used sometimes assert len(idxs[vh.node_id[p1]]) < 5 for ii in range(5): if ii in idxs[vh.node_id[p1]]: assert foo[ii]['p3'] == vals[vh.node_id[p1]][list(idxs[vh.node_id[p1]]).index(ii)] else: assert foo[ii]['p3'] == -2, foo[ii]['p3']
def test_vectorize_config0():
    """Vectorize a locally built config and check idxs/vals bookkeeping by label.

    The config dict is taken from `locals()`, so the local variable names
    p0..p6 below become the config keys -- renaming or adding locals before
    the `locals()` call would change the config under test.
    """
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    # -- snapshot of the locals defined so far (`d` itself is not yet bound)
    d = locals()
    d['p1'] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    # -- N is a placeholder node; its value is supplied through `memo`
    #    when rec_eval is called below
    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    # -- this printout is switched on
    if 1:
        print '=' * 80
        print 'VECTORIZED'
        print full_output
        print '\n' * 1

    fo2 = replace_repeat_stochastic(full_output)
    # -- debugging printout, switched off
    if 0:
        print '=' * 80
        print 'VECTORIZED STOCHASTIC'
        print fo2
        print '\n' * 1

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    # -- debugging printout, switched off
    if 0:
        print '=' * 80
        print 'VECTORIZED STOCHASTIC WITH RNGS'
        print new_vc

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print 'foo[0]', foo[0]
    print 'foo[1]', foo[1]
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994)}
    assert foo[1] != foo[2]

    print idxs
    print vals['p3']
    print vals['p6']
    print idxs['p1']
    print vals['p1']
    # -- p3 and p6 have a value for every index; p1 for fewer than all
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval
    # -- map each index at which p1 was drawn to the drawn value
    p1d = dict(zip(idxs['p1'], vals['p1']))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        if p3v == p6v == 0:
            # -- both choices are 0, so p1 must not be drawn for this index
            assert ii not in idxs['p1']
        if p3v:
            # -- a non-zero p3 choice must yield p1's value for this index
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print 'p6', foo[ii]['p6'], p1d[ii]
            assert foo[ii]['p6'] == p1d[ii]
def work(self, **kwargs): self.__dict__.update(kwargs) bandit = opt_q_uniform(self.target) prior_weight = 2.5 gamma = 0.20 algo = TreeParzenEstimator(bandit, prior_weight=prior_weight, n_startup_jobs=2, n_EI_candidates=128, gamma=gamma) print algo.opt_idxs['x'] print algo.opt_vals['x'] trials = Trials() experiment = Experiment(trials, algo) experiment.run(self.LEN) if self.show_vars: import hyperopt.plotting hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1) idxs, vals = miscs_to_idxs_vals(trials.miscs) idxs = idxs['x'] vals = vals['x'] print "VALS", vals losses = trials.losses() from hyperopt.tpe import ap_filter_trials from hyperopt.tpe import adaptive_parzen_samplers qu = scope.quniform(1.01, 10, 1) fn = adaptive_parzen_samplers['quniform'] fn_kwargs = dict(size=(4, ), rng=np.random) s_below = pyll.Literal() s_above = pyll.Literal() b_args = [s_below, prior_weight] + qu.pos_args b_post = fn(*b_args, **fn_kwargs) a_args = [s_above, prior_weight] + qu.pos_args a_post = fn(*a_args, **fn_kwargs) #print b_post #print a_post fn_lpdf = getattr(scope, a_post.name + '_lpdf') print fn_lpdf # calculate the llik of b_post under both distributions a_kwargs = dict([(n, a) for n, a in a_post.named_args if n not in ('rng', 'size')]) b_kwargs = dict([(n, a) for n, a in b_post.named_args if n not in ('rng', 'size')]) below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs) above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs) new_node = scope.broadcast_best(b_post, below_llik, above_llik) print '=' * 80 do_show = self.show_steps import matplotlib.pyplot as plt for ii in range(2, 9): if ii > len(idxs): break print '-' * 80 print 'ROUND', ii print '-' * 80 all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10] below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii], losses[:ii], gamma) below = below.astype('int') above = above.astype('int') print 'BB0', below print 'BB1', above #print 'BELOW', zip(range(100), np.bincount(below, minlength=11)) #print 'ABOVE', 
zip(range(100), np.bincount(above, minlength=11)) memo = {b_post: all_vals, s_below: below, s_above: above} bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node], memo=memo) #print bl - al print 'BB2', dict(zip(all_vals, bl - al)) print 'BB3', dict(zip(all_vals, bl)) print 'BB4', dict(zip(all_vals, al)) print 'ORIG PICKED', vals[ii] print 'PROPER OPT PICKS:', nv #assert np.allclose(below, [3, 3, 9]) #assert len(below) + len(above) == len(vals) if do_show: plt.subplot(8, 1, ii) #plt.scatter(all_vals, # np.bincount(below, minlength=11)[2:], c='b') #plt.scatter(all_vals, # np.bincount(above, minlength=11)[2:], c='c') plt.scatter(all_vals, bl, c='g') plt.scatter(all_vals, al, c='r') if do_show: plt.show()
def evaluate(self, config, ctrl):
    """Build the screening program for `config` and return its evaluation.

    `config` is expanded as keyword arguments to `screening_prog` together
    with `ctrl`; the resulting program is evaluated with
    `deepcopy_inputs=False`.
    """
    return pyll.rec_eval(
        screening_prog(ctrl=ctrl, **config),
        deepcopy_inputs=False)
def suggest1(self, new_id, trials):
    """Suggest a single new document

    Reads `self.bandit`, `self.n_startup_jobs`, `self.n_EI_candidates`,
    `self.s_new_ids`, `self.observed`, `self.observed_loss`,
    `self.opt_idxs`, `self.opt_vals`, `self.cmd` and `self.workdir`.

    While fewer than `self.n_startup_jobs` trials have status STATUS_OK the
    suggestion is delegated to `BanditAlgo.suggest`.  Otherwise the pyll
    program (`self.opt_idxs`, `self.opt_vals`) is evaluated and the values
    found under the first fake id are re-labelled with `new_id`.

    Returns the value of `trials.new_trial_docs(...)` for `new_id`.

    Raises AssertionError if two trial documents share a tid; the trials
    are first pickled to 'assert_fail_tpe_637.pkl' for inspection.
    """
    #print self.post_llik
    bandit = self.bandit
    docs_by_tid = dict([(d['tid'], d) for d in trials.trials])
    if len(docs_by_tid) != len(trials.trials):
        import cPickle
        # -- `with` closes the dump file even if pickling fails
        with open('assert_fail_tpe_637.pkl', 'w') as dump_file:
            cPickle.dump(trials.trials, dump_file)
        # -- raised explicitly so the check also fires under `python -O`
        raise AssertionError(
            'non-unique docid, dumped to assert_fail_tpe_637.pkl')

    # -- keep, per originating tid, the document with the lowest loss
    best_docs = dict()
    best_docs_loss = dict()
    for doc in trials.trials:
        # get either this docs own tid or the one that it's from
        tid = doc['misc'].get('from_tid', doc['tid'])
        loss = bandit.loss(doc['result'], doc['spec'])
        if loss is None:
            # -- associate infinite loss to new/running/failed jobs
            loss = float('inf')
        else:
            loss = float(loss)
        best_docs_loss.setdefault(tid, loss)
        if loss <= best_docs_loss[tid]:
            best_docs_loss[tid] = loss
            best_docs[tid] = doc

    # -- sort docs by order of suggestion
    #    so that linear_forgetting removes the oldest ones
    tids = sorted(best_docs)
    losses = [best_docs_loss[tid] for tid in tids]
    docs = [best_docs[tid] for tid in tids]

    n_ok = sum(1 for d in docs if d['result']['status'] == STATUS_OK)
    if docs:
        logger.info('TPE %i/%i w best loss %f' % (
            n_ok, len(docs), min(best_docs_loss.values())))
    else:
        logger.info('TPE using 0 trials')

    if n_ok < self.n_startup_jobs:
        # N.B. THIS SEEDS THE RNG BASED ON THE new_id
        return BanditAlgo.suggest(self, [new_id], trials)

    # Sample and compute log-probability.
    if tids:
        # -- the +2 co-ordinates with an assertion above
        #    to ensure that fake ids are used during sampling
        fake_id_0 = max(max(tids), new_id) + 2
    else:
        # -- weird - we're running the TPE algo from scratch
        assert self.n_startup_jobs <= 0
        fake_id_0 = new_id + 2

    fake_ids = range(fake_id_0, fake_id_0 + self.n_EI_candidates)

    # -- this dictionary will map pyll nodes to the values
    #    they should take during the evaluation of the pyll program
    memo = {self.s_new_ids: fake_ids}

    o_idxs_d, o_vals_d = miscs_to_idxs_vals(
        [d['misc'] for d in docs], keys=bandit.params.keys())
    memo[self.observed['idxs']] = o_idxs_d
    memo[self.observed['vals']] = o_vals_d

    memo[self.observed_loss['idxs']] = tids
    memo[self.observed_loss['vals']] = losses

    idxs, vals = pyll.rec_eval(
        [self.opt_idxs, self.opt_vals], memo=memo)

    # -- retrieve the best of the samples and form the return tuple
    #    the build_posterior makes all specs the same
    rval_specs = [None]  # -- specs are deprecated
    rval_results = [bandit.new_result()]
    rval_miscs = [dict(tid=new_id, cmd=self.cmd, workdir=self.workdir)]

    miscs_update_idxs_vals(rval_miscs, idxs, vals,
                           idxs_map={fake_ids[0]: new_id},
                           assert_all_vals_used=False)
    rval_docs = trials.new_trial_docs([new_id],
                                      rval_specs, rval_results, rval_miscs)
    return rval_docs
def work(self, **kwargs): self.__dict__.update(kwargs) bandit = opt_q_uniform(self.target) prior_weight = 2.5 gamma = 0.20 algo = TreeParzenEstimator(bandit, prior_weight=prior_weight, n_startup_jobs=2, n_EI_candidates=128, gamma=gamma) print algo.opt_idxs['x'] print algo.opt_vals['x'] trials = Trials() experiment = Experiment(trials, algo) experiment.run(self.LEN) if self.show_vars: import hyperopt.plotting hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1) idxs, vals = miscs_to_idxs_vals(trials.miscs) idxs = idxs['x'] vals = vals['x'] print "VALS", vals losses = trials.losses() from hyperopt.tpe import ap_filter_trials from hyperopt.tpe import adaptive_parzen_samplers qu = scope.quniform(1.01, 10, 1) fn = adaptive_parzen_samplers['quniform'] fn_kwargs = dict(size=(4,), rng=np.random) s_below = pyll.Literal() s_above = pyll.Literal() b_args = [s_below, prior_weight] + qu.pos_args b_post = fn(*b_args, **fn_kwargs) a_args = [s_above, prior_weight] + qu.pos_args a_post = fn(*a_args, **fn_kwargs) #print b_post #print a_post fn_lpdf = getattr(scope, a_post.name + '_lpdf') print fn_lpdf # calculate the llik of b_post under both distributions a_kwargs = dict([(n, a) for n, a in a_post.named_args if n not in ('rng', 'size')]) b_kwargs = dict([(n, a) for n, a in b_post.named_args if n not in ('rng', 'size')]) below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs) above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs) new_node = scope.broadcast_best(b_post, below_llik, above_llik) print '=' * 80 do_show = self.show_steps import matplotlib.pyplot as plt for ii in range(2, 9): if ii > len(idxs): break print '-' * 80 print 'ROUND', ii print '-' * 80 all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10] below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii], losses[:ii], gamma) below = below.astype('int') above = above.astype('int') print 'BB0', below print 'BB1', above #print 'BELOW', zip(range(100), np.bincount(below, minlength=11)) #print 'ABOVE', 
zip(range(100), np.bincount(above, minlength=11)) memo = {b_post: all_vals, s_below: below, s_above: above} bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node], memo=memo) #print bl - al print 'BB2', dict(zip(all_vals, bl - al)) print 'BB3', dict(zip(all_vals, bl)) print 'BB4', dict(zip(all_vals, al)) print 'ORIG PICKED', vals[ii] print 'PROPER OPT PICKS:', nv #assert np.allclose(below, [3, 3, 9]) #assert len(below) + len(above) == len(vals) if do_show: plt.subplot(8, 1, ii) #plt.scatter(all_vals, # np.bincount(below, minlength=11)[2:], c='b') #plt.scatter(all_vals, # np.bincount(above, minlength=11)[2:], c='c') plt.scatter(all_vals, bl, c='g') plt.scatter(all_vals, al, c='r') if do_show: plt.show()