def __init__(self, bandit, seed=seed, cmd=None, workdir=None):
    self.bandit = bandit
    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.cmd = cmd
    self.workdir = workdir
    self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
    before = pyll.dfs(self.bandit.expr)
    # -- raises exception if expr contains cycles
    pyll.toposort(self.bandit.expr)
    vh = self.vh = VectorizeHelper(self.bandit.expr, self.s_new_ids)
    # -- raises exception if v_expr contains cycles
    pyll.toposort(vh.v_expr)

    idxs_by_label = vh.idxs_by_label()
    vals_by_label = vh.vals_by_label()
    after = pyll.dfs(self.bandit.expr)
    # -- try to detect if VectorizeHelper screwed up anything inplace
    assert before == after
    assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
    assert set(idxs_by_label.keys()) == set(self.bandit.params.keys())

    # -- make the graph runnable and SON-encodable
    #    N.B. operates inplace
    self.s_idxs_vals = recursive_set_rng_kwarg(
        scope.pos_args(idxs_by_label, vals_by_label),
        pyll.as_apply(self.rng))

    # -- raises an exception if no topological ordering exists
    pyll.toposort(self.s_idxs_vals)
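# A small, self-contained illustration of the cycle checks used above:
# pyll.toposort returns the nodes of an acyclic graph with every node
# placed after its inputs, and raises if no topological ordering exists.
from hyperopt import pyll

g = pyll.as_apply({'a': 1, 'b': [2, 3]})
order = pyll.toposort(g)
assert order[-1] is g  # the root comes last in a topological ordering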
def __init__(self, expr, expr_idxs):
    self.expr = expr
    self.expr_idxs = expr_idxs
    self.idxs_memo = {expr: expr_idxs}
    self.vals_memo = {}
    self.choice_memo = {}
    self.dfs_nodes = dfs(expr)
    self.node_id = dict([(node, "node_%i" % ii)
                         for ii, node in enumerate(self.dfs_nodes)])
def test_clone():
    config = config0()
    config2 = clone(config)

    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    foo = recursive_set_rng_kwarg(config, scope.rng_from_seed(5))
    r = rec_eval(foo)
    print(r)
    r2 = rec_eval(recursive_set_rng_kwarg(config2, scope.rng_from_seed(5)))
    print(r2)
    assert r == r2
def __init__(self, expr, name=None, rseed=None, loss_target=None,
             exceptions=None, do_checks=True):
    if do_checks:
        if isinstance(expr, pyll.Apply):
            self.expr = expr
            # XXX: verify that expr is a dictionary with the right keys,
            #      then refactor the code below
        elif isinstance(expr, dict):
            if "loss" not in expr:
                raise ValueError("expr must define a loss")
            if "status" not in expr:
                expr["status"] = STATUS_OK
            self.expr = pyll.as_apply(expr)
        else:
            raise TypeError("expr must be a pyll.Apply or a dictionary")
    else:
        self.expr = pyll.as_apply(expr)

    self.params = {}
    for node in pyll.dfs(self.expr):
        if node.name == "hyperopt_param":
            self.params[node.arg["label"].obj] = node.arg["obj"]

    if exceptions is not None:
        self.exceptions = exceptions
    self.loss_target = loss_target
    self.installed_rng = False
    if rseed is None:
        self.rng = None
    else:
        self.rng = np.random.RandomState(rseed)
    self.name = name
def __init__(self, expr, name=None, loss_target=None,
             exceptions=None, do_checks=True,
             ):
    if do_checks:
        if isinstance(expr, pyll.Apply):
            self.expr = expr
            # XXX: verify that expr is a dictionary with the right keys,
            #      then refactor the code below
        elif isinstance(expr, dict):
            if 'loss' not in expr:
                raise ValueError('expr must define a loss')
            if 'status' not in expr:
                expr['status'] = STATUS_OK
            self.expr = pyll.as_apply(expr)
        else:
            raise TypeError('expr must be a pyll.Apply or a dictionary')
    else:
        self.expr = pyll.as_apply(expr)

    self.params = {}
    for node in pyll.dfs(self.expr):
        if node.name == 'hyperopt_param':
            label = node.arg['label'].obj
            if label in self.params:
                raise DuplicateLabel(label)
            self.params[label] = node.arg['obj']

    if exceptions is not None:
        self.exceptions = exceptions
    self.loss_target = loss_target
    self.name = name
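# A minimal sketch of the duplicate-label check above, assuming the
# hyperopt `hp` helpers (which expand to hyperopt_param nodes):
from hyperopt import hp

expr = {'loss': hp.uniform('x', 0, 1) + hp.uniform('x', 0, 1)}
try:
    Bandit(expr)
except DuplicateLabel:
    pass  # the label 'x' appears twice, so the dfs pass rejects it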
def __init__(self, fn, expr, args=[], workdir=None,
             pass_expr_memo_ctrl=None, **bandit_kwargs):
    self.cmd = ('domain_attachment', 'FMinIter_Domain')
    self.fn = fn
    self.expr = expr
    self.args = args
    if pass_expr_memo_ctrl is None:
        self.pass_expr_memo_ctrl = getattr(fn, 'fmin_pass_expr_memo_ctrl',
                                           False)
    else:
        self.pass_expr_memo_ctrl = pass_expr_memo_ctrl
    base.Bandit.__init__(self, expr, do_checks=False, **bandit_kwargs)

    # -- This code was stolen from base.BanditAlgo, a class which may soon
    #    be gone
    self.workdir = workdir
    self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
    before = pyll.dfs(self.expr)
    # -- raises exception if expr contains cycles
    pyll.toposort(self.expr)
    vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
    # -- raises exception if v_expr contains cycles
    pyll.toposort(vh.v_expr)

    idxs_by_label = vh.idxs_by_label()
    vals_by_label = vh.vals_by_label()
    after = pyll.dfs(self.expr)
    # -- try to detect if VectorizeHelper screwed up anything inplace
    assert before == after
    assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
    assert set(idxs_by_label.keys()) == set(self.params.keys())

    # -- make the graph runnable and SON-encodable
    #    N.B. operates inplace
    self.s_idxs_vals = recursive_set_rng_kwarg(
        pyll.scope.pos_args(idxs_by_label, vals_by_label),
        pyll.as_apply(self.rng))

    # -- raises an exception if no topological ordering exists
    pyll.toposort(self.s_idxs_vals)
def memo_from_config(self, config):
    memo = {}
    for node in pyll.dfs(self.expr):
        if node.name == 'hyperopt_param':
            label = node.arg['label'].obj
            # -- hack: the value is not really garbage-collected, but the
            #    GarbageCollected sentinel does have the desired effect of
            #    crashing the function if rec_eval actually needs a value
            #    that the optimization algorithm thought to be unnecessary
            memo[node] = config.get(label, pyll.base.GarbageCollected)
    return memo
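# A minimal sketch of how the memo is consumed, assuming this method is
# bound to the Domain class defined below: labels present in `config`
# are bound to their values, and missing labels map to GarbageCollected
# so that rec_eval fails loudly if one of them is actually needed.
from hyperopt import hp

domain = Domain(lambda d: d['x'] ** 2, {'x': hp.uniform('x', -1, 1)})
memo = domain.memo_from_config({'x': 0.5})
# every hyperopt_param node labeled 'x' now evaluates to 0.5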
def assert_integrity_idxs_take(self):
    idxs_memo = self.idxs_memo
    take_memo = self.take_memo
    after = dfs(self.expr)
    assert after == self.dfs_nodes
    assert set(idxs_memo.keys()) == set(take_memo.keys())
    for node in idxs_memo:
        idxs = idxs_memo[node]
        assert idxs.name == 'array_union'
        vals = take_memo[node][0].pos_args[1]
        for take in take_memo[node]:
            assert take.name == 'idxs_take'
            assert [idxs, vals] == take.pos_args[:2]
def pretty_names(expr, prefix=None):
    dfs_order = dfs(expr)
    # -- compute the seq like pyll.dfs just to ensure that
    #    the order of our names matches the dfs order.
    #    It's not clear to me right now that the match is important,
    #    but it's certainly suspicious if not.
    seq = []
    names = []
    seqset = set()
    if prefix is None:
        prefixes = ()
    else:
        prefixes = (prefix,)
    pretty_names_helper(expr, seq, seqset, prefixes, names)
    assert seq == dfs_order
    return dict(zip(seq, names))
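# Hypothetical usage sketch: name every node of a small pyll graph.
from hyperopt import pyll

expr = pyll.as_apply({'a': 1, 'b': {'c': 2}})
name_of = pretty_names(expr, prefix='root')
# name_of maps every node (in dfs order) to a human-readable name
# qualified by the prefix and the dict keys along the path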
def replace_repeat_stochastic(expr, return_memo=False):
    stoch = stochastic.implicit_stochastic_symbols
    nodes = dfs(expr)
    memo = {}
    for ii, orig in enumerate(nodes):
        if orig.name == 'idxs_map' and orig.pos_args[1]._obj in stoch:
            # -- this is an idxs_map of a random draw of distribution `dist`
            idxs = orig.pos_args[0]
            dist = orig.pos_args[1]._obj

            def foo(arg):
                # -- each argument is an idxs, vals pair
                assert arg.name == 'pos_args'
                assert len(arg.pos_args) == 2
                arg_vals = arg.pos_args[1]
                if (arg_vals.name == 'asarray'
                        and arg_vals.inputs()[0].name == 'repeat'):
                    # -- draws are iid, so forget about
                    #    repeating the distribution parameters
                    repeated_thing = arg_vals.inputs()[0].inputs()[1]
                    return repeated_thing
                else:
                    if arg.pos_args[0] is idxs:
                        return arg_vals
                    else:
                        # -- arg.pos_args[0] is a superset of idxs
                        #    TODO: slice out correct elements using
                        #    idxs_take, but more importantly - test this case.
                        raise NotImplementedError()

            new_pos_args = [foo(arg) for arg in orig.pos_args[2:]]
            new_named_args = [[aname, foo(arg)]
                              for aname, arg in orig.named_args]
            vnode = Apply(dist, new_pos_args, new_named_args, None)
            n_times = scope.len(idxs)
            if 'size' in dict(vnode.named_args):
                raise NotImplementedError('random node already has size')
            vnode.named_args.append(['size', n_times])

            # -- loop over all nodes that *use* this one, and change them
            for client in nodes[ii + 1:]:
                client.replace_input(orig, vnode)
            if expr is orig:
                expr = vnode
            memo[orig] = vnode
    if return_memo:
        return expr, memo
    else:
        return expr
def work(self):
    """Test that all prior samplers are gone"""
    tpe_algo = TreeParzenEstimator(self.bandit)
    foo = pyll.as_apply([tpe_algo.post_below['idxs'],
                         tpe_algo.post_below['vals']])
    prior_names = [
        'uniform',
        'quniform',
        'loguniform',
        'qloguniform',
        'normal',
        'qnormal',
        'lognormal',
        'qlognormal',
        'randint',
    ]
    for node in pyll.dfs(foo):
        assert node.name not in prior_names
def use_obj_for_literal_in_memo(expr, obj, lit, memo):
    """
    Set `memo[node] = obj` for all nodes in `expr` such that
    `node.obj == lit`.

    This is a useful routine for fmin-compatible functions that are
    searching domains that include some leaf nodes that are complicated
    runtime-generated objects. One option is to make such leaf nodes pyll
    functions, but it can be easier to construct those objects the normal
    Python way in the fmin function, and just stick them into the
    evaluation memo. The experiment ctrl object itself is inserted using
    this technique.
    """
    for node in pyll.dfs(expr):
        try:
            if node.obj == lit:
                memo[node] = obj
        except AttributeError:
            # -- non-literal nodes don't have node.obj
            pass
    return memo
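# A self-contained sketch of the pattern described in the docstring:
# a plain sentinel literal stands in for a runtime object, and the memo
# substitutes the real object at evaluation time. SENTINEL and the dict
# passed as `obj` are made up for illustration.
from hyperopt import pyll

SENTINEL = 'runtime-handle'
expr = pyll.as_apply({'handle': SENTINEL, 'x': 3})
memo = use_obj_for_literal_in_memo(expr, {'live': True}, SENTINEL, {})
result = pyll.rec_eval(expr, memo=memo)
assert result['handle'] == {'live': True}  # sentinel replaced at eval-time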
def replace_repeat_stochastic(expr, return_memo=False):
    stoch = stochastic.implicit_stochastic_symbols
    nodes = dfs(expr)
    memo = {}
    for ii, orig in enumerate(nodes):
        if orig.name == "idxs_map" and orig.pos_args[1]._obj in stoch:
            # -- this is an idxs_map of a random draw of distribution `dist`
            idxs = orig.pos_args[0]
            dist = orig.pos_args[1]._obj

            def foo(arg):
                # -- each argument is an idxs, vals pair
                assert arg.name == "pos_args"
                assert len(arg.pos_args) == 2
                assert arg.pos_args[0] is idxs, str(orig)
                arg_vals = arg.pos_args[1]
                if (arg_vals.name == "asarray"
                        and arg_vals.inputs()[0].name == "repeat"):
                    # -- draws are iid, so forget about
                    #    repeating the distribution parameters
                    repeated_thing = arg_vals.inputs()[0].inputs()[1]
                    return repeated_thing
                else:
                    return arg_vals

            new_pos_args = [foo(arg) for arg in orig.pos_args[2:]]
            new_named_args = [[aname, foo(arg)]
                              for aname, arg in orig.named_args]
            vnode = Apply(dist, new_pos_args, new_named_args, None)
            n_times = scope.len(idxs)
            vnode.named_args.append(["size", n_times])

            # -- loop over all nodes that *use* this one, and change them
            for client in nodes[ii + 1:]:
                client.replace_input(orig, vnode)
            if expr is orig:
                expr = vnode
            memo[orig] = vnode
    if return_memo:
        return expr, memo
    else:
        return expr
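# An illustration, with node shapes assumed from the checks above, of
# the rewrite this pass performs. An elementwise map of iid draws whose
# parameters were broadcast with asarray(repeat(...)):
#
#     idxs_map(idxs, 'normal',
#              pos_args(idxs, asarray(repeat(len(idxs), mu))),
#              pos_args(idxs, asarray(repeat(len(idxs), sigma))))
#
# collapses into a single vectorized draw:
#
#     normal(mu, sigma, size=len(idxs))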
def __init__(self, expr, expr_idxs, build=True):
    self.expr = expr
    self.expr_idxs = expr_idxs
    self.dfs_nodes = dfs(expr)
    self.params = {}
    for ii, node in enumerate(self.dfs_nodes):
        if node.name == 'hyperopt_param':
            label = node.arg['label'].obj
            self.params[label] = node.arg['obj']
    # -- recursive construction
    #    This makes one term in each idxs, vals memo for every
    #    directed path through the switches in the graph.
    self.idxs_memo = {}  # node -> union, all idxs computed
    self.take_memo = {}  # node -> list of idxs_take retrieving node vals
    if build:
        self.v_expr = self.build_idxs_vals(expr, expr_idxs)

        # TODO: graph-optimization pass to remove cruft:
        #  - unions of 1
        #  - unions of full sets with their subsets
        #  - idxs_take that can be merged
        self.assert_integrity_idxs_take()
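# A minimal sketch of how this helper is driven by the Domain code in
# this file; `space_expr` is a stand-in for any pyll search space.
expr_idxs = pyll.Literal('new_ids')  # trial ids, filled in at eval-time
vh = VectorizeHelper(space_expr, expr_idxs)
idxs_by_label = vh.idxs_by_label()   # label -> idxs of trials that sample it
vals_by_label = vh.vals_by_label()   # label -> the values drawn for them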
def __init__(self, fn, expr,
             workdir=None,
             pass_expr_memo_ctrl=None,
             name=None,
             loss_target=None,
             ):
    """
    Parameters
    ----------
    fn : callable
        This stores the `fn` argument to `fmin`. (See `hyperopt.fmin.fmin`)

    expr : hyperopt.pyll.Apply
        This is the `space` argument to `fmin`. (See `hyperopt.fmin.fmin`)

    workdir : string (or None)
        If non-None, the current working directory will be `workdir` while
        `expr` and `fn` are evaluated. (XXX Currently only respected by
        jobs run via MongoWorker)

    pass_expr_memo_ctrl : bool
        If True, `fn` will be called like this:
        `fn(self.expr, memo, ctrl)`,
        where `memo` is a dictionary mapping `Apply` nodes to their
        computed values, and `ctrl` is a `Ctrl` instance for communicating
        with a Trials database. This lower-level calling convention is
        useful if you want to call e.g. `hyperopt.pyll.rec_eval` yourself
        in some customized way.

    name : string (or None)
        Label, used for pretty-printing.

    loss_target : float (or None)
        The actual or estimated minimum of `fn`.
        Some optimization algorithms may behave differently if their first
        objective is to find an input that achieves a certain value,
        rather than the more open-ended objective of pure minimization.
        XXX: Move this from Domain to be an fmin arg.
    """
    self.fn = fn
    if pass_expr_memo_ctrl is None:
        self.pass_expr_memo_ctrl = getattr(fn, 'fmin_pass_expr_memo_ctrl',
                                           False)
    else:
        self.pass_expr_memo_ctrl = pass_expr_memo_ctrl

    self.expr = pyll.as_apply(expr)

    self.params = {}
    for node in pyll.dfs(self.expr):
        if node.name == 'hyperopt_param':
            label = node.arg['label'].obj
            if label in self.params:
                raise DuplicateLabel(label)
            self.params[label] = node.arg['obj']

    self.loss_target = loss_target
    self.name = name

    self.workdir = workdir
    self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
    before = pyll.dfs(self.expr)
    # -- raises exception if expr contains cycles
    pyll.toposort(self.expr)
    vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
    # -- raises exception if v_expr contains cycles
    pyll.toposort(vh.v_expr)

    idxs_by_label = vh.idxs_by_label()
    vals_by_label = vh.vals_by_label()
    after = pyll.dfs(self.expr)
    # -- try to detect if VectorizeHelper screwed up anything inplace
    assert before == after
    assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
    assert set(idxs_by_label.keys()) == set(self.params.keys())

    self.s_rng = pyll.Literal('rng-placeholder')
    # -- N.B. operates inplace:
    self.s_idxs_vals = recursive_set_rng_kwarg(
        pyll.scope.pos_args(idxs_by_label, vals_by_label),
        self.s_rng)

    # -- raises an exception if no topological ordering exists
    pyll.toposort(self.s_idxs_vals)

    # -- Protocol for serialization.
    #    self.cmd indicates to e.g. MongoWorker how this domain
    #    should be [un]serialized.
    #    XXX This mechanism deserves review as support for ipython
    #        workers improves.
    self.cmd = ('domain_attachment', 'FMinIter_Domain')
def build_posterior(specs, prior_idxs, prior_vals, obs_idxs, obs_vals,
                    oloss_idxs, oloss_vals, oloss_gamma, prior_weight):
    """
    This function clones the prior inference graph by iterating forward
    in topological order, replacing prior random variables (prior_vals)
    with new posterior distributions that condition on the observations
    (obs_vals).
    """
    assert all(isinstance(arg, pyll.Apply)
               for arg in [oloss_idxs, oloss_vals, oloss_gamma])

    expr = pyll.as_apply([specs, prior_idxs, prior_vals])
    nodes = pyll.dfs(expr)

    # build the joint posterior distribution as the values in this memo
    memo = {}
    # map prior RVs to observations
    obs_memo = {}

    for nid in prior_vals:
        # construct the leading args for each call to adaptive_parzen_sampler
        # which will permit the "adaptive parzen samplers" to adapt to the
        # correct samples.
        obs_below, obs_above = scope.ap_filter_trials(
            obs_idxs[nid], obs_vals[nid],
            oloss_idxs, oloss_vals, oloss_gamma)
        obs_memo[prior_vals[nid]] = [obs_below, obs_above]

    for node in nodes:
        if node not in memo:
            new_inputs = [memo[arg] for arg in node.inputs()]
            if node in obs_memo:
                # -- this case corresponds to an observed Random Var
                #    node.name is a distribution like "normal", "randint", etc.
                obs_below, obs_above = obs_memo[node]
                aa = [memo[a] for a in node.pos_args]
                fn = adaptive_parzen_samplers[node.name]
                b_args = [obs_below, prior_weight] + aa
                named_args = [[kw, memo[arg]]
                              for (kw, arg) in node.named_args]
                b_post = fn(*b_args, **dict(named_args))
                a_args = [obs_above, prior_weight] + aa
                a_post = fn(*a_args, **dict(named_args))

                assert a_post.name == b_post.name
                fn_lpdf = getattr(scope, a_post.name + '_lpdf')
                a_kwargs = dict([(n, a) for n, a in a_post.named_args
                                 if n not in ('rng', 'size')])
                b_kwargs = dict([(n, a) for n, a in b_post.named_args
                                 if n not in ('rng', 'size')])

                # calculate the log-likelihood of the b_post samples under
                # both distributions: the "below" draws are scored by both
                # the below and above densities
                below_llik = fn_lpdf(*([b_post] + b_post.pos_args),
                                     **b_kwargs)
                above_llik = fn_lpdf(*([b_post] + a_post.pos_args),
                                     **a_kwargs)

                # -- keep the candidate maximizing below_llik - above_llik
                new_node = scope.broadcast_best(b_post, below_llik,
                                                above_llik)
            elif hasattr(node, 'obj'):
                # -- keep same literals in the graph
                new_node = node
            else:
                # -- this case is for all the other stuff in the graph
                new_node = node.clone_from_inputs(new_inputs)
            memo[node] = new_node

    post_specs = memo[specs]
    post_idxs = dict([(nid, memo[idxs])
                      for nid, idxs in prior_idxs.items()])
    post_vals = dict([(nid, memo[vals])
                      for nid, vals in prior_vals.items()])
    assert set(post_idxs.keys()) == set(post_vals.keys())
    assert set(post_idxs.keys()) == set(prior_idxs.keys())
    return post_specs, post_idxs, post_vals
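# A plain-numpy illustration (not part of the pyll graph above) of the
# selection rule that broadcast_best encodes: among candidate draws from
# the "below" model, keep the one maximizing below_llik - above_llik.
import numpy as np

below_llik = np.array([-1.0, -0.2, -0.7])   # log l(x) for 3 candidates
above_llik = np.array([-0.5, -1.5, -0.9])   # log g(x) for the same 3
best = int(np.argmax(below_llik - above_llik))
assert best == 1   # the second candidate has the best likelihood ratio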