def __init__(self, bandit, seed=seed, cmd=None, workdir=None):
        self.bandit = bandit
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.cmd = cmd
        self.workdir = workdir
        self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
        before = pyll.dfs(self.bandit.expr)
        # -- raises exception if expr contains cycles
        pyll.toposort(self.bandit.expr)
        vh = self.vh = VectorizeHelper(self.bandit.expr, self.s_new_ids)
        # -- raises exception if v_expr contains cycles
        pyll.toposort(vh.v_expr)

        idxs_by_label = vh.idxs_by_label()
        vals_by_label = vh.vals_by_label()
        after = pyll.dfs(self.bandit.expr)
        # -- try to detect if VectorizeHelper screwed up anything inplace
        assert before == after
        assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
        assert set(idxs_by_label.keys()) == set(self.bandit.params.keys())

        # -- make the graph runnable and SON-encodable
        # N.B. operates inplace
        self.s_idxs_vals = recursive_set_rng_kwarg(
            scope.pos_args(idxs_by_label, vals_by_label),
            pyll.as_apply(self.rng))

        # -- raises an exception if no topological ordering exists
        pyll.toposort(self.s_idxs_vals)
Example #2
0
    def __init__(self, bandit, seed=seed, cmd=None, workdir=None):
        self.bandit = bandit
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)
        self.cmd = cmd
        self.workdir = workdir
        self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
        before = pyll.dfs(self.bandit.expr)
        # -- raises exception if expr contains cycles
        pyll.toposort(self.bandit.expr)
        vh = self.vh = VectorizeHelper(self.bandit.expr, self.s_new_ids)
        # -- raises exception if v_expr contains cycles
        pyll.toposort(vh.v_expr)

        idxs_by_label = vh.idxs_by_label()
        vals_by_label = vh.vals_by_label()
        after = pyll.dfs(self.bandit.expr)
        # -- try to detect if VectorizeHelper screwed up anything inplace
        assert before == after
        assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
        assert set(idxs_by_label.keys()) == set(self.bandit.params.keys())

        # -- make the graph runnable and SON-encodable
        # N.B. operates inplace
        self.s_idxs_vals = recursive_set_rng_kwarg(
                scope.pos_args(idxs_by_label, vals_by_label),
                pyll.as_apply(self.rng))

        # -- raises an exception if no topological ordering exists
        pyll.toposort(self.s_idxs_vals)
Example #3
0
 def __init__(self, expr, expr_idxs):
     self.expr = expr
     self.expr_idxs = expr_idxs
     self.idxs_memo = {expr: expr_idxs}
     self.vals_memo = {}
     self.choice_memo = {}
     self.dfs_nodes = dfs(expr)
     self.node_id = dict([(node, "node_%i" % ii) for ii, node in enumerate(dfs(expr))])
Example #4
0
def test_clone():
    config = config0()
    config2 = clone(config)

    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    foo = recursive_set_rng_kwarg(config, scope.rng_from_seed(5))
    r = rec_eval(foo)
    print r
    r2 = rec_eval(recursive_set_rng_kwarg(config2, scope.rng_from_seed(5)))

    print r2
    assert r == r2
Example #5
0
    def __init__(self, expr, name=None, rseed=None, loss_target=None, exceptions=None, do_checks=True):

        if do_checks:
            if isinstance(expr, pyll.Apply):
                self.expr = expr
                # XXX: verify that expr is a dictionary with the right keys,
                #      then refactor the code below
            elif isinstance(expr, dict):
                if "loss" not in expr:
                    raise ValueError("expr must define a loss")
                if "status" not in expr:
                    expr["status"] = STATUS_OK
                self.expr = pyll.as_apply(expr)
            else:
                raise TypeError("expr must be a dictionary")
        else:
            self.expr = pyll.as_apply(expr)

        self.params = {}
        for node in pyll.dfs(self.expr):
            if node.name == "hyperopt_param":
                self.params[node.arg["label"].obj] = node.arg["obj"]

        if exceptions is not None:
            self.exceptions = exceptions
        self.loss_target = loss_target
        self.installed_rng = False
        if rseed is None:
            self.rng = None
        else:
            self.rng = np.random.RandomState(rseed)

        self.name = name
Example #6
0
    def __init__(self, expr,
                 name=None,
                 loss_target=None,
                 exceptions=None,
                 do_checks=True,
                 ):
        if do_checks:
            if isinstance(expr, pyll.Apply):
                self.expr = expr
                # XXX: verify that expr is a dictionary with the right keys,
                #      then refactor the code below
            elif isinstance(expr, dict):
                if 'loss' not in expr:
                    raise ValueError('expr must define a loss')
                if 'status' not in expr:
                    expr['status'] = STATUS_OK
                self.expr = pyll.as_apply(expr)
            else:
                raise TypeError('expr must be a dictionary')
        else:
            self.expr = pyll.as_apply(expr)

        self.params = {}
        for node in pyll.dfs(self.expr):
            if node.name == 'hyperopt_param':
                label = node.arg['label'].obj
                if label in self.params:
                    raise DuplicateLabel(label)
                self.params[label] = node.arg['obj']

        if exceptions is not None:
            self.exceptions = exceptions
        self.loss_target = loss_target
        self.name = name
Example #7
0
    def __init__(self,
                 fn,
                 expr,
                 args=[],
                 workdir=None,
                 pass_expr_memo_ctrl=None,
                 **bandit_kwargs):
        self.cmd = ('domain_attachment', 'FMinIter_Domain')
        self.fn = fn
        self.expr = expr
        self.args = args
        if pass_expr_memo_ctrl is None:
            self.pass_expr_memo_ctrl = getattr(fn, 'fmin_pass_expr_memo_ctrl',
                                               False)
        else:
            self.pass_expr_memo_ctrl = pass_expr_memo_ctrl
        base.Bandit.__init__(self, expr, do_checks=False, **bandit_kwargs)

        # -- This code was stolen from base.BanditAlgo, a class which may soon
        #    be gone
        self.workdir = workdir
        self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
        before = pyll.dfs(self.expr)
        # -- raises exception if expr contains cycles
        pyll.toposort(self.expr)
        vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
        # -- raises exception if v_expr contains cycles
        pyll.toposort(vh.v_expr)

        idxs_by_label = vh.idxs_by_label()
        vals_by_label = vh.vals_by_label()
        after = pyll.dfs(self.expr)
        # -- try to detect if VectorizeHelper screwed up anything inplace
        assert before == after
        assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
        assert set(idxs_by_label.keys()) == set(self.params.keys())

        # -- make the graph runnable and SON-encodable
        # N.B. operates inplace
        self.s_idxs_vals = recursive_set_rng_kwarg(
            pyll.scope.pos_args(idxs_by_label, vals_by_label),
            pyll.as_apply(self.rng))

        # -- raises an exception if no topological ordering exists
        pyll.toposort(self.s_idxs_vals)
Example #8
0
def test_clone():
    config = config0()
    config2 = clone(config)

    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    foo = recursive_set_rng_kwarg(
                config,
                scope.rng_from_seed(5))
    r = rec_eval(foo)
    print r
    r2 = rec_eval(
            recursive_set_rng_kwarg(
                config2,
                scope.rng_from_seed(5)))

    print r2
    assert r == r2
Example #9
0
    def __init__(self, fn, expr, args=[],
            workdir=None,
            pass_expr_memo_ctrl=None,
            **bandit_kwargs):
        self.cmd = ('domain_attachment', 'FMinIter_Domain')
        self.fn = fn
        self.expr = expr
        self.args = args
        if pass_expr_memo_ctrl is None:
            self.pass_expr_memo_ctrl = getattr(fn,
                    'fmin_pass_expr_memo_ctrl', False)
        else:
            self.pass_expr_memo_ctrl = pass_expr_memo_ctrl
        base.Bandit.__init__(self, expr, do_checks=False, **bandit_kwargs)

        # -- This code was stolen from base.BanditAlgo, a class which may soon
        #    be gone
        self.workdir = workdir
        self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
        before = pyll.dfs(self.expr)
        # -- raises exception if expr contains cycles
        pyll.toposort(self.expr)
        vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
        # -- raises exception if v_expr contains cycles
        pyll.toposort(vh.v_expr)

        idxs_by_label = vh.idxs_by_label()
        vals_by_label = vh.vals_by_label()
        after = pyll.dfs(self.expr)
        # -- try to detect if VectorizeHelper screwed up anything inplace
        assert before == after
        assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
        assert set(idxs_by_label.keys()) == set(self.params.keys())

        # -- make the graph runnable and SON-encodable
        # N.B. operates inplace
        self.s_idxs_vals = recursive_set_rng_kwarg(
                pyll.scope.pos_args(idxs_by_label, vals_by_label),
                pyll.as_apply(self.rng))

        # -- raises an exception if no topological ordering exists
        pyll.toposort(self.s_idxs_vals)
 def memo_from_config(self, config):
     memo = {}
     for node in pyll.dfs(self.expr):
         if node.name == 'hyperopt_param':
             label = node.arg['label'].obj
             # -- hack because it's not really garbagecollected
             #    this does have the desired effect of crashing the
             #    function if rec_eval actually needs a value that
             #    the the optimization algorithm thought to be unnecessary
             memo[node] = config.get(label, pyll.base.GarbageCollected)
     return memo
Example #11
0
 def memo_from_config(self, config):
     memo = {}
     for node in pyll.dfs(self.expr):
         if node.name == 'hyperopt_param':
             label = node.arg['label'].obj
             # -- hack because it's not really garbagecollected
             #    this does have the desired effect of crashing the
             #    function if rec_eval actually needs a value that
             #    the the optimization algorithm thought to be unnecessary
             memo[node] = config.get(label, pyll.base.GarbageCollected)
     return memo
Example #12
0
 def assert_integrity_idxs_take(self):
     idxs_memo = self.idxs_memo
     take_memo = self.take_memo
     after = dfs(self.expr)
     assert after == self.dfs_nodes
     assert set(idxs_memo.keys()) == set(take_memo.keys())
     for node in idxs_memo:
         idxs = idxs_memo[node]
         assert idxs.name == 'array_union'
         vals = take_memo[node][0].pos_args[1]
         for take in take_memo[node]:
             assert take.name == 'idxs_take'
             assert [idxs, vals] == take.pos_args[:2]
Example #13
0
 def assert_integrity_idxs_take(self):
     idxs_memo = self.idxs_memo
     take_memo = self.take_memo
     after = dfs(self.expr)
     assert after == self.dfs_nodes
     assert set(idxs_memo.keys()) == set(take_memo.keys())
     for node in idxs_memo:
         idxs = idxs_memo[node]
         assert idxs.name == 'array_union'
         vals = take_memo[node][0].pos_args[1]
         for take in take_memo[node]:
             assert take.name == 'idxs_take'
             assert [idxs, vals] == take.pos_args[:2]
Example #14
0
def pretty_names(expr, prefix=None):
    dfs_order = dfs(expr)
    # -- compute the seq like pyll.dfs just to ensure that
    #    the order of our names matches the dfs order.
    #    It's not clear to me right now that the match is important,
    #    but it's certainly suspicious if not.
    seq = []
    names = []
    seqset = set()
    if prefix is None:
        prefixes = ()
    else:
        prefixes = (prefix,)
    pretty_names_helper(expr, seq, seqset, prefixes, names)
    assert seq == dfs_order
    return dict(zip(seq, names))
Example #15
0
def replace_repeat_stochastic(expr, return_memo=False):
    nodes = dfs(expr)
    memo = {}
    for ii, orig in enumerate(nodes):
        if orig.name == 'idxs_map' and orig.pos_args[1]._obj in stoch:
            # -- this is an idxs_map of a random draw of distribution `dist`
            idxs = orig.pos_args[0]
            dist = orig.pos_args[1]._obj

            def foo(arg):
                # -- each argument is an idxs, vals pair
                assert arg.name == 'pos_args'
                assert len(arg.pos_args) == 2
                arg_vals = arg.pos_args[1]
                if (arg_vals.name == 'asarray'
                    and arg_vals.inputs()[0].name == 'repeat'):
                    # -- draws are iid, so forget about
                    #    repeating the distribution parameters
                    repeated_thing = arg_vals.inputs()[0].inputs()[1]
                    return repeated_thing
                else:
                    if arg.pos_args[0] is idxs:
                        return arg_vals
                    else:
                        # -- arg.pos_args[0] is a superset of idxs
                        #    TODO: slice out correct elements using
                        #    idxs_take, but more importantly - test this case.
                        raise NotImplementedError()

            new_pos_args = [foo(arg) for arg in orig.pos_args[2:]]
            new_named_args = [[aname, foo(arg)]
                              for aname, arg in orig.named_args]
            vnode = Apply(dist, new_pos_args, new_named_args, None)
            n_times = scope.len(idxs)
            if 'size' in dict(vnode.named_args):
                raise NotImplementedError('random node already has size')
            vnode.named_args.append(['size', n_times])
            # -- loop over all nodes that *use* this one, and change them
            for client in nodes[ii + 1:]:
                client.replace_input(orig, vnode)
            if expr is orig:
                expr = vnode
            memo[orig] = vnode
    if return_memo:
        return expr, memo
    else:
        return expr
Example #16
0
def replace_repeat_stochastic(expr, return_memo=False):
    nodes = dfs(expr)
    memo = {}
    for ii, orig in enumerate(nodes):
        if orig.name == 'idxs_map' and orig.pos_args[1]._obj in stoch:
            # -- this is an idxs_map of a random draw of distribution `dist`
            idxs = orig.pos_args[0]
            dist = orig.pos_args[1]._obj

            def foo(arg):
                # -- each argument is an idxs, vals pair
                assert arg.name == 'pos_args'
                assert len(arg.pos_args) == 2
                arg_vals = arg.pos_args[1]
                if (arg_vals.name == 'asarray'
                        and arg_vals.inputs()[0].name == 'repeat'):
                    # -- draws are iid, so forget about
                    #    repeating the distribution parameters
                    repeated_thing = arg_vals.inputs()[0].inputs()[1]
                    return repeated_thing
                else:
                    if arg.pos_args[0] is idxs:
                        return arg_vals
                    else:
                        # -- arg.pos_args[0] is a superset of idxs
                        #    TODO: slice out correct elements using
                        #    idxs_take, but more importantly - test this case.
                        raise NotImplementedError()

            new_pos_args = [foo(arg) for arg in orig.pos_args[2:]]
            new_named_args = [[aname, foo(arg)]
                              for aname, arg in orig.named_args]
            vnode = Apply(dist, new_pos_args, new_named_args, None)
            n_times = scope.len(idxs)
            if 'size' in dict(vnode.named_args):
                raise NotImplementedError('random node already has size')
            vnode.named_args.append(['size', n_times])
            # -- loop over all nodes that *use* this one, and change them
            for client in nodes[ii + 1:]:
                client.replace_input(orig, vnode)
            if expr is orig:
                expr = vnode
            memo[orig] = vnode
    if return_memo:
        return expr, memo
    else:
        return expr
Example #17
0
 def work(self):
     """Test that all prior samplers are gone"""
     tpe_algo = TreeParzenEstimator(self.bandit)
     foo = pyll.as_apply(
         [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']])
     prior_names = [
         'uniform',
         'quniform',
         'loguniform',
         'qloguniform',
         'normal',
         'qnormal',
         'lognormal',
         'qlognormal',
         'randint',
     ]
     for node in pyll.dfs(foo):
         assert node.name not in prior_names
Example #18
0
def use_obj_for_literal_in_memo(expr, obj, lit, memo):
    """
    Set `memo[node] = obj` for all nodes in expr such that `node.obj == lit`

    This is a useful routine for fmin-compatible functions that are searching
    domains that include some leaf nodes that are complicated
    runtime-generated objects. One option is to make such leaf nodes pyll
    functions, but it can be easier to construct those objects the normal
    Python way in the fmin function, and just stick them into the evaluation
    memo.  The experiment ctrl object itself is inserted using this technique.
    """
    for node in pyll.dfs(expr):
        try:
            if node.obj == lit:
                memo[node] = obj
        except AttributeError:
            # -- non-literal nodes don't have node.obj
            pass
    return memo
Example #19
0
 def work(self):
     """Test that all prior samplers are gone"""
     tpe_algo = TreeParzenEstimator(self.bandit)
     foo = pyll.as_apply([
         tpe_algo.post_below['idxs'],
         tpe_algo.post_below['vals']])
     prior_names = [
             'uniform',
             'quniform',
             'loguniform',
             'qloguniform',
             'normal',
             'qnormal',
             'lognormal',
             'qlognormal',
             'randint',
             ]
     for node in pyll.dfs(foo):
         assert node.name not in prior_names
Example #20
0
def use_obj_for_literal_in_memo(expr, obj, lit, memo):
    """
    Set `memo[node] = obj` for all nodes in expr such that `node.obj == lit`

    This is a useful routine for fmin-compatible functions that are searching
    domains that include some leaf nodes that are complicated
    runtime-generated objects. One option is to make such leaf nodes pyll
    functions, but it can be easier to construct those objects the normal
    Python way in the fmin function, and just stick them into the evaluation
    memo.  The experiment ctrl object itself is inserted using this technique.
    """
    for node in pyll.dfs(expr):
        try:
            if node.obj == lit:
                memo[node] = obj
        except AttributeError:
            # -- non-literal nodes don't have node.obj
            pass
    return memo
Example #21
0
def replace_repeat_stochastic(expr, return_memo=False):
    stoch = stochastic.implicit_stochastic_symbols
    nodes = dfs(expr)
    memo = {}
    for ii, orig in enumerate(nodes):
        if orig.name == "idxs_map" and orig.pos_args[1]._obj in stoch:
            # -- this is an idxs_map of a random draw of distribution `dist`
            idxs = orig.pos_args[0]
            dist = orig.pos_args[1]._obj

            def foo(arg):
                # -- each argument is an idxs, vals pair
                assert arg.name == "pos_args"
                assert len(arg.pos_args) == 2
                assert arg.pos_args[0] is idxs, str(orig)
                arg_vals = arg.pos_args[1]
                if arg_vals.name == "asarray" and arg_vals.inputs()[0].name == "repeat":
                    # -- draws are iid, so forget about
                    #    repeating the distribution parameters
                    repeated_thing = arg_vals.inputs()[0].inputs()[1]
                    return repeated_thing
                else:
                    return arg_vals

            new_pos_args = [foo(arg) for arg in orig.pos_args[2:]]
            new_named_args = [[aname, foo(arg)] for aname, arg in orig.named_args]
            vnode = Apply(dist, new_pos_args, new_named_args, None)
            n_times = scope.len(idxs)
            vnode.named_args.append(["size", n_times])
            # -- loop over all nodes that *use* this one, and change them
            for client in nodes[ii + 1 :]:
                client.replace_input(orig, vnode)
            if expr is orig:
                expr = vnode
            memo[orig] = vnode
    if return_memo:
        return expr, memo
    else:
        return expr
Example #22
0
    def __init__(self, expr, expr_idxs, build=True):
        self.expr = expr
        self.expr_idxs = expr_idxs
        self.dfs_nodes = dfs(expr)
        self.params = {}
        for ii, node in enumerate(self.dfs_nodes):
            if node.name == 'hyperopt_param':
                label = node.arg['label'].obj
                self.params[label] = node.arg['obj']
        # -- recursive construction
        #    This makes one term in each idxs, vals memo for every
        #    directed path through the switches in the graph.

        self.idxs_memo = {}  # node -> union, all idxs computed
        self.take_memo = {}  # node -> list of idxs_take retrieving node vals
        self.v_expr = self.build_idxs_vals(expr, expr_idxs)

        #TODO: graph-optimization pass to remove cruft:
        #  - unions of 1
        #  - unions of full sets with their subsets
        #  - idxs_take that can be merged

        self.assert_integrity_idxs_take()
Example #23
0
    def __init__(self, expr, expr_idxs, build=True):
        self.expr = expr
        self.expr_idxs = expr_idxs
        self.dfs_nodes = dfs(expr)
        self.params = {}
        for ii, node in enumerate(self.dfs_nodes):
            if node.name == 'hyperopt_param':
                label = node.arg['label'].obj
                self.params[label] = node.arg['obj']
        # -- recursive construction
        #    This makes one term in each idxs, vals memo for every
        #    directed path through the switches in the graph.

        self.idxs_memo = {}  # node -> union, all idxs computed
        self.take_memo = {}  # node -> list of idxs_take retrieving node vals
        self.v_expr = self.build_idxs_vals(expr, expr_idxs)

        #TODO: graph-optimization pass to remove cruft:
        #  - unions of 1
        #  - unions of full sets with their subsets
        #  - idxs_take that can be merged

        self.assert_integrity_idxs_take()
Example #24
0
    def __init__(self, fn, expr,
                 workdir=None,
                 pass_expr_memo_ctrl=None,
                 name=None,
                 loss_target=None,
                 ):
        """
        Paramaters
        ----------

        fn : callable
            This stores the `fn` argument to `fmin`. (See `hyperopt.fmin.fmin`)

        expr : hyperopt.pyll.Apply
            This is the `space` argument to `fmin`. (See `hyperopt.fmin.fmin`)

        workdir : string (or None)
            If non-None, the current working directory will be `workdir`while
            `expr` and `fn` are evaluated. (XXX Currently only respected by
            jobs run via MongoWorker)

        pass_expr_memo_ctrl : bool
            If True, `fn` will be called like this:
            `fn(self.expr, memo, ctrl)`,
            where `memo` is a dictionary mapping `Apply` nodes to their
            computed values, and `ctrl` is a `Ctrl` instance for communicating
            with a Trials database.  This lower-level calling convention is
            useful if you want to call e.g. `hyperopt.pyll.rec_eval` yourself
            in some customized way.

        name : string (or None)
            Label, used for pretty-printing.

        loss_target : float (or None)
            The actual or estimated minimum of `fn`.
            Some optimization algorithms may behave differently if their first
            objective is to find an input that achieves a certain value,
            rather than the more open-ended objective of pure minimization.
            XXX: Move this from Domain to be an fmin arg.

        """
        self.fn = fn
        if pass_expr_memo_ctrl is None:
            self.pass_expr_memo_ctrl = getattr(fn,
                                               'fmin_pass_expr_memo_ctrl',
                                               False)
        else:
            self.pass_expr_memo_ctrl = pass_expr_memo_ctrl

        self.expr = pyll.as_apply(expr)

        self.params = {}
        for node in pyll.dfs(self.expr):
            if node.name == 'hyperopt_param':
                label = node.arg['label'].obj
                if label in self.params:
                    raise DuplicateLabel(label)
                self.params[label] = node.arg['obj']

        self.loss_target = loss_target
        self.name = name

        self.workdir = workdir
        self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
        before = pyll.dfs(self.expr)
        # -- raises exception if expr contains cycles
        pyll.toposort(self.expr)
        vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
        # -- raises exception if v_expr contains cycles
        pyll.toposort(vh.v_expr)

        idxs_by_label = vh.idxs_by_label()
        vals_by_label = vh.vals_by_label()
        after = pyll.dfs(self.expr)
        # -- try to detect if VectorizeHelper screwed up anything inplace
        assert before == after
        assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
        assert set(idxs_by_label.keys()) == set(self.params.keys())

        self.s_rng = pyll.Literal('rng-placeholder')
        # -- N.B. operates inplace:
        self.s_idxs_vals = recursive_set_rng_kwarg(
            pyll.scope.pos_args(idxs_by_label, vals_by_label),
            self.s_rng)

        # -- raises an exception if no topological ordering exists
        pyll.toposort(self.s_idxs_vals)

        # -- Protocol for serialization.
        #    self.cmd indicates to e.g. MongoWorker how this domain
        #    should be [un]serialized.
        #    XXX This mechanism deserves review as support for ipython
        #        workers improves.
        self.cmd = ('domain_attachment', 'FMinIter_Domain')
Example #25
0
    def __init__(
        self,
        fn,
        expr,
        workdir=None,
        pass_expr_memo_ctrl=None,
        name=None,
        loss_target=None,
    ):
        """
        Paramaters
        ----------

        fn : callable
            This stores the `fn` argument to `fmin`. (See `hyperopt.fmin.fmin`)

        expr : hyperopt.pyll.Apply
            This is the `space` argument to `fmin`. (See `hyperopt.fmin.fmin`)

        workdir : string (or None)
            If non-None, the current working directory will be `workdir`while
            `expr` and `fn` are evaluated. (XXX Currently only respected by
            jobs run via MongoWorker)

        pass_expr_memo_ctrl : bool
            If True, `fn` will be called like this:
            `fn(self.expr, memo, ctrl)`,
            where `memo` is a dictionary mapping `Apply` nodes to their
            computed values, and `ctrl` is a `Ctrl` instance for communicating
            with a Trials database.  This lower-level calling convention is
            useful if you want to call e.g. `hyperopt.pyll.rec_eval` yourself
            in some customized way.

        name : string (or None)
            Label, used for pretty-printing.

        loss_target : float (or None)
            The actual or estimated minimum of `fn`.
            Some optimization algorithms may behave differently if their first
            objective is to find an input that achieves a certain value,
            rather than the more open-ended objective of pure minimization.
            XXX: Move this from Domain to be an fmin arg.

        """
        self.fn = fn
        if pass_expr_memo_ctrl is None:
            self.pass_expr_memo_ctrl = getattr(fn, 'fmin_pass_expr_memo_ctrl',
                                               False)
        else:
            self.pass_expr_memo_ctrl = pass_expr_memo_ctrl

        self.expr = pyll.as_apply(expr)

        self.params = {}
        for node in pyll.dfs(self.expr):
            if node.name == 'hyperopt_param':
                label = node.arg['label'].obj
                if label in self.params:
                    raise DuplicateLabel(label)
                self.params[label] = node.arg['obj']

        self.loss_target = loss_target
        self.name = name

        self.workdir = workdir
        self.s_new_ids = pyll.Literal('new_ids')  # -- list at eval-time
        before = pyll.dfs(self.expr)
        # -- raises exception if expr contains cycles
        pyll.toposort(self.expr)
        vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
        # -- raises exception if v_expr contains cycles
        pyll.toposort(vh.v_expr)

        idxs_by_label = vh.idxs_by_label()
        vals_by_label = vh.vals_by_label()
        after = pyll.dfs(self.expr)
        # -- try to detect if VectorizeHelper screwed up anything inplace
        assert before == after
        assert set(idxs_by_label.keys()) == set(vals_by_label.keys())
        assert set(idxs_by_label.keys()) == set(self.params.keys())

        self.s_rng = pyll.Literal('rng-placeholder')
        # -- N.B. operates inplace:
        self.s_idxs_vals = recursive_set_rng_kwarg(
            pyll.scope.pos_args(idxs_by_label, vals_by_label), self.s_rng)

        # -- raises an exception if no topological ordering exists
        pyll.toposort(self.s_idxs_vals)

        # -- Protocol for serialization.
        #    self.cmd indicates to e.g. MongoWorker how this domain
        #    should be [un]serialized.
        #    XXX This mechanism deserves review as support for ipython
        #        workers improves.
        self.cmd = ('domain_attachment', 'FMinIter_Domain')
Example #26
0
def build_posterior(specs, prior_idxs, prior_vals, obs_idxs, obs_vals,
        oloss_idxs, oloss_vals, oloss_gamma, prior_weight):
    """
    This method clones a posterior inference graph by iterating forward in
    topological order, and replacing prior random-variables (prior_vals) with
    new posterior distributions that make use of observations (obs_vals).

    """
    assert all(isinstance(arg, pyll.Apply)
            for arg in [oloss_idxs, oloss_vals, oloss_gamma])

    expr = pyll.as_apply([specs, prior_idxs, prior_vals])
    nodes = pyll.dfs(expr)

    # build the joint posterior distribution as the values in this memo
    memo = {}
    # map prior RVs to observations
    obs_memo = {}

    for nid in prior_vals:
        # construct the leading args for each call to adaptive_parzen_sampler
        # which will permit the "adaptive parzen samplers" to adapt to the
        # correct samples.
        obs_below, obs_above = scope.ap_filter_trials(
                obs_idxs[nid], obs_vals[nid],
                oloss_idxs, oloss_vals, oloss_gamma)
        obs_memo[prior_vals[nid]] = [obs_below, obs_above]
    for node in nodes:
        if node not in memo:
            new_inputs = [memo[arg] for arg in node.inputs()]
            if node in obs_memo:
                # -- this case corresponds to an observed Random Var
                # node.name is a distribution like "normal", "randint", etc.
                obs_below, obs_above = obs_memo[node]
                aa = [memo[a] for a in node.pos_args]
                fn = adaptive_parzen_samplers[node.name]
                b_args = [obs_below, prior_weight] + aa
                named_args = [[kw, memo[arg]]
                        for (kw, arg) in node.named_args]
                b_post = fn(*b_args, **dict(named_args))
                a_args = [obs_above, prior_weight] + aa
                a_post = fn(*a_args, **dict(named_args))

                assert a_post.name == b_post.name
                fn_lpdf = getattr(scope, a_post.name + '_lpdf')
                #print fn_lpdf
                a_kwargs = dict([(n, a) for n, a in a_post.named_args
                            if n not in ('rng', 'size')])
                b_kwargs = dict([(n, a) for n, a in b_post.named_args
                            if n not in ('rng', 'size')])

                # calculate the llik of b_post under both distributions
                below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
                above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)

                #improvement = below_llik - above_llik
                #new_node = scope.broadcast_best(b_post, improvement)
                new_node = scope.broadcast_best(b_post, below_llik, above_llik)
            elif hasattr(node, 'obj'):
                # -- keep same literals in the graph
                new_node = node
            else:
                # -- this case is for all the other stuff in the graph
                new_node = node.clone_from_inputs(new_inputs)
            memo[node] = new_node
    post_specs = memo[specs]
    post_idxs = dict([(nid, memo[idxs])
        for nid, idxs in prior_idxs.items()])
    post_vals = dict([(nid, memo[vals])
        for nid, vals in prior_vals.items()])
    assert set(post_idxs.keys()) == set(post_vals.keys())
    assert set(post_idxs.keys()) == set(prior_idxs.keys())
    return post_specs, post_idxs, post_vals