예제 #1
0
def ap_categorical_sampler(obs, prior_weight, upper,
        size=(), rng=None, LF=DEFAULT_LF):
    """Build a categorical sampler expression from weighted observations.

    The observations are weighted with ``scope.linear_forgetting_weights``,
    binned with ``scope.bincount``, offset by ``prior_weight`` and normalised
    by their sum before being handed to ``scope.categorical``.

    Args:
        obs: observations passed to ``scope.bincount`` (presumably category
            indices -- confirm against callers).
        prior_weight: value added to every weighted bin count.
        upper: used both as ``minlength`` of the bincount and as ``upper``
            of the categorical draw.
        size: forwarded to ``scope.categorical``.
        rng: forwarded to ``scope.categorical``.
        LF: forwarded to ``scope.linear_forgetting_weights``.

    Returns:
        Whatever ``scope.categorical`` returns for the normalised counts.
    """
    n_obs = scope.len(obs)
    forgetting = scope.linear_forgetting_weights(n_obs, LF=LF)
    weighted_counts = scope.bincount(obs, minlength=upper, weights=forgetting)
    # -- the prior contributes the same pseudocount to every bin
    smoothed = weighted_counts + prior_weight
    probabilities = smoothed / scope.sum(smoothed)
    return scope.categorical(probabilities, upper=upper, size=size, rng=rng)
예제 #2
0
def ap_categorical_sampler(obs,
                           prior_weight,
                           p,
                           upper=None,
                           size=(),
                           rng=None,
                           LF=DEFAULT_LF):
    """Build a categorical sampler expression from weighted observations.

    The observations are weighted with ``scope.linear_forgetting_weights``
    and binned with ``scope.bincount``; ``scope.tpe_cat_pseudocounts`` then
    combines the counts with ``upper``, ``prior_weight``, ``p`` and ``size``
    and its result is handed to ``scope.categorical``.

    Args:
        obs: observations passed to ``scope.bincount``.
        prior_weight: forwarded to ``scope.tpe_cat_pseudocounts``.
        p: forwarded to ``scope.tpe_cat_pseudocounts`` (presumably the prior
            category probabilities -- confirm against that helper).
        upper: ``minlength`` of the bincount and ``upper`` of the draw.
        size: forwarded to both the pseudocount helper and the draw.
        rng: forwarded to ``scope.categorical``.
        LF: forwarded to ``scope.linear_forgetting_weights``.

    Returns:
        Whatever ``scope.categorical`` returns.
    """
    n_obs = scope.len(obs)
    forgetting = scope.linear_forgetting_weights(n_obs, LF=LF)
    weighted_counts = scope.bincount(obs, minlength=upper, weights=forgetting)
    posterior = scope.tpe_cat_pseudocounts(
        weighted_counts, upper, prior_weight, p, size)
    return scope.categorical(posterior, upper=upper, size=size, rng=rng)
예제 #3
0
 def build_idxs(self):
     """Push each node's idxs expression down to its inputs.

     Nodes are visited in the reverse of ``self.dfs_nodes``.  For a
     ``"one_of"`` node a ``scope.randint`` choice expression is created,
     recorded in ``self.choice_memo`` and ``self.node_id``, and the node's
     idxs are split per option with ``scope.vchoice_split``.  For any other
     node every input is merged with the node's own idxs.
     """
     for node in reversed(self.dfs_nodes):
         parent_idxs = self.idxs_memo[node]
         if node.name != "one_of":
             # -- ordinary node: every input is merged with the node's idxs
             for child in node.inputs():
                 self.merge(parent_idxs, child)
             continue

         # -- "one_of": one random option index per entry of parent_idxs
         n_options = len(node.pos_args)
         picks = scope.randint(n_options, size=scope.len(parent_idxs))
         self.choice_memo[node] = picks
         self.merge(parent_idxs, picks)
         self.node_id[picks] = "node_%i" % len(self.node_id)
         # -- each option is merged with its own slice of parent_idxs
         split_idxs = scope.vchoice_split(parent_idxs, picks, n_options)
         for position, option in enumerate(node.pos_args):
             self.merge(split_idxs[position], option)
예제 #4
0
def replace_repeat_stochastic(expr, return_memo=False):
    """Replace ``idxs_map`` nodes over stochastic symbols with direct draws.

    Every node of ``dfs(expr)`` named ``'idxs_map'`` whose second positional
    argument wraps an object found in ``stoch`` is replaced by a new
    ``Apply`` of that object, with a ``size`` named argument set to
    ``scope.len`` of the node's idxs.  Nodes later in the dfs order have
    their inputs rewired to the replacement.

    Args:
        expr: root expression of the graph to rewrite (modified in place).
        return_memo: when true, also return the ``{original: replacement}``
            mapping.

    Returns:
        The (possibly replaced) root expression, or ``(expr, memo)`` when
        ``return_memo`` is true.

    Raises:
        NotImplementedError: if an argument's idxs differ from the node's
            idxs, or if the new node already carries a ``size`` argument.
    """
    nodes = dfs(expr)
    memo = {}
    for position, orig in enumerate(nodes):
        if orig.name != 'idxs_map' or orig.pos_args[1]._obj not in stoch:
            continue

        # -- `orig` is an idxs_map of a random draw of distribution `dist`
        idxs = orig.pos_args[0]
        dist = orig.pos_args[1]._obj

        def unwrap(pair):
            # -- every argument of an idxs_map is an (idxs, vals) pair
            assert pair.name == 'pos_args'
            assert len(pair.pos_args) == 2
            vals = pair.pos_args[1]
            is_repeated = (vals.name == 'asarray'
                           and vals.inputs()[0].name == 'repeat')
            if is_repeated:
                # -- draws are iid, so the un-repeated parameter suffices
                return vals.inputs()[0].inputs()[1]
            if pair.pos_args[0] is idxs:
                return vals
            # -- pair.pos_args[0] is a superset of idxs
            #    TODO: slice out correct elements using
            #    idxs_take, but more importantly - test this case.
            raise NotImplementedError()

        positional = [unwrap(arg) for arg in orig.pos_args[2:]]
        keyword = [[aname, unwrap(arg)] for aname, arg in orig.named_args]
        vnode = Apply(dist, positional, keyword, None)
        n_times = scope.len(idxs)
        if 'size' in dict(vnode.named_args):
            raise NotImplementedError('random node already has size')
        vnode.named_args.append(['size', n_times])

        # -- rewire every node that comes later in the dfs order
        for client in nodes[position + 1:]:
            client.replace_input(orig, vnode)
        if expr is orig:
            expr = vnode
        memo[orig] = vnode

    if return_memo:
        return expr, memo
    return expr
예제 #5
0
def replace_repeat_stochastic(expr, return_memo=False):
    """Collapse ``idxs_map`` wrappers around stochastic symbols.

    Walks ``dfs(expr)``; each ``'idxs_map'`` node whose second positional
    argument wraps an object in ``stoch`` is swapped for a fresh ``Apply``
    of that object carrying ``size=scope.len(idxs)``.  Inputs of the nodes
    that follow in the dfs order are redirected to the new node.

    Args:
        expr: root expression of the graph to rewrite (modified in place).
        return_memo: if true, the mapping from replaced nodes to their
            replacements is returned as well.

    Returns:
        The root expression after rewriting, or the pair ``(expr, memo)``
        when ``return_memo`` is true.

    Raises:
        NotImplementedError: when an argument is indexed by something other
            than the node's idxs, or the new node already has ``size``.
    """
    nodes = dfs(expr)
    memo = {}
    for index, orig in enumerate(nodes):
        if orig.name != 'idxs_map' or orig.pos_args[1]._obj not in stoch:
            continue

        # -- `orig` maps the distribution `dist` over `idxs`
        idxs = orig.pos_args[0]
        dist = orig.pos_args[1]._obj

        def strip_repeat(pair):
            # -- arguments arrive as (idxs, vals) pairs
            assert pair.name == 'pos_args'
            assert len(pair.pos_args) == 2
            vals = pair.pos_args[1]
            if (vals.name == 'asarray'
                    and vals.inputs()[0].name == 'repeat'):
                # -- draws are iid, so forget about
                #    repeating the distribution parameters
                return vals.inputs()[0].inputs()[1]
            if pair.pos_args[0] is not idxs:
                # -- pair.pos_args[0] is a superset of idxs
                #    TODO: slice out correct elements using
                #    idxs_take, but more importantly - test this case.
                raise NotImplementedError()
            return vals

        pos_params = [strip_repeat(arg) for arg in orig.pos_args[2:]]
        named_params = [[aname, strip_repeat(arg)]
                        for aname, arg in orig.named_args]
        vnode = Apply(dist, pos_params, named_params, None)
        n_times = scope.len(idxs)
        if 'size' in dict(vnode.named_args):
            raise NotImplementedError('random node already has size')
        vnode.named_args.append(['size', n_times])

        # -- nodes after `orig` in dfs order may use it: redirect them
        for client in nodes[index + 1:]:
            client.replace_input(orig, vnode)
        if expr is orig:
            expr = vnode
        memo[orig] = vnode

    return (expr, memo) if return_memo else expr
예제 #6
0
def vectorize_stochastic(orig):
    """Turn an ``idxs_map`` over a stochastic symbol into a direct draw.

    Args:
        orig: expression node to inspect.

    Returns:
        ``orig`` itself unless it is an ``'idxs_map'`` node whose second
        positional argument wraps an object in ``stoch``; in that case a new
        ``Apply`` of that object with ``size=scope.len(idxs)`` appended to
        its named arguments.

    Raises:
        NotImplementedError: if an argument is indexed by something other
            than the node's idxs, or the new node already has ``size``.
    """
    if orig.name != 'idxs_map' or orig.pos_args[1]._obj not in stoch:
        return orig

    # -- `orig` is an idxs_map of a random draw of distribution `dist`
    idxs = orig.pos_args[0]
    dist = orig.pos_args[1]._obj

    def unwrap(pair):
        # -- every argument is an (idxs, vals) pair
        assert pair.name == 'pos_args'
        assert len(pair.pos_args) == 2
        vals = pair.pos_args[1]

        # XXX: write a pattern-substitution rule for this case
        if vals.name == 'idxs_take':
            source = vals.arg['vals']
            if source.name == 'asarray':
                inner = source.inputs()[0]
                if inner.name == 'repeat':
                    # -- draws are iid, so the un-repeated
                    #    distribution parameter is enough
                    return inner.inputs()[1]
        if pair.pos_args[0] is not idxs:
            # -- pair.pos_args[0] is a superset of idxs
            #    TODO: slice out correct elements using
            #    idxs_take, but more importantly - test this case.
            raise NotImplementedError()
        return vals

    positional = [unwrap(arg) for arg in orig.pos_args[2:]]
    keyword = [[aname, unwrap(arg)] for aname, arg in orig.named_args]
    vnode = Apply(dist, positional, keyword, o_len=None)
    n_times = scope.len(idxs)
    if 'size' in dict(vnode.named_args):
        raise NotImplementedError('random node already has size')
    vnode.named_args.append(['size', n_times])
    return vnode
예제 #7
0
def replace_repeat_stochastic(expr, return_memo=False):
    """Replace ``idxs_map`` nodes over implicit stochastic symbols.

    Each node of ``dfs(expr)`` named ``"idxs_map"`` whose second positional
    argument wraps an object in ``stochastic.implicit_stochastic_symbols``
    is replaced by a new ``Apply`` of that object with a ``size`` named
    argument equal to ``scope.len`` of the node's idxs.  Nodes that come
    later in the dfs order have their inputs rewired to the replacement.

    Args:
        expr: root expression of the graph to rewrite (modified in place).
        return_memo: when true, also return the ``{original: replacement}``
            mapping.

    Returns:
        The (possibly replaced) root expression, or ``(expr, memo)`` when
        ``return_memo`` is true.
    """
    stoch = stochastic.implicit_stochastic_symbols
    nodes = dfs(expr)
    memo = {}
    for position, orig in enumerate(nodes):
        if orig.name != "idxs_map" or orig.pos_args[1]._obj not in stoch:
            continue

        # -- `orig` is an idxs_map of a random draw of distribution `dist`
        idxs = orig.pos_args[0]
        dist = orig.pos_args[1]._obj

        def unwrap(pair):
            # -- every argument is an (idxs, vals) pair over the same idxs
            assert pair.name == "pos_args"
            assert len(pair.pos_args) == 2
            assert pair.pos_args[0] is idxs, str(orig)
            vals = pair.pos_args[1]
            if vals.name != "asarray":
                return vals
            repeated = vals.inputs()[0]
            if repeated.name != "repeat":
                return vals
            # -- draws are iid, so the un-repeated parameter suffices
            return repeated.inputs()[1]

        positional = [unwrap(arg) for arg in orig.pos_args[2:]]
        keyword = [[aname, unwrap(arg)] for aname, arg in orig.named_args]
        vnode = Apply(dist, positional, keyword, None)
        n_times = scope.len(idxs)
        vnode.named_args.append(["size", n_times])

        # -- rewire every node that comes later in the dfs order
        for client in nodes[position + 1:]:
            client.replace_input(orig, vnode)
        if expr is orig:
            expr = vnode
        memo[orig] = vnode

    if return_memo:
        return expr, memo
    return expr
예제 #8
0
 def build_vals(self):
     """Fill ``self.vals_memo`` with a vectorized node for every dfs node.

     Nodes are visited in ``self.dfs_nodes`` order:

     * ``"literal"`` nodes become ``asarray(repeat(n, node))`` where ``n`` is
       ``scope.len`` of the node's idxs;
     * nodes present in ``self.choice_memo`` become a ``vchoice_merge`` over
       the (idxs, vals) pairs of their positional arguments;
     * every other node becomes an ``idxs_map`` whose inputs are replaced by
       (idxs, vals) pairs.
     """
     for node in self.dfs_nodes:
         if node.name == "literal":
             repeats = scope.len(self.idxs_memo[node])
             vectorized = scope.asarray(scope.repeat(repeats, node))
         elif node in self.choice_memo:
             # -- choices are natively vectorized
             picks = self.choice_memo[node]
             self.vals_memo[picks] = picks
             # -- stitch the per-option sub-graphs back together
             #    to define the original node
             vectorized = scope.vchoice_merge(self.idxs_memo[node], picks)
             option_pairs = [
                 as_apply([self.idxs_memo[option], self.vals_memo[option]])
                 for option in node.pos_args
             ]
             vectorized.pos_args.extend(option_pairs)
         else:
             vectorized = scope.idxs_map(self.idxs_memo[node], node.name)
             vectorized.pos_args.extend(node.pos_args)
             vectorized.named_args.extend(node.named_args)
             # -- swap each original input for its (idxs, vals) pair
             for child in node.inputs():
                 pair = as_apply([self.idxs_memo[child], self.vals_memo[child]])
                 vectorized.replace_input(child, pair)
         self.vals_memo[node] = vectorized
예제 #9
0
def vectorize_stochastic(orig):
    """Rewrite an ``idxs_map`` of a stochastic symbol as a sized draw.

    Args:
        orig: expression node to inspect.

    Returns:
        A new ``Apply`` of the wrapped distribution with
        ``size=scope.len(idxs)`` when ``orig`` is an ``'idxs_map'`` node
        whose second positional argument wraps an object in ``stoch``;
        otherwise ``orig`` unchanged.

    Raises:
        NotImplementedError: when an argument's idxs are not the node's own
            idxs, or when the new node already has a ``size`` argument.
    """
    is_stochastic_map = (orig.name == 'idxs_map'
                         and orig.pos_args[1]._obj in stoch)
    if not is_stochastic_map:
        return orig

    # -- `orig` maps the distribution `dist` over `idxs`
    idxs = orig.pos_args[0]
    dist = orig.pos_args[1]._obj

    def strip_repeat(pair):
        # -- arguments arrive as (idxs, vals) pairs
        assert pair.name == 'pos_args'
        assert len(pair.pos_args) == 2
        vals = pair.pos_args[1]

        # XXX: write a pattern-substitution rule for this case
        if vals.name == 'idxs_take':
            taken_from = vals.arg['vals']
            if taken_from.name == 'asarray':
                wrapped = taken_from.inputs()[0]
                if wrapped.name == 'repeat':
                    # -- draws are iid, so forget about
                    #    repeating the distribution parameters
                    return wrapped.inputs()[1]
        if pair.pos_args[0] is idxs:
            return vals
        # -- pair.pos_args[0] is a superset of idxs
        #    TODO: slice out correct elements using
        #    idxs_take, but more importantly - test this case.
        raise NotImplementedError()

    pos_params = [strip_repeat(arg) for arg in orig.pos_args[2:]]
    named_params = [[aname, strip_repeat(arg)]
                    for aname, arg in orig.named_args]
    vnode = Apply(dist, pos_params, named_params, o_len=None)
    n_times = scope.len(idxs)
    if 'size' in dict(vnode.named_args):
        raise NotImplementedError('random node already has size')
    vnode.named_args.append(['size', n_times])
    return vnode
예제 #10
0
파일: tpe.py 프로젝트: gabekron/hyperopt
def ap_categorical_sampler(obs, prior_weight, p, upper=None,
        size=(), rng=None, LF=DEFAULT_LF):
    """Build a categorical sampler expression from weighted observations.

    Observations are weighted by ``scope.linear_forgetting_weights`` and
    binned by ``scope.bincount``; ``scope.tpe_cat_pseudocounts`` turns the
    counts into the argument of ``scope.categorical``.

    Args:
        obs: observations passed to ``scope.bincount``.
        prior_weight: forwarded to ``scope.tpe_cat_pseudocounts``.
        p: forwarded to ``scope.tpe_cat_pseudocounts``.
        upper: ``minlength`` of the bincount and ``upper`` of the draw.
        size: forwarded to both the pseudocount helper and the draw.
        rng: forwarded to ``scope.categorical``.
        LF: forwarded to ``scope.linear_forgetting_weights``.

    Returns:
        Whatever ``scope.categorical`` returns.
    """
    n_obs = scope.len(obs)
    forgetting = scope.linear_forgetting_weights(n_obs, LF=LF)
    weighted_counts = scope.bincount(obs, minlength=upper, weights=forgetting)
    posterior = scope.tpe_cat_pseudocounts(
        weighted_counts, upper, prior_weight, p, size)
    return scope.categorical(posterior, upper=upper, size=size, rng=rng)
예제 #11
0
    def build_idxs_vals(self, node, wanted_idxs):
        """
        Build the vectorized values of `node` restricted to `wanted_idxs`.

        This recursive procedure should be called on an output-node.

        `hyperopt_param` and `hyperopt_result` nodes are transparent: the
        call is forwarded to their `obj` argument.  For `literal`, `switch`
        and all other nodes, the union of requested indexes is kept in
        `self.idxs_memo[node]` and every `idxs_take` handed out for the node
        is kept in `self.take_memo[node]`, so that earlier takes can be
        re-pointed when the node's values are rebuilt.

        Parameters
        ----------
        node : graph node to vectorize; read through `.name`, `.pos_args`,
            `.named_args` and `.arg`.
        wanted_idxs : index expression for which values of `node` are
            requested; it becomes the third argument of `idxs_take`.

        Returns
        -------
        The `idxs_take` expression for `wanted_idxs` (for the transparent
        nodes, whatever the forwarded call returns).

        Raises
        ------
        NotImplementedError
            In the general case, when `wanted_idxs` is already listed in the
            node's index union but no recorded take uses it.
        """
        # -- debugging switch: set to True to validate graph integrity and
        #    acyclicity (via toposort) at every checkpoint() call
        checkpoint_asserts = False

        def checkpoint():
            if checkpoint_asserts:
                self.assert_integrity_idxs_take()
                if node in self.idxs_memo:
                    toposort(self.idxs_memo[node])
                if node in self.take_memo:
                    for take in self.take_memo[node]:
                        toposort(take)

        checkpoint()

        # wanted_idxs are fixed, whereas idxs_memo
        # is full of unions, that can grow in subsequent recursive
        # calls to build_idxs_vals with node as argument.
        assert wanted_idxs != self.idxs_memo.get(node)

        # -- easy exit case
        if node.name == 'hyperopt_param':
            # -- ignore, not vectorizing
            return self.build_idxs_vals(node.arg['obj'], wanted_idxs)

        # -- easy exit case
        elif node.name == 'hyperopt_result':
            # -- ignore, not vectorizing
            return self.build_idxs_vals(node.arg['obj'], wanted_idxs)

        # -- literal case: always take from universal set
        elif node.name == 'literal':
            if node in self.idxs_memo:
                # -- reuse the idxs/vals of the first take recorded for
                #    this literal and add one more take for wanted_idxs
                all_idxs, all_vals = self.take_memo[node][0].pos_args[:2]
                wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                self.take_memo[node].append(wanted_vals)
                checkpoint()
            else:
                # -- initialize idxs_memo to full set
                all_idxs = self.expr_idxs
                n_times = scope.len(all_idxs)
                # -- put array_union into graph for consistency, though it is
                # not necessary
                all_idxs = scope.array_union(all_idxs)
                self.idxs_memo[node] = all_idxs
                # -- the literal is repeated once per element of expr_idxs
                all_vals = scope.asarray(scope.repeat(n_times, node))
                wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                assert node not in self.take_memo
                self.take_memo[node] = [wanted_vals]
                checkpoint()
            return wanted_vals

        # -- switch case: complicated
        elif node.name == 'switch':
            if (node in self.idxs_memo
                and wanted_idxs in self.idxs_memo[node].pos_args):
                # -- phew, easy case
                #    wanted_idxs is already part of the union: take from the
                #    idxs/vals of the first recorded take
                all_idxs, all_vals = self.take_memo[node][0].pos_args[:2]
                wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                self.take_memo[node].append(wanted_vals)
                checkpoint()
            else:
                # -- we need to add some indexes
                if node in self.idxs_memo:
                    all_idxs = self.idxs_memo[node]
                    assert all_idxs.name == 'array_union'
                    # NOTE(review): wanted_idxs is appended here and again
                    # in the bookkeeping block below, so it ends up in the
                    # union twice -- confirm whether that is intended.
                    all_idxs.pos_args.append(wanted_idxs)
                else:
                    all_idxs = scope.array_union(wanted_idxs)

                # -- first positional arg is the choice; the rest are options
                choice = node.pos_args[0]
                all_choices = self.build_idxs_vals(choice, all_idxs)

                options = node.pos_args[1:]
                # -- one idxs expression per option
                args_idxs = scope.vchoice_split(all_idxs, all_choices,
                                                len(options))
                all_vals = scope.vchoice_merge(all_idxs, all_choices)
                # -- each option is built only for its own split of the idxs
                for opt_ii, idxs_ii in zip(options, args_idxs):
                    all_vals.pos_args.append(
                        as_apply([
                            idxs_ii,
                            self.build_idxs_vals(opt_ii, idxs_ii),
                        ]))

                wanted_vals = scope.idxs_take(
                    all_idxs,  # -- may grow in future
                    all_vals,  # -- may be replaced in future
                    wanted_idxs)  # -- fixed.
                if node in self.idxs_memo:
                    assert self.idxs_memo[node].name == 'array_union'
                    self.idxs_memo[node].pos_args.append(wanted_idxs)
                    # -- re-point every earlier take at the rebuilt values
                    for take in self.take_memo[node]:
                        assert take.name == 'idxs_take'
                        take.pos_args[1] = all_vals
                    self.take_memo[node].append(wanted_vals)
                else:
                    self.idxs_memo[node] = all_idxs
                    self.take_memo[node] = [wanted_vals]
                checkpoint()

        # -- general case
        else:
            # -- this is a general node.
            #    It is generally handled with idxs_memo,
            #    but vectorize_stochastic may immediately transform it into
            #    a more compact form.
            if (node in self.idxs_memo
                and wanted_idxs in self.idxs_memo[node].pos_args):
                # -- phew, easy case
                #    hand back the take already recorded for wanted_idxs
                for take in self.take_memo[node]:
                    if take.pos_args[2] == wanted_idxs:
                        return take
                # -- the union lists wanted_idxs but no take uses it
                raise NotImplementedError('how did this happen?')
                #all_idxs, all_vals = self.take_memo[node][0].pos_args[:2]
                #wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                #self.take_memo[node].append(wanted_vals)
                #checkpoint()
            else:
                # XXX
                # -- determine if wanted_idxs is actually a subset of the idxs
                # that we are already computing.  This is not only an
                # optimization, but prevents the creation of cycles, which
                # would otherwise occur if we have a graph of the form
                # switch(f(a), g(a), 0). If there are other switches inside f
                # and g, does this get trickier?

                # -- assume we need to add some indexes
                checkpoint()
                if node in self.idxs_memo:
                    all_idxs = self.idxs_memo[node]

                else:
                    all_idxs = scope.array_union(wanted_idxs)
                checkpoint()

                # -- rebuild the node as an idxs_map whose arguments are
                #    (idxs, vals) pairs built recursively over all_idxs
                all_vals = scope.idxs_map(all_idxs, node.name)
                for ii, aa in enumerate(node.pos_args):
                    all_vals.pos_args.append(as_apply([
                        all_idxs, self.build_idxs_vals(aa, all_idxs)]))
                    checkpoint()
                for ii, (nn, aa) in enumerate(node.named_args):
                    all_vals.named_args.append([nn, as_apply([
                        all_idxs, self.build_idxs_vals(aa, all_idxs)])])
                    checkpoint()
                all_vals = vectorize_stochastic(all_vals)

                checkpoint()
                wanted_vals = scope.idxs_take(
                    all_idxs,  # -- may grow in future
                    all_vals,  # -- may be replaced in future
                    wanted_idxs)  # -- fixed.
                if node in self.idxs_memo:
                    assert self.idxs_memo[node].name == 'array_union'
                    self.idxs_memo[node].pos_args.append(wanted_idxs)
                    toposort(self.idxs_memo[node])
                    # -- this catches the cycle bug mentioned above
                    # -- re-point every earlier take at the rebuilt values
                    for take in self.take_memo[node]:
                        assert take.name == 'idxs_take'
                        take.pos_args[1] = all_vals
                    self.take_memo[node].append(wanted_vals)
                else:
                    self.idxs_memo[node] = all_idxs
                    self.take_memo[node] = [wanted_vals]
                checkpoint()

        return wanted_vals
예제 #12
0
    def build_idxs_vals(self, node, wanted_idxs):
        """
        Build the vectorized values of `node` restricted to `wanted_idxs`.

        This recursive procedure should be called on an output-node.

        `hyperopt_param` and `hyperopt_result` nodes are transparent: the
        call is forwarded to their `obj` argument.  For `literal`, `switch`
        and all other nodes, the union of requested indexes is kept in
        `self.idxs_memo[node]` and every `idxs_take` handed out for the node
        is kept in `self.take_memo[node]`, so that earlier takes can be
        re-pointed when the node's values are rebuilt.

        Parameters
        ----------
        node : graph node to vectorize; read through `.name`, `.pos_args`,
            `.named_args` and `.arg`.
        wanted_idxs : index expression for which values of `node` are
            requested; it becomes the third argument of `idxs_take`.

        Returns
        -------
        The `idxs_take` expression for `wanted_idxs` (for the transparent
        nodes, whatever the forwarded call returns).

        Raises
        ------
        NotImplementedError
            In the general case, when `wanted_idxs` is already listed in the
            node's index union but no recorded take uses it.
        """
        # -- debugging switch: set to True to validate graph integrity and
        #    acyclicity (via toposort) at every checkpoint() call
        checkpoint_asserts = False

        def checkpoint():
            if checkpoint_asserts:
                self.assert_integrity_idxs_take()
                if node in self.idxs_memo:
                    toposort(self.idxs_memo[node])
                if node in self.take_memo:
                    for take in self.take_memo[node]:
                        toposort(take)

        checkpoint()

        # wanted_idxs are fixed, whereas idxs_memo
        # is full of unions, that can grow in subsequent recursive
        # calls to build_idxs_vals with node as argument.
        assert wanted_idxs != self.idxs_memo.get(node)

        # -- easy exit case
        if node.name == 'hyperopt_param':
            # -- ignore, not vectorizing
            return self.build_idxs_vals(node.arg['obj'], wanted_idxs)

        # -- easy exit case
        elif node.name == 'hyperopt_result':
            # -- ignore, not vectorizing
            return self.build_idxs_vals(node.arg['obj'], wanted_idxs)

        # -- literal case: always take from universal set
        elif node.name == 'literal':
            if node in self.idxs_memo:
                # -- reuse the idxs/vals of the first take recorded for
                #    this literal and add one more take for wanted_idxs
                all_idxs, all_vals = self.take_memo[node][0].pos_args[:2]
                wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                self.take_memo[node].append(wanted_vals)
                checkpoint()
            else:
                # -- initialize idxs_memo to full set
                all_idxs = self.expr_idxs
                n_times = scope.len(all_idxs)
                # -- put array_union into graph for consistency, though it is
                # not necessary
                all_idxs = scope.array_union(all_idxs)
                self.idxs_memo[node] = all_idxs
                # -- the literal is repeated once per element of expr_idxs
                all_vals = scope.asarray(scope.repeat(n_times, node))
                wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                assert node not in self.take_memo
                self.take_memo[node] = [wanted_vals]
                checkpoint()
            return wanted_vals

        # -- switch case: complicated
        elif node.name == 'switch':
            if (node in self.idxs_memo
                    and wanted_idxs in self.idxs_memo[node].pos_args):
                # -- phew, easy case
                #    wanted_idxs is already part of the union: take from the
                #    idxs/vals of the first recorded take
                all_idxs, all_vals = self.take_memo[node][0].pos_args[:2]
                wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                self.take_memo[node].append(wanted_vals)
                checkpoint()
            else:
                # -- we need to add some indexes
                if node in self.idxs_memo:
                    all_idxs = self.idxs_memo[node]
                    assert all_idxs.name == 'array_union'
                    # NOTE(review): wanted_idxs is appended here and again
                    # in the bookkeeping block below, so it ends up in the
                    # union twice -- confirm whether that is intended.
                    all_idxs.pos_args.append(wanted_idxs)
                else:
                    all_idxs = scope.array_union(wanted_idxs)

                # -- first positional arg is the choice; the rest are options
                choice = node.pos_args[0]
                all_choices = self.build_idxs_vals(choice, all_idxs)

                options = node.pos_args[1:]
                # -- one idxs expression per option
                args_idxs = scope.vchoice_split(all_idxs, all_choices,
                                                len(options))
                all_vals = scope.vchoice_merge(all_idxs, all_choices)
                # -- each option is built only for its own split of the idxs
                for opt_ii, idxs_ii in zip(options, args_idxs):
                    all_vals.pos_args.append(
                        as_apply([
                            idxs_ii,
                            self.build_idxs_vals(opt_ii, idxs_ii),
                        ]))

                wanted_vals = scope.idxs_take(
                    all_idxs,  # -- may grow in future
                    all_vals,  # -- may be replaced in future
                    wanted_idxs)  # -- fixed.
                if node in self.idxs_memo:
                    assert self.idxs_memo[node].name == 'array_union'
                    self.idxs_memo[node].pos_args.append(wanted_idxs)
                    # -- re-point every earlier take at the rebuilt values
                    for take in self.take_memo[node]:
                        assert take.name == 'idxs_take'
                        take.pos_args[1] = all_vals
                    self.take_memo[node].append(wanted_vals)
                else:
                    self.idxs_memo[node] = all_idxs
                    self.take_memo[node] = [wanted_vals]
                checkpoint()

        # -- general case
        else:
            # -- this is a general node.
            #    It is generally handled with idxs_memo,
            #    but vectorize_stochastic may immediately transform it into
            #    a more compact form.
            if (node in self.idxs_memo
                    and wanted_idxs in self.idxs_memo[node].pos_args):
                # -- phew, easy case
                #    hand back the take already recorded for wanted_idxs
                for take in self.take_memo[node]:
                    if take.pos_args[2] == wanted_idxs:
                        return take
                # -- the union lists wanted_idxs but no take uses it
                raise NotImplementedError('how did this happen?')
                #all_idxs, all_vals = self.take_memo[node][0].pos_args[:2]
                #wanted_vals = scope.idxs_take(all_idxs, all_vals, wanted_idxs)
                #self.take_memo[node].append(wanted_vals)
                #checkpoint()
            else:
                # XXX
                # -- determine if wanted_idxs is actually a subset of the idxs
                # that we are already computing.  This is not only an
                # optimization, but prevents the creation of cycles, which
                # would otherwise occur if we have a graph of the form
                # switch(f(a), g(a), 0). If there are other switches inside f
                # and g, does this get trickier?

                # -- assume we need to add some indexes
                checkpoint()
                if node in self.idxs_memo:
                    all_idxs = self.idxs_memo[node]

                else:
                    all_idxs = scope.array_union(wanted_idxs)
                checkpoint()

                # -- rebuild the node as an idxs_map whose arguments are
                #    (idxs, vals) pairs built recursively over all_idxs
                all_vals = scope.idxs_map(all_idxs, node.name)
                for ii, aa in enumerate(node.pos_args):
                    all_vals.pos_args.append(
                        as_apply(
                            [all_idxs,
                             self.build_idxs_vals(aa, all_idxs)]))
                    checkpoint()
                for ii, (nn, aa) in enumerate(node.named_args):
                    all_vals.named_args.append([
                        nn,
                        as_apply(
                            [all_idxs,
                             self.build_idxs_vals(aa, all_idxs)])
                    ])
                    checkpoint()
                all_vals = vectorize_stochastic(all_vals)

                checkpoint()
                wanted_vals = scope.idxs_take(
                    all_idxs,  # -- may grow in future
                    all_vals,  # -- may be replaced in future
                    wanted_idxs)  # -- fixed.
                if node in self.idxs_memo:
                    assert self.idxs_memo[node].name == 'array_union'
                    self.idxs_memo[node].pos_args.append(wanted_idxs)
                    toposort(self.idxs_memo[node])
                    # -- this catches the cycle bug mentioned above
                    # -- re-point every earlier take at the rebuilt values
                    for take in self.take_memo[node]:
                        assert take.name == 'idxs_take'
                        take.pos_args[1] = all_vals
                    self.take_memo[node].append(wanted_vals)
                else:
                    self.idxs_memo[node] = all_idxs
                    self.take_memo[node] = [wanted_vals]
                checkpoint()

        return wanted_vals