Example #1
    def recall(self, idlist):
        """Construct an IdxsValsList representation of the elements of idlist.

        The result will not be renumbered.
        """
        if idlist:
            idset = set(idlist)
            if len(idset) != len(idlist):
                raise NotImplementedError('dups in idlist')

            # for each variable in the bandit (each idxs, vals pair)
            # extract the database elements and put them into a new (idxs, vals)
            # pair that we can return.
            rval_idxs = []
            rval_vals = []
            for idxs, vals in zip(self.db_idxs, self.db_vals):
                assert len(idxs) == len(vals)
            ii_vv = [(ii, vv)
                     for (ii, vv) in zip(idxs, vals) if ii in idset]
                rval_idxs.append([iv[0] for iv in ii_vv])
                rval_vals.append([iv[1] for iv in ii_vv])
        else:
            rval_idxs = [[] for s in self.s_idxs]
            rval_vals = [[] for s in self.s_idxs]
        return IdxsValsList.fromlists(rval_idxs, rval_vals)
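
For intuition, here is a minimal pure-Python sketch of the per-variable filtering that recall performs; db_idxs, db_vals, and idset are hypothetical stand-ins for one variable's database columns, not the real data structures:

    db_idxs = [0, 1, 2, 3]
    db_vals = ['a', 'b', 'c', 'd']
    idset = {1, 3}

    # keep only the (id, value) pairs whose trial id is in idset
    ii_vv = [(ii, vv) for ii, vv in zip(db_idxs, db_vals) if ii in idset]
    rval_idxs = [iv[0] for iv in ii_vv]   # [1, 3]
    rval_vals = [iv[1] for iv in ii_vv]   # ['b', 'd']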
Example #2
    def suggest_from_prior(self, trials, results, N):
        logger.info('suggest_from_prior')
        if not hasattr(self, '_prior_sampler'):
            self._prior_sampler = theano.function(
                [self.s_N],
                self.s_prior.flatten(),
                mode=self.mode)
        rvals = self._prior_sampler(N)
        return IdxsValsList.fromflattened(rvals)
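
The hasattr guard compiles the Theano function on the first call and caches it on the instance, so later calls skip compilation; Example #3 reaches the same result with the EAFP try/except idiom. The caching pattern in isolation, with illustrative names only:

    class LazySampler(object):
        def _build(self):
            # stand-in for the expensive theano.function compilation
            return lambda n: list(range(n))

        def sample(self, n):
            if not hasattr(self, '_fn'):   # compile once, reuse afterwards
                self._fn = self._build()
            return self._fn(n)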
Example #3
    def suggest_from_prior(self, N):
        try:
            prior_sampler = self._prior_sampler
        except AttributeError:
            prior_sampler = self._prior_sampler = theano.function(
                [self.helper_locals['n_to_draw']],
                self.helper_locals['s_prior'].flatten(),
                mode=self.mode)
        rvals = prior_sampler(N)
        return IdxsValsList.fromflattened(rvals)
Example #4
    def suggest(self, trials, results, N):
        # normally a TheanoBanditAlgo.suggest would start with this call:
        ##  ivls = self.idxs_vals_by_status(trials, results)
        rvals = self._sampler(N)

        # A TheanoBanditAlgo.suggest implementation should usually
        # return suggest_ivl(...).
        return self.suggest_ivl(
                IdxsValsList.fromlists(
                    rvals[:len(rvals) // 2],
                    rvals[len(rvals) // 2:]))
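
The fromlists call assumes the compiled sampler returns every variable's idxs list first, then every vals list; a toy illustration of that half-split with hypothetical data:

    # flat sampler output for two variables: their idxs, then their vals
    rvals = [[0, 1], [0, 1, 1], [3.0, 4.0], [0.1, 0.2, 0.3]]

    idxs_lists = rvals[:len(rvals) // 2]   # [[0, 1], [0, 1, 1]]
    vals_lists = rvals[len(rvals) // 2:]   # [[3.0, 4.0], [0.1, 0.2, 0.3]]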
Example #5
    def posterior(self, priors, observations, s_rng):
        """
        priors - an IdxsValsList of random variables
        observations - an IdxsValsList of corresponding samples

        returns - an IdxsValsList of posterior random variables
        """

        assert len(priors) == len(observations)

        # The observations' idxs could be invalid, in the sense of
        # describing samples that could not have been drawn from the prior.
        # This code does not try to detect that situation.
        post_vals = [
            self.s_posterior_helper(p, o, s_rng)
            for p, o in zip(priors, observations)
        ]

        # At this point, each post_vals[i] is connected to the original graph
        # formed of prior nodes.
        #
        # We want each post_vals[i] to be connected instead to the other
        # post_vals just created, corresponding to the posterior values of other
        # random vars.
        #
        # The way to get all the post_vals nodes hooked up to one another is
        # to clone the graph while applying those replacements, which the
        # interactive env below does.

        # XXX: this clones everything. It should be possible to do a more
        #      selective clone of just the pieces that change.
        inputs = theano.gof.graph.inputs(priors.flatten() + post_vals)
        env = ienv.std_interactive_env(inputs,
                                       priors.flatten() + post_vals,
                                       clone_inputs_and_orphans=False)
        env.prefer_replace(zip(priors.valslist(), post_vals),
                           reason='IndependentNodeTreeEstimator.posterior')

        # raise an exception if we created cycles
        env.toposort()

        # extract the cloned results from the env
        rval = IdxsValsList.fromlists(
            [env.newest(v) for v in priors.idxslist()],
            [env.newest(v) for v in post_vals])

        # remove all references in the variables to the env. Prepare them
        # to be inserted into another env if necessary.
        env.disown()
        return rval
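
The env-based replace-and-clone above is hard to picture in isolation. A much smaller analogue of the same substitute-in-graph idea, using plain theano.clone rather than the interactive-env machinery, looks like this:

    import theano
    import theano.tensor as tensor

    x = tensor.vector('x')
    y = x ** 2                  # a small graph built on x
    z = tensor.vector('z')

    # clone y's graph with z substituted for x; y itself is untouched
    y_on_z = theano.clone(y, replace={x: z})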
Example #6
    def __init__(self, bandit):
        TheanoBanditAlgo.__init__(self, bandit)
        self.numpy_rng = numpy.random.RandomState(234)
        self.s_prior = IdxsValsList.fromlists(self.s_idxs, self.s_vals)
        self.s_n_train = tensor.lscalar('n_train')
        self.s_n_test = tensor.lscalar('n_test')
        self.y_obs = tensor.vector('y_obs')
        self.y_obs_var = tensor.vector('y_obs_var')
        self.x_obs_IVL = self.s_prior.new_like_self()

        self.cand_x = self.s_prior.new_like_self()
        self.cand_EI_thresh = tensor.scalar()

        self.init_kernels()
        self.init_gram_weights()
        self.params.extend(self.convex_coefficient_params)
        self.param_bounds.extend(self.convex_coefficient_params_bounds)

        self.s_big_param_vec = tensor.vector()
        ### assumes all variables are refinable
        ### assumes all variables are vectors
        n_elements_used = 0
        for k, iv in zip(self.kernels, self.cand_x):
            if self.is_refinable[k]:
                n_elements_in_v = iv.idxs.shape[0]
                start = n_elements_used
                stop = n_elements_used + n_elements_in_v
                iv.vals = self.s_big_param_vec[start:stop]
                n_elements_used += n_elements_in_v

        self.gprmath = GPR_math(self.x_obs_IVL,
                                self.y_obs,
                                self.y_obs_var,
                                picklable_instancemethod(self, 'K_fn'),
                                N=self.s_n_train,
                                min_variance=self.y_minvar)

        self.nll_obs = self.gprmath.s_nll()

        self.cand_EI = tensor.log(
            self.gprmath.s_expectation_lt_thresh(self.cand_x,
                                                 self.cand_EI_thresh))

        # self.gm_algo is used to draw candidates for subsequent refinement
        # It is also entirely responsible for choosing categorical variables.
        self.gm_algo = AdaptiveParzenGM(self.bandit)
        self.gm_algo.n_EI_candidates = self.n_candidates_to_draw_in_GM
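
The s_big_param_vec loop hands each refinable variable a slice of one flat vector, so a single optimizer step can update all of them at once. A hypothetical numpy analogue of that packing:

    import numpy

    sizes = [3, 2, 4]                       # lengths of each vals vector
    big = numpy.zeros(sum(sizes))           # stand-in for s_big_param_vec
    pieces, used = [], 0
    for n in sizes:
        pieces.append(big[used:used + n])   # slices are views into big
        used += n
    # writing into big now updates every piece simultaneously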
Example #7
    def setUp(self):
        self.TE = IndependentNullEstimator()
        self.bandit = NestedUniform()
        self.experiment = SerialExperiment(
            GM_BanditAlgo(self.bandit,
                          good_estimator=IndependentNullEstimator(),
                          bad_estimator=IndependentNullEstimator()))

        self.s_rng = montetheano.RandomStreams(123)
        prior_idxs, prior_vals, s_N = self.bandit.template.theano_sampler(
            self.s_rng)
        #print prior_idxs
        #print prior_vals
        self.prior = IdxsValsList.fromlists(
            [i for i in prior_idxs if i is not None],
            [v for v in prior_vals if v is not None])
        self.s_N = s_N
        self.observations = self.prior.new_like_self()
        for i, o in enumerate(self.observations):
            o.idxs.name = 'Obs_idxs{%i}' % i
            o.vals.name = 'Obs_vals{%i}' % i
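
Assigning .name to the observation variables in the final loop pays off when inspecting graphs: anonymous variables print as bare types, named ones are identifiable. A minimal sketch, with an arbitrary label:

    import theano.tensor as tensor
    from theano.printing import debugprint

    v = tensor.vector()
    v.name = 'Obs_vals{0}'   # mirrors the naming loop above
    debugprint(2 * v)        # the name appears in the graph printout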
Example #8
    def build_helpers(self):
        s_prior = IdxsValsList.fromlists(self.s_idxs, self.s_vals)
        s_obs = s_prior.new_like_self()

        # y_thresh is the boundary between 'good' and 'bad' regions of the
        # search space.
        y_thresh = tensor.scalar()

        yvals = tensor.vector()
        n_to_draw = self.s_N
        n_to_keep = tensor.iscalar()

        s_rng = montetheano.RandomStreams(self.seed + 9)

        GE = self.good_estimator
        BE = self.bad_estimator

        Gobs = s_obs.symbolic_take(where(yvals < y_thresh))
        Bobs = s_obs.symbolic_take(where(yvals >= y_thresh))

        # To "optimize" EI we just draw a pile of samples from the density
        # of good points and then just take the best of those.
        Gsamples = GE.posterior(s_prior, Gobs, s_rng)
        Bsamples = BE.posterior(s_prior, Bobs, s_rng)

        G_ll = GE.log_likelihood(Gsamples,
                                 Gsamples,
                                 llik=tensor.zeros((n_to_draw,)))
        B_ll = BE.log_likelihood(Bsamples,
                                 Gsamples,
                                 llik=tensor.zeros((n_to_draw,)))

        # subtract B_ll from G_ll
        log_EI = G_ll - B_ll
        keep_idxs = argsort(log_EI)[-n_to_keep:]

        # store all these vars for the unittests
        self.helper_locals = locals()
        del self.helper_locals['self']
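
The last three statements rank candidates by log_EI and keep the n_to_keep best. The same selection, computed concretely with hypothetical numbers:

    import numpy

    rng = numpy.random.RandomState(0)
    log_EI = rng.randn(100)      # stand-in for the symbolic G_ll - B_ll
    n_to_keep = 5
    # argsort is ascending, so the last n_to_keep indices are the best
    keep_idxs = numpy.argsort(log_EI)[-n_to_keep:]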
Example #9
    def suggest_from_model(self, ivls, N):
        helper = self._suggest_from_model_fn

        ylist = numpy.asarray(sorted(ivls['losses']['ok'].vals), dtype='float')
        y_thresh_idx = int(self.gamma * len(ylist))
        y_thresh = ylist[y_thresh_idx : y_thresh_idx + 2].mean()

        logger.info('GM_BanditAlgo splitting results at y_thresh = %f'
                % y_thresh)
        logger.info('GM_BanditAlgo keeping %i results as good'
                % y_thresh_idx)
        logger.info('GM_BanditAlgo keeping %i results as bad'
                % (len(ylist) - y_thresh_idx))
        logger.info('GM_BanditAlgo good scores: %s'
                % str(ylist[:y_thresh_idx]))

        x_all = ivls['x_IVLs']['ok'].as_list()
        y_all_iv = ivls['losses']['ok'].as_list()

        assert y_all_iv.idxset() == x_all.idxset(), (y_all_iv.idxset(),
                x_all.idxset())

        for pseudo_bad_status in 'new', 'running':
            logger.info('GM_BanditAlgo assigning bad scores to %i new jobs'
                    % len(ivls['losses'][pseudo_bad_status].idxs))
            x_all.stack(ivls['x_IVLs'][pseudo_bad_status])
            y_all_iv.stack(IdxsVals(
                ivls['losses'][pseudo_bad_status].idxs,
                [y_thresh + 1] * len(ivls['losses'][pseudo_bad_status].idxs)))
            assert y_all_iv.idxset() == x_all.idxset(), (y_all_iv.idxset(),
                    x_all.idxset())

        # renumber the configurations in x_all to be 0 .. (n_train - 1)
        idmap = y_all_iv.reindex()
        idmap = x_all.reindex(idmap)

        assert y_all_iv.idxset() == x_all.idxset(), (y_all_iv.idxset(),
                x_all.idxset())

        assert numpy.all(y_all_iv.idxs == numpy.arange(len(y_all_iv.idxs))), (
                y_all_iv.idxs)

        y_all = y_all_iv.as_numpy(vdtype=theano.config.floatX).vals
        x_all = x_all.as_numpy_floatX()

        logger.info('GM_BanditAlgo drawing %i candidates'
                % self.n_EI_candidates)

        helper_rval = helper(self.n_EI_candidates, N,
            y_thresh, y_all, *x_all.flatten())
        assert len(helper_rval) == 6 * len(x_all)

        keep_flat = helper_rval[:2 * len(x_all)]
        Gobs_flat = helper_rval[2 * len(x_all): 4 * len(x_all)]
        Bobs_flat = helper_rval[4 * len(x_all):]
        assert len(keep_flat) == len(Gobs_flat) == len(Bobs_flat)

        Gobs = IdxsValsList.fromflattened(Gobs_flat)
        Bobs = IdxsValsList.fromflattened(Bobs_flat)

        # guard against book-keeping error
        # ensure that all observations were counted as either good or bad
        gis = Gobs.idxset()
        bis = Bobs.idxset()
        xis = x_all.idxset()
        assert len(xis) == len(y_all)
        assert gis.union(bis) == xis
        assert gis.intersection(bis) == set()

        rval = IdxsValsList.fromflattened(keep_flat)
        # relabel the return values to be elements 0 ... N - 1
        rval.reindex()
        return rval
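
For intuition on the threshold at the top of this example: with hypothetical losses and gamma = 0.25, y_thresh averages the two losses that straddle the gamma-quantile boundary:

    import numpy

    ylist = numpy.asarray(sorted([0.9, 0.3, 0.5, 0.7, 0.2]), dtype='float')
    gamma = 0.25
    y_thresh_idx = int(gamma * len(ylist))                   # 1
    y_thresh = ylist[y_thresh_idx:y_thresh_idx + 2].mean()   # (0.3 + 0.5) / 2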