Example 1
    def learn_posterior(self,
                        obs_xs,
                        n_samples,
                        n_comps,
                        maxepochs=5000,
                        lreg=0.01,
                        minibatch=100,
                        step=ss.Adam(),
                        store_sims=False,
                        logger=sys.stdout,
                        rng=np.random):
        """
        Trains a Bayesian MDN to learn the posterior using the proposal.
        """

        # TODO: deal with tuning maxepochs

        # create an svi mdn
        if self.mdn_prop is None:
            self.mdn_post = mdns.MDN_SVI(n_inputs=len(obs_xs),
                                         n_outputs=self.prior.n_dims,
                                         n_hiddens=self.n_hiddens,
                                         act_fun=self.act_fun,
                                         n_components=n_comps,
                                         rng=rng)

        else:
            self.mdn_post = mdns.replicate_gaussian_mdn(self.mdn_prop,
                                                        n_comps,
                                                        rng=rng)

        logger.write('Learning posterior\n')

        # simulate data
        ps, xs = self._sim_data(n_samples, store_sims, logger, rng)

        # train mdn
        self._train_mdn(ps, xs, self.mdn_post, maxepochs, lreg,
                        min(minibatch, n_samples), step, logger)

        try:
            # calculate the approximate posterior
            self.posterior = self._calc_posterior(self.mdn_post, obs_xs)

        except pdfs.gaussian.ImproperCovarianceError:
            logger.write(
                'WARNING: learning posterior failed due to negative variance.\n'
            )
            self.posterior = self.proposal

        return self.posterior
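The negative-variance failure handled above arises when the learned posterior is adjusted for the proposal. As a point of reference, here is a minimal NumPy sketch of one such adjustment for a single Gaussian: multiply by the prior density and divide by the proposal density, so precisions and precision-weighted means combine additively. Whether `_calc_posterior` performs exactly this computation is an assumption; the function and variable names below are illustrative only.

import numpy as np

def replace_proposal_with_prior(m_mdn, S_mdn, m_prop, S_prop, m_prior, S_prior):
    """Adjust a Gaussian (m_mdn, S_mdn) learned under a proposal: multiply by the
    prior density and divide by the proposal density. All three densities are
    Gaussian, so their natural parameters simply add and subtract."""
    P_mdn, P_prop, P_prior = (np.linalg.inv(S) for S in (S_mdn, S_prop, S_prior))

    # combined precision; it may fail to be positive definite,
    # which corresponds to the 'negative variance' case handled above
    P = P_mdn - P_prop + P_prior
    Pm = P_mdn.dot(m_mdn) - P_prop.dot(m_prop) + P_prior.dot(m_prior)

    S = np.linalg.inv(P)
    m = S.dot(Pm)
    return m, S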
Example 2
    def learn_proposal(self,
                       obs_xs,
                       n_samples,
                       n_rounds,
                       maxepochs=1000,
                       lreg=0.01,
                       minibatch=50,
                       step=ss.Adam(),
                       store_sims=False,
                       logger=sys.stdout,
                       rng=np.random):
        """
        Iteratively trains a Bayesian MDN to learn a Gaussian proposal.
        """

        # TODO: deal with tuning maxepochs

        # create mdn, if haven't already
        if self.mdn_prop is None:
            self.mdn_prop = mdns.MDN_SVI(n_inputs=len(obs_xs),
                                         n_outputs=self.prior.n_dims,
                                         n_hiddens=self.n_hiddens,
                                         act_fun=self.act_fun,
                                         n_components=1,
                                         rng=rng)

        for i in xrange(n_rounds):

            logger.write('Learning proposal, round {0}\n'.format(i + 1))

            # simulate new batch of data
            ps, xs = self._sim_data(n_samples, store_sims, logger, rng)

            # train mdn
            self._train_mdn(ps, xs, self.mdn_prop, maxepochs, lreg,
                            min(minibatch, n_samples), step, logger)

            try:
                # calculate the proposal
                self.proposal = self._calc_posterior(
                    self.mdn_prop, obs_xs).project_to_gaussian()
                self.all_proposals.append(self.proposal)

            except pdfs.gaussian.ImproperCovarianceError:
                logger.write(
                    'WARNING: learning proposal failed in iteration {0} due to negative variance.\n'
                    .format(i + 1))
                break

        return self.proposal
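`project_to_gaussian()` collapses the mixture posterior at `obs_xs` into a single Gaussian proposal. Assuming it does standard moment matching (an assumption about its behaviour, not taken from the source), the computation looks like the following NumPy sketch:

import numpy as np

def moment_match_mog(alphas, means, covs):
    """Collapse a mixture of Gaussians (weights alphas, component means and
    covariances) into one Gaussian with the same overall mean and covariance."""
    alphas = np.asarray(alphas)                     # (K,)
    means = np.asarray(means)                       # (K, D)
    covs = np.asarray(covs)                         # (K, D, D)

    m = np.sum(alphas[:, None] * means, axis=0)     # overall mean

    # overall covariance: weighted second moments minus outer product of the mean
    second_moments = covs + means[:, :, None] * means[:, None, :]
    S = np.sum(alphas[:, None, None] * second_moments, axis=0) - np.outer(m, m)

    return m, S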
Example 3
    def __init__(self,
                 model,
                 trn_data,
                 trn_loss,
                 trn_target=None,
                 val_data=None,
                 val_loss=None,
                 val_target=None,
                 step=ss.Adam(),
                 max_norm=None):
        """
        Constructs and configures the trainer.
        :param model: the model to be trained
        :param trn_data: training inputs and (possibly) training targets
        :param trn_loss: theano variable representing the training loss to minimize
        :param trn_target: theano variable representing the training target
        :param val_data: validation inputs and (possibly) validation targets
        :param val_loss: theano variable representing the validation loss
        :param val_target: theano variable representing the validation target
        :param step: step size strategy object
        :param max_norm: constrain the gradients to have this maximum norm, ignore if None
        """

        assert isinstance(
            step, ss.StepStrategy), 'step must be a step strategy object'

        SGD_Template.__init__(self, model, trn_data, trn_target, val_data,
                              val_target)

        # compile theano function for a single training update
        idx = tt.ivector('idx')
        grads = tt.grad(trn_loss, model.parms)
        grads = [tt.switch(tt.isnan(g), 0., g) for g in grads]
        grads = grads if max_norm is None else util.ml.total_norm_constraint(
            grads, max_norm)
        self.make_update = theano.function(
            inputs=[idx],
            outputs=trn_loss,
            givens=zip(self.trn_inputs, [x[idx] for x in self.trn_data]),
            updates=step.updates(model.parms, grads))

        if self.do_validation:

            # compile theano function for validation
            self.validate = theano.function(
                inputs=[],
                outputs=val_loss,
                givens=zip(self.val_inputs, self.val_data) +
                self.batch_norm_givens)
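Two gradient safeguards appear in this constructor: NaN gradients are replaced by zero, and `util.ml.total_norm_constraint` rescales all gradients jointly when `max_norm` is given. A minimal NumPy sketch of that kind of global-norm clipping (assuming the helper behaves like the usual Lasagne-style constraint) is:

import numpy as np

def clip_by_global_norm(grads, max_norm):
    """Rescale a list of gradient arrays so that their joint L2 norm does not
    exceed max_norm; gradients already within the limit are unchanged."""
    total_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = max_norm / max(total_norm, max_norm)    # equals 1 when already within the limit
    return [g * scale for g in grads]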
Example 4
def train(model, a):

    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    regularizer = lf.WeightDecay(model.parms, weight_decay_rate)

    trainer = trainers.SGD(model=model,
                           trn_data=[data.trn.x],
                           trn_loss=model.trn_loss + regularizer,
                           val_data=[data.val.x],
                           val_loss=model.trn_loss,
                           step=ss.Adam(a=a))

    trainer.train(minibatch=minibatch,
                  patience=patience,
                  monitor_every=monitor_every)
Example 5
    def __init__(self,
                 model,
                 trn_data,
                 trn_loss,
                 trn_target=None,
                 val_data=None,
                 val_loss=None,
                 val_target=None,
                 step=ss.Adam()):
        """
        Constructs and configures the trainer.
        :param model: the model to be trained
        :param trn_data: train inputs and (possibly) train targets
        :param trn_loss: theano variable representing the train loss to minimize
        :param trn_target: theano variable representing the train target
        :param val_data: validation inputs and (possibly) validation targets
        :param val_loss: theano variable representing the validation loss
        :param val_target: theano variable representing the validation target
        :param step: step size strategy object
        :return: None
        """

        # parse input
        # TODO: it would be good to type check the other inputs too
        assert isinstance(
            step, ss.StepStrategy), 'Step must be a step strategy object.'

        # prepare train data
        n_trn_data_list = set([x.shape[0] for x in trn_data])
        assert len(
            n_trn_data_list) == 1, 'Number of train data is not consistent.'
        self.n_trn_data = list(n_trn_data_list)[0]
        trn_data = [
            theano.shared(x.astype(dtype), borrow=True) for x in trn_data
        ]

        #! privatise this
        # compile theano function for a single training update
        grads = tt.grad(trn_loss, model.parms)
        idx = tt.ivector('idx')
        trn_inputs = [model.input
                      ] if trn_target is None else [model.input, trn_target]
        self.make_update = theano.function(
            inputs=[idx],
            outputs=trn_loss,
            givens=list(zip(trn_inputs, [x[idx] for x in trn_data])),
            updates=step.updates(model.parms, grads))

        # if model uses batch norm, compile a theano function for setting up stats
        if getattr(model, 'batch_norm', False):
            batch_norm_givens = [(bn.m, bn.bm)
                                 for bn in model.bns] + [(bn.v, bn.bv)
                                                         for bn in model.bns]
            self.set_batch_norm_stats = theano.function(
                inputs=[],
                givens=list(zip(trn_inputs, trn_data)),
                updates=[(bn.bm, bn.m)
                         for bn in model.bns] + [(bn.bv, bn.v)
                                                 for bn in model.bns])
        else:
            self.set_batch_norm_stats = None
            batch_norm_givens = []

        # if validation data is given, then set up validation too
        self.do_validation = val_data is not None

        if self.do_validation:

            # prepare validation data
            n_val_data_list = set([x.shape[0] for x in val_data])
            assert len(n_val_data_list
                       ) == 1, 'Number of validation data is not consistent.'
            self.n_val_data = list(n_val_data_list)[0]
            val_data = [
                theano.shared(x.astype(dtype), borrow=True) for x in val_data
            ]

            # compile theano function for validation
            val_inputs = [model.input] if val_target is None else [
                model.input, val_target
            ]
            self.validate = theano.function(
                inputs=[],
                outputs=val_loss,
                givens=list(zip(val_inputs, val_data)) + batch_norm_givens)

            # create checkpointer to store best model
            self.checkpointer = ModelCheckpointer(model)
            self.best_val_loss = float('inf')

        # initialize some variables
        self.trn_loss = float('inf')
        self.idx_stream = ds.IndexSubSampler(self.n_trn_data)
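Training data are held in Theano shared variables, and each call to `make_update` receives only an index vector `idx` that slices them through `givens`; `ds.IndexSubSampler` presumably supplies those random minibatch indices. The class below is a hypothetical stand-in written for illustration (its name and behaviour are assumptions, not the actual implementation):

import numpy as np

class RandomIndexStream(object):
    """Hand out minibatch-sized index vectors by cycling through
    fresh random permutations of range(n)."""

    def __init__(self, n, rng=np.random):
        self.n = n
        self.rng = rng
        self.order = rng.permutation(n)
        self.pos = 0

    def gen(self, n_samples):
        # start a new random pass when the current permutation is exhausted
        if self.pos + n_samples > self.n:
            self.order = self.rng.permutation(self.n)
            self.pos = 0
        idx = self.order[self.pos:self.pos + n_samples]
        self.pos += n_samples
        return idx.astype(np.int32)    # int32 indices match the tt.ivector('idx') input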
Example 6
    def __init__(self,
                 model,
                 trn_data,
                 trn_losses,
                 trn_weights=None,
                 trn_reg=None,
                 trn_target=None,
                 val_data=None,
                 val_losses=None,
                 val_weights=None,
                 val_reg=None,
                 val_target=None,
                 step=ss.Adam(),
                 max_norm=None):
        """
        :param model: the model to be trained
        :param trn_data: training inputs and (possibly) training targets
        :param trn_losses: theano variable representing the training losses at training points
        :param trn_weights: weights for training points
        :param trn_reg: theano variable representing the training regularizer
        :param trn_target: theano variable representing the training target
        :param val_data: validation inputs and (possibly) validation targets
        :param val_losses: theano variable representing the validation losses at validation points
        :param val_weights: weights for validation points
        :param val_reg: theano variable representing the validation regularizer
        :param val_target: theano variable representing the validation target
        :param step: step size strategy object
        :param max_norm: constrain the gradients to have this maximum norm, ignore if None
        """

        assert isinstance(
            step, ss.StepStrategy), 'step must be a step strategy object'

        SGD_Template.__init__(self, model, trn_data, trn_target, val_data,
                              val_target)

        # prepare training weights
        trn_weights = np.ones(
            self.n_trn_data,
            dtype=dtype) if trn_weights is None else trn_weights
        trn_weights = theano.shared(trn_weights.astype(dtype), borrow=True)

        # prepare training regularizer
        trn_reg = 0.0 if trn_reg is None else trn_reg

        # compile theano function for a single training update
        idx = tt.ivector('idx')
        trn_loss = tt.mean(trn_weights[idx] * trn_losses) + trn_reg
        grads = tt.grad(trn_loss, model.parms)
        grads = [tt.switch(tt.isnan(g), 0., g) for g in grads]
        grads = grads if max_norm is None else util.ml.total_norm_constraint(
            grads, max_norm)
        self.make_update = theano.function(
            inputs=[idx],
            outputs=trn_loss,
            givens=zip(self.trn_inputs, [x[idx] for x in self.trn_data]),
            updates=step.updates(model.parms, grads))

        if self.do_validation:

            # prepare validation weights
            val_weights = np.ones(
                self.n_val_data,
                dtype=dtype) if val_weights is None else val_weights
            val_weights = theano.shared(val_weights.astype(dtype), borrow=True)

            # prepare validation regularizer
            val_reg = 0.0 if val_reg is None else val_reg

            # compile theano function for validation
            self.validate = theano.function(
                inputs=[],
                outputs=tt.mean(val_weights * val_losses) + val_reg,
                givens=zip(self.val_inputs, self.val_data) +
                self.batch_norm_givens)
Example 7
    def learn_posterior(self,
                        obs_xs,
                        n_samples,
                        n_rounds,
                        maxepochs=1000,
                        minibatch=100,
                        step=ss.Adam(),
                        normalize_weights=True,
                        store_sims=False,
                        logger=sys.stdout,
                        rng=np.random):
        """
        Sequentially trains an SVI MDN to learn the posterior. Previous posteriors guide simulations.
        Simulated data are importance weighted when retraining the model.
        """

        # create an svi mdn
        if self.mdn is None:

            self.mdn = mdns.MDN_SVI(n_inputs=len(obs_xs),
                                    n_outputs=self.prior.n_dims,
                                    n_hiddens=self.n_hiddens,
                                    act_fun=self.act_fun,
                                    n_components=self.n_comps,
                                    rng=rng)

            self.regularizer = lf.SviRegularizer(self.mdn.mps, self.mdn.sps,
                                                 self.lreg)

        for i in xrange(n_rounds):

            logger.write('Learning posterior, round {0}\n'.format(i + 1))

            # simulate data
            logger.write('simulating data... ')
            ps, xs = simulators.sim_data(self.posterior.gen,
                                         self.sim_model,
                                         n_samples,
                                         rng=rng)
            logger.write('done\n')

            # importance weights
            if normalize_weights:
                log_ws = self.prior.eval(ps) - self.posterior.eval(ps)
                ws = n_samples * np.exp(log_ws - logsumexp(log_ws))
            else:
                ws = np.exp(self.prior.eval(ps) - self.posterior.eval(ps))

            if store_sims:
                self.all_ps.append(ps)
                self.all_xs.append(xs)
                self.all_ws.append(ws)

            # train model
            logger.write('training model...\n')

            trainer = trainers.WeightedSGD(model=self.mdn,
                                           trn_data=[xs, ps],
                                           trn_losses=-self.mdn.L,
                                           trn_weights=ws,
                                           trn_reg=self.regularizer /
                                           n_samples,
                                           trn_target=self.mdn.y,
                                           step=step,
                                           max_norm=0.1)
            trainer.train(minibatch=minibatch,
                          maxepochs=maxepochs,
                          monitor_every=1,
                          logger=logger)

            logger.write('training model done\n')

            # update regularizer
            m0s = [mp.get_value() for mp in self.mdn.mps]
            s0s = [sp.get_value() for sp in self.mdn.sps]
            self.regularizer = lf.SviRegularizer_DiagCov(
                self.mdn.mps, self.mdn.sps, m0s, s0s)

            self.posterior = self.mdn.get_mog(obs_xs)
            self.all_posteriors.append(self.posterior)

        return self.posterior
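The importance weights above are the ratio prior/proposal evaluated at the simulated parameters, optionally self-normalized so that they sum to `n_samples`. A standalone NumPy version of the same computation, with the normalization done in log space via log-sum-exp for numerical stability, is:

import numpy as np
from scipy.special import logsumexp

def importance_weights(log_prior, log_proposal, normalize=True):
    """Importance weights prior/proposal from per-sample log densities.
    With normalize=True the weights are rescaled to sum to the sample size."""
    log_ws = np.asarray(log_prior) - np.asarray(log_proposal)
    if normalize:
        return len(log_ws) * np.exp(log_ws - logsumexp(log_ws))
    return np.exp(log_ws)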
Example 8
def learn_conditional_density(model,
                              xs,
                              ys,
                              ws=None,
                              regularizer=None,
                              val_frac=0.05,
                              step=ss.Adam(a=1.e-4),
                              minibatch=100,
                              patience=20,
                              monitor_every=1,
                              logger=sys.stdout,
                              rng=np.random):
    """
    Train model to learn the conditional density p(y|x).
    """

    xs = np.asarray(xs, np.float32)
    ys = np.asarray(ys, np.float32)

    n_data = xs.shape[0]
    assert ys.shape[0] == n_data, 'wrong sizes'

    # shuffle data, so that training and validation sets come from the same distribution
    idx = rng.permutation(n_data)
    xs = xs[idx]
    ys = ys[idx]

    # split data into training and validation sets
    n_trn = int(n_data - val_frac * n_data)
    xs_trn, xs_val = xs[:n_trn], xs[n_trn:]
    ys_trn, ys_val = ys[:n_trn], ys[n_trn:]

    if ws is None:

        # train model without weights
        trainer = trainers.SGD(model=model,
                               trn_data=[xs_trn, ys_trn],
                               trn_loss=model.trn_loss if regularizer is None
                               else model.trn_loss + regularizer,
                               trn_target=model.y,
                               val_data=[xs_val, ys_val],
                               val_loss=model.trn_loss,
                               val_target=model.y,
                               step=step)
        trainer.train(minibatch=minibatch,
                      patience=patience,
                      monitor_every=monitor_every,
                      logger=logger)

    else:

        # prepare weights
        ws = np.asarray(ws, np.float32)
        assert ws.size == n_data, 'wrong sizes'
        ws = ws[idx]
        ws_trn, ws_val = ws[:n_trn], ws[n_trn:]

        # train model with weights
        trainer = trainers.WeightedSGD(model=model,
                                       trn_data=[xs_trn, ys_trn],
                                       trn_losses=-model.L,
                                       trn_weights=ws_trn,
                                       trn_reg=regularizer,
                                       trn_target=model.y,
                                       val_data=[xs_val, ys_val],
                                       val_losses=-model.L,
                                       val_weights=ws_val,
                                       val_target=model.y,
                                       step=step)
        trainer.train(minibatch=minibatch,
                      patience=patience,
                      monitor_every=monitor_every,
                      logger=logger)

    return model