Example #1
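A plain SGD training loop for NADE-k: it sweeps the training set in minibatches, draws a fresh orderless-NADE mask for each minibatch, records the cost at every mean-field step, linearly decays the learning rate after each epoch, and periodically saves and evaluates the model.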
    def simple_train_sgd(self, trainset, epoch, epoch_end):
        # train with SGD
        print 'Train %s with SGD' % self.__class__

        minibatch_idx_overall = utils.generate_minibatch_idx(
            trainset.shape[0], self.minibatch_size)
        while epoch < epoch_end:
            costs_epoch = []
            costs_by_step_epoch = []
            for k, use_idx in enumerate(minibatch_idx_overall):
                if self.verbose:
                    sys.stdout.write('\rTraining minibatches %d/%d' %
                                     (k, len(minibatch_idx_overall)))
                    sys.stdout.flush()
                minibatch_data = trainset[use_idx, :]
                minibatch_mask = utils.generate_masks_deep_orderless_nade(
                    minibatch_data.shape, self.rng_numpy)
                # train_fn returns [cost, costs_by_step]; costs_by_step has
                # shape (1, k), one cost per mean-field step (a plain deep
                # NADE variant would return a single cost instead)
                results = self.train_fn(minibatch_data, minibatch_mask)
                cost = results[0]
                costs_by_step = results[1].flatten()
                costs_epoch.append(cost)
                costs_by_step_epoch.append(costs_by_step)
            # now linearly decrease the learning rate
            current_lr = self.learning_rate.get_value()
            new_lr = current_lr - numpy.float32(self.lr_decrease)
            self.learning_rate.set_value(new_lr)
            cost_epoch_avg = numpy.mean(costs_epoch)
            cost_by_step_avg = numpy.asarray(costs_by_step_epoch).mean(axis=0)

            self.costs_steps.append(cost_by_step_avg)
            self.costs.append(cost_epoch_avg)
            print '\rTraining %d/%d epochs, cost %.2f, costs by step %s lr %.5f' % (
                epoch, epoch_end, cost_epoch_avg,
                numpy.round(cost_by_step_avg, 2), current_lr)
            if epoch != 0 and (epoch + 1) % self.valid_freq == 0:
                numpy.savetxt(self.save_model_path + 'epoch_costs_by_step.txt',
                              self.costs_steps)
                numpy.savetxt(self.save_model_path + 'epoch_costs.txt',
                              self.costs)
                if self.channel:
                    self.channel.save()
                self.sample_nade_v0(epoch)
                self.make_plots(self.costs)
                self.visualize_filters(epoch)
                self.LL(epoch, save_nothing=False)
                self.inpainting(epoch, self.k)
                self.save_model(epoch)
            epoch += 1
        # end of training; the bare print moves past the \r progress line
        print
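The two `utils` helpers that drive this loop are not shown on this page. Below is a minimal sketch of what they plausibly do, assuming `generate_minibatch_idx` chunks the dataset indices into minibatch-sized groups and `generate_masks_deep_orderless_nade` draws the random conditioning masks used for orderless NADE training (the project's actual `utils` module may differ):

    import numpy

    def generate_minibatch_idx(dataset_size, minibatch_size):
        # split [0, dataset_size) into consecutive index chunks
        idx = numpy.arange(dataset_size)
        n_batches = int(numpy.ceil(dataset_size / float(minibatch_size)))
        return [idx[i * minibatch_size:(i + 1) * minibatch_size]
                for i in range(n_batches)]

    def generate_masks_deep_orderless_nade(shape, rng):
        # per row: choose how many inputs are observed uniformly at random,
        # then set that many randomly chosen positions to 1 (observed)
        B, D = shape
        masks = numpy.zeros((B, D), dtype='float32')
        for b in range(B):
            n_observed = rng.randint(0, D + 1)
            masks[b, rng.permutation(D)[:n_observed]] = 1.0
        return masks

In the loop above, a mask entry of 1 marks a visible unit as conditioned on; in orderless NADE training the cost is typically computed only over the unobserved units.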
Example #2
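Builds the Theano graph for the variational-RBM version of NADE-k: symbolic inputs and masks (with test values for debugging), the model parameters, an L2-regularized cost, momentum-SGD or adadelta updates, and the compiled training, sampling, and log-likelihood functions.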
    def build_theano_fn_nade_k_rbm(self):
        # this is the variational rbm version of NADE-K

        # symbolic inputs and masks, with test values taken from the train set
        self.x = T.fmatrix('inputs')
        self.m = T.fmatrix('masks')
        t = self.trainset[:self.minibatch_size]
        self.x.tag.test_value = t
        self.m.tag.test_value = utils.generate_masks_deep_orderless_nade(
            t.shape, self.rng_numpy)
        # params of first layer
        self.W1 = utils.build_weights(n_row=self.n_visible,
                                      n_col=self.n_hidden,
                                      style=self.init_weights,
                                      name='W1',
                                      rng_numpy=self.rng_numpy)

        self.Wflags = utils.build_weights(n_row=self.n_visible,
                                          n_col=self.n_hidden,
                                          style=self.init_weights,
                                          name='Wflags',
                                          rng_numpy=self.rng_numpy)
        self.b1 = utils.build_bias(size=self.n_hidden, name='b_1')
        self.c = utils.build_bias(size=self.n_visible, name='c')
        if self.tied_weights:
            print 'W1 and V are tied'
            self.V = self.W1
            self.params = [self.W1, self.Wflags, self.b1, self.c]
        else:
            print 'W1 and V are untied'
            self.V = utils.build_weights(n_row=self.n_visible,
                                         n_col=self.n_hidden,
                                         style=self.init_weights,
                                         name='V',
                                         rng_numpy=self.rng_numpy)
            self.params = [self.W1, self.Wflags, self.b1, self.c, self.V]

        if self.n_layers == 2:
            self.W2 = utils.build_weights(n_row=self.n_hidden,
                                          n_col=self.n_hidden,
                                          style=self.init_weights,
                                          name='W2',
                                          rng_numpy=self.rng_numpy)
            self.b2 = utils.build_bias(size=self.n_hidden, name='b_2')
            self.params += [self.W2, self.b2]

        # mean-field states, shape (B, k, D): minibatch, mean-field steps, visibles
        self.mf = theano.shared(value=numpy.zeros(
            (self.minibatch_size, self.k, self.n_visible)).astype(floatX),
                                name='mean_field_v')
        cost, costs_by_step = self.get_nade_k_rbm_cost_theano(
            self.x, self.m, self.k)

        # L2 weight decay on every weight matrix (1-D biases are skipped)
        for param in self.params:
            if param.ndim == 2:
                cost += T.sum(param ** 2) * constantX(self.l2)
        # shared learning rate, so simple_train_sgd can decay it between epochs
        self.learning_rate = theano.shared(numpy.float32(self.lr),
                                           name='learning_rate')

        updates = OrderedDict()
        consider_constant = None
        if self.sgd_type == 0:
            print 'use momentum sgd'
        elif self.sgd_type == 1:
            print 'use adadelta sgd'
        else:
            raise NotImplementedError()
        # sgd_type selects the update rule inside build_updates_with_rules
        updates = utils.build_updates_with_rules(
            cost, self.params, consider_constant, updates, self.learning_rate,
            self.lr_decrease, self.momentum, floatX, self.sgd_type)

        # compile training functions
        print 'compiling fns ...'
        self.train_fn = theano.function(inputs=(self.x, self.m),
                                        outputs=[cost, costs_by_step],
                                        updates=updates,
                                        name='train_fn')

        self.sampling_fn = self.get_nade_k_rbm_sampling_fn_theano(self.k)
        self.compute_LL_with_ordering_fn = self.get_nade_k_rbm_LL_theano(
            self.k)
        # this is built later
        self.inpainting_fn = None
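Two names used above come from the surrounding module rather than this snippet. `floatX` is presumably Theano's configured float type, and `constantX` a helper that wraps a scalar as a Theano constant of that dtype; a minimal sketch under that assumption:

    import numpy
    import theano
    import theano.tensor as T

    floatX = theano.config.floatX

    def constantX(value):
        # wrap a Python scalar as a Theano constant of dtype floatX
        return T.constant(numpy.asarray(value, dtype=floatX))

Putting the two examples together, a driver would build the graph once and then train; the class name and constructor arguments below are illustrative assumptions, not the project's actual interface:

    model = NadeK(n_visible=784, n_hidden=500, k=5, minibatch_size=100,
                  lr=0.001, lr_decrease=1e-6)       # hypothetical constructor
    model.build_theano_fn_nade_k_rbm()              # compiles train_fn etc.
    model.simple_train_sgd(trainset, epoch=0, epoch_end=1000)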