Example #1
    def finetune(self, train_set, test_set, n_epochs, learning_rate,
                 batch_size):

        train_set_x, train_set_y, train_set_c = train_set
        test_set_x, test_set_y, test_set_c = test_set

        # build the target matrix Z_nk: +1 at the true label of each sample, -1 elsewhere
        train_set_z = np.zeros(train_set_c.shape) - 1
        for i in xrange(train_set_z.shape[0]):
            train_set_z[i][train_set_y[i]] = 1

        train_set_x = make_shared_data(train_set_x)
        train_set_c = make_shared_data(train_set_c)
        train_set_z = make_shared_data(train_set_z)
        train_set_y = T.cast(make_shared_data(train_set_y), 'int32')

        test_set_x = make_shared_data(test_set_x)
        test_set_c = make_shared_data(test_set_c)
        test_set_y = T.cast(make_shared_data(test_set_y), 'int32')

        index = T.lscalar()  # symbolic variable for index to a mini-batch

        cost = self.logLayer.one_sided_regression_loss

        gparams = T.grad(cost, self.params)

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=[(param, param - learning_rate * gparam)
                     for param, gparam in zip(self.params, gparams)],
            givens={
                self.input:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.logLayer.cost_vector:
                train_set_c[index * batch_size:(index + 1) * batch_size],
                self.logLayer.Z_nk:
                train_set_z[index * batch_size:(index + 1) * batch_size]
            },
            name='train_model')

        in_sample_result = theano.function(
            inputs=[],
            outputs=[self.logLayer.error, self.logLayer.future_cost],
            givens={
                self.input: train_set_x,
                self.logLayer.y: train_set_y,
                self.logLayer.cost_vector: train_set_c
            },
            name='in_sample_result')

        out_sample_result = theano.function(
            inputs=[],
            outputs=[self.logLayer.error, self.logLayer.future_cost],
            givens={
                self.input: test_set_x,
                self.logLayer.y: test_set_y,
                self.logLayer.cost_vector: test_set_c
            },
            name='out_sample_result')

        n_train_batches = train_set_x.get_value(
            borrow=True).shape[0] // batch_size

        best_Cout = np.inf
        corresponding_epoch = None
        corresponding_Eout = None
        for epoch in xrange(n_epochs):
            current_batch_cost = 0.
            for batch_index in xrange(n_train_batches):
                current_batch_cost += train_model(batch_index)
            print '    epoch #%d, loss = %f' % (epoch + 1, current_batch_cost /
                                                n_train_batches)
            # TODO: speed this up -- full-set evaluation every epoch is expensive
            Ein, Cin = in_sample_result()
            Eout, Cout = out_sample_result()
            if Cout < best_Cout:
                best_Cout = Cout
                corresponding_Eout = Eout
                corresponding_epoch = epoch + 1
                print '        better performance achieved ... best_Cout = %f' % best_Cout

        print 'after training %d epochs, best_Cout = %f, occurred in epoch #%d, and corresponding_Eout = %f'   \
               % (n_epochs, best_Cout, corresponding_epoch, corresponding_Eout)
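
All of these examples rely on a `make_shared_data` helper (imported from `toolbox` in Examples 3 and 4) whose definition is not shown. A minimal sketch of what it presumably does, following the usual Theano idiom of wrapping a NumPy array in a shared variable cast to `floatX` so mini-batch slices can be taken on the device:

import numpy as np
import theano

def make_shared_data(data, borrow=True):
    # wrap a NumPy array in a Theano shared variable, cast to floatX
    return theano.shared(
        np.asarray(data, dtype=theano.config.floatX),
        borrow=borrow)

The `T.cast(..., 'int32')` applied to the label arrays above would then turn the float-typed shared labels back into integers suitable for indexing.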
Example #2
    def learning_feature(
        self,
        train_set,
        n_epochs, learning_rate, batch_size,
        corruption_level, balance_coef
    ):
        # corrupt the input (the `denoising` step)
        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        # map the corrupted input to hidden layer
        y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b)
        # map the hidden representation back to an (unsupervised) reconstruction of the input
        z1 = T.nnet.sigmoid(T.dot(y, self.Wu) + self.bu)
        L1 = T.mean(-T.sum(self.x * T.log(z1) + (1 - self.x) * T.log(1 - z1), axis=1))

        # perform one-sided regression to fit the cost
        z2 = T.dot(y, self.Ws) + self.bs
        # symbolic placeholders for the per-class costs and the +1/-1 target matrix
        cost_vector = T.matrix('cost_vector')
        Z_nk = T.matrix('Z_nk')
        # original one-sided (hinge) loss:
        # xi = T.maximum(Z_nk * (z2 - cost_vector), 0.)  # xi is a matrix
        # L2 = T.sum(xi)
        # smooth logistic loss, an upper bound of the one-sided loss
        delta = T.log(1 + T.exp(Z_nk * (z2 - cost_vector)))
        L2 = T.sum(delta)

        # symbolic variable for balance_coef
        bc = T.scalar('bc')

        cost = L1 + bc * L2

        gparams = T.grad(cost, self.params)

        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        batch_index = T.lscalar('batch_index')

        train_set_x, train_set_y, train_set_c = train_set

        # build the target matrix Z_nk: +1 at the true label of each sample, -1 elsewhere
        train_set_z = np.zeros(train_set_c.shape) - 1
        for i in xrange(train_set_z.shape[0]):
            train_set_z[i][train_set_y[i]] = 1

        train_set_x = make_shared_data(train_set_x)
        train_set_c = make_shared_data(train_set_c)
        train_set_z = make_shared_data(train_set_z)

        pretrain_model = theano.function(
            inputs=[batch_index, bc],
            outputs=[cost, L1, L2],
            updates=updates,
            givens={
                self.x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                cost_vector: train_set_c[batch_index * batch_size: (batch_index + 1) * batch_size],
                Z_nk: train_set_z[batch_index * batch_size: (batch_index + 1) * batch_size]
            },
            name='pretrain_model'
        )

        n_batches = train_set_x.get_value().shape[0] // batch_size
        for epoch in xrange(n_epochs):
            epoch_cost = 0.
            L1_cost = 0.
            L2_cost = 0.
            for batch in xrange(n_batches):
                batch_cost = pretrain_model(batch, balance_coef)
                epoch_cost += batch_cost[0]
                L1_cost += batch_cost[1]
                L2_cost += batch_cost[2]
            epoch_cost /= n_batches
            L1_cost /= n_batches
            L2_cost /= n_batches
            print '        epoch #%d, loss = (%f, %f, %f)' % (epoch + 1, epoch_cost, L1_cost, L2_cost)

        y_new = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)

        transform_data = theano.function(
            inputs=[],
            outputs=y_new,
            givens={
                self.x: train_set_x
            },
            name='transform_data'
        )

        return [transform_data(), train_set_y, train_set_c.get_value()]
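
Example 2 replaces the commented-out one-sided (hinge) term `max(Z_nk * (z2 - cost_vector), 0)` with the smooth surrogate `log(1 + exp(.))`. A small NumPy check with made-up toy values (purely illustrative, not part of the original code) shows that the logistic term is an elementwise upper bound of the hinge term:

import numpy as np

# assumed toy values: one sample, three classes
z2 = np.array([[0.3, -1.2, 2.0]])    # predicted per-class costs
c = np.array([[0.0, 1.0, 1.0]])      # true per-class costs
Z = np.array([[1.0, -1.0, -1.0]])    # +1 at the true label, -1 elsewhere

hinge = np.maximum(Z * (z2 - c), 0.)           # one-sided regression loss
logistic = np.log(1. + np.exp(Z * (z2 - c)))   # smooth upper bound

assert np.all(logistic >= hinge)  # log(1 + e^x) >= max(x, 0) for every x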
Example #3
    def sgd_optimize(self, train_set, test_set, n_epochs, learning_rate, batch_size):

        train_set_x, train_set_y, train_set_c = train_set
        test_set_x, test_set_y, test_set_c = test_set

        # build the target matrix Z_nk: +1 at the true label of each sample, -1 elsewhere
        train_set_z = np.zeros(train_set_c.shape) - 1
        for i in xrange(train_set_z.shape[0]):
            train_set_z[i][train_set_y[i]] = 1

        from toolbox import make_shared_data

        train_set_x = make_shared_data(train_set_x)
        train_set_c = make_shared_data(train_set_c)
        train_set_z = make_shared_data(train_set_z)
        train_set_y = T.cast(make_shared_data(train_set_y), 'int32')

        test_set_x = make_shared_data(test_set_x)
        test_set_c = make_shared_data(test_set_c)
        test_set_y = T.cast(make_shared_data(test_set_y), 'int32')

        print '... building the model'

        index = T.lscalar()

        cost = self.logRegressionLayer.one_sided_regression_loss

        gparams = [T.grad(cost, param) for param in self.params]

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=[
                (param, param - learning_rate * gparam)
                for param, gparam in zip(self.params, gparams)
            ],
            givens={
                self.input: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.logRegressionLayer.cost_vector: train_set_c[index * batch_size: (index + 1) * batch_size],
                self.logRegressionLayer.Z_nk: train_set_z[index * batch_size: (index + 1) * batch_size]
            },
            name='train_model'
        )

        in_sample_result = theano.function(
            inputs=[],
            outputs=[
                self.logRegressionLayer.error,
                self.logRegressionLayer.future_cost
            ],
            givens={
                self.input: train_set_x,
                self.logRegressionLayer.y: train_set_y,
                self.logRegressionLayer.cost_vector: train_set_c
            },
            name='in_sample_result'
        )

        out_sample_result = theano.function(
            inputs=[],
            outputs=[
                self.logRegressionLayer.error,
                self.logRegressionLayer.future_cost
            ],
            givens={
                self.input: test_set_x,
                self.logRegressionLayer.y: test_set_y,
                self.logRegressionLayer.cost_vector: test_set_c
            },
            name='out_sample_result'
        )

        n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

        print '... training the model'

        best_Cout = np.inf
        corresponding_Eout = np.inf
        for epoch in xrange(n_epochs):
            print 'epoch #%d' % (epoch + 1)
            for batch_index in xrange(n_train_batches):
                batch_cost = train_model(batch_index)
            Ein, Cin = in_sample_result()
            Eout, Cout = out_sample_result()
            if Cout < best_Cout:
                best_Cout = Cout
                corresponding_Eout = Eout
                print '    better performance achieved ... best_Cout = %f' % best_Cout

        print 'after training %d epochs, best_Cout = %f, and corresponding_Eout = %f'   \
               % (n_epochs, best_Cout, corresponding_Eout)
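
Each example builds `train_set_z` (+1 at the labelled class, -1 everywhere else) with an explicit Python loop. Assuming `train_set_y` holds integer class labels and `train_set_c` is the N x K cost matrix, the same matrix can be built in a single vectorized step; a sketch, not code from the original:

import numpy as np

def build_Z(train_set_y, train_set_c):
    # Z_nk target matrix: +1 at the true class of each sample, -1 everywhere else
    y = np.asarray(train_set_y, dtype='int64')
    Z = -np.ones(train_set_c.shape)
    Z[np.arange(y.shape[0]), y] = 1
    return Z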
Example #4
    def sgd_optimize(self, train_set, test_set, n_epochs, learning_rate, batch_size):
        """ Optimizing model parameters by stochastic gradient descent """

        train_set_x, train_set_y, train_set_c = train_set
        assert train_set_x.shape == (60000, 784)
        assert train_set_y.shape == (60000,)
        assert train_set_c.shape == (60000, 10)

        test_set_x, test_set_y, test_set_c = test_set
        assert test_set_x.shape == (10000, 784)
        assert test_set_y.shape == (10000,)
        assert test_set_c.shape == (10000, 10)

        from toolbox import make_shared_data

        train_set_x = make_shared_data(train_set_x)
        train_set_c = make_shared_data(train_set_c)
        train_set_y = T.cast(make_shared_data(train_set_y), 'int32')

        test_set_x = make_shared_data(test_set_x)
        test_set_c = make_shared_data(test_set_c)
        test_set_y = T.cast(make_shared_data(test_set_y), 'int32')

        print '... building the model'

        index = T.lscalar()

        cost = self.MSE

        gparams = [T.grad(cost, param) for param in self.params]

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=[
                (param, param - learning_rate * gparam)
                for param, gparam in zip(self.params, gparams)
            ],
            givens={
                self.input: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.cost_vector: train_set_c[index * batch_size: (index + 1) * batch_size]
            },
            name='train_model'
        )

        in_sample_result = theano.function(
            inputs=[],
            outputs=[self.error, self.future_cost],
            givens={
                self.input: train_set_x,
                self.y: train_set_y,
                self.cost_vector: train_set_c
            },
            name='in_sample_result'
        )

        out_sample_result = theano.function(
            inputs=[],
            outputs=[self.error, self.future_cost],
            givens={
                self.input: test_set_x,
                self.y: test_set_y,
                self.cost_vector: test_set_c
            },
            name='out_sample_result'
        )

        n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

        print '... training the model'

        best_Cout = np.inf
        corresponding_Eout = np.inf
        for epoch in xrange(n_epochs):
            print 'epoch #%d' % (epoch + 1)
            for batch_index in xrange(n_train_batches):
                batch_cost = train_model(batch_index)
                # full-set evaluation runs after every mini-batch here, which is expensive
                Ein, Cin = in_sample_result()
                Eout, Cout = out_sample_result()
                if Cout < best_Cout:
                    best_Cout = Cout
                    corresponding_Eout = Eout
                    print '    better performance achieved ... best_Cout = %f' % best_Cout

        print 'after training %d epochs, best_Cout = %f, and corresponding_Eout = %f'   \
               % (n_epochs, best_Cout, corresponding_Eout)
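
The assertions in Example 4 pin the data to MNIST-like shapes (60000 x 784 training images, 10000 x 784 test images, 10 classes). A hypothetical call with randomly generated placeholders standing in for the real images and cost matrices; `model`, `train_x`, and the hyperparameter values are assumptions, not taken from the original code:

import numpy as np

rng = np.random.RandomState(0)

# hypothetical MNIST-shaped placeholders; real data would come from the dataset loader
train_x = rng.rand(60000, 784).astype('float32')
train_y = rng.randint(0, 10, size=60000)
train_c = rng.rand(60000, 10)    # per-class cost vectors
test_x = rng.rand(10000, 784).astype('float32')
test_y = rng.randint(0, 10, size=10000)
test_c = rng.rand(10000, 10)

# `model` is assumed to be an instance of the class that defines sgd_optimize
model.sgd_optimize(
    train_set=(train_x, train_y, train_c),
    test_set=(test_x, test_y, test_c),
    n_epochs=50, learning_rate=0.01, batch_size=200)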