def get_train(self, batchsize=None, testsize=None):
        """Compile and return the Theano training and test functions.

        The training cost is the mean negative log of the softmax
        probability assigned to the correct label, computed on the output
        of ``self._propup``.  Parameter updates are collected from every
        layer in ``self.layers``.

        Args:
            batchsize: Forwarded to ``self._propup`` as its second argument
                when building the training graph.
            testsize: Forwarded to ``self._propup`` as its second argument
                when building the test graph.

        Returns:
            A ``(train, test)`` pair of compiled functions.  Both take a
            4-D input tensor and an int32 label vector.
            ``train`` applies the layer updates and returns
            ``[cost, error]``; ``test`` returns the mean misclassification
            rate only.
        """
        sx = tt.tensor4()
        sy = tt.ivector()

        # --- make training function
        yc = self._propup(sx, batchsize, noise=False)

        # mean negative log-likelihood of the correct class under a softmax
        log_probs = tt.log(tt.nnet.softmax(yc))
        cost = -log_probs[tt.arange(sy.shape[0]), sy].mean()
        train_error = tt.neq(tt.argmax(yc, axis=1), sy).mean()

        # each layer turns the shared gradient dict into its own updates
        params = self.params
        grads = dict(zip(params, theano.grad(cost, params)))
        updates = collections.OrderedDict()
        for layer in self.layers:
            updates.update(layer.updates(grads))

        train = theano.function(
            [sx, sy], [cost, train_error], updates=updates)

        # --- make test function
        y_pred = tt.argmax(self._propup(sx, testsize, noise=False), axis=1)
        test_error = tt.mean(tt.neq(y_pred, sy))
        test = theano.function([sx, sy], test_error)

        return train, test
Esempio n. 2
0
    def train_classifier(self, train, test, n_epochs=30):
        """Fit a linear classifier on top of the encoded training images.

        The images are encoded once with ``self.encode``.  A weight matrix
        and bias vector are then fit to those codes by minimising the mean
        of ``multi_hinge_margin`` with L-BFGS-B.  The optimised values are
        stored in ``self.W`` and ``self.b``.

        Args:
            train: ``(images, labels)`` pair of arrays.
            test: Unused; kept so the signature stays compatible.
            n_epochs: Maximum number of objective evaluations allowed
                (passed to the optimiser as ``maxfun``).
        """
        dtype = theano.config.floatX

        # --- find codes
        images, labels = train
        n_labels = len(np.unique(labels))
        codes = self.encode(images.astype(dtype))

        codes = theano.shared(codes.astype(dtype), name='codes')
        # labels are stored as floatX and cast back to int32 in the graph
        labels = tt.cast(theano.shared(labels.astype(dtype), name='labels'),
                         'int32')

        # --- compute backprop function
        Wshape = (self.autos[-1].n_hid, n_labels)
        x = tt.matrix('x', dtype=dtype)
        y = tt.ivector('y')
        W = tt.matrix('W', dtype=dtype)
        b = tt.vector('b', dtype=dtype)

        W0 = np.random.normal(size=Wshape).astype(dtype).flatten() / 10
        b0 = np.zeros(n_labels)

        def split_p(p):
            """Split a flat parameter vector into ``[W, b]``."""
            return [p[:-n_labels].reshape(Wshape), p[-n_labels:]]

        def form_p(params):
            """Flatten and concatenate arrays into one parameter vector."""
            return np.hstack([p.flatten() for p in params])

        # objective: mean hinge margin of the linear classifier output
        yc = tt.dot(x, W) + b
        cost = multi_hinge_margin(yc, y).mean()

        # the codes and labels are baked in, so only W and b are inputs
        grads = tt.grad(cost, [W, b])
        f_df = theano.function([W, b], [cost] + grads,
                               givens={
                                   x: codes,
                                   y: labels
                               })

        # --- begin backprop
        def f_df_wrapper(p):
            # the optimiser works in float64; Theano expects floatX
            w_val, b_val = split_p(p)
            outs = f_df(w_val.astype(dtype), b_val.astype(dtype))
            cost_val, grad = outs[0], form_p(outs[1:])
            return cost_val.astype('float64'), grad.astype('float64')

        p0 = form_p([W0, b0])
        # use the public SciPy entry point rather than the deprecated
        # ``scipy.optimize.lbfgsb`` submodule
        p_opt, mincost, info = scipy.optimize.fmin_l_bfgs_b(
            f_df_wrapper, p0, maxfun=n_epochs, iprint=1)

        self.W, self.b = split_p(p_opt)
Esempio n. 3
0
    def backprop(self, train_set, test_set, noise=0, shift=False, n_epochs=30):
        """Fine-tune the autoencoder parameters with L-BFGS-B.

        Minimises the mean of ``multi_hinge_margin`` on the classifier
        output with respect to ``W`` and ``c`` of every autoencoder in
        ``self.autos``.  The classifier parameters ``self.W`` and
        ``self.b`` must already be set; they are not among the optimised
        parameters and stay fixed.

        Args:
            train_set: ``(images, labels)`` pair of arrays.
            test_set: Unused; kept so the signature stays compatible.
            noise: Forwarded to ``self.propup`` as ``noise``.
            shift: If true, the training images are passed through
                ``shift_images`` on every objective evaluation.
            n_epochs: Maximum number of objective evaluations allowed
                (passed to the optimiser as ``maxfun``).
        """
        dtype = theano.config.floatX

        params = []
        for auto in self.autos:
            params.extend([auto.W, auto.c])

        # --- compute backprop function
        assert self.W is not None and self.b is not None
        W = theano.shared(self.W.astype(dtype), name='Wc')
        b = theano.shared(self.b.astype(dtype), name='bc')

        x = tt.matrix('batch')
        y = tt.ivector('labels')

        # objective: mean hinge margin of the classifier on the top codes
        yn = self.propup(x, noise=noise)
        yc = tt.dot(yn, W) + b
        cost = multi_hinge_margin(yc, y).mean()

        # The optimiser needs the objective value and its own gradient.
        # Returning the 0/1 classification error here (as before) paired
        # it with the gradients of ``cost``, which do not match.
        grads = tt.grad(cost, params)
        f_df = theano.function([x, y], [cost] + grads)

        np_params = [param.get_value() for param in params]

        # --- run L_BFGS
        train_images, train_labels = train_set
        train_labels = train_labels.astype('int32')

        def f_df_wrapper(p):
            # push the candidate parameters into the shared variables
            for param, value in zip(params, split_params(p, np_params)):
                param.set_value(value.astype(param.dtype))

            images = shift_images(train_images, (28, 28)) if shift else train_images

            outs = f_df(images, train_labels)
            cost_val, grad = outs[0], join_params(outs[1:])
            return cost_val.astype('float64'), grad.astype('float64')

        p0 = join_params(np_params)
        # use the public SciPy entry point rather than the deprecated
        # ``scipy.optimize.lbfgsb`` submodule
        p_opt, mincost, info = scipy.optimize.fmin_l_bfgs_b(
            f_df_wrapper, p0, maxfun=n_epochs, iprint=1)

        # leave the shared variables holding the optimiser's final answer
        for param, value in zip(params, split_params(p_opt, np_params)):
            param.set_value(value.astype(param.dtype), borrow=False)
Esempio n. 4
0
    def train_classifier(self, train, test, n_epochs=30):
        """Fit a linear classifier on top of the encoded training images.

        The images are encoded once with ``self.encode``.  A weight matrix
        and bias vector are then fit to those codes by minimising the mean
        of ``multi_hinge_margin`` with L-BFGS-B.  The optimised values are
        stored in ``self.W`` and ``self.b``.

        Args:
            train: ``(images, labels)`` pair of arrays.
            test: Unused; kept so the signature stays compatible.
            n_epochs: Maximum number of objective evaluations allowed
                (passed to the optimiser as ``maxfun``).
        """
        dtype = theano.config.floatX

        # --- find codes
        images, labels = train
        n_labels = len(np.unique(labels))
        codes = self.encode(images.astype(dtype))

        codes = theano.shared(codes.astype(dtype), name='codes')
        # labels are stored as floatX and cast back to int32 in the graph
        labels = tt.cast(theano.shared(labels.astype(dtype), name='labels'), 'int32')

        # --- compute backprop function
        Wshape = (self.autos[-1].n_hid, n_labels)
        x = tt.matrix('x', dtype=dtype)
        y = tt.ivector('y')
        W = tt.matrix('W', dtype=dtype)
        b = tt.vector('b', dtype=dtype)

        W0 = np.random.normal(size=Wshape).astype(dtype).flatten() / 10
        b0 = np.zeros(n_labels)

        def split_p(p):
            """Split a flat parameter vector into ``[W, b]``."""
            return [p[:-n_labels].reshape(Wshape), p[-n_labels:]]

        def form_p(params):
            """Flatten and concatenate arrays into one parameter vector."""
            return np.hstack([p.flatten() for p in params])

        # objective: mean hinge margin of the linear classifier output
        yc = tt.dot(x, W) + b
        cost = multi_hinge_margin(yc, y).mean()

        # the codes and labels are baked in, so only W and b are inputs
        grads = tt.grad(cost, [W, b])
        f_df = theano.function(
            [W, b], [cost] + grads,
            givens={x: codes, y: labels})

        # --- begin backprop
        def f_df_wrapper(p):
            # the optimiser works in float64; Theano expects floatX
            w_val, b_val = split_p(p)
            outs = f_df(w_val.astype(dtype), b_val.astype(dtype))
            cost_val, grad = outs[0], form_p(outs[1:])
            return cost_val.astype('float64'), grad.astype('float64')

        p0 = form_p([W0, b0])
        # use the public SciPy entry point rather than the deprecated
        # ``scipy.optimize.lbfgsb`` submodule
        p_opt, mincost, info = scipy.optimize.fmin_l_bfgs_b(
            f_df_wrapper, p0, maxfun=n_epochs, iprint=1)

        self.W, self.b = split_p(p_opt)
Esempio n. 5
0
    def compute_loss(self, yc, y):
        """Build the symbolic training cost and classification error.

        Args:
            yc: Symbolic classifier outputs, one row per example.
            y: Symbolic integer label for each example.

        Returns:
            ``(cost, error)``.  ``cost`` depends on ``self.loss``: the mean
            negative log softmax probability of the correct label for
            ``'nll'``, or the mean of ``multi_hinge_margin`` for
            ``'hinge'``.  ``error`` is the mean misclassification rate.

        Raises:
            ValueError: If ``self.loss`` is neither ``'nll'`` nor
                ``'hinge'``.
        """
        loss = self.loss
        if loss not in ('nll', 'hinge'):
            raise ValueError("Unrecognized loss type '%s'" % loss)

        if loss == 'nll':
            # negative log-likelihood of the correct class
            log_probs = tt.log(tt.nnet.softmax(yc))
            cost = -tt.mean(log_probs[tt.arange(y.shape[0]), y])
        else:
            # hinge loss
            cost = multi_hinge_margin(yc, y).mean()

        # the error measure is the same whichever cost is used
        predictions = tt.argmax(yc, axis=1)
        error = tt.mean(tt.neq(predictions, y))

        return cost, error
Esempio n. 6
0
    def compute_loss(self, yc, y):
        """Return the symbolic ``(cost, error)`` pair for ``self.loss``.

        Args:
            yc: Symbolic classifier outputs, one row per example.
            y: Symbolic integer label for each example.

        Returns:
            ``cost`` is the mean of ``multi_hinge_margin`` when
            ``self.loss`` is ``'hinge'``, or the mean negative log softmax
            probability of the correct label when it is ``'nll'``.
            ``error`` is the mean misclassification rate in both cases.

        Raises:
            ValueError: If ``self.loss`` names any other loss.
        """
        kind = self.loss

        if kind == 'hinge':
            # compute hinge loss
            cost = multi_hinge_margin(yc, y).mean()
        elif kind == 'nll':
            # compute negative log likelihood
            rows = tt.arange(y.shape[0])
            cost = -tt.mean(tt.log(tt.nnet.softmax(yc))[rows, y])
        else:
            raise ValueError("Unrecognized loss type '%s'" % kind)

        # fraction of examples whose highest-scoring class is wrong
        mistakes = tt.neq(tt.argmax(yc, axis=1), y)
        return cost, tt.mean(mistakes)
Esempio n. 7
0
    def sgd(self, train_set, test_set,
            rate=0.1, tradeoff=0.5, n_epochs=30, batch_size=100):
        """Use SGD to do combined autoencoder and classifier training.

        The cost is ``(1 - tradeoff) * auto_cost + tradeoff * class_cost``,
        where ``auto_cost`` is the mean per-example RMS reconstruction
        error and ``class_cost`` is the mean of ``multi_hinge_margin`` on
        the classifier output.  As a side effect every autoencoder gets a
        new shared variable ``V``, initialised to a copy of ``W``
        transposed and trained alongside ``W``, ``c`` and ``b``.

        After every epoch the mean classification error over the batches
        is printed and the reconstruction / filter figures are redrawn.

        Args:
            train_set: ``(images, labels)`` pair of arrays.  The images are
                reshaped into equal batches, so the data must divide
                evenly by ``batch_size``.
            test_set: ``(images, labels)`` pair; the images are
                reconstructed and plotted each epoch unless they are
                ``None``.  The labels are not used.
            rate: Learning rate applied to every gradient.
            tradeoff: Weight in ``[0, 1]`` on the classification cost.
            n_epochs: Number of passes over the training batches.
            batch_size: Number of examples per batch.
        """
        dtype = theano.config.floatX
        assert 0 <= tradeoff <= 1

        params = []
        for auto in self.autos:
            # V starts as a copy of W transposed and is updated separately
            auto.V = theano.shared(auto.W.get_value(borrow=False).T, name='V')
            params.extend([auto.W, auto.V, auto.c, auto.b])

        # --- compute backprop function
        assert self.W is not None and self.b is not None
        # NOTE(review): W and b are not in ``params``, so no update is
        # ever generated for them and the per-epoch copy-back below
        # cannot change self.W / self.b -- confirm whether the classifier
        # was meant to be trained here too.
        W = theano.shared(self.W.astype(dtype), name='Wc')
        b = theano.shared(self.b.astype(dtype), name='bc')

        x = tt.matrix('batch')
        y = tt.ivector('labels')

        yn = self.propup(x, noise=1.0)

        # compute classification cost and error
        yc = tt.dot(yn, W) + b
        class_cost = multi_hinge_margin(yc, y).mean()
        class_error = tt.mean(tt.neq(tt.argmax(yc, axis=1), y))

        # compute autoencoder error (per-example RMS reconstruction error)
        z = self.propdown(yn)
        rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
        auto_cost = tt.mean(rmses)

        cost = (tt.cast(1 - tradeoff, dtype) * auto_cost
                + tt.cast(tradeoff, dtype) * class_cost)

        # compute gradients and plain gradient-descent updates
        grads = tt.grad(cost, params)
        updates = collections.OrderedDict()
        for param, grad in zip(params, grads):
            updates[param] = param - tt.cast(rate, dtype) * grad

        # re-apply each autoencoder's mask to the updated weights
        for auto in self.autos:
            if auto.mask is not None:
                updates[auto.W] = updates[auto.W] * auto.mask
                updates[auto.V] = updates[auto.V] * auto.mask.T

        train_dbn = theano.function([x, y], class_error, updates=updates)

        # --- perform SGD
        images, labels = train_set
        ibatches = images.reshape(-1, batch_size, images.shape[1])
        lbatches = labels.reshape(-1, batch_size).astype('int32')
        assert np.isfinite(ibatches).all()

        test_images, _ = test_set

        for epoch in range(n_epochs):
            # train_dbn returns the classification error of each batch
            batch_errors = [train_dbn(batch, label)
                            for batch, label in zip(ibatches, lbatches)]

            # copy back parameters (for test function)
            self.W = W.get_value()
            self.b = b.get_value()

            # single-argument form prints identically on Python 2 and 3
            print("Epoch %d: %0.3f" % (epoch, np.mean(batch_errors)))

            if test_images is not None:
                # plot reconstructions on test set
                plt.figure(2)
                plt.clf()
                recons = self.reconstruct(test_images)
                show_recons(test_images, recons)
                plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()
Esempio n. 8
0
    def sgd(self,
            train_set,
            test_set,
            rate=0.1,
            tradeoff=0.5,
            n_epochs=30,
            batch_size=100):
        """Use SGD to do combined autoencoder and classifier training.

        The cost is ``(1 - tradeoff) * auto_cost + tradeoff * class_cost``,
        where ``auto_cost`` is the mean per-example RMS reconstruction
        error and ``class_cost`` is the mean of ``multi_hinge_margin`` on
        the classifier output.  As a side effect every autoencoder gets a
        new shared variable ``V``, initialised to a copy of ``W``
        transposed and trained alongside ``W``, ``c`` and ``b``.

        After every epoch the mean classification error over the batches
        is printed and the reconstruction / filter figures are redrawn.

        Args:
            train_set: ``(images, labels)`` pair of arrays.  The images are
                reshaped into equal batches, so the data must divide
                evenly by ``batch_size``.
            test_set: ``(images, labels)`` pair; the images are
                reconstructed and plotted each epoch unless they are
                ``None``.  The labels are not used.
            rate: Learning rate applied to every gradient.
            tradeoff: Weight in ``[0, 1]`` on the classification cost.
            n_epochs: Number of passes over the training batches.
            batch_size: Number of examples per batch.
        """
        dtype = theano.config.floatX
        assert 0 <= tradeoff <= 1

        params = []
        for auto in self.autos:
            # V starts as a copy of W transposed and is updated separately
            auto.V = theano.shared(auto.W.get_value(borrow=False).T, name='V')
            params.extend([auto.W, auto.V, auto.c, auto.b])

        # --- compute backprop function
        assert self.W is not None and self.b is not None
        # NOTE(review): W and b are not in ``params``, so no update is
        # ever generated for them and the per-epoch copy-back below
        # cannot change self.W / self.b -- confirm whether the classifier
        # was meant to be trained here too.
        W = theano.shared(self.W.astype(dtype), name='Wc')
        b = theano.shared(self.b.astype(dtype), name='bc')

        x = tt.matrix('batch')
        y = tt.ivector('labels')

        yn = self.propup(x, noise=1.0)

        # compute classification cost and error
        yc = tt.dot(yn, W) + b
        class_cost = multi_hinge_margin(yc, y).mean()
        class_error = tt.mean(tt.neq(tt.argmax(yc, axis=1), y))

        # compute autoencoder error (per-example RMS reconstruction error)
        z = self.propdown(yn)
        rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
        auto_cost = tt.mean(rmses)

        cost = (tt.cast(1 - tradeoff, dtype) * auto_cost +
                tt.cast(tradeoff, dtype) * class_cost)

        # compute gradients and plain gradient-descent updates
        grads = tt.grad(cost, params)
        updates = collections.OrderedDict()
        for param, grad in zip(params, grads):
            updates[param] = param - tt.cast(rate, dtype) * grad

        # re-apply each autoencoder's mask to the updated weights
        for auto in self.autos:
            if auto.mask is not None:
                updates[auto.W] = updates[auto.W] * auto.mask
                updates[auto.V] = updates[auto.V] * auto.mask.T

        train_dbn = theano.function([x, y], class_error, updates=updates)

        # --- perform SGD
        images, labels = train_set
        ibatches = images.reshape(-1, batch_size, images.shape[1])
        lbatches = labels.reshape(-1, batch_size).astype('int32')
        assert np.isfinite(ibatches).all()

        test_images, _ = test_set

        for epoch in range(n_epochs):
            # train_dbn returns the classification error of each batch
            batch_errors = [train_dbn(batch, label)
                            for batch, label in zip(ibatches, lbatches)]

            # copy back parameters (for test function)
            self.W = W.get_value()
            self.b = b.get_value()

            # single-argument form prints identically on Python 2 and 3
            print("Epoch %d: %0.3f" % (epoch, np.mean(batch_errors)))

            if test_images is not None:
                # plot reconstructions on test set
                plt.figure(2)
                plt.clf()
                recons = self.reconstruct(test_images)
                show_recons(test_images, recons)
                plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()