Beispiel #1
0
    def pretrain(self, batches, test_images, n_epochs=10, **train_params):
        """Train this model on mini-batches, plotting progress every epoch.

        A Theano training function is compiled from the cost and updates
        returned by ``self.get_cost_updates``.  After each epoch the mean
        batch cost is printed, the test images are encoded and decoded by
        this model and shown next to their reconstructions (figure 2), the
        mean of ``rms(x - z, axis=1)`` is printed, and ``self.filters`` is
        plotted (figure 3).

        Args:
            batches: iterable of mini-batches; each one is passed unchanged
                to the compiled training function.
            test_images: array of test images; reshaped to ``(-1, 28, 28)``
                for plotting.
            n_epochs: number of passes over ``batches``.
            **train_params: forwarded to ``self.get_cost_updates``.
        """
        data = tt.matrix('data', dtype=self.dtype)
        cost, updates = self.get_cost_updates(data, **train_params)
        train_rbm = theano.function([data], cost, updates=updates)

        for epoch in range(n_epochs):

            # train on each mini-batch
            costs = [train_rbm(batch) for batch in batches]

            print("Epoch %d: %0.3f" % (epoch, np.mean(costs)))

            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            x = test_images
            # Encode/decode with this instance; the previous code reached for
            # a module-level ``rbm`` and so ignored the object being trained.
            y = self.encode(test_images)
            z = self.decode(y)
            plotting.compare([x.reshape(-1, 28, 28),
                              z.reshape(-1, 28, 28)],
                             rows=5,
                             cols=20,
                             vlims=(-1, 2) if GAUSSIAN else (0, 1))
            plt.draw()

            print("Test error: %s" % (rms(x - z, axis=1).mean(),))

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()
Beispiel #2
0
    def auto_sgd(self,
                 images,
                 test_images=None,
                 batch_size=100,
                 rate=0.1,
                 n_epochs=10):
        """Train all layers in ``self.autos`` jointly with gradient descent.

        Normally distributed noise (std 1) is added to each input batch, the
        result is passed through ``self.propup`` and ``self.propdown``, and
        the mean per-row RMS error against the clean input is minimised over
        every layer's ``W``, ``c`` and ``b``.

        Args:
            images: 2-D array of training rows; reshaped into batches of
                ``batch_size`` rows (the reshape fails if the rows do not
                divide evenly).
            test_images: optional array; when given, its reconstruction is
                plotted in figure 2 after each epoch.
            batch_size: rows per mini-batch.
            rate: gradient-descent step size.
            n_epochs: number of passes over the batches.
        """
        dtype = theano.config.floatX

        # gather the trainable parameters layer by layer
        params = [p for auto in self.autos for p in (auto.W, auto.c, auto.b)]

        # --- symbolic graph: corrupt, encode, decode, score
        inputs = tt.matrix('images')
        corrupted = inputs + self.theano_rng.normal(
            size=inputs.shape, std=1, dtype=dtype)
        hidden = self.propup(corrupted)
        recon = self.propdown(hidden)
        row_rmse = tt.sqrt(tt.mean((inputs - recon)**2, axis=1))
        error = tt.mean(row_rmse)

        # --- one gradient-descent step per parameter
        gradients = tt.grad(error, params)
        updates = collections.OrderedDict(
            (p, p - tt.cast(rate, dtype) * g)
            for p, g in zip(params, gradients))

        # multiply the new weights by the layer's mask, where one is set
        for auto in self.autos:
            if auto.mask is None:
                continue
            updates[auto.W] = updates[auto.W] * auto.mask

        train_step = theano.function([inputs], error, updates=updates)
        rebuild = self.reconstruct

        # --- perform SGD
        n_features = images.shape[1]
        batches = images.reshape(-1, batch_size, n_features)
        assert np.isfinite(batches).all()

        for epoch in range(n_epochs):
            epoch_costs = [train_step(batch) for batch in batches]
            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            if test_images is not None:
                # show test images against their reconstructions
                plt.figure(2)
                plt.clf()
                show_recons(test_images, rebuild(test_images))
                plt.draw()

            # first-layer filters only
            plt.figure(3)
            plt.clf()
            first_layer = self.autos[0]
            plotting.filters(first_layer.filters, rows=10, cols=20)
            plt.draw()
Beispiel #3
0
    def pretrain(self, batches, test_images, n_epochs=10, **train_params):
        """Train this model on mini-batches, plotting progress every epoch.

        The training function is compiled from the cost and updates returned
        by ``self.get_cost_updates``.  After each epoch this prints the mean
        batch cost, plots the test images next to this model's
        encode/decode reconstructions (figure 2), prints the mean of
        ``rms(x - z, axis=1)``, and plots ``self.filters`` (figure 3).

        Args:
            batches: iterable of mini-batches; each one is passed unchanged
                to the compiled training function.
            test_images: array of test images; reshaped to ``(-1, 28, 28)``
                for plotting.
            n_epochs: number of passes over ``batches``.
            **train_params: forwarded to ``self.get_cost_updates``.
        """
        data = tt.matrix("data", dtype=self.dtype)
        cost, updates = self.get_cost_updates(data, **train_params)
        train_rbm = theano.function([data], cost, updates=updates)

        for epoch in range(n_epochs):

            # train on each mini-batch
            costs = [train_rbm(batch) for batch in batches]

            print("Epoch %d: %0.3f" % (epoch, np.mean(costs)))

            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            x = test_images
            # Use this instance, not a module-level ``rbm``, so that the plot
            # and the reported error describe the model being trained.
            y = self.encode(test_images)
            z = self.decode(y)
            plotting.compare(
                [x.reshape(-1, 28, 28), z.reshape(-1, 28, 28)],
                rows=5, cols=20, vlims=(-1, 2) if GAUSSIAN else (0, 1)
            )
            plt.draw()

            print("Test error: %s" % (rms(x - z, axis=1).mean(),))

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()
Beispiel #4
0
    def pretrain(self, batches, dbn=None, test_images=None,
                 n_epochs=10, **train_params):
        """Train this layer on mini-batches, optionally plotting via ``dbn``.

        Args:
            batches: iterable of mini-batches fed to the compiled training
                function.
            dbn: optional enclosing network.  When given together with
                ``test_images``, ``dbn.reconstruct`` is plotted after each
                epoch; filters are plotted only when this object is
                ``dbn.rbms[0]``.
            test_images: optional array, reshaped to ``(-1, 28, 28)`` for
                plotting.
            n_epochs: number of passes over ``batches``.
            **train_params: forwarded to ``self.get_cost_updates``.
        """
        visible = tt.matrix('data', dtype=self.dtype)
        cost, updates = self.get_cost_updates(visible, **train_params)
        step = theano.function([visible], cost, updates=updates)

        have_dbn = dbn is not None

        for epoch in range(n_epochs):
            # one pass over all mini-batches
            epoch_costs = [step(batch) for batch in batches]
            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            if have_dbn and test_images is not None:
                # test images next to the network's reconstructions
                plt.figure(2)
                plt.clf()
                recons = dbn.reconstruct(test_images)
                originals = test_images.reshape(-1, 28, 28)
                rebuilt = recons.reshape(-1, 28, 28)
                plotting.compare([originals, rebuilt], rows=5, cols=20)
                plt.draw()

            # filters are shown for the first layer only
            if have_dbn and self is dbn.rbms[0]:
                plt.figure(3)
                plt.clf()
                plotting.filters(self.filters, rows=10, cols=20)
                plt.draw()
Beispiel #5
0
    def auto_sgd_down(self, images, test_images=None,
                      batch_size=100, rate=0.1, n_epochs=10):
        """Train only each layer's ``V`` and ``b`` with gradient descent.

        Every layer in ``self.autos`` gets a new shared variable ``V``,
        initialised to a transposed copy of its ``W``.  The cost is the mean
        per-row RMS error between the clean input and the result of
        ``self.propdown(self.propup(noisy_input))``.

        Args:
            images: 2-D array of training rows; reshaped into batches of
                ``batch_size`` rows.
            test_images: optional array; when given, its reconstruction is
                plotted in figure 2 after each epoch.
            batch_size: rows per mini-batch.
            rate: gradient-descent step size.
            n_epochs: number of passes over the batches.
        """
        dtype = theano.config.floatX

        params = []
        for auto in self.autos:
            w_copy = auto.W.get_value(borrow=False)
            auto.V = theano.shared(w_copy.T, name='V')
            params += [auto.V, auto.b]

        # --- symbolic graph: corrupt, encode, decode, score
        inputs = tt.matrix('images')
        corrupted = inputs + self.theano_rng.normal(
            size=inputs.shape, std=1, dtype=dtype)
        hidden = self.propup(corrupted)
        recon = self.propdown(hidden)
        row_rmse = tt.sqrt(tt.mean((inputs - recon)**2, axis=1))
        error = tt.mean(row_rmse)

        # --- one gradient-descent step per parameter
        gradients = tt.grad(error, params)
        updates = collections.OrderedDict(
            (p, p - tt.cast(rate, dtype) * g)
            for p, g in zip(params, gradients))

        # V is laid out as the transpose of W, so the mask is transposed too
        for auto in self.autos:
            if auto.mask is None:
                continue
            updates[auto.V] = updates[auto.V] * auto.mask.T

        train_step = theano.function([inputs], error, updates=updates)
        rebuild = self.reconstruct

        # --- perform SGD
        n_features = images.shape[1]
        batches = images.reshape(-1, batch_size, n_features)
        assert np.isfinite(batches).all()

        for epoch in range(n_epochs):
            epoch_costs = [train_step(batch) for batch in batches]
            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            if test_images is not None:
                # show test images against their reconstructions
                plt.figure(2)
                plt.clf()
                show_recons(test_images, rebuild(test_images))
                plt.draw()

            # first-layer filters only
            plt.figure(3)
            plt.clf()
            first_layer = self.autos[0]
            plotting.filters(first_layer.filters, rows=10, cols=20)
            plt.draw()
Beispiel #6
0
    def sgd_backprop(self, images, test_images, batch_size=100, rate=0.1, n_epochs=10):
        """Train ``W``, ``c`` and ``b`` by gradient descent on noisy inputs.

        Normally distributed noise (std 1) is added to each batch; the cost
        is the mean per-row RMS error between the clean batch and
        ``self.propdown(self.propup(noisy_batch))``.  After each epoch the
        test images are encoded and decoded, plotted against the originals
        (figure 2), the mean of ``rms(x - z, axis=1)`` is printed, and
        ``self.filters`` is plotted (figure 3).

        Args:
            images: 2-D array of training rows; reshaped into batches of
                ``batch_size`` rows.
            test_images: array of test images; reshaped to ``(-1, 28, 28)``
                for plotting.
            batch_size: rows per mini-batch.
            rate: gradient-descent step size.
            n_epochs: number of passes over the batches.
        """
        dtype = theano.config.floatX
        params = [self.W, self.c, self.b]

        # --- symbolic graph: corrupt, encode, decode, score
        inputs = tt.matrix('images')
        corrupted = inputs + self.theano_rng.normal(
            size=inputs.shape, std=1, dtype=dtype)
        hidden = self.propup(corrupted)
        recon = self.propdown(hidden)
        row_rmse = tt.sqrt(tt.mean((inputs - recon)**2, axis=1))
        error = tt.mean(row_rmse)

        # --- one gradient-descent step per parameter
        gradients = tt.grad(error, params)
        updates = collections.OrderedDict(
            (p, p - tt.cast(rate, dtype) * g)
            for p, g in zip(params, gradients))

        mask = self.mask
        if mask is not None:
            updates[self.W] = updates[self.W] * mask

        train_step = theano.function([inputs], error, updates=updates)

        # --- perform SGD
        n_features = images.shape[1]
        batches = images.reshape(-1, batch_size, n_features)
        assert np.isfinite(batches).all()

        for epoch in range(n_epochs):
            epoch_costs = []
            for batch in batches:
                epoch_costs.append(train_step(batch))
                # validate the parameters after every single update
                self.check_params()

            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            # test images next to their encode/decode round trip
            plt.figure(2)
            plt.clf()
            codes = self.encode(test_images)
            decoded = self.decode(codes)
            panels = [test_images.reshape(-1, 28, 28),
                      decoded.reshape(-1, 28, 28)]
            plotting.compare(panels, rows=5, cols=20, vlims=(-1, 2))
            plt.draw()

            test_error = rms(test_images - decoded, axis=1).mean()
            print("Test error: %s" % (test_error,))

            # filters of this layer
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()
Beispiel #7
0
    def auto_sgd(self, images, deep=None, test_images=None,
                     batch_size=100, rate=0.1, n_epochs=10):
        """Train ``W``, ``c`` and ``b`` as a denoising autoencoder with SGD.

        Normally distributed noise (std 1) is added to each batch, which is
        then passed through ``self.propup(..., noise=0.1)`` and
        ``self.propdown``.  The cost is the mean per-row RMS error against
        the clean batch.

        Args:
            images: 2-D array of training rows; reshaped into batches of
                ``batch_size`` rows.
            deep: optional enclosing network.  When given together with
                ``test_images``, ``deep.reconstruct`` is plotted after each
                epoch; filters are plotted only when this object is
                ``deep.autos[0]``.
            test_images: optional array of test images for the plot.
            batch_size: rows per mini-batch.
            rate: gradient-descent step size.
            n_epochs: number of passes over the batches.
        """
        dtype = theano.config.floatX

        params = [self.W, self.c, self.b]

        # --- compute backprop function
        x = tt.matrix('images')
        xn = x + self.theano_rng.normal(size=x.shape, std=1, dtype=dtype)
        y = self.propup(xn, noise=0.1)
        z = self.propdown(y)

        # compute coding error
        rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
        error = tt.mean(rmses)

        # compute gradients
        grads = tt.grad(error, params)
        updates = collections.OrderedDict()
        for param, grad in zip(params, grads):
            updates[param] = param - tt.cast(rate, dtype) * grad

        if self.mask is not None:
            updates[self.W] = updates[self.W] * self.mask

        train_dbn = theano.function([x], error, updates=updates)
        # ``deep`` defaults to None; only look up its method when it exists,
        # otherwise training without a network would fail right here.
        reconstruct = deep.reconstruct if deep is not None else None

        # --- perform SGD
        batches = images.reshape(-1, batch_size, images.shape[1])
        assert np.isfinite(batches).all()
        print(batches.shape)

        for epoch in range(n_epochs):
            costs = []
            for batch in batches:
                costs.append(train_dbn(batch))
                self.check_params()

            print("Epoch %d: %0.3f" % (epoch, np.mean(costs)))

            if reconstruct is not None and test_images is not None:
                # plot reconstructions on test set
                plt.figure(2)
                plt.clf()
                recons = reconstruct(test_images)
                show_recons(test_images, recons)
                plt.draw()

            # plot filters for first layer only
            if deep is not None and self is deep.autos[0]:
                plt.figure(3)
                plt.clf()
                plotting.filters(self.filters, rows=10, cols=20)
                plt.draw()
Beispiel #8
0
    def pretrain(self,
                 batches,
                 dbn=None,
                 test_images=None,
                 n_epochs=10,
                 **train_params):
        """Train this layer on mini-batches, optionally plotting via ``dbn``.

        Args:
            batches: iterable of mini-batches fed to the compiled training
                function.
            dbn: optional enclosing network.  When given together with
                ``test_images``, ``dbn.reconstruct`` is plotted after each
                epoch; filters are plotted only when this object is
                ``dbn.rbms[0]``.
            test_images: optional array, reshaped to ``(-1, 28, 28)`` for
                plotting.
            n_epochs: number of passes over ``batches``.
            **train_params: forwarded to ``self.get_cost_updates``.
        """
        visible = tt.matrix('data', dtype=self.dtype)
        cost, updates = self.get_cost_updates(visible, **train_params)
        step = theano.function([visible], cost, updates=updates)

        have_dbn = dbn is not None

        for epoch in range(n_epochs):
            # one pass over all mini-batches
            epoch_costs = [step(batch) for batch in batches]
            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            if have_dbn and test_images is not None:
                # test images next to the network's reconstructions
                plt.figure(2)
                plt.clf()
                recons = dbn.reconstruct(test_images)
                originals = test_images.reshape(-1, 28, 28)
                rebuilt = recons.reshape(-1, 28, 28)
                plotting.compare([originals, rebuilt], rows=5, cols=20)
                plt.draw()

            # filters are shown for the first layer only
            if have_dbn and self is dbn.rbms[0]:
                plt.figure(3)
                plt.clf()
                plotting.filters(self.filters, rows=10, cols=20)
                plt.draw()
Beispiel #9
0
        def f_df_wrapper(p):
            """Load ``p`` into the shared parameters; return cost and gradient.

            ``p`` is split with ``split_params(p, np_params)`` and each piece
            is written to the matching entry of ``params`` (cast to that
            parameter's dtype).  ``f_df()`` is then evaluated: its first
            output is the cost and the rest are joined into one gradient
            with ``join_params``.  Both results are returned as float64.
            Progress plots are refreshed on every call when ``deep`` is set.
            """
            pieces = split_params(p, np_params)
            for param, value in zip(params, pieces):
                param.set_value(value.astype(param.dtype))

            results = f_df()
            cost = results[0]
            grad = join_params(results[1:])

            have_deep = deep is not None

            if have_deep and test_images is not None:
                # test images next to the network's reconstructions
                plt.figure(2)
                plt.clf()
                show_recons(test_images, reconstruct(test_images))
                plt.draw()

            # filters are shown for the first layer only
            if have_deep and self is deep.autos[0]:
                plt.figure(3)
                plt.clf()
                plotting.filters(self.filters, rows=10, cols=20)
                plt.draw()

            cost64 = cost.astype('float64')
            grad64 = grad.astype('float64')
            return cost64, grad64
Beispiel #10
0
# Compile the training step from the RBM's cost and parameter updates.
cost, updates = rbm.get_cost_updates(lr=0.02, persistent=persistent, k=1)

train_rbm = theano.function([rbm.input], cost,
                            updates=updates)

# Compile a reconstruction function: one propup followed by one propdown.
# Each call returns a pair; only the second element is used further on.
ha, hp = rbm.propup(rbm.input)
va, vp = rbm.propdown(hp)
reconstruct_rbm = theano.function([rbm.input], vp)

for epoch in range(n_epochs):

    # train on each mini-batch
    costs = [train_rbm(batch) for batch in batches]

    print("Epoch %d: %0.3f" % (epoch, np.mean(costs)))

    # figure 2: current weights shown as 28x28 filters
    weights = rbm.W.get_value()
    plt.figure(2)
    plt.clf()
    plotting.filters(weights.T.reshape(-1, 28, 28), rows=5, cols=10)
    # plt.draw() only redraws the current figure, so figure 2 has to be
    # drawn before figure 3 becomes current.
    plt.draw()

    # figure 3: first training batch next to its reconstruction
    test_batch = batches[0]
    recons = reconstruct_rbm(test_batch)
    plt.figure(3)
    plt.clf()
    plotting.compare([test_batch.reshape(-1, 28, 28), recons.reshape(-1, 28, 28)], rows=5, cols=20)

    plt.draw()
Beispiel #11
0
    def sgd(self, train_set, test_set,
            rate=0.1, tradeoff=0.5, n_epochs=30, batch_size=100):
        """Use SGD to do combined autoencoder and classifier training.

        The minimised cost is
        ``(1 - tradeoff) * auto_cost + tradeoff * class_cost``, where
        ``auto_cost`` is the mean per-row RMS reconstruction error and
        ``class_cost`` is the mean of ``multi_hinge_margin`` over the linear
        classifier outputs ``dot(code, W) + b``.  The value printed per
        epoch is the mean classification error rate over the batches.

        Side effects: every layer in ``self.autos`` gets a new shared
        variable ``V`` (a transposed copy of its ``W``), and ``self.W`` and
        ``self.b`` are overwritten with the trained classifier parameters
        after each epoch.

        Args:
            train_set: pair ``(images, labels)``; both are reshaped into
                mini-batches of ``batch_size`` rows.
            test_set: pair ``(test_images, test_labels)``; only the images
                are used, for the reconstruction plot (may be ``None``).
            rate: gradient-descent step size.
            tradeoff: weight of the classification cost, between 0 and 1.
            n_epochs: number of passes over the batches.
            batch_size: rows per mini-batch.
        """
        dtype = theano.config.floatX
        assert tradeoff >= 0 and tradeoff <= 1

        params = []
        for auto in self.autos:
            # V starts as a transposed copy of W and is trained separately
            auto.V = theano.shared(auto.W.get_value(borrow=False).T, name='V')
            params.extend([auto.W, auto.V, auto.c, auto.b])

        # --- compute backprop function
        assert self.W is not None and self.b is not None
        W = theano.shared(self.W.astype(dtype), name='Wc')
        b = theano.shared(self.b.astype(dtype), name='bc')
        # The classifier parameters must be trained as well; without them in
        # ``params`` no update is generated and the copy-back after each
        # epoch would only return the initial values.
        params.extend([W, b])

        x = tt.matrix('batch')
        y = tt.ivector('labels')

        xn = x
        yn = self.propup(xn, noise=1.0)

        # compute classification cost (hinge margin) and error rate
        yc = tt.dot(yn, W) + b
        class_cost = multi_hinge_margin(yc, y).mean()
        class_error = tt.mean(tt.neq(tt.argmax(yc, axis=1), y))

        # compute autoencoder error
        z = self.propdown(yn)
        rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
        auto_cost = tt.mean(rmses)

        cost = (tt.cast(1 - tradeoff, dtype) * auto_cost
                + tt.cast(tradeoff, dtype) * class_cost)
        error = class_error

        # compute gradients
        grads = tt.grad(cost, params)
        updates = collections.OrderedDict()
        for param, grad in zip(params, grads):
            updates[param] = param - tt.cast(rate, dtype) * grad

        for auto in self.autos:
            if auto.mask is not None:
                updates[auto.W] = updates[auto.W] * auto.mask
                updates[auto.V] = updates[auto.V] * auto.mask.T

        train_dbn = theano.function([x, y], error, updates=updates)
        reconstruct = self.reconstruct

        # --- perform SGD
        images, labels = train_set
        ibatches = images.reshape(-1, batch_size, images.shape[1])
        lbatches = labels.reshape(-1, batch_size).astype('int32')
        assert np.isfinite(ibatches).all()

        # the test labels are not needed for the reconstruction plot
        test_images, _ = test_set

        for epoch in range(n_epochs):
            costs = []
            for batch, label in zip(ibatches, lbatches):
                costs.append(train_dbn(batch, label))

            # copy back parameters (for test function)
            self.W = W.get_value()
            self.b = b.get_value()

            print("Epoch %d: %0.3f" % (epoch, np.mean(costs)))

            if test_images is not None:
                # plot reconstructions on test set
                plt.figure(2)
                plt.clf()
                recons = reconstruct(test_images)
                show_recons(test_images, recons)
                plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()
Beispiel #12
0
    def sgd(self,
            train_set,
            test_set,
            rate=0.1,
            tradeoff=0.5,
            n_epochs=30,
            batch_size=100):
        """Use SGD to do combined autoencoder and classifier training.

        The minimised cost is
        ``(1 - tradeoff) * auto_cost + tradeoff * class_cost``, where
        ``auto_cost`` is the mean per-row RMS reconstruction error and
        ``class_cost`` is the mean of ``multi_hinge_margin`` over the linear
        classifier outputs ``dot(code, W) + b``.  The value printed per
        epoch is the mean classification error rate over the batches.

        Side effects: every layer in ``self.autos`` gets a new shared
        variable ``V`` (a transposed copy of its ``W``), and ``self.W`` and
        ``self.b`` are overwritten with the trained classifier parameters
        after each epoch.

        Args:
            train_set: pair ``(images, labels)``; both are reshaped into
                mini-batches of ``batch_size`` rows.
            test_set: pair ``(test_images, test_labels)``; only the images
                are used, for the reconstruction plot (may be ``None``).
            rate: gradient-descent step size.
            tradeoff: weight of the classification cost, between 0 and 1.
            n_epochs: number of passes over the batches.
            batch_size: rows per mini-batch.
        """
        dtype = theano.config.floatX
        assert tradeoff >= 0 and tradeoff <= 1

        params = []
        for auto in self.autos:
            # V starts as a transposed copy of W and is trained separately
            auto.V = theano.shared(auto.W.get_value(borrow=False).T, name='V')
            params.extend([auto.W, auto.V, auto.c, auto.b])

        # --- compute backprop function
        assert self.W is not None and self.b is not None
        W = theano.shared(self.W.astype(dtype), name='Wc')
        b = theano.shared(self.b.astype(dtype), name='bc')
        # Train the classifier parameters too: if they are left out of
        # ``params`` they never receive an update, and the copy-back after
        # each epoch merely returns their initial values.
        params.extend([W, b])

        x = tt.matrix('batch')
        y = tt.ivector('labels')

        xn = x
        yn = self.propup(xn, noise=1.0)

        # compute classification cost (hinge margin) and error rate
        yc = tt.dot(yn, W) + b
        class_cost = multi_hinge_margin(yc, y).mean()
        class_error = tt.mean(tt.neq(tt.argmax(yc, axis=1), y))

        # compute autoencoder error
        z = self.propdown(yn)
        rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
        auto_cost = tt.mean(rmses)

        cost = (tt.cast(1 - tradeoff, dtype) * auto_cost +
                tt.cast(tradeoff, dtype) * class_cost)
        error = class_error

        # compute gradients
        grads = tt.grad(cost, params)
        updates = collections.OrderedDict()
        for param, grad in zip(params, grads):
            updates[param] = param - tt.cast(rate, dtype) * grad

        for auto in self.autos:
            if auto.mask is not None:
                updates[auto.W] = updates[auto.W] * auto.mask
                updates[auto.V] = updates[auto.V] * auto.mask.T

        train_dbn = theano.function([x, y], error, updates=updates)
        reconstruct = self.reconstruct

        # --- perform SGD
        images, labels = train_set
        ibatches = images.reshape(-1, batch_size, images.shape[1])
        lbatches = labels.reshape(-1, batch_size).astype('int32')
        assert np.isfinite(ibatches).all()

        # the test labels are not needed for the reconstruction plot
        test_images, _ = test_set

        for epoch in range(n_epochs):
            costs = []
            for batch, label in zip(ibatches, lbatches):
                costs.append(train_dbn(batch, label))

            # copy back parameters (for test function)
            self.W = W.get_value()
            self.b = b.get_value()

            print("Epoch %d: %0.3f" % (epoch, np.mean(costs)))

            if test_images is not None:
                # plot reconstructions on test set
                plt.figure(2)
                plt.clf()
                recons = reconstruct(test_images)
                show_recons(test_images, recons)
                plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()
Beispiel #13
0
    def auto_sgd(self,
                 images,
                 deep=None,
                 test_images=None,
                 batch_size=100,
                 rate=0.1,
                 noise=1.,
                 n_epochs=10):
        """Train ``W``, ``c`` and ``b`` as a denoising autoencoder with SGD.

        Must be called on an object that has no ``V`` attribute.  Normally
        distributed noise with std ``noise`` is added to each batch; the
        cost is the mean per-row RMS error between the clean batch and
        ``self.propdown(self.propup(noisy_batch))``.

        Args:
            images: 2-D array of training rows; reshaped into batches of
                ``batch_size`` rows.
            deep: optional enclosing network.  When given together with
                ``test_images``, its ``encode``/``decode`` round trip is
                plotted and scored after each epoch; filters are plotted
                only when this object is ``deep.autos[0]``.
            test_images: optional array of test images.
            batch_size: rows per mini-batch.
            rate: gradient-descent step size.
            noise: standard deviation of the input noise.
            n_epochs: number of passes over the batches.
        """
        assert not hasattr(self, 'V')

        dtype = theano.config.floatX
        params = [self.W, self.c, self.b]

        # --- symbolic graph: corrupt, encode, decode, score
        inputs = tt.matrix('images')
        corrupted = inputs + self.theano_rng.normal(
            size=inputs.shape, std=noise, dtype=dtype)
        hidden = self.propup(corrupted)
        recon = self.propdown(hidden)
        row_rmse = tt.sqrt(tt.mean((inputs - recon)**2, axis=1))
        error = tt.mean(row_rmse)

        # --- one gradient-descent step per parameter
        gradients = tt.grad(error, params)
        updates = collections.OrderedDict(
            (p, p - tt.cast(rate, dtype) * g)
            for p, g in zip(params, gradients))

        mask = self.mask
        if mask is not None:
            updates[self.W] = updates[self.W] * mask

        train_step = theano.function([inputs], error, updates=updates)

        # bind the network's coding functions once, if a network was given
        have_deep = deep is not None
        if have_deep:
            encode, decode = deep.encode, deep.decode
        else:
            encode = decode = None

        # --- perform SGD
        n_features = images.shape[1]
        batches = images.reshape(-1, batch_size, n_features)
        assert np.isfinite(batches).all()

        for epoch in range(n_epochs):
            epoch_costs = []
            for batch in batches:
                epoch_costs.append(train_step(batch))
                # validate the parameters after every single update
                self.check_params()

            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            if have_deep and test_images is not None:
                # test images next to their encode/decode round trip
                plt.figure(2)
                plt.clf()
                codes = encode(test_images)
                recs = decode(codes)
                show_recons(test_images, recs)
                plt.draw()

                test_error = rms(test_images - recs, axis=1).mean()
                sparsity = (codes > 0).mean()
                print("Test set: (error: %0.3f) (sparsity: %0.3f)" % (
                    test_error, sparsity))

            # filters are shown for the first layer only
            if have_deep and self is deep.autos[0]:
                plt.figure(3)
                plt.clf()
                plotting.filters(self.filters, rows=10, cols=20)
                plt.draw()
Beispiel #14
0
    def sgd_backprop(self,
                     images,
                     test_images,
                     batch_size=100,
                     rate=0.1,
                     n_epochs=10):
        """Train ``W``, ``c`` and ``b`` by gradient descent on noisy inputs.

        Normally distributed noise (std 1) is added to each batch; the cost
        is the mean per-row RMS error between the clean batch and
        ``self.propdown(self.propup(noisy_batch))``.  After each epoch the
        test images are encoded and decoded, plotted against the originals
        (figure 2), the mean of ``rms(x - z, axis=1)`` is printed, and
        ``self.filters`` is plotted (figure 3).

        Args:
            images: 2-D array of training rows; reshaped into batches of
                ``batch_size`` rows.
            test_images: array of test images; reshaped to ``(-1, 28, 28)``
                for plotting.
            batch_size: rows per mini-batch.
            rate: gradient-descent step size.
            n_epochs: number of passes over the batches.
        """
        dtype = theano.config.floatX
        params = [self.W, self.c, self.b]

        # --- symbolic graph: corrupt, encode, decode, score
        inputs = tt.matrix('images')
        corrupted = inputs + self.theano_rng.normal(
            size=inputs.shape, std=1, dtype=dtype)
        hidden = self.propup(corrupted)
        recon = self.propdown(hidden)
        row_rmse = tt.sqrt(tt.mean((inputs - recon)**2, axis=1))
        error = tt.mean(row_rmse)

        # --- one gradient-descent step per parameter
        gradients = tt.grad(error, params)
        updates = collections.OrderedDict(
            (p, p - tt.cast(rate, dtype) * g)
            for p, g in zip(params, gradients))

        mask = self.mask
        if mask is not None:
            updates[self.W] = updates[self.W] * mask

        train_step = theano.function([inputs], error, updates=updates)

        # --- perform SGD
        n_features = images.shape[1]
        batches = images.reshape(-1, batch_size, n_features)
        assert np.isfinite(batches).all()

        for epoch in range(n_epochs):
            epoch_costs = []
            for batch in batches:
                epoch_costs.append(train_step(batch))
                # validate the parameters after every single update
                self.check_params()

            print("Epoch %d: %0.3f" % (epoch, np.mean(epoch_costs)))

            # test images next to their encode/decode round trip
            plt.figure(2)
            plt.clf()
            codes = self.encode(test_images)
            decoded = self.decode(codes)
            panels = [test_images.reshape(-1, 28, 28),
                      decoded.reshape(-1, 28, 28)]
            plotting.compare(panels, rows=5, cols=20, vlims=(-1, 2))
            plt.draw()

            test_error = rms(test_images - decoded, axis=1).mean()
            print("Test error: %s" % (test_error,))

            # filters of this layer
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()