    def fit(self,
            X,
            learning_rate=0.5,
            mu=0.99,
            epochs=50,
            batch_sz=100,
            show_fig=False):
        # cast hyperparams
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)

        N, D = X.shape
        n_batches = N // batch_sz

        mi = D
        self.layers = []
        self.params = []
        for mo in self.hidden_layer_sizes:
            layer = Layer(mi, mo)
            self.layers.append(layer)
            self.params += layer.params
            mi = mo

        X_in = T.matrix('X')
        X_hat = self.forward(X_in)

        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).mean()
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        updates = momentum_updates(cost, self.params, mu, learning_rate)
        train_op = theano.function(
            inputs=[X_in],
            outputs=cost,
            updates=updates,
        )

        costs = []
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                c = train_op(batch)
                if j % 100 == 0:
                    print("j / n_batches:", j, "/", n_batches, "cost:", c)
                costs.append(c)
        if show_fig:
            plt.plot(costs)
            plt.show()
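# --- Hedged sketch: `momentum_updates` is a helper assumed throughout this
# listing but not shown. The version below is a plausible minimal
# implementation of classical momentum (one "velocity" shared variable per
# parameter, updated alongside the parameter); the author's actual helper
# may differ in details.
import theano
import theano.tensor as T

def momentum_updates(cost, params, mu, learning_rate):
    # one zero-initialized velocity shared variable per parameter
    dparams = [theano.shared(p.get_value() * 0) for p in params]
    grads = T.grad(cost, params)
    updates = []
    for p, dp, g in zip(params, dparams, grads):
        new_dp = mu * dp - learning_rate * g  # update the velocity
        updates.append((dp, new_dp))
        updates.append((p, p + new_dp))       # step the parameter along the velocity
    return updates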
Example #2
    def fit_to_input(self, k, learning_rate=1.0, mu=0.99, epochs=100000):
        # This is not very flexible, as you would ideally
        # like to be able to activate any node in any hidden
        # layer, not just the last layer.
        # Exercise for students: modify this function to be able
        # to activate neurons in the middle layers.

        # cast hyperparams
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)

        # randomly initialize an image
        X0 = init_weights((1, self.D))

        # make the image a shared so theano can update it
        X = theano.shared(X0, 'X_shared')

        # get the output of the neural network
        Y = self.forward(X)

        # t = np.zeros(self.hidden_layers[-1].M)
        # t[k] = 1

        # # choose Y[0] b/c it's shape 1xD, we want just a D-size vector, not 1xD matrix
        # cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum()

        # k = which output node to look at
        # there is only 1 image, so we select the 0th row of X
        cost = -T.log(Y[0,k])

        updates = momentum_updates(cost, [X], mu, learning_rate)
        train = theano.function(
            inputs=[],
            outputs=[cost, Y],
            updates=updates,
        )

        costs = []
        for i in range(epochs):
            if i % 10000 == 0:
                print("epoch:", i)
            the_cost, out = train()
            if i == 0:
                print("out.shape:", out.shape)
            costs.append(the_cost)
        plt.plot(costs)
        plt.show()

        return X.get_value()
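# --- Usage sketch (assumptions flagged): `fit_to_input` returns the (1, D)
# input that maximizes the activation of output node k. If the model was
# trained on 28x28 images (e.g. MNIST) -- an assumption, not something the
# code above guarantees -- the result could be visualized like this:
#
#     image = model.fit_to_input(k=5)   # `model` and k=5 are hypothetical
#     plt.imshow(image.reshape(28, 28), cmap='gray')
#     plt.title('input that maximizes activation of node 5')
#     plt.show()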
    def fit(self, X, Y, learning_rate=0.01, mu=0.99, epochs=30, batch_sz=100):
        # cast to float32
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)

        N, D = X.shape
        K = len(set(Y))

        self.hidden_layers = []
        mi = D
        for mo in self.hidden_layer_sizes:
            h = HiddenLayer(mi, mo)
            self.hidden_layers.append(h)
            mi = mo

        # initialize logistic regression layer
        W = init_weights((mo, K))
        b = np.zeros(K, dtype=np.float32)
        self.W = theano.shared(W)
        self.b = theano.shared(b)

        self.params = [self.W, self.b]
        self.allWs = []
        for h in self.hidden_layers:
            self.params += h.params
            self.allWs.append(h.W)
        self.allWs.append(self.W)

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)

        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), targets]))
        prediction = self.predict(X_in)

        updates = momentum_updates(cost, self.params, mu, learning_rate)
        train_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        lastWs = [W.get_value() for W in self.allWs]
        W_changes = []
        print("supervised training...")
        for i in range(epochs):
            print("epoch:", i)
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]
                c, p = train_op(Xbatch, Ybatch)
                if j % 100 == 0:
                    print("j / n_batches:", j, "/", n_batches, "cost:", c, "error:", error_rate(p, Ybatch))
                costs.append(c)

                # log changes in all Ws
                W_change = [np.abs(W.get_value() - lastW).mean() for W, lastW in zip(self.allWs, lastWs)]
                W_changes.append(W_change)
                lastWs = [W.get_value() for W in self.allWs]

        W_changes = np.array(W_changes)
        plt.subplot(2,1,1)
        for i in range(W_changes.shape[1]):
            plt.plot(W_changes[:,i], label='layer %s' % i)
        plt.legend()
        # plt.show()

        plt.subplot(2,1,2)
        plt.plot(costs)
        plt.show()
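# --- Hedged sketches of two more helpers assumed above but not shown in this
# listing: `init_weights` and `error_rate`. These are plausible minimal
# versions (the float32 dtype matches the casts used above); the author's
# actual helpers may differ, e.g. in the weight-scaling scheme.
import numpy as np

def init_weights(shape):
    # small random float32 weights; the 1/sqrt(fan_in + fan_out) scaling is an assumption
    return (np.random.randn(*shape) / np.sqrt(sum(shape))).astype(np.float32)

def error_rate(p, t):
    # fraction of predictions that disagree with the integer targets
    return np.mean(p != t)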