# Module-level imports assumed by these excerpts (the methods below are
# taken from larger class definitions, so Layer, HiddenLayer, forward,
# and predict are defined elsewhere in the original file):
import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T
from sklearn.utils import shuffle


def fit(self, X, learning_rate=0.5, mu=0.99, epochs=50, batch_sz=100, show_fig=False):
    # cast hyperparams to float32 so Theano does not upcast to float64
    learning_rate = np.float32(learning_rate)
    mu = np.float32(mu)

    N, D = X.shape
    n_batches = N // batch_sz

    # build the layers, collecting every layer's params for training
    mi = D
    self.layers = []
    self.params = []
    for mo in self.hidden_layer_sizes:
        layer = Layer(mi, mo)
        self.layers.append(layer)
        self.params += layer.params
        mi = mo

    X_in = T.matrix('X')
    X_hat = self.forward(X_in)

    # binary cross-entropy between the input and its reconstruction
    cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).mean()
    cost_op = theano.function(
        inputs=[X_in],
        outputs=cost,
    )

    updates = momentum_updates(cost, self.params, mu, learning_rate)
    train_op = theano.function(
        inputs=[X_in],
        outputs=cost,
        updates=updates,
    )

    costs = []
    for i in range(epochs):
        print("epoch:", i)
        X = shuffle(X)
        for j in range(n_batches):
            batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
            c = train_op(batch)
            if j % 100 == 0:
                print("j / n_batches:", j, "/", n_batches, "cost:", c)
            costs.append(c)

    if show_fig:
        plt.plot(costs)
        plt.show()
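# For reference, a minimal sketch of the momentum_updates helper that the
# fit methods in this file call. This is a reconstruction from its call
# sites (and from an inline expansion in an older revision of this file),
# not the original definition: one velocity per parameter, classical momentum.
def momentum_updates(cost, params, mu, learning_rate):
    # one zero-initialized velocity (shared variable) per parameter
    dparams = [theano.shared(p.get_value()*0) for p in params]
    grads = T.grad(cost, params)
    updates = [
        (p, p + mu*dp - learning_rate*g)
        for p, dp, g in zip(params, dparams, grads)
    ] + [
        (dp, mu*dp - learning_rate*g)
        for dp, g in zip(dparams, grads)
    ]
    return updates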
def fit_to_input(self, k, learning_rate=1.0, mu=0.99, epochs=100000):
    # This is not very flexible: ideally you would like to be able to
    # activate any node in any hidden layer, not just the last layer.
    # Exercise for students: modify this function so it can activate
    # neurons in the middle layers too.

    # cast hyperparams
    learning_rate = np.float32(learning_rate)
    mu = np.float32(mu)

    # randomly initialize an image
    X0 = init_weights((1, self.D))

    # make the image a shared variable so Theano can update it
    X = theano.shared(X0, 'X_shared')

    # get the output of the neural network
    Y = self.forward(X)

    # an alternative cost: full cross-entropy against a one-hot target
    # t = np.zeros(self.hidden_layers[-1].M)
    # t[k] = 1
    # # choose Y[0] b/c its shape is 1xD; we want a D-size vector, not a 1xD matrix
    # cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum()

    # k = which output node to look at;
    # there is only 1 image, so we select the 0th row of Y
    cost = -T.log(Y[0,k])
    # (an earlier revision also penalized the image's magnitude:
    # cost = -T.log(Y[0,k]) + reg*(X * X).sum())

    updates = momentum_updates(cost, [X], mu, learning_rate)
    train = theano.function(
        inputs=[],
        outputs=[cost, Y],
        updates=updates,
    )

    costs = []
    for i in range(epochs):
        if i % 10000 == 0:
            print("epoch:", i)
        the_cost, out = train()
        if i == 0:
            print("out.shape:", out.shape)
        costs.append(the_cost)

    plt.plot(costs)
    plt.show()
    return X.get_value()
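# Hypothetical usage sketch (the dnn argument, 10 output classes, and the
# 28x28 image shape are assumptions, not from the excerpts above):
# visualize the input that maximally activates each output node.
def visualize_max_activations(dnn, n_classes=10, img_shape=(28, 28)):
    for k in range(n_classes):
        best_X = dnn.fit_to_input(k)
        plt.imshow(best_X.reshape(img_shape), cmap='gray')
        plt.title('input that maximizes output node %d' % k)
        plt.show()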
def fit(self, X, Y, learning_rate=0.01, mu=0.99, epochs=30, batch_sz=100):
    # cast hyperparams to float32
    learning_rate = np.float32(learning_rate)
    mu = np.float32(mu)

    N, D = X.shape
    K = len(set(Y))

    # build the hidden layers
    self.hidden_layers = []
    mi = D
    for mo in self.hidden_layer_sizes:
        h = HiddenLayer(mi, mo)
        self.hidden_layers.append(h)
        mi = mo

    # initialize logistic regression layer
    W = init_weights((mo, K))
    b = np.zeros(K, dtype=np.float32)
    self.W = theano.shared(W)
    self.b = theano.shared(b)

    # collect the params for training, and keep a reference to every
    # weight matrix so we can track how much each one changes
    self.params = [self.W, self.b]
    self.allWs = []
    for h in self.hidden_layers:
        self.params += h.params
        self.allWs.append(h.W)
    self.allWs.append(self.W)

    X_in = T.matrix('X_in')
    targets = T.ivector('Targets')
    pY = self.forward(X_in)

    # multiclass cross-entropy
    cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), targets]))
    prediction = self.predict(X_in)

    updates = momentum_updates(cost, self.params, mu, learning_rate)
    train_op = theano.function(
        inputs=[X_in, targets],
        outputs=[cost, prediction],
        updates=updates,
    )

    n_batches = N // batch_sz
    costs = []
    lastWs = [W.get_value() for W in self.allWs]
    W_changes = []
    print("supervised training...")
    for i in range(epochs):
        print("epoch:", i)
        X, Y = shuffle(X, Y)
        for j in range(n_batches):
            Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
            Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]
            c, p = train_op(Xbatch, Ybatch)
            if j % 100 == 0:
                print("j / n_batches:", j, "/", n_batches,
                      "cost:", c, "error:", error_rate(p, Ybatch))
            costs.append(c)

            # log the mean absolute change in every weight matrix
            W_change = [np.abs(W.get_value() - lastW).mean()
                        for W, lastW in zip(self.allWs, lastWs)]
            W_changes.append(W_change)
            lastWs = [W.get_value() for W in self.allWs]

    # top subplot: per-layer weight changes; bottom subplot: training cost
    W_changes = np.array(W_changes)
    plt.subplot(2, 1, 1)
    for i in range(W_changes.shape[1]):
        plt.plot(W_changes[:, i], label='layer %s' % i)
    plt.legend()
    # plt.show()

    plt.subplot(2, 1, 2)
    plt.plot(costs)
    plt.show()
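# Minimal sketches of the remaining helpers these methods assume; these are
# reconstructions from the call sites, not the original definitions.
# init_weights must return float32 so the shared variables stay in single
# precision, and error_rate is the usual mean classification error. The
# HiddenLayer sketch assumes a sigmoid activation and a zero bias, which
# the excerpts above do not confirm.
def init_weights(shape):
    return (np.random.randn(*shape) / np.sqrt(sum(shape))).astype(np.float32)

def error_rate(p, t):
    return np.mean(p != t)

class HiddenLayer(object):
    def __init__(self, mi, mo):
        self.W = theano.shared(init_weights((mi, mo)))
        self.b = theano.shared(np.zeros(mo, dtype=np.float32))
        self.params = [self.W, self.b]

    def forward(self, X):
        return T.nnet.sigmoid(X.dot(self.W) + self.b)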