class SentenceCompletion(object):
    """Train a recurrent neural network for sentence completion.

    Wraps an externally defined Theano ``RNN``: ``ready()`` builds the
    symbolic inputs and the network graph, ``fit()`` trains it with
    per-sequence SGD using BPTT gradients (via ``T.grad`` on the unrolled
    graph) and periodically checkpoints the weights to ``.npz`` files.
    """

    def __init__(self, n_in, n_hidden, n_out, learning_rate=0.01,
                 learning_rate_decay=1, L2_reg=0.00, n_epochs=100):
        """Store hyper-parameters and build the model.

        :param n_in: dimensionality of one input vector per timestep
        :param n_hidden: number of hidden units in the RNN
        :param n_out: output dimensionality (number of target classes)
        :param learning_rate: initial SGD step size
        :param learning_rate_decay: multiplicative decay applied to the
            learning rate once per epoch
        :param L2_reg: weight of the L2 penalty added to the loss
        :param n_epochs: number of full passes over the training set
        """
        self.n_in = int(n_in)
        self.n_hidden = int(n_hidden)
        self.n_out = int(n_out)
        self.learning_rate = float(learning_rate)
        self.learning_rate_decay = float(learning_rate_decay)
        self.L2_reg = float(L2_reg)
        self.epochs = int(n_epochs)
        self.ready()

    def ready(self):
        """Create symbolic inputs and instantiate the RNN graph."""
        # Input sentence: a float matrix, presumably one row per timestep
        # (timesteps x n_in) -- confirm against RNN.__init__.
        self.x = T.matrix(name="x", dtype=theano.config.floatX)
        # Target: one int32 label per timestep (the old float-matrix target
        # was abandoned in favour of integer class labels).
        self.y = T.vector(name="y", dtype="int32")
        # Initial hidden state of the RNN.
        self.h0 = T.vector()
        # Learning-rate placeholder.
        self.lr = T.scalar()
        self.rnn = RNN(input=self.x, n_in=self.n_in,
                       n_hidden=self.n_hidden, n_out=self.n_out)

    def fit(self, word2vec, vocab, samples, X_train, Y_train,
            X_test=None, Y_test=None, validation=10000):
        """Fit the model with per-sequence SGD.

        Pass in ``X_test``/``Y_test`` to compute and report the test error
        during training; when they are None, test-error reporting is
        skipped.  (Bug fix: the original called ``len(X_test)`` and built
        the test function unconditionally, so the documented defaults
        crashed with a TypeError.)

        :param word2vec: unused here; kept for interface compatibility
        :param vocab: unused here; kept for interface compatibility
        :param samples: number of training sequences drawn (via ``sample``,
            assumed to be ``random.sample``) when estimating training loss
        :param X_train: sequence of per-sentence input matrices
        :param Y_train: sequence of per-sentence int32 target vectors
        :param X_test: optional held-out inputs
        :param Y_test: targets matching ``X_test``
        :param validation: report losses every this many training steps
        """
        n_train = len(X_train)

        #####################
        #    Build model    #
        #####################
        train_set_x = T.matrix()
        train_set_y = T.vector(dtype="int32")
        l_r = T.scalar("l_r", dtype=theano.config.floatX)

        # Training objective: model loss plus L2 penalty on the weights.
        cost = self.rnn.loss(self.y) + self.L2_reg * self.rnn.L2_sqr

        compute_train_error = theano.function(
            inputs=[train_set_x, train_set_y],
            outputs=self.rnn.loss(self.y),
            givens={self.x: train_set_x, self.y: train_set_y},
            mode=mode)

        # Only build the test machinery when a test set was supplied.
        have_test = X_test is not None and Y_test is not None
        if have_test:
            n_test = len(X_test)
            test_set_x = T.matrix()
            test_set_y = T.vector(dtype="int32")
            compute_test_error = theano.function(
                inputs=[test_set_x, test_set_y],
                outputs=self.rnn.loss(self.y),
                givens={self.x: test_set_x, self.y: test_set_y},
                mode=mode)

        # Gradient of the cost w.r.t. theta = (W, W_in, W_out, h0, bh, by);
        # T.grad on the unrolled recurrence implements BPTT.  Plain SGD.
        updates = []
        for param in self.rnn.params:
            gparam = T.grad(cost, param)
            updates.append((param, param - l_r * gparam))

        # `train_model` returns the cost and, at the same time, updates the
        # parameters according to the rules in `updates`.
        train_model = theano.function(
            inputs=[train_set_x, train_set_y, l_r],
            outputs=cost,
            updates=updates,
            givens={self.x: train_set_x, self.y: train_set_y},
            mode=mode)

        ##############
        # Train model#
        ##############
        epoch = 0
        while (epoch < self.epochs):
            epoch += 1
            for idx in xrange(n_train):
                train_model(X_train[idx], Y_train[idx], self.learning_rate)

                # Validate learnt weights every `validation` global steps.
                # (Renamed from `iter`, which shadowed the builtin.)
                step = (epoch - 1) * n_train + idx + 1
                if step % validation == 0:
                    train_losses = [compute_train_error(X_train[i], Y_train[i])
                                    for i in sample(xrange(n_train), samples)]
                    this_train_loss = np.mean(train_losses)
                    if have_test:
                        test_losses = [compute_test_error(X_test[i], Y_test[i])
                                       for i in xrange(n_test)]
                        this_test_loss = np.mean(test_losses)
                    else:
                        # No test set: report NaN so the log format stays
                        # uniform without faking a real loss value.
                        this_test_loss = float("nan")
                    fmt = "epoch %i, seq %i/%i, train loss %f, test loss %f, lr: %f"
                    logging.debug(fmt % (epoch, idx + 1, n_train,
                                         this_train_loss, this_test_loss,
                                         self.learning_rate))

            # Decay the learning rate once per epoch.
            self.learning_rate *= self.learning_rate_decay

            # Checkpoint all weights every 10 epochs.
            if epoch % 10 == 0:
                filename = "rnn-100_%e-%d.npz" % (self.L2_reg, epoch)
                np.savez(filename,
                         W=self.rnn.W.get_value(),
                         W_in=self.rnn.W_in.get_value(),
                         W_out=self.rnn.W_out.get_value(),
                         h0=self.rnn.h0.get_value(),
                         bh=self.rnn.bh.get_value(),
                         by=self.rnn.by.get_value())
def E(W):
    """Objective for weight-space optimisation: unpack the flat vector *W*
    into RNN weight matrices, build a candidate network with the reference
    network's tau, and return its loss on the training data."""
    w_in, w_hid, w_out = vector_to_weights(W, 1, 50, 1)
    candidate = RNN(w_in, w_hid, w_out, rnn.tau)
    outputs = candidate.feedforward(trainData.x)[0]
    return candidate.loss(outputs, trainData.y)