# Module-level imports assumed by the methods in this excerpt:
#   import math, random, sys, time
#   import numpy as np
#   import theano
#   import theano.tensor as T

def evaluate_set(self, data_x, data_y):
    args = self.args

    # compile prediction function
    eval_acc = theano.function(
        inputs=[self.x],
        outputs=self.pred,
        allow_input_downcast=True
    )

    # create batches by grouping sentences of the same length together
    batches_x, batches_y = create_batches(
        range(len(data_x)), data_x, data_y, args.batch
    )

    # evaluate on the data set with dropout disabled, then restore
    # the training-time dropout rate
    dropout_prob = np.float64(args.dropout_rate).astype(theano.config.floatX)
    self.dropout.set_value(0.0)
    accuracy = self.evaluate_batches(batches_x, batches_y, eval_acc)
    self.dropout.set_value(dropout_prob)

    return accuracy
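# Usage sketch (hypothetical, not from the original source): assuming a
# trained classifier instance `model` and index-encoded dev data `dev_x`,
# `dev_y`, a single call evaluates accuracy with dropout turned off:
#
#     acc = model.evaluate_set(dev_x, dev_y)
#     say("dev accuracy=%.4f\n" % acc)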
def train(self, train, dev, test):
    args = self.args
    trainx, trainy = train
    batch_size = args.batch_size

    if dev:
        dev_batches_x, dev_batches_y = create_batches(
            range(len(dev[0])), dev[0], dev[1], batch_size
        )
    if test:
        test_batches_x, test_batches_y = create_batches(
            range(len(test[0])), test[0], test[1], batch_size
        )

    cost = self.nll_loss + self.l2_sqr

    updates, lr, gnorm = create_optimization_updates(
        cost=cost,
        params=self.params,
        lr=args.learning_rate,
        method=args.learning
    )[:3]

    train_model = theano.function(
        inputs=[self.x, self.y],
        outputs=[cost, gnorm],
        updates=updates,
        allow_input_downcast=True
    )
    eval_acc = theano.function(
        inputs=[self.x],
        outputs=self.pred,
        allow_input_downcast=True
    )

    unchanged = 0
    best_dev = 0.0
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
    rnn_dropout_prob = np.float64(args.rnn_dropout).astype(theano.config.floatX)

    start_time = time.time()
    perm = range(len(trainx))

    say(str(["%.2f" % np.linalg.norm(x.get_value(borrow=True))
             for x in self.params]) + "\n")

    for epoch in xrange(args.max_epochs):
        unchanged += 1
        # stop early after 20 epochs without a new best dev accuracy
        if unchanged > 20:
            return

        train_loss = 0.0
        random.shuffle(perm)
        batches_x, batches_y = create_batches(perm, trainx, trainy, batch_size)
        N = len(batches_x)

        for i in xrange(N):
            if i % 100 == 0:
                sys.stdout.write("\r%d" % i)
                sys.stdout.flush()

            x = batches_x[i]
            y = batches_y[i]
            va, grad_norm = train_model(x, y)
            train_loss += va

            # debug: dump the offending batch if the loss diverges
            if math.isnan(va):
                print ""
                print i - 1, i
                print x
                print y
                return

            if i == N - 1:
                # evaluate at the end of the epoch with dropout disabled
                self.dropout.set_value(0.0)
                self.rnn_dropout.set_value(0.0)

                say("\n")
                say("Epoch %.1f\tlr=%.6f\tloss=%.4f\t|g|=%s [%.2fm]\n" % (
                    epoch + (i + 1) / (N + 0.0),
                    float(lr.get_value(borrow=True)),
                    train_loss / (i + 1),
                    float(grad_norm),
                    (time.time() - start_time) / 60.0
                ))
                say(str(["%.2f" % np.linalg.norm(x.get_value(borrow=True))
                         for x in self.params]) + "\n")

                if dev:
                    preds = [eval_acc(x) for x in dev_batches_x]
                    nowf_dev = self.eval_accuracy(preds, dev_batches_y)
                    if nowf_dev > best_dev:
                        unchanged = 0
                        best_dev = nowf_dev
                        if args.save:
                            self.save_model(args.save, args)

                    say("\tdev accuracy=%.4f\tbest=%.4f\n" % (
                        nowf_dev, best_dev))

                    if args.test and nowf_dev == best_dev:
                        preds = [eval_acc(x) for x in test_batches_x]
                        nowf_test = self.eval_accuracy(preds, test_batches_y)
                        say("\ttest accuracy=%.4f\n" % (nowf_test,))

                    if best_dev > nowf_dev + 0.05:
                        return

                # restore dropout for the next training epoch
                self.dropout.set_value(dropout_prob)
                self.rnn_dropout.set_value(rnn_dropout_prob)
                start_time = time.time()

        if args.lr_decay > 0:
            assert args.lr_decay < 1
            lr.set_value(np.float32(lr.get_value() * args.lr_decay))
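# Note on the learning-rate schedule above: with args.lr_decay = d
# (0 < d < 1), the rate is multiplied by d at the end of every epoch, so
# after t epochs lr_t = lr_0 * d**t. For example, lr_0 = 0.001 and d = 0.95
# give 0.001 * 0.95**10 ~= 0.000599 after 10 epochs.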
def train(self, args, train, dev, test=None):
    embedding_layer = self.layers[-2]
    dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
    rnn_dropout_prob = np.float64(args["rnn_dropout"]).astype(theano.config.floatX)
    batch_size = args["batch_size"]
    unroll_size = args["unroll_size"]

    train = create_batches(train, embedding_layer.map_to_ids, batch_size)
    dev = create_batches(dev, embedding_layer.map_to_ids, 1)
    if test is not None:
        test = create_batches(test, embedding_layer.map_to_ids, 1)

    cost = T.sum(self.nll) / self.idxs.shape[1]

    updates, lr, gnorm = create_optimization_updates(
        cost=cost,
        params=self.params,
        lr=args["learning_rate"],
        eps=args["eps"],
        method=args["learning"]
    )[:3]

    train_func = theano.function(
        inputs=[self.idxs, self.idys] + self.init_state,
        outputs=[cost, gnorm] + self.last_state,
        updates=updates
    )
    eval_func = theano.function(
        inputs=[self.idxs, self.idys] + self.init_state,
        outputs=[self.nll] + self.last_state
    )

    N = (len(train[0]) - 1) / unroll_size + 1
    say(" train: {} tokens, {} mini-batches\n".format(
        len(train[0].ravel()), N))
    say(" dev: {} tokens\n".format(len(dev[0].ravel())))
    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

    decay_epoch = args["lr_decay_epoch"]
    decay_rate = args["lr_decay"]
    lr_0 = args["learning_rate"]
    iter_cnt = 0

    depth = args["depth"]
    unchanged = 0
    best_dev = 1e+10
    start_time = 0
    max_epoch = args["max_epoch"]

    for epoch in xrange(max_epoch):
        unchanged += 1
        # stop early after 20 epochs without a new best dev perplexity
        if unchanged > 20:
            break

        if decay_epoch > 0 and epoch >= decay_epoch:
            lr.set_value(np.float32(lr.get_value() * decay_rate))

        start_time = time.time()

        # zero-initialize the recurrent state (two vectors per layer)
        prev_state = [np.zeros((batch_size, self.n_d), dtype=theano.config.floatX)
                      for i in xrange(depth * 2)]

        train_loss = 0.0
        for i in xrange(N):
            # get current batch
            x = train[0][i * unroll_size:(i + 1) * unroll_size]
            y = train[1][i * unroll_size:(i + 1) * unroll_size]
            iter_cnt += 1

            # carry the hidden state across truncated-BPTT segments
            ret = train_func(x, y, *prev_state)
            cur_loss, grad_norm, prev_state = ret[0], ret[1], ret[2:]
            train_loss += cur_loss / len(x)

            if i % 10 == 0:
                say("\r{}".format(i))

            if i == N - 1:
                # evaluate on the dev set with dropout disabled
                self.dropout.set_value(0.0)
                self.rnn_dropout.set_value(0.0)
                dev_preds = self.evaluate(eval_func, dev, 1, unroll_size)
                dev_loss = evaluate_average(predictions=dev_preds, masks=None)
                dev_ppl = np.exp(dev_loss)
                self.dropout.set_value(dropout_prob)
                self.rnn_dropout.set_value(rnn_dropout_prob)

                say("\r\n")
                say(("Epoch={} lr={:.4f} train_loss={:.3f} train_ppl={:.1f} "
                     "dev_loss={:.3f} dev_ppl={:.1f}\t|g|={:.3f}\t[{:.1f}m]\n").format(
                    epoch,
                    float(lr.get_value(borrow=True)),
                    train_loss / N,
                    np.exp(train_loss / N),
                    dev_loss,
                    dev_ppl,
                    float(grad_norm),
                    (time.time() - start_time) / 60.0
                ))
                say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                if dev_ppl < best_dev:
                    best_dev = dev_ppl
                    # reset the early-stopping counter once dev perplexity is
                    # in a reasonable range; this must happen before the
                    # test-set shortcut below, or it would be skipped
                    # whenever test is None
                    if best_dev < 200:
                        unchanged = 0
                    if test is None:
                        continue
                    # also evaluate on the test set, again without dropout
                    self.dropout.set_value(0.0)
                    self.rnn_dropout.set_value(0.0)
                    test_preds = self.evaluate(eval_func, test, 1, unroll_size)
                    test_loss = evaluate_average(predictions=test_preds, masks=None)
                    test_ppl = np.exp(test_loss)
                    self.dropout.set_value(dropout_prob)
                    self.rnn_dropout.set_value(rnn_dropout_prob)
                    say("\tbest_dev={:.1f} test_loss={:.3f} test_ppl={:.1f}\n"
                        .format(best_dev, test_loss, test_ppl))

        say("\n")
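# Example driver (a minimal sketch; `LanguageModel`, `read_corpus`, and all
# values in the args dict below are hypothetical, though the keys are the
# ones the method actually reads):
#
#     train_text = read_corpus("train.txt")
#     dev_text = read_corpus("dev.txt")
#     args = {"dropout": 0.5, "rnn_dropout": 0.2, "batch_size": 32,
#             "unroll_size": 35, "learning_rate": 1.0, "eps": 1e-8,
#             "learning": "sgd", "lr_decay_epoch": 10, "lr_decay": 0.98,
#             "depth": 2, "max_epoch": 100}
#     model = LanguageModel(args)
#     model.train(args, train_text, dev_text)
#
# Note that dev_ppl/test_ppl are exp of the average per-token negative
# log-likelihood, i.e. ppl = np.exp(total_nll / num_tokens).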