def main():
    """Train a classifier with minibatch SGD and evaluate it on a test set.

    Reads train/test files into id-mapped data, trains for `epoch` epochs
    with accuracy-driven learning-rate decay, then reports mean loss and
    accuracy on the test set.

    NOTE(review): this `main` is shadowed by a second `def main()` later in
    the file, so as the file stands this definition is dead code — confirm
    which one is intended.
    """
    global xp
    args = parse_args()
    # Auto-numbering vocabularies: looking up an unseen key assigns it the
    # next consecutive integer id (len of the dict at insertion time).
    x_ids = defaultdict(lambda: len(x_ids))
    y_ids = defaultdict(lambda: len(y_ids))
    init_wrapper(not args.use_cpu)
    data, target = load_data(args.train, x_ids, y_ids)
    test_data, test_target = load_data(args.test, x_ids, y_ids)
    model = init_model(input_size=args.input_size,
                       embed_size=args.embed_size,
                       hidden_size=args.hidden_size,
                       output_size=len(y_ids))
    optimizer = optimizers.SGD(lr=0.5)

    # Begin Training
    UF.init_model_parameters(model)
    model = UF.convert_to_GPU(not args.use_cpu, model)
    optimizer.setup(model)
    prev_acc = 0
    # NOTE(review): `epoch` and `batchsize` are not defined in this function
    # or visible in this chunk — presumably module-level globals; confirm.
    for ep in range(epoch):
        UF.trace("Training Epoch %d" % ep)
        epoch_acc = 0
        total = 0
        for i in range(0, len(data), batchsize):
            x_batch = data[i:i + batchsize]
            y_batch = target[i:i + batchsize]
            optimizer.zero_grads()
            loss, accuracy = forward(model, x_batch, y_batch, args.hidden_size)
            loss.backward()
            optimizer.update()
            # Counting epoch accuracy
            epoch_acc += 100 * accuracy.data
            total += 1
        epoch_acc /= total
        # Decay the learning rate whenever accuracy got worse this epoch.
        if prev_acc > epoch_acc:
            optimizer.lr *= 0.9
            UF.trace("Reducing LR:", optimizer.lr)
        prev_acc = epoch_acc
        UF.trace("Epoch Accuracy: %.2f" % (epoch_acc))

    # Begin Testing
    sum_loss, sum_accuracy = 0, 0
    for i in range(0, len(test_data), batchsize):
        x_batch = test_data[i:i + batchsize]
        y_batch = test_target[i:i + batchsize]
        loss, accuracy = forward(model, x_batch, y_batch, args.hidden_size)
        # Weight each batch by batchsize so the division below yields a
        # per-example mean (last partial batch is slightly over-weighted).
        sum_loss += loss.data * batchsize
        sum_accuracy += accuracy.data * batchsize
    mean_loss = sum_loss / len(test_data)
    mean_accuracy = sum_accuracy / len(test_data)
    print("Mean Loss", mean_loss)
    print("Mean Accuracy", mean_accuracy)
def main():
    """Train an RNN language model with truncated BPTT and report perplexity.

    Loads train/dev/test corpora, trains for `args.epoch` epochs — running
    backward/update every `bp_len` batches and unchaining the computation
    graph to truncate backpropagation — traces train/dev perplexity per
    epoch, then evaluates perplexity on the test set.
    """
    args = parse_args()
    init_program_state(args)
    vocab = make_vocab()
    data, batched_data = load_data(args.train, vocab, args.batch_size)
    dev, batched_dev = load_data(args.dev, vocab, 1)
    test, batched_test = load_data(args.test, vocab, 1)
    model = init_model(input_size=len(vocab),
                       embed_size=args.embed_size,
                       hidden_size=args.hidden_size,
                       output_size=len(vocab))
    optimizer = optimizers.SGD(lr=args.lr)

    # Begin Training
    UF.init_model_parameters(model)
    model = UF.convert_to_GPU(USE_GPU, model)
    optimizer.setup(model)
    batchsize = args.batch_size
    epoch = args.epoch
    # Running loss accumulator for truncated BPTT; reset after every update.
    # NOTE(review): `xp`, `USE_GPU`, `bp_len`, `grad_clip` are not defined in
    # this function — presumably globals set by init_program_state; confirm.
    accum_loss = Variable(xp.zeros((), dtype=np.float32))
    counter = 0
    # For each epoch..
    for ep in range(epoch):
        UF.trace("Training Epoch %d" % ep)
        total_tokens = 0
        log_ppl = 0.0
        # For each batch, do forward & backward computations
        for i, batch in enumerate(batched_data):
            loss, nwords = forward(model, batch)
            accum_loss += loss
            log_ppl += loss.data.reshape(())
            total_tokens += nwords
            # Every bp_len batches: backprop the accumulated loss, truncate
            # the graph, and take one clipped SGD step.
            if (counter + 1) % bp_len == 0:
                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()
                accum_loss = Variable(xp.zeros((), dtype=np.float32))
                optimizer.clip_grads(grad_clip)
                optimizer.update()
            counter += 1
        # Counting Perplexity: average per-token log loss over the epoch.
        log_ppl /= total_tokens
        UF.trace(" PPL (Train) = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl)))
        dev_ppl = evaluate(model, batched_dev)
        UF.trace(" PPL (Dev) = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl)))
        # Reducing learning rate after the first several epochs.
        if ep > 6:
            optimizer.lr /= 1.2
            UF.trace("Reducing LR:", optimizer.lr)

    # Begin Testing
    UF.trace("Begin Testing...")
    test_ppl = evaluate(model, batched_test)
    UF.trace(" log(PPL) = %.10f" % test_ppl)
    UF.trace(" PPL = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))