def main():
    config = get_config()
    # Wraps argparse functionality around the train function so that its
    # parameters can be provided as command-line arguments
    parser = argparse.ArgumentParser(
        description='Trains a language model from a wiki dataset')
    parser.add_argument('dump',
                        help='The wiki dump name to train a language model for')
    parser.add_argument('name',
                        help='Name of the model, used in exported files etc')
    parser.add_argument('--test-mode', action='store_true',
                        help='Makes the dataset smaller to check that the script actually runs')
    parser.add_argument('--epochs', type=int, default=5,
                        help='Number of epochs to run for')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='Batch size')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU to use (negative value indicates CPU)')
    parser.add_argument('--out', default='result',
                        help='Folder to put results in')
    parser.add_argument('--grad-clip', default=True,
                        help='Clip gradients')
    parser.add_argument('--bproplen', type=int, default=35,
                        help='Length of truncated BPTT')
    parser.add_argument('--resume', default='',
                        help='Resume the training from a snapshot')
    parser.add_argument('--max-seq-size', default=250000, type=int)
    args = parser.parse_args()

    com = Communication(args.out)
    com.add_text("Type", "language model")
    train(args.dump, args.name, args.test_mode, args.epochs, args.batch_size,
          args.gpu, args.out, args.grad_clip, args.bproplen, args.resume,
          args.max_seq_size, com)
    com.send_slack(config.get('slack', 'channel'),
                   config.get('slack', 'api_token'))
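
# A minimal sketch of the entry-point guard, assuming this main() lives in its own
# training script (the invocation below and the file name are hypothetical) and that
# helpers such as get_config, Communication and train are imported at module level:
#
#   python train_wiki_lm.py <dump> <name> --gpu 0 --epochs 5
if __name__ == '__main__':
    main()
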
def main():
    config = get_config()
    # Wraps argparse functionality around the check_loss evaluation so that its
    # parameters can be provided as command-line arguments
    parser = argparse.ArgumentParser(description='Evaluates a TRNN')
    parser.add_argument('dump',
                        help='The wiki dump name to train a language model for')
    parser.add_argument('lm', help='Path to language model')
    parser.add_argument('--test-mode', action='store_true',
                        help='Makes the dataset smaller to check that the script actually runs')
    parser.add_argument('--batch_size', type=int, default=64, help='Batch size')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU to use (negative value indicates CPU)')
    parser.add_argument('--max-seq-size', default=250000, type=int)
    parser.add_argument('--out', default='result',
                        help='Folder to put results in')
    args = parser.parse_args()

    com = Communication(args.out)
    com.add_text("Type", "Translation matrix")

    # Keep track of wall-clock time
    com.add_text("Start date", time.strftime("%c"))
    start = time.time()
    check_loss(com, args.lm, args.dump, args.gpu, args.batch_size,
               args.max_seq_size, args.test_mode)
    diff = time.time() - start
    com.add_text('time', seconds_to_str(diff))

    com.send_slack(config.get('slack', 'channel'),
                   config.get('slack', 'api_token'))
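
# seconds_to_str is called above but not defined in this excerpt. A minimal sketch of
# what such a helper could look like (hypothetical implementation that formats the
# elapsed wall-clock time as H:MM:SS):
def seconds_to_str(seconds):
    m, s = divmod(int(seconds), 60)
    h, m = divmod(m, 60)
    return '%d:%02d:%02d' % (h, m, s)
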
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    com = Communication(args.out)
    com.add_text("TYPE", "PTB")

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    eval_rnn.train = False
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, device=args.gpu,
        # Reset the RNN state at the beginning of each evaluation
        eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 5 if args.test else 500
    trainer.extend(extensions.LogReport(postprocess=compute_perplexity,
                                        trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']),
        trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10))
    # trainer.extend(extensions.snapshot())
    # trainer.extend(extensions.snapshot_object(
    #     model, 'model_iter_{.updater.iteration}'))

    start = time.time()

    # Plot the training and validation loss per epoch
    loss_r = extensions.PlotReport(['validation/main/loss', 'main/loss'],
                                   'epoch', file_name='loss.png')
    trainer.extend(loss_r)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    date = time.strftime("%Y-%m-%d_%H-%M-%S")
    trainer.run()

    diff = time.time() - start
    com.add_text('time', seconds_to_str(diff))
    # Save plots
    com.add_file(os.path.join(args.out, 'loss.png'), "Loss")

    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test perplexity:', np.exp(float(result['main/loss'])))
    com.add_text("Loss", result['main/loss'])

    config = get_config()
    com.send_slack(config.get('slack', 'channel'),
                   config.get('slack', 'api_token'))
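
# compute_perplexity is passed to LogReport above but not defined in this excerpt.
# A minimal sketch, assuming it follows the standard Chainer PTB example and converts
# the logged cross-entropy losses into perplexities for reporting:
def compute_perplexity(result):
    result['perplexity'] = np.exp(result['main/loss'])
    if 'validation/main/loss' in result:
        result['val_perplexity'] = np.exp(result['validation/main/loss'])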