Example #1
def main():
    config = get_config()
    # Wrap argparse functionality around the train function so it can be driven from the command line
    parser = argparse.ArgumentParser(description='Trains a language model from a wiki dataset')
    parser.add_argument('dump', help='The wiki dump name to train a language model for')
    parser.add_argument('name', help='Name of the model, used in exported files etc')
    parser.add_argument('--test-mode', help="Use a smaller dataset to check that the script actually runs", action='store_true')
    parser.add_argument('--epochs', type=int, default=5, help="Number of epochs to run for")
    parser.add_argument('--batch_size', type=int, default=64, help="Batch size")
    parser.add_argument('--gpu', type=int, default=-1, help="GPU to use (negative value indicates CPU)")
    parser.add_argument('--out',default='result',help="Folder to put results")
    parser.add_argument('--grad-clip', default=True,
                        type=lambda s: s.lower() not in ('false', '0', 'no'),
                        help="Clip gradients (pass 'false' to disable)")
    parser.add_argument('--brpoplen', type=int, default=35, help="Number of words in each mini-batch (= length of truncated BPTT)")
    parser.add_argument('--resume', default='', help="Resume the training from snapshot")
    parser.add_argument('--max-seq-size', default=250000, type=int, help="Maximum sequence size")
    args = parser.parse_args()
    com = Communication(args.out)
    com.add_text("Type", "language model")

    train(args.dump, args.name, args.test_mode, args.epochs, args.batch_size,
          args.gpu, args.out, args.grad_clip, args.brpoplen, args.resume,
          args.max_seq_size, com)
    com.send_slack(config.get('slack', 'channel'), config.get('slack', 'api_token'))
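
These entry points report results through a Communication helper defined elsewhere in the repo. The sketch below is a minimal, hypothetical reconstruction inferred purely from how the helper is called across these examples (a constructor taking the output directory, add_text, add_file, send_slack); the Slack wiring in particular is an assumption, not the repo's actual implementation.

class Communication:
    # Hypothetical sketch inferred from usage in these examples.
    def __init__(self, out_dir):
        self.out_dir = out_dir
        self.texts = []   # (key, value) pairs from add_text
        self.files = []   # (path, label) pairs from add_file

    def add_text(self, key, value):
        self.texts.append((key, str(value)))

    def add_file(self, path, label):
        self.files.append((path, label))

    def send_slack(self, channel, api_token):
        # Assumption: format the collected texts/files into a message and
        # post it to `channel` via Slack's Web API using `api_token`.
        ...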

Example #2
def main():
    config = get_config()
    # Wrap argparse functionality around the evaluation (check_loss) so it can be driven from the command line
    parser = argparse.ArgumentParser(description='Evaluates a TRNN')
    parser.add_argument('dump', help='The wiki dump name to evaluate on')
    parser.add_argument('lm', help='Path to language model')
    parser.add_argument('--test-mode', help="Use a smaller dataset to check that the script actually runs", action='store_true')
    parser.add_argument('--batch_size', type=int, default=64, help="Batch size")
    parser.add_argument('--gpu', type=int, default=-1, help="GPU to use (negative value indicates CPU)")
    parser.add_argument('--max-seq-size', default=250000, type=int, help="Maximum sequence size")
    parser.add_argument('--out', default='result', help="Folder to put results")
    args = parser.parse_args()
    com = Communication(args.out)
    com.add_text("Type", "Translation matrix")

    # keep time
    com.add_text("Start date", time.strftime("%c"))
    start = time.time()

    check_loss(com, args.lm, args.dump, args.gpu, args.batch_size, args.max_seq_size, args.test_mode)
    diff = time.time() - start
    com.add_text('time', seconds_to_str(diff))
    com.send_slack(config.get('slack', 'channel'), config.get('slack', 'api_token'))
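
The elapsed-time string comes from a seconds_to_str helper that is defined elsewhere in the repo. A one-function sketch under the assumption that it renders a duration as H:MM:SS (the exact format is not shown in these examples):

def seconds_to_str(seconds):
    # Assumed behavior: format a duration in seconds as H:MM:SS.
    m, s = divmod(int(seconds), 60)
    h, m = divmod(m, 60)
    return '%d:%02d:%02d' % (h, m, s)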

Example #3
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen',
                        '-l',
                        type=int,
                        default=35,
                        help='Number of words in each mini-batch '
                        '(= length of truncated BPTT)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test',
                        action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()
    com = Communication(args.out)
    com.add_text("TYPE", "PBT")

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

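    # ParallelSequentialIterator is defined earlier in this script (it follows
    # Chainer's ptb example): each iterator reads words from equally spaced
    # offsets of the corpus in parallel, so the truncated BPTT below runs over
    # independent streams.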
    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    eval_rnn.train = False
    trainer.extend(
        extensions.Evaluator(
            val_iter,
            eval_model,
            device=args.gpu,
            # Reset the RNN state at the beginning of each evaluation
            eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 5 if args.test else 500
    trainer.extend(
        extensions.LogReport(postprocess=compute_perplexity,
                             trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']),
                   trigger=(interval, 'iteration'))
    trainer.extend(
        extensions.ProgressBar(update_interval=1 if args.test else 10))
    #trainer.extend(extensions.snapshot())
    #trainer.extend(extensions.snapshot_object(
    #    model, 'model_iter_{.updater.iteration}'))
    start = time.time()

    loss_r = extensions.PlotReport(['validation/main/loss', 'main/loss'],
                                   'epoch',
                                   file_name='loss.png')

    trainer.extend(loss_r)
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    date = time.strftime("%Y-%m-%d_%H-%M-%S")
    trainer.run()
    diff = time.time() - start
    com.add_text('time', seconds_to_str(diff))
    com.add_file(os.path.join(args.out, 'loss.png'), "Loss")  # attach the loss plot saved by PlotReport
    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test perplexity:', np.exp(float(result['main/loss'])))  # perplexity = exp(cross-entropy)
    com.add_text("Loss", result['main/loss'])
    config = get_config()
    com.send_slack(config.get('slack', 'channel'),
                   config.get('slack', 'api_token'))
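
The LogReport hook above passes postprocess=compute_perplexity, a function defined elsewhere in the script. This example closely follows Chainer's ptb example, where that hook converts the logged cross-entropy losses into the 'perplexity' / 'val_perplexity' keys printed by PrintReport; a sketch along those lines:

import numpy as np

def compute_perplexity(result):
    # Perplexity is exp(mean cross-entropy); add the keys PrintReport expects.
    result['perplexity'] = np.exp(result['main/loss'])
    if 'validation/main/loss' in result:
        result['val_perplexity'] = np.exp(result['validation/main/loss'])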