Exemple #1
0
def main():
    """Score a stored language model on a corpus of independent lines.

    Reads its options from the command line, seeds the RNGs through
    ``init_seeds``, loads the model with ``torch.load`` onto the CPU
    (moving it to CUDA afterwards when ``--cuda`` is given), runs an
    ``IndependentLinesEvaluator`` over ``--data`` and prints utilization,
    total loss, per-token loss and perplexity.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='[%(levelname)s::%(name)s] %(message)s',
    )

    cli = argparse.ArgumentParser(description='PyTorch RNN/LSTM Language Model')

    # Data-related options.
    cli.add_argument('--data', type=str, required=True,
                     help='location of the data corpus')
    cli.add_argument('--prefix', type=str, help='')
    cli.add_argument('--total-vocab-size', type=int,
                     help='how many words should be assumed to exist overall')

    # Batching limits.
    cli.add_argument('--batch-size', type=int, default=20, metavar='N',
                     help='batch size')
    cli.add_argument('--max-tokens', type=int, default=1000, metavar='N',
                     help='Maximal number of softmaxes in a batch')

    # Runtime options.
    cli.add_argument('--seed', type=int, default=1111, help='random seed')
    cli.add_argument('--cuda', action='store_true', help='use CUDA')
    cli.add_argument('--load', type=str, required=True,
                     help='where to load a model from')

    options = cli.parse_args()
    print(options)

    init_seeds(options.seed, options.cuda)

    print("loading model...")
    # The checkpoint is always materialised on the CPU first; the move to
    # the GPU happens only after the model has been put into eval mode.
    model = torch.load(options.load, map_location='cpu')
    model.nb_nonzero_masks = 0
    model.eval()
    if options.cuda:
        model.cuda()
    print(model)

    line_evaluator = IndependentLinesEvaluator(
        lm=model,
        fn_evalset=options.data,
        max_batch_size=options.batch_size,
        max_tokens=options.max_tokens,
        total_vocab_size=options.total_vocab_size,
    )
    eval_report = line_evaluator.evaluate(options.prefix)

    print(f'Utilization: {100.0*eval_report.utilization:.2f} %')

    summary = 'total loss {:.1f} | per token loss {:5.2f} | ppl {:8.2f}'
    print(summary.format(
        eval_report.total_loss,
        eval_report.loss_per_token,
        math.exp(eval_report.loss_per_token),
    ))
Exemple #2
0
def main(args):
    """Load a language model and print its loss and perplexity on ``args.data``.

    Args:
        args: parsed options; this function reads ``seed``, ``cuda``,
            ``load``, ``data``, ``batch_size``, ``target_seq_len`` and
            ``characters``.
    """
    print(args)

    init_seeds(args.seed, args.cuda)

    print("loading model...")
    target_device = torch.device('cuda' if args.cuda else 'cpu')
    model = torch.load(args.load, map_location=target_device)
    print(model)

    # Character-level tokenization is opt-in; words are the default.
    if args.characters:
        regime = 'chars'
    else:
        regime = 'words'

    enblock_evaluator = EnblockEvaluator(
        model,
        args.data,
        args.batch_size,
        args.target_seq_len,
        tokenize_regime=regime,
    )
    report = enblock_evaluator.evaluate()

    summary = 'total loss {:.1f} | per token loss {:5.2f} | ppl {:8.2f}'
    print(summary.format(
        report.total_loss,
        report.loss_per_token,
        math.exp(report.loss_per_token),
    ))
Exemple #3
0
                        type=int,
                        default=200,
                        metavar='N',
                        help='report interval')
    parser.add_argument('--load',
                        type=str,
                        required=True,
                        help='where to load a model from')
    parser.add_argument('--save',
                        type=str,
                        required=True,
                        help='path to save the final model')
    args = parser.parse_args()
    print(args)

    init_seeds(args.seed, args.cuda)

    print("loading model...")
    lm = torch.load(args.load)
    if args.cuda:
        lm.cuda()
    print(lm.model)

    print("preparing data...")
    tokenize_regime = 'words'
    if args.characters:
        tokenize_regime = 'chars'

    train_ids = tokens_from_fn(args.train,
                               lm.vocab,
                               randomize=False,
def main(args):
    """Continue training a loaded language model from a data stream.

    The model is loaded from ``args.load``, trained with plain SGD on the
    batches yielded by the training stream, and validated through an
    ``EnblockEvaluator`` on ``args.valid``. Periodic validation during
    training is delegated to ``ValidationWatcher``.

    Args:
        args: parsed options; this function reads ``seed``, ``cuda``,
            ``load``, ``train_yaml``, ``train``, ``tokenize_regime``,
            ``batch_size``, ``target_seq_len``, ``valid``, ``lr``,
            ``val_interval``, ``workdir``, ``beta``, ``log_interval``,
            ``clip``, ``save``, ``patience`` and ``min_lr``.
    """
    print(args)

    init_seeds(args.seed, args.cuda)

    print("loading model...")
    device = torch.device('cuda') if args.cuda else torch.device('cpu')
    lm = torch.load(args.load).to(device)
    print(lm.model)

    print("preparing training data...")

    # A YAML description of the training data takes precedence over a
    # single plain-text file.
    if args.train_yaml:
        train_data_stream, single_stream_len = yaml_factory_noepoch(
            args.train_yaml, lm, device)
    else:
        train_data_stream, single_stream_len = plain_factory_noepoch(
            data_fn=args.train,
            lm=lm,
            tokenize_regime=args.tokenize_regime,
            batch_size=args.batch_size,
            device=device,
            target_seq_len=args.target_seq_len,
        )

    print("preparing validation data...")
    # Validation uses a fixed batch size of 10, independent of the
    # training batch size.
    evaluator = EnblockEvaluator(lm,
                                 args.valid,
                                 10,
                                 args.target_seq_len,
                                 tokenize_regime=args.tokenize_regime)

    def val_loss_fn():
        """Return the current per-token loss on the validation set."""
        return evaluator.evaluate().loss_per_token

    print("computing initial PPL...")
    initial_val_loss = val_loss_fn()
    print('Initial perplexity {:.2f}'.format(math.exp(initial_val_loss)))

    print("training...")
    lr = args.lr

    val_watcher = ValidationWatcher(val_loss_fn, initial_val_loss,
                                    args.val_interval, args.workdir, lm)
    # The loaded model's own validation loss is the baseline to beat.
    best_val_loss = initial_val_loss

    optim = torch.optim.SGD(lm.parameters(), lr, weight_decay=args.beta)
    patience_ticks = 0

    logger = InfinityLogger(0, args.log_interval, lr)

    hidden = None
    for X, targets in train_data_stream:
        # The hidden state is created lazily on the first batch and then
        # carried across batches.
        if hidden is None:
            hidden = lm.model.init_hidden(args.batch_size)

        hidden = repackage_hidden(hidden)

        lm.train()
        output, hidden = lm.model(X, hidden)
        loss, nb_words = lm.decoder.neg_log_prob(output, targets)
        loss /= nb_words  # optimise the per-word loss

        val_watcher.log_training_update(loss.data, nb_words)

        optim.zero_grad()
        loss.backward()
        # clip_grad_norm_ is the in-place successor of the deprecated
        # clip_grad_norm; the clipping itself is unchanged.
        torch.nn.utils.clip_grad_norm_(lm.parameters(), args.clip)

        optim.step()
        logger.log(loss.data)

    # NOTE(review): everything below runs once, after the training stream
    # is exhausted. The factories are named *_noepoch, so presumably the
    # stream is not bounded by epochs -- confirm whether this is reachable.
    val_loss = val_loss_fn()

    # Save the model if the validation loss is the best we've seen so far.
    if val_loss < best_val_loss:
        torch.save(lm, args.save)
        best_val_loss = val_loss
        patience_ticks = 0
    else:
        patience_ticks += 1
        if patience_ticks > args.patience:
            lr /= 2.0
            if lr < args.min_lr:
                # NOTE(review): the message announces stopping, but no
                # explicit stop follows; the function ends shortly anyway.
                print(
                    f"Learning has reached {lr}, training was supposed to stop at {args.min_lr}, stopping."
                )
            for p in optim.param_groups:
                p['lr'] = lr
            patience_ticks = 0
Exemple #5
0
def main():
    """Train a loaded language model on independent lines for several epochs.

    Parses the command line, loads the model from ``--load``, reads the
    training lines with ``get_independent_lines`` and trains with plain
    SGD. After every epoch the model is evaluated on ``--valid``; it is
    saved to ``--save`` when the validation loss improves, otherwise the
    learning rate is halved.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train',
                        type=str,
                        required=True,
                        help='location of the train corpus')
    parser.add_argument('--valid',
                        type=str,
                        required=True,
                        help='location of the valid corpus')
    # NOTE(review): this flag is parsed but never read below; the lines
    # are shuffled unconditionally before every epoch.
    parser.add_argument('--shuffle-lines',
                        action='store_true',
                        help='shuffle lines before every epoch')

    parser.add_argument('--max-batch-size',
                        type=int,
                        default=20,
                        help='maxiamal batch size')
    parser.add_argument('--max-softmaxes',
                        type=int,
                        default=1000,
                        help='maximal number of softmaxes in a single batch')

    parser.add_argument('--lr',
                        type=float,
                        default=20,
                        help='initial learning rate')
    parser.add_argument('--beta',
                        type=float,
                        default=0,
                        help='L2 regularization penalty')
    parser.add_argument('--clip',
                        type=float,
                        default=0.25,
                        help='gradient clipping')
    parser.add_argument('--epochs',
                        type=int,
                        default=40,
                        help='upper epoch limit')

    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--cuda', action='store_true', help='use CUDA')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        metavar='N',
                        help='report interval')
    parser.add_argument('--val-interval',
                        type=int,
                        default=1000000,
                        metavar='N',
                        help='validation interval in number of tokens')
    parser.add_argument('--workdir', help='where to put models, logs etc.')
    parser.add_argument('--load',
                        type=str,
                        required=True,
                        help='where to load a model from')
    parser.add_argument('--save',
                        type=str,
                        required=True,
                        help='path to save the final model')
    args = parser.parse_args()
    print(args)

    init_seeds(args.seed, args.cuda)

    print("loading model...")
    lm = torch.load(args.load)
    if args.cuda:
        lm.cuda()
    print(lm.model)

    print("preparing training data...")
    with open(args.train) as f:
        train_lines = get_independent_lines(f, lm.vocab)

    nb_train_tokens = sum(len(ids) for ids in train_lines)
    # Count unknown-word tokens with a tensor reduction. The builtin sum()
    # over a tensor iterates element by element and returns the int 0 for
    # an empty line, on which .detach() would fail.
    unk_ind = lm.vocab.unk_ind
    nb_oovs = sum((ids == unk_ind).sum().item() for ids in train_lines)
    print('Nb oovs: {} / {} ({:.2f} %)\n'.format(
        nb_oovs, nb_train_tokens, 100.0 * nb_oovs / nb_train_tokens))

    evaluator = IndependentLinesEvaluator(lm, args.valid, args.max_batch_size,
                                          args.max_softmaxes)

    print("computing initial PPL...")
    initial_evaluation = evaluator.evaluate('')
    print('Initial perplexity {:.2f}'.format(
        math.exp(initial_evaluation.loss_per_token)))

    print("training...")
    lr = args.lr
    best_val_loss = None

    val_watcher = ValidationWatcher(
        lambda: evaluator.evaluate('').loss_per_token,
        initial_evaluation.loss_per_token, args.val_interval, args.workdir, lm)

    optim = torch.optim.SGD(lm.parameters(), lr, weight_decay=args.beta)
    for epoch in range(1, args.epochs + 1):
        logger = InfinityLogger(epoch, args.log_interval, lr)

        nb_batches = 0
        nb_tokens = 0
        running_loss = 0.0
        t0 = time.time()

        random.shuffle(train_lines)
        # NOTE(review): batches are requested with cuda=False even when the
        # model was moved to the GPU -- presumably batch_nll_idxs handles
        # device placement; confirm.
        train_data_stream = OndemandDataProvider(Batcher(
            train_lines, args.max_batch_size, args.max_softmaxes),
                                                 cuda=False)
        for batch in train_data_stream:
            nb_batches += 1
            lm.train()
            loss = lm.batch_nll_idxs(batch).sum()
            # Accumulate the un-normalised loss for the epoch summary
            # before normalising it for the optimisation step.
            running_loss += loss.detach().item()
            nb_words = sum(len(s) for s in batch)
            nb_tokens += nb_words
            loss /= nb_words

            val_watcher.log_training_update(loss.data, nb_words)

            optim.zero_grad()
            loss.backward()
            # clip_grad_norm_ is the in-place successor of the deprecated
            # clip_grad_norm; the clipping itself is unchanged.
            torch.nn.utils.clip_grad_norm_(lm.parameters(), args.clip)

            optim.step()
            logger.log(loss.data)

        val_loss = evaluator.evaluate('').loss_per_token
        print(
            f'epoch {epoch}: {nb_batches} batches, train loss {running_loss:.1f}, running PPL {math.exp(running_loss/nb_tokens):.2f}, val PPL {math.exp(val_loss):.2f}, {time.time() - t0:.1f} sec'
        )
        print(
            epoch_summary(epoch, logger.nb_updates(),
                          logger.time_since_creation(), val_loss))

        # Save the model if the validation loss is the best we've seen so far.
        # Compare against None explicitly so a loss of exactly 0.0 is not
        # mistaken for "no best loss yet".
        if best_val_loss is None or val_loss < best_val_loss:
            torch.save(lm, args.save)
            best_val_loss = val_loss
        else:
            lr /= 2.0
            # The optimizer keeps its own copy of the learning rate; without
            # this write-back the halving would never reach the updates.
            for group in optim.param_groups:
                group['lr'] = lr
Exemple #6
0
def main(args):
    """Train a loaded language model on randomly corrupted input/target streams.

    The training tokens are wrapped in an ``InputTargetCorruptor``
    configured from the substitution, deletion and insertion rates in
    ``args``. Validation uses a plain ``EnblockEvaluator`` on
    ``args.valid``. After every epoch the model is saved to ``args.save``
    when the validation loss improves; otherwise a patience counter is
    advanced and, once it exceeds ``args.patience``, the learning rate is
    halved.

    Args:
        args: parsed options; this function reads ``seed``, ``cuda``,
            ``load``, ``label_smoothing``, ``train``, ``subs_rate``,
            ``target_subs_rate``, ``del_rate``, ``ins_rate``,
            ``batch_size``, ``target_seq_len``, ``valid``, ``lr``,
            ``val_interval``, ``workdir``, ``beta``, ``epochs``,
            ``log_interval``, ``clip``, ``save`` and ``patience``.
    """
    print(args)
    logging.basicConfig(level=logging.INFO, format='[%(levelname)s::%(name)s] %(message)s')

    init_seeds(args.seed, args.cuda)

    print("loading model...")
    lm = torch.load(args.load)
    if args.cuda:
        lm.cuda()
    # Override the label smoothing stored in the loaded model.
    lm.decoder.core_loss.amount = args.label_smoothing

    print(lm.model)
    print('Label smoothing power', lm.decoder.core_loss.amount)

    tokenize_regime = 'words'

    print("preparing training data...")
    train_ids = tokens_from_fn(args.train, lm.vocab, randomize=False, regime=tokenize_regime)
    train_streams = form_input_targets(train_ids)
    corrupted_provider = InputTargetCorruptor(
        train_streams,
        args.subs_rate,
        args.target_subs_rate,
        len(lm.vocab),
        args.del_rate,
        args.ins_rate,
        protected=[lm.vocab['</s>']],
    )
    batch_former = LazyBatcher(args.batch_size, corrupted_provider)
    train_data = TemplSplitterClean(args.target_seq_len, batch_former)
    train_data_stream = OndemandDataProvider(TransposeWrapper(train_data), args.cuda)

    print("preparing validation data...")
    evaluator = EnblockEvaluator(lm, args.valid, 10, args.target_seq_len)
    # Evaluation (de facto LR scheduling) with input corruption did not
    # help during the CHiMe-6 evaluation
    # evaluator = SubstitutionalEnblockEvaluator(
    #     lm, args.valid,
    #     batch_size=10, target_seq_len=args.target_seq_len,
    #     corruptor=lambda data: Corruptor(data, args.corruption_rate, len(lm.vocab)),
    #     nb_rounds=args.eval_rounds,
    # )

    def val_loss_fn():
        """Return the current per-token loss on the validation set."""
        return evaluator.evaluate().loss_per_token

    print("computing initial PPL...")
    initial_val_loss = val_loss_fn()
    print('Initial perplexity {:.2f}'.format(math.exp(initial_val_loss)))

    print("training...")
    lr = args.lr
    best_val_loss = None
    # Initialise explicitly instead of relying on the first epoch always
    # taking the "improved" branch.
    patience_ticks = 0

    val_watcher = ValidationWatcher(val_loss_fn, initial_val_loss, args.val_interval, args.workdir, lm)

    optim = torch.optim.SGD(lm.parameters(), lr, weight_decay=args.beta)
    for epoch in range(1, args.epochs + 1):
        # NOTE(review): len(list(train_data)) walks the whole training set
        # once per epoch just to size the progress logger -- kept as is
        # because iterating presumably also advances the corruptor's
        # random state; confirm before optimising.
        logger = ProgressLogger(epoch, args.log_interval, lr, len(list(train_data)) // args.target_seq_len)

        hidden = None
        for X, targets in train_data_stream:
            # The hidden state is created lazily on the first batch of
            # every epoch and then carried across batches.
            if hidden is None:
                hidden = lm.model.init_hidden(args.batch_size)

            hidden = repackage_hidden(hidden)

            lm.train()
            output, hidden = lm.model(X, hidden)
            loss, nb_words = lm.decoder.neg_log_prob(output, targets)
            loss /= nb_words  # optimise the per-word loss

            val_watcher.log_training_update(loss.data, nb_words)

            optim.zero_grad()
            loss.backward()
            # clip_grad_norm_ is the in-place successor of the deprecated
            # clip_grad_norm; the clipping itself is unchanged.
            torch.nn.utils.clip_grad_norm_(lm.parameters(), args.clip)

            optim.step()
            logger.log(loss.data)

        val_loss = val_loss_fn()
        print(epoch_summary(epoch, logger.nb_updates(), logger.time_since_creation(), val_loss))

        # Save the model if the validation loss is the best we've seen so far.
        # Compare against None explicitly so a loss of exactly 0.0 is not
        # mistaken for "no best loss yet".
        if best_val_loss is None or val_loss < best_val_loss:
            torch.save(lm, args.save)
            best_val_loss = val_loss
            patience_ticks = 0
        else:
            patience_ticks += 1
            if patience_ticks > args.patience:
                lr /= 2.0
                # The optimizer keeps its own copy of the learning rate;
                # without this write-back the halving would never reach
                # the updates.
                for group in optim.param_groups:
                    group['lr'] = lr
                patience_ticks = 0