    nb_targets_parallel=args.target_seq_len)
train_data = TransposeWrapper(train_data_tb)

valid_ids = tokens_from_fn(args.valid, lm.vocab, randomize=False,
                           regime=tokenize_regime)
valid_batched = batchify(valid_ids, 10, args.cuda)
valid_data_tb = TemporalSplits(valid_batched,
                               nb_inputs_necessary=lm.model.in_len,
                               nb_targets_parallel=args.target_seq_len)
valid_data = TransposeWrapper(valid_data_tb)

print('Initial perplexity {:.2f}'.format(
    math.exp(evaluate_(lm, valid_data, use_ivecs=False, custom_batches=False))
))

print("training...")
lr = args.lr
best_val_loss = None

for epoch in range(1, args.epochs + 1):
    logger = ProgressLogger(epoch, args.log_interval, lr,
                            len(train_batched) // args.target_seq_len)
    optim = torch.optim.SGD(lm.parameters(), lr, weight_decay=args.beta)
    # The trailing arguments of train_() were cut off in the source; the
    # kwargs below mirror the evaluate_() calls and are an assumption.
    train_(lm, train_data, optim, logger, args.clip,
           use_ivecs=False, custom_batches=False)
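# Hedged sketch (not in the original script): `lr` and `best_val_loss` above
# suggest the usual anneal-on-plateau schedule, as in PyTorch's
# word_language_model example, applied after each epoch inside the loop.
# The helper and its names (val_loss_fn, save_path) are hypothetical.
import torch

def anneal_on_plateau(lm, val_loss_fn, lr, best_val_loss, save_path, factor=4.0):
    """Keep the best checkpoint; divide lr by `factor` when validation loss
    stops improving. Returns the updated (lr, best_val_loss)."""
    val_loss = val_loss_fn(lm)
    if best_val_loss is None or val_loss < best_val_loss:
        torch.save(lm.state_dict(), save_path)  # new best model so far
        best_val_loss = val_loss
    else:
        lr /= factor  # no improvement on validation data: anneal
    return lr, best_val_loss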
print(lm)

print("preparing data...")
tokenize_regime = 'words'
if args.characters:
    tokenize_regime = 'chars'

ids = tokens_from_fn(args.data, lm.vocab, randomize=False,
                     regime=tokenize_regime)
batched = batchify(ids, 10, args.cuda)
data_tb = TemporalSplits(batched,
                         nb_inputs_necessary=lm.model.in_len,
                         nb_targets_parallel=args.target_seq_len)
data = TransposeWrapper(data_tb)

# Report the out-of-vocabulary rate of the evaluation data.
oov_mask = ids == lm.vocab.unk_ind
nb_oovs = oov_mask.sum()
print('Nb oovs: {} ({:.2f} %)\n'.format(nb_oovs, 100.0 * nb_oovs / len(ids)))

# Run on test data.
loss = evaluate_(lm, data, use_ivecs=False, custom_batches=False)
print('loss {:5.2f} | ppl {:8.2f}'.format(loss, math.exp(loss)))
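# Hedged sketch (assumption, not this repo's code): batchify() above is
# presumed to follow the common convention of cutting the 1-D token stream
# into `bsz` parallel columns and dropping the remainder, as in PyTorch's
# word_language_model example. `batchify_sketch` is illustrative only.
import torch

def batchify_sketch(ids, bsz, cuda):
    nb_steps = ids.size(0) // bsz
    data = ids.narrow(0, 0, nb_steps * bsz)     # drop tokens that don't fit
    data = data.view(bsz, -1).t().contiguous()  # shape: (nb_steps, bsz)
    return data.cuda() if cuda else data

# e.g. batchify_sketch(torch.arange(11), 2, False) pairs token 0 with
# token 5, token 1 with token 6, etc., and drops the leftover token 10.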
def val_loss_fn(lm):
    return evaluate_(lm, valid_data, use_ivecs=False, custom_batches=False)
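# Hedged usage sketch: evaluate_() returns a mean cross-entropy in nats (the
# script above prints math.exp(loss) as perplexity), so the callback can be
# wrapped for reporting. `report_val_ppl` is a hypothetical helper, not part
# of the original sources.
import math

def report_val_ppl(lm):
    loss = val_loss_fn(lm)
    print('valid loss {:5.2f} | valid ppl {:8.2f}'.format(loss, math.exp(loss)))
    return loss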