Example #1
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed(args.seed)

###############################################################################
# Load data
###############################################################################

logging.info("Loading data")
start = time.time()
corpus = Corpus(args.data, args.trainfile)
logging.info("( %.2f )" % (time.time() - start))
ntokens = len(corpus.dictionary)
logging.info("Vocab size %d", ntokens)

if args.cuda:
    corpus.train = corpus.train.cuda()
    corpus.valid = corpus.valid.cuda()
    corpus.test = corpus.test.cuda()

logging.info("Batchying..")
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size, args.cuda)
val_data = batchify(corpus.valid, eval_batch_size, args.cuda)
test_data = batchify(corpus.test, eval_batch_size, args.cuda)
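The snippet above hands the flat 1-D token tensors to a batchify helper together with the --cuda flag. As a point of reference, here is a minimal sketch of what such a helper typically does (following the usual PyTorch word-language-model recipe; the actual implementation in this project may differ):

import torch

def batchify(data, bsz, use_cuda):
    # Keep only as many tokens as fit into whole columns of width bsz.
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    # Reshape to (nbatch, bsz): each column is a contiguous slice of the corpus.
    data = data.view(bsz, -1).t().contiguous()
    return data.cuda() if use_cuda else data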
Example #2
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

eval_batch_size = 32

if args.test:
    dictionary = Dictionary(args.data)

    test = tokenize(dictionary, args.test)
    print("Size, OOV", test.size(0), sum(test == dictionary.word2idx["<unk>"]))
    test_data = batchify(test, eval_batch_size, args.cuda)
    ntokens = len(dictionary)

else:
    corpus = Corpus(args.data)
    print("Size, OOV", corpus.test.size(0),
          sum(corpus.test == corpus.dictionary.word2idx["<unk>"]))
    test_data = batchify(corpus.test, eval_batch_size, args.cuda)
    dictionary = corpus.dictionary

# Load the best saved model.
with open(args.checkpoint, 'rb') as f:
    print("Loading the model")
    if args.cuda:
        model = torch.load(f)
    else:
        # remap parameters of a CUDA-trained model onto the CPU
        model = torch.load(f, map_location=lambda storage, loc: storage)

print("Evaluation on non-unk tokens")
Example #3
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed(args.seed)

###############################################################################
# Load data
###############################################################################

logging.info("Loading data")
start = time.time()
corpus = Corpus(args.data, onlyTest=True)
logging.info("( %.2f )" % (time.time() - start))
#logging.info(corpus.train)

logging.info("Batchying..")
eval_batch_size = 1
#train_data = batchify(corpus.train, args.batch_size, args.cuda)
#logging.info("Train data size", train_data.size())
#val_data = batchify(corpus.valid, eval_batch_size, args.cuda)
test_data = batchify(corpus.test, eval_batch_size, args.cuda)

ntokens = len(corpus.dictionary)

criterion = nn.CrossEntropyLoss()

###############################################################################
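For orientation, here is a sketch of the kind of evaluation loop that typically consumes test_data, ntokens and criterion in scripts of this shape; get_batch, the bptt window size and the model interface (init_hidden, output shape) are assumptions rather than code from this repository:

import torch

def get_batch(source, i, bptt):
    # Slice a bptt-long window of inputs and the one-step-shifted targets.
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].reshape(-1)
    return data, target

def evaluate(model, data_source, criterion, ntokens, batch_size, bptt=35):
    # Average cross-entropy per token over a batchified evaluation stream;
    # perplexity is exp() of the returned value.
    model.eval()
    total_loss = 0.0
    hidden = model.init_hidden(batch_size)  # assumes an RNN-style model API
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i, bptt)
            output, hidden = model(data, hidden)
            total_loss += len(data) * criterion(
                output.reshape(-1, ntokens), targets).item()
    return total_loss / (data_source.size(0) - 1)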