# Probe for an existing vocabulary file.  Only FileNotFoundError counts as
# "missing"; any other error raised by open() still propagates.
try:
    with open(args.vocab_file, 'r') as f:
        pass
except FileNotFoundError:
    # Nothing on disk yet, so a new vocab file should be created
    args.predefined_vocab_flag = False
else:
    # A vocab file already exists, so it shouldn't be overwritten
    args.predefined_vocab_flag = True

# Keyword options forwarded unchanged to the corpus constructor.
corpus_options = dict(
    checkpoint_flag=args.load_checkpoint,
    predefined_vocab_flag=args.predefined_vocab_flag,
    collapse_nums_flag=args.collapse_nums_flag,
    multisentence_test_flag=args.multisentence_test,
    lower_flag=args.lowercase,
    trainfname=args.trainfname,
    validfname=args.validfname,
    testfname=args.testfname,
)
corpus = data.SentenceCorpus(args.data_dir, args.vocab_file,
                             args.test, args.interact,
                             **corpus_options)

# Pick the datasets for the current mode.
if args.interact:
    # No dataset is prepared here in interactive mode.
    pass
elif not args.test:
    # Training: batch both the training and the validation split.
    train_data = batchify(corpus.train, args.batch_size)
    val_data = batchify(corpus.valid, args.batch_size)
elif args.multisentence_test:
    # Multi-sentence testing wraps corpus.test in a one-element list.
    test_data = [corpus.test]
else:
    # Otherwise corpus.test is unpacked into two values.
    test_sents, test_data = corpus.test
if args.cuda: model = torch.load(f).to(device) else: model = torch.load(f, map_location='cpu') # after load the rnn params are not a continuous chunk of memory # this makes them a continuous chunk, and will speed up forward pass if args.cuda and (not args.single) and (torch.cuda.device_count() > 1): model.module.rnn.flatten_parameters() else: if isinstance(model, torch.nn.DataParallel): model = model.module model.rnn.flatten_parameters() model.eval() corpus = data.SentenceCorpus(args.data_dir, args.vocab_file, generate_flag=True) ntokens = len(corpus.dictionary) if args.cuda and (not args.single) and (torch.cuda.device_count() > 1): hidden = model.module.init_hidden(1) else: hidden = model.init_hidden(1) input_sequence = torch.rand(1, 1).mul(ntokens).long() if args.cuda: input_sequence.data = input_sequence.data.to(device) with open(args.outf, 'w') as outf: for i in range(args.numwords): output, hidden = model(input_sequence, hidden) word_weights = output.squeeze().data.div(args.temperature).exp().cpu()
# Evenly divide the data across the bsz batches. data = data.view(bsz, -1).t().contiguous() # Turning the data over to CUDA at this point may lead to more OOM errors #if args.cuda: # data = data.cuda() if isinstance(data, tuple): return data, tag_data return data eval_batch_size = 10 corpus = data.SentenceCorpus(args.lm_data, args.ccg_data, args.save_lm_data, args.test, trainfname=args.trainfname, validfname=args.validfname, testfname=args.testfname) if args.test: test_lm_sentences, test_lm_data = corpus.test_lm if args.ccg_data: test_ccg_sentences, test_ccg_data = corpus.test_ccg else: test_ccg_sentences = [] test_ccg_data = [] else: train_lm_data = batchify(corpus.train_lm, args.batch_size) val_lm_data = batchify(corpus.valid_lm, eval_batch_size) train_ccg_data = batchify(corpus.train_ccg, args.batch_size)
# These columns are treated as independent by the model, which means that the
# dependence of e. g. 'g' on 'f' can not be learned, but allows more efficient
# batch processing.
def batchify(data, bsz):
    """Arrange `data` into `bsz` columns and move it to `device`.

    The leading dimension is cut down to the largest multiple of `bsz`
    (trailing elements that do not fit are dropped), the result is viewed
    as ``(bsz, -1)``, transposed and made contiguous.

    Args:
        data: tensor to split (assumed 1-D -- TODO confirm against caller).
        bsz: number of columns in the result.

    Returns:
        The rearranged tensor after ``.to(device)``, where `device` is a
        module-level name.
    """
    usable = (data.size(0) // bsz) * bsz
    trimmed = data.narrow(0, 0, usable)
    columns = trimmed.view(bsz, -1).t().contiguous()
    # Turning the data over to CUDA at this point may lead to more OOM errors
    return columns.to(device)


corpus = data.SentenceCorpus(
    args.data_dir,
    args.vocab_file,
    args.test,
    args.interact,
    trainfname=args.trainfname,
    validfname=args.validfname,
    testfname=args.testfname,
)

# No dataset is prepared in interactive mode.
if not args.interact:
    if not args.test:
        train_data = batchify(corpus.train, args.batch_size)
        val_data = batchify(corpus.valid, args.batch_size)
    else:
        test_sents, test_data = corpus.test

###############################################################################
# Build/load the model
###############################################################################

# The vocabulary size is only read when neither testing nor interacting.
if not (args.test or args.interact):
    ntokens = len(corpus.dictionary)
parser.error("--temperature has to be greater or equal 1e-3") with open(args.model_file, 'rb') as f: if args.cuda: model = torch.load(f).to(device) else: model = torch.load(f, map_location='cpu') # after load the rnn params are not a continuous chunk of memory # this makes them a continuous chunk, and will speed up forward pass if args.cuda and (not args.single) and (torch.cuda.device_count() > 1): model.module.rnn.flatten_parameters() else: model.rnn.flatten_parameters() model.eval() corpus = data.SentenceCorpus(args.data_dir, args.vocab_file, True) ntokens = len(corpus.dictionary) if args.cuda and (not args.single) and (torch.cuda.device_count() > 1): hidden = model.module.init_hidden(1) else: hidden = model.init_hidden(1) input = torch.tensor(torch.rand(1, 1).mul(ntokens).long()) if args.cuda: input.data = input.data.to(device) with open(args.outf, 'w') as outf: for i in range(args.numwords): output, hidden = model(input, hidden) word_weights = output.squeeze().data.div(args.temperature).exp().cpu() word_idx = torch.multinomial(word_weights, 1)[0]
files = sorted(files) train_files = [] valid_files = [] for file in files: prefix = file.split('_')[0] if prefix == 'train': train_files.append(file) if prefix == 'valid': valid_files.append(file) print('Start training!!!') for epoch in range(1, args.epochs+1): valid_fname = random.choice(valid_files) for train_fname in train_files: train_fname = random.choice(train_files) corpus = data.SentenceCorpus(args.bptt, args.lm_data, args.tag_data, word2idx, tag2idx, idx2word, idx2tag, train_fname, valid_fname, None, testflag=args.test) train_lm_data = batchify(corpus.train_lm, args.batch_size) train_masking = batchify(corpus.train_maksing, args.batch_size) train_ccg_data = batchify(corpus.train_tag, args.batch_size) epoch_start_time = time.time() train(args, model, train_lm_data, train_masking, train_ccg_data, criterion, optimizer) val_lm_data = batchify(corpus.valid_lm, args.batch_size) val_masking = batchify(corpus.valid_maksing, args.batch_size) val_ccg_data = batchify(corpus.valid_tag, args.batch_size) val_loss = evaluate(args, model, val_lm_data, val_masking, val_ccg_data) print('-' * 80) print('| end of {} | time: {:5.2f}s | valid loss {:5.4f} '.format(train_fname,
else: torch.cuda.manual_seed(args.seed) if args.temperature < 1e-3: parser.error("--temperature has to be greater or equal 1e-3") with open(args.checkpoint, 'rb') as f: model = torch.load(f) model.eval() if args.cuda: model.cuda() else: model.cpu() corpus = data.SentenceCorpus(args.data, args.lm_data, True, testfname=args.testfname) ntokens = len(corpus.dictionary) hidden = model.init_hidden(1) input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True) if args.cuda: input.data = input.data.cuda() with open(args.outf, 'w') as outf: for i in range(args.words): output, hidden = model(input, hidden) word_weights = output.squeeze().data.div(args.temperature).exp().cpu() word_idx = torch.multinomial(word_weights, 1)[0] input.data.fill_(word_idx) word = corpus.dictionary.idx2word[word_idx]
else: torch.cuda.manual_seed(args.seed) if args.temperature < 1e-3: parser.error("--temperature has to be greater or equal 1e-3") with open(args.model_file, 'rb') as f: model = torch.load(f) model.eval() if args.cuda: model.cuda() else: model.cpu() corpus = data.SentenceCorpus(args.data_dir, args.vocab_file, True, testfname=args.testfname) ntokens = len(corpus.dictionary) hidden = model.init_hidden(1) input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True) if args.cuda: input.data = input.data.cuda() with open(args.outf, 'w') as outf: for i in range(args.numwords): output, hidden = model(input, hidden) word_weights = output.squeeze().data.div(args.temperature).exp().cpu() word_idx = torch.multinomial(word_weights, 1)[0] input.data.fill_(word_idx) word = corpus.dictionary.idx2word[word_idx]