def main(argv):
    logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization.
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)

    # Configuration and paths.
    cfg = yaml.load(open(FLAGS.config, 'r'), Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg, dumpflag=False)

    # Change the attention window size.
    for i, layer in enumerate(exp.model.base.encoder.layer):
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)

    # Load test data.
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    # Evaluate test data.
    test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                              'Evaluating test queries')
    print('Test Evaluation', test_eval, file=sys.stderr)
def main(argv):
    logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization.
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)

    # Configuration and paths.
    cfg = yaml.load(open(FLAGS.config, 'r'), Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])
    PATH_MODEL_PREFIX = '{}/{}'.format(cfg['path_model'], FLAGS.model_prefix)
    os.makedirs(PATH_MODEL_PREFIX, exist_ok=True)

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg)

    # Change the attention window size.
    for i, layer in enumerate(exp.model.base.encoder.layer):
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)

    # Load train/dev/test data.
    train_data = utils.Data(PATH_DATA_PREFIX + 'train', corpus, FLAGS)
    dev_data = utils.Data(PATH_DATA_PREFIX + 'dev', corpus, FLAGS)
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    for epoch in range(FLAGS.last_epoch, FLAGS.num_epochs):
        print('Epoch {}'.format(epoch + 1), file=sys.stderr)

        # Train the model.
        train_loss = exp.train(train_data, eval_data=dev_data,
                               test_data=test_data,
                               num_sample_eval=FLAGS.num_sample_eval)
        print('Epoch {}, train_loss = {}'.format(epoch + 1, train_loss),
              file=sys.stderr)

        # Dump the model.
        print('Dump model for epoch {}.'.format(epoch + 1))
        exp.dump_model(PATH_MODEL_PREFIX, str(epoch + 1))

        # Evaluate test data.
        test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                                  'Evaluating test queries')
        print('Test Evaluation', test_eval, file=sys.stderr)

        # Dump TensorBoard results.
        if exp.tb:
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/NDCG', test_eval['ndcg10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MRR', test_eval['mrr10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MAP', test_eval['map10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/NDCG', test_eval['ndcg'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MRR', test_eval['mrr'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MAP', test_eval['map'], epoch + 1)
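# Note on the config loading in the two scripts above: yaml.BaseLoader parses
# every scalar as a plain string, so all cfg values arrive untyped. That is
# harmless here, where they are only joined into paths, but any numeric
# setting would need an explicit cast. A minimal standalone illustration
# (not part of the original scripts):
import yaml

cfg_demo = yaml.load("lr: 0.001\npath_data: /data", Loader=yaml.BaseLoader)
assert cfg_demo == {'lr': '0.001', 'path_data': '/data'}  # both values are str
lr = float(cfg_demo['lr'])  # cast before using numerically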
def gen(args, model=None, max_len=15, top_p=True):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.CVAE(vocab, args.embedding, args.hidden, args.latent)
        model.load_model()
    model = model.to(device)
    model.eval()

    generated_verses = {}
    for persona in corpus.personas:
        print("Artist {}".format(persona))
        persona_tokens = corpus.personas[persona]
        p_len = torch.tensor([len(persona_tokens)]).long().to(device)
        p = torch.tensor([persona_tokens]).long().to(device)

        # 50 verses per artist (arbitrary choice).
        artist_verses = []
        for _ in range(50):
            generated_verse = []
            ctxt = [1]
            # 16 bars per verse.
            for _ in range(16):
                print(ctxt)
                out_sequence = ["S"]
                out_tokens = []
                x_len = torch.tensor([len(ctxt)]).long().to(device)
                x = torch.tensor([ctxt]).long().to(device)
                hidden = model.infer_hidden(x, x_len, p, p_len)
                word = torch.ones([1, 1], dtype=torch.long,
                                  device=model.device())
                while out_sequence[-1] != "L" and len(out_sequence) < max_len:
                    word = model.embedding(word)
                    outputs, hidden = model.decoder(word, hidden)
                    outputs = F.log_softmax(model.out(outputs), dim=-1).squeeze()
                    if top_p:
                        outputs = top_p_filtering(outputs).unsqueeze(0)
                    else:
                        outputs = outputs.unsqueeze(0)
                    # Get a random sample from the output distribution.
                    word = torch.multinomial(F.softmax(outputs, dim=-1), 1)
                    out_tokens.append(word.item())
                    out_sequence.append(corpus.dictionary.idx2word[word.item()])
                ctxt.extend(out_tokens)
                generated_verse.extend(out_sequence)
            artist_verses.append(generated_verse)
        generated_verses[persona] = artist_verses

    with open("verses.json", 'w') as verses_file:
        json.dump(generated_verses, verses_file)
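# gen() relies on a top_p_filtering helper that is not shown in this section.
# Below is a minimal sketch of standard nucleus (top-p) sampling, assuming the
# helper takes a 1-D tensor of log-probabilities and masks the tail of the
# distribution to -inf; the original repo's version may differ in details.
import torch
import torch.nn.functional as F

def top_p_filtering(logits, top_p=0.9, filter_value=float('-inf')):
    """Keep the smallest set of tokens whose cumulative probability >= top_p."""
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    # Mask tokens once the cumulative probability exceeds the threshold,
    # always keeping at least the single most probable token.
    sorted_mask = cumulative_probs > top_p
    sorted_mask[..., 1:] = sorted_mask[..., :-1].clone()
    sorted_mask[..., 0] = False
    indices_to_remove = sorted_indices[sorted_mask]
    logits = logits.clone()
    logits[indices_to_remove] = filter_value
    return logits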
def perplexity(args, model=None):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.VAE(vocab, args.embedding, args.hidden, args.latent)
        model.load_model()
    model = model.to(device)
    model.eval()

    valid = utils.load_data(corpus.valid, batch_size=args.batch_size,
                            num_workers=4)
    print("Beginning eval")
    norm_losses = []
    norm_lens = []
    avg_loss = 0
    num_examples = 0
    for x, x_len, p, p_len, y, y_len in valid:
        x, x_len = x.to(device), x_len.to(device)
        y, y_len = y.to(device), y_len.to(device)
        if model.name == "cvae":  # '==', not 'is': compare value, not identity
            p, p_len = p.to(device), p_len.to(device)
            res = model(x, x_len, p, p_len, y, y_len)
        else:
            res = model(x, x_len, y, y_len)
        pred = res[0]
        eos_tensor = torch.empty(x.shape[0], 1).to(device)
        eos_tensor.fill_(corpus.dictionary.word2idx["L"])
        gold = torch.cat([y, eos_tensor], dim=1).long()
        pred = pred.permute(0, 2, 1)
        BCE = F.nll_loss(pred, gold, reduction="none", ignore_index=0)
        avg_loss += torch.sum(BCE).item()
        # Normalize by length.
        norm_losses.append(torch.sum(BCE, dim=-1))
        norm_lens.append(y_len)
        num_examples += y.shape[0]

    exp = torch.cat(norm_losses) / torch.cat(norm_lens)
    ppl = torch.mean(torch.exp(exp)).item()
    avg_loss = avg_loss / num_examples
    print("Validation set had a perplexity of {} and an average NLL of {} "
          "per example".format(ppl, avg_loss))
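# perplexity() computes a per-example perplexity exp(NLL / length) and then
# averages over examples. A tiny self-contained check of that arithmetic
# (illustrative only, not from the original code):
import torch

token_nll = torch.tensor([[2.0, 2.0, 2.0]])       # 3 tokens, NLL 2.0 each
lengths = torch.tensor([3.0])
ppl = torch.exp(token_nll.sum(dim=-1) / lengths)  # exp(6/3) = e^2
print(ppl)                                        # tensor([7.3891])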
batch_size = [20]
seq_lengths = [30]
learning_rate = [0.001]

# TODO: change the model number to the one you need.
model_num = 4

# Load the Penn Treebank dataset.
train_path = './data/train.txt'
valid_path = './data/valid.txt'
sample_path = './sample.txt'
save_dir = './saved models/'

for bs in batch_size:
    # Create corpora.
    train_corpus = utils.Corpus()
    ids_train = train_corpus.get_data(train_path, bs)
    ids_valid = train_corpus.get_data(valid_path, bs)
    train_vocab_size = len(train_corpus.dictionary)
    for seq_len in seq_lengths:
        num_train_batches = ids_train.size(1) // seq_len
        num_valid_batches = ids_valid.size(1) // seq_len
        for lr in learning_rate:
            model = utils.initialize_model(model_num, train_vocab_size,
                                           embed_size)
            model = utils.use_cuda(model)
            print('Training vocabulary size: {}'.format(train_vocab_size))
            print('Model: {}'.format(model.name))
            print('Number of parameters = {}'.format(
                sum(p.numel() for p in model.parameters())))
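# Both PTB scripts call utils.use_cuda, which is not shown in this section.
# A plausible one-line version (an assumption, not the original helper):
import torch

def use_cuda(model):
    """Move the model to the GPU when one is available."""
    return model.cuda() if torch.cuda.is_available() else model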
import os

import torch

import utils

# Hyperparameters.
embed_size = 249
num_samples = 30  # number of words to be sampled
temperature = [0.5, 1.0, 2]

# Load the Penn Treebank dataset.
train_path = './data/train.txt'
valid_path = './data/valid.txt'
sample_path = './sample.txt'
save_dir = './saved models/'
model_name = 'LSTM - 249 hidden cells, 1 layers'

# Create the corpus.
corpus = utils.Corpus()
ids_train = corpus.get_data(train_path)
ids_valid = corpus.get_data(valid_path)
vocab_size = len(corpus.dictionary)

# Load the best model.
model_num = 4
model = utils.initialize_model(model_num, vocab_size, embed_size)
model = utils.use_cuda(model)
file_path = os.path.join(save_dir, model_name + '.pkl')
load_state = torch.load(file_path, lambda storage, loc: storage)
model.load_state_dict(load_state['state_dict'])
model.eval()  # switch to eval mode so dropout is disabled

# Sampling.
with open(sample_path, 'w') as f:
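# The script above is truncated inside the sampling block. A minimal sketch of
# temperature sampling that would fit there is shown below; the model's
# forward signature and the prompt handling are assumptions, not the original
# implementation.
def sample_with_temperature(model, start_id, num_words, temp):
    """Autoregressively sample num_words token ids at temperature temp."""
    token = torch.tensor([[start_id]])
    sampled = []
    for _ in range(num_words):
        logits = model(token)[0, -1]  # scores over the vocabulary
        probs = torch.softmax(logits / temp, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)
        token = next_id.view(1, 1)
        sampled.append(next_id.item())
    return sampled

# Hypothetical usage with the variables defined above:
#   for temp in temperature:
#       ids = sample_with_temperature(model, 0, num_samples, temp)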
if __name__ == '__main__':
    for DATASET in ['movie', 'news', 'tweet']:
        WORD_DROP = 10
        MIN_LEN = 5
        MAX_LEN = 200

        data_path = '../../data/' + DATASET + '2020.txt'
        train_path = '../../data/train_' + DATASET
        test_path = '../../data/test_' + DATASET
        vocabulary = utils.Vocabulary(data_path, max_len=MAX_LEN,
                                      min_len=MIN_LEN, word_drop=WORD_DROP)

        test = utils.Corpus(test_path, vocabulary, max_len=MAX_LEN,
                            min_len=MIN_LEN)
        with open(test_path + '_nounk', 'w', encoding='utf8') as f:
            # Skip sentences containing id 1 (presumably UNK) and strip ids
            # 0, 2, 3 (presumably PAD/BOS/EOS) from the rest.
            for sentence in test.corpus:
                if 1 not in sentence:
                    f.write(' '.join([vocabulary.i2w[_] for _ in sentence
                                      if _ not in [0, 2, 3]]) + '\n')

        train = utils.Corpus(train_path, vocabulary, max_len=MAX_LEN,
                             min_len=MIN_LEN)
        with open(train_path + '_nounk', 'w', encoding='utf8') as f:
            # Body assumed to mirror the test branch above.
            for sentence in train.corpus:
                if 1 not in sentence:
                    f.write(' '.join([vocabulary.i2w[_] for _ in sentence
                                      if _ not in [0, 2, 3]]) + '\n')
def main():
    # ======================
    # Hyperparameters
    # ======================
    CELL = "lstm"  # rnn, gru, lstm
    DATASET = 'movie'  # only used in the checkpoint filename below
    RATIO = 0.9
    WORD_DROP = 10
    MIN_LEN = 5
    MAX_LEN = 200
    BATCH_SIZE = 32
    SEQUENCE_LEN = 50
    EMBED_SIZE = 128
    HIDDEN_DIM = 256
    NUM_LAYERS = 2
    DROPOUT_RATE = 0.0
    EPOCH = 300
    LEARNING_RATE = 0.01
    MAX_GENERATE_LENGTH = 20
    GENERATE_EVERY = 5
    SEED = 100

    all_var = locals()
    print()
    for var in all_var:
        if var != "var_name":
            print("{0:15} ".format(var), all_var[var])
    print()

    # ======================
    # Data
    # ======================
    data_path = '../../__data/ROCStories.txt'
    train_path = 'train_roc'
    test_path = 'test_roc'
    vocabulary = utils.Vocabulary(data_path, max_len=MAX_LEN, min_len=MIN_LEN,
                                  word_drop=WORD_DROP)
    utils.split_corpus(data_path, train_path, test_path, max_len=MAX_LEN,
                       min_len=MIN_LEN, ratio=RATIO, seed=SEED)
    train = utils.Corpus(train_path, vocabulary, max_len=MAX_LEN,
                         min_len=MIN_LEN)
    test = utils.Corpus(test_path, vocabulary, max_len=MAX_LEN,
                        min_len=MIN_LEN)
    train_generator = utils.Generator(train.corpus)
    test_generator = utils.Generator(test.corpus)

    # ======================
    # Building the model
    # ======================
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = lm.LM(cell=CELL, vocab_size=vocabulary.vocab_size,
                  embed_size=EMBED_SIZE, hidden_dim=HIDDEN_DIM,
                  num_layers=NUM_LAYERS, dropout_rate=DROPOUT_RATE)
    model.to(device)
    summary(model, (20, ))
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001,
    #                              betas=(0.9, 0.999), eps=1e-08,
    #                              weight_decay=0, amsgrad=False)
    print()

    # ======================
    # Training and testing
    # ======================
    best_loss = 1000000
    for epoch in range(EPOCH):
        train_g = train_generator.build_generator(BATCH_SIZE, SEQUENCE_LEN)
        test_g = test_generator.build_generator(BATCH_SIZE, SEQUENCE_LEN)
        train_loss = []
        while True:
            try:
                text = next(train_g)
            except StopIteration:
                break
            optimizer.zero_grad()
            y = model(torch.from_numpy(text[:, :-1]).long().to(device))
            loss = criterion(
                y.reshape(-1, vocabulary.vocab_size),
                torch.from_numpy(text[:, 1:]).reshape(-1).long().to(device))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        test_loss = []
        while True:
            with torch.no_grad():
                try:
                    text = next(test_g)
                except StopIteration:
                    break
                y = model(torch.from_numpy(text[:, :-1]).long().to(device))
                loss = criterion(
                    y.reshape(-1, vocabulary.vocab_size),
                    torch.from_numpy(text[:, 1:]).reshape(-1).long().to(device))
                test_loss.append(loss.item())

        print('epoch {:d} training loss {:.4f} test loss {:.4f}'.format(
            epoch + 1, np.mean(train_loss), np.mean(test_loss)))

        if np.mean(test_loss) < best_loss:
            best_loss = np.mean(test_loss)
            print('-----------------------------------------------------')
            print('saving parameters')
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(),
                       'models/' + DATASET + '-' + str(epoch) + '.pkl')
            print('-----------------------------------------------------')

        if (epoch + 1) % GENERATE_EVERY == 0:
            with torch.no_grad():
                # Generate text.
                x = torch.LongTensor([[vocabulary.w2i['_BOS']]] * 3).to(device)
                for i in range(MAX_GENERATE_LENGTH):
                    samp = model.sample(x)
                    x = torch.cat([x, samp], dim=1)
                x = x.cpu().numpy()
            print('-----------------------------------------------------')
            for i in range(x.shape[0]):
                print(' '.join([vocabulary.i2w[_] for _ in list(x[i, :])
                                if _ not in [vocabulary.w2i['_BOS'],
                                             vocabulary.w2i['_EOS'],
                                             vocabulary.w2i['_PAD']]]))
            print('-----------------------------------------------------')
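# Both training scripts iterate an exhaustible batch generator and break on
# StopIteration. A plausible minimal utils.Generator, assuming the corpus is a
# list of token-id lists that get zero-padded into fixed-size numpy batches
# (0 taken as PAD); details of the original class may differ:
import numpy as np

class Generator:
    def __init__(self, corpus, vocabulary=None):
        self.corpus = list(corpus)
        self.vocabulary = vocabulary

    def build_generator(self, batch_size, sequence_len=None):
        np.random.shuffle(self.corpus)
        # Yield only full batches; the caller stops on StopIteration.
        for i in range(0, len(self.corpus) - batch_size + 1, batch_size):
            batch = self.corpus[i:i + batch_size]
            if sequence_len is not None:
                batch = [s[:sequence_len] for s in batch]
            max_len = max(len(s) for s in batch)
            out = np.zeros((batch_size, max_len), dtype=np.int64)
            for j, s in enumerate(batch):
                out[j, :len(s)] = s
            yield out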
def main():
    # ======================
    # Hyperparameters
    # ======================
    CELL = "lstm"  # rnn, gru, lstm
    DATASET = 'tweet'  # movie, news, tweet
    RATIO = 0.9
    WORD_DROP = 10
    MIN_LEN = 5
    MAX_LEN = 200
    BATCH_SIZE = 32
    EMBED_SIZE = 350
    HIDDEN_DIM = 512
    NUM_LAYERS = 2
    DROPOUT_RATE = 0.0
    START_EPOCH = 0
    EPOCH = 30
    LEARNING_RATE = 0.001
    MAX_GENERATE_LENGTH = 20
    GENERATE_EVERY = 5
    PRINT_EVERY = 1
    SEED = 100

    all_var = locals()
    print()
    for var in all_var:
        if var != "var_name":
            print("{0:15} ".format(var), all_var[var])
    print()

    # ======================
    # Data
    # ======================
    data_path = 'data/' + DATASET + '2020.txt'
    train_path = 'data/train_' + DATASET
    test_path = 'data/test_' + DATASET
    vocabulary = utils.Vocabulary(data_path, max_len=MAX_LEN, min_len=MIN_LEN,
                                  word_drop=WORD_DROP)
    utils.split_corpus(data_path, train_path, test_path, max_len=MAX_LEN,
                       min_len=MIN_LEN, ratio=RATIO, seed=SEED)
    train = utils.Corpus(train_path, vocabulary, max_len=MAX_LEN,
                         min_len=MIN_LEN)
    test = utils.Corpus(test_path, vocabulary, max_len=MAX_LEN,
                        min_len=MIN_LEN)
    train_generator = utils.Generator(train.corpus, vocabulary=vocabulary)
    test_generator = utils.Generator(test.corpus, vocabulary=vocabulary)

    # ======================
    # Building the model
    # ======================
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    model = lm.LM(cell=CELL, vocab_size=vocabulary.vocab_size,
                  embed_size=EMBED_SIZE, hidden_dim=HIDDEN_DIM,
                  num_layers=NUM_LAYERS, dropout_rate=DROPOUT_RATE)
    model.to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print("Total params: {:d}".format(total_params))
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print("Trainable params: {:d}".format(total_trainable_params))
    criterion = nn.NLLLoss(ignore_index=vocabulary.w2i["_PAD"])
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           betas=(0.9, 0.999), eps=1e-08, weight_decay=0,
                           amsgrad=False)
    print()

    # ======================
    # Training and testing
    # ======================
    best_loss = 1000000
    step = 0
    if START_EPOCH > 0:
        model.load_state_dict(
            torch.load('models/' + DATASET + '-' + str(START_EPOCH) + '.pkl',
                       map_location=device))
    for epoch in range(START_EPOCH + 1, EPOCH + 1):
        train_g = train_generator.build_generator(BATCH_SIZE)
        test_g = test_generator.build_generator(BATCH_SIZE)
        train_loss = []
        model.train()
        while True:
            try:
                text = next(train_g)
            except StopIteration:
                break
            optimizer.zero_grad()
            text_in = text[:, :-1]
            text_target = text[:, 1:]
            y = model(torch.from_numpy(text_in).long().to(device))
            loss = criterion(
                y.reshape(-1, vocabulary.vocab_size),
                torch.from_numpy(text_target).reshape(-1).long().to(device))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            step += 1
            torch.cuda.empty_cache()
            if step % PRINT_EVERY == 0:
                print('step {:d} training loss {:.4f}'.format(step,
                                                              loss.item()))

        test_loss = []
        model.eval()
        with torch.no_grad():
            while True:
                try:
                    text = next(test_g)
                except StopIteration:
                    break
                text_in = text[:, :-1]
                text_target = text[:, 1:]
                y = model(torch.from_numpy(text_in).long().to(device))
                loss = criterion(
                    y.reshape(-1, vocabulary.vocab_size),
                    torch.from_numpy(text_target).reshape(-1).long().to(device))
                test_loss.append(loss.item())
                torch.cuda.empty_cache()

        print('epoch {:d} training loss {:.4f} test loss {:.4f}'.format(
            epoch, np.mean(train_loss), np.mean(test_loss)))

        if np.mean(test_loss) < best_loss:
            best_loss = np.mean(test_loss)
            print('-----------------------------------------------------')
            print('saving parameters')
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(),
                       'models/' + DATASET + '-' + str(epoch) + '.pkl')
            print('-----------------------------------------------------')

        if (epoch + 1) % GENERATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                # Generate text.
                x = torch.LongTensor([[vocabulary.w2i['_BOS']]] * 3).to(device)
                for i in range(MAX_GENERATE_LENGTH):
                    samp = model.sample(x)
                    x = torch.cat([x, samp], dim=1)
                x = x.cpu().numpy()
            print('-----------------------------------------------------')
            for i in range(x.shape[0]):
                print(' '.join([vocabulary.i2w[_] for _ in list(x[i, :])
                                if _ not in [vocabulary.w2i['_BOS'],
                                             vocabulary.w2i['_EOS'],
                                             vocabulary.w2i['_PAD']]]))
            print('-----------------------------------------------------')
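# Both generation blocks above call model.sample(x), which lives in the lm
# module and is not shown here. A plausible equivalent as a standalone helper,
# assuming the forward pass returns (batch, seq_len, vocab_size)
# log-probabilities (which the NLLLoss usage above implies); the original
# method may differ:
def sample_next(model, x):
    """Sample one next-token id per row of x; returns shape (batch, 1)."""
    log_probs = model(x)[:, -1, :]  # distribution at the last position
    return torch.multinomial(log_probs.exp(), num_samples=1)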
def vae_train(args):
    """Trains a model as specified by args."""
    seed_random(args.rand_seed)
    device = init_device()
    train_log, valid_log = init_logger(log_dir=args.log_dir)

    corpus = utils.Corpus(args.data, args.persona_data)
    train_data = utils.load_data(corpus.train, batch_size=args.batch_size,
                                 num_workers=4)
    test_data = utils.load_data(corpus.test, batch_size=args.batch_size,
                                num_workers=4)
    vocab = len(corpus.dictionary)

    model = models.VAE(vocab, args.embedding, args.hidden, args.latent)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[100, 150],
                                                     gamma=0.1)
    if args.continue_training:
        model.load_model()
    model = model.to(device)
    print("Training", model.name, "with #params:", model.num_params())

    loss = cvae_loss_function
    best = float("inf")
    global_step = 0
    for epoch in range(args.num_epoch):
        losses = []
        for x, x_len, p, p_len, y, y_len in train_data:
            # Everything in the batch must have the same size.
            x, x_len = x.to(device), x_len.to(device)
            y, y_len = y.to(device), y_len.to(device)

            # Teacher forcing should decay from 1 toward 0 over ~100k steps
            # (after the model has learned a good LM).
            teach = 1 if global_step < 200_000 else .9995
            res = model(x, x_len, y, y_len, teach)
            pred, bow_log, r_mu, r_log_var, p_mu, p_log_var = res

            eos_tensor = torch.empty(x.shape[0], 1).to(device)
            eos_tensor.fill_(corpus.dictionary.word2idx["L"])
            gold = torch.cat([y, eos_tensor], dim=1).long()

            # KL annealing weight: ramps linearly from 0 to 1 between steps
            # 10k and 70k.
            alph = min(max(0, (global_step - 10_000) / 60_000), 1)
            pred = pred.permute(0, 2, 1)
            # Loss, normalized by batch size.
            loss_val = loss(pred, gold, bow_log, r_mu, r_log_var, p_mu,
                            p_log_var, alpha=alph)

            optimizer.zero_grad()
            loss_val.backward()
            if args.grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
            optimizer.step()
            scheduler.step()
            global_step += 1

            losses.append(loss_val.detach().cpu().numpy())
            if train_log is not None:
                train_log.add_scalar("loss", losses[-1], global_step)

        with torch.no_grad():
            validation = eval_inference(model, corpus, test_data, valid_log,
                                        global_step)
        avg_l = np.mean(losses)
        print("epoch %-3d \t loss = %0.3f \t" % (epoch, avg_l))
        if validation < best:
            print("Saving model!")
            best = validation
            model.save_model()
    print("Finished training, best model got: {} NLL".format(best))
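# vae_train() calls cvae_loss_function, which is not shown in this section.
# Below is a minimal sketch of the usual CVAE objective matching the call
# signature above: reconstruction NLL, a KL term between the recognition
# distribution N(r_mu, r_var) and the prior N(p_mu, p_var) annealed by alpha,
# and a bag-of-words auxiliary loss. The exact weighting in the original repo
# may differ.
import torch
import torch.nn.functional as F

def cvae_loss_function(pred, gold, bow_log, r_mu, r_log_var, p_mu, p_log_var,
                       alpha=1.0):
    batch_size = gold.shape[0]
    # Reconstruction: pred is (batch, vocab, seq_len), gold is (batch, seq_len).
    nll = F.nll_loss(pred, gold, reduction="sum", ignore_index=0)
    # KL( N(r_mu, r_var) || N(p_mu, p_var) ) for diagonal Gaussians.
    kld = 0.5 * torch.sum(
        p_log_var - r_log_var - 1
        + (r_log_var.exp() + (r_mu - p_mu) ** 2) / p_log_var.exp())
    # Bag-of-words loss: predict every gold token from the latent code alone;
    # bow_log is assumed to be (batch, vocab) log-probabilities.
    bow = F.nll_loss(bow_log.unsqueeze(2).expand(-1, -1, gold.shape[1]),
                     gold, reduction="sum", ignore_index=0)
    return (nll + alpha * kld + bow) / batch_size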
def twod_viz(args, model=None):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.CVAE(vocab, args.embedding, args.hidden, args.latent,
                            rnn=args.rnn)
        model.load_model()
    model = model.to(device)
    model.eval()

    artist_names = [
        "21 savage", '6ix9ine', 'dr dre', 'earl sweatshirt', 'ice cube',
        'kanye west', 'kendrick lamar', 'kid cudi', 'pusha t',
        'tyler the creator',
    ]
    artist_list = [2, 5, 23, 26, 36, 44, 46, 47, 67, 86]
    names = {}
    for name, id_ in zip(artist_names, artist_list):
        names[id_] = name

    latents = []
    labels = []
    for artist in artist_list:
        persona = corpus.personas[artist]
        print("Artist {}".format(artist))
        ctxt = ['S']
        ctxt = [corpus.dictionary.word2idx[word] for word in ctxt]
        p_len = torch.tensor([len(persona)]).long().to(device)
        p = torch.tensor([persona]).long().to(device)
        x_len = torch.tensor([len(ctxt)]).long().to(device)
        x = torch.tensor([ctxt]).long().to(device)

        x_emb = model.embedding(x)
        p_emb = model.embedding(p)
        c_enc = model.contextualize(x_emb, x_len, p_emb, p_len)
        out_prior = model.priorlnorm(model.tanh(model.prior(c_enc)))
        # Renamed from 'p' to avoid shadowing the persona tensor above.
        prior_params = model.p_mu_log_var(out_prior)
        p_mu, p_log_var = torch.split(prior_params, model.latent_dim, dim=-1)
        latents.append(p_mu.cpu().numpy().squeeze())
        labels.append(artist)

    latents = np.stack(latents)
    means = np.mean(latents, axis=0)
    print(means)
    # Center the latent means.
    latents = latents - means
    print(latents)
    print(latents.shape)
    labels = np.array(labels)
    print(labels.shape)

    fig = plt.figure()
    for idx, cl in enumerate(np.unique(labels)):
        plt.scatter(x=latents[idx, 0] * 1000,
                    y=latents[idx, 1] * 1000,
                    label=artist_names[idx])
    plt.legend(loc='upper left')
    plt.xlim(-2.75, 2.75)
    plt.ylim(-2.75, 2.75)
    plt.xlabel("Dim 1")
    plt.ylabel("Dim 2")
    plt.title("Artist Embeddings with IDs")
    plt.show()
    fig.savefig('my_figure.png')
# Configure logging.
logging.basicConfig(filename='{}logging{}.log'.format(args.data, args.expt_id),
                    level=logging.INFO)

# Set the random seed manually for reproducibility (seeds the CPU RNG so that
# results are deterministic).
torch.manual_seed(args.seed)
if args.gpu_device > 0:
    device = torch.device("cuda")
    # GPU ids start at 0; this selects which GPU to use.
    torch.cuda.set_device(args.gpu_device)
    torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)
else:
    device = torch.device("cpu")

# Load data.
corpus = utils.Corpus(args.data)
eval_batch_size = 10  # evaluation uses a different batch size than training
train_data = utils.create_batches(corpus.train, args.batch_size,
                                  order='random', device=device)
val_data = utils.create_batches(corpus.valid, eval_batch_size,
                                order='random', device=device)
test_data = utils.create_batches(corpus.test, eval_batch_size,
                                 order='random', device=device)

# Build the model.
vocab_size = corpus.vocab_size  # length of idx2word
# Note: rebinding the name 'model' shadows the imported 'model' module.
model = model.RNNModel(args.model, vocab_size, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied).to(device)
criterion = nn.CrossEntropyLoss()

# Training code.
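# The section ends at "# Training code." A minimal sketch of the loop that
# typically follows in word-level RNN LM scripts like this one, assuming
# utils.create_batches yields (input, target) pairs and that the model returns
# (output, hidden) with output of shape (seq_len, batch, vocab); this is an
# illustration, not the original training code.
def repackage(h):
    """Detach hidden state so gradients stop at batch boundaries."""
    return h.detach() if torch.is_tensor(h) else tuple(repackage(v) for v in h)

def train_one_epoch(rnn_model, data, lr):
    rnn_model.train()
    total_loss = 0.0
    hidden = rnn_model.init_hidden(args.batch_size)  # assumed helper
    for inputs, targets in data:
        hidden = repackage(hidden)
        rnn_model.zero_grad()
        output, hidden = rnn_model(inputs, hidden)
        loss = criterion(output.view(-1, vocab_size), targets.view(-1))
        loss.backward()
        for p in rnn_model.parameters():  # plain SGD step
            p.data.add_(p.grad, alpha=-lr)
        total_loss += loss.item()
    return total_loss / len(data)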