def train():
    # Model definition
    model = RNNModel(len(word2ix), embed_size, hidden_dims)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        count = 0
        for ii, data_ in tqdm.tqdm(enumerate(data)):
            data_ = torch.tensor(data_).long()
            x = data_.unsqueeze(1).to(device)
            optimizer.zero_grad()
            # The target is the input shifted by one position; the last target wraps to x[0].
            y = torch.zeros(x.shape).to(device).long()
            y[:-1], y[-1] = x[1:], x[0]
            output, _ = model(x)
            loss = criterion(output, y.view(-1))
            # Alternative (left disabled in the original): feed incrementally longer prefixes.
            # hidden = None
            # for k in range(2, max_lenth):
            #     data1 = data_[:k]
            #     input_, target = data1[:-1, :], data1[1:, :]
            #     output, hidden = model(input_, hidden)
            #     loss = criterion(output, target.view(-1))
            #     optimizer.step()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            count += 1
        print(epoch, 'loss=', total_loss / count)
        torch.save(model.state_dict(), 'model.bin')
        chars = test(model)
        print(chars)
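# A minimal sketch of the RNNModel assumed by train() above (embedding -> LSTM ->
# projection). The constructor signature (vocab_size, embed_size, hidden_dims) and
# the (flattened logits, hidden) return value are assumptions inferred from how
# train() calls the model; the real class may differ.
import torch
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_dims):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_dims)
        self.proj = nn.Linear(hidden_dims, vocab_size)

    def forward(self, x, hidden=None):
        # x: (seq_len, batch) of token ids
        emb = self.embed(x)                    # (seq_len, batch, embed_size)
        out, hidden = self.lstm(emb, hidden)   # (seq_len, batch, hidden_dims)
        logits = self.proj(out)                # (seq_len, batch, vocab_size)
        return logits.view(-1, logits.size(-1)), hidden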
def main():
    # Add ckp
    parser = argparse.ArgumentParser(
        description='PyTorch PennTreeBank RNN/LSTM Language Model')
    parser.add_argument('--data', type=str, default='/input',  # /input
                        help='location of the data corpus')
    parser.add_argument('--checkpoint', type=str, default='',
                        help='model checkpoint to use')
    parser.add_argument('--model', type=str, default='LSTM',
                        help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
    parser.add_argument('--emsize', type=int, default=200,
                        help='size of word embeddings')
    parser.add_argument('--nhid', type=int, default=200,
                        help='number of hidden units per layer')
    parser.add_argument('--nlayers', type=int, default=2,
                        help='number of layers')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='initial learning rate')
    parser.add_argument('--clip', type=float, default=0.25,
                        help='gradient clipping')
    parser.add_argument('--epochs', type=int, default=40,
                        help='upper epoch limit')
    parser.add_argument('--batch_size', type=int, default=256, metavar='N',
                        help='batch size')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout applied to layers (0 = no dropout)')
    parser.add_argument('--tied', action='store_true',
                        help='tie the word embedding and softmax weights')
    parser.add_argument('--seed', type=int, default=1111,
                        help='random seed')
    parser.add_argument('--cuda', action='store_true',
                        help='use CUDA')
    parser.add_argument('--log-interval', type=int, default=200, metavar='N',
                        help='report interval')
    parser.add_argument('--save', type=str, default='/output/model.pt',  # /output
                        help='path to save the final model')
    args = parser.parse_args()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)

    # Load checkpoint
    build_vocab = False
    if args.checkpoint != '' and os.path.exists(args.checkpoint):
        print(f'Loading field from {args.checkpoint}')
        save_dict = torch.load(args.checkpoint)
        field = save_dict['field']
        start_epoch = save_dict['start_epoch']
    else:
        save_dict = None
        field = Field(tokenize=split_tokenize, init_token='<init>')
        build_vocab = True
        start_epoch = 0

    ###############################################################################
    # Load data
    ###############################################################################
    train_data, val_data, test_data = TabularDataset.splits(
        path=args.data, train='train.txt', validation='valid.txt', test='test.txt',
        format='tsv', fields=[('text', field)])
    print(train_data, len(train_data), val_data, len(val_data),
          test_data, len(test_data))

    if build_vocab:
        field.eos_token = '<eos>'
        field.build_vocab(train_data, val_data, min_freq=1000)
        field.eos_token = None
    eos_id = field.vocab.stoi['<eos>']
    pad_id = field.vocab.stoi[field.pad_token]

    train_iter = BucketIterator(train_data, args.batch_size, train=True, repeat=False,
                                device='cuda:0' if args.cuda else 'cpu:0')
    val_iter = Iterator(val_data, args.batch_size, repeat=False,
                        device='cuda:0' if args.cuda else 'cpu:0')
    test_iter = Iterator(test_data, args.batch_size, repeat=False,
                         device='cuda:0' if args.cuda else 'cpu:0')
    print(train_iter, len(train_iter), val_iter, len(val_iter),
          test_iter, len(test_iter))

    ###############################################################################
    # Build the model
    ###############################################################################
    ntokens = len(field.vocab)
    model = RNNModel(args.model, ntokens, args.emsize,
                     args.nhid, args.nlayers, args.dropout, args.tied)
    if save_dict is not None:
        model.load_state_dict(save_dict['model'])
    if args.cuda:
        model.cuda()
    else:
        model.cpu()
    print(model)

    if save_dict:
        opt = save_dict['optimizer']
    else:
        opt = torch.optim.Adam(model.parameters(), lr=args.lr)

    if args.checkpoint:
        torch.save(
            dict(field=field, model=model.state_dict(), optimizer=opt,
                 start_epoch=start_epoch),
            args.checkpoint)

    ###############################################################################
    # Training code
    ###############################################################################
    criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_id)

    def make_target(text):
        batch_size = text.size()[1]
        eos_vector = torch.full((1, batch_size), eos_id, dtype=text.dtype,
                                device='cuda:0' if args.cuda else 'cpu:0')
        target = torch.cat((text[1:], eos_vector), dim=0)
        return target

    def compute_loss(output, text):
        output_flat = output.view(-1, ntokens)
        target = make_target(text)
        target_flat = target.view(-1)
        return criterion(output_flat, target_flat)

    def evaluate(data_source):
        # Turn on evaluation mode which disables dropout.
        with torch.no_grad():
            model.eval()
            total_loss = 0
            for batch in data_source:
                output, hidden = model(batch.text)
                loss = compute_loss(output, batch.text)
                total_loss += loss.item()
            return total_loss / len(data_source)

    def train():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        for i, batch in enumerate(train_iter):
            model.zero_grad()
            output, hidden = model(batch.text)
            target = make_target(batch.text)
            loss = compute_loss(output, batch.text)
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            opt.step()
            total_loss += loss.item()

            if i % args.log_interval == 0 and i > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                      'loss {:5.2f} | ppl {:8.2f}'.format(
                          epoch, i, len(train_iter),
                          elapsed * 1000 / args.log_interval,
                          cur_loss, math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()

    # Loop over epochs.
    best_val_loss = None
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(start_epoch, args.epochs):
            epoch_start_time = time.time()
            train()
            val_loss = evaluate(val_iter)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                  'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                             val_loss, math.exp(val_loss)))
            print('-' * 89)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                if args.checkpoint:
                    torch.save(
                        dict(field=field, model=model.state_dict(), optimizer=opt,
                             start_epoch=epoch),
                        args.checkpoint)
                best_val_loss = val_loss
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    torch.save(
        dict(vocab=field.vocab.itos, model=model.state_dict(),
             settings=dict(rnn_type=args.model, emsize=args.emsize,
                           nhid=args.nhid, nlayers=args.nlayers)),
        args.save)

    # Load the best saved model.
    # with open(args.save, 'rb') as f:
    #     save_dict = torch.load(f)
    #     field = save_dict['field']
    #     if save_dict is not None:
    #         model.load_state_dict(save_dict['model'])
    #
    #     if args.cuda:
    #         model.cuda()
    #     else:
    #         model.cpu()

    # Run on test data.
    test_loss = evaluate(test_iter)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
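# The torchtext Field above is built with a `split_tokenize` callable that is not
# shown in this snippet. A minimal sketch of such a tokenizer, assuming plain
# whitespace tokenization (the repository's version may differ):
def split_tokenize(line):
    # torchtext calls this on every raw line of the tsv corpus.
    return line.split()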
    # Validation phase (inside the per-epoch training loop).
    valid_loss = 0
    model = model.eval()
    for batch_num, batch in enumerate(valid_loader):
        lm_input, lens, lm_output = batch
        predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
        loss = loss_function(predictions, cudalize(lm_output), lens)
        valid_loss += loss.item()
        valid_avg_loss = valid_loss / (batch_num + 1)
        valid_ppl = math.exp(valid_avg_loss)
        print(f'\rvalid batch:{batch_num} loss: {valid_avg_loss:.4f} ' +
              f'ppl:{valid_ppl:.4f}', end='')
    print()

    # Early stopping on validation loss.
    if valid_loss / (batch_num + 1) < min_loss:
        min_loss = valid_loss / (batch_num + 1)
        earlystop_count = 0
        torch.save(model.state_dict(), './model.pt')
        print('saved model')
    else:
        earlystop_count += 1
        if earlystop_count > 20:
            print('earlystop')
            break
    print()

# Test phase (after the training loop).
test_loss = 0
for batch_num, batch in enumerate(test_loader):
    lm_input, lens, lm_output = batch
    predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
    loss = loss_function(predictions, cudalize(lm_output), lens)
    test_loss += loss.item()
    test_avg_loss = test_loss / (batch_num + 1)
test_ppl = math.exp(test_avg_loss)
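# `cudalize` above is a helper from the surrounding script. A minimal sketch of
# what it is assumed to do (move a tensor to the GPU when one is available); the
# real helper may differ:
import torch

def cudalize(tensor):
    return tensor.cuda() if torch.cuda.is_available() else tensor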
lr = cf.learning_rate
best_val_loss = None
try:
    for epoch in range(1, cf.n_epochs + 1):
        epoch_start_time = time.time()
        loop('train', lr)
        val_loss, val_acc = loop('valid')
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid acc {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                         val_loss, val_acc))
        print('-' * 89)
        if not best_val_loss or val_loss < best_val_loss:
            torch.save(model.state_dict(), os.path.join(args.save_path, 'model.pt'))
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if validation loss did not improve.
            lr /= 4.0
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
model.load_state_dict(torch.load(os.path.join(args.save_path, 'model.pt')))

# Run on test data.
test_loss, test_acc = loop('test')
print('=' * 89)
print('| End of training | test loss {:5.2f} | test acc {:8.2f}'.format(
    test_loss, test_acc))
        # Inside the word-sampling loop: temperature-scaled multinomial sampling.
        words_weights = output.squeeze().data.div(temperature).exp().cpu()
        word_idx = torch.multinomial(words_weights, 1)[0]
        input.data.fill_(word_idx)
        word = corpus.dictionary.idx2word[word_idx]
        word = word.replace('<eos>', '\n')
        # if ':' in word:
        #     word = '\n\n' + word
        print("%s " % (word), end='')
        # outf.write(word + ('\n' if i % 20 == 19 else ' '))

try:
    for epoch in range(1, epochs + 1):
        train(model, optimizer, criterion, corpus, batch_size, train_data,
              sequence_length, clip, step_vis, epoch, learning_rate)
        torch.save(model.state_dict(), './snapchot.pth')
        generate(model, words, temperature, corpus)
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')
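# A minimal sketch of the `generate` function called above, wrapping the
# temperature-sampling fragment. The model's (input, hidden) interface, the
# hidden-state handling, and the random initial token are assumptions; some
# models require `model.init_hidden(1)` instead of None.
import torch

def generate(model, words, temperature, corpus):
    model.eval()
    ntokens = len(corpus.dictionary)
    input = torch.randint(ntokens, (1, 1), dtype=torch.long)
    hidden = None
    with torch.no_grad():
        for i in range(words):
            output, hidden = model(input, hidden)
            word_weights = output.squeeze().div(temperature).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            input.fill_(word_idx)
            word = corpus.dictionary.idx2word[word_idx].replace('<eos>', '\n')
            print("%s " % word, end='')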
                 nclasses, args.emsize, args.nhid_class, subdictmask.to(device),
                 args.nhid, args.nlayers, None, args.rnndrop, args.dropout,
                 reset=args.reset, classnorm=args.classnorm, tied=args.tied)

# Initialise with trained parameters
if args.load_from != '':
    pretrained_dict = torch.load(args.load_from).state_dict()
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

if args.classnorm:
    criterion = nn.NLLLoss()
    interpCrit = nn.NLLLoss(reduction='none')
else:
    criterion = nn.CrossEntropyLoss()
    interpCrit = nn.CrossEntropyLoss(reduction='none')
att_criterion = nn.CrossEntropyLoss(reduction='none', ignore_index=0)
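# `interpCrit` and `att_criterion` use reduction='none', which returns one loss
# value per position rather than a scalar. A generic sketch (not taken from this
# repository) of how such a criterion is typically combined with a padding mask:
def masked_token_loss(criterion_none, logits, targets, pad_id=0):
    # logits: (N, ntokens), targets: (N,); pad_id positions are excluded from the average.
    per_token = criterion_none(logits, targets)
    mask = (targets != pad_id).float()
    return (per_token * mask).sum() / mask.sum().clamp(min=1)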
def train():
    # Load data and configure the model
    print("Loading data...")
    corpus = Corpus(train_dir)
    print(corpus)

    config = Config()
    config.vocab_size = len(corpus.dictionary)
    train_data = batchify(corpus.train, config.batch_size)
    train_len = train_data.size(0)
    seq_len = config.seq_len

    print("Configuring model...")
    model = RNNModel(config)
    if use_cuda:
        model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()

    lr = config.learning_rate  # initial learning rate
    start_time = time.time()
    print("Training and generating...")
    for epoch in range(1, config.num_epochs + 1):  # train for several epochs
        total_loss = 0.0
        model.train()  # dropout is only active in training mode
        hidden = model.init_hidden(config.batch_size)  # initialise the hidden state

        for ibatch, i in enumerate(range(0, train_len - 1, seq_len)):
            data, targets = get_batch(train_data, i, seq_len)  # fetch one batch
            # Detach the hidden state from how it was previously produced;
            # otherwise the model would try to backpropagate to the start of the dataset.
            hidden = repackage_hidden(hidden)
            model.zero_grad()

            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, config.vocab_size), targets)
            loss.backward()  # backpropagation

            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
            for p in model.parameters():  # manual SGD update
                p.data.add_(p.grad.data, alpha=-lr)
            total_loss += loss.item()  # accumulate the loss

            if ibatch % config.log_interval == 0 and ibatch > 0:  # report progress periodically
                cur_loss = total_loss / config.log_interval
                elapsed = get_time_dif(start_time)
                print("Epoch {:3d}, {:5d}/{:5d} batches, lr {:2.3f}, loss {:5.2f}, ppl {:8.2f}, time {}"
                      .format(epoch, ibatch, train_len // seq_len, lr,
                              cur_loss, math.exp(cur_loss), elapsed))
                total_loss = 0.0

        lr /= 4.0  # shrink the learning rate after each epoch

        # Save the model parameters every few epochs
        if epoch % config.save_interval == 0:
            torch.save(model.state_dict(),
                       os.path.join(save_dir, model_name.format(epoch)))

        print(''.join(generate(model, corpus.dictionary.idx2word)))
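# `batchify`, `get_batch`, and `repackage_hidden` above are not defined in this
# snippet; minimal sketches in the style of the PyTorch word_language_model
# example (assumptions -- the repository's versions may differ):
import torch

def batchify(data, batch_size):
    # Trim the flat token tensor so it divides evenly, then reshape to (nbatch, batch_size).
    nbatch = data.size(0) // batch_size
    data = data.narrow(0, 0, nbatch * batch_size)
    return data.view(batch_size, -1).t().contiguous()

def get_batch(source, i, seq_len):
    # Input is rows i..i+seq_len; the target is the same window shifted by one token.
    length = min(seq_len, len(source) - 1 - i)
    return source[i:i + length], source[i + 1:i + 1 + length].view(-1)

def repackage_hidden(h):
    # Detach hidden states from their history so backprop stops at the batch boundary.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)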
if args.save and os.path.isfile(args.save):
    print("Loading Saved Model")
    with open(args.save, 'rb') as f:
        net.load_state_dict(torch.load(f))
    net.rnn.flatten_parameters()
else:
    print("Random Initialization - No Saved Model")

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
        if args.save:
            with open(args.save, 'wb') as f:
                torch.save(net.state_dict(), f)
        # test_loader = test_corpus.batch_generator(seq_length=1, batch_size=1, shuffle=False)
        # val_loss = evaluate(test_corpus, test_loader)
        # print('-' * 89)
        # print('Test: {:3d} | time: {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f}'
        #       .format(epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss)))
        # print('-' * 89)
        sys.stdout.flush()
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')
    sys.stdout.flush()

# Run on test data.