def main():
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])
    # coloredlogs.install(level='DEBUG')
    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)
    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)
    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        generate_sample(model, token_to_idx, idx_to_token, n_tokens=20)
    except KeyboardInterrupt:
        print('Aborted!')
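# load_ckp is not shown here; a minimal sketch of what it is assumed to do,
# using the same checkpoint keys that save_ckp writes further below
# ('epoch', 'valid_loss_min', 'state_dict', 'optimizer'):
import torch

def load_ckp(checkpoint_fpath, model, optimiser):
    """Restore model and optimiser state from a checkpoint file (sketch)."""
    checkpoint = torch.load(checkpoint_fpath, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimiser.load_state_dict(checkpoint['optimizer'])
    return model, optimiser, checkpoint['epoch'], checkpoint['valid_loss_min']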
def train(opt, th):
    """Train the model.

    Args:
        opt -- options / hyper-parameters
        th  -- a TextConverter object
    Returns:
        None
    """
    # 1. Training data
    data_set = TextDataset(opt.train_data_path, th)
    train_data = DataLoader(data_set, opt.batch_size, shuffle=True,
                            num_workers=opt.num_workers)
    # 2. Build the model
    model = CharRNN(th.vocab_size, opt.embed_size, opt.hidden_size,
                    opt.n_layers, opt.dropout_p, opt.bidir)
    if USE_CUDA:
        model = model.cuda(DEVICE_ID)
    # 3. Optimisation setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.learning_rate)
    # 4. Training loop
    for e in range(opt.max_epochs):
        epoch_loss = 0
        hidden = None
        for input_seqs, labels in train_data:
            # Both are [b, seq_len]; the last batch may be smaller than b.
            b = input_seqs.shape[0]
            if hidden is not None:
                # Trim the hidden state to the current batch size and detach
                # it from the previous batch's graph, so each backward pass
                # only covers one batch (no retain_graph needed).
                if isinstance(hidden, tuple):  # LSTM: (h, c)
                    hidden = tuple(h[:, :b, :].detach() for h in hidden)
                else:
                    hidden = hidden[:, :b, :].detach()
            labels = labels.long().view(-1)
            input_seqs, labels = get_variable(input_seqs), get_variable(labels)
            # Forward pass
            probs, hidden = model(input_seqs, hidden)
            probs = probs.view(-1, th.vocab_size)
            # Loss and backward pass
            loss = criterion(probs, labels)
            optimizer.zero_grad()
            loss.backward()
            # Clip gradients, then take an optimisation step
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
            epoch_loss += loss.item()
        # Average cross-entropy over the epoch -> perplexity
        entropy_loss = epoch_loss / len(train_data)
        perplexity = np.exp(entropy_loss)
        print("epoch: {}, perp: {:.3f}".format(e + 1, perplexity))
        if perplexity <= opt.min_perplexity or e == opt.max_epochs - 1:
            print("best model")
            torch.save(model, opt.model_path)
            break
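# get_variable, USE_CUDA and DEVICE_ID are assumed to come from a project
# utility module. A minimal sketch consistent with how they are used above:
# on modern PyTorch there is nothing to wrap, so it just moves the tensor to
# the configured GPU when one is available.
import torch

USE_CUDA = torch.cuda.is_available()
DEVICE_ID = 0

def get_variable(tensor):
    """Move a tensor to the GPU when CUDA is available (sketch)."""
    return tensor.cuda(DEVICE_ID) if USE_CUDA else tensor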
def main():
    logging.root.setLevel(logging.NOTSET)
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])
    # coloredlogs.install(level='DEBUG')
    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)
    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)
    s1 = "bababac bababa bacc bac bacc"
    s2 = "bababac baba bac bacc bac"
    s3 = "baba"
    s4 = "ccab cab ccab ababab cababab"
    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        score(model, token_to_idx, idx_to_token, seed_phrase=s1)
        score(model, token_to_idx, idx_to_token, seed_phrase=s2)
        score(model, token_to_idx, idx_to_token, seed_phrase=s3)
        score(model, token_to_idx, idx_to_token, seed_phrase=s4)
    except KeyboardInterrupt:
        print('Aborted!')
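# score() is not shown; it presumably measures how likely the model finds a
# seed phrase. A minimal sketch under that assumption -- the model is assumed
# to return per-step log-probabilities (consistent with the NLLLoss training
# code below) with shape [batch, seq_len, n_tokens]:
import torch

def score(model, token_to_idx, idx_to_token, seed_phrase):
    """Print the average per-character log-likelihood of seed_phrase (sketch)."""
    model.eval()
    ids = torch.tensor([[token_to_idx[c] for c in seed_phrase]])
    if torch.cuda.is_available():
        ids = ids.cuda()
    with torch.no_grad():
        log_probs, _ = model(ids)  # assumed forward signature
    # Log-probability assigned to each actual next character in the phrase.
    ll = log_probs[:, :-1].gather(-1, ids[:, 1:].unsqueeze(-1)).mean().item()
    print('{!r}: avg log-likelihood {:.4f}'.format(seed_phrase, ll))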
def main():
    # Parse command line arguments
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--train_set', type=str, required=True)
    argparser.add_argument('--valid_set', type=str, required=True)
    argparser.add_argument('--model', type=str, default="gru")
    argparser.add_argument('--model_file', type=str, default='None')
    argparser.add_argument('--n_epochs', type=int, default=30)
    argparser.add_argument('--hidden_size', type=int, default=200)
    argparser.add_argument('--n_layers', type=int, default=3)
    argparser.add_argument('--learning_rate', type=float, default=0.01)
    argparser.add_argument('--chunk_len', type=int, default=200)
    argparser.add_argument('--batch_size', type=int, default=300)
    argparser.add_argument('--num_workers', type=int, default=8)
    argparser.add_argument('--cuda', action='store_true')
    argparser.add_argument('--cpu', action='store_true')
    args = argparser.parse_args()

    # Initialize models and start training
    if args.model_file == 'None':
        decoder = CharRNN(
            n_characters,
            args.hidden_size,
            n_characters,
            model=args.model,
            n_layers=args.n_layers,
        )
        epoch_from = 1
        prev_valid_loss = sys.maxsize
        old_filename = None
    else:
        if args.cpu:
            decoder = torch.load(args.model_file,
                                 map_location=lambda storage, loc: storage)
        else:
            decoder = torch.load(args.model_file)
        # Recover hyper-parameters and training state from the file name.
        info = args.model_file.split('_')
        args.model = info[0]
        epoch_from = int(info[1][5:]) + 1
        args.n_layers = int(info[2][7:])
        args.hidden_size = int(info[5][2:])
        prev_valid_loss = float(info[7][4:-3])
        old_filename = args.model_file
        print("successfully loaded model! Continuing from epoch {0} "
              "with valid loss {1}".format(epoch_from, prev_valid_loss))

    optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        decoder.cuda()

    start = time.time()
    train_dataset = WordDataset(args.train_set, args.chunk_len)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)
    valid_dataset = WordDataset(args.valid_set, args.chunk_len)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)
    try:
        print('Training for maximum {} epochs...'.format(args.n_epochs))
        for epoch in range(epoch_from, args.n_epochs + 1):
            train_loss, num_samples = 0, 0
            for s in tqdm(train_dataloader):
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                train_loss += train(decoder, optimizer, criterion, input_,
                                    target, args.batch_size, args.chunk_len,
                                    args.cuda)
                num_samples += 1
            train_loss /= num_samples

            valid_loss, num_samples = 0, 0
            for s in valid_dataloader:
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                valid_loss += evaluate(decoder, criterion, input_, target,
                                       args.batch_size, args.chunk_len,
                                       args.cuda)
                num_samples += 1
            valid_loss /= num_samples

            elapsed = time_since(start)
            pcnt = epoch / args.n_epochs * 100
            log = ('{} elapsed - epoch #{} ({:.1f}%) - training loss (BPC) {:.2f} '
                   '- validation loss (BPC) {:.2f}')
            print(log.format(elapsed, epoch, pcnt, train_loss, valid_loss))

            # Early stopping: keep only the most recent improving checkpoint.
            if valid_loss > prev_valid_loss:
                print('No longer learning, just overfitting, stopping here.')
                break
            else:
                filename = model_file_name(decoder, epoch, train_loss, valid_loss)
                torch.save(decoder, filename)
                print('Saved as {}'.format(filename))
                if old_filename:
                    os.remove(old_filename)
                old_filename = filename
                prev_valid_loss = valid_loss
    except KeyboardInterrupt:
        print("Saving before quit...")
        try:
            valid_loss
        except NameError:
            valid_loss = 'no_val'
        filename = model_file_name(decoder, epoch, train_loss, valid_loss)
        torch.save(decoder, filename)
        print('Saved as {}'.format(filename))
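# prep_data is not shown here; a minimal sketch of what the loops above
# assume it does -- cast the character-index batches to LongTensors and move
# them to the GPU when --cuda is set. The exact behaviour is an assumption:
def prep_data(input_batch, target_batch, cuda):
    """Prepare an (input, target) batch for the decoder (sketch)."""
    input_batch, target_batch = input_batch.long(), target_batch.long()
    if cuda:
        input_batch, target_batch = input_batch.cuda(), target_batch.cuda()
    return input_batch, target_batch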
def train(filename, rnn_type, num_layers, dropout, emb_size, hidden_size,
          num_epochs, batch_size, learning_rate, num_samples, seed_phrase,
          sample_every, checkpoint_path):
    """Trains a character-level Recurrent Neural Network in PyTorch.

    Args:
        optional arguments [python train.py --help]
    """
    logging.info('reading `{}` for character sequences'.format(filename))
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=filename)

    # Pin the start ('~') and end ('#') tokens to fixed positions in the
    # vocabulary, then rebuild the token -> index lookup.
    idx_to_token.remove('~')
    idx_to_token.remove('#')
    idx_to_token = ['~'] + idx_to_token + ['#']
    token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}
    logging.info(idx_to_token)
    logging.info(token_to_idx)
    n_tokens = len(idx_to_token)
    max_length = inputs.size(1)

    logging.debug('creating char-level RNN model')
    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    logging.debug('defining model training operations')
    # define training procedures and operations for training the model
    criterion = nn.NLLLoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     min_lr=1e-6,
                                                     factor=0.1,
                                                     patience=7,
                                                     verbose=True)

    # train-val-test split of the dataset (90% / 5% / 5%)
    split_index = int(0.9 * inputs.size(0))
    train_tensors, inputs = inputs[:split_index], inputs[split_index:]
    split_index = int(0.5 * inputs.size(0))
    val_tensors, test_tensors = inputs[:split_index], inputs[split_index:]
    del inputs
    logging.info('train tensors: {}'.format(train_tensors.size()))
    logging.info('val tensors: {}'.format(val_tensors.size()))
    logging.info('test tensors: {}'.format(test_tensors.size()))

    logging.debug('training char-level RNN model')
    # loop over epochs
    for epoch in range(1, num_epochs + 1):
        epoch_loss, n_iter = 0.0, 0
        # loop over batches
        for tensors in tqdm(iterate_minibatches(train_tensors, batchsize=batch_size),
                            desc='Epoch[{}/{}]'.format(epoch, num_epochs),
                            leave=False,
                            total=train_tensors.size(0) // batch_size):
            # optimize model parameters
            epoch_loss += optimize(model, tensors, max_length, n_tokens,
                                   criterion, optimizer)
            n_iter += 1

        # evaluate model after every epoch
        val_loss = evaluate(model, val_tensors, max_length, n_tokens, criterion)
        # lr_scheduler decreases lr when stuck at a local minimum
        scheduler.step(val_loss)

        # log epoch status info
        logging.info('Epoch[{}/{}]: train_loss - {:.4f} val_loss - {:.4f}'.format(
            epoch, num_epochs, epoch_loss / n_iter, val_loss))

        # sample from the model every few epochs
        if epoch % sample_every == 0:
            print('Epoch[{}/{}]: train_loss - {:.4f} val_loss - {:.4f}'.format(
                epoch, num_epochs, epoch_loss / n_iter, val_loss))
            for _ in range(num_samples):
                sample = generate_sample(model, token_to_idx, idx_to_token,
                                         max_length, n_tokens,
                                         seed_phrase=seed_phrase)
                logging.debug(sample)

        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        best_model_path = checkpoint_path
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
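# iterate_minibatches is not shown here; a minimal sketch consistent with the
# loop above (it is called with a `batchsize` keyword and yields roughly
# inputs.size(0) // batchsize batches). Shuffling each epoch is an assumption:
import torch

def iterate_minibatches(tensors, batchsize):
    """Yield shuffled minibatches from a 2-D tensor of token ids (sketch)."""
    perm = torch.randperm(tensors.size(0))
    for start in range(0, tensors.size(0) - batchsize + 1, batchsize):
        yield tensors[perm[start:start + batchsize]]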
def train(filename="poets.txt",
          hidden_size=128,
          n_layers=2,
          learning_rate=0.01,
          n_epochs=10000,
          chunk_len=20,
          batch_size=1024,
          print_every=100):
    # %% Global Configuration
    file, file_len, all_characters, n_characters = helpers.read_file(filename)
    sentences = file.split("\n")
    print("There are %d unique characters in the dataset" % n_characters)
    print("There are %d sentences in the dataset with total of %d characters"
          % (len(sentences), len(file)))

    # %% Model Saving and Loading
    model_filename = helpers.pt_name
    if os.path.exists(model_filename):
        decoder = load(model_filename)
    else:
        decoder = CharRNN(
            n_characters,
            hidden_size,
            n_characters,
            model=helpers.mcell,
            n_layers=n_layers,
        )
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    if helpers.USE_CUDA:
        decoder.cuda()

    start = time.time()
    all_losses = []
    try:
        print("Training for %d epochs..." % n_epochs)
        for epoch in range(n_epochs):
            # Halve the learning rate every 1000 epochs.
            if epoch != 0 and epoch % 1000 == 0:
                learning_rate /= 2
                decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                                     lr=learning_rate)
            inp, target = random_training_set(sentences, chunk_len, batch_size)
            loss = train_one_entry(decoder, decoder_optimizer, criterion,
                                   inp, target, chunk_len, batch_size)
            all_losses.append(loss)
            if epoch != 0 and epoch % print_every == 0:
                print('%s: [%s (%d %d%%) %.4f]' % (time.ctime(),
                                                   helpers.time_since(start),
                                                   epoch,
                                                   epoch / n_epochs * 100,
                                                   loss))
                print(generate(decoder, '新年', 100, cuda=helpers.USE_CUDA), '\n')
                save(decoder, model_filename)
    except KeyboardInterrupt:
        save(decoder, model_filename)

    import matplotlib.pyplot as plt
    plt.plot(all_losses)
    plt.xlabel("iteration")
    plt.ylabel("train loss")
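# random_training_set is not shown; it is assumed to sample batch_size random
# chunks and return (input, target) pairs with the target shifted one
# character to the right -- the standard char-RNN setup. helpers.char_tensor
# and the space-padding of short lines are assumptions:
import random
import torch

def random_training_set(sentences, chunk_len, batch_size):
    """Sample random chunks and their one-step-shifted targets (sketch)."""
    inp = torch.zeros(batch_size, chunk_len).long()
    target = torch.zeros(batch_size, chunk_len).long()
    for i in range(batch_size):
        line = random.choice(sentences)
        chunk = line[:chunk_len + 1].ljust(chunk_len + 1)  # pad short lines
        inp[i] = helpers.char_tensor(chunk[:-1])
        target[i] = helpers.char_tensor(chunk[1:])
    if helpers.USE_CUDA:
        inp, target = inp.cuda(), target.cuda()
    return inp, target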
n_characters = len(string.printable)
decoder = CharRNN(
    n_characters,
    args.hidden_size,
    n_characters,
    model=args.model,
    n_layers=args.n_layers,
)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()
if args.cuda:
    decoder.cuda()

start = time.time()
all_losses = []
loss_avg = 0

try:
    print("Training for %d epochs..." % args.n_epochs)
    for epoch in tqdm(range(1, args.n_epochs + 1)):
        loss = train(*random_training_set(args.chunk_len, args.batch_size))
        loss_avg += loss
        if epoch % args.print_every == 0:
            print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch,
                                           epoch / args.n_epochs * 100, loss))
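# The train() step function called above is not shown; a minimal sketch of
# the usual per-batch step for this style of char-RNN (feed one character at
# a time, average the loss over the chunk). decoder.init_hidden and the exact
# output shape are assumptions:
def train(inp, target):
    """One optimisation step over a (batch_size, chunk_len) batch (sketch)."""
    hidden = decoder.init_hidden(args.batch_size)
    decoder.zero_grad()
    loss = 0
    for c in range(args.chunk_len):
        # Predict character c+1 from character c and the running hidden state.
        output, hidden = decoder(inp[:, c], hidden)
        loss += criterion(output.view(args.batch_size, -1), target[:, c])
    loss.backward()
    decoder_optimizer.step()
    return loss.item() / args.chunk_len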