# Imports these snippets rely on. The project-local modules referenced below
# (generate, utils, const, LanguageModel, AverageMeter, evaluate, print_line)
# are assumed to be importable from the surrounding package.
import math
import os
import pickle
import re
import shutil
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm


def freestyle(loc):
    # TODO
    # load the run settings
    model_dir = Path(loc)
    with open(model_dir / 'settings.pkl', 'rb') as handle:
        settings = pickle.load(handle)
    print(settings)

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    how_many = 100

    # load the vocabulary and, for word tokens, the embedding
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # load the latest checkpoint (sort, since os.listdir order is arbitrary)
    fnames = sorted(os.listdir(model_dir / 'checkpoints'))
    fname = fnames[-1]
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # monitor a few seed sentences over a range of sampling temperatures
    sents = [
        'The Standard ',
        'non-abelian',
        'silicon pixel detector',
        'estimate the',
        '[23] ATLAS',
    ]
    temperatures = [0.01 + 0.1 * i for i in range(11)]
    eval_stream = model_dir / 'evaluate_stream.txt'
    for temperature in temperatures:
        txt = '\nTemperature = {}'.format(temperature)
        utils.report(txt, eval_stream)
        for sent in sents:
            txt = generate.compose(model, vocab, emb, sent, temperature, how_many)
            utils.report(txt, eval_stream)
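
# `generate.compose` is project code not shown in this section. Below is a
# minimal sketch of the standard temperature-sampling loop it plausibly wraps,
# for a character-level model (the embedding argument is ignored here). The
# signature, `one_hot_encode` accepting a single character sequence, and the
# `vocab.itos` lookup are all assumptions from the call sites above, not the
# project's actual implementation.
def compose_sketch(model, vocab, seed, temperature, how_many):
    import torch.nn.functional as F

    text = seed
    with torch.no_grad():
        for _ in range(how_many):
            # encode the running text; the model returns one next-token
            # prediction per sequence, so index the single batch entry
            x = torch.Tensor(generate.one_hot_encode([text], vocab))
            logits = model(x)[0]
            # lower temperature sharpens the distribution, higher flattens it
            probs = F.softmax(logits / temperature, dim=-1)
            idx = torch.multinomial(probs, 1).item()
            text += vocab.itos[idx]  # assumed index-to-token lookup
    return text
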
def train(opt):
    # Read preprocessed data
    print_line()
    print('Loading training data ...')
    check_name = re.compile(r'.*\.prep\.train\.pt')
    assert os.path.exists(opt.train_data) and \
        check_name.match(opt.train_data) is not None
    train_dataset = torch.load(opt.train_data)
    train_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    print_line()
    print('Loading validation data ...')
    check_name = re.compile(r'.*\.prep\.val\.pt')
    assert os.path.exists(opt.val_data) and \
        check_name.match(opt.val_data) is not None
    val_dataset = torch.load(opt.val_data)
    val_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    # Build or load the model
    if opt.model_reload is None:
        print_line()
        print('Building new model ...')
        model = LanguageModel(train_dataset.num_vocb,
                              dim_word=opt.dim_word,
                              dim_rnn=opt.dim_rnn,
                              num_layers=opt.num_layers,
                              dropout_rate=opt.dropout_rate)
        model.dictionary = train_dataset.dictionary
        print('Done.')
        train_dataset.describe_dataset()
        val_dataset.describe_dataset()
        model_start_epoch = 0
        model_start_batch = 0
    else:
        print_line()
        print('Loading existing model ...')
        model = torch.load(opt.model_reload)
        print('Done.')
        train_dataset.change_dict(model.dictionary)
        val_dataset.change_dict(model.dictionary)
        model_start_epoch = model.train_info['epoch idx'] - 1
        model_start_batch = model.train_info['batch idx'] - 1

    # Use GPU / CPU
    print_line()
    if opt.cuda:
        model.cuda()
        print('Using GPU %d' % torch.cuda.current_device())
    else:
        print('Using CPU')

    # Criterion: sum the loss, with padding masked out via a zero weight
    criterion_weight = torch.ones(train_dataset.num_vocb + 1)
    criterion_weight[const.PAD] = 0
    criterion = nn.CrossEntropyLoss(weight=criterion_weight, reduction='sum')
    if opt.cuda:
        criterion = criterion.cuda()

    # Optimizer
    lr = opt.lr
    optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    if model_start_epoch > opt.epoch:
        print('This model has already been trained for more than %d epochs; '
              'increase the epoch parameter if you want to continue.'
              % (opt.epoch + 1))
        return

    print_line()
    print('')
    if opt.model_reload is None:
        print('Start training new model, will go through %d epochs' % opt.epoch)
    else:
        print('Continue existing model, from epoch %d, batch %d to epoch %d'
              % (model_start_epoch, model_start_batch, opt.epoch))
    print('')

    # Track the best model seen so far. Copy the dict: aliasing model.train_info
    # directly would make the later val-loss comparison always false.
    if not hasattr(model, 'train_info'):
        model.train_info = {'val loss': float('inf'), 'epoch idx': 1, 'batch idx': 0}
    best_model = dict(model.train_info)

    if opt.save_freq == 0:
        opt.save_freq = train_dataset.num_batch - 1

    # Train
    model.train()
    for epoch_idx in range(model_start_epoch, opt.epoch):
        # New epoch
        acc_loss = 0.0
        acc_count = 0
        average_loss = 0.0
        start_time = time.time()
        train_dataset.shuffle()
        print_line()
        print('Start epoch %d, learning rate %f' % (epoch_idx + 1, lr))
        print_line('-')
        epoch_start_time = start_time

        # If the model was reloaded, resume from the saved batch
        if epoch_idx == model_start_epoch and model_start_batch > 0:
            start_batch = model_start_batch
        else:
            start_batch = 0

        for batch_idx in range(start_batch, train_dataset.num_batch):
            # Generate batch data
            # (Variable wrapping from PyTorch < 0.4 is no longer needed)
            batch_data, batch_lengths, target_words = train_dataset[batch_idx]
            if opt.cuda:
                batch_data = batch_data.cuda()
                batch_lengths = batch_lengths.cuda()
                target_words = target_words.cuda()

            optimizer.zero_grad()

            # Forward
            output_flat = model.forward(batch_data, batch_lengths)

            # Calculate loss
            loss = criterion(output_flat, target_words.view(-1))

            # Backward
            loss.backward()

            # Clip gradients to prevent them from exploding
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.clip)

            # Update parameters
            optimizer.step()

            # Accumulate the summed loss and the token count
            acc_loss += loss.item()
            acc_count += batch_lengths.sum().item()

            # Display progress
            if batch_idx % opt.display_freq == 0:
                average_loss = acc_loss / acc_count
                print('Epoch : %d, Batch : %d / %d, Loss : %f, Perplexity : %f, Time : %f'
                      % (epoch_idx + 1, batch_idx, train_dataset.num_batch,
                         average_loss, math.exp(average_loss),
                         time.time() - start_time))
                acc_loss = 0.0
                acc_count = 0
                start_time = time.time()

            # Save and validate when necessary
            if (1 + batch_idx) % opt.save_freq == 0:
                print_line('-')
                print('Pause training to save and validate.')
                model.eval()
                val_loss = evaluate(model=model,
                                    eval_dataset=val_dataset,
                                    cuda=opt.cuda,
                                    criterion=criterion)
                model.train()
                print('Validation Loss : %f' % val_loss)
                print('Validation Perplexity : %f' % math.exp(val_loss))

                model_savename = (opt.model_name
                                  + '-e_' + str(epoch_idx + 1)
                                  + '-b_' + str(batch_idx + 1)
                                  + '-ppl_' + str(int(math.exp(val_loss)))
                                  + '.pt')
                model.val_loss = val_loss
                model.val_ppl = math.exp(val_loss)
                model.epoch_idx = epoch_idx + 1
                model.batch_idx = batch_idx + 1
                model.train_info['val loss'] = val_loss
                model.train_info['train loss'] = average_loss  # most recent average
                model.train_info['epoch idx'] = epoch_idx + 1
                model.train_info['batch idx'] = batch_idx + 1
                model.train_info['val ppl'] = math.exp(model.val_loss)
                model.train_info['save name'] = model_savename

                try:
                    torch.save(model, model_savename)
                except Exception:
                    print('Failed to save model!')

                if model.val_loss < best_model['val loss']:
                    print_line('-')
                    print('New best model on validation set')
                    best_model = dict(model.train_info)
                    shutil.copy2(best_model['save name'], opt.model_name + '.best.pt')

                print_line('-')
                print('Saved model at %s' % model_savename)
                print_line('-')
                print('Continue training ...')
                print_line('-')

        print('Epoch %d finished, took %d s'
              % (epoch_idx + 1, time.time() - epoch_start_time))

        # Decay the learning rate; rebuild the optimizer so the new rate takes effect
        lr *= opt.lr_decay
        optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    # Finish training
    print_line()
    print(' ')
    print('Finished training %d epochs!' % opt.epoch)
    print(' ')
    print_line()
    print('Best model:')
    print('Epoch : %d, Batch : %d, Loss : %f, Perplexity : %f'
          % (best_model['epoch idx'], best_model['batch idx'],
             best_model['val loss'], best_model['val ppl']))
    print_line('-')
    print('Saved best model at %s' % (opt.model_name + '.best.pt'))
    shutil.copy2(best_model['save name'], opt.model_name + '.best.pt')
    print_line()
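
# `evaluate` is called in train() but not defined in this section. A minimal
# sketch of what it plausibly does, assuming the dataset yields the same
# (batch_data, batch_lengths, target_words) triples as in train(); this is an
# assumption, not the project's implementation.
def evaluate_sketch(model, eval_dataset, cuda, criterion):
    total_loss = 0.0
    total_count = 0
    with torch.no_grad():
        for batch_idx in range(eval_dataset.num_batch):
            batch_data, batch_lengths, target_words = eval_dataset[batch_idx]
            if cuda:
                batch_data = batch_data.cuda()
                batch_lengths = batch_lengths.cuda()
                target_words = target_words.cuda()
            output_flat = model.forward(batch_data, batch_lengths)
            # the criterion sums the loss with padding zero-weighted, so
            # dividing by the total token count gives a per-token loss
            total_loss += criterion(output_flat, target_words.view(-1)).item()
            total_count += batch_lengths.sum().item()
    return total_loss / total_count
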
        loss.backward()
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        scheduler.step()
        err.update(loss.item())
        grd.update(grad_norm)
        lr = scheduler.get_lr()[0]
        progress.set_description('epoch %d lr %.6f %s %s'
                                 % (epoch + 1, lr, err, grd))

# Evaluation pass over the test set
model.eval()
err = AverageMeter('loss')
loader = DataLoader(test, pin_memory=True, num_workers=4,
                    batch_size=bptt, drop_last=True)
progress = tqdm(loader)
hidden = model.step_init(batch_size)
with torch.no_grad():
    for inputs, targets in progress:
        inputs = inputs.cuda(non_blocking=True)
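        # The source cuts off here, mid-loop. What follows is a plausible
        # continuation of the evaluation pass, assumed rather than taken from
        # the source: it mirrors the training step without backprop, assuming
        # the model's stateful forward returns (output, hidden) as the
        # `step_init` call suggests, and reuses the training criterion.
        targets = targets.cuda(non_blocking=True)
        output, hidden = model(inputs, hidden)
        loss = criterion(output.view(-1, output.size(-1)), targets.view(-1))
        err.update(loss.item())
        progress.set_description('test %s' % err)
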
def plot_switch_prob(loc):
    # load settings
    model_dir = Path(loc)
    with open(model_dir / 'settings.pkl', 'rb') as handle:
        settings = pickle.load(handle)
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']

    # load the vocabulary and, for word tokens, the embedding
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # load the final model (sort, since os.listdir order is arbitrary)
    fnames = sorted(os.listdir(model_dir / 'checkpoints'))
    fname = fnames[-1]
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # prepare the base and replacement batches
    N = 100
    gen = generate.generate('valid', token=token, max_len=max_len,
                            small=small, batch_size=N)
    base_batch, _ = next(gen)
    repl_batch, _ = next(gen)

    # compute the average switch probability over the batch
    depths = list(range(max_len))
    switch_probs = [
        compute_switch_prob(model, base_batch, repl_batch, keep_depth, vocab, emb)
        for keep_depth in depths
    ]

    # make the plot
    fig, ax = plt.subplots()
    ax.plot(depths, switch_probs, 'tomato')
    ax.plot(depths, [0.01] * len(depths), 'k')
    ax.set_yscale('log')
    ax.set_ylim(0.001, 1)
    ax.set_xlim(0, max_len)
    ax.set_title('Probability of switching predicted character\n{}'.format(model_dir.name),
                 fontsize=7)
    ax.set_xlabel('sequence keep-depth')
    ax.set_ylabel('Probability')
    ax.grid()
    plt.savefig(model_dir / 'SwitchProbability.pdf')
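
# `compute_switch_prob` is called above but not shown in this section. A
# minimal sketch of what it plausibly computes, judging from the call site and
# the plot labels (an assumption, not the project's implementation): the
# fraction of sequences whose predicted next token changes when everything
# after `keep_depth` is swapped for the replacement batch.
def compute_switch_prob_sketch(model, base_batch, repl_batch, keep_depth, vocab, emb):
    # splice: keep the first `keep_depth` tokens, replace the rest
    # (assumes the batches are 2-D arrays of shape [batch, seq])
    mixed = np.concatenate([base_batch[:, :keep_depth],
                            repl_batch[:, keep_depth:]], axis=1)

    def encode(b):
        # reuse the same encoding helpers the plotting code uses (assumed)
        if emb is None:
            return torch.Tensor(generate.one_hot_encode(b, vocab))
        return torch.Tensor(generate.w2v_encode(b, emb, vocab))

    with torch.no_grad():
        base_preds = model(encode(base_batch)).argmax(dim=1)
        mixed_preds = model(encode(mixed)).argmax(dim=1)

    # probability that the prediction switched
    return (base_preds != mixed_preds).float().mean().item()
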
def plot_losses(loc):
    # load the run settings
    model_dir = Path(loc)
    with open(model_dir / 'settings.pkl', 'rb') as handle:
        settings = pickle.load(handle)

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']
    n_epochs = settings['n_epochs']
    n_saves = settings['n_saves']
    criterion = nn.CrossEntropyLoss()

    # load the vocabulary and, for word tokens, the embedding
    models = []
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # load every checkpoint, in training order (sort: os.listdir is arbitrary)
    for fname in sorted(os.listdir(model_dir / 'checkpoints')):
        model = LanguageModel(cell, input_size, hidden_size, output_size)
        model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
        model.eval()
        models.append(model)

    # prepare training and validation sets
    N = 10000
    splits = ['train', 'valid']
    gens = {
        split: generate.generate(split, token=token, max_len=max_len,
                                 small=small, batch_size=N)
        for split in splits
    }
    batch, labels = {}, {}
    for split in splits:
        b, l = next(gens[split])
        # one-hot encode characters, or embed words
        if token == 'character':
            b = generate.one_hot_encode(b, vocab)
        elif token == 'word':
            b = generate.w2v_encode(b, emb, vocab)
        batch[split] = torch.Tensor(b)
        labels[split] = torch.Tensor(l).long()

    # evaluate the models
    loss = {split: [] for split in splits}
    acc = {split: [] for split in splits}
    for i, model in enumerate(models):
        t0 = time.time()
        print(i)
        with torch.no_grad():
            for split in splits:
                # loss
                outputs = model(batch[split])
                l = criterion(outputs, labels[split])
                loss[split].append(float(l))
                # accuracy
                _, preds = torch.max(outputs, 1)
                a = (preds == labels[split]).sum().item() / float(N)
                acc[split].append(a)
        print('{:2.2f}s'.format(time.time() - t0))

    # record the best accuracy per split
    for split in splits:
        with open(model_dir / 'best_{}_acc.txt'.format(split), 'w') as handle:
            best = max(acc[split])
            handle.write('{}\n'.format(best))

    # plot both quantities
    for quantity, description in zip([loss, acc], ['Loss', 'Accuracy']):
        fig, ax = plt.subplots()
        for split in splits:
            xs = (1 + np.arange(len(quantity[split]))) / n_saves
            ax.plot(xs, quantity[split], label=split)
        ax.set_xlabel('Training epoch')
        if n_epochs > 1:
            ax.set_xlabel('Epoch')
        ax.set_ylabel(description)
        upper = ax.get_ylim()[1] if description == 'Loss' else 1
        ax.set_ylim(0, upper)
        ax.set_xlim(0, ax.get_xlim()[1])
        ax.set_title(model_dir.name, fontsize=7)
        ax.legend()
        ax.grid(alpha=0.5, which='both')
        plt.savefig(model_dir / '{}.pdf'.format(description))
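
# A hypothetical driver tying the diagnostics together. The command-line shape
# is an assumption for illustration; the functions above only require `loc` to
# point at a run directory containing settings.pkl and a checkpoints/ subdir.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Run language-model diagnostics')
    parser.add_argument('loc', help='run directory with settings.pkl and checkpoints/')
    args = parser.parse_args()

    plot_losses(args.loc)        # loss / accuracy curves over checkpoints
    plot_switch_prob(args.loc)   # sensitivity of predictions to context depth
    freestyle(args.loc)          # temperature sweep of generated samples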