import os
import pickle
import time
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

# generate, utils, evaluate, LanguageModel, compute_switch_prob and the argparse
# namespace `args` are provided by the surrounding project.


def freestyle(loc):
    # TODO
    # load data
    model_dir = Path(loc)
    settings = pickle.load(open(model_dir / 'settings.pkl', 'rb'))
    print(settings)

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    how_many = 100

    # build the vocab (and embedding)
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # pick the latest checkpoint (os.listdir gives no guaranteed order, so sort)
    fnames = sorted(os.listdir(model_dir / 'checkpoints'))
    fname = fnames[-1]

    # load the model
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # monitor
    sents = [
        'The Standard ', 'non-abelian', 'silicon pixel detector',
        'estimate the', '[23] ATLAS'
    ]
    temperatures = [0.01 + 0.1 * i for i in range(11)]
    eval_stream = model_dir / 'evaluate_stream.txt'
    for temperature in temperatures:
        txt = '\nTemperature = {}'.format(temperature)
        utils.report(txt, eval_stream)
        for sent in sents:
            txt = generate.compose(model, vocab, emb, sent, temperature, how_many)
            utils.report(txt, eval_stream)
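# For reference, the temperature sweep in freestyle() relies on generate.compose
# rescaling the model's output distribution before sampling. The helper below is
# a minimal sketch of that idea, written here for illustration only; the actual
# sampling logic lives inside generate.compose and may differ in detail.
def sample_with_temperature_sketch(logits, temperature):
    # low temperatures sharpen the distribution towards the argmax token,
    # temperature = 1 recovers the plain softmax, higher temperatures flatten it
    probs = torch.softmax(logits / temperature, dim=-1)
    # draw a single token index from the rescaled distribution
    return torch.multinomial(probs, num_samples=1).item()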
def train(settings, model_dir):
    # training and sampling
    temperature = 0.5
    how_many = 70

    # create the vocab, model, (and embedding)
    vocab = generate.get_vocab(args.token, small=args.small)
    if args.token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif args.token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    model = LanguageModel(args.cell, input_size, args.hidden_size, output_size)

    # create criterion and optimiser
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    # create the validation set
    n_valid = 10000
    valid_gen = generate.generate('valid', token=args.token, max_len=args.max_len,
                                  small=args.small, batch_size=n_valid)
    for valid_batch, valid_labels in valid_gen:
        # one hot encode
        if args.token == 'character':
            valid_batch = generate.one_hot_encode(valid_batch, vocab)
        # or embed
        elif args.token == 'word':
            valid_batch = generate.w2v_encode(valid_batch, emb, vocab)
        valid_batch = torch.Tensor(valid_batch)
        valid_labels = torch.Tensor(valid_labels).long()
        break

    # how many batches make up one epoch?
    batches_per_epoch = generate.get_n_batches_in_epoch('train', args.token,
                                                        args.batch_size,
                                                        args.max_len, args.small)

    # training settings
    every_n = int(batches_per_epoch / args.n_saves) if not args.debug else 50
    running_loss = 0
    training_losses = []
    valid_losses = []
    t0 = time.time()

    # dump the settings
    pickle.dump(settings, open(model_dir / 'settings.pkl', 'wb'))
    out_stream = model_dir / 'out_stream.txt'

    # run the training loop
    for epoch in range(1, args.n_epochs + 1):
        opening = [
            '', '#' * 20,
            '# Epoch {} (t={:2.2f}h)'.format(epoch, (time.time() - t0) / 3600.),
            '#' * 20, ''
        ]
        for txt in opening:
            utils.report(txt, out_stream)

        # create the generator for each epoch
        train_gen = generate.generate('train', token=args.token, max_len=args.max_len,
                                      small=args.small, batch_size=args.batch_size)
        for i, (batch, labels) in enumerate(train_gen):
            # one hot encode
            if args.token == 'character':
                batch = generate.one_hot_encode(batch, vocab)
            # or embed
            elif args.token == 'word':
                batch = generate.w2v_encode(batch, emb, vocab)

            # turn into torch tensors
            batch = torch.Tensor(batch)
            labels = torch.Tensor(labels).long()

            # zero the gradients
            optimizer.zero_grad()

            # forward and backward pass and optimisation step
            outputs = model(batch)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # monitor the losses (use .item() so the graph is not kept alive)
            running_loss += loss.item()
            if i % every_n == (every_n - 1):
                # append the training losses
                training_losses.append(running_loss / every_n)
                running_loss = 0

                # compute the valid loss
                with torch.no_grad():
                    valid_outputs = model(valid_batch)
                    valid_losses.append(float(criterion(valid_outputs, valid_labels)))

                # monitor progress
                monitor = ['\n{}/{} done'.format(i + 1, batches_per_epoch)]
                monitor.append(generate.compose(model, vocab, emb,
                                                'The Standard Model of',
                                                temperature, how_many))
                for m in monitor:
                    utils.report(m, out_stream)

                # save the model
                torch.save(model.state_dict(),
                           model_dir / 'checkpoints' / 'epoch{}_step_{}.pt'.format(
                               epoch, round(i / every_n)))
            if i >= 1000 and args.debug:
                break

    # save information
    dt = time.time() - t0
    time_txt = '\ntime taken: {:2.2f}h\n'.format(dt / 3600.)
    utils.report(time_txt, out_stream)
    utils.report(str(dt / 3600.), model_dir / 'time.txt')
    loss_dict = {'train': training_losses, 'valid': valid_losses, 'time_taken': dt}
    pickle.dump(loss_dict, open(model_dir / 'losses.pkl', 'wb'))

    # evaluate
    evaluate.plot_losses(model_dir)
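# The encoding calls in train() turn token ids into network inputs. The helper
# below is a minimal sketch of the character case, assuming each batch holds
# integer ids in [0, vocab_size); the project's generate.one_hot_encode is the
# real implementation and may use a different layout.
def one_hot_encode_sketch(batch_ids, vocab_size):
    batch_ids = np.asarray(batch_ids)
    # one vector of length vocab_size per token, all zeros except the id position
    encoded = np.zeros(batch_ids.shape + (vocab_size,), dtype=np.float32)
    np.put_along_axis(encoded, batch_ids[..., None], 1.0, axis=-1)
    return encoded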
def plot_switch_prob(loc):
    # load settings
    model_dir = Path(loc)
    settings = pickle.load(open(model_dir / 'settings.pkl', 'rb'))
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']

    # build the vocab (and embedding)
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # pick the latest checkpoint (sorted, since os.listdir has no fixed order)
    fnames = sorted(os.listdir(model_dir / 'checkpoints'))
    fname = fnames[-1]

    # load the final model
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # prepare the base and replacement batch
    N = 100
    gen = generate.generate('valid', token=token, max_len=max_len, small=small,
                            batch_size=N)
    base_batch, _ = next(gen)
    repl_batch, _ = next(gen)

    # compute the average switch probability over the batch at each keep-depth
    depths = [i for i in range(max_len)]
    switch_probs = [
        compute_switch_prob(model, base_batch, repl_batch, keep_depth, vocab, emb)
        for keep_depth in depths
    ]

    # make the plot
    fig, ax = plt.subplots()
    ax.plot(depths, switch_probs, 'tomato')
    ax.plot(depths, [0.01] * len(depths), 'k')  # reference line at 1%
    ax.set_yscale('log')
    ax.set_ylim(0.001, 1)
    ax.set_xlim(0, max_len)
    ax.set_title('Probability of switching predicted character\n{}'.format(
        model_dir.name), fontsize=7)
    ax.set_xlabel('sequence keep-depth')
    ax.set_ylabel('Probability')
    ax.grid()
    plt.savefig(model_dir / 'SwitchProbability.pdf')
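# compute_switch_prob() is imported from the surrounding project. The sketch
# below spells out one plausible reading of the quantity plotted above, assuming
# it is the fraction of sequences whose predicted next token changes once the
# input beyond `keep_depth` is swapped for the replacement batch. All names and
# encoding details here are assumptions, not the project's implementation.
def compute_switch_prob_sketch(model, base_batch, repl_batch, keep_depth, vocab, emb=None):
    def encode(b):
        x = generate.one_hot_encode(b, vocab) if emb is None else generate.w2v_encode(b, emb, vocab)
        return torch.Tensor(x)

    # keep the first keep_depth tokens of each base sequence, replace the rest
    mixed = np.array(base_batch).copy()
    mixed[:, keep_depth:] = np.array(repl_batch)[:, keep_depth:]

    with torch.no_grad():
        base_preds = model(encode(base_batch)).argmax(dim=1)
        mixed_preds = model(encode(mixed)).argmax(dim=1)

    # fraction of sequences whose prediction switched
    return float((base_preds != mixed_preds).float().mean())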
def plot_losses(loc):
    # load data
    model_dir = Path(loc)
    settings = pickle.load(open(model_dir / 'settings.pkl', 'rb'))

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']
    n_epochs = settings['n_epochs']
    n_saves = settings['n_saves']
    criterion = nn.CrossEntropyLoss()

    # load the models, one per checkpoint, in sorted (chronological) order
    models = []
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    for fname in sorted(os.listdir(model_dir / 'checkpoints')):
        model = LanguageModel(cell, input_size, hidden_size, output_size)
        model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
        model.eval()
        models.append(model)

    # prepare training and validation sets
    N = 10000
    splits = ['train', 'valid']
    gens = {
        split: generate.generate(split, token=token, max_len=max_len, small=small,
                                 batch_size=N)
        for split in splits
    }
    batch, labels = {}, {}
    for split in splits:
        for b, l in gens[split]:
            # one hot encode
            if token == 'character':
                b = generate.one_hot_encode(b, vocab)
            # or embed
            elif token == 'word':
                b = generate.w2v_encode(b, emb, vocab)
            batch[split] = torch.Tensor(b)
            labels[split] = torch.Tensor(l).long()
            break

    # evaluate the models
    loss = {split: [] for split in splits}
    acc = {split: [] for split in splits}
    for i, model in enumerate(models):
        t0 = time.time()
        print(i)
        for split in splits:
            with torch.no_grad():
                # loss
                outputs = model(batch[split])
                l = criterion(outputs, labels[split])
                loss[split].append(float(l))

                # accuracy
                _, preds = torch.max(outputs, 1)
                a = (preds == labels[split]).sum().item() / float(N)
                acc[split].append(a)
        print('{:2.2f}s'.format(time.time() - t0))

    # record the best accuracy for each split
    for split in splits:
        with open(model_dir / 'best_{}_acc.txt'.format(split), 'w') as handle:
            best = max(acc[split])
            handle.write('{}\n'.format(best))

    # plot both quantities
    for quantity, description in zip([loss, acc], ['Loss', 'Accuracy']):
        fig, ax = plt.subplots()
        for split in splits:
            xs = (1 + np.arange(len(quantity[split]))) / n_saves
            ax.plot(xs, quantity[split], label=split)
        ax.set_xlabel('Training epoch')
        if n_epochs > 1:
            ax.set_xlabel('Epoch')
        ax.set_ylabel(description)
        upper = ax.get_ylim()[1] if description == 'Loss' else 1
        ax.set_ylim(0, upper)
        ax.set_xlim(0, ax.get_xlim()[1])
        ax.set_title(model_dir.name, fontsize=7)
        ax.legend()
        ax.grid(alpha=0.5, which='both')
        plt.savefig(model_dir / '{}.pdf'.format(description))
# settings
token = 'word'
max_len = 20
hidden_size = 16
small = False

# training and sampling
total_n = 10000
temperature = 0.5
how_many = 50

# create the vocab, model, (and embedding)
vocab = generate.get_vocab(token, small=small)
if token == 'word':
    emb = generate.get_embedding('word2vec')
    input_size = emb.vectors.shape[1]
    output_size = emb.vectors.shape[0]
elif token == 'character':
    emb = None
    input_size = vocab.size
    output_size = vocab.size
model = LanguageModel('RNN', input_size, hidden_size, output_size)

# create criterion and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# training loop
every_n = int(total_n / 100)
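# A minimal sketch of the training step these settings feed into, assuming
# `batch` and `labels` come from generate.generate and have already been
# encoded as above; the full loop with validation, logging and checkpointing
# is in train().
def training_step_sketch(batch, labels):
    optimizer.zero_grad()              # clear gradients from the previous step
    outputs = model(batch)             # (batch_size, output_size) logits
    loss = criterion(outputs, labels)  # cross-entropy against target token ids
    loss.backward()                    # backpropagate
    optimizer.step()                   # update the parameters
    return loss.item()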