# Assumed imports for the excerpts below (the Variable/.data[0] usage indicates
# PyTorch 0.3-era code; wn is NLTK's WordNet interface):
import argparse
import os
import traceback

import numpy
import torch
import torch.nn as nn
from torch.autograd import Variable
from nltk.corpus import wordnet as wn

import utils
from utils import is_remote, zeros


def test(self, prime_words, predict_len, temperature=0.8):
    hidden = self.init_hidden(1)
    prime_input = Variable(self.from_string_to_tensor(prime_words).unsqueeze(0))
    if is_remote():
        prime_input = prime_input.cuda()
    predicted = ' '.join(prime_words)

    # Use priming string to "build up" hidden state
    for p in range(len(prime_words) - 1):
        _, hidden = self.forward(prime_input[:, p], hidden)
    inp = prime_input[:, -1]

    for p in range(predict_len):
        output, hidden = self.forward(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted word to string and use as next input
        predicted_word = self.from_predicted_index_to_string(top_i)
        predicted += ' ' + predicted_word
        inp = Variable(self.from_string_to_tensor([predicted_word]).unsqueeze(0))
        if is_remote():
            inp = inp.cuda()

    return predicted
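# A minimal standalone sketch of the temperature sampling used in test() above:
# dividing the scores by a temperature before exponentiating sharpens (T < 1) or
# flattens (T > 1) the distribution that multinomial() draws from. The toy
# vocabulary scores here are made up for illustration.
import torch

scores = torch.tensor([2.0, 1.0, 0.1])  # unnormalized log-scores over a toy vocabulary
for temperature in (0.5, 1.0, 2.0):
    dist = scores.div(temperature).exp()   # proportional to softmax(scores / T)
    idx = torch.multinomial(dist, 1)[0]    # sample one index
    print(temperature, (dist / dist.sum()).tolist(), idx.item())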
def init_hidden(self, batch_size):
    return (zeros(gpu=is_remote(),
                  sizes=(self.opt.n_layers_rnn, batch_size, self.opt.hidden_size_rnn)),
            zeros(gpu=is_remote(),
                  sizes=(self.opt.n_layers_rnn, batch_size, self.opt.hidden_size_rnn)))
def init_hidden(self, batch_size):
    # A bidirectional LSTM needs num_layers * 2 hidden and cell states
    n_states = self.opt.n_layers_rnn * 2 if self.opt.bidirectional else self.opt.n_layers_rnn
    return (zeros(gpu=is_remote(),
                  sizes=(n_states, batch_size, self.opt.hidden_size_rnn)),
            zeros(gpu=is_remote(),
                  sizes=(n_states, batch_size, self.opt.hidden_size_rnn)))
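# Sketch of the shape convention behind init_hidden(), with assumed toy sizes:
# nn.LSTM expects initial hidden/cell states shaped
# (num_layers * num_directions, batch, hidden_size), which is why the
# bidirectional case above doubles the first dimension.
import torch
import torch.nn as nn

n_layers, batch, hidden = 2, 4, 8
lstm = nn.LSTM(input_size=5, hidden_size=hidden, num_layers=n_layers, bidirectional=True)
h0 = torch.zeros(n_layers * 2, batch, hidden)  # 2 directions
c0 = torch.zeros(n_layers * 2, batch, hidden)
out, (hn, cn) = lstm(torch.zeros(3, batch, 5), (h0, c0))  # input: (seq_len, batch, features)
print(out.shape)  # torch.Size([3, 4, 16]) -- both directions concatenated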
def analyze_topics(self, batch):
    examples = []
    # Select "topic" as the closest word, in the embedded space, to the centroid of the sentence.
    for i in range(len(batch[0])):
        sentence = [batch[j][i] for j in range(len(batch))]
        sentence_var = Variable(self.from_string_to_tensor(sentence))
        sentence_var = sentence_var.cuda() if is_remote() else sentence_var
        sentence_emb = self.encoder(sentence_var)
        centroid = torch.mean(sentence_emb, 0)
        distances = torch.sum((sentence_emb - centroid) ** 2, 1)
        closest_word_to_centroid = sentence[distances.min(0)[1].data[0]]
        distances_to_centroid = {sentence[k]: distances[k].data[0] for k in range(len(sentence))}
        examples.append({
            'sentence': ' '.join(sentence),
            'closest_word_to_centroid': closest_word_to_centroid,
            'distances_to_centroid': distances_to_centroid
        })
    for e in examples:
        try:
            print('Sentence: {}. Closest word to centroid: {}. Distances to centroid: {}.'.format(
                e['sentence'], e['closest_word_to_centroid'], e['distances_to_centroid']))
        except Exception:
            print('Exception when printing')
def get_input_and_target(self, batch):
    if len(batch) == 2:  # Topic included. Batch is: topics, sentences
        batch = batch[1]
    batch_size, sentence_len = len(batch[0]), len(batch) - 1
    inp = torch.LongTensor(batch_size, sentence_len + 1, 2)
    target = torch.LongTensor(batch_size, sentence_len + 1, 2)
    for i in range(sentence_len + 1):
        sentence = batch[i]
        inp[:, i, 0] = self.from_string_to_tensor(sentence)
        inp[:, i, 1:] = self.from_chars_to_tensor(sentence)
        target[:, i, 0] = self.from_string_to_tensor(sentence)
        target[:, i, 1:] = self.from_chars_to_tensor(sentence)
    # Shift by one position: each word is predicted from its predecessor
    inp = inp[:, :-1]
    target = target[:, 1:]
    inp = Variable(inp)
    target = Variable(target)
    if is_remote():
        inp = inp.cuda()
        target = target.cuda()
    return inp, target
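# Sketch of the shift performed at the end of get_input_and_target(): for
# next-word prediction the input is every token but the last, and the target is
# every token but the first. Token indices here are arbitrary.
import torch

tokens = torch.tensor([[5, 9, 2, 7]])  # batch of one encoded sentence
inp, target = tokens[:, :-1], tokens[:, 1:]
print(inp.tolist(), target.tolist())   # [[5, 9, 2]] [[9, 2, 7]]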
def select_topics(self, batch):
    # batch is weirdly ordered due to the Pytorch Dataset class from which we
    # inherit in each dataloader: sizes are sentence_len x batch_size
    # Is the word a noun, adjective, verb or adverb?
    is_nava = lambda word: len(wn.synsets(word)) != 0
    batch_size = len(batch[0])
    # Select "topic" as the least common noun, verb, adjective or adverb in each sentence
    topics = torch.LongTensor(self.opt.n_layers_rnn, batch_size, 1)
    topics_words = []
    for i in range(batch_size):
        sentence = [batch[j][i] for j in range(len(batch))]
        words_sorted = sorted([(self.word_count[word], word)
                               for word in set(sentence)
                               if is_nava(word) and word in self.word2idx])
        if not words_sorted:
            # No usable word in the sentence: fall back to its first word,
            # mirroring the simpler select_topics variant below
            words_sorted = [(0, sentence[0])]
        if len(words_sorted) < self.opt.n_layers_rnn:
            # Pad with the least common word so there is one topic per layer
            words_sorted += [words_sorted[0]] * (self.opt.n_layers_rnn - len(words_sorted))
        for j in range(self.opt.n_layers_rnn):
            topics[j, i] = self.from_string_to_tensor([words_sorted[j][1]])
        topics_words.append(tuple(w[1] for w in words_sorted[:self.opt.n_layers_rnn]))
    if self.opt.bidirectional:
        topics = torch.cat([topics, topics], 2)
    topics = Variable(topics).cuda() if is_remote() else Variable(topics)
    return topics, topics_words
def closeness_to_topics(self, words_weights, topics):
    _, words = words_weights.max(1)
    dist = nn.PairwiseDistance(p=2)
    closeness = []
    for i in range(topics.size(1)):
        closeness_batch = []
        for j in range(topics.size(0)):
            topic_str = self.inverted_word_dict[topics[j, i].data[0]]
            topic = topics[j, i]
            word = words[i]
            # A topic is represented by itself plus its in-vocabulary WordNet synonyms
            synonyms = [topic.data[0]]
            for syn in wn.synsets(topic_str):
                synonyms += [self.word2idx[l.name()] for l in syn.lemmas()
                             if l.name() in self.word2idx]
            synonyms = torch.from_numpy(numpy.array(synonyms))
            synonyms = Variable(synonyms).cuda() if is_remote() else Variable(synonyms)
            closeness_batch.append(torch.mean(torch.stack(
                [dist(self.encoder(syn), self.encoder(word)) for syn in synonyms])))
        closeness_batch = torch.cat(closeness_batch)
        closeness.append(closeness_batch.mean())
    return torch.stack(closeness)
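# Sketch of the distance primitive behind closeness_to_topics():
# nn.PairwiseDistance compares row-aligned vectors, so averaging it over the
# embeddings of a topic's synonyms yields one closeness score per predicted
# word. The embeddings below are random stand-ins for self.encoder outputs.
import torch
import torch.nn as nn

dist = nn.PairwiseDistance(p=2)
syn_emb = torch.randn(3, 10)                 # embeddings of 3 synonyms of the topic
word_emb = torch.randn(1, 10).expand(3, 10)  # one predicted word, broadcast to match
print(dist(syn_emb, word_emb).mean())        # mean L2 distance = closeness score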
def select_topics(self, batch):
    # Topics are provided directly by the dataloader: batch is (topics, sentences)
    batch_size = len(batch[1][0])
    topics_words = batch[0]
    topics = self.from_string_to_tensor(topics_words).view(batch_size, 1)
    return (Variable(topics).cuda() if is_remote() else Variable(topics)), topics_words
def analyze_closeness_to_topics(self, batch):
    batch_size, sentence_len = len(batch[0]), len(batch) - 1

    # Analyze closeness to topics
    print('Analyzing predictions......')
    examples = []
    self.copy_weights_encoder()
    topics, _ = self.select_topics(batch)
    if len(batch) == 2:
        batch = batch[1]
    inp, target = self.get_input_and_target(batch)

    # Topic is provided as an initialization to the hidden state
    n_states = self.opt.n_layers_rnn * 2 if self.opt.bidirectional else self.opt.n_layers_rnn
    hidden = (torch.cat([self.encoder(topics) for _ in range(n_states)], 1).permute(1, 0, 2),
              torch.cat([self.encoder(topics) for _ in range(n_states)], 1).permute(1, 0, 2))  # N_layers x batch_size x N_hidden

    for i in range(len(batch[0])):
        sentence = [batch[j][i] for j in range(len(batch))]
        examples.append({'sentence': ' '.join(sentence),
                         'topic': self.inverted_word_dict[topics[i].data[0]],
                         'preds and dist': []})

    # Encode/Decode sentence
    for w in range(sentence_len):
        output, hidden = self.forward(inp[:, w], hidden)

        # Topic closeness loss: weight each word's contribution by the inverse of its frequency
        # _, words_i = output.max(1)
        # Sample from the network as a multinomial distribution (fixed temperature 0.8)
        output_dist = output.div(0.8).exp()
        words_i = torch.multinomial(output_dist, 1)
        loss_topic_weights = Variable(torch.from_numpy(numpy.array(
            [1 / self.word_count[self.inverted_word_dict[i.data[0]]] for i in words_i]
        )).unsqueeze(1)).float()
        loss_topic_weights = loss_topic_weights.cuda() if is_remote() else loss_topic_weights
        closeness = self.closeness_to_topics(output, topics)
        for k in range(len(batch[0])):
            examples[k]['preds and dist'].append({
                'predicted word': self.inverted_word_dict[words_i[k].data[0]],
                'word weight in topic loss': loss_topic_weights[k].data[0],
                'closeness to sentence topic': closeness[k].data[0]
            })

    for e in examples:
        try:
            print('Sentence: {}. Topic: {}. Predictions, weights and closeness: {}.'.format(
                e['sentence'], e['topic'],
                '\n\t' + '\n\t'.join(str(x) for x in e['preds and dist'])))
        except Exception:
            print('Exception when printing')
def select_topics(self, batch):
    # batch is weirdly ordered due to the Pytorch Dataset class from which we
    # inherit in each dataloader: sizes are sentence_len x batch_size
    batch_size = len(batch[0])
    # Select "topic" as the closest word, in the embedded space, to the centroid of the sentence.
    topics = torch.LongTensor(batch_size, 1)
    topics_words = []
    for i in range(batch_size):
        sentence = [batch[j][i] for j in range(len(batch))]
        sentence_var = Variable(self.from_string_to_tensor(sentence))
        sentence_var = sentence_var.cuda() if is_remote() else sentence_var
        sentence_emb = self.encoder(sentence_var)
        centroid = torch.mean(sentence_emb, 0)
        distances = torch.sum((sentence_emb - centroid) ** 2, 1)
        closest_word_to_centroid = sentence[distances.min(0)[1].data[0]]
        topics[i] = self.from_string_to_tensor([closest_word_to_centroid])
        topics_words.append(closest_word_to_centroid)
    return (Variable(topics).cuda() if is_remote() else Variable(topics)), topics_words
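# Standalone sketch of the centroid heuristic in select_topics() above: embed
# the sentence, average the embeddings into a centroid, and pick the word whose
# embedding is nearest to it. The embedding matrix is random for illustration.
import torch

sentence = ['the', 'cat', 'sat']
emb = torch.randn(len(sentence), 16)        # stand-in for self.encoder(sentence)
centroid = emb.mean(0)
distances = ((emb - centroid) ** 2).sum(1)
print('topic:', sentence[distances.argmin().item()])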
def test(self, prime_words, predict_len, temperature=0.8):
    self.copy_weights_encoder()
    topic, _ = self.get_test_topic()
    n_states = self.opt.n_layers_rnn * 2 if self.opt.bidirectional else self.opt.n_layers_rnn
    topic_enc = torch.cat([self.encoder(topic) for _ in range(n_states)], 1) \
        .contiguous().permute(1, 0, 2)  # N_layers x 1 x N_hidden
    hidden = topic_enc, topic_enc.clone()

    prime_input = Variable(self.from_string_to_tensor(prime_words).unsqueeze(0))
    if is_remote():
        prime_input = prime_input.cuda()
    predicted = ' '.join(prime_words)

    # Use priming string to "build up" hidden state
    for p in range(len(prime_words) - 1):
        _, hidden = self.forward(prime_input[:, p], hidden)
    inp = prime_input[:, -1]

    for p in range(predict_len):
        output, hidden = self.forward(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted word to string and use as next input
        predicted_word = self.from_predicted_index_to_string(top_i)
        predicted += ' ' + predicted_word
        inp = Variable(self.from_string_to_tensor([predicted_word]).unsqueeze(0))
        if is_remote():
            inp = inp.cuda()

    return predicted
def evaluate(self, batch):
    loss_reconstruction = 0
    loss_topic = 0
    self.copy_weights_encoder()
    topics, topics_words = self.select_topics(batch)
    inp, target = self.get_input_and_target(batch)

    # Topic is provided as an initialization to the hidden state
    n_states = self.opt.n_layers_rnn * 2 if self.opt.bidirectional else self.opt.n_layers_rnn
    topic_enc = torch.cat([self.encoder(topics) for _ in range(n_states)], 1) \
        .contiguous().permute(1, 0, 2)  # N_layers x batch_size x N_hidden
    hidden = topic_enc, topic_enc.clone()

    # Encode/Decode sentence
    loss_topic_total_weight = 0
    last_output = inp[:, 0]  # Only used if "reuse_pred" is set
    for w in range(self.opt.sentence_len):
        x = last_output if self.opt.reuse_pred else inp[:, w]
        output, hidden = self.forward(x, hidden)

        # Reconstruction loss
        loss_reconstruction += self.criterion(output.view(self.opt.batch_size, -1), target[:, w])

        # Topic closeness loss: weight each word's contribution by the inverse of its frequency
        _, words_i = output.max(1)
        last_output = words_i
        loss_topic_weights = Variable(torch.from_numpy(numpy.array(
            [1 / self.word_count[self.inverted_word_dict[i.data[0]]] for i in words_i]
        )).unsqueeze(1)).float()
        loss_topic_weights = loss_topic_weights.cuda() if is_remote() else loss_topic_weights
        loss_topic_total_weight += loss_topic_weights
        loss_topic += self.closeness_to_topics(output, topics) * loss_topic_weights

    loss_topic = torch.mean(loss_topic / loss_topic_total_weight)
    self.losses_reconstruction.append(loss_reconstruction.data[0])
    self.losses_topic.append(loss_topic.data[0])

    # Rescale the topic loss to the magnitude of the reconstruction loss before mixing
    ratio = float(loss_reconstruction.detach().cpu().data.numpy()[0]
                  / loss_topic.detach().cpu().data.numpy()[0])
    loss = self.opt.loss_alpha * loss_reconstruction + (1 - self.opt.loss_alpha) * loss_topic * ratio
    return loss, loss_reconstruction, loss_topic
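# Sketch of the loss mixing at the end of evaluate(): the ratio rescales the
# topic term to the magnitude of the reconstruction term, so loss_alpha controls
# the blend rather than compensating for scale. Numbers are illustrative.
loss_reconstruction, loss_topic, loss_alpha = 4.0, 0.02, 0.7
ratio = loss_reconstruction / loss_topic  # 200.0
loss = loss_alpha * loss_reconstruction + (1 - loss_alpha) * loss_topic * ratio
print(loss)  # 0.7 * 4.0 + 0.3 * 0.02 * 200.0 = 4.0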
def evaluate(self, batch):
    inp = torch.LongTensor(self.opt.batch_size, self.opt.sentence_len)
    target = torch.LongTensor(self.opt.batch_size, self.opt.sentence_len)
    for i, sentence in enumerate(batch):
        inp[i] = self.from_string_to_tensor(sentence[:-1])
        target[i] = self.from_string_to_tensor(sentence[1:])
    inp = Variable(inp)
    target = Variable(target)
    if is_remote():
        inp = inp.cuda()
        target = target.cuda()

    hidden = self.init_hidden(self.opt.batch_size)
    loss = 0
    for c in range(self.opt.sentence_len):
        output, hidden = self.forward(inp[:, c], hidden)
        loss += self.criterion(output.view(self.opt.batch_size, -1), target[:, c])
    return loss
def select_topics(self, batch):
    # batch is weirdly ordered due to the Pytorch Dataset class from which we
    # inherit in each dataloader: sizes are sentence_len x batch_size
    # Is the word a noun, adjective, verb or adverb?
    is_nava = lambda word: len(wn.synsets(word)) != 0
    batch_size = len(batch[0])
    # Select "topic" as the least common noun, verb, adjective or adverb in each sentence
    topics = torch.LongTensor(batch_size, 1)
    topics_words = []
    for i in range(batch_size):
        sentence = [batch[j][i] for j in range(len(batch))]
        words_sorted = sorted([(self.word_count[word], word)
                               for word in set(sentence)
                               if is_nava(word) and word in self.word2idx])
        least_common_word = words_sorted[0][1] if len(words_sorted) > 0 else sentence[0]
        topics[i] = self.from_string_to_tensor([least_common_word])
        topics_words.append(least_common_word)
    return (Variable(topics).cuda() if is_remote() else Variable(topics)), topics_words
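# Sketch of the least-common-word heuristic shared by the select_topics()
# variants above: filter to words WordNet knows (a proxy for noun/verb/adj/adv)
# and take the one with the lowest corpus count. The word_count table here is
# made up, and the snippet assumes NLTK's WordNet corpus has been downloaded.
from nltk.corpus import wordnet as wn

word_count = {'the': 1000, 'cat': 40, 'sat': 25, 'mat': 7}
sentence = ['the', 'cat', 'sat', 'on', 'the', 'mat']
candidates = sorted((word_count[w], w) for w in set(sentence)
                    if w in word_count and wn.synsets(w))
print(candidates[0][1] if candidates else sentence[0])  # 'mat'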
def train(n_epochs):
    # Prepare for saving
    os.makedirs(opt.save_dir, exist_ok=True)

    # Training
    best_valid_loss = 1e6
    train_losses, valid_losses = [], []
    valid_loss_reconstruction, valid_loss_topic = -1, -1
    for i in range(0, n_epochs):
        train_loss, train_loss_reconstruction, train_loss_topic = train_epoch(i)
        train_losses.append(train_loss)
        try:
            valid_loss, valid_loss_reconstruction, valid_loss_topic = test_epoch(i)
            valid_losses.append(valid_loss)
        except Exception:
            print('Error when testing epoch')
            valid_losses.append(1e6)

        # If model improved, save it
        if valid_losses[-1] < best_valid_loss:
            best_valid_loss = valid_losses[-1]
            # Save (move to CPU first so the checkpoint loads without a GPU)
            utils.move(gpu=False, tensor_list=model.submodules)
            torch.save({
                'epoch': i,
                'model': model,
                'train_loss': train_losses,
                'valid_loss': valid_losses,
                'optimizer': optimizer,
                'opt': opt
            }, opt.save_dir + 'checkpoint')
            utils.move(gpu=utils.is_remote(), tensor_list=model.submodules)

        # Print log string
        if 'topic' in opt.model:
            log_string = (
                'iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, best_valid_loss: {:0.6f}, lr: {:0.5f}, '
                'train_loss_reconstruction: {:0.6f}, train_loss_topic: {:0.6f}, '
                'valid_loss_reconstruction: {:0.6f}, valid_loss_topic: {:0.6f}'
            ).format((i + 1) * opt.epoch_size, train_losses[-1], valid_losses[-1],
                     best_valid_loss, opt.lrt, train_loss_reconstruction, train_loss_topic,
                     valid_loss_reconstruction, valid_loss_topic)
        else:
            log_string = (
                'iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, best_valid_loss: {:0.6f}, lr: {:0.5f}'
            ).format((i + 1) * opt.epoch_size, train_losses[-1], valid_losses[-1],
                     best_valid_loss, opt.lrt)
        print(log_string)
        utils.log(opt.save_dir + 'logs.txt', log_string, utils.time_since(start))

        # Print an example generation
        warmup = 'Wh' if opt.model == 'char_rnn' else ['what']
        test_sample = model.test(warmup, opt.sentence_len)
        utils.log(opt.save_dir + 'examples.txt', test_sample)
        try:
            print(test_sample + '\n')
        except Exception:
            traceback.print_exc()
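# Companion sketch to the checkpointing in train() above: restoring the saved
# dictionary. The path and keys mirror the torch.save() call; note that loading
# a pickled full model object requires the same class definitions importable.
import torch

checkpoint = torch.load(opt.save_dir + 'checkpoint', map_location='cpu')
model = checkpoint['model']
optimizer = checkpoint['optimizer']
start_epoch = checkpoint['epoch'] + 1
print('resuming at epoch', start_epoch,
      'best valid loss so far:', min(checkpoint['valid_loss']))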
#####################
# Training settings #
#####################

parser = argparse.ArgumentParser()
parser.add_argument('-seed', type=int, default=1)
parser.add_argument('-dataloader', type=str, default='multi_file_str')  # Must be a valid file name in dataloaders/ folder
parser.add_argument('-model', type=str, default='word_rnn')  # Must be a valid file name in models/ folder
parser.add_argument('-batch_size', type=int, default=128)
parser.add_argument('-lrt', type=float, default=0.0001)
parser.add_argument('-epoch_size', type=int, default=100)
parser.add_argument('-n_epochs', type=int, default=2000)
parser.add_argument('-gpu', type=int, default=1 if utils.is_remote() else 0,
                    help='Which GPU to use, ignored if running locally')
parser.add_argument('-data_dir', type=str, default='data/',
                    help='path for preprocessed dataloader files')
parser.add_argument('-dropout', type=float, default=0.4)

############################
# Model dependent settings #
############################

parser.add_argument('-hidden_size_rnn', type=int, default=200,
                    help='RNN hidden vector size')
parser.add_argument('-n_layers_rnn',