Example #1
    def test(self, prime_words, predict_len, temperature=0.8):

        hidden = self.init_hidden(1)
        prime_input = Variable(
            self.from_string_to_tensor(prime_words).unsqueeze(0))

        if is_remote():
            prime_input = prime_input.cuda()
        predicted = ' '.join(prime_words)

        # Use priming string to "build up" hidden state
        for p in range(len(prime_words) - 1):
            _, hidden = self.forward(prime_input[:, p], hidden)

        inp = prime_input[:, -1]

        for p in range(predict_len):
            output, hidden = self.forward(inp, hidden)

            # Sample from the network as a multinomial distribution
            output_dist = output.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0]

            # Add predicted word to the output string and use it as the next input
            predicted_word = self.from_predicted_index_to_string(top_i)
            predicted += ' ' + predicted_word
            inp = Variable(
                self.from_string_to_tensor([predicted_word]).unsqueeze(0))
            if is_remote():
                inp = inp.cuda()

        return predicted
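A minimal usage sketch (not from the source): `model` stands for any instance of the class that defines test() above, and the priming words are illustrative.

# Prime the generator with two words, then sample 20 more at the default temperature.
sample = model.test(prime_words=['the', 'river'], predict_len=20, temperature=0.8)
print(sample)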
Example #2
 def init_hidden(self, batch_size):
     return (zeros(gpu=is_remote(),
                   sizes=(self.opt.n_layers_rnn, batch_size,
                          self.opt.hidden_size_rnn)),
             zeros(gpu=is_remote(),
                   sizes=(self.opt.n_layers_rnn, batch_size,
                          self.opt.hidden_size_rnn)))
Example #3
 def init_hidden(self, batch_size):
     if self.opt.bidirectional:
         return (zeros(gpu=is_remote(),
                       sizes=(self.opt.n_layers_rnn * 2, batch_size,
                              self.opt.hidden_size_rnn)),
                 zeros(gpu=is_remote(),
                       sizes=(self.opt.n_layers_rnn * 2, batch_size,
                              self.opt.hidden_size_rnn)))
     else:
         return (zeros(gpu=is_remote(),
                       sizes=(self.opt.n_layers_rnn, batch_size,
                              self.opt.hidden_size_rnn)),
                 zeros(gpu=is_remote(),
                       sizes=(self.opt.n_layers_rnn, batch_size,
                              self.opt.hidden_size_rnn)))
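The zeros() helper used by init_hidden above is project-specific rather than part of PyTorch. A minimal sketch of what it presumably does, assuming the old-style Variable API used throughout these examples:

import torch
from torch.autograd import Variable

def zeros(gpu, sizes):
    # Zero-initialized hidden-state tensor, optionally moved to the GPU.
    t = Variable(torch.zeros(*sizes))
    return t.cuda() if gpu else t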
Example #4
    def analyze_topics(self, batch):
        examples = []

        # Select "topic" as the closest word, in the embedded space, to the centroid of the sentence.
        for i in range(len(batch[0])):
            sentence = [batch[j][i] for j in range(len(batch))]
            sentence_var = Variable(self.from_string_to_tensor(sentence))
            sentence_var = sentence_var.cuda() if is_remote() else sentence_var
            sentence_emb = self.encoder(sentence_var)
            centroid = torch.mean(sentence_emb, 0)
            distances = torch.sum((sentence_emb - centroid)**2, 1)
            closest_word_to_centroid = sentence[distances.min(0)[1].data[0]]
            distances_to_centroid = {
                sentence[k]: distances[k].data[0]
                for k in range(len(sentence))
            }
            examples.append({
                'sentence': ' '.join(sentence),
                'closest_word_to_centroid': closest_word_to_centroid,
                'distances_to_centroid': distances_to_centroid
            })
        for e in examples:
            try:
                print(
                    'Sentence: {}. Closest word to centroid: {}. Distances to centroid: {}.'
                    .format(e['sentence'], e['closest_word_to_centroid'],
                            e['distances_to_centroid']))
            except Exception:
                print('Exception when printing')
Example #5
    def get_input_and_target(self, batch):

        if len(batch) == 2:  # Topic included. Batch is: topics, sentences
            batch = batch[1]

        batch_size, sentence_len = len(batch[0]), len(batch) - 1
        inp = torch.LongTensor(batch_size, sentence_len + 1, 2)
        target = torch.LongTensor(batch_size, sentence_len + 1, 2)
        for i in range(sentence_len + 1):
            sentence = batch[i]
            inp[:, i, 0] = self.from_string_to_tensor(sentence)
            inp[:, i, 1:] = self.from_chars_to_tensor(sentence)
            target[:, i, 0] = self.from_string_to_tensor(sentence)
            target[:, i, 1:] = self.from_chars_to_tensor(sentence)
        inp = inp[:, :-1]
        target = target[:, 1:]

        inp = Variable(inp)
        target = Variable(target)

        if is_remote():
            inp = inp.cuda()
            target = target.cuda()

        return inp, target
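An illustrative check (not from the source) of the one-step shift applied above, which turns each sentence into standard next-word prediction pairs:

import torch

seq = torch.arange(5).view(1, 5, 1)     # batch_size=1, sentence_len+1=5, one feature
inp, target = seq[:, :-1], seq[:, 1:]   # same slicing as in get_input_and_target
# target is inp shifted by one step: at position t the model sees token t
# and is trained to predict token t+1.
assert (inp[:, 1:] == target[:, :-1]).all()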
Example #6
    def select_topics(self, batch):

        # batch comes ordered as sentence_len x batch_size because of the PyTorch Dataset class each dataloader inherits from

        # Is noun, adjective, verb or adverb?
        is_nava = lambda word: len(wn.synsets(word)) != 0
        batch_size = len(batch[0])

        # Select "topic" as the least common noun, verb, adjective or adverb in each sentence
        topics = torch.LongTensor(self.opt.n_layers_rnn, batch_size, 1)
        topics_words = []
        for i in range(batch_size):
            sentence = [batch[j][i] for j in range(len(batch))]
            words_sorted = sorted([(self.word_count[word], word)
                                   for word in set(sentence)
                                   if is_nava(word) and word in self.word2idx])
            if len(words_sorted) == 0:
                # No in-vocabulary noun/verb/adjective/adverb found: fall back to the first word
                words_sorted = [(0, sentence[0])]
            if len(words_sorted) < self.opt.n_layers_rnn:
                n_more = self.opt.n_layers_rnn - len(words_sorted)
                for _ in range(n_more):
                    words_sorted.append(words_sorted[0])
            for j in range(self.opt.n_layers_rnn):
                topics[j, i] = self.from_string_to_tensor([words_sorted[j][1]])
            topics_words.append(
                tuple(w[1] for w in words_sorted[:self.opt.n_layers_rnn]))

        if self.opt.bidirectional:
            topics = torch.cat([topics, topics], 2)
        topics = Variable(topics).cuda() if is_remote() else Variable(topics)
        return topics, topics_words
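A quick illustration (not from the source) of the is_nava test used above; it needs NLTK with the WordNet corpus downloaded (nltk.download('wordnet')) and treats any word that has at least one synset as a candidate topic word:

from nltk.corpus import wordnet as wn

print(len(wn.synsets('river')) != 0)  # True  -> content word, a possible topic
print(len(wn.synsets('the')) != 0)    # False -> function word, ignored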
Example #7
    def closeness_to_topics(self, words_weights, topics):

        _, words = words_weights.max(1)

        dist = nn.PairwiseDistance(p=2)

        closeness = []

        for i in range(topics.size(1)):
            closeness_batch = []
            for j in range(topics.size(0)):
                topic_str = self.inverted_word_dict[topics[j, i].data[0]]
                topic = topics[j, i]
                word = words[i]
                synonyms = [topic.data[0]]
                for syn in wn.synsets(topic_str):
                    synonyms += [self.word2idx[l.name()] for l in syn.lemmas()
                                 if l.name() in self.word2idx]
                synonyms = torch.from_numpy(numpy.array(synonyms))
                synonyms = Variable(synonyms).cuda() if is_remote() else Variable(synonyms)
                closeness_batch.append(
                    torch.mean(torch.stack([dist(self.encoder(syn), self.encoder(word)) for syn in synonyms]))
                )
            closeness_batch = torch.cat(closeness_batch)
            closeness.append(closeness_batch.mean())

        return torch.stack(closeness)
Example #8
 def select_topics(self, batch):
     try:
         batch_size = len(batch[1][0])
     except Exception:
         import pdb; pdb.set_trace()
     topics_words = batch[0]
     topics = self.from_string_to_tensor(topics_words).view(batch_size, 1)
     return Variable(topics).cuda() if is_remote() else Variable(topics), topics_words
Example #9
    def analyze_closeness_to_topics(self, batch):

        batch_size, sentence_len = len(batch[0]), len(batch) - 1

        # Analyze closeness to topics
        print('Analyzing predictions......')
        examples = []

        self.copy_weights_encoder()
        topics, _ = self.select_topics(batch)
        if len(batch) == 2:
            batch = batch[1]
        inp, target = self.get_input_and_target(batch)
        # Topic is provided as an initialization to the hidden state
        if self.opt.bidirectional:
            hidden = torch.cat([self.encoder(topics) for _ in range(self.opt.n_layers_rnn*2)], 1).permute(1, 0, 2), \
                     torch.cat([self.encoder(topics) for _ in range(self.opt.n_layers_rnn*2)], 1).permute(1, 0, 2)  # N_layers x batch_size x N_hidden
        else:
            hidden = torch.cat([self.encoder(topics) for _ in range(self.opt.n_layers_rnn)], 1).permute(1, 0, 2), \
                     torch.cat([self.encoder(topics) for _ in range(self.opt.n_layers_rnn)], 1).permute(1, 0, 2)  # N_layers x batch_size x N_hidden

        for i in range(len(batch[0])):
            sentence = [batch[j][i] for j in range(len(batch))]
            examples.append({
                'sentence': ' '.join(sentence),
                'topic': self.inverted_word_dict[topics[i].data[0]],
                'preds and dist': []
            })

        # Encode/Decode sentence
        for w in range(sentence_len):
            output, hidden = self.forward(inp[:, w], hidden)

            # Topic closeness loss: weight each word's contribution by the inverse of its frequency
            # _, words_i = output.max(1)
            # Sample from the network as a multinomial distribution
            output_dist = output.div(0.8).exp()
            words_i = torch.multinomial(output_dist, 1)

            loss_topic_weights = Variable(torch.from_numpy(numpy.array(
                [1 / self.word_count[self.inverted_word_dict[i.data[0]]] for i in words_i]
            )).unsqueeze(1)).float()
            loss_topic_weights = loss_topic_weights.cuda() if is_remote() else loss_topic_weights
            closeness = self.closeness_to_topics(output, topics)

            for i in range(len(batch[0])):
                examples[i]['preds and dist'].append({
                    'predicted word': self.inverted_word_dict[words_i[i].data[0]],
                    'word weight in topic loss': loss_topic_weights[i].data[0],
                    'closeness to sentence topic': closeness[i].data[0]
                })
        for e in examples:
            try:
                print('Sentence: {}. Topic: {}. Predictions, weights and closeness: {}.'.format(
                    e['sentence'], e['topic'],
                    '\n\t' + '\n\t'.join([str(x) for x in e['preds and dist']])))
            except Exception:
                print('Exception when printing')
Example #10
    def select_topics(self, batch):

        # batch comes ordered as sentence_len x batch_size because of the PyTorch Dataset class each dataloader inherits from
        batch_size = len(batch[0])

        # Select "topic" as the closest word, in the embedded space, to the centroid of the sentence.
        topics = torch.LongTensor(batch_size, 1)
        topics_words = []
        for i in range(batch_size):
            sentence = [batch[j][i] for j in range(len(batch))]
            sentence_var = Variable(self.from_string_to_tensor(sentence))
            sentence_var = sentence_var.cuda() if is_remote() else sentence_var
            sentence_emb = self.encoder(sentence_var)
            centroid = torch.mean(sentence_emb, 0)
            distances = torch.sum((sentence_emb - centroid)**2, 1)
            closest_word_to_centroid = sentence[distances.min(0)[1].data[0]]
            topics[i] = self.from_string_to_tensor([closest_word_to_centroid])
            topics_words.append(closest_word_to_centroid)

        topics = Variable(topics).cuda() if is_remote() else Variable(topics)
        return topics, topics_words
Example #11
    def test(self, prime_words, predict_len, temperature=0.8):

        self.copy_weights_encoder()
        topic, _ = self.get_test_topic()
        if self.opt.bidirectional:
            topic_enc = torch.cat([self.encoder(topic) for _ in range(self.opt.n_layers_rnn*2)], 1) \
                .contiguous().permute(1, 0, 2)  # N_layers x 1 x N_hidden
        else:
            topic_enc = torch.cat([self.encoder(topic) for _ in range(self.opt.n_layers_rnn)], 1) \
                             .contiguous().permute(1, 0, 2)  # N_layers x 1 x N_hidden
        hidden = topic_enc, topic_enc.clone()
        prime_input = Variable(self.from_string_to_tensor(prime_words).unsqueeze(0))

        if is_remote():
            prime_input = prime_input.cuda()
        predicted = ' '.join(prime_words)

        # Use priming string to "build up" hidden state
        for p in range(len(prime_words) - 1):
            _, hidden = self.forward(prime_input[:, p], hidden)

        inp = prime_input[:, -1]

        for p in range(predict_len):
            output, hidden = self.forward(inp, hidden)

            # Sample from the network as a multinomial distribution
            output_dist = output.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0]

            # Add predicted word to the output string and use it as the next input
            predicted_word = self.from_predicted_index_to_string(top_i)
            predicted += ' ' + predicted_word
            inp = Variable(self.from_string_to_tensor([predicted_word]).unsqueeze(0))
            if is_remote():
                inp = inp.cuda()

        return predicted
Example #12
    def evaluate(self, batch):

        loss_reconstruction = 0
        loss_topic = 0

        self.copy_weights_encoder()
        topics, topics_words = self.select_topics(batch)
        inp, target = self.get_input_and_target(batch)

        # Topic is provided as an initialization to the hidden state
        if self.opt.bidirectional:
            topic_enc = torch.cat([self.encoder(topics) for _ in range(self.opt.n_layers_rnn*2)], 1) \
                .contiguous().permute(1, 0, 2)  # N_layers x 1 x N_hidden
        else:
            topic_enc = torch.cat([self.encoder(topics) for _ in range(self.opt.n_layers_rnn)], 1) \
                             .contiguous().permute(1, 0, 2)  # N_layers x 1 x N_hidden

        hidden = topic_enc, topic_enc.clone()

        # Encode/Decode sentence
        loss_topic_total_weight = 0
        last_output = inp[:, 0]  # Only used if "reuse_pred" is set
        for w in range(self.opt.sentence_len):

            x = last_output if self.opt.reuse_pred else inp[:, w]
            output, hidden = self.forward(x, hidden)

            # Reconstruction Loss
            loss_reconstruction += self.criterion(output.view(self.opt.batch_size, -1), target[:, w])

            # Topic closeness loss: weight each word's contribution by the inverse of its frequency
            _, words_i = output.max(1)
            last_output = words_i
            loss_topic_weights = Variable(torch.from_numpy(numpy.array(
                [1/self.word_count[self.inverted_word_dict[i.data[0]]] for i in words_i]
            )).unsqueeze(1)).float()
            loss_topic_weights = loss_topic_weights.cuda() if is_remote() else loss_topic_weights
            loss_topic_total_weight += loss_topic_weights
            loss_topic += self.closeness_to_topics(output, topics) * loss_topic_weights

        loss_topic = torch.mean(loss_topic / loss_topic_total_weight)

        self.losses_reconstruction.append(loss_reconstruction.data[0])
        self.losses_topic.append(loss_topic.data[0])

        ratio = float(loss_reconstruction.detach().cpu().data.numpy()[0] / loss_topic.detach().cpu().data.numpy()[0])
        loss = self.opt.loss_alpha * loss_reconstruction + (1 - self.opt.loss_alpha) * loss_topic * ratio
        return loss, loss_reconstruction, loss_topic
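Illustrative arithmetic (not from the source) for the blended loss returned above, assuming opt.loss_alpha = 0.7, loss_reconstruction = 4.0 and loss_topic = 0.5:

alpha, loss_reconstruction, loss_topic = 0.7, 4.0, 0.5
ratio = loss_reconstruction / loss_topic                                # 8.0
loss = alpha * loss_reconstruction + (1 - alpha) * loss_topic * ratio   # ~4.0
# Rescaling the topic term by `ratio` keeps it on the same scale as the
# reconstruction term, so loss_alpha alone controls the trade-off.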
Example #13
    def evaluate(self, batch):

        inp = torch.LongTensor(self.opt.batch_size, self.opt.sentence_len)
        target = torch.LongTensor(self.opt.batch_size, self.opt.sentence_len)
        for i, sentence in enumerate(batch):
            inp[i] = self.from_string_to_tensor(sentence[:-1])
            target[i] = self.from_string_to_tensor(sentence[1:])
        inp = Variable(inp)
        target = Variable(target)
        if is_remote():
            inp = inp.cuda()
            target = target.cuda()

        hidden = self.init_hidden(self.opt.batch_size)
        loss = 0

        for c in range(self.opt.sentence_len):
            output, hidden = self.forward(inp[:, c], hidden)
            loss += self.criterion(output.view(self.opt.batch_size, -1),
                                   target[:, c])

        return loss
Example #14
    def select_topics(self, batch):

        # batch comes ordered as sentence_len x batch_size because of the PyTorch Dataset class each dataloader inherits from

        # Is noun, adjective, verb or adverb?
        is_nava = lambda word: len(wn.synsets(word)) != 0
        batch_size = len(batch[0])

        # Select "topic" as the least common noun, verb, adjective or adverb in each sentence
        topics = torch.LongTensor(batch_size, 1)
        topics_words = []
        for i in range(batch_size):
            sentence = [batch[j][i] for j in range(len(batch))]
            words_sorted = sorted([(self.word_count[word], word)
                                   for word in set(sentence)
                                   if is_nava(word) and word in self.word2idx])
            least_common_word = words_sorted[0][1] if len(
                words_sorted) > 0 else sentence[0]
            topics[i] = self.from_string_to_tensor([least_common_word])
            topics_words.append(least_common_word)

        topics = Variable(topics).cuda() if is_remote() else Variable(topics)
        return topics, topics_words
Example #15
def train(n_epochs):

    # prepare for saving
    os.system("mkdir -p " + opt.save_dir)

    # training
    best_valid_loss = 1e6
    train_losses, valid_losses = [], []
    valid_loss_reconstruction, valid_loss_topic = -1, -1
    for i in range(0, n_epochs):
        train_loss, train_loss_reconstruction, train_loss_topic = train_epoch(
            i)
        train_losses.append(train_loss)
        try:
            valid_loss, valid_loss_reconstruction, valid_loss_topic = test_epoch(
                i)
            valid_losses.append(valid_loss)
        except Exception:
            print('Error when testing epoch')
            valid_losses.append(1e6)

        # If model improved, save it
        if valid_losses[-1] < best_valid_loss:
            best_valid_loss = valid_losses[-1]
            # save
            utils.move(gpu=False, tensor_list=model.submodules)
            torch.save(
                {
                    'epoch': i,
                    'model': model,
                    'train_loss': train_losses,
                    'valid_loss': valid_losses,
                    'optimizer': optimizer,
                    'opt': opt
                }, opt.save_dir + 'checkpoint')
            utils.move(gpu=utils.is_remote(), tensor_list=model.submodules)

        # Print log string
        if 'topic' in opt.model:
            log_string = (
                'iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, best_valid_loss: {:0.6f}, lr: {:0.5f}, '
                'train_loss_reconstruction: {:0.6f}, train_loss_topic: {:0.6f}, '
                'valid_loss_reconstruction: {:0.6f}, valid_loss_topic: {:0.6f}'
            ).format((i + 1) * opt.epoch_size, train_losses[-1],
                     valid_losses[-1], best_valid_loss, opt.lrt,
                     train_loss_reconstruction, train_loss_topic,
                     valid_loss_reconstruction, valid_loss_topic)
        else:
            log_string = (
                'iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, best_valid_loss: {:0.6f}, lr: {:0.5f}'
            ).format((i + 1) * opt.epoch_size, train_losses[-1],
                     valid_losses[-1], best_valid_loss, opt.lrt)
        print(log_string)
        utils.log(opt.save_dir + 'logs.txt', log_string,
                  utils.time_since(start))

        # Print example
        warmup = 'Wh' if opt.model == 'char_rnn' else ['what']
        test_sample = model.test(warmup, opt.sentence_len)
        utils.log(opt.save_dir + 'examples.txt', test_sample)
        try:
            print(test_sample + '\n')
        except Exception:
            traceback.print_exc()
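A small sketch (not from the source) of reading back the checkpoint written above; the dictionary keys match those passed to torch.save in the loop, and `opt` is the same options namespace:

import torch

checkpoint = torch.load(opt.save_dir + 'checkpoint')
model = checkpoint['model']                # the full model object, as saved above
optimizer = checkpoint['optimizer']
start_epoch = checkpoint['epoch'] + 1
train_losses = checkpoint['train_loss']
valid_losses = checkpoint['valid_loss']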
Example #16
#####################
# Training settings #
#####################
parser = argparse.ArgumentParser()
parser.add_argument('-seed', type=int, default=1)
parser.add_argument('-dataloader', type=str, default='multi_file_str'
                    )  # Must be a valid file name in dataloaders/ folder
parser.add_argument(
    '-model', type=str,
    default='word_rnn')  # Must be a valid file name in models/ folder
parser.add_argument('-batch_size', type=int, default=128)
parser.add_argument('-lrt', type=float, default=0.0001)
parser.add_argument('-epoch_size', type=int, default=100)
parser.add_argument('-n_epochs', type=int, default=2000)
parser.add_argument('-gpu',
                    type=int,
                    default=1 if utils.is_remote() else 0,
                    help='Which GPU to use; ignored when running locally')
parser.add_argument('-data_dir',
                    type=str,
                    default='data/',
                    help='path for preprocessed dataloader files')
parser.add_argument('-dropout', type=float, default=0.4)

############################
# Model dependent settings #
############################
parser.add_argument('-hidden_size_rnn',
                    type=int,
                    default=200,
                    help='RNN hidden vector size')
parser.add_argument('-n_layers_rnn',