Example #1
    def produce(self, start_tokens=None, max_len=20):
        """
        Generate a tweet using the provided start tokens at the inputs on the initial timesteps

        NOTE: this is an unfinished template. The loop body is a placeholder
        that raises ``NotImplementedError`` on the first iteration.

        :param start_tokens: A tensor of the shape (n,) where n is the number of start tokens
        :param max_len: Maximum length of the tweet
        :return: Indices of the tokens of the generated tweet
        :raises NotImplementedError: always, whenever ``max_len > 0``, until the loop body is written
        """
        # Single-sequence generation: the recurrent state is created for a batch of one.
        hidden = self.cell_zero_state(1)
        # The very first input is the special "init" token.
        x_i = variable(np.full((1, ), self.init_token))

        if start_tokens is not None:
            start_tokens = variable(start_tokens)

        outputs = []
        for i in range(max_len):
            ### Insert your code below ###
            # `x_i` should be the output of the network at the current timestep
            raise NotImplementedError()

            ### Insert your code above ####

        # Join the per-timestep results into one tensor.
        outputs = torch.cat(outputs)
        return outputs
Example #2
    def forward(self, inputs):
        """
        Perform the forward pass of the network and return non-normalized probabilities of the output tokens at each timestep

        The input at timestep 0 is the init token. At every later timestep the
        input is, with probability ``self.teacher_forcing``, the ground-truth
        token of the previous position; otherwise it is the token derived from
        the network's own previous output via ``argmax``.

        :param inputs: A tensor of size (batch_size x max_len) of indices of tweets' tokens
        :return: A tensor of size (batch_size x max_len x vocab_size)
        """
        batch_size, max_len = inputs.shape
        hidden = self.cell_zero_state(batch_size)
        # One init token per sequence in the batch.
        x_i = variable(np.full((1,), self.init_token)).expand((batch_size,))

        outputs = []
        output = None  # logits of the previous timestep; set at the end of every iteration
        for i in range(max_len):
            if i == 0:
                # Nothing has been predicted yet: start from the init token.
                ip = x_i
            elif random.random() < self.teacher_forcing:
                # Teacher forcing: feed the ground-truth token of the PREVIOUS
                # position. The output at step i is trained to predict
                # inputs[:, i], so feeding inputs[:, i] itself would leak the label.
                ip = inputs.select(1, i - 1)
            else:
                # Free running: feed back the model's own previous prediction.
                # NOTE(review): `argmax` is a helper defined elsewhere; it is
                # assumed to return the index of the highest logit per row.
                ip = argmax(output).view(-1)
            encoded = self.embedding(ip)
            hidden = self.gru(encoded, hidden)
            output = self.linear(hidden)
            outputs.append(output)

        # Stack the per-timestep outputs along a new dimension 1 (time).
        outputs = torch.stack(outputs, dim=1)

        return outputs
Example #3
    def forward(self, inputs):
        """
        Perform the forward pass of the network and return non-normalized probabilities of the output tokens at each timestep

        NOTE: this is an unfinished template. The loop body is a placeholder
        that raises ``NotImplementedError`` on the first iteration.

        :param inputs: A tensor of size (batch_size x max_len) of indices of tweets' tokens
        :return: A tensor of size (batch_size x max_len x vocab_size)
        :raises NotImplementedError: always, whenever ``max_len > 0``, until the loop body is written
        """
        batch_size, max_len = inputs.shape

        hidden = self.cell_zero_state(batch_size)
        # One init token per sequence in the batch.
        x_i = variable(np.full((1, ), self.init_token)).expand((batch_size, ))

        outputs = []
        for i in range(max_len):
            ### Insert your code below ###
            # `output` should be the output of the network at the current timestep
            raise NotImplementedError()

            ### Insert your code above ####

        # Stack the per-timestep outputs along a new dimension 1 (time).
        outputs = torch.stack(outputs, dim=1)

        return outputs
Example #4
    def produce(self, start_tokens=None, max_len=20):
        """
        Generate a tweet using the provided start tokens at the inputs on the initial timesteps

        For the first ``n`` timesteps the emitted token is forced to the
        corresponding start token; afterwards each token is sampled from the
        softmax distribution over the vocabulary. Generation stops early once
        the end-of-sequence token has been emitted (it is included in the result).

        :param start_tokens: A tensor of the shape (n,) where n is the number of start tokens
        :param max_len: Maximum length of the tweet
        :return: Indices of the tokens of the generated tweet
        """
        hidden = self.cell_zero_state(1)
        # The very first input is the special "init" token.
        x_i = variable(np.full((1,), self.init_token))

        nb_start_tokens = 0
        if start_tokens is not None:
            start_tokens = variable(start_tokens)
            nb_start_tokens = start_tokens.size(0)

        outputs = []
        for i in range(max_len):
            # Always advance the recurrent state, even when the emitted token
            # is forced, so the hidden state reflects the whole prefix.
            em = self.embedding(x_i)
            hidden = self.gru(em, hidden)

            if i < nb_start_tokens:
                # Still inside the user-provided prefix: emit it verbatim.
                output = start_tokens[i]
            else:
                # Sample the next token rather than taking the arg-max.
                probs = self.softmax(self.linear(hidden))
                output = torch.multinomial(probs, 1)

            # The emitted token becomes the input of the next timestep.
            x_i = output.view(-1)
            outputs.append(x_i)

            if x_i == self.eos_token:
                # End of tweet reached: stop generating.
                break

        outputs = torch.cat(outputs)
        return outputs
Example #5
def main():
    """
    Train the neural language model on one person's tweets and report the results.

    Steps: load both tweet datasets with a shared vocabulary, split the chosen
    one into train/validation parts, train for ``nb_epochs`` epochs while
    evaluating on the validation split and on the other person's tweets,
    sample one tweet per epoch, then print and plot the loss history.

    :raises ValueError: if ``train_on`` is neither ``'trump'`` nor ``'obama'``
    """
    train_on = 'trump'  # 'trump' or 'obama'
    val_size = 0.2
    max_len = 20
    embedding_size = 200
    hidden_size = 300
    batch_size = 64
    nb_epochs = 100
    max_grad_norm = 5
    teacher_forcing = 0.7

    # load data and create datasets
    # note that they use the same Vocab object so they will share the vocabulary
    # (in particular, for a given token both of them will return the same id)
    trump_tweets_filename = '/content/uml_nlp_class/data/trump_tweets.txt'
    obama_tweets_filename = '/content/uml_nlp_class/data/obama_white_house_tweets.txt'
    #trump_tweets_filename = 'data/trump_tweets.txt'
    #obama_tweets_filename = 'data/obama_white_house_tweets.txt'
    dataset_trump = TwitterFileArchiveDataset(trump_tweets_filename, max_len=max_len)
    dataset_obama = TwitterFileArchiveDataset(obama_tweets_filename, max_len=max_len, vocab=dataset_trump.vocab)

    if train_on == 'trump':
        dataset_train = dataset_trump
        dataset_val_ext = dataset_obama
    elif train_on == 'obama':
        dataset_train = dataset_obama
        dataset_val_ext = dataset_trump
    else:
        raise ValueError('`train_on` cannot be {} - use `trump` or `obama`'.format(train_on))

    val_len = int(len(dataset_train) * val_size)
    train_len = len(dataset_train) - val_len
    dataset_train, dataset_val = torch.utils.data.dataset.random_split(dataset_train, [train_len, val_len])

    # note that the training and validation sets come from the same person,
    # whereas the val_ext set comes from a different person

    data_loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
    data_loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=batch_size, shuffle=False)
    data_loader_val_ext = torch.utils.data.DataLoader(dataset_val_ext, batch_size=batch_size, shuffle=False)
    print('Training on: {}'.format(train_on))
    print('Train {}, val: {}, val ext: {}'.format(len(dataset_train), len(dataset_val), len(dataset_val_ext)))

    vocab_size = len(dataset_trump.vocab)
    # NOTE(review): `teacher_forcing` is assumed to be the constructor's sixth
    # positional parameter - confirm against NeuralLanguageModel.__init__.
    model = NeuralLanguageModel(
        embedding_size, hidden_size, vocab_size,
        dataset_trump.vocab[dataset_trump.INIT_TOKEN], dataset_trump.vocab[dataset_trump.EOS_TOKEN],
        teacher_forcing,
    )
    model = cuda(model)

    parameters = list(model.parameters())
    optimizer = torch.optim.Adam(parameters, amsgrad=True)
    # Padding positions must not contribute to the loss.
    criterion = torch.nn.CrossEntropyLoss(ignore_index=dataset_trump.vocab[dataset_trump.PAD_TOKEN])

    phases = ['train', 'val', 'val_ext']
    data_loaders = [data_loader_train, data_loader_val, data_loader_val_ext]
    losses_history = defaultdict(list)
    for epoch in range(nb_epochs):
        for phase, data_loader in zip(phases, data_loaders):
            is_train = phase == 'train'
            # Switch train/eval mode for the current phase.
            model.train(is_train)

            epoch_loss = []
            for inputs in data_loader:
                optimizer.zero_grad()

                inputs = variable(inputs)

                # Gradients are only needed while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)

                    # The model is trained to reproduce its input sequence:
                    # flatten so each (sequence, timestep) pair is one sample.
                    targets = inputs.view(-1)
                    outputs = outputs.view(targets.size(0), -1)
                    loss = criterion(outputs, targets)

                if is_train:
                    loss.backward()
                    # Clip to keep recurrent gradients from exploding.
                    torch.nn.utils.clip_grad_norm_(parameters, max_grad_norm)
                    optimizer.step()

                epoch_loss.append(loss.item())

            epoch_loss = np.mean(epoch_loss)
            losses_history[phase].append(epoch_loss)
            print('Epoch {} {}\t\tloss {:.2f}'.format(epoch, phase, epoch_loss))

            # decode something in the validation phase
            if phase == 'val_ext':
                possible_start_tokens = [
                    ['We', 'will'],
                ]
                start_tokens = possible_start_tokens[np.random.randint(len(possible_start_tokens))]
                start_tokens = np.array([dataset_trump.vocab[t] for t in start_tokens])
                outputs = model.produce(start_tokens, max_len=20)
                outputs = outputs.cpu().numpy()

                produced_sequence = get_sequence_from_indices(outputs, dataset_trump.vocab.id2token)
                print(produced_sequence)

    # One row per epoch, one tab-separated column per phase.
    losses = [losses_history[phase] for phase in phases]

    losses = list(zip(*losses))
    for losses_vals in losses:
        print('\t'.join('{:.2f}'.format(lv) for lv in losses_vals))

    #plot the graph
    plt.rcParams['axes.facecolor'] = 'white'
    y1 = losses_history['train']
    y2 = losses_history['val']
    y3 = losses_history['val_ext']
    x = list(range(nb_epochs))

    fig, ax = plt.subplots()
    ax.plot(x, y1, 'r--', label='train')
    ax.plot(x, y2, 'g:', label='Eval')
    ax.plot(x, y3, 'b', label='eval_ext')
    ax.legend(loc='upper center')
    # Without an explicit show() the figure is never displayed when run as a script.
    plt.show()
