Example #1
    def forward(self, x):
        # Look up the (GloVe) embeddings for the input token indices.
        x = self.embed(x)

        if self.cove:
            # Run the embeddings through CoVe's MT-LSTM, returning both LSTM layers
            # concatenated with the original embeddings (residual_embeddings=True).
            # n_vocab=None means the inputs are already embedded vectors.
            outputs_both_layer_cove_with_glove = MTLSTM(
                n_vocab=None,
                vectors=None,
                layer0=True,
                residual_embeddings=True)
            outputs_both_layer_cove_with_glove.cuda()
            # All sequences in the batch are padded to the same length.
            x = outputs_both_layer_cove_with_glove(x,
                                                   [x.shape[1]] * x.shape[0])

        # (batch, 1, seq_len, embed_dim): add a channel dimension for the 2-D convolutions.
        x = x.unsqueeze(1)
        # Convolve and drop the width dimension: list of (batch, n_filters, L_out).
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]
        # Max-pool over time: list of (batch, n_filters).
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        # Concatenate the pooled features from all kernel sizes.
        x = torch.cat(x, 1)
        x = self.dropout(x)
        output = self.fully_connected(x)
        return output
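The attributes used above (self.embed, self.convs1, self.dropout, self.fully_connected, self.cove) are defined elsewhere in the source class, and the snippet assumes torch, torch.nn.functional as F, and MTLSTM from cove are imported at module level. The sketch below is a minimal, hypothetical constructor that would make this forward pass run, assuming 300-dimensional GloVe embeddings, so the convolution width becomes 300 + 1200 = 1500 when CoVe with residual embeddings is enabled; in practice the MTLSTM would also normally be built once here rather than on every forward call.

import torch.nn as nn

class CNNTextClassifier(nn.Module):  # hypothetical class name
    def __init__(self, vocab_size, n_classes, cove=True,
                 embed_dim=300, n_filters=100, kernel_sizes=(3, 4, 5)):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.cove = cove
        # With layer0=True and residual_embeddings=True, CoVe returns the 300-d input
        # vectors plus the 600-d first-layer and 600-d second-layer outputs (1500-d total).
        conv_width = embed_dim + 1200 if cove else embed_dim
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(1, n_filters, (k, conv_width)) for k in kernel_sizes])
        self.dropout = nn.Dropout(0.5)
        self.fully_connected = nn.Linear(len(kernel_sizes) * n_filters, n_classes)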
Example #2
import numpy as np
import torch
from torch.autograd import Variable

from cove import MTLSTM


def compute_torch_values(inputs, embeddings):
    # Build an MT-LSTM (CoVe) encoder on top of the given GloVe embedding matrix.
    model = MTLSTM(n_vocab=embeddings.shape[0],
                   vectors=torch.from_numpy(embeddings.astype(np.float32)))
    model.cuda(0)
    model_inputs = Variable(torch.from_numpy(inputs.astype(np.int64)))
    # Every sequence in the batch is assumed to be padded to the same length.
    lengths = torch.from_numpy(
        np.ones((inputs.shape[0], ), dtype=np.int64) * inputs.shape[1])
    cove_outputs = model.forward(model_inputs.cuda(), lengths=lengths.cuda())
    torch_output = cove_outputs.data.cpu().numpy()
    print("Torch output shape", torch_output.shape)
    return torch_output
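A minimal usage sketch for the helper above, assuming a CUDA device is available; the vocabulary size, random embedding matrix, and padded token-index batch are made-up placeholders (the MT-LSTM expects 300-dimensional input embeddings):

import numpy as np

vocab_size, embed_dim, batch_size, seq_len = 1000, 300, 4, 10
embeddings = np.random.rand(vocab_size, embed_dim).astype(np.float32)
inputs = np.random.randint(0, vocab_size, size=(batch_size, seq_len))
cove_vectors = compute_torch_values(inputs, embeddings)
# Expected shape: (batch_size, seq_len, 600) -- the last-layer CoVe vectors.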
Example #3
class tmcove(Model):
    def load(self, vectors):
        # Keep the word-vector lookup for predict() and build the CoVe encoder on it.
        self.vectors = vectors
        self.model = MTLSTM(n_vocab=len(vectors.keys()), vectors=vectors)
        self.model.cuda()

    def train(self, X, Y):
        # CoVe is a pretrained encoder, so there is nothing to train here.
        pass

    def predict(self, X):
        X, Y = self.input_function(X, [])
        return [[get_word2vec(token, self.vectors) for token in tokens]
                for tokens in X]
Example #4
import numpy as np
import torch
from torchtext import data, datasets
from torchtext.vocab import GloVe

from cove import MTLSTM

# `args` (with .data, .device and .embeddings attributes) and the `inputs` Field are
# assumed to be defined earlier in the original script (an argparse namespace and a
# torchtext data.Field, respectively).

print('Generating train, dev, test splits')
train, dev, test = datasets.IWSLT.splits(root=args.data, exts=['.en', '.de'], fields=[inputs, inputs])
train_iter, dev_iter, test_iter = data.Iterator.splits(
    (train, dev, test), batch_size=100,
    device=torch.device(args.device) if args.device >= 0 else None)

print('Building vocabulary')
inputs.build_vocab(train, dev, test)
inputs.vocab.load_vectors(vectors=GloVe(name='840B', dim=300, cache=args.embeddings))

# Three CoVe variants: the last MT-LSTM layer only, both layers, and both layers
# concatenated with the GloVe embeddings.
outputs_last_layer_cove = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors, model_cache=args.embeddings)
outputs_both_layer_cove = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors, layer0=True, model_cache=args.embeddings)
outputs_both_layer_cove_with_glove = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors, layer0=True, residual_embeddings=True, model_cache=args.embeddings)

if args.device >= 0:
    outputs_last_layer_cove.cuda()
    outputs_both_layer_cove.cuda()
    outputs_both_layer_cove_with_glove.cuda()

train_iter.init_epoch()
print('Generating CoVe')
for batch_idx, batch in enumerate(train_iter):
    if batch_idx > 0:
        break
    last_layer_cove = outputs_last_layer_cove(*batch.src)
    print(last_layer_cove.size())
    first_then_last_layer_cove = outputs_both_layer_cove(*batch.src)
    print(first_then_last_layer_cove.size())
    glove_then_first_then_last_layer_cove = outputs_both_layer_cove_with_glove(*batch.src)
    print(glove_then_first_then_last_layer_cove.size())
    # The last-layer CoVe should match the final 600 dimensions of the two-layer output.
    assert np.allclose(last_layer_cove.data.cpu().numpy(),
                       first_then_last_layer_cove[:, :, -600:].data.cpu().numpy())
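For a batch of B sequences padded to length T, the three encoders above should print sizes of (B, T, 600), (B, T, 1200) and (B, T, 1500) respectively: the final MT-LSTM layer alone yields 600-dimensional CoVe vectors, layer0=True also returns the 600-dimensional first-layer outputs, and residual_embeddings=True additionally includes the 300-dimensional GloVe vectors.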
Example #5
import torch
from torchtext import data
from torchtext import datasets

from cove import MTLSTM

inputs = data.Field(lower=True, include_lengths=True, batch_first=True)
answers = data.Field(sequential=False)

print('Generating train, dev, test splits')
train, dev, test = datasets.SNLI.splits(inputs, answers)

print('Building vocabulary')
inputs.build_vocab(train, dev, test)
inputs.vocab.load_vectors('glove.840B.300d')
answers.build_vocab(train)

model = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors)
model.cuda(0)

train_iter, dev_iter, test_iter = data.BucketIterator.splits(
    (train, dev, test), batch_size=100, device=0)

train_iter.init_epoch()
print('Generating CoVe')
for batch_idx, batch in enumerate(train_iter):
    model.train()
    cove_premise = model(*batch.premise)
    cove_hypothesis = model(*batch.hypothesis)
Example #6
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``.
        label : torch.LongTensor, optional (default = None)
            A variable representing the label for each instance in the batch.
        Returns
        -------
        An output dictionary consisting of:
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_classes)`` representing a
            distribution over the label classes for each instance.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        text_mask = util.get_text_field_mask(tokens).float()
        # Pop elmo tokens, since elmo embedder should not be present.
        elmo_tokens = tokens.pop("elmo", None)
        if tokens:
            embedded_text = self._text_field_embedder(tokens)
        else:
            # only using "elmo" for input
            embedded_text = None

        # Add the "elmo" key back to "tokens" if not None, since the tests and the
        # subsequent training epochs rely not being modified during forward()
        if elmo_tokens is not None:
            tokens["elmo"] = elmo_tokens

        # Create ELMo embeddings if applicable
        if self._elmo:
            if elmo_tokens is not None:
                elmo_representations = self._elmo(elmo_tokens)["elmo_representations"]
                # Popping from the end of a list is more efficient.
                if self._use_integrator_output_elmo:
                    integrator_output_elmo = elmo_representations.pop()
                if self._use_input_elmo:
                    input_elmo = elmo_representations.pop()
                assert not elmo_representations
            else:
                raise ConfigurationError(
                        "Model was built to use Elmo, but input text is not tokenized for Elmo.")

        if self._use_input_elmo:
            if embedded_text is not None:
                embedded_text = torch.cat([embedded_text, input_elmo], dim=-1)
            else:
                embedded_text = input_elmo

        # When using embeddings from the mt-cnn encoder, the hardcoded vocabulary sizes
        # below should be initialised appropriately for the dataset.
        if cnn:
            embedded_text_cnn = embedded_text
            enc = Encoder(7855, 300, 600, 5, 3, 0.25, 'cuda')
            dec = Decoder(5893, 300, 600, 5, 3, 0.25, 1, 'cuda')

            cnn_model = Seq2Seq(enc, dec).cuda()
            cnn_model.load_state_dict(torch.load('../cnn_lstm_model.pt'))
            cnn_model.eval()
            # Encode the first 256 embedding dimensions and concatenate the two encoder outputs.
            v1, v2 = cnn_model.encoder(embedded_text[:, :, :256])
            v3 = torch.cat((v1, v2), 2)

            embedded_text = torch.cat((embedded_text_cnn, v3), 2)

        # When using embeddings from the mt-lstm encoder, load either the saved model
        # from the paper or the reproduced model.
        elif lstm:
            outputs_both_layer_cove_with_glove = MTLSTM(
                n_vocab=None, vectors=None, layer0=True, residual_embeddings=True)
            outputs_both_layer_cove_with_glove.cuda()
            embedded_text = outputs_both_layer_cove_with_glove(
                embedded_text, [embedded_text.shape[1]] * embedded_text.shape[0])

        dropped_embedded_text = self._embedding_dropout(embedded_text)
        pre_encoded_text = self._pre_encode_feedforward(dropped_embedded_text)

        encoded_tokens = self._encoder(pre_encoded_text, text_mask)
        # Compute biattention. This is a special case since the inputs are the same.
        attention_logits = encoded_tokens.bmm(encoded_tokens.permute(0, 2, 1).contiguous())
        attention_weights = util.masked_softmax(attention_logits, text_mask)
        encoded_text = util.weighted_sum(encoded_tokens, attention_weights)

        # Build the input to the integrator
        integrator_input = torch.cat([encoded_tokens,
                                      encoded_tokens - encoded_text,
                                      encoded_tokens * encoded_text], 2)
        integrated_encodings = self._integrator(integrator_input, text_mask)

        # Concatenate ELMo representations to integrated_encodings if specified
        if self._use_integrator_output_elmo:
            integrated_encodings = torch.cat([integrated_encodings,
                                              integrator_output_elmo], dim=-1)

        # Simple Pooling layers
        max_masked_integrated_encodings = util.replace_masked_values(
                integrated_encodings, text_mask.unsqueeze(2), -1e7)
        max_pool = torch.max(max_masked_integrated_encodings, 1)[0]
        min_masked_integrated_encodings = util.replace_masked_values(
                integrated_encodings, text_mask.unsqueeze(2), +1e7)
        min_pool = torch.min(min_masked_integrated_encodings, 1)[0]
        mean_pool = torch.sum(integrated_encodings, 1) / torch.sum(text_mask, 1, keepdim=True)

        # Self-attentive pooling layer
        # Run through linear projection. Shape: (batch_size, sequence length, 1)
        # Then remove the last dimension to get the proper attention shape (batch_size, sequence length).
        self_attentive_logits = self._self_attentive_pooling_projection(
                integrated_encodings).squeeze(2)
        self_weights = util.masked_softmax(self_attentive_logits, text_mask)
        self_attentive_pool = util.weighted_sum(integrated_encodings, self_weights)

        pooled_representations = torch.cat([max_pool, min_pool, mean_pool, self_attentive_pool], 1)
        pooled_representations_dropped = self._integrator_dropout(pooled_representations)

        logits = self._output_layer(pooled_representations_dropped)
        class_probabilities = F.softmax(logits, dim=-1)

        output_dict = {'logits': logits, 'class_probabilities': class_probabilities}
        if label is not None:
            loss = self.loss(logits, label)
            for metric in self.metrics.values():
                metric(logits, label)
            output_dict["loss"] = loss

        return output_dict