Example #1
class Pooler_for_title_and_desc(Seq2VecEncoder):
    def __init__(self, args, word_embedder):
        super(Pooler_for_title_and_desc, self).__init__()
        self.args = args
        self.huggingface_nameloader()
        self.bertpooler_sec2vec = BertPooler(
            pretrained_model=self.bert_weight_filepath)
        self.word_embedder = word_embedder
        self.word_embedding_dropout = nn.Dropout(
            self.args.word_embedding_dropout)

        self.linear_for_entity_encoding = nn.Linear(
            self.bertpooler_sec2vec.get_output_dim(),
            self.bertpooler_sec2vec.get_output_dim())

        self.linear_for_dimentionReduction = nn.Linear(
            self.bertpooler_sec2vec.get_output_dim(),
            self.args.dimentionReductionToThisDim)

    def huggingface_nameloader(self):
        if self.args.bert_name == 'bert-base-uncased':
            self.bert_weight_filepath = 'bert-base-uncased'
        else:
            self.bert_weight_filepath = 'dummy'
            print('Currently not supported', self.args.bert_name)
            exit()

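    # Pools the concatenated title + description text into a single entity
    # vector. args.entityPooling selects the strategy: 'CLS' takes the raw
    # [CLS] embedding, 'CLSLinear' adds a linear layer on top of it, and
    # 'CLSLinearTanh' applies AllenNLP's BertPooler (BERT's linear + tanh
    # pooler over the [CLS] position).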
    def forward(self, title_and_desc_concatnated_text):
        mask_sent = get_text_field_mask(title_and_desc_concatnated_text)
        entity_emb = self.word_embedder(title_and_desc_concatnated_text)
        entity_emb = self.word_embedding_dropout(entity_emb)

        if self.args.entityPooling == "CLSLinear":
            entity_emb = entity_emb[:, 0, :]
            entity_emb = self.linear_for_entity_encoding(entity_emb)
        elif self.args.entityPooling == 'CLS':
            entity_emb = entity_emb[:, 0, :]
        else:
            assert self.args.entityPooling == "CLSLinearTanh"
            entity_emb = self.bertpooler_sec2vec(entity_emb, mask_sent)

        if self.args.dimentionReduction:
            return self.linear_for_dimentionReduction(entity_emb)
        else:
            return entity_emb
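For reference, a minimal instantiation sketch for this pooler. The args namespace and the AllenNLP text-field embedder below are assumptions made for illustration; the original repository builds both from its own configuration.

from argparse import Namespace
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import PretrainedTransformerEmbedder

# Hypothetical args; the attribute names follow the ones read in the class
# above (including the original spelling of 'dimentionReduction').
args = Namespace(
    bert_name='bert-base-uncased',
    word_embedding_dropout=0.1,
    entityPooling='CLSLinearTanh',  # or 'CLS' / 'CLSLinear'
    dimentionReduction=False,
    dimentionReductionToThisDim=300,
)
word_embedder = BasicTextFieldEmbedder(
    {'tokens': PretrainedTransformerEmbedder('bert-base-uncased')})
pooler = Pooler_for_title_and_desc(args, word_embedder)
# forward() expects the AllenNLP tensor dict produced by the matching token
# indexer for the concatenated title + description text field.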
Example #2
    def test_encoder(self):
        encoder = BertPooler("bert-base-uncased")
        assert encoder.get_input_dim() == encoder.get_output_dim()
        embedding = torch.rand(8, 24, encoder.get_input_dim())

        pooled1 = encoder(embedding)
        assert pooled1.size() == (8, encoder.get_input_dim())

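        # BertPooler only reads the first time step, so zeroing out every
        # other position must leave the pooled output unchanged.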
        embedding[:, 1:, :] = 0
        pooled2 = encoder(embedding)
        numpy.testing.assert_array_almost_equal(pooled1.detach().numpy(),
                                                pooled2.detach().numpy())
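A standalone sketch of the same encoder outside the test harness. The mask argument is shown only because the other examples pass one; BertPooler itself only reads the first position. Shapes assume bert-base-uncased (hidden size 768).

import torch
from allennlp.modules.seq2vec_encoders import BertPooler

encoder = BertPooler("bert-base-uncased")   # downloads the pretrained weights
embedding = torch.rand(2, 10, encoder.get_input_dim())
mask = torch.ones(2, 10, dtype=torch.bool)
pooled = encoder(embedding, mask)           # mask is accepted, but only the
print(pooled.shape)                         # first time step is ever used
# torch.Size([2, 768])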
Example #3
class SimpleBertClassifier(BaseModel):
    """
    Model that encodes input using BERT, takes the embedding for the CLS
    token (using BertPooler), and passes it through an FFN to get the
    probabilities.
    """

    def __init__(self,
                 bert_path: Path,
                 vocab: Vocabulary,
                 train_bert: bool = False
                 ) -> None:
        # We have to pass the vocabulary to the constructor.
        super().__init__(vocab)
        self.word_embeddings = bert_embeddings(pretrained_model=bert_path,
                                               training=train_bert)

        self.pooler = BertPooler(pretrained_model=str(bert_path))

        hidden_dim = self.pooler.get_output_dim()
        self.hidden2logit = torch.nn.Linear(
            in_features=hidden_dim,
            out_features=1
        )

    # This is the computation bit of the model. The arguments of this function
    # are the fields from the `Instance` we created, as that's what's going to
    # be passed to this. We also have the optional `label`, which is only
    # available at training time, used to calculate the loss.
    def forward(self,
                metadata: Dict[str, torch.Tensor],
                bert0: Dict[str, torch.Tensor],
                bert1: Dict[str, torch.Tensor],
                label: Optional[torch.Tensor] = None
                ) -> Dict[str, torch.Tensor]:
        # Every sample in a batch has to have the same size (as it's a tensor),
        # so smaller entries are padded. The mask is used to counteract this
        # padding.
        t0_masks = util.get_text_field_mask(bert0)
        t1_masks = util.get_text_field_mask(bert1)

        # We create the embeddings from the input text
        t0_embs = self.word_embeddings(bert0)
        t1_embs = self.word_embeddings(bert1)

        # Then we use those embeddings (along with the masks) as inputs for
        # our encoders
        enc0_outs = self.pooler(t0_embs, t0_masks)
        enc1_outs = self.pooler(t1_embs, t1_masks)

        # Finally, we pass each encoded output tensor to the feedforward layer
        # to produce logits corresponding to each class.
        logit0 = self.hidden2logit(enc0_outs).squeeze(-1)
        logit1 = self.hidden2logit(enc1_outs).squeeze(-1)
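        # Reduce any remaining list dimension by keeping the largest logit
        # per candidate.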
        logit0, _ = torch.max(logit0, dim=1)
        logit1, _ = torch.max(logit1, dim=1)
        logits = torch.stack((logit0, logit1), dim=-1)
        # We also compute the class with highest likelihood (our prediction)
        prob = torch.softmax(logits, dim=-1)
        output = {"logits": logits, "prob": prob}

        # Labels are optional. If they're present, we calculate the accuracy
        # and the loss function.
        if label is not None:
            self.accuracy(prob, label)
            output["loss"] = self.loss(logits, label)

        # The output is the dict we've been building, with the logits, loss
        # and the prediction.
        return output
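A hypothetical inference sketch for this classifier; model and batch are placeholders for a trained SimpleBertClassifier and a batch produced by the repository's dataset reader.

model.eval()
with torch.no_grad():
    output = model(**batch)                  # batch supplies metadata, bert0, bert1
predicted = output["prob"].argmax(dim=-1)    # 0 -> first candidate, 1 -> second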
Example #4
class AdvancedAttentionBertClassifier(BaseModel):
    """
    Model similar to the AttentiveClassifier with BERT, but without external
    features.

    SimpleTrian is the same model, but with the attention applied before the
    encoders.
    """
    def __init__(self,
                 bert_path: Path,
                 encoder: Seq2SeqEncoder,
                 vocab: Vocabulary,
                 hidden_dim: int = 100,
                 encoder_dropout: float = 0.0,
                 train_bert: bool = False) -> None:
        # We have to pass the vocabulary to the constructor.
        super().__init__(vocab)
        self.word_embeddings = bert_embeddings(pretrained_model=bert_path,
                                               training=train_bert)

        self.encoder_dropout: torch.nn.Module
        if encoder_dropout > 0:
            self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)
        else:
            self.encoder_dropout = torch.nn.Identity()

        self.pooler = BertPooler(pretrained_model=str(bert_path))
        self.dense1 = torch.nn.Linear(in_features=self.pooler.get_output_dim(),
                                      out_features=hidden_dim)
        self.encoder = encoder
        self.self_attn = LinearSelfAttention(
            input_dim=self.encoder.get_output_dim(), bias=True)
        self.dense2 = torch.nn.Linear(
            in_features=self.encoder.get_output_dim(), out_features=1)

    # This is the computation bit of the model. The arguments of this function
    # are the fields from the `Instance` we created, as that's what's going to
    # be passed to this. We also have the optional `label`, which is only
    # available at training time, used to calculate the loss.
    def forward(
            self,
            metadata: Dict[str, torch.Tensor],
            bert0: Dict[str, torch.Tensor],
            bert1: Dict[str, torch.Tensor],
            label: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        # Unlike SimpleBertClassifier, no padding masks are computed here:
        # BertPooler only reads the first ([CLS]) position of each sequence,
        # and the encoder below is called with mask=None.

        # We create the embeddings from the input text
        t0_embs = self.word_embeddings(bert0)
        t1_embs = self.word_embeddings(bert1)

        t0_pooled = self.pooler(t0_embs)
        t1_pooled = self.pooler(t1_embs)

        t0_transformed = self.dense1(t0_pooled)
        t1_transformed = self.dense1(t1_pooled)

        t0_enc_hiddens = self.encoder_dropout(
            self.encoder(t0_transformed, mask=None))
        t1_enc_hiddens = self.encoder_dropout(
            self.encoder(t1_transformed, mask=None))

        t0_enc_attn = self.self_attn(t0_enc_hiddens, t0_enc_hiddens)
        t1_enc_attn = self.self_attn(t1_enc_hiddens, t1_enc_hiddens)

        t0_enc_out = util.weighted_sum(t0_enc_hiddens, t0_enc_attn)
        t1_enc_out = util.weighted_sum(t1_enc_hiddens, t1_enc_attn)

        logit0 = self.dense2(t0_enc_out).squeeze(-1)
        logit1 = self.dense2(t1_enc_out).squeeze(-1)

        logits = torch.stack((logit0, logit1), dim=-1)

        # We also compute the class with highest likelihood (our prediction)
        prob = torch.softmax(logits, dim=-1)
        output = {"logits": logits, "prob": prob}

        # Labels are optional. If they're present, we calculate the accuracy
        # and the loss function.
        if label is not None:
            self.accuracy(prob, label)
            output["loss"] = self.loss(logits, label)

        # The output is the dict we've been building, with the logits, loss
        # and the prediction.
        return output
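A hypothetical construction sketch for this model. The pass-through encoder, the empty vocabulary, and the bert_path value are placeholders; the repository assembles these (and the bert_embeddings helper used above) from its own configuration.

from pathlib import Path
from allennlp.data import Vocabulary
from allennlp.modules.seq2seq_encoders import PassThroughEncoder

model = AdvancedAttentionBertClassifier(
    bert_path=Path("bert-base-uncased"),        # placeholder model path/name
    encoder=PassThroughEncoder(input_dim=100),  # any Seq2SeqEncoder whose
    vocab=Vocabulary(),                         # input dim matches hidden_dim
    hidden_dim=100,
    encoder_dropout=0.2,
)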