Example #1
class BertFine(BertPreTrainedModel):
    def __init__(self, bertConfig, num_classes):
        super(BertFine, self).__init__(bertConfig)
        self.bert = BertModel(bertConfig)  # BERT encoder
        self.dropout = nn.Dropout(bertConfig.hidden_dropout_prob)
        self.classifier = nn.Linear(in_features=bertConfig.hidden_size,
                                    out_features=num_classes)
        self.apply(self.init_weights)
        # By default, all parameters of the BERT encoder are trained; with a batch size of 32 this uses about 8.7 GB of GPU memory.
        # Calling freeze_bert_encoder() instead freezes the encoder so that only the classifier layer is backpropagated; with a batch size of 32 this uses about 1.1 GB of GPU memory.
        self.unfreeze_bert_encoder()

    def freeze_bert_encoder(self):
        for p in self.bert.parameters():
            p.requires_grad = False

    def unfreeze_bert_encoder(self):
        for p in self.bert.parameters():
            p.requires_grad = True

    def forward(self,
                input_ids,
                token_type_ids,
                attention_mask,
                label_ids=None,
                output_all_encoded_layers=False):
        _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask)
        #output_all_encoded_layers=output_all_encoded_layers)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits
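The constructor comments above note that freezing the encoder and backpropagating only through the classifier cuts GPU memory from roughly 8.7 GB to 1.1 GB at batch size 32. A minimal training-step sketch along those lines is shown below; `bertConfig`, `train_loader`, and the optimizer settings are illustrative assumptions, not part of the original snippet.

```python
# Hypothetical sketch: train only the classifier head of BertFine with a frozen encoder.
import torch
import torch.nn as nn

model = BertFine(bertConfig, num_classes=2)      # assumes bertConfig is a loaded BertConfig
model.freeze_bert_encoder()                      # encoder parameters get requires_grad=False

# Hand the optimizer only the parameters that still require gradients (the classifier).
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=1e-3)
criterion = nn.CrossEntropyLoss()

model.train()
for input_ids, token_type_ids, attention_mask, labels in train_loader:  # assumed DataLoader
    optimizer.zero_grad()
    logits = model(input_ids, token_type_ids, attention_mask)
    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()
```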
Example #2
class TextBertForSequenceClassification(BertPreTrainedModel):
    """
    The text embedder that is implemented as a BERT model
    """
    def __init__(self, config):
        super(TextBertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
        self.pooler = TextBertAttentionPooler(config)

        self.apply(self.init_weights)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None,
                position_ids=None, head_mask=None, use_all_sequence=False,
                img_embedding=None, output_img_txt_attn=False):
        outputs = self.bert(input_ids, position_ids=position_ids, token_type_ids=token_type_ids,
                            attention_mask=attention_mask, head_mask=head_mask)
        # NOTE: the use_all_sequence path needs further investigation,
        # because the hidden states of all the tokens appeared to be identical
        # print('CLS output', outputs[1])
        # print('CLS input', outputs[0][:,0])
        # print('Hidden states', outputs[0])
        if not use_all_sequence:
            pooled_output = outputs[1] # this is the default pooled output i.e. [CLS]
        else:
            # insert own pooling mechanism
            hidden_states = outputs[0] # now we have the whole BERT sequence to use i.e. max_seq_len
            # don't use the [CLS] and [SEP]
            # need to implement something called attention pooling over here
            pooled_output = self.pooler(input_ids, hidden_states, attention_mask, img_embedding,
                    output_img_txt_attn)
            if output_img_txt_attn:
                img_txt_attn = pooled_output[1]
                pooled_output = pooled_output[0]
            else:
                pooled_output = pooled_output[0]

        pooled_output = self.dropout(pooled_output)
        
        # if same_classifier:
        #     outputs = (pooled_output,) + outputs[2:]
        # else:
        logits = self.classifier(pooled_output)
        outputs = (pooled_output, logits,) + outputs[2:]
        # add hidden states and attention if they are here
        if use_all_sequence and output_img_txt_attn:
            outputs = outputs + (img_txt_attn,)
        return outputs  # pooled_output, (logits), (hidden_states), (txt_attentions), (img_txt_attn)

    def freeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = False

    def unfreeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = True
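A hedged usage sketch for the `use_all_sequence` path follows. It assumes the pytorch_transformers-era API this snippet appears to target, an available `TextBertAttentionPooler` implementation, attentions/hidden-states outputs disabled in the config, and an image embedding of shape `(batch, hidden_size)`; the checkpoint name and tensor shapes are illustrative assumptions.

```python
# Hypothetical usage sketch for TextBertForSequenceClassification with attention pooling.
import torch

model = TextBertForSequenceClassification.from_pretrained('bert-base-uncased')
model.eval()

input_ids = torch.randint(0, 30522, (2, 16))               # 30522 = bert-base-uncased vocab size
attention_mask = torch.ones_like(input_ids)
img_embedding = torch.randn(2, model.config.hidden_size)   # assumed image feature shape

with torch.no_grad():
    outputs = model(input_ids,
                    attention_mask=attention_mask,
                    use_all_sequence=True,
                    img_embedding=img_embedding,
                    output_img_txt_attn=True)

pooled_output, logits = outputs[0], outputs[1]
img_txt_attn = outputs[-1]  # present only when use_all_sequence and output_img_txt_attn are True
```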
Example #3
class TextBertForSequenceClassification(BertPreTrainedModel):
    """
    The text embedder that is implemented as a BERT model
    """
    def __init__(self, config):
        super(TextBertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.apply(self.init_weights)

    def forward(self,
                input_ids,
                token_type_ids=None,
                attention_mask=None,
                labels=None,
                position_ids=None,
                head_mask=None,
                use_all_sequence=False,
                img_embedding=None,
                output_img_txt_attn=False):
        outputs = self.bert(input_ids,
                            position_ids=position_ids,
                            token_type_ids=token_type_ids,
                            attention_mask=attention_mask,
                            head_mask=head_mask)
        # NOTE: the use_all_sequence path needs further investigation,
        # because the hidden states of all the tokens appeared to be identical
        # print('CLS output', outputs[1])
        # print('CLS input', outputs[0][:,0])
        # print('Hidden states', outputs[0])
        pooled_output = outputs[1]  # this is the default pooled output, i.e. [CLS]

        pooled_output = self.dropout(pooled_output)

        # if same_classifier:
        #     outputs = (pooled_output,) + outputs[2:]
        # else:
        logits = self.classifier(pooled_output)
        outputs = (
            pooled_output,
            logits,
        ) + outputs[2:]
        return outputs  # pooled_output, (logits), (hidden_states), (txt_attentions)

    def freeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = False

    def unfreeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = True
class BertForMultiLabelSequenceClassification(BertPreTrainedModel):
    """BERT model for classification.
    This module is composed of the BERT model with a linear layer on top of
    the pooled output.
    """
    def __init__(self, config, num_labels=2):
        super(BertForMultiLabelSequenceClassification, self).__init__(config)
        self.num_labels = num_labels
        self.bert = BertModel(config)
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
        self.apply(self.init_bert_weights)

    def forward(self,
                input_ids,
                token_type_ids=None,
                attention_mask=None,
                labels=None):
        _, pooled_output = self.bert(input_ids,
                                     token_type_ids,
                                     attention_mask,
                                     output_all_encoded_layers=False)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        if labels is not None:
            loss_fct = BCEWithLogitsLoss()
            loss = loss_fct(logits.view(-1, self.num_labels),
                            labels.view(-1, self.num_labels))
            return loss
        else:
            return logits

    def freeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = False

    def unfreeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = True
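Because this class feeds the logits and labels into `BCEWithLogitsLoss`, the labels must be multi-hot float tensors of shape `(batch_size, num_labels)`. A minimal training-step sketch under that assumption (the config, learning rate, and tensors are illustrative):

```python
# Hypothetical training-step sketch for the multi-label classifier above.
import torch

num_labels = 4
model = BertForMultiLabelSequenceClassification(config, num_labels=num_labels)  # assumes a loaded BertConfig

input_ids = torch.randint(0, 30522, (2, 16))   # 30522 = bert-base vocab size
attention_mask = torch.ones_like(input_ids)
labels = torch.tensor([[1., 0., 1., 0.],       # multi-hot targets, one row per example
                       [0., 0., 1., 1.]])

optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

model.train()
loss = model(input_ids, attention_mask=attention_mask, labels=labels)
loss.backward()
optimizer.step()
```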
class CredPredictor(nn.Module):
    def __init__(self, config):
        super(CredPredictor, self).__init__()
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.reduce = nn.Linear(config.hidden_size, REDUCE_SIZE)
        self.prev_recursive = nn.LSTMCell(REDUCE_SIZE + 1, REDUCE_SIZE)
        self.aftr_recursive = nn.LSTMCell(REDUCE_SIZE + 1, REDUCE_SIZE)
        self.fc1 = nn.Linear(REDUCE_SIZE, int(REDUCE_SIZE / 2))
        self.fc2 = nn.Linear(int(REDUCE_SIZE / 2), 1)
        self.referee = nn.Linear(2, 1)

    def forward(self, prev, aftr):
        # TODO: if the number of sentences is too large, the process may be killed
        _, prev_pooled = self.bert(prev.input_ids, prev.token_type_ids,
                                   prev.attention_mask)
        _, aftr_pooled = self.bert(aftr.input_ids, aftr.token_type_ids,
                                   aftr.attention_mask)

        # prev_pooled = self.dropout(prev_pooled)
        # aftr_pooled = self.dropout(aftr_pooled)

        # print('\n\n')
        # pretty_print([('prev_pooled', prev_pooled.size()),
        #               ('aftr_pooled', aftr_pooled.size()),
        #               ('prev.labels', prev.labels.size()),
        #               ('aftr.labels', aftr.labels.size())])

        prev_pooled = self.reduce(prev_pooled)
        aftr_pooled = self.reduce(aftr_pooled)

        prevs = torch.cat((prev_pooled, prev.labels), 1)
        aftrs = torch.cat((aftr_pooled, aftr.labels), 1)

        # pretty_print([('prevs', prevs.size()),
        #               ('aftrs', aftrs.size())])

        prev_h = None
        cx = None

        for i, pair_vec in enumerate(prevs):
            if i == 0:
                prev_h, cx = self.prev_recursive(pair_vec.view(1, -1))
            else:
                prev_h, cx = self.prev_recursive(pair_vec.view(1, -1),
                                                 (prev_h, cx))

        aftr_h = None
        for i, pair_vec in enumerate(aftrs):
            if i == 0:
                aftr_h, cx = self.aftr_recursive(pair_vec.view(1, -1))
            else:
                aftr_h, cx = self.aftr_recursive(pair_vec.view(1, -1),
                                                 (aftr_h, cx))

        prev_feat = self.fc2(F.relu(self.fc1(prev_h)))
        aftr_feat = self.fc2(F.relu(self.fc1(aftr_h)))

        out = self.referee(torch.cat((prev_feat, aftr_feat), 1))
        return out.squeeze(-1)

    def freeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = False

    def unfreeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = True
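`CredPredictor.forward` expects `prev` and `aftr` to be batch-like objects exposing `input_ids`, `token_type_ids`, `attention_mask`, and per-sentence `labels` of shape `(num_sentences, 1)` (so that `REDUCE_SIZE + 1` matches the LSTMCell input size). A hedged sketch of how such inputs might be assembled, using a hypothetical `Batch` namedtuple:

```python
# Hypothetical usage sketch for CredPredictor; the Batch namedtuple is illustrative only.
from collections import namedtuple
import torch

Batch = namedtuple('Batch', ['input_ids', 'token_type_ids', 'attention_mask', 'labels'])

def make_batch(num_sentences, seq_len=32):
    return Batch(
        input_ids=torch.randint(0, 30522, (num_sentences, seq_len)),
        token_type_ids=torch.zeros(num_sentences, seq_len, dtype=torch.long),
        attention_mask=torch.ones(num_sentences, seq_len, dtype=torch.long),
        labels=torch.randint(0, 2, (num_sentences, 1)).float(),  # one scalar label per sentence
    )

model = CredPredictor(config)      # assumes a loaded BertConfig and a defined REDUCE_SIZE
prev, aftr = make_batch(3), make_batch(4)
score = model(prev, aftr)          # shape (1,): a single score for the context pair
```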
class BertForMultiLabelSequenceClassification(BertPreTrainedModel):
    r"""BERT model for classification.
    This module is composed of the BERT model with a linear layer on top of
    the pooled output.
    Params:
        `config`: a BertConfig class instance with the configuration to build a new model.
        `num_labels`: the number of classes for the classifier, read from `config.num_labels`. Default = 2.
    Inputs:
        `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length]
            with the word token indices in the vocabulary (see the token preprocessing logic in the scripts
            `extract_features.py`, `run_classifier.py` and `run_squad.py`)
        `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token
            types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to
            a `sentence B` token (see BERT paper for more details).
        `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices
            selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
            input sequence length in the current batch. It's the mask that we typically use for attention when
            a batch has varying length sentences.
        `labels`: multi-hot labels for the classification output: torch.FloatTensor of shape
            [batch_size, num_labels] with values selected in {0, 1}.
    Outputs:
        if `labels` is not `None`:
            Outputs a tuple `(loss, logits, ...)` where `loss` is the BCEWithLogitsLoss of the
            logits against the labels.
        if `labels` is `None`:
            Outputs a tuple `(logits, ...)` with classification logits of shape [batch_size, num_labels].
    Example usage:
    ```python
    # Already been converted into WordPiece token ids
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)
    model = BertForMultiLabelSequenceClassification(config)
    logits = model(input_ids, token_type_ids, input_mask)[0]
    ```
    """
    def __init__(self, config):
        super(BertForMultiLabelSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
        self.apply(self.init_weights)

    #def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
      #  _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, start_positions=None,
                end_positions=None, position_ids=None, head_mask=None, labels=None):
        outputs = self.bert(input_ids, position_ids=position_ids, token_type_ids=token_type_ids,
                            attention_mask=attention_mask, head_mask=head_mask)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        outputs = (logits,) + outputs[2:]

        if labels is not None:
            # positive-class weights for the five labels, scaled by 0.7 to soften them
            pos_weight = torch.tensor([37.5, 61.5, 6.2, 51.6, 31.3],
                                      device=logits.device) * 0.7
            loss_fct = BCEWithLogitsLoss(pos_weight=pos_weight)
            loss = loss_fct(logits.view(-1, self.num_labels),
                            labels.view(-1, self.num_labels))
            outputs = (loss,) + outputs
        
        return outputs
        
    def freeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = False
    
    def unfreeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = True
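For reference, a short sketch of how `pos_weight` shifts `BCEWithLogitsLoss` toward rare positive labels; the five weights mirror the ones hard-coded above and are assumptions about the label distribution rather than values tied to any particular dataset.

```python
# Hypothetical sketch: effect of pos_weight in BCEWithLogitsLoss for multi-label targets.
import torch
from torch.nn import BCEWithLogitsLoss

logits = torch.zeros(2, 5)                 # neutral predictions for five labels
labels = torch.tensor([[1., 0., 0., 0., 0.],
                       [0., 1., 0., 0., 0.]])

unweighted = BCEWithLogitsLoss()(logits, labels)
weighted = BCEWithLogitsLoss(
    pos_weight=torch.tensor([37.5, 61.5, 6.2, 51.6, 31.3]) * 0.7)(logits, labels)

print(unweighted.item(), weighted.item())  # the weighted loss penalizes missed positives more heavily
```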