Example #1
    def __init__(self, params):
        super(BiEncoderModule, self).__init__()
        ctxt_bert = BertModel.from_pretrained(params["bert_model"],
                                              output_hidden_states=True)
        if params["load_cand_enc_only"]:
            bert_model = "bert-large-uncased"
        else:
            bert_model = params['bert_model']
        cand_bert = BertModel.from_pretrained(
            bert_model,
            output_hidden_states=True,
        )
        self.context_encoder = BertEncoder(
            ctxt_bert,
            params["out_dim"],
            layer_pulled=params["pull_from_layer"],
            add_linear=params["add_linear"],
        )
        self.cand_encoder = BertEncoder(
            cand_bert,
            params["out_dim"],
            layer_pulled=params["pull_from_layer"],
            add_linear=params["add_linear"],
        )
        if params.get("freeze_cand_enc", False):
            for param in self.cand_encoder.parameters():
                param.requires_grad = False

        self.config = ctxt_bert.config

        ctxt_bert_output_dim = ctxt_bert.embeddings.word_embeddings.weight.size(
            1)

        self.mention_aggregation_type = params.get('mention_aggregation_type',
                                                   None)
        self.classification_heads = nn.ModuleDict({})
        self.linear_compression = None
        if self.mention_aggregation_type is not None:
            classification_heads_dict = {
                'get_context_embeds':
                GetContextEmbedsHead(
                    self.mention_aggregation_type,
                    ctxt_bert_output_dim,
                    cand_bert.embeddings.word_embeddings.weight.size(1),
                )
            }
            classification_heads_dict['mention_scores'] = MentionScoresHead(
                ctxt_bert_output_dim,
                params["mention_scoring_method"],
                params.get("max_mention_length", 10),
            )
            self.classification_heads = nn.ModuleDict(
                classification_heads_dict)
        elif ctxt_bert_output_dim != cand_bert.embeddings.word_embeddings.weight.size(
                1):
            # mapping to make the output dimensions match for dot-product similarity
            self.linear_compression = nn.Linear(
                ctxt_bert_output_dim,
                cand_bert.embeddings.word_embeddings.weight.size(1))
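
For reference, a minimal sketch of a params dict that would satisfy the keys this constructor reads; the values below are illustrative assumptions, not taken from the original code.

params = {
    "bert_model": "bert-large-uncased",
    "load_cand_enc_only": False,
    "out_dim": 100,
    "pull_from_layer": -1,
    "add_linear": True,
    "freeze_cand_enc": False,
    "mention_aggregation_type": "all_avg",   # any aggregation type accepted by GetContextEmbedsHead
    "mention_scoring_method": "qa_linear",   # any method accepted by MentionScoresHead
    "max_mention_length": 10,
}
model = BiEncoderModule(params)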
Example #2
    def __init__(self, bert_model_config: BertConfig):
        super(DocumentBertLinear, self).__init__(bert_model_config)
        self.bert = BertModel(bert_model_config)
        self.bert_batch_size = self.bert.config.bert_batch_size
        self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)

        self.classifier = nn.Sequential(
            nn.Dropout(p=bert_model_config.hidden_dropout_prob),
            nn.Linear(bert_model_config.hidden_size * self.bert_batch_size,
                      bert_model_config.num_labels), nn.Tanh())
Example #3
    def __init__(self, config):

        super(BertFCForMultiLable, self).__init__(config)
        # bert = BertModel.from_pretrained(bert_model_path)
        self.bert = BertModel(config)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.apply(self.init_weights)
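
The forward pass is not part of this snippet; below is a minimal sketch of how such a multi-label head is typically wired, assuming the pooled [CLS] output feeds the classifier and the labels are multi-hot vectors.

import torch.nn as nn

def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
    outputs = self.bert(input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids)
    pooled_output = outputs[1]                              # [batch_size, hidden_size]
    logits = self.classifier(self.dropout(pooled_output))   # [batch_size, num_labels]
    if labels is not None:
        # multi-label targets: one independent sigmoid per label
        loss = nn.BCEWithLogitsLoss()(logits, labels.float())
        return loss, logits
    return logits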
Example #4
    def __init__(self, bert_model_config: BertConfig):
        super(DocumentBertMaxPool, self).__init__(bert_model_config)
        self.bert = BertModel(bert_model_config)
        self.bert_batch_size = self.bert.config.bert_batch_size
        self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)

        # self.transformer_encoder = TransformerEncoderLayer(d_model=bert_model_config.hidden_size,
        #                                            nhead=6,
        #                                            dropout=bert_model_config.hidden_dropout_prob)
        #self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=6, norm=nn.LayerNorm(bert_model_config.hidden_size))
        self.classifier = nn.Sequential(
            nn.Dropout(p=bert_model_config.hidden_dropout_prob),
            nn.Linear(bert_model_config.hidden_size,
                      bert_model_config.num_labels), nn.Tanh())
Example #5
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        #  Prepare tokenized input
        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> segments_tensors = torch.tensor([segments_ids])
        # Load bertModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
        >>> model.eval()
        # Predict hidden states features for each layer
        >>> with torch.no_grad():
        ...     encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
Example #6
def main(raw_args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name",
                        type=str,
                        required=True,
                        help="model name e.g. bert-base-uncased")
    parser.add_argument("--cache_dir",
                        type=str,
                        default=None,
                        required=False,
                        help="Directory containing pytorch model")
    parser.add_argument("--pytorch_model_path",
                        type=str,
                        required=True,
                        help="/path/to/<pytorch-model-name>.bin")
    parser.add_argument("--tf_cache_dir",
                        type=str,
                        required=True,
                        help="Directory in which to save tensorflow model")
    args = parser.parse_args(raw_args)

    model = BertModel.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
        state_dict=torch.load(args.pytorch_model_path),
        cache_dir=args.cache_dir,
        args=args)

    convert_pytorch_checkpoint_to_tf(model=model,
                                     ckpt_dir=args.tf_cache_dir,
                                     model_name=args.model_name)
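
A hypothetical invocation of the conversion script above (the paths are placeholders); main() parses its own flags, so it can be driven programmatically as well as from the command line.

main([
    "--model_name", "bert-base-uncased",
    "--pytorch_model_path", "/path/to/pytorch_model.bin",
    "--tf_cache_dir", "/path/to/tf_export",
])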
Example #7
    def __init__(self, config):
        super(RecallTransformer, self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)
        self.hidden_size = config.hidden_size
        self.init_weights()
Example #8
    def __init__(self,
                 params,
                 tokenizer,
                 start_mention_id=None,
                 end_mention_id=None):
        super(CrossEncoderModule, self).__init__()
        model_path = params["bert_model"]
        if params.get("roberta"):
            encoder_model = RobertaModel.from_pretrained(model_path)
        else:
            encoder_model = BertModel.from_pretrained(model_path)
        encoder_model.resize_token_embeddings(len(tokenizer))
        self.pool_highlighted = params["pool_highlighted"]
        self.encoder = BertEncoder(encoder_model,
                                   params["out_dim"],
                                   layer_pulled=params["pull_from_layer"],
                                   add_linear=params["add_linear"]
                                   and not self.pool_highlighted,
                                   get_all_outputs=self.pool_highlighted)
        self.config = self.encoder.bert_model.config
        self.start_mention_id = start_mention_id
        self.end_mention_id = end_mention_id

        if self.pool_highlighted:
            bert_output_dim = encoder_model.embeddings.word_embeddings.weight.size(
                1)
            output_dim = params["out_dim"]
            self.additional_linear = nn.Linear(2 * bert_output_dim, output_dim)
            self.dropout = nn.Dropout(0.1)
Example #9
    def __init__(self, config):

        super(BertForMultiLable, self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.apply(self.init_weights)
Example #10
    def __init__(self, config, model_configs):
        super(BertBiLSTMCRF, self).__init__(config)
        self.num_labels = config.num_labels
        self.max_seq_length = model_configs['max_seq_length']
        self.bert = BertModel(config)
        self.use_cuda = model_configs['use_cuda'] and torch.cuda.is_available()
        self.crf = CRF(target_size=self.num_labels,
                       use_cuda=self.use_cuda,
                       average_batch=False)
        bert_embedding = config.hidden_size
        # hidden_dim is the output dimension of the LSTM;
        # it matches the hidden_dim used in init_hidden and is
        # half of the output layer's dimension (the LSTM is bidirectional)
        self.hidden_dim = config.hidden_size
        self.rnn_layers = model_configs['rnn_layers']
        self.lstm = nn.LSTM(
            input_size=bert_embedding,  # BERT embedding size
            hidden_size=self.hidden_dim,
            num_layers=self.rnn_layers,
            batch_first=True,
            # dropout = model_configs['train']['dropout_rate'],
            bidirectional=True)
        self.dropout = nn.Dropout(model_configs['dropout_rate'])
        # bidirectional output is 2 * hidden_dim; the +2 is presumably for the CRF's extra start/stop tags
        self.hidden2label = nn.Linear(self.hidden_dim * 2, self.num_labels + 2)
        self.apply(self.init_weights)
Example #11
    def __init__(self, config):
        super(BertForReranking, self).__init__(config)

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)
        self.init_weights()
Example #12
    def __init__(self, config):
        super(BertForEmotionClassification, self).__init__(config)
        self.bert = BertModel(config)
        self.num_labels = config.num_labels
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.fc = nn.Linear(config.hidden_size, self.num_labels)
        self.init_weights()
Example #13
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropouts = nn.ModuleDict()
        self.classifiers = nn.ModuleDict()
Example #14
    def __init__(self, config):
        super(BertForMLMwithClassification, self).__init__(config)
        self.bert = BertModel(config)

        self.num_labels = config.num_labels
        self.cls = BertPreTrainingHeads(config)
        self.init_weights()
Example #15
    def __init__(self, config, num_classes, vocab) -> None:
        super(SentenceClassifier, self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_classes)
        self.vocab = vocab
        self.apply(self.init_weights)
Example #16
    def __init__(self, config):
        super(BertPreTrainedModel, self).__init__(config)
        config.num_filters = basic_config.cnn.num_filters
        config.filter_sizes = basic_config.cnn.filter_sizes
        config.dropout = basic_config.dropout

        self.bert = BertModel(config)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.convs = nn.ModuleList([
            nn.Conv2d(1, config.num_filters, (k, config.hidden_size))
            for k in config.filter_sizes
        ])
        self.dropout = nn.Dropout(config.dropout)
        self.fc_cnn = nn.Linear(config.num_filters * len(config.filter_sizes),
                                config.num_labels)
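
The forward pass is not shown here; below is a minimal sketch of the usual TextCNN-over-BERT pattern this layout implies, where each filter size is convolved over the token embeddings and global-max-pooled before the final linear layer. This is an assumed wiring, not the original code.

import torch
import torch.nn.functional as F

def forward(self, input_ids, attention_mask=None, token_type_ids=None):
    sequence_output, _ = self.bert(input_ids,
                                   attention_mask=attention_mask,
                                   token_type_ids=token_type_ids)
    x = sequence_output.unsqueeze(1)                           # [B, 1, seq_len, hidden_size]
    pooled = []
    for conv in self.convs:
        c = F.relu(conv(x)).squeeze(3)                         # [B, num_filters, seq_len - k + 1]
        pooled.append(F.max_pool1d(c, c.size(2)).squeeze(2))   # [B, num_filters]
    x = self.dropout(torch.cat(pooled, dim=1))                 # [B, num_filters * len(filter_sizes)]
    return self.fc_cnn(x)                                      # [B, num_labels]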
Example #17
    def __init__(self, config):
        super(Bert_for_UNILM, self).__init__(config)

        self.bert = BertModel(config)
        self.classifier = nn.Linear(config.hidden_size, config.vocab_size)
        weight = self.bert.embeddings.word_embeddings.weight
        self.classifier.weight.data = weight.data
Example #18
    def __init__(self, config):
        super(BertForPreTrainingMLM, self).__init__(config)

        self.bert = BertModel(config)
        self.cls = BertPreTrainingHeads(config)

        self.init_weights()
        self.tie_weights()
Example #19
    def __init__(self, config):

        super(BertForMultiLable, self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.hidden_size_1)
        self.classifier_1 = nn.Linear(config.hidden_size_1, config.num_labels)
        self.relu = nn.ReLU()
Example #20
    def __init__(self, config):
        super(BertForQuestionAnswering, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        self.apply(self.init_weights)
Example #21
    def __init__(self, params):
        super(BiEncoderModule, self).__init__()
        ctxt_bert = BertModel.from_pretrained(params["bert_model"])
        cand_bert = BertModel.from_pretrained(params['bert_model'])
        self.context_encoder = BertEncoder(
            ctxt_bert,
            params["out_dim"],
            layer_pulled=params["pull_from_layer"],
            add_linear=params["add_linear"],
        )
        self.cand_encoder = BertEncoder(
            cand_bert,
            params["out_dim"],
            layer_pulled=params["pull_from_layer"],
            add_linear=params["add_linear"],
        )
        self.config = ctxt_bert.config
Example #22
    def __init__(self, config):
        super(BertDebiasForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
        self.apply(self.init_weights)
        self.config = config
        self.hypothesis_only = self.get_bool_value(config, "hypothesis_only")
        self.gamma_focal = config.gamma_focal if hasattr(
            config, "gamma_focal") else 2
        self.ensemble_training = self.get_bool_value(config,
                                                     "ensemble_training")
        self.poe_alpha = config.poe_alpha if hasattr(config,
                                                     'poe_alpha') else 1

        # Sets the rubi parameters.
        self.similarity = self.get_list_value(config, "similarity")
        self.rubi = self.get_bool_value(config, 'rubi')
        self.hans = self.get_bool_value(config, 'hans')
        self.hans_features = self.get_bool_value(config, 'hans_features')
        self.focal_loss = self.get_bool_value(config, 'focal_loss')
        self.length_features = self.get_list_value(config, "length_features")
        self.hans_only = self.get_bool_value(config, 'hans_only')
        self.aggregate_ensemble = self.get_str_value(config,
                                                     'aggregate_ensemble')
        self.poe_loss = self.get_bool_value(config, 'poe_loss')
        self.weighted_bias_only = self.get_bool_value(config,
                                                      "weighted_bias_only")

        num_labels_bias_only = self.config.num_labels
        if self.rubi or self.hypothesis_only or self.focal_loss or self.poe_loss or self.hans_only:
            if self.hans:
                num_features = 4 + len(self.similarity)

                if self.hans_features:
                    num_features += len(self.length_features)

                if not config.nonlinear_h_classifier:
                    self.h_classifier1 = nn.Linear(num_features,
                                                   num_labels_bias_only)
                else:
                    self.h_classifier1 = nn.Sequential(
                        nn.Linear(num_features, num_features), nn.Tanh(),
                        nn.Linear(num_features, num_features), nn.Tanh(),
                        nn.Linear(num_features, num_labels_bias_only))

                if self.ensemble_training:
                    self.h_classifier1_second = self.get_classifier(
                        config, config.nonlinear_h_classifier,
                        num_labels_bias_only)
            else:
                # Loads the classifiers from the pretrained model.
                self.h_classifier1 = self.get_classifier(
                    config, config.nonlinear_h_classifier,
                    num_labels_bias_only)

            self.lambda_h = config.lambda_h
Example #23
class BertDPCNNForMultiLabel(BertPreTrainedModel):
    def __init__(self, config):
        super(BertPreTrainedModel, self).__init__(config)
        config.kernel_size = basic_config.dpcnn.kernel_size
        config.num_filters = basic_config.dpcnn.num_filters

        self.bert = BertModel(config)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.conv_region = nn.Conv2d(1,
                                     config.num_filters,
                                     (3, config.hidden_size),
                                     stride=1)
        self.conv = nn.Conv2d(config.num_filters,
                              config.num_filters, (3, 1),
                              stride=1)
        self.max_pool = nn.MaxPool2d(kernel_size=(3, 1), stride=2)
        self.padding1 = nn.ZeroPad2d((0, 0, 1, 1))  # top bottom
        self.padding2 = nn.ZeroPad2d((0, 0, 0, 1))  # bottom
        self.relu = nn.ReLU()
        self.fc = nn.Linear(config.num_filters, config.num_labels)

    def forward(self,
                input_ids,
                attention_mask=None,
                token_type_ids=None,
                head_mask=None):
        outputs = self.bert(input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            head_mask=head_mask)
        encoder_out, text_cls = outputs
        x = encoder_out.unsqueeze(1)  # [batch_size, 1, seq_len, embed]
        x = self.conv_region(x)  # [batch_size, num_filters, seq_len-3+1, 1]
        x = self.padding1(x)  # [batch_size, num_filters, seq_len, 1]
        x = self.relu(x)
        x = self.conv(x)  # [batch_size, num_filters, seq_len-3+1, 1]
        x = self.padding1(x)  # [batch_size, num_filters, seq_len, 1]
        x = self.relu(x)
        x = self.conv(x)  # [batch_size, num_filters, seq_len-3+1, 1]
        while x.size()[2] > 2:
            x = self._block(x)
        x = x.squeeze()  # [batch_size, num_filters]
        x = self.fc(x)
        return x

    def _block(self, x):
        x = self.padding2(x)
        px = self.max_pool(x)
        x = self.padding1(px)
        x = F.relu(x)
        x = self.conv(x)
        x = self.padding1(x)
        x = F.relu(x)
        x = self.conv(x)
        x = x + px  # short cut
        return x
Example #24
    def __init__(self, config, max_seq_length=128):
        super(BertForNamedEntityRecognition, self).__init__(config)
        self.bert = BertModel(config)
        self.num_labels = config.num_labels
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.hidden_size = config.hidden_size
        self.max_seq_length = max_seq_length
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.apply(self.init_weights)
Example #25
    def load(cls, model_name: str, cache_model: bool = True) -> BertModel:
        if model_name in cls._cache:
            return PretrainedBertModel._cache[model_name]

        model = BertModel.from_pretrained(model_name)
        if cache_model:
            cls._cache[model_name] = model

        return model
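
A minimal usage sketch (assuming load is exposed as a classmethod on PretrainedBertModel): repeated loads of the same model name return the cached instance instead of re-loading the weights.

bert_a = PretrainedBertModel.load("bert-base-uncased")
bert_b = PretrainedBertModel.load("bert-base-uncased")
assert bert_a is bert_b  # second call is served from the class-level cache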
Example #26
    def __init__(self, config):
        super(BertForTokenClassification1hot, self).__init__(config)
        self.num_labels = 2  # not config.num_labels: each output vector is a binary "yes/no" decision, so num_labels stays fixed at 2 to avoid shape errors later

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.apply(self.init_weights)
Example #27
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)
        self.apply(self.init_weights)
        self.loss = None
Example #28
    def __init__(self, config, n_filters=None, filter_sizes=None):
        super(BertCNN, self).__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.convs = Conv1d(config.hidden_size, n_filters, filter_sizes)

        self.classifier = nn.Linear(len(filter_sizes) * n_filters, config.num_labels)
        self.init_weights()
Example #29
    def __init__(self, config, tie_weights):
        super(BertMCQWeightedSum, self).__init__(config)
        self.bert = BertModel(config)
        self._dropout = nn.Dropout(config.hidden_dropout_prob)
        self._classification_layer = nn.Linear(config.hidden_size, 1)
        if tie_weights is True:
            self._weight_layer = self._classification_layer
        else:
            self._weight_layer = nn.Linear(config.hidden_size, 1)
        self.apply(self.init_weights)
Example #30
    def __init__(self, bert_model_config: BertConfig):
        super(BertSimilarityRegressor, self).__init__(bert_model_config)
        self.bert = BertModel(bert_model_config)
        linear_size = bert_model_config.hidden_size

        self.regression = nn.Sequential(
            nn.Dropout(p=bert_model_config.hidden_dropout_prob),
            nn.Linear(linear_size, 1))

        self.apply(self.init_weights)
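
The forward pass is not part of this snippet; below is a minimal sketch of how such a regression head is typically used, assuming the pooled [CLS] output feeds the regression layer and the model is trained with an MSE loss.

import torch.nn as nn

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    _, pooled_output = self.bert(input_ids,
                                 token_type_ids=token_type_ids,
                                 attention_mask=attention_mask)
    score = self.regression(pooled_output)          # [batch_size, 1]
    if labels is not None:
        loss = nn.MSELoss()(score.view(-1), labels.view(-1).float())
        return loss, score
    return score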