def __init__(self, params):
    super(BiEncoderModule, self).__init__()
    ctxt_bert = BertModel.from_pretrained(params["bert_model"], output_hidden_states=True)
    if params["load_cand_enc_only"]:
        bert_model = "bert-large-uncased"
    else:
        bert_model = params["bert_model"]
    cand_bert = BertModel.from_pretrained(
        bert_model,
        output_hidden_states=True,
    )
    self.context_encoder = BertEncoder(
        ctxt_bert,
        params["out_dim"],
        layer_pulled=params["pull_from_layer"],
        add_linear=params["add_linear"],
    )
    self.cand_encoder = BertEncoder(
        cand_bert,
        params["out_dim"],
        layer_pulled=params["pull_from_layer"],
        add_linear=params["add_linear"],
    )
    if params.get("freeze_cand_enc", False):
        for param in self.cand_encoder.parameters():
            param.requires_grad = False

    self.config = ctxt_bert.config
    ctxt_bert_output_dim = ctxt_bert.embeddings.word_embeddings.weight.size(1)

    self.mention_aggregation_type = params.get("mention_aggregation_type", None)
    self.classification_heads = nn.ModuleDict({})
    self.linear_compression = None
    if self.mention_aggregation_type is not None:
        classification_heads_dict = {
            "get_context_embeds": GetContextEmbedsHead(
                self.mention_aggregation_type,
                ctxt_bert_output_dim,
                cand_bert.embeddings.word_embeddings.weight.size(1),
            )
        }
        classification_heads_dict["mention_scores"] = MentionScoresHead(
            ctxt_bert_output_dim,
            params["mention_scoring_method"],
            params.get("max_mention_length", 10),
        )
        self.classification_heads = nn.ModuleDict(classification_heads_dict)
    elif ctxt_bert_output_dim != cand_bert.embeddings.word_embeddings.weight.size(1):
        # mapping to make the output dimensions match for dot-product similarity
        self.linear_compression = nn.Linear(
            ctxt_bert_output_dim,
            cand_bert.embeddings.word_embeddings.weight.size(1))
def __init__(self, bert_model_config: BertConfig):
    super(DocumentBertLinear, self).__init__(bert_model_config)
    self.bert = BertModel(bert_model_config)
    self.bert_batch_size = self.bert.config.bert_batch_size
    self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
    self.classifier = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(bert_model_config.hidden_size * self.bert_batch_size,
                  bert_model_config.num_labels),
        nn.Tanh())
def __init__(self, config):
    super(BertFCForMultiLable, self).__init__(config)
    # bert = BertModel.from_pretrained(bert_model_path)
    self.bert = BertModel(config)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_weights)
def __init__(self, bert_model_config: BertConfig):
    super(DocumentBertMaxPool, self).__init__(bert_model_config)
    self.bert = BertModel(bert_model_config)
    self.bert_batch_size = self.bert.config.bert_batch_size
    self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
    # self.transformer_encoder = TransformerEncoderLayer(d_model=bert_model_config.hidden_size,
    #                                                    nhead=6,
    #                                                    dropout=bert_model_config.hidden_dropout_prob)
    # self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=6,
    #                                               norm=nn.LayerNorm(bert_model_config.hidden_size))
    self.classifier = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(bert_model_config.hidden_size, bert_model_config.num_labels),
        nn.Tanh())
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)

        # Prepare tokenized input
        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> segments_tensors = torch.tensor([segments_ids])

        # Load bertModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
        >>> model.eval()

        # Predict hidden states features for each layer
        >>> with torch.no_grad():
        ...     encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
def main(raw_args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name",
                        type=str,
                        required=True,
                        help="model name e.g. bert-base-uncased")
    parser.add_argument("--cache_dir",
                        type=str,
                        default=None,
                        required=False,
                        help="Directory containing pytorch model")
    parser.add_argument("--pytorch_model_path",
                        type=str,
                        required=True,
                        help="/path/to/<pytorch-model-name>.bin")
    parser.add_argument("--tf_cache_dir",
                        type=str,
                        required=True,
                        help="Directory in which to save tensorflow model")
    args = parser.parse_args(raw_args)

    model = BertModel.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
        state_dict=torch.load(args.pytorch_model_path),
        cache_dir=args.cache_dir,
        args=args)

    convert_pytorch_checkpoint_to_tf(model=model,
                                     ckpt_dir=args.tf_cache_dir,
                                     model_name=args.model_name)
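Because main takes an optional raw_args list, the converter above can also be driven from Python instead of the command line. A minimal usage sketch, assuming the module defining main is importable; the model name and file paths below are placeholders, not values from the source:

# Hedged usage sketch: model name and paths are placeholders.
main([
    "--model_name", "bert-base-uncased",
    "--pytorch_model_path", "./pytorch_model.bin",
    "--tf_cache_dir", "./tf_checkpoint",
])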
def __init__(self, config):
    super(RecallTransformer, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 1)
    self.hidden_size = config.hidden_size
    self.init_weights()
def __init__(self, params, tokenizer, start_mention_id=None, end_mention_id=None):
    super(CrossEncoderModule, self).__init__()
    model_path = params["bert_model"]
    if params.get("roberta"):
        encoder_model = RobertaModel.from_pretrained(model_path)
    else:
        encoder_model = BertModel.from_pretrained(model_path)
    encoder_model.resize_token_embeddings(len(tokenizer))
    self.pool_highlighted = params["pool_highlighted"]
    self.encoder = BertEncoder(
        encoder_model,
        params["out_dim"],
        layer_pulled=params["pull_from_layer"],
        add_linear=params["add_linear"] and not self.pool_highlighted,
        get_all_outputs=self.pool_highlighted)
    self.config = self.encoder.bert_model.config
    self.start_mention_id = start_mention_id
    self.end_mention_id = end_mention_id
    if self.pool_highlighted:
        bert_output_dim = encoder_model.embeddings.word_embeddings.weight.size(1)
        output_dim = params["out_dim"]
        self.additional_linear = nn.Linear(2 * bert_output_dim, output_dim)
        self.dropout = nn.Dropout(0.1)
def __init__(self, config):
    super(BertForMultiLable, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_weights)
def __init__(self, config, model_configs):
    super(BertBiLSTMCRF, self).__init__(config)
    self.num_labels = config.num_labels
    self.max_seq_length = model_configs['max_seq_length']
    self.bert = BertModel(config)
    self.use_cuda = model_configs['use_cuda'] and torch.cuda.is_available()
    self.crf = CRF(target_size=self.num_labels,
                   use_cuda=self.use_cuda,
                   average_batch=False)
    bert_embedding = config.hidden_size
    # hidden_dim is the output dimension;
    # the LSTM's hidden_dim is the same as init_hidden's hidden_dim,
    # and is 1/2 of the output layer's hidden_dim
    self.hidden_dim = config.hidden_size
    self.rnn_layers = model_configs['rnn_layers']
    self.lstm = nn.LSTM(
        input_size=bert_embedding,  # BERT embedding size
        hidden_size=self.hidden_dim,
        num_layers=self.rnn_layers,
        batch_first=True,
        # dropout=model_configs['train']['dropout_rate'],
        bidirectional=True)
    self.dropout = nn.Dropout(model_configs['dropout_rate'])
    self.hidden2label = nn.Linear(self.hidden_dim * 2, self.num_labels + 2)
    self.apply(self.init_weights)
def __init__(self, config):
    super(BertForReranking, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 1)
    self.init_weights()
def __init__(self, config):
    super(BertForEmotionClassification, self).__init__(config)
    self.bert = BertModel(config)
    self.num_labels = config.num_labels
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.fc = nn.Linear(config.hidden_size, self.num_labels)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropouts = nn.ModuleDict()
    self.classifiers = nn.ModuleDict()
def __init__(self, config):
    super(BertForMLMwithClassification, self).__init__(config)
    self.bert = BertModel(config)
    self.num_labels = config.num_labels
    self.cls = BertPreTrainingHeads(config)
    self.init_weights()
def __init__(self, config, num_classes, vocab) -> None:
    super(SentenceClassifier, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_classes)
    self.vocab = vocab
    self.apply(self.init_weights)
def __init__(self, config):
    super(BertPreTrainedModel, self).__init__(config)
    config.num_filters = basic_config.cnn.num_filters
    config.filter_sizes = basic_config.cnn.filter_sizes
    config.dropout = basic_config.dropout
    self.bert = BertModel(config)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.convs = nn.ModuleList([
        nn.Conv2d(1, config.num_filters, (k, config.hidden_size))
        for k in config.filter_sizes
    ])
    self.dropout = nn.Dropout(config.dropout)
    self.fc_cnn = nn.Linear(config.num_filters * len(config.filter_sizes),
                            config.num_labels)
def __init__(self, config):
    super(Bert_for_UNILM, self).__init__(config)
    self.bert = BertModel(config)
    self.classifier = nn.Linear(config.hidden_size, config.vocab_size)
    # Tie the output projection to the input word-embedding matrix.
    weight = self.bert.embeddings.word_embeddings.weight
    self.classifier.weight.data = weight.data
def __init__(self, config):
    super(BertForPreTrainingMLM, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertPreTrainingHeads(config)
    self.init_weights()
    self.tie_weights()
def __init__(self, config):
    super(BertForMultiLable, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.hidden_size_1)
    self.classifier_1 = nn.Linear(config.hidden_size_1, config.num_labels)
    self.relu = nn.ReLU()
def __init__(self, config):
    super(BertForQuestionAnswering, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_weights)
def __init__(self, params): super(BiEncoderModule, self).__init__() ctxt_bert = BertModel.from_pretrained(params["bert_model"]) cand_bert = BertModel.from_pretrained(params['bert_model']) self.context_encoder = BertEncoder( ctxt_bert, params["out_dim"], layer_pulled=params["pull_from_layer"], add_linear=params["add_linear"], ) self.cand_encoder = BertEncoder( cand_bert, params["out_dim"], layer_pulled=params["pull_from_layer"], add_linear=params["add_linear"], ) self.config = ctxt_bert.config
def __init__(self, config):
    super(BertDebiasForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
    self.apply(self.init_weights)
    self.config = config
    self.hypothesis_only = self.get_bool_value(config, "hypothesis_only")
    self.gamma_focal = config.gamma_focal if hasattr(config, "gamma_focal") else 2
    self.ensemble_training = self.get_bool_value(config, "ensemble_training")
    self.poe_alpha = config.poe_alpha if hasattr(config, "poe_alpha") else 1

    # Sets the rubi parameters.
    self.similarity = self.get_list_value(config, "similarity")
    self.rubi = self.get_bool_value(config, "rubi")
    self.hans = self.get_bool_value(config, "hans")
    self.hans_features = self.get_bool_value(config, "hans_features")
    self.focal_loss = self.get_bool_value(config, "focal_loss")
    self.length_features = self.get_list_value(config, "length_features")
    self.hans_only = self.get_bool_value(config, "hans_only")
    self.aggregate_ensemble = self.get_str_value(config, "aggregate_ensemble")
    self.poe_loss = self.get_bool_value(config, "poe_loss")
    self.weighted_bias_only = self.get_bool_value(config, "weighted_bias_only")

    num_labels_bias_only = self.config.num_labels
    if self.rubi or self.hypothesis_only or self.focal_loss or self.poe_loss or self.hans_only:
        if self.hans:
            num_features = 4 + len(self.similarity)
            if self.hans_features:
                num_features += len(self.length_features)
            if not config.nonlinear_h_classifier:
                self.h_classifier1 = nn.Linear(num_features, num_labels_bias_only)
            else:
                self.h_classifier1 = nn.Sequential(
                    nn.Linear(num_features, num_features),
                    nn.Tanh(),
                    nn.Linear(num_features, num_features),
                    nn.Tanh(),
                    nn.Linear(num_features, num_labels_bias_only))
            if self.ensemble_training:
                self.h_classifier1_second = self.get_classifier(
                    config, config.nonlinear_h_classifier, num_labels_bias_only)
        else:
            # Loads the classifiers from the pretrained model.
            self.h_classifier1 = self.get_classifier(
                config, config.nonlinear_h_classifier, num_labels_bias_only)
        self.lambda_h = config.lambda_h
class BertDPCNNForMultiLabel(BertPreTrainedModel):
    def __init__(self, config):
        super(BertPreTrainedModel, self).__init__(config)
        config.kernel_size = basic_config.dpcnn.kernel_size
        config.num_filters = basic_config.dpcnn.num_filters
        self.bert = BertModel(config)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.conv_region = nn.Conv2d(1, config.num_filters,
                                     (3, config.hidden_size), stride=1)
        self.conv = nn.Conv2d(config.num_filters, config.num_filters,
                              (3, 1), stride=1)
        self.max_pool = nn.MaxPool2d(kernel_size=(3, 1), stride=2)
        self.padding1 = nn.ZeroPad2d((0, 0, 1, 1))  # pad top and bottom
        self.padding2 = nn.ZeroPad2d((0, 0, 0, 1))  # pad bottom only
        self.relu = nn.ReLU()
        self.fc = nn.Linear(config.num_filters, config.num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, head_mask=None):
        outputs = self.bert(input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            head_mask=head_mask)
        encoder_out, text_cls = outputs
        x = encoder_out.unsqueeze(1)  # [batch_size, 1, seq_len, embed]
        x = self.conv_region(x)       # [batch_size, num_filters, seq_len-3+1, 1]
        x = self.padding1(x)          # [batch_size, num_filters, seq_len, 1]
        x = self.relu(x)
        x = self.conv(x)              # [batch_size, num_filters, seq_len-3+1, 1]
        x = self.padding1(x)          # [batch_size, num_filters, seq_len, 1]
        x = self.relu(x)
        x = self.conv(x)              # [batch_size, num_filters, seq_len-3+1, 1]
        while x.size()[2] > 2:
            x = self._block(x)
        x = x.squeeze()               # [batch_size, num_filters]
        x = self.fc(x)
        return x

    def _block(self, x):
        x = self.padding2(x)
        px = self.max_pool(x)
        x = self.padding1(px)
        x = F.relu(x)
        x = self.conv(x)
        x = self.padding1(x)
        x = F.relu(x)
        x = self.conv(x)
        x = x + px  # shortcut connection
        return x
def __init__(self, config, max_seq_length=128):
    super(BertForNamedEntityRecognition, self).__init__(config)
    self.bert = BertModel(config)
    self.num_labels = config.num_labels
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.hidden_size = config.hidden_size
    self.max_seq_length = max_seq_length
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_weights)
def load(cls, model_name: str, cache_model: bool = True) -> BertModel:
    if model_name in cls._cache:
        return PretrainedBertModel._cache[model_name]

    model = BertModel.from_pretrained(model_name)
    if cache_model:
        cls._cache[model_name] = model

    return model
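A short usage sketch for this cached loader, assuming load is a classmethod on a PretrainedBertModel class with a class-level _cache dict (as the body implies); the model name is only an example:

# Hedged sketch: assumes PretrainedBertModel._cache exists and load() is a classmethod.
model_a = PretrainedBertModel.load("bert-base-uncased")
model_b = PretrainedBertModel.load("bert-base-uncased")
assert model_a is model_b  # the second call returns the cached instance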
def __init__(self, config):
    super(BertForTokenClassification1hot, self).__init__(config)
    # For us, each output vector is "yes/no", so we keep this at
    # self.num_labels = 2 (rather than config.num_labels) to avoid
    # any strange error later.
    self.num_labels = 2
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_weights)
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.num_labels)
    self.apply(self.init_weights)
    self.loss = None
def __init__(self, config, n_filters=None, filter_sizes=None):
    super(BertCNN, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.convs = Conv1d(config.hidden_size, n_filters, filter_sizes)
    self.classifier = nn.Linear(len(filter_sizes) * n_filters, config.num_labels)
    self.init_weights()
def __init__(self, config, tie_weights):
    super(BertMCQWeightedSum, self).__init__(config)
    self.bert = BertModel(config)
    self._dropout = nn.Dropout(config.hidden_dropout_prob)
    self._classification_layer = nn.Linear(config.hidden_size, 1)
    if tie_weights is True:
        self._weight_layer = self._classification_layer
    else:
        self._weight_layer = nn.Linear(config.hidden_size, 1)
    self.apply(self.init_weights)
def __init__(self, bert_model_config: BertConfig):
    super(BertSimilarityRegressor, self).__init__(bert_model_config)
    self.bert = BertModel(bert_model_config)
    linear_size = bert_model_config.hidden_size
    self.regression = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(linear_size, 1))
    self.apply(self.init_weights)