def __init__(self, *, pretrained_model_name=None, config_filename=None,
             vocab_size=None, hidden_size=768, num_hidden_layers=12,
             num_attention_heads=12, intermediate_size=3072, hidden_act="gelu",
             max_position_embeddings=512, random_init=False, **kwargs):
    TrainableNM.__init__(self, **kwargs)

    # Check that exactly one of pretrained_model_name, config_filename,
    # and vocab_size was passed.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Exactly one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "BERT constructor.")

    if vocab_size is not None:
        config = BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings)
        model = BertModel(config)
    elif pretrained_model_name is not None:
        model = BertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = BertConfig.from_json_file(config_filename)
        model = BertModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must "
            "be passed into the BERT constructor")

    model.to(self._device)

    self.add_module("bert", model)
    self.config = model.config
    if random_init:
        self.apply(
            lambda module: transformer_weights_init(module, xavier=False))
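# A minimal standalone sketch (not from the original) of the "exactly one
# configuration source" pattern used above. The helper name `build_bert` is
# hypothetical, and it uses the modern HuggingFace `transformers` BertConfig
# API rather than the older vocab_size_or_config_json_file keyword.
from transformers import BertConfig, BertModel

def build_bert(pretrained_model_name=None, config_filename=None, vocab_size=None):
    sources = [pretrained_model_name, config_filename, vocab_size]
    if sum(s is not None for s in sources) != 1:
        raise ValueError("Pass exactly one of pretrained_model_name, "
                         "config_filename, or vocab_size.")
    if pretrained_model_name is not None:
        # Loads both the architecture and the pretrained weights.
        return BertModel.from_pretrained(pretrained_model_name)
    if config_filename is not None:
        # Architecture from a JSON config; weights are randomly initialized.
        return BertModel(BertConfig.from_json_file(config_filename))
    # Architecture from an explicit vocab size; weights are randomly initialized.
    return BertModel(BertConfig(vocab_size=vocab_size))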
def __init__(self, config, num_labels=2):
    super(BertForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = num_labels
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
    self.apply(self.init_bert_weights)
def create_and_check_bert_model(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
):
    model = BertModel(config=config)
    model.eval()
    sequence_output, pooled_output = model(input_ids, token_type_ids, input_mask)
    sequence_output, pooled_output = model(input_ids, token_type_ids)
    sequence_output, pooled_output = model(input_ids)
    result = {
        "sequence_output": sequence_output,
        "pooled_output": pooled_output,
    }
    self.parent.assertListEqual(
        list(result["sequence_output"].size()),
        [self.batch_size, self.seq_length, self.hidden_size],
    )
    self.parent.assertListEqual(
        list(result["pooled_output"].size()),
        [self.batch_size, self.hidden_size],
    )
def __init__(self, config):
    super(BertForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.l2_reg_lambda = config.l2_reg_lambda
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size * 3, self.config.num_labels)
    self.tanh = nn.Tanh()

    rnn_hidden_size = 300
    num_layers = 1
    self.rnn = nn.LSTM(config.hidden_size, rnn_hidden_size, num_layers,
                       bidirectional=True, dropout=0.5, batch_first=True)
    self.W2 = nn.Linear(config.hidden_size + 2 * rnn_hidden_size,
                        config.hidden_size)
    # self.convs = Conv1d(config.hidden_size, n_filters, filter_sizes)

    self.W_w = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
    self.u_w = nn.Parameter(torch.Tensor(config.hidden_size, 1))
    # self.W_w1 = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
    # self.W_w2 = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
    # self.W_w3 = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
    nn.init.uniform_(self.W_w, -0.1, 0.1)
    nn.init.uniform_(self.u_w, -0.1, 0.1)
    # nn.init.uniform_(self.W_w1, -0.1, 0.1)
    # nn.init.uniform_(self.W_w2, -0.1, 0.1)
    # nn.init.uniform_(self.W_w3, -0.1, 0.1)

    self.apply(self.init_weights)
def __init__(self, config):
    super(BertForTokenClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
def __init__(self, name='bert-base-uncased', dropout=0.1, num_class=2):
    super(BertC, self).__init__()
    config = BertConfig.from_pretrained(name)
    # Note: BertModel(config) builds the architecture with randomly initialized
    # weights; BertModel.from_pretrained(name) would load the pretrained ones.
    self.bert = BertModel(config)
    self.proj = nn.Linear(config.hidden_size, num_class)
    self.loss_f = nn.CrossEntropyLoss()
    self.drop = nn.Dropout(p=dropout)
def __init__(self, vocab_size, tag_to_ix, hidden_dim, n_layers):
    super(BERT_BiLSTM_CRF, self).__init__()
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)

    config = BertConfig.from_pretrained('bert-base-multilingual-cased')
    self.model = BertModel(config)

    self.lstm = nn.LSTM(768, hidden_dim, num_layers=n_layers,
                        bidirectional=True)

    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim * 2, self.tagset_size)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size, device=device))

    # These two statements enforce the constraint that we never transfer
    # to the start tag and we never transfer from the stop tag.
    self.transitions.data[tag_to_ix[START_TAG], :] = -10000
    self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

    self.hidden = self.init_hidden()
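# Hedged sketch of the `init_hidden` helper referenced above; its definition
# is not shown in the original. For a bidirectional LSTM with `n_layers`
# layers, the initial hidden and cell states have shape
# (num_layers * num_directions, batch, hidden_dim); batch size 1 is assumed.
def init_hidden(self):
    return (torch.randn(2 * self.n_layers, 1, self.hidden_dim, device=device),
            torch.randn(2 * self.n_layers, 1, self.hidden_dim, device=device))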
def __init__(self, args, device, checkpoint):
    super(ExtSummarizer, self).__init__()
    self.args = args
    self.device = device
    self.bert = Bert(args, args.temp_dir, args.finetune_bert)

    self.ext_layer = ExtTransformerEncoder(
        self.bert.model.config.hidden_size, args.ext_ff_size, args.ext_heads,
        args.ext_dropout, args.ext_layers)
    if args.encoder == 'baseline':
        bert_config = BertConfig(self.bert.model.config.vocab_size,
                                 hidden_size=args.ext_hidden_size,
                                 num_hidden_layers=args.ext_layers,
                                 num_attention_heads=args.ext_heads,
                                 intermediate_size=args.ext_ff_size)
        self.bert.model = BertModel(bert_config)
        self.ext_layer = Classifier(self.bert.model.config.hidden_size)

    if args.max_pos > 512:
        my_pos_embeddings = nn.Embedding(args.max_pos,
                                         self.bert.model.config.hidden_size)
        pos_emb = self.bert.model.embeddings.position_embeddings
        my_pos_embeddings.weight.data[:512] = pos_emb.weight.data
        my_pos_embeddings.weight.data[512:] = pos_emb.weight.data[-1][None, :].repeat(
            args.max_pos - 512, 1)
        self.bert.model.embeddings.position_embeddings = my_pos_embeddings

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        if args.param_init != 0.0:
            for p in self.ext_layer.parameters():
                p.data.uniform_(-args.param_init, args.param_init)
        if args.param_init_glorot:
            for p in self.ext_layer.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

    self.to(device)
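# Standalone sketch of the `max_pos > 512` branch above: pretrained BERT only
# ships 512 learned position embeddings, so the extra slots are filled by
# repeating the last learned embedding. `model` is assumed to be a HuggingFace
# BertModel; the helper name is hypothetical.
import torch.nn as nn

def extend_position_embeddings(model, max_pos):
    old = model.embeddings.position_embeddings
    new = nn.Embedding(max_pos, old.weight.size(1))
    new.weight.data[:512] = old.weight.data
    new.weight.data[512:] = old.weight.data[-1][None, :].repeat(max_pos - 512, 1)
    model.embeddings.position_embeddings = new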
def __init__(self, args, device, checkpoint=None, bert_from_extractive=None):
    super(AbsSummarizer, self).__init__()
    self.args = args
    self.device = device
    self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

    if bert_from_extractive is not None:
        # n[11:] strips the 'bert.model.' prefix from extractive-model
        # parameter names.
        self.bert.model.load_state_dict(
            dict([(n[11:], p) for n, p in bert_from_extractive.items()
                  if n.startswith('bert.model')]),
            strict=True)

    if args.encoder == 'baseline':
        bert_config = BertConfig(self.bert.model.config.vocab_size,
                                 hidden_size=args.enc_hidden_size,
                                 num_hidden_layers=args.enc_layers,
                                 num_attention_heads=8,
                                 intermediate_size=args.enc_ff_size,
                                 hidden_dropout_prob=args.enc_dropout,
                                 attention_probs_dropout_prob=args.enc_dropout)
        self.bert.model = BertModel(bert_config)

    if args.max_pos > 512:
        my_pos_embeddings = nn.Embedding(args.max_pos,
                                         self.bert.model.config.hidden_size)
        pos_emb = self.bert.model.embeddings.position_embeddings
        my_pos_embeddings.weight.data[:512] = pos_emb.weight.data
        my_pos_embeddings.weight.data[512:] = pos_emb.weight.data[-1][None, :].repeat(
            args.max_pos - 512, 1)
        self.bert.model.embeddings.position_embeddings = my_pos_embeddings
    self.vocab_size = self.bert.model.config.vocab_size

    tgt_embeddings = nn.Embedding(self.vocab_size,
                                  self.bert.model.config.hidden_size,
                                  padding_idx=0)
    if self.args.share_emb:
        tgt_embeddings = self.bert.model.embeddings.word_embeddings

    self.decoder = TransformerDecoder(
        self.args.dec_layers, self.args.dec_hidden_size,
        heads=self.args.dec_heads, d_ff=self.args.dec_ff_size,
        dropout=self.args.dec_dropout, embeddings=tgt_embeddings)

    self.generator = get_generator(self.vocab_size,
                                   self.args.dec_hidden_size, device)
    self.generator[0].weight = self.decoder.embeddings.weight

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        for module in self.decoder.modules():
            if isinstance(module, (nn.Linear, nn.Embedding)):
                module.weight.data.normal_(mean=0.0, std=0.02)
            elif isinstance(module, nn.LayerNorm):
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        for p in self.generator.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)
            else:
                p.data.zero_()
        if args.use_bert_emb:
            tgt_embeddings = nn.Embedding(self.vocab_size,
                                          self.bert.model.config.hidden_size,
                                          padding_idx=0)
            tgt_embeddings.weight = copy.deepcopy(
                self.bert.model.embeddings.word_embeddings.weight)
            self.decoder.embeddings = tgt_embeddings
            self.generator[0].weight = self.decoder.embeddings.weight

    self.to(device)
def __init__(self, config, num_tag):
    super(Bert_SenAnalysis, self).__init__(config)
    self.num_labels = num_tag
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.classifier = nn.Linear(config.hidden_size, num_tag)
    # self.pooling = nn.Linear(config.hidden_size, config.hidden_size)

    # Note: relies on a module-level `args` for the GRU hyperparameters.
    self.classifier = nn.Linear(args.lstm_hidden_size * 2, self.num_labels)
    self.W = []
    self.gru = []
    for i in range(args.lstm_layers):
        self.W.append(
            nn.Linear(args.lstm_hidden_size * 2, args.lstm_hidden_size * 2))
        self.gru.append(
            nn.GRU(config.hidden_size if i == 0 else args.lstm_hidden_size * 4,
                   args.lstm_hidden_size,
                   num_layers=1,
                   bidirectional=True,
                   batch_first=True).cuda())
    self.W = nn.ModuleList(self.W)
    self.gru = nn.ModuleList(self.gru)
    self.init_weights()
def __init__(self, args, device, checkpoint):
    super(ExtSummarizer, self).__init__()
    self.args = args
    self.device = device
    self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

    self.ext_layer = ExtTransformerEncoder(
        self.bert.model.config.hidden_size, args.ext_ff_size, args.ext_heads,
        args.ext_dropout, args.ext_layers)
    if args.encoder == 'baseline':
        bert_config = BertConfig(self.bert.model.config.vocab_size,
                                 hidden_size=args.hidden_size,
                                 num_hidden_layers=6,
                                 num_attention_heads=8,
                                 intermediate_size=args.ff_size)
        self.bert.model = BertModel(bert_config)
        self.ext_layer = Classifier(self.bert.model.config.hidden_size)

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        if args.param_init != 0.0:
            for p in self.ext_layer.parameters():
                p.data.uniform_(-args.param_init, args.param_init)
        if args.param_init_glorot:
            for p in self.ext_layer.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

    self.to(device)
def createCsvData():
    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    # Note: BertModel(config) builds the architecture with randomly
    # initialized weights; BertModel.from_pretrained('bert-base-uncased')
    # would load the pretrained ones.
    model = BertModel(config)

    with Cd("lemmadata"):
        with open("id_to_sent.json") as sent_id_dict_file:
            sent_id_dict = json.load(sent_id_dict_file)
        for dir_item in os.listdir():
            if os.path.isfile(dir_item):
                if dir_item.endswith(".json") and dir_item != "id_to_sent.json":
                    print(dir_item)
                    with open(dir_item, "r") as f:
                        lemma_data = json.load(f)
                    with Cd("vectors"):
                        with open(dir_item[:-5] + ".csv", "w") as vector_file:
                            writer = csv.writer(vector_file, delimiter=",")
                            for instance in lemma_data:
                                inst_sent_id = instance["sent_id"]
                                inst_sense = instance["sense"]
                                inst_sent = sent_id_dict[str(inst_sent_id)]
                                # Skip sentences beyond BERT's 512-token limit.
                                if len(inst_sent) > 511:
                                    continue
                                vector = vectorizeWordInContext(
                                    inst_sent, instance["pos"], tokenizer, model)
                                vec_list = vector.detach().tolist()
                                row_data = ([inst_sent_id, instance["pos"],
                                             inst_sense] + vec_list)
                                writer.writerow(row_data)
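# Hedged sketch of the `vectorizeWordInContext` helper used above (its real
# definition is not shown). It assumes a modern `transformers` fast tokenizer
# (for word_ids()) and model, unlike the slow BertTokenizer in the snippet:
# it returns the contextual vector of the word at index `pos`, averaged over
# that word's wordpieces.
import torch

def vectorizeWordInContext(sent, pos, tokenizer, model):
    # `sent` is a list of words; `pos` indexes the target word.
    enc = tokenizer(sent, is_split_into_words=True, return_tensors="pt")
    with torch.no_grad():
        hidden = model(**enc).last_hidden_state[0]
    pieces = [i for i, w in enumerate(enc.word_ids()) if w == pos]
    return hidden[pieces].mean(dim=0)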
def __init__(self, config, num_labels):
    super(BERTCRF, self).__init__(config)
    self.num_labels = num_labels  # number of entity labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.fc = nn.Linear(config.hidden_size, num_labels)
    self.crf = CRF(num_labels, batch_first=True)
    self.apply(self.init_bert_weights)
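# Hedged sketch of a forward pass matching the BERTCRF constructor above,
# assuming a torchcrf-style CRF API (calling `crf(...)` returns the
# log-likelihood, `crf.decode(...)` the best tag paths) and a BertModel whose
# first output is the sequence of hidden states.
def forward(self, input_ids, attention_mask, labels=None):
    sequence_output = self.bert(input_ids, attention_mask=attention_mask)[0]
    emissions = self.fc(self.dropout(sequence_output))
    mask = attention_mask.bool()
    if labels is not None:
        return -self.crf(emissions, labels, mask=mask)  # training loss (NLL)
    return self.crf.decode(emissions, mask=mask)        # list of tag sequences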
def __init__(self, config):
    super(BertForLes, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_weights)
def __init__(self, config):
    super(ABSABert, self).__init__(config)
    self.bert = BertModel(config)
    self.num_labels = config.num_labels
    self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
    self.cls = DepBertPreTrainingHeads(config)
    self.domain_cls = DomainPredictionHead(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self, bert_config, num_choices):
    super(BertCloze, self).__init__(bert_config)
    self.num_choices = num_choices
    self.bert = BertModel(bert_config)
    # Note: `config` here is a module-level object carrying idiom2index and
    # hidden_dropout_prob, distinct from the `bert_config` argument.
    self.idiom_embedding = nn.Embedding(len(config.idiom2index),
                                        bert_config.hidden_size)
    self.my_fc = nn.Sequential(
        nn.Dropout(config.hidden_dropout_prob),
        nn.Linear(bert_config.hidden_size, 1))
def __init__(self, hidden_dim, n_layers, tagset_size):
    super(BertLSTM, self).__init__()
    config = BertConfig.from_pretrained('bert-base-multilingual-cased')
    self.model = BertModel(config)
    self.decoder = nn.LSTM(768, hidden_dim, n_layers)
    self.hiddentotag = nn.Linear(hidden_dim, tagset_size)
def __init__(self, config):
    super(NoHiddenLayerClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.fc1 = nn.Linear(in_features=768 * 5, out_features=2)
    self.init_weights()
def __init__(self, config, num_choices=1, num_docs_rank=30):
    super(BertForMultipleChoice, self).__init__(config)
    self.num_choices = num_choices
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_choices)
    self.init_weights()
def __init__(self, config):
    super(BertNer, self).__init__(config)
    self.num_labels = config.num_labels
    # Load the BERT model.
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # A simple linear layer on top. (Original used the undefined name
    # `num_labels`; config.num_labels is intended.)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    # Initialize the weights.
    self.init_weights()
def __init__(self, config):
    super(BertForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
    self.init_weights()
def __init__(self, config):
    super(CredPredictor, self).__init__()
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.reduce = nn.Linear(config.hidden_size, REDUCE_SIZE)
    self.prev_recursive = nn.LSTMCell(REDUCE_SIZE + 1, REDUCE_SIZE)
    self.aftr_recursive = nn.LSTMCell(REDUCE_SIZE + 1, REDUCE_SIZE)
    self.fc1 = nn.Linear(REDUCE_SIZE, int(REDUCE_SIZE / 2))
    self.fc2 = nn.Linear(int(REDUCE_SIZE / 2), 1)
    self.referee = nn.Linear(2, 1)
def __init__(self, config):
    super(MultiTaskBert, self).__init__(config)
    self.num_labels = 3
    self.num_categories = 5
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier1 = nn.Linear(config.hidden_size, self.num_labels)
    self.classifier2 = nn.Linear(config.hidden_size, self.num_categories)
    self.init_weights()
def __init__(self, bertConfig, num_classes):
    super(BertFine, self).__init__(bertConfig)
    self.bert = BertModel(bertConfig)  # the BERT model
    self.dropout = nn.Dropout(bertConfig.hidden_dropout_prob)
    self.classifier = nn.Linear(in_features=bertConfig.hidden_size,
                                out_features=num_classes)
    self.apply(self.init_weights)
    # By default all BERT encoder parameters are trained (a batch size of 32
    # takes roughly 8.7 GB of GPU memory). They can instead be frozen so that
    # only the classifier layer is backpropagated (a batch size of 32 then
    # takes roughly 1.1 GB).
    self.unfreeze_bert_encoder()
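# Hedged sketch of the freeze/unfreeze helpers referenced above (their bodies
# are not shown): toggling requires_grad on the BERT encoder parameters is the
# standard way to train only the classifier head, as the comment describes.
def freeze_bert_encoder(self):
    for p in self.bert.parameters():
        p.requires_grad = False

def unfreeze_bert_encoder(self):
    for p in self.bert.parameters():
        p.requires_grad = True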
def __init__(self, config):
    super(twoHiddenLayer_with_3840_3840_classifier, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.fc1 = nn.Linear(in_features=768 * 5, out_features=768 * 5)
    self.fc2 = nn.Linear(in_features=768 * 5, out_features=768 * 5)
    self.fc3 = nn.Linear(in_features=768 * 5, out_features=2)
    self.init_weights()
def __init__(self, config, pooling='max', dropout_prob=0):
    super(BertForDocClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.pooling = pooling
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.num_labels)
    self.bn = nn.BatchNorm1d(768)
    self.init_weights()
def __init__(self, config):
    super(BertClassifier, self).__init__()
    # Binary classification problem (num_labels = 2).
    self.num_labels = config.num_labels
    # Pre-trained BERT model.
    self.bert = BertModel(config)
    # Dropout to avoid overfitting.
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # A single-layer classifier added on top of BERT to fine-tune for
    # binary classification.
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    # Weight initialization.
    torch.nn.init.xavier_normal_(self.classifier.weight)
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Sequential(
        nn.Linear(config.hidden_size, 50),
        nn.ReLU(),
        nn.Linear(50, config.num_labels))
    # self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, config):
    super(FeatureBert, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.classifier = nn.Linear(config.hidden_size + 5, self.config.num_labels)
    self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
    self.features_bn = nn.BatchNorm1d(5)
    self.features_dense = nn.Linear(5, 5)
    self.init_weights()
def __init__(self, config):
    super(BertForLesWithFeatures, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    # Hidden size after appending the extra features.
    # self.hidden_size = config.hidden_size + 14 + POS_DIM + NER_DIM + 2 + 2
    self.hidden_size = config.hidden_size + POS_DIM + NER_DIM + 2 + 2
    self.qa_outputs = nn.Linear(self.hidden_size, config.num_labels)
    self.apply(self.init_weights)