def __init__(self, config):
    super(BertForSequenceClassificationNq, self).__init__(config)
    self.num_labels = config.num_labels
    # config.output_hidden_states = True
    bert_later_dropout = 0.3
    self.dropout = nn.Dropout(bert_later_dropout)
    self.later_model_type = config.later_model_type
    if self.later_model_type == 'linear':
        self.bert = BertModel(config)
        self.projection = nn.Linear(config.hidden_size * 3, config.hidden_size)
        self.projection_dropout = nn.Dropout(0.1)
        self.projection_activation = nn.Tanh()
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    elif self.later_model_type == '1bert_layer':
        config.num_hidden_layers = 1
        self.bert = BertModel(config)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    elif self.later_model_type == 'bilinear':
        self.bert = BertModel(config)
        lstm_layers = 2
        self.qemb_match = SeqAttnMatch(config.hidden_size)
        doc_input_size = 2 * config.hidden_size
        # RNN document encoder
        self.doc_rnn = StackedBRNN(
            input_size=doc_input_size,
            hidden_size=config.hidden_size,
            num_layers=lstm_layers,
            dropout_rate=bert_later_dropout,
            dropout_output=bert_later_dropout,
            concat_layers=True,
            rnn_type=nn.LSTM,
            padding=False,
        )
        self.bilinear_dropout = nn.Dropout(bert_later_dropout)
        self.bilinear_size = 128
        self.doc_proj = nn.Linear(lstm_layers * 2 * config.hidden_size, self.bilinear_size)
        self.qs_proj = nn.Linear(config.hidden_size, self.bilinear_size)
        self.bilinear = nn.Bilinear(self.bilinear_size, self.bilinear_size, self.bilinear_size)
        self.classifier = nn.Linear(self.bilinear_size, config.num_labels)
    elif self.later_model_type == 'transformer':
        self.copy_from_bert_layer_num = 11
        self.bert = BertModel(config)
        self.bert_position_emb = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.bert_type_id_emb = nn.Embedding(config.type_vocab_size, config.hidden_size)
        self.bert_layer = BertLayer(config)
        self.bert_pooler_qd = BertPoolerQD(config)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, config):
    super(BertCrfForNer, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.crf = CRF(num_tags=config.num_labels, batch_first=True)
    self.init_weights()
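# A minimal usage sketch (not from the original repo) of how a BertCrfForNer head like the
# one above is typically wired in a forward pass. Assumptions: self.bert returns the token
# representations as outputs[0], and self.crf follows the pytorch-crf API, where
# crf(emissions, tags, mask) returns the log-likelihood and crf.decode() returns tag ids.
def forward_sketch(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
    outputs = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
    sequence_output = self.dropout(outputs[0])            # [batch, seq_len, hidden]
    emissions = self.classifier(sequence_output)          # [batch, seq_len, num_labels]
    mask = attention_mask.bool() if attention_mask is not None else None
    if labels is not None:
        # Negative log-likelihood of the gold tag sequence under the CRF.
        return -self.crf(emissions, labels, mask=mask)
    # Viterbi decoding at inference time.
    return self.crf.decode(emissions, mask=mask)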
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.crf = CRF(config.num_labels, batch_first=True)
    self.classifier_bienc = nn.Linear(2 * config.hidden_size, config.num_labels)
    N = 4  # layer number
    h = 4  # heads
    dropout_value = 0.1
    d_model = config.hidden_size
    d_ff = 2048
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model, dropout=dropout_value)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout=dropout_value)
    self.encoder = Encoder(
        EncoderLayer(d_model, c(attn), c(ff), dropout_value), N)
    self.decoder = Decoder(
        DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout=dropout_value), N)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    pico_embeddings_size = 100  # same as the vocab size - we don't care about this at this point
    self.bert = BertModel(config)
    self.pico_embeddings = nn.Embedding(pico_embeddings_size, pico_embeddings_size)  # randomly initialized
    self.crf = CRF(4, batch_first=True)  # since we removed the 2 labels
    self.classifier = nn.Linear(2 * config.hidden_size, config.num_labels)
    N = 4  # layer number
    h = 4  # heads
    dropout_value = 0.1
    d_model = config.hidden_size + pico_embeddings_size
    d_ff = 2048
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model, dropout=dropout_value)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout=dropout_value)
    self.encoder = Encoder(
        EncoderLayer(d_model, c(attn), c(ff), dropout_value), N)
    self.decoder = Decoder(
        DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout=dropout_value), N)
    self.classifier_bienc = nn.Linear(2 * d_model, config.num_labels)
    self.init_weights()
def __init__(self, config, no_masking, lambda_scale=1.0):
    super(BertForQuestionAnsweringConfidence, self).__init__(config)
    self.bert = BertModel(config)
    # self.num_labels = num_labels
    self.num_labels = config.num_labels
    self.no_masking = no_masking
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)  # [N, L, H] => [N, L, 2]
    self.qa_classifier = nn.Linear(config.hidden_size, self.num_labels)  # [N, H] => [N, n_class]
    self.lambda_scale = lambda_scale

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)
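# A minimal sketch (assumed, not the original forward) of how the two heads above are
# commonly used: qa_outputs is split into per-token start/end logits and qa_classifier
# scores answerability from the pooled [CLS] vector. It assumes self.bert returns
# (sequence_output, pooled_output, ...) as in the standard BertModel.
def forward_sketch(self, input_ids, attention_mask=None, token_type_ids=None):
    sequence_output, pooled_output = self.bert(
        input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)[:2]
    logits = self.qa_outputs(sequence_output)              # [N, L, 2]
    start_logits, end_logits = logits.split(1, dim=-1)
    start_logits = start_logits.squeeze(-1)                # [N, L]
    end_logits = end_logits.squeeze(-1)                    # [N, L]
    switch_logits = self.qa_classifier(self.dropout(pooled_output))  # [N, n_class]
    return start_logits, end_logits, switch_logits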
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.deterministic = config.deterministic
    self.ib_dim = config.ib_dim
    self.ib = config.ib
    self.activation = config.activation
    self.activations = {'tanh': nn.Tanh(), 'relu': nn.ReLU(), 'sigmoid': nn.Sigmoid()}
    if self.ib or self.deterministic:
        self.kl_annealing = config.kl_annealing
        self.hidden_dim = config.hidden_dim
        intermediate_dim = (self.hidden_dim + config.hidden_size) // 2
        self.mlp = nn.Sequential(
            nn.Linear(config.hidden_size, intermediate_dim),
            self.activations[self.activation],
            nn.Linear(intermediate_dim, self.hidden_dim),
            self.activations[self.activation])
        self.beta = config.beta
        self.sample_size = config.sample_size
        self.emb2mu = nn.Linear(self.hidden_dim, self.ib_dim)
        self.emb2std = nn.Linear(self.hidden_dim, self.ib_dim)
        self.mu_p = nn.Parameter(torch.randn(self.ib_dim))
        self.std_p = nn.Parameter(torch.randn(self.ib_dim))
        self.classifier = nn.Linear(self.ib_dim, self.config.num_labels)
    else:
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
    self.init_weights()
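# A minimal sketch (assumed, not the original forward) of how emb2mu/emb2std are typically
# used with the reparameterization trick in an information-bottleneck classifier like the
# one above. The softplus on the std and the leading sample_size dimension are assumptions.
def sample_z_sketch(self, pooled_output):
    hidden = self.mlp(pooled_output)                            # [batch, hidden_dim]
    mu = self.emb2mu(hidden)                                    # [batch, ib_dim]
    std = torch.nn.functional.softplus(self.emb2std(hidden))    # keep the std positive
    eps = torch.randn(self.sample_size, *mu.shape, device=mu.device)
    z = mu + std * eps                                          # [sample_size, batch, ib_dim]
    return self.classifier(z).mean(dim=0)                       # average logits over samples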
def __init__(self, config: BertConfig):
    super().__init__(config)
    self.bert = BertModel(config)
    self.couplet_head = CoupletHead(config)
    self.init_weights()
def __init__(self, config):
    super(ImageBertForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.config = config
    if config.img_feature_dim > 0:
        self.bert = BertImgModel(config)
    else:
        self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    if hasattr(config, 'classifier'):
        if not hasattr(config, 'cls_hidden_scale'):
            config.cls_hidden_scale = 2
        if config.classifier == 'linear':
            self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
        elif config.classifier == 'mlp':
            self.classifier = nn.Sequential(
                nn.Linear(config.hidden_size, config.hidden_size * config.cls_hidden_scale),
                nn.ReLU(),
                nn.Linear(config.hidden_size * config.cls_hidden_scale, self.config.num_labels))
    else:
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)  # original
    self.apply(self._init_weights)
def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertSentimentPretrainingHeads(config)
    self.init_weights()
def __init__(
        self,
        config,
        # additional_features_size,
        urlvocab,
        weight=None,
        device=None,
        gcn_hidden_size=128,
        gcn_output_size=128):
    super(BertForSequenceClassificationWithGCN, self).__init__(config)
    self.num_labels = config.num_labels
    # BERT model
    self.bert = BertModel(config)
    # GCN model
    self.g = dgl.DGLGraph()
    nx_dg = nx.from_numpy_matrix(urlvocab.out_connectivity)
    self.g.from_networkx(nx_dg)
    self.g_features = torch.tensor(urlvocab.bert_embedding, dtype=torch.float)
    # FIXME: this does not work for multiple GPUs, unsure for one GPU
    # if device is not None:
    #     self.g_features = self.g_features.to(device)
    self.gcn = Net(input_size=self.g_features.shape[1],
                   hidden_size=gcn_hidden_size,
                   output_size=gcn_output_size)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size + gcn_output_size, self.config.num_labels)
    self.weight = weight
    self.init_weights()
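# A minimal sketch (assumed) of how the GCN output is typically fused with BERT's pooled
# output in a model like the one above: node embeddings from self.gcn are looked up per
# example and concatenated with the [CLS] representation before classification. The call
# signature self.gcn(self.g, self.g_features) and the url_node_ids index tensor are
# hypothetical, not taken from the original repo.
def forward_sketch(self, input_ids, url_node_ids, attention_mask=None):
    pooled_output = self.bert(input_ids, attention_mask=attention_mask)[1]
    node_repr = self.gcn(self.g, self.g_features)[url_node_ids]   # [batch, gcn_output_size]
    combined = torch.cat([self.dropout(pooled_output), node_repr], dim=-1)
    return self.classifier(combined)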
def __init__(self, config, label2id, data, device="cuda"):
    super(BertCrfForNer, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(512 * 2 + 50 * 1, len(label2id))
    self.crf = CRF(tagset_size=len(label2id),
                   tag_dictionary=label2id,
                   device=device,
                   is_bert=True)
    self.W = []
    self.rnn = []
    self.k_hop = 1
    for i in range(self.k_hop):
        self.W.append(nn.Linear(512 * 2, 512 * 2))
        self.rnn.append(
            nn.GRU(config.hidden_size if i == 0 else 512 * 2,
                   512,
                   num_layers=1,
                   bidirectional=True,
                   batch_first=True).cuda())
    self.W = nn.ModuleList(self.W)
    self.rnn = nn.ModuleList(self.rnn)
    self.init_weights()
    self.label2id = label2id
    self.id2label = {a: b for b, a in label2id.items()}
    self.pooling = nn.Linear(1024, 50)
    self.gaz_embed_all = Gaz_Embed(data, 0)
def __init__(self, config: BertConfig, graph_retriever_config):
    super(BertForGraphRetriever, self).__init__(config)
    self.graph_retriever_config = graph_retriever_config
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # Initial state
    self.s = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    # Scaling factor for weight norm
    self.g = Parameter(torch.FloatTensor(1).fill_(1.0))
    # RNN weight
    self.rw = nn.Linear(2 * config.hidden_size, config.hidden_size)
    # EOE and output bias
    self.eos = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    self.bias = Parameter(torch.FloatTensor(1).zero_())
    self.init_weights()
    self.cpu = torch.device('cpu')
def __init__(self, config):
    super(BertForClassifier, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # Here 9 denotes the pattern feature length
    self.clf_layer = nn.Linear(config.hidden_size + 9, config.num_labels)
    self.init_weights()
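# A minimal sketch (assumed) of how the extra 9-dimensional pattern feature above is
# typically combined with BERT's pooled output before clf_layer. The pattern_features
# argument is a hypothetical [batch, 9] tensor, not a name from the original repo.
def forward_sketch(self, input_ids, pattern_features, attention_mask=None):
    pooled_output = self.bert(input_ids, attention_mask=attention_mask)[1]
    combined = torch.cat([self.dropout(pooled_output), pattern_features], dim=-1)  # [batch, hidden + 9]
    return self.clf_layer(combined)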
def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.loss_fct = CrossEntropyLoss()  # -100 index = padding token; initialize once to speed up.
    self.init_weights()
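# A minimal sketch (assumed, not the original forward) of how this MLM head is typically
# used: per-token vocabulary logits from self.cls are scored against labels in which
# non-masked positions are set to -100, the default ignore_index of CrossEntropyLoss.
def forward_sketch(self, input_ids, attention_mask=None, labels=None):
    sequence_output = self.bert(input_ids, attention_mask=attention_mask)[0]
    prediction_scores = self.cls(sequence_output)          # [batch, seq_len, vocab_size]
    if labels is not None:
        loss = self.loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))
        return loss, prediction_scores
    return prediction_scores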
def __init__(self, config):
    super(BertForSequentialSentenceSelector, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # Initial state
    self.s = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    # Scaling factor for weight norm
    self.g = Parameter(torch.FloatTensor(1).fill_(1.0))
    # RNN weight
    self.rw = nn.Linear(2 * config.hidden_size, config.hidden_size)
    # EOE and output bias
    self.eos = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    self.bias = Parameter(torch.FloatTensor(1).zero_())
    # self.apply(self.init_bert_weights)
    self.init_weights()
    self.cpu = torch.device('cpu')
def __init__(self,
             config: BertConfig,
             num_sequence_labels: int,
             num_text_labels: int = 2,
             text_clf_weight: float = 1.0,
             sequence_clf_weight: float = 1.0,
             padding_index: int = 0,
             pooling_type: str = ""):
    super().__init__(config)
    self.text_clf_weight = text_clf_weight
    self.sequence_clf_weight = sequence_clf_weight
    self.num_text_labels = num_text_labels
    self.num_sequence_labels = num_sequence_labels
    self.padding_index = padding_index
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.sequence_classifier = nn.Linear(config.hidden_size, self.num_sequence_labels)
    self.text_classifier = nn.Linear(config.hidden_size, self.num_text_labels)
    print(self.num_text_labels, self.num_sequence_labels)
    self.init_weights()
def __init__(self, config, args, tokenizer):
    super(DecoderWithLoss, self).__init__()
    # model components
    print("initializing decoder with params {}".format(args))
    self.bert = BertModel(config)
    self.lm_head = BertOnlyMLMHead(config)
    self.span_b_proj = nn.ModuleList(
        [HighwayLayer(config.hidden_size) for _ in range(args.num_highway)])
    self.span_e_proj = nn.ModuleList(
        [HighwayLayer(config.hidden_size) for _ in range(args.num_highway)])
    # predict text span beginning and end
    self.text_span_start_head = nn.Linear(config.hidden_size, config.hidden_size)
    self.text_span_end_head = nn.Linear(config.hidden_size, config.hidden_size)
    # loss functions
    if args.node_label_smoothing > 0:
        self.lm_ce_loss = LabelSmoothingLoss(
            args.node_label_smoothing,
            config.vocab_size,
            ignore_index=tokenizer.pad_token_id)
    else:
        self.lm_ce_loss = torch.nn.CrossEntropyLoss(
            ignore_index=tokenizer.pad_token_id, reduction="none")
    self.span_ce_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.span_loss_lb = args.lambda_span_loss
    self.text_span_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.tree_to_text = args.tree_to_text
def __init__(self, config):
    super(BertForDST, self).__init__(config)
    self.slot_list = config.dst_slot_list
    self.class_types = config.dst_class_types
    self.class_labels = config.dst_class_labels
    self.token_loss_for_nonpointable = config.dst_token_loss_for_nonpointable
    self.refer_loss_for_nonpointable = config.dst_refer_loss_for_nonpointable
    self.class_aux_feats_inform = config.dst_class_aux_feats_inform
    self.class_aux_feats_ds = config.dst_class_aux_feats_ds
    self.class_loss_ratio = config.dst_class_loss_ratio

    # Only use refer loss if refer class is present in dataset.
    if 'refer' in self.class_types:
        self.refer_index = self.class_types.index('refer')
    else:
        self.refer_index = -1

    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.dst_dropout_rate)
    self.dropout_heads = nn.Dropout(config.dst_heads_dropout_rate)

    if self.class_aux_feats_inform:
        self.add_module("inform_projection", nn.Linear(len(self.slot_list), len(self.slot_list)))
    if self.class_aux_feats_ds:
        self.add_module("ds_projection", nn.Linear(len(self.slot_list), len(self.slot_list)))

    aux_dims = len(self.slot_list) * (self.class_aux_feats_inform + self.class_aux_feats_ds)  # second term is 0, 1 or 2

    for slot in self.slot_list:
        self.add_module("class_" + slot, nn.Linear(config.hidden_size + aux_dims, self.class_labels))
        self.add_module("token_" + slot, nn.Linear(config.hidden_size, 2))
        self.add_module("refer_" + slot, nn.Linear(config.hidden_size + aux_dims, len(self.slot_list) + 1))

    self.init_weights()
def from_scratch(cls, vocab_size, name="bert", language="en"):
    bert = cls()
    bert.name = name
    bert.language = language
    config = BertConfig(vocab_size=vocab_size)
    bert.model = BertModel(config)
    return bert
def __init__(self, config):
    super(BertForMultiLable, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
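# A minimal sketch (assumed, not from the original repo) of how a multi-label head like
# BertForMultiLable is commonly used: pooled [CLS] output -> dropout -> linear, trained
# with BCEWithLogitsLoss against multi-hot label vectors.
def forward_sketch(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
    pooled_output = self.bert(
        input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)[1]
    logits = self.classifier(self.dropout(pooled_output))    # [batch, num_labels]
    if labels is not None:
        loss = nn.BCEWithLogitsLoss()(logits, labels.float())
        return loss, logits
    return torch.sigmoid(logits)   # independent per-label probabilities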
def __init__(self, config):
    super(GeneratingMasksAC, self).__init__(config)
    if config.model_type == 'bert':
        self.bert = BertModel(config=config)
    else:
        self.bert = None
    # Reload the config; since this is BERT, there is probably a simpler way to do this.
    if self.bert is not None:
        config = BertConfig.from_pretrained("bert-base-uncased")
        config.attention_probs_dropout_prob = 0.0
        config.hidden_dropout_prob = 0.0
        self.config = config
    self.transformer = BertAttention(config)
    self.policy1 = nn.Linear(config.hidden_size, 128)
    self.policy2 = nn.Linear(128, 1)
    # Value part
    # self.value1 = nn.Linear(config.hidden_size, 128)
    self.value2 = nn.Linear(128, 1)
    # self.apply(self._init_weights)
    self.init_weights()
def __init__(self, config):
    super(LFESM, self).__init__(config)
    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.seq_relationship = nn.Linear(config.hidden_size, 2)
    self.init_weights()

    # dropout = 0.5
    # self._rnn_dropout = RNNDropout(p=dropout)
    feature_size = 28
    self._feature = nn.Linear(feature_size, config.hidden_size)
    self._attention = SoftmaxAttention()
    self._projection = nn.Sequential(
        nn.Linear(4 * config.hidden_size, config.hidden_size),
        nn.ReLU())
    self._composition = Seq2SeqEncoder(nn.LSTM,
                                       config.hidden_size,
                                       config.hidden_size,
                                       bidirectional=True)
    self._classification = nn.Sequential(
        nn.Dropout(p=config.hidden_dropout_prob),  # p=dropout
        nn.Linear(4 * 2 * config.hidden_size, config.hidden_size),
        nn.Tanh(),
        nn.Dropout(p=config.hidden_dropout_prob),  # p=dropout
        nn.Linear(config.hidden_size, 2))
    self.apply(self.init_esim_weights)
def __init__(self, config, action_num, recur_type="gated", allow_yes_no=False):
    super(RCMBert, self).__init__(config)
    self.bert = BertModel(config)
    self.recur_type = recur_type
    self.allow_yes_no = allow_yes_no
    if recur_type == "gated":
        self.recur_network = recurGatedNetwork(config.hidden_size, config.hidden_size)
    elif recur_type == "lstm":
        self.recur_network = recurLSTMNetwork(config.hidden_size, config.hidden_size)
    else:
        print("Invalid recur_type: {}".format(recur_type))
        sys.exit(0)
    self.action_num = action_num
    self.stop_network = stopNetwork(config.hidden_size)
    self.move_stride_network = moveStrideNetwork(config.hidden_size, self.action_num)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    if self.allow_yes_no:
        self.yes_no_flag_outputs = nn.Linear(config.hidden_size, 2)
        self.yes_no_ans_outputs = nn.Linear(config.hidden_size, 2)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertPreTrainingHeads(config)
    self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)
    self.init_weights()
def __init__(self, config, feature=None, use_lstm=False, device="cpu"):
    super(NerModel, self).__init__(config)
    self.num_labels = config.num_labels
    self.use_feature = False
    self.use_lstm = False
    self.hidden_size = config.hidden_size
    self.bert = BertModel(config)
    self.ferep = None
    if feature is not None:
        self.ferep = FeatureRep(feature, device)
        self.use_feature = True
        self.hidden_size += self.ferep.feature_dim
    if use_lstm:
        self.use_lstm = True
        self.lstm = nn.LSTM(self.hidden_size,
                            config.hidden_size,
                            batch_first=True,
                            num_layers=1)
        self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(self.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, config, num_classes, vocab) -> None:
    super(PairwiseClassifier, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_classes)
    self.vocab = vocab
    self.init_weights()
def __init__(self, config):
    super(KorQuADModel, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertTopicTreatControlPreTrainingHeads(config)
    self.init_weights()
    self.tie_weights()
def __init__(self, config):
    super(BertForSiameseModel, self).__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.seq_relationship = nn.Linear(config.hidden_size * 3, config.num_labels)
    self.init_weights()
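# A minimal sketch (assumed, not the original forward) of why seq_relationship above takes
# hidden_size * 3: in siamese sentence-pair setups the two pooled encodings u and v are
# commonly concatenated with their element-wise absolute difference before classification.
def forward_sketch(self, input_ids_a, input_ids_b, attention_mask_a=None, attention_mask_b=None):
    u = self.bert(input_ids_a, attention_mask=attention_mask_a)[1]   # pooled [CLS] of sentence A
    v = self.bert(input_ids_b, attention_mask=attention_mask_b)[1]   # pooled [CLS] of sentence B
    features = torch.cat([u, v, torch.abs(u - v)], dim=-1)           # [batch, 3 * hidden]
    return self.seq_relationship(self.dropout(features))             # [batch, num_labels]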
def __init__(self, config):
    super(BertForMLMPreTraining, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertMLMPreTrainingHeads(config)
    self.init_weights()
    self.tie_weights()