def __init__(self, num_choices, bert_config_file, init_embeddings):
    self.num_choices = num_choices
    self.bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, self.bert_config)
    self.bert = BertModel(self.bert_config)
    self.init_weights()  # Initialize the weight parameters
    self.dropout = nn.Dropout(self.bert_config.hidden_dropout_prob)
    # Word-embedding matrix used for the knowledge representations
    self.vocab_size, self.embed_size = np.shape(init_embeddings)
    self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(init_embeddings), freeze=False)
    # self.classifier = nn.Linear(self.bert_config.hidden_size + self.embed_size, 1)
    self.classifier = nn.Linear(self.embed_size + self.bert_config.hidden_size, 1)
    self.A = nn.Parameter(torch.Tensor(self.bert_config.hidden_size, self.embed_size))
    self.bias = nn.Parameter(torch.Tensor(1))
    # In BERT, the [CLS] representation first passes through the Transformer layer's MLP and a final layer-norm,
    # and is then activated with nn.Tanh in the BertPooler layer.
    self.layer_norm = nn.LayerNorm(self.embed_size, eps=self.bert_config.layer_norm_eps)
    # self.know_activation = ACT2FN["gelu"]
    self.know_activation = nn.Tanh()
    self.activation = nn.Sigmoid()
    nn.init.xavier_normal_(self.A)
    self.bias.data.fill_(0)
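# Hedged sketch (an assumption, not the repo's actual forward pass): one plausible
# use of the `self.A` matrix and `self.bias` defined above is a bilinear match
# between the pooled [CLS] vector and each knowledge embedding. All tensors below
# are placeholder stand-ins for the module's attributes.
import torch

hidden_size, embed_size, num_know = 768, 200, 5
cls_vec = torch.randn(1, hidden_size)             # pooled [CLS] representation
know_vecs = torch.randn(1, num_know, embed_size)  # knowledge embeddings from self.embed
A = torch.randn(hidden_size, embed_size)          # stands in for self.A
bias = torch.zeros(1)                             # stands in for self.bias
# score_k = cls_vec @ A @ know_vec_k + bias, giving one score per knowledge entry
scores = torch.einsum('bh,he,bke->bk', cls_vec, A, know_vecs) + bias
print(scores.shape)  # torch.Size([1, 5])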
def __init__(self, config):
    # super(HeadlessBertForSequenceClassification, self).__init__(config)
    BertPreTrainedModel.__init__(self, config)
    HeadlessModelForSequenceClassification.__init__(self, config)
    self.bert = BertModel(config)
    self.init_weights()
def __init__(self, config):
    BertPreTrainedModel.__init__(self, config)
    HeadlessModelForSequenceClassification.__init__(self, config)
    # super(HeadlessRobertaForSequenceClassification, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.init_weights()
def __init__(self, num_choices, bert_config_file):
    self.num_choices = num_choices
    bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, bert_config)
    self.bert = BertModel(bert_config)
    self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
    self.classifier = nn.Linear(bert_config.hidden_size, 1)
    self.activation = nn.Sigmoid()
    self.init_weights()
def __init__(self, config):
    # Call the init one parent class up.
    # Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)
    self.num_labels = config.num_labels
    # Replace `BertModel` with SparseBertModel.
    self.bert = bert_cls(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, config, opts):
    BertPreTrainedModel.__init__(self, config)
    self.use_leaf_rnn = True
    self.intra_attention = False
    self.gumbel_temperature = 1
    self.bidirectional = True
    self.model_name = opts.model
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    assert not (self.bidirectional and not self.use_leaf_rnn)
    word_dim = config.hidden_size
    hidden_dim = config.hidden_size
    if self.use_leaf_rnn:
        self.leaf_rnn_cell = nn.LSTMCell(input_size=word_dim, hidden_size=hidden_dim)
        if self.bidirectional:
            self.leaf_rnn_cell_bw = nn.LSTMCell(input_size=word_dim, hidden_size=hidden_dim)
    else:
        self.word_linear = nn.Linear(in_features=word_dim, out_features=2 * hidden_dim)
    if self.bidirectional:
        self.treelstm_layer = BinaryTreeLSTMLayer(2 * hidden_dim)
        # self.comp_query = nn.Parameter(torch.FloatTensor(2 * hidden_dim))
        self.comp_query_linear = nn.Linear(hidden_dim * 2, 1, bias=False)
    else:
        self.treelstm_layer = BinaryTreeLSTMLayer(hidden_dim)
        # self.comp_query = nn.Parameter(torch.FloatTensor(hidden_dim))
        self.comp_query_linear = nn.Linear(hidden_dim, 1, bias=False)
    self.v_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)
    emb_hidden_size = config.hidden_size
    self.register_buffer('enlarged_candidates', torch.arange(opts.len_idiom_vocab))
    self.idiom_embedding_u = nn.Embedding(opts.len_idiom_vocab, emb_hidden_size)
    self.idiom_embedding_v = nn.Embedding(opts.len_idiom_vocab, emb_hidden_size)
    self.LayerNorm_u = nn.LayerNorm(emb_hidden_size, eps=config.layer_norm_eps)
    self.LayerNorm_v = nn.LayerNorm(emb_hidden_size, eps=config.layer_norm_eps)
    self.context_pool = AttentionPool(config.hidden_size, config.hidden_dropout_prob)
    self.init_weights()
def __init__(self, config):
    BertPreTrainedModel.__init__(self, config)
    XLMPreTrainedModel.__init__(self, config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.transformer = XLMModel(config)
    self.dropout = nn.Dropout(0.1)
    # The classifier takes the concatenated BERT and XLM hidden states.
    self.classifier = nn.Linear(config.hidden_size + config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, config, add_pooling_layer=True):
    # Call the init one parent class up. Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)
    self.config = config
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config) if add_pooling_layer else None
    # Sparsify linear modules.
    self.sparsify_model()
    self.init_weights()
def __init__(self, config):
    # Call the init one parent class up.
    # Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)
    if config.is_decoder:
        logging.warning(
            # This warning was included with the original BertForMaskedLM.
            f"If you want to use `{name_prefix}BertForMaskedLM` make sure "
            "`config.is_decoder=False` for bi-directional self-attention."
        )
    self.bert = bert_cls(config, add_pooling_layer=False)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
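# Hedged sketch (an assumption about the surrounding code, which is not shown in the
# snippet above): the free variables `bert_cls` and `name_prefix` suggest this
# __init__ is defined inside a class factory that stamps out sparse variants of
# BertForMaskedLM. The factory name and structure below are hypothetical.
import logging
from transformers.models.bert.modeling_bert import BertOnlyMLMHead, BertPreTrainedModel

def make_masked_lm_class(bert_cls, name_prefix=""):
    class _MaskedLM(BertPreTrainedModel):
        def __init__(self, config):
            # Skip the direct parent's __init__ so the encoder is built only once,
            # using the injected `bert_cls` instead of the dense BertModel.
            BertPreTrainedModel.__init__(self, config)
            if config.is_decoder:
                logging.warning(
                    f"If you want to use `{name_prefix}BertForMaskedLM` make sure "
                    "`config.is_decoder=False` for bi-directional self-attention."
                )
            self.bert = bert_cls(config, add_pooling_layer=False)
            self.cls = BertOnlyMLMHead(config)
            self.init_weights()
    _MaskedLM.__name__ = f"{name_prefix}BertForMaskedLM"
    return _MaskedLM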