Example #1
    def __init__(self, config):
        super(BertForIREmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
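For orientation, a minimal sketch of the forward pass that usually accompanies such an embedding module, assuming the standard BERT recipe (word, position, and token-type embeddings are summed, then LayerNorm and dropout are applied); the exact signature in the source repository may differ:
    def forward(self, input_ids, token_type_ids=None):
        seq_length = input_ids.size(1)
        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)
        # sum the three embeddings, then normalize and apply dropout
        embeddings = (self.word_embeddings(input_ids)
                      + self.position_embeddings(position_ids)
                      + self.token_type_embeddings(token_type_ids))
        return self.dropout(self.LayerNorm(embeddings))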
Example #2
    def __init__(self, config):
        super(BertPredictionHeadTransform, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        if isinstance(config.hidden_act, str) or (
                sys.version_info[0] == 2 and isinstance(config.hidden_act, str)):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
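A sketch of the corresponding forward pass, assuming the usual BertPredictionHeadTransform behavior (dense projection, activation, then LayerNorm):
    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)
        return hidden_states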
Example #3
    def __init__(self, config):
        super(BertPredictionHeadTransform, self).__init__()
        # Need to untie it when we separate the dimensions of hidden and emb
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        if isinstance(config.hidden_act, str) or (
                sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
Example #4
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.embedding_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.embedding_size)

        self.LayerNorm = BertLayerNorm(config.embedding_size,
                                       eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #5
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
        # TODO: RoBERTa currently has some issues: a max position length of 512 is required to load some models, but some of those models were not actually trained with length 512, so be careful
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #6
    def __init__(self, in_hsz, out_hsz, layer_norm=True,
                 dropout=0.1, relu=True):
        super(LinearLayer, self).__init__()
        self.relu = relu
        self.layer_norm = layer_norm
        if layer_norm:
            self.LayerNorm = BertLayerNorm(in_hsz, eps=1e-5)
        layers = [
            nn.Dropout(dropout),
            nn.Linear(in_hsz, out_hsz)
        ]
        self.net = nn.Sequential(*layers)
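A plausible forward for this LinearLayer, assuming LayerNorm is applied before the dropout + linear stack and ReLU (if enabled) afterwards; F here stands for torch.nn.functional:
    def forward(self, x):
        if self.layer_norm:
            x = self.LayerNorm(x)
        x = self.net(x)  # dropout followed by the linear projection
        if self.relu:
            x = F.relu(x, inplace=True)
        return x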
Example #7
    def __init__(self, config):
        super(DepBertPredictionHeadTransform, self).__init__()
        self.child_transform = nn.Linear(config.hidden_size,
                                         int(config.hidden_size / 3))
        self.head_transform = nn.Linear(config.hidden_size,
                                        int(config.hidden_size / 3))
        if isinstance(config.hidden_act, str) or (
                sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = BertLayerNorm(config.hidden_size,
                                       eps=config.layer_norm_eps)
Example #8
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
        try:
            self.use_relative_position = config.use_relative_position
        except:
            self.use_relative_position = False
        if not self.use_relative_position:
            self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #9
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.doc_embeddings_len = config.hidden_size
        self.word_embeddings_len = config.hidden_size
        self.position_embeddings_len = config.hidden_size

        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            self.word_embeddings_len)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                self.position_embeddings_len)
        self.doc_embeddings = nn.Embedding(config.type_vocab_size,
                                           self.doc_embeddings_len)
        # self.interact = nn.Parameter(torch.FloatTensor(self.doc_embeddings_len, self.word_embeddings_len).unsqueeze(0))
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.hidden_size = config.hidden_size
Example #10
    def __init__(self, config, roberta_model_embedding_weights):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.layer_norm = BertLayerNorm(config.hidden_size,
                                        eps=config.layer_norm_eps)

        #self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        self.decoder = nn.Linear(roberta_model_embedding_weights.size(1),
                                 roberta_model_embedding_weights.size(0),
                                 bias=False)
        self.decoder.weight = roberta_model_embedding_weights
        self.bias = nn.Parameter(
            torch.zeros(roberta_model_embedding_weights.size(0)))
        #self.bias = nn.Parameter(torch.zeros(config.vocab_size))

        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
        self.decoder.bias = self.bias
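Since decoder.weight is tied to the embedding matrix and decoder.bias is linked to self.bias, a typical forward only needs the dense projection, an activation (GELU in RoBERTa, assumed here), the LayerNorm, and the tied decoder:
    def forward(self, features):
        x = self.dense(features)
        x = torch.nn.functional.gelu(x)  # activation assumed; not shown in the snippet above
        x = self.layer_norm(x)
        # decoder.weight is the tied embedding matrix; decoder.bias was linked to self.bias
        return self.decoder(x)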
Example #11
    def __init__(self, config):
        super(BertOutput, self).__init__()
        self.dense = QuantizeLinear(
            config.intermediate_size,
            config.hidden_size,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_layerwise=config.weight_layerwise,
            input_layerwise=config.input_layerwise,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_ffn_attn)

        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #12
    def __init__(self, config, bert_model_embedding_weights):
        super(BertLMPredictionHead, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        if isinstance(config.hidden_act, str):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)

        # The output weights are the same as the input embeddings, but there is
        # an output-only bias for each token.
        self.decoder = nn.Linear(bert_model_embedding_weights.size(1),
                                 bert_model_embedding_weights.size(0),
                                 bias=False)
        self.decoder.weight = bert_model_embedding_weights
        self.bias = nn.Parameter(
            torch.zeros(bert_model_embedding_weights.size(0)))
Example #13
    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.max_relative_pos_len = config.max_relative_pos_len
        self.pos_emb_type = config.pos_emb_type
        self.diff_head_pos = config.diff_head_pos
        if self.pos_emb_type == 'absolute':
            self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        else:
            self.position_embeddings = None

        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        self.max_position_id = config.max_position_embeddings
        self.bert_word_dropout = config.bert_word_dropout
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.bert_emb_dropout)
        self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
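The registered position_ids buffer is typically sliced to the current sequence length inside forward, e.g. (assuming the usual Hugging Face pattern):
        seq_length = input_ids.size(1)
        position_ids = self.position_ids[:, :seq_length]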
Example #14
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.hidden_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)
        self.max_position_id = config.max_position_embeddings

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size,
                                       eps=config.layer_norm_eps)

        # This is the third dropout: the three embeddings are summed, LayerNorm is applied, and then dropout is applied once
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #15
    def __init__(self, config,
                 bert_word_dropout=None,
                 bert_emb_dropout=None,
                 bert_atten_dropout=None,
                 bert_hidden_dropout=None,
                 bert_hidden_size=None,
                 is_decoder=False,
                 before_plm_output_ln=False,
                 gradient_checkpointing=False,
                 **kwargs
                 ):

        super().__init__(config)
        self.config = config
        if bert_word_dropout is not None:
            self.config.bert_word_dropout = bert_word_dropout
        if bert_emb_dropout is not None:
            self.config.bert_emb_dropout = bert_emb_dropout
        if bert_atten_dropout is not None:
            self.config.bert_atten_dropout = bert_atten_dropout
        if bert_hidden_dropout is not None:
            self.config.bert_hidden_dropout = bert_hidden_dropout
        if bert_hidden_size is not None:
            self.config.bert_hidden_size = bert_hidden_size

        self.config.max_relative_pos_len = kwargs.pop('max_pos_len', 0)
        self.config.diff_head_pos = kwargs.pop('diff_head_pos', False)
        self.config.pos_emb_type = kwargs.pop('pos_emb_type', "absolute")
        self.config.is_decoder = is_decoder
        self.config.before_plm_output_ln = before_plm_output_ln
        self.config.gradient_checkpointing = gradient_checkpointing

        self.embeddings = BertEmbeddings(self.config)
        self.encoder = BertEncoder(self.config)
        
        if self.config.before_plm_output_ln:
            self.before_plm_output_ln = BertLayerNorm(self.config.hidden_size, eps=self.config.layer_norm_eps)
        else:
            self.before_plm_output_ln = None

        self.init_weights()
Example #16
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size,
                                            padding_idx=config.POS_NULL)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        if config.graph_input:
            self.token_type_embeddings = nn.Embedding(config.type_vocab_size+1, config.hidden_size)
        else:
            self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        
        ### composition model
        self.fcompmodel = config.fcompmodel
        self.graph_input = config.graph_input
        if config.fcompmodel:
            self.compose = FFCompose(config.hidden_size,config.label_emb)
        
        ### label embedding
        if config.fcompmodel or config.graph_input:
            self.label_emb = config.label_embedding

        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #17
    def __init__(self,
                 config,
                 pos_tag_embedding=False,
                 senti_embedding=False,
                 polarity_embedding=False):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.hidden_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        if senti_embedding:
            self.senti_embeddings = nn.Embedding(3,
                                                 config.hidden_size,
                                                 padding_idx=2)
        else:
            self.register_parameter('senti_embeddings', None)
        if pos_tag_embedding:
            self.pos_tag_embeddings = nn.Embedding(5,
                                                   config.hidden_size,
                                                   padding_idx=4)
        else:
            self.register_parameter('pos_tag_embeddings', None)
        if polarity_embedding:
            self.polarity_embeddings = nn.Embedding(6,
                                                    config.hidden_size,
                                                    padding_idx=5)
        else:
            self.register_parameter('polarity_embeddings', None)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size,
                                       eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #18
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()

        self.word_embeddings_1 = QuantizeEmbedding(
            config.vocab_size,
            config.hidden_size,
            padding_idx=0,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            embed_layerwise=config.embed_layerwise,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_qkvo)
        self.word_embeddings_2 = QuantizeEmbedding(
            config.vocab_size,
            config.hidden_size,
            padding_idx=0,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            embed_layerwise=config.embed_layerwise,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_qkvo)

        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #19
    def __init__(self, config):
        super(BertOutput, self).__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
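For reference, the standard BertOutput forward that goes with this constructor (projection, dropout, then a residual connection normalized by LayerNorm):
    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        # residual connection followed by layer normalization
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states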
Example #20
    def __init__(self, config):
        super(BertSelfOutput, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #21
    def __init__(self, config):
        super(BertPredictionHeadTransform, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.transform_act_fn = ACT2FN[config.hidden_act] \
            if isinstance(config.hidden_act, str) else config.hidden_act
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
Example #22
    def __init__(self, config):
        super(AlbertLayer, self).__init__()
        self.attention_1 = BertAttention(config)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
        self.ffn_1 = BertFF(config)
        self.LayerNorm_1 = BertLayerNorm(config.hidden_size, eps=1e-5)
Example #23
    def __init__(self, in_hsz, out_hsz):
        super(MLPLayer, self).__init__()
        self.linear_1 = nn.Linear(in_hsz, in_hsz * 2)
        self.LayerNorm = BertLayerNorm(in_hsz * 2, eps=1e-5)
        self.linear_2 = nn.Linear(in_hsz * 2, out_hsz)
        self.act = gelu
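One plausible forward for this MLPLayer (expansion, activation, LayerNorm, then projection); the actual ordering in the source repository may differ:
    def forward(self, x):
        x = self.act(self.linear_1(x))
        x = self.LayerNorm(x)
        return self.linear_2(x)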
Example #24
    def __init__(self, config):
        super(BertSelfOutput, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        # Originally this was hidden * hidden
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #25
    def __init__(self, config, num_answers):
        super().__init__()
        hid_dim = config.hidden_size
        self.logit_fc = nn.Sequential(nn.Linear(hid_dim, hid_dim * 2), GeLU(),
                                      BertLayerNorm(hid_dim * 2, eps=1e-12),
                                      nn.Linear(hid_dim * 2, num_answers))
Example #26
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.bert_hidden_dropout)
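None of the examples above defines BertLayerNorm itself. For reference, a minimal sketch that matches the classic pytorch-pretrained-bert definition; many repositories instead alias BertLayerNorm to torch.nn.LayerNorm or to apex's FusedLayerNorm:
import torch
import torch.nn as nn

class BertLayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-12):
        super(BertLayerNorm, self).__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.bias = nn.Parameter(torch.zeros(hidden_size))
        self.variance_epsilon = eps

    def forward(self, x):
        # normalize over the last (hidden) dimension
        u = x.mean(-1, keepdim=True)
        s = (x - u).pow(2).mean(-1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.variance_epsilon)
        return self.weight * x + self.bias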