Example #1
    def __init__(self, config):
        super(UniterEmbeddings, self).__init__()

        self.hidden_size = config.hidden_size
        self.initializer_range = config.initializer_range

        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.hidden_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.image_embeddings = nn.Linear(config.v_feature_size,
                                          config.v_hidden_size)
        self.image_location_embeddings = nn.Linear(config.num_locs,
                                                   config.v_hidden_size)
        self.image_layer_norm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.image_location_layer_norm = BertLayerNorm(config.hidden_size,
                                                       eps=1e-12)

        self.v_LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.v_dropout = nn.Dropout(config.hidden_dropout_prob)
        self.special_initialize()
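A minimal sketch of how the text branch of an embedding module like this is usually applied. The forward pass is not part of the example; the position-id construction and the default token_type_ids below are assumptions:

    def forward(self, input_ids, token_type_ids=None):
        # hypothetical text-side forward: sum word, position and segment embeddings,
        # then apply the shared LayerNorm and dropout
        seq_length = input_ids.size(1)
        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)
        embeddings = (self.word_embeddings(input_ids)
                      + self.position_embeddings(position_ids)
                      + self.token_type_embeddings(token_type_ids))
        return self.dropout(self.LayerNorm(embeddings))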
Example #2
    def __init__(self, config):
        super(LxmertImageEmbeddings, self).__init__()

        self.image_embeddings = nn.Linear(config.v_feature_size,
                                          config.v_hidden_size)
        self.image_location_embeddings = nn.Linear(config.num_locs,
                                                   config.v_hidden_size)
        self.ImgLayerNorm = BertLayerNorm(config.v_hidden_size, eps=1e-12)
        self.LocLayerNorm = BertLayerNorm(config.v_hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.v_hidden_dropout_prob)
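A hedged sketch of how the two visual streams defined above are typically combined. The actual forward is not shown in the example; summing the normalized projections is an assumption:

    def forward(self, image_feats, image_locs):
        # hypothetical forward: project region features and box coordinates,
        # normalize each stream, then sum and apply dropout
        img = self.ImgLayerNorm(self.image_embeddings(image_feats))
        loc = self.LocLayerNorm(self.image_location_embeddings(image_locs))
        return self.dropout(img + loc)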
Example #3
    def __init__(self, config, img_dim):
        super().__init__()
        self.img_linear = nn.Linear(img_dim, config.hidden_size)
        self.img_layer_norm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.pos_layer_norm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.pos_linear = nn.Linear(7, config.hidden_size)
        self.mask_embedding = nn.Embedding(2, img_dim, padding_idx=0)

        # tf naming convention for layer norm
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #4
    def __init__(self, config):
        super().__init__()
        feat_dim = VISUAL_CONFIG.visual_feat_dim
        pos_dim = VISUAL_CONFIG.visual_pos_dim

        # Object feature encoding
        self.visn_fc = nn.Linear(feat_dim, config.hidden_size)
        self.visn_layer_norm = BertLayerNorm(config.hidden_size, eps=1e-12)

        # Box position encoding
        self.box_fc = nn.Linear(pos_dim, config.hidden_size)
        self.box_layer_norm = BertLayerNorm(config.hidden_size, eps=1e-12)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
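A sketch of a plausible forward pass for this encoder, assuming the two streams are averaged (a common choice in LXMERT-style encoders); the example shows only the constructor:

    def forward(self, visn_feats, boxes):
        # hypothetical forward: encode object features and box positions separately,
        # average the two normalized projections, then apply dropout
        x = self.visn_layer_norm(self.visn_fc(visn_feats))
        y = self.box_layer_norm(self.box_fc(boxes))
        return self.dropout((x + y) / 2)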
Example #5
    def __init__(self, config):
        super(VLBertEmbeddings, self).__init__()

        self.hidden_size = config.hidden_size
        self.with_mvrc_loss = config.visual_target_weights.get("6", 0) > 0
        self.initializer_range = config.initializer_range

        self.v_coordinate_embeddings_dim = config.v_coordinate_embeddings_dim
        self.obj_downsample = torch.nn.Sequential(
            torch.nn.Dropout(config.v_attention_probs_dropout_prob),
            torch.nn.Linear(2 * config.v_feature_size, config.v_hidden_size),
            torch.nn.ReLU(inplace=True),
        )

        self.object_linguistic_embeddings = nn.Embedding(1, config.hidden_size)
        if self.with_mvrc_loss:
            self.object_mask_word_embedding = nn.Embedding(
                1, config.hidden_size)
        self.object_mask_visual_embedding = nn.Embedding(
            1, config.v_feature_size)
        self.end_embedding = nn.Embedding(1, config.hidden_size)

        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.hidden_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        # visual transform
        self.visual_1x1_text = None
        self.visual_1x1_object = None
        if config.v_hidden_size != config.hidden_size:
            self.visual_1x1_text = nn.Linear(config.v_hidden_size,
                                             config.hidden_size)
            self.visual_1x1_object = nn.Linear(config.v_hidden_size,
                                               config.hidden_size)
        self.visual_ln_text = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.visual_ln_object = BertLayerNorm(config.hidden_size, eps=1e-12)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # init weights
        self.init_weights()
Example #6
    def __init__(self, config):
        super(BertOutput, self).__init__()
        self.dense_1 = QuantizeLinear(
            config.intermediate_size,
            config.hidden_size,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_layerwise=config.weight_layerwise,
            input_layerwise=config.input_layerwise,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_ffn_attn)
        self.dense_2 = QuantizeLinear(
            config.intermediate_size,
            config.hidden_size,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_layerwise=config.weight_layerwise,
            input_layerwise=config.input_layerwise,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_ffn_attn)

        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
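A minimal usage sketch for this output block, assuming the usual post-LayerNorm residual pattern and using only dense_1; how dense_1 and dense_2 are actually combined depends on the model's width-splitting scheme, which the example does not show:

    def forward(self, hidden_states, input_tensor):
        # hypothetical forward: quantized projection, dropout, residual add, LayerNorm
        hidden_states = self.dense_1(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return self.LayerNorm(hidden_states + input_tensor)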
Example #7
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()

        self.word_embeddings = QuantizeEmbedding(
            config.vocab_size,
            config.hidden_size,
            padding_idx=0,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            embed_layerwise=config.embed_layerwise,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_qkvo)

        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #8
    def __init__(self, config):
        super(BertSelfOutput, self).__init__()
        # support binary split
        self.dense_1 = QuantizeLinear(
            config.dyna_hidden_size,
            config.hidden_size,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_layerwise=config.weight_layerwise,
            input_layerwise=config.input_layerwise,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_qkvo)
        self.dense_2 = QuantizeLinear(
            config.dyna_hidden_size,
            config.hidden_size,
            clip_val=config.clip_init_val,
            weight_bits=config.weight_bits,
            input_bits=config.input_bits,
            weight_layerwise=config.weight_layerwise,
            input_layerwise=config.input_layerwise,
            weight_quant_method=config.weight_quant_method,
            input_quant_method=config.input_quant_method,
            learnable=config.learnable_scaling,
            symmetric=config.sym_quant_qkvo)

        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #9
    def __init__(self, config):
        super(VisualBertEmbeddings, self).__init__()

        self.hidden_size = config.hidden_size
        self.initializer_range = config.initializer_range

        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.hidden_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # Segment and position embedding for image features
        self.projection = nn.Linear(config.v_feature_size, config.hidden_size)
        self.token_type_embeddings_visual = nn.Embedding(
            config.type_vocab_size, config.hidden_size, padding_idx=0)
        self.position_embeddings_visual = nn.Embedding(
            config.max_position_embeddings, config.hidden_size)
        self.special_initialize()
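A hedged sketch of the visual branch implied by the last three modules; the method name and the input shapes are assumptions, since the example only shows the constructor:

    def forward_visual(self, visual_feats, visual_token_type_ids, visual_position_ids):
        # hypothetical visual branch: project region features to hidden_size and add
        # visual segment/position embeddings before the shared LayerNorm and dropout
        v = (self.projection(visual_feats)
             + self.token_type_embeddings_visual(visual_token_type_ids)
             + self.position_embeddings_visual(visual_position_ids))
        return self.dropout(self.LayerNorm(v))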
Example #10
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()

        if config.embedding_size == config.hidden_size:
            self.word_embeddings = nn.Embedding(config.vocab_size,
                                                config.embedding_size,
                                                padding_idx=0)
            self.word_embeddings_2 = None
        else:
            #
            self.word_embeddings = nn.Embedding(config.vocab_size,
                                                config.embedding_size,
                                                padding_idx=0)
            self.word_embeddings_2 = nn.Linear(config.embedding_size,
                                               config.hidden_size,
                                               bias=False)

        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)
        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size,
                                       eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
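A short sketch of what the optional word_embeddings_2 projection is for (an ALBERT-style factorization of the embedding matrix); only the word-embedding path is shown, the helper name is assumed, and the rest of the forward is omitted:

    def embed_words(self, input_ids):
        # hypothetical helper: embed tokens at embedding_size, then project up to
        # hidden_size only when the two dimensions differ
        words = self.word_embeddings(input_ids)
        if self.word_embeddings_2 is not None:
            words = self.word_embeddings_2(words)
        return words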
Example #11
 def __init__(self, config):
     super(BertOutput, self).__init__()
     self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
     self.LayerNorm = BertLayerNorm(config.hidden_size,
                                    eps=config.layer_norm_eps)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.ln_type = config.ln_type
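A hedged sketch of how ln_type is commonly consumed downstream, assuming 'postln' means LayerNorm after the residual add and 'preln' means the norm was already applied before the sub-layer:

    def forward(self, hidden_states, input_tensor):
        # hypothetical forward: post-LN applies LayerNorm after the residual add,
        # pre-LN returns the raw residual sum
        hidden_states = self.dropout(self.dense(hidden_states))
        if self.ln_type == 'postln':
            return self.LayerNorm(hidden_states + input_tensor)
        return hidden_states + input_tensor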
Example #12
    def __init__(self, config):
        super(BertBiOutput, self).__init__()

        v_config = BertConfig.from_dict(config.v_config)

        self.dense1 = nn.Linear(config.bi_hidden_size, v_config.hidden_size)
        self.LayerNorm1 = BertLayerNorm(v_config.hidden_size, eps=1e-12)
        self.dropout1 = nn.Dropout(v_config.hidden_dropout_prob)

        # self.q_dense1 = nn.Linear(config.bi_hidden_size, v_config.hidden_size)
        # self.q_dropout1 = nn.Dropout(v_config.hidden_dropout_prob)

        t_config = BertConfig.from_dict(config.t_config)

        self.dense2 = nn.Linear(config.bi_hidden_size, t_config.hidden_size)
        self.LayerNorm2 = BertLayerNorm(t_config.hidden_size, eps=1e-12)
        self.dropout2 = nn.Dropout(t_config.hidden_dropout_prob)
Example #13
 def __init__(self, in_dim, hid_dim, out_dim, dropout):
     super().__init__()
     self.logit_fc = nn.Sequential(
         nn.Linear(in_dim, hid_dim),
         GeLU(),
         BertLayerNorm(hid_dim, eps=1e-12),
         nn.Linear(hid_dim, out_dim),
     )
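A hypothetical usage sketch; the enclosing class name is not shown in the example, so AnswerHead is assumed, and note that the dropout argument is accepted but unused in the constructor above:

    head = AnswerHead(in_dim=768, hid_dim=768, out_dim=2, dropout=0.1)
    pooled_output = torch.randn(8, 768)      # e.g. [CLS] representations
    logits = head.logit_fc(pooled_output)    # shape: (8, 2)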
Example #14
 def __init__(self, config):
     super(BertOutput, self).__init__()
     self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.ln_type = 'postln'
     if 'ln_type' in config.__dict__:
         self.ln_type = config.ln_type
Example #15
 def __init__(self, config):
     super(BertPredictionHeadTransform, self).__init__()
     self.dense = nn.Linear(config.hidden_size, config.hidden_size)
     if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
         self.transform_act_fn = ACT2FN[config.hidden_act]
     else:
         self.transform_act_fn = config.hidden_act
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
Example #16
 def __init__(self, config):
     super(BertSelfOutput, self).__init__()
      # This dense linear layer maps the concatenation of the z(i) outputs from all heads back to hidden_size;
      # by design, n_head * attention_head_size happens to equal hidden_size
     self.dense = nn.Linear(config.hidden_size, config.hidden_size)
     self.LayerNorm = BertLayerNorm(config.hidden_size,
                                    eps=config.layer_norm_eps)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
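For completeness, the forward pass that usually accompanies this constructor (a sketch; the example shows only __init__):

    def forward(self, hidden_states, input_tensor):
        # project the concatenated head outputs, apply dropout, add the residual,
        # then LayerNorm
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return self.LayerNorm(hidden_states + input_tensor)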
Example #17
 def __init__(self, config):
     super(BertPredictionHeadTransform, self).__init__()
     self.dense = nn.Linear(config.hidden_size, config.hidden_size)
     if isinstance(config.hidden_act, str):
         self.transform_act_fn = ACT2FN[config.hidden_act]
     else:
         self.transform_act_fn = config.hidden_act
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
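The forward pass that typically goes with this transform (a sketch: dense projection, activation, then LayerNorm):

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        return self.LayerNorm(hidden_states)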
Example #18
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #19
    def __init__(self, vision_size, config):
        super().__init__()
        feat_dim = vision_size

        # Object feature encoding
        self.visn_fc = nn.Linear(feat_dim, config.hidden_size)
        self.visn_layer_norm = BertLayerNorm(config.hidden_size, eps=1e-12)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #20
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #21
 def __init__(self, config):
     super(BertPredictionHeadTransform, self).__init__()
      # Need to untie it when we separate the dimensions of hidden and emb
     self.dense = nn.Linear(config.hidden_size, config.hidden_size)
     if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
         self.transform_act_fn = ACT2FN[config.hidden_act]
     else:
         self.transform_act_fn = config.hidden_act
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
Example #22
 def __init__(self, config, num_answers):
     super().__init__()
     hid_dim = config.hidden_size
     self.logit_fc = nn.Sequential(
         nn.Linear(hid_dim, hid_dim * 2),
         GeLU(),
         BertLayerNorm(hid_dim * 2, eps=1e-12),
         nn.Linear(hid_dim * 2, num_answers)
     )
Example #23
 def __init__(self, config, num_labels=2):
     super(BertForSequenceClassification, self).__init__(config)
     self.num_labels = num_labels
     self.bert = BertModel(config)
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
     self.puri = PuriAttention(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, num_labels)
     self.init_weights()
Example #24
 def __init__(self, config, intermediate_size=-1):
     super(BertOutput, self).__init__()
     if intermediate_size < 0:
         self.dense = nn.Linear(config.intermediate_size,
                                config.hidden_size)
     else:
         self.dense = nn.Linear(intermediate_size, config.hidden_size)
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #25
    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model
        # variable name and be able to load any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #26
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.embedding_size,
                                            padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.embedding_size)

        self.LayerNorm = BertLayerNorm(config.embedding_size,
                                       eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #27
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
        # TODO: RoBERTa currently has a quirk: max_position_embeddings must be 512 to load some checkpoints,
        # but some models were not trained with length 512, so be careful
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #28
 def __init__(self, in_hsz, out_hsz, layer_norm=True,
              dropout=0.1, relu=True):
     super(LinearLayer, self).__init__()
     self.relu = relu
     self.layer_norm = layer_norm
     if layer_norm:
         self.LayerNorm = BertLayerNorm(in_hsz, eps=1e-5)
     layers = [
         nn.Dropout(dropout),
         nn.Linear(in_hsz, out_hsz)
     ]
     self.net = nn.Sequential(*layers)
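A hedged sketch of the matching forward, assuming torch.nn.functional is imported as F (the example only defines the layers):

    def forward(self, x):
        # hypothetical forward: optional input LayerNorm, then dropout + linear,
        # with an optional ReLU on the output
        if self.layer_norm:
            x = self.LayerNorm(x)
        x = self.net(x)
        if self.relu:
            x = F.relu(x, inplace=True)
        return x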
Example #29
	def __init__(self, config):
		super(BertEmbeddings, self).__init__()
		self.add_positional_encoding = config.add_positional_encoding_to_input
		self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
		if self.add_positional_encoding:
			self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
		self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

		# self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
		# any TensorFlow checkpoint file
		self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
		self.dropout = nn.Dropout(config.hidden_dropout_prob)
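A sketch of how the add_positional_encoding flag might be honored in the forward pass (assumed; not shown in the example):

    def forward(self, input_ids, token_type_ids):
        # hypothetical forward: positional embeddings are added only when enabled
        embeddings = self.word_embeddings(input_ids) + self.token_type_embeddings(token_type_ids)
        if self.add_positional_encoding:
            seq_length = input_ids.size(1)
            position_ids = torch.arange(seq_length, device=input_ids.device).unsqueeze(0)
            embeddings = embeddings + self.position_embeddings(position_ids)
        return self.dropout(self.LayerNorm(embeddings))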
Example #30
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        self.doc_embeddings_len = config.hidden_size
        self.word_embeddings_len = config.hidden_size
        self.position_embeddings_len = config.hidden_size

        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            self.word_embeddings_len)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                self.position_embeddings_len)
        self.doc_embeddings = nn.Embedding(config.type_vocab_size,
                                           self.doc_embeddings_len)
        # self.interact = nn.Parameter(torch.FloatTensor(self.doc_embeddings_len, self.word_embeddings_len).unsqueeze(0))
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.hidden_size = config.hidden_size