    def __init__(self, config: TableBertConfig, prediction_layer: BertLMPredictionHead):
        super(SpanBasedPrediction, self).__init__()
        
        self.dense1 = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False)
        self.layer_norm1 = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dense2 = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
        self.layer_norm2 = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        self.prediction = prediction_layer
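
A minimal standalone sketch of the projection pattern set up above, with torch.nn.LayerNorm standing in for BertLayerNorm; the ReLU activation and the doubled input width (e.g. concatenated span endpoints) are assumptions for illustration, not taken from SpanBasedPrediction's forward pass:

import torch
import torch.nn as nn

hidden_size = 768
dense1 = nn.Linear(hidden_size * 2, hidden_size, bias=False)
norm1 = nn.LayerNorm(hidden_size, eps=1e-12)      # stand-in for BertLayerNorm
dense2 = nn.Linear(hidden_size, hidden_size, bias=False)
norm2 = nn.LayerNorm(hidden_size, eps=1e-12)

span_repr = torch.randn(4, 10, hidden_size * 2)   # (batch, num_spans, 2 * hidden), assumed shape
h = norm1(torch.relu(dense1(span_repr)))          # the activation here is an assumption
h = norm2(torch.relu(dense2(h)))
print(h.shape)                                    # torch.Size([4, 10, 768])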
Example #2
    def __init__(self,
                 config,
                 img_dim,
                 loss="cls",
                 margin=0.2,
                 hard_ratio=0.3,
                 mlp=1):
        super().__init__(config)
        self.bert = BertVisionLanguageEncoder(config, img_dim)
        if mlp == 1:
            self.re_output = nn.Linear(config.hidden_size, 1)
        elif mlp == 2:
            self.re_output = nn.Sequential(
                nn.Linear(config.hidden_size, config.hidden_size), nn.ReLU(),
                BertLayerNorm(config.hidden_size, eps=1e-12),
                nn.Linear(config.hidden_size, 1))
        else:
            sys.exit("MLP restricted to be 1 or 2 layers.")
        self.loss = loss
        assert self.loss in ['cls', 'rank']
        if self.loss == 'rank':
            self.margin = margin
            self.hard_ratio = hard_ratio
        else:
            self.crit = nn.CrossEntropyLoss(reduction='none')
        # initialize
        self.apply(self.init_bert_weights)
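
The two loss modes configured above can be illustrated with a small hedged sketch; the shapes, the single sampled negative, and how hard_ratio selects negatives are assumptions, not code from the original forward pass:

import torch
import torch.nn.functional as F

margin = 0.2
pos_scores = torch.randn(8)      # score of the gold region per example (assumed shape)
neg_scores = torch.randn(8)      # score of one sampled negative region
# 'rank': push the gold region's score above negatives by at least `margin`
rank_loss = torch.clamp(margin + neg_scores - pos_scores, min=0).mean()

# 'cls': treat region grounding as classification over candidate regions
logits = torch.randn(8, 36)      # one score per candidate region
targets = torch.randint(0, 36, (8,))
cls_loss = F.cross_entropy(logits, targets, reduction='none').mean()
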
    def __init__(self, config, max_sentence_type=10):
        super(BertEmbeddings_type, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        self.sentence_type_embeddings = nn.Embedding(max_sentence_type, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
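
A hedged, self-contained sketch of the forward pass such an embedding layer usually implies: the standard BERT sum of word, position, and token-type embeddings, extended with the sentence-type table. The sizes, dropout probability, and zero-valued ids below are illustrative only:

import torch
import torch.nn as nn

vocab_size, hidden, max_pos, type_vocab, max_sent = 30522, 768, 512, 2, 10
word_emb = nn.Embedding(vocab_size, hidden)
pos_emb = nn.Embedding(max_pos, hidden)
tok_type_emb = nn.Embedding(type_vocab, hidden)
sent_type_emb = nn.Embedding(max_sent, hidden)
layer_norm = nn.LayerNorm(hidden, eps=1e-12)   # stand-in for BertLayerNorm
dropout = nn.Dropout(0.1)

input_ids = torch.randint(0, vocab_size, (2, 16))
position_ids = torch.arange(16).unsqueeze(0).expand_as(input_ids)
token_type_ids = torch.zeros_like(input_ids)
sentence_type_ids = torch.zeros_like(input_ids)

emb = (word_emb(input_ids) + pos_emb(position_ids)
       + tok_type_emb(token_type_ids) + sent_type_emb(sentence_type_ids))
emb = dropout(layer_norm(emb))
print(emb.shape)  # torch.Size([2, 16, 768])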
Example #4
    def __init__(self, config):
        super(BertEmbeddings_custom, self).__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
    def __init__(self, config):
        super().__init__()
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        # should eliminate the below two and fold into meta_model.
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #6
    def __init__(self,
                 vocab: Vocabulary,
                 entity_linker: Model,
                 span_attention_config: Dict[str, int],
                 should_init_kg_to_bert_inverse: bool = True,
                 freeze: bool = False,
                 regularizer: RegularizerApplicator = None):
        super().__init__(vocab, regularizer)

        self.entity_linker = entity_linker
        self.entity_embedding_dim = self.entity_linker.disambiguator.entity_embedding_dim
        self.contextual_embedding_dim = self.entity_linker.disambiguator.contextual_embedding_dim

        self.weighted_entity_layer_norm = BertLayerNorm(self.entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.weighted_entity_layer_norm, 0.02)

        self.dropout = torch.nn.Dropout(0.1)

        # the span attention layers
        assert len(span_attention_config) == 4
        config = BertConfig(
            0,  # vocab size, not used
            hidden_size=span_attention_config['hidden_size'],
            num_hidden_layers=span_attention_config['num_hidden_layers'],
            num_attention_heads=span_attention_config['num_attention_heads'],
            intermediate_size=span_attention_config['intermediate_size']
        )
        self.span_attention_layer = SpanAttentionLayer(config)
        # already init inside span attention layer

        # for the output!
        self.output_layer_norm = BertLayerNorm(self.contextual_embedding_dim, eps=1e-5)

        self.kg_to_bert_projection = torch.nn.Linear(
            self.entity_embedding_dim, self.contextual_embedding_dim
        )

        self.should_init_kg_to_bert_inverse = should_init_kg_to_bert_inverse
        self._init_kg_to_bert_projection()

        self._freeze_all = freeze
Example #7
    def __init__(self, input_dim, num_attention_heads, do_transform: bool = False):
        super(MultiHeadPooling, self).__init__()
        self.attention_dim_per_head = input_dim // num_attention_heads
        self.all_heads_attention_dim = self.attention_dim_per_head * num_attention_heads
        self.num_attention_heads = num_attention_heads
        self.query = nn.Linear(input_dim, num_attention_heads)
        self.value = nn.Linear(input_dim, self.all_heads_attention_dim)

        self.do_transform = do_transform
        if self.do_transform:
            self.transform = nn.Linear(self.attention_dim_per_head, self.attention_dim_per_head)

        self.layer_norm = BertLayerNorm(self.attention_dim_per_head, eps=1e-12)
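
A sketch of the multi-head pooling step these layers set up: one attention score per head and token, a softmax over the sequence, and a per-head weighted sum. The optional transform is omitted, and this is an assumption about the forward pass rather than a copy of it:

import torch
import torch.nn as nn

batch, seq_len, input_dim, heads = 2, 12, 768, 8
dim_per_head = input_dim // heads

query = nn.Linear(input_dim, heads)
value = nn.Linear(input_dim, heads * dim_per_head)
layer_norm = nn.LayerNorm(dim_per_head, eps=1e-12)    # stand-in for BertLayerNorm

x = torch.randn(batch, seq_len, input_dim)
scores = query(x)                                     # (batch, seq, heads)
weights = torch.softmax(scores, dim=1).unsqueeze(-1)  # normalize over the sequence
values = value(x).view(batch, seq_len, heads, dim_per_head)
pooled = layer_norm((weights * values).sum(dim=1))    # (batch, heads, dim_per_head)
print(pooled.shape)  # torch.Size([2, 8, 96])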
Example #8
    def __init__(self, config, img_dim, num_region_toks):
        BertPreTrainedModel.__init__(self, config)
        self.embeddings = BertTextEmbeddings(config)
        self.img_embeddings = BertImageEmbeddings(config, img_dim)
        self.num_region_toks = num_region_toks
        self.region_token_embeddings = nn.Embedding(
            num_region_toks,
            config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.apply(self.init_bert_weights)
Example #9
    def __init__(self, num_hid, bidirect, dropout, rnn_type):
        super().__init__()

        assert isinstance(rnn_type, str)
        rnn_type = rnn_type.upper()
        assert rnn_type == 'LSTM' or rnn_type == 'GRU'
        rnn_cls = getattr(nn, rnn_type)
        self._rnn = rnn_cls(num_hid, num_hid, 1,
                bidirectional=bidirect,
                dropout=dropout,
                batch_first=True)
        self._layer_norm = BertLayerNorm(num_hid, eps=1e-12)
        self.rnn_type = rnn_type
        self.num_hid = num_hid
        self.ndirections = 1 + int(bidirect)
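
A minimal usage sketch for this encoder, with bidirect=False so the RNN output width matches the LayerNorm size (with a bidirectional RNN the outputs are 2 * num_hid wide and need extra handling not shown here); the sizes below are illustrative:

import torch
import torch.nn as nn

num_hid = 256
rnn = nn.GRU(num_hid, num_hid, 1, bidirectional=False, dropout=0.0, batch_first=True)
layer_norm = nn.LayerNorm(num_hid, eps=1e-12)   # stand-in for BertLayerNorm

x = torch.randn(4, 20, num_hid)    # (batch, seq, features)
output, _ = rnn(x)                 # (batch, seq, num_hid)
output = layer_norm(output)
print(output.shape)  # torch.Size([4, 20, 256])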
Example #10
    def __init__(self, config, img_dim, obj_cls=True, img_label_dim=81):
        super().__init__(config, img_dim)
        self.bert = BertVisionLanguageEncoder(
            config, img_dim)
        # self.vcr_output = nn.Linear(config.hidden_size, 1)
        # self.vcr_output = nn.Linear(config.hidden_size, 2)
        self.vcr_output = nn.Sequential(
            nn.Linear(config.hidden_size, config.hidden_size*2),
            nn.ReLU(),
            BertLayerNorm(config.hidden_size*2, eps=1e-12),
            nn.Linear(config.hidden_size*2, 2)
        )
        self.apply(self.init_bert_weights)
        self.obj_cls = obj_cls
        if self.obj_cls:
            self.region_classifier = RegionClassification(
                config.hidden_size, img_label_dim)
    def __init__(self,
                 config,
                 include_compress=False,
                 compress_size=0,
                 compress_fp16=False):
        super(BertLayer, self).__init__()
        self.config = config
        self.attention = BertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
        if not include_compress or compress_size == 0:
            self.selfencode = None
        else:
            self.selfencode = nn.Sequential(
                nn.Linear(config.hidden_size, compress_size),
                Act(ACT2FN[config.hidden_act], compress_fp16),
                nn.Linear(compress_size, config.hidden_size),
                BertLayerNorm(
                    config.hidden_size)  # , eps=config.layer_norm_eps
            )
        self.only_cls_output = False
Example #12
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #13
    def __init__(
        self,
        config,
        input_dim,
        output_dim,
        ent_emb_file,
        static_ent_emb_file,
        type_ent_emb_file,
        rel_ent_emb_file,
        tanh=False,
        norm=False,
        freeze=True,
    ):
        super(EntBertEncoder, self).__init__(config)
        if (
            ent_emb_file is not None
            or static_ent_emb_file is not None
            or type_ent_emb_file is not None
            or rel_ent_emb_file is not None
        ):
            self.encoder = BertEncoder(config)
        else:
            self.encoder = None
        self.pooler = BertPooler(config)

        self.apply(self.init_bert_weights)

        if ent_emb_file is not None:
            ent_emb_matrix = torch.from_numpy(np.load(ent_emb_file))
            self.ent_embeddings = nn.Embedding(
                ent_emb_matrix.size()[0], ent_emb_matrix.size()[1], padding_idx=0
            )
            self.ent_embeddings.weight.data.copy_(ent_emb_matrix)
            input_dim += ent_emb_matrix.size()[1]
            if freeze:
                for param in self.ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.ent_embeddings = None

        if static_ent_emb_file is not None:
            static_ent_emb_matrix = torch.from_numpy(np.load(static_ent_emb_file))
            self.static_ent_embeddings = nn.Embedding(
                static_ent_emb_matrix.size()[0],
                static_ent_emb_matrix.size()[1],
                padding_idx=0,
            )
            self.static_ent_embeddings.weight.data.copy_(static_ent_emb_matrix)
            input_dim += static_ent_emb_matrix.size()[1]
            if freeze:
                for param in self.static_ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.static_ent_embeddings = None

        if type_ent_emb_file is not None:
            type_ent_emb_matrix = torch.from_numpy(np.load(type_ent_emb_file))
            self.type_ent_embeddings = nn.Embedding(
                type_ent_emb_matrix.size()[0],
                type_ent_emb_matrix.size()[1],
                padding_idx=0,
            )
            self.type_ent_embeddings.weight.data.copy_(type_ent_emb_matrix)
            input_dim += type_ent_emb_matrix.size()[1]
            if freeze:
                for param in self.type_ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.type_ent_embeddings = None

        if rel_ent_emb_file is not None:
            rel_ent_emb_matrix = torch.from_numpy(np.load(rel_ent_emb_file))
            self.rel_ent_embeddings = nn.Embedding(
                rel_ent_emb_matrix.size()[0],
                rel_ent_emb_matrix.size()[1],
                padding_idx=0,
            )
            self.rel_ent_embeddings.weight.data.copy_(rel_ent_emb_matrix)
            input_dim += rel_ent_emb_matrix.size()[1]
            if freeze:
                for param in self.rel_ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.rel_ent_embeddings = None

        self.proj = nn.Linear(input_dim, output_dim)

        if tanh is True:
            self.proj_activation = nn.Tanh()
        else:
            self.proj_activation = None

        self.norm = norm
        if self.norm is True:
            self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
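
The four near-identical embedding-loading blocks above could be collapsed into one helper. This is a refactoring sketch under the same assumptions as the original (a .npy matrix on disk, padding index 0, optional freezing), not code from the class itself; each call returns the (possibly None) embedding module together with the width it contributes to input_dim.

import numpy as np
import torch
import torch.nn as nn

def load_embedding(emb_file, freeze=True, padding_idx=0):
    """Load a saved .npy matrix into an nn.Embedding, optionally frozen."""
    if emb_file is None:
        return None, 0
    matrix = torch.from_numpy(np.load(emb_file))
    emb = nn.Embedding(matrix.size(0), matrix.size(1), padding_idx=padding_idx)
    emb.weight.data.copy_(matrix)
    if freeze:
        emb.weight.requires_grad = False
    return emb, matrix.size(1)
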
    def __init__(self, in_channels, out_channels, activation_function=gelu, should_norm=True):
        super().__init__()
        self.linear = nn.Linear(in_channels, out_channels)
        self.activation_function = activation_function
        self.layer_norm = BertLayerNorm(out_channels, eps=1e-12) if should_norm else None
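
A sketch of the forward this block implies: linear projection, activation, then the optional LayerNorm. torch.nn.functional.gelu stands in for the gelu referenced above, and the sizes are illustrative:

import torch
import torch.nn as nn
import torch.nn.functional as F

linear = nn.Linear(768, 256)
layer_norm = nn.LayerNorm(256, eps=1e-12)   # applied only when should_norm=True

x = torch.randn(4, 768)
h = F.gelu(linear(x))
h = layer_norm(h)
print(h.shape)  # torch.Size([4, 256])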
Example #15
    def __init__(self, config):
        super(BertSentInput, self).__init__()
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
    def __init__(self, config):
        super(BertDESelfOutput, self).__init__()
        self.dense = nn.Linear(config.hidden_size // 2, config.hidden_size // 2)
        self.LayerNorm = BertLayerNorm(config.hidden_size // 2, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #17
    def __init__(self,
                 contextual_embedding_dim,
                 entity_embedding_dim: int,
                 entity_embeddings: torch.nn.Embedding,
                 max_sequence_length: int = 512,
                 span_encoder_config: Dict[str, int] = None,
                 dropout: float = 0.1,
                 output_feed_forward_hidden_dim: int = 100,
                 initializer_range: float = 0.02,
                 weighted_entity_threshold: float = None,
                 null_entity_id: int = None,
                 include_null_embedding_in_dot_attention: bool = False):
        """
        Idea: Align the bert and KG vector space by learning a mapping between
            them.
        """
        super().__init__()

        self.span_extractor = SelfAttentiveSpanExtractor(entity_embedding_dim)
        init_bert_weights(self.span_extractor._global_attention._module,
                          initializer_range)

        self.dropout = torch.nn.Dropout(dropout)

        self.bert_to_kg_projector = torch.nn.Linear(
            contextual_embedding_dim, entity_embedding_dim)
        init_bert_weights(self.bert_to_kg_projector, initializer_range)
        self.projected_span_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.projected_span_layer_norm, initializer_range)

        self.kg_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.kg_layer_norm, initializer_range)

        # already pretrained, don't init
        self.entity_embeddings = entity_embeddings
        self.entity_embedding_dim = entity_embedding_dim

        # layers for the dot product attention
        if weighted_entity_threshold is not None or include_null_embedding_in_dot_attention:
            if hasattr(self.entity_embeddings, 'get_null_embedding'):
                null_embedding = self.entity_embeddings.get_null_embedding()
            else:
                null_embedding = self.entity_embeddings.weight[null_entity_id, :]
        else:
            null_embedding = None
        self.dot_attention_with_prior = DotAttentionWithPrior(
            output_feed_forward_hidden_dim,
            weighted_entity_threshold,
            null_embedding,
            initializer_range
        )
        self.null_entity_id = null_entity_id
        self.contextual_embedding_dim = contextual_embedding_dim

        if span_encoder_config is None:
            self.span_encoder = None
        else:
            # create BertConfig
            assert len(span_encoder_config) == 4
            config = BertConfig(
                0,  # vocab size, not used
                hidden_size=span_encoder_config['hidden_size'],
                num_hidden_layers=span_encoder_config['num_hidden_layers'],
                num_attention_heads=span_encoder_config['num_attention_heads'],
                intermediate_size=span_encoder_config['intermediate_size']
            )
            self.span_encoder = BertEncoder(config)
            init_bert_weights(self.span_encoder, initializer_range)
    def __init__(self, config):
        super(BertSelfOutput, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
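
In the reference BERT implementation, a module of this shape is followed by a residual connection: project the attention output, apply dropout, add the block input, then LayerNorm. A minimal sketch with nn.LayerNorm in place of BertLayerNorm and illustrative sizes:

import torch
import torch.nn as nn

hidden_size = 768
dense = nn.Linear(hidden_size, hidden_size)
layer_norm = nn.LayerNorm(hidden_size, eps=1e-12)
dropout = nn.Dropout(0.1)

input_tensor = torch.randn(2, 16, hidden_size)   # input to the attention block
hidden_states = torch.randn(2, 16, hidden_size)  # output of the attention block
out = layer_norm(dropout(dense(hidden_states)) + input_tensor)
print(out.shape)  # torch.Size([2, 16, 768])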
Example #19
    def __init__(self, config):
        super(BertOutput_Quant, self).__init__()
        self.dense = QuantLinear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)