def __init__(self, config: TableBertConfig, prediction_layer: BertLMPredictionHead):
    super(SpanBasedPrediction, self).__init__()

    self.dense1 = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False)
    self.layer_norm1 = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    self.dense2 = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
    self.layer_norm2 = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    self.prediction = prediction_layer
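# Hypothetical forward sketch for the SpanBasedPrediction head above; the forward
# pass is not part of the original snippet. The input layout (two representations
# concatenated to match the hidden_size * 2 input of dense1), the argument names,
# and the gelu activations are assumptions for illustration only.
def forward(self, span_repr: torch.Tensor, context_repr: torch.Tensor) -> torch.Tensor:
    hidden = torch.cat([span_repr, context_repr], dim=-1)                      # (..., hidden_size * 2)
    hidden = self.layer_norm1(torch.nn.functional.gelu(self.dense1(hidden)))   # (..., hidden_size)
    hidden = self.layer_norm2(torch.nn.functional.gelu(self.dense2(hidden)))   # (..., hidden_size)
    return self.prediction(hidden)                                             # vocabulary logits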
def __init__(self, config, img_dim, loss="cls", margin=0.2, hard_ratio=0.3, mlp=1):
    super().__init__(config)
    self.bert = BertVisionLanguageEncoder(config, img_dim)
    if mlp == 1:
        self.re_output = nn.Linear(config.hidden_size, 1)
    elif mlp == 2:
        self.re_output = nn.Sequential(
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.ReLU(),
            BertLayerNorm(config.hidden_size, eps=1e-12),
            nn.Linear(config.hidden_size, 1))
    else:
        sys.exit("MLP restricted to be 1 or 2 layers.")
    self.loss = loss
    assert self.loss in ['cls', 'rank']
    if self.loss == 'rank':
        self.margin = margin
        self.hard_ratio = hard_ratio
    else:
        self.crit = nn.CrossEntropyLoss(reduction='none')
    # initialize
    self.apply(self.init_bert_weights)
def __init__(self, config, max_sentence_type=10):
    super(BertEmbeddings_type, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    self.sentence_type_embeddings = nn.Embedding(max_sentence_type, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self, config):
    super(BertEmbeddings_custom, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    # self.LayerNorm is not snake-cased to stick with the TensorFlow model variable name
    # and be able to load any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self, config):
    super().__init__()
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    # should eliminate the below two and fold into meta_model
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    # self.LayerNorm is not snake-cased to stick with the TensorFlow model variable name
    # and be able to load any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self,
             vocab: Vocabulary,
             entity_linker: Model,
             span_attention_config: Dict[str, int],
             should_init_kg_to_bert_inverse: bool = True,
             freeze: bool = False,
             regularizer: RegularizerApplicator = None):
    super().__init__(vocab, regularizer)

    self.entity_linker = entity_linker
    self.entity_embedding_dim = self.entity_linker.disambiguator.entity_embedding_dim
    self.contextual_embedding_dim = self.entity_linker.disambiguator.contextual_embedding_dim

    self.weighted_entity_layer_norm = BertLayerNorm(self.entity_embedding_dim, eps=1e-5)
    init_bert_weights(self.weighted_entity_layer_norm, 0.02)

    self.dropout = torch.nn.Dropout(0.1)

    # the span attention layers
    assert len(span_attention_config) == 4
    config = BertConfig(
        0,  # vocab size, not used
        hidden_size=span_attention_config['hidden_size'],
        num_hidden_layers=span_attention_config['num_hidden_layers'],
        num_attention_heads=span_attention_config['num_attention_heads'],
        intermediate_size=span_attention_config['intermediate_size']
    )
    self.span_attention_layer = SpanAttentionLayer(config)
    # already init inside span attention layer

    # for the output!
    self.output_layer_norm = BertLayerNorm(self.contextual_embedding_dim, eps=1e-5)

    self.kg_to_bert_projection = torch.nn.Linear(
        self.entity_embedding_dim, self.contextual_embedding_dim
    )
    self.should_init_kg_to_bert_inverse = should_init_kg_to_bert_inverse
    self._init_kg_to_bert_projection()

    self._freeze_all = freeze
def __init__(self, input_dim, num_attention_heads, do_transform: bool = False):
    super(MultiHeadPooling, self).__init__()
    self.attention_dim_per_head = input_dim // num_attention_heads
    self.all_heads_attention_dim = self.attention_dim_per_head * num_attention_heads
    self.num_attention_heads = num_attention_heads
    self.query = nn.Linear(input_dim, num_attention_heads)
    self.value = nn.Linear(input_dim, self.all_heads_attention_dim)
    self.do_transform = do_transform
    if self.do_transform:
        self.transform = nn.Linear(self.attention_dim_per_head, self.attention_dim_per_head)
        self.layer_norm = BertLayerNorm(self.attention_dim_per_head, eps=1e-12)
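# Hypothetical forward for MultiHeadPooling; the forward pass is not part of the
# original snippet. The shape handling, the masking convention, and the placement
# of the optional transform/LayerNorm are assumptions for illustration only.
def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor = None) -> torch.Tensor:
    batch_size, seq_len, _ = hidden_states.size()
    # one scalar attention score per head and position: (batch, seq, num_heads)
    scores = self.query(hidden_states)
    if attention_mask is not None:
        # attention_mask assumed to be 1 for real tokens, 0 for padding
        scores = scores.masked_fill(~attention_mask.bool().unsqueeze(-1), float('-inf'))
    weights = torch.softmax(scores, dim=1)  # normalize over the sequence dimension
    # per-head value vectors: (batch, seq, num_heads, dim_per_head)
    values = self.value(hidden_states).view(
        batch_size, seq_len, self.num_attention_heads, self.attention_dim_per_head)
    # attention-weighted sum over positions -> (batch, num_heads, dim_per_head)
    pooled = torch.einsum('bsh,bshd->bhd', weights, values)
    if self.do_transform:
        pooled = self.layer_norm(self.transform(pooled))
    return pooled.reshape(batch_size, self.all_heads_attention_dim)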
def __init__(self, config, img_dim, num_region_toks):
    BertPreTrainedModel.__init__(self, config)
    self.embeddings = BertTextEmbeddings(config)
    self.img_embeddings = BertImageEmbeddings(config, img_dim)
    self.num_region_toks = num_region_toks
    self.region_token_embeddings = nn.Embedding(
        num_region_toks, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)
def __init__(self, num_hid, bidirect, dropout, rnn_type):
    super().__init__()
    assert isinstance(rnn_type, str)
    rnn_type = rnn_type.upper()
    assert rnn_type == 'LSTM' or rnn_type == 'GRU'
    rnn_cls = getattr(nn, rnn_type)
    self._rnn = rnn_cls(num_hid, num_hid, 1,
                        bidirectional=bidirect,
                        dropout=dropout,
                        batch_first=True)
    self._layer_norm = BertLayerNorm(num_hid, eps=1e-12)
    self.rnn_type = rnn_type
    self.num_hid = num_hid
    self.ndirections = 1 + int(bidirect)
def __init__(self, config, img_dim, obj_cls=True, img_label_dim=81):
    super().__init__(config, img_dim)
    self.bert = BertVisionLanguageEncoder(
        config, img_dim)
    # self.vcr_output = nn.Linear(config.hidden_size, 1)
    # self.vcr_output = nn.Linear(config.hidden_size, 2)
    self.vcr_output = nn.Sequential(
        nn.Linear(config.hidden_size, config.hidden_size * 2),
        nn.ReLU(),
        BertLayerNorm(config.hidden_size * 2, eps=1e-12),
        nn.Linear(config.hidden_size * 2, 2)
    )
    self.apply(self.init_bert_weights)
    self.obj_cls = obj_cls
    if self.obj_cls:
        self.region_classifier = RegionClassification(
            config.hidden_size, img_label_dim)
def __init__(self, config, include_compress=False, compress_size=0, compress_fp16=False):
    super(BertLayer, self).__init__()
    self.config = config
    self.attention = BertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
    if not include_compress or compress_size == 0:
        self.selfencode = None
    else:
        self.selfencode = nn.Sequential(
            nn.Linear(config.hidden_size, compress_size),
            Act(ACT2FN[config.hidden_act], compress_fp16),
            nn.Linear(compress_size, config.hidden_size),
            BertLayerNorm(config.hidden_size)  # , eps=config.layer_norm_eps
        )
    self.only_cls_output = False
def __init__(self, config):
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
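# In the reference BERT implementations, a module with this (dense, LayerNorm,
# dropout) layout is the residual output sub-layer. A minimal sketch of that
# conventional forward follows; that this class uses the same pattern is an
# assumption, since its forward is not shown in the snippet.
def forward(self, hidden_states, input_tensor):
    hidden_states = self.dense(hidden_states)
    hidden_states = self.dropout(hidden_states)
    # residual connection followed by layer normalization
    hidden_states = self.LayerNorm(hidden_states + input_tensor)
    return hidden_states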
def __init__(
    self,
    config,
    input_dim,
    output_dim,
    ent_emb_file,
    static_ent_emb_file,
    type_ent_emb_file,
    rel_ent_emb_file,
    tanh=False,
    norm=False,
    freeze=True,
):
    super(EntBertEncoder, self).__init__(config)

    if (
        ent_emb_file is not None
        or static_ent_emb_file is not None
        or type_ent_emb_file is not None
        or rel_ent_emb_file is not None
    ):
        self.encoder = BertEncoder(config)
    else:
        self.encoder = None
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

    if ent_emb_file is not None:
        ent_emb_matrix = torch.from_numpy(np.load(ent_emb_file))
        self.ent_embeddings = nn.Embedding(
            ent_emb_matrix.size()[0], ent_emb_matrix.size()[1], padding_idx=0
        )
        self.ent_embeddings.weight.data.copy_(ent_emb_matrix)
        input_dim += ent_emb_matrix.size()[1]
        if freeze:
            for param in self.ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.ent_embeddings = None

    if static_ent_emb_file is not None:
        static_ent_emb_matrix = torch.from_numpy(np.load(static_ent_emb_file))
        self.static_ent_embeddings = nn.Embedding(
            static_ent_emb_matrix.size()[0],
            static_ent_emb_matrix.size()[1],
            padding_idx=0,
        )
        self.static_ent_embeddings.weight.data.copy_(static_ent_emb_matrix)
        input_dim += static_ent_emb_matrix.size()[1]
        if freeze:
            for param in self.static_ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.static_ent_embeddings = None

    if type_ent_emb_file is not None:
        type_ent_emb_matrix = torch.from_numpy(np.load(type_ent_emb_file))
        self.type_ent_embeddings = nn.Embedding(
            type_ent_emb_matrix.size()[0],
            type_ent_emb_matrix.size()[1],
            padding_idx=0,
        )
        self.type_ent_embeddings.weight.data.copy_(type_ent_emb_matrix)
        input_dim += type_ent_emb_matrix.size()[1]
        if freeze:
            for param in self.type_ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.type_ent_embeddings = None

    if rel_ent_emb_file is not None:
        rel_ent_emb_matrix = torch.from_numpy(np.load(rel_ent_emb_file))
        self.rel_ent_embeddings = nn.Embedding(
            rel_ent_emb_matrix.size()[0],
            rel_ent_emb_matrix.size()[1],
            padding_idx=0,
        )
        self.rel_ent_embeddings.weight.data.copy_(rel_ent_emb_matrix)
        input_dim += rel_ent_emb_matrix.size()[1]
        if freeze:
            for param in self.rel_ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.rel_ent_embeddings = None

    self.proj = nn.Linear(input_dim, output_dim)
    if tanh is True:
        self.proj_activation = nn.Tanh()
    else:
        self.proj_activation = None

    self.norm = norm
    if self.norm is True:
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self, in_channels, out_channels, activation_function=gelu, should_norm=True):
    super().__init__()
    self.linear = nn.Linear(in_channels, out_channels)
    self.activation_function = activation_function
    self.layer_norm = BertLayerNorm(out_channels, eps=1e-12) if should_norm else None
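# Hypothetical forward for the projection block above; the call order
# (linear -> activation -> optional LayerNorm) is an assumption, not taken from
# the original code.
def forward(self, x: torch.Tensor) -> torch.Tensor:
    x = self.activation_function(self.linear(x))
    if self.layer_norm is not None:
        x = self.layer_norm(x)
    return x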
def __init__(self, config):
    super(BertSentInput, self).__init__()
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self, config):
    super(BertDESelfOutput, self).__init__()
    self.dense = nn.Linear(int(config.hidden_size / 2), int(config.hidden_size / 2))
    self.LayerNorm = BertLayerNorm(int(config.hidden_size / 2), eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self,
             contextual_embedding_dim,
             entity_embedding_dim: int,
             entity_embeddings: torch.nn.Embedding,
             max_sequence_length: int = 512,
             span_encoder_config: Dict[str, int] = None,
             dropout: float = 0.1,
             output_feed_forward_hidden_dim: int = 100,
             initializer_range: float = 0.02,
             weighted_entity_threshold: float = None,
             null_entity_id: int = None,
             include_null_embedding_in_dot_attention: bool = False):
    """
    Idea: Align the bert and KG vector space by learning a mapping between them.
    """
    super().__init__()

    self.span_extractor = SelfAttentiveSpanExtractor(entity_embedding_dim)
    init_bert_weights(self.span_extractor._global_attention._module, initializer_range)

    self.dropout = torch.nn.Dropout(dropout)

    self.bert_to_kg_projector = torch.nn.Linear(
        contextual_embedding_dim, entity_embedding_dim)
    init_bert_weights(self.bert_to_kg_projector, initializer_range)
    self.projected_span_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
    init_bert_weights(self.projected_span_layer_norm, initializer_range)

    self.kg_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
    init_bert_weights(self.kg_layer_norm, initializer_range)

    # already pretrained, don't init
    self.entity_embeddings = entity_embeddings
    self.entity_embedding_dim = entity_embedding_dim

    # layers for the dot product attention
    if weighted_entity_threshold is not None or include_null_embedding_in_dot_attention:
        if hasattr(self.entity_embeddings, 'get_null_embedding'):
            null_embedding = self.entity_embeddings.get_null_embedding()
        else:
            null_embedding = self.entity_embeddings.weight[null_entity_id, :]
    else:
        null_embedding = None
    self.dot_attention_with_prior = DotAttentionWithPrior(
        output_feed_forward_hidden_dim,
        weighted_entity_threshold,
        null_embedding,
        initializer_range
    )

    self.null_entity_id = null_entity_id
    self.contextual_embedding_dim = contextual_embedding_dim

    if span_encoder_config is None:
        self.span_encoder = None
    else:
        # create BertConfig
        assert len(span_encoder_config) == 4
        config = BertConfig(
            0,  # vocab size, not used
            hidden_size=span_encoder_config['hidden_size'],
            num_hidden_layers=span_encoder_config['num_hidden_layers'],
            num_attention_heads=span_encoder_config['num_attention_heads'],
            intermediate_size=span_encoder_config['intermediate_size']
        )
        self.span_encoder = BertEncoder(config)
        init_bert_weights(self.span_encoder, initializer_range)
def __init__(self, config):
    super(BertSelfOutput, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def __init__(self, config):
    super(BertOutput_Quant, self).__init__()
    self.dense = QuantLinear(config.intermediate_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)