def __init__(self, config, gen_attention_mask):
    super(BertWithCustomAttentionMask, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)
    self.gen_attention_mask = gen_attention_mask

def __init__(self, config):
    """
    :param config: a BertConfig class instance with the configuration to build a new model
    :type config: BertConfig
    """
    super(BertModel, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

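# A minimal sketch (not part of the example above) of the matching forward pass in
# pytorch-pretrained-bert: the 0/1 attention mask is broadcast to [batch, 1, 1, seq_len]
# and converted into an additive bias before BertEncoder sees it.
def forward(self, input_ids, token_type_ids=None, attention_mask=None,
            output_all_encoded_layers=True):
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids)
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)
    extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2).float()
    extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
    embedding_output = self.embeddings(input_ids, token_type_ids)
    encoded_layers = self.encoder(embedding_output,
                                  extended_attention_mask,
                                  output_all_encoded_layers=output_all_encoded_layers)
    sequence_output = encoded_layers[-1]
    pooled_output = self.pooler(sequence_output)
    if not output_all_encoded_layers:
        encoded_layers = encoded_layers[-1]
    return encoded_layers, pooled_output
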
def __init__(self, config):
    super(BertModelModified, self).__init__(config)
    self.embeddings = BertEmbeddingsModified(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)
    self.embeddings.history_embeddings.weight[0].data.zero_()  # self.embeddings.history_embeddings.padding_idx
    self.embeddings.turn_embeddings.weight[0].data.zero_()  # self.embeddings.turn_embeddings.padding_idx

def __init__(self, bert_cfg: BertConfig, cfg: Optional[Dict[str, Any]] = None):
    self.bert_cfg = bert_cfg
    super().__init__(bert_cfg)
    self.cfg = cfg
    self.embeddings = BertEmbeddings(bert_cfg)
    self.encoder = BertEncoder(bert_cfg)
    self.pooler = BertPooler(bert_cfg)
    self.num_choices = 4
    self.classifier = nn.Linear(bert_cfg.hidden_size, 1)
    self.apply(self.init_bert_weights)

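# A hedged sketch (assumption, not part of the example above) of the multiple-choice
# forward implied by num_choices and the 1-unit classifier: per-choice inputs of shape
# [batch, num_choices, seq_len] are flattened, encoded, pooled, scored, then reshaped.
def forward(self, input_ids, token_type_ids, attention_mask):
    flat_ids = input_ids.view(-1, input_ids.size(-1))
    flat_types = token_type_ids.view(-1, token_type_ids.size(-1))
    flat_mask = attention_mask.view(-1, attention_mask.size(-1))
    ext_mask = (1.0 - flat_mask.unsqueeze(1).unsqueeze(2).float()) * -10000.0
    hidden = self.encoder(self.embeddings(flat_ids, flat_types), ext_mask,
                          output_all_encoded_layers=False)[-1]
    pooled = self.pooler(hidden)
    logits = self.classifier(pooled)          # [batch * num_choices, 1]
    return logits.view(-1, self.num_choices)  # [batch, num_choices]
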
def __init__(self, config, img_dim, num_region_toks):
    BertPreTrainedModel.__init__(self, config)
    self.embeddings = BertTextEmbeddings(config)
    self.img_embeddings = BertImageEmbeddings(config, img_dim)
    self.num_region_toks = num_region_toks
    self.region_token_embeddings = nn.Embedding(num_region_toks, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

def __init__(self, config, gcn_adj_dim, gcn_adj_num, gcn_embedding_dim, num_labels,
             output_attentions=False, keep_multihead_output=False):
    super(VGCN_Bert, self).__init__(config, output_attentions, keep_multihead_output)
    self.embeddings = VGCNBertEmbeddings(config, gcn_adj_dim, gcn_adj_num, gcn_embedding_dim)
    self.encoder = BertEncoder(config, output_attentions=output_attentions,
                               keep_multihead_output=keep_multihead_output)
    self.pooler = BertPooler(config)
    self.num_labels = num_labels
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_labels)
    self.will_collect_cls_states = False
    self.all_cls_states = []
    self.output_attentions = output_attentions
    self.apply(self.init_bert_weights)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             judge: Model = None,
             update_judge: bool = False,
             reward_method: str = None,
             detach_value_head: bool = False,
             qa_loss_weight: float = 0.,
             influence_reward: bool = False,
             theory_of_mind: bool = False) -> None:
    super(BertMC, self).__init__(vocab, regularizer)
    self.judge = judge
    self.is_judge = self.judge is None
    self.reward_method = None if self.is_judge else reward_method
    self.update_judge = update_judge and (self.judge is not None)
    self._detach_value_head = detach_value_head
    self._qa_loss_weight = qa_loss_weight
    self.influence_reward = influence_reward
    self.theory_of_mind = theory_of_mind
    self._text_field_embedder = text_field_embedder
    self._hidden_dim = text_field_embedder.get_output_dim()
    self.answer_type = 'mc'
    self.output_type = 'mc'
    self._config = self._text_field_embedder.token_embedder_tokens._modules['bert_model'].config
    if not self.is_judge:
        self._sent_chosen_embeddings = torch.nn.Embedding(2, self._config.hidden_size)
        self._sent_chosen_embeddings.weight.data *= 0  # Init to zero to minimally affect BERT at start
        self._policy_head = TimeDistributed(torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
        self._value_head = TimeDistributed(torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
        self._turn_film_gen = torch.nn.Linear(1, 2 * self._hidden_dim)
        self._film = FiLM()
        if self.theory_of_mind:
            final_blocks_config = deepcopy(self._config)
            final_blocks_config.num_hidden_layers = 1
            self.final_blocks_input_proj = TimeDistributed(
                torch.nn.Linear(self._hidden_dim * 2, self._hidden_dim))
            self.final_blocks = BertEncoder(final_blocks_config)
    # NOTE: Rename to self._accuracy (may break model loading)
    self._span_start_accuracy = CategoricalAccuracy()
    self._initializer = initializer

def test_BertEncoder():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000,
                        hidden_size=768,
                        num_hidden_layers=12,
                        num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertEncoder(config)
    embedding_output = embeddings(input_ids, token_type_ids)
    # BertEncoder expects an additive mask: 0.0 for tokens to keep, -10000.0 for padding.
    input_mask = input_mask.view([-1, 1, 1, input_mask.size()[-1]]).float()
    input_mask = (1.0 - input_mask) * -10000.0
    print(model(embedding_output, input_mask))

def __init__(self, vocab_size, embed_dim, embed_init, hidden_size, log, *args, **kwargs):
    super(Bert, self).__init__(vocab_size, embed_dim, embed_init, log)
    # print('init bert')
    self.config = BertConfig(vocab_size_or_config_json_file=vocab_size,
                             hidden_size=embed_dim,
                             num_hidden_layers=3,
                             num_attention_heads=4,
                             intermediate_size=4 * hidden_size,
                             hidden_act="gelu",
                             hidden_dropout_prob=0.1,
                             attention_probs_dropout_prob=0.1,
                             max_position_embeddings=512,
                             type_vocab_size=2,
                             initializer_range=0.02)
    self.max_length = 128
    self.BertEncoder = BertEncoder(self.config)
    print('init BertModel')

def __init__(self, config, num_labels):
    super(BertForSequenceTaggingACTA, self).__init__(config)
    self.num_labels = num_labels
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.gru = nn.GRU(config.hidden_size, config.hidden_size,
                      batch_first=True, bidirectional=True)
    self.crf = CRF(num_labels, batch_first=True)
    self.clf = nn.Linear(2 * config.hidden_size, num_labels)
    # nn.init.xavier_uniform_(self.clf.weight)
    # self.clf.bias.data.fill_(0.01)
    # self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

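# A hedged sketch (assumption, not part of the example above) of how this tagger's
# pieces typically chain together: BertEncoder -> BiGRU -> per-token emissions -> CRF.
# It assumes the pytorch-crf API: crf(emissions, tags, mask=...) returns the
# log-likelihood and crf.decode(emissions, mask=...) returns Viterbi tag paths.
def forward(self, input_ids, token_type_ids, attention_mask, labels=None):
    ext_mask = (1.0 - attention_mask.unsqueeze(1).unsqueeze(2).float()) * -10000.0
    hidden = self.encoder(self.embeddings(input_ids, token_type_ids), ext_mask,
                          output_all_encoded_layers=False)[-1]
    gru_out, _ = self.gru(hidden)   # [batch, seq_len, 2 * hidden_size]
    emissions = self.clf(gru_out)   # [batch, seq_len, num_labels]
    mask = attention_mask.bool()
    if labels is not None:
        return -self.crf(emissions, labels, mask=mask, reduction='mean')
    return self.crf.decode(emissions, mask=mask)
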
def __init__(self, config, evidence_lambda: float = 0.8, my_dropout_p: float = 0.2,
             tf_layers: int = 1, tf_inter_size: int = 3072):
    super(BertHierarchicalTransformer, self).__init__(config)
    logger.info(f'Model {__class__.__name__} is loading...')
    logger.info(f'Model parameters:')
    logger.info(f'Evidence lambda: {evidence_lambda}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(my_dropout_p)
    self.bert = BertModel(config)
    self.query_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
    self.value_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
    # self.sentence_input = layers.BertSentInput(config)
    # Reuse the config with a smaller depth/FFN size for the sentence-level encoder.
    config.num_hidden_layers = tf_layers
    config.intermediate_size = tf_inter_size
    self.sentence_encoder = BertEncoder(config)
    self.attention_score = layers.AttentionScore(config.hidden_size, 256)
    # Output layer
    self.evidence_lambda = evidence_lambda
    self.predictor = nn.Linear(config.hidden_size * 2, 3)

def __init__(self, contextual_embedding_dim,
             entity_embedding_dim: int,
             entity_embeddings: torch.nn.Embedding,
             max_sequence_length: int = 512,
             span_encoder_config: Dict[str, int] = None,
             dropout: float = 0.1,
             output_feed_forward_hidden_dim: int = 100,
             initializer_range: float = 0.02,
             weighted_entity_threshold: float = None,
             null_entity_id: int = None,
             include_null_embedding_in_dot_attention: bool = False):
    """
    Idea: Align the bert and KG vector space by learning a mapping between them.
    """
    super().__init__()

    self.span_extractor = SelfAttentiveSpanExtractor(entity_embedding_dim)
    init_bert_weights(self.span_extractor._global_attention._module, initializer_range)

    self.dropout = torch.nn.Dropout(dropout)

    self.bert_to_kg_projector = torch.nn.Linear(
        contextual_embedding_dim, entity_embedding_dim)
    init_bert_weights(self.bert_to_kg_projector, initializer_range)
    self.projected_span_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
    init_bert_weights(self.projected_span_layer_norm, initializer_range)

    self.kg_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
    init_bert_weights(self.kg_layer_norm, initializer_range)

    # already pretrained, don't init
    self.entity_embeddings = entity_embeddings
    self.entity_embedding_dim = entity_embedding_dim

    # layers for the dot product attention
    if weighted_entity_threshold is not None or include_null_embedding_in_dot_attention:
        if hasattr(self.entity_embeddings, 'get_null_embedding'):
            null_embedding = self.entity_embeddings.get_null_embedding()
        else:
            null_embedding = self.entity_embeddings.weight[null_entity_id, :]
    else:
        null_embedding = None
    self.dot_attention_with_prior = DotAttentionWithPrior(
        output_feed_forward_hidden_dim,
        weighted_entity_threshold,
        null_embedding,
        initializer_range
    )
    self.null_entity_id = null_entity_id
    self.contextual_embedding_dim = contextual_embedding_dim

    if span_encoder_config is None:
        self.span_encoder = None
    else:
        # create BertConfig
        assert len(span_encoder_config) == 4
        config = BertConfig(
            0,  # vocab size, not used
            hidden_size=span_encoder_config['hidden_size'],
            num_hidden_layers=span_encoder_config['num_hidden_layers'],
            num_attention_heads=span_encoder_config['num_attention_heads'],
            intermediate_size=span_encoder_config['intermediate_size']
        )
        self.span_encoder = BertEncoder(config)
        init_bert_weights(self.span_encoder, initializer_range)

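# A hedged usage note (values illustrative, not from the example above): when given,
# span_encoder_config is expected to carry exactly the four BertConfig fields read
# in the else-branch, for instance:
span_encoder_config = {
    'hidden_size': 300,
    'num_hidden_layers': 1,
    'num_attention_heads': 4,
    'intermediate_size': 1024,
}
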
def __init__(self, config: BertConfig):
    super().__init__(config)
    self.encoder = BertEncoder(config)
    self.apply(self.init_bert_weights)

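# A hedged sketch (assumption, not part of the example above): with no embedding layer,
# this wrapper presumably runs BertEncoder directly over precomputed hidden states,
# using the usual additive-mask convention.
def forward(self, hidden_states, attention_mask, output_all_encoded_layers=False):
    ext_mask = (1.0 - attention_mask.unsqueeze(1).unsqueeze(2).float()) * -10000.0
    return self.encoder(hidden_states, ext_mask,
                        output_all_encoded_layers=output_all_encoded_layers)
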
def __init__(self, config):
    super(BertModel_emb_encoder_custom, self).__init__(config)
    self.embeddings = BertEmbeddings_custom(config)
    self.encoder = BertEncoder(config)

def __init__(self, config):
    super().__init__(config)
    self.embedder = ContinuousBertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

def __init__(self, config):
    super(BertModel, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

def __init__(
    self,
    config,
    input_dim,
    output_dim,
    ent_emb_file,
    static_ent_emb_file,
    type_ent_emb_file,
    rel_ent_emb_file,
    tanh=False,
    norm=False,
    freeze=True,
):
    super(EntBertEncoder, self).__init__(config)
    if (
        ent_emb_file is not None
        or static_ent_emb_file is not None
        or type_ent_emb_file is not None
        or rel_ent_emb_file is not None
    ):
        self.encoder = BertEncoder(config)
    else:
        self.encoder = None
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

    if ent_emb_file is not None:
        ent_emb_matrix = torch.from_numpy(np.load(ent_emb_file))
        self.ent_embeddings = nn.Embedding(
            ent_emb_matrix.size()[0], ent_emb_matrix.size()[1], padding_idx=0
        )
        self.ent_embeddings.weight.data.copy_(ent_emb_matrix)
        input_dim += ent_emb_matrix.size()[1]
        if freeze:
            for param in self.ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.ent_embeddings = None

    if static_ent_emb_file is not None:
        static_ent_emb_matrix = torch.from_numpy(np.load(static_ent_emb_file))
        self.static_ent_embeddings = nn.Embedding(
            static_ent_emb_matrix.size()[0],
            static_ent_emb_matrix.size()[1],
            padding_idx=0,
        )
        self.static_ent_embeddings.weight.data.copy_(static_ent_emb_matrix)
        input_dim += static_ent_emb_matrix.size()[1]
        if freeze:
            for param in self.static_ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.static_ent_embeddings = None

    if type_ent_emb_file is not None:
        type_ent_emb_matrix = torch.from_numpy(np.load(type_ent_emb_file))
        self.type_ent_embeddings = nn.Embedding(
            type_ent_emb_matrix.size()[0],
            type_ent_emb_matrix.size()[1],
            padding_idx=0,
        )
        self.type_ent_embeddings.weight.data.copy_(type_ent_emb_matrix)
        input_dim += type_ent_emb_matrix.size()[1]
        if freeze:
            for param in self.type_ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.type_ent_embeddings = None

    if rel_ent_emb_file is not None:
        rel_ent_emb_matrix = torch.from_numpy(np.load(rel_ent_emb_file))
        self.rel_ent_embeddings = nn.Embedding(
            rel_ent_emb_matrix.size()[0],
            rel_ent_emb_matrix.size()[1],
            padding_idx=0,
        )
        self.rel_ent_embeddings.weight.data.copy_(rel_ent_emb_matrix)
        input_dim += rel_ent_emb_matrix.size()[1]
        if freeze:
            for param in self.rel_ent_embeddings.parameters():
                param.requires_grad = False
    else:
        self.rel_ent_embeddings = None

    self.proj = nn.Linear(input_dim, output_dim)
    if tanh is True:
        self.proj_activation = nn.Tanh()
    else:
        self.proj_activation = None
    self.norm = norm
    if self.norm is True:
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

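# A hedged sketch (assumption, not part of the example above; the helper name is
# hypothetical) of the projection step the constructor implies: any configured entity
# embedding lookup is concatenated onto the input features before self.proj, followed
# by the optional tanh and, when norm is set, LayerNorm + dropout (which presumes
# output_dim matches config.hidden_size).
def _project_with_entities(self, features, ent_ids=None):
    parts = [features]
    if self.ent_embeddings is not None and ent_ids is not None:
        parts.append(self.ent_embeddings(ent_ids))
    projected = self.proj(torch.cat(parts, dim=-1))
    if self.proj_activation is not None:
        projected = self.proj_activation(projected)
    if self.norm:
        projected = self.dropout(self.LayerNorm(projected))
    return projected
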