Example #1
 def __init__(self, config, gen_attention_mask):
     super(BertWithCustomAttentionMask, self).__init__(config)
     self.embeddings = BertEmbeddings(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.apply(self.init_bert_weights)
     self.gen_attention_mask = gen_attention_mask
Example #2
 def __init__(self, config):
     """
     :param config: a BertConfig class instance with the configuration to build a new model
     :type config: BertConfig
     """
     super(BertModel, self).__init__(config)
     self.embeddings = BertEmbeddings(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.apply(self.init_bert_weights)
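The snippets in this listing only show constructors. For reference, below is a minimal sketch of the forward pass that usually accompanies this embeddings/encoder/pooler stack in pytorch-pretrained-bert-style code (a sketch, not taken from any of the projects above):

def forward(self, input_ids, token_type_ids=None, attention_mask=None):
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids)
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)

    # BertEncoder expects an additive mask broadcastable to
    # [batch, num_heads, seq_len, seq_len]: 0.0 for real tokens, -10000.0 for padding.
    extended_mask = attention_mask.unsqueeze(1).unsqueeze(2).float()
    extended_mask = (1.0 - extended_mask) * -10000.0

    embedding_output = self.embeddings(input_ids, token_type_ids)
    encoded_layers = self.encoder(embedding_output, extended_mask,
                                  output_all_encoded_layers=False)
    sequence_output = encoded_layers[-1]
    pooled_output = self.pooler(sequence_output)
    return sequence_output, pooled_output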
Example #3
File: bert.py  Project: pombredanne/UrcaNet
 def __init__(self, config):
     super(BertModelModified, self).__init__(config)
     self.embeddings = BertEmbeddingsModified(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.apply(self.init_bert_weights)
     self.embeddings.history_embeddings.weight[0].data.zero_()  # self.embeddings.history_embeddings.padding_idx
     self.embeddings.turn_embeddings.weight[0].data.zero_()  # self.embeddings.turn_embeddings.padding_idx
Example #4
 def __init__(self,
              bert_cfg: BertConfig,
              cfg: Optional[Dict[str, Any]] = None):
     self.bert_cfg = bert_cfg
     super().__init__(bert_cfg)
     self.cfg = cfg
     self.embeddings = BertEmbeddings(bert_cfg)
     self.encoder = BertEncoder(bert_cfg)
     self.pooler = BertPooler(bert_cfg)
     self.num_choices = 4
     self.classifier = nn.Linear(bert_cfg.hidden_size, 1)
     self.apply(self.init_bert_weights)
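Given num_choices = 4 and the Linear(bert_cfg.hidden_size, 1) classifier, this looks like the standard multiple-choice head: choices are flattened into the batch dimension before BERT and the per-choice logits are folded back afterwards. A hedged sketch of that scoring step (hypothetical forward, not from this project's source):

def forward(self, input_ids, token_type_ids, attention_mask):
    # Flatten [batch, num_choices, seq_len] -> [batch * num_choices, seq_len].
    flat_input_ids = input_ids.view(-1, input_ids.size(-1))
    flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
    flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1))

    extended_mask = (1.0 - flat_attention_mask.unsqueeze(1).unsqueeze(2).float()) * -10000.0
    embedding_output = self.embeddings(flat_input_ids, flat_token_type_ids)
    sequence_output = self.encoder(embedding_output, extended_mask,
                                   output_all_encoded_layers=False)[-1]
    pooled_output = self.pooler(sequence_output)

    logits = self.classifier(pooled_output)     # [batch * num_choices, 1]
    return logits.view(-1, self.num_choices)    # [batch, num_choices]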
Example #5
 def __init__(self, config, img_dim, num_region_toks):
     BertPreTrainedModel.__init__(self, config)
     self.embeddings = BertTextEmbeddings(config)
     self.img_embeddings = BertImageEmbeddings(config, img_dim)
     self.num_region_toks = num_region_toks
     self.region_token_embeddings = nn.Embedding(
         num_region_toks,
         config.hidden_size)
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.apply(self.init_bert_weights)
Example #6
    def __init__(self, config, gcn_adj_dim, gcn_adj_num, gcn_embedding_dim, num_labels,
                 output_attentions=False, keep_multihead_output=False):
        super(VGCN_Bert, self).__init__(config, output_attentions, keep_multihead_output)
        self.embeddings = VGCNBertEmbeddings(config, gcn_adj_dim, gcn_adj_num, gcn_embedding_dim)
        self.encoder = BertEncoder(config, output_attentions=output_attentions,
                                   keep_multihead_output=keep_multihead_output)
        self.pooler = BertPooler(config)
        self.num_labels = num_labels
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_labels)
        self.will_collect_cls_states = False
        self.all_cls_states = []
        self.output_attentions = output_attentions

        self.apply(self.init_bert_weights)
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 judge: Model = None,
                 update_judge: bool = False,
                 reward_method: str = None,
                 detach_value_head: bool = False,
                 qa_loss_weight: float = 0.,
                 influence_reward: bool = False,
                 theory_of_mind: bool = False) -> None:
        super(BertMC, self).__init__(vocab, regularizer)

        self.judge = judge
        self.is_judge = self.judge is None
        self.reward_method = None if self.is_judge else reward_method
        self.update_judge = update_judge and (self.judge is not None)
        self._detach_value_head = detach_value_head
        self._qa_loss_weight = qa_loss_weight
        self.influence_reward = influence_reward
        self.theory_of_mind = theory_of_mind
        self._text_field_embedder = text_field_embedder
        self._hidden_dim = text_field_embedder.get_output_dim()
        self.answer_type = 'mc'
        self.output_type = 'mc'
        self._config = self._text_field_embedder.token_embedder_tokens._modules[
            'bert_model'].config

        if not self.is_judge:
            self._sent_chosen_embeddings = torch.nn.Embedding(
                2, self._config.hidden_size)
            self._sent_chosen_embeddings.weight.data *= 0  # Init to zero to minimally affect BERT at start
            self._policy_head = TimeDistributed(
                torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
            self._value_head = TimeDistributed(
                torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
            self._turn_film_gen = torch.nn.Linear(1, 2 * self._hidden_dim)
            self._film = FiLM()
            if self.theory_of_mind:
                final_blocks_config = deepcopy(self._config)
                final_blocks_config.num_hidden_layers = 1
                self.final_blocks_input_proj = TimeDistributed(
                    torch.nn.Linear(self._hidden_dim * 2, self._hidden_dim))
                self.final_blocks = BertEncoder(final_blocks_config)

        # NOTE: Rename to self._accuracy (may break model loading)
        self._span_start_accuracy = CategoricalAccuracy()
        self._initializer = initializer
Example #8
def test_BertEncoder():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000,
                        hidden_size=768,
                        num_hidden_layers=12,
                        num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertEncoder(config)

    embedding_output = embeddings(input_ids, token_type_ids)
    input_mask = input_mask.view([-1, 1, 1, input_mask.size()[-1]]).float()
    print(model(embedding_output, input_mask))
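Note that this smoke test only reshapes the 0/1 mask to [batch, 1, 1, seq_len]; the library's BertModel additionally converts it to the additive form before handing it to BertEncoder, roughly:

# Additive mask as built inside BertModel.forward: 0.0 keeps a position, -10000.0 masks it out.
input_mask = input_mask.view([-1, 1, 1, input_mask.size()[-1]]).float()
input_mask = (1.0 - input_mask) * -10000.0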
Example #9
 def __init__(self, vocab_size, embed_dim, embed_init, hidden_size, log,
              *args, **kwargs):
     super(Bert, self).__init__(vocab_size, embed_dim, embed_init, log)
     #print ('init bert')
     self.config = BertConfig(vocab_size_or_config_json_file=vocab_size,
                              hidden_size=embed_dim,
                              num_hidden_layers=3,
                              num_attention_heads=4,
                              intermediate_size=4 * hidden_size,
                              hidden_act="gelu",
                              hidden_dropout_prob=0.1,
                              attention_probs_dropout_prob=0.1,
                              max_position_embeddings=512,
                              type_vocab_size=2,
                              initializer_range=0.02)
     self.max_length = 128
     self.BertEncoder = BertEncoder(self.config)
     print('init BertModel')
Example #10
    def __init__(self, config, num_labels):
        super(BertForSequenceTaggingACTA, self).__init__(config)
        self.num_labels = num_labels
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.gru = nn.GRU(config.hidden_size,
                          config.hidden_size,
                          batch_first=True,
                          bidirectional=True)

        self.crf = CRF(num_labels, batch_first=True)

        self.clf = nn.Linear(2 * config.hidden_size, num_labels)

        #nn.init.xavier_uniform_(self.clf.weight)
        #self.clf.bias.data.fill_(0.01)
        #self.pooler = BertPooler(config)
        self.apply(self.init_bert_weights)
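The constructor pairs the encoder output with a bidirectional GRU, a linear emission layer, and a CRF. A hedged sketch of how these pieces are typically wired in the forward pass (assuming the pytorch-crf CRF class; not taken from this project's source):

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    extended_mask = (1.0 - attention_mask.unsqueeze(1).unsqueeze(2).float()) * -10000.0
    embedding_output = self.embeddings(input_ids, token_type_ids)
    sequence_output = self.encoder(embedding_output, extended_mask,
                                   output_all_encoded_layers=False)[-1]
    gru_output, _ = self.gru(sequence_output)   # [batch, seq_len, 2 * hidden_size]
    emissions = self.clf(gru_output)            # [batch, seq_len, num_labels]
    if labels is not None:
        # pytorch-crf returns a log-likelihood; negate it to get a loss.
        return -self.crf(emissions, labels, mask=attention_mask.bool())
    return self.crf.decode(emissions, mask=attention_mask.bool())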
Example #11
    def __init__(self,
                 config,
                 evidence_lambda: float = 0.8,
                 my_dropout_p: float = 0.2,
                 tf_layers: int = 1,
                 tf_inter_size: int = 3072):
        super(BertHierarchicalTransformer, self).__init__(config)
        logger.info(f'Model {__class__.__name__} is loading...')
        logger.info(f'Model parameters:')
        logger.info(f'Evidence lambda: {evidence_lambda}')
        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(my_dropout_p)
        self.bert = BertModel(config)
        self.query_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
        self.value_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
        # self.sentence_input = layers.BertSentInput(config)
        config.num_hidden_layers = tf_layers
        config.intermediate_size = tf_inter_size
        self.sentence_encoder = BertEncoder(config)
        self.attention_score = layers.AttentionScore(config.hidden_size, 256)

        # Output layer
        self.evidence_lambda = evidence_lambda
        self.predictor = nn.Linear(config.hidden_size * 2, 3)
Example #12
    def __init__(self,
                 contextual_embedding_dim,
                 entity_embedding_dim: int,
                 entity_embeddings: torch.nn.Embedding,
                 max_sequence_length: int = 512,
                 span_encoder_config: Dict[str, int] = None,
                 dropout: float = 0.1,
                 output_feed_forward_hidden_dim: int = 100,
                 initializer_range: float = 0.02,
                 weighted_entity_threshold: float = None,
                 null_entity_id: int = None,
                 include_null_embedding_in_dot_attention: bool = False):
        """
        Idea: Align the bert and KG vector space by learning a mapping between
            them.
        """
        super().__init__()

        self.span_extractor = SelfAttentiveSpanExtractor(entity_embedding_dim)
        init_bert_weights(self.span_extractor._global_attention._module,
                          initializer_range)

        self.dropout = torch.nn.Dropout(dropout)

        self.bert_to_kg_projector = torch.nn.Linear(
            contextual_embedding_dim, entity_embedding_dim)
        init_bert_weights(self.bert_to_kg_projector, initializer_range)
        self.projected_span_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.projected_span_layer_norm, initializer_range)

        self.kg_layer_norm = BertLayerNorm(entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.kg_layer_norm, initializer_range)

        # already pretrained, don't init
        self.entity_embeddings = entity_embeddings
        self.entity_embedding_dim = entity_embedding_dim

        # layers for the dot product attention
        if weighted_entity_threshold is not None or include_null_embedding_in_dot_attention:
            if hasattr(self.entity_embeddings, 'get_null_embedding'):
                null_embedding = self.entity_embeddings.get_null_embedding()
            else:
                null_embedding = self.entity_embeddings.weight[null_entity_id, :]
        else:
            null_embedding = None
        self.dot_attention_with_prior = DotAttentionWithPrior(
            output_feed_forward_hidden_dim,
            weighted_entity_threshold,
            null_embedding,
            initializer_range
        )
        self.null_entity_id = null_entity_id
        self.contextual_embedding_dim = contextual_embedding_dim

        if span_encoder_config is None:
            self.span_encoder = None
        else:
            # create BertConfig
            assert len(span_encoder_config) == 4
            config = BertConfig(
                0,  # vocab size, not used
                hidden_size=span_encoder_config['hidden_size'],
                num_hidden_layers=span_encoder_config['num_hidden_layers'],
                num_attention_heads=span_encoder_config['num_attention_heads'],
                intermediate_size=span_encoder_config['intermediate_size']
            )
            self.span_encoder = BertEncoder(config)
            init_bert_weights(self.span_encoder, initializer_range)
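From the assertion and the four keys read above, span_encoder_config is expected to be a plain dict with exactly these entries; illustrative values (placeholders, not taken from the project's configs):

span_encoder_config = {
    "hidden_size": 300,
    "num_hidden_layers": 1,
    "num_attention_heads": 4,
    "intermediate_size": 1024,
}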
Example #13
 def __init__(self, config: BertConfig):
     super().__init__(config)
     self.encoder = BertEncoder(config)
     self.apply(self.init_bert_weights)
Example #14
 def __init__(self, config):
     super(BertModel_emb_encoder_custom, self).__init__(config)
     self.embeddings = BertEmbeddings_custom(config)
     self.encoder = BertEncoder(config)
Example #15
 def __init__(self, config):
     super().__init__(config)
     self.embedder = ContinuousBertEmbeddings(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.apply(self.init_bert_weights)
Example #16
 def __init__(self, config):
     super(BertModel, self).__init__(config)
     self.embeddings = BertEmbeddings(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.apply(self.init_bert_weights)
Example #17
    def __init__(
        self,
        config,
        input_dim,
        output_dim,
        ent_emb_file,
        static_ent_emb_file,
        type_ent_emb_file,
        rel_ent_emb_file,
        tanh=False,
        norm=False,
        freeze=True,
    ):
        super(EntBertEncoder, self).__init__(config)
        if (
            ent_emb_file is not None
            or static_ent_emb_file is not None
            or type_ent_emb_file is not None
            or rel_ent_emb_file is not None
        ):
            self.encoder = BertEncoder(config)
        else:
            self.encoder = None
        self.pooler = BertPooler(config)

        self.apply(self.init_bert_weights)

        if ent_emb_file is not None:
            ent_emb_matrix = torch.from_numpy(np.load(ent_emb_file))
            self.ent_embeddings = nn.Embedding(
                ent_emb_matrix.size()[0], ent_emb_matrix.size()[1], padding_idx=0
            )
            self.ent_embeddings.weight.data.copy_(ent_emb_matrix)
            input_dim += ent_emb_matrix.size()[1]
            if freeze:
                for param in self.ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.ent_embeddings = None

        if static_ent_emb_file is not None:
            static_ent_emb_matrix = torch.from_numpy(np.load(static_ent_emb_file))
            self.static_ent_embeddings = nn.Embedding(
                static_ent_emb_matrix.size()[0],
                static_ent_emb_matrix.size()[1],
                padding_idx=0,
            )
            self.static_ent_embeddings.weight.data.copy_(static_ent_emb_matrix)
            input_dim += static_ent_emb_matrix.size()[1]
            if freeze:
                for param in self.static_ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.static_ent_embeddings = None

        if type_ent_emb_file is not None:
            type_ent_emb_matrix = torch.from_numpy(np.load(type_ent_emb_file))
            self.type_ent_embeddings = nn.Embedding(
                type_ent_emb_matrix.size()[0],
                type_ent_emb_matrix.size()[1],
                padding_idx=0,
            )
            self.type_ent_embeddings.weight.data.copy_(type_ent_emb_matrix)
            input_dim += type_ent_emb_matrix.size()[1]
            if freeze:
                for param in self.type_ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.type_ent_embeddings = None

        if rel_ent_emb_file is not None:
            rel_ent_emb_matrix = torch.from_numpy(np.load(rel_ent_emb_file))
            self.rel_ent_embeddings = nn.Embedding(
                rel_ent_emb_matrix.size()[0],
                rel_ent_emb_matrix.size()[1],
                padding_idx=0,
            )
            self.rel_ent_embeddings.weight.data.copy_(rel_ent_emb_matrix)
            input_dim += rel_ent_emb_matrix.size()[1]
            if freeze:
                for param in self.rel_ent_embeddings.parameters():
                    param.requires_grad = False
        else:
            self.rel_ent_embeddings = None

        self.proj = nn.Linear(input_dim, output_dim)

        if tanh is True:
            self.proj_activation = nn.Tanh()
        else:
            self.proj_activation = None

        self.norm = norm
        if self.norm is True:
            self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
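The forward pass is not shown here, but the way input_dim is grown once per embedding table implies the entity features are concatenated before self.proj. A hedged sketch of that projection step (input names are hypothetical, inferred only from the constructor above):

# token_states: [batch, seq_len, base_input_dim]; ent_ids: [batch, seq_len] entity indices.
feats = [token_states]
for table in (self.ent_embeddings, self.static_ent_embeddings,
              self.type_ent_embeddings, self.rel_ent_embeddings):
    if table is not None:
        feats.append(table(ent_ids))

hidden = self.proj(torch.cat(feats, dim=-1))
if self.proj_activation is not None:
    hidden = self.proj_activation(hidden)
if self.norm:
    hidden = self.dropout(self.LayerNorm(hidden))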