Example #1
        def init_data(self, use_cuda: bool):
            test_device = torch.device('cuda:0') if use_cuda else \
                torch.device('cpu:0')

            torch.set_grad_enabled(False)
            cfg = BertConfig()
            self.torch_embedding = BertEmbeddings(cfg)

            self.torch_embedding.eval()

            if use_cuda:
                self.torch_embedding.to(test_device)

            self.turbo_embedding = turbo_transformers.BertEmbeddings.from_torch(
                self.torch_embedding)

            input_ids = torch.randint(low=0,
                                      high=cfg.vocab_size - 1,
                                      size=(batch_size, seq_length),
                                      dtype=torch.long,
                                      device=test_device)
            position_ids = torch.arange(seq_length,
                                        dtype=torch.long,
                                        device=input_ids.device)

            position_ids = position_ids.repeat(batch_size, 1)
            token_type_ids = torch.zeros_like(input_ids, dtype=torch.long)

            return input_ids, position_ids, token_type_ids
Example #2
    class TestBertEmbedding(unittest.TestCase):
        def init_data(self, use_cuda: bool):
            test_device = torch.device('cuda:0') if use_cuda else \
                torch.device('cpu:0')

            torch.set_grad_enabled(False)
            cfg = BertConfig()
            self.torch_embedding = BertEmbeddings(cfg)

            self.torch_embedding.eval()

            if use_cuda:
                self.torch_embedding.to(test_device)

            self.turbo_embedding = turbo_transformers.BertEmbeddings.from_torch(
                self.torch_embedding)

            input_ids = torch.randint(low=0,
                                      high=cfg.vocab_size - 1,
                                      size=(batch_size, seq_length),
                                      dtype=torch.long,
                                      device=test_device)
            position_ids = torch.arange(seq_length,
                                        dtype=torch.long,
                                        device=input_ids.device)

            position_ids = position_ids.repeat(batch_size, 1)
            token_type_ids = torch.zeros_like(input_ids, dtype=torch.long)

            return input_ids, position_ids, token_type_ids

        def check_torch_and_turbo(self, use_cuda):
            input_ids, position_ids, token_type_ids = self.init_data(use_cuda)

            device = "GPU" if use_cuda else "CPU"
            num_iter = 100
            torch_model = lambda: self.torch_embedding(
                input_ids, token_type_ids, position_ids)
            torch_result, torch_qps, torch_time = test_helper.run_model(
                torch_model, use_cuda, num_iter)
            print(f"BertEmbeddings \"({batch_size},{seq_length:03})\" ",
                  f"{device} Torch QPS,  {torch_qps}, time, {torch_time}")

            turbo_model = lambda: self.turbo_embedding(input_ids, position_ids,
                                                       token_type_ids)
            turbo_result, turbo_qps, turbo_time = test_helper.run_model(
                turbo_model, use_cuda, num_iter)
            print(f"BertEmbeddings \"({batch_size},{seq_length:03})\" ",
                  f"{device} Turbo QPS,  {turbo_qps}, time, {turbo_time}")

            self.assertTrue(
                torch.max(torch.abs(torch_result - turbo_result)) < 1e-5)

        def test_embedding(self):
            self.check_torch_and_turbo(use_cuda=False)
            if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
                self.check_torch_and_turbo(use_cuda=True)
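
Note: the test above uses module-level globals (batch_size, seq_length) and a test_helper module that are not shown in the snippet. A minimal sketch of how such a module might be driven, with hypothetical values for the two globals:

    batch_size = 2
    seq_length = 40

    if __name__ == '__main__':
        unittest.main()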
Example #3
    def __init__(self, config):
        super(BertModel4Mix, self).__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder4Mix(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #4
 def __init__(self, config):
     super(TtaModel, self).__init__(config)
     self.embeddings = BertEmbeddings(config)
     self.position_embedding = PositionEmbeddings(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.init_weights()
Example #5
    def __init__(self, config):
        super().__init__(config)

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        # self.apply(self.init_weights)  # old versions of pytorch_transformers
        self.init_weights()
Example #6
    def __init__(self, config):
        super(CBOW, self).__init__()

        self.embeddings = BertEmbeddings(config)
        self.attention = BertSelfAttention(config)
        self.act_fn = nn.ReLU()
        self.linear_1 = nn.Linear(config.hidden_size, config.hidden_size)
        self.linear_2 = nn.Linear(config.hidden_size, config.hidden_size)
Example #7
    def __init__(self, config):
        super(TruncBertModel, self).__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = TruncBertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #8
    def __init__(self, config):
        super().__init__(config)
        # self.config holds the configuration hyperparameters of the BERT model
        # used inside the spelling-error correction network (Correction_Network).
        self.config = config
        '''1. Build the layers needed by the error-detection network (Detection_Network).'''

        # A Bi-GRU serves as the encoder of the detection network.
        # Every sub-embedding module inside BertModel's embeddings layer has dimension 768,
        # so the Bi-GRU input_size is also 768. Its hidden_size is set to 256 so that, after
        # the forward and backward hidden states are concatenated, the hidden dimension
        # stays at 512 (i.e. enc_hid_size is 512).
        self.enc_bi_gru = torch.nn.GRU(input_size=768,
                                       hidden_size=256,
                                       dropout=0.2,
                                       bidirectional=True)

        # The bidirectional GRU encodes the input_embeddings fed into the detection network.
        # Its output has shape (seq_len, batch_size, enc_hid_size * 2); after swapping dimensions it
        # becomes (batch_size, seq_len, enc_hid_size * 2). That reshaped output is fed into
        # self.detection_network_dense_out, which maps it to a tensor of shape (batch_size, seq_len, 2),
        # making it easy to compute the cross-entropy loss of the binary classification task that decides
        # whether each character in the sequence is a spelling error.
        self.detection_network_dense_out = torch.nn.Linear(512, 2)

        # The same reshaped GRU output of shape (batch_size, seq_len, enc_hid_size * 2) is also fed into
        # soft_masking_coef_mapping, which maps it to a tensor of shape (batch_size, seq_len, 1). That tensor
        # is then passed through Sigmoid() to squash its values into (0, 1), giving the coefficient p that
        # multiplies mask_embeddings when the soft-masked embeddings are computed later
        # (p_i is the likelihood that the character at position i is misspelled).
        self.soft_masking_coef_mapping = torch.nn.Linear(512, 1)
        '''2. Build the three kinds of layers used by the BertModel inside the spelling-error correction network (Correction_Network).'''
        '''
        (1): The embedding layer BertEmbeddings(), which contains the word embeddings, segment embeddings
             and position embeddings for each character.
        (2): The core of BERT: the multi-layer (12-layer) multi-head self-attention encoder BertEncoder.
        (3): BERT's final pooling layer BertPooler.
        '''
        # Embedding layer BertEmbeddings().
        self.embeddings = BertEmbeddings(config)
        # Multi-layer (12-layer) multi-head self-attention encoder BertEncoder.
        self.encoder = BertEncoder(config)
        # Pooling layer BertPooler.
        self.pooler = BertPooler(config)
        # Initialize weight matrices, biases, etc.
        self.init_weights()
        '''Fetch the embedding vector of the special [MASK] token from the word_embeddings layer inside BERT's embedding layer BertEmbeddings().'''
        # In the vocabulary of the BertTokenizer() class, the special [MASK] token is encoded as index 103
        # (for any BertTokenizer(), whichever pretrained BERT vocabulary it loads via from_pretrained,
        # [MASK] maps to index 103; the index only changes when switching to a different pretrained model
        # family, e.g. an ALBERT model).
        # Later, the [MASK] tensor self.mask_embeddings has to be expanded to the same shape as the
        # input_embeddings produced by BERT's embedding layer BertEmbeddings(), i.e.
        # (batch_size, seq_len, embed_size) -> (batch_size, seq_len, 768).
        self.mask_embeddings = self.embeddings.word_embeddings.weight[
            103]  # At this point the mask embedding tensor has shape (768,).

        # Note: after the soft_masked_embeddings pass through the BERT model of the correction network,
        # and before the result enters the final output layer and Softmax layer, the correction network's
        # output is added, via a residual connection, to the input embeddings that entered the model at the
        # start; only this sum is fed into the final output layer and Softmax layer to predict the correct
        # characters.
        '''self.soft_masked_bert_dense_out is the output layer that follows the correction network. It projects
           the output of the residual connection from dimension 768 to the index space of the correction
           vocabulary. (The output of self.soft_masked_bert_dense_out can be regarded as the final output of
           the Soft_Masked_BERT model.)'''
        self.soft_masked_bert_dense_out = torch.nn.Linear(
            self.config.hidden_size,
            self.embeddings.word_embeddings.weight.shape[0])
        '''The final Softmax() can be omitted here, because if CrossEntropyLoss() is used later to compute
           the training loss, CrossEntropyLoss() already applies log-softmax internally.'''
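
The comments above describe the soft-masking coefficient p but the snippet ends before it is used. Below is a minimal sketch of the soft-masking step as described in the Soft-Masked BERT formulation (p * mask embedding + (1 - p) * input embedding), using the shapes stated in the comments; input_embeddings and gru_output are hypothetical placeholders, not attributes of the class above:

    import torch

    batch_size, seq_len = 2, 16
    input_embeddings = torch.randn(batch_size, seq_len, 768)   # output of BertEmbeddings
    gru_output = torch.randn(batch_size, seq_len, 512)         # Bi-GRU output after the dimension swap
    soft_masking_coef_mapping = torch.nn.Linear(512, 1)
    mask_embeddings = torch.randn(768)                          # word_embeddings.weight[103] in the class above

    # p_i in (0, 1): likelihood that the character at position i is misspelled.
    p = torch.sigmoid(soft_masking_coef_mapping(gru_output))    # (batch_size, seq_len, 1)

    # Soft-masked embeddings: p * [MASK] embedding + (1 - p) * input embedding.
    soft_masked_embeddings = p * mask_embeddings + (1 - p) * input_embeddings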
Example #9
 def __init__(self, config):
     super(Stage0, self).__init__()
     self.embedding_layer = BertEmbeddings(config)
     self.layers = []
     for i in range(config.num_hidden_layers // 24):
         self.layers.append(BertLayer(config))
     self.layers = torch.nn.ModuleList(self.layers)
     self.config = config
     self.apply(self.init_bert_weights)
Example #10
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = DeeBertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #11
    def __init__(
        self,
        random_init: bool = False,
        bert_model_name: str = "bert-base-uncased",
        img_dim: int = 2048,
        hidden_size: int = 768,
        hidden_dropout_prob: float = 0,
        text_embeddings: DictConfig = EMPTY_CONFIG,
        encoder: DictConfig = EMPTY_CONFIG,
    ):
        super().__init__()

        bert_config = retry_n(
            NUM_RETRIES,
            BertConfig.from_pretrained,
            bert_model_name,
            **OmegaConf.to_container(text_embeddings),
        )
        self.text_embeddings = BertEmbeddings(bert_config)

        self.img_embeddings = UNITERImageEmbeddings(
            img_dim=img_dim,
            hidden_size=hidden_size,
            hidden_dropout_prob=hidden_dropout_prob,
        )

        bert_model_name = bert_model_name
        hf_config = retry_n(
            NUM_RETRIES,
            BertConfig.from_pretrained,
            bert_model_name,
            **OmegaConf.to_container(encoder),
        )
        if random_init:
            bert_model = BertModel(hf_config)
        else:
            bert_model = retry_n(
                NUM_RETRIES,
                BertModel.from_pretrained,
                bert_model_name,
                config=hf_config,
            )
        self.encoder = bert_model.encoder
        self.pooler = bert_model.pooler
Example #12
    def __init__(self, config, bitW=1):
        super(QuantBertModel, self).__init__(config)
        self.config = config
        self.bitW = bitW
        self.embeddings = BertEmbeddings(config)
        self.encoder = QuantBertEncoder(config, self.bitW)
        # self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #13
    def _build_word_embedding(self):
        self.bert_config = BertConfig.from_pretrained(self.config.bert_model_name)
        if self.config.pretrained_bert:
            bert_model = BertForPreTraining.from_pretrained(self.config.bert_model_name)
            self.word_embedding = bert_model.bert.embeddings
            self.pooler = bert_model.bert.pooler
            self.pooler.apply(self.init_weights)

        else:
            self.pooler = BertPooler(self.bert_config)
            self.word_embedding = BertEmbeddings(self.bert_config)
Example #14
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.jointEmbeddings = JointEmbeddings(config.hidden_size, 0.5, 'mosei')
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        #self.Linear_v = nn.Linear()

        self.init_weights()
Example #15
    def __init__(self, config):
        super().__init__(config)

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder_attention(config)
        self.pooler = BertPooler(config)
        # self.pooler =  BertPooler_Sigmoid(config)
        #self.pooler = BertPooler_reLu(config)

        #self.apply(self.init_weights)
        self.init_weights()
Example #16
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)

        # Drop the pooler layer entirely to reduce computation and GPU memory usage.
        # self.pooler = BertPooler(config) if add_pooling_layer else None

        self.init_weights()
Example #17
 def __init__(self):
     super(BertClassificationModel, self).__init__()
     model_class, tokenizer_class, pretrained_weights = (tfs.BertModel, tfs.BertTokenizer, 'bert-base-chinese')
     self.tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
     # Embedding layer BertEmbeddings().
     self.embeddings = BertEmbeddings(config)
     # Multi-layer (12-layer) multi-head self-attention encoder BertEncoder.
     self.encoder = BertEncoder(config)
     self.bert = model_class.from_pretrained(pretrained_weights)
     self.dense = nn.Linear(768, 2)  # BERT's default hidden size is 768; 2 output units for binary classification
     self.dropout = nn.Dropout(p=0.5)  # dropout during training
Example #18
    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)

        self.pooler = BertPooler(config) if add_pooling_layer else None

        self.init_weights()

        self.length_config = None
Example #19
    def __init__(self, config: LukeConfig):
        super(LukeModelDoc, self).__init__()
        self.config = config

        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        if self.config.bert_model_name and "roberta" in self.config.bert_model_name:
            self.embeddings = RobertaEmbeddings(config)
            self.embeddings.token_type_embeddings.requires_grad = False
        else:
            self.embeddings = BertEmbeddings(config)
        self.entity_embeddings = EntityEmbeddings(config)
Example #20
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.inject = DotAttention(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()
Example #21
 def __init__(self, config):
     super().__init__(config)
     self.config = config
     self.embeddings = BertEmbeddings(config)
     # transformer blocks * N
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.MAG = MAG(beta=1.0,
                    hidden_size=hidden_size,
                    dropout=0.5,
                    device=device)
     self.MAG.apply(_init_weights)
     self.init_weights()
Example #22
    def __init__(self,
                 num_labels,
                 pretrained_model_name_or_path=None,
                 cat_num=0,
                 token_size=None,
                 MAX_SEQUENCE_LENGTH=512):
        super(BertModelForBinaryMultiLabelClassifier, self).__init__()
        if pretrained_model_name_or_path:
            self.model = BertModel.from_pretrained(
                pretrained_model_name_or_path)
        else:
            raise NotImplementedError
        self.num_labels = num_labels
        if cat_num > 0:
            self.catembedding = nn.Embedding(cat_num, 768)
            self.catdropout = nn.Dropout(0.2)
            self.catactivate = nn.ReLU()

            self.catembeddingOut = nn.Embedding(cat_num, cat_num // 2 + 1)
            self.catactivateOut = nn.ReLU()
            self.dropout = nn.Dropout(0.2)
            self.classifier = nn.Linear(
                self.model.pooler.dense.out_features + cat_num // 2 + 1,
                num_labels)
        else:
            self.catembedding = None
            self.catdropout = None
            self.catactivate = None
            self.catembeddingOut = None
            self.catactivateOut = None
            self.dropout = nn.Dropout(0.2)
            self.classifier = nn.Linear(self.model.pooler.dense.out_features,
                                        num_labels)

        # resize
        if token_size:
            self.model.resize_token_embeddings(token_size)

        # define input embedding and transformers
        input_model_config = BertConfig(
            vocab_size=token_size, max_position_embeddings=MAX_SEQUENCE_LENGTH)
        self.input_embeddings = BertEmbeddings(input_model_config)
        self.input_bert_layer = BertLayer(input_model_config)

        # use bertmodel as decoder
        # self.model.config.is_decoder = True

        # add modules
        self.add_module('my_input_embeddings', self.input_embeddings)
        self.add_module('my_input_bert_layer', self.input_bert_layer)
        self.add_module('fc_output', self.classifier)
Example #23
    def __init__(self, config, multimodal_config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.MAG = MAG(
            config.hidden_size,
            multimodal_config.beta_shift,
            multimodal_config.dropout_prob,
        )

        self.init_weights()
Example #24
    def __init__(self, config: dict):
        # don't call the constructor of BertModel; instead,
        # call the constructor of BertModel's superclass
        super(BertModel, self).__init__(config)

        # essentially the BertModel constructor, but
        # using KnowBertEncoder instead of BertEncoder
        self.embeddings = BertEmbeddings(config)
        self.encoder = KnowBertEncoder(config)
        self.pooler = BertPooler(config)
        # initialize weights
        self.init_weights()

        # initialize helper
        KnowBertHelper.__init__(self, self.encoder)
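
The comments above rely on a general Python pattern: skip BertModel's own __init__ by calling its superclass constructor directly, then rebuild the components with custom replacements. A tiny standalone sketch of that pattern, with hypothetical class names:

    class Base:
        def __init__(self):
            self.base_ready = True

    class Middle(Base):
        def __init__(self):
            super().__init__()
            self.expensive_setup = True   # the part we want to skip

    class Custom(Middle):
        def __init__(self):
            # Call Base.__init__ directly, bypassing Middle.__init__,
            # then build the replacement components ourselves.
            super(Middle, self).__init__()
            self.replacement = "custom components"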
Example #25
    def __init__(self, config):
        super(BertImgModel, self).__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.encoder = CaptionBertEncoder(config)
        self.pooler = BertPooler(config)

        self.img_dim = config.img_feature_dim
        logger.info('BertImgModel Image Dimension: {}'.format(self.img_dim))
        self.img_feature_type = config.img_feature_type
        if hasattr(config, 'use_img_layernorm'):
            self.use_img_layernorm = config.use_img_layernorm
        else:
            self.use_img_layernorm = None

        if config.img_feature_type == 'dis_code':
            self.code_embeddings = nn.Embedding(config.code_voc,
                                                config.code_dim,
                                                padding_idx=0)
            self.img_embedding = nn.Linear(config.code_dim,
                                           self.config.hidden_size,
                                           bias=True)
        elif config.img_feature_type == 'dis_code_t':  # transpose
            self.code_embeddings = nn.Embedding(config.code_voc,
                                                config.code_dim,
                                                padding_idx=0)
            self.img_embedding = nn.Linear(config.code_size,
                                           self.config.hidden_size,
                                           bias=True)
        elif config.img_feature_type == 'dis_code_scale':  # scaled
            self.input_embeddings = nn.Linear(config.code_dim,
                                              config.code_size,
                                              bias=True)
            self.code_embeddings = nn.Embedding(config.code_voc,
                                                config.code_dim,
                                                padding_idx=0)
            self.img_embedding = nn.Linear(config.code_dim,
                                           self.config.hidden_size,
                                           bias=True)
        else:
            self.img_embedding = nn.Linear(self.img_dim,
                                           self.config.hidden_size,
                                           bias=True)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            if self.use_img_layernorm:
                self.LayerNorm = nn.LayerNorm(config.hidden_size,
                                              eps=config.img_layer_norm_eps)

        self.init_weights()
Example #26
    def __init__(self, config, model_size, task=None, n_classes=None):
        """
        The bare Bert Model transformer outputting raw hidden-states without
        any specific head on top.

        The model can behave as an encoder (with only self-attention) as well as a
        decoder, in which case a layer of cross-attention is added between the
        self-attention layers, following the architecture described in `Attention
        is all you need`_ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob
        Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

        This model is a PyTorch `torch.nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`_ sub-class.
        Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general
        usage and behavior.

        Args:
            config (:class:`~transformers.BertConfig`): Model configuration class with all the parameters of the model.
                Initializing with a config file does not load the weights associated with the model, only the configuration.
                Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights.
            model_size: Size of the Model
            task: MTB task
            n_classes: Number of classes

        References:
            Attention is all you need (https://arxiv.org/abs/1706.03762)
        """
        super(BertModel, self).__init__(config)
        self.config = config

        self.task = task
        self.model_size = model_size
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.init_weights()

        logger.info("Model config: %s", self.config)
        if self.task is None:
            self.lm_head = BertOnlyMLMHead(config)
        elif self.task == "classification":
            self.n_classes = n_classes
            if self.model_size == "bert-base-uncased":
                self.classification_layer = nn.Linear(1536, n_classes)
            elif self.model_size == "bert-large-uncased":
                self.classification_layer = nn.Linear(2048, n_classes)
Example #27
    def __init__(self, config, data_args=None, **kwargs):
        super().__init__(config)
        tasks = data_args.tasks
        self.task_id_2_task_idx = {i: i for i, t in enumerate(tasks)}
        self.config = config
        self.config.num_tasks = len(tasks)
        config.max_seq_length = data_args.max_seq_length
        self.task_type_embeddings = nn.Embedding(len(tasks),
                                                 config.hidden_size)
        self.conditional_alignment = FiLM(config.hidden_size,
                                          config.hidden_size)  # FiLM5

        self.embeddings = BertEmbeddings(config)
        self.encoder = MyBertEncoder10(config, tasks)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #28
 def __init__(self, config, visual_feat_size, visual_start_layer, num_visual_positions, use_pos_embedding=False, no_encoder_inputs=False, append_to_encoder_states=False):
     super().__init__(config)
     self.embeddings = BertEmbeddings(config)
     self.use_pos_embedding = use_pos_embedding
     if use_pos_embedding:
         self.visual_pos_embeddings = torch.nn.Embedding(num_visual_positions, config.hidden_size)
     else:
         self.visual_pos_embeddings = torch.nn.Linear(4, config.hidden_size)
     self.visual_feat_size = visual_feat_size
     self.visual_start_layer = visual_start_layer
     self.num_visual_positions = num_visual_positions
     self.visual_feat_projection = torch.nn.Linear(visual_feat_size, config.hidden_size)
     self.encoder = BertEncoder(config, visual_start_layer)
     self.pooler = BertPooler(config)
     self.dropout_layer = torch.nn.Dropout(p=self.config.hidden_dropout_prob)
     self.apply(self.init_bert_weights)
     self.no_encoder_inputs = no_encoder_inputs
     self.append_to_encoder_states = append_to_encoder_states
Example #29
    def __init__(self, config: BertConfig):
        super().__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)

        self.img_dim = config.img_feature_dim
        self.use_img_layernorm = getattr(config, "use_img_layernorm", False)

        img_projection = nn.Linear(self.img_dim,
                                   self.config.hidden_size,
                                   bias=True)
        img_embedding_list = [img_projection]
        if self.use_img_layernorm:
            img_embedding_list += [
                nn.LayerNorm(config.hidden_size, eps=config.img_layer_norm_eps)
            ]
        dropout = nn.Dropout(config.hidden_dropout_prob)
        img_embedding_list += [dropout]
        # is an image encoding used as input to the transformer trunk
        self.img_embedding = nn.Sequential(*img_embedding_list)
Example #30
  def __init__(self, bert_model: str, max_layer=None, pool=True, freeze_embeddings=False):
    super().__init__()
    self.freeze_embeddings = freeze_embeddings
    config = BertConfig.from_pretrained(bert_model, cache_dir=TRANSFORMER_CACHE_DIR)
    if max_layer is not None and not pool:
      config.num_hidden_layers = max_layer
    self.pool = pool
    self.max_layer = max_layer
    self.embeddings = BertEmbeddings(config)
    if config.num_hidden_layers > 0:
      self.encoder = BertEncoder(config)
      self.encoder.output_hidden_states = True
    else:
      self.encoder = None

    if pool:
      self.pooler = BertPooler(config)
    else:
      self.pooler = None
    self.config = config
    self.bert_model = bert_model