예제 #1
0
    def __init__(self, config, img_feature_dim=2052):
        """Build the text/image BERT model and its image-feature projection.

        Args:
            config: Model configuration. It is modified in place:
                ``img_feature_dim``, ``img_feature_type``, ``code_voc`` and
                ``hidden_dropout_prob`` are overwritten before any
                sub-module is constructed.
            img_feature_dim: Width of one image-feature vector. The default
                of 2052 is 2048 feature values plus 4 position values.
        """
        # Features + Positions (2048 + 4)
        config.img_feature_dim = img_feature_dim
        config.img_feature_type = "faster_r-cnn"
        config.code_voc = 512

        # Original Repo uses 0.3 dropout
        config.hidden_dropout_prob = 0.3

        super(BertO, self).__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.encoder = CaptionBertEncoder(config)
        self.pooler = BertPooler(config)

        self.img_dim = config.img_feature_dim
        logger.info('BertImgModel Image Dimension: {}'.format(self.img_dim))
        self.img_feature_type = config.img_feature_type
        # None (not False) marks "option absent from the config".
        self.use_img_layernorm = getattr(config, 'use_img_layernorm', None)

        # NOTE(review): img_feature_type is forced to "faster_r-cnn" above,
        # so the discrete-code variants below are only reached if that
        # assignment is changed.
        feature_type = config.img_feature_type
        if feature_type in ('dis_code', 'dis_code_t', 'dis_code_scale'):
            # Sub-modules are created in a fixed order (input projection,
            # code table, image projection) so parameter registration and
            # random initialisation order stay stable.
            if feature_type == 'dis_code_scale':  # scaled
                self.input_embeddings = nn.Linear(config.code_dim,
                                                  config.code_size,
                                                  bias=True)
            self.code_embeddings = nn.Embedding(config.code_voc,
                                                config.code_dim,
                                                padding_idx=0)
            if feature_type == 'dis_code_t':  # transpose
                projection_in = config.code_size
            else:
                projection_in = config.code_dim
            self.img_embedding = nn.Linear(projection_in,
                                           self.config.hidden_size,
                                           bias=True)
        else:
            # Dense region features: project straight to the hidden size.
            self.img_embedding = nn.Linear(self.img_dim,
                                           self.config.hidden_size,
                                           bias=True)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            if self.use_img_layernorm:
                self.LayerNorm = BertLayerNorm(config.hidden_size,
                                               eps=config.img_layer_norm_eps)

        self.init_weights()
예제 #2
0
 def __init__(self, config):
     """Create the dense layer, activation and layer norm of the transform.

     Args:
         config: Configuration providing ``hidden_size`` and ``hidden_act``.
             ``hidden_act`` is either a key into ``ACT2FN`` or an object
             that is stored and used as the activation directly.
     """
     super(BertPredictionHeadTransform, self).__init__()
     hidden = config.hidden_size
     self.dense = nn.Linear(hidden, hidden)

     activation = config.hidden_act
     # `unicode` exists only on Python 2; the version test comes first so
     # the name is never evaluated on Python 3.
     is_named = isinstance(activation, str) or (
         sys.version_info[0] == 2 and isinstance(activation, unicode))
     if is_named:
         # Resolve the activation name through the lookup table.
         activation = ACT2FN[activation]
     self.transform_act_fn = activation

     self.LayerNorm = BertLayerNorm(hidden, eps=1e-12)
예제 #3
0
 def __init__(self, config):
     """Create the output projection, layer norm and dropout sub-modules.

     Args:
         config: Configuration providing ``intermediate_size``,
             ``hidden_size`` and ``hidden_dropout_prob``.
     """
     super(BertOutput, self).__init__()
     # Projects from the intermediate width back to the hidden width.
     self.dense = nn.Linear(in_features=config.intermediate_size,
                            out_features=config.hidden_size)
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
     self.dropout = nn.Dropout(p=config.hidden_dropout_prob)