def __init__(self, config, img_feature_dim=2052):
    """Build the embeddings, encoder, pooler and image-feature projection.

    Note that ``config`` is modified in place before the parent
    constructor runs: ``img_feature_dim``, ``img_feature_type``,
    ``code_voc`` and ``hidden_dropout_prob`` are all overwritten.

    Args:
        config: Model configuration object. Must provide ``hidden_size``;
            ``use_img_layernorm`` is optional and, when truthy,
            ``img_layer_norm_eps`` is read as well.
        img_feature_dim: Size of one image feature vector fed to the
            image projection layer.
    """
    # Features + Positions (2048 + 4)
    config.img_feature_dim = img_feature_dim
    config.img_feature_type = "faster_r-cnn"
    config.code_voc = 512
    # Original Repo uses 0.3 dropout
    config.hidden_dropout_prob = 0.3

    super(BertO, self).__init__(config)

    self.embeddings = BertEmbeddings(config)
    self.encoder = CaptionBertEncoder(config)
    self.pooler = BertPooler(config)

    self.img_dim = config.img_feature_dim
    logger.info('BertImgModel Image Dimension: {}'.format(self.img_dim))
    self.img_feature_type = config.img_feature_type
    # Falls back to None when the config does not define the flag.
    self.use_img_layernorm = getattr(config, 'use_img_layernorm', None)

    feature_type = config.img_feature_type
    hidden_size = self.config.hidden_size

    # Since img_feature_type is forced to "faster_r-cnn" above, only the
    # final branch is reached as the code stands; the discrete-code
    # variants are kept unchanged.
    if feature_type in ('dis_code', 'dis_code_t', 'dis_code_scale'):
        if feature_type == 'dis_code_scale':  # scaled
            self.input_embeddings = nn.Linear(
                config.code_dim, config.code_size, bias=True)
        self.code_embeddings = nn.Embedding(
            config.code_voc, config.code_dim, padding_idx=0)
        if feature_type == 'dis_code_t':  # transpose
            projection_in = config.code_size
        else:
            projection_in = config.code_dim
        self.img_embedding = nn.Linear(projection_in, hidden_size, bias=True)
    else:
        self.img_embedding = nn.Linear(self.img_dim, hidden_size, bias=True)
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether dropout/LayerNorm belong inside this branch or after the
        # whole conditional. This is the only branch taken, so both layouts
        # run identically -- confirm the intended nesting.
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        if self.use_img_layernorm:
            self.LayerNorm = BertLayerNorm(
                config.hidden_size, eps=config.img_layer_norm_eps)

    self.init_weights()
def __init__(self, config):
    """Create the dense layer, activation and layer norm of the transform.

    Args:
        config: Configuration object providing ``hidden_size`` and
            ``hidden_act``. ``hidden_act`` may be a string key into
            ``ACT2FN`` or any other object, which is then stored as the
            activation as-is.
    """
    super(BertPredictionHeadTransform, self).__init__()

    width = config.hidden_size
    self.dense = nn.Linear(width, width)

    activation = config.hidden_act
    # On Python 2 a text name may arrive as `unicode` rather than `str`.
    given_by_name = isinstance(activation, str) or (
        sys.version_info[0] == 2 and isinstance(activation, unicode))
    self.transform_act_fn = ACT2FN[activation] if given_by_name else activation

    self.LayerNorm = BertLayerNorm(width, eps=1e-12)
def __init__(self, config):
    """Create the dense projection, layer norm and dropout sub-modules.

    Args:
        config: Configuration object providing ``intermediate_size``,
            ``hidden_size`` and ``hidden_dropout_prob``.
    """
    super(BertOutput, self).__init__()

    hidden_size = config.hidden_size
    # Linear layer taking intermediate_size inputs to hidden_size outputs.
    self.dense = nn.Linear(
        in_features=config.intermediate_size,
        out_features=hidden_size,
    )
    self.LayerNorm = BertLayerNorm(hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)