def __init__(self, opt):
    """Build every sub-module of the model from the option mapping ``opt``.

    ``opt`` is indexed with string keys. Keys read directly here are:
    ``rnn_num_layers``, ``rnn_hidden_size``, ``bidirectional``,
    ``word_vec_size``, ``pool5_dim``, ``fc7_dim``, ``lang_res_weight``,
    ``vis_res_weight``, ``att_res_weight``, ``loss_combined``,
    ``loss_divided``, ``vocab_size``, ``word_embedding_size``,
    ``word_drop_out``, ``rnn_drop_out``, ``rnn_type``,
    ``variable_lengths`` and ``jemb_dim``. ``opt`` is also handed as-is
    to the encoder, decoder and loss constructors, which may read
    further keys.
    """
    super(AdaptiveReconstruct, self).__init__()

    # --- sizes derived from the options --------------------------------
    rnn_layers = opt['rnn_num_layers']
    rnn_hidden = opt['rnn_hidden_size']
    is_bidirectional = opt['bidirectional'] > 0
    directions = 2 if is_bidirectional else 1

    # --- plain (non-module) attributes kept on the instance ------------
    self.word_vec_size = opt['word_vec_size']
    self.pool5_dim = opt['pool5_dim']
    self.fc7_dim = opt['fc7_dim']
    self.lang_res_weight = opt['lang_res_weight']
    self.vis_res_weight = opt['vis_res_weight']
    self.att_res_weight = opt['att_res_weight']
    self.loss_combined = opt['loss_combined']
    self.loss_divided = opt['loss_divided']

    # NOTE: the sub-modules below are created in a fixed order; assuming
    # the base class is torch.nn.Module, that order defines registration
    # order and the order in which initial weights are drawn, so keep it.

    # --- language rnn encoder ------------------------------------------
    self.rnn_encoder = RNNEncoder(
        vocab_size=opt['vocab_size'],
        word_embedding_size=opt['word_embedding_size'],
        word_vec_size=self.word_vec_size,
        hidden_size=rnn_hidden,
        bidirectional=is_bidirectional,
        input_dropout_p=opt['word_drop_out'],
        dropout_p=opt['rnn_drop_out'],
        n_layers=rnn_layers,
        rnn_type=opt['rnn_type'],
        variable_lengths=opt['variable_lengths'] > 0,
    )

    # Linear layer producing 3 outputs from an input of size
    # layers * directions * hidden.
    self.weight_fc = nn.Linear(rnn_layers * directions * rnn_hidden, 3)

    # --- one phrase attention per branch (subject/location/relation) ---
    phrase_dim = rnn_hidden * directions
    self.sub_attn = PhraseAttention(phrase_dim)
    self.loc_attn = PhraseAttention(phrase_dim)
    self.rel_attn = PhraseAttention(phrase_dim)

    # --- visual encoders -----------------------------------------------
    self.sub_encoder = SubjectEncoder(opt)
    self.loc_encoder = LocationEncoder(opt)
    self.rel_encoder = RelationEncoder(opt)

    # Per-branch feature sizes; the same numbers size the scorers, the
    # *_mlp projections and the fused input further down.
    subject_dim = self.pool5_dim + self.fc7_dim
    location_dim = 25 + 5  # hard-coded here; meaning not visible in this block
    relation_dim = self.fc7_dim + 5
    joint_dim = opt['jemb_dim']

    # --- matching scores -----------------------------------------------
    self.sub_score = Score(subject_dim, self.word_vec_size, joint_dim)
    self.loc_score = Score(location_dim, self.word_vec_size, joint_dim)
    self.rel_score = RelationScore(
        relation_dim, self.word_vec_size, joint_dim)

    # --- decoders ------------------------------------------------------
    self.sub_decoder = SubjectDecoder(opt)
    self.loc_decoder = LocationDecoder(opt)
    self.rel_decoder = RelationDecoder(opt)

    # --- reconstruction losses -----------------------------------------
    self.att_res_loss = AttributeReconstructLoss(opt)
    self.vis_res_loss = AdapVisualReconstructLoss(opt)
    self.lang_res_loss = AdapLangReconstructLoss(opt)
    self.rec_loss = LangReconstructionLoss(opt)

    # --- projections from the joint size to each branch's feature size --
    # Kept as single-layer nn.Sequential containers so the parameter
    # names keep their "<name>.0." prefix.
    self.sub_mlp = nn.Sequential(nn.Linear(joint_dim, subject_dim))
    self.loc_mlp = nn.Sequential(nn.Linear(joint_dim, location_dim))
    self.rel_mlp = nn.Sequential(nn.Linear(joint_dim, relation_dim))

    # --- fusion: input sized as the sum of the three branch sizes ------
    fused_in_dim = subject_dim + location_dim + relation_dim
    self.feat_fuse = nn.Sequential(
        nn.Linear(fused_in_dim, joint_dim),
        nn.ReLU(),
    )