コード例 #1
0
ファイル: match.py プロジェクト: beam0924/ARN
    def __init__(self, opt):
        """Assemble every sub-module of the model from the option mapping.

        Args:
            opt: mapping indexed by string keys. Keys read directly here:
                ``rnn_num_layers``, ``rnn_hidden_size``, ``bidirectional``,
                ``word_vec_size``, ``pool5_dim``, ``fc7_dim``,
                ``lang_res_weight``, ``vis_res_weight``, ``att_res_weight``,
                ``loss_combined``, ``loss_divided``, ``vocab_size``,
                ``word_embedding_size``, ``word_drop_out``, ``rnn_drop_out``,
                ``rnn_type``, ``variable_lengths`` and ``jemb_dim``. The
                whole mapping is also passed to the encoder, decoder and
                loss constructors, which may read further keys.

        NOTE(review): the sub-modules are deliberately created in a fixed
        order. Assuming this class is a torch ``nn.Module``, creation order
        drives registration order and the order in which random initial
        weights are drawn, so do not reorder the assignments below.
        """
        super(AdaptiveReconstruct, self).__init__()

        # Dimensions of the language RNN; a bidirectional RNN doubles the
        # per-step output width.
        rnn_layers = opt['rnn_num_layers']
        rnn_hidden = opt['rnn_hidden_size']
        is_bidirectional = opt['bidirectional'] > 0
        rnn_out_dim = rnn_hidden * (2 if is_bidirectional else 1)

        word_dim = opt['word_vec_size']
        self.word_vec_size = word_dim
        self.pool5_dim = opt['pool5_dim']
        self.fc7_dim = opt['fc7_dim']

        # Loss weights / switches, stored verbatim from the options.
        self.lang_res_weight = opt['lang_res_weight']
        self.vis_res_weight = opt['vis_res_weight']
        self.att_res_weight = opt['att_res_weight']
        self.loss_combined = opt['loss_combined']
        self.loss_divided = opt['loss_divided']

        # Language RNN encoder.
        encoder_kwargs = dict(
            vocab_size=opt['vocab_size'],
            word_embedding_size=opt['word_embedding_size'],
            word_vec_size=word_dim,
            hidden_size=rnn_hidden,
            bidirectional=is_bidirectional,
            input_dropout_p=opt['word_drop_out'],
            dropout_p=opt['rnn_drop_out'],
            n_layers=rnn_layers,
            rnn_type=opt['rnn_type'],
            variable_lengths=opt['variable_lengths'] > 0,
        )
        self.rnn_encoder = RNNEncoder(**encoder_kwargs)

        # Linear layer with 3 outputs over an input of size
        # layers * directions * hidden.
        # NOTE(review): presumably one weight per sub/loc/rel branch --
        # confirm against forward().
        self.weight_fc = nn.Linear(rnn_layers * rnn_out_dim, 3)

        # One phrase-attention module per branch (subject, location,
        # relation), all sized to the RNN output width.
        self.sub_attn = PhraseAttention(rnn_out_dim)
        self.loc_attn = PhraseAttention(rnn_out_dim)
        self.rel_attn = PhraseAttention(rnn_out_dim)

        # Per-branch encoders, configured from the full option mapping.
        self.sub_encoder = SubjectEncoder(opt)
        self.loc_encoder = LocationEncoder(opt)
        self.rel_encoder = RelationEncoder(opt)

        # Feature widths used by the scoring heads, the projection MLPs
        # and the fusion layer below.
        # NOTE(review): the literals 25 and 5 are kept from the original
        # code; their meaning is not visible here -- confirm against the
        # location/relation encoders.
        sub_feat_dim = self.pool5_dim + self.fc7_dim
        loc_feat_dim = 25 + 5
        rel_feat_dim = self.fc7_dim + 5
        word_dim = opt['word_vec_size']
        joint_dim = opt['jemb_dim']

        # Matching scores between each branch feature and the language side.
        self.sub_score = Score(sub_feat_dim, word_dim, joint_dim)
        self.loc_score = Score(loc_feat_dim, word_dim, joint_dim)
        self.rel_score = RelationScore(rel_feat_dim, word_dim, joint_dim)

        # Per-branch decoders, configured from the full option mapping.
        self.sub_decoder = SubjectDecoder(opt)
        self.loc_decoder = LocationDecoder(opt)
        self.rel_decoder = RelationDecoder(opt)

        # Reconstruction losses.
        self.att_res_loss = AttributeReconstructLoss(opt)
        self.vis_res_loss = AdapVisualReconstructLoss(opt)
        self.lang_res_loss = AdapLangReconstructLoss(opt)
        self.rec_loss = LangReconstructionLoss(opt)

        # Single-layer projections from the joint embedding width to each
        # branch's feature width.
        self.sub_mlp = nn.Sequential(nn.Linear(joint_dim, sub_feat_dim))
        self.loc_mlp = nn.Sequential(nn.Linear(joint_dim, loc_feat_dim))
        self.rel_mlp = nn.Sequential(nn.Linear(joint_dim, rel_feat_dim))

        # Maps an input as wide as the three branch features combined to
        # the joint embedding width, followed by a ReLU.
        fused_in_dim = sub_feat_dim + loc_feat_dim + rel_feat_dim
        self.feat_fuse = nn.Sequential(
            nn.Linear(fused_in_dim, joint_dim),
            nn.ReLU(),
        )