Example #1
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(Dual_Model, self).__init__()
        self.opt = opt
        
        # To fuse different glimpses
        dim_h = int(self.opt['vqa']['fusion']['dim_hv'] / opt['attention']['nb_glimpses'])
        
        self.vocab_answers = vocab_answers
        self.vocab_words = vocab_words
        self.num_classes = len(self.vocab_answers)

        # VQA Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
        # Modules for classification
        self.linear_q_fusion = nn.Linear(self.opt['dim_q'],
                                         self.opt['vqa']['fusion']['dim_hq'])
        self.linear_classif = nn.Linear(self.opt['vqa']['fusion']['dim_mm'],
                                        self.num_classes)
        self.fusion_classif_vqa = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                 visual_embedding=False,
                                                 question_embedding=False)
        self.attention_vqa = getattr(attention_modules, opt['attention']['arch'])(opt, use_linear=False)
        self.linear_q_att = nn.Linear(self.opt['dim_q'],
                                        self.opt['attention']['dim_q'])
        self.list_linear_v_fusion_vqa = nn.ModuleList([
            nn.Linear(self.opt['dim_v'], dim_h)
            for i in range(self.opt['attention']['nb_glimpses'])])

        # share W and E
        self.answer_embeddings = nn.Embedding(self.linear_classif.out_features, self.linear_classif.in_features)
        # VQG modules
        self.linear_va_transform = nn.Linear(self.linear_classif.in_features, 
                        self.opt['vqg']['vec2seq']['dim_embedding'])
        self.linear_a_att = nn.Linear(self.opt['dim_a'],
                                        self.opt['attention']['dim_q'])

        self.linear_a_fusion = nn.Linear(self.opt['vqa']['fusion']['dim_mm'], self.opt['vqa']['fusion']['dim_hq'])
        # Modules for Question Generation
        self.question_generation = getattr(vec2seq, opt['vqg']['vec2seq']['arch'])(vocab_words, opt['vqg']['vec2seq'])

        # Sharable modules
        if self.opt.get('share_modules', True):
            print('Sharing Modules: [Attention] and [Fusion]')
            self.fusion_classif_vqg = self.fusion_classif_vqa
            self.attention_vqg = self.attention_vqa
            self.list_linear_v_fusion_vqg = self.list_linear_v_fusion_vqa
        else:
            print('Disable Module Sharing')
            self.fusion_classif_vqg = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                     visual_embedding=False,
                                                     question_embedding=False)
            self.attention_vqg = getattr(attention_modules, opt['attention']['arch'])(opt, use_linear=False)
            self.list_linear_v_fusion_vqg = nn.ModuleList([
                nn.Linear(self.opt['dim_v'], dim_h)
                for i in range(self.opt['attention']['nb_glimpses'])])

        self.is_testing = False
        self.sample_num = 5
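
The constructor above is driven entirely by a nested options dictionary. Below is a hedged sketch of the keys it reads, using placeholder dimensions and Ellipsis for sub-configurations whose contents the snippet does not show; the real values come from the project's configuration file, not from this example.

# Hypothetical opt layout covering the keys Dual_Model.__init__ accesses above.
# Every number here is a placeholder, not a value taken from the original configuration.
opt = {
    'dim_q': 2400,          # question vector size (linear_q_fusion / linear_q_att input)
    'dim_v': 2048,          # visual feature size (per-glimpse projection input)
    'dim_a': 2400,          # answer vector size (linear_a_att input)
    'share_modules': True,  # reuse the attention/fusion modules for VQG
    'seq2vec': {'arch': ...},                                    # passed to seq2vec.factory
    'attention': {'arch': ..., 'nb_glimpses': 2, 'dim_q': 310},
    'vqa': {'fusion': {'dim_hv': 620, 'dim_hq': 310, 'dim_mm': 510}},
    'vqg': {'vec2seq': {'arch': ..., 'dim_embedding': 620}},
}
# With these placeholders, dim_h = int(620 / 2) = 310, so each of the two
# glimpse projections in list_linear_v_fusion_vqa maps 2048 -> 310.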
Example #2
    def __init__(self, opt={}, vocab_answers=[]):
        # TODO: deep copy ?
        super(Mutan, self).__init__(opt, vocab_answers)
        # Modules for classification
        self.linear_q_fusion = nn.Linear(self.opt['dim_q'],
                                         self.opt['vqa']['fusion']['dim_hq'])
        self.linear_classif = nn.Linear(self.opt['vqa']['fusion']['dim_mm'],
                                        self.num_classes)
        self.fusion_classif = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                 visual_embedding=False,
                                                 question_embedding=False)
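
For intuition, the classification head above chains three sizes: dim_q is projected to dim_hq by linear_q_fusion, the bilinear fusion produces a dim_mm vector, and linear_classif maps dim_mm onto the answer vocabulary. A toy shape check with assumed sizes (illustrative only, not the repository's configuration):

import torch
import torch.nn as nn

dim_q, dim_hq, dim_mm, num_classes = 2400, 310, 510, 2000   # placeholders
linear_q_fusion = nn.Linear(dim_q, dim_hq)
linear_classif = nn.Linear(dim_mm, num_classes)

q = torch.randn(8, dim_q)         # batch of 8 question vectors
x_q = linear_q_fusion(q)          # (8, 310): question branch fed into the fusion
x_mm = torch.randn(8, dim_mm)     # stand-in for the MutanFusion output
logits = linear_classif(x_mm)     # (8, 2000): one score per candidate answer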
Example #3
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        # TODO: deep copy ?
        opt['attention']['dim_v'] = opt['attention']['dim_hv']
        opt['attention']['dim_q'] = opt['attention']['dim_hq']
        super(MutanAtt, self).__init__(opt, vocab_words, vocab_answers)
        # Modules for classification
        self.fusion_att = fusion.MutanFusion2d(self.opt['attention'],
                                               visual_embedding=False,
                                               question_embedding=False)
        self.list_linear_v_fusion = nn.ModuleList([
            nn.Linear(
                self.opt['dim_v'],
                int(self.opt['fusion']['dim_hv'] /
                    opt['attention']['nb_glimpses']))
            for i in range(self.opt['attention']['nb_glimpses'])
        ])
        self.linear_q_fusion = nn.Linear(self.opt['dim_q'],
                                         self.opt['fusion']['dim_hq'])
        self.linear_classif = nn.Linear(self.opt['fusion']['dim_mm'],
                                        self.num_classes)
        self.fusion_classif = fusion.MutanFusion(self.opt['fusion'],
                                                 visual_embedding=False,
                                                 question_embedding=False)
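
Two details are easy to miss: the constructor copies the attention hidden sizes into opt['attention']['dim_v'] and ['dim_q'] before calling the parent, and it splits dim_hv evenly across the glimpses, so each per-glimpse projection outputs dim_hv / nb_glimpses units (integer division, so dim_hv should be a multiple of nb_glimpses). A minimal sketch of that split under assumed sizes:

import torch.nn as nn

dim_v, dim_hv, nb_glimpses = 2048, 620, 2        # placeholders
per_glimpse = dim_hv // nb_glimpses              # 310 units per glimpse
list_linear_v_fusion = nn.ModuleList(
    [nn.Linear(dim_v, per_glimpse) for _ in range(nb_glimpses)])
# Concatenating the nb_glimpses per-glimpse outputs restores a dim_hv-sized vector.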
Example #4
    def __init__(self,
                 vqa_module,
                 seq2vec,
                 opt={},
                 vocab_words=[],
                 vocab_answers=[]):
        super(VQA_Dual, self).__init__()
        self.opt = opt
        self.vocab_words = vocab_words
        self.vocab_answers = vocab_answers
        self.num_classes = len(self.vocab_answers)
        self.fusion_classif = fusion.MutanFusion(
            vqa_module.opt['vqa']['fusion'],
            visual_embedding=False,
            question_embedding=False)
        # self.fusion_classif = vqa_module.fusion_classif
        self.is_testing = False

        self.linear_va_transform = nn.Linear(
            vqa_module.linear_classif.in_features,
            self.opt['vqg']['vec2seq']['dim_embedding'])

        self.linear_a_fusion = nn.Linear(
            vqa_module.linear_classif.in_features,
            vqa_module.linear_q_fusion.out_features)
        # Modules for Question Generation
        self.question_generation = getattr(
            vec2seq, opt['vqg']['vec2seq']['arch'])(vocab_words,
                                                    opt['vqg']['vec2seq'])
        self.is_testing = False

        # share parameters for embeddings
        if self.opt['vqg']['vec2seq'].get('share_with_seq2vec', False):
            print('Sharing parameters between [seq2vec] and [vec2seq]')
            self.question_generation.embedder = seq2vec.embedding
            if self.opt['seq2vec']['arch'] == 'lstm':
                self.question_generation.rnn = seq2vec.rnn
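
When share_with_seq2vec is enabled, the question generator reuses the encoder's word embedding (and its LSTM when the encoder is an LSTM), so both directions train a single set of parameters. The sketch below shows what that aliasing means in plain PyTorch; the variable names are illustrative, not the repository's classes:

import torch.nn as nn

shared_embedding = nn.Embedding(10000, 620)    # assumed vocab size / embedding dim
encoder_embedding = shared_embedding           # seq2vec side: reads questions
generator_embedder = shared_embedding          # vec2seq side: writes questions
assert generator_embedder.weight is encoder_embedding.weight   # one parameter tensor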
Example #5
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        opt['fusion']['dim_h'] = opt['fusion']['dim_mm']
        super(MutanNoAtt, self).__init__(opt, vocab_words, vocab_answers)
        self.fusion = fusion.MutanFusion(self.opt['fusion'])
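
MutanNoAtt only aliases opt['fusion']['dim_h'] to dim_mm before the parent constructor runs, presumably so downstream code that reads dim_h sees the multimodal size. With placeholder numbers:

opt = {'fusion': {'dim_hv': 620, 'dim_hq': 310, 'dim_mm': 510}}   # placeholders
opt['fusion']['dim_h'] = opt['fusion']['dim_mm']                  # dim_h is now 510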
Example #6
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(Dual_Model, self).__init__()
        self.opt = opt

        # To fuse different glimpses
        dim_h = int(self.opt["vqa"]["fusion"]["dim_hv"] /
                    opt["attention"]["nb_glimpses"])

        self.vocab_answers = vocab_answers
        self.vocab_words = vocab_words
        self.num_classes = len(self.vocab_answers) + 1  # for [UNK]

        # VQA Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt["seq2vec"])

        # Modules for classification
        self.linear_q_fusion = nn.Linear(self.opt["dim_q"],
                                         self.opt["vqa"]["fusion"]["dim_hq"])
        self.linear_classif = nn.Linear(self.opt["vqa"]["fusion"]["dim_mm"],
                                        self.num_classes)
        self.fusion_classif_vqa = fusion.MutanFusion(
            self.opt["vqa"]["fusion"],
            visual_embedding=False,
            question_embedding=False,
        )
        self.attention_vqa = getattr(
            attention_modules, opt["attention"]["arch"])(opt, use_linear=False)
        self.linear_q_att = nn.Linear(self.opt["dim_q"],
                                      self.opt["attention"]["dim_q"])
        self.list_linear_v_fusion_vqa = nn.ModuleList([
            nn.Linear(self.opt["dim_v"], dim_h)
            for i in range(self.opt["attention"]["nb_glimpses"])
        ])

        # share W and E
        self.answer_embeddings = nn.Embedding(self.linear_classif.out_features,
                                              self.linear_classif.in_features)
        # VQG modules
        self.linear_va_transform = nn.Linear(
            self.linear_classif.in_features,
            self.opt["vqg"]["vec2seq"]["dim_embedding"],
        )
        self.linear_a_att = nn.Linear(self.opt["dim_a"],
                                      self.opt["attention"]["dim_q"])

        self.linear_a_fusion = nn.Linear(
            self.opt["vqa"]["fusion"]["dim_mm"],
            self.opt["vqa"]["fusion"]["dim_hq"],
        )
        # Modules for Question Generation
        self.question_generation = getattr(
            vec2seq, opt["vqg"]["vec2seq"]["arch"])(vocab_words,
                                                    opt["vqg"]["vec2seq"])

        # Sharable modules
        if self.opt.get("share_modules", True):
            print("Sharing Modules: [Attention] and [Fusion]")
            self.fusion_classif_vqg = self.fusion_classif_vqa
            self.attention_vqg = self.attention_vqa
            self.list_linear_v_fusion_vqg = self.list_linear_v_fusion_vqa
        else:
            print("Disable Module Sharing")
            self.fusion_classif_vqg = fusion.MutanFusion(
                self.opt["vqa"]["fusion"],
                visual_embedding=False,
                question_embedding=False,
            )
            self.attention_vqg = getattr(
                attention_modules, opt["attention"]["arch"])(opt,
                                                             use_linear=False)
            self.list_linear_v_fusion_vqg = nn.ModuleList([
                nn.Linear(self.opt["dim_v"], dim_h)
                for i in range(self.opt["attention"]["nb_glimpses"])
            ])

        self.is_testing = False
        self.sample_num = 5
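
The "# share W and E" comment creates an nn.Embedding whose table has exactly the shape of linear_classif.weight (out_features rows of in_features columns). If the intent is to literally tie the answer embedding to the classifier weights, one common way to do so is sketched below; this is an assumption about the intent, not code taken from the snippet:

import torch.nn as nn

dim_mm, num_classes = 510, 2001                      # placeholders: dim_mm, len(answers) + 1
linear_classif = nn.Linear(dim_mm, num_classes)      # weight shape: (num_classes, dim_mm)
answer_embeddings = nn.Embedding(num_classes, dim_mm)
answer_embeddings.weight = linear_classif.weight     # both modules now share one matrix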