Example 1
    def __init__(self,
                 model,
                 output_size,
                 classif_q,
                 classif_v,
                 fusion_mode,
                 end_classif=True,
                 is_va=True):
        super().__init__()
        self.net = model
        self.end_classif = end_classif

        assert fusion_mode in ['rubi', 'hm', 'sum'], \
            "Fusion mode should be rubi/hm/sum."
        self.fusion_mode = fusion_mode
        self.is_va = is_va and fusion_mode != 'rubi'  # RUBi does not consider V->A

        # Q->A branch
        self.q_1 = MLP(**classif_q)
        if self.end_classif:  # default: True (following RUBi)
            self.q_2 = nn.Linear(output_size, output_size)

        # V->A branch
        if self.is_va:  # default: True (containing V->A)
            self.v_1 = MLP(**classif_v)
            if self.end_classif:  # default: True (following RUBi)
                self.v_2 = nn.Linear(output_size, output_size)

        self.constant = nn.Parameter(torch.tensor(0.0))
Example 2
 def __init__(self, model, output_size, classif, end_classif=True):
     super().__init__()
     self.net = model
     self.c_1 = MLP(**classif)
     self.end_classif = end_classif
     if self.end_classif:
         self.c_2 = nn.Linear(output_size, output_size)
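Note: these examples instantiate an MLP from a keyword dict (MLP(**classif), MLP(**classif_q), ...) but the MLP class itself is never shown. Below is a minimal sketch of a compatible module; the input_dim/dimensions keyword layout is assumed from the config dict used later in Example 5, and the activation/dropout defaults are illustrative only, not the real implementation.

import torch.nn as nn

class MLP(nn.Module):
    """Sketch of an MLP taking input_dim and a list of layer dimensions (assumed interface)."""

    def __init__(self, input_dim, dimensions, activation='relu', dropout=0.):
        super().__init__()
        layers, in_dim = [], input_dim
        for i, out_dim in enumerate(dimensions):
            layers.append(nn.Linear(in_dim, out_dim))
            if i < len(dimensions) - 1:  # no activation/dropout after the output layer
                layers.append(nn.ReLU() if activation == 'relu' else nn.Tanh())
                if dropout > 0:
                    layers.append(nn.Dropout(dropout))
            in_dim = out_dim
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)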
Example 3
    def __init__(
        self,
        txt_enc={},
        self_q_att=False,
        agg={},
        classif={},
        wid_to_word={},
        word_to_wid={},
        aid_to_ans=[],
        ans_to_aid={},
        fusion={},
        residual=False,
        q_single=False,
    ):
        super().__init__()
        self.self_q_att = self_q_att
        self.agg = agg
        assert self.agg['type'] in ['max', 'mean']
        self.classif = classif
        self.wid_to_word = wid_to_word
        self.word_to_wid = word_to_wid
        self.aid_to_ans = aid_to_ans
        self.ans_to_aid = ans_to_aid
        self.fusion = fusion
        self.residual = residual

        # Modules
        self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(2400, 512)
            self.q_att_linear1 = nn.Linear(512, 2)

        if q_single:
            self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
            if self.self_q_att:
                self.q_att_linear0_single = nn.Linear(2400, 512)
                self.q_att_linear1_single = nn.Linear(512, 2)
        else:
            self.txt_enc_single = None

        self.fusion_module = block.factory_fusion(self.fusion)

        if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
            Logger()(
                f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
                f"doesn't match the number of answers ({len(self.aid_to_ans)}). "
                "Modifying the output dimension.")
            self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)

        self.classif_module = MLP(**self.classif['mlp'])

        Logger().log_value('nparams',
                           sum(p.numel() for p in self.parameters()
                               if p.requires_grad),
                           should_print=True)

        Logger().log_value('nparams_txt_enc',
                           self.get_nparams_txt_enc(),
                           should_print=True)
Example 4
    def __init__(self,
                 txt_enc={},
                 self_q_att=False,
                 n_step=3,
                 shared=False,
                 cell={},
                 agg={},
                 classif={},
                 wid_to_word={},
                 word_to_wid={},
                 aid_to_ans=[],
                 ans_to_aid={}):
        super(MuRelNet, self).__init__()
        self.self_q_att = self_q_att
        self.n_step = n_step
        self.shared = shared
        self.cell = cell
        self.agg = agg
        assert self.agg['type'] in ['max', 'mean']
        self.classif = classif
        self.wid_to_word = wid_to_word
        self.word_to_wid = word_to_wid
        self.aid_to_ans = aid_to_ans
        self.ans_to_aid = ans_to_aid
        # Modules
        self.txt_enc = factory_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(2400, 512)
            self.q_att_linear1 = nn.Linear(512, 2)

        if self.shared:
            self.cell = MuRelCell(**cell)
        else:
            self.cells = nn.ModuleList(
                [MuRelCell(**cell) for i in range(self.n_step)])

        if 'fusion' in self.classif:
            self.classif_module = block.factory_fusion(self.classif['fusion'])
        elif 'mlp' in self.classif:
            self.classif_module = MLP(**self.classif['mlp'])  # unpack the MLP kwargs, as in the other examples
        else:
            raise ValueError(self.classif.keys())

        Logger().log_value('nparams',
                           sum(p.numel() for p in self.parameters()
                               if p.requires_grad),
                           should_print=True)

        Logger().log_value('nparams_txt_enc',
                           self.get_nparams_txt_enc(),
                           should_print=True)

        self.buffer = None
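Most of the examples create q_att_linear0 (2400 -> 512) and q_att_linear1 (512 -> 2) when self_q_att is enabled. The forward passes are not part of these excerpts; the sketch below only illustrates the two-glimpse question self-attention such layers typically implement (the 2400-d word-level hidden states and the mask-free softmax are assumptions).

import torch
import torch.nn.functional as F

def question_self_attention(q_hidden, q_att_linear0, q_att_linear1):
    # q_hidden: (batch, n_words, 2400) word-level hidden states (assumed shape)
    att = F.relu(q_att_linear0(q_hidden))    # (batch, n_words, 512)
    att = q_att_linear1(att)                 # (batch, n_words, 2): one score per word per glimpse
    att = F.softmax(att, dim=1)              # normalize the attention over the words
    glimpses = []
    for g in range(att.size(2)):
        weights = att[:, :, g].unsqueeze(2)               # (batch, n_words, 1)
        glimpses.append((weights * q_hidden).sum(dim=1))  # weighted sum -> (batch, 2400)
    return torch.cat(glimpses, dim=1)                     # (batch, 4800)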
Example 5
    def __init__(self, config):
        super().__init__()
        # self.self_q_att = self_q_att
        self.agg = {'type': 'max'}
        assert self.agg['type'] in ['max', 'mean']
        self.classif = {
            'mlp': {
                'input_dim': 2048,
                'dimensions': [2048, 2048, config.num_ans_candidates]
            }
        }
        self.fusion = {
            'type': 'block',
            'input_dims': [config.q_emb_dim, 2048],
            'output_dim': 2048,
            'mm_dim': 1000,
            'chunks': 20,
            'rank': 15,
            'dropout_input': 0.,
            'dropout_pre_lin': 0.
        }
        self.residual = False

        # Modules
        txt_enc = {
            'name': 'skipthoughts',
            'type': 'BayesianUniSkip',
            'dropout': 0.25,
            'fixed_emb': False,
            'dir_st': '/hdd/robik/skip-thoughts'
        }
        self.wid_to_word = {
            i: w
            for i, w in enumerate(config.dictionary.idx2word)
        }
        self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
        self.self_q_att = True
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(config.q_emb_dim // 2, 512)
            self.q_att_linear1 = nn.Linear(512, 2)

        self.fusion_module = block.factory_fusion(self.fusion)

        # if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
        #     Logger()(f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]})"
        #              f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension.")
        #     self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)

        self.classif_module = MLP(**self.classif['mlp'])
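In this example the block fusion maps a (q_emb_dim, 2048) input pair to a 2048-d vector, which matches the classifier's input_dim, and agg['type'] == 'max' pools over image regions. The forward pass is not shown; the sketch below only illustrates the fuse, aggregate, classify flow with placeholder layers and dimensions (it is not the block library API).

import torch
import torch.nn as nn

# Placeholder dimensions (illustrative only).
batch, n_regions, q_dim, v_dim, num_ans = 2, 36, 4800, 2048, 3000

fusion = nn.Sequential(nn.Linear(q_dim + v_dim, 2048), nn.ReLU())  # stand-in for block.factory_fusion(...)
classif = nn.Linear(2048, num_ans)                                  # stand-in for MLP(**classif['mlp'])

q = torch.randn(batch, q_dim)             # pooled question embedding
v = torch.randn(batch, n_regions, v_dim)  # region features

q_rep = q.unsqueeze(1).expand(-1, n_regions, -1)  # repeat the question for every region
fused = fusion(torch.cat([q_rep, v], dim=2))      # (batch, n_regions, 2048)
pooled = fused.max(dim=1)[0]                      # agg['type'] == 'max' pooling over regions
logits = classif(pooled)                          # (batch, num_ans)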
Example 6
    def __init__(
        self,
        txt_enc={},
        self_q_att=False,
        agg={},
        classif={},
        wid_to_word={},
        word_to_wid={},
        aid_to_ans=[],
        ans_to_aid={},
        fusion={},
        residual=False,
        use_counter=False,
    ):
        super().__init__()
        self.self_q_att = self_q_att
        self.agg = agg
        assert self.agg['type'] in ['max', 'mean', 'sum']
        self.classif = classif
        self.wid_to_word = wid_to_word
        self.word_to_wid = word_to_wid
        self.aid_to_ans = aid_to_ans
        self.ans_to_aid = ans_to_aid
        self.fusion = fusion
        self.residual = residual
        self.use_counter = use_counter

        # Modules
        self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(2400, 512)
            self.q_att_linear1 = nn.Linear(512, 2)

        self.fusion_module = block.factory_fusion(self.fusion)
        self.classif_module = MLP(**self.classif['mlp'])

        Logger().log_value('nparams',
                           sum(p.numel() for p in self.parameters()
                               if p.requires_grad),
                           should_print=True)

        Logger().log_value('nparams_txt_enc',
                           self.get_nparams_txt_enc(),
                           should_print=True)
Example 7
    def __init__(
        self,
        txt_enc={},
        self_q_att=False,
        agg={},
        classif={},
        wid_to_word={},
        word_to_wid={},
        aid_to_ans=[],
        ans_to_aid={},
        fusion={},
        residual=False,
    ):
        super().__init__()
        self.self_q_att = self_q_att
        self.agg = agg
        assert self.agg['type'] in ['max', 'mean']
        self.classif = classif
        self.wid_to_word = wid_to_word
        self.word_to_wid = word_to_wid
        self.aid_to_ans = aid_to_ans
        self.ans_to_aid = ans_to_aid
        self.fusion = fusion
        self.residual = residual

        # Modules
        self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(2400, 512)
            self.q_att_linear1 = nn.Linear(512, 2)
            self.q_att_linear2 = nn.Linear(4800, 1024)  # add for ban

        self.fusion_module = block.factory_fusion(self.fusion)

        if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
            Logger()(
                f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
                f"doesn't match the number of answers ({len(self.aid_to_ans)}). "
                "Modifying the output dimension.")
            self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)

        self.classif_module = MLP(**self.classif['mlp'])

        # BAN parameters

        num_hid = self.classif['mlp']['dimensions'][0]
        self.num_hid = num_hid
        num_ans = self.classif['mlp']['dimensions'][-1]
        gamma = 4  # number of attention glimpses
        v_dim = self.classif['mlp']['input_dim']
        v_att = BiAttention(v_dim, num_hid, num_hid, gamma)

        b_net = []
        q_prj = []
        # c_prj = []
        # objects = 10  # minimum number of boxes
        for i in range(gamma):
            b_net.append(BCNet(v_dim, num_hid, num_hid, None, k=1))
            q_prj.append(FCNet([num_hid, num_hid], '', .2))
            # c_prj.append(FCNet([objects + 1, num_hid], 'ReLU', .0))
        classifier = SimpleClassifier(num_hid, num_hid * 2, num_ans, .5)
        # counter = Counter(objects)
        # return BanModel(dataset, w_emb, q_emb, v_att, b_net, q_prj, classifier, op, gamma)

        # self.op = op
        self.glimpse = gamma
        # self.w_emb = w_emb
        # self.q_emb = q_emb
        self.v_att = v_att
        self.b_net = nn.ModuleList(b_net)
        self.q_prj = nn.ModuleList(q_prj)
        # self.c_prj = nn.ModuleList(c_prj)
        self.classifier = classifier
        # self.counter = counter
        self.drop = nn.Dropout(.5)
        self.tanh = nn.Tanh()

        Logger().log_value('nparams',
                           sum(p.numel() for p in self.parameters()
                               if p.requires_grad),
                           should_print=True)

        Logger().log_value('nparams_txt_enc',
                           self.get_nparams_txt_enc(),
                           should_print=True)
Example 8
    def __init__(self,
            txt_enc={},
            self_q_att=False,
            agg={},
            classif={},
            wid_to_word={},
            word_to_wid={},
            aid_to_ans=[],
            ans_to_aid={},
            fusion={},
            residual=False,
            ):
        super().__init__()
        self.self_q_att = self_q_att
        self.agg = agg
        assert self.agg['type'] in ['max', 'mean']
        self.classif = classif
        self.wid_to_word = wid_to_word
        self.word_to_wid = word_to_wid
        self.aid_to_ans = aid_to_ans
        self.ans_to_aid = ans_to_aid
        self.fusion = fusion
        self.residual = residual
        
        # Modules
        self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(2400, 512)
            self.q_att_linear1 = nn.Linear(512, 2)

        self.fusion_module = block.factory_fusion(self.fusion)

        if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
            Logger()(f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]})" 
             f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension.")
            self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans) 

        self.classif_module = MLP(**self.classif['mlp'])


        # Added for UpDn (bottom-up top-down attention)
        num_hid = self.classif['mlp']['dimensions'][0]
        self.num_hid = num_hid
        num_ans = self.classif['mlp']['dimensions'][-1]
        v_dim = self.classif['mlp']['input_dim']
        
        v_att = NewAttention(v_dim, 4800, num_hid)
        q_net = FCNet([4800, num_hid])
        v_net = FCNet([v_dim, num_hid])
        classifier = SimpleClassifier(
            num_hid, num_hid * 2, num_ans, 0.5)

        self.v_att = v_att
        self.q_net = q_net
        self.v_net = v_net
        self.classifier = classifier



        Logger().log_value('nparams',
            sum(p.numel() for p in self.parameters() if p.requires_grad),
            should_print=True)

        Logger().log_value('nparams_txt_enc',
            self.get_nparams_txt_enc(),
            should_print=True)
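The modules added in this example (v_att, q_net, v_net, classifier) follow the bottom-up top-down (UpDn) pattern: attend over regions, pool, then fuse question and image by elementwise product. The corresponding forward pass is not part of the excerpt; the sketch below shows that flow with placeholder layers standing in for NewAttention/FCNet/SimpleClassifier (dimensions are illustrative).

import torch
import torch.nn as nn

# Placeholder dimensions and layers (illustrative; not the real NewAttention/FCNet/SimpleClassifier).
batch, n_regions, v_dim, q_dim, num_hid, num_ans = 2, 36, 2048, 4800, 1024, 3000
v_att = nn.Linear(v_dim + q_dim, 1)  # scores each region given the question
q_net = nn.Linear(q_dim, num_hid)
v_net = nn.Linear(v_dim, num_hid)
classifier = nn.Linear(num_hid, num_ans)

v = torch.randn(batch, n_regions, v_dim)  # region features
q = torch.randn(batch, q_dim)             # question embedding

q_tiled = q.unsqueeze(1).expand(-1, n_regions, -1)
att = torch.softmax(v_att(torch.cat([v, q_tiled], dim=2)), dim=1)  # (batch, n_regions, 1)
v_pooled = (att * v).sum(dim=1)                                    # attention-weighted sum of regions
logits = classifier(q_net(q) * v_net(v_pooled))                    # elementwise fusion, then classify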
Example 9
    def __init__(self,
                 txt_enc={},
                 self_q_att=False,
                 agg={},
                 classif={},
                 wid_to_word={},
                 word_to_wid={},
                 aid_to_ans=[],
                 ans_to_aid={},
                 fusion={},
                 residual=False,
                 q_single=False):
        super().__init__()
        self.self_q_att = self_q_att
        self.agg = agg
        assert self.agg['type'] in ['max', 'mean']
        self.classif = classif
        self.wid_to_word = wid_to_word
        self.word_to_wid = word_to_wid
        self.aid_to_ans = aid_to_ans
        self.ans_to_aid = ans_to_aid
        self.fusion = fusion
        self.residual = residual

        # Modules
        self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0 = nn.Linear(2400, 512)
            self.q_att_linear1 = nn.Linear(512, 2)

        if q_single:
            self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
            if self.self_q_att:
                self.q_att_linear0_single = nn.Linear(2400, 512)
                self.q_att_linear1_single = nn.Linear(512, 2)

        if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
            Logger()(
                f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
                f"doesn't match the number of answers ({len(self.aid_to_ans)}). "
                "Modifying the output dimension.")
            self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)

        self.classif_module = MLP(**self.classif['mlp'])

        # UpDn
        q_dim = self.fusion['input_dims'][0]
        v_dim = self.fusion['input_dims'][1]
        output_dim = self.fusion['output_dim']
        att_size = 512
        self.v_att = Attention(v_dim,
                               v_dim,
                               att_size,
                               36,
                               output_dim,
                               drop_ratio=0.5)
        self.txt_enc.rnn = QuestionEmbedding(620, q_dim, 1, False, 0.0)

        self.q_net = FCNet([q_dim, output_dim])
        # self.v_net = FCNet([v_dim, output_dim])

        Logger().log_value('nparams',
                           sum(p.numel() for p in self.parameters()
                               if p.requires_grad),
                           should_print=True)

        Logger().log_value('nparams_txt_enc',
                           self.get_nparams_txt_enc(),
                           should_print=True)