def __init__(self, model, output_size, classif_q, classif_v, fusion_mode,
             end_classif=True, is_va=True):
    super().__init__()
    self.net = model
    self.end_classif = end_classif

    assert fusion_mode in ['rubi', 'hm', 'sum'], "Fusion mode should be rubi/hm/sum."
    self.fusion_mode = fusion_mode
    self.is_va = is_va and (fusion_mode != 'rubi')  # RUBi does not consider V->A

    # Q->A branch
    self.q_1 = MLP(**classif_q)
    if self.end_classif:  # default: True (following RUBi)
        self.q_2 = nn.Linear(output_size, output_size)

    # V->A branch
    if self.is_va:  # default: True (containing V->A)
        self.v_1 = MLP(**classif_v)
        if self.end_classif:  # default: True (following RUBi)
            self.v_2 = nn.Linear(output_size, output_size)

    self.constant = nn.Parameter(torch.tensor(0.0))
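# Illustrative sketch only (assumption, not this repo's forward pass): one plausible way the
# three fusion modes accepted above combine the base model's fused logits z with the
# question-only branch logits z_q and, when is_va is set, the vision-only branch logits z_v.
# The exact formulas used by the original method may differ; this only shows the branch wiring.
import torch
import torch.nn.functional as F

def fuse_logits_sketch(z, z_q, z_v, mode):
    if mode == 'rubi':
        # RUBi-style masking: gate the fused logits with a question-only sigmoid mask
        return z * torch.sigmoid(z_q)
    if mode == 'hm':
        # harmonic-style combination of the three branches
        return torch.log(torch.sigmoid(z) * torch.sigmoid(z_q) * torch.sigmoid(z_v) + 1e-12)
    if mode == 'sum':
        # additive combination of the branch logits
        return F.log_softmax(z + z_q + z_v, dim=-1)
    raise ValueError(mode)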
def __init__(self, model, output_size, classif, end_classif=True):
    super().__init__()
    self.net = model
    self.c_1 = MLP(**classif)
    self.end_classif = end_classif
    if self.end_classif:
        self.c_2 = nn.Linear(output_size, output_size)
def __init__(
    self,
    txt_enc={},
    self_q_att=False,
    agg={},
    classif={},
    wid_to_word={},
    word_to_wid={},
    aid_to_ans=[],
    ans_to_aid={},
    fusion={},
    residual=False,
    q_single=False,
):
    super().__init__()
    self.self_q_att = self_q_att
    self.agg = agg
    assert self.agg['type'] in ['max', 'mean']
    self.classif = classif
    self.wid_to_word = wid_to_word
    self.word_to_wid = word_to_wid
    self.aid_to_ans = aid_to_ans
    self.ans_to_aid = ans_to_aid
    self.fusion = fusion
    self.residual = residual

    # Modules
    self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(2400, 512)
        self.q_att_linear1 = nn.Linear(512, 2)

    if q_single:
        self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0_single = nn.Linear(2400, 512)
            self.q_att_linear1_single = nn.Linear(512, 2)
    else:
        self.txt_enc_single = None

    self.fusion_module = block.factory_fusion(self.fusion)

    if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
        Logger()(
            f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
            f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension."
        )
        self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)
    self.classif_module = MLP(**self.classif['mlp'])

    Logger().log_value('nparams',
                       sum(p.numel() for p in self.parameters() if p.requires_grad),
                       should_print=True)
    Logger().log_value('nparams_txt_enc',
                       self.get_nparams_txt_enc(),
                       should_print=True)
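# Illustrative sketch (not copied from this repo): the self_q_att layers above learn two
# attention "glimpses" over word-level skip-thought features (2400-d per word) and pool them
# into a 4800-d question vector. Tensor shapes and the masked-softmax details are assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

def self_q_att_sketch(q_words, lengths, linear0, linear1):
    # q_words: (batch, n_words, 2400) word features; lengths: (batch,) valid word counts
    att = linear1(F.relu(linear0(q_words)))                   # (batch, n_words, 2 glimpses)
    pad = (torch.arange(q_words.size(1), device=q_words.device)[None, :]
           >= lengths[:, None])                               # True on padding positions
    att = att.masked_fill(pad.unsqueeze(2), -1e4)
    att = F.softmax(att, dim=1)                               # normalise over words per glimpse
    glimpses = [(att[:, :, g:g + 1] * q_words).sum(1) for g in range(att.size(2))]
    return torch.cat(glimpses, dim=1)                         # (batch, 2 * 2400)

# Example with the layer sizes used above:
# out = self_q_att_sketch(torch.randn(8, 12, 2400), torch.full((8,), 12),
#                         nn.Linear(2400, 512), nn.Linear(512, 2))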
def __init__(self,
             txt_enc={},
             self_q_att=False,
             n_step=3,
             shared=False,
             cell={},
             agg={},
             classif={},
             wid_to_word={},
             word_to_wid={},
             aid_to_ans=[],
             ans_to_aid={}):
    super(MuRelNet, self).__init__()
    self.self_q_att = self_q_att
    self.n_step = n_step
    self.shared = shared
    self.cell = cell
    self.agg = agg
    assert self.agg['type'] in ['max', 'mean']
    self.classif = classif
    self.wid_to_word = wid_to_word
    self.word_to_wid = word_to_wid
    self.aid_to_ans = aid_to_ans
    self.ans_to_aid = ans_to_aid

    # Modules
    self.txt_enc = factory_text_enc(self.wid_to_word, txt_enc)
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(2400, 512)
        self.q_att_linear1 = nn.Linear(512, 2)

    if self.shared:
        self.cell = MuRelCell(**cell)
    else:
        self.cells = nn.ModuleList(
            [MuRelCell(**cell) for i in range(self.n_step)])

    if 'fusion' in self.classif:
        self.classif_module = block.factory_fusion(self.classif['fusion'])
    elif 'mlp' in self.classif:
        self.classif_module = MLP(**self.classif['mlp'])
    else:
        raise ValueError(self.classif.keys())

    Logger().log_value('nparams',
                       sum(p.numel() for p in self.parameters() if p.requires_grad),
                       should_print=True)
    Logger().log_value('nparams_txt_enc',
                       self.get_nparams_txt_enc(),
                       should_print=True)

    self.buffer = None
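# Illustrative sketch (assumption, not this repo's forward): how a shared cell versus an
# unrolled list of cells is typically iterated for n_step reasoning steps. `cell(q, mm)` is a
# stand-in signature; the real MuRelCell takes richer inputs (e.g. region coordinates).
def murel_steps_sketch(net, q, mm):
    # net.shared / net.cell / net.cells / net.n_step come from the constructor above
    for i in range(net.n_step):
        cell = net.cell if net.shared else net.cells[i]
        mm = cell(q, mm)  # each step refines the region representations given the question
    return mm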
def __init__(self, config):
    super().__init__()
    # self.self_q_att = self_q_att
    self.agg = {'type': 'max'}
    assert self.agg['type'] in ['max', 'mean']
    self.classif = {
        'mlp': {
            'input_dim': 2048,
            'dimensions': [2048, 2048, config.num_ans_candidates]
        }
    }
    self.fusion = {
        'type': 'block',
        'input_dims': [config.q_emb_dim, 2048],
        'output_dim': 2048,
        'mm_dim': 1000,
        'chunks': 20,
        'rank': 15,
        'dropout_input': 0.,
        'dropout_pre_lin': 0.
    }
    self.residual = False

    # Modules
    txt_enc = {
        'name': 'skipthoughts',
        'type': 'BayesianUniSkip',
        'dropout': 0.25,
        'fixed_emb': False,
        'dir_st': '/hdd/robik/skip-thoughts'
    }
    self.wid_to_word = {i: w for i, w in enumerate(config.dictionary.idx2word)}
    self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)

    self.self_q_att = True
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(config.q_emb_dim // 2, 512)
        self.q_att_linear1 = nn.Linear(512, 2)

    self.fusion_module = block.factory_fusion(self.fusion)

    # if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
    #     Logger()(f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]})"
    #              f" doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension.")
    #     self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)
    self.classif_module = MLP(**self.classif['mlp'])
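# Illustrative usage sketch of the Block fusion configured above: it fuses the pooled question
# embedding with a 2048-d region feature into a 2048-d vector. `block.factory_fusion` is the
# same factory already called in these constructors; the 4800-d question size and batch shapes
# here are assumptions for the example.
import torch
import block

def block_fusion_sketch(q_emb, v_region, q_emb_dim=4800):
    fusion = block.factory_fusion({
        'type': 'block',
        'input_dims': [q_emb_dim, 2048],
        'output_dim': 2048,
        'mm_dim': 1000,
        'chunks': 20,
        'rank': 15,
        'dropout_input': 0.,
        'dropout_pre_lin': 0.,
    })
    return fusion([q_emb, v_region])  # (batch, 2048) fused feature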
def __init__(
    self,
    txt_enc={},
    self_q_att=False,
    agg={},
    classif={},
    wid_to_word={},
    word_to_wid={},
    aid_to_ans=[],
    ans_to_aid={},
    fusion={},
    residual=False,
    use_counter=False,
):
    super().__init__()
    self.self_q_att = self_q_att
    self.agg = agg
    assert self.agg['type'] in ['max', 'mean', 'sum']
    self.classif = classif
    self.wid_to_word = wid_to_word
    self.word_to_wid = word_to_wid
    self.aid_to_ans = aid_to_ans
    self.ans_to_aid = ans_to_aid
    self.fusion = fusion
    self.residual = residual
    self.use_counter = use_counter

    # Modules
    self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(2400, 512)
        self.q_att_linear1 = nn.Linear(512, 2)

    self.fusion_module = block.factory_fusion(self.fusion)
    self.classif_module = MLP(**self.classif['mlp'])

    Logger().log_value('nparams',
                       sum(p.numel() for p in self.parameters() if p.requires_grad),
                       should_print=True)
    Logger().log_value('nparams_txt_enc',
                       self.get_nparams_txt_enc(),
                       should_print=True)
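# Illustrative sketch: the agg['type'] checked above selects how per-region fused features are
# pooled into a single vector before classification. A minimal version of the three options
# (the (batch, n_regions, dim) layout is an assumption):
import torch

def aggregate_sketch(mm, agg_type):
    # mm: (batch, n_regions, dim) region-wise fused features
    if agg_type == 'max':
        return mm.max(dim=1)[0]
    if agg_type == 'mean':
        return mm.mean(dim=1)
    if agg_type == 'sum':
        return mm.sum(dim=1)
    raise ValueError(agg_type)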
def __init__(
    self,
    txt_enc={},
    self_q_att=False,
    agg={},
    classif={},
    wid_to_word={},
    word_to_wid={},
    aid_to_ans=[],
    ans_to_aid={},
    fusion={},
    residual=False,
):
    super().__init__()
    self.self_q_att = self_q_att
    self.agg = agg
    assert self.agg['type'] in ['max', 'mean']
    self.classif = classif
    self.wid_to_word = wid_to_word
    self.word_to_wid = word_to_wid
    self.aid_to_ans = aid_to_ans
    self.ans_to_aid = ans_to_aid
    self.fusion = fusion
    self.residual = residual

    # Modules
    self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(2400, 512)
        self.q_att_linear1 = nn.Linear(512, 2)
        self.q_att_linear2 = nn.Linear(4800, 1024)  # added for BAN

    self.fusion_module = block.factory_fusion(self.fusion)

    if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
        Logger()(
            f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
            f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension."
        )
        self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)
    self.classif_module = MLP(**self.classif['mlp'])

    # BAN parameters
    num_hid = self.classif['mlp']['dimensions'][0]
    self.num_hid = num_hid
    num_ans = self.classif['mlp']['dimensions'][-1]
    gamma = 4  # number of glimpses
    v_dim = self.classif['mlp']['input_dim']

    v_att = BiAttention(v_dim, num_hid, num_hid, gamma)
    b_net = []
    q_prj = []
    # c_prj = []
    # objects = 10  # minimum number of boxes
    for i in range(gamma):
        b_net.append(BCNet(v_dim, num_hid, num_hid, None, k=1))
        q_prj.append(FCNet([num_hid, num_hid], '', .2))
        # c_prj.append(FCNet([objects + 1, num_hid], 'ReLU', .0))
    classifier = SimpleClassifier(num_hid, num_hid * 2, num_ans, .5)
    # counter = Counter(objects)
    # return BanModel(dataset, w_emb, q_emb, v_att, b_net, q_prj, classifier, op, gamma)

    # self.op = op
    self.glimpse = gamma
    # self.w_emb = w_emb
    # self.q_emb = q_emb
    self.v_att = v_att
    self.b_net = nn.ModuleList(b_net)
    self.q_prj = nn.ModuleList(q_prj)
    # self.c_prj = nn.ModuleList(c_prj)
    self.classifier = classifier
    # self.counter = counter
    self.drop = nn.Dropout(.5)
    self.tanh = nn.Tanh()

    Logger().log_value('nparams',
                       sum(p.numel() for p in self.parameters() if p.requires_grad),
                       should_print=True)
    Logger().log_value('nparams_txt_enc',
                       self.get_nparams_txt_enc(),
                       should_print=True)
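# Illustrative sketch of the standard BAN glimpse loop that v_att, b_net, q_prj, classifier and
# glimpse above are set up for. It is not copied from this repo's forward; forward_all and
# forward_with_weights are the method names from the reference BAN implementation and may differ here.
def ban_glimpses_sketch(net, v, q_emb):
    # v: (batch, n_objects, v_dim) region features; q_emb: (batch, n_words, num_hid)
    att, _ = net.v_att.forward_all(v, q_emb)              # (batch, glimpse, n_objects, n_words)
    for g in range(net.glimpse):
        b_emb = net.b_net[g].forward_with_weights(v, q_emb, att[:, g, :, :])
        q_emb = net.q_prj[g](b_emb.unsqueeze(1)) + q_emb  # residual update of question states
    return net.classifier(q_emb.sum(1))                   # answer logits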
def __init__(self,
             txt_enc={},
             self_q_att=False,
             agg={},
             classif={},
             wid_to_word={},
             word_to_wid={},
             aid_to_ans=[],
             ans_to_aid={},
             fusion={},
             residual=False,
             ):
    super().__init__()
    self.self_q_att = self_q_att
    self.agg = agg
    assert self.agg['type'] in ['max', 'mean']
    self.classif = classif
    self.wid_to_word = wid_to_word
    self.word_to_wid = word_to_wid
    self.aid_to_ans = aid_to_ans
    self.ans_to_aid = ans_to_aid
    self.fusion = fusion
    self.residual = residual

    # Modules
    self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(2400, 512)
        self.q_att_linear1 = nn.Linear(512, 2)

    self.fusion_module = block.factory_fusion(self.fusion)

    if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
        Logger()(
            f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
            f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension."
        )
        self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)
    self.classif_module = MLP(**self.classif['mlp'])

    # Added for UpDn
    num_hid = self.classif['mlp']['dimensions'][0]
    self.num_hid = num_hid
    num_ans = self.classif['mlp']['dimensions'][-1]
    v_dim = self.classif['mlp']['input_dim']

    v_att = NewAttention(v_dim, 4800, num_hid)
    q_net = FCNet([4800, num_hid])
    v_net = FCNet([v_dim, num_hid])
    classifier = SimpleClassifier(num_hid, num_hid * 2, num_ans, 0.5)

    self.v_att = v_att
    self.q_net = q_net
    self.v_net = v_net
    self.classifier = classifier

    Logger().log_value('nparams',
                       sum(p.numel() for p in self.parameters() if p.requires_grad),
                       should_print=True)
    Logger().log_value('nparams_txt_enc',
                       self.get_nparams_txt_enc(),
                       should_print=True)
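# Illustrative sketch of the standard bottom-up/top-down (UpDn) attention step that v_att,
# q_net, v_net and classifier above implement in the reference UpDn code. It is not copied from
# this repo's forward; the 4800-d pooled question embedding is an assumption from the sizes above.
def updn_sketch(net, v, q_emb):
    # v: (batch, n_objects, v_dim) region features; q_emb: (batch, 4800) pooled question embedding
    att = net.v_att(v, q_emb)                      # (batch, n_objects, 1) attention over regions
    v_emb = (att * v).sum(1)                       # attended visual feature
    joint = net.q_net(q_emb) * net.v_net(v_emb)    # element-wise fusion into num_hid dims
    return net.classifier(joint)                   # answer logits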
def __init__(self,
             txt_enc={},
             self_q_att=False,
             agg={},
             classif={},
             wid_to_word={},
             word_to_wid={},
             aid_to_ans=[],
             ans_to_aid={},
             fusion={},
             residual=False,
             q_single=False):
    super().__init__()
    self.self_q_att = self_q_att
    self.agg = agg
    assert self.agg['type'] in ['max', 'mean']
    self.classif = classif
    self.wid_to_word = wid_to_word
    self.word_to_wid = word_to_wid
    self.aid_to_ans = aid_to_ans
    self.ans_to_aid = ans_to_aid
    self.fusion = fusion
    self.residual = residual

    # Modules
    self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
    if self.self_q_att:
        self.q_att_linear0 = nn.Linear(2400, 512)
        self.q_att_linear1 = nn.Linear(512, 2)

    if q_single:
        self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
        if self.self_q_att:
            self.q_att_linear0_single = nn.Linear(2400, 512)
            self.q_att_linear1_single = nn.Linear(512, 2)

    if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
        Logger()(
            f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]}) "
            f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension."
        )
        self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans)
    self.classif_module = MLP(**self.classif['mlp'])

    # UpDn
    q_dim = self.fusion['input_dims'][0]
    v_dim = self.fusion['input_dims'][1]
    output_dim = self.fusion['output_dim']
    att_size = 512
    self.v_att = Attention(v_dim, v_dim, att_size, 36, output_dim, drop_ratio=0.5)
    self.txt_enc.rnn = QuestionEmbedding(620, q_dim, 1, False, 0.0)
    self.q_net = FCNet([q_dim, output_dim])
    # self.v_net = FCNet([v_dim, output_dim])

    Logger().log_value('nparams',
                       sum(p.numel() for p in self.parameters() if p.requires_grad),
                       should_print=True)
    Logger().log_value('nparams_txt_enc',
                       self.get_nparams_txt_enc(),
                       should_print=True)