def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(AbstractAtt, self).__init__()
    self.opt = opt
    self.vocab_words = vocab_words
    self.vocab_answers = vocab_answers
    self.num_classes = len(self.vocab_answers)
    # Modules
    self.seq2vec = seq2vec.factory(
        self.vocab_words, self.opt['seq2vec'])  # seq2vec = skipthoughts
    # Modules for attention
    self.conv_v_att = nn.Conv2d(
        self.opt['dim_v'],  # Conv2d args: (in channels, output depth, filter height, filter width)
        self.opt['attention']['dim_v'],
        1, 1)  # (2048, 2048, 1, 1)
    self.linear_q_att = nn.Linear(
        self.opt['dim_q'],
        self.opt['attention']['dim_q'])  # (2400, 2048)
    self.conv_att = nn.Conv2d(self.opt['attention']['dim_mm'],
                              self.opt['attention']['nb_glimpses'],
                              1, 1)  # (510, 2, 1, 1)
    # Modules for classification
    self.list_linear_v_fusion = None
    self.linear_q_fusion = None
    self.linear_classif = None
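# Illustrative sketch (an assumption, not part of the original source): an `opt`
# dictionary consistent with the dimension comments above -- 2048-d visual
# features, 2400-d skip-thought question vectors, a 510-d multimodal space and
# 2 glimpses. Only the keys read by __init__ are shown; the 'seq2vec' sub-keys
# are placeholders.
example_opt = {
    'dim_v': 2048,
    'dim_q': 2400,
    'seq2vec': {'arch': 'skipthoughts'},
    'attention': {
        'dim_v': 2048,
        'dim_q': 2048,
        'dim_mm': 510,
        'nb_glimpses': 2,
    },
}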
def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(Dual_Learning_Model_Abstract, self).__init__()
    self.opt = opt
    self.attention = getattr(attention_modules, opt['attention']['arch'])(opt)
    self.vocab_answers = vocab_answers
    self.vocab_words = vocab_words
    self.num_classes = len(self.vocab_answers)
    # VQA Modules
    self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
    self.vqa_module = getattr(vqa_modules, self.opt['vqa']['arch'])(self.opt, self.vocab_answers)
    if self.opt['vqg']['arch'] == 'VQA_Dual':
        self.vqg_module = vqg_modules.VQA_Dual(self.vqa_module, self.seq2vec, self.opt,
                                               self.vocab_words, self.vocab_answers)
    else:
        self.vqg_module = getattr(vqg_modules, self.opt['vqg']['arch'])(self.opt, self.vocab_words,
                                                                        self.vocab_answers)
    self.answer_embeddings = nn.Embedding(self.vqa_module.linear_classif.out_features,
                                          self.vqa_module.linear_classif.in_features)
    self.answer_embeddings.weight = self.vqa_module.linear_classif.weight
    self.shared_conv_layer = None
    self.is_testing = False
    self.sample_num = 5
    self.use_same_attention = opt['attention'].get('use_same_attention', True)
    # To fuse different glimpses
    if self.opt['vqa']['arch'] == 'Mutan':
        dim_h = int(self.opt['vqa']['fusion']['dim_hv'] / opt['attention']['nb_glimpses'])
    else:  # MLB
        dim_h = self.opt['vqa']['fusion']['dim_h']
    self.list_linear_v_fusion = nn.ModuleList([
        nn.Linear(self.opt['dim_v'], dim_h)
        for i in range(self.opt['attention']['nb_glimpses'])])
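# Minimal sketch (assumed sizes, for illustration only) of the weight tying done
# above: an nn.Embedding whose weight is the same Parameter as the classifier's
# nn.Linear weight, so looking up an answer index returns that answer's
# classifier row and both modules share gradients.
import torch
import torch.nn as nn

dim_mm, num_answers = 510, 2000                    # assumed sizes
linear_classif = nn.Linear(dim_mm, num_answers)    # weight shape (num_answers, dim_mm)
answer_embeddings = nn.Embedding(num_answers, dim_mm)
answer_embeddings.weight = linear_classif.weight   # shared Parameter, shapes match

idx = torch.tensor([3])
assert torch.equal(answer_embeddings(idx)[0], linear_classif.weight[3])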
def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(Dual_Model, self).__init__()
    self.opt = opt
    # To fuse different glimpses
    dim_h = int(self.opt['vqa']['fusion']['dim_hv'] / opt['attention']['nb_glimpses'])
    self.vocab_answers = vocab_answers
    self.vocab_words = vocab_words
    self.num_classes = len(self.vocab_answers)
    # VQA Modules
    self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
    # Modules for classification
    self.linear_q_fusion = nn.Linear(self.opt['dim_q'], self.opt['vqa']['fusion']['dim_hq'])
    self.linear_classif = nn.Linear(self.opt['vqa']['fusion']['dim_mm'], self.num_classes)
    self.fusion_classif_vqa = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                 visual_embedding=False,
                                                 question_embedding=False)
    self.attention_vqa = getattr(attention_modules, opt['attention']['arch'])(opt, use_linear=False)
    self.linear_q_att = nn.Linear(self.opt['dim_q'], self.opt['attention']['dim_q'])
    self.list_linear_v_fusion_vqa = nn.ModuleList([
        nn.Linear(self.opt['dim_v'], dim_h)
        for i in range(self.opt['attention']['nb_glimpses'])])
    # share W and E
    self.answer_embeddings = nn.Embedding(self.linear_classif.out_features,
                                          self.linear_classif.in_features)
    # VQG modules
    self.linear_va_transform = nn.Linear(self.linear_classif.in_features,
                                         self.opt['vqg']['vec2seq']['dim_embedding'])
    self.linear_a_att = nn.Linear(self.opt['dim_a'], self.opt['attention']['dim_q'])
    self.linear_a_fusion = nn.Linear(self.opt['vqa']['fusion']['dim_mm'],
                                     self.opt['vqa']['fusion']['dim_hq'])
    # Modules for Question Generation
    self.question_generation = getattr(vec2seq, opt['vqg']['vec2seq']['arch'])(vocab_words,
                                                                               opt['vqg']['vec2seq'])
    # Sharable modules
    if self.opt.get('share_modules', True):
        print('Sharing Modules: [Attention] and [Fusion]')
        self.fusion_classif_vqg = self.fusion_classif_vqa
        self.attention_vqg = self.attention_vqa
        self.list_linear_v_fusion_vqg = self.list_linear_v_fusion_vqa
    else:
        print('Disable Module Sharing')
        self.fusion_classif_vqg = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                     visual_embedding=False,
                                                     question_embedding=False)
        self.attention_vqg = getattr(attention_modules, opt['attention']['arch'])(opt, use_linear=False)
        self.list_linear_v_fusion_vqg = nn.ModuleList([
            nn.Linear(self.opt['dim_v'], dim_h)
            for i in range(self.opt['attention']['nb_glimpses'])])
    self.is_testing = False
    self.sample_num = 5
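# Sketch (illustrative assumption): when `share_modules` is true the VQG branch
# simply aliases the VQA branch's modules, so both paths train one set of
# parameters; when false, each branch gets its own copies. A minimal
# demonstration of that aliasing with a stand-in module:
import torch.nn as nn

fusion_vqa = nn.Linear(8, 4)
fusion_vqg = fusion_vqa                          # shared, as in the `if` branch above
assert fusion_vqg.weight is fusion_vqa.weight

fusion_vqg_separate = nn.Linear(8, 4)            # separate, as in the `else` branch
assert fusion_vqg_separate.weight is not fusion_vqa.weight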
def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(AbstractNoAtt, self).__init__()
    self.opt = opt
    self.vocab_words = vocab_words
    self.vocab_answers = vocab_answers
    self.num_classes = len(self.vocab_answers)
    # Modules
    self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
    self.linear_classif = nn.Linear(self.opt['fusion']['dim_h'], self.num_classes)
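# Sketch (assumption; the real forward pass lives in AbstractNoAtt subclasses):
# the question is encoded by seq2vec, fused with a global image feature into a
# vector of size opt['fusion']['dim_h'], and linear_classif maps that fused
# vector to answer logits. Sizes below are placeholders.
import torch
import torch.nn as nn

dim_h, num_classes, batch = 1200, 2000, 4
linear_classif = nn.Linear(dim_h, num_classes)
x_fused = torch.randn(batch, dim_h)       # stands in for the fusion output
logits = linear_classif(x_fused)          # (batch, num_classes)
pred = logits.argmax(dim=1)               # predicted answer indices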
def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(AbstractAtt, self).__init__()
    self.opt = opt
    self.vocab_words = vocab_words
    self.vocab_answers = vocab_answers
    self.num_classes = len(self.vocab_answers)
    # Modules
    self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
    # Modules for attention
    self.conv_v_att = nn.Conv2d(self.opt['dim_v'],
                                self.opt['attention']['dim_v'], 1, 1)
    self.linear_q_att = nn.Linear(self.opt['dim_q'],
                                  self.opt['attention']['dim_q'])
    self.conv_att = nn.Conv2d(self.opt['attention']['dim_mm'],
                              self.opt['attention']['nb_glimpses'], 1, 1)
    # Modules for classification
    self.list_linear_v_fusion = None
    self.linear_q_fusion = None
    self.linear_classif = None
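# Worked sketch (scaled-down, assumed sizes): the 1x1 convolutions above act as
# per-location linear projections over the spatial feature map, e.g. conv_v_att
# maps each of the H x W visual feature vectors from dim_v to attention.dim_v
# independently. The equivalence with an nn.Linear applied per position:
import torch
import torch.nn as nn

dim_v, dim_att, H, W = 64, 32, 7, 7
conv = nn.Conv2d(dim_v, dim_att, 1, 1)
linear = nn.Linear(dim_v, dim_att)
linear.weight.data = conv.weight.data.view(dim_att, dim_v)
linear.bias.data = conv.bias.data

x = torch.randn(1, dim_v, H, W)
y_conv = conv(x)                                           # (1, dim_att, H, W)
y_lin = linear(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)  # same result
assert torch.allclose(y_conv, y_lin, atol=1e-5)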
def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(AbstractAtt, self).__init__()
    self.opt = opt
    self.vocab_words = vocab_words
    self.vocab_answers = vocab_answers
    self.num_classes = len(self.vocab_answers)
    # Modules
    if self.opt['seq2vec']['arch'] == "bert":
        self.seq2vec = return_self
    else:
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
    # Modules for attention
    self.conv_v_att = nn.Conv2d(self.opt['dim_v'],
                                self.opt['attention']['dim_v'], 1, 1)
    self.linear_q_att = nn.Linear(self.opt['dim_q'],
                                  self.opt['attention']['dim_q'])
    self.conv_att = nn.Conv2d(self.opt['attention']['dim_mm'],
                              self.opt['attention']['nb_glimpses'], 1, 1)
    # Modules for batch norm
    self.batchnorm_conv_v_att = nn.BatchNorm2d(
        self.opt['attention']['dim_v'])
    self.batchnorm_linear_q_att = nn.BatchNorm1d(
        self.opt['attention']['dim_q'])
    self.batchnorm_conv_att = nn.BatchNorm2d(
        self.opt['attention']['nb_glimpses'])
    self.batchnorm_fusion_att = nn.BatchNorm1d(
        self.opt['attention']['dim_mm'])
    self.batchnorm_list_linear_v_fusion = nn.BatchNorm1d(
        self.opt['attention']['dim_mm'])
    self.batchnorm_list_linear_q_fusion = nn.BatchNorm1d(
        self.opt['attention']['dim_mm'] * self.opt['attention']['nb_glimpses'])
    self.batchnorm_fusion_classif = nn.BatchNorm1d(
        self.opt['attention']['dim_mm'] * self.opt['attention']['nb_glimpses'])
    # Modules for classification
    self.list_linear_v_fusion = None
    self.linear_q_fusion = None
    self.linear_classif = None
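# Assumption: `return_self` is not defined in this snippet; it presumably acts
# as an identity so that pre-computed BERT question embeddings bypass the
# skip-thought encoder. A minimal stand-in consistent with that reading:
def return_self(input_q):
    # Question features are already vectors (e.g. BERT pooled output),
    # so no further seq2vec encoding is applied.
    return input_q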
def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
    super(Dual_Model, self).__init__()
    self.opt = opt
    # To fuse different glimpses
    dim_h = int(self.opt["vqa"]["fusion"]["dim_hv"] / opt["attention"]["nb_glimpses"])
    self.vocab_answers = vocab_answers
    self.vocab_words = vocab_words
    self.num_classes = len(self.vocab_answers) + 1  # for [UNK]
    # VQA Modules
    self.seq2vec = seq2vec.factory(self.vocab_words, self.opt["seq2vec"])
    # Modules for classification
    self.linear_q_fusion = nn.Linear(self.opt["dim_q"], self.opt["vqa"]["fusion"]["dim_hq"])
    self.linear_classif = nn.Linear(self.opt["vqa"]["fusion"]["dim_mm"], self.num_classes)
    self.fusion_classif_vqa = fusion.MutanFusion(
        self.opt["vqa"]["fusion"],
        visual_embedding=False,
        question_embedding=False,
    )
    self.attention_vqa = getattr(
        attention_modules, opt["attention"]["arch"])(opt, use_linear=False)
    self.linear_q_att = nn.Linear(self.opt["dim_q"], self.opt["attention"]["dim_q"])
    self.list_linear_v_fusion_vqa = nn.ModuleList([
        nn.Linear(self.opt["dim_v"], dim_h)
        for i in range(self.opt["attention"]["nb_glimpses"])
    ])
    # share W and E
    self.answer_embeddings = nn.Embedding(self.linear_classif.out_features,
                                          self.linear_classif.in_features)
    # VQG modules
    self.linear_va_transform = nn.Linear(
        self.linear_classif.in_features,
        self.opt["vqg"]["vec2seq"]["dim_embedding"],
    )
    self.linear_a_att = nn.Linear(self.opt["dim_a"], self.opt["attention"]["dim_q"])
    self.linear_a_fusion = nn.Linear(
        self.opt["vqa"]["fusion"]["dim_mm"],
        self.opt["vqa"]["fusion"]["dim_hq"],
    )
    # Modules for Question Generation
    self.question_generation = getattr(
        vec2seq, opt["vqg"]["vec2seq"]["arch"])(vocab_words, opt["vqg"]["vec2seq"])
    # Sharable modules
    if self.opt.get("share_modules", True):
        print("Sharing Modules: [Attention] and [Fusion]")
        self.fusion_classif_vqg = self.fusion_classif_vqa
        self.attention_vqg = self.attention_vqa
        self.list_linear_v_fusion_vqg = self.list_linear_v_fusion_vqa
    else:
        print("Disable Module Sharing")
        self.fusion_classif_vqg = fusion.MutanFusion(
            self.opt["vqa"]["fusion"],
            visual_embedding=False,
            question_embedding=False,
        )
        self.attention_vqg = getattr(
            attention_modules, opt["attention"]["arch"])(opt, use_linear=False)
        self.list_linear_v_fusion_vqg = nn.ModuleList([
            nn.Linear(self.opt["dim_v"], dim_h)
            for i in range(self.opt["attention"]["nb_glimpses"])
        ])
    self.is_testing = False
    self.sample_num = 5
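# Sketch (assumption, for illustration): the extra class added to num_classes
# can serve as an [UNK] bucket, e.g. when mapping ground-truth answers to
# classifier indices, out-of-vocabulary answers fall back to the last index.
vocab_answers_toy = ['yes', 'no', '2']              # toy vocabulary
ans_to_idx = {a: i for i, a in enumerate(vocab_answers_toy)}
unk_idx = len(vocab_answers_toy)                    # index of the extra [UNK] class

def answer_index(answer):
    return ans_to_idx.get(answer, unk_idx)

assert answer_index('yes') == 0
assert answer_index('purple') == unk_idx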