def __init__(self, words_list):
    super(Net, self).__init__()
    question_features = 1024
    vision_features = config.output_features
    glimpses = 2

    # question encoder: word embeddings fed through an LSTM
    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=question_features,
        drop=0.0,
    )
    self.attention = Attention(
        v_features=vision_features,
        q_features=question_features,
        mid_features=1024,
        glimpses=glimpses,
        drop=0.2,
    )
    # classifier consumes the attended glimpse features plus the question encoding
    self.classifier = Classifier(
        in_features=(glimpses * vision_features, question_features),
        mid_features=1024,
        out_features=config.max_answers,
        drop=0.5,
    )
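# A minimal sketch of why the classifier above is sized glimpses * vision_features:
# with glimpses=2, the attention typically yields one attended vector per glimpse,
# and the vectors are concatenated. The helper name and shapes below are
# hypothetical illustrations, not part of this repo.
def _sketch_glimpse_concat():
    import torch
    batch, vision_features, glimpses = 8, 2048, 2
    attended = [torch.randn(batch, vision_features) for _ in range(glimpses)]
    v = torch.cat(attended, dim=-1)  # (8, glimpses * vision_features) = (8, 4096)
    return v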
def __init__(self, words_list):
    super(Net, self).__init__()
    self.question_features = config.question_features
    self.vision_features = config.output_features
    self.spatial_features = config.spatial_features
    self.hidden_features = config.hidden_features
    self.num_inter_head = config.num_inter_head
    self.num_intra_head = config.num_intra_head
    self.num_block = config.num_block
    self.spa_block = config.spa_block
    self.que_block = config.que_block
    self.visual_normalization = config.visual_normalization
    self.iteration = config.iteration
    assert self.hidden_features % self.num_inter_head == 0
    assert self.hidden_features % self.num_intra_head == 0

    # add an '__unknown__' key to the front of the available vocab list
    words_list = list(words_list)
    words_list.insert(0, '__unknown__')

    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=self.question_features,
        drop=0.1,
    )
    self.interIntraBlocks = SingleBlock(
        num_block=self.num_block,            # 2
        spa_block=self.spa_block,            # 2
        que_block=self.que_block,            # 2
        iteration=self.iteration,            # 2
        v_size=self.vision_features,         # 2048
        q_size=self.question_features,       # 1280
        b_size=self.spatial_features,        # 4
        output_size=self.hidden_features,    # 512
        num_inter_head=self.num_inter_head,  # 8
        num_intra_head=self.num_intra_head,  # 8
        drop=0.1,
    )
    self.classifier = Classifier(
        in_features=self.hidden_features,
        mid_features=config.mid_features,
        out_features=config.max_answers,
        drop=config.classifier_dropout,
    )

    # Xavier-initialise all linear and convolutional layers
    for m in self.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, words_list):
    super(Net, self).__init__()
    mid_features = 1024
    question_features = mid_features
    vision_features = config.output_features
    self.top_k_sparse = 16
    num_kernels = 8
    sparse_graph = True

    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=question_features,
        drop=0.0,
    )
    # pseudo-coordinates between object boxes, used by the graph convolutions
    self.pseudo_coord = PseudoCoord()
    self.graph_learner = GraphLearner(
        v_features=vision_features + 4,  # visual features plus 4 box coordinates
        q_features=question_features,
        mid_features=512,
        dropout=0.5,
        sparse_graph=sparse_graph,
    )
    self.graph_conv1 = GraphConv(
        v_features=vision_features + 4,
        mid_features=mid_features * 2,
        num_kernels=num_kernels,
        bias=False,
    )
    self.graph_conv2 = GraphConv(
        v_features=mid_features * 2,
        mid_features=mid_features,
        num_kernels=num_kernels,
        bias=False,
    )
    self.classifier = Classifier(
        in_features=mid_features,
        mid_features=mid_features * 2,
        out_features=config.max_answers,
        drop=0.5,
    )
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(0.5)
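# A minimal sketch of the `vision_features + 4` input size above: it suggests
# that 4 bounding-box coordinates are concatenated onto each object's visual
# features before graph learning. The shapes and the assumed (x1, y1, x2, y2)
# box layout below are hypothetical, not confirmed by this repo.
def _sketch_box_concat():
    import torch
    batch, num_objects, vision_features = 8, 36, 2048
    v = torch.randn(batch, num_objects, vision_features)  # object features
    b = torch.rand(batch, num_objects, 4)                 # normalised boxes
    return torch.cat([v, b], dim=-1)                      # (8, 36, vision_features + 4)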
def __init__(self, words_list):
    super(Net, self).__init__()
    self.question_features = 1280
    self.vision_features = config.output_features
    self.hidden_features = 512
    self.num_inter_head = 8
    self.num_intra_head = 8
    self.num_block = 2
    self.visual_normalization = True
    assert self.hidden_features % self.num_inter_head == 0
    assert self.hidden_features % self.num_intra_head == 0

    words_list = list(words_list)
    words_list.insert(0, '__unknown__')

    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=self.question_features,
        drop=0.1,
    )
    self.interIntraBlocks = SingleBlock(
        num_block=self.num_block,
        v_size=self.vision_features,
        q_size=self.question_features,
        output_size=self.hidden_features,
        num_inter_head=self.num_inter_head,
        num_intra_head=self.num_intra_head,
        drop=0.1,
    )
    self.classifier = Classifier(
        in_features=self.hidden_features,
        mid_features=1024,
        out_features=config.max_answers,
        drop=0.1,
    )

    for m in self.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, words_list):
    super(Net, self).__init__()
    num_hid = 1280
    question_features = num_hid
    vision_features = config.output_features
    glimpses = 12
    objects = 10

    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=question_features,
        use_hidden=False,  # use whole LSTM output, not just final hidden state
        drop=0.0,
    )
    # counting module over object proposals
    self.count = Counter(objects)
    self.attention = BiAttention(
        v_features=vision_features,
        q_features=question_features,
        mid_features=num_hid,
        glimpses=glimpses,
        drop=0.5,
    )
    self.apply_attention = ApplyAttention(
        v_features=vision_features,
        q_features=question_features,
        mid_features=num_hid,
        glimpses=glimpses,
        num_obj=objects,
        count=self.count,
        drop=0.2,
    )
    self.classifier = Classifier(
        in_features=num_hid,
        mid_features=num_hid * 2,
        out_features=config.max_answers,
        drop=0.5,
    )
def __init__(self, words_list):
    super(Net, self).__init__()
    self.question_features = 1280
    self.vision_features = config.output_features
    self.hidden_features = 512
    self.num_inter_head = 8
    self.num_intra_head = 8
    self.num_block = 1
    assert self.hidden_features % self.num_inter_head == 0
    assert self.hidden_features % self.num_intra_head == 0

    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=self.question_features,
        use_hidden=False,  # use whole output, not just final hidden
        drop=0.0,
    )
    self.interIntraBlocks = MultiBlock(
        num_block=self.num_block,
        v_size=self.vision_features,
        q_size=self.question_features,
        output_size=self.hidden_features,
        num_inter_head=self.num_inter_head,
        num_intra_head=self.num_intra_head,
        drop=0.1,
    )
    self.classifier = Classifier(
        in_features=self.hidden_features,
        mid_features=2048,
        out_features=config.max_answers,
        drop=0.5,
    )
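# A minimal instantiation sketch, assuming the surrounding repo imports
# (torch, config, word_embedding, and the block/classifier modules) resolve.
# `vocab` below is a hypothetical token list, not the repo's real vocabulary,
# and the forward signature of each Net variant is defined elsewhere.
def _sketch_instantiation():
    vocab = ['what', 'colour', 'is', 'the', 'cat']
    net = Net(vocab)  # any of the Net variants above
    n_params = sum(p.numel() for p in net.parameters())  # rough model size
    return net, n_params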