def __init__(self, words_list):
    """Build the network: question encoder, glimpse attention, and classifier.

    Args:
        words_list: vocabulary handed to the text processor.
    """
    super(Net, self).__init__()
    q_feat = 1024
    v_feat = config.output_features
    n_glimpses = 2

    # Question encoder: word embeddings followed by an LSTM.
    self.text = word_embedding.TextProcessor(
        classes=words_list,
        embedding_features=300,
        lstm_features=q_feat,
        drop=0.0,
    )

    # Question-guided attention over the visual features (two glimpses).
    self.attention = Attention(
        v_features=v_feat,
        q_features=q_feat,
        mid_features=1024,
        glimpses=n_glimpses,
        drop=0.2,
    )

    # Fuse the attended vision features with the question and score answers.
    self.classifier = Classifier(
        in_features=(n_glimpses * v_feat, q_feat),
        mid_features=1024,
        out_features=config.max_answers,
        drop=0.5,
    )
    def __init__(self, words_list):
        """Inter/intra-modality attention net, fully parameterised by `config`.

        Args:
            words_list: vocabulary; an '__unknown__' token is prepended at
                index 0 before it is handed to the text processor.
        """
        super(Net, self).__init__()

        # Hyper-parameters, all read from the global config module.
        self.question_features = config.question_features
        self.vision_features = config.output_features
        self.spatial_features = config.spatial_features
        self.hidden_features = config.hidden_features
        self.num_inter_head = config.num_inter_head
        self.num_intra_head = config.num_intra_head
        self.num_block = config.num_block
        self.spa_block = config.spa_block
        self.que_block = config.que_block
        self.visual_normalization = config.visual_normalization
        self.iteration = config.iteration

        # Multi-head attention needs the hidden size to split evenly per head.
        assert self.hidden_features % self.num_inter_head == 0
        assert self.hidden_features % self.num_intra_head == 0

        # Put the out-of-vocabulary token at index 0 of the vocab.
        words_list = ['__unknown__'] + list(words_list)

        self.text = word_embedding.TextProcessor(
            classes=words_list,
            embedding_features=300,
            lstm_features=self.question_features,
            drop=0.1,
        )

        self.interIntraBlocks = SingleBlock(
            num_block=self.num_block,
            spa_block=self.spa_block,
            que_block=self.que_block,
            iteration=self.iteration,
            v_size=self.vision_features,
            q_size=self.question_features,
            b_size=self.spatial_features,
            output_size=self.hidden_features,
            num_inter_head=self.num_inter_head,
            num_intra_head=self.num_intra_head,
            drop=0.1,
        )

        self.classifier = Classifier(
            in_features=self.hidden_features,
            mid_features=config.mid_features,
            out_features=config.max_answers,
            drop=config.classifier_dropout,
        )

        # Xavier-initialise every linear/conv weight; zero the biases.
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    module.bias.data.zero_()
# Example #3 (votes: 0)
    def __init__(self, words_list):
        """Graph-based VQA net: learn a sparse object graph, then convolve twice.

        Args:
            words_list: vocabulary handed to the text processor.
        """
        super(Net, self).__init__()
        hidden = 1024
        q_feat = hidden
        v_feat = config.output_features
        kernel_count = 8
        learn_sparse_graph = True
        # Number of strongest edges kept per node in the sparse graph.
        self.top_k_sparse = 16

        # Question encoder: embeddings + LSTM, no dropout.
        self.text = word_embedding.TextProcessor(
            classes=words_list,
            embedding_features=300,
            lstm_features=q_feat,
            drop=0.0,
        )

        # Pseudo-coordinates contribute the extra 4 dims appended to v_feat.
        self.pseudo_coord = PseudoCoord()

        self.graph_learner = GraphLearner(
            v_features=v_feat + 4,
            q_features=q_feat,
            mid_features=512,
            dropout=0.5,
            sparse_graph=learn_sparse_graph,
        )

        # Two stacked graph convolutions: expand to 2*hidden, reduce to hidden.
        self.graph_conv1 = GraphConv(
            v_features=v_feat + 4,
            mid_features=hidden * 2,
            num_kernels=kernel_count,
            bias=False,
        )
        self.graph_conv2 = GraphConv(
            v_features=hidden * 2,
            mid_features=hidden,
            num_kernels=kernel_count,
            bias=False,
        )

        self.classifier = Classifier(
            in_features=hidden,
            mid_features=hidden * 2,
            out_features=config.max_answers,
            drop=0.5,
        )

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
# Example #4 (votes: 0)
    def __init__(self, words_list):
        """Single-block inter/intra-attention net with hard-coded hyper-parameters.

        Args:
            words_list: vocabulary; an '__unknown__' token is prepended at
                index 0 before it is handed to the text processor.
        """
        super(Net, self).__init__()
        self.question_features = 1280
        self.vision_features = config.output_features
        self.hidden_features = 512
        self.num_inter_head = 8
        self.num_intra_head = 8
        self.num_block = 2
        self.visual_normalization = True

        # Head counts must divide the hidden size evenly.
        assert self.hidden_features % self.num_inter_head == 0
        assert self.hidden_features % self.num_intra_head == 0

        # Put the out-of-vocabulary token at index 0 of the vocab.
        words_list = ['__unknown__'] + list(words_list)

        self.text = word_embedding.TextProcessor(
            classes=words_list,
            embedding_features=300,
            lstm_features=self.question_features,
            drop=0.1,
        )

        self.interIntraBlocks = SingleBlock(
            num_block=self.num_block,
            v_size=self.vision_features,
            q_size=self.question_features,
            output_size=self.hidden_features,
            num_inter_head=self.num_inter_head,
            num_intra_head=self.num_intra_head,
            drop=0.1,
        )

        self.classifier = Classifier(
            in_features=self.hidden_features,
            mid_features=1024,
            out_features=config.max_answers,
            drop=0.1,
        )

        # Xavier-initialise every linear/conv weight; zero the biases.
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    module.bias.data.zero_()
    def __init__(self, words_list):
        """Bilinear-attention net with an object Counter module.

        Args:
            words_list: vocabulary handed to the text processor.
        """
        super(Net, self).__init__()
        hidden = 1280
        q_feat = hidden
        v_feat = config.output_features
        n_glimpses = 12
        n_objects = 10

        # Question encoder; use_hidden=False keeps the full LSTM output
        # sequence rather than only the final hidden state.
        self.text = word_embedding.TextProcessor(
            classes=words_list,
            embedding_features=300,
            lstm_features=q_feat,
            use_hidden=False,
            drop=0.0,
        )

        # Counting module over the detected objects.
        self.count = Counter(n_objects)

        self.attention = BiAttention(
            v_features=v_feat,
            q_features=q_feat,
            mid_features=hidden,
            glimpses=n_glimpses,
            drop=0.5,
        )

        self.apply_attention = ApplyAttention(
            v_features=v_feat,
            q_features=q_feat,
            mid_features=hidden,
            glimpses=n_glimpses,
            num_obj=n_objects,
            count=self.count,
            drop=0.2,
        )

        self.classifier = Classifier(
            in_features=hidden,
            mid_features=hidden * 2,
            out_features=config.max_answers,
            drop=0.5,
        )
# Example #6 (votes: 0)
    def __init__(self, words_list):
        """Multi-block inter/intra-attention net (one stacked block).

        Args:
            words_list: vocabulary handed to the text processor.
        """
        super(Net, self).__init__()
        self.question_features = 1280
        self.vision_features = config.output_features
        self.hidden_features = 512
        self.num_inter_head = 8
        self.num_intra_head = 8
        self.num_block = 1

        # The hidden size must split evenly across the attention heads.
        assert self.hidden_features % self.num_inter_head == 0
        assert self.hidden_features % self.num_intra_head == 0

        # Question encoder; use the whole LSTM output sequence, not just the
        # final hidden state.
        self.text = word_embedding.TextProcessor(
            classes=words_list,
            embedding_features=300,
            lstm_features=self.question_features,
            use_hidden=False,
            drop=0.0,
        )

        self.interIntraBlocks = MultiBlock(
            num_block=self.num_block,
            v_size=self.vision_features,
            q_size=self.question_features,
            output_size=self.hidden_features,
            num_inter_head=self.num_inter_head,
            num_intra_head=self.num_intra_head,
            drop=0.1,
        )

        self.classifier = Classifier(
            in_features=self.hidden_features,
            mid_features=2048,
            out_features=config.max_answers,
            drop=0.5,
        )