Example #1
class Ramen(nn.Module):
    def __init__(self, config):
        super(Ramen, self).__init__()
        self.config = config
        self.mmc_net = MultiModalCore(config)
        self.w_emb = WordEmbedding(config.w_emb_size, 300)
        self.w_emb.init_embedding(config.glove_file)
        self.q_emb = QuestionEmbedding(
            300,
            self.config.q_emb_dim,
            1,
            bidirect=True,
            dropout=0,
            rnn_type=config.question_rnn_type,
            dropout_before_rnn=config.question_dropout_before_rnn,
            dropout_after_rnn=config.question_dropout_after_rnn)

        clf_in_size = config.mmc_aggregator_dim * 2
        classifier_layers = []
        for ix, size in enumerate(config.classifier_sizes):
            in_s = clf_in_size if ix == 0 else config.classifier_sizes[ix - 1]
            out_s = size
            lin = nn.Linear(in_s, out_s)
            classifier_layers.append(lin)
            classifier_layers.append(
                getattr(nonlinearity, config.classifier_nonlinearity)())
            classifier_layers.append(nn.Dropout(p=config.classifier_dropout))

        if config.pre_classification_dropout is not None and config.pre_classification_dropout > 0:
            self.pre_classification_dropout = nn.Dropout(
                p=config.pre_classification_dropout)
        else:
            self.pre_classification_dropout = None
        self.pre_classification_layers = nn.Sequential(*classifier_layers)
        self.classifier = nn.Linear(out_s, config.num_ans_candidates)
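The classifier above is assembled by a loop that chains Linear -> nonlinearity -> Dropout blocks and hands them to nn.Sequential. A minimal self-contained sketch of the same pattern (build_mlp is a hypothetical helper; the sizes, ReLU, and dropout rate are illustrative, not taken from any real config):

import torch
import torch.nn as nn

def build_mlp(in_size, hidden_sizes, dropout=0.5):
    # Stack Linear -> nonlinearity -> Dropout per entry in hidden_sizes,
    # mirroring the classifier_layers loop above (illustrative sketch).
    layers = []
    prev = in_size
    for size in hidden_sizes:
        layers.append(nn.Linear(prev, size))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(p=dropout))
        prev = size
    return nn.Sequential(*layers), prev

mlp, out_size = build_mlp(2048, [1024, 512])
print(mlp(torch.randn(8, 2048)).shape)  # torch.Size([8, 512])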
Example #2
class RelationNetwork(nn.Module):
    def __init__(self, config):
        super(RelationNetwork, self).__init__()
        self.config = config
        self.w_emb = WordEmbedding(config.w_emb_size, 300)
        self.w_emb.init_embedding(config.glove_file)
        self.q_emb = QuestionEmbedding(300,
                                       self.config.q_emb_dim,
                                       1,
                                       bidirect=False,
                                       dropout=0,
                                       rnn_type='GRU')
        self.relation_module = PairwiseRelationModule(
            config.v_dim + config.q_emb_dim, config.interactor_sizes,
            config.aggregator_sizes)
        self.classifier = nn.Linear(config.aggregator_sizes[-1],
                                    config.num_ans_candidates)

    def forward(self, v, b, q, a=None, qlen=None):
        """Forward

        v: [batch, num_objs, v_dim]
        b: [batch, num_objs, b_dim]
        q: [batch_size, seq_length]

        return: logits
        """
        q = self.w_emb(q)
        q_words_emb, q_emb = self.q_emb(q)
        # Broadcast the sentence-level question embedding to every object
        q_emb_repeated = q_emb.unsqueeze(1)
        q_emb_repeated = q_emb_repeated.repeat(1, v.shape[1], 1)
        vq_paired = torch.cat((v, q_emb_repeated), dim=2)
        rel, _ = self.relation_module(vq_paired)
        logits = self.classifier(rel)
        return logits
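The forward pass pairs each object feature with the question embedding by broadcasting it across the object axis before concatenation. The shape arithmetic, sketched with dummy tensors (all sizes here are illustrative):

import torch

batch, num_objs, v_dim, q_dim = 4, 36, 2048, 1024  # illustrative sizes
v = torch.randn(batch, num_objs, v_dim)
q_emb = torch.randn(batch, q_dim)

# Repeat the question vector once per object, then concatenate along features
q_rep = q_emb.unsqueeze(1).repeat(1, num_objs, 1)  # [batch, num_objs, q_dim]
vq = torch.cat((v, q_rep), dim=2)                  # [batch, num_objs, v_dim + q_dim]
print(vq.shape)  # torch.Size([4, 36, 3072])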
Example #3
class Ramen(nn.Module):
    def __init__(self, config):
        super(Ramen, self).__init__()
        self.config = config
        self.mmc_net = MultiModalCore(config)
        self.w_emb = WordEmbedding(config.w_emb_size, 300)
        self.w_emb.init_embedding(config.glove_file)
        self.q_emb = QuestionEmbedding(
            self.config,
            300,
            self.config.q_emb_dim,
            1,
            bidirect=True,
            dropout=0,
            rnn_type=config.question_rnn_type,
            dropout_before_rnn=config.question_dropout_before_rnn,
            dropout_after_rnn=config.question_dropout_after_rnn)

        clf_in_size = config.mmc_aggregator_dim * 2
        classifier_layers = []
        for ix, size in enumerate(config.classifier_sizes):
            in_s = clf_in_size if ix == 0 else config.classifier_sizes[ix - 1]
            out_s = size
            lin = nn.Linear(in_s, out_s)
            classifier_layers.append(lin)
            classifier_layers.append(
                getattr(nonlinearity, config.classifier_nonlinearity)())
            classifier_layers.append(nn.Dropout(p=config.classifier_dropout))

        if config.pre_classification_dropout is not None and config.pre_classification_dropout > 0:
            self.pre_classification_dropout = nn.Dropout(
                p=config.pre_classification_dropout)
        else:
            self.pre_classification_dropout = None
        self.pre_classification_layers = nn.Sequential(*classifier_layers)
        self.classifier = nn.Linear(out_s, config.num_ans_candidates)

    def forward(self, v, b, q, a=None, qlen=None):
        """Forward

        v: [batch, num_objs, v_dim]
        b: [batch, num_objs, b_dim]
        q: [batch_size, seq_length]

        return: logits
        """
        batch_size, num_objs, v_emb_dim = v.size()
        b = b[:, :, :4]  # keep only the four bounding-box coordinates
        q = self.w_emb(q)
        q_emb = self.q_emb(q, qlen)
        mmc, mmc_aggregated = self.mmc_net(
            v, b, q_emb)  # B x num_objs x num_hid and B x num_hid
        if self.pre_classification_dropout is not None:
            mmc_aggregated = self.pre_classification_dropout(mmc_aggregated)
        final_emb = self.pre_classification_layers(mmc_aggregated)
        logits = self.classifier(final_emb)
        out = {'logits': logits, 'q_emb': q_emb}
        return out
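The optional pre-classification dropout is stored as None and guarded with an if inside forward. An equivalent branch-free formulation (a sketch with a hypothetical Head class, not the repo's code) substitutes nn.Identity for the None case:

import torch
import torch.nn as nn

class Head(nn.Module):
    # nn.Identity stands in for "no dropout", removing the None check
    # that Ramen.forward performs before applying the dropout.
    def __init__(self, in_dim, num_classes, pre_dropout=0.0):
        super().__init__()
        self.pre_dropout = nn.Dropout(pre_dropout) if pre_dropout > 0 else nn.Identity()
        self.classifier = nn.Linear(in_dim, num_classes)

    def forward(self, x):
        return self.classifier(self.pre_dropout(x))

head = Head(512, 3000, pre_dropout=0.2)
print(head(torch.randn(8, 512)).shape)  # torch.Size([8, 3000])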
Example #4
class Ban(nn.Module):
    def __init__(self, config):
        super(Ban, self).__init__()
        self.config = config
        self.w_emb = WordEmbedding(config.w_emb_size, 300, .0)
        self.q_emb = UpDnQuestionEmbedding(300, config.q_emb_dim, 1, False, .0)
        self.v_att = BiAttention(config.v_dim, config.num_hid, config.num_hid,
                                 config.glimpse)
        self.b_net = []
        self.q_prj = []
        self.c_prj = []
        self.objects = 10  # minimum number of boxes
        for i in range(config.glimpse):
            self.b_net.append(
                BCNet(config.v_dim, config.num_hid, config.num_hid, None, k=1))
            self.q_prj.append(FCNet([config.num_hid, config.num_hid], '', .2))
            self.c_prj.append(
                FCNet([self.objects + 1, config.num_hid], 'ReLU', .0))

        self.b_net = nn.ModuleList(self.b_net)
        self.q_prj = nn.ModuleList(self.q_prj)
        self.c_prj = nn.ModuleList(self.c_prj)

        self.classifier = SimpleClassifier(config.num_hid, config.num_hid * 2,
                                           config.num_ans_candidates, .5)
        self.counter = Counter(self.objects)
        self.drop = nn.Dropout(.5)
        self.tanh = nn.Tanh()
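Ban accumulates b_net, q_prj, and c_prj in plain Python lists and only afterwards wraps them in nn.ModuleList. That wrap is what registers the sub-modules, so their parameters appear in .parameters() and reach the optimizer. A self-contained sketch of the difference (class names and sizes are illustrative):

import torch.nn as nn

glimpse, num_hid = 4, 1024  # illustrative sizes

class WithList(nn.Module):
    def __init__(self):
        super().__init__()
        # Plain list: the Linear layers are NOT registered as sub-modules
        self.prj = [nn.Linear(num_hid, num_hid) for _ in range(glimpse)]

class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.ModuleList registers each layer's parameters with the module
        self.prj = nn.ModuleList([nn.Linear(num_hid, num_hid) for _ in range(glimpse)])

print(sum(p.numel() for p in WithList().parameters()))        # 0: nothing registered
print(sum(p.numel() for p in WithModuleList().parameters()))  # 4 * (1024*1024 + 1024)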
Example #5
class UpDn(nn.Module):
    def __init__(self, config):
        super(UpDn, self).__init__()
        self.w_emb = WordEmbedding(config.w_emb_size, 300, 0.0)
        self.q_emb = UpDnQuestionEmbedding(300, config.q_emb_dim, 1, False,
                                           0.0)
        self.v_att = UpDnAttention(config.v_dim, self.q_emb.num_hid,
                                   config.num_hid)
        self.q_net = FCNet([self.q_emb.num_hid, config.num_hid])
        self.v_net = FCNet([config.v_dim, config.num_hid])
        self.classifier = SimpleClassifier(config.num_hid, config.num_hid * 2,
                                           config.num_ans_candidates, 0.5)
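UpDn's forward pass is not included in this snippet; in the standard bottom-up/top-down implementation, the attention-weighted image feature and the question feature are projected by v_net and q_net and fused by elementwise product before the classifier. A sketch under that assumption, with random tensors standing in for the learned modules:

import torch
import torch.nn as nn

batch, num_objs, v_dim, num_hid = 4, 36, 2048, 1024  # illustrative sizes
v = torch.randn(batch, num_objs, v_dim)
q_repr = torch.randn(batch, num_hid)                 # stand-in for q_net(q_emb)

# Stand-in for UpDnAttention: one softmax-normalized weight per object
att = torch.softmax(torch.randn(batch, num_objs, 1), dim=1)
v_att = (att * v).sum(1)                             # [batch, v_dim]

v_net = nn.Linear(v_dim, num_hid)                    # stand-in for the v_net FCNet
joint = q_repr * torch.relu(v_net(v_att))            # elementwise fusion
print(joint.shape)                                   # torch.Size([4, 1024])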