Beispiel #1
0
def build_baseline0_newatt(dataset, num_hid):
    """Assemble a ``BaseModel`` whose attention module is ``NewAttention``.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``
            and ``num_ans_candidates``.
        num_hid: Size forwarded to the question embedding, the attention
            module, both ``FCNet`` layers and the classifier.

    Returns:
        A ``BaseModel`` wrapping the newly constructed sub-modules.
    """
    # NOTE(review): 300 / 0.0 / 0.5 are presumably the word-vector size and
    # dropout rates -- confirm against the constructors' signatures.
    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, num_hid, 1, False, 0.0)

    # Read once; the attention and the question FC layer both take this size.
    q_dim = question_embedding.num_hid
    v_dim = dataset.v_dim

    attention = NewAttention(v_dim, q_dim, num_hid)
    question_fc = FCNet([q_dim, num_hid])
    visual_fc = FCNet([v_dim, num_hid])
    answer_head = SimpleClassifier(
        num_hid, num_hid * 2, dataset.num_ans_candidates, 0.5)

    return BaseModel(word_embedding, question_embedding, attention,
                     question_fc, visual_fc, answer_head)
Beispiel #2
0
def build_baseline0(dataset):
    """Assemble a ``BaseModel`` with ``Attention`` and a fixed size of 1024.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``
            and ``num_ans_candidates``.

    Returns:
        A ``BaseModel`` wrapping the newly constructed sub-modules.
    """
    # Every sub-module in this variant is built with the same fixed size.
    hidden = 1024

    # NOTE(review): 300 / 0.0 / 0.5 are presumably the word-vector size and
    # dropout rates -- confirm against the constructors' signatures.
    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, hidden, 1, False, 0.0)

    v_dim = dataset.v_dim
    attention = Attention(v_dim, hidden, hidden)
    question_fc = FCNet([hidden, hidden])
    visual_fc = FCNet([v_dim, hidden])
    answer_head = SimpleClassifier(
        hidden, 2 * hidden, dataset.num_ans_candidates, 0.5)

    return BaseModel(word_embedding, question_embedding, attention,
                     question_fc, visual_fc, answer_head)
Beispiel #3
0
def visualize_vqe(dataset, num_hid, att_dim, dec_dim):
    """Assemble a ``VQE`` model: attention pipeline plus an ``STDecoder``.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``
            and ``explanation_dictionary.ntoken``.
        num_hid: Size forwarded to the question embedding, the attention
            module, both ``FCNet`` layers and the decoder.
        att_dim: Accepted but not used by this builder.
        dec_dim: Forwarded to ``STDecoder`` as its fourth argument.

    Returns:
        A ``VQE`` wrapping the newly constructed sub-modules.
    """
    # NOTE(review): 300 / 0.0 / 0.5 are presumably the word-vector size and
    # dropout rates -- confirm against the constructors' signatures.
    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, num_hid, 1, False, 0.0)

    q_dim = question_embedding.num_hid
    v_dim = dataset.v_dim

    attention = NewAttention(v_dim, q_dim, num_hid)
    question_fc = FCNet([q_dim, num_hid])
    visual_fc = FCNet([v_dim, num_hid])
    explanation_decoder = STDecoder(
        v_dim, num_hid, 300, dec_dim,
        dataset.explanation_dictionary.ntoken, 1, 0.5)

    return VQE(word_embedding, question_embedding, attention,
               question_fc, visual_fc, explanation_decoder)
Beispiel #4
0
def build_vqae_newatt(dataset, num_hid, att_dim, dec_dim):
    """Assemble a ``VQAE`` model with a classifier and an ``STDecoder``.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Size forwarded to the question embedding, the attention
            module, both ``FCNet`` layers, the classifier and the decoder.
        att_dim: Accepted but not used by this builder.
        dec_dim: Forwarded to ``STDecoder`` as its fourth argument.

    Returns:
        A ``VQAE`` wrapping the newly constructed sub-modules.
    """
    # NOTE(review): 300 / 0.0 / 0.5 are presumably the word-vector size and
    # dropout rates -- confirm against the constructors' signatures.
    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, num_hid, 1, False, 0.0)

    q_dim = question_embedding.num_hid
    v_dim = dataset.v_dim

    attention = NewAttention(v_dim, q_dim, num_hid)
    question_fc = FCNet([q_dim, num_hid])
    visual_fc = FCNet([v_dim, num_hid])
    answer_head = SimpleClassifier(
        num_hid, num_hid * 2, dataset.num_ans_candidates, 0.5)
    explanation_decoder = STDecoder(
        v_dim, num_hid, 300, dec_dim,
        dataset.explanation_dictionary.ntoken, 1, 0.5)

    return VQAE(word_embedding, question_embedding, attention, question_fc,
                visual_fc, answer_head, explanation_decoder)
Beispiel #5
0
def build_lstm_vqa(dataset, num_hid, att_dim, dec_dim):
    """Assemble an ``LSTM_VQA`` model around a ``SATDecoder`` and a GRU cell.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Size forwarded to the question embedding, the attention
            module, both ``FCNet`` layers, the decoder, the GRU cell and the
            classifier.
        att_dim: Forwarded to ``SATDecoder`` as its fourth argument.
        dec_dim: Forwarded to ``SATDecoder`` as its fifth argument.

    Returns:
        An ``LSTM_VQA`` wrapping the newly constructed sub-modules.
    """
    # NOTE(review): 300 / 0.0 / 0.5 are presumably the word-vector size and
    # dropout rates -- confirm against the constructors' signatures.
    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, num_hid, 1, False, 0.0)

    q_dim = question_embedding.num_hid
    v_dim = dataset.v_dim

    attention = NewAttention(v_dim, q_dim, num_hid)
    question_fc = FCNet([q_dim, num_hid])
    visual_fc = FCNet([v_dim, num_hid])
    explanation_decoder = SATDecoder(
        v_dim, num_hid, 300, att_dim, dec_dim,
        dataset.explanation_dictionary.ntoken, 1, 0.5)

    # Despite the builder's name, the recurrent unit created here is a
    # single GRUCell taking v_dim-sized input and num_hid-sized state.
    attended_cell = nn.GRUCell(v_dim, num_hid)
    answer_head = SimpleClassifier(
        num_hid, 2 * num_hid, dataset.num_ans_candidates, 0.5)

    return LSTM_VQA(word_embedding, question_embedding, attention,
                    question_fc, visual_fc, explanation_decoder,
                    attended_cell, answer_head)
Beispiel #6
0
def build_vqae2_newatt(dataset, num_hid, emb_rnn='GRU'):
    """Assemble a ``VQAE2`` model with an explanation-embedding branch.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Forwarded only to ``ExplainEmbedding``; every other
            sub-module is built with a fixed size of 1024.
        emb_rnn: Forwarded to ``ExplainEmbedding`` as its last argument.

    Returns:
        A ``VQAE2`` wrapping the newly constructed sub-modules.
    """
    # NOTE(review): the question/visual/decoder path ignores ``num_hid`` and
    # is pinned to 1024 -- confirm this is intentional before changing it.
    hidden = 1024

    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, hidden, 1, False, 0.0)

    v_dim = dataset.v_dim
    attention = NewAttention(v_dim, hidden, hidden)
    question_fc = FCNet([hidden, hidden])
    visual_fc = FCNet([v_dim, hidden])
    answer_head = SimpleClassifier(
        hidden, hidden * 2, dataset.num_ans_candidates, 0.5)
    explanation_decoder = STDecoder(
        v_dim, hidden, 300, hidden,
        dataset.explanation_dictionary.ntoken, 1, 0.5)

    # The explanation embedding is handed the decoder's own ``embed``
    # attribute rather than a separately constructed embedding.
    explanation_embedding = ExplainEmbedding(
        explanation_decoder.embed, 300, num_hid, 1, False, 0.0, emb_rnn)
    explanation_fc = FCNet([explanation_embedding.num_hid, hidden])

    return VQAE2(word_embedding, question_embedding, attention, question_fc,
                 visual_fc, answer_head, explanation_decoder,
                 explanation_embedding, explanation_fc)
Beispiel #7
0
def build_vqae3_split(dataset, num_hid, att_dim, dec_dim):
    """Assemble a ``Split_VQAE`` model with two attention/FC branches.

    Args:
        dataset: Object exposing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Size forwarded to the question embedding, both branches,
            the classifier, the decoder, the explanation embedding and the
            two trailing ``FCNet`` layers.
        att_dim: Accepted but not used by this builder.
        dec_dim: Forwarded to ``STDecoder`` as its fourth argument.

    Returns:
        A ``Split_VQAE`` wrapping the newly constructed sub-modules.
    """
    # NOTE(review): 300 / 0.0 / 0.5 are presumably the word-vector size and
    # dropout rates -- confirm against the constructors' signatures.
    vocab_size = dataset.question_dictionary.ntoken
    word_embedding = WordEmbedding(vocab_size, 300, 0.0)
    question_embedding = QuestionEmbedding(300, num_hid, 1, False, 0.0)

    q_dim = question_embedding.num_hid
    v_dim = dataset.v_dim

    # Two structurally identical (attention, question FC, visual FC) triples.
    # Each tuple is evaluated left to right, branch 1 before branch 2, so the
    # modules are created in the same order as six separate statements.
    branches = [
        (NewAttention(v_dim, q_dim, num_hid),
         FCNet([q_dim, num_hid]),
         FCNet([v_dim, num_hid]))
        for _ in range(2)
    ]
    (att_a, question_fc_a, visual_fc_a), \
        (att_b, question_fc_b, visual_fc_b) = branches

    answer_head = SimpleClassifier(
        num_hid, num_hid * 2, dataset.num_ans_candidates, 0.5)
    explanation_decoder = STDecoder(
        v_dim, num_hid, 300, dec_dim,
        dataset.explanation_dictionary.ntoken, 1, 0.5)

    # The explanation embedding is handed the decoder's own ``embed``
    # attribute rather than a separately constructed embedding.
    explanation_embedding = ExplainEmbedding(
        explanation_decoder.embed, 300, num_hid, 1, False, 0.0, 'GRU')

    transform_vq = FCNet([num_hid, num_hid])
    transform_e = FCNet([explanation_embedding.num_hid, num_hid])

    return Split_VQAE(word_embedding, question_embedding,
                      att_a, question_fc_a, visual_fc_a,
                      att_b, question_fc_b, visual_fc_b,
                      answer_head, explanation_decoder,
                      explanation_embedding, transform_vq, transform_e)