def build_baseline0_newatt(dataset, num_hid):
    """Assemble a ``BaseModel`` that attends over visual features with ``NewAttention``.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``
            and ``num_ans_candidates``.
        num_hid: Hidden size handed to the question embedding, the attention
            module, both ``FCNet`` projections and the classifier.

    Returns:
        A ``BaseModel`` wrapping the freshly constructed sub-modules.
    """
    # The same width (300) is given to WordEmbedding and QuestionEmbedding.
    emb_dim = 300

    word_embedding = WordEmbedding(
        dataset.question_dictionary.ntoken, emb_dim, 0.0
    )
    question_embedding = QuestionEmbedding(emb_dim, num_hid, 1, False, 0.0)

    # Downstream modules are sized from what the question encoder reports.
    question_dim = question_embedding.num_hid
    attention = NewAttention(dataset.v_dim, question_dim, num_hid)
    question_proj = FCNet([question_dim, num_hid])
    visual_proj = FCNet([dataset.v_dim, num_hid])

    answer_head = SimpleClassifier(
        num_hid, num_hid * 2, dataset.num_ans_candidates, 0.5
    )
    return BaseModel(
        word_embedding,
        question_embedding,
        attention,
        question_proj,
        visual_proj,
        answer_head,
    )
def build_baseline0(dataset, num_hid=1024):
    """Assemble a ``BaseModel`` that uses the plain ``Attention`` module.

    The hidden size used to be hard-coded to 1024 in every sub-module. It is
    now a parameter so this builder can be called like the other builders
    that take ``num_hid``. The default keeps the previous configuration, so
    existing ``build_baseline0(dataset)`` calls build the same model.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``
            and ``num_ans_candidates``.
        num_hid: Hidden size shared by the question embedding, the attention
            module, both ``FCNet`` projections and the classifier.
            Defaults to 1024.

    Returns:
        A ``BaseModel`` wrapping the freshly constructed sub-modules.
    """
    w_emb = WordEmbedding(dataset.question_dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    # num_hid is passed directly (not q_emb.num_hid) so the default call
    # supplies exactly the literal sizes the previous version used.
    v_att = Attention(dataset.v_dim, num_hid, num_hid)
    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    classifier = SimpleClassifier(
        num_hid, 2 * num_hid, dataset.num_ans_candidates, 0.5
    )
    return BaseModel(w_emb, q_emb, v_att, q_net, v_net, classifier)
def visualize_vqe(dataset, num_hid, att_dim, dec_dim):
    """Assemble a ``VQE`` model: attention encoder plus an ``STDecoder`` generator.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``
            and ``explanation_dictionary.ntoken``.
        num_hid: Hidden size handed to the question embedding, attention,
            both ``FCNet`` projections and the decoder.
        att_dim: Accepted but not used by this builder.
        dec_dim: Size forwarded to ``STDecoder``.

    Returns:
        A ``VQE`` wrapping the freshly constructed sub-modules.
    """
    # The same width (300) is given to both embeddings and to the decoder.
    emb_dim = 300

    word_embedding = WordEmbedding(
        dataset.question_dictionary.ntoken, emb_dim, 0.0
    )
    question_embedding = QuestionEmbedding(emb_dim, num_hid, 1, False, 0.0)

    question_dim = question_embedding.num_hid
    attention = NewAttention(dataset.v_dim, question_dim, num_hid)
    question_proj = FCNet([question_dim, num_hid])
    visual_proj = FCNet([dataset.v_dim, num_hid])

    explanation_decoder = STDecoder(
        dataset.v_dim,
        num_hid,
        emb_dim,
        dec_dim,
        dataset.explanation_dictionary.ntoken,
        1,
        0.5,
    )
    return VQE(
        word_embedding,
        question_embedding,
        attention,
        question_proj,
        visual_proj,
        explanation_decoder,
    )
def build_vqae_newatt(dataset, num_hid, att_dim, dec_dim):
    """Assemble a ``VQAE`` model with both an answer classifier and a generator.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Hidden size handed to the question embedding, attention,
            both ``FCNet`` projections, the classifier and the decoder.
        att_dim: Accepted but not used by this builder.
        dec_dim: Size forwarded to ``STDecoder``.

    Returns:
        A ``VQAE`` wrapping the freshly constructed sub-modules.
    """
    # The same width (300) is given to both embeddings and to the decoder.
    emb_dim = 300

    word_embedding = WordEmbedding(
        dataset.question_dictionary.ntoken, emb_dim, 0.0
    )
    question_embedding = QuestionEmbedding(emb_dim, num_hid, 1, False, 0.0)

    question_dim = question_embedding.num_hid
    attention = NewAttention(dataset.v_dim, question_dim, num_hid)
    question_proj = FCNet([question_dim, num_hid])
    visual_proj = FCNet([dataset.v_dim, num_hid])

    # The classifier is created before the decoder, as in the original order.
    answer_head = SimpleClassifier(
        num_hid, num_hid * 2, dataset.num_ans_candidates, 0.5
    )
    explanation_decoder = STDecoder(
        dataset.v_dim,
        num_hid,
        emb_dim,
        dec_dim,
        dataset.explanation_dictionary.ntoken,
        1,
        0.5,
    )
    return VQAE(
        word_embedding,
        question_embedding,
        attention,
        question_proj,
        visual_proj,
        answer_head,
        explanation_decoder,
    )
def build_lstm_vqa(dataset, num_hid, att_dim, dec_dim):
    """Assemble an ``LSTM_VQA`` model around a ``SATDecoder`` generator.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Hidden size handed to the question embedding, attention,
            both ``FCNet`` projections, the decoder, the GRU cell and the
            classifier.
        att_dim: Size forwarded to ``SATDecoder``.
        dec_dim: Size forwarded to ``SATDecoder``.

    Returns:
        An ``LSTM_VQA`` wrapping the freshly constructed sub-modules.
    """
    # The same width (300) is given to both embeddings and to the decoder.
    emb_dim = 300

    word_embedding = WordEmbedding(
        dataset.question_dictionary.ntoken, emb_dim, 0.0
    )
    question_embedding = QuestionEmbedding(emb_dim, num_hid, 1, False, 0.0)

    question_dim = question_embedding.num_hid
    attention = NewAttention(dataset.v_dim, question_dim, num_hid)
    question_proj = FCNet([question_dim, num_hid])
    visual_proj = FCNet([dataset.v_dim, num_hid])

    explanation_decoder = SATDecoder(
        dataset.v_dim,
        num_hid,
        emb_dim,
        att_dim,
        dec_dim,
        dataset.explanation_dictionary.ntoken,
        1,
        0.5,
    )

    # A single-step GRUCell is used here; a full nn.GRU was tried previously.
    attention_cell = nn.GRUCell(dataset.v_dim, num_hid)

    answer_head = SimpleClassifier(
        num_hid, 2 * num_hid, dataset.num_ans_candidates, 0.5
    )
    return LSTM_VQA(
        word_embedding,
        question_embedding,
        attention,
        question_proj,
        visual_proj,
        explanation_decoder,
        attention_cell,
        answer_head,
    )
def build_vqae2_newatt(dataset, num_hid, emb_rnn='GRU'):
    """Assemble a ``VQAE2`` model with an explanation-embedding branch.

    Every sub-module except the explanation embedding is built with a fixed
    size of 1024; ``num_hid`` only sizes ``ExplainEmbedding``.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Hidden size passed to ``ExplainEmbedding`` only.
        emb_rnn: Forwarded unchanged as the last argument of
            ``ExplainEmbedding``. Defaults to ``'GRU'``.

    Returns:
        A ``VQAE2`` wrapping the freshly constructed sub-modules.
    """
    emb_dim = 300   # width given to both embeddings and to the decoder
    fixed_hid = 1024  # size used everywhere except the explanation embedding

    word_embedding = WordEmbedding(
        dataset.question_dictionary.ntoken, emb_dim, 0.0
    )
    question_embedding = QuestionEmbedding(emb_dim, fixed_hid, 1, False, 0.0)
    attention = NewAttention(dataset.v_dim, fixed_hid, fixed_hid)
    question_proj = FCNet([fixed_hid, fixed_hid])
    visual_proj = FCNet([dataset.v_dim, fixed_hid])

    answer_head = SimpleClassifier(
        fixed_hid, fixed_hid * 2, dataset.num_ans_candidates, 0.5
    )
    explanation_decoder = STDecoder(
        dataset.v_dim,
        fixed_hid,
        emb_dim,
        fixed_hid,
        dataset.explanation_dictionary.ntoken,
        1,
        0.5,
    )

    # The explanation encoder receives the decoder's own ``embed`` attribute.
    explanation_embedding = ExplainEmbedding(
        explanation_decoder.embed, emb_dim, num_hid, 1, False, 0.0, emb_rnn
    )
    explanation_proj = FCNet([explanation_embedding.num_hid, fixed_hid])

    return VQAE2(
        word_embedding,
        question_embedding,
        attention,
        question_proj,
        visual_proj,
        answer_head,
        explanation_decoder,
        explanation_embedding,
        explanation_proj,
    )
def build_vqae3_split(dataset, num_hid, att_dim, dec_dim):
    """Assemble a ``Split_VQAE`` model with two separate attention branches.

    Two identically configured (attention, question FCNet, visual FCNet)
    triples are created, followed by the classifier, the ``STDecoder``
    generator, an explanation embedding and two ``FCNet`` transforms.

    Args:
        dataset: Object providing ``question_dictionary.ntoken``, ``v_dim``,
            ``num_ans_candidates`` and ``explanation_dictionary.ntoken``.
        num_hid: Hidden size handed to every sub-module that takes one.
        att_dim: Accepted but not used by this builder.
        dec_dim: Size forwarded to ``STDecoder``.

    Returns:
        A ``Split_VQAE`` wrapping the freshly constructed sub-modules.
    """
    # The same width (300) is given to the embeddings and to the decoder.
    emb_dim = 300

    word_embedding = WordEmbedding(
        dataset.question_dictionary.ntoken, emb_dim, 0.0
    )
    question_embedding = QuestionEmbedding(emb_dim, num_hid, 1, False, 0.0)
    question_dim = question_embedding.num_hid

    # First branch.
    attention_a = NewAttention(dataset.v_dim, question_dim, num_hid)
    question_proj_a = FCNet([question_dim, num_hid])
    visual_proj_a = FCNet([dataset.v_dim, num_hid])

    # Second branch, configured exactly like the first.
    attention_b = NewAttention(dataset.v_dim, question_dim, num_hid)
    question_proj_b = FCNet([question_dim, num_hid])
    visual_proj_b = FCNet([dataset.v_dim, num_hid])

    answer_head = SimpleClassifier(
        num_hid, num_hid * 2, dataset.num_ans_candidates, 0.5
    )
    explanation_decoder = STDecoder(
        dataset.v_dim,
        num_hid,
        emb_dim,
        dec_dim,
        dataset.explanation_dictionary.ntoken,
        1,
        0.5,
    )

    # The explanation encoder receives the decoder's own ``embed`` attribute.
    explanation_embedding = ExplainEmbedding(
        explanation_decoder.embed, emb_dim, num_hid, 1, False, 0.0, 'GRU'
    )
    transform_vq = FCNet([num_hid, num_hid])
    transform_e = FCNet([explanation_embedding.num_hid, num_hid])

    return Split_VQAE(
        word_embedding,
        question_embedding,
        attention_a,
        question_proj_a,
        visual_proj_a,
        attention_b,
        question_proj_b,
        visual_proj_b,
        answer_head,
        explanation_decoder,
        explanation_embedding,
        transform_vq,
        transform_e,
    )