コード例 #1
0
def build_ParalCoAtt(task_name, dataset, params):
    """Assemble an ``ActionModel`` around a stack of ``ParalCoAttention`` modules.

    The model is composed of a question projection, one ``ParalCoAttention``
    per reasoning step, a ``paraAttention`` fusion module for each modality,
    a context gate and a classifier.

    Args:
        task_name: Forwarded unchanged as the first ``ActionModel`` argument.
        dataset: Object exposing ``v_dim``; it is handed to every
            ``ParalCoAttention`` and to the visual ``paraAttention``.
        params: Mapping read for the keys ``'num_hid'``, ``'reasonSteps'``,
            ``'scale'`` and ``'sub_nums'``.

    Returns:
        The constructed ``ActionModel``.
    """
    hidden = params['num_hid']
    # 768 is hard-coded here; presumably the width of a pre-computed
    # question embedding -- TODO confirm against the data pipeline.
    question_proj = FCNet([768, hidden])
    joint_dim = hidden * 2

    # NOTE: sub-modules are created strictly top to bottom. Assuming they are
    # randomly initialised torch modules, reordering these statements would
    # change the weights obtained under a fixed seed.
    attention_steps = [
        ParalCoAttention(
            dataset.v_dim,
            hidden,
            hidden,
            inter_dims=params['scale'],
            R=len(params['scale']),
        )
        for _ in range(params['reasonSteps'])
    ]
    co_attention_stack = nn.ModuleList(attention_steps)

    visual_fusion = paraAttention(
        fuse_dim=dataset.v_dim,
        glimpses=params['sub_nums'],
        inputs_dim=dataset.v_dim,
        att_dim=hidden,
    )
    question_fusion = paraAttention(
        fuse_dim=hidden,
        glimpses=params['sub_nums'],
        inputs_dim=hidden,
        att_dim=hidden,
    )

    gate = FCNet([joint_dim, joint_dim])
    head = SimpleClassifier(joint_dim, joint_dim, 1, 0.5)

    return ActionModel(
        task_name,
        question_proj,
        co_attention_stack,
        question_fusion,
        visual_fusion,
        gate,
        head,
    )
コード例 #2
0
def build_baseline(task_name, dataset, num_hid):
    """Assemble the baseline ``CountModel``.

    Args:
        task_name: Forwarded unchanged as the first ``CountModel`` argument.
        dataset: Object exposing ``dictionary.ntoken`` and ``v_dim``.
        num_hid: Size value shared by the embeddings, the two ``FCNet``
            modules and the classifier.

    Returns:
        The constructed ``CountModel``.
    """
    # The same constant is given to WordEmbedding and to the question-side
    # QuestionEmbedding (presumably the word-vector width -- TODO confirm).
    word_dim = 300

    word_embedding = WordEmbedding(dataset.dictionary.ntoken, word_dim, 0.0)
    question_embedding = QuestionEmbedding(word_dim, num_hid, 1, False, 0.0)
    # The visual stream reuses QuestionEmbedding, fed with dataset.v_dim.
    visual_embedding = QuestionEmbedding(dataset.v_dim, num_hid, 1, False, 0.0)

    question_net = FCNet([question_embedding.num_hid, num_hid])
    visual_net = FCNet([num_hid, num_hid])
    head = SimpleClassifier(num_hid, num_hid * 2, 1, 0.5)

    return CountModel(
        task_name,
        word_embedding,
        question_embedding,
        visual_embedding,
        question_net,
        visual_net,
        head,
    )
コード例 #3
0
def build_temporalAtt(task_name, n_layer, dataset, num_hid, dictionary, glove_file):
    """Assemble a ``CountModel`` from a video encoder and a question encoder.

    Args:
        task_name: Forwarded to ``WordEmbedding.init_embedding`` and to
            ``CountModel``.
        n_layer: Passed to ``Encoder`` as ``n_layer``.
        dataset: Not used by this function; kept in the signature for callers.
        num_hid: Size value used for the classifier.
        dictionary: Object exposing ``ntoken`` and ``c_ntoken``; also handed
            to ``init_embedding``.
        glove_file: Handed to ``init_embedding`` (presumably a path to
            pre-trained GloVe vectors -- TODO confirm).

    Returns:
        The constructed ``CountModel``.
    """
    # Every Encoder hyper-parameter except n_layer is fixed here.
    encoder_settings = dict(
        n_layer=n_layer,
        n_head=8,
        d_k=256,
        d_v=256,
        v_len=36,
        v_emb_dim=300,
        d_model=2048,
        d_inner_hid=512,
        dropout=0.1,
    )
    video_encoder = Encoder(**encoder_settings)

    embedding = WordEmbedding(dictionary.ntoken, dictionary.c_ntoken, 300, 64, 0.1)
    word_matrix, char_matrix = embedding.init_embedding(
        dictionary, glove_file, task_name)
    question_encoder = Ques_Encoder(word_matrix, char_matrix)

    head = SimpleClassifier(num_hid, num_hid * 2, 1, 0.5)
    return CountModel(task_name, video_encoder, question_encoder, head)
コード例 #4
0
def build_temporalAtt(task_name, dataset, params):
    """Assemble a ``FrameQAModel`` with co-attention and two fusion modules.

    Args:
        task_name: Forwarded unchanged as the first ``FrameQAModel`` argument.
        dataset: Object exposing ``v_dim`` and ``num_ans_candidates``.
        params: Mapping read for the keys ``'num_hid'`` and ``'sub_nums'``.

    Returns:
        The constructed ``FrameQAModel``.
    """
    hidden = params['num_hid']
    # 768 is hard-coded here; presumably the width of a pre-computed
    # question embedding -- TODO confirm against the data pipeline.
    question_proj = FCNet([768, hidden])
    joint_dim = hidden * 2

    co_attention = CoAttention(dataset.v_dim, hidden, joint_dim)

    visual_fusion = paraAttention(
        fuse_dim=dataset.v_dim,
        glimpses=params['sub_nums'],
        inputs_dim=dataset.v_dim,
        att_dim=hidden,
    )
    question_fusion = paraAttention(
        fuse_dim=hidden,
        glimpses=params['sub_nums'],
        inputs_dim=hidden,
        att_dim=hidden,
    )

    # Third classifier argument comes from the dataset rather than a constant.
    head = SimpleClassifier(
        joint_dim, joint_dim, dataset.num_ans_candidates, 0.5)

    return FrameQAModel(
        task_name,
        question_proj,
        co_attention,
        question_fusion,
        visual_fusion,
        head,
    )
コード例 #5
0
def build_temporalAtt(task_name, dataset, params):
    """Assemble an ``ActionModel`` with a single ``CoAttention`` module.

    Args:
        task_name: Forwarded unchanged as the first ``ActionModel`` argument.
        dataset: Object exposing ``v_dim``.
        params: Mapping read for the keys ``'num_hid'`` and ``'sub_nums'``.

    Returns:
        The constructed ``ActionModel``.
    """
    hidden = params['num_hid']
    # 768 is hard-coded here; presumably the width of a pre-computed
    # question embedding -- TODO confirm against the data pipeline.
    question_proj = FCNet([768, hidden])
    joint_dim = hidden * 2

    co_attention = CoAttention(dataset.v_dim, hidden, joint_dim)

    visual_fusion = paraAttention(
        fuse_dim=dataset.v_dim,
        glimpses=params['sub_nums'],
        inputs_dim=dataset.v_dim,
        att_dim=hidden,
    )
    question_fusion = paraAttention(
        fuse_dim=hidden,
        glimpses=params['sub_nums'],
        inputs_dim=hidden,
        att_dim=hidden,
    )

    head = SimpleClassifier(joint_dim, joint_dim, 1, 0.5)

    return ActionModel(
        task_name,
        question_proj,
        co_attention,
        question_fusion,
        visual_fusion,
        head,
    )