def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
    super(NewAttention, self).__init__()
    self.v_proj = FCNet([v_dim, num_hid])
    self.q_proj = FCNet([q_dim, num_hid])
    self.dropout = nn.Dropout(dropout)
    self.linear = weight_norm(nn.Linear(q_dim, 1), dim=None)
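# Sketch (not from the original source): one plausible forward pass for the
# NewAttention module above, assuming v is [batch, k, v_dim], q is [batch, q_dim],
# and num_hid == q_dim so the fused representation matches self.linear's input
# size. The method names `forward`/`logits` are illustrative assumptions.
def forward(self, v, q):
    # attention weights over the k visual regions/frames
    w = nn.functional.softmax(self.logits(v, q), dim=1)
    return w

def logits(self, v, q):
    batch, k, _ = v.size()
    v_proj = self.v_proj(v)                               # [batch, k, num_hid]
    q_proj = self.q_proj(q).unsqueeze(1).repeat(1, k, 1)  # [batch, k, num_hid]
    joint = self.dropout(v_proj * q_proj)                 # element-wise fusion
    return self.linear(joint)                             # [batch, k, 1]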
def build_ParalCoAtt(task_name, dataset, params):
    # w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    # q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    num_hid = params['num_hid']
    q_proj = FCNet([768, num_hid])  # project 768-d question features to num_hid
    bi_num_hid = num_hid * 2
    # one ParalCoAttention module per reasoning step, each with R parallel branches
    co_atts = nn.ModuleList([
        ParalCoAttention(dataset.v_dim, num_hid, num_hid,
                         inter_dims=params['scale'], R=len(params['scale']))
        for _ in range(params['reasonSteps'])
    ])
    v_fusion_att = paraAttention(fuse_dim=dataset.v_dim, glimpses=params['sub_nums'],
                                 inputs_dim=dataset.v_dim, att_dim=num_hid)
    q_fusion_att = paraAttention(fuse_dim=num_hid, glimpses=params['sub_nums'],
                                 inputs_dim=num_hid, att_dim=num_hid)
    context_gate = FCNet([bi_num_hid, bi_num_hid])
    classifier = SimpleClassifier(bi_num_hid, num_hid * 2, 1, 0.5)
    return ActionModel(task_name, q_proj, co_atts, q_fusion_att, v_fusion_att,
                       context_gate, classifier)
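# Sketch (not from the original source): how build_ParalCoAtt might be invoked.
# It assumes `dataset` exposes `v_dim` and that `params` carries the keys read
# above; the concrete values and the 'Action' task name are illustrative only.
params = {
    'num_hid': 512,        # shared hidden size for the projections
    'scale': [256, 512],   # inter_dims, one per parallel co-attention branch (R = 2)
    'reasonSteps': 2,      # number of stacked ParalCoAttention modules
    'sub_nums': 2,         # glimpses used by paraAttention
}
model = build_ParalCoAtt('Action', dataset, params)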
def build_baseline(task_name, dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_emb = QuestionEmbedding(dataset.v_dim, num_hid, 1, False, 0.0)
    q_net = FCNet([q_emb.num_hid, num_hid])
    v_net = FCNet([num_hid, num_hid])
    classifier = SimpleClassifier(num_hid, num_hid * 2, 1, 0.5)
    return CountModel(task_name, w_emb, q_emb, v_emb, q_net, v_net, classifier)
def __init__(self, v_dim, q_dim, hid_dim, dropout=[.2, .5]):
    super(CoAttention, self).__init__()
    self.v_dim = v_dim
    self.q_dim = q_dim
    self.hid_dim = hid_dim
    act = "ReLU"
    self.v_net = FCNet([v_dim, self.hid_dim], act=act, dropout=dropout[0])
    self.q_net = FCNet([q_dim, self.hid_dim], act=act, dropout=dropout[0])
def __init__(self, v_dim, q_dim, num_hid, dropout=0.2, max_len=35):
    super(CoAttention, self).__init__()
    self.v_proj = FCNet([v_dim, num_hid])
    self.q_proj = FCNet([q_dim, num_hid])
    self.tran_linear = weight_norm(nn.Linear(num_hid, num_hid))
    self.dropout = nn.Dropout(dropout)
    self.linear_q = weight_norm(nn.Linear(max_len, 1), dim=None)
    self.linear_v = weight_norm(nn.Linear(max_len, 1), dim=None)
def __init__(self, v_dim, q_dim, num_hid, inter_dims, R, dropout=[.2, .5]):
    super(ParalCoAttention, self).__init__()
    self.R = R
    self.num_dim = num_hid
    self.v_dim = v_dim
    self.q_dim = q_dim
    self.inter_dims = inter_dims
    act = "ReLU"
    assert len(self.inter_dims) == self.R
    # R parallel projection branches: one (visual, question) pair of nets per inter_dim
    self.list_v_net = nn.ModuleList([
        FCNet([v_dim, inter_dim], act=act, dropout=dropout[0])
        for inter_dim in self.inter_dims
    ])
    self.list_q_net = nn.ModuleList([
        FCNet([q_dim, inter_dim], act=act, dropout=dropout[0])
        for inter_dim in self.inter_dims
    ])
    assert len(self.list_v_net) == self.R
    assert len(self.list_q_net) == self.R
def build_temporalAtt(task_name, dataset, params):
    # w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    # q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    num_hid = params['num_hid']
    q_proj = FCNet([768, num_hid])
    bi_num_hid = num_hid * 2
    co_att = CoAttention(dataset.v_dim, num_hid, bi_num_hid)
    v_fusion_att = paraAttention(fuse_dim=dataset.v_dim, glimpses=params['sub_nums'],
                                 inputs_dim=dataset.v_dim, att_dim=num_hid)
    q_fusion_att = paraAttention(fuse_dim=num_hid, glimpses=params['sub_nums'],
                                 inputs_dim=num_hid, att_dim=num_hid)
    # classifier predicts over all dataset.num_ans_candidates answers
    classifier = SimpleClassifier(bi_num_hid, num_hid * 2,
                                  dataset.num_ans_candidates, 0.5)
    return FrameQAModel(task_name, q_proj, co_att, q_fusion_att, v_fusion_att, classifier)
def build_temporalAtt(task_name, dataset, params):
    num_hid = params['num_hid']
    q_proj = FCNet([768, num_hid])
    bi_num_hid = num_hid * 2
    co_att = CoAttention(dataset.v_dim, num_hid, bi_num_hid)
    v_fusion_att = paraAttention(fuse_dim=dataset.v_dim, glimpses=params['sub_nums'],
                                 inputs_dim=dataset.v_dim, att_dim=num_hid)
    q_fusion_att = paraAttention(fuse_dim=num_hid, glimpses=params['sub_nums'],
                                 inputs_dim=num_hid, att_dim=num_hid)
    classifier = SimpleClassifier(2 * num_hid, num_hid * 2, 1, 0.5)
    return ActionModel(task_name, q_proj, co_att, q_fusion_att, v_fusion_att, classifier)
def __init__(self, v_dim, q_dim, num_hid):
    super(Attention, self).__init__()
    self.nonlinear = FCNet([v_dim + q_dim, num_hid])
    self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None)
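# Sketch (not from the original source): a plausible forward pass for the
# concatenation-based Attention module above, assuming v is [batch, k, v_dim],
# q is [batch, q_dim], and that `torch` is imported in this module; the
# `forward`/`logits` method names are assumptions.
def forward(self, v, q):
    w = nn.functional.softmax(self.logits(v, q), dim=1)  # weights over the k items
    return w

def logits(self, v, q):
    num_objs = v.size(1)
    q = q.unsqueeze(1).repeat(1, num_objs, 1)  # tile the question over the k items
    vq = torch.cat((v, q), dim=2)              # [batch, k, v_dim + q_dim]
    joint = self.nonlinear(vq)                 # [batch, k, num_hid]
    return self.linear(joint)                  # [batch, k, 1]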