Example #1
    def __init__(self, v_relation_dim, num_hid, gamma,
                 min_num_objects=10, use_counter=True):
        super(BAN, self).__init__()

        self.v_att = BiAttention(v_relation_dim, num_hid, num_hid, gamma)
        self.glimpse = gamma
        self.use_counter = use_counter
        b_net = []
        q_prj = []
        c_prj = []
        q_att = []
        v_prj = []

        for i in range(gamma):
            b_net.append(BCNet(v_relation_dim, num_hid, num_hid, None, k=1))
            q_prj.append(FCNet([num_hid, num_hid], '', .2))
            if self.use_counter:
                c_prj.append(FCNet([min_num_objects + 1, num_hid], 'ReLU', .0))

        self.b_net = nn.ModuleList(b_net)
        self.q_prj = nn.ModuleList(q_prj)
        self.q_att = nn.ModuleList(q_att)
        self.v_prj = nn.ModuleList(v_prj)
        if self.use_counter:
            self.c_prj = nn.ModuleList(c_prj)
            self.counter = Counter(min_num_objects)
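The constructor above only registers the per-glimpse modules (q_att and v_prj stay empty in this configuration), and the forward pass is not shown. Below is a hedged, self-contained sketch of the residual glimpse-refinement pattern that b_net/q_prj pairs typically serve; all names and shapes are illustrative, not from the source:

import torch
import torch.nn as nn

# Toy shapes; a plain Linear stands in for the FCNet q_prj above.
num_hid, glimpse, batch = 8, 2, 4
q_emb = torch.randn(batch, 1, num_hid)                 # question state
q_prj = nn.ModuleList(nn.Linear(num_hid, num_hid) for _ in range(glimpse))

for g in range(glimpse):
    b_emb = torch.randn(batch, num_hid)                # stand-in for b_net[g]'s joint embedding
    q_emb = q_prj[g](b_emb.unsqueeze(1)) + q_emb       # residual per-glimpse refinement
print(q_emb.shape)                                     # torch.Size([4, 1, 8])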
Example #2
def baseline(args, dataset, pretrained=False):

    # initialise model
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, args.num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, args.num_hid)
    q_net = FCNet([args.num_hid, args.num_hid])
    v_net = FCNet([dataset.v_dim, args.num_hid])
    classifier = SimpleClassifier(args.num_hid, 2 * args.num_hid,
                                  dataset.num_ans_candidates, 0.5)
    model = BaseModel(args, w_emb, q_emb, v_att, q_net, v_net, classifier)

    # load model on device if available
    map_location = None
    if not model.cuda_available:
        map_location = torch.device('cpu')

    # download and load pretrained model
    if pretrained:
        key = 'baseline-vqa'
        url = pretrained_urls[key]
        model.load_state_dict(download_model(
            key, url, map_location=map_location)['model'],
                              strict=False)
    else:
        key = 'untrained'

    # set model name
    model.name = key

    return model
Example #3
    def __init__(self, v_dim, q_dim, hid_dim, glimpses=1, dropout=0.2):
        super(Attention, self).__init__()

        self.v_proj = FCNet([v_dim, hid_dim], dropout=dropout)
        self.q_proj = FCNet([q_dim, hid_dim], dropout=dropout)
        self.drop = nn.Dropout(dropout)
        self.linear = weight_norm(nn.Linear(hid_dim, glimpses), dim=None)
Example #4
    def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
        super(NewAttention, self).__init__()

        self.v_proj = FCNet([v_dim, num_hid])
        self.q_proj = FCNet([q_dim, num_hid])
        self.dropout = nn.Dropout(dropout)
        self.linear = weight_norm(nn.Linear(q_dim, 1), dim=None)
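Unlike the concatenation-based Attention above, NewAttention fuses the two projections by elementwise product. A toy sketch of that fusion in plain PyTorch, assuming q_dim == num_hid so the final scoring layer lines up; the Linear layers are hypothetical stand-ins for FCNet:

import torch
import torch.nn as nn
import torch.nn.functional as F

batch, k_objs, v_dim, num_hid = 2, 6, 10, 8
v = torch.randn(batch, k_objs, v_dim)            # per-object visual features
q = torch.randn(batch, num_hid)                  # question embedding
v_proj = nn.Linear(v_dim, num_hid)(v)            # stand-in for self.v_proj
joint = v_proj * q.unsqueeze(1)                  # elementwise-product fusion
logits = nn.Linear(num_hid, 1)(joint)            # stand-in for self.linear
att = F.softmax(logits, dim=1)                   # attention over objects
print(att.shape)                                 # torch.Size([2, 6, 1])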
Example #5
    def __init__(self, v_dim, q_dim, h_dim, h_out, act='ReLU',
                 dropout=[.2, .5], k=3):
        super(BCNet, self).__init__()

        self.c = 32
        self.k = k
        self.v_dim = v_dim
        self.q_dim = q_dim
        self.h_dim = h_dim
        self.h_out = h_out

        self.v_net = FCNet([v_dim, h_dim * self.k], act=act,
                           dropout=dropout[0])
        self.q_net = FCNet([q_dim, h_dim * self.k], act=act,
                           dropout=dropout[0])
        self.dropout = nn.Dropout(dropout[1])  # attention
        if 1 < k:
            self.p_net = nn.AvgPool1d(self.k, stride=self.k)

        if h_out is None:
            pass
        elif h_out <= self.c:
            self.h_mat = nn.Parameter(
                        torch.Tensor(1, h_out, 1, h_dim * self.k).normal_())
            self.h_bias = nn.Parameter(
                        torch.Tensor(1, h_out, 1, 1).normal_())
        else:
            self.h_net = weight_norm(
                            nn.Linear(h_dim * self.k, h_out), dim=None)
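When k > 1, the joint feature is expanded to h_dim * self.k and later pooled back down; multiplying the AvgPool1d output by k turns the mean over each group into a sum over the k rank terms of the low-rank bilinear model. A minimal shape check with toy tensors (the scaling by k mirrors what BCNet-style forward passes usually do):

import torch
import torch.nn as nn

h_dim, k = 4, 3
joint = torch.randn(2, 1, h_dim * k)   # fused feature: k rank terms per output unit
p_net = nn.AvgPool1d(k, stride=k)      # same pooling module as self.p_net above
pooled = p_net(joint) * k              # mean over each group of k, times k == sum over ranks
print(pooled.shape)                    # torch.Size([2, 1, 4])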
Example #6
    def __init__(self, in_dim, hid_dim, out_dim, dropout=0.0):
        super(SimpleClassifier, self).__init__()
        self.q_net = FCNet([in_dim[0], hid_dim[0]], dropout=dropout)
        self.v_net = FCNet([in_dim[1], hid_dim[0]], dropout=dropout)
        self.main = nn.Sequential(nn.Linear(hid_dim[0], hid_dim[1]), nn.ReLU(),
                                  nn.Dropout(dropout, inplace=True),
                                  nn.Linear(hid_dim[1], out_dim))
Example #7
    def __init__(self, v_relation_dim, q_dim, num_hid, dropout=0.2):
        super(BUTD, self).__init__()
        self.v_proj = FCNet([v_relation_dim, num_hid])
        self.q_proj = FCNet([q_dim, num_hid])
        self.dropout = nn.Dropout(dropout)
        self.linear = FCNet([q_dim, 1])
        self.q_net = FCNet([q_dim, num_hid])
        self.v_net = FCNet([v_relation_dim, num_hid])
Example #8
    def __init__(self, num_hid, dropout):
        super(QuestionSelfAttention, self).__init__()
        self.num_hid = num_hid
        self.drop = nn.Dropout(dropout)
        self.W1_self_att_q = FCNet(dims=[num_hid, num_hid],
                                   dropout=dropout,
                                   act=None)
        self.W2_self_att_q = FCNet(dims=[num_hid, 1], act=None)
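A hedged sketch of how a two-layer scoring network like W1/W2 above is commonly used to pool question tokens into a single vector; the Linear layers and shapes below are illustrative stand-ins, not the repository's forward pass:

import torch
import torch.nn as nn
import torch.nn.functional as F

batch, q_len, num_hid = 2, 14, 8
q_feat = torch.randn(batch, q_len, num_hid)       # per-token question features
W1 = nn.Linear(num_hid, num_hid)                  # stand-in for W1_self_att_q
W2 = nn.Linear(num_hid, 1)                        # stand-in for W2_self_att_q

scores = W2(torch.tanh(W1(q_feat))).squeeze(-1)   # [batch, q_len] token scores
weights = F.softmax(scores, dim=-1)               # attention over tokens
q_att = torch.bmm(weights.unsqueeze(1), q_feat)   # [batch, 1, num_hid] pooled question
print(q_att.shape)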
Example #9
    def __init__(self, dim_v, dim_q, dim_out, method="Mutan", mlp_glimpses=0):
        super(MuTAN_Attention, self).__init__()
        self.mlp_glimpses = mlp_glimpses
        self.fusion = getattr(fusions, method)(
                        [dim_q, dim_v], dim_out, mm_dim=1200,
                        dropout_input=0.1)
        if self.mlp_glimpses > 0:
            self.linear0 = FCNet([dim_out, 512], '', 0)
            self.linear1 = FCNet([512, mlp_glimpses], '', 0)
Example #10
def build_baseline0_newatt(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att = NewAttention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([q_emb.num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    classifier = SimpleClassifier(num_hid, num_hid * 2,
                                  dataset.num_ans_candidates, 0.5)
    return BaseModel(w_emb, q_emb, v_att, q_net, v_net, classifier)
Example #11
    def __init__(self, feat_dim, nongt_dim=20, pos_emb_dim=-1,
                 num_heads=16, dropout=[0.2, 0.5]):
        """ Attetion module with vectorized version
        自注意力的KVQ:其中K和Q是来自原始特征和其K的权重矩阵W_k的转换得来的——K=X*W_k,同理Q也是这样,
        而V可以通过转换,W_v得来,也可以是原始特征X一个嵌入即可。所以,K和Q需要一个网络来学习其权重矩阵,W_k,W_q

        Args:
            position_embedding: [num_rois, nongt_dim, pos_emb_dim] 用于隐式关系
                                used in implicit relation
            pos_emb_dim: set as -1 if explicit relation 如果关系显式,设为-1
            nongt_dim: number of objects consider relations per image 对象的数量考虑每个图像的关系
            fc_dim: should be same as num_heads
            feat_dim: dimension of roi_feat
            num_heads: number of attention heads
        Returns:
            output: [num_rois, ovr_feat_dim, output_dim]
        """
        super(GraphSelfAttentionLayer, self).__init__()
        # multi head
        self.fc_dim = num_heads  # 16
        self.feat_dim = feat_dim  # 1024
        self.dim = (feat_dim, feat_dim, feat_dim)
        self.dim_group = (int(self.dim[0] / num_heads),
                          int(self.dim[1] / num_heads),
                          int(self.dim[2] / num_heads))  # 64, 64, 64
        self.num_heads = num_heads  # 16
        self.pos_emb_dim = pos_emb_dim  # 64 for implicit relation, -1 for explicit relation
        # for an implicit relation, add one extra pair_pos_fc1 layer
        if self.pos_emb_dim > 0:
            self.pair_pos_fc1 = FCNet([pos_emb_dim, self.fc_dim], None,
                                      dropout[0])  # [64, 16, None]

        self.query = FCNet([feat_dim, self.dim[0]], None,
                           dropout[0])  # fully connected layer as the query [1024, 1024]
        # self.query =
        # (query): FCNet(
        #     (main): Sequential(
        #       (0): Dropout(p=0.2, inplace=False)
        #       (1): Linear(in_features=1024, out_features=1024, bias=True)
        # )
        # )
        self.nongt_dim = nongt_dim  # 20

        self.key = FCNet([feat_dim, self.dim[1]], None,
                         dropout[0])  # fully connected layer as the key [1024, 1024]
        #  self.key =
        #     (key): FCNet(
        #         (main): Sequential(
        #         (0): Dropout(p=0.2, inplace=False)
        #     (1): Linear(in_features=1024, out_features=1024, bias=True) )
        #     )

        # weight_norm: PyTorch's built-in weight normalization [16*1024, 1024, (1, 1)]
        # 1x1 grouped convolution over the stacked head outputs
        self.linear_out_ = weight_norm(nn.Conv2d(in_channels=self.fc_dim * feat_dim,
                                                 out_channels=self.dim[2],
                                                 kernel_size=(1, 1),
                                                 groups=self.fc_dim), dim=None)
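The docstring's K = X * W_k, Q = X * W_q recipe is ordinary dot-product self-attention. A minimal single-head sketch in plain PyTorch, without the multi-head grouping, weight-normalized output convolution, or position terms used above:

import torch
import torch.nn as nn
import torch.nn.functional as F

num_rois, feat_dim = 5, 16
X = torch.randn(num_rois, feat_dim)    # ROI features
W_q = nn.Linear(feat_dim, feat_dim)    # learned W_q, the role of self.query
W_k = nn.Linear(feat_dim, feat_dim)    # learned W_k, the role of self.key

Q, K, V = W_q(X), W_k(X), X            # V taken as the raw features here
att = F.softmax(Q @ K.t() / feat_dim ** 0.5, dim=-1)   # [num_rois, num_rois]
out = att @ V                          # attended ROI features
print(out.shape)                       # torch.Size([5, 16])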
Example #12
    def __init__(self,
                 v_dim,
                 q_dim,
                 out_dim,
                 dir_num,
                 label_num,
                 nongt_dim=20,
                 num_heads=16,
                 num_steps=1,
                 residual_connection=True,
                 label_bias=True):
        super(ExplicitRelationEncoder, self).__init__()
        self.v_dim = v_dim
        self.q_dim = q_dim
        self.out_dim = out_dim
        self.num_steps = num_steps
        self.residual_connection = residual_connection
        print(
            "In ExplicitRelationEncoder, num of graph propogation steps:",
            "%d, residual_connection: %s" %
            (self.num_steps, self.residual_connection))

        if self.v_dim != self.out_dim:
            self.v_transform = FCNet([v_dim, out_dim])
        else:
            self.v_transform = None
        in_dim = out_dim + q_dim
        self.explicit_relation = GAT(dir_num,
                                     label_num,
                                     in_dim,
                                     out_dim,
                                     nongt_dim=nongt_dim,
                                     num_heads=num_heads,
                                     label_bias=label_bias,
                                     pos_emb_dim=-1)
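The line in_dim = out_dim + q_dim suggests the question vector is broadcast and concatenated onto every object feature before entering the GAT. A toy shape check with hypothetical tensors:

import torch

batch, num_rois, out_dim, q_dim = 2, 5, 8, 6
v = torch.randn(batch, num_rois, out_dim)          # (transformed) object features
q = torch.randn(batch, q_dim)                      # question embedding
q_rep = q.unsqueeze(1).expand(-1, num_rois, -1)    # broadcast question to every object
vq = torch.cat([v, q_rep], dim=-1)                 # last dim == out_dim + q_dim
print(vq.shape)                                    # torch.Size([2, 5, 14])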
Example #13
    def __init__(self,
                 feat_dim,
                 nongt_dim=20,
                 pos_emb_dim=-1,
                 num_heads=16,
                 dropout=[0.2, 0.5]):
        """ Attetion module with vectorized version

        Args:
            position_embedding: [num_rois, nongt_dim, pos_emb_dim]
                                used in implicit relation
            pos_emb_dim: set as -1 if explicit relation
            nongt_dim: number of objects to consider for relations per image
            fc_dim: should be same as num_heads
            feat_dim: dimension of roi_feat
            num_heads: number of attention heads
        Returns:
            output: [num_rois, ovr_feat_dim, output_dim]
        """
        super(GraphSelfAttentionLayer, self).__init__()
        # multi head
        self.fc_dim = num_heads
        self.feat_dim = feat_dim
        self.dim = (feat_dim, feat_dim, feat_dim)
        self.dim_group = (int(self.dim[0] / num_heads),
                          int(self.dim[1] / num_heads),
                          int(self.dim[2] / num_heads))
        self.num_heads = num_heads
        self.pos_emb_dim = pos_emb_dim
        if self.pos_emb_dim > 0:
            self.pair_pos_fc1 = FCNet([pos_emb_dim, self.fc_dim], None,
                                      dropout[0])
        self.query = FCNet([feat_dim, self.dim[0]], None, dropout[0])
        self.nongt_dim = nongt_dim

        self.key = FCNet([feat_dim, self.dim[1]], None, dropout[0])

        self.linear_out_ = weight_norm(nn.Conv2d(in_channels=self.fc_dim *
                                                 feat_dim,
                                                 out_channels=self.dim[2],
                                                 kernel_size=(1, 1),
                                                 groups=self.fc_dim),
                                       dim=None)
Example #14
    def __init__(self,
                 dir_num,
                 label_num,
                 in_feat_dim,
                 out_feat_dim,
                 nongt_dim=20,
                 dropout=0.2,
                 label_bias=True,
                 num_heads=16,
                 pos_emb_dim=-1):
        """ Attetion module with vectorized version

        Args:
            label_num: numer of edge labels
            dir_num: number of edge directions
            feat_dim: dimension of roi_feat
            pos_emb_dim: dimension of postion embedding for implicit relation, set as -1 for explicit relation

        Returns:
            output: [num_rois, ovr_feat_dim, output_dim]
        """
        super(GAttNet, self).__init__()
        assert dir_num <= 2, "Got more than two directions in a graph."
        self.dir_num = dir_num
        self.label_num = label_num
        self.in_feat_dim = in_feat_dim
        self.out_feat_dim = out_feat_dim
        self.dropout = nn.Dropout(dropout)
        self.self_weights = FCNet([in_feat_dim, out_feat_dim], '', dropout)
        self.bias = FCNet([label_num, 1], '', 0, label_bias)
        self.nongt_dim = nongt_dim
        self.pos_emb_dim = pos_emb_dim
        neighbor_net = []
        for i in range(dir_num):
            g_att_layer = GraphSelfAttentionLayer(pos_emb_dim=pos_emb_dim,
                                                  num_heads=num_heads,
                                                  feat_dim=out_feat_dim,
                                                  nongt_dim=nongt_dim)
            neighbor_net.append(g_att_layer)
        self.neighbor_net = nn.ModuleList(neighbor_net)
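The ModuleList holds one GraphSelfAttentionLayer per edge direction; how the per-direction outputs are merged happens in the forward pass, which is not shown here. A schematic stand-in with plain Linear layers, assuming (for illustration only) that direction outputs are averaged:

import torch
import torch.nn as nn

dir_num, num_rois, dim = 2, 5, 8
neighbor_net = nn.ModuleList(nn.Linear(dim, dim) for _ in range(dir_num))
v = torch.randn(num_rois, dim)
out = sum(layer(v) for layer in neighbor_net) / dir_num   # average over directions
print(out.shape)                                          # torch.Size([5, 8])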
Example #15
    def __init__(self,
                 dir_num,
                 label_num,
                 in_feat_dim,
                 out_feat_dim,
                 nongt_dim=20,
                 dropout=0.2,
                 label_bias=True,
                 num_heads=16,
                 pos_emb_dim=-1):
        super(GAttNet, self).__init__()
        assert dir_num <= 2, "Got more than two directions in a graph."
        self.dir_num = dir_num  # 2
        self.label_num = label_num  # 11
        self.in_feat_dim = in_feat_dim  # 2048
        self.out_feat_dim = out_feat_dim  # 1024
        self.dropout = nn.Dropout(dropout)  # 0.2
        # FCNet((main): Sequential(
        #     (0): Dropout(p=0.2, inplace=False)
        #     (1): Linear(in_features=2048, out_features=1024, bias=True)))
        self.self_weights = FCNet([in_feat_dim, out_feat_dim], '', dropout)
        # FCNet(
        #     (main): Sequential(
        #     (0): Linear(in_features=11, out_features=1, bias=False)))
        self.bias = FCNet([label_num, 1], '', 0, label_bias)
        self.nongt_dim = nongt_dim  # 20
        self.pos_emb_dim = pos_emb_dim  # -1
        neighbor_net = []
        # two graph self-attention layers, one per edge direction
        for i in range(dir_num):
            g_att_layer = GraphSelfAttentionLayer(pos_emb_dim=pos_emb_dim,
                                                  num_heads=num_heads,
                                                  feat_dim=out_feat_dim,
                                                  nongt_dim=nongt_dim)
            neighbor_net.append(g_att_layer)
        # graph attention network
        self.neighbor_net = nn.ModuleList(neighbor_net)
Example #16
    def __init__(self, v_dim, q_dim, num_hid):
        super(Attention, self).__init__()
        self.nonlinear = FCNet([v_dim + q_dim, num_hid])
        self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None)
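This concatenate-then-score design is typically applied per object. A hedged sketch of the corresponding logits computation with toy shapes; the Sequential below is a hypothetical stand-in for FCNet:

import torch
import torch.nn as nn
import torch.nn.functional as F

batch, num_objs, v_dim, q_dim, num_hid = 2, 6, 10, 8, 12
v = torch.randn(batch, num_objs, v_dim)           # object features
q = torch.randn(batch, q_dim)                     # question embedding
nonlinear = nn.Sequential(nn.Linear(v_dim + q_dim, num_hid), nn.ReLU())  # FCNet stand-in
linear = nn.Linear(num_hid, 1)                    # scoring head, as above

q_rep = q.unsqueeze(1).repeat(1, num_objs, 1)     # tile question over objects
logits = linear(nonlinear(torch.cat([v, q_rep], dim=2)))   # [batch, num_objs, 1]
att = F.softmax(logits, dim=1)                    # normalize over objects
print(att.shape)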