    def __init__(self, __C):
        super(QAtt, self).__init__()
        self.__C = __C

        # project LSTM outputs to Q_GLIMPSES question-attention logits
        self.mlp = MLP(in_size=__C.LSTM_OUT_SIZE,
                       mid_size=__C.HIDDEN_SIZE,
                       out_size=__C.Q_GLIMPSES,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)
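
Every snippet on this page calls an MLP helper that is not shown. A minimal sketch matching the call signature used above (two linear layers, with optional ReLU and dropout after the hidden one) might look like the following; the helper in the original repository may differ in detail.

    import torch.nn as nn

    # sketch of the assumed MLP helper, not the original implementation
    class MLP(nn.Module):
        def __init__(self, in_size, mid_size, out_size, dropout_r=0., use_relu=True):
            super(MLP, self).__init__()
            layers = [nn.Linear(in_size, mid_size)]
            if use_relu:
                layers.append(nn.ReLU(inplace=True))
            if dropout_r > 0:
                layers.append(nn.Dropout(dropout_r))
            layers.append(nn.Linear(mid_size, out_size))
            self.net = nn.Sequential(*layers)

        def forward(self, x):
            # [*, in_size] -> [*, out_size]
            return self.net(x)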
Example #2
    def __init__(self, __C):
        super(FFN, self).__init__()

        # position-wise feed-forward: HIDDEN_SIZE -> FF_SIZE -> HIDDEN_SIZE
        self.mlp = MLP(in_size=__C.HIDDEN_SIZE,
                       mid_size=__C.FF_SIZE,
                       out_size=__C.HIDDEN_SIZE,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)
Example #3
    def __init__(self, __C):
        super(AGAttention, self).__init__()
        self.lin_v = FFN(__C)  # let self.lin take care of bias
        self.lin_q = FFN(__C)
        # score each position: fused feature -> single attention logit
        self.lin = MLP(in_size=__C.HIDDEN_SIZE,
                       mid_size=__C.FF_SIZE,
                       out_size=1,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)
Example #4
    def __init__(self, __C):
        super(AttFlat, self).__init__()
        self.__C = __C

        # glimpse weights over the sequence: HIDDEN_SIZE -> FLAT_GLIMPSES logits
        self.mlp = MLP(in_size=__C.HIDDEN_SIZE,
                       mid_size=__C.FLAT_MLP_SIZE,
                       out_size=__C.FLAT_GLIMPSES,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)

        # fuse the concatenated glimpses into a single FLAT_OUT_SIZE vector
        self.linear_merge = nn.Linear(__C.HIDDEN_SIZE * __C.FLAT_GLIMPSES,
                                      __C.FLAT_OUT_SIZE)
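
AttFlat declares its pieces but not the forward pass. Following the standard MCAN pattern, a plausible forward (a sketch to be pasted inside the class above, not the original code) masks padded positions, softmax-normalizes the glimpse logits over the sequence, and concatenates the weighted sums before linear_merge:

    import torch
    import torch.nn.functional as F

    # plausible AttFlat.forward in the usual MCAN style (a sketch)
    def forward(self, x, x_mask):
        # x: [B, T, HIDDEN_SIZE]; x_mask: [B, 1, 1, T], True at padding
        att = self.mlp(x)  # [B, T, FLAT_GLIMPSES]
        att = att.masked_fill(x_mask.squeeze(1).squeeze(1).unsqueeze(2), -1e9)
        att = F.softmax(att, dim=1)  # normalize over the sequence

        att_list = []
        for i in range(self.__C.FLAT_GLIMPSES):
            # weighted sum of features under glimpse i: [B, HIDDEN_SIZE]
            att_list.append(torch.sum(att[:, :, i:i + 1] * x, dim=1))

        # concatenate glimpses and project to FLAT_OUT_SIZE
        return self.linear_merge(torch.cat(att_list, dim=1))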
Example #5
    def __init__(self, __C, q_emb, token_size, answer_size):
        super(QNet, self).__init__()

        # self.attflat_lang = AttFlat(__C)

        self.mlp = MLP(
            in_size=__C.FLAT_OUT_SIZE,  # 1024
            mid_size=__C.FLAT_OUT_SIZE,  # 1024
            out_size=answer_size,
            dropout_r=__C.DROPOUT_R,
            use_relu=True)

        self.proj_norm = LayerNorm(answer_size)

        self.proj = nn.Linear(answer_size, answer_size)
Example #6
    def __init__(self, __C, img_feat_size, ques_att_feat_size, gen_func):
        super(IAtt, self).__init__()
        self.__C = __C
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        # fuse image and attended-question features with MFB pooling
        self.mfb = MFB(__C, img_feat_size, ques_att_feat_size, True)
        self.mlp = MLP(in_size=__C.MFB_O,
                       mid_size=__C.HIDDEN_SIZE,
                       out_size=__C.I_GLIMPSES,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)

        # 'tvmax' is dispatched by name because it needs two transforms
        # (TV2D smoothing followed by sparsemax); any other generator is
        # stored directly as a callable
        if str(gen_func) == 'tvmax':
            self.gen_func = 'tvmax'
            self.sparsemax = partial(sparsemax, k=512)
            self.tvmax = TV2DFunction.apply
        else:
            self.gen_func = gen_func
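
How the stored generator is applied is not shown in this snippet. Since torch.softmax and entmax-style sparsemax share a (tensor, dim) calling convention, most generators can be stored and invoked as plain callables; 'tvmax' is the exception because it chains 2D total-variation smoothing with sparsemax. A hedged sketch of the dispatch a forward pass might perform (the helper name and the 14x14 reshape are assumptions):

    # hypothetical dispatch helper, assuming a 14x14 feature grid
    def _apply_gen_func(self, scores):
        # scores: [B, 196] attention logits over the grid
        if self.gen_func == 'tvmax':
            grid = scores.view(-1, 14, 14)
            # TV2D smooths each sample's 2D score map before normalizing
            grid = torch.stack([self.tvmax(g) for g in grid.unbind(0)])
            return self.sparsemax(grid.view(scores.size(0), -1), dim=-1)
        # e.g. torch.softmax(scores, dim=-1)
        return self.gen_func(scores, dim=-1)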
Example #7
    def __init__(self, __C, gen_func=torch.softmax):
        super(AttFlatText, self).__init__()
        self.__C = __C

        self.gen_func = gen_func

        if str(gen_func) == 'tvmax':
            self.sparsemax = partial(sparsemax, k=512)
            self.tvmax = TV2DFunction.apply

        self.mlp = MLP(in_size=__C.HIDDEN_SIZE,
                       mid_size=__C.FLAT_MLP_SIZE,
                       out_size=__C.FLAT_GLIMPSES,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)

        self.linear_merge = nn.Linear(__C.HIDDEN_SIZE * __C.FLAT_GLIMPSES,
                                      __C.FLAT_OUT_SIZE)
Example #8
    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        super(Net, self).__init__()
        copy_data = __C  # keep a handle to the config object
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )

        self.mlp = MLP(
            in_size=__C.HIDDEN_SIZE,
            mid_size=__C.FLAT_MLP_SIZE,
            out_size=__C.FLAT_GLIMPSES,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )

        # Loading the GloVe embedding weights
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True
        )

        # project image features to the 2048-d size the co-attention
        # modules expect (sizes are hard-coded in this snippet)
        self.img_feat_linear = nn.Linear(
            __C.IMG_FEAT_SIZE,
            2048
        )

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        self.proj_norm = LayerNorm(1024)
        self.proj = nn.Linear(1024, answer_size)

        # dense co-attention branch; 3129 is the VQA v2 answer-vocabulary size
        self.dense_coattn = DCNLayer(2048, 1024, 4, 3, 5, 0.3)
        self.predict = PredictLayer(2048, 1024, 4, 3129, 0.3)

        self.apply(Initializer.xavier_normal)
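
Example #8 declares both an MCAN backbone (MCA_ED plus the AttFlat heads) and a dense co-attention branch. For orientation, a typical MCAN-style forward over the declared modules looks roughly like the sketch below; the mask arguments are assumed boolean padding masks, and the DCN branch is omitted:

    import torch

    # rough MCAN-style forward over the modules declared above (a sketch,
    # not the original code)
    def forward(self, img_feat, ques_ix, lang_mask, img_mask):
        lang_feat = self.embedding(ques_ix)        # [B, T, WORD_EMBED_SIZE]
        lang_feat, _ = self.lstm(lang_feat)        # [B, T, HIDDEN_SIZE]
        img_feat = self.img_feat_linear(img_feat)  # [B, R, 2048]

        # co-attention backbone, then flatten each modality to one vector
        lang_feat, img_feat = self.backbone(lang_feat, img_feat,
                                            lang_mask, img_mask)
        lang_feat = self.attflat_lang(lang_feat, lang_mask)
        img_feat = self.attflat_img(img_feat, img_mask)

        proj_feat = self.proj_norm(lang_feat + img_feat)
        return torch.sigmoid(self.proj(proj_feat))  # per-answer scores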
Example #9
    def __init__(self, __C, gen_func=torch.softmax):
        super(AttFlat, self).__init__()
        self.__C = __C

        self.attention = __C.attention
        self.gen_func = gen_func

        if str(gen_func) == 'tvmax':
            self.sparsemax = partial(sparsemax, k=512)
            self.tvmax = TV2DFunction.apply

        self.mlp = MLP(in_size=__C.HIDDEN_SIZE,
                       mid_size=__C.FLAT_MLP_SIZE,
                       out_size=__C.FLAT_GLIMPSES,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)

        self.linear_merge = nn.Linear(__C.HIDDEN_SIZE * __C.FLAT_GLIMPSES,
                                      __C.FLAT_OUT_SIZE)

        # pick the continuous transform for the attention density; the basis
        # functions are supplied later through 'psi'
        if self.attention == 'cont-sparsemax':
            self.transform = ContinuousSparsemax(psi=None)
        else:
            self.transform = ContinuousSoftmax(psi=None)

        device = 'cuda'  # hard-coded: the tensors below are precomputed on GPU

        # compute F and G offline for one length = 14*14 = 196
        # index 0 is a placeholder so the entry for this length sits at index 1
        self.Gs = [None]
        self.psi = [None]
        max_seq_len = 14 * 14  # 196 grid features
        attn_num_basis = 100  # 100 basis functions
        nb_waves = attn_num_basis
        self.psi.append([])
        self.add_gaussian_basis_functions(self.psi[1], nb_waves, device=device)

        # stack basis functions: with padding, sample a 28x28 grid over
        # [-0.5, 1.5]^2 so the real 14x14 grid sits at the centre
        padding = True
        length = max_seq_len
        shift = 1 / float(2 * math.sqrt(length))
        if padding:
            positions_x = torch.linspace(-0.5 + shift, 1.5 - shift,
                                         int(2 * math.sqrt(length)))
        else:
            positions_x = torch.linspace(shift, 1 - shift,
                                         int(math.sqrt(length)))
        positions_x, positions_y = torch.meshgrid(positions_x, positions_x)
        positions_x = positions_x.flatten()
        positions_y = positions_y.flatten()

        # stack (x, y) coordinates into a [P, 2, 1] tensor of positions
        positions = torch.zeros(len(positions_x), 2, 1).to(device)
        for i in range(len(positions_x)):
            positions[i] = torch.tensor([[positions_x[i]], [positions_y[i]]])

        # F holds every basis function evaluated at every grid position:
        # [N, P, 1, 1], where P = 784 (the padded 28x28 grid) in this setup
        F = torch.zeros(nb_waves,
                        positions.size(0)).unsqueeze(2).unsqueeze(3).to(device)
        # positions: [P, 2, 1]; basis_functions.evaluate(pos) -> [N, 1, 1]
        basis_functions = self.psi[1][0]

        # evaluate every basis function at every grid position
        for i in range(positions.size(0)):
            F[:, i] = basis_functions.evaluate(positions[i])[:]

        penalty = .01  # ridge penalty
        I = torch.eye(nb_waves).to(device)
        F = F.squeeze(-2).squeeze(-1)  # [N, P]
        # closed-form ridge solution: G = F^T (F F^T + penalty*I)^{-1} -> [P, N]
        G = F.t().matmul((F.matmul(F.t()) + penalty * I).inverse())
        if padding:
            # drop the padding border: of the 784 rows, keep only the 196 that
            # correspond to the central 14x14 block of real grid positions
            G = G[length:-length, :]
            G = torch.cat([
                G[7:21, :], G[35:49, :], G[63:77, :], G[91:105, :],
                G[119:133, :], G[147:161, :], G[175:189, :], G[203:217, :],
                G[231:245, :], G[259:273, :], G[287:301, :], G[315:329, :],
                G[343:357, :], G[371:385, :]
            ])

        self.Gs.append(G.to(device))
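
The precomputed G is the closed-form ridge-regression solution, so multiplying per-position values by G yields the coefficients of the Gaussian basis expansion that best reconstructs those values. An illustrative use inside a forward pass (the variable names here are assumptions, not the original code):

    # scores: discrete attention values on the 14x14 grid; B are the basis
    # coefficients of a continuous approximation value(t) ~ B @ psi(t)
    scores = torch.rand(8, 196, device='cuda')  # e.g. a batch of 8
    B = torch.matmul(scores, self.Gs[1])        # [8, attn_num_basis]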