Example 1
    def __init__(self, dataset, feature_size, vocab_size):
        super().__init__()
        self.dataset = dataset
        self.feature_size = feature_size
        hidden_size = args.hid_dim
        self.hidden_size = hidden_size
        self.emb_dim = args.emb_dim

        # ResNet feature extractor: drop the final avgpool and fc layers to
        # keep the spatial feature map, and freeze its weights.
        resnet_extractor = models.resnet101(pretrained=True)
        modules = list(resnet_extractor.children())[:-2]
        self.resnet_extractor = nn.Sequential(*modules)
        for p in self.resnet_extractor.parameters():
            p.requires_grad = False

        # ResNet Feat Mean and Std
        self.feat_mean = torch.from_numpy(dataset.feat_mean).cuda()
        self.feat_std = torch.from_numpy(dataset.feat_std).cuda()

        # Image Encoder
        self.pre_fc = LinearAct(self.feature_size, self.hidden_size, 'relu')

        # Lang Encoder
        self.w_emb = nn.Embedding(vocab_size, self.emb_dim)
        self.drop = nn.Dropout(args.dropout)
        self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)

        # Decision head: fuse language and image features (3 * hidden_size)
        # into a single score.
        self.classify = nn.Sequential(
            LinearAct(hidden_size * 3, hidden_size, 'tanh'), self.drop,
            LinearAct(hidden_size, 1, bias=False))
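
Every example here calls a LinearAct helper, and Examples 1 and 6-8 also read a module-level args config (hid_dim, emb_dim, dropout); neither is defined in these snippets. Below is only a minimal sketch of one plausible implementation, assuming LinearAct is a Linear layer with an optional activation and that args is a plain namespace; the activation handling and the config values are assumptions, not the original code.

import argparse

import torch.nn as nn


class LinearAct(nn.Module):
    """Sketch only (assumption): Linear layer followed by an optional activation."""

    def __init__(self, in_dim, out_dim, act=None, bias=True):
        super().__init__()
        self.fc = nn.Linear(in_dim, out_dim, bias=bias)
        # 'relu' and 'tanh' are the activations the examples actually request.
        self.act = {'relu': nn.ReLU(), 'tanh': nn.Tanh(), None: nn.Identity()}[act]

    def forward(self, x):
        return self.act(self.fc(x))


# Hypothetical stand-in for the module-level `args` read by Examples 1 and 6-8;
# the values here are placeholders, not the original configuration.
args = argparse.Namespace(hid_dim=512, emb_dim=256, dropout=0.5)
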
Example 2
    def __init__(self, dim, hid_dim=None):
        super().__init__()
        if hid_dim is None:
            hid_dim = dim
        self.dim = dim
        self.hid_dim = hid_dim
        # Query and key projections (no bias), plus a fusion layer that
        # merges three dim-sized vectors back down to dim.
        self.q_fc = LinearAct(dim, hid_dim, bias=False)
        self.k_fc = LinearAct(dim, hid_dim, bias=False)
        self.mix = LinearAct(3 * dim, dim, 'relu', bias=False)
Example 3
    def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
        super().__init__()

        self.hid_dim = hid_dim
        if out_dim is None:
            out_dim = hid_dim

        self.query_linear = LinearAct(query_dim, hid_dim)
        self.src_linear = LinearAct(feat_dim, hid_dim)
        # The target projection shares weights with the source projection.
        self.trg_linear = self.src_linear

        self.out_linear = LinearAct(hid_dim + query_dim, out_dim)
Example 4
    def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
        super().__init__()

        self.hid_dim = hid_dim
        if out_dim is None:
            out_dim = hid_dim

        self.query_linear = LinearAct(query_dim, hid_dim, bias=False)
        self.src_linear = LinearAct(feat_dim, hid_dim, bias=False)
        self.trg_linear = self.src_linear

        # Output layer fuses four hid_dim-sized features with the raw query.
        self.out_linear = LinearAct(hid_dim * 4 + query_dim,
                                    out_dim,
                                    act='relu',
                                    bias=False)
Example 5
    def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
        super().__init__()

        self.hid_dim = hid_dim
        if out_dim is None:
            out_dim = hid_dim

        self.query_linear = LinearAct(query_dim, hid_dim)
        self.src_linear = LinearAct(feat_dim, hid_dim)
        self.trg_linear = self.src_linear

        # Learnable scalar weight (replaces the earlier fixed alpha = 1.).
        self.alpha = nn.Parameter(torch.ones(1))

        self.out_linear = LinearAct(hid_dim * 2 + query_dim,
                                    out_dim,
                                    act='relu')
Example 6
    def __init__(self, ntoken, ctx_size):
        super().__init__()
        hidden_size = args.hid_dim
        self.hidden_size = hidden_size
        self.ctx_size = ctx_size
        self.emb_dim = args.emb_dim

        self.img_fc = LinearAct(ctx_size, hidden_size, 'tanh')
        self.w_emb = nn.Embedding(ntoken, self.emb_dim)
        self.drop = nn.Dropout(args.dropout)
        self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)
        self.att = DynamicAttention2(
            query_dim=hidden_size,
            feat_dim=hidden_size,
            hid_dim=hidden_size,
            out_dim=hidden_size
        )
        # Project hidden features to vocabulary logits for word prediction.
        self.projection = LinearAct(hidden_size, ntoken)
Example 7
    def __init__(self, ntoken, ctx_size, heads=1):
        super().__init__()
        hidden_size = args.hid_dim
        self.hidden_size = hidden_size
        self.ctx_size = ctx_size
        self.emb_dim = args.emb_dim
        self.heads = heads

        self.img_fc = LinearAct(ctx_size, hidden_size, 'tanh')
        self.w_emb = nn.Embedding(ntoken, self.emb_dim)
        self.drop = nn.Dropout(args.dropout)
        self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)
        # self.attention_layers = []
        # for _ in range(self.heads):
        #     self.attention_layers.append(
        #         TransformerAttention(hidden_size, ctx_size, hidden_size, merge_info=True).cuda()
        #     )
        # One attention module per head, registered in an nn.ModuleList so
        # their parameters are tracked and moved with the model.
        self.attention_layers = nn.ModuleList([
            OneAttention(hidden_size, hidden_size) for _ in range(self.heads)
        ])
        self.projection = LinearAct(hidden_size, ntoken)
Example 8
    def __init__(self, ntoken, ctx_size, heads=2):
        super().__init__()
        hidden_size = args.hid_dim
        self.hidden_size = hidden_size
        self.ctx_size = ctx_size
        self.emb_dim = args.emb_dim
        self.heads = heads

        self.img_fc = LinearAct(ctx_size, hidden_size, 'tanh')
        self.w_emb = nn.Embedding(ntoken, self.emb_dim)
        self.drop = nn.Dropout(args.dropout)
        self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)
        # Uses both a DynamicAttention5 module and per-head OneAttention layers.
        self.att = DynamicAttention5(
            query_dim=hidden_size,
            feat_dim=hidden_size,
            hid_dim=hidden_size,
            out_dim=hidden_size
        )
        self.attention_layers = nn.ModuleList([
            OneAttention(hidden_size, hidden_size)
            for _ in range(self.heads)
        ])
        self.projection = LinearAct(hidden_size, ntoken)
Example 9
    def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
        super().__init__()

        self.hid_dim = hid_dim
        if out_dim is None:
            out_dim = hid_dim

        self.query_linear = LinearAct(query_dim, hid_dim)
        # Unlike the variants above, source and target use separate (untied)
        # key projections, each with its own value projection.
        self.src_linear = LinearAct(feat_dim, hid_dim)
        self.trg_linear = LinearAct(feat_dim, hid_dim)

        self.src_value_linear = LinearAct(feat_dim, hid_dim)
        self.trg_value_linear = LinearAct(feat_dim, hid_dim)

        self.out_linear = LinearAct(hid_dim * 2 + query_dim,
                                    out_dim,
                                    act='relu')
Example 10
    def __init__(self, dim):
        super().__init__()
        # Two-layer MLP that scores a dim-sized feature with a single scalar.
        self.fc = nn.Sequential(LinearAct(dim, dim // 2, 'relu', bias=False),
                                LinearAct(dim // 2, 1, bias=False))
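
For completeness, a fragment like Example 10 could be wrapped into a full nn.Module and used roughly as follows. The class name FeatureScorer, the forward pass, and the tensor shapes are illustrative assumptions, not part of the original code, and the snippet reuses the LinearAct sketch given after Example 1.

import torch
import torch.nn as nn


class FeatureScorer(nn.Module):  # hypothetical wrapper around the Example 10 fragment
    def __init__(self, dim):
        super().__init__()
        # Two-layer MLP producing one scalar score per feature vector.
        self.fc = nn.Sequential(LinearAct(dim, dim // 2, 'relu', bias=False),
                                LinearAct(dim // 2, 1, bias=False))

    def forward(self, x):
        # x: (batch, num_feats, dim) -> (batch, num_feats, 1)
        return self.fc(x)


scorer = FeatureScorer(dim=512)
scores = scorer(torch.randn(4, 36, 512))  # assumed shapes, for illustration only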