Example #1
    def forward(self, en, cn):
        # Embed the en token ids; project the cn features with a linear layer
        en_embed = self.embedding(en)
        cn = self.linear(cn)

        # Use the LSTM's final hidden state as the sentence embedding
        _, (en_embed, _) = self.lstm(en_embed)
        en_embed = en_embed.squeeze(0)  # (1, B, H) -> (B, H), single-layer LSTM

        return l2norm(en_embed), l2norm(cn)
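
Every example on this page calls an l2norm helper that is not shown. A minimal sketch of what it presumably does (row-wise L2 normalization; the eps guard is an addition for numerical safety, and the project's actual implementation may differ):

import torch

def l2norm(x, eps=1e-8):
    # Normalize each row of a 2-D tensor to unit L2 norm;
    # the (B, 1) norm broadcasts over the feature dimension.
    norm = x.norm(p=2, dim=1, keepdim=True)
    return x / (norm + eps)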
Example #2
    def forward(self, en, en_lengths, en_index, cn, cn_lengths, cn_index):
        """
        Input Variable:
            input_var: A variables whose size is (B,W), B is the batch size and W is the longest sequence length in the batch
            input_lengths: The lengths of each element in the batch.
            hidden: The hidden state variable whose size is (num_layer*num_directions,batch_size,hidden_size)
        Output:
            output: A variable with tensor size W*B*N, W is the maximum length of the batch, B is the batch size, and N is the hidden size
            hidden: The hidden state variable with tensor size (num_layer*num_direction,B,N)
        """
        en = self.sorted_forward(en, en_lengths, en_index)
        cn = self.sorted_forward(cn, cn_lengths, cn_index)

        return l2norm(en), l2norm(cn)
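
The sorted_forward helper is not shown either. Judging from the lengths/index arguments, it encodes a length-sorted batch and then restores the original order. A hedged sketch of that pattern, reusing the embedding and lstm modules from the other examples (one plausible implementation, not the project's actual code):

    def sorted_forward(self, x, x_lengths, x_index):
        # Requires: from torch.nn.utils.rnn import pack_padded_sequence
        # x is assumed to be pre-sorted by decreasing length;
        # x_index maps the sorted rows back to their original positions.
        embedded = self.embedding(x)
        packed = pack_padded_sequence(embedded, x_lengths, batch_first=True)
        _, (hidden, _) = self.lstm(packed)
        hidden = hidden.squeeze(0)  # (1, B, H) -> (B, H)
        return hidden[x_index]      # undo the length-sorting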
Example #3
import numpy as np
import torch


def txt2image(captions, images, npts=None, verbose=False):
    """ Text->Image retrieval (each image has 5 captions).
    :param captions: (5N, K) matrix of caption embeddings
    :param images:  (N, K) matrix of image embeddings
    :param npts: number of image-caption groups; defaults to N
    :param verbose: if True, print the scores
    :return: Recall & Median Rank (r1, r5, r10, medr)
    """
    if npts is None:
        npts = images.size()[0]

    ranks = np.zeros(5 * npts)
    images = l2norm(images)

    for index in range(npts):
        # Get query captions
        queries = captions[5 * index:5 * index + 5]
        # Compute scores
        d = torch.mm(queries, images.t())
        for i in range(d.size()[0]):
            _, inds = torch.sort(d[i], descending=True)
            inds = inds.data.squeeze(0).cpu().numpy()
            # Position of the ground-truth image in the sorted result
            ranks[5 * index + i] = np.where(inds == index)[0][0]

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1  # median rank, 1-based

    if verbose:
        print(
            "		* Text to image scores: R@1: %.1f, R@5: %.1f, R@10: %.1f, Medr: %.1f"
            % (r1, r5, r10, medr))
    return (r1, r5, r10, medr)
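
A quick smoke test for txt2image with random embeddings (shapes follow the docstring; plain tensors in recent PyTorch work where this code originally expected Variables, and random inputs should score near chance):

import torch

N, K = 100, 256
images = torch.randn(N, K)        # one embedding per image
captions = torch.randn(5 * N, K)  # five captions per image

r1, r5, r10, medr = txt2image(captions, images, verbose=True)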
Example #4
def image2txt(images, captions, npts=None, verbose=False):
    """ Image->Text retrieval (each image has 5 captions).
    :param images:   (N, K) matrix of image embeddings
    :param captions:  (5N, K) matrix of caption embeddings
    :param npts: number of image-caption groups; defaults to N
    :param verbose: if True, print the scores
    :return: Recall & Median Rank (r1, r5, r10, medr)
    """
    if npts is None:
        npts = images.size()[0]

    ranks = np.zeros(npts)  # Each Image's first rank
    captions = l2norm(captions)  # L2Norm
    for index in range(npts):
        # Get query Image
        im = images[index]
        im = im.unsqueeze(0)
        # Compute scores
        im = l2norm(im)
        d = torch.mm(im, captions.t())
        _, inds = torch.sort(d, descending=True)  # scores and indices, largest first
        inds = inds.data.squeeze(0).cpu().numpy()

        # Keep the best (lowest) rank among this image's 5 captions
        rank = 1e20
        for i in range(5 * index, 5 * index + 5):
            tmp = np.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp

        ranks[index] = rank

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1  # median rank, 1-based

    if verbose:
        print(
            "		* Image to text scores: R@1: %.1f, R@5: %.1f, R@10: %.1f, Medr: %.1f"
            % (r1, r5, r10, medr))
    return (r1, r5, r10, medr)
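
image2txt mirrors txt2image but keeps only the best (lowest) rank among each image's five ground-truth captions, so R@K counts a hit if any of the five lands in the top K. It runs on the same tensors as the smoke test above:

r1, r5, r10, medr = image2txt(images, captions, verbose=True)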
Example #5
    def forward_sens(self, x):
        # Embed the tokens and use the LSTM's final hidden state
        # as the sentence embedding
        x_emb = self.embedding(x)

        _, (x_emb, _) = self.lstm(x_emb)
        x_cat = x_emb.squeeze(0)  # (1, B, H) -> (B, H), single-layer LSTM
        return l2norm(x_cat)
Example #6
    def forward_sens(self, x, x_lengths):
        x_cat = self.single_forward(x, x_lengths)
        return l2norm(x_cat)