Esempio n. 1
0
class RefDataset(Dataset):
    """RefCOCO+ referring expressions served from precomputed embeddings.

    Each item is a triple ``(image_embed, text_embed, bound_box)`` for one
    referred object: the stored embedding of its image, the stored embedding
    of one randomly chosen sentence describing it, and its box as returned by
    ``REFER.getRefBox`` with entries 0 and 2 divided by the image width and
    entries 1 and 3 divided by the image height.
    """

    def __init__(self, split):
        """Load the reference ids for ``split`` and the embedding arrays.

        The ``.npy`` files are read from ``data/embeddings`` relative to the
        current working directory.

        Args:
            split: Split name forwarded to ``REFER.getRefIds``.

        Raises:
            AssertionError: If the concatenated text embeddings do not hold
                141564 entries, or ``text_embeds[0].shape[1]`` is not 3072.
        """
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds(split=split)

        embed_dir = os.path.join("data", "embeddings")
        self.image_embeds = np.load(
            os.path.join(embed_dir, "FINALImageEmbeddings.npy"))
        self.image_ids = list(
            np.load(os.path.join(embed_dir, "FINALImageIDs.npy")))
        # Row lookup table so __getitem__ does not scan image_ids linearly on
        # every sample. setdefault keeps the FIRST row for a duplicated id,
        # which is what list.index() would have returned.
        self._image_row_by_id = {}
        for row, known_id in enumerate(self.image_ids):
            self._image_row_by_id.setdefault(known_id, row)

        # The text embeddings are stored in two files; time the load because
        # it is reported to the console below.
        before_text_embeds = time.time()
        first_half = np.load(
            os.path.join(embed_dir, "FINALTextEmbeddings1of2.npy"))
        second_half = np.load(
            os.path.join(embed_dir, "FINALTextEmbeddings2of2.npy"))
        self.text_embeds = np.concatenate((first_half, second_half), axis=0)
        after_text_embeds = time.time()
        print("Text Embedding Time: ", after_text_embeds - before_text_embeds)

        # Sanity checks tied to the specific embedding dump this was written
        # for (sentence count and feature width).
        assert (len(self.text_embeds) == 141564)
        assert (self.text_embeds[0].shape[1] == 3072)
        print('Found {} referred objects in {} split.'.format(
            len(self.ref_ids), split))

    def __len__(self):
        """Return the number of referred objects in the loaded split."""
        return len(self.ref_ids)

    def __getitem__(self, i):
        """Return ``(image_embed, text_embed, bound_box)`` for reference ``i``.

        Args:
            i: Position in ``self.ref_ids``.

        Returns:
            A tuple of
            * ``image_embed`` - the NumPy slice ``image_embeds[row, :, :, :]``
              for the reference's image,
            * ``text_embed`` - a tensor created with ``torch.from_numpy`` from
              ``text_embeds[sent_id]`` of a randomly chosen sentence,
            * ``bound_box`` - a ``torch.Tensor`` built from ``getRefBox`` and
              divided by width/height as described on the class.

        Raises:
            ValueError: If the reference's image id has no stored embedding.
        """
        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]

        image_id = ref['image_id']
        image = self.refer.Imgs[image_id]
        try:
            image_idx = self._image_row_by_id[image_id]
        except KeyError:
            # Same exception type list.index() raised before the lookup table.
            raise ValueError(f"{image_id!r} is not in list") from None
        image_embed = self.image_embeds[image_idx, :, :, :]

        width = image['width']
        height = image['height']
        bound_box = torch.Tensor(self.refer.getRefBox(ref_id))
        # Presumably the box is [x, y, w, h]; horizontal entries are scaled by
        # the image width and vertical entries by the image height.
        for pos, extent in enumerate((width, height, width, height)):
            bound_box[pos] /= extent

        # A reference can carry several sentences; pick one at random and use
        # its sent_id directly as the row of the text-embedding array.
        sent = random.choice(ref['sentences'])
        text_embed = torch.from_numpy(self.text_embeds[sent['sent_id']])

        return image_embed, text_embed, bound_box
Esempio n. 2
0
class RefDataset(Dataset):
    """Dataset over every RefCOCO+ reference, yielding one sentence each.

    Item ``i`` is the raw text and sentence id of the last sentence attached
    to the ``i``-th reference id returned by ``REFER.getRefIds()``.
    """

    def __init__(self):
        """Open the RefCOCO+ (``unc`` split-by) annotations and list all refs."""
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds()

    def __len__(self):
        """Return the number of reference ids."""
        return len(self.ref_ids)

    def __getitem__(self, i):
        """Return ``(raw_sentence, sent_id)`` for the reference at position ``i``.

        Only the last entry of the reference's ``'sentences'`` list is used.
        NOTE(review): earlier sentences are ignored - confirm that selecting
        the last one (rather than all, or a random one) is intended.

        Raises:
            ValueError: If the reference has no sentences. ``IndexError`` is
                deliberately not used, since it would end sequence-style
                iteration over the dataset silently.
        """
        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]

        sentences = ref['sentences']
        if not sentences:
            raise ValueError(
                'reference {} has no sentences'.format(ref_id))

        last_sentence = sentences[-1]
        return last_sentence['raw'], last_sentence['sent_id']