Example #1
0
    def __getitem__(self, idx):
        '''Load one image together with its processed attention map.

        Args:
          idx: (int) image index.

        Returns:
          img: image after resize, center crop and self.transform.
          img_path: (str) path the image was opened from.
          id_: entry of self.ids at idx.
          att_map: attention map after resize, center crop,
            self.transform_att, scaling by 100, flooring and self.thresh.
        '''
        side = self.input_size
        img_path = os.path.join(self.root, self.fnames[idx])
        image = Image.open(img_path)

        # The attention map is computed from the image exactly as opened,
        # i.e. before any conversion to RGB. The second value returned by
        # get_att is not used here.
        attention, _ = self.get_att.get_att(image)

        if image.mode != 'RGB':
            image = image.convert('RGB')

        # All-zero boxes handed to the geometry helpers; presumably ignored
        # when test_flag=True -- TODO confirm against resize/center_crop.
        dummy_boxes = torch.zeros(2, 4)
        crop_shape = (side, side)

        image = resize(image, dummy_boxes, side, test_flag=True)
        attention = resize(attention, dummy_boxes, side, test_flag=True)
        image = center_crop(image, dummy_boxes, crop_shape, test_flag=True)
        attention = center_crop(attention, dummy_boxes, crop_shape,
                                test_flag=True)

        image = self.transform(image)
        attention = self.transform_att(attention)
        # Scale by 100 and floor before thresholding.
        attention = self.thresh(torch.floor(attention * 100))

        sample_id = self.ids[idx]
        return image, img_path, sample_id, attention
Example #2
0
    def __getitem__(self, idx):
        '''Load an (anchor, positive, negative) triplet of images.

        The anchor is the sample at idx. The positive is a different sample
        whose entry in self.ids equals the anchor's; if no such sample
        exists the anchor itself is reused. The negative is a sample whose
        entry in self.ids differs from the anchor's.

        Args:
          idx: (int) index of the anchor image.

        Returns:
          tri_img: (list) the three images after resize, center crop and
            self.transform, in anchor/positive/negative order.
          tri_img_path: (list) the three image paths.
          triplet_id: (list) dataset indices [anchor, positive, negative].
          tri_att_map: (list) the three processed attention maps.

        Raises:
          ValueError: if every sample shares the anchor's id, so no
            negative can be drawn.
        '''
        anchor_id = self.ids[idx]

        # Split all other indices into same-id and different-id candidates.
        pos_candidates = []
        neg_candidates = []
        for i, sample_id in enumerate(self.ids):
            if sample_id != anchor_id:
                neg_candidates.append(i)
            elif i != idx:
                pos_candidates.append(i)

        if not neg_candidates:
            raise ValueError(
                'cannot sample a negative: all samples share the id of '
                'index %d' % idx)

        pos_idx = random.choice(pos_candidates) if pos_candidates else idx
        neg_idx = random.choice(neg_candidates)
        triplet_id = [idx, pos_idx, neg_idx]

        size = self.input_size
        tri_img = []
        tri_img_path = []
        tri_att_map = []
        for get_idx in triplet_id:
            img_path = os.path.join(self.root, self.fnames[get_idx])
            tri_img_path.append(img_path)
            img = Image.open(img_path)
            # The attention map is computed from the image as opened,
            # before any RGB conversion; the second return value is unused.
            att_map, _ = self.get_att.get_att(img)
            if img.mode != 'RGB':
                img = img.convert('RGB')

            # All-zero boxes handed to the geometry helpers; presumably
            # ignored when test_flag=True -- TODO confirm.
            boxes = torch.zeros(2, 4)
            img = resize(img, boxes, size, test_flag=True)
            att_map = resize(att_map, boxes, size, test_flag=True)
            img = center_crop(img, boxes, (size, size), test_flag=True)
            att_map = center_crop(att_map, boxes, (size, size),
                                  test_flag=True)

            img = self.transform(img)
            tri_img.append(img)

            att_map = self.transform_att(att_map)
            # Scale by 100 and floor before thresholding.
            att_map = torch.floor(100 * att_map)
            att_map = self.thresh(att_map)
            tri_att_map.append(att_map)

        return tri_img, tri_img_path, triplet_id, tri_att_map
Example #3
0
    def __getitem__(self, index):
        '''Load one image with its encoded text and bounding boxes.

        Args:
          index: (int) sample index.

        Returns:
          texts_encoded: entry of self.texts_encoded at index.
          img: image after self.transform, rescaled as 2 * img - 1.
          boxes: boxes returned by resize (training) or by resize followed
            by center_crop (evaluation).
        '''
        img_path = self.fnames[index]
        bbox = self.bboxs[index]
        texts_encoded = self.texts_encoded[index]

        # loading img
        img = Image.open(os.path.join(self.path_img_folder, img_path))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        target_size = (self.figsize, self.figsize)

        # Both modes start with the same resize; random flip/crop
        # augmentation is not applied in training.
        img, boxes = resize(img, bbox, target_size)
        if not self.train:
            # Crop with the boxes produced by resize, not the original
            # bbox, so boxes and image stay in the same coordinate frame.
            img, boxes = center_crop(img, boxes, target_size)

        img = self.transform(img)
        # scale to -1~1 (assumes self.transform yields values in 0~1)
        img = 2 * img - 1

        return texts_encoded, img, boxes
Example #4
0
    def __getitem__(self, idx):
        '''Load one sample: image, bounding boxes and labels.

        Args:
          idx: (int) image index.

        Returns:
          img: image after augmentation/resizing and self.transform.
          boxes: boxes as returned by the geometry helpers.
          labels: entry of self.labels at idx.
        '''
        image_file = os.path.join(self.root, self.fnames[idx])
        image = Image.open(image_file)
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Work on a copy so the stored boxes are never modified.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        side = self.input_size
        square = (side, side)

        if not self.train:
            # Evaluation: resize with the scalar size, then center crop.
            image, boxes = resize(image, boxes, side)
            image, boxes = center_crop(image, boxes, square)
        else:
            # Training: random flip and crop, then resize to a square.
            image, boxes = random_flip(image, boxes)
            image, boxes = random_crop(image, boxes)
            image, boxes = resize(image, boxes, square)

        return self.transform(image), boxes, labels
Example #5
0
    def __getitem__(self, idx):
        '''Fetch the image, boxes and labels stored at one index.

        Args:
          idx: (int) image index.

        Returns:
          img: image after augmentation/resizing and self.transform.
          boxes: boxes as returned by the geometry helpers.
          labels: entry of self.labels at idx.
        '''
        picture = Image.open(os.path.join(self.root, self.fnames[idx]))
        if picture.mode != 'RGB':
            picture = picture.convert('RGB')

        # Clone so the augmentation below cannot touch self.boxes.
        targets = self.boxes[idx].clone()
        labels = self.labels[idx]
        edge = self.input_size

        if self.train:
            # Training pipeline: flip, crop, resize to (edge, edge).
            for augment in (random_flip, random_crop):
                picture, targets = augment(picture, targets)
            picture, targets = resize(picture, targets, (edge, edge))
        else:
            # Evaluation pipeline: resize with the scalar size, center crop.
            picture, targets = resize(picture, targets, edge)
            picture, targets = center_crop(picture, targets, (edge, edge))

        picture = self.transform(picture)
        return picture, targets, labels
Example #6
0
    def __getitem__(self, idx):
        '''Load one image for evaluation.

        Args:
          idx: (int) image index.

        Returns:
          img: image after resize, center crop and self.transform.
          img_path: (str) path the image was opened from.
          id_: entry of self.ids at idx.
        '''
        side = self.input_size
        img_path = os.path.join(self.root, self.fnames[idx])

        image = Image.open(img_path)
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # All-zero boxes handed to the geometry helpers; presumably ignored
        # when test_flag=True -- TODO confirm against resize/center_crop.
        dummy_boxes = torch.zeros(2, 4)
        image = resize(image, dummy_boxes, side, test_flag=True)
        image = center_crop(image, dummy_boxes, (side, side), test_flag=True)
        image = self.transform(image)

        return image, img_path, self.ids[idx]
Example #7
0
    def __getitem__(self, index):
        '''Load one image and build a box-density map for it.

        Args:
          index: (int) image index.

        Returns:
          img: image after augmentation and, when set, self.transform.
          dense_map: (tensor) float32 map of shape
            [1, img.size()[1], img.size()[2]]; every kept box adds 1/area
            over the region it covers.
          box_num: (int) number of boxes whose area is not below 100.
        '''
        fname = os.path.join(self.im_pth, self.fnames[index])
        img = Image.open(fname)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # Clone so the augmentation below cannot touch self.boxes.
        boxes = self.boxes[index].clone()
        size = self.size
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, size)
        else:
            img, boxes = center_crop(img, boxes, size)
            img, boxes = resize(img, boxes, size)
        if self.transform is not None:
            img = self.transform(img)

        # NOTE(review): img.size() is called as a method, so this presumably
        # relies on self.transform having produced a tensor -- confirm.
        dense_map = torch.zeros([1, img.size()[1], img.size()[2]],
                                dtype=torch.float32)
        box_num = 0
        for box in boxes:
            area = (box[2] - box[0]) * (box[3] - box[1])
            # Boxes with an area below 100 are skipped.
            if area < 100.:
                continue
            box_num += 1
            x1, y1, x2, y2 = (box[i].type(torch.int32) for i in range(4))
            try:
                dense_map[:, y1:y2, x1:x2] += 1 / area
            except Exception:
                # Best effort: report the offending box and keep going.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                print(fname, dense_map.size())
                print(y1, y2, x1, x2, area)
        return img, dense_map, box_num
Example #8
0
    def __getitem__(self, idx):
        '''Load a three-band image assembled from three files, plus targets.

        The files '<path>_a.jpg', '<path>_b.jpg' and '<path>_c.jpg' are
        opened and merged, in that order, into one 'RGB' image.

        Args:
          idx: (int) image index.

        Returns:
          img: merged image after augmentation/resizing and self.transform.
          boxes: boxes as returned by the geometry helpers.
          labels: entry of self.labels at idx.
        '''
        fname = self.fnames[idx]

        # Files sit in a sub-directory named after the first two characters
        # of the file name; the path is the same for training and testing.
        image_path = self.root + '/' + fname[:2] + '/' + fname

        bands = tuple(
            Image.open(image_path + suffix)
            for suffix in ('_a.jpg', '_b.jpg', '_c.jpg')
        )
        img = Image.merge('RGB', bands)

        # Clone so the augmentation below cannot touch self.boxes.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size
        square = (size, size)

        if not self.train:
            # Evaluation: resize with the scalar size, then center crop.
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, square)
        else:
            # Training: random flip and crop, then resize to a square.
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, square)

        return self.transform(img), boxes, labels
Example #9
0
    def __getitem__(self, idx):
        '''Load one image and parse its boxes/labels from a text file.

        Each non-blank label line is read as
        'class x_center y_center width height', with the four geometry
        values multiplied by the image width/height and converted to
        (x_min, y_min, x_max, y_max).

        Args:
          idx: (int) image index.

        Returns:
          img: image after augmentation/resizing and self.transform.
          boxes: (tensor) boxes as returned by the geometry helpers.
          labels: (LongTensor) class index of every box.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')
        width, height = img.size

        # NOTE(review): the label path is derived from fname alone and is
        # not joined with self.root -- confirm fnames resolve as given.
        flabel = fname.replace('images/',
                               'labels/').replace('.jpg', '.txt').replace(
                                   '.png', '.txt').replace('.jpeg', '.txt')
        box = []
        label = []
        with open(flabel) as f:
            for line in f:
                ls = line.strip().split()
                if not ls:
                    # Skip blank lines (e.g. a trailing newline) instead of
                    # failing with IndexError.
                    continue
                x = float(ls[1]) * width
                y = float(ls[2]) * height
                w = float(ls[3]) * width
                h = float(ls[4]) * height
                box.append([x - w / 2, y - h / 2, x + w / 2, y + h / 2])
                label.append(int(ls[0]))

        # Keep an explicit (N, 4) shape so that an image without any box
        # gives a (0, 4) tensor rather than a 1-D empty one.
        # NOTE(review): whether the augmentation helpers accept zero boxes
        # is not visible here -- confirm.
        boxes = torch.Tensor(box).view(len(box), 4)
        labels = torch.LongTensor(label)
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels
Example #10
0
    def __getitem__(self, idx):
        '''Load one sample plus a binary attention map drawn from its boxes.

        Args:
          idx: (int) image index.

        Returns:
          img: image after augmentation/resizing and self.transform.
          boxes: boxes as returned by the geometry helpers.
          labels: entry of self.labels at idx.
          att_map: box mask resized to (size // 2, size // 2) and passed
            through self.transform.
          img_path: (str) path the image was opened from.
        '''
        img_path = os.path.join(self.root, self.fnames[idx])
        image = Image.open(img_path)
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Clone so the augmentation below cannot touch self.boxes.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        side = self.input_size

        # Rasterize the stored boxes into a 0/1 mask. The first array axis
        # is sliced with box[0]:box[2], the second with box[1]:box[3].
        # NOTE(review): confirm this matches the axis order of shape_list.
        src_shape = self.shape_list[idx]
        mask = np.zeros((src_shape[0], src_shape[1]))
        for att_box in boxes:
            lo0, lo1, hi0, hi1 = (int(att_box[k]) for k in range(4))
            mask[lo0:hi0, lo1:hi1] = 1

        # The mask is built before augmentation and is only resized below;
        # it does not go through random_flip/random_crop/center_crop.
        square = (side, side)
        if not self.train:
            image, boxes = resize(image, boxes, side)
            image, boxes = center_crop(image, boxes, square)
        else:
            image, boxes = random_flip(image, boxes)
            image, boxes = random_crop(image, boxes)
            image, boxes = resize(image, boxes, square)

        half = side // 2
        att_map = Image.fromarray(mask).resize((half, half), Image.BILINEAR)

        image = self.transform(image)
        att_map = self.transform(att_map)

        return image, boxes, labels, att_map, img_path