Exemple #1
0
    def __getitem__(self, index):
        """Build the training sample stored at position ``index``.

        The image is read from ``self.root``, handed to ``paste_img`` together
        with ``self.target_img`` (which yields the image and its annotations),
        then preprocessed and converted with ``self.image_transform``.

        Args:
            index (int): Position in ``self.ids``.

        Returns:
            tuple: ``(image, anns, meta)`` when ``self.target_transforms`` is
            ``None``; otherwise ``(image, targets, meta)`` where ``targets``
            holds one entry per target transform.
        """
        image_id = self.ids[index]
        image_info = self.coco.loadImgs(image_id)[0]
        self.log.debug(image_info)

        file_name = image_info['file_name']
        image_path = os.path.join(self.root, file_name)
        with open(image_path, 'rb') as image_file:
            # convert() forces the pixel data to be read while the file is open
            image = Image.open(image_file).convert('RGB')

        meta = dict(dataset_index=index, image_id=image_id, file_name=file_name)

        if 'flickr_url' in image_info:
            # the flickr id is the part of the url's file name before the first '_'
            _, url_tail = image_info['flickr_url'].rsplit('/', maxsplit=1)
            photo_id, _ = url_tail.split('_', maxsplit=1)
            meta['flickr_full_page'] = 'http://flickr.com/photo.gne?id={}'.format(photo_id)

        # annotations are produced by paste_img rather than loaded from COCO
        image, anns = paste_img(image, self.target_img, image_id)

        # joint preprocessing of image and annotations
        image, anns, preprocess_meta = self.preprocess(image, anns)
        meta.update(preprocess_meta)

        # remember the size before the transform; the transformed image must
        # keep it (size(2) is checked against entry 0, size(1) against entry 1)
        original_size = image.size
        image = self.image_transform(image)
        assert image.size(2) == original_size[0]
        assert image.size(1) == original_size[1]

        # mask everything outside the valid area reported by preprocessing
        utils.mask_valid_image(image, meta['valid_area'])

        self.log.debug(meta)
        if self.target_transforms is not None:
            targets = []
            for transform in self.target_transforms:
                targets.append(transform(anns, original_size))
            return image, targets, meta

        # no target transforms configured: hand back the raw annotations
        return image, anns, meta
Exemple #2
0
    def __getitem__(self, index):
        """Build the training sample stored at position ``index``.

        Every annotation of the image gets a fresh 240-entry ``'keypoints'``
        list (3 values per category slot). The triple returned by
        ``self.keypoints`` is written into the slot selected by
        ``self.dict[category_id]``; all other entries stay zero.

        Args:
            index (int): Position in ``self.ids``.

        Returns:
            tuple: ``(image, anns, meta)`` when ``self.target_transforms`` is
            ``None``; otherwise ``(image, targets, meta)`` where ``targets``
            holds one entry per target transform.
        """
        image_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=image_id)

        # Copy BEFORE editing: the dicts returned by loadAnns are deep-copied
        # first so that overwriting 'keypoints' below never touches the
        # annotation objects held by self.coco.
        anns = copy.deepcopy(self.coco.loadAnns(ann_ids))

        for ann in anns:
            # NOTE(review): presumably self.dict maps dataset category ids to
            # 1-based slots in 1..80 (240 = 3 x 80) -- confirm against caller.
            index_new = self.dict[ann['category_id']]

            # reset first, then ask self.keypoints for the (x, y, v) triple
            ann['keypoints'] = [0] * 240
            keypoint_x, keypoint_y, keypoint_v = self.keypoints(ann)

            # three consecutive entries per category slot
            index_for_keypoint_x = 3 * (index_new - 1)
            ann['keypoints'][index_for_keypoint_x] = keypoint_x
            ann['keypoints'][index_for_keypoint_x + 1] = keypoint_y
            ann['keypoints'][index_for_keypoint_x + 2] = keypoint_v

        image_info = self.coco.loadImgs(image_id)[0]
        self.log.debug(image_info)
        with open(os.path.join(self.root, image_info['file_name']), 'rb') as f:
            # convert() forces the pixel data to be read while the file is open
            image = Image.open(f).convert('RGB')

        meta = {
            'dataset_index': index,
            'image_id': image_id,
            'file_name': image_info['file_name'],
        }

        if 'flickr_url' in image_info:
            # the flickr id is the part of the url's file name before the first '_'
            _, flickr_file_name = image_info['flickr_url'].rsplit('/', maxsplit=1)
            flickr_id, _ = flickr_file_name.split('_', maxsplit=1)
            meta['flickr_full_page'] = 'http://flickr.com/photo.gne?id={}'.format(flickr_id)

        # preprocess image and annotations
        image, anns, preprocess_meta = self.preprocess(image, anns)
        meta.update(preprocess_meta)

        # transform image; the transformed image must keep the original size
        # (size(2) is checked against entry 0, size(1) against entry 1)
        original_size = image.size
        image = self.image_transform(image)
        assert image.size(2) == original_size[0]
        assert image.size(1) == original_size[1]

        # mask everything outside the valid area reported by preprocessing
        valid_area = meta['valid_area']
        utils.mask_valid_image(image, valid_area)

        # if there are no target transforms, done here
        self.log.debug(meta)
        if self.target_transforms is None:
            return image, anns, meta

        # transform targets
        targets = [t(anns, original_size) for t in self.target_transforms]
        return image, targets, meta
    def __getitem__(self, index):
        """Build the training sample stored at position ``index``.

        Every annotation of the image gets a 3-entry ``'keypoints'`` list.
        Annotations whose ``category_id`` equals ``self.cat_ids[0]`` receive
        the triple returned by ``self.keypoints``; all other annotations are
        treated as background and keep ``[0, 0, 0]``.

        Args:
            index (int): Position in ``self.ids``.

        Returns:
            tuple: ``(image, anns, meta)`` when ``self.target_transforms`` is
            ``None``; otherwise ``(image, targets, meta)`` where ``targets``
            holds one entry per target transform.
        """
        image_id = self.ids[index]
        # ids of all annotations that exist for this image
        ann_ids = self.coco.getAnnIds(imgIds=image_id)

        # Copy BEFORE editing: the dicts returned by loadAnns are deep-copied
        # first so that overwriting 'keypoints' below never touches the
        # annotation objects held by self.coco.
        anns = copy.deepcopy(self.coco.loadAnns(ann_ids))

        for ann in anns:
            # background default; also the state self.keypoints sees when called
            ann['keypoints'] = [0] * 3

            # self.cat_ids[0] is the single category selected for detection;
            # every other category is left as background
            if ann['category_id'] == self.cat_ids[0]:
                keypoint_x, keypoint_y, keypoint_v = self.keypoints(ann)
                ann['keypoints'] = [keypoint_x, keypoint_y, keypoint_v]

        image_info = self.coco.loadImgs(image_id)[0]
        self.log.debug(image_info)
        with open(os.path.join(self.root, image_info['file_name']), 'rb') as f:
            # convert() forces the pixel data to be read while the file is open
            image = Image.open(f).convert('RGB')

        meta = {
            'dataset_index': index,
            'image_id': image_id,
            'file_name': image_info['file_name'],
        }

        if 'flickr_url' in image_info:
            # the flickr id is the part of the url's file name before the first '_'
            _, flickr_file_name = image_info['flickr_url'].rsplit('/', maxsplit=1)
            flickr_id, _ = flickr_file_name.split('_', maxsplit=1)
            meta['flickr_full_page'] = 'http://flickr.com/photo.gne?id={}'.format(flickr_id)

        # preprocess image and annotations
        image, anns, preprocess_meta = self.preprocess(image, anns)
        meta.update(preprocess_meta)

        # transform image; the transformed image must keep the original size
        # (size(2) is checked against entry 0, size(1) against entry 1)
        original_size = image.size
        image = self.image_transform(image)
        assert image.size(2) == original_size[0]
        assert image.size(1) == original_size[1]

        # mask everything outside the valid area reported by preprocessing
        valid_area = meta['valid_area']
        utils.mask_valid_image(image, valid_area)

        # if there are no target transforms, done here
        self.log.debug(meta)
        if self.target_transforms is None:
            return image, anns, meta

        # transform targets
        targets = [t(anns, original_size) for t in self.target_transforms]
        return image, targets, meta