Example #1
    def __getitem__(self, index):
        # Sample random scales to use for each image in this batch
        item = {'rpn_targets': {}}

        target_scale = self.opts[self.cfg_key]['SCALES'][npr.randint(
            0, high=len(self.opts[self.cfg_key]['SCALES']))]
        img = cv2.imread(osp.join(self._data_path, self.annotations[index]['path']))
        img_original_shape = img.shape
        item['path'] = self.annotations[index]['path']
        img, im_scale = self._image_resize(img, target_scale, self.opts[self.cfg_key]['MAX_SIZE'])
        # store [image_height, image_width, scale_factor, original_height, original_width]
        item['image_info'] = np.array([img.shape[0], img.shape[1], im_scale,
                    img_original_shape[0], img_original_shape[1]], dtype=np.float64)
        item['visual'] = Image.fromarray(img)

        if self.transform is not None:
            item['visual'] = self.transform(item['visual'])

        # if self._batch_size > 1:
        #     # padding the image to MAX_SIZE, so all images can be stacked
        #     pad_h = self.opts[self.cfg_key]['MAX_SIZE'] - item['visual'].size(1)
        #     pad_w = self.opts[self.cfg_key]['MAX_SIZE'] - item['visual'].size(2)
        #     item['visual'] = F.pad(item['visual'], (0, pad_w, 0, pad_h)).data

        _annotation = self.annotations[index]
        gt_boxes_object = np.zeros((len(_annotation['objects']), 5))
        gt_boxes_object[:, 0:4] = np.array([obj['box'] for obj in _annotation['objects']], dtype=np.float64) * im_scale
        gt_boxes_object[:, 4]   = np.array([obj['class'] for obj in _annotation['objects']])
        item['objects'] = gt_boxes_object
        if self._image_set == 'train': # calculate the RPN target
            item['rpn_targets']['object'] = anchor_target_layer(item['visual'], gt_boxes_object, item['image_info'],
                                self._feat_stride, self._rpn_opts['object'],
                                mappings = self._rpn_opts['mappings'])

        # relationship ground truth: entry [sub_id, obj_id] holds the predicate class
        gt_relationships = np.zeros([len(_annotation['objects']), len(_annotation['objects'])], dtype=np.int64)
        for rel in _annotation['relationships']:
            gt_relationships[rel['sub_id'], rel['obj_id']] = rel['predicate']
        item['relations'] = gt_relationships

        if self.use_region:
            gt_boxes_region = np.zeros((len(_annotation['regions']), self.max_size + 4)) # 4 for the box, max_size for the padded phrase
            gt_boxes_region[:, 0:4] = np.array([reg['box'] for reg in _annotation['regions']], dtype=np.float64) * im_scale
            # pad each phrase with the end-of-sentence token up to max_size
            gt_boxes_region[:, 4:]  = np.array([np.pad(reg['phrase'],
                                    (0, self.max_size - len(reg['phrase'])), 'constant',
                                    constant_values=self.voc_sign['end'])
                                        for reg in _annotation['regions']])

            item['regions'] = gt_boxes_region
            if self._image_set == 'train' and 'region' in self._rpn_opts.keys(): # calculate the RPN target
                item['rpn_targets']['region'] = anchor_target_layer(item['visual'], gt_boxes_region, item['image_info'],
                                self._feat_stride, self._rpn_opts['region'],
                                mappings = self._rpn_opts['mappings'])
        else:
            item['regions'] = None

        return item
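
Both examples call a `_image_resize` helper that is not shown on this page. A minimal sketch of what it plausibly does, assuming the standard Faster R-CNN scaling rule (scale the shorter side to `target_scale`, then cap the longer side at `max_size`); the repository's actual method may differ:

    import cv2
    import numpy as np

    def _image_resize(self, img, target_scale, max_size):
        # img: HxWxC uint8 array as returned by cv2.imread
        im_size_min = np.min(img.shape[0:2])
        im_size_max = np.max(img.shape[0:2])
        im_scale = float(target_scale) / float(im_size_min)
        # shrink the scale if the longer side would exceed max_size
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
        return img, im_scale

The returned `im_scale` is the same factor later applied to the ground-truth boxes, and it is stored in `item['image_info']` so predictions can be mapped back to the original resolution.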
Example #2
    def __getitem__(self, index):
        '''
        Structure of the returned item:
            rpn_targets:    RPN targets (filled in only for the training split)
            path:           path of the annotated image
            image_info:     image metadata (image_height, image_width, scale_factor,
                            original_height, original_width)
            visual:         the image, converted from an array to a PIL Image
            objects:        ground-truth bboxes and classes of the objects in the image
            relations:      ground-truth relationships between the objects
        '''
        # Sample random scales to use for each image in this batch
        # `item` is the data structure loaded for a single image
        item = {'rpn_targets': {}}

        target_scale = self.opts[self.cfg_key]['SCALES'][npr.randint(
            0, high=len(self.opts[self.cfg_key]['SCALES']))]
        img = cv2.imread(
            osp.join(self._data_path, self.annotations[index]['path']))
        img_original_shape = img.shape
        item['path'] = self.annotations[index]['path']
        # resize the image, keeping it within MAX_SIZE from the options
        img, im_scale = self._image_resize(img, target_scale,
                                           self.opts[self.cfg_key]['MAX_SIZE'])
        # store [image_height, image_width, scale_factor, original_height, original_width]
        item['image_info'] = np.array([
            img.shape[0], img.shape[1], im_scale, img_original_shape[0],
            img_original_shape[1]
        ], dtype=np.float64)
        # convert the array into a PIL Image
        item['visual'] = Image.fromarray(img)
        # apply the transform (e.g. normalization), if one is configured
        if self.transform is not None:
            item['visual'] = self.transform(item['visual'])

        # if self._batch_size > 1:
        #     # padding the image to MAX_SIZE, so all images can be stacked
        #     pad_h = self.opts[self.cfg_key]['MAX_SIZE'] - item['visual'].size(1)
        #     pad_w = self.opts[self.cfg_key]['MAX_SIZE'] - item['visual'].size(2)
        #     item['visual'] = F.pad(item['visual'], (0, pad_w, 0, pad_h)).data

        _annotation = self.annotations[index]
        # read the object ground truth (bbox and class) from _annotation['objects']
        gt_boxes_object = np.zeros((len(_annotation['objects']), 5))
        # box coordinates, scaled to the resized image
        gt_boxes_object[:, 0:4] = np.array(
            [obj['bbox'] for obj in _annotation['objects']],
            dtype=np.float64) * im_scale
        # class label of each box
        gt_boxes_object[:, 4] = np.array(
            [obj['class'] for obj in _annotation['objects']])
        item['objects'] = gt_boxes_object
        if self.cfg_key == 'train':  # calculate the RPN target
            item['rpn_targets']['object'] = anchor_target_layer(
                item['visual'],
                gt_boxes_object,
                item['image_info'],
                self._feat_stride,
                self._rpn_opts['object'],
                mappings=self._rpn_opts['mappings'])

        # relationship (predicate) ground truth: entry [sub_id, obj_id]
        # holds the predicate class linking subject sub_id to object obj_id
        gt_relationships = np.zeros(
            [len(_annotation['objects']), len(_annotation['objects'])],
            dtype=np.int64)
        for rel in _annotation['relationships']:
            gt_relationships[rel['sub_id'], rel['obj_id']] = rel['predicate']
        item['relations'] = gt_relationships

        return item
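
Because each returned item mixes a transformed image with variable-size NumPy arrays (`objects`, `relations`, `rpn_targets`), PyTorch's default collate function cannot stack a batch of them. A minimal consumption sketch with a pass-through collate; `VGDataset` is a hypothetical name for a class exposing the `__getitem__` above:

    from torch.utils.data import DataLoader

    def list_collate(batch):
        # keep the per-image dicts as a plain list; downstream code reads
        # item['visual'], item['objects'], item['relations'], etc.
        return batch

    # dataset = VGDataset(...)                  # hypothetical constructor
    # loader = DataLoader(dataset, batch_size=1, shuffle=True,
    #                     collate_fn=list_collate)
    # for batch in loader:
    #     item = batch[0]
    #     im_h, im_w, im_scale = item['image_info'][:3]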