def prepare_test_img(self, idx, gt=True):
    """Prepare an image for testing (multi-scale and flipping).

    The image is transformed with ``crop=True`` (keep ratio and pad to the
    desired size) at the single scale ``1.0``. A flipped copy is appended
    when ``self.flip_ratio > 0``.

    Args:
        idx: Index into ``self.img_infos``.
        gt (bool): When true, annotations are loaded and ``gt_bboxes``,
            ``gt_labels`` and ``gt_masks`` are attached to the result.

    Returns:
        dict: ``img`` (list of ``to_tensor`` outputs), ``img_meta`` (list of
        ``DC`` wrapped meta dicts), ``idx`` and, when ``gt`` is true, the
        ground-truth entries.
    """
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
    if gt:
        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_mask:
            gt_masks = ann['masks']

    def prepare_single(img, scale, flip):
        _img, border, offset = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio, crop=True)
        # img_shape is recorded before the array is converted by to_tensor.
        _img_meta = dict(
            ori_shape=(img_info['height'], img_info['width'], 3),
            img_shape=_img.shape,
            scale=scale,
            border=border,
            offset=offset,
            flip=flip)
        _img = to_tensor(_img)
        return _img, _img_meta

    imgs = []
    img_metas = []
    for scale in [1.0]:
        _img, _img_meta = prepare_single(img, scale, False)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        if self.flip_ratio > 0:
            _img, _img_meta = prepare_single(img, scale, True)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
    data = dict(img=imgs, img_meta=img_metas)

    # The ground-truth fallbacks below used to run even with gt=False, where
    # gt_labels was never bound and len(gt_labels) raised UnboundLocalError.
    if gt:
        if not self.with_mask:
            # Placeholder mask sized from the first two dims of the last
            # prepared tensor.
            h, w = _img.shape[0:2]
            gt_masks = [np.zeros([h, w])]
        if len(gt_labels) == 0:
            # No annotations: emit a single dummy box labelled -1.
            gt_labels = np.array([-1])
            h, w = _img.shape[0:2]
            gt_masks = [np.zeros([h, w])]
            gt_bboxes = np.array([[0, 0, 0, 0]])
        data['gt_bboxes'] = gt_bboxes
        data['gt_labels'] = gt_labels
        data['gt_masks'] = gt_masks
    data['idx'] = idx
    return data
def __getitem__(self, idx):
    """Return the sample at ``idx``.

    In test mode this delegates to ``self.prepare_test_img``. Otherwise the
    image and (if the label file exists) the lane/drivable labels are loaded,
    randomly flipped, scaled and optionally cropped, and packed into ``DC``
    containers.

    Args:
        idx: Index into ``self.img_infos``.

    Returns:
        dict: ``img``, ``img_meta`` and, depending on ``self.with_lane`` /
        ``self.with_drivable``, ``gt_lane`` / ``gt_drivable``.
    """
    if self.test_mode:
        return self.prepare_test_img(idx)

    info = self.img_infos[idx]
    image = mmcv.imread(info['file_name'])

    label_path = info['street_file_name']
    if os.path.exists(label_path):
        # All channels but the last are lane labels; the last is drivable.
        label = np.array(Image.open(label_path), dtype=np.uint8)
        if self.with_lane:
            gt_lane = label[:, :, :-1]
        if self.with_drivable:
            gt_drivable = label[:, :, -1:]
    else:
        # Missing label file: fall back to all-zero label maps.
        height, width = image.shape[:2]
        if self.with_lane:
            gt_lane = np.zeros((height, width, 3), dtype=np.uint8)
        if self.with_drivable:
            gt_drivable = np.zeros((height, width, 1), dtype=np.uint8)

    # Random augmentation parameters (drawn in this fixed order).
    flip = bool(np.random.rand() < self.flip_ratio)
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    if self.crop_size:
        crop_info = random_crop([info['width'], info['height']],
                                self.crop_size)
    else:
        crop_info = None

    image, img_shape, pad_shape, scale_factor = self.img_transform(
        image, img_scale, flip, crop_info=crop_info,
        keep_ratio=self.resize_keep_ratio)
    image = image.copy()

    if self.with_lane:
        gt_lane = self.seg_transform(
            gt_lane, img_scale, flip, crop_info=crop_info, pad_val=255)
        gt_lane = gt_lane.copy()
    if self.with_drivable:
        gt_drivable = self.seg_transform(
            gt_drivable, img_scale, flip, crop_info=crop_info, pad_val=255)
        gt_drivable = gt_drivable.copy()

    img_meta = dict(
        ori_shape=(info['height'], info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip)

    data = dict(
        img=DC(to_tensor(image), stack=True),
        img_meta=DC(img_meta, cpu_only=True))
    if self.with_lane:
        data['gt_lane'] = DC(to_tensor(gt_lane))
    if self.with_drivable:
        data['gt_drivable'] = DC(to_tensor(gt_drivable))
    return data
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping).

    Args:
        idx: A ``(vid, frame_id)`` pair; ``vid`` indexes ``self.vid_infos``
            and ``frame_id`` indexes that video's ``data['filenames']``.

    Returns:
        dict: ``img`` (list of ``to_tensor`` outputs, one per scale plus a
        flipped copy per scale when ``self.flip_ratio > 0``) and ``img_meta``
        (list of ``DC`` wrapped meta dicts in the same order).
    """
    vid, frame_id = idx
    vid_info = self.vid_infos[vid]
    img = mmcv.imread(
        osp.join(self.img_prefix,
                 vid_info['genre'] + '_' + vid_info['t_id'],
                 vid_info['data']['filenames'][frame_id]))
    # No proposals are supplied on this path.
    proposal = None

    def prepare_single(img, frame_id, scale, flip, proposal=None):
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio)
        _img = to_tensor(_img)
        _img_meta = dict(
            ori_shape=(vid_info['data']['size'][1],
                       vid_info['data']['size'][0], 3),
            img_shape=img_shape,
            pad_shape=pad_shape,
            is_first=(frame_id == 0),
            video_id=vid,
            frame_id=frame_id,
            scale_factor=scale_factor,
            flip=flip)
        if proposal is not None:
            if proposal.shape[1] == 5:
                # Split off the fifth column (score) before transforming.
                score = proposal[:, 4, None]
                proposal = proposal[:, :4]
            else:
                score = None
            _proposal = self.bbox_transform(proposal, img_shape,
                                            scale_factor, flip)
            if score is not None:
                _proposal = np.hstack([_proposal, score])
            _proposal = to_tensor(_proposal)
        else:
            _proposal = None
        return _img, _img_meta, _proposal

    imgs = []
    img_metas = []
    for scale in self.img_scales:
        _img, _img_meta, _ = prepare_single(
            img, frame_id, scale, False, proposal)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        if self.flip_ratio > 0:
            # frame_id must be passed here too; it was previously omitted,
            # which shifted every following positional argument.
            _img, _img_meta, _ = prepare_single(
                img, frame_id, scale, True, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
    data = dict(img=imgs, img_meta=img_metas)
    return data
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping).

    Args:
        idx: Index into ``self.img_infos`` (and ``self.proposals`` when set).

    Returns:
        dict: ``img`` and ``img_meta`` lists (one entry per scale, plus a
        flipped entry per scale when ``self.flip_ratio > 0``) and, when
        ``self.proposals`` is not None, a matching ``proposals`` list.

    Raises:
        AssertionError: If the proposals for ``idx`` do not have 4 or 5
            columns.
    """
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
    if self.proposals is not None:
        proposal = self.proposals[idx][:self.num_max_proposals]
        if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposal.shape))
    else:
        # Previously left unbound, which raised UnboundLocalError below.
        proposal = None

    def prepare_single(img, scale, flip, proposal=None):
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip)
        _img = to_tensor(_img)
        _img_meta = dict(
            ori_shape=(img_info['height'], img_info['width'], 3),
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)
        if proposal is not None:
            if proposal.shape[1] == 5:
                # Split off the fifth column (score) before transforming.
                score = proposal[:, 4]
                proposal = proposal[:, :4]
            else:
                score = None
            _proposal = self.bbox_transform(proposal, img_shape,
                                            scale_factor, flip)
            if score is not None:
                _proposal = np.hstack([_proposal, score[:, None]])
            _proposal = to_tensor(_proposal)
        else:
            _proposal = None
        return _img, _img_meta, _proposal

    imgs = []
    img_metas = []
    proposals = []
    for scale in self.img_scales:
        _img, _img_meta, _proposal = prepare_single(
            img, scale, False, proposal)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        proposals.append(_proposal)
        if self.flip_ratio > 0:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, True, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
    data = dict(img=imgs, img_meta=img_metas)
    if self.proposals is not None:
        data['proposals'] = proposals
    return data
def filter_classes(self, data):
    """Drop, in place, every ground-truth entry whose label is negative.

    Does nothing when ``data`` has no ``'gt_labels'`` key. When the labels
    are a NumPy array, ``gt_bboxes`` and ``gt_labels`` are replaced by their
    masked arrays. Otherwise both are treated as containers exposing
    ``.data`` and are re-wrapped in new ``DC`` objects.

    Args:
        data (dict): Sample dict, modified in place.

    Returns:
        None
    """
    if 'gt_labels' not in data:
        return

    labels = data['gt_labels']
    if isinstance(labels, np.ndarray):
        keep = labels >= 0
        data['gt_bboxes'] = data['gt_bboxes'][keep]
        data['gt_labels'] = data['gt_labels'][keep]
        return

    keep = labels.data >= 0
    data['gt_bboxes'] = DC(data['gt_bboxes'].data[keep])
    data['gt_labels'] = DC(data['gt_labels'].data[keep])
def __call__(self, results):
    """Collect the configured entries of ``results`` into a new dict.

    Args:
        results (dict): Source dict; must contain every key listed in
            ``self.meta_keys`` and ``self.keys``.

    Returns:
        dict: ``img_meta`` (the meta dict wrapped in ``DC`` with
        ``cpu_only=True``, inside a one-element list when ``self.list_meta``
        is truthy) followed by one entry per key in ``self.keys``.
    """
    meta = {name: results[name] for name in self.meta_keys}

    collected = {}
    if self.list_meta:
        collected['img_meta'] = [DC(meta, cpu_only=True)]
    else:
        collected['img_meta'] = DC(meta, cpu_only=True)

    for name in self.keys:
        collected[name] = results[name]
    return collected
def prepare_train_img(self, idx):
    """Build one training sample, or return None if it has no gt boxes.

    Args:
        idx: Index passed to ``self.get_img`` / ``self.get_ann_info`` and
            into ``self.img_infos``.

    Returns:
        dict or None: ``img``, ``img_meta``, ``gt_bboxes`` and, when
        enabled, ``gt_labels`` / ``gt_masks``. None when the annotation
        holds no boxes.
    """
    img = self.get_img(idx)
    img_info = self.img_infos[idx]
    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']

    # Images without any valid gt bbox are skipped.
    if len(gt_bboxes) == 0:
        return None

    if self.extra_aug is not None:
        img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, gt_labels)

    # TODO: check flipping is done correctly (flipping is disabled here).
    flip = False
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
    img = img.copy()
    gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip)

    img_meta = dict(
        ori_shape=(img_info['height'], img_info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip,
    )

    if self.with_mask:
        gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                       scale_factor, False)
        img, gt_bboxes, gt_masks = d_transform(img, gt_bboxes, gt_masks)
    else:
        gt_masks = None
        img, gt_bboxes = d_transform(img, gt_bboxes)

    data = dict(
        img=DC(to_tensor(img), stack=True),
        img_meta=DC(img_meta, cpu_only=True),
        gt_bboxes=DC(to_tensor(gt_bboxes)))
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
    if gt_masks is not None:
        data['gt_masks'] = DC(gt_masks, cpu_only=True)
    return data
def _prepare_data(self, img, img_transform, cfg, device, frame_id):
    """Prepare an image for testing (multi-scale and flipping).

    Args:
        img: Image array; its ``shape`` is recorded as ``ori_shape``.
        img_transform: Unused; the transform applied is
            ``self.img_transform``. NOTE(review): confirm this is intended.
        cfg: Unused.
        device: Unused.
        frame_id: Frame index stored in the meta dict; ``is_first`` is set
            when it equals 0.

    Returns:
        dict: ``img`` (list of ``to_tensor`` outputs, one per scale plus a
        flipped copy per scale when ``self.flip_ratio > 0``) and ``img_meta``
        (list of ``DC`` wrapped meta dicts in the same order).
    """
    vid = 0
    # No proposals are supplied on this path.
    proposal = None

    def prepare_single(img, frame_id, scale, flip, proposal=None):
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio)
        _img = to_tensor(_img)
        _img_meta = dict(
            ori_shape=img.shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            is_first=(frame_id == 0),
            video_id=vid,
            frame_id=frame_id,
            scale_factor=scale_factor,
            flip=flip)
        if proposal is not None:
            if proposal.shape[1] == 5:
                # Split off the fifth column (score) before transforming.
                score = proposal[:, 4, None]
                proposal = proposal[:, :4]
            else:
                score = None
            _proposal = self.bbox_transform(proposal, img_shape,
                                            scale_factor, flip)
            if score is not None:
                _proposal = np.hstack([_proposal, score])
            _proposal = to_tensor(_proposal)
        else:
            _proposal = None
        return _img, _img_meta, _proposal

    imgs = []
    img_metas = []
    for scale in self.img_scales:
        _img, _img_meta, _ = prepare_single(
            img, frame_id, scale, False, proposal)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        if self.flip_ratio > 0:
            # frame_id must be passed here too; it was previously omitted,
            # which shifted every following positional argument.
            _img, _img_meta, _ = prepare_single(
                img, frame_id, scale, True, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
    data = dict(img=imgs, img_meta=img_metas)
    return data
def __call__(self, results):
    """Convert common fields of ``results`` to tensors wrapped in ``DC``.

    ``img`` gets a trailing axis when it has fewer than 3 dims, is
    transposed with ``(2, 0, 1)`` and stacked. For ``gt_bboxes`` and
    ``gt_labels`` only elements 0 and 1 are converted individually; the
    other listed keys are converted as a whole.

    Args:
        results (dict): Pipeline dict, modified in place.

    Returns:
        dict: The same ``results`` object.
    """
    if 'img' in results:
        image = results['img']
        if len(image.shape) < 3:
            image = np.expand_dims(image, -1)
        image = np.ascontiguousarray(image.transpose(2, 0, 1))
        results['img'] = DC(to_tensor(image), stack=True)

    paired_keys = ('gt_bboxes', 'gt_labels')
    for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
        if key not in results:
            continue
        if key in paired_keys:
            # These entries hold two items that are wrapped separately.
            for pos in (0, 1):
                results[key][pos] = DC(to_tensor(results[key][pos]))
        else:
            results[key] = DC(to_tensor(results[key]))

    if 'gt_masks' in results:
        results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
    if 'gt_semantic_seg' in results:
        seg = results['gt_semantic_seg'][None, ...]
        results['gt_semantic_seg'] = DC(to_tensor(seg), stack=True)
    return results
def prepare_test_img(self, idx):
    """Build one test sample from the image and, optionally, lidar points.

    Args:
        idx: Index into ``self.sample_ids``, ``self.img_filenames`` and
            ``self.lidar_filenames``.

    Returns:
        dict: ``img``, ``img_shape``, ``sample_idx``, ``calib`` and, when
        ``self.generator`` is not None, ``voxels``, ``coordinates``,
        ``num_points`` and ``anchors``.
    """
    sample_id = self.sample_ids[idx]

    image = mmcv.imread(self.img_filenames[idx])
    image, img_shape, pad_shape, scale_factor = self.img_transform(
        image, 1, False)

    data = dict(
        img=DC(to_tensor(image), stack=True),
        img_shape=DC(img_shape, cpu_only=True),
        sample_idx=DC(sample_id, cpu_only=True),
        calib=DC(self.calib, cpu_only=True))

    if self.with_mask:
        # Bare expression with no effect; mask support is not implemented.
        NotImplemented

    if self.with_point:
        points = read_lidar(self.lidar_filenames[idx])
        points = get_lidar_in_image_fov(
            points, self.calib, 0, 0, img_shape[1], img_shape[0],
            clip_distance=0.1)

    # NOTE(review): ``points`` is only bound when ``self.with_point`` is
    # truthy - confirm the generator is never set without it.
    if self.generator is not None:
        voxels, coordinates, num_points = self.generator.generate(points)
        data['voxels'] = DC(to_tensor(voxels))
        data['coordinates'] = DC(to_tensor(coordinates))
        data['num_points'] = DC(to_tensor(num_points))
        data['anchors'] = DC(to_tensor(self.anchors))

    return data
def __call__(self, results):
    """Convert image and annotation fields of ``results`` to ``DC`` tensors.

    Every entry whose key starts with ``'img'`` and whose value is a NumPy
    array is transposed with ``(2, 0, 1)`` and stacked. The listed
    annotation keys are converted when present.

    Args:
        results (dict): Pipeline dict, modified in place.

    Returns:
        dict: The same ``results`` object.
    """
    for key in results.keys():
        if not key.startswith('img'):
            continue
        if not isinstance(results[key], np.ndarray):
            continue
        chw = np.ascontiguousarray(results[key].transpose(2, 0, 1))
        results[key] = DC(to_tensor(chw), stack=True)

    for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
        if key in results:
            results[key] = DC(to_tensor(results[key]))

    if 'gt_masks' in results:
        results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
    if 'gt_semantic_seg' in results:
        seg = results['gt_semantic_seg'][None, ...]
        results['gt_semantic_seg'] = DC(to_tensor(seg), stack=True)
    return results
def __call__(self, results):
    """Convert common fields of ``results`` to tensors wrapped in ``DC``.

    ``img`` is transposed with ``(2, 0, 1)`` and stacked; the listed
    annotation keys are converted when present; ``gt_masks`` is wrapped as
    CPU-only; ``gt_semantic_seg`` gains a leading axis and is stacked.

    Args:
        results (dict): Pipeline dict, modified in place.

    Returns:
        dict: The same ``results`` object.
    """
    if 'img' in results:
        chw = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
        results['img'] = DC(to_tensor(chw), stack=True)

    for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
        if key in results:
            results[key] = DC(to_tensor(results[key]))

    if 'gt_masks' in results:
        results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
    if 'gt_semantic_seg' in results:
        seg = results['gt_semantic_seg'][None, ...]
        results['gt_semantic_seg'] = DC(to_tensor(seg), stack=True)
    return results
def __call__(self, results):
    """Performs the Collect formatting.

    Entries of ``self.keys`` / ``self.meta_keys`` are either a plain key or a
    ``(source, target)`` pair that renames the key on output.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: The collected entries plus the meta dict, wrapped in ``DC``
        with ``cpu_only=True`` and stored under ``self.meta_name``.
    """

    def split_key(key):
        # A tuple must be exactly (source, target); otherwise both coincide.
        if isinstance(key, tuple):
            assert len(key) == 2
            src, tgt = key[:2]
            return src, tgt
        return key, key

    if 'ann_info' in results:
        results.update(results['ann_info'])

    data = {}
    for key in self.keys:
        src, tgt = split_key(key)
        data[tgt] = results[src]

    meta = {}
    if len(self.meta_keys) != 0:
        for key in self.meta_keys:
            src, tgt = split_key(key)
            meta[tgt] = results[src]
    if 'bbox_id' in results:
        meta['bbox_id'] = results['bbox_id']

    data[self.meta_name] = DC(meta, cpu_only=True)
    return data
def __call__(self, results):
    """Optionally visualise the sample, then wrap ``self.keys`` in ``DC``.

    When ``self.visualize['flag']`` is truthy, the image (optionally
    overlaid with the first mask under ``boundary_key``) and every mask of
    every field in ``results['mask_fields']`` are passed to ``show_feature``.

    Args:
        results (dict): Pipeline dict.

    Returns:
        dict: ``results`` (the parent's return value when
        ``self.call_super`` is truthy) with each key in ``self.keys``
        wrapped in a CPU-only ``DC``.
    """
    if self.visualize['flag']:
        image = results['img'].astype(np.uint8)
        boundary_key = self.visualize['boundary_key']
        if boundary_key is not None:
            image = overlay_mask_img(image, results[boundary_key].masks[0])

        features = [image]
        names = ['img']
        to_uint8 = [1]
        for field in results['mask_fields']:
            for pos, mask in enumerate(results[field].masks):
                features.append(mask)
                names.append(field + str(pos))
                to_uint8.append(0)
        show_feature(features, names, to_uint8)

    if self.call_super:
        results = super().__call__(results)

    for key in self.keys:
        results[key] = DC(results[key], cpu_only=True)
    return results
def prepare_test_img(self, idx):
    """Build a multi-scale test sample, optionally corrupting the image.

    Args:
        idx: Index into ``self.img_infos``.

    Returns:
        dict: ``img`` (list of ``to_tensor`` outputs, one per entry of
        ``self.img_scales``) and ``img_meta`` (matching list of ``DC``
        wrapped meta dicts).
    """
    img_info = self.img_infos[idx]
    image = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    if self.corruption is not None:
        image = corrupt(image,
                        severity=self.corruption_severity,
                        corruption_name=self.corruption)

    def prepare_single(img, scale, flip, proposal=None):
        # ``proposal`` is accepted but not used.
        transformed, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=True)
        meta = dict(
            ori_shape=(img.shape[0], img.shape[1], 3),
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)
        return to_tensor(transformed), meta

    imgs = []
    img_metas = []
    for scale in self.img_scales:
        tensor, meta = prepare_single(image, scale, False, None)
        imgs.append(tensor)
        img_metas.append(DC(meta, cpu_only=True))

    return dict(img=imgs, img_meta=img_metas)
def __call__(self, results):
    """Call function to collect keys in results.

    The keys in ``meta_keys`` and ``default_meta_keys`` will be converted
    to :obj:`mmcv.DataContainer`.

    Args:
        results (list[dict] | dict): List of dict or dict which contains
            the data to collect.

    Returns:
        list[dict] | dict: List of dict or dict that contains the
        following keys:

        - keys in ``self.keys``
        - ``img_metas``
    """
    single = isinstance(results, dict)
    batch = [results] if single else results

    collected = []
    for item in batch:
        item = self._add_default_meta_keys(item)
        item = self._collect_meta_keys(item)
        collected.append(item)

    if not single:
        return collected

    # Only the single-dict case wraps ``img_metas`` here.
    first = collected[0]
    first['img_metas'] = DC(first['img_metas'], cpu_only=True)
    return first
def prepare_train_img(self, idx):
    """Run the pipeline on sample ``idx`` and append a random normal image.

    A normal image is drawn from the ``pg`` pool when the original height is
    below 1000 and from the ``ps`` pool otherwise. It is processed by
    ``self.normal_pipeline`` with the same ``scale_factor`` and ``flip`` as
    the main sample, and concatenated to ``results['img']`` with
    ``torch.cat``.

    Args:
        idx: Index into ``self.img_infos`` (and ``self.proposals`` when set).

    Returns:
        dict: The pipeline output with ``img`` replaced by the concatenated,
        stacked ``DC``.
    """
    img_info = self.img_infos[idx]
    ann_info = self.get_ann_info(idx)
    results = dict(img_info=img_info, ann_info=ann_info)
    if self.proposals is not None:
        results['proposals'] = self.proposals[idx]
    self.pre_pipeline(results)
    results = self.pipeline(results)

    # Pick the normal-image pool according to the original image height.
    ori_h = img_info['height']
    ori_w = img_info['width']
    if ori_h < 1000:
        name = random.choice(self.pg_normal_imgs)
        img_prefix = self.pg_normal_path
    else:
        name = random.choice(self.ps_normal_imgs)
        img_prefix = self.ps_normal_path

    normal_info = {
        'file_name': name,
        'filename': name,
        'height': ori_h,
        'width': ori_w,
    }
    meta = results['img_meta'].data
    results_normal = dict(img_info=normal_info)
    results_normal['scale'] = meta['scale_factor']
    results_normal['flip'] = results['img_meta'].data['flip']
    results_normal['img_prefix'] = img_prefix
    results_normal = self.normal_pipeline(results_normal)

    merged = torch.cat((results['img'].data, results_normal['img'].data))
    results['img'] = DC(merged, stack=True)
    return results
def img_pre_process(img, img_scale=(1333, 800), size_divisor=32,
                    mean=(123.675, 116.28, 103.53),
                    std=(58.395, 57.12, 57.375), to_rgb=True,
                    device='cuda:0'):
    '''
    To pre process the image for given network.
    The default values mirror the ones previously hard-coded here; they are
    expected to match the config file (test pipeline).

    Input argument:
        img: Image read by openCv or mmcv
        img_scale: Scale passed to ``mmcv.imrescale``.
        size_divisor: Divisor passed to ``mmcv.impad_to_multiple``.
        mean: Per-channel mean used for normalisation.
        std: Per-channel std used for normalisation.
        to_rgb: Flag forwarded to ``mmcv.imnormalize``.
        device: Target passed (as a one-element list) to ``scatter``.
    Return:
        data: dictionary of preprocessed data.
    '''
    mean = np.array(mean, dtype=np.float32)
    std = np.array(std, dtype=np.float32)

    result = {}
    result['ori_shape'] = img.shape
    img, scale_factor = mmcv.imrescale(img, img_scale, return_scale=True)
    result['img_shape'] = img.shape
    result['scale_factor'] = scale_factor

    img = mmcv.imnormalize(img, mean, std, to_rgb)
    img = mmcv.impad_to_multiple(img, size_divisor, pad_val=0)
    result['pad_shape'] = img.shape

    # Move the last axis first, then convert to a tensor.
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img)

    result['filename'] = None
    result['flip'] = False
    result['img_norm_cfg'] = {'mean': mean, 'std': std, 'to_rgb': to_rgb}

    data = {}
    data['img_meta'] = [DC(result, cpu_only=True)]
    data['img'] = [img]
    data = scatter(collate([data], samples_per_gpu=1), [device])[0]
    return data
def __call__(self, results):
    """Call function to collect keys in results.

    The keys in ``meta_keys`` will be converted to
    :obj:`mmcv.DataContainer`. Meta keys missing from ``results`` are
    skipped; keys in ``self.keys`` must be present.

    Args:
        results (dict): Result dict contains the data to collect.

    Returns:
        dict: The result dict contains the following keys

        - keys in ``self.keys``
        - ``img_metas``
    """
    img_metas = {
        key: results[key] for key in self.meta_keys if key in results
    }
    data = {'img_metas': DC(img_metas, cpu_only=True)}
    for key in self.keys:
        data[key] = results[key]
    return data
def __call__(self, results):
    """Call function to transform and format common fields in results.

    After the parent formatting, ``relations`` and ``texts`` are copied from
    ``results['ann_info']`` into ``results`` as ``DC`` tensors. When a
    ``scale_factor`` is present, ``relations`` is first multiplied by
    ``[sx, sy, sx / sy, 1, sx / sy]``.

    Args:
        results (dict): Result dict contains the data to convert.

    Returns:
        dict: The result dict contains the data that is formatted with
        default bundle.
    """
    super().__call__(results)
    if 'ann_info' not in results:
        return results

    for key in ['relations', 'texts']:
        value = results['ann_info'][key]
        if key == 'relations' and 'scale_factor' in results:
            scale_factor = results['scale_factor']
            if isinstance(scale_factor, float):
                sx = sy = scale_factor
            else:
                sx, sy = results['scale_factor'][:2]
            ratio = sx / sy
            factor = np.array([sx, sy, ratio, 1, ratio]).astype(np.float32)
            value = value * factor[None, None]
        results[key] = DC(to_tensor(value))
    return results
def __getitem__(self, idx):
    """Return the parent's sample extended with an ``lfb`` entry.

    ``lfb`` is the output of ``self.sample_lfb`` for the record's frame
    range and the ``self.fb`` entry keyed by the record's path, converted
    with ``to_tensor`` and wrapped in a stacked ``DC`` with
    ``pad_dims=None``.

    Args:
        idx: Index into ``self.data``.

    Returns:
        dict: The parent's sample with the additional ``'lfb'`` key.
    """
    record = self.data[idx]
    data = super().__getitem__(idx)
    lfb = self.sample_lfb(
        record.start_frame, record.end_frame, self.fb[record.path])
    data.update(lfb=DC(to_tensor(lfb), stack=True, pad_dims=None))
    return data
def __call__(self, results):
    """Performs the ToDataContainer formatting.

    Each entry of ``self.fields`` is a dict with a ``'key'`` (a single key
    or a list of keys); the remaining items are forwarded to ``DC`` as
    keyword arguments.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: The same ``results`` object.
    """
    for field in self.fields:
        # Copy so that popping 'key' leaves self.fields untouched.
        dc_kwargs = field.copy()
        key = dc_kwargs.pop('key')
        targets = key if isinstance(key, list) else [key]
        for name in targets:
            results[name] = DC(results[name], **dc_kwargs)
    return results
def prepare_train_img(self, idx):
    """Run the pipeline on sample ``idx`` and append a random normal image.

    The normal image is drawn from ``self.normal_imgs``, given a fixed
    ``height`` of 1000 and ``width`` of 2446, processed by
    ``self.normal_pipeline`` with the main sample's ``scale_factor``, and
    concatenated to ``results['img']`` with ``torch.cat``.

    Args:
        idx: Index into ``self.img_infos`` (and ``self.proposals`` when set).

    Returns:
        dict: The pipeline output with ``img`` replaced by the concatenated,
        stacked ``DC``.
    """
    results = dict(img_info=self.img_infos[idx],
                   ann_info=self.get_ann_info(idx))
    if self.proposals is not None:
        results['proposals'] = self.proposals[idx]
    self.pre_pipeline(results)
    results = self.pipeline(results)

    # Randomly choose a normal image.
    name = random.choice(self.normal_imgs)
    normal_info = {
        'file_name': name,
        'filename': name,
        'height': 1000,
        'width': 2446,
    }
    results_normal = dict(img_info=normal_info)
    results_normal['scale'] = results['img_meta'].data['scale_factor']
    self.pre_pipeline(results_normal)
    # Set after pre_pipeline so this prefix takes precedence.
    results_normal['img_prefix'] = self.normal_path
    results_normal = self.normal_pipeline(results_normal)

    merged = torch.cat((results['img'].data, results_normal['img'].data))
    results['img'] = DC(merged, stack=True)
    return results
def __getitem__(self, idx):
    """Load, augment and pack the frame group for record ``idx``.

    Segment indices come from the test, random-shift or validation sampler
    depending on ``self.test_mode`` / ``self.random_shift``. The frames are
    loaded, transformed as a group, and returned with the record's label.

    Args:
        idx: Index forwarded to ``self._get_record``.

    Returns:
        dict: ``img_group`` (stacked ``DC``, ``pad_dims=2``) and
        ``gt_label`` (stacked ``DC``, ``pad_dims=None``).
    """
    record, reader = self._get_record(idx)

    if self.test_mode:
        segment_indices, skip_offsets = self._get_test_indices(record)
    elif self.random_shift:
        segment_indices, skip_offsets = self._sample_indices(record)
    else:
        segment_indices, skip_offsets = self._get_val_indices(record)

    eval_enabled = self.test_mode or not self.random_shift

    # handle the first modality
    if self.proxy_generator:
        record_indices = self._get_proxy_frame_ids(
            record, segment_indices, eval_enabled)
    else:
        record_indices = self._get_regular_frame_ids(
            record, segment_indices, skip_offsets)

    img_group = self._load_images(
        record.path, record_indices, self.modality, reader)

    # Random augmentation parameters; each is drawn only when configured.
    flip = bool(self.flip_ratio is not None
                and np.random.rand() < self.flip_ratio)
    if self.rotate_delta is not None:
        rotate = np.random.uniform(-self.rotate_delta, self.rotate_delta)
    else:
        rotate = None

    # A per-record scale overrides the default one.
    has_custom_scale = (self.img_scale_dict is not None
                        and record.path in self.img_scale_dict)
    if has_custom_scale:
        img_scale = self.img_scale_dict[record.path]
    else:
        img_scale = self.img_scale

    (img_group, img_shape, pad_shape, scale_factor,
     crop_quadruple) = self.img_group_transform(
        img_group, img_scale,
        flip=flip,
        rotate=rotate,
        keep_ratio=self.resize_keep_ratio,
        div_255=self.div_255,
        transpose=True,
        stack=True)

    # Swap the first two axes of the stacked group.
    img_group = np.transpose(img_group, (1, 0, 2, 3))

    return dict(
        img_group=DC(to_tensor(img_group), stack=True, pad_dims=2),
        gt_label=DC(to_tensor(record.label), stack=True, pad_dims=None))
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping).

    The label map is carried inside each meta dict as ``gt_sem_seg``. It is
    read from ``label_file_name`` when that file exists, otherwise it is an
    all-zero tensor shaped like the transformed image.

    Args:
        idx: Index into ``self.img_infos``.

    Returns:
        dict: ``img`` and ``img_meta`` lists (one entry per scale, plus a
        flipped entry per scale when ``self.flip_ratio > 0``).
    """
    img_info = self.img_infos[idx]
    image = mmcv.imread(img_info['file_name'])

    def prepare_single(img, scale, flip):
        tensor, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio)
        tensor = to_tensor(tensor)

        label_path = img_info['label_file_name']
        if os.path.exists(label_path):
            label = mmcv.imread(label_path, flag='grayscale')
            label = self.seg_transform(label, scale, flip, pad_val=255)
        else:
            label = torch.zeros_like(tensor)

        meta = dict(
            ori_shape=(img_info['height'], img_info['width'], 3),
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip,
            task='sem_seg',
            gt_sem_seg=label,
            file_name=img_info['file_name'])
        return tensor, meta

    imgs = []
    img_metas = []
    for scale in self.img_scales:
        tensor, meta = prepare_single(image, scale, False)
        imgs.append(tensor)
        img_metas.append(DC(meta, cpu_only=True))
        if self.flip_ratio > 0:
            tensor, meta = prepare_single(image, scale, True)
            imgs.append(tensor)
            img_metas.append(DC(meta, cpu_only=True))

    return dict(img=imgs, img_meta=img_metas)
def prepare_data(self, idx):
    """Load the video and synopsis features for annotation ``idx``.

    Both features are read from ``<prefix>/<mid>.pkl`` over the range
    ``bbox[0] .. bbox[1]`` (inclusive). An empty feature yields a single
    all-zero row in test mode and makes the method return None otherwise.

    Args:
        idx: Index into ``self.ann_info``.

    Returns:
        dict or None: ``clip``, ``syn`` (stacked ``DC`` tensors with a
        leading axis) and ``meta`` (CPU-only ``DC``), or None when a
        feature is empty outside test mode.
    """
    info = self.ann_info[idx]
    mid = info['mid']
    bbox = info['bbox']
    syn_id = info['syn_id']
    feats = mmcv.load(osp.join(self.prefix, '{}.pkl'.format(mid)))
    start, end = bbox[0], bbox[1] + 1

    # Video feature.
    clip = self._load_feat(feats, self.element, 'video',
                           start=start, end=end, syn_id=syn_id)
    clip = clip.astype(np.float32)
    if clip.shape[0] == 0:
        if not self.test_mode:
            return None
        clip = np.zeros((1, self.indims[0]), dtype=np.float32)

    # Synopsis feature.
    syn = self._load_feat(feats, self.element, 'synopsis',
                          start=start, end=end, syn_id=syn_id)
    syn = syn.astype(np.float32)
    if syn.shape[0] == 0:
        if not self.test_mode:
            return None
        syn = np.zeros((1, self.indims[1]), dtype=np.float32)

    meta = dict(bbox=bbox, mid=mid)
    meta['c_{}_len'.format(self.element)] = clip.shape[0]
    meta['s_{}_len'.format(self.element)] = syn.shape[0]

    return dict(
        clip=DC(to_tensor(clip[None, ...]), stack=True),
        meta=DC(meta, cpu_only=True),
        syn=DC(to_tensor(syn[None, ...]), stack=True))
def __getitem__(self, idx):
    """Return the sample at ``idx``.

    In test mode this delegates to ``self.prepare_test_img``. Otherwise the
    image and its grayscale label map are loaded, randomly flipped, scaled
    and optionally cropped, and packed into ``DC`` containers.

    Args:
        idx: Index into ``self.img_infos``.

    Returns:
        dict: ``img``, ``img_meta`` and ``gt_sem_seg``.
    """
    if self.test_mode:
        return self.prepare_test_img(idx)

    info = self.img_infos[idx]
    image = mmcv.imread(info['file_name'])
    label = mmcv.imread(info['label_file_name'], flag='grayscale')

    # Random augmentation parameters (drawn in this fixed order).
    flip = bool(np.random.rand() < self.flip_ratio)
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    if self.crop_size:
        crop_info = random_crop([info['width'], info['height']],
                                self.crop_size)
    else:
        crop_info = None

    image, img_shape, pad_shape, scale_factor = self.img_transform(
        image, img_scale, flip, crop_info=crop_info,
        keep_ratio=self.resize_keep_ratio)
    image = image.copy()

    gt_sem_seg = self.seg_transform(
        label, img_scale, flip, crop_info=crop_info, pad_val=255)
    gt_sem_seg = gt_sem_seg.copy()

    img_meta = dict(
        ori_shape=(info['height'], info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip)

    return dict(
        img=DC(to_tensor(image), stack=True),
        img_meta=DC(img_meta, cpu_only=True),
        gt_sem_seg=DC(to_tensor(gt_sem_seg)))
def __getitem__(self, idx):
    """Return the parent's sample extended with a ``gfb`` entry.

    ``gfb`` is the output of ``self.process_graph_feats`` for the record's
    graph, the ``self.fb`` entry keyed by the record's path and the record's
    future labels, wrapped in a non-stacked, CPU-only ``DC``.

    Args:
        idx: Index into ``self.data``.

    Returns:
        dict: The parent's sample with the additional ``'gfb'`` key.
    """
    record = self.data[idx]
    data = super().__getitem__(idx)
    graph_feats = self.process_graph_feats(
        record.graph,
        self.fb[record.path],
        future_labels=record.future_labels)
    data.update(gfb=DC(graph_feats, stack=False, cpu_only=True))
    return data
def __call__(self, results):
    """Wrap the configured entries of ``results`` in ``DC`` containers.

    Each entry of ``self.fields`` is a dict with a ``'key'``; the remaining
    items are forwarded to ``DC`` as keyword arguments.

    Args:
        results (dict): Pipeline dict, modified in place.

    Returns:
        dict: The same ``results`` object.
    """
    # A leftover debug print and ``i += 1`` were removed: ``i`` was never
    # assigned, so every call raised UnboundLocalError.
    for field in self.fields:
        # Copy so that popping 'key' leaves self.fields untouched.
        field = field.copy()
        key = field.pop('key')
        results[key] = DC(results[key], **field)
    return results
def prepare_img(self, idx):
    """Build one sample from an image, its proposals and its annotations.

    Args:
        idx: Index into ``self.img_infos`` and ``self.proposals``.

    Returns:
        dict or None: ``img``, ``img_meta``, ``gt_bboxes``, ``proposals``
        and ``gt_labels``; None when there are no proposals for ``idx``.

    Raises:
        AssertionError: If the proposals do not have 4 or 5 columns (and,
            via the ``assert``, if they do not have exactly 5).
    """
    img_info = self.img_infos[idx]
    image = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    # Proposal boxes and labels for this index.
    proposals = self.proposals[idx]
    if len(proposals) == 0:
        return None
    num_cols = proposals.shape[1]
    if not (num_cols == 4 or num_cols == 5):
        raise AssertionError(
            'proposals should have shapes (n, 4) or (n, 5), '
            'but found {}'.format(proposals.shape))
    assert proposals.shape[1] == 5
    proposals_label = proposals[:, 4, None]
    proposals_bboxes = proposals[:, :4]

    # Annotation info, i.e. the gt boxes and their action labels.
    ann = self.get_ann_info(idx)
    human_bboxes = ann['bboxes']
    action_labels = ann['actionlabels']

    # Apply transforms; no flipping is applied to the boxes.
    img_scale = self.img_scales
    image, img_shape, pad_shape, scale_factor = self.img_transform(
        image, img_scale, keep_ratio=False)
    image = image.copy()

    proposals_bboxes = self.bbox_transform(
        proposals_bboxes, img_shape, scale_factor, False)
    proposals = np.hstack([proposals_bboxes, proposals_label])
    human_bboxes = self.bbox_transform(
        human_bboxes, img_shape, scale_factor, False)

    img_meta = dict(
        ori_shape=(img_info['height'], img_info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
    )

    data = dict(
        img=DC(to_tensor(image), stack=True),
        img_meta=DC(img_meta, cpu_only=True),
        gt_bboxes=DC(to_tensor(human_bboxes)))
    data['proposals'] = DC(to_tensor(proposals))
    data['gt_labels'] = DC(to_tensor(action_labels), stack=True)
    return data