def preprocess_example_input(input_config):
    """Build the example image tensor and meta dict used for model export.

    Companion of ``generate_inputs_and_wrap_model``.

    Args:
        input_config (dict): Customized config describing the example input.
            ``'input_path'`` and ``'input_shape'`` are required; the shape is
            unpacked as ``(N, C, H, W)``. When ``'normalize_cfg'`` is present
            its ``'mean'`` and ``'std'`` entries are used for normalization.

    Returns:
        tuple: ``(one_img, one_meta)``, the float tensor of the example image
            (leading batch axis added, ``requires_grad`` enabled) and the meta
            information dict of the example image.

    Examples:
        >>> from mmdet.core.export import preprocess_example_input
        >>> input_config = {
        >>>     'input_shape': (1,3,224,224),
        >>>     'input_path': 'demo/demo.jpg',
        >>>     'normalize_cfg': {
        >>>         'mean': (123.675, 116.28, 103.53),
        >>>         'std': (58.395, 57.12, 57.375)
        >>>     }
        >>> }
        >>> one_img, one_meta = preprocess_example_input(input_config)
        >>> print(one_img.shape)
        torch.Size([1, 3, 224, 224])
        >>> print(one_meta)
        {'img_shape': (224, 224, 3),
        'ori_shape': (224, 224, 3),
        'pad_shape': (224, 224, 3),
        'filename': '<demo>.png',
        'scale_factor': 1.0,
        'flip': False}
    """
    input_path = input_config['input_path']
    input_shape = input_config['input_shape']

    image = mmcv.imread(input_path)
    if 'normalize_cfg' in input_config:
        cfg = input_config['normalize_cfg']
        image = mmcv.imnormalize(
            image,
            np.array(cfg['mean'], dtype=np.float32),
            np.array(cfg['std'], dtype=np.float32))

    # The trailing (H, W) part of the shape is handed to imresize reversed.
    spatial = input_shape[2:]
    chw = mmcv.imresize(image, spatial[::-1]).transpose(2, 0, 1)
    one_img = torch.from_numpy(chw).unsqueeze(0).float().requires_grad_(True)

    _, C, H, W = input_shape
    hwc = (H, W, C)
    one_meta = dict(
        img_shape=hwc,
        ori_shape=hwc,
        pad_shape=hwc,
        filename='<demo>.png',
        scale_factor=1.0,
        flip=False)
    return one_img, one_meta
def __call__(self, results):
    """Normalize every ndarray stored under a key that starts with 'img'.

    Args:
        results (dict): Result dict; its keys are expected to be strings.

    Returns:
        dict: The same dict with the selected entries normalized and one
            ``'<key>_norm_cfg'`` entry added per normalized key.
    """
    image_keys = [
        name for name in results
        if name.startswith('img') and isinstance(results[name], np.ndarray)
    ]

    norm_cfgs = {}
    for name in image_keys:
        results[name] = mmcv.imnormalize(results[name], self.mean, self.std,
                                         self.to_rgb)
        norm_cfgs[f'{name}_norm_cfg'] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb)

    # The configs are merged only after the loop so the dict is not resized
    # while its images are being processed.
    results.update(norm_cfgs)
    return results
def __call__(self, results):
    """Normalize the main image and every image of the sequence.

    Args:
        results (dict): Result dict from loading pipeline. Must contain
            ``'img'`` and ``'sequence_imgs'``.

    Returns:
        dict: Normalized results, 'img_norm_cfg' key is added into
            result dict.
    """

    def _normalize(image):
        return mmcv.imnormalize(image, self.mean, self.std, self.to_rgb)

    results['img'] = _normalize(results['img'])
    results['sequence_imgs'] = [
        _normalize(frame) for frame in results['sequence_imgs']
    ]
    results['img_norm_cfg'] = dict(
        mean=self.mean, std=self.std, to_rgb=self.to_rgb)
    return results
def __call__(self, img, scale, flip=False):
    """Rescale, normalize, optionally flip and pad one image.

    Args:
        img: Image handed to ``mmcv.imrescale``.
        scale: Target scale for ``mmcv.imrescale``.
        flip (bool): Whether to apply ``mmcv.imflip``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` where ``img`` has
            its last axis moved to the front, ``img_shape`` is the shape after
            rescaling and ``pad_shape`` the shape after padding.
    """
    rescaled, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    img_shape = rescaled.shape

    out = mmcv.imnormalize(rescaled, self.mean, self.std, self.to_rgb)
    if flip:
        out = mmcv.imflip(out)

    # Without a size divisor nothing is padded, so both shapes coincide.
    pad_shape = img_shape
    if self.size_divisor is not None:
        out = mmcv.impad_to_multiple(out, self.size_divisor)
        pad_shape = out.shape

    return out.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, results):
    """Normalize the entries listed in ``self.keys``.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation.

    Returns:
        dict: A dict containing the processed data and information, with
            'img_norm_cfg' added.
    """

    def _normalize(image):
        return mmcv.imnormalize(image, self.mean, self.std, self.to_rgb)

    for key in self.keys:
        value = results[key]
        # An entry is either a list of images or a single image.
        if isinstance(value, list):
            results[key] = [_normalize(item) for item in value]
        else:
            results[key] = _normalize(value)

    results['img_norm_cfg'] = dict(
        mean=self.mean, std=self.std, to_rgb=self.to_rgb)
    return results
def __call__(self, results):
    """Call function to normalize images.

    Each of the keys ``'img'``, ``'img1'`` and ``'img2'`` is normalized when
    it is present. Every key is checked on its own: previously ``'img1'`` was
    accessed whenever ``'img2'`` existed, which raised ``KeyError`` when only
    ``'img2'`` was given and skipped ``'img1'`` when it came alone.

    Args:
        results (dict): Result dict from loading pipeline.

    Returns:
        dict: Normalized results, 'img_norm_cfg' key is added into
            result dict.
    """
    for key in ('img', 'img1', 'img2'):
        if key in results:
            results[key] = mmcv.imnormalize(results[key], self.mean,
                                            self.std, self.to_rgb)

    results['img_norm_cfg'] = dict(
        mean=self.mean, std=self.std, to_rgb=self.to_rgb)
    return results
def __call__(self,
             img,
             scale,
             flip=False,
             keep_ratio=True,
             hsv_h=0,
             hsv_s=0,
             hsv_v=0,
             noisy_mode=None,
             blur_mode=None):
    """Augment, resize, normalize, flip and pad one image.

    Args:
        img: Input image. When the colour augmentation runs, the converted
            result is written back into this array (``dst=img``).
        scale: Target scale/size for the mmcv resize call.
        flip (bool): Whether to apply ``mmcv.imflip``.
        keep_ratio (bool): Use ``mmcv.imrescale`` when True, otherwise
            ``mmcv.imresize``.
        hsv_h, hsv_s, hsv_v: Strengths of the random per-channel gains; the
            augmentation only runs when their sum exceeds 5.
        noisy_mode: Passed to ``self.add_noise`` when not None.
        blur_mode: Passed to ``self.opencv_blur`` when not None.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``.
    """
    # Augment colorspace
    if hsv_h + hsv_s + hsv_v > 5:
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # One random gain per channel, drawn in hue, sat, val order.
        for channel, strength in enumerate((hsv_h, hsv_s, hsv_v)):
            plane = img_hsv[:, :, channel].astype(np.float32)
            gain = random.uniform(-1, 1) * strength + 1
            plane *= gain
            # Only a gain that is not below 1 can push values past 255.
            if not gain < 1:
                plane = plane.clip(None, 255)
            img_hsv[:, :, channel] = plane
        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

    # Add noise
    if noisy_mode is not None:
        img = self.add_noise(img, noisy_mode)

    # Blur
    if blur_mode is not None:
        img = self.opencv_blur(img, blur_mode)

    if keep_ratio:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                dtype=np.float32)
    img_shape = img.shape

    img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        img = mmcv.imflip(img)

    pad_shape = img_shape
    if self.size_divisor is not None:
        img = mmcv.impad_to_multiple(img, self.size_divisor)
        pad_shape = img.shape

    return img.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def _imrenormalize(img, img_norm_cfg, new_img_norm_cfg):
    """Undo one normalization config and apply another one.

    Args:
        img: Image normalized with ``img_norm_cfg``.
        img_norm_cfg (dict): Config the image is currently normalized with.
        new_img_norm_cfg (dict): Config to normalize the image with.

    Returns:
        The image after ``mmcv.imdenormalize`` followed by
        ``mmcv.imnormalize``. Neither input dict is modified.
    """

    def _with_array_stats(cfg):
        # Work on a copy; 'mean'/'std' become arrays of the image dtype.
        converted = cfg.copy()
        for name in ('mean', 'std'):
            if name in converted and not isinstance(converted[name],
                                                    np.ndarray):
                converted[name] = np.array(converted[name], dtype=img.dtype)
        return converted

    old_cfg = _with_array_stats(img_norm_cfg)
    new_cfg = _with_array_stats(new_img_norm_cfg)

    # reverse cfg: the denormalize call takes 'to_bgr' instead of 'to_rgb'
    if 'to_rgb' in old_cfg:
        old_cfg['to_bgr'] = old_cfg.pop('to_rgb')

    restored = mmcv.imdenormalize(img, **old_cfg)
    return mmcv.imnormalize(restored, **new_cfg)
def __call__(self, results):
    """Normalize every image field of the result dict.

    Args:
        results (dict): Result dict from loading pipeline. The keys to
            process come from ``results['img_fields']``; ``['img']`` is used
            when that entry is missing.

    Returns:
        dict: Normalized results, 'img_norm_cfg' key is added into
            result dict.
    """
    fields = results.get('img_fields', ['img'])
    for field in fields:
        image = results[field]
        results[field] = mmcv.imnormalize(image, self.mean, self.std,
                                          self.to_rgb)

    norm_cfg = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb)
    results['img_norm_cfg'] = norm_cfg
    return results
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Resize, normalize, flip, pad and transpose one image.

    Args:
        img: Input image.
        scale: Expected size handed to the mmcv resize call.
        flip (bool): Whether to flip the image.
        keep_ratio (bool): Keep the aspect ratio while resizing.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``.
    """
    # 1. bring the image to the expected size
    if not keep_ratio:
        # Plain resize to the given size, aspect ratio is ignored.
        resized, w_scale, h_scale = mmcv.imresize(
            img=img,
            size=scale,
            return_scale=True,
            interpolation='bilinear',
        )
        scale_factor = np.array(
            [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    else:
        # Aspect-ratio preserving rescale, as large as fits in the scale.
        resized, scale_factor = mmcv.imrescale(
            img=img,
            scale=scale,
            return_scale=True,
            interpolation='bilinear',
        )
    img_shape = resized.shape

    # 2. normalize
    out = mmcv.imnormalize(resized, self.mean, self.std, self.to_rgb)

    # 3. flip (if needed)
    if flip:
        out = mmcv.imflip(out)

    # 4. pad when a size divisor is configured,
    #    e.g. size_divisor=32 makes both sides multiples of 32.
    pad_shape = img_shape
    if self.size_divisor is not None:
        out = mmcv.impad_to_multiple(out, self.size_divisor)
        pad_shape = out.shape

    # 5. (h, w, c) -> (c, h, w)
    return out.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, crop_info=None, keep_ratio=True):
    """Jitter, resize, normalize, flip and crop or pad one image.

    Args:
        img: Input image array.
        scale: Target scale/size for the mmcv resize call.
        flip (bool): Whether to apply ``mmcv.imflip``.
        crop_info: Optional ``(cx1, cy1, cx2, cy2)`` crop window. A cropped
            image is never padded.
        keep_ratio (bool): Use ``mmcv.imrescale`` when True, otherwise
            ``mmcv.imresize``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``. ``pad_shape``
            is the shape after cropping or padding and equals ``img_shape``
            when neither happens.
    """
    # image jittering
    try:
        img = Image.fromarray(img)
    except Exception:
        # Best effort: report the offending input and carry on with it.
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        print(img)
    for jitter_name in ('random_color', 'random_contrast',
                        'random_sharpness'):
        if hasattr(self, jitter_name):
            img = getattr(self, jitter_name)(img)
    img = np.array(img)

    if keep_ratio:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                dtype=np.float32)
    img_shape = img.shape

    img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        img = mmcv.imflip(img)

    # Exactly one branch decides pad_shape. Previously the trailing ``else``
    # also ran for cropped images and replaced the cropped shape with the
    # pre-crop ``img_shape``.
    if crop_info is not None:
        # if crop, no need to pad
        cx1, cy1, cx2, cy2 = crop_info
        img = img[cy1:cy2, cx1:cx2]
        pad_shape = img.shape
    elif self.size_divisor is not None:
        # pad and set pad_shape
        img = mmcv.impad_to_multiple(img, self.size_divisor)
        pad_shape = img.shape
    else:
        pad_shape = img_shape

    img = img.transpose(2, 0, 1)
    return img, img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Resize, normalize, optionally flip and pad one image.

    Args:
        img: Input image.
        scale: Target scale; for the ratio-preserving path it holds one long
            and one short edge, in any order.
        flip (bool): Whether to apply ``mmcv.imflip``.
        keep_ratio (bool): Keep the aspect ratio while resizing.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``.
    """
    if keep_ratio:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        # For recognition the ratio need not be kept; resize directly
        # (e.g. to 224).
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                dtype=np.float32)
    img_shape = img.shape

    normalized = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        normalized = mmcv.imflip(normalized)

    if self.size_divisor is None:
        pad_shape = img_shape
    else:
        # Pad the borders so every side is divisible by the divisor,
        # e.g. 32.
        normalized = mmcv.impad_to_multiple(normalized, self.size_divisor)
        # image shape after padding
        pad_shape = normalized.shape

    return normalized.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Run the resize / normalize / flip / pad / transpose chain.

    Args:
        img: Input image.
        scale: Target scale/size for the mmcv resize call.
        flip (bool): Whether to flip the image.
        keep_ratio (bool): Use ``mmcv.imrescale`` when True, otherwise
            ``mmcv.imresize``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``.
    """
    if not keep_ratio:
        resized, w_scale, h_scale = mmcv.imresize(
            img, scale, return_scale=True)
        scale_factor = np.array(
            [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    else:
        resized, scale_factor = mmcv.imrescale(
            img, scale, return_scale=True)
    img_shape = resized.shape

    # (x - mean) / std, bgr2rgb
    result = mmcv.imnormalize(resized, self.mean, self.std, self.to_rgb)

    # flip images
    if flip:
        result = mmcv.imflip(result)

    # pad so that the sides are divisible by the divisor
    divisor = self.size_divisor
    if divisor is not None:
        result = mmcv.impad_to_multiple(result, divisor)
        pad_shape = result.shape
    else:
        pad_shape = img_shape

    # HWC to CHW
    result = result.transpose(2, 0, 1)
    return result, img_shape, pad_shape, scale_factor
def onnx_infer(session, input_path, shape=(896, 512)):
    """Run one image through an ONNX session and decode the result.

    Args:
        session: Object exposing ``run(None, feed_dict)``; the image is fed
            under the input name ``"input"``.
        input_path: Path of the image (``str``) or an already loaded image.
        shape (tuple): Size handed to ``mmcv.imresize``.

    Returns:
        The value returned by ``get_seg`` for the first batch element.
    """
    raw_img = (mmcv.imread(input_path)
               if isinstance(input_path, str) else input_path)
    ori_shape = raw_img.shape

    resized = mmcv.imresize(raw_img, shape)
    mean = np.array((123.675, 116.28, 103.53), dtype=np.float32)
    std = np.array((58.395, 57.12, 57.375), dtype=np.float32)
    normalized = mmcv.imnormalize(resized, mean, std, to_rgb=True)
    batch = normalized.transpose(2, 0, 1)[np.newaxis]

    outputs = session.run(None, {"input": batch})

    # The first five outputs are joined as segmentation predictions, the
    # remaining ones are flattened and joined as category predictions.
    seg_preds = np.concatenate(outputs[:5], axis=1)[0]
    flattened = [
        np.reshape(out, (out.shape[0], -1, out.shape[-1]))
        for out in outputs[5:]
    ]
    cate_preds = np.concatenate(flattened, axis=1)[0]

    return get_seg(seg_preds, cate_preds, ori_shape)
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Transform one image for the network.

    Args:
        img: Input image.
        scale: Target scale/size for the mmcv resize call.
        flip (bool): Whether to apply ``mmcv.imflip``.
        keep_ratio (bool): Use ``mmcv.imrescale`` when True, otherwise
            ``mmcv.imresize``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``; ``img`` has the
            last axis moved to the front.
    """
    if keep_ratio:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        scales = [w_scale, h_scale, w_scale, h_scale]
        scale_factor = np.array(scales, dtype=np.float32)

    # Shape before any padding.
    img_shape = img.shape
    img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    img = mmcv.imflip(img) if flip else img

    needs_padding = self.size_divisor is not None
    if needs_padding:
        img = mmcv.impad_to_multiple(img, self.size_divisor)
    pad_shape = img.shape if needs_padding else img_shape

    return img.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, results):
    """Normalize the image and the optional auxiliary entries.

    ``'img'`` is always processed. ``'ref_img'``, ``'depth'`` and
    ``'ref_depth'`` are processed when present; ``'flow'`` only when its last
    axis has length 3.

    Args:
        results (dict): Result dict of the pipeline.

    Returns:
        dict: The same dict with normalized entries and 'img_norm_cfg' added.
    """
    targets = ['img']
    for name in ('ref_img', 'flow', 'depth', 'ref_depth'):
        if name not in results:
            continue
        # 2 channel flow is raw, no need to normalize: it is used for
        # warping solely.
        if name == 'flow' and results['flow'].shape[-1] != 3:
            continue
        targets.append(name)

    for name in targets:
        results[name] = mmcv.imnormalize(results[name], self.mean, self.std,
                                         self.to_rgb)

    results['img_norm_cfg'] = dict(
        mean=self.mean, std=self.std, to_rgb=self.to_rgb)
    return results
def preprocess_example_input(input_config):
    """Prepare an example input image for `generate_inputs_and_wrap_model`.

    Args:
        input_config (dict): customized config describing the example input.
            ``'input_shape'`` is unpacked as ``(N, C, H, W)``.

    Example:
        input_config: {
            'input_shape':[1,3,224,224],
            'input_path': 'demo/demo.jpg',
            'normalize_cfg': {
                'mean': [123.675, 116.28, 103.53],
                'std': [58.395, 57.12, 57.375]
            }
        }

    Returns:
        tuple: (one_img, one_meta), tensor of the example input image and
            meta information for the example input image.
    """
    input_path = input_config['input_path']
    input_shape = input_config['input_shape']

    one_img = mmcv.imread(input_path)
    if 'normalize_cfg' in input_config:
        normalize_cfg = input_config['normalize_cfg']
        mean = np.array(normalize_cfg['mean'], dtype=np.float32)
        std = np.array(normalize_cfg['std'], dtype=np.float32)
        one_img = mmcv.imnormalize(one_img, mean, std)

    # mmcv.imresize expects the size as (w, h) while the shape ends with
    # (H, W). Passing it unreversed produced a transposed tensor for every
    # non-square input shape.
    one_img = mmcv.imresize(one_img, input_shape[2:][::-1]).transpose(2, 0, 1)
    one_img = torch.from_numpy(one_img).unsqueeze(0).float().requires_grad_(
        True)

    (_, C, H, W) = input_shape
    one_meta = {
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': 1.0,
        'flip': False
    }
    return one_img, one_meta
def single_call(self, results, img_ref):
    """Replay the transforms recorded in ``results`` on a reference image.

    Args:
        results (dict): Reads ``'keep_ratio'``, ``'scale'``, ``'flip'``,
            ``'img_norm_cfg'``, ``'pad_shape'`` and, when present,
            ``'crop_coords'``.
        img_ref: Reference image to transform.

    Returns:
        The transformed reference image.
    """
    # Same resize routine as selected by the recorded keep_ratio flag.
    resize = mmcv.imrescale if results['keep_ratio'] else mmcv.imresize
    out = resize(img_ref, results['scale'], return_scale=False)

    if results['flip']:
        out = mmcv.imflip(out)

    if results['img_norm_cfg']:
        norm_cfg = results['img_norm_cfg']
        out = mmcv.imnormalize(out, norm_cfg['mean'], norm_cfg['std'],
                               norm_cfg['to_rgb'])

    if 'crop_coords' in results:
        row_start, row_stop, col_start, col_stop = (
            results['crop_coords'][i] for i in range(4))
        out = out[row_start:row_stop, col_start:col_stop, :]

    # Pad only when the shape does not already equal the recorded one.
    if out.shape != results['pad_shape']:
        out = mmcv.impad(out, results['pad_shape'][:2])
    return out
def get_img(img_meta):
    """Rebuild the network input tensor from the first meta entry.

    Args:
        img_meta: Sequence whose first element is a dict with the keys
            ``'filename'``, ``'scale_factor'`` and ``'img_norm_cfg'``
            (``'mean'``, ``'std'``, ``'to_rgb'``).

    Returns:
        The image tensor with a leading axis of length 1 added.
    """
    meta = img_meta[0]
    image = cv2.imread(meta['filename'])

    # Resize
    image, _ = mmcv.imrescale(image, meta['scale_factor'], True)

    # Normalize
    norm_cfg = meta['img_norm_cfg']
    image = mmcv.imnormalize(image, norm_cfg['mean'], norm_cfg['std'],
                             norm_cfg['to_rgb'])

    # Pad
    size_divisor = 32
    image = mmcv.impad_to_multiple(image, size_divisor, 0)

    # H x W x C -> C x H x W, then expand a leading dim
    chw = np.transpose(image, (2, 0, 1))
    return torch.from_numpy(chw).expand(1, -1, -1, -1)
def processing_one_image(file_path):
    """Load one image from disk and build its tensor and meta dict.

    Args:
        file_path: Path of the image file.

    Returns:
        tuple: ``(img, img_meta)`` -- the padded image as a tensor with the
            last axis moved to the front, and the dict describing the
            applied steps.
    """
    # 1. Read image (BGR order)
    client = mmcv.FileClient(backend='disk')
    orig_img = mmcv.imfrombytes(client.get(file_path), flag='color')

    # 2. Resize
    img, _ = mmcv.imrescale(orig_img, (1333, 800), return_scale=True)
    # The w_scale and h_scale have a minor difference, so they are
    # recomputed from the shapes; a real fix should be done in
    # mmcv.imrescale in the future.
    new_h, new_w = img.shape[:2]
    h, w = orig_img.shape[:2]
    w_scale = new_w / w
    h_scale = new_h / h
    scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                            dtype=np.float32)
    resized_shape = img.shape

    # 3. Normalize
    mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
    std = np.array([1.0, 1.0, 1.0], dtype=np.float32)
    to_rgb = False
    img = mmcv.imnormalize(img, mean, std, to_rgb)

    # 4. Pad
    img = mmcv.impad_to_multiple(img, divisor=32, pad_val=0)

    img_meta = {
        'filename': file_path,
        'ori_filename': file_path,
        'flip': False,
        'ori_shape': orig_img.shape,
        'scale_factor': scale_factor,
        'img_shape': resized_shape,
        'img_norm_cfg': dict(mean=mean, std=std, to_rgb=to_rgb),
        "pad_shape": img.shape,
    }

    # 5. ToTensor
    return torch.from_numpy(img.transpose(2, 0, 1)), img_meta
def __call__(self, results):
    """Normalize ``results['img']`` and record the config used.

    Args:
        results (dict): Result dict containing the key ``'img'``.

    Returns:
        dict: The same dict with the normalized image and 'img_norm_cfg'
            added.
    """
    image = results['img']
    results['img'] = mmcv.imnormalize(image, self.mean, self.std,
                                      self.to_rgb)
    norm_cfg = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb)
    results['img_norm_cfg'] = norm_cfg
    return results
def __call__(self, results):
    """Normalize the image of the result dict.

    Args:
        results (dict): Result dict containing the key ``'img'``.

    Returns:
        dict: The same dict; ``'img'`` is normalized and 'img_norm_cfg'
            holds the mean, std and to_rgb values that were used.
    """
    mean, std, to_rgb = self.mean, self.std, self.to_rgb
    results['img'] = mmcv.imnormalize(results['img'], mean, std, to_rgb)
    results['img_norm_cfg'] = dict(mean=mean, std=std, to_rgb=to_rgb)
    return results
def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """Normalize the image column and pass the other columns through.

    Args:
        img: Image handed to ``mmcv.imnormalize`` (with ``to_rgb=True``).
        img_shape, gt_bboxes, gt_label, gt_num: Returned unchanged.

    Returns:
        tuple: ``(img_data, img_shape, gt_bboxes, gt_label, gt_num)`` with
            ``img_data`` cast to float32.
    """
    # mmcv versions that implement imnormalize through ``mean.reshape``
    # need arrays; plain lists raise AttributeError there. float64 arrays
    # give the same numbers as the lists did on versions that broadcast.
    mean = np.array([123.675, 116.28, 103.53], dtype=np.float64)
    std = np.array([58.395, 57.12, 57.375], dtype=np.float64)
    img_data = mmcv.imnormalize(img, mean, std, True)
    img_data = img_data.astype(np.float32)
    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def pytorch2onnx(model,
                 input_img,
                 input_shape,
                 opset_version=11,
                 show=False,
                 output_file='tmp.onnx',
                 verify=False,
                 normalize_cfg=None):
    """Export ``model`` to ONNX using one example image.

    Args:
        model: Detector to export; it is moved to CPU and set to eval mode.
        input_img: Path of the example image.
        input_shape: Shape of the example input, unpacked as (N, C, H, W).
        opset_version (int): ONNX opset passed to the exporter.
        show (bool): Passed to the exporter as ``verbose``.
        output_file (str): Destination of the exported model.
        verify (bool): When True, check the exported file with
            ``onnx.checker`` and compare the scores of the ONNX runtime
            output against the PyTorch output.
        normalize_cfg (dict, optional): ``'mean'`` and ``'std'`` used to
            normalize the example image.

    Raises:
        AssertionError: During verification, when the model has more than
            one feed input or the compared outputs differ.
    """
    model.cpu().eval()

    # read image
    one_img = mmcv.imread(input_img)
    if normalize_cfg:
        one_img = mmcv.imnormalize(one_img, normalize_cfg['mean'],
                                   normalize_cfg['std'])
    # mmcv.imresize expects the size as (w, h) while the shape ends with
    # (H, W); without the reversal non-square shapes came out transposed.
    one_img = mmcv.imresize(one_img, input_shape[2:][::-1]).transpose(2, 0, 1)
    one_img = torch.from_numpy(one_img).unsqueeze(0).float()

    (_, C, H, W) = input_shape
    one_meta = {
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': 1.0,
        'flip': False
    }

    # onnx.export does not support kwargs
    origin_forward = model.forward
    model.forward = partial(
        model.forward, img_metas=[[one_meta]], return_loss=False)
    try:
        # pytorch has some bug in pytorch1.3, we have to fix it
        # by replacing these existing op
        register_extra_symbolics(opset_version)
        torch.onnx.export(
            model, ([one_img]),
            output_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            verbose=show,
            opset_version=opset_version)
    finally:
        # Put the original forward back even when the export fails, so the
        # caller's model is never left with the patched method.
        model.forward = origin_forward
    print(f'Successfully exported ONNX model: {output_file}')

    if verify:
        # check by onnx
        onnx_model = onnx.load(output_file)
        onnx.checker.check_model(onnx_model)

        # check the numerical value
        # get pytorch output
        pytorch_result = model([one_img], [[one_meta]], return_loss=False)

        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 1)
        sess = rt.InferenceSession(output_file)
        from mmdet.core import bbox2result
        det_bboxes, det_labels = sess.run(
            None, {net_feed_input[0]: one_img.detach().numpy()})
        # only compare a part of result
        bbox_results = bbox2result(det_bboxes, det_labels, 1)
        onnx_results = bbox_results[0]
        assert np.allclose(
            pytorch_result[0][:, 4], onnx_results[:, 4]
        ), 'The outputs are different between Pytorch and ONNX'
        print('The numerical values are same between Pytorch and ONNX')
def _load_image_with_meta(path, file_name, norm_cfg):
    """Read one image and return its batched CUDA tensor and its own meta."""
    img = mmcv.imread(path)
    img = mmcv.imnormalize(img, norm_cfg['mean'], norm_cfg['std'])
    img = torch.from_numpy(img).cuda()
    img_shape = (img.size()[0], img.size()[1], img.size()[2])
    img_meta = {
        'filename': file_name,
        'ori_shape': img_shape,
        'img_shape': img_shape,
        'pad_shape': img_shape,
        'scale_factor': np.array([1.000, 1.000, 1.000,
                                  1.000]).astype(np.float32),
        'flip': False,
        'img_norm_cfg': norm_cfg,
    }
    return img.permute(2, 0, 1).unsqueeze(0), img_meta


def infer(show_score_thr=0.3):
    """Score every patched image against its clean counterpart.

    Patched images are read from ``args.img_file_dir``; the clean directory
    is that path with every ``'_p'`` removed. For each image file the number
    of detections whose score exceeds the threshold is counted for both
    versions, and the score ``1 - min(clean, patched) / clean`` is stored
    (``0`` when the clean image has no such detection). The scores are
    written as JSON to ``args.out_json_name``.

    Args:
        show_score_thr (float): Score threshold a detection must exceed to
            be counted.

    Returns:
        dict: Mapping from file name to its score.
    """
    args = parse_args()
    model = get_model(args)
    norm_cfg = {
        'mean': np.array([123.675, 116.28, 103.53]).astype(np.float32),
        'std': np.array([58.395, 57.12, 57.375]).astype(np.float32),
        'to_rgb': True
    }
    clean_dir = args.img_file_dir.replace('_p', '')
    image_exts = ('.jpg', '.png', '.bmp', '.gif')

    results = {}
    ik = 0
    for file_name in os.listdir(args.img_file_dir):
        if os.path.splitext(file_name)[1] not in image_exts:
            continue

        # Each image gets its own meta dict. Previously one shared dict was
        # overwritten by the clean image before either forward pass, so the
        # patched image was run with the clean image's shapes.
        # os.path.join also works when the directory has no trailing
        # separator.
        img_p, meta_p = _load_image_with_meta(
            os.path.join(args.img_file_dir, file_name), file_name, norm_cfg)
        img_c, meta_c = _load_image_with_meta(
            os.path.join(clean_dir, file_name), file_name, norm_cfg)

        with torch.no_grad():
            result_p = model(return_loss=False, rescale=True, img=[img_p],
                             img_metas=[[meta_p]])
            result_c = model(return_loss=False, rescale=True, img=[img_c],
                             img_metas=[[meta_c]])

        if isinstance(result_p, tuple):
            bbox_results, mask_results = result_p
            result_p = bbox_results, encode_mask_results(mask_results)
            bbox_results, mask_results = result_c
            result_c = bbox_results, encode_mask_results(mask_results)

        result_p = np.concatenate(result_p)
        result_c = np.concatenate(result_c)
        # Index 4 of every detection row is compared with the threshold.
        result_above_confidence_num_p = sum(
            1 for det in result_p if det[4] > show_score_thr)
        result_above_confidence_num_c = sum(
            1 for det in result_c if det[4] > show_score_thr)

        if result_above_confidence_num_c == 0:
            # can't find any object in clean img
            bb_score = 0
            print('i=', ik)
            print(file_name)
            ik += 1
        else:
            bb_score = 1 - min(
                result_above_confidence_num_c,
                result_above_confidence_num_p) / result_above_confidence_num_c
        results[file_name] = bb_score

    json_name = args.out_json_name
    with open(json_name, 'w') as f_obj:
        json.dump(results, f_obj)
    return results
def pre_process_img(one_img, mean, std, to_RGB):
    """Normalize an image and move its last axis to the front.

    Args:
        one_img: Image handed to ``mmcv.imnormalize``.
        mean: Mean passed to the normalization.
        std: Std passed to the normalization.
        to_RGB: Passed as the ``to_rgb`` argument of the normalization.

    Returns:
        The normalized image transposed with axes ``(2, 0, 1)``.
    """
    normalized = mmcv.imnormalize(one_img, mean, std, to_RGB)
    return normalized.transpose(2, 0, 1)
def infer(show_score_thr=0.3):
    """Score every patched image against its clean counterpart.

    Patched images are read from ``args.img_file_dir``; the clean directory
    is that path with every ``'_p'`` removed. Both versions are run through
    ``inference_detector`` and the detections whose score exceeds the
    threshold are counted. The stored score is
    ``1 - min(clean, patched) / clean``, or ``0`` when the clean image has no
    such detection. The scores are written as JSON to
    ``args.out_json_name``.

    The manual tensor/meta preparation that used to precede the detector
    calls was removed: its results were never used, yet it read every image
    twice more and copied it to the GPU.

    Args:
        show_score_thr (float): Score threshold a detection must exceed to
            be counted.

    Returns:
        dict: Mapping from file name to its score.
    """
    import json

    args = parse_args()
    model = init_detector(args.config, args.checkpoint, device='cuda:0')
    clean_dir = args.img_file_dir.replace('_p', '')
    image_exts = ('.jpg', '.png', '.bmp', '.gif')

    results = {}
    ik = 0
    for file_name in os.listdir(args.img_file_dir):
        if os.path.splitext(file_name)[1] not in image_exts:
            continue

        # os.path.join also works when the directory has no trailing
        # separator.
        result_p = inference_detector(
            model, os.path.join(args.img_file_dir, file_name))
        result_c = inference_detector(model,
                                      os.path.join(clean_dir, file_name))

        if isinstance(result_p, tuple):
            bbox_results, mask_results = result_p
            result_p = bbox_results, encode_mask_results(mask_results)
            bbox_results, mask_results = result_c
            result_c = bbox_results, encode_mask_results(mask_results)

        result_p = np.concatenate(result_p)
        result_c = np.concatenate(result_c)
        # Index 4 of every detection row is compared with the threshold.
        result_above_confidence_num_p = sum(
            1 for det in result_p if det[4] > show_score_thr)
        result_above_confidence_num_c = sum(
            1 for det in result_c if det[4] > show_score_thr)

        if result_above_confidence_num_c == 0:
            # can't find any object in clean img
            bb_score = 0
            print('i=', ik)
            print(file_name)
            ik += 1
        else:
            bb_score = 1 - min(
                result_above_confidence_num_c,
                result_above_confidence_num_p) / result_above_confidence_num_c
        results[file_name] = bb_score

    json_name = args.out_json_name
    with open(json_name, 'w') as f_obj:
        json.dump(results, f_obj)
    return results
def infer(show_score_thr=0.3):
    """Delete the images on which the model finds at most two detections.

    Every image file of ``args.img_file_dir`` is run through the model and
    the detections whose score exceeds the threshold are counted. Files
    with 0, 1 or 2 such detections are printed and removed from disk.

    Args:
        show_score_thr (float): Score threshold a detection must exceed to
            be counted.

    Returns:
        dict: The ``results`` dict, which this function never fills.
    """
    args = parse_args()
    model = get_model(args)
    norm_cfg = dict(
        mean=np.array([123.675, 116.28, 103.53]).astype(np.float32),
        std=np.array([58.395, 57.12, 57.375]).astype(np.float32),
        to_rgb=True)
    img_meta = dict(
        filename='temp.jpg',
        ori_shape=(800, 800, 3),
        img_shape=(800, 800, 3),
        pad_shape=(800, 800, 3),
        scale_factor=np.array([1.000, 1.000, 1.000,
                               1.000]).astype(np.float32),
        flip=False,
        img_norm_cfg=norm_cfg)
    image_exts = ('.jpg', '.png', '.bmp', '.gif')

    results = {}
    ik = 0
    for file_name in os.listdir(args.img_file_dir):
        if os.path.splitext(file_name)[1] not in image_exts:
            continue
        file_path = os.path.join(args.img_file_dir, file_name)

        # ----clean image----
        img_meta['filename'] = file_name
        normalized = mmcv.imnormalize(
            mmcv.imread(file_path), img_meta['img_norm_cfg']['mean'],
            img_meta['img_norm_cfg']['std'])
        tensor = torch.from_numpy(normalized).cuda()
        img_shape = (tensor.size()[0], tensor.size()[1], tensor.size()[2])
        for shape_key in ('ori_shape', 'img_shape', 'pad_shape'):
            img_meta[shape_key] = img_shape
        img_c = tensor.permute(2, 0, 1).unsqueeze(0)

        with torch.no_grad():
            result_c = model(return_loss=False, rescale=True, img=[img_c],
                             img_metas=[[img_meta]])

        if isinstance(result_c, tuple):
            bbox_results, mask_results = result_c
            result_c = bbox_results, encode_mask_results(mask_results)

        result_c = np.concatenate(result_c)
        # Index 4 of every detection row is compared with the threshold.
        result_above_confidence_num_c = sum(
            1 for det in result_c if det[4] > show_score_thr)

        # Counts of 0, 1 and 2 are all handled the same way.
        if result_above_confidence_num_c in (0, 1, 2):
            print('i=', ik)
            print(file_name)
            os.remove(file_path)
            ik += 1
    return results
def __call__(self, img_group, scale, crop_history=None, flip=False, keep_ratio=True, div_255=False, is_flow=False):
    """Transform a group of images and stack them into one array.

    Args:
        img_group: Sequence of images processed together.
        scale: Target scale/size for the mmcv resize call.
        crop_history: When not None, a ``GroupCrop`` built from it replaces
            ``self.op_crop`` before cropping.
        flip (bool): Whether to apply ``mmcv.imflip`` to every image.
        keep_ratio (bool): Use ``mmcv.imrescale`` when True, otherwise
            ``mmcv.imresize``.
        div_255 (bool): Whether to run an extra ``imnormalize(img, 0, 255,
            False)`` pass before the main normalization.
        is_flow (bool): Treat the group as flow images; consecutive pairs
            are stacked into two-channel arrays at the end.

    Returns:
        tuple: ``(img_group, img_shape, pad_shape, scale_factor,
            crop_quadruple)``; ``img_group`` is stacked along a new first
            axis.
    """
    if self.resize_crop or self.rescale_crop:
        # The crop op does the resizing itself here, so there is no
        # separate scale factor.
        img_group, crop_quadruple = self.op_crop(img_group)
        img_shape = img_group[0].shape
        scale_factor = None
    else:
        # 1. rescale
        if keep_ratio:
            tuple_list = [
                mmcv.imrescale(img, scale, return_scale=True)
                for img in img_group
            ]
            img_group, scale_factors = list(zip(*tuple_list))
            # Only the first image's factor is reported.
            scale_factor = scale_factors[0]
        else:
            tuple_list = [
                mmcv.imresize(img, scale, return_scale=True)
                for img in img_group
            ]
            img_group, w_scales, h_scales = list(zip(*tuple_list))
            scale_factor = np.array(
                [w_scales[0], h_scales[0], w_scales[0], h_scales[0]],
                dtype=np.float32)
        # 2. crop (if necessary)
        # NOTE(review): this assignment persists on the instance, so later
        # calls with crop_history=None reuse the history crop — confirm
        # that this is intended.
        if crop_history is not None:
            self.op_crop = GroupCrop(crop_history)
        if self.op_crop is not None:
            img_group, crop_quadruple = self.op_crop(img_group, is_flow=is_flow)
        else:
            crop_quadruple = None
        img_shape = img_group[0].shape
    # 3. flip
    if flip:
        img_group = [mmcv.imflip(img) for img in img_group]
        # NOTE(review): the source formatting was collapsed; the inversion
        # of every second image is placed inside the flip branch here —
        # confirm against the original layout.
        if is_flow:
            for i in range(0, len(img_group), 2):
                img_group[i] = mmcv.iminvert(img_group[i])
    # 4a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(img, 0, 255, False) for img in img_group
        ]
    # 4. normalize
    img_group = [
        mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        for img in img_group
    ]
    # 5. pad
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(img, self.size_divisor)
            for img in img_group
        ]
        pad_shape = img_group[0].shape
    else:
        pad_shape = img_shape
    if is_flow:
        # Flow images must be 2-D; each consecutive pair becomes one
        # two-channel array.
        assert len(img_group[0].shape) == 2
        img_group = [
            np.stack((flow_x, flow_y), axis=2)
            for flow_x, flow_y in zip(img_group[0::2], img_group[1::2])
        ]
    # 6. transpose
    img_group = [img.transpose(2, 0, 1) for img in img_group]
    # Stack into numpy.array
    img_group = np.stack(img_group, axis=0)
    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple
def __call__(self, img_group, scale, crop_history=None, flip=False, keep_ratio=True, div_255=False, is_flow=False, interpolation='bilinear', normalize=True, more_aug=False):
    """Transform a group of images and stack them into one array.

    Args:
        img_group: Sequence of images processed together.
        scale: Target scale/size for the mmcv resize call.
        crop_history: When not None, a ``GroupCrop`` built from it (with
            ``self.crop_size`` and ``resize=True``) replaces
            ``self.op_crop`` before cropping.
        flip (bool): Whether to apply ``mmcv.imflip`` to every image.
        keep_ratio (bool): Use ``mmcv.imrescale`` when True, otherwise
            ``mmcv.imresize``.
        div_255 (bool): Whether to run an extra ``imnormalize(img, 0, 255,
            False)`` pass before the main normalization.
        is_flow (bool): Treat the group as flow images; consecutive pairs
            are stacked into two-channel arrays at the end.
        interpolation (str): Interpolation passed to the resize call and to
            ``GroupCrop`` / ``GroupMultiScaleCrop`` crop ops.
        normalize (bool): Whether to run the mean/std normalization.
        more_aug (bool): Whether to apply the imgaug blur + noise sequence.

    Returns:
        tuple: ``(img_group, img_shape, pad_shape, scale_factor,
            crop_quadruple)``; ``img_group`` is stacked along a new first
            axis.
    """
    # 1. rescale
    if keep_ratio:
        tuple_list = [
            mmcv.imrescale(img, scale, return_scale=True, interpolation=interpolation)
            for img in img_group
        ]
        img_group, scale_factors = list(zip(*tuple_list))
        # Only the first image's factor is reported.
        scale_factor = scale_factors[0]
    else:
        tuple_list = [
            mmcv.imresize(img, scale, return_scale=True, interpolation=interpolation)
            for img in img_group
        ]
        img_group, w_scales, h_scales = list(zip(*tuple_list))
        scale_factor = np.array(
            [w_scales[0], h_scales[0], w_scales[0], h_scales[0]],
            dtype=np.float32)
    # 2. crop (if necessary)
    # NOTE(review): this assignment persists on the instance, so later calls
    # with crop_history=None reuse the history crop — confirm that this is
    # intended.
    if crop_history is not None:
        self.op_crop = GroupCrop(crop_history, input_size=self.crop_size, resize=True)
    # Only GroupCrop / GroupMultiScaleCrop are called with ``interpolation``.
    if self.op_crop is not None and isinstance(
            self.op_crop, (GroupCrop, GroupMultiScaleCrop)):
        img_group, crop_quadruple = self.op_crop(
            img_group, is_flow=is_flow, interpolation=interpolation)
    elif self.op_crop is not None:
        img_group, crop_quadruple = self.op_crop(img_group, is_flow=is_flow)
    else:
        crop_quadruple = None
    img_shape = img_group[0].shape
    if more_aug:
        # The blur sigma is drawn once per call from [0, 5).
        seq = iaa.Sequential([
            iaa.GaussianBlur(sigma=np.random.uniform(0, 5)),
            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
        ])
        img_group = seq(images=np.array(img_group))
    # 3. flip
    if flip:
        img_group = [mmcv.imflip(img) for img in img_group]
        # NOTE(review): the source formatting was collapsed; the inversion
        # of every second image is placed inside the flip branch here —
        # confirm against the original layout.
        if is_flow:
            for i in range(0, len(img_group), 2):
                img_group[i] = mmcv.iminvert(img_group[i])
    # 4a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(img, 0, 255, False) for img in img_group
        ]
    # 4. normalize
    if normalize:
        img_group = [
            mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
            for img in img_group
        ]
    # 5. pad
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(img, self.size_divisor)
            for img in img_group
        ]
        pad_shape = img_group[0].shape
    else:
        pad_shape = img_shape
    if is_flow:
        # Flow images must be 2-D; each consecutive pair becomes one
        # two-channel array.
        assert len(img_group[0].shape) == 2
        img_group = [
            np.stack((flow_x, flow_y), axis=2)
            for flow_x, flow_y in zip(img_group[0::2], img_group[1::2])
        ]
    # 6. transpose
    # 2-D images get a trailing axis first so the transpose below applies.
    if len(img_shape) == 2:
        img_group = [img[:, :, np.newaxis] for img in img_group]
    img_group = [img.transpose(2, 0, 1) for img in img_group]
    # Stack into numpy.array
    img_group = np.stack(img_group, axis=0)
    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple