def inference_mot(model, img, frame_id):
    """Run the multi-object tracking model on a single frame.

    Args:
        model (nn.Module): The loaded mot model; its ``cfg`` supplies the
            test pipeline.
        img (str | ndarray): Either image name or loaded image.
        frame_id (int): frame id.

    Returns:
        dict[str : ndarray]: The tracking results.
    """
    first_param = next(model.parameters())
    device = first_param.device  # model device
    cfg = model.cfg

    # prepare data
    if isinstance(img, np.ndarray):
        # an in-memory frame: swap the first pipeline step for the
        # array-based loader
        cfg = cfg.copy()
        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
        data = dict(
            img=img, img_info=dict(frame_id=frame_id), img_prefix=None)
    else:
        # a file name: let the pipeline's own loader read it
        data = dict(
            img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)

    # build the data pipeline and turn the sample into a batch of one
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = collate([test_pipeline(data)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        assert not any(
            isinstance(m, RoIPool) for m in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data

    # forward the model
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)
def after_train_epoch(self, runner):
    """Evaluate the dataset across ranks after a training epoch.

    Each rank processes the samples ``rank, rank + world_size, ...``.
    Non-zero ranks dump their partial results to ``temp_<rank>.pkl`` in
    ``runner.work_dir``; rank 0 merges those files, deletes them and calls
    ``self.evaluate``.

    Args:
        runner: The training runner (provides ``model``, ``rank``,
            ``world_size``, ``work_dir``, ``epoch`` and ``max_epochs``).
    """
    if not self.every_n_epochs(runner, self.interval):
        return

    runner.model.eval()
    num_samples = len(self.dataset)
    results = [None] * num_samples
    is_master = runner.rank == 0
    if is_master:
        prog_bar = mmcv.ProgressBar(num_samples)

    for idx in range(runner.rank, num_samples, runner.world_size):
        batch = collate([self.dataset[idx]], samples_per_gpu=1)
        data_gpu = scatter(batch, [torch.cuda.current_device()])[0]

        # compute output
        with torch.no_grad():
            results[idx] = runner.model(
                return_loss=False, rescale=True, **data_gpu)

        if is_master:
            # advance the bar once per rank for every sample handled here
            for _ in range(runner.world_size):
                prog_bar.update()

    if is_master:
        print('\n')
        print('PROGRESS: {:.2f}%'.format(100.0 * (runner.epoch + 1) /
                                         runner.max_epochs))
        # wait until every other rank has written its partial results
        dist.barrier()
        for rank in range(1, runner.world_size):
            tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(rank))
            partial = mmcv.load(tmp_file)
            for idx in range(rank, len(results), runner.world_size):
                results[idx] = partial[idx]
            os.remove(tmp_file)
        self.evaluate(runner, results)
    else:
        tmp_file = osp.join(runner.work_dir,
                            'temp_{}.pkl'.format(runner.rank))
        mmcv.dump(results, tmp_file)
        dist.barrier()
    dist.barrier()
def inpainting_inference(model, masked_img, mask):
    """Run the inpainting model on one masked image.

    Args:
        model (nn.Module): The loaded model.
        masked_img (str): File path of image with mask.
        mask (str): Mask file path.

    Returns:
        Tensor: The predicted inpainting result (``result['fake_img']``).
    """
    device = next(model.parameters()).device  # model device

    # fixed inference pipeline: load -> pad -> normalize -> mask -> collect
    load_img = dict(type='LoadImageFromFile', key='masked_img')
    load_mask = dict(type='LoadMask', mask_mode='file', mask_config=dict())
    pad = dict(type='Pad', keys=['masked_img', 'mask'], mode='reflect')
    normalize = dict(
        type='Normalize',
        keys=['masked_img'],
        mean=[127.5] * 3,
        std=[127.5] * 3,
        to_rgb=False)
    apply_mask = dict(type='GetMaskedImage', img_name='masked_img')
    collect = dict(
        type='Collect',
        keys=['masked_img', 'mask'],
        meta_keys=['masked_img_path'])
    to_tensor = dict(type='ImageToTensor', keys=['masked_img', 'mask'])
    test_pipeline = Compose([
        load_img, load_mask, pad, normalize, apply_mask, collect, to_tensor
    ])

    # prepare data
    sample = test_pipeline(dict(masked_img_path=masked_img, mask_path=mask))
    batch = collate([sample], samples_per_gpu=1)
    data = scatter(batch, [device])[0]

    # forward the model
    with torch.no_grad():
        result = model(test_mode=True, **data)
    return result['fake_img']
def prepare_image(model, img):
    """Run the test pipeline on one image and scatter it to the model device.

    No forward pass is performed; only the prepared batch is returned.

    Args:
        model (nn.Module): Loaded model. ``model.cfg.data.test.pipeline``
            supplies every transform after the first (loading) step.
        img (str | ndarray): Image path or loaded image; it is passed to
            ``mmcv.imread``.

    Returns:
        The collated batch after ``scatter`` to the model's device.
    """

    class LoadImage(object):
        """Loading step that fills the keys later transforms read."""

        def __call__(self, results):
            # only a string input carries a meaningful file name
            if isinstance(results['img'], str):
                results['filename'] = results['img']
            else:
                results['filename'] = None
            img = mmcv.imread(results['img'])
            results['img'] = img
            results['img_shape'] = img.shape
            results['ori_shape'] = img.shape
            return results

    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # build the data pipeline; the original referenced ``LoadImagee``, a name
    # that only existed in commented-out code, so use the loader defined above
    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)

    # prepare data
    data = dict(img=img)
    data = test_pipeline(data)
    data = scatter(collate([data], samples_per_gpu=1), [device])[0]
    return data
def inference_detector(model, pcd):
    """Run the 3D detector on one point cloud file.

    Args:
        model (nn.Module): The loaded detector.
        pcd (str): Point cloud files.

    Returns:
        tuple: Predicted results and data from pipeline.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # build the data pipeline from a private copy of the config entry
    test_pipeline = Compose(deepcopy(cfg.data.test.pipeline))
    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)

    # every ``*_fields`` list starts empty; the pipeline fills them
    sample = dict(
        pts_filename=pcd,
        box_type_3d=box_type_3d,
        box_mode_3d=box_mode_3d,
        img_fields=[],
        bbox3d_fields=[],
        pts_mask_fields=[],
        pts_seg_fields=[],
        bbox_fields=[],
        mask_fields=[],
        seg_fields=[])
    data = collate([test_pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device.index])[0]
    else:
        # this is a workaround to avoid the bug of MMDataParallel
        for key in ('img_metas', 'points'):
            data[key] = data[key][0].data

    # forward the model
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    return result, data
def inference():
    """Continuously score the current frame window and queue top labels.

    Works entirely on module-level state (``frame_queue``, ``sample_length``,
    ``data``, ``test_pipeline``, ``model``, ``device``, ``label``,
    ``average_size``, ``result_queue``, ``camera``). Scores are averaged over
    the last ``average_size`` windows and the (at most five) best
    ``(label, score)`` pairs are appended to ``result_queue``.

    The main loop has no exit condition, so the cleanup statements after it
    are never reached.
    """
    score_cache = deque()
    scores_sum = 0

    while True:
        # busy-wait until the frame queue holds a full window
        cur_windows = []
        while not cur_windows:
            if len(frame_queue) == sample_length:
                cur_windows = list(np.array(frame_queue))
                if data['img_shape'] is None:
                    data['img_shape'] = frame_queue.popleft().shape[:2]

        cur_data = data.copy()
        cur_data['imgs'] = cur_windows
        cur_data = collate([test_pipeline(cur_data)], samples_per_gpu=1)
        if next(model.parameters()).is_cuda:
            cur_data = scatter(cur_data, [device])[0]

        with torch.no_grad():
            scores = model(return_loss=False, **cur_data)[0]

        # running sum over the cached windows
        score_cache.append(scores)
        scores_sum += scores

        if len(score_cache) == average_size:
            scores_avg = scores_sum / average_size
            top_k = min(len(label), 5)
            ranked = sorted(
                tuple(zip(label, scores_avg)),
                key=itemgetter(1),
                reverse=True)
            result_queue.append(ranked[:top_k])
            # drop the oldest window from the running sum
            scores_sum -= score_cache.popleft()

    camera.release()
    cv2.destroyAllWindows()
def inference_detector(model, img):
    """Run the detector on a single image.

    Args:
        model (nn.Module): The loaded detector.
        img (str | ndarray): Image file or loaded image; handed to the
            ``LoadImage`` step that replaces the first pipeline transform.

    Returns:
        The model output for ``return_loss=False, rescale=True``.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # build the data pipeline: custom loader + remaining configured steps
    test_pipeline = Compose([LoadImage()] + cfg.data.test.pipeline[1:])

    # prepare data
    data = collate([test_pipeline(dict(img=img))], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # Use torchvision ops for CPU mode instead
        for module in model.modules():
            # aligned=False is not implemented on CPU
            # set use_torchvision on-the-fly
            if isinstance(module, (RoIPool, RoIAlign)) and not module.aligned:
                module.use_torchvision = True
        warnings.warn('We set use_torchvision=True in CPU mode.')
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data

    # forward the model
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)
def _inference(self, filename):
    """Run the model on one image and return a merged mask plus boxes.

    Args:
        filename: Image input handed to ``self.test_pipeline`` under the
            ``img`` key.

    Returns:
        tuple: ``(res, bbox)`` where ``res`` is a uint8 array that is 255
        wherever any predicted mask is positive and 0 elsewhere, and
        ``bbox`` holds the boxes from ``pred[0][0][1]`` whose area exceeds
        100.

    Note:
        When no masks are predicted, the blank mask is sized from
        ``self.h`` / ``self.w``, which are only set by an earlier call that
        did produce masks.
    """
    data = dict(img=filename)
    data = self.test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    data = scatter(data, [self.device])[0]
    with torch.no_grad():
        pred = self.model(return_loss=False, rescale=True, **data)

    # keep only boxes larger than 100 px^2 (columns 0..3 are x1, y1, x2, y2
    # as used by the area formula below)
    bbox = pred[0][0][1]
    area = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
    bbox = bbox[area > 100]

    masks = pred[0][1][1]
    if len(masks) != 0:
        # sum all masks so that any positive pixel survives the threshold
        res = masks[0].astype(np.int32)
        for mask in masks[1:]:
            res = res + mask.astype(np.int32)
        # remember the mask size for frames without any prediction
        self.h, self.w = res.shape
    else:
        res = np.zeros((self.h, self.w), dtype=np.int32)

    res = (res > 0).astype(np.uint8) * 255
    return res, bbox
def model_inference(model, img):
    """Run the detector on a single image file.

    Args:
        model (nn.Module): The loaded detector.
        img (str): Image file; anything else fails the assertion.

    Returns:
        result (dict): Detection results (first element of the model output).
    """
    assert isinstance(img, str)

    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # build the data pipeline and batch the single sample
    test_pipeline = Compose(cfg.data.test.pipeline)
    sample = test_pipeline(dict(img_info=dict(filename=img), img_prefix=None))
    data = collate([sample], samples_per_gpu=1)

    # process img_metas: unwrap the DataContainer in either layout
    metas = data['img_metas']
    if isinstance(metas, list):
        data['img_metas'] = metas[0].data
    else:
        data['img_metas'] = metas.data[0]

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        assert not any(
            isinstance(m, RoIPool) for m in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)[0]
def predict(self, model, epoch):
    """Run ``model`` over ``self.dataset`` and gather predictions.

    Args:
        model: Callable model; invoked with the collated sample plus
            ``return_loss=False, rescale=True``.
        epoch: Stored on ``self.epoch``.

    Returns:
        tuple: ``(y_true, y_pred, names)``. ``y_true`` holds
        ``{'bboxes', 'labels'}`` dicts and stays empty when
        ``self.predict_mode`` is set; ``y_pred`` holds raw model outputs;
        ``names`` holds the image file names.
    """
    self.epoch = epoch
    y_true, y_pred, names = [], [], []
    num_samples = len(self.dataset)

    with torch.no_grad():
        for ind in tqdm(range(num_samples), total=num_samples):
            # debug runs stop after index 100
            if self.debug and ind > 100:
                break

            # Get data
            data = self.dataset[ind]
            # the model expects img / img_meta wrapped in lists
            img_is_list = type(data['img']) is list
            meta_is_list = type(data['img_meta']) is list
            if not img_is_list and not meta_is_list:
                data['img'] = [data['img']]
                data['img_meta'] = [data['img_meta']]
            data_gpu = collate([data], samples_per_gpu=1)

            if not self.predict_mode:
                # Get annotations
                ann = self.dataset.get_ann_info(ind)
                y_true.append({
                    'bboxes': ann['bboxes'],
                    'labels': ann['labels']
                })
            names.append(self.dataset.img_infos[ind]['filename'])

            # NMS parameters can be altered through model.module.test_cfg
            # when tuning thresholds.
            # Get model output
            y_pred.append(model(**data_gpu, return_loss=False, rescale=True))

    return y_true, y_pred, names
def restoration_inference(model, img, ref=None):
    """Run the restoration model on one image.

    The ground-truth keys are stripped from ``model.cfg.test_pipeline`` in
    place before the pipeline is built.

    Args:
        model (nn.Module): The loaded model.
        img (str): File path of input image.
        ref (str | None): File path of reference image. Default: None.

    Returns:
        Tensor: The predicted restoration result (``result['output']``).
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # remove gt from test_pipeline; iterate over a snapshot because steps
    # may be removed from the live list
    for key in ('gt', 'gt_path'):
        for step in list(cfg.test_pipeline):
            if 'key' in step and key == step['key']:
                cfg.test_pipeline.remove(step)
            if 'keys' in step and key in step['keys']:
                step['keys'].remove(key)
                # a step left without keys has nothing to do
                if not step['keys']:
                    cfg.test_pipeline.remove(step)
            if 'meta_keys' in step and key in step['meta_keys']:
                step['meta_keys'].remove(key)

    # build the data pipeline
    test_pipeline = Compose(cfg.test_pipeline)

    # prepare data: Ref-SR when a reference is given, SISR otherwise
    sample = dict(lq_path=img, ref_path=ref) if ref else dict(lq_path=img)
    batch = collate([test_pipeline(sample)], samples_per_gpu=1)
    data = scatter(batch, [device])[0]

    # forward the model
    with torch.no_grad():
        result = model(test_mode=True, **data)
    return result['output']
def run_with_onnx_runtime(model_path, w, h):
    """Run an ONNX model on the first readable image of the test list.

    Reads image stems from the module-level ``test_path`` file, loads
    ``<stem>.jpg`` from ``test_img_path``, preprocesses it with ``LoadImage``
    followed by the module-level ``test_cfg`` steps, runs the session once
    and prints the shape of every output.

    Args:
        model_path: Path handed to ``onnxruntime.InferenceSession``.
        w: Unused; kept for interface compatibility.
        h: Unused; kept for interface compatibility.
    """
    session = onnxruntime.InferenceSession(model_path, None)
    input_name = session.get_inputs()[0].name

    test_pipeline = Compose([LoadImage()] + test_cfg)
    device = torch.device(0)

    with open(test_path, "r") as f:
        filenames = f.readlines()

    for filename in filenames:
        img_file = filename.strip() + ".jpg"
        img = cv2.imread(os.path.join(test_img_path, img_file))
        if img is None:
            # unreadable or missing image: try the next entry
            continue

        # prepare data
        data = dict(img=img)
        data = test_pipeline(data)
        data = scatter(collate([data], samples_per_gpu=1), [device])[0]

        # NOTE(review): ``data`` is the whole scattered batch, not a single
        # array; onnxruntime input feeds are normally numpy arrays keyed by
        # input name -- confirm which entry of ``data`` should be fed here.
        outputs = session.run([], {input_name: data})

        # ``session.run`` returns a list of outputs, so report each shape
        for output in outputs:
            print(f'Output y.shape: {output.shape}')
        break
def preprocess(model, img):
    """Prepare one image as a model-ready batch without running the model.

    Args:
        model (nn.Module): The loaded detector; provides ``cfg`` and the
            target device.
        img (str | ndarray): Image file or loaded image, handed to the
            ``LoadImage`` step.

    Returns:
        The collated batch after ``scatter`` to the model's device.
    """
    device = next(model.parameters()).device  # model device

    # build the data pipeline: custom loader + remaining configured steps
    remaining_steps = model.cfg.data.test.pipeline[1:]
    test_pipeline = Compose([LoadImage()] + remaining_steps)

    # prepare data
    sample = test_pipeline(dict(img=img))
    batch = collate([sample], samples_per_gpu=1)
    return scatter(batch, [device])[0]
def myinferencedetector(model, img):
    """Open an image with PIL and run the detector on its array form.

    Prints the image format, size, mode and the resulting array type and
    shape along the way.

    Args:
        model (nn.Module): The loaded detector.
        img: Anything accepted by ``Image.open``.

    Returns:
        The first element of the model output.
    """
    image = Image.open(img)
    # summarize some details about the image
    for detail in (image.format, image.size, image.mode):
        print(detail)

    # convert image to numpy array and summarize it
    image_np = np.asarray(image)
    print(type(image_np))
    print(image_np.shape)

    device = next(model.parameters()).device  # model device
    cfg = model.cfg.copy()
    # set loading pipeline type so the pipeline accepts an in-memory array
    cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    # np.asarray always yields an ndarray, so the array is added directly
    datas = [test_pipeline(dict(img=image_np))]
    data = collate(datas, samples_per_gpu=1)

    # just get the actual data from DataContainer
    data['img_metas'] = [metas.data[0] for metas in data['img_metas']]
    data['img'] = [imgs.data[0] for imgs in data['img']]
    data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        results = model(return_loss=False, rescale=True, **data)
    return results[0]
def inference_model(model, img):
    """Classify a single image.

    The first step of ``model.cfg.data.test.pipeline`` is adjusted in place
    so that it matches the input kind (file name vs. loaded image).

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename or loaded image.

    Returns:
        result (dict): The classification results that contains
            `pred_label`, `pred_score` and `pred_class`.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # build the data pipeline
    pipeline = cfg.data.test.pipeline
    starts_with_loader = pipeline[0]['type'] == 'LoadImageFromFile'
    if isinstance(img, str):
        # file input needs the file loader in front
        if not starts_with_loader:
            pipeline.insert(0, dict(type='LoadImageFromFile'))
        data = dict(img_info=dict(filename=img), img_prefix=None)
    else:
        # loaded input must skip the file loader
        if starts_with_loader:
            pipeline.pop(0)
        data = dict(img=img)

    test_pipeline = Compose(cfg.data.test.pipeline)
    data = collate([test_pipeline(data)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        scores = model(return_loss=False, **data)

    # best score / label of the single sample in the batch
    pred_label = np.argmax(scores, axis=1)[0]
    pred_score = np.max(scores, axis=1)[0]
    return {
        'pred_label': pred_label,
        'pred_score': float(pred_score),
        'pred_class': model.CLASSES[pred_label],
    }
def inference_sot(model, image, init_bbox, frame_id):
    """Run the single object tracker on one frame.

    Args:
        model (nn.Module): The loaded tracker.
        image (ndarray): Loaded images.
        init_bbox (ndarray): The target needs to be tracked.
        frame_id (int): frame id.

    Returns:
        dict[str : ndarray]: The tracking results.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # image and box are both fed as float32
    sample = dict(
        img=image.astype(np.float32),
        gt_bboxes=np.array(init_bbox).astype(np.float32),
        img_info=dict(frame_id=frame_id))

    # remove the "LoadImageFromFile" and "LoadAnnotations" in pipeline
    test_pipeline = Compose(cfg.data.test.pipeline[2:])
    data = collate([test_pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        assert not any(
            isinstance(m, RoIPool) for m in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data

    # forward the model
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)
def inference_detector(model, img):
    """Run the detector on a single image and time the forward pass.

    Args:
        model (nn.Module): The loaded detector.
        img (str | ndarray): Image file or loaded image, handed to the
            ``LoadImage`` step.

    Returns:
        tuple: ``(result, cost_time)``. ``result`` is the model output, or
        ``None`` when the forward pass ran out of GPU memory (a warning is
        printed in that case). ``cost_time`` is the wall-clock time in
        seconds between the two CUDA synchronisation points.

    Raises:
        RuntimeError: Any runtime error other than out-of-memory.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # build the data pipeline
    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)

    # prepare data
    data = dict(img=img)
    data = test_pipeline(data)
    data = scatter(collate([data], samples_per_gpu=1), [device])[0]

    # Bound up front: the out-of-memory path below swallows the exception,
    # and the original then failed with UnboundLocalError at ``return``.
    result = None

    # forward the model
    with torch.no_grad():
        # synchronise so that only the forward pass is measured
        torch.cuda.synchronize()
        st = time.time()
        try:
            result = model(return_loss=False, rescale=True, **data)
        except RuntimeError as exception:
            if 'out of memory' not in str(exception):
                raise
            print('WARNING: out of memory')
            if hasattr(torch.cuda, 'empty_cache'):
                torch.cuda.empty_cache()
        torch.cuda.synchronize()
        cost_time = time.time() - st
    return result, cost_time
def inference_detector(cfg, model, img):
    """Run the detector on a single image using an explicit config.

    The batch is passed to the model as collated, without scattering it to
    a device.

    Args:
        cfg: Config whose ``data.test.pipeline`` supplies every transform
            after the first two steps.
        model (nn.Module): The loaded detector.
        img (str | ndarray): Image file or loaded image, handed to the
            ``LoadImage`` step.

    Returns:
        The model output for ``return_loss=False, rescale=True``.
    """
    # build the data pipeline: custom loader replaces the first two steps
    test_pipeline = Compose([LoadImage()] + cfg.data.test.pipeline[2:])

    # prepare data
    sample = test_pipeline(dict(img=img))
    data = collate([sample], samples_per_gpu=1)

    # forward the model
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)
def extract_encoder_feat(model, tokenizer, img):
    """Extract encoder features of caption model.

    Args:
        model (nn.Module): Image Captioning Model
        tokenizer: For preprocess pipeline
        img (str): img file path

    Returns:
        Extracted feature result (output of ``model.extract_feat``).
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # prepare data; the pipeline gets an empty dummy caption
    sample = dict(
        img_info=dict(filename=img),
        img_prefix=None,
        cap_info=dict(caption='', tokenizer=tokenizer))

    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = collate([test_pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward only the encoder with the image and its mask
    with torch.no_grad():
        return model.extract_feat(data['img'][0], data['img_mask'][0])
def matting_inference(model, img, trimap):
    """Run the matting model on one image / trimap pair.

    The alpha keys are stripped from ``model.cfg.test_pipeline`` in place
    before the pipeline is built.

    Args:
        model (nn.Module): The loaded model.
        img (str): Image file path.
        trimap (str): Trimap file path.

    Returns:
        np.ndarray: The predicted alpha matte (``result['pred_alpha']``).
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # remove alpha from test_pipeline; iterate over a snapshot because steps
    # may be removed from the live list
    for key in ('alpha', 'ori_alpha'):
        for step in list(cfg.test_pipeline):
            if 'key' in step and key == step['key']:
                cfg.test_pipeline.remove(step)
            if 'keys' in step and key in step['keys']:
                step['keys'].remove(key)
                # a step left without keys has nothing to do
                if not step['keys']:
                    cfg.test_pipeline.remove(step)
            if 'meta_keys' in step and key in step['meta_keys']:
                step['meta_keys'].remove(key)

    # build the data pipeline
    test_pipeline = Compose(cfg.test_pipeline)

    # prepare data
    sample = test_pipeline(dict(merged_path=img, trimap_path=trimap))
    batch = collate([sample], samples_per_gpu=1)
    data = scatter(batch, [device])[0]

    # forward the model
    with torch.no_grad():
        result = model(test_mode=True, **data)
    return result['pred_alpha']
def inference_recognizer(model, frames):
    """Run the recognizer on a group of frames.

    Args:
        model (nn.Module): The loaded recognizer; ``model.cfg.data.test``
            supplies ``input_size`` and ``img_scale``.
        frames: Frame group handed to ``GroupImageTransform``.

    Returns:
        The model output for ``return_loss=False, rescale=True``.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    test_cfg = cfg.data.test

    # build the data pipeline
    test_transform = GroupImageTransform(
        crop_size=test_cfg.input_size,
        oversample=None,
        resize_crop=False,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True)

    # prepare data; only the transformed frames are needed from the result
    scale = (test_cfg.img_scale, test_cfg.img_scale)
    frames, *_ = test_transform(
        frames,
        scale,
        crop_history=None,
        flip=False,
        keep_ratio=False,
        div_255=False,
        is_flow=False)
    sample = dict(img_group_0=frames, num_modalities=1, img_meta={})
    data = scatter(collate([sample], samples_per_gpu=1), [device])[0]

    # forward the model
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)
def after_train_epoch(self, runner):
    """Evaluate a (possibly shuffled) subset of the dataset after an epoch.

    At most ``self.num_evals`` samples are evaluated; they are drawn in
    random order when ``self.shuffle`` is set.

    Args:
        runner: The training runner (provides ``model``).
    """
    if not self.every_n_epochs(runner, self.interval):
        return

    runner.model.eval()

    # choose which sample indices to evaluate
    range_idxs = list(range(len(self.dataset)))
    if self.shuffle:
        np.random.shuffle(range_idxs)
    range_idxs = range_idxs[:self.num_evals]

    prog_bar = mmcv.ProgressBar(len(range_idxs))
    results = []
    for idx in range_idxs:
        batch = collate([self.dataset[idx]], samples_per_gpu=1)
        data_gpu = scatter(batch, [torch.cuda.current_device()])[0]
        with torch.no_grad():
            # the model returns a pair; only the first element is kept
            result, _ = runner.model(
                return_loss=False, rescale=True, **data_gpu)
        results.extend(result)
        prog_bar.update()

    self.evaluate(runner, results, range_idxs=range_idxs)
def after_train_epoch(self, runner):
    """Evaluate the whole dataset on the current device after an epoch.

    Args:
        runner: The training runner (provides ``model``).
    """
    if not self.every_n_epochs(runner, self.interval):
        return

    runner.model.eval()
    num_samples = len(self.dataset)
    results = [None] * num_samples
    prog_bar = mmcv.ProgressBar(num_samples)

    for idx in range(num_samples):
        batch = collate([self.dataset[idx]], samples_per_gpu=1)
        data_gpu = scatter(batch, [torch.cuda.current_device()])[0]

        # compute output
        with torch.no_grad():
            results[idx] = runner.model(
                return_loss=False, rescale=True, **data_gpu)

        # one sample per step, so one tick of the bar
        prog_bar.update()

    print('\n')
    self.evaluate(runner, results)
def inference_bottom_up_pose_model(model, img_or_path):
    """Inference a single image.

    num_people: P
    num_keypoints: K

    Args:
        model (nn.Module): The loaded pose model.
        img_or_path (str | np.ndarray): Image name or loaded image.

    Returns:
        list[dict]: One entry per prediction returned by the model. Each
            entry maps ``'keypoints'`` to the first three columns of that
            prediction (x, y, score).
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device

    # build the data pipeline
    test_pipeline = Compose([LoadImage()] + cfg.test_pipeline[1:])

    # prepare data
    ann_info = {
        'image_size': cfg.data_cfg['image_size'],
        'num_joints': cfg.data_cfg['num_joints'],
        'flip_index':
        [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15],
    }
    sample = {
        'img_or_path': img_or_path,
        'dataset': 'coco',
        'ann_info': ann_info,
    }
    data = collate([test_pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'].data[0]

    # forward the model; only the predictions are used
    with torch.no_grad():
        all_preds, _, _ = model(
            return_loss=False, img=data['img'], img_metas=data['img_metas'])

    return [{'keypoints': pred[:, :3]} for pred in all_preds]
def _data_func(data, device_id):
    """Turn one sample into test-mode keyword arguments for the model.

    Args:
        data: A single pipeline output.
        device_id: Target device handed to ``scatter``.

    Returns:
        dict: The scattered batch plus ``return_loss=False`` and
        ``rescale=True``.
    """
    batch = collate([data], samples_per_gpu=1)
    scattered = scatter(batch, [device_id])[0]
    return dict(return_loss=False, rescale=True, **scattered)
def _inference_single_pose_model(model, img_or_path, bbox, dataset):
    """Inference a single bbox.

    num_keypoints: K

    Args:
        model (nn.Module): The loaded pose model.
        img_or_path (str | np.ndarray): Image name or loaded image.
        bbox (list | np.ndarray): Bounding boxes (with scores), shaped (4, )
            or (5, ). (left, top, width, height, [score])
        dataset (str): Dataset name; selects the keypoint flip pairs.

    Returns:
        ndarray[Kx3]: Predicted pose x, y, score.

    Raises:
        NotImplementedError: If ``dataset`` is not one of the supported
            names.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device

    # build the data pipeline
    test_pipeline = Compose([LoadImage()] + cfg.test_pipeline[1:])

    assert len(bbox) in [4, 5]
    center, scale = _box2cs(cfg, bbox)

    # left/right keypoint index pairs, per dataset
    coco_body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
                 [13, 14], [15, 16]]
    if dataset in ('TopDownCocoDataset', 'TopDownOCHumanDataset'):
        flip_pairs = coco_body
    elif dataset == 'TopDownCocoWholeBodyDataset':
        foot = [[17, 20], [18, 21], [19, 22]]
        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
        # hand keypoints 91..111 pair with 112..132
        hand = [[idx, idx + 21] for idx in range(91, 112)]
        flip_pairs = coco_body + foot + face + hand
    elif dataset == 'TopDownAicDataset':
        flip_pairs = [[0, 3], [1, 4], [2, 5], [6, 9], [7, 10], [8, 11]]
    elif dataset in ('TopDownOneHand10KDataset', 'TopDownFreiHandDataset'):
        flip_pairs = []
    else:
        raise NotImplementedError()

    # prepare data; joints are zero placeholders, score defaults to 1
    num_joints = cfg.data_cfg.num_joints
    sample = {
        'img_or_path': img_or_path,
        'center': center,
        'scale': scale,
        'bbox_score': bbox[4] if len(bbox) == 5 else 1,
        'dataset': dataset,
        'joints_3d': np.zeros((num_joints, 3), dtype=np.float32),
        'joints_3d_visible': np.zeros((num_joints, 3), dtype=np.float32),
        'rotation': 0,
        'ann_info': {
            'image_size': cfg.data_cfg['image_size'],
            'num_joints': cfg.data_cfg['num_joints'],
            'flip_pairs': flip_pairs
        }
    }
    data = collate([test_pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'].data[0]

    # forward the model; only the predictions are used
    with torch.no_grad():
        all_preds, _, _ = model(
            return_loss=False, img=data['img'], img_metas=data['img_metas'])

    return all_preds[0]
def attact_detector(model, img0, dstimg, gt_bboxes, gt_labels, filename=None,
                    attack_roi=None, at_times=50, e=30.0, image_size=800,
                    mode='frcnn', random_begin=False, return_grad=False,
                    rpn=False):
    """Iteratively perturb ``dstimg`` inside ``attack_roi`` using loss gradients.

    Every iteration runs the detector in training mode
    (``return_loss=True``), back-propagates one loss term to the input image
    and subtracts a clipped momentum of that gradient from the pixels
    selected by ``attack_roi``.

    Args:
        model (nn.Module): The loaded detector.
        img0: Unused; kept for interface compatibility.
        dstimg (ndarray): Image to perturb. Modified in place when
            ``random_begin`` is set.
        gt_bboxes: Ground-truth boxes; scaled by ``image_size / 500`` before
            being passed to the model.
        gt_labels: Ground-truth labels passed to the model.
        filename: Only used in the progress print.
        attack_roi: Index expression selecting the pixels that may change
            (presumably the output of ``np.where`` -- verify against caller).
        at_times (int): Number of iterations; forced to 1 when
            ``return_grad`` is set.
        e (float): Step size multiplying the gradient.
        image_size (int): Side length the image is resized to before each
            forward pass.
        mode (str): ``'frcnn'`` or ``'ssd'``; selects the normalisation
            constants and the loss that is back-propagated. Any other value
            leaves ``mean`` / ``std`` undefined.
        random_begin (bool): Fill the ROI with random integers in [0, 256)
            before starting.
        return_grad (bool): Return after the first backward pass.
        rpn (bool): In non-ssd mode, back-propagate the negated sum of the
            RPN classification losses instead of ``loss_cls``.

    Returns:
        ndarray: The perturbed image, or ``(data_grad, gt_bboxes)`` when
        ``return_grad`` is set.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # build the data pipeline
    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)

    # boxes are rescaled from a 500-pixel frame to the network input size
    gt_labels = np.array(gt_labels)
    gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
    gt_bboxes = gt_bboxes / 500 * image_size
    gt_labels = torch.tensor(gt_labels, device=device)
    gt_bboxes = torch.tensor(gt_bboxes, device=device)

    momentom = np.zeros_like(dstimg)
    if mode == 'frcnn':
        mean = [123.675, 116.28, 103.53]
        std = [58.395, 57.12, 57.375]
    elif mode == 'ssd':
        mean = [123.675, 116.28, 103.53]
        std = [1, 1, 1]

    if random_begin:
        dstimg[attack_roi] = np.random.randint(0, 256,
                                               dstimg[attack_roi].shape)
    if return_grad:
        at_times = 1

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same
    # dtype (float64)
    adv_x = np.array(dstimg[..., ::-1], dtype=float)
    res = np.array(adv_x)
    data = dict(img=adv_x)
    # flip the channel axis back; this is a view on the array fed above
    adv_x = adv_x[..., ::-1]
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)

    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # Use torchvision ops for CPU mode instead
        for m in model.modules():
            if isinstance(m, (RoIPool, RoIAlign)):
                if not m.aligned:
                    # aligned=False is not implemented on CPU
                    # set use_torchvision on-the-fly
                    m.use_torchvision = True
        warnings.warn('We set use_torchvision=True in CPU mode.')
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data

    data['img'] = torch.tensor(data['img'][0].clone().detach(), device=device)
    data['img_metas'] = data['img_metas'][0]
    data['gt_bboxes'] = [gt_bboxes]
    data['gt_labels'] = [gt_labels]

    loss_last = 100
    for k in range(at_times):
        data['img'] = torch.autograd.Variable(data['img'])
        data['img'].requires_grad = True
        loss = model(
            return_loss=True,
            img=data['img'],
            img_metas=data['img_metas'],
            gt_bboxes=data['gt_bboxes'],
            gt_labels=data['gt_labels'])

        # sum the per-level RPN losses
        loss_rpn_cls = 0
        loss_rpn_bbox = 0
        for i in range(len(loss['loss_rpn_cls'])):
            loss_rpn_cls = loss_rpn_cls + loss['loss_rpn_cls'][i]
            loss_rpn_bbox = loss_rpn_bbox + loss['loss_rpn_bbox'][i]

        # choose the loss whose gradient drives the attack
        if mode == 'ssd':
            loss_back = loss['loss_cls'][0]
        elif rpn:
            loss_back = -loss_rpn_cls
        else:
            loss_back = loss['loss_cls']

        model.zero_grad()
        loss_back.backward()

        # gradient w.r.t. the input, as an HxWxC array at 500x500
        data_grad = data['img'].grad.data
        data_grad = data_grad.cpu().numpy()
        data_grad = data_grad.squeeze()
        data_grad = data_grad.transpose([1, 2, 0])
        # NOTE(review): the third positional argument of cv2.resize is
        # ``dst``, not ``interpolation`` -- INTER_AREA is probably not
        # applied here. Left unchanged to keep the numerical behaviour.
        data_grad = cv2.resize(data_grad, (500, 500), cv2.INTER_AREA)

        # halve the step once the loss is already small
        if loss_back <= 0.1:
            momentom = 0.9 * momentom + e / 2 * data_grad
        else:
            momentom = 0.9 * momentom + e * data_grad
        # express the momentum in pixel units for the update below
        momentom = momentom * std
        if mode == 'ssd':
            data_grad = 100 * e * data_grad * std
        else:
            data_grad = e * data_grad * std
        momentom = np.clip(momentom, -10, 10)

        if return_grad:
            return data_grad, gt_bboxes

        adv_x[attack_roi] = adv_x[attack_roi] - momentom[attack_roi]
        # back to normalised units for the next momentum update
        momentom = momentom / std
        adv_x[attack_roi] = np.clip(adv_x[attack_roi], 0, 255)

        # rebuild the normalised network input from the updated image
        data['img'] = (adv_x - mean) / std
        data['img'] = cv2.resize(data['img'], (image_size, image_size))
        data['img'] = torch.from_numpy(data['img'].transpose(
            2, 0, 1)).float().unsqueeze(0)
        # follow the model's device instead of the default CUDA device
        data['img'] = data['img'].to(device)

        if loss_back < loss_last:
            # NOTE(review): ``res`` aliases ``adv_x``, which keeps being
            # updated in place, so ``res`` tracks the latest image rather
            # than the best one -- confirm whether a copy was intended.
            res = adv_x
            loss_last = loss_back
        if (k + 1) % 5 == 0 or (k + 1) == at_times:
            print(filename, 'frcn step:%d' % (k + 1), loss_back,
                  loss['loss_cls'], loss_rpn_cls)
        # halve the step size after 200 iterations
        if k == 199:
            e = e / 2

    return res
def Dpatch_detector(model, img, dstimg, patch, gt_bboxes, gt_labels,
                    filename=None, at_times=50, e=10.0, image_size=800,
                    mode='frcnn'):
    """Optimise a random adversarial patch against a detector.

    A block of random pixels is pasted at rows/columns ``230:270`` of a uint8
    copy of ``dstimg``. Every element where that copy differs from ``img`` is
    treated as attackable. For ``at_times`` iterations the detector's training
    loss is back-propagated to the input, and the attackable pixels are moved
    against the (clipped) gradient.

    Args:
        model (nn.Module): The loaded detector. It must expose ``cfg`` and
            return a dict of losses when called with ``return_loss=True``.
        img (ndarray): Reference image; compared element-wise with the patched
            copy of ``dstimg`` to find the attackable pixels.
        dstimg (ndarray): Image that receives the patch. It is copied, never
            modified. The gradient is resized to 500x500 before being indexed
            with the attack mask, so the images are presumably 500x500 --
            TODO confirm with callers.
        patch: Ignored. The patch is always re-drawn with
            ``np.random.randint``; the parameter is kept so existing callers
            keep working.
        gt_bboxes (array-like): Ground-truth boxes in a 500-pixel coordinate
            frame; they are rescaled by ``image_size / 500``.
        gt_labels (array-like): Ground-truth labels matching ``gt_bboxes``.
        filename (str, optional): Only used in the progress print-out.
        at_times (int): Number of gradient steps.
        e (float): Step size (multiplied by 100 when ``mode == 'ssd'``).
        image_size (int): Side length the image is resized to before it is fed
            back to the model.
        mode (str): ``'ssd'`` back-propagates ``loss['loss_cls'][0]``; any
            other value back-propagates ``loss_cls + loss_bbox``.

    Returns:
        ndarray: The uint8 adversarial image, same shape as ``dstimg``.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # build the data pipeline
    test_pipeline = Compose([LoadImage()] + cfg.data.test.pipeline[1:])

    # Boxes arrive in a 500-pixel frame; rescale them to the network input.
    gt_bboxes = np.array(gt_bboxes, dtype=np.float32) / 500 * image_size
    gt_labels = torch.tensor(np.array(gt_labels), device=device)
    gt_bboxes = torch.tensor(gt_bboxes, device=device)

    adv_x = np.array(dstimg, dtype=np.uint8)
    # NOTE: the ``patch`` argument is deliberately overwritten (original
    # behaviour); the attack always starts from fresh random noise.
    patch = np.random.randint(0, 256, size=adv_x.shape)
    adv_x[230:270, 230:270] = patch[230:270, 230:270]
    attack_roi = np.where(adv_x != img)

    # Per-channel normalisation constants, applied to adv_x in its own
    # channel order.
    mean = [123.675, 116.28, 103.53]
    std = [58.395, 57.12, 57.375]

    # The pipeline is fed the channel-reversed view; adv_x itself keeps its
    # original channel order for the update loop below.
    data = dict(img=adv_x[..., ::-1])
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)

    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # Use torchvision ops for CPU mode instead
        for m in model.modules():
            if isinstance(m, (RoIPool, RoIAlign)):
                if not m.aligned:
                    # aligned=False is not implemented on CPU
                    # set use_torchvision on-the-fly
                    m.use_torchvision = True
        warnings.warn('We set use_torchvision=True in CPU mode.')
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data

    # NOTE(review): data['img'][0] must already be a tensor here; on the CPU
    # path only img_metas is unwrapped above -- confirm CPU use is supported.
    data['img'] = data['img'][0].clone().detach().to(device)
    data['img_metas'] = data['img_metas'][0]
    data['gt_bboxes'] = [gt_bboxes]
    data['gt_labels'] = [gt_labels]

    # Value range of the initial normalised input; later inputs are clamped
    # back into it.
    data_min = torch.min(data['img'])
    data_max = torch.max(data['img'])

    for k in range(at_times):
        # Fresh leaf tensor so the gradient w.r.t. the input is retained.
        data['img'] = data['img'].detach().requires_grad_(True)
        loss = model(return_loss=True, **data)

        # The RPN classification loss is only reported, never optimised.
        # Loss dicts without RPN terms simply report 0 instead of raising
        # KeyError.
        loss_rpn_cls = sum(loss.get('loss_rpn_cls', []))

        if mode == 'ssd':
            loss_back = loss['loss_cls'][0]
        else:
            loss_back = loss['loss_cls'] + loss['loss_bbox']
        model.zero_grad()
        loss_back.backward()

        # (1, C, H, W) gradient -> (H, W, C) at the resolution of adv_x.
        data_grad = data['img'].grad.data.cpu().numpy().squeeze()
        data_grad = data_grad.transpose([1, 2, 0])
        data_grad = cv2.resize(data_grad, (500, 500))

        # Scale the gradient by the step size and std, with a bounded step.
        step_scale = 100 * e if mode == 'ssd' else e
        data_grad = np.clip(step_scale * data_grad * std, -10, 10)

        # Do the step in floating point and clip BEFORE writing back into the
        # uint8 image; clipping after the cast cannot undo a wrap-around.
        stepped = adv_x[attack_roi].astype(np.float64) - data_grad[attack_roi]
        adv_x[attack_roi] = np.clip(stepped, 0, 255)

        # Re-normalise the updated image and rebuild the network input on the
        # model's own device (not merely the current CUDA device).
        normalised = (adv_x - mean) / std
        normalised = cv2.resize(normalised, (image_size, image_size))
        next_input = torch.from_numpy(normalised.transpose(2, 0, 1))
        next_input = next_input.float().unsqueeze(0).to(device)
        data['img'] = torch.clamp(next_input, data_min, data_max)

        if (k + 1) % 5 == 0 or (k + 1) == at_times:
            print(filename, 'frcn step:%d' % (k + 1), loss_back,
                  loss['loss_cls'], loss_rpn_cls)

    return adv_x
def _prepare_data(cfg, imgs): """Inference image(s) with the detector. Args: model (nn.Module): The loaded detector. imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]): Either image files or loaded images. Returns: result (dict): Predicted results. """ if isinstance(imgs, (list, tuple)): if not isinstance(imgs[0], (np.ndarray, str)): raise AssertionError('imgs must be strings or numpy arrays') elif isinstance(imgs, (np.ndarray, str)): imgs = [imgs] else: raise AssertionError('imgs must be strings or numpy arrays') is_ndarray = isinstance(imgs[0], np.ndarray) if is_ndarray: cfg = cfg.copy() # set loading pipeline type cfg.data.test.pipeline[0].type = 'LoadImageFromNdarray' cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) test_pipeline = Compose(cfg.data.test.pipeline) data = [] for img in imgs: # prepare data if is_ndarray: # directly add img datum = dict(img=img) else: # add information into dict datum = dict(img_info=dict(filename=img), img_prefix=None) # build the data pipeline datum = test_pipeline(datum) # get tensor from list to stack for batch mode (text detection) data.append(datum) if isinstance(data[0]['img'], list) and len(data) > 1: raise Exception('aug test does not support ' f'inference with batch size ' f'{len(data)}') data = collate(data, samples_per_gpu=len(imgs)) # process img_metas if isinstance(data['img_metas'], list): data['img_metas'] = [ img_metas.data[0] for img_metas in data['img_metas'] ] else: data['img_metas'] = data['img_metas'].data if isinstance(data['img'], list): data['img'] = [img.data for img in data['img']] if isinstance(data['img'][0], list): data['img'] = [img[0] for img in data['img']] else: data['img'] = data['img'].data return data
def inference_bottom_up_pose_model(model,
                                   img_or_path,
                                   return_heatmap=False,
                                   outputs=None):
    """Run a bottom-up pose model on one image.

    Args:
        model (nn.Module): The loaded pose model; must expose ``cfg`` with
            ``test_pipeline`` and ``data_cfg`` entries.
        img_or_path (str | np.ndarray): Image filename or loaded image.
        return_heatmap (bool): If True, the model's ``'output_heatmap'`` is
            stored under the ``'heatmap'`` key of the returned layer outputs.
            Default: False.
        outputs (list(str) | tuple(str)): Names of layers whose outputs
            should be captured by the output hook. Default: None.

    Returns:
        tuple:
            - list[dict]: One dict per entry of ``result['preds']``; its
              ``'keypoints'`` value holds the first three columns of that
              prediction (presumably x, y, score).
            - list: A single-element list holding the hook's captured layer
              outputs (including ``'heatmap'`` when requested).
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device

    # Swap the configured loading step for LoadImage, keeping the channel
    # order it was configured with (defaults to 'rgb').
    channel_order = cfg.test_pipeline[0].get('channel_order', 'rgb')
    loader = LoadImage(channel_order=channel_order)
    test_pipeline = Compose([loader] + cfg.test_pipeline[1:])

    # Annotation info is hard-coded for the 17-keypoint 'coco' layout.
    ann_info = {
        'image_size': cfg.data_cfg['image_size'],
        'num_joints': cfg.data_cfg['num_joints'],
        'flip_index':
        [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15],
    }
    sample = {
        'img_or_path': img_or_path,
        'dataset': 'coco',
        'ann_info': ann_info,
    }

    batch = collate([test_pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to the model's GPU
        batch = scatter(batch, [device])[0]
    else:
        # take the payload out of its container on CPU
        batch['img_metas'] = batch['img_metas'].data[0]

    with OutputHook(model, outputs=outputs, as_tensor=False) as hook:
        # forward pass without gradient tracking
        with torch.no_grad():
            result = model(
                img=batch['img'],
                img_metas=batch['img_metas'],
                return_loss=False,
                return_heatmap=return_heatmap)

        if return_heatmap:
            hook.layer_outputs['heatmap'] = result['output_heatmap']

        returned_outputs = [hook.layer_outputs]
        pose_results = [{
            'keypoints': pred[:, :3],
        } for pred in result['preds']]

    return pose_results, returned_outputs