def apply_transforms(img_path, pipeline_cfg):
    """Apply the transform pipeline and return both the formatted data and
    the image as it looks before the formatting steps."""
    data = dict(img_info=dict(filename=img_path), img_prefix=None)

    def split_pipeline_cfg(pipeline_cfg):
        """Split the transforms into image transforms and format transforms."""
        image_transforms_cfg, format_transforms_cfg = [], []
        if pipeline_cfg[0]['type'] != 'LoadImageFromFile':
            pipeline_cfg.insert(0, dict(type='LoadImageFromFile'))
        for transform in pipeline_cfg:
            if transform['type'] in FORMAT_TRANSFORMS_SET:
                format_transforms_cfg.append(transform)
            else:
                image_transforms_cfg.append(transform)
        return image_transforms_cfg, format_transforms_cfg

    image_transforms, format_transforms = split_pipeline_cfg(pipeline_cfg)
    image_transforms = Compose(image_transforms)
    format_transforms = Compose(format_transforms)

    intermediate_data = image_transforms(data)
    inference_img = copy.deepcopy(intermediate_data['img'])
    format_data = format_transforms(intermediate_data)

    return format_data, inference_img
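# A minimal usage sketch for apply_transforms (illustrative, not part of the
# original module): the pipeline below is a typical mmcls test pipeline, and
# 'demo.jpg' is a hypothetical path. FORMAT_TRANSFORMS_SET is assumed to hold
# the formatting steps ('Normalize', 'ImageToTensor', 'ToTensor', 'Collect'),
# so inference_img comes back resized and cropped but not yet normalized or
# converted to a tensor.
def _example_apply_transforms():
    """Sketch: split a test pipeline and recover the unformatted image."""
    pipeline = [
        dict(type='LoadImageFromFile'),
        dict(type='Resize', size=(256, -1)),
        dict(type='CenterCrop', crop_size=224),
        dict(type='Normalize', mean=[123.675, 116.28, 103.53],
             std=[58.395, 57.12, 57.375], to_rgb=True),
        dict(type='ImageToTensor', keys=['img']),
        dict(type='Collect', keys=['img']),
    ]
    format_data, inference_img = apply_transforms('demo.jpg', pipeline)
    return format_data, inference_img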
def test_randomgrayscale():
    # test rgb2gray, return the grayscale image with gray_prob > 1
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    kwargs = dict(gray_prob=2)

    aug = []
    aug.extend([mmcls_transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    print(composed_transform)
    results = dict()
    results['img'] = in_img
    img = composed_transform(results)['img']
    computed_gray = (
        in_img[:, :, 0] * 0.299 + in_img[:, :, 1] * 0.587 +
        in_img[:, :, 2] * 0.114)
    for i in range(img.shape[2]):
        assert_array_almost_equal(img[:, :, i], computed_gray, decimal=4)
    assert img.shape == (10, 10, 3)

    # test rgb2gray, return the original image with gray_prob < 0
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    kwargs = dict(gray_prob=-1)

    aug = []
    aug.extend([mmcls_transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = in_img
    img = composed_transform(results)['img']
    assert_array_equal(img, in_img)
    assert img.shape == (10, 10, 3)

    # test an image with one channel, with our method
    # and with the function from torchvision
    in_img = np.random.rand(10, 10, 1).astype(np.float32)
    kwargs = dict(gray_prob=2)

    aug = []
    aug.extend([mmcls_transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = in_img
    img = composed_transform(results)['img']
    assert_array_equal(img, in_img)
    assert img.shape == (10, 10, 1)

    in_img_pil = Image.fromarray(in_img[:, :, 0], mode='L')
    kwargs = dict(p=2)

    aug = []
    aug.extend([torchvision.transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    img_pil = composed_transform(in_img_pil)
    assert_array_equal(np.array(img_pil), np.array(in_img_pil))
    assert np.array(img_pil).shape == (10, 10)
def inference_model(model, img):
    """Inference image(s) with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename.

    Returns:
        result (dict): The classification results that contains
            `class_name`, `pred_label` and `pred_score`.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)
    # prepare data
    data = dict(img_info=dict(filename=img), img_prefix=None)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        scores = model(return_loss=False, **data)
        pred_score = np.max(scores, axis=1)[0]
        pred_label = np.argmax(scores, axis=1)[0]
        result = {'pred_label': pred_label, 'pred_score': pred_score}
    result['class_name'] = model.CLASSES[result['pred_label']]
    return result
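# Usage sketch (names are illustrative): build the model with the standard
# mmcls helper, then classify a single file. The config and checkpoint paths
# are hypothetical.
def _example_inference_model():
    from mmcls.apis import init_model
    model = init_model('configs/resnet/resnet50_b32x8_imagenet.py',
                       'checkpoints/resnet50.pth', device='cuda:0')
    result = inference_model(model, 'demo.jpg')
    print(result['class_name'], result['pred_score'])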
def inference_model_batch(model, images_nps, topn=5):
    """Inference a batch of images with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        images_nps (list[ndarray]): The loaded images.
        topn (int): Number of top-scoring classes to return per image.

    Returns:
        result (list of dict): The classification results, each containing
            `label`, `score` and `class`.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline; the images are already loaded,
    # so drop the LoadImageFromFile step if present
    if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
        cfg.data.test.pipeline.pop(0)
    test_pipeline = Compose(cfg.data.test.pipeline)

    with torch.no_grad():
        inference_results = []
        for images_batch in sly.batched(images_nps, g.batch_size):
            data = [dict(img=img) for img in images_batch]
            data = [test_pipeline(row) for row in data]
            data = collate(data, samples_per_gpu=1)
            if next(model.parameters()).is_cuda:
                # scatter to specified GPU
                data = scatter(data, [device])[0]
            batch_scores = np.asarray(model(return_loss=False, **data))
            batch_top_indexes = batch_scores.argsort(axis=1)[:, -topn:][:, ::-1]
            for scores, top_indexes in zip(batch_scores, batch_top_indexes):
                inference_results.append({
                    'label': top_indexes.astype(int).tolist(),
                    'score': scores[top_indexes].astype(float).tolist(),
                    'class': np.asarray(model.CLASSES)[top_indexes].tolist()
                })
    return inference_results
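# Usage sketch: `sly` is assumed to be the Supervisely SDK (sly.batched
# chunks a list into batches) and `g` an app-level globals module providing
# batch_size; the image paths below are hypothetical.
def _example_inference_model_batch(model):
    images_nps = [mmcv.imread('a.jpg'), mmcv.imread('b.jpg')]
    results = inference_model_batch(model, images_nps, topn=3)
    for res in results:
        print(res['class'][0], res['score'][0])  # best class per image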
def build_dataset_pipeline(cfg, phase):
    """Build the dataset and pipeline from config.

    If 'LoadImageFromFile' is the first step, keep it inside the dataset and
    return the rest of the pipeline separately.
    """
    data_cfg = cfg.data[phase]
    loadimage_pipeline = []
    if len(data_cfg.pipeline) != 0 and \
            data_cfg.pipeline[0]['type'] == 'LoadImageFromFile':
        loadimage_pipeline.append(data_cfg.pipeline.pop(0))
    origin_pipeline = data_cfg.pipeline
    data_cfg.pipeline = loadimage_pipeline
    dataset = build_dataset(data_cfg)
    pipeline = Compose(origin_pipeline)
    return dataset, pipeline
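# Usage sketch: the dataset yields samples with only 'LoadImageFromFile'
# applied, and the returned pipeline runs the remaining transforms on demand,
# which is handy for visualizing intermediate results. `cfg` is assumed to be
# a full mmcls config with a `data.test` section.
def _example_build_dataset_pipeline(cfg):
    dataset, pipeline = build_dataset_pipeline(cfg, 'test')
    item = dataset[0]             # raw loaded image, no further transforms
    transformed = pipeline(item)  # the rest of the pipeline applied here
    return transformed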
def make_img_transform(img_size, norm_cfg=imagenet_norm, to_rgb=True, divisor=32):
    img_size = tuple(img_size)
    imw, imh = img_size
    return Compose([
        AlbuAug([
            A.LongestMaxSize(max_size=imw),
            A.PadIfNeeded(
                min_height=imh,
                min_width=imw,
                border_mode=cv2.BORDER_REPLICATE),
        ]),
        Normalize(**norm_cfg, to_rgb=to_rgb),
        ImageToTensor(['img']),
        Collect(['img']),
    ])
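# Usage sketch: build a 640x480 transform and apply it to a results dict.
# Note that the `divisor` argument is currently unused by the function body;
# the image path is hypothetical.
def _example_make_img_transform():
    transform = make_img_transform((640, 480))
    results = transform(dict(img=mmcv.imread('demo.jpg')))
    return results['img']  # CHW tensor ready for the model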
def inference(config_file, checkpoint, classes, args):
    cfg = Config.fromfile(config_file)
    model = init_model(cfg, checkpoint, device=args.device)
    model.CLASSES = classes

    # build the data pipeline
    if cfg.data.test.pipeline[0]['type'] != 'LoadImageFromFile':
        cfg.data.test.pipeline.insert(0, dict(type='LoadImageFromFile'))
    if cfg.data.test.type in ['CIFAR10', 'CIFAR100']:
        # The image shape of CIFAR is (32, 32, 3)
        cfg.data.test.pipeline.insert(1, dict(type='Resize', size=32))

    data = dict(img_info=dict(filename=args.img), img_prefix=None)
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    resolution = tuple(data['img'].shape[1:])
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [args.device])[0]

    # forward the model
    result = {'resolution': resolution}
    with torch.no_grad():
        if args.inference_time:
            time_record = []
            for _ in range(10):
                start = time()
                scores = model(return_loss=False, **data)
                time_record.append((time() - start) * 1000)
            result['time_mean'] = np.mean(time_record[1:-1])
            result['time_std'] = np.std(time_record[1:-1])
        else:
            scores = model(return_loss=False, **data)
    pred_score = np.max(scores, axis=1)[0]
    pred_label = np.argmax(scores, axis=1)[0]
    result['pred_label'] = pred_label
    result['pred_score'] = float(pred_score)
    result['pred_class'] = model.CLASSES[result['pred_label']]
    result['model'] = config_file.stem
    return result
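# Usage sketch: `args` is assumed to be an argparse.Namespace carrying
# `device`, `img` and `inference_time`, and `config_file` must be a
# pathlib.Path so that `.stem` works; CLASSES is a hypothetical tuple of
# class names.
def _example_inference(args, CLASSES):
    from pathlib import Path
    result = inference(Path('configs/resnet/resnet50_b32x8_imagenet.py'),
                       'checkpoints/resnet50.pth', CLASSES, args)
    print(result['pred_class'], result['resolution'])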
def inference_model(model, img, topn=5):
    """Inference image(s) with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename or loaded image.
        topn (int): Number of top-scoring classes to return.

    Returns:
        result (list of dict): The classification results, each containing
            `label`, `score` and `class`.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    if isinstance(img, str):
        if cfg.data.test.pipeline[0]['type'] != 'LoadImageFromFile':
            cfg.data.test.pipeline.insert(0, dict(type='LoadImageFromFile'))
        data = dict(img_info=dict(filename=img), img_prefix=None)
    else:
        if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
            cfg.data.test.pipeline.pop(0)
        data = dict(img=img)
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        scores = model(return_loss=False, **data)
        model_out = scores[0]
        top_scores = model_out[model_out.argsort()[-topn:]][::-1]
        top_labels = model_out.argsort()[-topn:][::-1]
        result = []
        for label, score in zip(top_labels, top_scores):
            result.append({
                'label': int(label),
                'score': float(score),
                'class': model.CLASSES[label]
            })
    return result
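# Usage sketch: this variant accepts either a filename or a pre-loaded
# ndarray, since the pipeline is patched accordingly; the path below is
# hypothetical.
def _example_inference_model_topn(model):
    top5_from_file = inference_model(model, 'demo.jpg', topn=5)
    top5_from_array = inference_model(model, mmcv.imread('demo.jpg'), topn=5)
    for entry in top5_from_file:
        print(entry['class'], entry['score'])
    return top5_from_array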
def test_randomresizedcrop():
    ori_img = mmcv.imread(
        osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
    ori_img_pil = Image.open(
        osp.join(osp.dirname(__file__), '../data/color.jpg'))

    seed = random.randint(0, 100)

    # test when scale is not of kind (min, max)
    with pytest.raises(ValueError):
        kwargs = dict(
            size=(200, 300), scale=(1.0, 0.08), ratio=(3. / 4., 4. / 3.))
        aug = []
        aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        composed_transform(results)['img']

    # test when ratio is not of kind (min, max)
    with pytest.raises(ValueError):
        kwargs = dict(
            size=(200, 300), scale=(0.08, 1.0), ratio=(4. / 3., 3. / 4.))
        aug = []
        aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        composed_transform(results)['img']

    # test crop size is int
    kwargs = dict(size=200, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    # test __repr__()
    print(composed_transform)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 200, 3)
    assert np.array(baseline).shape == (200, 200, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size < image size
    kwargs = dict(size=(200, 300), scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 300, 3)
    assert np.array(baseline).shape == (200, 300, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size > image size
    kwargs = dict(size=(600, 700), scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (600, 700, 3)
    assert np.array(baseline).shape == (600, 700, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test cropping the whole image
    kwargs = dict(
        size=(ori_img.shape[0], ori_img.shape[1]),
        scale=(1.0, 2.0),
        ratio=(1.0, 2.0))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform
    # assert_array_equal(ori_img, img)
    # assert_array_equal(np.array(ori_img_pil), np.array(baseline))

    # test central crop when in_ratio < min(ratio)
    kwargs = dict(
        size=(ori_img.shape[0], ori_img.shape[1]),
        scale=(1.0, 2.0),
        ratio=(2, 3))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test central crop when in_ratio > max(ratio)
    kwargs = dict(
        size=(ori_img.shape[0], ori_img.shape[1]),
        scale=(1.0, 2.0),
        ratio=(3. / 4, 1))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test different interpolation types
    for mode in ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']:
        kwargs = dict(
            size=(600, 700),
            scale=(0.08, 1.0),
            ratio=(3. / 4., 4. / 3.),
            interpolation=mode)
        aug = []
        aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        img = composed_transform(results)['img']
        assert img.shape == (600, 700, 3)
def test_randomcrop():
    ori_img = mmcv.imread(
        osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
    ori_img_pil = Image.open(
        osp.join(osp.dirname(__file__), '../data/color.jpg'))

    seed = random.randint(0, 100)

    # test crop size is int
    kwargs = dict(size=200, padding=0, pad_if_needed=True, fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=200, padding=0, pad_if_needed=True, pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    # test __repr__()
    print(composed_transform)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 200, 3)
    assert np.array(baseline).shape == (200, 200, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size < image size
    kwargs = dict(size=(200, 300), padding=0, pad_if_needed=True, fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=(200, 300), padding=0, pad_if_needed=True, pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 300, 3)
    assert np.array(baseline).shape == (200, 300, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size > image size
    kwargs = dict(size=(600, 700), padding=0, pad_if_needed=True, fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=(600, 700), padding=0, pad_if_needed=True, pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (600, 700, 3)
    assert np.array(baseline).shape == (600, 700, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size == image size
    kwargs = dict(
        size=(ori_img.shape[0], ori_img.shape[1]),
        padding=0,
        pad_if_needed=True,
        fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(
        size=(ori_img.shape[0], ori_img.shape[1]),
        padding=0,
        pad_if_needed=True,
        pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (img.shape[0], img.shape[1], 3)
    assert np.array(baseline).shape == (img.shape[0], img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform
    assert_array_equal(ori_img, img)
    assert_array_equal(np.array(baseline), np.array(ori_img_pil))

    # test different padding mode
    for mode in ['constant', 'edge', 'reflect', 'symmetric']:
        kwargs = dict(size=(500, 600), padding=0, pad_if_needed=True, fill=0)
        kwargs['padding_mode'] = mode
        random.seed(seed)
        np.random.seed(seed)
        aug = []
        aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
        composed_transform = Compose(aug)
        baseline = composed_transform(ori_img_pil)

        kwargs = dict(size=(500, 600), padding=0, pad_if_needed=True, pad_val=0)
        random.seed(seed)
        np.random.seed(seed)
        aug = []
        aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        img = composed_transform(results)['img']
        assert np.array(img).shape == (500, 600, 3)
        assert np.array(baseline).shape == (500, 600, 3)
        nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
        nonzero_transform = len(
            (img - np.array(baseline)[:, :, ::-1]).nonzero())
        assert nonzero == nonzero_transform