def test_label_mapping_arrs():
    """Check train-to-universal label index arrays for two known class pairs.

    For every (dataset, dataset class, universal class) triple below, the
    entry of `label_mapping_arr_dict[dataset]` at the dataset class index
    must equal the index of the universal class.
    """
    tc = TaxonomyConverter()
    expected_pairs = (
        ('ade20k-150', 'minibike', 'motorcycle'),
        ('mapillary-public65', 'Bird', 'bird'),
    )
    for dname, train_classname, u_classname in expected_pairs:
        train_idx = load_class_names(dname).index(train_classname)
        u_idx = get_universal_class_names().index(u_classname)
        assert tc.label_mapping_arr_dict[dname][train_idx] == u_idx
def run_universal_demo_batched(args, use_gpu: bool = True) -> None:
    """Configure `args` and run a batched inference task in the universal taxonomy.

    Args:
        args: configuration object, mutated in place. This function sets
            `img_name_unique`, `u_classes`, `print_freq`, `split`,
            `num_model_classes` and `base_size`, and reads `dataset`,
            `native_img_h`, `native_img_w`, `test_h`, `test_w`,
            `input_file` and `scales`.
        use_gpu: accepted by the signature but not read in this function.
    """
    # Image names are flagged as non-unique only when the dataset name
    # contains 'scannet'.
    args.img_name_unique = 'scannet' not in args.dataset

    args.u_classes = get_universal_class_names()
    args.print_freq = 10
    args.split = 'test'

    logger.info(args)
    logger.info("=> creating model ...")
    args.num_model_classes = len(args.u_classes)

    # The base size is derived from the native image size and the shorter
    # side of the test crop.
    shorter_crop_side = min(args.test_h, args.test_w)
    args.base_size = determine_max_possible_base_size(
        h=args.native_img_h,
        w=args.native_img_w,
        crop_sz=shorter_crop_side,
    )

    task_kwargs = dict(
        base_size=args.base_size,
        crop_h=args.test_h,
        crop_w=args.test_w,
        input_file=args.input_file,
        model_taxonomy='universal',
        eval_taxonomy='universal',
        scales=args.scales,
    )
    itask = BatchedInferenceTask(args, **task_kwargs)
    itask.execute()
def run_universal_demo(args, use_gpu: bool = True) -> None:
    """Configure `args` and run a single inference task with universal output.

    Args:
        args: configuration object, mutated in place. This function sets
            `img_name_unique`, `u_classes`, `print_freq`, `split` and
            `num_model_classes`, and reads `dataset`, `base_size`,
            `test_h`, `test_w`, `input_file` and `scales`.
        use_gpu: accepted by the signature but not read in this function.
    """
    # Image names are flagged as non-unique only when the dataset name
    # contains 'scannet'.
    args.img_name_unique = 'scannet' not in args.dataset

    args.u_classes = get_universal_class_names()
    args.print_freq = 10
    args.split = 'test'

    #os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu)
    logger.info(args)
    logger.info("=> creating model ...")
    args.num_model_classes = len(args.u_classes)

    task_kwargs = dict(
        base_size=args.base_size,
        crop_h=args.test_h,
        crop_w=args.test_w,
        input_file=args.input_file,
        output_taxonomy='universal',
        scales=args.scales,
    )
    itask = InferenceTask(args, **task_kwargs)
    itask.execute()
def test_transform_predictions_test():
    """Map universal-taxonomy probabilities into a test dataset's taxonomy.

    Predictions are made in the universal taxonomy over a tiny 2x3 image
    and then brought into the test dataset's taxonomy by a linear mapping
    (summing probabilities where necessary).

    For Camvid, the universal probabilities for `person` and `bicycle`
    should both flow into the 'Bicyclist' class.
    """
    u_classnames = get_universal_class_names()
    person_uidx = u_classnames.index('person')
    bicycle_uidx = u_classnames.index('bicycle')
    sky_uidx = u_classnames.index('sky')

    tc = TaxonomyConverter()

    # Per-class probabilities, laid out as CHW.
    u_probs = np.zeros((194, 2, 3))
    u_probs[sky_uidx, 0, :] = 1.0  # top row is sky
    # bottom row is split 50/50 between person and bicycle
    u_probs[person_uidx, 1, :] = 0.5
    u_probs[bicycle_uidx, 1, :] = 0.5

    # CHW -> NCHW
    u_probs = torch.from_numpy(u_probs).unsqueeze(0).float()
    assert u_probs.shape == (1, 194, 2, 3)

    test_dname = 'camvid-11'
    test_probs = tc.transform_predictions_test(u_probs, test_dname)
    test_probs = test_probs.squeeze()  # NCHW -> CHW
    prediction = torch.argmax(test_probs, dim=0).numpy()

    # Camvid should have predictions across 11 classes.
    camvid_classnames = load_class_names(test_dname)
    prediction_gt = np.zeros((2, 3))
    prediction_gt[0, :] = camvid_classnames.index('Sky')
    prediction_gt[1, :] = camvid_classnames.index('Bicyclist')
    assert np.allclose(prediction, prediction_gt)
def test_eval_relabeled_pair_annotated_as_unlabel():
    """Pixels relabeled as `unlabeled` must be ignored during evaluation.

    When labels were inaccurate, we often marked them as `unlabeled`,
    e.g. COCO cabinets included `counter` pixels.
    """
    def _constant_rows(*row_values):
        """Build a 4-column uint8 image whose i-th row holds row_values[i]."""
        column = np.array(row_values).reshape(-1, 1)
        return np.tile(column, (1, 4)).astype(np.uint8)

    orig_dname = 'coco-panoptic-133'
    relabeled_dname = 'coco-panoptic-133-relabeled'
    original_names = load_class_names(orig_dname)
    relabeled_names = load_class_names(relabeled_dname)
    u_names = get_universal_class_names()

    # prediction, in the universal taxonomy
    u_wall = u_names.index('wall')
    u_counter = u_names.index('counter_other')
    u_cabinet = u_names.index('cabinet')
    pred_rel = _constant_rows(u_wall, u_counter, u_cabinet, u_cabinet)

    # original COCO image, in coco-panoptic-133
    orig_wall = original_names.index('wall-wood')
    orig_cabinet = original_names.index('cabinet-merged')
    target_img = _constant_rows(
        orig_wall, orig_cabinet, orig_cabinet, orig_cabinet
    )

    # relabeled COCO image, in coco-panoptic-133-relabeled
    # since the counter & cabinet could not be separated w/o
    # drawing new boundary, mark both as `unlabeled`, i.e. 255
    rel_wall = relabeled_names.index('wall')
    target_img_relabeled = _constant_rows(rel_wall, 255, 255, 255)

    orig_to_u_transform = ToUniversalLabel(orig_dname)
    relabeled_to_u_transform = ToUniversalLabel(relabeled_dname)
    pred_unrel, target_gt_univ, acc_diff = eval_rel_model_pred_on_unrel_data(
        pred_rel,
        target_img,
        target_img_relabeled,
        orig_to_u_transform,
        relabeled_to_u_transform,
    )

    # goes from 75% to 100%
    assert acc_diff == 25

    # Both outputs are expected to keep the wall row and carry 255 elsewhere.
    gt_pred_unrel = _constant_rows(u_wall, 255, 255, 255)
    assert np.allclose(pred_unrel, gt_pred_unrel)

    gt_target_gt_univ = _constant_rows(u_wall, 255, 255, 255)
    assert np.allclose(target_gt_univ, gt_target_gt_univ)
def segment_image(input_path, args, device_type, output_path):
    """Run a universal-taxonomy inference task on `input_path`.

    Args:
        input_path: forwarded to the task as `input_file`.
        args: configuration object, mutated in place. This function sets
            `u_classes`, `print_freq` and `num_model_classes`, and reads
            `base_size`, `test_h`, `test_w` and `scales`.
        device_type: forwarded unchanged to the inference task.
        output_path: forwarded unchanged to the inference task.
    """
    args.u_classes = get_universal_class_names()
    args.print_freq = 10
    args.num_model_classes = len(args.u_classes)

    task_kwargs = dict(
        base_size=args.base_size,
        crop_h=args.test_h,
        crop_w=args.test_w,
        input_file=input_path,
        output_taxonomy='universal',
        scales=args.scales,
        device_type=device_type,
        output_path=output_path,
    )
    itask = InferenceTask(args, **task_kwargs)
    itask.execute()
def test_eval_relabeled_pair2():
    """Person vs. motorcyclist in the center; the prediction is `person`.

    The relabeled model incorrectly predicts `person` instead of
    `motorcyclist`. Silhouette pattern (1 marks the center region):
        [0,0,0,0],
        [0,1,1,0],
        [0,1,1,0],
        [0,1,1,0]
    """
    def _silhouette(background, foreground, dtype=float):
        """4x4 image filled with `background`, with `foreground` in the center."""
        canvas = np.ones((4, 4), dtype=dtype) * background
        canvas[1:, 1:3] = foreground
        return canvas

    orig_dname = 'coco-panoptic-133'
    relabeled_dname = 'coco-panoptic-133-relabeled'
    original_names = load_class_names(orig_dname)
    relabeled_names = load_class_names(relabeled_dname)
    u_names = get_universal_class_names()

    # prediction, in the universal taxonomy
    pred_rel = _silhouette(
        u_names.index('sky'), u_names.index('person'), dtype=np.uint8
    )

    # original COCO image, in coco-panoptic-133
    target_img = _silhouette(
        original_names.index('sky-other-merged'),
        original_names.index('person'),
    )

    # relabeled COCO image, in coco-panoptic-133-relabeled
    target_img_relabeled = _silhouette(
        relabeled_names.index('sky'),
        relabeled_names.index('motorcyclist'),
    )

    orig_to_u_transform = ToUniversalLabel(orig_dname)
    relabeled_to_u_transform = ToUniversalLabel(relabeled_dname)
    pred_unrel, target_gt_univ, _ = eval_rel_model_pred_on_unrel_data(
        pred_rel,
        target_img,
        target_img_relabeled,
        orig_to_u_transform,
        relabeled_to_u_transform,
    )

    # treated as 0% accuracy for person's silhouette and interior
    target_gt = _silhouette(
        u_names.index('sky'), u_names.index('person'), dtype=np.uint8
    )
    assert np.allclose(target_gt_univ, target_gt)

    IGNORE_IDX = 255  # represents unlabeled
    gt_pred_unrel = _silhouette(
        u_names.index('sky'), IGNORE_IDX, dtype=np.uint8
    )
    assert np.allclose(pred_unrel, gt_pred_unrel)
def test_label_transform():
    """Bring a label map from a training taxonomy into the universal taxonomy.

    A 4x4 label map filled with the mapillary-public65 'Motorcyclist'
    index must come back filled with the universal 'motorcyclist' index.
    """
    dname = 'mapillary-public65'
    train_idx = load_class_names(dname).index('Motorcyclist')
    tc = TaxonomyConverter()

    # training dataset label, as a LongTensor
    traind_label = (torch.ones(4, 4) * train_idx).type(torch.LongTensor)

    # Get back the universal label
    u_label = tc.transform_label(traind_label, dname)

    u_idx = get_universal_class_names().index('motorcyclist')
    gt_u_label = np.full((4, 4), u_idx, dtype=np.int64)
    assert np.allclose(u_label.numpy(), gt_u_label)
def test_eval_relabeled_pair1():
    """Person vs. motorcyclist in the center; the prediction is `motorcyclist`.

    The relabeled model correctly predicts `motorcyclist`.
    Motorcyclist silhouette pattern:
        [0,0,0,0],
        [0,1,1,0],
        [0,1,1,0],
        [0,1,1,0]
    """
    def _silhouette(background, foreground, dtype=float):
        """4x4 image filled with `background`, with `foreground` in the center."""
        canvas = np.ones((4, 4), dtype=dtype) * background
        canvas[1:, 1:3] = foreground
        return canvas

    orig_dname = 'coco-panoptic-133'
    relabeled_dname = 'coco-panoptic-133-relabeled'
    original_names = load_class_names(orig_dname)
    relabeled_names = load_class_names(relabeled_dname)
    u_names = get_universal_class_names()

    # prediction in universal taxonomy
    pred_rel = _silhouette(
        u_names.index('sky'), u_names.index('motorcyclist'), dtype=np.uint8
    )

    # original COCO image, in coco-panoptic-133
    target_img = _silhouette(
        original_names.index('sky-other-merged'),
        original_names.index('person'),
    )

    # relabeled COCO image, in coco-panoptic-133-relabeled
    target_img_relabeled = _silhouette(
        relabeled_names.index('sky'),
        relabeled_names.index('motorcyclist'),
    )

    orig_to_u_transform = ToUniversalLabel(orig_dname)
    relabeled_to_u_transform = ToUniversalLabel(relabeled_dname)
    pred_unrel, target_gt_univ, _ = eval_rel_model_pred_on_unrel_data(
        pred_rel,
        target_img,
        target_img_relabeled,
        orig_to_u_transform,
        relabeled_to_u_transform,
    )

    # treated as 100% accuracy
    assert np.allclose(pred_unrel, target_gt_univ)
def __init__(self,
             args,
             base_size: int,
             crop_h: int,
             crop_w: int,
             input_file: str,
             model_taxonomy: str,
             eval_taxonomy: str,
             scales: List[float],
             use_gpu: bool = True):
    """Set up an inference task for a (model taxonomy, eval taxonomy) pair.

    ImageNet mean and standard deviation are always used for normalization:
        mean: 3-tuple of floats, representing pixel mean value
        std: 3-tuple of floats, representing pixel standard deviation

    Four taxonomy configurations are handled here:
        ('universal', 'universal'), ('test_dataset', 'test_dataset'),
        ('naive', 'test_dataset') and ('universal', 'test_dataset').
    For any other pair, neither `num_eval_classes` nor `tc` is set.

    Args:
        args: experiment configuration arguments. Must provide
            `save_folder`, `dataset`, `img_name_unique`, `print_freq`,
            `num_model_classes` and `model_path`; `arch` and `scales`
            are also read.
        base_size: shorter side of image
        crop_h: integer representing crop height, e.g. 473
        crop_w: integer representing crop width, e.g. 473
        input_file: could be absolute path to .txt file, .mp4 file,
            or to a directory full of jpg images
        model_taxonomy: taxonomy in which trained model makes predictions
        eval_taxonomy: taxonomy in which trained model is evaluated
        scales: floats representing image scales for multi-scale inference
        use_gpu: when true, the taxonomy converter's layers are moved
            to the GPU (TODO, not supporting cpu at this time)
    """
    self.args = args

    # Required arguments, checked in this order.
    required_fields = (
        ('save_folder', str),
        ('dataset', str),
        ('img_name_unique', bool),
        ('print_freq', int),
        ('num_model_classes', int),
        ('model_path', str),
    )
    for field_name, field_type in required_fields:
        assert isinstance(getattr(self.args, field_name), field_type)

    self.num_model_classes = self.args.num_model_classes
    self.base_size = base_size
    self.crop_h = crop_h
    self.crop_w = crop_w
    self.input_file = input_file
    self.model_taxonomy = model_taxonomy
    self.eval_taxonomy = eval_taxonomy
    self.scales = scales
    self.use_gpu = use_gpu

    self.mean, self.std = get_imagenet_mean_std()
    self.model = self.load_model(args)
    self.softmax = nn.Softmax(dim=1)

    # optional, intended for dataloader use
    self.gray_folder = None
    self.data_list = None

    taxonomy_pair = (model_taxonomy, eval_taxonomy)
    if taxonomy_pair == ('universal', 'universal'):
        # no conversion of predictions required
        self.num_eval_classes = self.num_model_classes
    elif taxonomy_pair == ('test_dataset', 'test_dataset'):
        # no conversion of predictions required
        self.num_eval_classes = len(load_class_names(args.dataset))
    elif taxonomy_pair in (('naive', 'test_dataset'),
                           ('universal', 'test_dataset')):
        # no label conversion required here, only predictions converted
        if model_taxonomy == 'naive':
            self.tc = NaiveTaxonomyConverter()
        else:
            self.tc = TaxonomyConverter()
        if args.dataset in self.tc.convs.keys() and use_gpu:
            self.tc.convs[args.dataset].cuda()
        self.tc.softmax.cuda()
        self.num_eval_classes = len(load_class_names(args.dataset))

    if self.args.arch == 'psp':
        assert isinstance(self.args.zoom_factor, int)
        assert isinstance(self.args.network_name, int)

    # `id_to_class_name_map` only used for visualizing universal taxonomy
    self.id_to_class_name_map = dict(enumerate(get_universal_class_names()))

    # indicate which scales were used to make predictions
    # (multi-scale vs. single-scale)
    if len(args.scales) > 1:
        self.scales_str = 'ms'
    else:
        self.scales_str = 'ss'
def evaluate_universal_tax_model(args, use_gpu: bool = True) -> None:
    """Run inference (unless cached) with a universal-taxonomy model, then score it.

    The evaluation taxonomy is chosen from the dataset name: datasets in
    `DEFAULT_TRAIN_DATASETS` are evaluated in the universal taxonomy, and
    datasets in `TEST_DATASETS` in their own taxonomy. Results are written
    under a folder derived from `args.model_path`.

    Args:
        args: experiment configuration, mutated in place. This function
            sets `img_name_unique`, `data_root`, `save_folder`,
            `print_freq`, `test_list`, optionally `vis_freq` and
            `dataset_relabeled`, and `num_model_classes`. It reads
            `dataset`, `model_name`, `model_path`, `scales`, `base_size`,
            `split`, `has_prediction`, `test_h`, `test_w` and, for
            relabeled evaluation, `index_start`, `index_step`, `workers`.
        use_gpu: accepted by the signature but not read in this function.

    Returns:
        None

    Raises:
        ValueError: if `args.dataset` is in neither `DEFAULT_TRAIN_DATASETS`
            nor `TEST_DATASETS`.
    """
    args.img_name_unique = 'scannet' not in args.dataset

    model_taxonomy = 'universal'

    # automatically decide which evaluation taxonomy to use
    if args.dataset in DEFAULT_TRAIN_DATASETS:
        eval_taxonomy = 'universal'
    elif args.dataset in TEST_DATASETS:
        eval_taxonomy = 'test_dataset'
    else:
        # Previously this branch only logged, and the function then failed
        # with an UnboundLocalError on `eval_taxonomy`. Fail explicitly.
        logger.info("Unknown dataset, please check")
        raise ValueError(
            f"Unknown dataset {args.dataset!r}: expected a member of "
            "DEFAULT_TRAIN_DATASETS or TEST_DATASETS"
        )

    # Relabeled evaluation applies only to universal-taxonomy evaluation of
    # 'mseg' models whose name does not mark them as 'unrelabeled'.
    eval_relabeled = (
        eval_taxonomy == 'universal'
        and 'mseg' in args.model_name
        and 'unrelabeled' not in args.model_name
    )

    args.data_root = infos[args.dataset].dataroot
    dataset_name = args.dataset

    # 'ms' = multi-scale, 'ss' = single-scale
    scale_type = 'ms' if len(args.scales) > 1 else 'ss'

    model_path = Path(args.model_path)
    model_results_root = f'{model_path.parent}/{model_path.stem}'
    if eval_taxonomy != 'universal':
        folder_suffix = ''
    elif eval_relabeled:
        folder_suffix = '_universal_relabeled'
    else:
        folder_suffix = '_universal'
    args.save_folder = (
        f'{model_results_root}/{args.dataset}{folder_suffix}'
        f'/{args.base_size}/{scale_type}/'
    )

    args.print_freq = 300

    #os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu)
    logger.info(args)

    # always evaluating on val split
    args.test_list = infos[args.dataset].vallist

    if args.split == 'test':
        args.vis_freq = 1

    args.num_model_classes = len(get_universal_class_names())

    if not args.has_prediction:
        itask = InferenceTask(
            args=args,
            base_size=args.base_size,
            crop_h=args.test_h,
            crop_w=args.test_w,
            input_file=None,
            model_taxonomy=model_taxonomy,
            eval_taxonomy=eval_taxonomy,
            scales=args.scales,
        )
        itask.execute()

    if args.split == 'test':
        logger.info("Ground truth labels are not known for test set, cannot compute its accuracy.")
        return

    if eval_taxonomy == 'universal' and (args.dataset in DEFAULT_TRAIN_DATASETS):
        # evaluating on training datasets, within a subset of the universal taxonomy
        excluded_ids = get_excluded_class_ids(dataset_name)
    else:
        excluded_ids = []

    # `eval_taxonomy` is one of exactly two values at this point, so the
    # former (unreachable) 'naive' branch is not needed.
    if eval_taxonomy == 'universal':
        class_names = get_universal_class_names()
    else:
        class_names = load_class_names(args.dataset)
    num_eval_classes = len(class_names)

    _, test_data_list = create_test_loader(args)
    ac = AccuracyCalculator(
        args=args,
        data_list=test_data_list,
        dataset_name=dataset_name,
        class_names=class_names,
        save_folder=args.save_folder,
        eval_taxonomy=eval_taxonomy,
        num_eval_classes=num_eval_classes,
        excluded_ids=excluded_ids,
    )

    if eval_relabeled:
        logger.info(">>>>>>>>> Calculating *relabeled* accuracy from cached results >>>>>>>>>>")
        args.dataset_relabeled = get_relabeled_dataset(args.dataset)
        relabeled_info = infos[args.dataset_relabeled]
        relabeled_args = SimpleNamespace(
            split='val',
            data_root=relabeled_info.dataroot,
            test_list=relabeled_info.vallist,
            index_start=args.index_start,
            index_step=args.index_step,
            workers=args.workers,
        )
        _, test_data_relabeled_list = create_test_loader(relabeled_args)
        # AccuracyCalculator is constructed for the unrelabeled dataset
        # we will pass relabeled dataset info as args later
        ac.compute_metrics_relabeled_data(test_data_relabeled_list)
    else:
        logger.info(">>>>>>>>> Calculating accuracy from cached results >>>>>>>>>>")
        ac.compute_metrics()

    logger.info(">>>>>>>>> Accuracy computation completed >>>>>>>>>>")
def __init__(self,
             args,
             base_size: int,
             crop_h: int,
             crop_w: int,
             input_file: str,
             output_taxonomy: str,
             scales: List[float],
             use_gpu: bool = True):
    """Set up an inference task that emits predictions in `output_taxonomy`.

    ImageNet mean and standard deviation are always used for normalization:
        mean: 3-tuple of floats, representing pixel mean value
        std: 3-tuple of floats, representing pixel standard deviation

    Args:
        args: configuration object. Must provide `img_name_unique`,
            `print_freq`, `num_model_classes` and `model_path`; `arch`
            and `scales` are also read, and `dataset` is required when
            `output_taxonomy` is not 'universal'.
        base_size: stored on the instance as `base_size`.
        crop_h: integer representing crop height, e.g. 473
        crop_w: integer representing crop width, e.g. 473
        input_file: could be absolute path to .txt file, .mp4 file,
            or to a directory full of jpg images
        output_taxonomy: stored on the instance; any value other than
            'universal' triggers creation of a TaxonomyConverter.
        scales: stored on the instance as `scales`.
        use_gpu: stored on the instance as `use_gpu`.
    """
    self.args = args

    # Fields that `args` must carry, checked in this order.
    required_fields = (
        ('img_name_unique', bool),
        ('print_freq', int),
        ('num_model_classes', int),
        ('model_path', str),
    )
    for field_name, field_type in required_fields:
        assert isinstance(getattr(self.args, field_name), field_type)

    self.pred_dim = self.args.num_model_classes
    self.base_size = base_size
    self.crop_h = crop_h
    self.crop_w = crop_w
    self.input_file = input_file
    self.output_taxonomy = output_taxonomy
    self.scales = scales
    self.use_gpu = use_gpu

    self.mean, self.std = get_imagenet_mean_std()
    self.model = self.load_model(args)
    self.softmax = nn.Softmax(dim=1)

    # optional, intended for dataloader use
    self.gray_folder = None
    self.data_list = None

    if self.output_taxonomy != 'universal':
        # A dataset name and a converter are set up only for
        # non-universal output.
        assert isinstance(self.args.dataset, str)
        self.dataset_name = args.dataset
        self.tc = TaxonomyConverter()

    if self.args.arch == 'psp':
        assert isinstance(self.args.zoom_factor, int)
        assert isinstance(self.args.network_name, int)

    self.id_to_class_name_map = dict(enumerate(get_universal_class_names()))

    # indicate which scales were used to make predictions
    # (multi-scale vs. single-scale)
    if len(args.scales) > 1:
        self.scales_str = 'ms'
    else:
        self.scales_str = 'ss'