def create_test_loader(
    args,
    use_batched_inference: bool = False
) -> Tuple[torch.utils.data.dataloader.DataLoader, List[Tuple[str, str]]]:
    """Create a Pytorch dataloader from a dataroot and list of relative paths.
	
	Args:
	    args: CfgNode object
	    use_batched_inference: whether to process images in batch mode
	
	Returns:
	    test_loader
	    data_list: list of 2-tuples (relative rgb path, relative label path)
	"""
    preprocess_imgs_in_loader = True if use_batched_inference else False

    if preprocess_imgs_in_loader:
        # resize and normalize images in advance
        mean, std = get_imagenet_mean_std()
        test_transform = transform.Compose([
            transform.ResizeShort(args.base_size),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)
        ])
    else:
        # only convert to tensor here; resizing (via OpenCV) and
        # normalization will happen on the fly during inference
        test_transform = transform.Compose([transform.ToTensor()])
    test_data = dataset.SemData(split=args.split,
                                data_root=args.data_root,
                                data_list=args.test_list,
                                transform=test_transform)

    index_start = args.index_start
    if args.index_step == 0:
        index_end = len(test_data.data_list)
    else:
        index_end = min(index_start + args.index_step,
                        len(test_data.data_list))
    test_data.data_list = test_data.data_list[index_start:index_end]
    data_list = test_data.data_list

    # limit batch size to 1 if not performing batched inference
    batch_size = args.batch_size_val if use_batched_inference else 1

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)
    return test_loader, data_list
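
A quick construction sketch for the loader above. Every `args` field value here is an illustrative assumption (a SimpleNamespace standing in for the CfgNode), not taken from any real config:

from types import SimpleNamespace

# Hypothetical config values -- placeholders only.
args = SimpleNamespace(
    split='val',
    data_root='/data/camvid',               # assumed dataset root
    test_list='/data/camvid/list/val.txt',  # assumed list of (rgb, label) paths
    base_size=1080,       # shorter image side after resizing
    index_start=0,
    index_step=0,         # 0 means "evaluate until the end of the list"
    batch_size_val=8,     # used only when use_batched_inference=True
    workers=4,
)

test_loader, data_list = create_test_loader(args, use_batched_inference=True)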
Example #2
def get_train_transform_list(args, split: str):
    """ Return the input data transform for training (w/ data augmentations)
        Args:
        -   args:
        -   split

        Return:
        -   List of transforms
    """
    from mseg_semantic.utils.normalization_utils import get_imagenet_mean_std
    from mseg_semantic.utils import transform
    # `ToFlatLabel` is used below; assumed to be importable from the same
    # transform utilities module.
    from mseg_semantic.utils.transform import ToFlatLabel

    mean, std = get_imagenet_mean_std()
    if split == 'train':
        transform_list = [
            transform.ResizeShort(args.short_size),
            transform.RandScale([args.scale_min, args.scale_max]),
            transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label),
            transform.RandomGaussianBlur(),
            transform.RandomHorizontalFlip(),
            transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)
        ]
    elif split == 'val':
        transform_list = [
            transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)
        ]
    else:
        raise ValueError(f'Unknown split: {split}. Must be train or val.')

    if len(args.dataset) > 1 and args.universal:
        transform_list += [ToFlatLabel(args.tc, args.dataset_name)]
    elif args.universal:
        transform_list += [ToFlatLabel(args.tc, args.dataset[0])]

    return transform.Compose(transform_list)
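
A minimal sketch of the `args` fields this function reads, again with placeholder values (assumptions, not from the source):

from types import SimpleNamespace

# Hypothetical config -- every value is illustrative.
args = SimpleNamespace(
    short_size=1080,
    scale_min=0.5, scale_max=2.0,    # random rescaling range
    rotate_min=-10, rotate_max=10,   # random rotation range, in degrees
    ignore_label=255,
    train_h=473, train_w=473,        # crop size
    dataset=['camvid-11'],           # assumed dataset name
    universal=False,                 # skip the ToFlatLabel branch
)

train_transform = get_train_transform_list(args, split='train')
val_transform = get_train_transform_list(args, split='val')

Example #3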
	def __init__(self,
		args,
		base_size: int,
		crop_h: int,
		crop_w: int,
		input_file: str,
		model_taxonomy: str,
		eval_taxonomy: str,
		scales: List[float],
		use_gpu: bool = True
		):
		"""
		We always use the ImageNet mean and standard deviation for normalization.
		mean: 3-tuple of floats, representing pixel mean value
		std: 3-tuple of floats, representing pixel standard deviation

		'args' should contain at least 6 fields (shown below).
		See brief explanation at top of file regarding taxonomy arg configurations.
		
		Args:
		    args: experiment configuration arguments
		    base_size: shorter side of image
		    crop_h: integer representing crop height, e.g. 473
		    crop_w: integer representing crop width, e.g. 473
		    input_file: could be absolute path to .txt file, .mp4 file, or to a directory full of jpg images
		    model_taxonomy: taxonomy in which trained model makes predictions
		    eval_taxonomy: taxonomy in which trained model is evaluated
		    scales: floats representing image scales for multi-scale inference
		    use_gpu: TODO, not supporting cpu at this time
		"""
		self.args = args

		# Required arguments:
		assert isinstance(self.args.save_folder, str)
		assert isinstance(self.args.dataset, str)
		assert isinstance(self.args.img_name_unique, bool)
		assert isinstance(self.args.print_freq, int)
		assert isinstance(self.args.num_model_classes, int)
		assert isinstance(self.args.model_path, str)
		self.num_model_classes = self.args.num_model_classes

		self.base_size = base_size
		self.crop_h = crop_h
		self.crop_w = crop_w
		self.input_file = input_file
		self.model_taxonomy = model_taxonomy
		self.eval_taxonomy = eval_taxonomy
		self.scales = scales
		self.use_gpu = use_gpu

		self.mean, self.std = get_imagenet_mean_std()
		self.model = self.load_model(args)
		self.softmax = nn.Softmax(dim=1)

		self.gray_folder = None # optional, intended for dataloader use
		self.data_list = None # optional, intended for dataloader use

		if model_taxonomy == 'universal' and eval_taxonomy == 'universal':
			# See note above.
			# no conversion of predictions required
			self.num_eval_classes = self.num_model_classes 

		elif model_taxonomy == 'test_dataset' and eval_taxonomy == 'test_dataset':
			# no conversion of predictions required
			self.num_eval_classes = len(load_class_names(args.dataset))

		elif model_taxonomy == 'naive' and eval_taxonomy == 'test_dataset':
			self.tc = NaiveTaxonomyConverter()
			if args.dataset in self.tc.convs.keys() and use_gpu:
				self.tc.convs[args.dataset].cuda()
			self.tc.softmax.cuda()
			self.num_eval_classes = len(load_class_names(args.dataset))

		elif model_taxonomy == 'universal' and eval_taxonomy == 'test_dataset':
			# no label conversion required here, only predictions converted
			self.tc = TaxonomyConverter()
			if args.dataset in self.tc.convs.keys() and use_gpu:
				self.tc.convs[args.dataset].cuda()
			self.tc.softmax.cuda()
			self.num_eval_classes = len(load_class_names(args.dataset))

		if self.args.arch == 'psp':
			assert isinstance(self.args.zoom_factor, int)
			assert isinstance(self.args.network_name, int)

		# `id_to_class_name_map` only used for visualizing universal taxonomy
		self.id_to_class_name_map = {
			i: classname for i, classname in enumerate(get_universal_class_names())
		}

		# indicate which scales were used to make predictions
		# (multi-scale vs. single-scale)
		self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'
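
The snippet above omits the enclosing class statement; in the mseg-semantic codebase this constructor belongs to `InferenceTask`, which is assumed below. Construction is sketched only, since `load_model` would need a real checkpoint at `model_path`, and all values are placeholders:

from types import SimpleNamespace

scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
args = SimpleNamespace(
    save_folder='/tmp/mseg_results',
    dataset='camvid-11',              # assumed test dataset name
    img_name_unique=True,
    print_freq=20,
    num_model_classes=194,            # e.g., the universal taxonomy size
    model_path='/path/to/checkpoint.pth',
    arch='hrnet',                     # anything but 'psp' skips the extra asserts
    scales=scales,                    # note: scales_str is derived from args.scales
)

itask = InferenceTask(
    args=args,
    base_size=1080,
    crop_h=473,
    crop_w=473,
    input_file='/path/to/images_dir',  # .txt file, .mp4 file, or image directory
    model_taxonomy='universal',
    eval_taxonomy='universal',         # no conversion of predictions required
    scales=scales,
)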
Example #4
    def __init__(self,
                 args,
                 base_size: int,
                 crop_h: int,
                 crop_w: int,
                 input_file: str,
                 output_taxonomy: str,
                 scales: List[float],
                 use_gpu: bool = True):
        """
		We always use the ImageNet mean and standard deviation for normalization.
		mean: 3-tuple of floats, representing pixel mean value
		std: 3-tuple of floats, representing pixel standard deviation

		'args' should contain at least two fields (shown below).

			Args:
			-	args:
			-	base_size:
			-	crop_h: integer representing crop height, e.g. 473
			-	crop_w: integer representing crop width, e.g. 473
			-	input_file: could be absolute path to .txt file, .mp4 file,
					or to a directory full of jpg images
			-	output_taxonomy
			-	scales
			-	use_gpu
		"""
        self.args = args
        assert isinstance(self.args.img_name_unique, bool)
        assert isinstance(self.args.print_freq, int)
        assert isinstance(self.args.num_model_classes, int)
        assert isinstance(self.args.model_path, str)
        self.pred_dim = self.args.num_model_classes

        self.base_size = base_size
        self.crop_h = crop_h
        self.crop_w = crop_w
        self.input_file = input_file
        self.output_taxonomy = output_taxonomy
        self.scales = scales
        self.use_gpu = use_gpu

        self.mean, self.std = get_imagenet_mean_std()
        self.model = self.load_model(args)
        self.softmax = nn.Softmax(dim=1)

        self.gray_folder = None  # optional, intended for dataloader use
        self.data_list = None  # optional, intended for dataloader use

        if self.output_taxonomy != 'universal':
            assert isinstance(self.args.dataset, str)
            self.dataset_name = args.dataset
            self.tc = TaxonomyConverter()

        if self.args.arch == 'psp':
            assert isinstance(self.args.zoom_factor, int)
            assert isinstance(self.args.network_name, int)

        self.id_to_class_name_map = {
            i: classname
            for i, classname in enumerate(get_universal_class_names())
        }

        # indicate which scales were used to make predictions
        # (multi-scale vs. single-scale)
        self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'
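
The same construction sketch for this older single-taxonomy variant, with the same hypothetical class name and caveats as the Example #3 sketch:

args.scales = [1.0]    # note: scales_str reads args.scales, not the parameter
itask = InferenceTask(
    args=args,
    base_size=720,
    crop_h=473,
    crop_w=473,
    input_file='/path/to/video.mp4',
    output_taxonomy='universal',   # skips TaxonomyConverter construction
    scales=[1.0],                  # single scale -> scales_str == 'ss'
)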