Example 1
def test_transform_predictions_test():
	"""
		Consider predictions made within the universal taxonomy
		over a tiny 2x3 image. We use a linear mapping to bring
		these predictions into a test dataset's taxonomy 
		(summing the probabilities where necessary).

		For Camvid, universal probabilities for 'person' and 'bicycle'
		should both go into the 'Bicyclist' class.
	"""
	u_classnames = get_universal_class_names()
	person_uidx = u_classnames.index('person')
	bicycle_uidx = u_classnames.index('bicycle')
	sky_uidx = u_classnames.index('sky')

	tc = TaxonomyConverter()
	input = np.zeros((194,2,3))
	input[sky_uidx,0,:] = 1.0 # top row is sky
	input[person_uidx,1,:] = 0.5 # bottom row is 50/50 person or bicyclist
	input[bicycle_uidx,1,:] = 0.5 # bottom row is 50/50 person or bicyclist
	input = torch.from_numpy(input)
	input = input.unsqueeze(0).float() # CHW -> NCHW
	assert input.shape == (1,194,2,3)

	test_dname = 'camvid-11'
	output = tc.transform_predictions_test(input, test_dname)
	output = output.squeeze() # NCHW -> CHW
	prediction = torch.argmax(output, dim=0).numpy()

	camvid_classnames = load_class_names(test_dname)
	# Camvid should have predictions across 11 classes.
	prediction_gt = np.zeros((2,3))
	prediction_gt[0,:] = camvid_classnames.index('Sky')
	prediction_gt[1,:] = camvid_classnames.index('Bicyclist')
	assert np.allclose(prediction, prediction_gt)
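Note: the linear mapping exercised above can be pictured as a 1x1 convolution whose 0/1 weights sum universal probabilities into each test-dataset channel. Below is a minimal sketch of that idea with made-up toy dimensions, not the actual TaxonomyConverter implementation:

import torch

# Hypothetical toy sizes: 4 universal classes -> 2 test classes.
num_u, num_test = 4, 2
conv = torch.nn.Conv2d(num_u, num_test, kernel_size=1, bias=False)
with torch.no_grad():
    conv.weight.zero_()
    conv.weight[0, 1] = 1.0  # test class 0 <- universal class 1
    conv.weight[1, 2] = 1.0  # test class 1 <- universal class 2
    conv.weight[1, 3] = 1.0  # test class 1 <- universal class 3 (probabilities sum)

probs = torch.rand(1, num_u, 2, 3).softmax(dim=1)  # NCHW universal probabilities
test_probs = conv(probs)                            # NCHW test-taxonomy probabilities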
Example 2
class ToUniversalLabel(object):
    def __init__(self, dataset):
        self.dataset = dataset
        self.tax_converter = TaxonomyConverter()

    def __call__(self, image, label):
        return image, self.tax_converter.transform_label(label, self.dataset)
Example 3
def test_label_mapping_arrs():
	""" """
	tc = TaxonomyConverter()
	train_idx = load_class_names('ade20k-150').index('minibike')
	u_idx = get_universal_class_names().index('motorcycle')
	assert tc.label_mapping_arr_dict['ade20k-150'][train_idx] == u_idx

	train_idx = load_class_names('mapillary-public65').index('Bird')
	u_idx = get_universal_class_names().index('bird')
	assert tc.label_mapping_arr_dict['mapillary-public65'][train_idx] == u_idx
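Note: these mapping arrays act as lookup tables; a minimal sketch of the indexing trick, with a made-up mapping (the real arrays come from tc.label_mapping_arr_dict):

import numpy as np

mapping_arr = np.full(256, 255, dtype=np.int64)  # default every id to 'unlabeled' (255)
mapping_arr[7] = 42                              # hypothetical: train id 7 -> universal id 42
train_label = np.array([[7, 7], [255, 7]], dtype=np.int64)
u_label = mapping_arr[train_label]               # vectorized lookup, same shape as the label map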
Example 4
class ToUniversalLabel(object):
    def __init__(self, dataset: str, use_naive_taxonomy: bool = False) -> None:
        self.dataset = dataset
        if use_naive_taxonomy:
            self.tax_converter = NaiveTaxonomyConverter()
        else:
            self.tax_converter = TaxonomyConverter()

    def __call__(self, image, label):
        return image, self.tax_converter.transform_label(label, self.dataset)
Example 5
def test_label_transform_unlabeled():
	"""
	Make sure 255 stays mapped to 255 at each level (to be ignored in cross-entropy loss).
	"""
	IGNORE_LABEL = 255
	dname = 'mapillary-public65'
	txt_classnames = load_class_names(dname)
	name2id = get_classname_to_dataloaderid_map(dname, include_ignore_idx_cls = True)
	train_idx = name2id['unlabeled']

	tc = TaxonomyConverter()
	# training dataset label
	traind_label = torch.ones(4,4)*train_idx
	traind_label = traind_label.type(torch.LongTensor)

	# Get back the universal label
	u_label = tc.transform_label(traind_label, dname)
	u_idx = IGNORE_LABEL
	gt_u_label = np.ones((4,4)).astype(np.int64) * u_idx
	assert np.allclose(u_label.numpy(), gt_u_label)
Example 6
def test_label_transform():
	"""
	Bring label from training taxonomy (mapillary-public65)
	to the universal taxonomy.
	
	21 is the motorcyclist class in mapillary-public65
	"""
	dname = 'mapillary-public65'
	txt_classnames = load_class_names(dname)
	train_idx = txt_classnames.index('Motorcyclist')
	tc = TaxonomyConverter()
	# training dataset label
	traind_label = torch.ones(4,4)*train_idx
	traind_label = traind_label.type(torch.LongTensor)

	# Get back the universal label
	u_label = tc.transform_label(traind_label, dname)
	u_idx = get_universal_class_names().index('motorcyclist')
	gt_u_label = np.ones((4,4)).astype(np.int64) * u_idx
	assert np.allclose(u_label.numpy(), gt_u_label)
Example 7
def relabel_pair(old_dataroot: str,
                 new_dataroot: str,
                 orig_pair: Tuple[str, str],
                 remapped_pair: Tuple[str, str],
                 dname: str,
                 tax_converter: TaxonomyConverter,
                 segm_to_class: Mapping[int, int],
                 dataset_colors: Optional[np.ndarray] = None):
    """
	No need to copy the RGB files again. We just update the label file paths.

		Args:
		-	old_dataroot:
		-	new_dataroot: 
		-	orig_pair: Tuple containing relative path to RGB image and label image
		-	remapped_pair: Tuple containing relative path to RGB image and label image
		-	label_mapping_arr: 
		-	dataset_colors:

		Returns:
		-	None
	"""
    _, orig_rel_label_fpath = orig_pair
    _, remapped_rel_label_fpath = remapped_pair

    old_label_fpath = f'{old_dataroot}/{orig_rel_label_fpath}'
    if not os.path.exists(old_label_fpath):
        print("Warning: File " + old_label_fpath + " not found!")
        return

    if dataset_colors is None:
        label_img = imageio.imread(old_label_fpath)
    else:
        # remap from RGB encoded labels to 1-channel class indices
        label_img_rgb = cv2_imread_rgb(old_label_fpath)
        label_img = rgb_img_to_obj_cls_img(label_img_rgb, dataset_colors)

    if segm_to_class is not None:
        label_img_id = label_img[:, :, 0] + (label_img[:, :, 1] * 256) + (
            label_img[:, :, 2] * 256**2)
        label_img = np.ones(label_img.shape[:2],
                            dtype=np.uint8) * 255  #initialize with unlabeled
        for src, dst in segm_to_class.items():
            label_img[label_img_id == src] = dst

    labels = torch.tensor(label_img, dtype=torch.int64)
    remapped_img = tax_converter.transform_label(labels, dname)

    new_label_fpath = f'{new_dataroot}/{remapped_rel_label_fpath}'
    create_leading_fpath_dirs(new_label_fpath)
    remapped_img = remapped_img.numpy().astype(dtype=np.uint8)
    imageio.imwrite(new_label_fpath, remapped_img)
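Note: when segm_to_class is provided, the label image is treated as a panoptic-style PNG whose segment id is packed into RGB as R + 256*G + 256^2*B (as in the code above). A small standalone sketch of that decode step, with a hypothetical segment-to-class mapping:

import numpy as np

def decode_segment_ids(label_img_rgb: np.ndarray) -> np.ndarray:
    r = label_img_rgb[..., 0].astype(np.int64)
    g = label_img_rgb[..., 1].astype(np.int64)
    b = label_img_rgb[..., 2].astype(np.int64)
    return r + g * 256 + b * 256 ** 2

segm_to_class = {1234: 3, 98765: 17}                     # hypothetical mapping
label_img_rgb = np.zeros((2, 2, 3), dtype=np.uint8)      # dummy label image
seg_ids = decode_segment_ids(label_img_rgb)
class_img = np.full(seg_ids.shape, 255, dtype=np.uint8)  # initialize as unlabeled
for src, dst in segm_to_class.items():
    class_img[seg_ids == src] = dst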
Example 8
def get_excluded_class_ids(dataset: str) -> List[int]:
    """Find the classes to exclude when evaluating a "relabeled" MSeg model
    on the val split of a training dataset.

    We retrieve the dictionary `id_to_uid_maps` with (k,v) pairs where 
    "k" is the original, unrelabeled training dataset ID, and "v" is 
    the universal taxonomy ID.

        Args:
        -   dataset: name of a MSeg training dataset, e.g. 'coco-panoptic-133'

        Returns:
        -   zero_class_ids
    """
    tc = TaxonomyConverter()
    id_maps = tc.id_to_uid_maps[dataset]  # map from training ids to universal ids. Zeroing these out (or not) makes no difference when training and testing on the same dataset.
    nonzero_class_ids = set(id_maps.values())
    zero_class_ids = [x for x in range(tc.num_uclasses) if x not in nonzero_class_ids]
    return zero_class_ids
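Note: per the comment above, these excluded ids are the universal classes the training dataset never maps to; a hypothetical sketch of zeroing them out before evaluation (names and shapes are illustrative only):

import torch

num_uclasses = 194
logits = torch.rand(1, num_uclasses, 4, 4)   # dummy per-pixel universal logits
zero_class_ids = [5, 17, 120]                # e.g. returned by get_excluded_class_ids(...)
probs = logits.softmax(dim=1)
probs[:, zero_class_ids, :, :] = 0.0         # zero out classes the training set never maps to
pred = probs.argmax(dim=1)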
Example 9
    def __init__(self,
                 args,
                 data_list: List[Tuple[str, str]],
                 dataset_name: str,
                 class_names: List[str],
                 save_folder: str,
                 num_eval_classes: int,
                 render_confusion_matrix: bool = False) -> None:
        """
            Args:
            -   args,
            -   data_list
            -   dataset_name: 
            -   class_names: 
            -   save_folder: 
            -   num_eval_classes: 
            -   render_confusion_matrix: 

            Returns:
            -   None
        """
        self.num_eval_classes = num_eval_classes
        self.args = args
        self.data_list = data_list
        self.dataset_name = dataset_name
        self.class_names = class_names
        self.save_folder = save_folder
        self.gray_folder = os.path.join(save_folder, 'gray')
        self.render_confusion_matrix = render_confusion_matrix

        if self.render_confusion_matrix:
            self.cmr = ConfusionMatrixRenderer(self.save_folder, class_names,
                                               self.dataset_name)
        self.sam = SegmentationAverageMeter()
        self.id_to_class_name_map = get_dataloader_id_to_classname_map(
            self.dataset_name, class_names, include_ignore_idx_cls=True)
        self.tc = TaxonomyConverter()
        self.excluded_ids = []

        assert isinstance(args.vis_freq, int)
        assert isinstance(args.img_name_unique, bool)
        assert isinstance(args.taxonomy, str)
        assert isinstance(args.model_path, str)
Example 10
def test_constructor_types():
	""" """
	tc = TaxonomyConverter()
	for dname, conv in tc.convs.items():
		assert isinstance(conv, torch.nn.Module)
Example 11
def remap_dataset(dname: str,
                  tsv_fpath: str,
                  old_dataroot: str,
                  remapped_dataroot: str,
                  panoptic_json_path: str,
                  num_processes: int = 4,
                  create_symlink_cpy: bool = False,
                  convert_label_from_rgb: bool = False):
    """
	Given path to a dataset, given names of _names.txt
	Remap according to the provided tsv.
	(also account for the fact that 255 is always unlabeled)

		Args:
		-	dname: string representing name of taxonomy for original dataset
		-	tsv_fpath: string representing path to a .tsv file
		-	old_dataroot: string representing path to original dataset
		-	remapped_dataroot: string representing path at which to new dataset
		-   panoptic_json_path: string representing path to coco-style json file
		-	num_processes: integer representing number of workers to exploit
		-   create_symlink_cpy: adds symbolic links for images in the same folder structure as annotations

		Returns:
		-	None
	"""
    # form one-way mapping between IDs
    tconv = TaxonomyConverter(train_datasets=[dname],
                              test_datasets=[],
                              tsv_fpath=tsv_fpath)
    dataset_colors = load_dataset_colors_arr(
        dname) if convert_label_from_rgb else None

    for split_idx, split in enumerate(['train', 'val']):
        panoptic_json_content = None
        orig_relative_img_label_pairs = generate_all_img_label_pair_relative_fpaths(
            dname, split)
        if panoptic_json_path is not None:
            with open(
                    panoptic_json_path.format(split=split,
                                              split_idx=str(split_idx)),
                    'r') as ifile:
                json_cont = json.load(ifile)
            panoptic_json_content = {
                a["file_name"]: a
                for a in json_cont["annotations"]
            }
            if dname[:4] == "coco":  #hacky  needed for inplace coco support
                orig_relative_img_label_pairs = [[
                    fix_path_coco_inplace(p[0]),
                    fix_path_coco_inplace(p[1])
                ] for p in orig_relative_img_label_pairs]
        basedir = 'images/' + split + '/' + dname
        img_subdirs = list(
            set([os.path.dirname(p[0])
                 for p in orig_relative_img_label_pairs]))
        img_dir_remapping = {}
        for d in img_subdirs:
            img_dir_remapping[d] = basedir if len(
                img_subdirs) == 1 else basedir + '/' + d.replace(
                    '/color', '').replace('/leftImg8bit', '')
            if create_symlink_cpy:
                unpriv_symb_link(
                    old_dataroot + '/' + d,
                    remapped_dataroot + '/' + img_dir_remapping[d])
        remapped_relative_img_label_pairs = [
            (img_dir_remapping[os.path.dirname(p[0])] + '/' +
             os.path.basename(p[0]), img_dir_remapping[os.path.dirname(
                 p[0])].replace('images', 'annotations') + '/' +
             os.path.basename(p[0]).replace('.jpg', '.png'))
            for p in orig_relative_img_label_pairs
        ]

        send_list_to_workers(
            num_processes=num_processes,
            list_to_split=orig_relative_img_label_pairs,
            worker_func_ptr=relabel_pair_worker,
            remapped_relative_img_label_pairs=remapped_relative_img_label_pairs,
            tax_converter=tconv,
            panoptic_json_content=panoptic_json_content,
            old_dataroot=old_dataroot,
            new_dataroot=remapped_dataroot,
            dname=dname,
            dataset_colors=dataset_colors)
Example 12
    def __init__(self, dataset):
        self.dataset = dataset
        self.tax_converter = TaxonomyConverter()
class InferenceTask:

	def __init__(self,
		args,
		base_size: int,
		crop_h: int,
		crop_w: int,
		input_file: str,
		model_taxonomy: str,
		eval_taxonomy: str,
		scales: List[float],
		use_gpu: bool = True
		):
		"""
		We always use the ImageNet mean and standard deviation for normalization.
		mean: 3-tuple of floats, representing pixel mean value
		std: 3-tuple of floats, representing pixel standard deviation

		'args' should contain at least the fields asserted below.
		See brief explanation at top of file regarding taxonomy arg configurations.
		
		Args:
		    args: experiment configuration arguments
		    base_size: shorter side of image
		    crop_h: integer representing crop height, e.g. 473
		    crop_w: integer representing crop width, e.g. 473
		    input_file: could be absolute path to .txt file, .mp4 file, or to a directory full of jpg images
		    model_taxonomy: taxonomy in which trained model makes predictions
		    eval_taxonomy: taxonomy in which trained model is evaluated
		    scales: floats representing image scales for multi-scale inference
		    use_gpu: TODO, not supporting cpu at this time
		"""
		self.args = args

		# Required arguments:
		assert isinstance(self.args.save_folder, str)
		assert isinstance(self.args.dataset, str)
		assert isinstance(self.args.img_name_unique, bool)
		assert isinstance(self.args.print_freq, int)
		assert isinstance(self.args.num_model_classes, int)
		assert isinstance(self.args.model_path, str)
		self.num_model_classes = self.args.num_model_classes

		self.base_size = base_size
		self.crop_h = crop_h
		self.crop_w = crop_w
		self.input_file = input_file
		self.model_taxonomy = model_taxonomy
		self.eval_taxonomy = eval_taxonomy
		self.scales = scales
		self.use_gpu = use_gpu

		self.mean, self.std = get_imagenet_mean_std()
		self.model = self.load_model(args)
		self.softmax = nn.Softmax(dim=1)

		self.gray_folder = None # optional, intended for dataloader use
		self.data_list = None # optional, intended for dataloader use

		if model_taxonomy == 'universal' and eval_taxonomy == 'universal':
			# See note above.
			# no conversion of predictions required
			self.num_eval_classes = self.num_model_classes 

		elif model_taxonomy == 'test_dataset' and eval_taxonomy == 'test_dataset':
			# no conversion of predictions required
			self.num_eval_classes = len(load_class_names(args.dataset))

		elif model_taxonomy == 'naive' and eval_taxonomy == 'test_dataset':
			self.tc = NaiveTaxonomyConverter()
			if args.dataset in self.tc.convs.keys() and use_gpu:
				self.tc.convs[args.dataset].cuda()
			self.tc.softmax.cuda()
			self.num_eval_classes = len(load_class_names(args.dataset))

		elif model_taxonomy == 'universal' and eval_taxonomy == 'test_dataset':
			# no label conversion required here, only predictions converted
			self.tc = TaxonomyConverter()
			if args.dataset in self.tc.convs.keys() and use_gpu:
				self.tc.convs[args.dataset].cuda()
			self.tc.softmax.cuda()
			self.num_eval_classes = len(load_class_names(args.dataset))

		if self.args.arch == 'psp':
			assert isinstance(self.args.zoom_factor, int)
			assert isinstance(self.args.network_name, int)

		# `id_to_class_name_map` only used for visualizing universal taxonomy
		self.id_to_class_name_map = {
			i: classname for i, classname in enumerate(get_universal_class_names())
		}

		# indicate which scales were used to make predictions
		# (multi-scale vs. single-scale)
		self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'


	def load_model(self, args):
		"""Load Pytorch pre-trained model from disk of type torch.nn.DataParallel. 
		
		Note that `args.num_model_classes` will be size of logits output.
		
		Args:
		    args: 
		
		Returns:
		    model
		"""
		if args.arch == 'psp':
			model = PSPNet(
				layers=args.layers,
				classes=args.num_model_classes,
				zoom_factor=args.zoom_factor,
				pretrained=False,
				network_name=args.network_name,
			)
		elif args.arch == 'hrnet':
			from mseg_semantic.model.seg_hrnet import get_configured_hrnet
			# note apex batchnorm is hardcoded 
			model = get_configured_hrnet(args.num_model_classes, load_imagenet_model=False)
		elif args.arch == 'hrnet_ocr':
			from mseg_semantic.model.seg_hrnet_ocr import get_configured_hrnet_ocr
			model = get_configured_hrnet_ocr(args.num_model_classes)
		# logger.info(model)
		model = torch.nn.DataParallel(model)
		if self.use_gpu:
			model = model.cuda()
		cudnn.benchmark = True

		if os.path.isfile(args.model_path):
			logger.info(f"=> loading checkpoint '{args.model_path}'")
			if self.use_gpu:
				checkpoint = torch.load(args.model_path)
			else:
				checkpoint = torch.load(args.model_path, map_location='cpu')
			model.load_state_dict(checkpoint['state_dict'], strict=False)
			logger.info(f"=> loaded checkpoint '{args.model_path}'")
		else:
			raise RuntimeError(f"=> no checkpoint found at '{args.model_path}'")

		return model


	def execute(self) -> None:
		"""
		Execute the demo, i.e. feed all of the desired input through the
		network and obtain predictions. Gracefully handles .txt, 
		or video file (.mp4, etc), or directory input.
		"""
		logger.info('>>>>>>>>>>>>>> Start inference task >>>>>>>>>>>>>')
		self.model.eval()

		if self.input_file is None and self.args.dataset != 'default':
			# evaluate on a train or test dataset
			test_loader, self.data_list = create_test_loader(self.args)
			self.execute_on_dataloader(test_loader)
			logger.info('<<<<<<<<< Inference task completed <<<<<<<<<')
			return

		suffix = self.input_file[-4:]
		is_dir = os.path.isdir(self.input_file)
		is_img = suffix in ['.png', '.jpg']
		is_vid = suffix in ['.mp4', '.avi', '.mov']

		if is_img:
			self.render_single_img_pred()
		elif is_dir:
			# argument is a path to a directory
			self.create_path_lists_from_dir()
			test_loader, self.data_list = create_test_loader(self.args)
			self.execute_on_dataloader(test_loader)
		elif is_vid:
			# argument is a video
			self.execute_on_video()
		else:
			logger.info('Error: Unknown input type')

		logger.info('<<<<<<<<<<< Inference task completed <<<<<<<<<<<<<<')

	def render_single_img_pred(self, min_resolution: int = 1080):
		"""Since overlaid class text is difficult to read below 1080p, we upsample predictions."""
		in_fname_stem = Path(self.input_file).stem
		output_gray_fpath = f'{in_fname_stem}_gray.jpg'
		output_demo_fpath = f'{in_fname_stem}_overlaid_classes.jpg'
		logger.info(f'Write image prediction to {output_demo_fpath}')

		rgb_img = imread_rgb(self.input_file)
		pred_label_img = self.execute_on_img(rgb_img)

		# avoid blurry images by upsampling RGB before overlaying text
		if np.amin(rgb_img.shape[:2]) < min_resolution:
			rgb_img = resize_img_by_short_side(rgb_img, min_resolution, 'rgb')
			pred_label_img = resize_img_by_short_side(pred_label_img, min_resolution, 'label')

		metadata = None
		frame_visualizer = Visualizer(rgb_img, metadata)
		overlaid_img = frame_visualizer.overlay_instances(
			label_map=pred_label_img,
			id_to_class_name_map=self.id_to_class_name_map
		)
		imageio.imwrite(output_demo_fpath, overlaid_img)
		imageio.imwrite(output_gray_fpath, pred_label_img)

	def create_path_lists_from_dir(self) -> None:
		"""Populate a .txt file with relative paths that will be used to create a Pytorch dataloader."""
		self.args.data_root = self.input_file
		txt_output_dir = str(Path(f'{_ROOT}/temp_files').resolve())
		txt_save_fpath = dump_relpath_txt(self.input_file, txt_output_dir)
		self.args.test_list = txt_save_fpath


	def execute_on_img(self, image: np.ndarray) -> np.ndarray:
		"""
		Rather than feeding in crops w/ sliding window across the full-res image, we 
		downsample/upsample the image to a default inference size. This may differ
		from the best training size.

		For example, if trained on small images, we must shrink down the image in 
		testing (preserving the aspect ratio), based on the parameter "base_size",
		which is the short side of the image.
		
		Args:
		    image: Numpy array representing RGB image
		
		Returns:
		    gray_img: prediction, representing predicted label map
		"""
		h, w, _ = image.shape
		is_single_scale = len(self.scales) == 1

		if is_single_scale:
			# single scale, do addition and argmax on CPU
			image_scaled = resize_by_scaled_short_side(image, self.base_size, self.scales[0])
			prediction = torch.Tensor(self.scale_process_cuda(image_scaled, h, w))

		else:
			# multi-scale, prefer to use fast addition on the GPU
			prediction = np.zeros((h, w, self.num_eval_classes), dtype=float)
			prediction = torch.Tensor(prediction).cuda()
			for scale in self.scales:
				image_scaled = resize_by_scaled_short_side(image, self.base_size, scale)
				prediction = prediction + torch.Tensor(self.scale_process_cuda(image_scaled, h, w)).cuda()

		prediction /= len(self.scales)
		prediction = torch.argmax(prediction, axis=2)
		prediction = prediction.data.cpu().numpy()
		gray_img = np.uint8(prediction)
		return gray_img

	def execute_on_video(self, max_num_frames: int = 5000, min_resolution: int = 1080) -> None:
		"""
		input_file is a path to a video file.
		Read frames from an RGB video file, and write overlaid predictions into a new video file.
		"""
		in_fname_stem = Path(self.input_file).stem
		out_fname = f'{in_fname_stem}_{self.args.model_name}_universal'
		out_fname += f'_scales_{self.scales_str}_base_sz_{self.args.base_size}.mp4'

		output_video_fpath = f'{_ROOT}/temp_files/{out_fname}'
		create_leading_fpath_dirs(output_video_fpath)
		logger.info(f'Write video to {output_video_fpath}')
		writer = VideoWriter(output_video_fpath)

		reader = VideoReader(self.input_file)
		for frame_idx in range(reader.num_frames):
			logger.info(f'On image {frame_idx}/{reader.num_frames}')
			rgb_img = reader.get_frame()
			if frame_idx > max_num_frames:
				break
			pred_label_img = self.execute_on_img(rgb_img)

			# avoid blurry images by upsampling RGB before overlaying text
			if np.amin(rgb_img.shape[:2]) < min_resolution:
				rgb_img = resize_img_by_short_side(rgb_img, min_resolution, 'rgb')
				pred_label_img = resize_img_by_short_side(pred_label_img, min_resolution, 'label')

			metadata = None
			frame_visualizer = Visualizer(rgb_img, metadata)
			output_img = frame_visualizer.overlay_instances(
				label_map=pred_label_img,
				id_to_class_name_map=self.id_to_class_name_map
			)
			writer.add_frame(output_img)

		reader.complete()
		writer.complete()

	def execute_on_dataloader(self, test_loader: torch.utils.data.dataloader.DataLoader):
		"""Run a pretrained model over each batch in a dataloader.
		
		Args:
		     test_loader: 
		"""
		if self.args.save_folder == 'default':
			self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

		os.makedirs(self.args.save_folder, exist_ok=True)
		gray_folder = os.path.join(self.args.save_folder, 'gray')
		self.gray_folder = gray_folder

		data_time = AverageMeter()
		batch_time = AverageMeter()
		end = time.time()

		check_mkdir(self.gray_folder)

		for i, (input, _) in enumerate(test_loader):
			logger.info(f'On image {i}')
			data_time.update(time.time() - end)

			# determine path for grayscale label map
			image_path, _ = self.data_list[i]
			if self.args.img_name_unique:
				image_name = Path(image_path).stem
			else:
				image_name = get_unique_stem_from_last_k_strs(image_path)
			gray_path = os.path.join(self.gray_folder, image_name + '.png')
			if Path(gray_path).exists():
				continue

			# convert Pytorch tensor -> Numpy, then feedforward
			input = np.squeeze(input.numpy(), axis=0)
			image = np.transpose(input, (1, 2, 0))
			gray_img = self.execute_on_img(image)

			batch_time.update(time.time() - end)
			end = time.time()
			cv2.imwrite(gray_path, gray_img)

			# todo: update to time remaining.
			if ((i + 1) % self.args.print_freq == 0) or (i + 1 == len(test_loader)):
				logger.info('Test: [{}/{}] '
				'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
				'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.format(i + 1, len(test_loader),
				data_time=data_time,
				batch_time=batch_time))


	def scale_process_cuda(self, image: np.ndarray, raw_h: int, raw_w: int, stride_rate: float = 2/3) -> np.ndarray:
		""" First, pad the image. If input is (384x512), then we must pad it up to shape
		to have shorter side "scaled base_size". 

		Then we perform the sliding window on this scaled image, and then interpolate 
		(downsample or upsample) the prediction back to the original one.

		At each pixel, we increment a counter for the number of times this pixel
		has passed through the sliding window.

		Args:
		    image: Array, representing image where shortest edge is adjusted to base_size
		    raw_h: integer representing native/raw image height on disk, e.g. for NYU it is 480
		    raw_w: integer representing native/raw image width on disk, e.g. for NYU it is 640
		    stride_rate: stride rate of sliding window operation

		Returns:
		    prediction: Numpy array representing predictions with shorter side equal to self.base_size
		"""
		resized_h, resized_w, _ = image.shape
		padded_image, pad_h_half, pad_w_half = pad_to_crop_sz(image, self.crop_h, self.crop_w, self.mean)
		new_h, new_w, _ = padded_image.shape
		stride_h = int(np.ceil(self.crop_h*stride_rate))
		stride_w = int(np.ceil(self.crop_w*stride_rate))
		grid_h = int(np.ceil(float(new_h-self.crop_h)/stride_h) + 1)
		grid_w = int(np.ceil(float(new_w-self.crop_w)/stride_w) + 1)

		prediction_crop = torch.zeros((self.num_eval_classes, new_h, new_w)).cuda()
		count_crop = torch.zeros((new_h, new_w)).cuda()

		# loop w/ sliding window, obtain start/end indices
		for index_h in range(0, grid_h):
			for index_w in range(0, grid_w):
				# height indices are s_h to e_h (start h index to end h index)
				# width indices are s_w to e_w (start w index to end w index)
				s_h = index_h * stride_h
				e_h = min(s_h + self.crop_h, new_h)
				s_h = e_h - self.crop_h
				s_w = index_w * stride_w
				e_w = min(s_w + self.crop_w, new_w)
				s_w = e_w - self.crop_w
				image_crop = padded_image[s_h:e_h, s_w:e_w].copy()
				count_crop[s_h:e_h, s_w:e_w] += 1
				prediction_crop[:, s_h:e_h, s_w:e_w] += self.net_process(image_crop)

		prediction_crop /= count_crop.unsqueeze(0)
		# disregard predictions from padded portion of image
		prediction_crop = prediction_crop[:, pad_h_half:pad_h_half+resized_h, pad_w_half:pad_w_half+resized_w]

		# CHW -> HWC
		prediction_crop = prediction_crop.permute(1,2,0)
		prediction_crop = prediction_crop.data.cpu().numpy()

		# upsample or shrink predictions back down to scale=1.0
		prediction = cv2.resize(prediction_crop, (raw_w, raw_h), interpolation=cv2.INTER_LINEAR)
		return prediction


	def net_process(self, image: np.ndarray, flip: bool = True) -> torch.Tensor:
		""" Feed input through the network.
		
		In addition to running a crop through the network, we can flip
		the crop horizontally, run both crops through the network, and then
		average them appropriately. Afterwards, apply softmax, then convert
		the prediction to the label taxonomy.
		
		Args:
		    image:
		    flip: boolean, whether to average with flipped patch output
		
		Returns:
		    output: Pytorch tensor representing network predicting in evaluation taxonomy 
		            (not necessarily the model taxonomy)
		"""
		input = torch.from_numpy(image.transpose((2, 0, 1))).float()
		normalize_img(input, self.mean, self.std)
		input = input.unsqueeze(0)

		if self.use_gpu:
			input = input.cuda()
		if flip:
			# add another example to batch dimension, that is the flipped crop
			input = torch.cat([input, input.flip(3)], 0)
		with torch.no_grad():
			output = self.model(input)
		_, _, h_i, w_i = input.shape
		_, _, h_o, w_o = output.shape
		if (h_o != h_i) or (w_o != w_i):
			output = F.interpolate(output, (h_i, w_i), mode='bilinear', align_corners=True)

		prediction_conversion_req = self.model_taxonomy != self.eval_taxonomy
		if prediction_conversion_req:
			# Either (model_taxonomy='naive', eval_taxonomy='test_dataset')
			# Or (model_taxonomy='universal', eval_taxonomy='test_dataset')
			output = self.tc.transform_predictions_test(output, self.args.dataset)
		else:
			# model & eval tax match, so no conversion needed
			assert self.model_taxonomy in ['universal','test_dataset']
			# todo: determine when .cuda() needed here
			output = self.softmax(output)

		if flip:
			# take back out the flipped crop, correct its orientation, and average result
			output = (output[0] + output[1].flip(2)) / 2
		else:
			output = output[0]
		# output = output.data.cpu().numpy()
		# convert CHW to HWC order
		# output = output.transpose(1, 2, 0)
		# output = output.permute(1,2,0)

		return output
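Note: a worked example of the sliding-window bookkeeping in scale_process_cuda above, using hypothetical sizes (473x473 crops over a 768x1024 padded image):

import numpy as np

crop_h = crop_w = 473
new_h, new_w = 768, 1024                                     # assumed padded-image size
stride_rate = 2 / 3
stride_h = int(np.ceil(crop_h * stride_rate))                # 316
stride_w = int(np.ceil(crop_w * stride_rate))                # 316
grid_h = int(np.ceil(float(new_h - crop_h) / stride_h) + 1)  # 2 rows of crops
grid_w = int(np.ceil(float(new_w - crop_w) / stride_w) + 1)  # 3 columns of crops

windows = []
for index_h in range(grid_h):
    for index_w in range(grid_w):
        e_h = min(index_h * stride_h + crop_h, new_h)
        s_h = e_h - crop_h                                   # last window is pulled back inside the image
        e_w = min(index_w * stride_w + crop_w, new_w)
        s_w = e_w - crop_w
        windows.append((s_h, e_h, s_w, e_w))
# 6 overlapping 473x473 windows cover the padded image; the per-pixel visit counts
# accumulated in count_crop are what the summed predictions get divided by.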
Example 14
    def __init__(self,
                 args,
                 base_size: int,
                 crop_h: int,
                 crop_w: int,
                 input_file: str,
                 output_taxonomy: str,
                 scales: List[float],
                 use_gpu: bool = True):
        """
		We always use the ImageNet mean and standard deviation for normalization.
		mean: 3-tuple of floats, representing pixel mean value
		std: 3-tuple of floats, representing pixel standard deviation

		'args' should contain at least the fields asserted below.

			Args:
			-	args:
			-	base_size:
			-	crop_h: integer representing crop height, e.g. 473
			-	crop_w: integer representing crop width, e.g. 473
			-	input_file: could be absolute path to .txt file, .mp4 file,
					or to a directory full of jpg images
			-	output_taxonomy
			-	scales
			-	use_gpu
		"""
        self.args = args
        assert isinstance(self.args.img_name_unique, bool)
        assert isinstance(self.args.print_freq, int)
        assert isinstance(self.args.num_model_classes, int)
        assert isinstance(self.args.model_path, str)
        self.pred_dim = self.args.num_model_classes

        self.base_size = base_size
        self.crop_h = crop_h
        self.crop_w = crop_w
        self.input_file = input_file
        self.output_taxonomy = output_taxonomy
        self.scales = scales
        self.use_gpu = use_gpu

        self.mean, self.std = get_imagenet_mean_std()
        self.model = self.load_model(args)
        self.softmax = nn.Softmax(dim=1)

        self.gray_folder = None  # optional, intended for dataloader use
        self.data_list = None  # optional, intended for dataloader use

        if self.output_taxonomy != 'universal':
            assert isinstance(self.args.dataset, str)
            self.dataset_name = args.dataset
            self.tc = TaxonomyConverter()

        if self.args.arch == 'psp':
            assert isinstance(self.args.zoom_factor, int)
            assert isinstance(self.args.network_name, int)

        self.id_to_class_name_map = {
            i: classname
            for i, classname in enumerate(get_universal_class_names())
        }

        # indicate which scales were used to make predictions
        # (multi-scale vs. single-scale)
        self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'
Example 15
class InferenceTask:
    def __init__(self,
                 args,
                 base_size: int,
                 crop_h: int,
                 crop_w: int,
                 input_file: str,
                 output_taxonomy: str,
                 scales: List[float],
                 use_gpu: bool = True):
        """
		We always use the ImageNet mean and standard deviation for normalization.
		mean: 3-tuple of floats, representing pixel mean value
		std: 3-tuple of floats, representing pixel standard deviation

		'args' should contain at least the fields asserted below.

			Args:
			-	args:
			-	base_size:
			-	crop_h: integer representing crop height, e.g. 473
			-	crop_w: integer representing crop width, e.g. 473
			-	input_file: could be absolute path to .txt file, .mp4 file,
					or to a directory full of jpg images
			-	output_taxonomy
			-	scales
			-	use_gpu
		"""
        self.args = args
        assert isinstance(self.args.img_name_unique, bool)
        assert isinstance(self.args.print_freq, int)
        assert isinstance(self.args.num_model_classes, int)
        assert isinstance(self.args.model_path, str)
        self.pred_dim = self.args.num_model_classes

        self.base_size = base_size
        self.crop_h = crop_h
        self.crop_w = crop_w
        self.input_file = input_file
        self.output_taxonomy = output_taxonomy
        self.scales = scales
        self.use_gpu = use_gpu

        self.mean, self.std = get_imagenet_mean_std()
        self.model = self.load_model(args)
        self.softmax = nn.Softmax(dim=1)

        self.gray_folder = None  # optional, intended for dataloader use
        self.data_list = None  # optional, intended for dataloader use

        if self.output_taxonomy != 'universal':
            assert isinstance(self.args.dataset, str)
            self.dataset_name = args.dataset
            self.tc = TaxonomyConverter()

        if self.args.arch == 'psp':
            assert isinstance(self.args.zoom_factor, int)
            assert isinstance(self.args.network_name, int)

        self.id_to_class_name_map = {
            i: classname
            for i, classname in enumerate(get_universal_class_names())
        }

        # indicate which scales were used to make predictions
        # (multi-scale vs. single-scale)
        self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'

    def load_model(self, args):
        """
		Load Pytorch pre-trained model from disk of type 
		torch.nn.DataParallel. Note that
		`args.num_model_classes` will be size of logits output.

			Args:
			-   args: 

			Returns:
			-   model
		"""
        if args.arch == 'psp':
            model = PSPNet(layers=args.layers,
                           classes=args.num_model_classes,
                           zoom_factor=args.zoom_factor,
                           pretrained=False,
                           network_name=args.network_name)
        elif args.arch == 'hrnet':
            from mseg_semantic.model.seg_hrnet import get_configured_hrnet
            # note apex batchnorm is hardcoded
            model = get_configured_hrnet(args.num_model_classes)
        elif args.arch == 'hrnet_ocr':
            from mseg_semantic.model.seg_hrnet_ocr import get_configured_hrnet_ocr
            model = get_configured_hrnet_ocr(args.num_model_classes)
        # logger.info(model)
        model = torch.nn.DataParallel(model)
        if self.use_gpu:
            model = model.cuda()
        cudnn.benchmark = True

        if os.path.isfile(args.model_path):
            logger.info(f"=> loading checkpoint '{args.model_path}'")
            if self.use_gpu:
                checkpoint = torch.load(args.model_path)
            else:
                checkpoint = torch.load(args.model_path, map_location='cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            logger.info(f"=> loaded checkpoint '{args.model_path}'")
        else:
            raise RuntimeError(
                f"=> no checkpoint found at '{args.model_path}'")

        return model

    def execute(self) -> None:
        """
		Execute the demo, i.e. feed all of the desired input through the
		network and obtain predictions. Gracefully handles .txt, 
		or video file (.mp4, etc), or directory input.
		"""
        logger.info('>>>>>>>>>>>>>>>> Start inference task >>>>>>>>>>>>>>>>')
        self.model.eval()

        suffix = self.input_file[-4:]
        is_dir = os.path.isdir(self.input_file)

        if is_dir:
            # argument is a path to a directory
            self.create_path_lists_from_dir()
            test_loader = self.create_test_loader()
            self.execute_on_dataloader(test_loader)

        elif not is_dir and suffix in ['.mp4', '.avi', '.mov']:
            # argument is a video
            self.execute_on_video()

        elif not is_dir and self.args.dataset != 'default':
            # evaluate on a train or test dataset
            test_loader = self.create_test_loader()
            self.execute_on_dataloader(test_loader)

        else:
            logger.info('Error: Unknown input type')

        logger.info(
            '<<<<<<<<<<<<<<<<< Inference task completed <<<<<<<<<<<<<<<<<')

    def create_path_lists_from_dir(self) -> None:
        """
		Populate a .txt file with relative paths that will be used to create 
		a Pytorch dataloader.

			Args:
			-	None

			Returns:
			-	None
		"""
        self.args.data_root = self.input_file
        txt_output_dir = str(Path(f'{_ROOT}/temp_files').resolve())
        txt_save_fpath = dump_relpath_txt(self.input_file, txt_output_dir)
        self.args.test_list = txt_save_fpath

    def create_test_loader(self):
        """
			Create a Pytorch dataloader from a dataroot and list of 
			relative paths.
		"""
        test_transform = transform.Compose([transform.ToTensor()])
        test_data = dataset.SemData(split=self.args.split,
                                    data_root=self.args.data_root,
                                    data_list=self.args.test_list,
                                    transform=test_transform)

        index_start = self.args.index_start
        if self.args.index_step == 0:
            index_end = len(test_data.data_list)
        else:
            index_end = min(index_start + self.args.index_step,
                            len(test_data.data_list))
        test_data.data_list = test_data.data_list[index_start:index_end]
        self.data_list = test_data.data_list
        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=1,
            shuffle=False,
            num_workers=self.args.workers,
            pin_memory=True)
        return test_loader

    def execute_on_img(self, image: np.ndarray) -> np.ndarray:
        """
		Rather than feeding in crops w/ sliding window across the full-res image, we 
		downsample/upsample the image to a default inference size. This may differ
		from the best training size.

		For example, if trained on small images, we must shrink down the image in 
		testing (preserving the aspect ratio), based on the parameter "base_size",
		which is the short side of the image.

			Args:
			-	image: Numpy array representing RGB image
			
			Returns:
			-	gray_img: prediction, representing predicted label map
		"""
        h, w, _ = image.shape

        prediction = np.zeros((h, w, self.pred_dim), dtype=float)
        prediction = torch.Tensor(prediction).cuda()

        for scale in self.scales:
            image_scale = resize_by_scaled_short_side(image, self.base_size,
                                                      scale)
            prediction = prediction + torch.Tensor(
                self.scale_process_cuda(image_scale, h, w)).cuda()

        prediction /= len(self.scales)
        prediction = torch.argmax(prediction, axis=2)
        prediction = prediction.data.cpu().numpy()
        gray_img = np.uint8(prediction)
        return gray_img

    def execute_on_video(self,
                         max_num_frames: int = 5000,
                         min_resolution: int = 1080) -> None:
        """
		input_file is a path to a video file.
		Read frames from an RGB video file, and write overlaid
		predictions into a new video file.
			
			Args:
			-	None

			Returns:
			-	None
		"""
        in_fname_stem = Path(self.input_file).stem
        out_fname = f'{in_fname_stem}_{self.args.model_name}_universal'
        out_fname += f'_scales_{self.scales_str}_base_sz_{self.args.base_size}.mp4'

        output_video_fpath = f'{_ROOT}/temp_files/{out_fname}'
        create_leading_fpath_dirs(output_video_fpath)
        logger.info(f'Write video to {output_video_fpath}')
        writer = VideoWriter(output_video_fpath)

        reader = VideoReader(self.input_file)
        for frame_idx in range(reader.num_frames):
            logger.info(f'On image {frame_idx}/{reader.num_frames}')
            rgb_img = reader.get_frame()
            if frame_idx > max_num_frames:
                break
            pred_label_img = self.execute_on_img(rgb_img)

            # avoid blurry images by upsampling RGB before overlaying text
            if np.amin(rgb_img.shape[:2]) < min_resolution:
                rgb_img = resize_img_by_short_side(rgb_img, min_resolution,
                                                   'rgb')
                pred_label_img = resize_img_by_short_side(
                    pred_label_img, min_resolution, 'label')

            metadata = None
            frame_visualizer = Visualizer(rgb_img, metadata)
            output_img = frame_visualizer.overlay_instances(
                label_map=pred_label_img,
                id_to_class_name_map=self.id_to_class_name_map)
            writer.add_frame(output_img)

        reader.complete()
        writer.complete()

    def execute_on_dataloader(
            self, test_loader: torch.utils.data.dataloader.DataLoader):
        """
			Args:
			-   test_loader: 

			Returns:
			-   None
		"""
        if self.args.save_folder == 'default':
            self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

        os.makedirs(self.args.save_folder, exist_ok=True)
        gray_folder = os.path.join(self.args.save_folder, 'gray')
        self.gray_folder = gray_folder

        data_time = AverageMeter()
        batch_time = AverageMeter()
        end = time.time()

        for i, (input, _) in enumerate(test_loader):
            logger.info(f'On image {i}')

            data_time.update(time.time() - end)
            # convert Pytorch tensor -> Numpy
            input = np.squeeze(input.numpy(), axis=0)
            image = np.transpose(input, (1, 2, 0))
            gray_img = self.execute_on_img(image)

            batch_time.update(time.time() - end)
            end = time.time()
            check_mkdir(self.gray_folder)
            image_path, _ = self.data_list[i]

            if self.args.img_name_unique:
                image_name = Path(image_path).stem
            else:
                image_name = get_unique_stem_from_last_k_strs(image_path)

            gray_path = os.path.join(self.gray_folder, image_name + '.png')
            cv2.imwrite(gray_path, gray_img)

            # todo: update to time remaining.
            if ((i + 1) % self.args.print_freq == 0) or (i + 1
                                                         == len(test_loader)):
                logger.info(
                    'Test: [{}/{}] '
                    'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                    'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.
                    format(i + 1,
                           len(test_loader),
                           data_time=data_time,
                           batch_time=batch_time))

    def scale_process_cuda(self,
                           image: np.ndarray,
                           h: int,
                           w: int,
                           stride_rate: float = 2 / 3):
        """ First, pad the image. If input is (384x512), then we must pad it up to shape
		to have shorter side "scaled base_size". 

		Then we perform the sliding window on this scaled image, and then interpolate 
		(downsample or upsample) the prediction back to the original one.

		At each pixel, we increment a counter for the number of times this pixel
		has passed through the sliding window.

		Args:
		-   image: Array, representing image where shortest edge is adjusted to base_size
		-   h: integer representing raw image height, e.g. for NYU it is 480
		-   w: integer representing raw image width, e.g. for NYU it is 640
		-   stride_rate

		Returns:
		-   prediction: predictions with shorter side equal to self.base_size
		"""
        start1 = time.time()

        ori_h, ori_w, _ = image.shape
        image, pad_h_half, pad_w_half = pad_to_crop_sz(image, self.crop_h,
                                                       self.crop_w, self.mean)
        new_h, new_w, _ = image.shape
        stride_h = int(np.ceil(self.crop_h * stride_rate))
        stride_w = int(np.ceil(self.crop_w * stride_rate))
        grid_h = int(np.ceil(float(new_h - self.crop_h) / stride_h) + 1)
        grid_w = int(np.ceil(float(new_w - self.crop_w) / stride_w) + 1)

        prediction_crop = torch.zeros((self.pred_dim, new_h, new_w)).cuda()
        count_crop = torch.zeros((new_h, new_w)).cuda()

        for index_h in range(0, grid_h):
            for index_w in range(0, grid_w):
                s_h = index_h * stride_h
                e_h = min(s_h + self.crop_h, new_h)
                s_h = e_h - self.crop_h
                s_w = index_w * stride_w
                e_w = min(s_w + self.crop_w, new_w)
                s_w = e_w - self.crop_w
                image_crop = image[s_h:e_h, s_w:e_w].copy()
                count_crop[s_h:e_h, s_w:e_w] += 1
                prediction_crop[:, s_h:e_h,
                                s_w:e_w] += self.net_process(image_crop)

        prediction_crop /= count_crop.unsqueeze(0)
        # disregard predictions from padded portion of image
        prediction_crop = prediction_crop[:, pad_h_half:pad_h_half + ori_h,
                                          pad_w_half:pad_w_half + ori_w]

        # CHW -> HWC
        prediction_crop = prediction_crop.permute(1, 2, 0)
        prediction_crop = prediction_crop.data.cpu().numpy()

        # upsample or shrink predictions back down to scale=1.0
        prediction = cv2.resize(prediction_crop, (w, h),
                                interpolation=cv2.INTER_LINEAR)

        return prediction

    def net_process(self, image: np.ndarray, flip: bool = True):
        """ Feed input through the network.

			In addition to running a crop through the network, we can flip
			the crop horizontally, run both crops through the network, and then
			average them appropriately.

			Args:
			-   image: Numpy array representing an image crop
			-   flip: boolean, whether to average with flipped patch output

			Returns:
			-   output: network output for the crop
		"""
        input = torch.from_numpy(image.transpose((2, 0, 1))).float()
        normalize_img(input, self.mean, self.std)
        input = input.unsqueeze(0)

        if self.use_gpu:
            input = input.cuda()
        if flip:
            # add another example to batch dimension, that is the flipped crop
            input = torch.cat([input, input.flip(3)], 0)
        with torch.no_grad():
            output = self.model(input)
        _, _, h_i, w_i = input.shape
        _, _, h_o, w_o = output.shape
        if (h_o != h_i) or (w_o != w_i):
            output = F.interpolate(output, (h_i, w_i),
                                   mode='bilinear',
                                   align_corners=True)

        if self.output_taxonomy == 'universal':
            output = self.softmax(output)
        elif self.output_taxonomy == 'test_dataset':
            output = self.convert_pred_to_label_tax_and_softmax(output)
        else:
            print('Unrecognized output taxonomy. Quitting....')
            quit()
        # print(time.time() - start1, image_scale.shape, h, w)

        if flip:
            # take back out the flipped crop, correct its orientation, and average result
            output = (output[0] + output[1].flip(2)) / 2
        else:
            output = output[0]
        # output = output.data.cpu().numpy()
        # convert CHW to HWC order
        # output = output.transpose(1, 2, 0)
        # output = output.permute(1,2,0)

        return output

    def convert_pred_to_label_tax_and_softmax(self, output):
        """
		"""
        if not self.args.universal:
            output = self.tc.transform_predictions_test(
                output, self.args.dataset)
        else:
            output = self.tc.transform_predictions_universal(
                output, self.args.dataset)
        return output
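Note: the flip-averaging performed in net_process above can be illustrated in isolation; a toy sketch with an identity stand-in for the model:

import torch

model = torch.nn.Identity()                 # stand-in for the real network, illustrative only
crop = torch.rand(1, 3, 8, 8)               # one NCHW crop
batch = torch.cat([crop, crop.flip(3)], 0)  # original + horizontally flipped copy
out = model(batch)                          # shape (2, 3, 8, 8)
avg = (out[0] + out[1].flip(2)) / 2         # un-flip the second output, then average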
Example 16
    def __init__(self, dataset: str, use_naive_taxonomy: bool = False) -> None:
        self.dataset = dataset
        if use_naive_taxonomy:
            self.tax_converter = NaiveTaxonomyConverter()
        else:
            self.tax_converter = TaxonomyConverter()
Example 17
class InferenceTask:
    def __init__(self, args, base_size: int, crop_h: int, crop_w: int,
                 input_file: str, output_taxonomy: str, scales: List[float],
                 device_type: str, output_path: str):
        """
        We always use the ImageNet mean and standard deviation for normalization.
        mean: 3-tuple of floats, representing pixel mean value
        std: 3-tuple of floats, representing pixel standard deviation

        'args' should contain at least the fields asserted below.

            Args:
            -    args:
            -    base_size:
            -    crop_h: integer representing crop height, e.g. 473
            -    crop_w: integer representing crop width, e.g. 473
            -    input_file: could be absolute path to .txt file, .mp4 file,
                    or to a directory full of jpg images
            -    output_taxonomy
            -    scales
            -    device_type: either 'cuda' or 'cpu'
            -    output_path: path at which to write the prediction image
        """
        self.args = args
        assert isinstance(self.args.img_name_unique, bool)
        assert isinstance(self.args.print_freq, int)
        assert isinstance(self.args.num_model_classes, int)
        assert isinstance(self.args.model_path, str)
        self.pred_dim = self.args.num_model_classes

        self.base_size = base_size
        self.crop_h = crop_h
        self.crop_w = crop_w
        self.input_file = input_file
        self.output_taxonomy = output_taxonomy
        self.scales = scales
        self.use_gpu = device_type == 'cuda'
        self.device = torch.device(device_type)
        self.output_path = output_path

        self.mean, self.std = get_imagenet_mean_std()
        self.model = self.load_model(args)
        self.softmax = nn.Softmax(dim=1)

        self.gray_folder = None  # optional, intended for dataloader use
        self.data_list = None  # optional, intended for dataloader use

        if self.output_taxonomy != 'universal':
            assert isinstance(self.args.dataset, str)
            self.dataset_name = args.dataset
            self.tc = TaxonomyConverter()

        if self.args.arch == 'psp':
            assert isinstance(self.args.zoom_factor, int)
            assert isinstance(self.args.network_name, int)

        self.id_to_class_name_map = {
            i: classname
            for i, classname in enumerate(get_universal_class_names())
        }

        # indicate which scales were used to make predictions
        # (multi-scale vs. single-scale)
        self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'

    def load_model(self, args):
        """
        Load Pytorch pre-trained model from disk of type 
        torch.nn.DataParallel. Note that
        `args.num_model_classes` will be size of logits output.

            Args:
            -   args: 

            Returns:
            -   model
        """
        if args.arch == 'psp':
            model = PSPNet(layers=args.layers,
                           classes=args.num_model_classes,
                           zoom_factor=args.zoom_factor,
                           pretrained=False,
                           network_name=args.network_name)
        elif args.arch == 'hrnet':
            from mseg_semantic.model.seg_hrnet import get_configured_hrnet
            # note apex batchnorm is hardcoded
            model = get_configured_hrnet(args.num_model_classes,
                                         load_imagenet_model=False)
        elif args.arch == 'hrnet_ocr':
            from mseg_semantic.model.seg_hrnet_ocr import get_configured_hrnet_ocr
            model = get_configured_hrnet_ocr(args.num_model_classes)

        model = torch.nn.DataParallel(model)
        #model.to(self.device)

        if os.path.isfile(args.model_path):
            logger.info(f"=> loading checkpoint '{args.model_path}'")
            if self.use_gpu:
                checkpoint = torch.load(args.model_path)
            else:
                checkpoint = torch.load(args.model_path, map_location='cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            logger.info(f"=> loaded checkpoint '{args.model_path}'")
        else:
            raise RuntimeError(
                f"=> no checkpoint found at '{args.model_path}'")

        return model

    def execute(self, min_resolution=1080):
        """
        Execute the demo, i.e. feed all of the desired input through the
        network and obtain predictions. Gracefully handles .txt, 
        or video file (.mp4, etc), or directory input.
        """
        logger.info('>>>>>>>>>>>>>>>> Start inference task >>>>>>>>>>>>>>>>')
        self.model.eval()
        """
        Since overlaid class text is difficult to read below 1080p, we upsample
        predictions.
        """
        logger.info(f'Write image prediction to {self.output_path}')

        rgb_img = imread_rgb(self.input_file)
        pred_label_img = self.execute_on_img(rgb_img)

        # avoid blurry images by upsampling RGB before overlaying text
        if np.amin(rgb_img.shape[:2]) < min_resolution:
            rgb_img = resize_img_by_short_side(rgb_img, min_resolution, 'rgb')
            pred_label_img = resize_img_by_short_side(pred_label_img,
                                                      min_resolution, 'label')

        imageio.imwrite(self.output_path, pred_label_img)

        logger.info(
            '<<<<<<<<<<<<<<<<< Inference task completed <<<<<<<<<<<<<<<<<')

    def execute_on_img(self, image: np.ndarray) -> np.ndarray:
        """
        Rather than feeding in crops w/ sliding window across the full-res image, we 
        downsample/upsample the image to a default inference size. This may differ
        from the best training size.

        For example, if trained on small images, we must shrink down the image in 
        testing (preserving the aspect ratio), based on the parameter "base_size",
        which is the short side of the image.

            Args:
            -    image: Numpy array representing RGB image
            
            Returns:
            -    gray_img: prediction, representing predicted label map
        """
        h, w, _ = image.shape

        prediction = np.zeros((h, w, self.pred_dim), dtype=float)
        prediction = torch.Tensor(prediction).to(self.device)

        for scale in self.scales:
            image_scale = resize_by_scaled_short_side(image, self.base_size,
                                                      scale)
            prediction = prediction + torch.Tensor(
                self.scale_process_cuda(image_scale, h, w)).to(self.device)

        prediction /= len(self.scales)
        prediction = torch.argmax(prediction, axis=2)
        prediction = prediction.data.cpu().numpy()
        gray_img = np.uint8(prediction)
        return gray_img

    def scale_process_cuda(self,
                           image: np.ndarray,
                           h: int,
                           w: int,
                           stride_rate: float = 2 / 3):
        """ First, pad the image. If input is (384x512), then we must pad it up to shape
        to have shorter side "scaled base_size". 

        Then we perform the sliding window on this scaled image, and then interpolate 
        (downsample or upsample) the prediction back to the original one.

        At each pixel, we increment a counter for the number of times this pixel
        has passed through the sliding window.

        Args:
        -   image: Array, representing image where shortest edge is adjusted to base_size
        -   h: integer representing raw image height, e.g. for NYU it is 480
        -   w: integer representing raw image width, e.g. for NYU it is 640
        -   stride_rate

        Returns:
        -   prediction: predictions with shorter side equal to self.base_size
        """
        start1 = time.time()

        ori_h, ori_w, _ = image.shape
        image, pad_h_half, pad_w_half = pad_to_crop_sz(image, self.crop_h,
                                                       self.crop_w, self.mean)
        new_h, new_w, _ = image.shape
        stride_h = int(np.ceil(self.crop_h * stride_rate))
        stride_w = int(np.ceil(self.crop_w * stride_rate))
        grid_h = int(np.ceil(float(new_h - self.crop_h) / stride_h) + 1)
        grid_w = int(np.ceil(float(new_w - self.crop_w) / stride_w) + 1)

        prediction_crop = torch.zeros(
            (self.pred_dim, new_h, new_w)).to(self.device)
        count_crop = torch.zeros((new_h, new_w)).to(self.device)

        for index_h in range(0, grid_h):
            for index_w in range(0, grid_w):
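                # clamp each window so it lies inside the padded image; the last
                # window is shifted back so it ends exactly at the border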
                s_h = index_h * stride_h
                e_h = min(s_h + self.crop_h, new_h)
                s_h = e_h - self.crop_h
                s_w = index_w * stride_w
                e_w = min(s_w + self.crop_w, new_w)
                s_w = e_w - self.crop_w
                image_crop = image[s_h:e_h, s_w:e_w].copy()
                count_crop[s_h:e_h, s_w:e_w] += 1
                prediction_crop[:, s_h:e_h,
                                s_w:e_w] += self.net_process(image_crop)

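        # average overlapping predictions: divide each pixel by the number of windows that covered it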
        prediction_crop /= count_crop.unsqueeze(0)
        # disregard predictions from padded portion of image
        prediction_crop = prediction_crop[:, pad_h_half:pad_h_half + ori_h,
                                          pad_w_half:pad_w_half + ori_w]

        # CHW -> HWC
        prediction_crop = prediction_crop.permute(1, 2, 0)
        prediction_crop = prediction_crop.data.cpu().numpy()

        # upsample or shrink predictions back down to scale=1.0
        prediction = cv2.resize(prediction_crop, (w, h),
                                interpolation=cv2.INTER_LINEAR)

        return prediction

    def net_process(self, image: np.ndarray, flip: bool = True) -> torch.Tensor:
        """ Feed a crop through the network.

            In addition to running the crop through the network, we can flip
            the crop horizontally, run both versions through the network, and
            then average the two outputs appropriately.

            Args:
            -   image: Numpy array representing an image crop
            -   flip: boolean, whether to average with the flipped crop's output

            Returns:
            -   output: CHW Tensor of per-class probabilities for the crop
        """
        input = torch.from_numpy(image.transpose(
            (2, 0, 1))).float().to(self.device)
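        # normalize_img() normalizes the tensor in place (its return value is not used)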
        normalize_img(input, self.mean, self.std)
        input = input.unsqueeze(0)

        if flip:
            # add another example to batch dimension, that is the flipped crop
            input = torch.cat([input, input.flip(3)], 0)
        with torch.no_grad():
            output = self.model(input)
        _, _, h_i, w_i = input.shape
        _, _, h_o, w_o = output.shape
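        # if the network's output map is smaller than the input crop, resize logits back to crop size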
        if (h_o != h_i) or (w_o != w_i):
            output = F.interpolate(output, (h_i, w_i),
                                   mode='bilinear',
                                   align_corners=True)

        if self.output_taxonomy == 'universal':
            output = self.softmax(output)
        elif self.output_taxonomy == 'test_dataset':
            output = self.convert_pred_to_label_tax_and_softmax(output)
        else:
            print('Unrecognized output taxonomy. Quitting....')
            quit()

        if flip:
            # take back out the flipped crop, correct its orientation, and average result
            output = (output[0] + output[1].flip(2)) / 2
        else:
            output = output[0]

        return output

    def convert_pred_to_label_tax_and_softmax(self, output):
        """ Map raw model outputs into the desired output taxonomy and apply the softmax. """
        if not self.args.universal:
            output = self.tc.transform_predictions_test(
                output, self.args.dataset)
        else:
            output = self.tc.transform_predictions_universal(
                output, self.args.dataset)
        return output
	def __init__(self,
		args,
		base_size: int,
		crop_h: int,
		crop_w: int,
		input_file: str,
		model_taxonomy: str,
		eval_taxonomy: str,
		scales: List[float],
		use_gpu: bool = True
		):
		"""
		We always use the ImageNet mean and standard deviation for normalization.
		mean: 3-tuple of floats, representing pixel mean value
		std: 3-tuple of floats, representing pixel standard deviation

		'args' must provide the required fields asserted below.
		See brief explanation at top of file regarding taxonomy arg configurations.
		
		Args:
		    args: experiment configuration arguments
		    base_size: shorter side of image
		    crop_h: integer representing crop height, e.g. 473
		    crop_w: integer representing crop width, e.g. 473
		    input_file: could be absolute path to .txt file, .mp4 file, or to a directory full of jpg images
		    model_taxonomy: taxonomy in which trained model makes predictions
		    eval_taxonomy: taxonomy in which trained model is evaluated
		    scales: floats representing image scales for multi-scale inference
		    use_gpu: TODO, not supporting cpu at this time
		"""
		self.args = args

		# Required arguments:
		assert isinstance(self.args.save_folder, str)
		assert isinstance(self.args.dataset, str)
		assert isinstance(self.args.img_name_unique, bool)
		assert isinstance(self.args.print_freq, int)
		assert isinstance(self.args.num_model_classes, int)
		assert isinstance(self.args.model_path, str)
		self.num_model_classes = self.args.num_model_classes

		self.base_size = base_size
		self.crop_h = crop_h
		self.crop_w = crop_w
		self.input_file = input_file
		self.model_taxonomy = model_taxonomy
		self.eval_taxonomy = eval_taxonomy
		self.scales = scales
		self.use_gpu = use_gpu

		self.mean, self.std = get_imagenet_mean_std()
		self.model = self.load_model(args)
		self.softmax = nn.Softmax(dim=1)

		self.gray_folder = None # optional, intended for dataloader use
		self.data_list = None # optional, intended for dataloader use

		if model_taxonomy == 'universal' and eval_taxonomy == 'universal':
			# See note above.
			# no conversion of predictions required
			self.num_eval_classes = self.num_model_classes 

		elif model_taxonomy == 'test_dataset' and eval_taxonomy == 'test_dataset':
			# no conversion of predictions required
			self.num_eval_classes = len(load_class_names(args.dataset))

		elif model_taxonomy == 'naive' and eval_taxonomy == 'test_dataset':
			self.tc = NaiveTaxonomyConverter()
			if args.dataset in self.tc.convs.keys() and use_gpu:
				self.tc.convs[args.dataset].cuda()
			self.tc.softmax.cuda()
			self.num_eval_classes = len(load_class_names(args.dataset))

		elif model_taxonomy == 'universal' and eval_taxonomy == 'test_dataset':
			# no label conversion required here, only predictions converted
			self.tc = TaxonomyConverter()
			if args.dataset in self.tc.convs.keys() and use_gpu:
				self.tc.convs[args.dataset].cuda()
			self.tc.softmax.cuda()
			self.num_eval_classes = len(load_class_names(args.dataset))

		if self.args.arch == 'psp':
			assert isinstance(self.args.zoom_factor, int)
			assert isinstance(self.args.network_name, int)

		# `id_to_class_name_map` only used for visualizing universal taxonomy
		self.id_to_class_name_map = {
			i: classname for i, classname in enumerate(get_universal_class_names())
		}

		# indicate which scales were used to make predictions
		# (multi-scale vs. single-scale)
		self.scales_str = 'ms' if len(args.scales) > 1 else 'ss'
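The horizontal-flip averaging performed inside net_process can be illustrated in isolation. Below is a minimal, self-contained sketch; the one-layer "model" and the tensor sizes are placeholders chosen purely for illustration, not part of the snippet above:

import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Conv2d(3, 5, kernel_size=1)           # stand-in for a fully convolutional segmentation model

crop = torch.rand(1, 3, 8, 8)                    # one RGB crop, NCHW
batch = torch.cat([crop, crop.flip(3)], dim=0)   # original + horizontally flipped copy

with torch.no_grad():
    probs = F.softmax(model(batch), dim=1)

# flip the second prediction back before averaging, as in net_process
avg = (probs[0] + probs[1].flip(2)) / 2          # CHW probabilities for the crop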
Esempio n. 19
0
def main():
    """
    """
    import pickle

    import os, math
    import numpy as np
    import torch
    import torch.backends.cudnn as cudnn
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.nn.parallel
    import torch.optim
    import torch.utils.data
    import torch.multiprocessing as mp
    import torch.distributed as dist
    # from tensorboardX import SummaryWriter
    from mseg.utils.dataset_config import infos
    from mseg.taxonomy.taxonomy_converter import TaxonomyConverter
    from mseg.taxonomy.naive_taxonomy_converter import NaiveTaxonomyConverter

    from mseg_semantic.utils import config
    from mseg_semantic.utils.avg_meter import AverageMeter, SegmentationAverageMeter
    from mseg_semantic.utils.verification_utils import verify_architecture

    print('Using PyTorch version: ', torch.__version__)
    args = get_parser()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.train_gpu)


    ###### FLAT-MIX CODE #######################
    print(os.environ["CUDA_VISIBLE_DEVICES"])

    # Randomize args.dist_url to avoid conflicts on the same machine
    args.dist_url = args.dist_url[:-2] + str(os.getpid() % 100).zfill(2)


    if isinstance(args.dataset, str): # only one dataset
        args.dataset = [args.dataset]
        print(args.dataset)
        args.dataset_gpu_mapping = {args.dataset[0]: [0,1,2,3,4,5,6,7]}

    
    if len(args.dataset) > 1 and args.universal: # training on multiple datasets must use the universal taxonomy
        if args.tax_version == 0:
            args.tc = StupidTaxonomyConverter(version=args.tax_version)
        else:
            if args.finetune:
                args.tc = TaxonomyConverter(version=args.tax_version, finetune=True, finetune_dataset=args.finetune_dataset)
            else:
                args.tc = TaxonomyConverter(version=args.tax_version)  # , train_datasets=args.dataset, test_datasets=args.test_dataset

        args.data_root = {dataset:infos[dataset].dataroot for dataset in args.dataset}
        args.train_list = {dataset:infos[dataset].trainlist for dataset in args.dataset}
        args.classes = args.tc.classes
        # args.save_path = args.save_path.replace("{}", '-'.join([infos[dataset].shortname for dataset in args.dataset]))

    elif (len(args.dataset) == 1) and args.universal: # single dataset trained on the universal taxonomy
        args.tc = TaxonomyConverter(version=args.tax_version, train_datasets=args.dataset)
        args.data_root = infos[args.dataset[0]].dataroot
        args.train_list = infos[args.dataset[0]].trainlist
        args.classes = args.tc.classes
        # args.save_path = args.save_path.replace("{}", info[args.dataset].shortname)

    elif (len(args.dataset) == 1) and (not args.universal): # single dataset trained on its own taxonomy
        args.data_root = infos[args.dataset[0]].dataroot
        args.train_list = infos[args.dataset[0]].trainlist
        args.classes = infos[args.dataset[0]].num_classes
        # args.save_path = args.save_path.replace("{}", infos[args.dataset].shortname)
    else:
        print('Unsupported dataset/taxonomy configuration, please check the config')
        exit()
    
    # verify arch after args.classes is populated
    verify_architecture(args)

    if args.manual_seed is not None:
        cudnn.benchmark = False
        cudnn.deterministic = True
        torch.manual_seed(args.manual_seed)
        np.random.seed(args.manual_seed)
        torch.cuda.manual_seed_all(args.manual_seed)
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])
    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    args.ngpus_per_node = len(args.train_gpu)
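    # with a single GPU there is nothing to synchronize, so sync-BN and distributed mode are disabled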
    if len(args.train_gpu) == 1:
        args.sync_bn = False
        args.distributed = False
        args.multiprocessing_distributed = False
    if args.multiprocessing_distributed:
        args.world_size = args.ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=args.ngpus_per_node, args=(args.ngpus_per_node, args))
    else:
        main_worker(args.train_gpu, args.ngpus_per_node, args)
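main_worker itself is not shown in this snippet. torch.multiprocessing.spawn invokes its target as fn(i, *args), passing the process index as the first positional argument, so the worker must accept that index before the remaining arguments. A minimal skeleton under that assumption (the distributed-setup body below follows the conventional pattern and is not taken from this snippet):

import torch.distributed as dist

def main_worker(gpu, ngpus_per_node, argss):
    # `gpu` is the local process index injected by mp.spawn
    # (or the GPU list when called directly in the single-process branch above)
    if argss.multiprocessing_distributed:
        # the rank arithmetic and the 'nccl' backend are conventional choices, assumed here
        argss.rank = argss.rank * ngpus_per_node + gpu
        dist.init_process_group(backend='nccl',
                                init_method=argss.dist_url,
                                world_size=argss.world_size,
                                rank=argss.rank)
    # ... build the model and dataloaders, then run the training loop ...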