Ejemplo n.º 1
0
def remap_dataset(dname: str,
                  remapped_dname: str,
                  tsv_fpath: str,
                  old_dataroot: str,
                  remapped_dataroot: str,
                  include_ignore_idx_cls: bool = True,
                  convert_label_from_rgb: bool = False,
                  num_processes: int = 4):
    """
    Relabel the 'train' and 'val' splits of a dataset into another taxonomy.

    A class-id lookup (original id -> remapped id) is derived from the tsv,
    then the image/label pairs of each split are handed to
    `send_list_to_workers`, with `relabel_pair_worker` as the worker function.
    (the original notes state that 255 is always treated as unlabeled)

        Args:
        -   dname: string representing name of taxonomy for original dataset
        -   remapped_dname: string representing name of taxonomy for new dataset
        -   tsv_fpath: string representing path to a .tsv file
        -   old_dataroot: string representing path to original dataset
        -   remapped_dataroot: string representing path at which to write
                the new dataset
        -   include_ignore_idx_cls: whether to include unlabeled=255 from source
        -   convert_label_from_rgb: labels of original dataset are stored as RGB
        -   num_processes: integer representing number of workers to exploit

        Returns:
        -   None
    """
    # Colors (ordered by class index) are only loaded for RGB-encoded labels.
    dataset_colors = None
    if convert_label_from_rgb:
        dataset_colors = load_dataset_colors_arr(dname)

    # old class name -> new class name, as listed in the tsv
    name_remapping = read_label_mapping(
        filename=tsv_fpath,
        label_from=dname,
        label_to=remapped_dname,
        convert_val_to_int=False,
    )
    src_id_to_name = get_dataloader_id_to_classname_map(dname)
    dst_name_to_id = get_classname_to_dataloaderid_map(
        remapped_dname, include_ignore_idx_cls=include_ignore_idx_cls)

    # Chain the dictionaries: old id -> old name -> new name -> new id.
    src_name_to_dst_id = convert_dictionaries(name_remapping, dst_name_to_id)
    src_id_to_dst_id = convert_dictionaries(src_id_to_name,
                                            src_name_to_dst_id)
    id_lookup = form_label_mapping_array(src_id_to_dst_id)

    for split in ('train', 'val'):
        src_pairs = generate_all_img_label_pair_relative_fpaths(dname, split)
        dst_pairs = generate_all_img_label_pair_relative_fpaths(
            remapped_dname, split)

        send_list_to_workers(
            num_processes=num_processes,
            list_to_split=src_pairs,
            worker_func_ptr=relabel_pair_worker,
            remapped_relative_img_label_pairs=dst_pairs,
            label_mapping_arr=id_lookup,
            old_dataroot=old_dataroot,
            new_dataroot=remapped_dataroot,
            dataset_colors=dataset_colors)
Ejemplo n.º 2
0
def test_load_colors():
    """
    Check that the colors loaded for 'mapillary-public66' equal the per-label
    colors of the Mapillary config, taken in label order.
    """
    num_classes = 66
    config = read_mapillary_config_helper()

    # Ground truth: one RGB row per class, copied from the config labels.
    expected_colors = np.zeros((num_classes, 3), dtype=np.uint8)
    for class_idx in range(num_classes):
        label_entry = config['labels'][class_idx]
        expected_colors[class_idx] = np.array(label_entry['color'])

    loaded_colors = load_dataset_colors_arr('mapillary-public66')
    assert np.allclose(loaded_colors, expected_colors)
Ejemplo n.º 3
0
    def __init__(self, dataroot: str):
        """
        Store the dataset root and load the 'mapillary-public66' class colors
        and id -> class name map.

            Args:
            -   dataroot: string representing path to unzipped Mapillary file

            Returns:
            -   None
        """
        taxonomy_name = 'mapillary-public66'

        self.dataroot = dataroot
        self.dataset_ordered_colors = load_dataset_colors_arr(taxonomy_name)
        self.id_to_classname_map = get_dataloader_id_to_classname_map(
            dataset_name=taxonomy_name)
Ejemplo n.º 4
0
def test_load_dataset_colors_arr():
    """Check the first and last color rows loaded for 'camvid-32'."""
    expected_first = np.array([64, 128, 64])
    expected_last = np.array([64, 192, 0])

    colors = load_dataset_colors_arr('camvid-32')

    assert np.allclose(colors[0], expected_first)
    assert np.allclose(colors[-1], expected_last)
Ejemplo n.º 5
0
def remap_dataset(dname: str,
                  tsv_fpath: str,
                  old_dataroot: str,
                  remapped_dataroot: str,
                  panoptic_json_path: str,
                  num_processes: int = 4,
                  create_symlink_cpy: bool = False,
                  convert_label_from_rgb: bool = False):
    """
    Given path to a dataset, given names of _names.txt
    Remap according to the provided tsv.
    (also account for the fact that 255 is always unlabeled)

    For each of the 'train' and 'val' splits, the original image/label pairs
    are listed, target paths under 'images/<split>/<dname>' and
    'annotations/<split>/<dname>' are derived for them, and the pairs are
    handed to `send_list_to_workers` with `relabel_pair_worker` as the
    worker function.

        Args:
        -   dname: string representing name of taxonomy for original dataset
        -   tsv_fpath: string representing path to a .tsv file
        -   old_dataroot: string representing path to original dataset
        -   remapped_dataroot: string representing path at which to write
                the new dataset
        -   panoptic_json_path: string representing path to coco-style json
                file, or None to skip loading panoptic annotations. It is
                used as a format template: '{split}' is filled with the
                split name and '{split_idx}' with its index ('0' for train,
                '1' for val).
        -   num_processes: integer representing number of workers to exploit
        -   create_symlink_cpy: adds symbolic links for images in the same
                folder structure as annotations
        -   convert_label_from_rgb: labels of original dataset are stored as
                RGB; when True the dataset colors are loaded and forwarded
                to the workers

        Returns:
        -   None
    """
    # form one-way mapping between IDs
    tconv = TaxonomyConverter(train_datasets=[dname],
                              test_datasets=[],
                              tsv_fpath=tsv_fpath)
    dataset_colors = load_dataset_colors_arr(
        dname) if convert_label_from_rgb else None

    for split_idx, split in enumerate(['train', 'val']):
        panoptic_json_content = None
        orig_relative_img_label_pairs = generate_all_img_label_pair_relative_fpaths(
            dname, split)

        if panoptic_json_path is not None:
            json_fpath = panoptic_json_path.format(split=split,
                                                   split_idx=str(split_idx))
            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding (which differs e.g. on Windows).
            with open(json_fpath, 'r', encoding='utf-8') as ifile:
                json_cont = json.load(ifile)
            # Index the annotations by file name.
            panoptic_json_content = {
                a["file_name"]: a
                for a in json_cont["annotations"]
            }
            # HACK: needed for inplace coco support. Only applied when a
            # panoptic json is given, as in the original code.
            if dname.startswith("coco"):
                orig_relative_img_label_pairs = [[
                    fix_path_coco_inplace(p[0]),
                    fix_path_coco_inplace(p[1])
                ] for p in orig_relative_img_label_pairs]

        basedir = 'images/' + split + '/' + dname

        # Distinct directories that hold the original images.
        img_subdirs = {
            os.path.dirname(p[0])
            for p in orig_relative_img_label_pairs
        }
        # With a single source directory every image lands directly in
        # basedir; otherwise the (cleaned) source sub-path is kept below it.
        single_subdir = len(img_subdirs) == 1
        img_dir_remapping = {}
        for d in img_subdirs:
            if single_subdir:
                img_dir_remapping[d] = basedir
            else:
                img_dir_remapping[d] = basedir + '/' + d.replace(
                    '/color', '').replace('/leftImg8bit', '')
            if create_symlink_cpy:
                unpriv_symb_link(
                    old_dataroot + '/' + d,
                    remapped_dataroot + '/' + img_dir_remapping[d])

        # Target (image, label) paths; the label file reuses the image file
        # name with '.jpg' swapped for '.png'.
        # NOTE(review): replace('images', 'annotations') rewrites every
        # occurrence of 'images' in the directory, not only the leading
        # component -- kept as-is to preserve the existing output layout.
        remapped_relative_img_label_pairs = []
        for p in orig_relative_img_label_pairs:
            new_img_dir = img_dir_remapping[os.path.dirname(p[0])]
            img_fname = os.path.basename(p[0])
            remapped_relative_img_label_pairs.append(
                (new_img_dir + '/' + img_fname,
                 new_img_dir.replace('images', 'annotations') + '/' +
                 img_fname.replace('.jpg', '.png')))

        send_list_to_workers(
            num_processes=num_processes,
            list_to_split=orig_relative_img_label_pairs,
            worker_func_ptr=relabel_pair_worker,
            remapped_relative_img_label_pairs=remapped_relative_img_label_pairs,
            tax_converter=tconv,
            panoptic_json_content=panoptic_json_content,
            old_dataroot=old_dataroot,
            new_dataroot=remapped_dataroot,
            dname=dname,
            dataset_colors=dataset_colors)