def convert_mask(path: str, mask_path: str) -> None:
    """Convert the mat format labels of the PASCALContext dataset to masks.

    The file structure of the input path should be like::

        <path>
            trainval/
                <image_name>.mat
                ...

    Arguments:
        path: The root directory of the dataset.
        mask_path: The root directory where to save the masks.

    Raises:
        ModuleImportError: When the module "scipy" or "Pillow" can not be found.

    """
    try:
        from PIL import Image  # pylint: disable=import-outside-toplevel
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        module_name = error.name
        package_name = "Pillow" if module_name == "PIL" else None
        raise ModuleImportError(module_name=module_name, package_name=package_name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    # Create the output directory if it does not exist, so that Image.save does not fail.
    os.makedirs(root_mask_path, exist_ok=True)

    for mat_path in glob(os.path.join(root_path, "trainval", "*.mat")):
        stem = os.path.splitext(os.path.basename(mat_path))[0]
        mat = loadmat(mat_path)
        image = Image.fromarray(mat["LabelMap"])
        image.save(os.path.join(root_mask_path, f"{stem}.png"))

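# A minimal usage sketch (not part of the original module; the paths below are
# hypothetical): convert the trainval .mat label maps of a local PASCALContext
# copy into PNG masks.
#
#     convert_mask("~/data/PASCALContext", "~/data/PASCALContext/masks")
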
def _get_data(stem: str, image_path: str, annotation_path: str) -> Data:
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    data = Data(os.path.join(image_path, f"{stem}.jpg"))
    box2d = []
    with open(os.path.join(annotation_path, f"{stem}.xml"), encoding="utf-8") as fp:
        labels: Any = xmltodict.parse(fp.read())

    objects = labels["annotation"]["object"]
    # xmltodict returns a dict instead of a list when the xml file has only one object.
    if not isinstance(objects, list):
        objects = [objects]

    for item in objects:
        category = item["name"]
        attributes = {k: bool(int(v)) for k, v in item["actions"].items()}
        bndbox = item["bndbox"]
        box2d.append(
            LabeledBox2D(
                float(bndbox["xmin"]),
                float(bndbox["ymin"]),
                float(bndbox["xmax"]),
                float(bndbox["ymax"]),
                category=category,
                attributes=attributes,
            )
        )
    data.label.box2d = box2d
    return data

def _get_labels_map(path: str) -> Dict[str, Tuple[str, Dict[str, Any]]]:
    """Get celebrity_image_data from the .mat file.

    Arguments:
        path: The path of the .mat label file.

    Raises:
        ModuleImportError: When the module "h5py" can not be found.

    Returns:
        A dict mapping each image file name to a tuple of the zero-padded
        celebrity identity and a dict of the image attributes.

    """
    try:
        from h5py import File  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    mat_file = File(path, "r")
    celebrity_image_data = mat_file["celebrityImageData"]
    celebrity_data = mat_file["celebrityData"]

    # "name" is an HDF5 reference object which can be dereferenced in the .mat file.
    id2name_map = {
        identity: _hdf5_to_str(mat_file[name])
        for identity, name in zip(celebrity_data["identity"][0], celebrity_data["name"][0])
    }

    labels_map = {}
    # The "name" here is not the name of the celebrity but the name of the image file.
    for name, identity, *values in zip(*(celebrity_image_data[key][0] for key in _MAT_KEYS)):
        attribute = {"name": id2name_map[identity]}
        attribute.update(zip(_MAT_KEYS[2:], values))
        labels_map[_hdf5_to_str(mat_file[name])] = (str(int(identity)).zfill(4), attribute)
    return labels_map

def LeedsSportsPose(path: str) -> Dataset:
    """`Leeds Sports Pose <http://sam.johnson.io/research/lsp.html>`_ dataset.

    The folder structure should be like::

        <path>
            joints.mat
            images/
                im0001.jpg
                im0002.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    mat = loadmat(os.path.join(root_path, "joints.mat"))
    joints = mat["joints"].T
    image_paths = glob(os.path.join(root_path, "images", "*.jpg"))
    for image_path in image_paths:
        data = Data(image_path)
        data.label.keypoints2d = []
        index = int(os.path.basename(image_path)[2:6]) - 1  # get image index from "im0001.jpg"
        keypoints = LabeledKeypoints2D()
        for keypoint in joints[index]:
            keypoints.append(Keypoint2D(keypoint[0], keypoint[1], int(not keypoint[2])))
        data.label.keypoints2d.append(keypoints)
        segment.append(data)
    return dataset

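# Usage sketch (the path is hypothetical, assuming a local copy of the dataset):
#
#     dataset = LeedsSportsPose("~/data/lsp")
#     data = dataset[0][0]                   # first data of the default segment
#     keypoints = data.label.keypoints2d[0]  # one LabeledKeypoints2D per image
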
def FLIC(path: str) -> Dataset:
    """`FLIC <https://bensapp.github.io/flic-dataset.html>`_ dataset.

    The folder structure should be like::

        <path>
            examples.mat
            images/
                2-fast-2-furious-00003571.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    annotations = loadmat(os.path.join(root_path, "examples.mat"))["examples"][0]
    dataset.create_segment("train")
    dataset.create_segment("test")
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    # Check whether the annotations have an "isbad" field to decide
    # if the dataset needs a "bad" segment.
    try:
        _ = annotations["isbad"]
        flag = True
        dataset.create_segment("bad")
        dataset.catalog.classification.add_attribute(name="isunchecked", type_="boolean")
    except ValueError:
        flag = False

    for data, segment_name in _get_data(root_path, annotations, flag):
        dataset[segment_name].append(data)
    return dataset

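# Usage sketch (hypothetical path): the returned dataset always has "train" and
# "test" segments, plus a "bad" segment when examples.mat carries an "isbad" field.
#
#     dataset = FLIC("~/data/FLIC")
#     train_segment = dataset["train"]
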
def _get_data_part1(root_path: str, animals: Iterable[str]) -> Iterator[Data]:
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in animals:
        for image_path in glob(os.path.join(root_path, "keypoint_image_part1", animal, "*.jpg")):
            data = Data(image_path, target_remote_path=f"{animal}/{os.path.basename(image_path)}")
            for annotation_path in glob(
                os.path.join(
                    root_path,
                    "PASCAL2011_animal_annotation",
                    animal,
                    f"{os.path.splitext(os.path.basename(image_path))[0]}_*.xml",
                )
            ):
                with open(annotation_path, encoding="utf-8") as fp:
                    labels: Any = xmltodict.parse(fp.read())

                box2d = labels["annotation"]["visible_bounds"]
                data.label.box2d = [
                    LabeledBox2D.from_xywh(
                        x=float(box2d["@xmin"]),
                        y=float(box2d["@ymin"]),
                        width=float(box2d["@width"]),
                        height=float(box2d["@height"]),
                        category=animal,
                    )
                ]

                keypoints2d: List[Tuple[float, float, int]] = [()] * 20  # type: ignore[list-item]
                for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                    keypoints2d[_KEYPOINT_TO_INDEX[keypoint["@name"]]] = (
                        float(keypoint["@x"]),
                        float(keypoint["@y"]),
                        int(keypoint["@visible"]),
                    )
                data.label.keypoints2d = [LabeledKeypoints2D(keypoints2d, category=animal)]
            yield data

def SVHN(path: str) -> Dataset:
    """`SVHN <http://ufldl.stanford.edu/housenumbers>`_ dataset.

    The file structure should be like::

        <path>
            Cropped/
                extra_32x32.mat
                test_32x32.mat
                train_32x32.mat
            FullNumbers/
                extra/
                    116507.png
                    116508.png
                    ...
                    digitStruct.mat
                    see_bboxes.m
                test/
                train/

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "h5py" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from h5py import File  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.join(os.path.abspath(os.path.expanduser(path)), "FullNumbers")
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    for segment_name in _SEGMENTS:
        segment = dataset.create_segment(segment_name)
        file_path = os.path.join(root_path, segment_name)
        mat = File(os.path.join(file_path, "digitStruct.mat"), "r")
        names = mat["digitStruct"]["name"]
        bboxes = mat["digitStruct"]["bbox"]
        for name, bbox in zip(names, bboxes):
            segment.append(_get_data(mat, name, bbox, file_path))
    return dataset

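# Usage sketch (hypothetical path): only the "FullNumbers" images are loaded here;
# each data carries the box2d labels parsed from digitStruct.mat. The segment names
# come from _SEGMENTS, presumably matching the extra/test/train folder layout.
#
#     dataset = SVHN("~/data/SVHN")
#     for data in dataset["train"]:
#         boxes = data.label.box2d
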
def Flower17(path: str) -> Dataset:
    """`17 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/17/index.html>`_ dataset.

    The dataset has 3 separate splits, and the results in the paper are averaged over them.
    Only the first split (trn1, val1, tst1) is used here.

    The file structure should be like::

        <path>
            jpg/
                image_0001.jpg
                ...
            datasplits.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    segment_info = loadmat(os.path.join(root_path, "datasplits.mat"))

    dataset = Dataset(DATASET_NAME_17)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_17.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()
    for key, value in _SEGMENT_NAMES_17.items():
        segment = dataset.create_segment(key)
        segment_info[value][0].sort()
        for index in segment_info[value][0]:
            data = Data(os.path.join(root_path, "jpg", f"image_{index:04d}.jpg"))

            # There are 80 images for each category.
            data.label.classification = Classification(
                category=index_to_category[(index - 1) // 80]
            )
            segment.append(data)
    return dataset

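# Usage sketch (hypothetical path; segment names depend on _SEGMENT_NAMES_17,
# so the first segment is accessed by index here):
#
#     dataset = Flower17("~/data/17flowers")
#     category = dataset[0][0].label.classification.category
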
def Flower102(path: str) -> Dataset:
    """`102 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html>`_ dataset.

    The file structure should be like::

        <path>
            jpg/
                image_00001.jpg
                ...
            imagelabels.mat
            setid.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    labels = loadmat(os.path.join(root_path, "imagelabels.mat"))["labels"][0]
    segment_info = loadmat(os.path.join(root_path, "setid.mat"))

    dataset = Dataset(DATASET_NAME_102)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_102.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()
    for key, value in _SEGMENT_NAMES_102.items():
        segment = dataset.create_segment(key)
        segment_info[value][0].sort()
        for index in segment_info[value][0]:
            data = Data(os.path.join(root_path, "jpg", f"image_{index:05d}.jpg"))
            data.label.classification = Classification(
                index_to_category[int(labels[index - 1]) - 1]
            )
            segment.append(data)
    return dataset

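# Usage sketch (hypothetical path; mirrors the Flower17 example above, except that
# the category comes from imagelabels.mat instead of a fixed 80-images-per-category rule):
#
#     dataset = Flower102("~/data/102flowers")
#     category = dataset[0][0].label.classification.category
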
def _get_data_part2(root_path: str, animals: Iterable[str]) -> Iterator[Data]:
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in animals:
        for image_path in glob(os.path.join(root_path, "animalpose_image_part2", animal, "*.jpeg")):
            data = Data(image_path, target_remote_path=f"{animal}/{os.path.basename(image_path)}")
            annotation_path = os.path.join(
                root_path,
                "animalpose_anno2",
                animal,
                f"{os.path.splitext(os.path.basename(image_path))[0]}.xml",
            )

            with open(annotation_path, encoding="utf-8") as fp:
                labels: Any = xmltodict.parse(fp.read())

            box2d = labels["annotation"]["visible_bounds"]
            data.label.box2d = [
                LabeledBox2D.from_xywh(
                    x=float(box2d["@xmin"]),
                    y=float(box2d["@xmax"]),  # "xmax" means ymin in the annotation
                    width=float(box2d["@width"]),
                    height=float(box2d["@height"]),
                    category=animal,
                )
            ]

            keypoints2d = LabeledKeypoints2D(category=animal)
            for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                keypoints2d.append(
                    Keypoint2D(float(keypoint["@x"]), float(keypoint["@y"]), int(keypoint["@visible"]))
                )
            data.label.keypoints2d = [keypoints2d]
            yield data

def _load_sensors(calib_path: str) -> Sensors:
    try:
        import yaml  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name, package_name="pyyaml") from error

    sensors = Sensors()

    lidar = Lidar("LIDAR")
    lidar.set_extrinsics()
    sensors.add(lidar)

    with open(os.path.join(calib_path, "extrinsics.yaml"), "r", encoding="utf-8") as fp:
        extrinsics = yaml.load(fp, Loader=yaml.FullLoader)

    for camera_calibration_file in glob(os.path.join(calib_path, "[0-9]*.yaml")):
        with open(camera_calibration_file, "r", encoding="utf-8") as fp:
            camera_calibration = yaml.load(fp, Loader=yaml.FullLoader)

        # camera_calibration_file looks like:
        # /path-to-CADC/2018_03_06/calib/00.yaml
        camera_name = f"CAM{os.path.splitext(os.path.basename(camera_calibration_file))[0]}"
        camera = Camera(camera_name)
        camera.description = camera_calibration["camera_name"]
        camera.set_extrinsics(matrix=extrinsics[f"T_LIDAR_{camera_name}"])

        camera_matrix = camera_calibration["camera_matrix"]["data"]
        camera.set_camera_matrix(matrix=[camera_matrix[:3], camera_matrix[3:6], camera_matrix[6:9]])

        distortion = camera_calibration["distortion_coefficients"]["data"]
        camera.set_distortion_coefficients(**dict(zip(("k1", "k2", "p1", "p2", "k3"), distortion)))

        sensors.add(camera)
    return sensors

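# Shape of the calibration files consumed above (a sketch reconstructed from the
# parsing code; all values are placeholders, not real CADC calibration data):
#
#     # extrinsics.yaml -- one 4x4 extrinsic transform per camera
#     T_LIDAR_CAM00:
#     - [1.0, 0.0, 0.0, 0.0]
#     - [0.0, 1.0, 0.0, 0.0]
#     - [0.0, 0.0, 1.0, 0.0]
#     - [0.0, 0.0, 0.0, 1.0]
#
#     # 00.yaml -- intrinsics of camera "CAM00"
#     camera_name: camera_front
#     camera_matrix:
#       data: [658.7, 0.0, 620.5, 0.0, 658.7, 262.4, 0.0, 0.0, 1.0]
#     distortion_coefficients:
#       data: [-0.15, 0.07, 0.0, 0.0, -0.01]
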
def BSTLD(path: str) -> Dataset:
    """`BSTLD <https://hci.iwr.uni-heidelberg.de/content\
/bosch-small-traffic-lights-dataset>`_ dataset.

    The file structure should be like::

        <path>
            rgb/
                additional/
                    2015-10-05-10-52-01_bag/
                        <image_name>.jpg
                        ...
                    ...
                test/
                    <image_name>.jpg
                    ...
                train/
                    2015-05-29-15-29-39_arastradero_traffic_light_loop_bag/
                        <image_name>.jpg
                        ...
                    ...
            test.yaml
            train.yaml
            additional_train.yaml

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "yaml" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        import yaml  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name, package_name="pyyaml") from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for mode, label_file_name in _LABEL_FILENAME_DICT.items():
        segment = dataset.create_segment(mode)
        label_file_path = os.path.join(root_path, label_file_name)

        with open(label_file_path, encoding="utf-8") as fp:
            labels = yaml.load(fp, yaml.FullLoader)

        for label in labels:
            if mode == "test":
                # the path in test label file looks like:
                # /absolute/path/to/<image_name>.png
                file_path = os.path.join(root_path, "rgb", "test", label["path"].rsplit("/", 1)[-1])
            else:
                # the path in label file looks like:
                # ./rgb/additional/2015-10-05-10-52-01_bag/<image_name>.png
                file_path = os.path.join(root_path, *label["path"][2:].split("/"))

            data = Data(file_path)
            data.label.box2d = [
                LabeledBox2D(
                    box["x_min"],
                    box["y_min"],
                    box["x_max"],
                    box["y_max"],
                    category=box["label"],
                    attributes={"occluded": box["occluded"]},
                )
                for box in label["boxes"]
            ]
            segment.append(data)
    return dataset

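# Usage sketch (hypothetical path): one segment is created per entry of
# _LABEL_FILENAME_DICT, e.g. dataset["test"] when a "test" mode is configured.
#
#     dataset = BSTLD("~/data/bstld")
#     for data in dataset["test"]:
#         for box in data.label.box2d:
#             print(box.category, box.attributes["occluded"])
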
def __getattribute__(self, name: str) -> None:
    raise ModuleImportError(module_name="xmltodict")

def __getattribute__(self, name: str) -> None:
    # The import name is "PIL" while the pip package is "Pillow",
    # matching the handling in convert_mask above.
    raise ModuleImportError(module_name="PIL", package_name="Pillow")
