def _get_data_part1(root_path: str, aniamls: Iterable[str]) -> Iterator[Data]:
    """Yield labeled part1 keypoint data for the given animal categories.

    Arguments:
        root_path: The root directory of the dataset.
        aniamls: Animal category names to load (parameter name kept as-is
            for caller compatibility, despite the typo).

    Yields:
        Data instances carrying one box2d and one keypoints2d label per
        matching annotation file.

    Raises:
        ModuleImportError: When the module "xmltodict" can not be found.
    """
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error
    for animal in aniamls:
        for image_path in glob(
                os.path.join(root_path, "keypoint_image_part1", animal, "*.jpg")):
            data = Data(
                image_path,
                target_remote_path=f"{animal}/{os.path.basename(image_path)}")
            # One image may have several annotation files (suffix "_*.xml");
            # each annotation yields a separate Data with the same image.
            for annotation_path in glob(
                    os.path.join(
                        root_path,
                        "PASCAL2011_animal_annotation",
                        animal,
                        f"{os.path.splitext(os.path.basename(image_path))[0]}_*.xml",
                    )):
                with open(annotation_path, encoding="utf-8") as fp:
                    labels: Any = xmltodict.parse(fp.read())
                box2d = labels["annotation"]["visible_bounds"]
                data.label.box2d = [
                    LabeledBox2D.from_xywh(
                        x=float(box2d["@xmin"]),
                        y=float(box2d["@ymin"]),
                        width=float(box2d["@width"]),
                        height=float(box2d["@height"]),
                        category=animal,
                    )
                ]
                # Pre-fill 20 slots so each keypoint lands at its canonical
                # index; slots without annotations keep the empty-tuple
                # placeholder (hence the type: ignore).
                keypoints2d: List[Tuple[float, float, int]] = [
                    ()
                ] * 20  # type: ignore[list-item]
                for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                    keypoints2d[_KEYPOINT_TO_INDEX[keypoint["@name"]]] = (
                        float(keypoint["@x"]),
                        float(keypoint["@y"]),
                        int(keypoint["@visible"]),
                    )
                data.label.keypoints2d = [
                    LabeledKeypoints2D(keypoints2d, category=animal)
                ]
                yield data
def CarConnection(path: str) -> Dataset:
    """`Car Connection Picture <https://github.com/nicolas-gervais\
    /predicting-car-price-from-scraped-data/tree/master/picture-scraper>`_ dataset.

    The file structure should be like::

        <path>
            <imagename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    attribute_keys = dataset.catalog.classification.attributes.keys()
    # Every attribute of an image is encoded in its file name.
    for filename in glob(os.path.join(root_path, "*.jpg")):
        data = Data(filename)
        data.label.classification = _extract_label_from_basename(
            attribute_keys, os.path.basename(filename))
        segment.append(data)
    return dataset
def _load_negative_segment(root_path: str) -> Segment:
    """Build the "negative" segment from the images without traffic signs."""
    negative_segment = Segment("negative")
    pattern = os.path.join(root_path, "negatives", "negativePics", "*.png")
    for image_path in glob(pattern):
        data = Data(image_path)
        # Negative samples carry an explicitly empty box2d label list.
        data.label.box2d = []
        negative_segment.append(data)
    return negative_segment
def _load_segment_10k(dataset: Dataset, root_path: str, labels_dir: str) -> None:
    """Load every "10k" split segment of the dataset in place.

    The "test" split ships without labels, so only plain images are appended;
    other splits get per-image labels plus segmentation masks.
    """
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, "images", "10k", segment_name, "*.jpg"))
        print(f"Reading data to segment '{segment_name}'...")
        if segment_name == "test":
            for image_path in image_paths:
                segment.append(Data(image_path))
        else:
            # For each segmentation type keep the directory of the shipped
            # masks; non-"sem" types additionally get a directory where
            # converted single-channel masks will be written.
            single_channel_mask_dirs: Dict[str, str] = {}
            original_mask_dirs: Dict[str, str] = {}
            for seg_type, dir_names in _SEGMENTATIONS_INFO.items():
                original_mask_dirs[seg_type] = os.path.join(labels_dir, *dir_names, segment_name)
                if seg_type != "sem":
                    single_channel_mask_dir = os.path.join(
                        labels_dir,
                        "single_channel_mask",
                        segment_name,
                        dir_names[0],
                    )
                    single_channel_mask_dirs[seg_type] = single_channel_mask_dir
                    os.makedirs(single_channel_mask_dir, exist_ok=True)
            label_contents = _read_label_file_10k(labels_dir, segment_name)
            for image_path in image_paths:
                segment.append(
                    _get_data_10k(
                        image_path,
                        original_mask_dirs,
                        # Labels are keyed by the image's base file name.
                        label_contents[os.path.basename(image_path)],
                        single_channel_mask_dirs,
                    )
                )
        print(f"Finished reading data to segment '{segment_name}'")
def CACD(path: str) -> Dataset:
    """`Cross-Age Celebrity Dataset (CACD) <https://bcsiriuschen.github.io/CARC/>`_ dataset.

    The file structure should be like::

        <path>
            CACD2000/
                14_Aaron_Johnson_0001.jpg
                ...
            celebrity2000.mat

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.catalog.classification = _get_subcatalog()
    segment = dataset.create_segment()
    # Labels are keyed by the image's base file name.
    labels_map = _get_labels_map(os.path.join(root_path, "celebrity2000.mat"))
    for image_path in glob(os.path.join(root_path, "CACD2000", "*.jpg")):
        category, attribute = labels_map[os.path.basename(image_path)]
        data = Data(image_path)
        data.label.classification = Classification(category, attribute)
        segment.append(data)
    return dataset
def _load_segment_100k(dataset: Dataset, root_path: str, labels_dir: str) -> None:
    """Load every "100k" split segment of the dataset in place.

    The "test" split has no labels; other splits get classification, box2d,
    polygon and polyline2d labels parsed from the merged label files.
    """
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, "images", "100k", segment_name, "*.jpg"))
        print(f"Reading data to segment '{segment_name}'...")
        if segment_name == "test":
            for image_path in image_paths:
                segment.append(Data(image_path))
        else:
            label_contents = _read_label_file_100k(labels_dir, segment_name)
            for image_path in image_paths:
                data = Data(image_path)
                box2d: List[LabeledBox2D] = []
                polygon: List[LabeledPolygon] = []
                polyline2d: List[LabeledPolyline2D] = []
                label = data.label
                # Labels are keyed by the image's base file name.
                label_content = label_contents[os.path.basename(image_path)]
                label.classification = Classification(attributes=label_content["attributes"])
                # A single entry may contribute both a box2d and a poly2d label.
                for label_info in label_content["labels"]:
                    if "box2d" in label_info:
                        _add_box2d_label(label_info, box2d)
                    if "poly2d" in label_info:
                        _add_poly2d_label_100k(label_info, polygon, polyline2d)
                label.box2d = box2d
                label.polygon = polygon
                label.polyline2d = polyline2d
                segment.append(data)
        print(f"Finished reading data to segment '{segment_name}'")
def _load_tracking_segment(
    dataset: Dataset,
    images_dir: str,
    labels_dir: str,
    tracking_type: str,
) -> None:
    """Create one segment per tracking sequence, named ``<split>_<sequence>``.

    The "test" split has no labels, so its sequences use the unlabeled data
    generator; all other splits generate fully labeled data.
    """
    for segment_prefix in _SEGMENT_NAMES:
        # Each subdirectory under the split directory is one video sequence.
        image_subdirs = glob(os.path.join(images_dir, segment_prefix, "*"))
        segment_labels_dir = os.path.join(labels_dir, "polygons", segment_prefix)
        original_mask_dir = os.path.join(labels_dir, "bitmasks", segment_prefix)
        mask_dir = os.path.join(labels_dir, "single_channel_masks", segment_prefix)
        os.makedirs(mask_dir, exist_ok=True)
        if segment_prefix == "test":
            generate_data: _DATA_GENERATOR = _generate_test_data
        else:
            generate_data = _generate_data
        for image_subdir in image_subdirs:
            segment = dataset.create_segment(
                f"{segment_prefix}_{os.path.basename(image_subdir)}")
            segment.extend(
                generate_data(
                    image_subdir,
                    segment_labels_dir,
                    original_mask_dir,
                    mask_dir,
                    tracking_type,
                ))
def _get_segment(path: str, segment_name: str) -> Segment:
    """Build a segment from all PNG images under ``<path>/<segment_name>``."""
    segment = Segment(segment_name)
    segment.extend(
        Data(image_path)
        for image_path in glob(os.path.join(path, segment_name, "*.png")))
    return segment
def convert_mask(path: str, mask_path: str) -> None:
    """Convert the mat format labels of the PASCALContext dataset to masks.

    The file structure of the input path should be like::

        <path>
            <trainval>
                <image_name>.mat
                ...

    Arguments:
        path: The root directory of the dataset.
        mask_path: The root directory where to save the masks.  Created if it
            does not already exist.

    Raises:
        ModuleImportError: When the module "scipy" or "Pillow" can not be found.

    """
    try:
        from PIL import Image  # pylint: disable=import-outside-toplevel
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        module_name = error.name
        # "PIL" is imported from the "Pillow" distribution; report that name.
        package_name = "Pillow" if module_name == "PIL" else None
        raise ModuleImportError(module_name=module_name,
                                package_name=package_name) from error
    root_path = os.path.abspath(os.path.expanduser(path))
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    # Fix: Image.save fails with FileNotFoundError when the output directory
    # does not exist, so create it up front.
    os.makedirs(root_mask_path, exist_ok=True)
    for mat_path in glob(os.path.join(root_path, "trainval", "*.mat")):
        stem = os.path.splitext(os.path.basename(mat_path))[0]
        mat = loadmat(mat_path)
        image = Image.fromarray(mat["LabelMap"])
        image.save(os.path.join(root_mask_path, f"{stem}.png"))
def PASCALContext(mask_path: str, image_path: str) -> Dataset:
    """`PASCALContext <https://cs.stanford.edu/~roozbeh/pascal-context/>`_ dataset.

    The file structure should be like::

        <mask_path>
            <image_name>.png
            ...
        <image_path>
            <image_name>.jpg
            ...

    Arguments:
        mask_path: The root directory of the dataset mask.
        image_path: The root directory of the dataset image.

    Returns:
        Loaded :class: `~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    root_image_path = os.path.abspath(os.path.expanduser(image_path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment("trainval")
    # Images and masks share a stem; iterate masks and derive the image path.
    for mask_file in glob(os.path.join(root_mask_path, "*.png")):
        stem, _ = os.path.splitext(os.path.basename(mask_file))
        data = Data(os.path.join(root_image_path, f"{stem}.jpg"))
        data.label.semantic_mask = SemanticMask(mask_file)
        segment.append(data)
    return dataset
def FSDD(path: str) -> Dataset:
    """`Free Spoken Digit <https://github.com/Jakobovski/free-spoken-digit-dataset>`_ dataset.

    The file structure should be like::

        <path>
            recordings/
                0_george_0.wav
                0_george_1.wav
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    # Fix: normalize the path like every other loader in this package, so
    # "~" and relative paths work consistently.
    root_path = os.path.abspath(os.path.expanduser(path))
    # Speaker metadata keyed by speaker name, with the name folded into the
    # attributes under the "name" key.
    label_map = {key: {"name": key, **value} for key, value in _METADATA.items()}
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    audio_paths = glob(os.path.join(root_path, "recordings", "*.wav"))
    for audio_path in audio_paths:
        # File names look like "<digit>_<speaker>_<index>.wav".
        category, name = os.path.basename(audio_path).split("_")[:2]
        data = Data(audio_path)
        data.label.classification = Classification(category, label_map[name])
        segment.append(data)
    return dataset
def ImageEmotionArtphoto(path: str) -> Dataset:
    """`Image Emotion-art Photo <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            <filename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_ARTPHOTO)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_artphoto.json"))
    segment = dataset.create_segment()
    for image_path in glob(os.path.join(root_path, "*.jpg")):
        # The emotion category is the filename prefix before the first "_".
        category = os.path.basename(image_path).split("_", 1)[0]
        data = Data(image_path)
        data.label.classification = Classification(category=category)
        segment.append(data)
    return dataset
def _read_label_file_100k(label_dir: str, segment_name: str) -> Dict[str, Any]:
    """Read and merge all "100k" label files for one segment.

    Label files are named ``<prefix>_<segment_name>.json``; unknown prefixes
    are ignored with a warning, and missing prefixes produce a warning that
    the corresponding label type will be empty.

    Returns:
        The merged label contents, keyed by image file name.
    """
    source_label_contents = []
    label_filenames = glob(os.path.join(label_dir, "**", f"*_{segment_name}.json"),
                           recursive=True)
    # Track which expected prefixes we have seen; leftovers are missing files.
    label_prefixes = set(_LABEL_TYPE_INFO_100K)
    for label_filename in label_filenames:
        label_file_basename = os.path.basename(label_filename)
        label_prefix = label_file_basename.replace(f"_{segment_name}.json", "")
        try:
            label_prefixes.remove(label_prefix)
        except KeyError:
            # A file matching the glob but with an unknown prefix.
            warn_message = f"Invalid label file name '{label_file_basename}'! Ignoring.."
            warn(warn_message)
            continue
        label_description = _LABEL_TYPE_INFO_100K[label_prefix][0]
        print(f"Reading '{label_description}' labels to segment '{segment_name}'...")
        with open(label_filename, "r", encoding="utf-8") as fp:
            source_label_contents.append(json.load(fp))
        print(f"Finished reading '{label_description}' labels to segment '{segment_name}'...")
    for missing_label_prefix in label_prefixes:
        warn_message = (
            f"Missing label file '{missing_label_prefix}_{segment_name}.json'! "
            f"The correspondent '{_LABEL_TYPE_INFO_100K[missing_label_prefix][1]}' "
            f"label will be set to empty!"
        )
        warn(warn_message)
    print(f"Merging '{segment_name}' labels...")
    label_contents = _merge_label(source_label_contents)
    print(f"Finished merging '{segment_name}' labels")
    return label_contents
def AnimalsWithAttributes2(path: str) -> Dataset:
    """`Animals with attributes 2 <https://cvml.ist.ac.at/AwA2/>`_ dataset.

    The file structure should be like::

        <path>
            classes.txt
            predicates.txt
            predicate-matrix-binary.txt
            JPEGImages/
                <classname>/
                    <imagename>.jpg
                    ...
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    # Fix: use rstrip("\n") instead of line[:-1] — slicing off the last
    # character corrupts the final entry when the file lacks a trailing
    # newline.
    with open(os.path.join(root_path, "classes.txt"), encoding="utf-8") as fp:
        class_names = [line.rstrip("\n").split("\t", 1)[-1] for line in fp]
    with open(os.path.join(root_path, "predicates.txt"), encoding="utf-8") as fp:
        attribute_keys = [line.rstrip("\n").split("\t", 1)[-1] for line in fp]
    with open(os.path.join(root_path, "predicate-matrix-binary.txt"),
              encoding="utf-8") as fp:
        attribute_values = [line.rstrip("\n").split(" ") for line in fp]
    # One Classification per class, with the binary predicate matrix row
    # converted to boolean attributes.
    attribute_mapping = {}
    for class_name, values in zip(class_names, attribute_values):
        attribute_mapping[class_name] = Classification(
            category=class_name,
            attributes=dict(
                zip(attribute_keys, (bool(int(value)) for value in values))),
        )
    for class_name in sorted(os.listdir(os.path.join(root_path, "JPEGImages"))):
        image_paths = glob(
            os.path.join(root_path, "JPEGImages", class_name, "*.jpg"))
        label = attribute_mapping[class_name]
        for image_path in image_paths:
            data = Data(image_path)
            data.label.classification = label
            segment.append(data)
    return dataset
def DeepRoute(path: str) -> Dataset:
    """`DeepRoute <https://gas.graviti.cn/dataset/graviti-open-dataset\
    /DeepRoute>`_ dataset.

    The file structure should be like::

        <path>
            pointcloud/
                00001.bin
                00002.bin
                ...
                10000.bin
            groundtruth/
                00001.txt
                00002.txt
                ...
                10000.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    point_cloud_paths = glob(os.path.join(root_path, "pointcloud", "*.bin"))
    for point_cloud_path in point_cloud_paths:
        # Ground truth shares the point cloud's stem: 00001.bin -> 00001.txt.
        point_cloud_id = os.path.splitext(os.path.basename(point_cloud_path))[0]
        label_path = os.path.join(root_path, "groundtruth", f"{point_cloud_id}.txt")
        data = Data(point_cloud_path)
        data.label.box3d = []
        # The .txt ground truth files hold JSON despite their extension.
        with open(label_path, encoding="utf-8") as fp:
            annotations = json.load(fp)["objects"]
        for annotation in annotations:
            bounding_box = annotation["bounding_box"]
            position = annotation["position"]
            label = LabeledBox3D(
                size=(bounding_box["length"], bounding_box["width"], bounding_box["height"]),
                translation=(position["x"], position["y"], position["z"]),
                # "heading" is a rotation about the z axis (yaw).
                rotation=from_rotation_vector((0, 0, annotation["heading"])),
                category=annotation["type"],
            )
            data.label.box3d.append(label)
        segment.append(data)
    return dataset
def KenyanFoodType(path: str) -> Dataset:
    """`Kenyan Food Type <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            test.csv
            test/
                bhaji/
                    1611654056376059197.jpg
                    ...
                chapati/
                    1451497832469337023.jpg
                    ...
                ...
            train/
                bhaji/
                    190393222473009410.jpg
                    ...
                chapati/
                    1310641031297661755.jpg
                    ...
            val/
                bhaji/
                    1615408264598518873.jpg
                    ...
                chapati/
                    1553618479852020228.jpg
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_TYPE)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_food_type.json"))
    for segment_name in SEGMENTS_FOOD_TYPE:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        # Each subdirectory name is the food-type category of its images.
        for category in sorted(os.listdir(segment_path)):
            label = Classification(category)
            for image_path in glob(os.path.join(segment_path, category, "*.jpg")):
                data = Data(image_path)
                data.label.classification = label
                segment.append(data)
    return dataset
def RarePlanesReal(path: str) -> Dataset:
    """`RarePlanesReal <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The folder structure should be like::

        <path>
            metadata_annotations/
                RarePlanes_Public_Metadata.csv
                RarePlanes_Test_Coco_Annotations_tiled.json
                RarePlanes_Train_Coco_Annotations_tiled.json
            test/
                PS-RGB_tiled/
                    105_104001003108D900_tile_47.png
                    ...
            train/
                PS-RGB_tiled/
                    100_1040010029990A00_tile_319.png
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    catalog = dataset.catalog
    annotations_dir = os.path.join(root_path, "metadata_annotations")
    # Classification attributes come from the shared metadata CSV and are
    # keyed by image id (the filename without the "_tile_<n>" suffix).
    classification_attributes = _get_classification_attributes(
        os.path.join(annotations_dir, "RarePlanes_Public_Metadata.csv"),
        catalog.classification.attributes.keys(),
    )
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        # Polygon labels are keyed by the full image file name.
        image_name_to_polygons = _get_polygon_labels(
            annotations_dir, segment_name, catalog.polygon.attributes.keys())
        for image_path in glob(
                os.path.join(root_path, segment_name, "PS-RGB_tiled", "*.png")):
            data = Data(image_path)
            label = data.label
            filename = os.path.basename(image_path)
            # "105_10400..._tile_47.png" -> image id "105_10400...".
            image_id = filename.rsplit("_", 2)[0]
            label.polygon = image_name_to_polygons[filename]
            label.classification = Classification(
                attributes=classification_attributes[image_id])
            segment.append(data)
    return dataset
def BioIDFace(path: str) -> Dataset:
    """`BioID Face <https://www.bioid.com/facedb/>`_ dataset.

    The folder structure should be like::

        <path>
            BioID-FaceDatabase-V1.2/
                BioID_0000.eye
                BioID_0000.pgm
                ...
            points_20/
                bioid_0000.pts

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    # Fix: glob() returns files in arbitrary, OS-dependent order, so zipping
    # two independent globs could pair an image with the wrong keypoint file.
    # Both file families use the same zero-padded numbering, so sorting each
    # list aligns them index-by-index.
    image_paths = sorted(
        glob(os.path.join(root_path, "BioID-FaceDatabase-V1.2", "*.pgm")))
    face_keypoints_paths = sorted(
        glob(os.path.join(root_path, "points_20", "*.pts")))
    for image_path, face_keypoints_path in zip(image_paths, face_keypoints_paths):
        data = Data(image_path)
        # The eye-position file shares the image's stem with an .eye suffix.
        data.label.keypoints2d = _get_label(
            f"{os.path.splitext(image_path)[0]}.eye", face_keypoints_path)
        segment.append(data)
    return dataset
def LeedsSportsPose(path: str) -> Dataset:
    """`Leeds Sports Pose <http://sam.johnson.io/research/lsp.html>`_ dataset.

    The folder structure should be like::

        <path>
            joints.mat
            images/
                im0001.jpg
                im0002.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    mat = loadmat(os.path.join(root_path, "joints.mat"))
    # Transpose so the first axis indexes images.
    joints = mat["joints"].T
    image_paths = glob(os.path.join(root_path, "images", "*.jpg"))
    for image_path in image_paths:
        data = Data(image_path)
        data.label.keypoints2d = []
        index = int(os.path.basename(image_path)
                    [2:6]) - 1  # get image index from "im0001.jpg"
        keypoints = LabeledKeypoints2D()
        for keypoint in joints[index]:
            # The .mat stores a "hidden" flag; invert it to get visibility.
            keypoints.append(
                Keypoint2D(keypoint[0], keypoint[1], int(not keypoint[2])))
        data.label.keypoints2d.append(keypoints)
        segment.append(data)
    return dataset
def NightOwls(path: str) -> Dataset:
    """`NightOwls <http://www.nightowls-dataset.org/>`_ dataset.

    The file structure should be like::

        <path>
            nightowls_test/
                <image_name>.png
                ...
            nightowls_training/
                <image_name>.png
                ...
            nightowls_validation/
                <image_name>.png
                ...
            nightowls_training.json
            nightowls_validation.json

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    # The frames of each segment form a continuous sequence.
    dataset.notes.is_continuous = True
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    # Each mode maps to its label file name and a mode-specific handler that
    # builds a Data from an image path plus the loaded labels.
    for mode, (labels_filename, labels_handler) in _LABELS_HANDEL_METHODS.items():
        segment = dataset.create_segment(mode)
        image_paths = glob(
            os.path.join(root_path, f"nightowls_{mode}", "*.png"))
        labels = _load_labels(root_path, labels_filename)
        for image_path in image_paths:
            data = labels_handler(image_path, labels)  # pylint: disable=not-callable
            segment.append(data)
    return dataset
def JHU_CROWD(path: str) -> Dataset:
    """`JHU-CROWD++ <http://www.crowd-counting.com/>`_ dataset.

    The file structure should be like::

        <path>
            train/
                images/
                    0000.jpg
                    ...
                gt/
                    0000.txt
                    ...
                image_labels.txt
            test/
            val/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    # Fix: normalize the path like the other loaders in this package.
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    for segment_name in SEGMENT_LIST:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        image_root_path = os.path.join(segment_path, "images")
        image_paths = glob(os.path.join(image_root_path, "*.jpg"))
        # Per-image classification labels keyed by the image stem.
        image_labels = _load_image_labels(
            os.path.join(segment_path, "image_labels.txt"))
        for image_path in image_paths:
            data = Data(image_path)
            # Fix: derive the ground-truth filename from the stem instead of
            # str.replace("jpg", "txt"), which would also rewrite "jpg"
            # occurring inside the stem itself.
            stem = os.path.splitext(os.path.basename(image_path))[0]
            data.label.box2d = _load_box_labels(
                os.path.join(segment_path, "gt", f"{stem}.txt"))
            data.label.classification = image_labels[stem]
            segment.append(data)
    return dataset
def _load_positive_segment(segment_name: str, segment_path: str) -> Segment:
    """Build a positive segment from one sequence's frame annotation CSV.

    Consecutive CSV rows for the same file name are grouped into a single
    Data, each row contributing one box2d label.
    """
    if segment_name.startswith("vid"):
        # Pad zero for segment name to change "vid0" to "vid00"
        segment_name = f"{segment_name[:3]}{int(segment_name[3:]):02}"
    segment = Segment(segment_name)
    annotation_file = glob(
        os.path.join(segment_path, "frameAnnotations-*", "frameAnnotations.csv"))[0]
    image_folder = os.path.dirname(annotation_file)
    pre_filename = ""
    with open(annotation_file, "r", encoding="utf-8") as fp:
        for annotation in csv.DictReader(fp, delimiter=";"):
            filename = annotation["Filename"]
            # Rows are grouped by file; start a new Data when the file changes.
            if filename != pre_filename:
                data = Data(os.path.join(image_folder, filename))
                data.label.box2d = []
                segment.append(data)
                pre_filename = filename
            # The CSV fuses two fields into one comma-joined column.
            occluded, on_another_road = annotation[
                "Occluded,On another road"].split(",", 1)
            data.label.box2d.append(
                LabeledBox2D(
                    int(annotation["Upper left corner X"]),
                    int(annotation["Upper left corner Y"]),
                    int(annotation["Lower right corner X"]),
                    int(annotation["Lower right corner Y"]),
                    category=annotation["Annotation tag"],
                    attributes={
                        "Occluded": bool(int(occluded)),
                        "On another road": bool(int(on_another_road)),
                        "Origin file": annotation["Origin file"],
                        "Origin frame number": int(annotation["Origin frame number"]),
                        "Origin track": annotation["Origin track"],
                        "Origin track frame number":
                            int(annotation["Origin track frame number"]),
                    },
                ))
    return segment
def HeadPoseImage(path: str) -> Dataset:
    """`Head Pose Image <http://crowley-coutaz.fr\
    /Head%20Pose%20Image%20Database.html>`_ dataset.

    The file structure should be like::

        <path>
            Person01/
                person01100-90+0.jpg
                person01100-90+0.txt
                person01101-60-90.jpg
                person01101-60-90.txt
                ...
            Person02/
            Person03/
            ...
            Person15/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    # Fix: normalize the path like the other loaders in this package.
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    image_paths = glob(os.path.join(root_path, "Person*", "*.jpg"))
    for image_path in image_paths:
        image_name = os.path.basename(image_path)
        # Fix: swap the extension via splitext instead of
        # str.replace("jpg", "txt"), which would also rewrite "jpg"
        # occurring anywhere else in the full path.
        label_path = f"{os.path.splitext(image_path)[0]}.txt"
        data = Data(image_path)
        data.label.box2d = [
            LabeledBox2D(
                *_load_label_box(label_path),
                category=image_name[6:8],
                attributes=_load_attributes(image_name),
            )
        ]
        segment.append(data)
    return dataset
def _get_data_part2(root_path: str, aniamls: Iterable[str]) -> Iterator[Data]:
    """Yield labeled part2 animal-pose data for the given animal categories.

    Arguments:
        root_path: The root directory of the dataset.
        aniamls: Animal category names to load (parameter name kept as-is
            for caller compatibility, despite the typo).

    Yields:
        Data instances carrying one box2d and one keypoints2d label each.

    Raises:
        ModuleImportError: When the module "xmltodict" can not be found.
    """
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error
    for animal in aniamls:
        for image_path in glob(
                os.path.join(root_path, "animalpose_image_part2", animal, "*.jpeg")):
            data = Data(
                image_path,
                target_remote_path=f"{animal}/{os.path.basename(image_path)}")
            # Part2 has exactly one annotation file per image, sharing its stem.
            annotation_path = os.path.join(
                root_path,
                "animalpose_anno2",
                animal,
                f"{os.path.splitext(os.path.basename(image_path))[0]}.xml",
            )
            with open(annotation_path, encoding="utf-8") as fp:
                labels: Any = xmltodict.parse(fp.read())
            box2d = labels["annotation"]["visible_bounds"]
            data.label.box2d = [
                LabeledBox2D.from_xywh(
                    x=float(box2d["@xmin"]),
                    y=float(
                        box2d["@xmax"]),  # xmax means ymin in the annotation
                    width=float(box2d["@width"]),
                    height=float(box2d["@height"]),
                    category=animal,
                )
            ]
            keypoints2d = LabeledKeypoints2D(category=animal)
            for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                keypoints2d.append(
                    Keypoint2D(float(keypoint["@x"]), float(keypoint["@y"]),
                               int(keypoint["@visible"])))
            data.label.keypoints2d = [keypoints2d]
            yield data
def KylbergTexture(path: str) -> Dataset:
    """`Kylberg Texture <http://www.cb.uu.se/~gustaf/texture/>`_ dataset.

    The file structure should be like::

        <path>
            originalPNG/
                <imagename>.png
                ...
            withoutRotateAll/
                <imagename>.png
                ...
            RotateAll/
                <imagename>.png
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    # Each segment has its own strategy for deriving a label from a filename.
    for segment_name, label_getter in _LABEL_GETTERS.items():
        segment = dataset.create_segment(segment_name)
        for image_path in glob(os.path.join(root_path, segment_name, "*.png")):
            stem, _ = os.path.splitext(os.path.basename(image_path))
            data = Data(image_path)
            data.label.classification = label_getter(stem)
            segment.append(data)
    return dataset
def _load_sensors(calib_path: str) -> Sensors:
    """Load the lidar and camera calibrations from a CADC calib directory.

    Arguments:
        calib_path: Directory containing "extrinsics.yaml" plus one numbered
            YAML file per camera.

    Returns:
        The loaded Sensors collection.

    Raises:
        ModuleImportError: When the module "yaml" (pyyaml) can not be found.
    """
    try:
        import yaml  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name,
                                package_name="pyyaml") from error
    sensors = Sensors()
    # The lidar is the reference frame: identity extrinsics.
    lidar = Lidar("LIDAR")
    lidar.set_extrinsics()
    sensors.add(lidar)
    with open(os.path.join(calib_path, "extrinsics.yaml"), "r", encoding="utf-8") as fp:
        extrinsics = yaml.load(fp, Loader=yaml.FullLoader)
    for camera_calibration_file in glob(os.path.join(calib_path, "[0-9]*.yaml")):
        with open(camera_calibration_file, "r", encoding="utf-8") as fp:
            camera_calibration = yaml.load(fp, Loader=yaml.FullLoader)
        # camera_calibration_file looks like:
        # /path-to-CADC/2018_03_06/calib/00.yaml
        camera_name = f"CAM{os.path.splitext(os.path.basename(camera_calibration_file))[0]}"
        camera = Camera(camera_name)
        camera.description = camera_calibration["camera_name"]
        # Extrinsics are stored lidar-to-camera, keyed "T_LIDAR_CAM<nn>".
        camera.set_extrinsics(matrix=extrinsics[f"T_LIDAR_{camera_name}"])
        # The YAML stores the 3x3 camera matrix as a flat 9-element list.
        camera_matrix = camera_calibration["camera_matrix"]["data"]
        camera.set_camera_matrix(
            matrix=[camera_matrix[:3], camera_matrix[3:6], camera_matrix[6:9]])
        # Distortion coefficients are ordered k1, k2, p1, p2, k3.
        distortion = camera_calibration["distortion_coefficients"]["data"]
        camera.set_distortion_coefficients(
            **dict(zip(("k1", "k2", "p1", "p2", "k3"), distortion)))
        sensors.add(camera)
    return sensors
def CoinImage(path: str) -> Dataset:
    """`Coin Image <https://cvl.tuwien.ac.at/research/cvl-databases/coin-image-dataset/>`_ dataset.

    The file structure should be like::

        <path>
            classes.csv
            <imagename>.png
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    # classes.csv maps class id -> category name, one ";"-separated pair per row.
    with open(os.path.join(root_path, "classes.csv"), "r", encoding="utf-8") as fp:
        mapping: Dict[str, str] = dict(
            csv.reader(fp, delimiter=";"))  # type: ignore[arg-type, misc]
    for image_path in glob(os.path.join(root_path, "*.png")):
        filename = os.path.basename(image_path)
        # The class id sits after a fixed 5-character prefix, before "_".
        class_id = filename[5:].split("_", 1)[0]
        data = Data(image_path)
        data.label.classification = Classification(category=mapping[class_id])
        segment.append(data)
    return dataset
def TLR(path: str) -> Dataset:
    """`TLR <http://www.lara.prd.fr/benchmarks/trafficlightsrecognition>`_ dataset.

    The file structure should be like::

        <path>
            root_path/
                Lara3D_UrbanSeq1_JPG/
                    frame_011149.jpg
                    frame_011150.jpg
                    frame_<frame_index>.jpg
                    ...
                Lara_UrbanSeq1_GroundTruth_cvml.xml

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    file_paths = glob(os.path.join(root_path, "Lara3D_UrbanSeq1_JPG", "*.jpg"))
    # All frames share one XML ground-truth file, keyed by frame index.
    labels = _parse_xml(
        os.path.join(root_path, "Lara_UrbanSeq1_GroundTruth_cvml.xml"))
    for file_path in file_paths:
        # the image file name looks like:
        # frame_000001.jpg
        frame_index = int(os.path.basename(file_path)[6:-4])
        data = Data(file_path)
        data.label.box2d = labels[frame_index]
        segment.append(data)
    return dataset
def DogsVsCats(path: str) -> Dataset:
    """`Dogs vs Cats <https://www.kaggle.com/c/dogs-vs-cats>`_ dataset.

    The file structure should be like::

        <path>
            train/
                cat.0.jpg
                ...
                dog.0.jpg
                ...
            test/
                1000.jpg
                1001.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    for segment_name, is_labeled in _SEGMENTS.items():
        segment = dataset.create_segment(segment_name)
        for image_path in glob(os.path.join(root_path, segment_name, "*.jpg")):
            data = Data(image_path)
            if is_labeled:
                # Train filenames start with the category: "cat" or "dog".
                data.label.classification = Classification(
                    os.path.basename(image_path)[:3])
            segment.append(data)
    return dataset
def RarePlanesSynthetic(path: str) -> Dataset:
    """`RarePlanesSynthetic <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The file structure of RarePlanesSynthetic looks like::

        <path>
            images/
                Atlanta_Airport_0_0_101_1837.png
                ...
            masks/
                Atlanta_Airport_0_0_101_1837_mask.png
                ...
            xmls/
                Atlanta_Airport_0_0_101_1837.xml
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    category_ids = dataset.catalog.panoptic_mask.get_category_to_index()
    segment = dataset.create_segment()
    source_mask_dir = os.path.join(root_path, "masks")
    annotation_dir = os.path.join(root_path, "xmls")
    # Converted masks are written alongside the originals under "new_masks".
    converted_mask_dir = os.path.join(root_path, "new_masks")
    os.makedirs(converted_mask_dir, exist_ok=True)
    for image_path in glob(os.path.join(root_path, "images", "*.png")):
        data = _get_data(image_path, source_mask_dir, annotation_dir,
                         converted_mask_dir, category_ids)
        segment.append(data)
    return dataset