def test_get_callback_body(self, mask_file):
    semantic_mask = SemanticMask(mask_file)
    semantic_mask.all_attributes = {1: {"occluded": True}, 2: {"occluded": False}}
    assert semantic_mask.get_callback_body() == {
        "checksum": "c86aca4e348b051f60c2d7d1bf750fb3accdfeaf",
        "fileSize": 12,
        "info": [
            {"attributes": {"occluded": True}, "categoryId": 1},
            {"attributes": {"occluded": False}, "categoryId": 2},
        ],
    }
def test_get_data(self, accesskey, url, tmp_path, mask_file):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    dataset_client.create_draft("draft-1")
    dataset_client.upload_catalog(Catalog.loads(CATALOG))
    segment_client = dataset_client.get_or_create_segment("segment1")
    path = tmp_path / "sub"
    path.mkdir()

    # Upload data with all three kinds of mask labels.
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text(f"CONTENT{i}")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)

        semantic_mask = SemanticMask(str(mask_file))
        semantic_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
        data.label.semantic_mask = semantic_mask

        instance_mask = InstanceMask(str(mask_file))
        instance_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
        data.label.instance_mask = instance_mask

        panoptic_mask = PanopticMask(str(mask_file))
        panoptic_mask.all_category_ids = {100: 0, 101: 1}
        data.label.panoptic_mask = panoptic_mask

        segment_client.upload_data(data)

    # Read the data back and check the remote masks against the expected labels.
    for i in range(10):
        data = segment_client.get_data(f"hello{i}.txt")
        assert data.path == f"hello{i}.txt"
        assert data.label.box2d == Label.loads(LABEL).box2d

        stem = os.path.splitext(data.path)[0]

        remote_semantic_mask = data.label.semantic_mask
        semantic_mask = RemoteSemanticMask.from_response_body(SEMANTIC_MASK_LABEL)
        assert remote_semantic_mask.path == f"{stem}.png"
        assert remote_semantic_mask.all_attributes == semantic_mask.all_attributes

        remote_instance_mask = data.label.instance_mask
        instance_mask = RemoteInstanceMask.from_response_body(INSTANCE_MASK_LABEL)
        assert remote_instance_mask.path == f"{stem}.png"
        assert remote_instance_mask.all_attributes == instance_mask.all_attributes

        remote_panoptic_mask = data.label.panoptic_mask
        panoptic_mask = RemotePanopticMask.from_response_body(PANOPTIC_MASK_LABEL)
        assert remote_panoptic_mask.path == f"{stem}.png"
        assert remote_panoptic_mask.all_category_ids == panoptic_mask.all_category_ids

    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_with_mask(self, accesskey, url, tmp_path, mask_file):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(CATALOG_CONTENTS)
    path = tmp_path / "sub"
    path.mkdir()
    local_path = path / "hello.txt"
    local_path.write_text("CONTENT")
    data = Data(local_path=str(local_path))

    semantic_mask = SemanticMask(str(mask_file))
    semantic_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
    data.label.semantic_mask = semantic_mask

    instance_mask = InstanceMask(str(mask_file))
    instance_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
    data.label.instance_mask = instance_mask

    panoptic_mask = PanopticMask(str(mask_file))
    panoptic_mask.all_category_ids = {100: 0, 101: 1}
    data.label.panoptic_mask = panoptic_mask

    segment.append(data)
    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("upload dataset with label")

    dataset = Dataset(dataset_name, gas_client)
    assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)

    remote_semantic_mask = dataset[0][0].label.semantic_mask
    semantic_mask = RemoteSemanticMask.from_response_body(SEMANTIC_MASK_LABEL)
    assert remote_semantic_mask.path == semantic_mask.path
    assert remote_semantic_mask.all_attributes == semantic_mask.all_attributes

    remote_instance_mask = dataset[0][0].label.instance_mask
    instance_mask = RemoteInstanceMask.from_response_body(INSTANCE_MASK_LABEL)
    assert remote_instance_mask.path == instance_mask.path
    assert remote_instance_mask.all_attributes == instance_mask.all_attributes

    remote_panoptic_mask = dataset[0][0].label.panoptic_mask
    panoptic_mask = RemotePanopticMask.from_response_body(PANOPTIC_MASK_LABEL)
    assert remote_panoptic_mask.path == panoptic_mask.path
    assert remote_panoptic_mask.all_category_ids == panoptic_mask.all_category_ids

    gas_client.delete_dataset(dataset_name)
def PASCALContext(mask_path: str, image_path: str) -> Dataset:
    """`PASCALContext <https://cs.stanford.edu/~roozbeh/pascal-context/>`_ dataset.

    The file structure should be like::

        <mask_path>
            <image_name>.png
            ...
        <image_path>
            <image_name>.jpg
            ...

    Arguments:
        mask_path: The root directory of the dataset mask.
        image_path: The root directory of the dataset image.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    root_image_path = os.path.abspath(os.path.expanduser(image_path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment("trainval")

    # Pair each mask with the image that shares its stem.
    for mask_filename in glob(os.path.join(root_mask_path, "*.png")):
        stem = os.path.splitext(os.path.basename(mask_filename))[0]
        data = Data(os.path.join(root_image_path, f"{stem}.jpg"))
        data.label.semantic_mask = SemanticMask(mask_filename)
        segment.append(data)

    return dataset
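# A hypothetical usage sketch of the loader above; the directory names are
# placeholders for wherever the PASCALContext files actually live:
#
#     dataset = PASCALContext("~/pascal_context/masks", "~/pascal_context/images")
#     segment = dataset[0]  # the "trainval" segment
#     print(segment[0].label.semantic_mask.path)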
def _get_data(image_path: str, root_path: str, segment_name: str, folder_name: str) -> Data:
    filename = os.path.basename(image_path)
    city = filename.split("_", 1)[0]
    image_prefix = filename.rsplit("_", 1)[0]
    label_dir = os.path.join(root_path, folder_name, segment_name, city)
    data = Data(image_path)

    # get semantic mask and instance mask
    label = data.label
    label.semantic_mask = SemanticMask(
        os.path.join(label_dir, f"{image_prefix}_{folder_name}_labelIds.png")
    )
    label.instance_mask = InstanceMask(
        os.path.join(label_dir, f"{image_prefix}_{folder_name}_instanceIds.png")
    )

    # get polygons
    polygons: List[LabeledPolygon] = []
    with open(
        os.path.join(label_dir, f"{image_prefix}_{folder_name}_polygons.json"),
        encoding="utf-8",
    ) as fp:
        objects = json.load(fp)["objects"]
    for obj in objects:
        polygons.append(LabeledPolygon(obj["polygon"], category=obj["label"]))
    label.polygon = polygons

    return data
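# A minimal sketch of the Cityscapes-style naming the helper above relies on;
# the filename below is illustrative:
#
#     >>> filename = "aachen_000000_000019_leftImg8bit.png"
#     >>> filename.split("_", 1)[0]    # city
#     'aachen'
#     >>> filename.rsplit("_", 1)[0]   # image_prefix
#     'aachen_000000_000019'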
def VOC2012Segmentation(path: str) -> Dataset:
    """`VOC2012Segmentation <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset.

    The file structure should be like::

        <path>/
            JPEGImages/
                <image_name>.jpg
                ...
            SegmentationClass/
                <mask_name>.png
                ...
            SegmentationObject/
                <mask_name>.png
                ...
            ImageSets/
                Segmentation/
                    train.txt
                    val.txt
                    ...
                ...
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    image_path = os.path.join(root_path, "JPEGImages")
    semantic_mask_path = os.path.join(root_path, "SegmentationClass")
    instance_mask_path = os.path.join(root_path, "SegmentationObject")
    image_set_path = os.path.join(root_path, "ImageSets", "Segmentation")

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        with open(os.path.join(image_set_path, f"{segment_name}.txt"), encoding="utf-8") as fp:
            for stem in fp:
                stem = stem.strip()
                data = Data(os.path.join(image_path, f"{stem}.jpg"))
                label = data.label
                mask_filename = f"{stem}.png"
                label.semantic_mask = SemanticMask(
                    os.path.join(semantic_mask_path, mask_filename)
                )
                label.instance_mask = InstanceMask(
                    os.path.join(instance_mask_path, mask_filename)
                )
                segment.append(data)

    return dataset
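# A hypothetical usage sketch, assuming a local VOC2012 copy at the path shown:
#
#     dataset = VOC2012Segmentation("~/VOCdevkit/VOC2012")
#     for segment in dataset:
#         print(segment.name, len(segment))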
def OxfordIIITPet(path: str) -> Dataset:
    """`OxfordIIITPet <https://www.robots.ox.ac.uk/~vgg/data/pets/>`_ dataset.

    The file structure should be like::

        <path>
            annotations/
                trimaps/
                    Bombay_113.png
                    Bombay_114.png
                    ...
                xmls/
                    Birman_174.xml
                    Birman_175.xml
                    ...
                list.txt
                test.txt
                trainval.txt
                README
            images/
                Bombay_117.jpg
                Bombay_118.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    trainval_segment = dataset.create_segment("trainval")
    test_segment = dataset.create_segment("test")
    annotation_path = os.path.join(root_path, "annotations")

    for image_path in glob(os.path.join(root_path, "images", "*.jpg")):
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        # Cat breed filenames are capitalized in this dataset, dog breeds are not.
        name = "Cat" if image_name.istitle() else "Dog"
        category, num = image_name.rsplit("_", 1)
        data = Data(image_path, target_remote_path=f"{category}_{num.zfill(3)}.jpg")
        label = data.label
        label.classification = Classification(category=f"{name}.{category}")
        label.semantic_mask = SemanticMask(
            os.path.join(annotation_path, "trimaps", f"{image_name}.png")
        )
        xml_path = os.path.join(annotation_path, "xmls", f"{image_name}.xml")
        # Only images with an xml annotation carry a box2d label and belong to trainval.
        if os.path.exists(xml_path):
            label.box2d = _get_box_label(xml_path)
            trainval_segment.append(data)
        else:
            test_segment.append(data)

    return dataset
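# The cat/dog split above hinges on str.istitle(): cat breed filenames are
# capitalized, dog breed filenames are not. A quick sketch with two
# illustrative names:
#
#     >>> "Bombay_113".istitle()
#     True
#     >>> "american_bulldog_100".istitle()
#     False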
def test_cache_dataset(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(_CATALOG)
    path = tmp_path / "sub"
    semantic_path = tmp_path / "semantic_mask"
    instance_path = tmp_path / "instance_mask"
    path.mkdir()
    semantic_path.mkdir()
    instance_path.mkdir()

    for i in range(_SEGMENT_LENGTH):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(_LABEL)

        semantic_mask = semantic_path / f"semantic_mask{i}.png"
        semantic_mask.write_text("SEMANTIC_MASK")
        data.label.semantic_mask = SemanticMask(str(semantic_mask))

        instance_mask = instance_path / f"instance_mask{i}.png"
        instance_mask.write_text("INSTANCE_MASK")
        data.label.instance_mask = InstanceMask(str(instance_mask))

        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("commit-1")

    cache_path = tmp_path / "cache_test"
    dataset_client.enable_cache(str(cache_path))
    segment1 = Segment("Segment1", client=dataset_client)
    for data in segment1:
        data.open()
        data.label.semantic_mask.open()
        data.label.instance_mask.open()

    segment_cache_path = (
        cache_path / dataset_client.dataset_id / dataset_client.status.commit_id / "Segment1"
    )
    semantic_mask_cache_path = segment_cache_path / "semantic_mask"
    instance_mask_cache_path = segment_cache_path / "instance_mask"

    for cache_dir, extension in (
        (segment_cache_path, "txt"),
        (semantic_mask_cache_path, "png"),
        (instance_mask_cache_path, "png"),
    ):
        assert set(cache_dir.glob(f"*.{extension}")) == set(
            cache_dir / f"hello{i}.{extension}" for i in range(_SEGMENT_LENGTH)
        )

    gas_client.delete_dataset(dataset_name)
def _get_semi_supervised_labeled_data(
    image_path: str, root_path: str, resolution: str, segment_name: str
) -> Data:
    data = Data(image_path)
    label = data.label
    mask_stem = os.path.splitext(os.path.basename(data.path))[0]
    mask_path = os.path.join(resolution, segment_name, f"{mask_stem}.png")
    label.instance_mask = InstanceMask(os.path.join(root_path, "Annotations", mask_path))
    label.semantic_mask = SemanticMask(os.path.join(root_path, "Annotations_semantics", mask_path))
    return data
def _get_data(keypoints_info: List[str], image_path: str, parsing_path: str) -> Data:
    stem = os.path.splitext(keypoints_info[0])[0]
    data = Data(os.path.join(image_path, f"{stem}.jpg"))
    label = data.label
    label.semantic_mask = SemanticMask(os.path.join(parsing_path, f"{stem}.png"))

    keypoints = LabeledKeypoints2D()
    # Skip the filename, then read the flat values as (x, y, v) triplets;
    # the visibility flag from the file is inverted, and non-numeric
    # coordinates mark missing keypoints.
    for x, y, v in chunked(islice(keypoints_info, 1, None), 3):
        keypoints.append(
            Keypoint2D(float(x), float(y), 1 - int(v))
            if x.isnumeric()
            else Keypoint2D(0, 0, 0)
        )
    label.keypoints2d = [keypoints]
    return data
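# A minimal sketch of the triplet parsing above, using a made-up annotation
# row (chunked comes from more_itertools, islice from itertools):
#
#     >>> row = ["img.jpg", "10", "20", "0", "30", "40", "1"]
#     >>> list(chunked(islice(row, 1, None), 3))
#     [['10', '20', '0'], ['30', '40', '1']]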
def _get_data_10k(
    image_path: str,
    original_mask_paths: Dict[str, str],
    label_content: Dict[str, Any],
    single_channel_mask_paths: Dict[str, str],
) -> Data:
    data = Data(image_path)
    polygon: List[LabeledPolygon] = []
    for label_info in label_content["labels"]:
        if "poly2d" in label_info:
            _add_poly2d_label_10k(label_info, polygon)
    label = data.label
    label.polygon = polygon

    stem = os.path.splitext(os.path.basename(image_path))[0]
    label.semantic_mask = SemanticMask(os.path.join(original_mask_paths["sem"], f"{stem}.png"))
    label.instance_mask = _get_instance_mask(
        stem, original_mask_paths["ins"], single_channel_mask_paths["ins"]
    )
    label.panoptic_mask = _get_panoptic_mask(
        stem, original_mask_paths["pan"], single_channel_mask_paths["pan"]
    )
    return data
def _get_mots_data(
    image_path: str,
    original_mask_subdir: str,
    semantic_subdir: str,
    instance_subdir: str,
    stem: str,
    *,
    label_content: Dict[str, Any],
) -> Data:
    data = Data(image_path)
    labeled_multipolygons = []
    for label_info in label_content.get("labels", ()):
        if "poly2d" not in label_info:
            continue
        labeled_multipolygon = LabeledMultiPolygon(
            polygons=(poly2d_info["vertices"] for poly2d_info in label_info["poly2d"]),
            category=label_info["category"],
            attributes=label_info["attributes"],
            instance=str(label_info["id"]),
        )
        labeled_multipolygons.append(labeled_multipolygon)

    semantic_path = os.path.join(semantic_subdir, f"{stem}.png")
    instance_path = os.path.join(instance_subdir, f"{stem}.png")
    mask_info = _save_and_get_mask_info(
        os.path.join(original_mask_subdir, f"{stem}.png"),
        semantic_path,
        instance_path,
        os.path.join(instance_subdir, f"{stem}.json"),
    )
    ins_mask = InstanceMask(instance_path)
    ins_mask.all_attributes = mask_info["all_attributes"]

    label = data.label
    label.multi_polygon = labeled_multipolygons
    label.semantic_mask = SemanticMask(semantic_path)
    label.instance_mask = ins_mask
    return data
def CIHP(path: str) -> Dataset:
    """`CIHP <https://github.com/Engineering-Course/CIHP_PGN>`_ dataset.

    The file structure should be like::

        <path>
            Testing/
                Images/
                    0000002.jpg
                    ...
                test_id.txt
            Training/
                Images/
                    0000006.jpg
                    ...
                Category_ids/
                    0000006.png
                    ...
                Instance_ids/
                    0000006.png
                    ...
                train_id.txt
            Validation/
                Images/
                    0000001.jpg
                    ...
                Category_ids/
                    0000001.png
                    ...
                Instance_ids/
                    0000001.png
                    ...
                val_id.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.join(
        os.path.abspath(os.path.expanduser(path)), "instance-level_human_parsing"
    )
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name, segment_path in _SEGMENTS_INFO.items():
        segment = dataset.create_segment(segment_name)
        segment_abspath = os.path.join(root_path, segment_path)
        image_path = os.path.join(segment_abspath, "Images")
        with open(
            os.path.join(segment_abspath, f"{segment_name}_id.txt"), encoding="utf-8"
        ) as fp:
            if segment_name == "test":
                # The test split ships images only, without mask labels.
                for stem in fp:
                    segment.append(Data(os.path.join(image_path, f"{stem.rstrip()}.jpg")))
            else:
                category_path = os.path.join(segment_abspath, "Category_ids")
                instance_path = os.path.join(segment_abspath, "Instance_ids")
                for stem in fp:
                    stem = stem.rstrip()
                    data = Data(os.path.join(image_path, f"{stem}.jpg"))
                    label = data.label
                    png_filename = f"{stem}.png"
                    label.semantic_mask = SemanticMask(os.path.join(category_path, png_filename))
                    label.instance_mask = InstanceMask(os.path.join(instance_path, png_filename))
                    segment.append(data)

    return dataset
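# A hypothetical usage sketch; the path is a placeholder for a local CIHP copy:
#
#     dataset = CIHP("~/datasets/CIHP")
#     for segment in dataset:
#         print(segment.name, len(segment))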
def test_init(self):
    semantic_mask = SemanticMask("semantic_mask.png")
    assert semantic_mask.path == "semantic_mask.png"