def test_copy_fusion_segment(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name, is_fusion=True)

    dataset = FusionDataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    segment.sensors.add(Sensor.loads(LIDAR_DATA))
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        frame = Frame()
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        frame[LIDAR_DATA["name"]] = data
        segment.append(frame)

    dataset_client = gas_client.upload_dataset(dataset)
    segment_client = dataset_client.copy_segment("Segment1", "Segment2")
    assert segment_client.name == "Segment2"

    with pytest.raises(InvalidParamsError):
        dataset_client.copy_segment("Segment1", "Segment3", strategy="push")

    segment2 = FusionSegment("Segment2", client=dataset_client)
    assert segment2[0][LIDAR_DATA["name"]].path == "hello0.txt"
    assert (
        segment2[0][LIDAR_DATA["name"]].path
        == segment[0][LIDAR_DATA["name"]].target_remote_path
    )
    assert segment2[0][LIDAR_DATA["name"]].label

    gas_client.delete_dataset(dataset_name)
def test_import_cloud_files_to_fusiondataset(self, accesskey, url, config_name):
    gas_client = GAS(access_key=accesskey, url=url)
    try:
        cloud_client = gas_client.get_cloud_client(config_name)
    except ResourceNotExistError:
        pytest.skip(f"skip this case because there's no {config_name} config")

    auth_data = cloud_client.list_auth_data("tests")[:5]
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name, True, config_name=config_name)

    dataset = FusionDataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    lidar = Lidar("LIDAR")
    segment.sensors.add(lidar)

    for data in auth_data:
        data.label.classification = Classification("cat", attributes={"color": "red"})
        frame = Frame()
        frame["LIDAR"] = data
        segment.append(frame)

    dataset_client = gas_client.upload_dataset(dataset, jobs=5)
    dataset_client.commit("import data")

    segment1 = FusionSegment("Segment1", client=dataset_client)
    assert len(segment1) == len(segment)
    assert segment1[0]["LIDAR"].path == segment[0]["LIDAR"].path.split("/")[-1]
    assert segment1[0]["LIDAR"].label.classification.category == "cat"
    assert segment1[0]["LIDAR"].label.classification.attributes["color"] == "red"
    assert len(auth_data) == len(segment)

    gas_client.delete_dataset(dataset_name)
def test_create_and_upload_dataset_with_config(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    try:
        gas_client.get_auth_storage_config(name=_LOCAL_CONFIG_NAME)
    except ResourceNotExistError:
        pytest.skip(f"skip this case because there's no {_LOCAL_CONFIG_NAME} config")

    gas_client.create_dataset(dataset_name, config_name=_LOCAL_CONFIG_NAME)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(5):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    assert dataset_client.get_catalog()

    segment1 = Segment("Segment1", client=dataset_client)
    assert len(segment1) == 5
    for i in range(5):
        assert segment1[i].path == f"hello{i}.txt"
        assert segment1[i].label

    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_only_with_file(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    dataset.notes.is_continuous = True
    segment = dataset.create_segment("Segment1")
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        segment.append(Data(local_path=str(local_path)))

    dataset_client = gas_client.upload_dataset(dataset)
    assert dataset_client.status.branch_name == DEFAULT_BRANCH
    assert dataset_client.status.draft_number
    assert not dataset_client.status.commit_id
    assert dataset_client.get_notes().is_continuous is True
    assert not dataset_client.get_catalog()

    segment1 = Segment("Segment1", client=dataset_client)
    assert len(segment1) == 10
    assert segment1[0].path == "hello0.txt"
    assert not segment1[0].label

    gas_client.delete_dataset(dataset_name)
def test_copy_data(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    segment_client = dataset_client.get_segment("Segment1")
    segment_client.copy_data("hello0.txt", "goodbye0.txt")
    segment_client.copy_data("hello1.txt", "hello10.txt")

    with pytest.raises(InvalidParamsError):
        segment_client.copy_data("hello2.txt", "see_you.txt", strategy="push")

    segment2 = Segment("Segment1", client=dataset_client)
    assert segment2[0].path == "goodbye0.txt"
    assert segment2[3].path == "hello10.txt"
    assert segment2[1].label

    gas_client.delete_dataset(dataset_name)
def test_move_segment(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    segment_client = dataset_client.move_segment("Segment1", "Segment2")
    assert segment_client.name == "Segment2"

    with pytest.raises(InvalidParamsError):
        dataset_client.move_segment("Segment1", "Segment3", strategy="push")

    segment2 = Segment("Segment2", client=dataset_client)
    assert segment2[0].path == "hello0.txt"
    assert segment2[0].path == segment[0].target_remote_path
    assert segment2[0].label

    gas_client.delete_dataset(dataset_name)
def test_copy_data_between_datasets(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name_1 = get_dataset_name()
    gas_client.create_dataset(dataset_name_1)

    dataset_1 = Dataset(name=dataset_name_1)
    segment_1 = dataset_1.create_segment("Segment1")
    dataset_1._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment_1.append(data)

    dataset_client_1 = gas_client.upload_dataset(dataset_1)
    dataset_client_1.commit("upload data")
    segment_client_1 = dataset_client_1.get_segment("Segment1")

    dataset_name_2 = dataset_name_1 + "_2"
    dataset_client_2 = gas_client.create_dataset(dataset_name_2)
    dataset_client_2.create_draft("draft_2")
    dataset_client_2.create_segment("Segment1")
    segment_client_2 = dataset_client_2.get_segment("Segment1")
    segment_client_2.copy_data("hello0.txt", "hello0.txt", source_client=segment_client_1)

    segment2 = Segment("Segment1", client=dataset_client_2)
    assert segment2[0].path == "hello0.txt"
    assert segment2[0].label

    gas_client.delete_dataset(dataset_name_1)
    gas_client.delete_dataset(dataset_name_2)
def test_copy_segment_abort(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment1 = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment1.append(data)

    segment2 = dataset.create_segment("Segment2")
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment2.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    with pytest.raises(InternalServerError):
        dataset_client.copy_segment("Segment1", "Segment2")

    gas_client.delete_dataset(dataset_name)
def test_copy_segment_skip(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment1 = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment1.append(data)

    segment2 = dataset.create_segment("Segment2")
    for i in range(10, 20):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment2.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.copy_segment("Segment1", "Segment2", strategy="skip")

    segment_copied = Segment("Segment2", client=dataset_client)
    assert segment_copied[0].path == "hello10.txt"
    assert segment_copied[0].path == segment2[0].target_remote_path
    assert segment_copied[0].label

    gas_client.delete_dataset(dataset_name)
def test_move_data_skip(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text(f"CONTENT_{i}")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    segment_client = dataset_client.get_segment("Segment1")
    # With strategy="skip" the existing target "hello1.txt" keeps its own
    # content, and the source entry no longer appears in the segment.
    segment_client.move_data("hello0.txt", "hello1.txt", strategy="skip")

    segment_moved = Segment("Segment1", client=dataset_client)
    assert segment_moved[0].path == "hello1.txt"
    assert segment_moved[0].open().read() == b"CONTENT_1"

    gas_client.delete_dataset(dataset_name)
def test_import_cloud_files(self, accesskey, url, config_name):
    gas_client = GAS(access_key=accesskey, url=url)
    try:
        cloud_client = gas_client.get_cloud_client(config_name)
    except ResourceNotExistError:
        pytest.skip(f"skip this case because there's no {config_name} config")

    auth_data = cloud_client.list_auth_data("tests")
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name, config_name=config_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    for data in auth_data:
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset, jobs=5)
    dataset_client.commit("import data")

    segment1 = Segment("Segment1", client=dataset_client)
    assert len(segment1) == len(segment)
    assert segment1[0].path == segment[0].path.split("/")[-1]
    assert not segment1[0].label
    assert len(auth_data) == len(segment)

    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(CATALOG_CONTENTS)
    path = tmp_path / "sub"
    path.mkdir()
    local_path = path / "hello.txt"
    local_path.write_text("CONTENT")
    data = Data(local_path=str(local_path))
    data.label = Label.loads(LABEL)
    segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("upload dataset with label")

    dataset = Dataset(dataset_name, gas_client)
    assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
    assert dataset[0][0].label == Label.loads(LABEL)

    statistics1 = dataset_client.get_label_statistics()
    assert statistics1 == Statistics(STATISTICS)

    total_size = dataset_client.get_total_size()
    assert total_size == TOTALSIZE

    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    assert dataset_client.get_catalog()

    segment1 = Segment("Segment1", client=dataset_client)
    assert len(segment1) == 10
    assert segment1[0].path == "hello0.txt"
    assert segment1[0].label

    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_after_commit(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    dataset._catalog = Catalog.loads(CATALOG)
    dataset.notes.is_continuous = True
    segment = dataset.create_segment("Segment1")
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("test")

    dataset_remote = Dataset(name=dataset_name, gas=gas_client)
    assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
    assert dataset_remote.catalog == dataset.catalog

    segment_remote = dataset_remote[0]
    assert len(segment_remote) == len(segment)
    for remote_data, data in zip(segment_remote, segment):
        assert remote_data.path == data.target_remote_path
        assert remote_data.label == data.label

    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_to_given_branch(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client_1 = gas_client.create_dataset(dataset_name)
    dataset_client_1.create_draft("test")
    dataset_client_1.commit("test1")
    dataset_client_1.create_branch("dev")

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        segment.append(Data(local_path=str(local_path)))

    dataset_client_2 = gas_client.upload_dataset(dataset, branch_name="dev")
    assert dataset_client_2.status.branch_name == "dev"
    assert dataset_client_2.status.draft_number
    assert not dataset_client_2.status.commit_id

    segment1 = Segment("Segment1", client=dataset_client_2)
    assert len(segment1) == 10
    assert segment1[0].path == "hello0.txt"
    assert not segment1[0].label

    dataset_client_2.commit("test2")
    draft_number = dataset_client_2.create_draft("test2")
    for i in range(10):
        local_path = path / f"goodbye{i}.txt"
        local_path.write_text("CONTENT")
        segment.append(Data(local_path=str(local_path)))

    dataset_client_2 = gas_client.upload_dataset(dataset, branch_name="dev")
    assert dataset_client_2.status.branch_name == "dev"
    assert dataset_client_2.status.draft_number == draft_number
    assert not dataset_client_2.status.commit_id

    with pytest.raises(ResourceNotExistError):
        gas_client.upload_dataset(dataset, branch_name="wrong")

    gas_client.delete_dataset(dataset_name)
def test_copy_data_from_commits(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("commit_1")

    for i in range(10, 20):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("commit_2")

    dataset_client_1 = gas_client.get_dataset(dataset_name)
    commit_id = dataset_client_1.list_commits()[-1].commit_id
    dataset_client_1.checkout(revision=commit_id)

    dataset_client.create_draft("draft_3")
    segment_client_1 = dataset_client_1.get_segment("Segment1")
    segment_client_2 = dataset_client.get_segment("Segment1")
    segment_client_2.copy_data("hello0.txt", "goodbye0.txt", source_client=segment_client_1)

    segment2 = Segment("Segment1", client=dataset_client)
    assert segment2[0].path == "goodbye0.txt"
    assert segment2[0].path != segment[0].target_remote_path
    assert segment2[0].label
    assert len(segment2) == 21

    gas_client.delete_dataset(dataset_name)
def test_cache_dataset(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(_CATALOG)

    path = tmp_path / "sub"
    semantic_path = tmp_path / "semantic_mask"
    instance_path = tmp_path / "instance_mask"
    path.mkdir()
    semantic_path.mkdir()
    instance_path.mkdir()
    for i in range(_SEGMENT_LENGTH):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(_LABEL)

        semantic_mask = semantic_path / f"semantic_mask{i}.png"
        semantic_mask.write_text("SEMANTIC_MASK")
        data.label.semantic_mask = SemanticMask(str(semantic_mask))

        instance_mask = instance_path / f"instance_mask{i}.png"
        instance_mask.write_text("INSTANCE_MASK")
        data.label.instance_mask = InstanceMask(str(instance_mask))
        segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("commit-1")

    cache_path = tmp_path / "cache_test"
    dataset_client.enable_cache(str(cache_path))
    segment1 = Segment("Segment1", client=dataset_client)
    for data in segment1:
        data.open()
        data.label.semantic_mask.open()
        data.label.instance_mask.open()

    segment_cache_path = (
        cache_path / dataset_client.dataset_id / dataset_client.status.commit_id / "Segment1"
    )
    semantic_mask_cache_path = segment_cache_path / "semantic_mask"
    instance_mask_cache_path = segment_cache_path / "instance_mask"

    for cache_dir, extension in (
        (segment_cache_path, "txt"),
        (semantic_mask_cache_path, "png"),
        (instance_mask_cache_path, "png"),
    ):
        assert set(cache_dir.glob(f"*.{extension}")) == set(
            cache_dir / f"hello{i}.{extension}" for i in range(_SEGMENT_LENGTH)
        )

    gas_client.delete_dataset(dataset_name)
def test_copy_between_datasets_override(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name_1 = get_dataset_name()
    gas_client.create_dataset(dataset_name_1)

    dataset_1 = Dataset(name=dataset_name_1)
    segment_1 = dataset_1.create_segment("Segment")
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        segment_1.append(data)

    dataset_client_1 = gas_client.upload_dataset(dataset_1)
    dataset_client_1.commit("upload data")

    dataset_name_2 = dataset_name_1 + "_2"
    dataset_client_2 = gas_client.create_dataset(dataset_name_2)
    dataset_2 = Dataset(name=dataset_name_2)
    segment_2 = dataset_2.create_segment("Segment")
    for i in range(10, 15):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        segment_2.append(data)

    dataset_client_2 = gas_client.upload_dataset(dataset_2)
    dataset_client_2.commit("upload data")

    dataset_client_2.create_draft("draft 2")
    dataset_client_2.copy_segment(
        "Segment", source_client=dataset_client_1, strategy="override"
    )
    dataset_client_2.commit("copy segment")

    segment = Segment("Segment", client=dataset_client_2)
    assert len(segment) == 10
    assert segment[0].path == "hello0.txt"

    gas_client.delete_dataset(dataset_name_1)
    gas_client.delete_dataset(dataset_name_2)
def test_upload_dataset_with_mask(self, accesskey, url, tmp_path, mask_file):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(CATALOG_CONTENTS)
    path = tmp_path / "sub"
    path.mkdir()
    local_path = path / "hello.txt"
    local_path.write_text("CONTENT")
    data = Data(local_path=str(local_path))

    remote_semantic_mask = SemanticMask(str(mask_file))
    remote_semantic_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
    data.label.semantic_mask = remote_semantic_mask

    instance_mask = InstanceMask(str(mask_file))
    instance_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
    data.label.instance_mask = instance_mask

    panoptic_mask = PanopticMask(str(mask_file))
    panoptic_mask.all_category_ids = {100: 0, 101: 1}
    data.label.panoptic_mask = panoptic_mask
    segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("upload dataset with label")

    dataset = Dataset(dataset_name, gas_client)
    remote_semantic_mask = dataset[0][0].label.semantic_mask
    semantic_mask = RemoteSemanticMask.from_response_body(SEMANTIC_MASK_LABEL)
    assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
    assert remote_semantic_mask.path == semantic_mask.path
    assert remote_semantic_mask.all_attributes == semantic_mask.all_attributes

    remote_instance_mask = dataset[0][0].label.instance_mask
    instance_mask = RemoteInstanceMask.from_response_body(INSTANCE_MASK_LABEL)
    assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
    assert remote_instance_mask.path == instance_mask.path
    assert remote_instance_mask.all_attributes == instance_mask.all_attributes

    remote_panoptic_mask = dataset[0][0].label.panoptic_mask
    panoptic_mask = RemotePanopticMask.from_response_body(PANOPTIC_MASK_LABEL)
    assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
    assert remote_panoptic_mask.path == panoptic_mask.path
    assert remote_panoptic_mask.all_category_ids == panoptic_mask.all_category_ids

    gas_client.delete_dataset(dataset_name)
def test_upload_label(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(CATALOG_CONTENTS)
    path = tmp_path / "sub"
    path.mkdir()
    local_path = path / "hello.txt"
    local_path.write_text("CONTENT")
    data = Data(local_path=str(local_path))
    data.label = Label.loads(LABEL)
    segment.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("upload dataset with label")

    dataset = Dataset(dataset_name, gas_client)
    assert dataset[0][0].label == Label.loads(LABEL)

    dataset_client.create_draft("update label")
    segment_client = dataset_client.get_segment(segment.name)
    upload_data = []
    new_label = Label.loads(LABEL)
    new_label.multi_polygon[0].category = "dog"
    for data in segment:
        data.label = new_label
        upload_data.append(data)
    segment_client.upload_label(upload_data)
    dataset_client.commit("update label")

    dataset = Dataset(dataset_name, gas_client)
    assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
    assert dataset[0][0].label == new_label

    gas_client.delete_dataset(dataset_name)
def test_move_segment_override(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)

    dataset = Dataset(name=dataset_name)
    segment1 = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT_1")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment1.append(data)

    segment2 = dataset.create_segment("Segment2")
    for i in range(10, 20):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT_2")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment2.append(data)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.move_segment("Segment1", "Segment2", strategy="override")
    with pytest.raises(ResourceNotExistError):
        dataset_client.get_segment("Segment1")

    segment_moved = Segment("Segment2", client=dataset_client)
    assert segment_moved[0].path == "hello0.txt"
    assert segment_moved[0].path == segment1[0].target_remote_path
    assert segment_moved[0].open().read() == b"CONTENT_1"
    assert segment_moved[0].label

    gas_client.delete_dataset(dataset_name)
def test_upload_fusion_dataset_after_commit(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name, is_fusion=True)

    dataset = FusionDataset(name=dataset_name)
    dataset._catalog = Catalog.loads(CATALOG)
    dataset.notes.is_continuous = True
    segment = dataset.create_segment("Segment1")
    segment.sensors = Sensors.loads([LIDAR_DATA])
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        frame = Frame()
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        frame[LIDAR_NAME] = data
        segment.append(frame)

    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("test")

    dataset_remote = FusionDataset(name=dataset_name, gas=gas_client)
    assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
    assert dataset_remote.catalog == dataset.catalog

    segment_remote = dataset_remote[0]
    assert len(segment_remote) == len(segment)
    assert segment_remote.sensors == segment.sensors
    for remote_frame, frame in zip(segment_remote, segment):
        assert remote_frame[LIDAR_NAME].path == frame[LIDAR_NAME].target_remote_path
        assert remote_frame[LIDAR_NAME].label == frame[LIDAR_NAME].label

    gas_client.delete_dataset(dataset_name)
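"""Authorize a Client Instance"""
# The snippets below assume a "gas" client authorized with an accesskey,
# as in the tests above. A minimal sketch -- "<YOUR_ACCESSKEY>" is a
# placeholder, not a real key.
from tensorbay import GAS

gas = GAS("<YOUR_ACCESSKEY>")
""""""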
"""List Dataset Names""" dataset_list = list(gas.list_dataset_names()) """""" """Upload Images to the Dataset""" from tensorbay.dataset import Data, Dataset # Organize the local dataset by the "Dataset" class before uploading. dataset = Dataset("DatasetName") # TensorBay uses "segment" to separate different parts in a dataset. segment = dataset.create_segment() segment.append(Data("0000001.jpg")) segment.append(Data("0000002.jpg")) dataset_client = gas.upload_dataset(dataset) # TensorBay provides dataset version control feature, commit the uploaded data before using it. dataset_client.commit("Initial commit") """""" """Read Images from the Dataset""" from PIL import Image from tensorbay.dataset import Segment dataset_client = gas.get_dataset("DatasetName") segment = Segment("", dataset_client) for data in segment: with data.open() as fp: image = Image.open(fp)
"""Create Dataset""" gas.create_dataset("LeedsSportsPose") """""" """List Dataset Names""" list(gas.list_dataset_names()) """""" from tensorbay.opendataset import LeedsSportsPose dataset = LeedsSportsPose("path/to/dataset/directory") """Upload Dataset""" # dataset is the one you initialized in "Organize Dataset" section dataset_client = gas.upload_dataset(dataset, jobs=8, skip_uploaded_files=False) dataset_client.commit("LeedsSportsPose") """""" """Read Dataset / get dataset""" dataset_client = gas.get_dataset("LeedsSportsPose") """""" """Read Dataset / get segment""" from tensorbay.dataset import Segment default_segment = Segment("", dataset_client) """""" """Read Dataset / get data""" data = default_segment[0]
import json

from tensorbay.dataset import Dataset
from tensorbay.label import Classification

# Use AuthData to organize a dataset by the "Dataset" class before importing.
dataset = Dataset("<DATASET_NAME>")

# TensorBay uses "segment" to separate different parts in a dataset.
segment = dataset.create_segment()

# "cloud_client" is an authorized cloud storage client; see the sketch after this section.
images = cloud_client.list_auth_data("<data/images/>")
labels = cloud_client.list_auth_data("<data/labels/>")

for auth_data, label in zip(images, labels):
    with label.open() as fp:
        auth_data.label.classification = Classification.loads(json.load(fp))
    segment.append(auth_data)

dataset_client = gas.upload_dataset(dataset, jobs=8)
""""""

"""Create local storage config"""
gas.create_local_storage_config(
    name="<LOCAL_STORAGE_CONFIG>",
    file_path="<path/to/dataset>",
    endpoint="<external IP address of the local storage service>",
)
""""""

"""Create authorized local storage dataset"""
dataset_client = gas.create_dataset("<DATASET_NAME>", config_name="<LOCAL_STORAGE_CONFIG>")
""""""
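"""Get cloud client"""
# A minimal sketch of obtaining the "cloud_client" assumed above, using the
# get_cloud_client call seen in the tests; "<CONFIG_NAME>" is a placeholder
# for an existing authorized cloud storage config.
cloud_client = gas.get_cloud_client("<CONFIG_NAME>")
""""""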
"""Create Fusion Dataset""" gas.create_dataset("CADC", is_fusion=True) """""" """List Dataset Names""" gas.list_dataset_names() """""" from tensorbay.opendataset import CADC fusion_dataset = CADC("<path/to/dataset>") """Upload Fusion Dataset""" # fusion_dataset is the one you initialized in "Organize Fusion Dataset" section fusion_dataset_client = gas.upload_dataset(fusion_dataset, jobs=8) fusion_dataset_client.commit("initial commit") """""" """Read Fusion Dataset / get fusion dataset""" fusion_dataset = FusionDataset("CADC", gas) """""" """Read Fusion Dataset / list fusion segment names""" fusion_dataset.keys() """""" """Read Fusion Dataset / get fusion segment""" fusion_segment = fusion_dataset["2018_03_06/0001"] fusion_segment = fusion_dataset[0] """"""
dataset = Dataset("<DATASET_NAME>", gas) for segment in dataset: update_data = [] for data in segment: data.label.classification = Classification( "NEW_CATEGORY") # set new label update_data.append(data) segment_client = dataset_client.get_segment(segment.name) segment_client.upload_label(update_data) """""" """Update label / commit dataset""" dataset_client.commit("update labels") """""" """Update data/ upload dataset""" gas.upload_dataset(dataset, jobs=8, skip_uploaded_files=True) """""" """Update data/ overwrite dataset""" gas.upload_dataset(dataset, jobs=8) """""" """Update data/ delete segment""" dataset_client.create_draft("draft-3") dataset_client.delete_segment("<SEGMENT_NAME>") """""" """Update data/ delete data""" segment_client = dataset_client.get_segment("<SEGMENT_NAME>") segment_client.delete_data("a.png") """""" """Delete frame""" segment_client.delete_frame("00000000003W09TEMC1HXYMC74") """"""