Example #1
0
    def test_copy_fusion_segment(self, accesskey, url, tmp_path):
        """Copy a fusion segment within one dataset and verify the copy.

        Ten labeled frames are uploaded to "Segment1", which is then copied to
        "Segment2". The test also expects ``InvalidParamsError`` for the
        ``"push"`` strategy and checks the path and label of the first copied
        frame.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name, is_fusion=True)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = FusionDataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            segment.sensors.add(Sensor.loads(LIDAR_DATA))
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                frame = Frame()
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                frame[LIDAR_DATA["name"]] = data
                segment.append(frame)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.copy_segment("Segment1", "Segment2")
            assert segment_client.name == "Segment2"

            with pytest.raises(InvalidParamsError):
                dataset_client.copy_segment("Segment1", "Segment3", strategy="push")

            segment2 = FusionSegment("Segment2", client=dataset_client)
            assert segment2[0][LIDAR_DATA["name"]].path == "hello0.txt"
            assert (
                segment2[0][LIDAR_DATA["name"]].path
                == segment[0][LIDAR_DATA["name"]].target_remote_path
            )
            assert segment2[0][LIDAR_DATA["name"]].label
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_import_cloud_files_to_fusiondataset(self, accesskey, url, config_name):
        """Import cloud files into a fusion dataset and verify data and labels.

        The test is skipped when the ``config_name`` storage config does not
        exist. At most five auth data entries listed under "tests" are given a
        classification label, wrapped in frames, uploaded and committed; the
        remote segment is then compared with the local one.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        try:
            cloud_client = gas_client.get_cloud_client(config_name)
        except ResourceNotExistError:
            pytest.skip(f"skip this case because there's no {config_name} config")

        auth_data = cloud_client.list_auth_data("tests")[:5]
        dataset_name = get_dataset_name()
        # The returned client is not needed: upload_dataset() below returns the one used.
        gas_client.create_dataset(dataset_name, True, config_name=config_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = FusionDataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            lidar = Lidar("LIDAR")
            segment.sensors.add(lidar)

            for data in auth_data:
                data.label.classification = Classification("cat", attributes={"color": "red"})
                frame = Frame()
                frame["LIDAR"] = data
                segment.append(frame)

            dataset_client = gas_client.upload_dataset(dataset, jobs=5)
            dataset_client.commit("import data")

            segment1 = FusionSegment("Segment1", client=dataset_client)
            assert len(segment1) == len(segment)
            # The remote path is expected to be the last component of the cloud path.
            assert segment1[0]["LIDAR"].path == segment[0]["LIDAR"].path.split("/")[-1]
            assert segment1[0]["LIDAR"].label.classification.category == "cat"
            assert segment1[0]["LIDAR"].label.classification.attributes["color"] == "red"
            assert len(auth_data) == len(segment)
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_create_and_upload_dataset_with_config(self, accesskey, url, tmp_path):
        """Create a dataset bound to ``_LOCAL_CONFIG_NAME`` and upload labeled data.

        The test is skipped when that storage config does not exist. Five
        labeled text files are uploaded, then the catalog, the segment length,
        and every remote path and label are checked.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        try:
            gas_client.get_auth_storage_config(name=_LOCAL_CONFIG_NAME)
        except ResourceNotExistError:
            pytest.skip(f"skip this case because there's no {_LOCAL_CONFIG_NAME} config")

        gas_client.create_dataset(dataset_name, config_name=_LOCAL_CONFIG_NAME)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG)

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(5):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            assert dataset_client.get_catalog()
            segment1 = Segment("Segment1", client=dataset_client)
            assert len(segment1) == 5
            for i in range(5):
                assert segment1[i].path == f"hello{i}.txt"
                assert segment1[i].label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #4
0
    def test_upload_dataset_only_with_file(self, accesskey, url, tmp_path):
        """Upload a dataset that has files but neither catalog nor labels.

        After the upload the client is expected to sit on a draft of
        ``DEFAULT_BRANCH`` (draft number set, no commit id), the notes to keep
        ``is_continuous``, the catalog to be empty, and the ten uploaded files
        to carry no label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            dataset.notes.is_continuous = True
            segment = dataset.create_segment("Segment1")

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                segment.append(Data(local_path=str(local_path)))

            dataset_client = gas_client.upload_dataset(dataset)
            assert dataset_client.status.branch_name == DEFAULT_BRANCH
            assert dataset_client.status.draft_number
            assert not dataset_client.status.commit_id

            assert dataset_client.get_notes().is_continuous is True
            assert not dataset_client.get_catalog()
            segment1 = Segment("Segment1", client=dataset_client)
            assert len(segment1) == 10
            assert segment1[0].path == "hello0.txt"
            assert not segment1[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #5
0
    def test_copy_data(self, accesskey, url, tmp_path):
        """Copy single data items inside one segment and check the result.

        "hello0.txt" is copied to "goodbye0.txt" and "hello1.txt" to
        "hello10.txt"; the ``"push"`` strategy is expected to raise
        ``InvalidParamsError``. The positions asserted afterwards (index 0 and
        index 3) rely on the order in which the segment returns its data.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.get_segment("Segment1")
            segment_client.copy_data("hello0.txt", "goodbye0.txt")
            segment_client.copy_data("hello1.txt", "hello10.txt")

            with pytest.raises(InvalidParamsError):
                segment_client.copy_data("hello2.txt", "see_you.txt", strategy="push")

            segment2 = Segment("Segment1", client=dataset_client)
            assert segment2[0].path == "goodbye0.txt"
            assert segment2[3].path == "hello10.txt"
            assert segment2[1].label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #6
0
    def test_move_segment(self, accesskey, url, tmp_path):
        """Move "Segment1" to "Segment2" and verify the moved content.

        The ``"push"`` strategy is expected to raise ``InvalidParamsError``.
        The first item of the moved segment must keep its remote path and its
        label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.move_segment("Segment1", "Segment2")
            assert segment_client.name == "Segment2"

            with pytest.raises(InvalidParamsError):
                dataset_client.move_segment("Segment1", "Segment3", strategy="push")

            segment2 = Segment("Segment2", client=dataset_client)
            assert segment2[0].path == "hello0.txt"
            assert segment2[0].path == segment[0].target_remote_path
            assert segment2[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #7
0
    def test_copy_data_between_datasets(self, accesskey, url, tmp_path):
        """Copy one data item from a committed dataset into a draft of another.

        Dataset 1 receives ten labeled files and is committed. Dataset 2 gets a
        draft with an empty "Segment1", into which "hello0.txt" is copied via
        ``source_client``. The copied item must keep its path and label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name_1 = get_dataset_name()
        gas_client.create_dataset(dataset_name_1)
        # Nested try/finally: each dataset is removed once it exists, and a
        # failure while deleting one does not prevent deleting the other.
        try:
            dataset_1 = Dataset(name=dataset_name_1)
            segment_1 = dataset_1.create_segment("Segment1")
            dataset_1._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment_1.append(data)
            dataset_client_1 = gas_client.upload_dataset(dataset_1)
            dataset_client_1.commit("upload data")
            segment_client_1 = dataset_client_1.get_segment("Segment1")

            dataset_name_2 = dataset_name_1 + "_2"
            dataset_client_2 = gas_client.create_dataset(dataset_name_2)
            try:
                dataset_client_2.create_draft("draft_2")
                dataset_client_2.create_segment("Segment1")
                segment_client_2 = dataset_client_2.get_segment("Segment1")

                segment_client_2.copy_data(
                    "hello0.txt", "hello0.txt", source_client=segment_client_1
                )

                segment2 = Segment("Segment1", client=dataset_client_2)
                assert segment2[0].path == "hello0.txt"
                assert segment2[0].label
            finally:
                gas_client.delete_dataset(dataset_name_2)
        finally:
            gas_client.delete_dataset(dataset_name_1)
Example #8
0
    def test_copy_segment_abort(self, accesskey, url, tmp_path):
        """Expect a default-strategy segment copy onto conflicting data to fail.

        "Segment1" and "Segment2" are both filled with hello0..hello9, so
        copying "Segment1" into "Segment2" without an explicit strategy is
        expected to raise ``InternalServerError``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if the expected error is not raised.
        try:
            dataset = Dataset(name=dataset_name)
            segment1 = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment1.append(data)

            # Same file names as in Segment1 on purpose, to create the conflict.
            segment2 = dataset.create_segment("Segment2")
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment2.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            with pytest.raises(InternalServerError):
                dataset_client.copy_segment("Segment1", "Segment2")
        finally:
            gas_client.delete_dataset(dataset_name)
Example #9
0
    def test_copy_segment_skip(self, accesskey, url, tmp_path):
        """Copy "Segment1" into an existing "Segment2" with the "skip" strategy.

        "Segment1" holds hello0..hello9 and "Segment2" holds hello10..hello19.
        After the copy, the first item of "Segment2" is expected to still be
        "hello10.txt" with its label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment1 = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment1.append(data)

            segment2 = dataset.create_segment("Segment2")
            for i in range(10, 20):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment2.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.copy_segment("Segment1", "Segment2", strategy="skip")

            segment_copied = Segment("Segment2", client=dataset_client)
            assert segment_copied[0].path == "hello10.txt"
            assert segment_copied[0].path == segment2[0].target_remote_path
            assert segment_copied[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #10
0
    def test_move_data_skip(self, accesskey, url, tmp_path):
        """Move a data item onto an existing one with the "skip" strategy.

        Every file gets distinct content (``CONTENT_<i>``) so the test can tell
        which file survived: after moving "hello0.txt" onto "hello1.txt", the
        first item must be "hello1.txt" and still contain ``CONTENT_1``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text(f"CONTENT_{i}")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.get_segment("Segment1")

            segment_client.move_data("hello0.txt", "hello1.txt", strategy="skip")

            segment_moved = Segment("Segment1", client=dataset_client)
            assert segment_moved[0].path == "hello1.txt"
            assert segment_moved[0].open().read() == b"CONTENT_1"
        finally:
            gas_client.delete_dataset(dataset_name)
Example #11
0
    def test_import_cloud_files(self, accesskey, url, config_name):
        """Import every auth data entry listed under "tests" into a dataset.

        The test is skipped when the ``config_name`` storage config does not
        exist. After upload and commit, the remote segment must have the same
        length as the local one, paths reduced to their last component, and no
        labels.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        try:
            cloud_client = gas_client.get_cloud_client(config_name)
        except ResourceNotExistError:
            pytest.skip(
                f"skip this case because there's no {config_name} config")

        auth_data = cloud_client.list_auth_data("tests")
        dataset_name = get_dataset_name()
        # The returned client is not needed: upload_dataset() below returns the one used.
        gas_client.create_dataset(dataset_name, config_name=config_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            for data in auth_data:
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset, jobs=5)
            dataset_client.commit("import data")

            segment1 = Segment("Segment1", client=dataset_client)
            assert len(segment1) == len(segment)
            assert segment1[0].path == segment[0].path.split("/")[-1]
            assert not segment1[0].label

            assert len(auth_data) == len(segment)
        finally:
            gas_client.delete_dataset(dataset_name)
Example #12
0
    def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
        """Upload one labeled file and verify catalog, label, statistics and size.

        After committing, the dataset is read back and compared against
        ``CATALOG_CONTENTS`` and ``LABEL``; the label statistics and total size
        reported by the dataset client are compared against ``STATISTICS`` and
        ``TOTALSIZE``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG_CONTENTS)

            path = tmp_path / "sub"
            path.mkdir()
            local_path = path / "hello.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("upload dataset with label")
            # Rebind to the dataset read back through the client.
            dataset = Dataset(dataset_name, gas_client)
            assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
            assert dataset[0][0].label == Label.loads(LABEL)

            statistics1 = dataset_client.get_label_statistics()
            assert statistics1 == Statistics(STATISTICS)

            total_size = dataset_client.get_total_size()
            assert total_size == TOTALSIZE
        finally:
            gas_client.delete_dataset(dataset_name)
Example #13
0
    def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
        """Upload ten labeled files and check catalog, length, path and label.

        The catalog must be present after the upload, "Segment1" must contain
        ten items, and its first item must be "hello0.txt" with a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG)

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            assert dataset_client.get_catalog()
            segment1 = Segment("Segment1", client=dataset_client)
            assert len(segment1) == 10
            assert segment1[0].path == "hello0.txt"
            assert segment1[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_upload_dataset_after_commit(self, accesskey, url, tmp_path):
        """Upload and commit a dataset, then read it back and compare everything.

        Notes, catalog, segment length, and each item's remote path and label
        of the dataset read back through the client must match the local
        dataset.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            dataset._catalog = Catalog.loads(CATALOG)
            dataset.notes.is_continuous = True
            segment = dataset.create_segment("Segment1")

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("test")
            dataset_remote = Dataset(name=dataset_name, gas=gas_client)
            assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
            assert dataset_remote.catalog == dataset.catalog

            segment_remote = dataset_remote[0]
            assert len(segment_remote) == len(segment)
            for remote_data, data in zip(segment_remote, segment):
                assert remote_data.path == data.target_remote_path
                assert remote_data.label == data.label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #15
0
    def test_upload_dataset_to_given_branch(self, accesskey, url, tmp_path):
        """Upload a dataset to an explicitly named branch.

        A "dev" branch is created from an initial commit. The first upload to
        "dev" must leave the client on a draft of that branch. After committing
        and opening a new draft, a second upload must reuse that draft number.
        Uploading to a branch named "wrong" is expected to raise
        ``ResourceNotExistError``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client_1 = gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset_client_1.create_draft("test")
            dataset_client_1.commit("test1")
            dataset_client_1.create_branch("dev")

            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                segment.append(Data(local_path=str(local_path)))

            dataset_client_2 = gas_client.upload_dataset(dataset, branch_name="dev")
            assert dataset_client_2.status.branch_name == "dev"
            assert dataset_client_2.status.draft_number
            assert not dataset_client_2.status.commit_id

            segment1 = Segment("Segment1", client=dataset_client_2)
            assert len(segment1) == 10
            assert segment1[0].path == "hello0.txt"
            assert not segment1[0].label

            dataset_client_2.commit("test2")
            draft_number = dataset_client_2.create_draft("test2")

            for i in range(10):
                local_path = path / f"goodbye{i}.txt"
                local_path.write_text("CONTENT")
                segment.append(Data(local_path=str(local_path)))

            dataset_client_2 = gas_client.upload_dataset(dataset, branch_name="dev")
            assert dataset_client_2.status.branch_name == "dev"
            # The second upload must go to the draft opened just above.
            assert dataset_client_2.status.draft_number == draft_number
            assert not dataset_client_2.status.commit_id

            with pytest.raises(ResourceNotExistError):
                gas_client.upload_dataset(dataset, branch_name="wrong")
        finally:
            gas_client.delete_dataset(dataset_name)
Example #16
0
    def test_copy_data_from_commits(self, accesskey, url, tmp_path):
        """Copy a data item from an earlier commit into a new draft.

        Two commits are made (hello0..hello9, then hello10..hello19 added). A
        second client checks out the last entry of ``list_commits()`` and
        serves as the copy source, while the first client opens a new draft.
        After copying "hello0.txt" to "goodbye0.txt" the segment must contain
        21 items.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("commit_1")

            for i in range(10, 20):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)
            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("commit_2")

            dataset_client_1 = gas_client.get_dataset(dataset_name)
            commit_id = dataset_client_1.list_commits()[-1].commit_id
            dataset_client_1.checkout(revision=commit_id)
            dataset_client.create_draft("draft_3")
            segment_client_1 = dataset_client_1.get_segment("Segment1")
            segment_client_2 = dataset_client.get_segment("Segment1")
            segment_client_2.copy_data(
                "hello0.txt", "goodbye0.txt", source_client=segment_client_1
            )

            segment2 = Segment("Segment1", client=dataset_client)
            assert segment2[0].path == "goodbye0.txt"
            assert segment2[0].path != segment[0].target_remote_path
            assert segment2[0].label
            assert len(segment2) == 21
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_cache_dataset(self, accesskey, url, tmp_path):
        """Enable the local cache and check which files opening the data creates.

        Each of the ``_SEGMENT_LENGTH`` items carries a semantic and an
        instance mask. After committing, the cache is enabled, every data file
        and mask is opened once, and the cache directory
        ``<cache>/<dataset_id>/<commit_id>/Segment1`` is expected to contain
        exactly ``hello<i>.txt`` plus ``hello<i>.png`` in the ``semantic_mask``
        and ``instance_mask`` sub-directories.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(_CATALOG)

            path = tmp_path / "sub"
            semantic_path = tmp_path / "semantic_mask"
            instance_path = tmp_path / "instance_mask"
            path.mkdir()
            semantic_path.mkdir()
            instance_path.mkdir()
            for i in range(_SEGMENT_LENGTH):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(_LABEL)

                semantic_mask = semantic_path / f"semantic_mask{i}.png"
                semantic_mask.write_text("SEMANTIC_MASK")
                data.label.semantic_mask = SemanticMask(str(semantic_mask))

                instance_mask = instance_path / f"instance_mask{i}.png"
                instance_mask.write_text("INSTANCE_MASK")
                data.label.instance_mask = InstanceMask(str(instance_mask))
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("commit-1")
            cache_path = tmp_path / "cache_test"
            dataset_client.enable_cache(str(cache_path))
            segment1 = Segment("Segment1", client=dataset_client)
            # Opening each file is what is expected to populate the cache.
            for data in segment1:
                data.open()
                data.label.semantic_mask.open()
                data.label.instance_mask.open()

            segment_cache_path = (
                cache_path / dataset_client.dataset_id / dataset_client.status.commit_id / "Segment1"
            )
            semantic_mask_cache_path = segment_cache_path / "semantic_mask"
            instance_mask_cache_path = segment_cache_path / "instance_mask"

            for cache_dir, extension in (
                (segment_cache_path, "txt"),
                (semantic_mask_cache_path, "png"),
                (instance_mask_cache_path, "png"),
            ):
                expected_files = {
                    cache_dir / f"hello{i}.{extension}" for i in range(_SEGMENT_LENGTH)
                }
                assert set(cache_dir.glob(f"*.{extension}")) == expected_files
        finally:
            gas_client.delete_dataset(dataset_name)
Example #18
0
    def test_copy_between_datasets_override(self, accesskey, url, tmp_path):
        """Copy a segment across datasets with the "override" strategy.

        Dataset 1 holds hello0..hello9 in "Segment"; dataset 2 holds
        hello10..hello14 in a segment of the same name. After copying with
        ``strategy="override"`` and committing, "Segment" in dataset 2 is
        expected to contain ten items starting with "hello0.txt".
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name_1 = get_dataset_name()
        gas_client.create_dataset(dataset_name_1)
        # Nested try/finally: each dataset is removed once it exists, and a
        # failure while deleting one does not prevent deleting the other.
        try:
            dataset_1 = Dataset(name=dataset_name_1)
            segment_1 = dataset_1.create_segment("Segment")
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                segment_1.append(data)
            dataset_client_1 = gas_client.upload_dataset(dataset_1)
            dataset_client_1.commit("upload data")

            dataset_name_2 = dataset_name_1 + "_2"
            # The returned client is not needed: upload_dataset() below returns the one used.
            gas_client.create_dataset(dataset_name_2)
            try:
                dataset_2 = Dataset(name=dataset_name_2)
                segment_2 = dataset_2.create_segment("Segment")
                for i in range(10, 15):
                    local_path = path / f"hello{i}.txt"
                    local_path.write_text("CONTENT")
                    data = Data(local_path=str(local_path))
                    segment_2.append(data)
                dataset_client_2 = gas_client.upload_dataset(dataset_2)
                dataset_client_2.commit("upload data")
                dataset_client_2.create_draft("draft 2")
                dataset_client_2.copy_segment(
                    "Segment", source_client=dataset_client_1, strategy="override"
                )
                dataset_client_2.commit("copy segmnet")

                segment = Segment("Segment", client=dataset_client_2)
                assert len(segment) == 10
                assert segment[0].path == "hello0.txt"
            finally:
                gas_client.delete_dataset(dataset_name_2)
        finally:
            gas_client.delete_dataset(dataset_name_1)
Example #19
0
    def test_upload_dataset_with_mask(self, accesskey, url, tmp_path, mask_file):
        """Upload one file with semantic, instance and panoptic masks.

        All three masks are built from ``mask_file``. After committing, the
        dataset is read back and each remote mask is compared (path plus
        attributes or category ids) with the object built from the matching
        ``*_MASK_LABEL`` response body.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG_CONTENTS)

            path = tmp_path / "sub"
            path.mkdir()
            local_path = path / "hello.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))

            semantic_mask = SemanticMask(str(mask_file))
            semantic_mask.all_attributes = {
                0: {"occluded": True},
                1: {"occluded": False},
            }
            data.label.semantic_mask = semantic_mask

            instance_mask = InstanceMask(str(mask_file))
            instance_mask.all_attributes = {
                0: {"occluded": True},
                1: {"occluded": False},
            }
            data.label.instance_mask = instance_mask

            panoptic_mask = PanopticMask(str(mask_file))
            panoptic_mask.all_category_ids = {100: 0, 101: 1}
            data.label.panoptic_mask = panoptic_mask
            segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("upload dataset with label")
            # Rebind to the dataset read back through the client.
            dataset = Dataset(dataset_name, gas_client)

            remote_semantic_mask = dataset[0][0].label.semantic_mask
            expected_semantic_mask = RemoteSemanticMask.from_response_body(SEMANTIC_MASK_LABEL)
            assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
            assert remote_semantic_mask.path == expected_semantic_mask.path
            assert remote_semantic_mask.all_attributes == expected_semantic_mask.all_attributes

            remote_instance_mask = dataset[0][0].label.instance_mask
            expected_instance_mask = RemoteInstanceMask.from_response_body(INSTANCE_MASK_LABEL)
            assert remote_instance_mask.path == expected_instance_mask.path
            assert remote_instance_mask.all_attributes == expected_instance_mask.all_attributes

            remote_panoptic_mask = dataset[0][0].label.panoptic_mask
            expected_panoptic_mask = RemotePanopticMask.from_response_body(PANOPTIC_MASK_LABEL)
            assert remote_panoptic_mask.path == expected_panoptic_mask.path
            assert (
                remote_panoptic_mask.all_category_ids == expected_panoptic_mask.all_category_ids
            )
        finally:
            gas_client.delete_dataset(dataset_name)
Example #20
0
    def test_upload_label(self, accesskey, url, tmp_path):
        """Replace the label of already uploaded data through ``upload_label``.

        One labeled file is uploaded and committed. In a new draft the label is
        replaced by a copy whose first multi-polygon category is "dog"; after
        committing, the dataset read back must carry the new label and the
        original catalog.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG_CONTENTS)

            path = tmp_path / "sub"
            path.mkdir()
            local_path = path / "hello.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("upload dataset with label")
            dataset = Dataset(dataset_name, gas_client)
            assert dataset[0][0].label == Label.loads(LABEL)

            dataset_client.create_draft("update label")
            segment_client = dataset_client.get_segment(segment.name)

            upload_data = []
            new_label = Label.loads(LABEL)
            new_label.multi_polygon[0].category = "dog"
            # `segment` is still the local segment, so these are the local Data objects.
            for data in segment:
                data.label = new_label
                upload_data.append(data)
            segment_client.upload_label(upload_data)
            dataset_client.commit("update label")
            dataset = Dataset(dataset_name, gas_client)
            assert dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
            assert dataset[0][0].label == new_label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #21
0
    def test_move_segment_override(self, accesskey, url, tmp_path):
        """Move "Segment1" onto an existing "Segment2" with the "override" strategy.

        The two segments are written with different content (``CONTENT_1`` and
        ``CONTENT_2``) so the test can tell them apart. After the move,
        "Segment1" must no longer exist and the first item of "Segment2" must
        be "hello0.txt" with content ``CONTENT_1`` and a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment1 = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT_1")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment1.append(data)

            segment2 = dataset.create_segment("Segment2")
            for i in range(10, 20):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT_2")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment2.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.move_segment("Segment1", "Segment2", strategy="override")

            with pytest.raises(ResourceNotExistError):
                dataset_client.get_segment("Segment1")

            segment_moved = Segment("Segment2", client=dataset_client)
            assert segment_moved[0].path == "hello0.txt"
            assert segment_moved[0].path == segment1[0].target_remote_path
            assert segment_moved[0].open().read() == b"CONTENT_1"
            assert segment_moved[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_upload_fusion_dataset_after_commit(self, accesskey, url, tmp_path):
        """Upload and commit a fusion dataset, then read it back and compare.

        Notes, catalog, segment length, sensors, and each frame's remote path
        and label of the dataset read back through the client must match the
        local fusion dataset.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name, is_fusion=True)
        # Remove the remote dataset even if an assertion below fails.
        try:
            dataset = FusionDataset(name=dataset_name)
            dataset._catalog = Catalog.loads(CATALOG)
            dataset.notes.is_continuous = True
            segment = dataset.create_segment("Segment1")
            segment.sensors = Sensors.loads([LIDAR_DATA])

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                remote_frame = Frame()
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                remote_frame[LIDAR_NAME] = data
                segment.append(remote_frame)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("test")
            dataset_remote = FusionDataset(name=dataset_name, gas=gas_client)
            assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
            assert dataset_remote.catalog == dataset.catalog

            segment_remote = dataset_remote[0]
            assert len(segment_remote) == len(segment)
            assert segment_remote.sensors == segment.sensors
            for remote_frame, frame in zip(segment_remote, segment):
                assert remote_frame[LIDAR_NAME].path == frame[LIDAR_NAME].target_remote_path
                # NOTE(review): this line keys by LIDAR_DATA["name"] while the others use
                # LIDAR_NAME; presumably both are the same sensor name -- confirm.
                assert remote_frame[LIDAR_DATA["name"]].label == frame[LIDAR_NAME].label
        finally:
            gas_client.delete_dataset(dataset_name)
Example #23
0
"""List Dataset Names"""
# NOTE(review): `gas` is not defined anywhere in this snippet; it has to exist
# before these lines run (presumably an authorized GAS client -- confirm).
dataset_list = list(gas.list_dataset_names())
""""""
"""Upload Images to the Dataset"""
from tensorbay.dataset import Data, Dataset

# Organize the local dataset by the "Dataset" class before uploading.
dataset = Dataset("DatasetName")

# TensorBay uses "segment" to separate different parts in a dataset.
# No name is passed here, so the segment gets whatever default name the library assigns.
segment = dataset.create_segment()

segment.append(Data("0000001.jpg"))
segment.append(Data("0000002.jpg"))

dataset_client = gas.upload_dataset(dataset)

# TensorBay provides dataset version control feature, commit the uploaded data before using it.
dataset_client.commit("Initial commit")
""""""
"""Read Images from the Dataset"""
from PIL import Image
from tensorbay.dataset import Segment

dataset_client = gas.get_dataset("DatasetName")

# The empty string is the name of the segment to read; presumably it matches the
# unnamed segment created by create_segment() above -- confirm.
segment = Segment("", dataset_client)

# Open every data item of the segment and hand the file object to PIL.
for data in segment:
    with data.open() as fp:
        image = Image.open(fp)
"""Create Dataset"""
# NOTE(review): `gas` is not defined in this excerpt; presumably an authorized
# client created by an earlier snippet -- confirm.
# Create a dataset named "LeedsSportsPose"; the return value is not kept.
gas.create_dataset("LeedsSportsPose")
""""""

"""List Dataset Names"""
# The resulting list is not bound to a name.
list(gas.list_dataset_names())
""""""

from tensorbay.opendataset import LeedsSportsPose

# Build `dataset` by calling the LeedsSportsPose loader on a local directory path.
dataset = LeedsSportsPose("path/to/dataset/directory")

"""Upload Dataset"""
# dataset is the one you initialized in the "Organize Dataset" section.
# NOTE(review): jobs=8 presumably sets the number of parallel upload workers and
# skip_uploaded_files=False presumably re-uploads files already present -- confirm.
dataset_client = gas.upload_dataset(dataset, jobs=8, skip_uploaded_files=False)
# Commit the upload with the message "LeedsSportsPose".
dataset_client.commit("LeedsSportsPose")
""""""

"""Read Dataset / get dataset"""
# Rebind dataset_client to the client returned by a lookup by dataset name.
dataset_client = gas.get_dataset("LeedsSportsPose")
""""""

"""Read Dataset / get segment"""
from tensorbay.dataset import Segment

# Build a Segment from an empty name and the dataset client.
# NOTE(review): the empty name presumably selects the default segment -- confirm.
default_segment = Segment("", dataset_client)
""""""

"""Read Dataset / get data"""
# Index 0 picks the first item of the segment.
data = default_segment[0]
Example #25
0
# Import files listed by a cloud client into a dataset, then set up a local
# storage config and a dataset bound to it.
# NOTE(review): `cloud_client` and `gas` are not defined in this excerpt;
# presumably created by an earlier snippet -- confirm.
import json

from tensorbay.dataset import Dataset
from tensorbay.label import Classification

# Use AuthData to organize a dataset by the "Dataset" class before importing.
dataset = Dataset("<DATASET_NAME>")

# TensorBay uses "segment" to separate different parts in a dataset.
segment = dataset.create_segment()

images = cloud_client.list_auth_data("<data/images/>")
labels = cloud_client.list_auth_data("<data/labels/>")

# zip() pairs the two listings by position and stops at the shorter one.
# NOTE(review): assumes images and labels are returned in matching order -- confirm.
for auth_data, label in zip(images, labels):
    # Parse the label file as JSON and load it as the image's classification.
    with label.open() as fp:
        auth_data.label.classification = Classification.loads(json.load(fp))
    segment.append(auth_data)

dataset_client = gas.upload_dataset(dataset, jobs=8)
""""""
"""Create local storage config"""
# Register a local storage config from a name, a file path and an endpoint.
gas.create_local_storage_config(
    name="<LOCAL_STORAGE_CONFIG>",
    file_path="<path/to/dataset>",
    endpoint="<external IP address of the local storage service>",
)
""""""
"""Create authorized local storage dataset"""
# Create a dataset tied to the config above through config_name; this rebinds
# dataset_client.
dataset_client = gas.create_dataset("<DATASET_NAME>",
                                    config_name="<LOCAL_STORAGE_CONFIG>")
""""""
Example #26
0
"""Create Fusion Dataset"""
# NOTE(review): `gas` is not defined in this excerpt; presumably an authorized
# client created by an earlier snippet -- confirm.
# Create a dataset named "CADC" with is_fusion=True; the return value is not kept.
gas.create_dataset("CADC", is_fusion=True)
""""""

"""List Dataset Names"""
# The return value is not kept and is not wrapped in list() here.
gas.list_dataset_names()
""""""

from tensorbay.opendataset import CADC

# Build `fusion_dataset` by calling the CADC loader on a local path.
fusion_dataset = CADC("<path/to/dataset>")


"""Upload Fusion Dataset"""
# fusion_dataset is the one you initialized in the "Organize Fusion Dataset" section.
fusion_dataset_client = gas.upload_dataset(fusion_dataset, jobs=8)
fusion_dataset_client.commit("initial commit")
""""""

"""Read Fusion Dataset / get fusion dataset"""
# Imported inside the section that needs it so the snippet runs on its own.
from tensorbay.dataset import FusionDataset

# Rebind fusion_dataset to a FusionDataset built from the dataset name and the client.
fusion_dataset = FusionDataset("CADC", gas)
""""""

"""Read Fusion Dataset / list fusion segment names"""
# The return value of keys() is not kept.
fusion_dataset.keys()
""""""

"""Read Fusion Dataset / get fusion segment"""
# Two lookups are shown: by string key, then by integer index. The second
# assignment replaces the first.
fusion_segment = fusion_dataset["2018_03_06/0001"]
fusion_segment = fusion_dataset[0]
""""""
Example #27
0
# NOTE(review): `gas` and `dataset_client` are not defined in this excerpt, and
# `Dataset` / `Classification` are not imported within it; presumably an
# earlier snippet provides them -- confirm.
dataset = Dataset("<DATASET_NAME>", gas)
# For every segment: overwrite each data item's classification, collect the
# items, then upload their labels through the segment client of the same name.
for segment in dataset:
    update_data = []
    for data in segment:
        data.label.classification = Classification(
            "NEW_CATEGORY")  # set new label
        update_data.append(data)
    segment_client = dataset_client.get_segment(segment.name)
    segment_client.upload_label(update_data)
""""""
"""Update label / commit dataset"""
dataset_client.commit("update labels")
""""""
"""Update data/ upload dataset"""
# Upload with skip_uploaded_files=True.
gas.upload_dataset(dataset, jobs=8, skip_uploaded_files=True)
""""""
"""Update data/ overwrite dataset"""
# Same call without skip_uploaded_files.
# NOTE(review): per the section title this overwrites existing files; the
# default of skip_uploaded_files is not visible here -- confirm.
gas.upload_dataset(dataset, jobs=8)
""""""
"""Update data/ delete segment"""
# A draft named "draft-3" is created before the segment is deleted.
dataset_client.create_draft("draft-3")
dataset_client.delete_segment("<SEGMENT_NAME>")
""""""
"""Update data/ delete data"""
# Rebind segment_client to the named segment, then delete "a.png" from it.
segment_client = dataset_client.get_segment("<SEGMENT_NAME>")
segment_client.delete_data("a.png")
""""""
"""Delete frame"""
# Reuses the segment_client bound in the previous section.
# NOTE(review): the argument looks like a frame ID -- confirm.
segment_client.delete_frame("00000000003W09TEMC1HXYMC74")
""""""