# Example 1
def CACD(path: str) -> Dataset:
    """`Cross-Age Celebrity Dataset (CACD) <https://bcsiriuschen.github.io/CARC/>`_ dataset.

    The file structure should be like::

        <path>
            CACD2000/
                14_Aaron_Johnson_0001.jpg
                ...
            celebrity2000.mat

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.catalog.classification = _get_subcatalog()
    segment = dataset.create_segment()

    # Map each image basename to its (category, attributes) pair once up front.
    labels_map = _get_labels_map(os.path.join(root_path, "celebrity2000.mat"))

    for image_path in glob(os.path.join(root_path, "CACD2000", "*.jpg")):
        data = Data(image_path)
        category, attribute = labels_map[os.path.basename(image_path)]
        data.label.classification = Classification(category, attribute)
        segment.append(data)

    return dataset
    def test_create_and_upload_dataset_with_config(self, accesskey, url, tmp_path):
        """Create a dataset under a named auth-storage config and upload labeled data.

        Skipped when the target TensorBay instance has no storage config named
        ``_LOCAL_CONFIG_NAME``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        try:
            gas_client.get_auth_storage_config(name=_LOCAL_CONFIG_NAME)
        except ResourceNotExistError:
            pytest.skip(f"skip this case because there's no {_LOCAL_CONFIG_NAME} config")

        gas_client.create_dataset(dataset_name, config_name=_LOCAL_CONFIG_NAME)
        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        # When uploading label, upload catalog first.
        dataset._catalog = Catalog.loads(CATALOG)

        path = tmp_path / "sub"
        path.mkdir()
        # Stage five labeled local files in the segment to be uploaded.
        for i in range(5):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        assert dataset_client.get_catalog()
        # Re-read the remote segment and confirm every file and label arrived.
        segment1 = Segment("Segment1", client=dataset_client)
        assert len(segment1) == 5
        for i in range(5):
            assert segment1[i].path == f"hello{i}.txt"
            assert segment1[i].label

        gas_client.delete_dataset(dataset_name)
# Example 3
    def test_copy_data_between_datasets(self, accesskey, url, tmp_path):
        """Copy a single data file from a committed dataset into another dataset's draft."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name_1 = get_dataset_name()
        gas_client.create_dataset(dataset_name_1)
        dataset_1 = Dataset(name=dataset_name_1)
        segment_1 = dataset_1.create_segment("Segment1")
        dataset_1._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment_1.append(data)
        # Source data must be committed before it can be copied elsewhere.
        dataset_client_1 = gas_client.upload_dataset(dataset_1)
        dataset_client_1.commit("upload data")
        segment_client_1 = dataset_client_1.get_segment("Segment1")

        dataset_name_2 = dataset_name_1 + "_2"
        dataset_client_2 = gas_client.create_dataset(dataset_name_2)
        dataset_client_2.create_draft("draft_2")
        dataset_client_2.create_segment("Segment1")
        segment_client_2 = dataset_client_2.get_segment("Segment1")

        segment_client_2.copy_data("hello0.txt", "hello0.txt", source_client=segment_client_1)

        # The copied file should appear in the target segment with its label intact.
        segment2 = Segment("Segment1", client=dataset_client_2)
        assert segment2[0].path == "hello0.txt"
        assert segment2[0].label

        gas_client.delete_dataset(dataset_name_1)
        gas_client.delete_dataset(dataset_name_2)
# Example 4
    def test_move_segment(self, accesskey, url, tmp_path):
        """Rename a segment via move_segment and reject an unsupported strategy."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        segment_client = dataset_client.move_segment("Segment1", "Segment2")
        assert segment_client.name == "Segment2"

        # "push" is not a valid conflict-resolution strategy for move_segment.
        with pytest.raises(InvalidParamsError):
            dataset_client.move_segment("Segment1", "Segment3", strategy="push")

        segment2 = Segment("Segment2", client=dataset_client)
        assert segment2[0].path == "hello0.txt"
        assert segment2[0].path == segment[0].target_remote_path
        assert segment2[0].label

        gas_client.delete_dataset(dataset_name)
# Example 5
    def test_copy_segment_skip(self, accesskey, url, tmp_path):
        """Copy a segment with strategy="skip": existing target data is kept untouched."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment1 = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment1.append(data)

        # Segment2 gets a disjoint set of filenames (hello10..hello19).
        segment2 = dataset.create_segment("Segment2")
        for i in range(10, 20):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment2.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.copy_segment("Segment1", "Segment2", strategy="skip")

        # With "skip", Segment2's original first entry (hello10.txt) survives the copy.
        segment_copied = Segment("Segment2", client=dataset_client)
        assert segment_copied[0].path == "hello10.txt"
        assert segment_copied[0].path == segment2[0].target_remote_path
        assert segment_copied[0].label

        gas_client.delete_dataset(dataset_name)
# Example 6
    def test_copy_data(self, accesskey, url, tmp_path):
        """Copy individual files within a segment and reject an unsupported strategy."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        segment_client = dataset_client.get_segment("Segment1")
        segment_client.copy_data("hello0.txt", "goodbye0.txt")
        segment_client.copy_data("hello1.txt", "hello10.txt")

        # "push" is not a valid conflict-resolution strategy for copy_data.
        with pytest.raises(InvalidParamsError):
            segment_client.copy_data("hello2.txt", "see_you.txt", strategy="push")

        # Remote paths are listed in lexicographic order, so the copies land at
        # index 0 ("goodbye0.txt") and index 3 ("hello10.txt" sorts after hello0/hello1).
        segment2 = Segment("Segment1", client=dataset_client)
        assert segment2[0].path == "goodbye0.txt"
        assert segment2[3].path == "hello10.txt"
        assert segment2[1].label

        gas_client.delete_dataset(dataset_name)
# Example 7
    def test_move_data_skip(self, accesskey, url, tmp_path):
        """Move a file onto an existing name with strategy="skip": target content wins."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        # Distinct content per file so the skip behavior is observable below.
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text(f"CONTENT_{i}")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        segment_client = dataset_client.get_segment("Segment1")

        segment_client.move_data("hello0.txt", "hello1.txt", strategy="skip")

        # hello1.txt keeps its own content (CONTENT_1), proving the move was skipped.
        segment_moved = Segment("Segment1", client=dataset_client)
        assert segment_moved[0].path == "hello1.txt"
        assert segment_moved[0].open().read() == b"CONTENT_1"

        gas_client.delete_dataset(dataset_name)
# Example 8
    def test_copy_segment_abort(self, accesskey, url, tmp_path):
        """Copying onto an existing segment with the default strategy raises an error."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment1 = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment1.append(data)

        # Segment2 contains the same filenames, guaranteeing a name conflict.
        segment2 = dataset.create_segment("Segment2")
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment2.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        # Default strategy ("abort") rejects copying onto an existing segment.
        with pytest.raises(InternalServerError):
            dataset_client.copy_segment("Segment1", "Segment2")

        gas_client.delete_dataset(dataset_name)
# Example 9
    def test_upload_dataset_to_given_draft(self, accesskey, url, tmp_path):
        """Upload into an explicitly numbered draft; a nonexistent draft number fails."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_random_dataset_name()
        dataset_client_1 = gas_client.create_dataset(dataset_name)
        draft_number = dataset_client_1.create_draft("test")

        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")

        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            segment.append(Data(local_path=str(local_path)))

        dataset_client_2 = gas_client.upload_dataset(dataset,
                                                     draft_number=draft_number)
        segment1 = Segment("Segment1", client=dataset_client_2)
        assert len(segment1) == 10
        assert segment1[0].path == "hello0.txt"
        assert not segment1[0].label

        # Uploading to a draft number that was never created must fail.
        with pytest.raises(GASResponseError):
            gas_client.upload_dataset(dataset, draft_number=draft_number + 1)

        gas_client.delete_dataset(dataset_name)
# Example 10
def FSDD(path: str) -> Dataset:
    """`Free Spoken Digit <https://github.com/Jakobovski/free-spoken-digit-dataset>`_ dataset.

    The file structure should be like::

        <path>
            recordings/
                0_george_0.wav
                0_george_1.wav
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    # Normalize the user-supplied path (handles "~" and relative paths) for
    # consistency with the other dataset loaders in this project.
    root_path = os.path.abspath(os.path.expanduser(path))

    # Per-speaker attributes: the speaker name plus the metadata recorded
    # in _METADATA for that speaker.
    label_map = {
        key: {"name": key, **value} for key, value in _METADATA.items()
    }

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    audio_paths = glob(os.path.join(root_path, "recordings", "*.wav"))
    for audio_path in audio_paths:
        # Filenames look like "<digit>_<speaker>_<index>.wav".
        category, name = os.path.basename(audio_path).split("_")[:2]
        data = Data(audio_path)
        data.label.classification = Classification(category, label_map[name])
        segment.append(data)
    return dataset
# Example 11
    def test_upload_dataset_after_commit(self, accesskey, url, tmp_path):
        """Upload, commit, then re-read the dataset remotely and compare round-trip state."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)

        dataset = Dataset(name=dataset_name)
        dataset._catalog = Catalog.loads(CATALOG)
        dataset.notes.is_continuous = True
        segment = dataset.create_segment("Segment1")

        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.commit("test")
        # Pull the committed dataset back and verify notes, catalog, and labels survive.
        dataset_remote = Dataset(name=dataset_name, gas=gas_client)
        assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
        assert dataset_remote.catalog == dataset.catalog

        segment_remote = dataset_remote[0]
        assert len(segment_remote) == len(segment)
        for remote_data, data in zip(segment_remote, segment):
            assert remote_data.path == data.target_remote_path
            assert remote_data.label == data.label

        gas_client.delete_dataset(dataset_name)
# Example 12
def CCPDGreen(path: str) -> Dataset:
    """`CCPDGreen <https://github.com/detectRecog/CCPD>`_ dataset.

    The file structure should be like::

        <path>
            ccpd_green/
                train/
                test/
                val/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.join(os.path.abspath(os.path.expanduser(path)), "ccpd_green")

    dataset = Dataset(DATASET_NAME_CCPDGREEN)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    # One segment per split directory (train/test/val).
    for name in _CCPDGREEN_SEGMENTS:
        segment = dataset.create_segment(name)
        image_paths = glob(os.path.join(root_path, name, "*.jpg"))
        for image_path in image_paths:
            data = Data(image_path)
            data.label.polygon = _get_polygons(image_path)
            segment.append(data)

    return dataset
# Example 13
def PASCALContext(mask_path: str, image_path: str) -> Dataset:
    """`PASCALContext <https://cs.stanford.edu/~roozbeh/pascal-context/>`_ dataset.

    The file structure should be like::

        <mask_path>
            <image_name>.png
            ...

        <image_path>
            <image_name>.jpg
            ...

    Arguments:
        mask_path: The root directory of the dataset mask.
        image_path: The root directory of the dataset image.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    root_image_path = os.path.abspath(os.path.expanduser(image_path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    segment = dataset.create_segment("trainval")
    # Pair each mask file with the image sharing its stem.
    for mask_file in glob(os.path.join(root_mask_path, "*.png")):
        name, _ = os.path.splitext(os.path.basename(mask_file))
        data = Data(os.path.join(root_image_path, f"{name}.jpg"))
        data.label.semantic_mask = SemanticMask(mask_file)
        segment.append(data)

    return dataset
# Example 14
    def test_upload_dataset_only_with_file(self, accesskey, url, tmp_path):
        """Upload a dataset that has files but no catalog or labels."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)

        dataset = Dataset(name=dataset_name)
        dataset.notes.is_continuous = True
        segment = dataset.create_segment("Segment1")

        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            segment.append(Data(local_path=str(local_path)))

        dataset_client = gas_client.upload_dataset(dataset)
        # Upload lands in a new draft on the default branch (nothing committed yet).
        assert dataset_client.status.branch_name == DEFAULT_BRANCH
        assert dataset_client.status.draft_number
        assert not dataset_client.status.commit_id

        # Notes survive the upload; catalog and labels were never provided.
        assert dataset_client.get_notes().is_continuous is True
        assert not dataset_client.get_catalog()
        segment1 = Segment("Segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].path == "hello0.txt"
        assert not segment1[0].label

        gas_client.delete_dataset(dataset_name)
# Example 15
def CarConnection(path: str) -> Dataset:
    """`Car Connection Picture <https://github.com/nicolas-gervais\
    /predicting-car-price-from-scraped-data/tree/master/picture-scraper>`_ dataset.

    The file structure should be like::

        <path>
            <imagename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    # Attribute keys are fixed by the catalog; hoist them out of the loop.
    attribute_keys = dataset.catalog.classification.attributes.keys()

    for image_path in glob(os.path.join(root_path, "*.jpg")):
        data = Data(image_path)
        # Every attribute for an image is encoded in its filename.
        basename = os.path.basename(image_path)
        data.label.classification = _extract_label_from_basename(
            attribute_keys, basename)
        segment.append(data)

    return dataset
# Example 16
    def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
        """Upload a dataset with catalog and labels and verify them remotely."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)

        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        # When uploading label, upload catalog first.
        dataset._catalog = Catalog.loads(CATALOG)

        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        assert dataset_client.get_catalog()
        segment1 = Segment("Segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].path == "hello0.txt"
        assert segment1[0].label

        gas_client.delete_dataset(dataset_name)
# Example 17
def ImageEmotionArtphoto(path: str) -> Dataset:
    """`Image Emotion-art Photo <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            <filename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_ARTPHOTO)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_artphoto.json"))
    segment = dataset.create_segment()

    for image_file in glob(os.path.join(root_path, "*.jpg")):
        # The emotion category is the filename prefix before the first "_".
        category = os.path.basename(image_file).split("_", 1)[0]
        data = Data(image_file)
        data.label.classification = Classification(category=category)
        segment.append(data)

    return dataset
# Example 18
def DownsampledImagenet(path: str) -> Dataset:
    """`Downsampled Imagenet <https://www.tensorflow.org/datasets\
    /catalog/downsampled_imagenet>`_ dataset.

    The file structure should be like::

        <path>
            valid_32x32/
                <imagename>.png
                ...
            valid_64x64/
                <imagename>.png
                ...
            train_32x32/
                <imagename>.png
                ...
            train_64x64/
                <imagename>.png
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    # Each split directory (train/valid at 32x32/64x64) becomes one segment.
    for name in SEGMENT_NAMES:
        dataset.add_segment(_get_segment(name, root_path))

    return dataset
# Example 19
    def test_import_cloud_files(self, accesskey, url, config_name):
        """Import auth-cloud-storage files into a dataset via upload_dataset.

        Skipped when the target TensorBay instance has no cloud config named
        ``config_name``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        try:
            cloud_client = gas_client.get_cloud_client(config_name)
        except ResourceNotExistError:
            pytest.skip(
                f"skip this case because there's no {config_name} config")

        auth_data = cloud_client.list_auth_data("tests")
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name,
                                                   config_name=config_name)

        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        for data in auth_data:
            segment.append(data)

        # jobs=5 uploads with five concurrent workers.
        dataset_client = gas_client.upload_dataset(dataset, jobs=5)
        dataset_client.commit("import data")

        segment1 = Segment("Segment1", client=dataset_client)
        assert len(segment1) == len(segment)
        # Remote path keeps only the basename of the cloud object key.
        assert segment1[0].path == segment[0].path.split("/")[-1]
        assert not segment1[0].label

        assert len(auth_data) == len(segment)

        gas_client.delete_dataset(dataset_name)
# Example 20
def AnimalsWithAttributes2(path: str) -> Dataset:
    """`Animals with attributes 2 <https://cvml.ist.ac.at/AwA2/>`_ dataset.

    The file structure should be like::

        <path>
            classes.txt
            predicates.txt
            predicate-matrix-binary.txt
            JPEGImages/
                <classname>/
                    <imagename>.jpg
                ...
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    def read_names(filename):
        # Each line is "<index>\t<name>"; keep the name, drop the newline.
        with open(os.path.join(root_path, filename), encoding="utf-8") as fp:
            return [line[:-1].split("\t", 1)[-1] for line in fp]

    class_names = read_names("classes.txt")
    attribute_keys = read_names("predicates.txt")

    with open(os.path.join(root_path, "predicate-matrix-binary.txt"),
              encoding="utf-8") as fp:
        attribute_values = [line[:-1].split(" ") for line in fp]

    # One shared Classification per class: category plus its binary attributes.
    attribute_mapping = {
        class_name: Classification(
            category=class_name,
            attributes={
                key: bool(int(value))
                for key, value in zip(attribute_keys, values)
            },
        )
        for class_name, values in zip(class_names, attribute_values)
    }

    for class_name in sorted(os.listdir(os.path.join(root_path,
                                                     "JPEGImages"))):
        label = attribute_mapping[class_name]
        for image_path in glob(
                os.path.join(root_path, "JPEGImages", class_name, "*.jpg")):
            data = Data(image_path)
            data.label.classification = label
            segment.append(data)

    return dataset
# Example 21
def COVID_CT(path: str) -> Dataset:
    """`COVID-CT <https://github.com/UCSD-AI4H/COVID-CT>`_ dataset.

    The file structure should be like::

        <path>
            Data-split/
                COVID/
                    testCT_COVID.txt
                    trainCT_COVID.txt
                    valCT_COVID.txt
                NonCOVID/
                    testCT_NonCOVID.txt
                    trainCT_NonCOVID.txt
                    valCT_NonCOVID.txt
            Images-processed/
                CT_COVID/
                    ...
                    2020.01.24.919183-p27-132.png
                    2020.01.24.919183-p27-133.png
                    ...
                    PIIS0140673620303603%8.png
                    ...
                CT_NonCOVID/
                    0.jpg
                    1%0.jog
                    ...
                    91%1.jpg
                    102.png
                    ...
                    2341.png

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.
    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    data_split_path = os.path.join(root_path, "Data-split")
    images_processed_path = os.path.join(root_path, "Images-processed")

    for segment_name, segment_info in _SEGMENT_TO_PATH.items():
        split_filename, image_dirname, category = segment_info
        segment = dataset.create_segment(segment_name)
        image_dir = os.path.join(images_processed_path, image_dirname)
        split_file = os.path.join(data_split_path, category, split_filename)
        # Each split file lists one image filename per line.
        with open(split_file, encoding="utf-8") as fp:
            for line in fp:
                data = Data(os.path.join(image_dir, line.strip("\n")))
                data.label.classification = Classification(category)
                segment.append(data)

    return dataset
# Example 22
def DeepRoute(path: str) -> Dataset:
    """`DeepRoute <https://gas.graviti.cn/dataset/graviti-open-dataset\
    /DeepRoute>`_ dataset.

    The file structure should be like::

        <path>
            pointcloud/
                00001.bin
                00002.bin
                ...
                10000.bin
            groundtruth/
                00001.txt
                00002.txt
                ...
                10000.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    for point_cloud_path in glob(os.path.join(root_path, "pointcloud", "*.bin")):
        # Ground-truth annotations share the point cloud's stem.
        stem = os.path.splitext(os.path.basename(point_cloud_path))[0]
        label_path = os.path.join(root_path, "groundtruth", f"{stem}.txt")

        with open(label_path, encoding="utf-8") as fp:
            objects = json.load(fp)["objects"]

        data = Data(point_cloud_path)
        data.label.box3d = [
            LabeledBox3D(
                size=(
                    obj["bounding_box"]["length"],
                    obj["bounding_box"]["width"],
                    obj["bounding_box"]["height"],
                ),
                translation=(obj["position"]["x"], obj["position"]["y"], obj["position"]["z"]),
                # Heading is a rotation about the vertical (z) axis.
                rotation=from_rotation_vector((0, 0, obj["heading"])),
                category=obj["type"],
            )
            for obj in objects
        ]

        segment.append(data)

    return dataset
# Example 23
def CCPD(path: str) -> Dataset:
    """`CCPD <https://github.com/detectRecog/CCPD>`_ dataset.

    The file structure should be like::

        <path>
            ccpd_np/
                1005.jpg
                1019.jpg
                ...
            ccpd_base/
                00205459770115-90_85-352&516_448&547- \
                444&547_368&549_364&517_440&515-0_0_22_10_26_29_24-128-7.jpg
                00221264367816-91_91-283&519_381&553- \
                375&551_280&552_285&514_380&513-0_0_7_26_17_33_29-95-9.jpg
                ...
            ccpd_blur/
            ccpd_challenge/
            ccpd_db/
            ccpd_fn/
            ccpd_rotate/
            ccpd_tilt/
            ccpd_weather/
            LICENSE
            README.md
            splits/
                ccpd_blur.txt
                ccpd_challenge.txt
                ccpd_db.txt
                ccpd_fn.txt
                ccpd_rotate.txt
                ccpd_tilt.txt
                test.txt
                train.txt
                val.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_CCPD)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for head, tails in _CCPD_SEGMENTS.items():
        for tail in tails:
            name = f"{head}-{tail}"
            segment = dataset.create_segment(name)
            # "other-np" images contain no license plate, so they get no polygons.
            if name == "other-np":
                get_polygons = lambda _: []
            else:
                get_polygons = _get_polygons
            for image_path in _get_ccpd_image_path(root_path, head, tail):
                data = Data(image_path)
                data.label.polygon = get_polygons(image_path)
                segment.append(data)
    return dataset
# Example 24
def VOC2012Segmentation(path: str) -> Dataset:
    """`VOC2012Segmentation <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset.

    The file structure should be like::

        <path>/
            JPEGImages/
                <image_name>.jpg
                ...
            SegmentationClass/
                <mask_name>.png
                ...
            SegmentationObject/
                <mask_name>.png
                ...
            ImageSets/
                Segmentation/
                    train.txt
                    val.txt
                    ...
                ...
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    jpeg_dir = os.path.join(root_path, "JPEGImages")
    class_mask_dir = os.path.join(root_path, "SegmentationClass")
    object_mask_dir = os.path.join(root_path, "SegmentationObject")
    split_dir = os.path.join(root_path, "ImageSets", "Segmentation")

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        split_file = os.path.join(split_dir, f"{segment_name}.txt")
        # Each line of the split file is the stem shared by the image
        # and both of its masks.
        with open(split_file, encoding="utf-8") as fp:
            for line in fp:
                stem = line.strip()
                data = Data(os.path.join(jpeg_dir, f"{stem}.jpg"))
                mask_name = f"{stem}.png"
                data.label.semantic_mask = SemanticMask(
                    os.path.join(class_mask_dir, mask_name))
                data.label.instance_mask = InstanceMask(
                    os.path.join(object_mask_dir, mask_name))
                segment.append(data)

    return dataset
    def test_cache_dataset(self, accesskey, url, tmp_path):
        """Uploaded files opened through a cached client land in the cache layout."""
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)

        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        # When uploading label, upload catalog first.
        dataset._catalog = Catalog.loads(_CATALOG)

        data_dir = tmp_path / "sub"
        semantic_dir = tmp_path / "semantic_mask"
        instance_dir = tmp_path / "instance_mask"
        for directory in (data_dir, semantic_dir, instance_dir):
            directory.mkdir()

        for index in range(_SEGMENT_LENGTH):
            text_file = data_dir / f"hello{index}.txt"
            text_file.write_text("CONTENT")
            data = Data(local_path=str(text_file))
            data.label = Label.loads(_LABEL)

            semantic_file = semantic_dir / f"semantic_mask{index}.png"
            semantic_file.write_text("SEMANTIC_MASK")
            data.label.semantic_mask = SemanticMask(str(semantic_file))

            instance_file = instance_dir / f"instance_mask{index}.png"
            instance_file.write_text("INSTANCE_MASK")
            data.label.instance_mask = InstanceMask(str(instance_file))
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.commit("commit-1")

        cache_path = tmp_path / "cache_test"
        dataset_client.enable_cache(str(cache_path))
        segment1 = Segment("Segment1", client=dataset_client)
        # Opening each remote file is what populates the local cache.
        for remote_data in segment1:
            remote_data.open()
            remote_data.label.semantic_mask.open()
            remote_data.label.instance_mask.open()

        segment_cache_path = (cache_path / dataset_client.dataset_id /
                              dataset_client.status.commit_id / "Segment1")
        checks = (
            (segment_cache_path, "txt"),
            (segment_cache_path / "semantic_mask", "png"),
            (segment_cache_path / "instance_mask", "png"),
        )
        for cache_dir, extension in checks:
            expected = {cache_dir / f"hello{i}.{extension}"
                        for i in range(_SEGMENT_LENGTH)}
            assert set(cache_dir.glob(f"*.{extension}")) == expected

        gas_client.delete_dataset(dataset_name)
예제 #26
0
def AnimalPose7(path: str) -> Dataset:
    """`7 Categories Animal-Pose <https://sites.google.com/view/animal-pose/>`_ dataset.

    The file structure should be like::

        <path>
            bndbox_image/
                antelope/
                    Img-77.jpg
                    ...
                ...
            bndbox_anno/
                antelope.json
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        loaded :class:`~tensorbay.dataset.dataset.Dataset` object.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_7)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_7.json"))

    segment = dataset.create_segment()

    # Each catalog category corresponds to one "<animal>.json" annotation
    # file and one image sub-directory (iterating the mapping directly is
    # equivalent to iterating ".keys()").
    for animal in dataset.catalog.box2d.categories:
        with open(os.path.join(root_path, "bndbox_anno", f"{animal}.json"),
                  encoding="utf-8") as fp:
            annotations = json.load(fp)
        for image_name, box2ds in annotations.items():
            image_path = os.path.join(root_path, "bndbox_image", animal,
                                      image_name)
            # Prefix the remote path so images from different animals
            # do not collide.
            data = Data(image_path,
                        target_remote_path=f"{animal}/{image_name}")
            data.label.box2d = [
                LabeledBox2D(
                    float(coordinates["xmin"]),
                    float(coordinates["ymin"]),
                    float(coordinates["xmax"]),
                    float(coordinates["ymax"]),
                    category=animal,
                )
                for coordinates in (box2d["bndbox"] for box2d in box2ds)
            ]

            segment.append(data)

    return dataset
예제 #27
0
def _BDD100K_loader(path: str, dataset_type: str) -> Dataset:
    """Load one BDD100K image dataset variant selected by ``dataset_type``."""
    base_path = os.path.abspath(os.path.expanduser(path))
    root_path = os.path.join(base_path, f"bdd100k_images_{dataset_type}")

    dataset = Dataset(DATASET_NAMES[dataset_type])
    catalog_file = os.path.join(
        os.path.dirname(__file__), f"catalog_{dataset_type}.json")
    dataset.load_catalog(catalog_file)

    # The "10k" variant uses a different segment loader than the others.
    if dataset_type == "10k":
        loader = _load_segment_10k
    else:
        loader = _load_segment_100k
    loader(dataset, root_path, os.path.join(root_path, "labels"))

    return dataset
예제 #28
0
def RarePlanesReal(path: str) -> Dataset:
    """`RarePlanesReal <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The folder structure should be like::

        <path>
            metadata_annotations/
                RarePlanes_Public_Metadata.csv
                RarePlanes_Test_Coco_Annotations_tiled.json
                RarePlanes_Train_Coco_Annotations_tiled.json
            test/
                PS-RGB_tiled/
                    105_104001003108D900_tile_47.png
                    ...
            train/
                PS-RGB_tiled/
                    100_1040010029990A00_tile_319.png
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    catalog = dataset.catalog

    annotations_dir = os.path.join(root_path, "metadata_annotations")
    metadata_csv = os.path.join(annotations_dir, "RarePlanes_Public_Metadata.csv")
    classification_attributes = _get_classification_attributes(
        metadata_csv,
        catalog.classification.attributes.keys(),
    )
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        polygons_by_image = _get_polygon_labels(
            annotations_dir, segment_name, catalog.polygon.attributes.keys())
        image_pattern = os.path.join(
            root_path, segment_name, "PS-RGB_tiled", "*.png")
        for image_path in glob(image_pattern):
            filename = os.path.basename(image_path)
            # e.g. "105_104001003108D900_tile_47.png" -> "105_104001003108D900"
            image_id = filename.rsplit("_", 2)[0]
            data = Data(image_path)
            data.label.polygon = polygons_by_image[filename]
            data.label.classification = Classification(
                attributes=classification_attributes[image_id])
            segment.append(data)
    return dataset
예제 #29
0
def KenyanFoodType(path: str) -> Dataset:
    """`Kenyan Food Type <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            test.csv
            test/
                bhaji/
                    1611654056376059197.jpg
                    ...
                chapati/
                    1451497832469337023.jpg
                    ...
                ...
            train/
                bhaji/
                    190393222473009410.jpg
                    ...
                chapati/
                    1310641031297661755.jpg
                    ...
            val/
                bhaji/
                    1615408264598518873.jpg
                    ...
                chapati/
                    1553618479852020228.jpg
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_TYPE)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_food_type.json"))

    for segment_name in SEGMENTS_FOOD_TYPE:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        # One directory per category; sort for a deterministic order.
        for category in sorted(os.listdir(segment_path)):
            # All images in a directory share one classification label.
            category_label = Classification(category)
            for image_path in glob(os.path.join(segment_path, category, "*.jpg")):
                data = Data(image_path)
                data.label.classification = category_label
                segment.append(data)
    return dataset
예제 #30
0
def LeedsSportsPose(path: str) -> Dataset:
    """`Leeds Sports Pose <http://sam.johnson.io/research/lsp.html>`_ dataset.

    The folder structure should be like::

        <path>
            joints.mat
            images/
                im0001.jpg
                im0002.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    joints = loadmat(os.path.join(root_path, "joints.mat"))["joints"].T

    for image_path in glob(os.path.join(root_path, "images", "*.jpg")):
        data = Data(image_path)
        # File names look like "im0001.jpg"; the four digits are a
        # 1-based index into the joints array.
        index = int(os.path.basename(image_path)[2:6]) - 1

        keypoints = LabeledKeypoints2D()
        for joint in joints[index]:
            # The third value is inverted before use — presumably an
            # occlusion flag converted to a visibility value; confirm
            # against the LSP mat-file layout.
            keypoints.append(
                Keypoint2D(joint[0], joint[1], int(not joint[2])))

        data.label.keypoints2d = [keypoints]
        segment.append(data)
    return dataset