def test_dataset_name():
    """A dataset's name survives flush, and a rename persists across re-opens."""
    ds = Dataset(
        "./data/test_ds_name",
        shape=(10,),
        schema={"temp": "uint8"},
        name="my_dataset",
        mode="w",
    )
    ds.flush()
    assert ds.name == "my_dataset"

    # Rename through a second handle on the same store.
    reopened = Dataset("./data/test_ds_name")
    reopened.rename("my_dataset_2")
    assert reopened.name == "my_dataset_2"

    # A fresh handle must observe the persisted rename.
    assert Dataset("./data/test_ds_name").name == "my_dataset_2"
def test_dataset_copy_exception():
    """Copying onto a destination directory that is not empty must raise."""
    src = Dataset("./data/test_data_cp", shape=(100,), schema=simple_schema)
    dst = Dataset("./data/test_data_cp_2", shape=(100,), schema=simple_schema)
    for idx in range(100):
        src["num", idx] = idx
        dst["num", idx] = 2 * idx
    src.flush()
    dst.flush()

    # The target path already holds dst's data, so the copy is rejected.
    with pytest.raises(DirectoryNotEmptyException):
        src.copy("./data/test_data_cp_2")

    src.delete()
    dst.delete()
def test_dataset(url="./data/test/dataset", token=None, public=True):
    """End-to-end indexing checks: element views, slices, nested sub-schemas."""
    ds = Dataset(
        url, token=token, shape=(10000,), mode="w", schema=my_schema, public=public
    )

    # Single-row view, nested schema path written two different ways.
    row = ds[5]
    row["label/a", 50, 50] = 2
    assert row["label", 50, 50, "a"].numpy() == 2

    patch = np.ones((100, 150, 3), "uint8")
    ds["image", 5, 4, 100:200, 150:300, :] = patch
    assert (ds["image", 5, 4, 100:200, 150:300, :].numpy() == patch).all()

    # Writes are visible through nested slice views.
    ds["image", 8, 6, 500:550, 700:730] = np.ones((50, 30, 3))
    outer = ds[3:15]
    inner = outer[4:9]
    assert (
        inner["image", 1, 6, 500:550, 700:730].numpy() == np.ones((50, 30, 3))
    ).all()

    window = ds[5:7]
    fives = 5 * np.ones((2, 35, 20, 3))
    ds["image", 6, 3:5, 100:135, 700:720] = fives
    assert (window["image", 1, 3:5, 100:135, 700:720].numpy() == fives).all()

    # Whole-column assignment on a nested key.
    fours = 4 * np.ones((10000, 5, 3), "uint8")
    ds["label", "c"] = fours
    assert (ds["label/c"].numpy() == fours).all()

    ds["label", "c", 2, 4] = 6 * np.ones((3))
    assert (ds["label", "c"][1:3, 4][1].numpy() == 6 * np.ones((3))).all()

    ds.flush()

    # Leading-slash path syntax is equivalent to the bare key.
    tail = ds["/label", 5:15, "c"]
    tail[2:4, 4, :] = 98 * np.ones((2, 3))
    assert (ds[7:9, 4, "label", "/c"].numpy() == 98 * np.ones((2, 3))).all()

    # Chained sub-schema views share storage with the parent dataset.
    e = ds["label", 1:5]["d"]["e"]
    e[:] = 77 * np.ones((4, 5, 3))
    assert (e.numpy() == 77 * np.ones((4, 5, 3))).all()

    ds.close()
def test_dataset_schema_bug():
    """Re-opening a dataset with the identical schema must not raise."""
    flat = {"abc": Primitive("int32"), "def": "int64"}
    ds = Dataset("./data/schema_bug", schema=flat, shape=(100,))
    ds.flush()
    Dataset("./data/schema_bug", schema=flat, shape=(100,))

    nested = {
        "abc": "uint8",
        "def": {
            "ghi": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    ds = Dataset("./data/schema_bug_2", schema=nested, shape=(100,))
    ds.flush()
    Dataset("./data/schema_bug_2", schema=nested, shape=(100,))
def test_append_dataset():
    """append_shape must extend every tensor, in memory and after a reload."""
    url = "./data/test/model"
    ds = Dataset(
        schema={"first": Tensor(shape=(250, 300)), "second": "float"},
        shape=(100,),
        url=url,
        mode="w",
    )
    ds.append_shape(20)
    ds["first"][0] = np.ones((250, 300))
    assert len(ds) == 120

    def _check_shapes(handle):
        # Every tensor's leading dimension reflects the appended rows.
        assert handle["first"].shape[0] == 120
        assert handle["first", 5:10].shape[0] == 5
        assert handle["second"].shape[0] == 120

    _check_shapes(ds)
    ds.flush()
    _check_shapes(Dataset(url))
def test_minio_endpoint():
    """Round-trip writes and reads through an S3-compatible MinIO endpoint."""
    credentials = {
        "aws_access_key_id": os.getenv("ACTIVELOOP_MINIO_KEY"),
        "aws_secret_access_key": os.getenv("ACTIVELOOP_MINIO_SECRET_ACCESS_KEY"),
        "endpoint_url": "https://play.min.io:9000",
        "region": "us-east-1",
    }
    ds = Dataset(
        "s3://bucket/random_dataset",
        token=credentials,
        shape=(10,),
        schema={"abc": Tensor((100, 100, 3))},
        mode="w",
    )
    for row in range(10):
        ds["abc", row] = row * np.ones((100, 100, 3))
    ds.flush()
    for row in range(10):
        assert (ds["abc", row].compute() == row * np.ones((100, 100, 3))).all()
def main():
    """Example: create, fill, resize, commit, and reload a small dataset."""
    path = "./data/examples/new_api_intro2"
    ds = Dataset(
        path,
        shape=(10,),
        mode="w",
        schema={
            "image": Image(shape=(None, None), max_shape=(28, 28)),
            "label": ClassLabel(num_classes=10),
        },
    )
    print(len(ds))
    for idx in range(len(ds)):
        with Timer("writing single element"):
            ds["image", idx] = np.ones((28, 28), dtype="uint8")
            ds["label", idx] = 3

    # Grow the dataset; rows past the written range are uninitialized.
    ds.resize_shape(200)
    print(ds.shape)
    print(ds["label", 100:110].numpy())

    with Timer("Committing"):
        ds.flush()

    ds = Dataset(path)
    print(ds.schema)
    print(ds["image", 0].compute())
def main():
    """Example: write a tiny dataset to a hub tag and read it back."""
    # Tag format is {username}/{dataset_name}.
    tag = "davitb/basic11"

    ds = Dataset(
        tag,
        shape=(4,),
        mode="w+",
        schema={
            "image": schema.Tensor((512, 512), dtype="float"),
            "label": schema.Tensor((512, 512), dtype="float"),
        },
    )

    # Fill both tensors and persist.
    ds["image"][:] = np.ones((4, 512, 512))
    ds["label"][:] = np.ones((4, 512, 512))
    ds.flush()

    # Reload from the tag and inspect the first image.
    ds = Dataset(tag)
    print(ds["image"][0].compute())
def hub():
    """Example: build a labeled image dataset from local files via @transform."""
    from hub import Dataset, schema, transform
    from skimage.io import imread
    from pathlib import Path

    # NOTE(review): unused in this function — kept for parity with the original.
    USER = Path('rayos')
    dataset = './rayos/test'

    custom = {
        'image': schema.Image(shape=(None, None), dtype='uint8', max_shape=(512, 512)),
        'label': schema.ClassLabel(num_classes=2),
    }

    @transform(schema=custom)
    def load_transform(sample):
        # Read the image and derive the label from the filename's
        # second-to-last dot-separated component.
        image = imread(sample)
        label = int(sample.split('.')[-2])
        return {"image": image, "label": label}

    fnames = [
        r"C:\Users\Emc11\Dropbox\ん\エラティカ ニ\f0b2dbfa779195e0769a1ebaf7d22488.jpg",
        r"C:\Users\Emc11\Dropbox\ん\エラティカ 三\bfbf442331b996dcd3909080199df88d.jpg",
        r"C:\Users\Emc11\Dropbox\ん\エラティカ 三\90596a829d162455bd44759748b0e779.jpg",
        r"C:\Users\Emc11\Dropbox\ん\エラティカ ニ\5956d21f8b3ffa492669001f6be4d20c.jpg",
        r"C:\Users\Emc11\Dropbox\ん\エラティカ 三\8a360e1daa60742752da3a4ded7241fb.png",
        r"C:\Users\Emc11\Dropbox\ん\エラティカ ニ\c5504009cd88251533ea265b4fcf2ede.jpg",
    ]

    ds = Dataset(dataset, shape=(len(fnames), ), mode='w+', schema=custom)
    ds.flush()

    # Run the transform over the filenames and store the result.
    dase = load_transform(fnames)
    ds2 = dase.store(dataset)
    data = Dataset(dataset)
def upload(
    self,
    results,
    url: str,
    token: dict,
    progressbar: bool = True,
    public: bool = True,
):
    """Batchified upload of results.

    For each tensor batchify based on its chunk and upload. If tensor is
    dynamic then still upload element by element.

    Parameters
    ----------
    results: dict
        Output of the transform function: maps tensor key to the list of
        values for that tensor.
    url: str
        Destination path/URL the new dataset is written to.
    token: dict
        Credentials forwarded to Dataset for storage access.
    progressbar: bool
        NOTE(review): accepted for interface compatibility but currently
        unused in this implementation.
    public: bool, optional
        only applicable if using hub storage, ignored otherwise
        setting this to False allows only the user who created it to access
        the dataset and the dataset won't be visible in the visualizer to
        the public

    Returns
    ----------
    ds: hub.Dataset
        Uploaded dataset
    """
    # Row count comes from the first tensor's value list; an empty results
    # mapping produces an empty dataset.
    if not results:
        shape = (0,)
    else:
        shape = (len(next(iter(results.values()))),)

    ds = Dataset(
        url,
        mode="w",
        shape=shape,
        schema=self.schema,
        token=token,
        cache=False,
        public=public,
    )

    tasks = []
    for key, value in results.items():
        # Batch each tensor by its chunk size so every remote task writes
        # one whole chunk.
        length = ds[key].chunksize[0]
        value = get_value(value)
        value = str_to_int(value, ds.tokenizer)
        batched_values = batchify(value, length)
        chunk_ids = list(range(len(batched_values)))
        index_batched_values = list(zip(chunk_ids, batched_values))

        # Disable dynamicness during the bulk write; dynamic shapes are
        # restored below via set_dynamic_shapes.
        ds._tensors[f"/{key}"].disable_dynamicness()

        # FIX: the original rebound the `results` parameter here while the
        # for-loop was still iterating results.items() — use a distinct
        # name for the remote task handles instead.
        chunk_tasks = [
            self.upload_chunk.remote(el, key=key, ds=ds)
            for el in index_batched_values
        ]
        tasks.extend(chunk_tasks)

    task_results = ray.get(tasks)
    self.set_dynamic_shapes(task_results, ds)
    ds.flush()
    return ds