Example no. 1
0
def test_cache_nomap_basic2():
    """
    A random dataset (a non mappable dataset) with a cache over it just after the leaf
    """

    logger.info("Test cache nomap basic 2")

    # Schema: one ~921600-byte uint8 image (just under 1 MB) plus a 1-byte label.
    schema = ds.Schema()
    schema.add_column('image', de_type=mstype.uint8, shape=[640, 480, 3])
    schema.add_column('label', de_type=mstype.uint8, shape=[1])

    # Cache with an arbitrary session_id for now; size=0 means no limit, spilling on.
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

    # No sampler is passed explicitly; any of num_samples / shuffle / num_shards /
    # shard_id causes one to be auto-generated. Here num_samples does the job.
    ds1 = ds.RandomDataset(schema=schema,
                           total_rows=20,
                           num_samples=20,
                           num_parallel_workers=4,
                           cache=some_cache)
    ds1 = ds1.repeat(2)

    num_iter = 0
    for row in ds1.create_dict_iterator():
        logger.info("printing the label: {}".format(row["label"]))
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    # 20 samples x 2 repeats
    assert num_iter == 40
    logger.info("test_cache_nomap_basic2 Ended.\n")
Example no. 2
0
def test_cache_nomap_basic1():
    """
    A random dataset (a non mappable dataset) with a cache over it just after the leaf
    """

    logger.info("Test cache nomap basic 1")

    # Schema: one ~921600-byte uint8 image (just under 1 MB) plus a 1-byte label.
    schema = ds.Schema()
    schema.add_column('image', de_type=mstype.uint8, shape=[640, 480, 3])
    schema.add_column('label', de_type=mstype.uint8, shape=[1])

    # Cache with an arbitrary session_id for now; size=0 means no limit, spilling on.
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

    # Leaf dataset sits directly under the cache.
    ds1 = ds.RandomDataset(schema=schema,
                           total_rows=10,
                           num_parallel_workers=4,
                           cache=some_cache)
    ds1 = ds1.repeat(4)

    num_iter = 0
    for row in ds1.create_dict_iterator():
        logger.info("printing the label: {}".format(row["label"]))
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    # 10 rows x 4 repeats
    assert num_iter == 40
    logger.info("test_cache_nomap_basic1 Ended.\n")
Example no. 3
0
def test_randomdataset_basic2():
    """Iterate a fixed-schema RandomDataset repeated 4x and count the rows."""
    logger.info("Test randomdataset basic 2")

    schema = ds.Schema()
    # ~921600-byte uint8 image (a bit less than 1 MB) and a 1-byte label.
    schema.add_column('image', de_type=mstype.uint8, shape=[640, 480, 3])
    schema.add_column('label', de_type=mstype.uint8, shape=[1])

    # Make up 10 rows, single worker.
    ds1 = ds.RandomDataset(schema=schema, total_rows=10, num_parallel_workers=1)
    ds1 = ds1.repeat(4)

    num_iter = 0
    # Each row is a dict keyed by the schema column names ("image", "label").
    for row in ds1.create_dict_iterator(num_epochs=1):
        logger.info("printing the label: {}".format(row["label"]))
        num_iter += 1

    logger.info("Number of data in ds1: {}".format(num_iter))
    # 10 rows x 4 repeats
    assert num_iter == 40
    logger.info("Test randomdataset basic 2 complete")
# NOTE(review): this redefines test_randomdataset_basic2 from earlier in the file,
# shadowing the first definition — one of the two should be renamed (not done here
# to keep the public interface unchanged).
def test_randomdataset_basic2():
    """Iterate a fixed-schema RandomDataset (num_samples=10) repeated 4x and count rows."""
    logger.info("Test randomdataset basic 2")

    schema = ds.Schema()
    # ~921600-byte uint8 image (a bit less than 1 MB) and a 1-byte label.
    schema.add_column('image', de_type=mstype.uint8, shape=[640, 480, 3])
    schema.add_column('label', de_type=mstype.uint8, shape=[1])

    # Make up about 10 samples
    ds1 = ds.RandomDataset(schema=schema,
                           num_samples=10,
                           num_parallel_workers=1)

    ds1 = ds1.repeat(4)

    num_iter = 0
    # Each row is a dict keyed by the schema column names ("image", "label").
    for data in ds1.create_dict_iterator():
        logger.info("printing the label: {}".format(data["label"]))
        num_iter += 1

    # BUGFIX: logger.info("...: ", num_iter) passed the count as a %-format
    # argument with no placeholder in the message, so the count was never
    # rendered (and logging reports a formatting error). Use .format instead.
    logger.info("Number of data in ds1: {}".format(num_iter))
    # 10 samples x 4 repeats
    assert num_iter == 40
Example no. 5
0
def test_randomdataset_basic3():
    """Iterate a schema-less RandomDataset with a tuple iterator and count rows."""
    logger.info("Test randomdataset basic 3")

    # No schema supplied: the dataset fabricates one with columns named
    # "c0", "c1", "c2", ... — but a tuple iterator needs no column names.
    ds1 = ds.RandomDataset(total_rows=10, num_parallel_workers=1)
    ds1 = ds1.repeat(2)

    # Count the rows without touching their contents.
    num_iter = sum(1 for _ in ds1.create_tuple_iterator(num_epochs=1))

    logger.info("Number of data in ds1: {}".format(num_iter))
    # 10 rows x 2 repeats
    assert num_iter == 20
    logger.info("Test randomdataset basic 3 Complete")
def test_randomdataset_basic1():
    """Iterate a tiny fixed-schema RandomDataset repeated 4x, logging each row."""
    logger.info("Test randomdataset basic 1")

    schema = ds.Schema()
    schema.add_column('image', de_type=mstype.uint8, shape=[2])
    schema.add_column('label', de_type=mstype.uint8, shape=[1])

    # apply dataset operations
    ds1 = ds.RandomDataset(schema=schema, total_rows=50, num_parallel_workers=4)
    ds1 = ds1.repeat(4)

    num_iter = 0
    # Each row is a dict keyed by the schema column names ("image", "label").
    for num_iter, row in enumerate(ds1.create_dict_iterator()):
        logger.info("{} image: {}".format(num_iter, row["image"]))
        logger.info("{} label: {}".format(num_iter, row["label"]))
    num_iter = num_iter + 1

    logger.info("Number of data in ds1: {}".format(num_iter))
    # 50 rows x 4 repeats
    assert num_iter == 200
    logger.info("Test randomdataset basic 1 complete")