Example #1
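All of the snippets below assume a shared test harness that this listing does not show: a MindSpore dataset import, paths to the TFRecord test data, and a save_and_check golden-file helper. A minimal sketch of that scaffolding follows; the concrete paths, the FILES/DATA_DIR values, and the helper body are assumptions inferred from how the tests use them (Example #13 additionally assumes DATA_DIR_TF / SCHEMA_DIR_TF, defined similarly).

# Sketch of the scaffolding the examples assume; the paths and the helper
# body are inferred from usage, not taken from the real test suite.
import mindspore.dataset as ds
from mindspore import log as logger

# Hypothetical locations of the TFRecord test file and its JSON schema.
DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_DIR = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
FILES = DATA_DIR

# When True, (re)generate the golden .npz files instead of comparing to them.
GENERATE_GOLDEN = False

def save_and_check(data, parameters, filename, generate_golden=False):
    """Hypothetical stand-in for the suite's golden-file helper: run the
    pipeline once, then either record the rows as the golden file or
    compare them against the previously recorded one."""
    rows = list(data.create_dict_iterator(num_epochs=1, output_numpy=True))
    # The real helper serializes `rows` (and `parameters`) to `filename`,
    # or loads `filename` and asserts the contents match; format not shown.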
def test_2ops_repeat_shuffle():
    """
    Test Repeat then Shuffle
    """
    logger.info("Test Repeat then Shuffle")
    # define parameters
    repeat_count = 2
    buffer_size = 5
    seed = 0
    parameters = {
        "params": {
            'repeat_count': repeat_count,
            'buffer_size': buffer_size,
            'seed': seed
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.repeat(repeat_count)
    ds.config.set_seed(seed)
    data1 = data1.shuffle(buffer_size=buffer_size)

    filename = "test_2ops_repeat_shuffle.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
Example #2
def test_case_1():
    """
    Test Repeat then Batch
    """
    logger.info("Test Repeat then Batch")
    # define parameters
    repeat_count = 2
    batch_size = 5
    parameters = {
        "params": {
            'repeat_count': repeat_count,
            'batch_size': batch_size
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.repeat(repeat_count)
    data1 = data1.batch(batch_size, drop_remainder=True)

    filename = "test_case_1_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
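The arithmetic behind Example #2 is worth spelling out: assuming the test file holds 12 rows (consistent with the row-count assertions in the batch tests below), repeat(2) yields 24 rows, and batch(5, drop_remainder=True) emits 4 full batches while discarding the last 4 rows. A quick check of that reasoning:

# Assumed 12-row dataset, per the row-count assertions in later examples.
rows, repeat_count, batch_size = 12, 2, 5
total = rows * repeat_count                  # 24 rows after repeat
full_batches = total // batch_size           # drop_remainder=True keeps only full batches
dropped = total - full_batches * batch_size
print(full_batches, dropped)                 # 4 full batches, 4 rows dropped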
Example #3
def test_batch_08():
    """
    Test batch: num_parallel_workers=1, drop_remainder default
    """
    logger.info("test_batch_08")
    # define parameters
    batch_size = 6
    num_parallel_workers = 1
    parameters = {
        "params": {
            'batch_size': batch_size,
            'num_parallel_workers': num_parallel_workers
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(batch_size, num_parallel_workers=num_parallel_workers)

    assert sum([1 for _ in data1]) == 2
    filename = "batch_08_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
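The row-count assertions in this and the following batch tests fall out of the batch arithmetic: with drop_remainder=False (the default) the batch count is ceil(rows / batch_size), and with drop_remainder=True it is floor(rows / batch_size). Assuming the same 12-row test file, a short sketch reproduces every asserted count:

import math

ROWS = 12  # assumed dataset size, inferred from the assertions in these tests

def num_batches(rows, batch_size, drop_remainder=False):
    # drop_remainder=True discards the final partial batch.
    return rows // batch_size if drop_remainder else math.ceil(rows / batch_size)

assert num_batches(ROWS, 6) == 2                        # test_batch_08
assert num_batches(ROWS, 4, drop_remainder=False) == 3  # test_batch_07
assert num_batches(ROWS, 99, drop_remainder=True) == 0  # test_batch_10
assert num_batches(ROWS, 3, drop_remainder=False) == 4  # test_batch_03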
Example #4
def test_batch_07():
    """
    Test batch: num_parallel_workers>1, drop_remainder=False, reorder params
    """
    logger.info("test_batch_07")
    # define parameters
    batch_size = 4
    drop_remainder = False
    num_parallel_workers = 2
    parameters = {
        "params": {
            'batch_size': batch_size,
            'drop_remainder': drop_remainder,
            'num_parallel_workers': num_parallel_workers
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(num_parallel_workers=num_parallel_workers,
                        drop_remainder=drop_remainder,
                        batch_size=batch_size)

    assert sum([1 for _ in data1]) == 3
    filename = "batch_07_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
Example #5
def test_batch_10():
    """
    Test batch: batch_size > number-of-rows-in-dataset, drop_remainder=True
    """
    logger.info("test_batch_10")
    # define parameters
    batch_size = 99
    drop_remainder = True
    parameters = {
        "params": {
            'batch_size': batch_size,
            'drop_remainder': drop_remainder
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(batch_size, drop_remainder=drop_remainder)

    assert sum([1 for _ in data1]) == 0
    filename = "batch_10_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
Example #6
def test_batch_03():
    """
    Test batch: batch_size>1, drop_remainder=False, no remainder exists
    """
    logger.info("test_batch_03")
    # define parameters
    batch_size = 3
    drop_remainder = False
    parameters = {
        "params": {
            'batch_size': batch_size,
            'drop_remainder': drop_remainder
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(batch_size=batch_size, drop_remainder=drop_remainder)

    assert sum([1 for _ in data1]) == 4
    filename = "batch_03_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
Example #7
def test_case_tf_file_no_schema():
    """
    Test TFRecordDataset without an explicit schema file
    """
    logger.info("reading data from: {}".format(FILES[0]))
    parameters = {"params": {}}

    data = ds.TFRecordDataset(FILES, shuffle=ds.Shuffle.FILES)
    filename = "tf_file_no_schema.npz"
    save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #8
def skip_test_case_0_reverse():
    """
    Test Shuffle then Repeat
    (currently disabled: the 'skip_' prefix keeps the test runner from collecting it)
    """
    logger.info("Test Shuffle then Repeat")
    # define parameters
    repeat_count = 2
    buffer_size = 5
    seed = 0
    parameters = {
        "params": {
            'repeat_count': repeat_count,
            'buffer_size': buffer_size,
            'reshuffle_each_iteration': False,
            'seed': seed
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_seed(seed)
    data1 = data1.shuffle(buffer_size=buffer_size)
    data1 = data1.repeat(repeat_count)

    filename = "test_case_0_reverse_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
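Examples #1 and #8 differ only in operator order, and the order matters: repeat-then-shuffle draws its shuffle buffer from the concatenated epochs, so the two copies of a row can interleave, while shuffle-then-repeat preserves epoch boundaries. A rough pure-Python illustration of the distinction (plain random on a whole list, ignoring MindSpore's bounded shuffle buffer and reshuffle settings):

import random

epoch = list(range(5))
rng = random.Random(0)

# repeat -> shuffle: the shuffle sees the doubled stream, so the two
# copies of each element can land anywhere, including side by side.
repeat_then_shuffle = epoch * 2
rng.shuffle(repeat_then_shuffle)

# shuffle -> repeat: each pass through the data is a shuffled single epoch,
# so every value appears exactly once in each half of the output.
shuffle_then_repeat = rng.sample(epoch, len(epoch)) + rng.sample(epoch, len(epoch))

print(repeat_then_shuffle)
print(shuffle_then_repeat)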
Example #9
def test_case_2_reverse():
    """
    Test Shuffle then Batch
    """
    logger.info("Test Shuffle then Batch")
    # define parameters
    buffer_size = 5
    seed = 0
    batch_size = 2
    parameters = {
        "params": {
            'buffer_size': buffer_size,
            'seed': seed,
            'batch_size': batch_size
        }
    }

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_seed(seed)
    data1 = data1.shuffle(buffer_size=buffer_size)
    data1 = data1.batch(batch_size, drop_remainder=True)

    filename = "test_case_2_reverse_result.npz"
    save_and_check(data1,
                   parameters,
                   filename,
                   generate_golden=GENERATE_GOLDEN)
Example #10
def test_case_tf_file_pad():
    """
    Test TFRecordDataset with a schema that pads bytes columns to 10 bytes
    """
    logger.info("reading data from: {}".format(FILES[0]))
    parameters = {"params": {}}

    schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaPadBytes10.json"
    data = ds.TFRecordDataset(FILES, schema_file, shuffle=ds.Shuffle.FILES)
    filename = "tf_file_padBytes10.npz"
    save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #11
def test_case_storage():
    """
    test StorageDataset
    """
    logger.info("Test Simple StorageDataset")
    # define parameters
    parameters = {"params": {}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    filename = "storage_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #12
def test_batch_05():
    """
    Test batch: batch_size=1 (minimum valid size), drop_remainder default
    """
    logger.info("test_batch_05")
    # define parameters
    batch_size = 1
    parameters = {"params": {'batch_size': batch_size}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(batch_size)

    filename = "batch_05_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #13
def test_tf_repeat_01():
    """
    Test a simple repeat operation.
    """
    logger.info("Test Simple Repeat")
    # define parameters
    repeat_count = 2
    parameters = {"params": {'repeat_count': repeat_count}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, shuffle=False)
    data1 = data1.repeat(repeat_count)

    filename = "repeat_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #14
def test_case_columns_list():
    """
    a simple repeat operation.
    """
    logger.info("Test Simple Repeat")
    # define parameters
    repeat_count = 2
    parameters = {"params": {'repeat_count': repeat_count}}
    columns_list = ["col_sint64", "col_sint32"]
    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=columns_list, shuffle=False)
    data1 = data1.repeat(repeat_count)

    filename = "columns_list_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #15
def test_batch_12():
    """
    Test batch: batch_size boolean value True, treated as valid value 1
    """
    logger.info("test_batch_12")
    # define parameters
    batch_size = True
    parameters = {"params": {'batch_size': batch_size}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(batch_size=batch_size)

    assert sum([1 for _ in data1]) == 12
    filename = "batch_12_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
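The reason batch_size=True is "treated as valid value 1" is that bool is a subclass of int in Python, so True passes an integer type check and arithmetically equals 1; 12 rows therefore become 12 single-row batches:

# bool is an int subclass, so True behaves as the integer 1.
assert isinstance(True, int)
assert True == 1
assert 12 // True == 12  # 12 rows at batch_size=True -> 12 batches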
Example #16
def test_batch_11():
    """
    Test batch: batch_size=1 and dataset-size=1
    """
    logger.info("test_batch_11")
    # define parameters
    batch_size = 1
    parameters = {"params": {'batch_size': batch_size}}

    # apply dataset operations
    # Use schema file with 1 row
    schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema1Row.json"
    data1 = ds.TFRecordDataset(DATA_DIR, schema_file)
    data1 = data1.batch(batch_size)

    filename = "batch_11_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #17
def test_batch_02():
    """
    Test batch: batch_size>1, drop_remainder=True, remainder exists
    """
    logger.info("test_batch_02")
    # define parameters
    batch_size = 5
    drop_remainder = True
    parameters = {"params": {'batch_size': batch_size,
                             'drop_remainder': drop_remainder}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(batch_size, drop_remainder=drop_remainder)

    filename = "batch_02_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #18
def test_shuffle_01():
    """
    Test shuffle: buffer_size < number-of-rows-in-dataset
    """
    logger.info("test_shuffle_01")
    # define parameters
    buffer_size = 5
    seed = 1
    parameters = {"params": {'buffer_size': buffer_size, "seed": seed}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    ds.config.set_seed(seed)
    data1 = data1.shuffle(buffer_size=buffer_size)

    filename = "shuffle_01_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
Example #19
def test_shuffle_04():
    """
    Test shuffle: buffer_size=2 (minimum size), number-of-rows-in-dataset = 2
    """
    logger.info("test_shuffle_04")
    # define parameters
    buffer_size = 2
    seed = 1
    parameters = {"params": {'buffer_size': buffer_size, "seed": seed}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, num_samples=2)
    ds.config.set_seed(seed)
    data1 = data1.shuffle(buffer_size=buffer_size)

    filename = "shuffle_04_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
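The shuffle tests rely on shuffle(buffer_size=...) being a buffered shuffle rather than a full in-memory shuffle: it holds only buffer_size rows at a time and emits one at random as each new row arrives, which is why buffer_size=5 works on a larger dataset and buffer_size=2 is the smallest size that reorders anything. A minimal sketch of that generic algorithm (not MindSpore's internal implementation):

import random

def buffered_shuffle(stream, buffer_size, seed=1):
    # Keep at most buffer_size items; once full, emit a random one for each
    # new arrival, then drain whatever remains at the end of the stream.
    rng = random.Random(seed)
    buf = []
    for item in stream:
        buf.append(item)
        if len(buf) >= buffer_size:
            yield buf.pop(rng.randrange(len(buf)))
    while buf:
        yield buf.pop(rng.randrange(len(buf)))

print(list(buffered_shuffle(range(12), buffer_size=5)))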
Example #20
def test_batch_06():
    """
    Test batch: batch_size = number-of-rows-in-dataset, drop_remainder=False, reorder params
    """
    logger.info("test_batch_06")
    # define parameters
    batch_size = 12
    drop_remainder = False
    parameters = {"params": {'batch_size': batch_size,
                             'drop_remainder': drop_remainder}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES)
    data1 = data1.batch(drop_remainder=drop_remainder, batch_size=batch_size)

    filename = "batch_06_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)