# The parametrization values below are an assumption inferred from the docstring
# (string, integer and string+integer partition fields); 'partition_by' must be
# supplied somehow for the test to run.
@pytest.mark.parametrize('partition_by', [['string'], ['id'], ['string', 'id']])
def test_string_partition(reader_factory, tmpdir, partition_by):
    """Try datasets partitioned by string, integer and string+integer fields"""
    url = 'file://' + tmpdir.strpath
    data = create_test_scalar_dataset(url, 10, partition_by=partition_by)
    with reader_factory(url) as reader:
        row_ids_batched = [row.id for row in reader]
    # Each element of row_ids_batched is a batch of ids; flatten before comparing counts
    actual_row_ids = list(itertools.chain(*row_ids_batched))
    assert len(data) == len(actual_row_ids)

def test_partitioned_field_is_not_queried(reader_factory, tmpdir):
    """Check that a dataset partitioned by a field can be read when that field is excluded
    from the requested schema_fields"""
    url = 'file://' + tmpdir.strpath
    data = create_test_scalar_dataset(url, 10, partition_by=['id'])
    with reader_factory(url, schema_fields=['string']) as reader:
        all_rows = list(reader)
    assert len(data) == len(all_rows)
    # Only the requested field should appear in the resulting rows
    assert all_rows[0]._fields == ('string',)
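
def _read_projected_columns_with_plain_pyarrow(dataset_path):
    """Illustration only, not part of the original tests: the behavior checked above is
    ordinary parquet column pruning. This hypothetical helper sketches the equivalent
    projection using the standard pyarrow.parquet API."""
    # pq.read_table drops all columns except those listed in 'columns'
    return pq.read_table(dataset_path, columns=['string'])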

def test_asymmetric_parquet_pieces(reader_factory, tmpdir):
    """Check that datasets whose parquet pieces have different numbers of row-groups can be
    fully read"""
    url = 'file://' + tmpdir.strpath
    ROWS_COUNT = 1000

    # Partitioning by id_div_700 forces an asymmetric split between partitions and hopefully
    # gets us files with different numbers of row-groups
    create_test_scalar_dataset(url, ROWS_COUNT, partition_by=['id_div_700'])

    # Verify that we have pieces with different numbers of row-groups
    dataset = pq.ParquetDataset(tmpdir.strpath)
    row_group_counts = set(
        compat_get_metadata(piece, dataset.fs.open).num_row_groups for piece in dataset.pieces)
    assert len(row_group_counts) > 1

    # Make sure we are not missing any rows
    with reader_factory(url, schema_fields=['id']) as reader:
        row_ids_batched = [row.id for row in reader]
    actual_row_ids = list(itertools.chain(*row_ids_batched))
    assert ROWS_COUNT == len(actual_row_ids)
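
def _row_group_counts_with_plain_pyarrow(parquet_file_paths):
    """Illustration only, not part of the original tests: a sketch of the row-group count
    check above without the petastorm compat shim. 'parquet_file_paths' is a hypothetical
    argument holding paths to the individual parquet files of the dataset."""
    # ParquetFile.metadata.num_row_groups reports how many row-groups a single file contains
    return {pq.ParquetFile(path).metadata.num_row_groups for path in parquet_file_paths}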

def _pure_parquet_dataset_no_cache(tmpdir_factory):
    path = tmpdir_factory.mktemp("data").strpath
    url = 'file://' + path
    data = create_test_scalar_dataset(url, 100)
    dataset = SyntheticDataset(url=url, path=path, data=data)
    return dataset
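
# A minimal sketch of how the helper above could be exposed as a session-scoped pytest
# fixture; the fixture name and scope are assumptions and not part of the original code.
@pytest.fixture(scope='session')
def scalar_dataset_no_cache(tmpdir_factory):
    # Build the dataset once per test session and reuse it across tests
    return _pure_parquet_dataset_no_cache(tmpdir_factory)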