Example #1
    @staticmethod
    def from_dict(dct: Dict, explicit_partitions: bool = True):
        """
        Load dataset metadata from a dictionary.

        This must have no external references. Otherwise use ``load_from_dict``
        to have them resolved automatically.
        """

        # Use the builder class for reconstruction to have a single point for metadata version changes
        builder = DatasetMetadataBuilder(
            uuid=dct[naming.UUID_KEY],
            metadata_version=dct[naming.METADATA_VERSION_KEY],
            explicit_partitions=explicit_partitions,
            partition_keys=dct.get("partition_keys", None),
            table_meta=dct.get("table_meta", None),
        )

        for key, value in dct.get("metadata", {}).items():
            builder.add_metadata(key, value)
        for partition_label, part_dct in dct.get("partitions", {}).items():
            builder.add_partition(
                partition_label, Partition.from_dict(partition_label, part_dct)
            )
        for column, index_dct in dct.get("indices", {}).items():
            if isinstance(index_dct, IndexBase):
                builder.add_embedded_index(column, index_dct)
            else:
                builder.add_embedded_index(
                    column, ExplicitSecondaryIndex.from_v2(column, index_dct)
                )
        return builder.to_dataset()
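
For orientation, a minimal usage sketch (hypothetical, not part of the original example): the import path and the "dataset_uuid" / "dataset_metadata_version" key names are assumed from kartothek's naming constants (they match the metadata dict in Example #2 below), and the dict must be fully embedded, i.e. contain no external index references.

from kartothek.core.dataset import DatasetMetadata

# Hypothetical minimal dataset dict; embedded indices are plain
# value -> partition-label mappings, converted via
# ExplicitSecondaryIndex.from_v2 in the loop above.
dct = {
    "dataset_uuid": "my_dataset",
    "dataset_metadata_version": 4,
    "partitions": {
        "part_1": {"files": {"core_data": "my_dataset/core_data/part_1.parquet"}},
    },
    "indices": {"product_id": {"1": ["part_1"]}},
}

dmd = DatasetMetadata.from_dict(dct)
assert dmd.uuid == "my_dataset"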
Example #2
# Imports assumed from kartothek's test suite; ``store`` and ``metadata_version``
# are pytest fixtures (a simplekv-style store and the dataset metadata version).
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import simplejson

from kartothek.core.common_metadata import make_meta, store_schema_metadata
from kartothek.core.dataset import DatasetMetadata
from kartothek.core.index import ExplicitSecondaryIndex


def test_query_indices_external(store, metadata_version):
    expected = {
        "dataset_metadata_version": metadata_version,
        "dataset_uuid": "uuid+namespace-attribute12_underscored",
        "partitions": {
            "part_1": {
                "files": {
                    "core_data": "file.parquest"
                }
            },
            "part_2": {
                "files": {
                    "core_data": "file2.parquest"
                }
            },
        },
        "indices": {
            "product_id":
            "uuid+namespace-attribute12_underscored.product_id.by-dataset-index.parquet",
            "location_id": {
                "1": ["part_1"],
                "2": ["part_2"],
                "3": ["part_1"],
                "4": ["part_2"],
            },
        },
    }
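    # Persist the dataset metadata JSON under kartothek's canonical
    # ``<uuid>.by-dataset-metadata.json`` storage key.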
    store.put(
        "uuid+namespace-attribute12_underscored.by-dataset-metadata.json",
        simplejson.dumps(expected).encode("utf-8"),
    )
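    # Build the external product_id index: a parquet file mapping each
    # product_id value to the partition labels that contain it.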
    df = pd.DataFrame({
        "product_id": [1, 2, 100, 34],
        "partition": [
            np.array(["part_1"], dtype=object),
            np.array(["part_2"], dtype=object),
            np.array(["part_1", "part_2"], dtype=object),
            np.array(["part_1"], dtype=object),
        ],
    })
    schema = pa.schema([
        pa.field("partition", pa.list_(pa.string())),
        pa.field("product_id", pa.int64()),
    ])
    table = pa.Table.from_pandas(df, schema=schema)
    buf = pa.BufferOutputStream()
    pq.write_table(table, buf)
    store.put(
        "uuid+namespace-attribute12_underscored.product_id.by-dataset-index.parquet",
        buf.getvalue().to_pybytes(),
    )
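    # Persist the shared table schema for "core_data" so that
    # DatasetMetadata.load_from_store can reconstruct the table metadata.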
    store_schema_metadata(
        make_meta(df, origin="core"),
        "uuid+namespace-attribute12_underscored",
        store,
        "core_data",
    )

    dmd = DatasetMetadata.load_from_store(
        "uuid+namespace-attribute12_underscored", store)

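    # External indices are stored as separate parquet files and are lazy:
    # they must be loaded before they can be queried.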
    dmd = dmd.load_index("product_id", store)
    assert dmd.query(product_id=2) == ["part_2"]
    dmd = dmd.load_all_indices(store)
    assert dmd.query(product_id=2, location_id=2) == ["part_2"]
    assert dmd.query(product_id=100, location_id=3) == ["part_1"]
    assert dmd.query(product_id=2, location_id=2, something_else="bla") == ["part_2"]

    additional_index = ExplicitSecondaryIndex.from_v2(
        "another_column", {"1": ["part_2", "part_3"]}
    )
    assert dmd.query(
        indices=[additional_index], another_column="1", product_id=2, location_id=2
    ) == ["part_2"]
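
As a follow-up sketch (assumed behaviour, not part of the original test): ExplicitSecondaryIndex.from_v2 accepts either an inline value -> partition-label mapping, which yields an index that is immediately usable, or a storage key string such as the "product_id" entry in the metadata above, which stays unloaded until load_index materializes it from the store.

from kartothek.core.index import ExplicitSecondaryIndex

# Inline dict: the index is loaded and can be queried right away.
loaded = ExplicitSecondaryIndex.from_v2("another_column", {"1": ["part_2", "part_3"]})
assert loaded.loaded

# Storage key string: only a reference to the index parquet file.
unloaded = ExplicitSecondaryIndex.from_v2(
    "product_id", "some_uuid.product_id.by-dataset-index.parquet"
)
assert not unloaded.loaded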