Example #1
def update_indices_from_partitions(partition_list, dataset_metadata_factory):
    """
    This takes indices from a partition list and overwrites all indices in the dataset metadata
    provided by the dataset metadata factory. The same is done in the store dataset part. This is used
    in an additional build index step (by the build_dataset_indices__pipeline) which should be used after
    updating partitions of a dataset.
    """

    dataset_indices = MetaPartition.merge_indices(partition_list)

    indices = persist_indices(
        store=dataset_metadata_factory.store,
        dataset_uuid=dataset_metadata_factory.uuid,
        indices=dataset_indices,
    )

    for column, storage_key in indices.items():
        dataset_metadata_factory.indices[column] = ExplicitSecondaryIndex(
            column=column, index_storage_key=storage_key
        )

    dataset_metadata_factory.store.put(
        naming.metadata_key_from_uuid(dataset_metadata_factory.uuid),
        dataset_metadata_factory.to_json(),
    )
    return dataset_metadata_factory
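
A hedged sketch of a call site for this helper; get_dataset_factory is a hypothetical stand-in, and the factory only needs to expose the attributes the function actually touches (store, uuid, indices, to_json):

factory = get_dataset_factory("my_dataset_uuid")  # hypothetical helper, not shown above
# partition_list: MetaPartition objects whose indices are already loaded
factory = update_indices_from_partitions(partition_list, factory)
print(sorted(factory.indices))  # columns whose indices were just persisted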
Example #2
def update_indices(dataset_builder, store, add_partitions, remove_partitions):
    dataset_indices = dataset_builder.indices
    partition_indices = MetaPartition.merge_indices(add_partitions)

    if dataset_indices:  # dataset already exists and will be updated
        if remove_partitions:
            for column, dataset_index in dataset_indices.items():
                dataset_indices[column] = dataset_index.remove_partitions(
                    remove_partitions, inplace=True)

        for column, index in partition_indices.items():
            dataset_indices[column] = dataset_indices[column].update(
                index, inplace=True)

    else:  # dataset index will be created first time from partitions
        dataset_indices = partition_indices

    # Store indices
    index_filenames = persist_indices(store=store,
                                      dataset_uuid=dataset_builder.uuid,
                                      indices=dataset_indices)
    for column, filename in index_filenames.items():
        dataset_builder.add_external_index(column, filename)

    return dataset_builder
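
For reference, a minimal sketch of the two ExplicitSecondaryIndex operations this update path relies on, using the constructor shape from Example #3 below; the dict payloads, and the assumption that values emptied by remove_partitions disappear, are illustrative only:

idx = ExplicitSecondaryIndex("location", {"Loc1": ["p1", "p2"], "Loc2": ["p2"]})
idx = idx.remove_partitions(["p2"])  # purge label "p2"; assumed -> {"Loc1": ["p1"]}
idx = idx.update(ExplicitSecondaryIndex("location", {"Loc1": ["p3"]}))
# merged result; assumed -> {"Loc1": ["p1", "p3"]}

With inplace=True the calls in update_indices presumably mutate the index and return the same object, so the reassignment there looks defensive rather than required.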
Example #3
def test_merge_indices():
    indices = [
        MetaPartition(
            label="label1",
            indices={"location": {"Loc1": ["label1"], "Loc2": ["label1"]}},
        ),
        MetaPartition(
            label="label2",
            indices={
                "location": {"Loc3": ["label2"], "Loc2": ["label2"]},
                "product": {"Product1": ["label2"], "Product2": ["label2"]},
            },
        ),
    ]
    result = MetaPartition.merge_indices(indices)
    expected = {
        "location": ExplicitSecondaryIndex(
            "location",
            {"Loc1": ["label1"], "Loc2": ["label1", "label2"], "Loc3": ["label2"]},
        ),
        "product": ExplicitSecondaryIndex(
            "product", {"Product1": ["label2"], "Product2": ["label2"]}
        ),
    }
    assert result == expected
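
The assertion pins down the merge semantics: a value indexed by only one partition passes through unchanged, while a value observed in several partitions ("Loc2") ends up mapping to the concatenated list of their labels.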
Example #4
def time_merge_indices(self, cardinality, num_values, partitions_to_merge):
    MetaPartition.merge_indices(self.merge_indices)
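
Example #4 reads like an airspeed velocity (asv) timing method and presumes a setup hook that populates self.merge_indices. A minimal sketch of such a harness follows; the class name, the parameter grid, and the interpretation of cardinality and num_values are all assumptions, and the timed method above would live on this class:

class MergeIndicesSuite:
    # Hypothetical asv parameter grid matching the method signature above.
    params = ([10, 100], [1, 4], [2, 8])
    param_names = ["cardinality", "num_values", "partitions_to_merge"]

    def setup(self, cardinality, num_values, partitions_to_merge):
        # Following the shape in Example #3, each partition's index maps a
        # value to that partition's own label. How cardinality and num_values
        # are meant to differ is not visible here, so the sketch simply
        # generates cardinality * num_values distinct values per partition.
        self.merge_indices = [
            MetaPartition(  # same MetaPartition as in the examples above
                label="part_{}".format(i),
                indices={
                    "col": {
                        "val_{}".format(v): ["part_{}".format(i)]
                        for v in range(cardinality * num_values)
                    }
                },
            )
            for i in range(partitions_to_merge)
        ]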