Example 1
def _copy_naive(
    key_mappings: Dict[str, str],
    src_store: KeyValueStore,
    tgt_store: KeyValueStore,
    md_transformed: Optional[Dict[str, DatasetMetadata]] = None,
):
    """
    Copies a list of items from one KV store to another.
    Parameters
    ----------
    key_mappings: Dict[str, str]
        Mapping of source key names to target key names. May be equal if a key will
        not be renamed.
    src_store: simplekv.KeyValueStore
        Source KV store–
    tgt_store: simplekv.KeyValueStore
        Target KV store
    md_transformed: Dict[str, DatasetMetadata]
        Mapping containing {target dataset uuid: modified target metadata} values which will be written
        directly instead of being copied
    """
    for src_key, tgt_key in key_mappings.items():
        if (md_transformed is not None) and (tgt_key in md_transformed):
            item = md_transformed.get(tgt_key).to_json()  # type: ignore
        else:
            item = src_store.get(src_key)
        tgt_store.put(tgt_key, item)
Example 2
    def store(self, store: KeyValueStore, dataset_uuid: str) -> str:
        """
        Serialize the index to ``store`` as a parquet file.

        If the ``index_storage_key`` attribute is set and contains
        ``dataset_uuid``, that key is reused. Otherwise a new key is generated
        of the format

            `{dataset_uuid}/indices/{column}/{timestamp}.by-dataset-index.parquet`

        where the timestamp is in nanosecond accuracy and is created upon
        Index object initialization.

        Parameters
        ----------
        store:
            Target KV store the parquet payload is written to.
        dataset_uuid:
            Unique ID of the dataset the index belongs to.

        Returns
        -------
        str
            The key under which the index was stored.
        """
        # Reuse the pre-existing key only when it already refers to this dataset.
        can_reuse_key = (
            self.index_storage_key is not None
            and bool(dataset_uuid)
            and dataset_uuid in self.index_storage_key
        )
        if can_reuse_key:
            storage_key = self.index_storage_key
        else:
            storage_key = "{dataset_uuid}/indices/{column}/{timestamp}{suffix}".format(
                dataset_uuid=dataset_uuid,
                column=quote(self.column),
                timestamp=quote(self.creation_time.isoformat()),
                suffix=naming.EXTERNAL_INDEX_SUFFIX,
            )

        output_buffer = pa.BufferOutputStream()
        pq.write_table(
            _index_dct_to_table(self.index_dct, self.column, self.dtype),
            output_buffer,
        )
        store.put(storage_key, output_buffer.getvalue().to_pybytes())
        return storage_key
Example 3
def store_schema_metadata(
    schema: SchemaWrapper,
    dataset_uuid: str,
    store: KeyValueStore,
    table: str = SINGLE_TABLE,
) -> str:
    """
    Persist schema and metadata to the given store.

    Parameters
    ----------
    schema
        Schema information for DataFrame/table.
    dataset_uuid
        Unique ID of the dataset in question.
    store
        Object that implements `.put(key, data)` to write data.
    table
        Table to write metadata for.

    Returns
    -------
    key: str
        Key to which the metadata was written to.
    """
    target_key = _get_common_metadata_key(dataset_uuid=dataset_uuid, table=table)
    payload = _schema2bytes(schema.internal())
    return store.put(target_key, payload)