Beispiel #1
0
    def __init__(
        self,
        uuid: str,
        metadata_version=naming.DEFAULT_METADATA_VERSION,
        explicit_partitions=True,
        partition_keys=None,
        table_meta=None,
    ):
        """
        Set up the initial state of a builder for the dataset ``uuid``.

        Parameters
        ----------
        uuid
            Dataset identifier, stored as ``self.uuid``.
        metadata_version
            Passed to ``verify_metadata_version`` before any attribute is
            assigned, then stored as ``self.metadata_version``.
        explicit_partitions
            Stored unchanged as ``self.explicit_partitions``.
        partition_keys
            Stored unchanged as ``self.partition_keys``.
        table_meta
            Stored unchanged as ``self.table_meta``.
        """
        # Validate up front, before any attribute is assigned.
        verify_metadata_version(metadata_version)

        # Values taken over directly from the arguments.
        self.uuid = uuid
        self.metadata_version = metadata_version
        self.explicit_partitions = explicit_partitions
        self.partition_keys = partition_keys
        self.table_meta = table_meta

        # Containers that start out empty and are filled later.
        self.metadata: Dict = OrderedDict()
        self.indices: Dict[str, IndexBase] = OrderedDict()
        self.partitions: Dict[str, Partition] = OrderedDict()

        # NOTE(review): presumably writes into ``self.metadata`` -- keep this
        # call after the attribute assignments above.
        _add_creation_time(self)
        super(DatasetMetadataBuilder, self).__init__()
Beispiel #2
0
    def __init__(
        self,
        label: Optional[str],
        file: Optional[str] = None,
        table_name: str = SINGLE_TABLE,
        data: Optional[pd.DataFrame] = None,
        indices: Optional[Dict[Any, Any]] = None,
        metadata_version: Optional[int] = None,
        schema: Optional[SchemaWrapper] = None,
        partition_keys: Optional[Sequence[str]] = None,
        logical_conjunction: Optional[List[Tuple[Any, str, Any]]] = None,
    ):
        """
        Initialize the :mod:`kartothek.io` base class MetaPartition.

        The `MetaPartition` is used as a wrapper around the kartothek
        `Partition` and primarily deals with dataframe manipulations,
        in- and output to store.

        The :class:`kartothek.io_components.metapartition` is immutable, i.e. all member
        functions will return a new MetaPartition object where the new
        attribute is changed.

        Parameters
        ----------
        label
            Partition label. Stored in the ``metapartitions`` entry and used
            together with `table_name` as the ``origin`` of a derived schema.
        file
            Reference to the file in the store. Falsy values (e.g. an empty
            string) are normalized to ``None``.
        table_name
            Name of the table this partition belongs to.
        data
            The materialized in-memory DataFrame of the partition. If it is
            given and no `schema` is passed, the schema is derived from it
            with ``make_meta``.
        indices
            Kartothek index dictionary keyed by column. Values that are plain
            dictionaries are wrapped in ``ExplicitSecondaryIndex``; all other
            values are kept as they are. The passed dictionary itself is not
            modified.
        metadata_version
            Metadata version to use. ``None`` selects
            ``naming.DEFAULT_METADATA_VERSION``. The resulting version is
            checked with ``verify_metadata_version``.
        schema
            The table schema. Takes precedence over a schema derived from
            `data`.
        partition_keys
            The dataset partition keys. ``None`` (or any empty sequence) is
            stored as an empty list.
        logical_conjunction
            A logical conjunction to assign to the MetaPartition. By assigning
            this, the MetaPartition will only be able to load data respecting
            this conjunction.
        """

        if metadata_version is None:
            self.metadata_version = naming.DEFAULT_METADATA_VERSION
        else:
            self.metadata_version = metadata_version
        verify_metadata_version(self.metadata_version)
        self.schema = schema
        self.table_name = table_name
        if data is not None and schema is None:
            self.schema = make_meta(data,
                                    origin=f"{table_name}/{label}",
                                    partition_keys=partition_keys)

        # Work on a shallow copy: wrapping raw dictionaries below must not
        # write index objects back into the dictionary owned by the caller.
        indices = dict(indices) if indices else {}
        for column, index_dct in indices.items():
            if isinstance(index_dct, dict):
                # Replacing the value of an existing key does not resize the
                # dict, so this is safe while iterating over ``items()``.
                indices[column] = ExplicitSecondaryIndex(column=column,
                                                         index_dct=index_dct)
        self.logical_conjunction = logical_conjunction
        self.metapartitions = [{
            "label": label,
            "data": data,
            "file": file or None,
            "indices": indices,
            "logical_conjunction": logical_conjunction,
        }]
        self.partition_keys = partition_keys or []