def __init__(
    self,
    uuid: str,
    metadata_version=naming.DEFAULT_METADATA_VERSION,
    explicit_partitions=True,
    partition_keys=None,
    table_meta=None,
):
    """
    Set up an empty builder for the dataset identified by ``uuid``.

    The metadata, index and partition containers all start out as empty
    ordered dictionaries; the remaining arguments are stored on the
    instance unchanged.

    Parameters
    ----------
    uuid
        Identifier of the dataset.
    metadata_version
        Metadata version to use. It is checked with
        ``verify_metadata_version`` before any attribute is set.
    explicit_partitions
        Stored as ``self.explicit_partitions``.
    partition_keys
        Stored as ``self.partition_keys``.
    table_meta
        Stored as ``self.table_meta``.
    """
    # Validate first so that a rejected version leaves no state behind.
    verify_metadata_version(metadata_version)

    self.uuid = uuid
    self.metadata: Dict = OrderedDict()
    self.indices: Dict[str, IndexBase] = OrderedDict()
    self.metadata_version = metadata_version
    self.partitions: Dict[str, Partition] = OrderedDict()
    self.partition_keys = partition_keys
    self.table_meta = table_meta
    self.explicit_partitions = explicit_partitions

    # Needs ``self.metadata`` to exist already -- presumably this records a
    # creation timestamp in it; confirm against the helper's definition.
    _add_creation_time(self)

    super().__init__()
def __init__(
    self,
    label: Optional[str],
    file: Optional[str] = None,
    table_name: str = SINGLE_TABLE,
    data: Optional[pd.DataFrame] = None,
    indices: Optional[Dict[Any, Any]] = None,
    metadata_version: Optional[int] = None,
    schema: Optional[SchemaWrapper] = None,
    partition_keys: Optional[Sequence[str]] = None,
    logical_conjunction: Optional[List[Tuple[Any, str, Any]]] = None,
):
    """
    Initialize the :mod:`kartothek.io` base class MetaPartition.

    The `MetaPartition` is used as a wrapper around the kartothek
    `Partition` and primarily deals with dataframe manipulations, in- and
    output to store.

    The :class:`kartothek.io_components.metapartition` is immutable, i.e.
    all member functions will return a new MetaPartition object where the
    new attribute is changed.

    Parameters
    ----------
    label
        Partition label.
    file
        Reference to the file in the store. Falsy values (e.g. an empty
        string) are stored as ``None``.
    table_name
        Name of the table. Together with ``label`` it forms the ``origin``
        of a schema that is derived from ``data``.
    data
        The materialized in-memory DataFrame of the partition.
    indices
        Kartothek index dictionary keyed by column. Values that are plain
        dictionaries are wrapped in an ``ExplicitSecondaryIndex``; all
        other values are kept as they are. The dictionary passed in is
        not modified.
    metadata_version
        Metadata version to use. ``None`` selects
        ``naming.DEFAULT_METADATA_VERSION``. The resulting version is
        checked with ``verify_metadata_version``.
    schema
        The table schema. When it is omitted and ``data`` is given, the
        schema is derived from ``data`` via ``make_meta``.
    partition_keys
        The dataset partition keys. ``None`` is stored as an empty list.
    logical_conjunction
        A logical conjunction to assign to the MetaPartition. By assigning
        this, the MetaPartition will only be able to load data respecting
        this conjunction.
    """
    if metadata_version is None:
        self.metadata_version = naming.DEFAULT_METADATA_VERSION
    else:
        self.metadata_version = metadata_version
    verify_metadata_version(self.metadata_version)

    self.schema = schema
    self.table_name = table_name
    # Only infer a schema when the caller did not supply one explicitly.
    if data is not None and schema is None:
        self.schema = make_meta(
            data, origin=f"{table_name}/{label}", partition_keys=partition_keys
        )

    # Build a fresh mapping instead of writing the wrapped indices back into
    # the caller's dictionary, which would silently alter their object.
    indices = {
        column: (
            ExplicitSecondaryIndex(column=column, index_dct=index)
            if isinstance(index, dict)
            else index
        )
        for column, index in (indices or {}).items()
    }

    self.logical_conjunction = logical_conjunction
    self.metapartitions = [
        {
            "label": label,
            "data": data,
            "file": file or None,
            "indices": indices,
            "logical_conjunction": logical_conjunction,
        }
    ]
    self.partition_keys = partition_keys or []