@pytest.fixture
def evaluation_dataset(meta_partitions_evaluation_files_only, store_session):
    with cm_frozen_time(TIME_TO_FREEZE):
        return store_dataset_from_partitions(
            partition_list=meta_partitions_evaluation_files_only,
            dataset_uuid="evaluation_uuid",
            store=store_session,
        )

@pytest.fixture
def meta_partitions_dataframe(metadata_version):
    """
    Create a list of MetaPartitions for testing.

    The partitions hold in-memory ``pd.DataFrame`` objects without external
    references, i.e. their file references are empty.
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        return _get_meta_partitions_with_dataframe(metadata_version)

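# A hedged sketch of what the docstring above describes (hypothetical test,
# not part of this suite, assuming MetaPartition exposes `data` and `files`
# attributes): the partitions hold in-memory data but reference no stored
# files yet.
def test_meta_partitions_are_in_memory(meta_partitions_dataframe):
    for mp in meta_partitions_dataframe:
        assert mp.data is not None  # in-memory DataFrame(s) attached
        assert mp.files == {}  # no external file references
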
def _freeze_time_on_worker(freeze_time):
    # This runs on the distributed worker, initiated by
    # cm_distributed_frozen_time. It runs the "enter" part of the
    # cm_frozen_time context manager and saves the context manager so that
    # __exit__ can be called on it later in _unfreeze_time_on_worker.
    kartothek.core._time._datetime_utcnow_orig = kartothek.core._time.datetime_utcnow
    cm = cm_frozen_time(freeze_time)
    kartothek.core._time._time_patcher = cm
    cm.__enter__()

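# A hedged sketch of the matching teardown referenced in the comment above;
# the actual _unfreeze_time_on_worker in this codebase may differ. It exits
# the context manager saved by _freeze_time_on_worker, restoring the real
# clock on the worker.
def _unfreeze_time_on_worker():
    kartothek.core._time._time_patcher.__exit__(None, None, None)
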
@pytest.fixture
def dataset_function(meta_partitions_files_only_function, store):
    """
    Create a proper kartothek dataset in store with two partitions.
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        return store_dataset_from_partitions(
            partition_list=meta_partitions_files_only_function,
            dataset_uuid="dataset_uuid",
            store=store,
            dataset_metadata={"dataset": "metadata"},
        )

@pytest.fixture
def dataset_alternative_table_name(
    meta_partitions_files_only_alternative_table_name, store_factory
):
    """
    Create a proper kartothek dataset in store with two partitions.
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        return store_dataset_from_partitions(
            partition_list=meta_partitions_files_only_alternative_table_name,
            dataset_uuid="dataset_uuid_alternative_name",
            store=store_factory(),
            dataset_metadata={"dataset": "metadata"},
        )

@pytest.fixture
def meta_partitions_dataframe_alternative_table_name(
    metadata_version, alternative_table_name
):
    """
    Create a list of MetaPartitions for testing.

    The tables inside the partitions have a non-standard table name.
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        return _get_meta_partitions_with_dataframe(
            metadata_version,
            table_name=alternative_table_name,
            table_name_2=None,
        )

@pytest.fixture
def frozen_time():
    """
    Depend on this fixture to set the time to TIME_TO_FREEZE by patching
    kartothek.core._time.* with mock objects.

    Note: you only need one of the fixtures `frozen_time`,
    `distributed_frozen_time`, or `frozen_time_em`:

    * if your test function takes an `execution_mode` parameter, use
      `frozen_time_em`. It behaves like `distributed_frozen_time` if
      `execution_mode` starts with "dask", and like `frozen_time` otherwise.
    * otherwise, if you are testing dask/distributed code, use
      `distributed_frozen_time`. Note that this includes the effects of
      `frozen_time`.
    * otherwise, use `frozen_time`.
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        yield

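# A minimal usage sketch (hypothetical test, not part of this suite),
# assuming cm_frozen_time pins kartothek.core._time.datetime_utcnow to
# TIME_TO_FREEZE, as the fixtures above rely on:
def test_utcnow_is_frozen(frozen_time):
    import kartothek.core._time

    # the patched clock returns the frozen timestamp for the whole test
    assert kartothek.core._time.datetime_utcnow() == TIME_TO_FREEZE
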
@pytest.fixture
def dataset_partition_keys(meta_partitions_dataframe, store_session_factory):
    """
    Create a proper kartothek dataset in store with two partitions,
    partitioned on column "P".
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        new_mps = [mp.partition_on(["P"]) for mp in meta_partitions_dataframe]
        new_mps = _store_metapartitions(new_mps, store_session_factory())
        return store_dataset_from_partitions(
            partition_list=new_mps,
            dataset_uuid="dataset_uuid_partition_keys",
            store=store_session_factory(),
            dataset_metadata={"dataset": "metadata"},
        )

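# Hypothetical usage sketch, assuming the DatasetMetadata returned by
# store_dataset_from_partitions exposes a `partition_keys` attribute: the
# dataset built above is partitioned on column "P".
def test_partition_keys(dataset_partition_keys):
    assert dataset_partition_keys.partition_keys == ["P"]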