Example 1
def test_copy_same_source_and_target(dataset_to_copy, store):
    with pytest.raises(ValueError):
        copy_dataset(
            source_dataset_uuid=SRC_DS_UUID,
            target_dataset_uuid=SRC_DS_UUID,
            store=store,
            target_store=store,
        )
Example 2
def test_copy_eager_no_target_store(dataset_to_copy, store, store2):
    copy_dataset(
        source_dataset_uuid=SRC_DS_UUID,
        target_dataset_uuid=TGT_DS_UUID,
        store=store,
        target_store=None,
    )
    assert_target_keys(store, SRC_DS_UUID, store, TGT_DS_UUID)
    assert_target_ktk_readable(store, TGT_DS_UUID)
Example 3
def test_copy_eager_with_rename_different_store(dataset_to_copy, store, store2):
    """
    Copies and renames the dataset between two stores
    """
    copy_dataset(
        source_dataset_uuid=SRC_DS_UUID,
        target_dataset_uuid=TGT_DS_UUID,
        store=store,
        target_store=store2,
    )
    assert_target_keys(store, SRC_DS_UUID, store2, TGT_DS_UUID)
    assert_target_ktk_readable(store2, TGT_DS_UUID)
Example 4
def test_copy_rename_eager_same_store(dataset_to_copy, store):
    """
    Copies and renames the dataset within a single store
    """
    copy_dataset(
        source_dataset_uuid=SRC_DS_UUID,
        target_dataset_uuid=TGT_DS_UUID,
        store=store,
        target_store=store,
    )
    assert_target_keys(store, SRC_DS_UUID, store, TGT_DS_UUID)
    assert_target_ktk_readable(store, TGT_DS_UUID)
Example 5
def test_copy_eager_without_rename_different_store(dataset_to_copy, store, store2):
    """
    Copies the dataset between two stores while keeping its name
    """
    copy_dataset(
        source_dataset_uuid=SRC_DS_UUID,
        target_dataset_uuid=SRC_DS_UUID,
        store=store,
        target_store=store2,
    )
    assert_target_keys(store, SRC_DS_UUID, store2, SRC_DS_UUID)
    assert_target_ktk_readable(store2, SRC_DS_UUID)
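The five tests above rely on fixtures and helpers that are not part of these excerpts: store, store2, dataset_to_copy, the UUID constants, and the two assertion helpers. A minimal sketch of what they might look like, assuming local simplekv stores obtained via storefact and kartothek's eager write API; all bodies below are illustrative assumptions, not kartothek's actual test fixtures:

import pandas as pd
import pytest
from storefact import get_store_from_url

from kartothek.io.eager import read_table, store_dataframes_as_dataset

SRC_DS_UUID = "test_copy_source"  # assumed constant values
TGT_DS_UUID = "test_copy_target"


@pytest.fixture
def store(tmpdir):
    # Local filesystem store acting as the copy source.
    return get_store_from_url(f"hfs://{tmpdir}/source")


@pytest.fixture
def store2(tmpdir):
    # Second, independent store acting as the copy target.
    return get_store_from_url(f"hfs://{tmpdir}/target")


@pytest.fixture
def dataset_to_copy(store):
    # Write a small dataset under SRC_DS_UUID so there is something to copy.
    return store_dataframes_as_dataset(
        store=store,
        dataset_uuid=SRC_DS_UUID,
        dfs=[pd.DataFrame({"x": [0, 1], "p": [0, 1]})],
    )


def assert_target_keys(src_store, src_uuid, tgt_store, tgt_uuid):
    # Every key belonging to the source dataset must exist under the target
    # UUID with an identical payload.
    for src_key in src_store.iter_keys(src_uuid):
        tgt_key = src_key.replace(src_uuid, tgt_uuid)
        assert src_store.get(src_key) == tgt_store.get(tgt_key)


def assert_target_ktk_readable(tgt_store, tgt_uuid):
    # The copied dataset must be readable through the regular kartothek API.
    assert read_table(store=tgt_store, dataset_uuid=tgt_uuid) is not None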
Example 6
def side_effect(*args, **kwargs):
    # Succeed on the first call, fail on every subsequent one: simulates a
    # copy operation that breaks partway through a cube copy.
    if side_effect.counter == 0:
        side_effect.counter += 1
        return copy_dataset(*args, **kwargs)
    else:
        raise ValueError("Something unexpected happened during cube copy.")


side_effect.counter = 0
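This stateful side_effect is meant to be patched in place of the real copy routine, so that the first dataset of a cube copy succeeds and the second one fails. A plausible wiring sketch, assuming hypothetical fixtures built_cube, src_store, and tgt_store, and assuming kartothek.io.eager_cube.copy_dataset as the patch target; neither is confirmed by the excerpt:

from unittest import mock

import pytest

from kartothek.io.eager_cube import copy_cube


def test_copy_cube_fails_midway(built_cube, src_store, tgt_store):
    side_effect.counter = 0  # reset the call counter before patching
    # Patch target is an assumed import path for the internal copy routine.
    with mock.patch(
        "kartothek.io.eager_cube.copy_dataset", side_effect=side_effect
    ):
        with pytest.raises(ValueError, match="Something unexpected"):
            copy_cube(cube=built_cube, src_store=src_store, tgt_store=tgt_store)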
Example 7
def copy_cube(
    cube: Cube,
    src_store: Union[KeyValueStore, Callable[[], KeyValueStore]],
    tgt_store: Union[KeyValueStore, Callable[[], KeyValueStore]],
    overwrite: bool = False,
    datasets: Union[None, Iterable[str], Dict[str, DatasetMetadata]] = None,
    renamed_cube_prefix: Optional[str] = None,
    renamed_datasets: Optional[Dict[str, str]] = None,
):
    """
    Copy cube from one store to another.

    .. warning::
        A failing copy operation cannot be rolled back if the `overwrite` flag is
        enabled and may leave the overwritten dataset in an inconsistent state.

    Parameters
    ----------
    cube: Cube
        Cube specification.
    src_store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]]
        Source KV store.
    tgt_store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]]
        Target KV store.
    overwrite: bool
        Whether existing datasets in the target store should be overwritten.
    datasets: Union[None, Iterable[str], Dict[str, DatasetMetadata]]
        Datasets to copy, must all be part of the cube. May be either the result of
        :func:`~kartothek.api.discover.discover_datasets`, a list of Ktk_cube dataset
        IDs, or ``None`` (in which case the entire cube will be copied).
    renamed_cube_prefix: Optional[str]
        Optional new cube prefix. If specified, the cube will be renamed while copying.
    renamed_datasets: Optional[Dict[str, str]]
        Optional dict of ``{old dataset name: new dataset name}`` entries. If provided,
        the datasets will be renamed accordingly during copying. When the ``datasets``
        parameter is specified, the datasets to rename must be a subset of the datasets
        to copy.
    """
    if callable(src_store):
        src_store = src_store()
    if callable(tgt_store):
        tgt_store = tgt_store()
    assert_stores_different(src_store, tgt_store,
                            cube.ktk_dataset_uuid(cube.seed_dataset))
    existing_datasets = discover_datasets_unchecked(cube.uuid_prefix,
                                                    tgt_store)

    if renamed_datasets is None:
        new_seed_dataset = cube.seed_dataset
    else:
        new_seed_dataset = renamed_datasets.get(cube.seed_dataset,
                                                cube.seed_dataset)

    new_cube = Cube(
        dimension_columns=cube.dimension_columns,
        partition_columns=cube.partition_columns,
        uuid_prefix=renamed_cube_prefix or cube.uuid_prefix,
        seed_dataset=new_seed_dataset,
        index_columns=cube.index_columns,
        suppress_index_on=cube.suppress_index_on,
    )

    datasets_to_copy = get_datasets_to_copy(
        cube=cube,
        src_store=src_store,
        tgt_store=tgt_store,
        overwrite=overwrite,
        datasets=datasets,
    )

    copied = {}  # type: Dict[str, DatasetMetadata]
    for src_ds_name, src_ds_meta in datasets_to_copy.items():
        tgt_ds_uuid = _transform_uuid(
            src_uuid=src_ds_meta.uuid,
            cube_prefix=cube.uuid_prefix,
            renamed_cube_prefix=renamed_cube_prefix,
            renamed_datasets=renamed_datasets,
        )
        try:
            md_transformed = copy_dataset(
                source_dataset_uuid=src_ds_meta.uuid,
                store=src_store,
                target_dataset_uuid=tgt_ds_uuid,
                target_store=tgt_store,
            )
        except Exception as e:
            if overwrite:
                # We can't roll back safely if the target dataset has been
                # partially overwritten.
                raise RuntimeError(e) from e
            else:
                # Validate whatever has been copied so far, then re-raise so
                # the failure is not silently swallowed.
                apply_postwrite_checks(
                    datasets=copied,
                    cube=new_cube,
                    store=tgt_store,
                    existing_datasets=existing_datasets,
                )
                raise
        else:
            copied.update(md_transformed)
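A minimal usage sketch for copy_cube, assuming a cube has already been written under the prefix my_cube; the store URLs and the renamed prefix are illustrative only:

from storefact import get_store_from_url

from kartothek.core.cube.cube import Cube
from kartothek.io.eager_cube import copy_cube

# Cube spec matching the data that was originally written.
cube = Cube(
    dimension_columns=["entity"],
    partition_columns=["day"],
    uuid_prefix="my_cube",
)

src = get_store_from_url("hfs:///data/cubes/prod")  # illustrative URLs
tgt = get_store_from_url("hfs:///data/cubes/backup")

# Copy the whole cube into the second store, renaming its prefix.
copy_cube(
    cube=cube,
    src_store=src,
    tgt_store=tgt,
    renamed_cube_prefix="my_cube_backup",
)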