Example #1
0
def _info_dataset(ktk_cube_dataset_id, ds, cube):
    """
    Print a human-readable summary of one dataset of the cube.

    The output starts with an empty line and a line containing the dataset
    ID, followed by the partition keys, the partition count, the
    JSON-formatted metadata and the dimension and payload columns.

    Parameters
    ----------
    ktk_cube_dataset_id
        Dataset identifier shown in the first output line.
    ds
        Dataset to describe; ``load_partition_indices()`` is called on it and
        the returned object is the one that gets inspected.
    cube
        Object providing ``dimension_columns`` and ``partition_columns``.
    """
    click.echo("")
    headline = "Dataset: {}".format(ktk_cube_dataset_id)
    click.echo(h(headline))

    loaded = ds.load_partition_indices()
    schema = loaded.schema
    columns = get_dataset_columns(loaded)

    # Payload columns are whatever remains once the cube's dimension and
    # partition columns are removed from the dataset's columns.
    non_payload = set(cube.dimension_columns) | set(cube.partition_columns)
    payload_cols = sorted(columns - non_payload)
    # Only those dimension columns that this dataset actually contains.
    dim_cols = sorted(set(cube.dimension_columns) & columns)

    partition_keys_text = _collist_string(loaded.partition_keys, schema)
    click.echo(b("Partition Keys:") + partition_keys_text)

    click.echo(b("Partitions:") + " {}".format(len(loaded.partitions)))

    # Render the metadata as sorted, indented JSON and shift every line two
    # spaces to the right so it sits visually below its label.
    metadata_label = b("Metadata:")
    metadata_json = json.dumps(
        loaded.metadata, indent=2, sort_keys=True, separators=(",", ": ")
    )
    indented_lines = ["  {}".format(line) for line in metadata_json.split("\n")]
    click.echo(metadata_label + "\n{}".format("\n".join(indented_lines)))

    click.echo(b("Dimension Columns:") + _collist_string(dim_cols, schema))

    click.echo(b("Payload Columns:") + _collist_string(payload_cols, schema))
Example #2
0
def stats(ctx, include, exclude):
    """
    Collect technical statistic from cube.
    """
    # NOTE(review): the docstring above is presumably surfaced as CLI help
    # text by click, so it is kept verbatim -- confirm before rewording.
    #
    # ``ctx.obj`` is expected to carry the entries "cube", "store" and
    # "datasets"; ``include`` / ``exclude`` are forwarded to ``filter_items``
    # to narrow down which datasets are inspected.
    cube = ctx.obj["cube"]
    store = ctx.obj["store"]
    all_datasets = set(ctx.obj["datasets"].keys())

    selected_datasets = filter_items("dataset", all_datasets, include, exclude)

    try:
        result = collect_stats_bag(
            cube=cube, store=store, datasets=selected_datasets
        ).compute()
    except RuntimeError as e:
        # Report the failure as a usage error, explicitly chaining the
        # original exception so the root cause stays attached to it.
        raise click.UsageError("Failed to collect stats: {e}".format(e=e)) from e

    # Only the first element of the computed result is used.
    data = result[0]

    # Running totals across all datasets, printed in the final section.
    blobsize = 0
    files = 0

    for i, ktk_cube_dataset_id in enumerate(sorted(data.keys())):
        # Named ``dataset_stats`` to avoid shadowing this function's own name.
        dataset_stats = data[ktk_cube_dataset_id]

        # Separate consecutive dataset sections with an empty line.
        if i > 0:
            click.echo("")
        click.echo(h(ktk_cube_dataset_id))
        for what in sorted(dataset_stats.keys()):
            click.echo(b("{}:".format(what)) + "  {:,}".format(dataset_stats[what]))

        blobsize += dataset_stats["blobsize"]
        files += dataset_stats["files"]

    click.echo("")
    click.echo(h("__total__"))
    click.echo(b("blobsize:") + "  {:,}".format(blobsize))
    click.echo(b("files:") + "  {:,}".format(files))
Example #3
0
def info(ctx):
    """
    Show certain infos about the cube.
    """
    # NOTE(review): the docstring above is presumably surfaced as CLI help
    # text by click, so it is kept verbatim -- confirm before rewording.
    #
    # Prints a general section about the cube (UUID prefix, dimension,
    # partition and index columns, seed dataset) and then one section per
    # dataset, ordered by dataset ID.
    cube = ctx.obj["cube"]
    datasets = ctx.obj["datasets"]

    # Column listings in the general section are rendered against the schema
    # of the seed dataset.
    seed_schema = datasets[cube.seed_dataset].schema

    click.echo(h("Infos"))

    uuid_line = b("UUID Prefix:") + "        {}".format(cube.uuid_prefix)
    click.echo(uuid_line)

    dimension_line = b("Dimension Columns:") + _collist_string(
        cube.dimension_columns, seed_schema
    )
    click.echo(dimension_line)

    partition_line = b("Partition Columns:") + _collist_string(
        cube.partition_columns, seed_schema
    )
    click.echo(partition_line)

    index_line = b("Index Columns:") + _collist_string_index(cube, datasets)
    click.echo(index_line)

    seed_line = b("Seed Dataset:") + "      {}".format(cube.seed_dataset)
    click.echo(seed_line)

    # Emit the per-dataset sections in a stable, sorted order.
    ordered_ids = sorted(datasets.keys())
    for dataset_id in ordered_ids:
        _info_dataset(dataset_id, datasets[dataset_id], cube)