Ejemplo n.º 1
0
def profile(datasource_name, data_assets, profile_all_data_assets, directory):
    """
    Profile datasources from the specified context.

    If the optional data_assets and profile_all_data_assets arguments are not specified, the profiler will check
    if the number of data assets in the datasource exceeds the internally defined limit. If it does, it will
    prompt the user to either specify the list of data assets to profile or to profile all.
    If the limit is not exceeded, the profiler will profile all data assets in the datasource.

    :param datasource_name: name of the datasource to profile; when None, the context must contain exactly
        one datasource, which is then profiled
    :param data_assets: if this comma-separated list of data asset names is provided, only the specified data assets will be profiled
    :param profile_all_data_assets: if provided, all data assets will be profiled
    :param directory: passed to DataContext to locate the great_expectations configuration
    :return: None; problems are reported through cli_message
    """

    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message("Error: no great_expectations context configuration found in the specified directory.")
        return

    if datasource_name is None:
        datasources = [datasource["name"] for datasource in context.list_datasources()]
        if not datasources:
            # Without this guard the datasources[0] lookup below raises IndexError
            # when the context has no datasources configured.
            cli_message("Error: no datasources found in the great_expectations configuration.")
            return
        if len(datasources) > 1:
            cli_message("Error: please specify the datasource to profile. Available datasources: " + ", ".join(datasources))
            return
        # Exactly one datasource is configured, so it is the unambiguous default.
        datasource_name = datasources[0]

    profile_datasource(context, datasource_name, data_assets=data_assets, profile_all_data_assets=profile_all_data_assets)
Ejemplo n.º 2
0
def profile(datasource_name, data_assets, profile_all_data_assets, directory):
    """Profile datasources from the specified context.


    DATASOURCE_NAME: the datasource to profile, or leave blank to profile all datasources."""

    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message(
            "Error: no great_expectations context configuration found in the specified directory."
        )
        return

    if datasource_name is not None:
        # An explicit datasource was requested: profile only that one,
        # restricted to the requested data assets.
        profile_datasource(context, datasource_name, data_assets=data_assets)
        return

    # Override the asset limit only when the caller asked to profile everything.
    # Otherwise omit the keyword so profile_datasource applies its own default;
    # the name used to be assigned only when the flag was set, so this path
    # raised UnboundLocalError whenever the flag was not given.
    limit_kwargs = {"max_data_assets": None} if profile_all_data_assets else {}

    # A separate loop variable avoids shadowing the datasource_name parameter.
    names = [datasource["name"] for datasource in context.list_datasources()]
    for name in names:
        profile_datasource(context, name, **limit_kwargs)
Ejemplo n.º 3
0
def profile(datasource_name, data_assets, profile_all_data_assets, directory, view, batch_kwargs):
    """
    Profile datasources from the specified context.

    If the optional data_assets and profile_all_data_assets arguments are not specified, the profiler will check
    if the number of data assets in the datasource exceeds the internally defined limit. If it does, it will
    prompt the user to either specify the list of data assets to profile or to profile all.
    If the limit is not exceeded, the profiler will profile all data assets in the datasource.

    :param datasource_name: name of the datasource to profile; when None, the context must contain exactly
        one datasource, which is then profiled
    :param data_assets: if this comma-separated list of data asset names is provided, only the specified data assets will be profiled
    :param profile_all_data_assets: if provided, all data assets will be profiled
    :param directory: passed to DataContext to locate the great_expectations configuration
    :param view: Open the docs in a browser
    :param batch_kwargs: JSON-encoded string of additional keyword arguments to be provided to get_batch
        when loading the data asset, or None
    :return: None; exits the process with status -1 when no datasource can be selected
    """

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # DataContext() raised, so `context` was never bound; reading
        # context.root_directory here would fail with UnboundLocalError.
        # Pass the directory the caller supplied instead.
        # NOTE(review): `directory` may be None when the caller did not specify
        # one -- confirm _offer_to_install_new_template handles that.
        _offer_to_install_new_template(err, directory)
        return

    if batch_kwargs is not None:
        # The option arrives as a JSON string; decode it before passing it on.
        batch_kwargs = json.loads(batch_kwargs)

    if datasource_name is None:
        datasources = [datasource["name"] for datasource in context.list_datasources()]
        if not datasources:
            cli_message(NO_DATASOURCES_FOUND)
            sys.exit(-1)
        if len(datasources) > 1:
            cli_message(
                "<red>Error: please specify the datasource to profile. "\
                "Available datasources: " + ", ".join(datasources) + "</red>"
            )
            sys.exit(-1)
        # Exactly one datasource is configured, so it is the unambiguous default.
        datasource_name = datasources[0]

    profile_datasource(
        context,
        datasource_name,
        data_assets=data_assets,
        profile_all_data_assets=profile_all_data_assets,
        open_docs=view,
        additional_batch_kwargs=batch_kwargs
    )
Ejemplo n.º 4
0
def list_datasources(directory):
    """List known datasources."""
    # Pre-bind so the version-error handler can tell whether DataContext()
    # itself failed; otherwise it would read an unbound local and raise
    # UnboundLocalError instead of offering the new template.
    context = None
    try:
        context = DataContext(directory)
        datasources = context.list_datasources()
        # TODO Pretty up this console output
        cli_message(str(list(datasources)))
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # Prefer the resolved root directory when the context was built;
        # fall back to the caller-supplied directory when construction failed.
        # NOTE(review): `directory` may be None when the caller did not specify
        # one -- confirm _offer_to_install_new_template handles that.
        root_directory = directory if context is None else context.root_directory
        _offer_to_install_new_template(err, root_directory)