def test_build_data_docs_skipping_index_does_not_build_index(
    tmp_path_factory,
):
    """Building docs with ``build_index=False`` must not write ``index.html``.

    The static assets directory is still expected to be produced.
    """
    # TODO What's the latest and greatest way to use configs rather than my hackery?
    project_root = str(tmp_path_factory.mktemp("empty"))
    DataContext.create(project_root)
    ge_dir = os.path.join(project_root, DataContext.GE_DIR)

    # Single filesystem-backed site rooted at uncommitted/data_docs.
    site_config = {
        "class_name": "SiteBuilder",
        "store_backend": {
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": os.path.join("uncommitted", "data_docs"),
        },
    }

    ctx = DataContext(ge_dir)
    project_config = ctx.get_config()
    project_config.data_docs_sites = {"local_site": site_config}
    ctx._project_config = project_config

    # TODO Workaround project config programmatic config manipulation
    #  statefulness issues by writing to disk and re-upping a new context
    ctx._save_project_config()
    del ctx
    ctx = DataContext(ge_dir)

    docs_root = os.path.join(ge_dir, "uncommitted", "data_docs")
    index_html = os.path.join(docs_root, "index.html")
    static_dir = os.path.join(docs_root, "static")

    # Nothing has been built yet, so there is no index to begin with.
    assert not os.path.isfile(index_html)

    ctx.build_data_docs(build_index=False)

    assert os.path.isdir(static_dir)
    assert not os.path.isfile(index_html)
def test_existing_local_data_docs_urls_returns_single_url_from_customized_local_site(
    tmp_path_factory,
):
    """A single customized filesystem site yields exactly one ``file://`` URL."""
    project_root = str(tmp_path_factory.mktemp("yo_yo"))
    DataContext.create(project_root)
    ge_dir = os.path.join(project_root, DataContext.GE_DIR)

    custom_site = {
        "class_name": "SiteBuilder",
        "store_backend": {
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "uncommitted/data_docs/some/local/path/",
        },
    }

    ctx = DataContext(ge_dir)
    ctx._project_config["data_docs_sites"] = {"my_rad_site": custom_site}

    # TODO Workaround project config programmatic config manipulation
    #  statefulness issues by writing to disk and re-upping a new context
    ctx._save_project_config()
    ctx = DataContext(ge_dir)
    ctx.build_data_docs()

    index_html = os.path.join(
        ge_dir, "uncommitted/data_docs/some/local/path/index.html"
    )
    assert os.path.isfile(index_html)

    expected_urls = [
        {
            "site_name": "my_rad_site",
            "site_url": "file://{}".format(index_html),
        }
    ]
    actual_urls = ctx.get_docs_sites_urls()
    assert actual_urls == expected_urls
def test_existing_local_data_docs_urls_returns_url_on_project_with_no_datasources_and_a_site_configured(
    tmp_path_factory,
):
    """
    A freshly created project (no datasource added, docs never built) must
    still report one site URL when ``only_if_exists=False`` is requested.
    """
    project_root = str(tmp_path_factory.mktemp("another_empty_project"))
    DataContext.create(project_root)
    ge_dir = os.path.join(project_root, DataContext.GE_DIR)

    site_urls = DataContext(ge_dir).get_docs_sites_urls(only_if_exists=False)

    assert len(site_urls) == 1
    first_site = site_urls[0]
    assert first_site["site_url"].endswith(
        "great_expectations/uncommitted/data_docs/local_site/index.html"
    )
def init(target_directory, usage_stats):
    """
    Create a new Great Expectations project configuration and fill in the
    Datasources and Suites based on the kedro catalog.

    Args:
        target_directory: Directory in which the project is created; it is
            made absolute before use.
        usage_stats: Forwarded to ``DataContext.create`` as
            ``usage_statistics_enabled``.

    Exits:
        0 if the user declines the initial prompt, 5 if
        ``DataContext.create`` raises ``DataContextError``.
    """
    # Function-scope import: ``sys.exit`` is used instead of the
    # site-provided ``exit()`` helper, which is intended for interactive use.
    import sys

    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)

    # Only scaffold a project when no configuration is on disk yet.
    if not DataContext.does_config_exist_on_disk(ge_dir):
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)
        try:
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(SETUP_SUCCESS)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(5)

    if click.confirm("Generate Datasources based on Kedro Context?", default=True):
        # The kedro package name is taken from the current working directory.
        package_name = Path(os.getcwd()).resolve().name
        with KedroSession.create(package_name) as session:
            kedro_context = session.load_context()
            ge_context = toolkit.load_data_context_with_error_handling(ge_dir)
            new_datasources = generate_datasources(kedro_context, ge_context)
            if new_datasources:
                cli_message(
                    "Added {} New datasources to your project.".format(
                        len(new_datasources)
                    )
                )

    if click.confirm(
        "Generate Basic Validation Suites based on Kedro Context?", default=True
    ):
        package_name = Path(os.getcwd()).resolve().name
        with KedroSession.create(package_name) as session:
            kedro_context = session.load_context()
            ge_context = toolkit.load_data_context_with_error_handling(ge_dir)
            new_suites = generate_basic_suites(kedro_context, ge_context)
            if new_suites:
                # Report suites here; the datasource wording belongs to the
                # branch above.
                cli_message(
                    "Added {} New suites to your project.".format(len(new_suites))
                )
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.

    Args:
        target_directory: Directory to initialize; made absolute before use.
        view: Passed to ``build_docs`` when suites already exist.
        usage_stats: Forwarded to ``DataContext.create`` as
            ``usage_statistics_enabled``.

    Exits:
        Always terminates through ``sys.exit`` on the paths below: 0 on
        success or when the user declines the first prompt, 1 when a later
        prompt is declined or a handled error occurs, 5 when re-creating an
        existing project raises ``DataContextError``.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            # sys.exit, not the interactive-only exit() helper from `site`.
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            send_usage_message(
                data_context=context, event="cli.init.create", success=True
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            # The error is only reported; execution continues below, where
            # the context is loaded again from ge_dir.
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)
        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to configure a Datasource?", default=True
                ):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=False
                )
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            # Re-read: a datasource may have just been added above.
            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                (
                    success,
                    suite_name,
                    profiling_results,
                ) = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to build Data Docs?", default=True
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                # Build without opening; viewing is offered as its own step.
                build_docs(context, view=False)

                if not click.confirm(
                    "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)

                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results
                )

                cli_message(SECTION_SEPARATOR)
                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
        DataContextError,
        ge_exceptions.ProfilerError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.

    Args:
        target_directory: Directory to initialize; made absolute before use.
        view: Passed to ``build_docs`` when suites already exist, and as
            ``open_docs`` when a new suite is created.
        usage_stats: Forwarded to ``DataContext.create`` as
            ``usage_statistics_enabled``.

    Exits:
        0 on success or when the user declines the first prompt, 1 when no
        datasource was created or a handled error occurs, 5 when re-creating
        an existing project raises ``DataContextError``.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            # sys.exit, not the interactive-only exit() helper from `site`.
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            send_usage_message(
                data_context=context, event="cli.init.create", success=True
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            # The error is only reported; execution continues below, where
            # the context is loaded again from ge_dir.
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)
        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=True
                )
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            # Re-read: a datasource may have just been added above.
            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                success, suite_name = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    open_docs=view,
                )
                if success:
                    cli_message(
                        "A new Expectation suite '{}' was added to your project".format(
                            suite_name
                        )
                    )

                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
        DataContextError,
        ge_exceptions.ProfilerError,
        OSError,  # IOError is an alias of OSError on Python 3
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
def init(ctx: click.Context, usage_stats: bool) -> None:
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, creates a project file, and appends to a
    `.gitignore` file.

    Args:
        ctx: Click context; ``ctx.obj.config_file_location`` selects the
            target directory (falling back to the current working directory)
            and ``ctx.obj.assume_yes`` suppresses the confirmation prompts.
        usage_stats: Forwarded to ``DataContext.create`` as
            ``usage_statistics_enabled``.

    Exits:
        Always terminates through ``sys.exit``: 0 on completion or when the
        user declines a prompt, 1 if checking the existing project raises,
        5 if completing an existing project raises ``DataContextError``.
    """
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    if directory is None:
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        message = (
            f"""Warning. An existing `{DataContext.GE_YML}` was found here: {ge_dir}."""
        )
        warnings.warn(message)
        try:
            project_file_structure_exists = (
                DataContext.does_config_exist_on_disk(ge_dir)
                and DataContext.all_uncommitted_directories_exist(ge_dir)
                and DataContext.config_variables_yml_exist(ge_dir)
            )
            if project_file_structure_exists:
                cli_message(PROJECT_IS_COMPLETE)
                sys.exit(0)
            # Prompt to modify the project to add missing files.
            # Short-circuit keeps the prompt from showing under assume_yes.
            elif not ctx.obj.assume_yes and not click.confirm(
                COMPLETE_ONBOARDING_PROMPT, default=True
            ):
                cli_message(RUN_INIT_AGAIN)
                # sys.exit, not the interactive-only exit() helper from `site`.
                sys.exit(0)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message(f"<red>{e.message}</red>")
            sys.exit(1)

        try:
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
        except DataContextError as e:
            cli_message(f"<red>{e.message}</red>")
            # TODO ensure this is covered by a test
            sys.exit(5)
    else:
        if not ctx.obj.assume_yes and not click.confirm(
            LETS_BEGIN_PROMPT, default=True
        ):
            cli_message(RUN_INIT_AGAIN)
            sys.exit(0)

        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            send_usage_message(
                data_context=context,
                event=UsageStatsEvents.CLI_INIT_CREATE.value,
                success=True,
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            # NOTE(review): after this error the function still prints the
            # customization messages below and exits 0 - confirm intended.
            cli_message(f"<red>{e}</red>")

    cli_message(SECTION_SEPARATOR)
    cli_message(READY_FOR_CUSTOMIZATION)
    cli_message(HOW_TO_CUSTOMIZE)
    sys.exit(0)
def _complete_onboarding(target_dir):
    """Run ``DataContext.create`` for ``target_dir`` and announce completion.

    Prints ``ONBOARDING_COMPLETE`` through ``cli_message`` once the create
    call returns. Exceptions raised by ``DataContext.create`` are not handled
    here and propagate to the caller.

    Returns:
        Always ``True``.
    """
    DataContext.create(target_dir)
    cli_message(ONBOARDING_COMPLETE)
    return True