def test_cli_init_on_new_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file
):
    """Run `init` in an empty directory and configure a SQLite datasource.

    Scripted answers are fed to the prompts; the test then checks the console
    output, the stored datasource configuration, the size of the generated
    suite, the scaffolded directory tree and the Data Docs page that was
    opened in the (mocked) browser.
    """
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    # Work on a private copy of the database inside the temporary project.
    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = create_engine("sqlite:///{}".format(database_path))

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="Y\n2\n5\ntitanic\n{}\n1\nwarning\n\n".format(engine.url),
        # Must be an argument of invoke(); str.format() silently ignores
        # unused keyword arguments, so it had no effect there.
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 3000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new data source a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "Which table would you like to use?" in stdout
    assert "Name the new expectation suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "A new Expectation suite 'warning' was added to your project" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {
            "class_name": "SqlAlchemyDatasource",
            "name": "titanic",
            "module_name": "great_expectations.datasource",
            "credentials": {"url": str(engine.url)},
            "data_asset_type": {
                "class_name": "SqlAlchemyDataset",
                "module_name": "great_expectations.dataset",
            },
        }
    ]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    # Close the config file deterministically instead of leaking the handle.
    with open(config_path, "r") as config_file:
        config = yaml.load(config_file)
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "SqlAlchemyDataset"

    obs_tree = gen_directory_tree_str(ge_dir)

    # Instead of monkey patching datetime, just regex out the time directories
    date_safe_obs_tree = re.sub(r"\d*T\d*\.\d*Z", "9999.9999", obs_tree)
    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", date_safe_obs_tree
    )
    # NOTE(review): the nesting/indentation of the expected tree below was
    # re-laid-out (4 spaces per level) -- confirm it matches the output
    # format of gen_directory_tree_str.
    assert (
        guid_safe_obs_tree
        == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    warning/
                        9999.9999/
                            foobarbazguid.html
        validations/
            warning/
                9999.9999/
                    foobarbazguid.json
"""
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
def test_init_on_existing_project_with_no_datasources_should_continue_init_flow_and_add_one(
    mock_webbrowser, capsys, caplog, initialized_project,
):
    """Re-run `init` on a project stripped of datasources, suites and docs.

    The scripted input adds a files-based datasource and a suite named
    `my_suite`; the test then checks the console output, the stored
    configuration and the Data Docs page opened in the (mocked) browser.
    """
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    # mangle the project to remove all traces of a suite and validations
    _remove_all_datasources(ge_dir)
    os.remove(os.path.join(ge_dir, "expectations", "Titanic", "warning.json"))
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    # uncommitted_dir already starts with ge_dir; joining ge_dir a second time
    # would only resolve correctly for absolute paths.
    validations_dir = os.path.join(uncommitted_dir, "validations")
    shutil.rmtree(validations_dir)
    os.mkdir(validations_dir)
    shutil.rmtree(os.path.join(uncommitted_dir, "data_docs", "local_site"))

    context = DataContext(ge_dir)
    assert not context.list_expectation_suites()

    data_folder_path = os.path.join(project_dir, "data")
    csv_path = os.path.join(data_folder_path, "Titanic.csv")

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input="\n1\n1\n{}\n\n\n\n2\n{}\nmy_suite\n\n\n\n\n".format(
                data_folder_path, csv_path
            ),
            catch_exceptions=False,
        )

    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/my_suite/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    stdout = result.stdout
    assert result.exit_code == 0

    assert "Error: invalid input" not in stdout
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Enter the path (relative or absolute) of a data file" in stdout
    assert "Name the new Expectation Suite [Titanic.warning]:" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations"
        in stdout
    )
    assert "Great Expectations is now set up." in stdout

    config = _load_config_file(os.path.join(ge_dir, DataContext.GE_YML))
    assert "data__dir" in config["datasources"]

    context = DataContext(ge_dir)
    datasources = context.list_datasources()
    assert len(datasources) == 1
    assert datasources[0]["name"] == "data__dir"
    assert datasources[0]["class_name"] == "PandasDatasource"

    suites = context.list_expectation_suites()
    assert suites[0].expectation_suite_name == "my_suite"
    assert len(suites) == 1

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project_extra_whitespace_in_url(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file
):
    """Run `init` with a connection URL padded by leading/trailing spaces.

    The stored credentials are expected to equal the un-padded engine URL,
    and the rest of the flow (output, suite, Data Docs) must succeed.
    """
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    # Work on a private copy of the database inside the temporary project.
    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = create_engine("sqlite:///{}".format(database_path))
    engine_url_with_added_whitespace = " " + str(engine.url) + " "

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="Y\n2\n5\ntitanic\n{}\n1\nwarning\n\n".format(
            engine_url_with_added_whitespace
        ),
        # Must be an argument of invoke(); str.format() silently ignores
        # unused keyword arguments, so it had no effect there.
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 3000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new data source a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "Which table would you like to use?" in stdout
    assert "Name the new expectation suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "A new Expectation suite 'warning' was added to your project" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {
            "class_name": "SqlAlchemyDatasource",
            "name": "titanic",
            "module_name": "great_expectations.datasource",
            "credentials": {"url": str(engine.url)},
            "data_asset_type": {
                "class_name": "SqlAlchemyDataset",
                "module_name": "great_expectations.dataset",
            },
        }
    ]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    # Close the config file deterministically instead of leaking the handle.
    with open(config_path, "r") as config_file:
        config = yaml.load(config_file)
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "SqlAlchemyDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
def test_init_on_existing_project_with_no_datasources_should_continue_init_flow_and_add_one(
    mock_webbrowser, caplog, initialized_sqlite_project, titanic_sqlite_db_file,
):
    """Re-run `init` on a SQLite project whose datasources and suite were removed.

    The scripted input re-adds a datasource named `sqlite` and profiles a
    suite named `my_suite`; output, config and opened docs page are checked.
    """
    project_dir = initialized_sqlite_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    # Strip the project back to "config exists, nothing configured".
    _remove_all_datasources(ge_dir)
    stale_suite_path = os.path.join(ge_dir, "expectations", "warning.json")
    os.remove(stale_suite_path)

    assert not DataContext(ge_dir).list_expectation_suites()

    runner = CliRunner(mix_stderr=False)
    url = "sqlite:///{}".format(titanic_sqlite_db_file)
    scripted_answers = "2\n5\nsqlite\nsqlite:///{}\n1\nmy_suite\n\n".format(
        titanic_sqlite_db_file
    )
    expected_warning = "Warning. An existing `great_expectations.yml` was found"
    with pytest.warns(UserWarning, match=expected_warning):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input=scripted_answers,
            catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    opened_url = mock_webbrowser.call_args[0][0]
    expected_docs_fragment = "{}/great_expectations/uncommitted/data_docs/local_site/validations/my_suite/".format(
        project_dir
    )
    assert expected_docs_fragment in opened_url

    assert "Error: invalid input" not in stdout
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert (
        "Next, we will configure database credentials and store them in the `sqlite` section"
        in stdout
    )
    assert "What is the url/connection string for the sqlalchemy connection?" in stdout
    assert "Which table would you like to use?" in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "A new Expectation suite 'my_suite' was added to your project" in stdout
    assert "This looks like an existing project that" not in stdout

    stored_config = _load_config_file(os.path.join(ge_dir, DataContext.GE_YML))
    assert "sqlite" in stored_config["datasources"].keys()

    # Reload the context from disk to observe what the CLI persisted.
    reloaded_context = DataContext(ge_dir)
    expected_datasource = {
        "class_name": "SqlAlchemyDatasource",
        "name": "sqlite",
        "module_name": "great_expectations.datasource",
        "credentials": {"url": url},
        "data_asset_type": {
            "class_name": "SqlAlchemyDataset",
            "module_name": "great_expectations.dataset",
        },
    }
    assert reloaded_context.list_datasources() == [expected_datasource]
    assert (
        reloaded_context.list_expectation_suites()[0].expectation_suite_name
        == "my_suite"
    )
    assert len(reloaded_context.list_expectation_suites()) == 1

    assert_no_logging_messages_or_tracebacks(caplog, result)
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    # target_directory: directory that holds (or will hold) the project.
    # view: forwarded to build_docs() when docs are built for existing suites.
    # usage_stats: forwarded to DataContext.create() as usage_statistics_enabled.
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message(f"<red>{e.message}</red>")
            sys.exit(1)

        try:
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message(f"<red>{e.message}</red>")
            # TODO ensure this is covered by a test
            # sys.exit rather than the builtin exit(): the latter is injected
            # by the `site` module and is not guaranteed to exist.
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            toolkit.send_usage_message(
                data_context=context, event="cli.init.create", success=True
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message(f"<red>{e}</red>")

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)
        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to configure a Datasource?", default=True
                ):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, _ = add_datasource_impl(
                    context, choose_one_data_asset=False
                )
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            # Re-read: a datasource may just have been added above.
            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                # Only the profiling results are needed below.
                _, _, profiling_results = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )

                cli_message(SECTION_SEPARATOR)
                if not click.confirm("Would you like to build Data Docs?", default=True):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                # Build without opening; the user is asked separately below.
                build_docs(context, view=False)

                if not click.confirm(
                    "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)
                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results
                )

                cli_message(SECTION_SEPARATOR)
                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
        DataContextError,
        ge_exceptions.ProfilerError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message(f"<red>{e}</red>")
        sys.exit(1)
def test_init_on_existing_project_with_datasource_with_no_suite_create_one(
    mock_webbrowser, caplog, initialized_sqlite_project, sa
):
    """Re-run `init` on a project that has a datasource but no suites.

    Expectations, Data Docs and validations are wiped first; the scripted
    input then profiles a suite named `sink_me` against the first table of
    the first schema reported by the existing datasource's database.
    """
    project_dir = initialized_sqlite_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")

    # mangle the setup to remove all traces of any suite
    expectations_dir = os.path.join(ge_dir, "expectations")
    data_docs_dir = os.path.join(uncommitted_dir, "data_docs")
    validations_dir = os.path.join(uncommitted_dir, "validations")

    _delete_and_recreate_dir(expectations_dir)
    _delete_and_recreate_dir(data_docs_dir)
    _delete_and_recreate_dir(validations_dir)

    context = DataContext(ge_dir)

    # get the datasource from data context
    all_datasources = context.list_datasources()
    # Fail with a clear message rather than an AttributeError on None below.
    assert all_datasources, "expected the project fixture to define a datasource"
    datasource = all_datasources[0]

    # create a sqlalchemy engine using the URL of existing datasource
    engine = sa.create_engine(datasource.get("credentials", {}).get("url"))
    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = list(inspector.get_table_names(schema=default_schema))
    default_table = tables[0]

    assert context.list_expectation_suites() == []

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            # The template only has named fields; the stray positional
            # argument that used to be passed here was never substituted.
            input="\n1\n{schema}\n{table}\nsink_me\n\n\n\n".format(
                schema=default_schema, table=default_table
            ),
            catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/sink_me/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    assert "Always know what to expect from your data" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "The following Data Docs sites will be built" in stdout
    assert "Great Expectations is now set up" in stdout

    assert "Error: invalid input" not in stdout
    assert "This looks like an existing project that" not in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)

    context = DataContext(ge_dir)
    assert len(context.list_expectation_suites()) == 1
def test_cli_init_on_new_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa
):
    """Run `init` in an empty directory and configure a SQLite datasource.

    The schema and table answers are discovered from the copied database via
    SQLAlchemy inspection. The test checks the console output, the stored
    datasource, the generated suite, the scaffolded directory tree and the
    Data Docs page opened in the (mocked) browser.
    """
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    # Work on a private copy of the database inside the temporary project.
    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = sa.create_engine("sqlite:///{}".format(database_path), pool_recycle=3600)

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = list(inspector.get_table_names(schema=default_schema))
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine.url, schema=default_schema, table=default_table
        ),
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 6000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout
    )
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources()[0]["class_name"] == "SqlAlchemyDatasource"
    assert context.list_datasources()[0]["name"] == "titanic"

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    # Close the config file deterministically instead of leaking the handle.
    with open(config_path) as config_file:
        config = yaml.load(config_file)
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "SqlAlchemyDataset"

    obs_tree = gen_directory_tree_str(ge_dir)

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", obs_tree
    )
    # NOTE(review): the nesting/indentation of the expected tree below was
    # re-laid-out (4 spaces per level) -- confirm it matches the output
    # format of gen_directory_tree_str. The literal timestamps presumably
    # rely on the clock being frozen outside this function -- verify.
    assert (
        guid_safe_obs_tree
        == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
        warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    warning/
                        20190926T134241.000000Z/
                            20190926T134241.000000Z/
                                foobarbazguid.html
        validations/
            .ge_store_backend_id
            warning/
                20190926T134241.000000Z/
                    20190926T134241.000000Z/
                        foobarbazguid.json
"""
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
def test_init_on_existing_project_with_no_datasources_should_continue_init_flow_and_add_one(
    mock_webbrowser, caplog, initialized_sqlite_project, titanic_sqlite_db_file, sa
):
    """Re-run `init` on a SQLite project whose datasources and suite were removed.

    Schema and table answers are discovered by inspecting the database; the
    scripted input re-adds a datasource named `sqlite` and profiles a suite
    named `my_suite`. Output, stored config and opened docs page are checked.
    """
    project_dir = initialized_sqlite_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    # Strip the project back to "config exists, nothing configured".
    _remove_all_datasources(ge_dir)
    stale_suite_path = os.path.join(ge_dir, "expectations", "warning.json")
    os.remove(stale_suite_path)

    assert not DataContext(ge_dir).list_expectation_suites()

    runner = CliRunner(mix_stderr=False)
    url = "sqlite:///{}".format(titanic_sqlite_db_file)

    # Use the first schema and its first table as the prompt answers.
    db_inspector = sa.inspect(sa.create_engine(url))
    default_schema = db_inspector.get_schema_names()[0]
    default_table = list(db_inspector.get_table_names(schema=default_schema))[0]

    scripted_answers = "\n\n2\n6\nsqlite\n{url}\n\n\n1\n{schema}\n{table}\nmy_suite\n\n\n\n".format(
        url=url, schema=default_schema, table=default_table
    )
    expected_warning = "Warning. An existing `great_expectations.yml` was found"
    with pytest.warns(UserWarning, match=expected_warning):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input=scripted_answers,
            catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    opened_url = mock_webbrowser.call_args[0][0]
    expected_docs_fragment = "{}/great_expectations/uncommitted/data_docs/local_site/validations/my_suite/".format(
        project_dir
    )
    assert expected_docs_fragment in opened_url

    assert "Error: invalid input" not in stdout
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert (
        "Next, we will configure database credentials and store them in the `sqlite` section"
        in stdout
    )
    assert "What is the url/connection string for the sqlalchemy connection?" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout
    )
    assert "Great Expectations connected to your database" in stdout
    assert "This looks like an existing project that" not in stdout

    stored_config = _load_config_file(os.path.join(ge_dir, DataContext.GE_YML))
    assert "sqlite" in stored_config["datasources"].keys()

    # Reload the context from disk to observe what the CLI persisted.
    reloaded_context = DataContext(ge_dir)
    expected_datasource = {
        "class_name": "SqlAlchemyDatasource",
        "name": "sqlite",
        "module_name": "great_expectations.datasource",
        "credentials": {"url": url},
        "data_asset_type": {
            "class_name": "SqlAlchemyDataset",
            "module_name": "great_expectations.dataset",
        },
    }
    assert reloaded_context.list_datasources() == [expected_datasource]
    assert (
        reloaded_context.list_expectation_suites()[0].expectation_suite_name
        == "my_suite"
    )
    assert len(reloaded_context.list_expectation_suites()) == 1

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project_extra_whitespace_in_url(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa
):
    """Run `init` with a connection URL padded by leading/trailing spaces.

    The stored credentials are expected to equal the un-padded engine URL,
    and the rest of the flow (output, suite, Data Docs) must succeed.
    """
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    # Work on a private copy of the database inside the temporary project.
    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = sa.create_engine("sqlite:///{}".format(database_path), pool_recycle=3600)
    engine_url_with_added_whitespace = " " + str(engine.url) + " "

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = list(inspector.get_table_names(schema=default_schema))
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine_url_with_added_whitespace,
            schema=default_schema,
            table=default_table,
        ),
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 6000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout
    )
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {
            "class_name": "SqlAlchemyDatasource",
            "name": "titanic",
            "module_name": "great_expectations.datasource",
            "credentials": {"url": str(engine.url)},
            "data_asset_type": {
                "class_name": "SqlAlchemyDataset",
                "module_name": "great_expectations.dataset",
            },
        }
    ]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    # Close the config file deterministically instead of leaking the handle.
    with open(config_path) as config_file:
        config = yaml.load(config_file)
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "SqlAlchemyDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )