def test_data_context_create_does_nothing_if_all_uncommitted_dirs_exist(
        tmp_path_factory):
    """create() must be idempotent: a second run on an already-scaffolded
    project leaves the directory tree exactly as the first run made it.
    """
    # Expected scaffold as rendered by gen_directory_tree_str (one entry per
    # line, 4 spaces of indent per directory level — TODO confirm indent width
    # against gen_directory_tree_str; the literal's layout was mangled here).
    expected = """\
great_expectations/
    .gitignore
    great_expectations.yml
    datasources/
    expectations/
    notebooks/
        create_expectations.ipynb
        integrate_validation_into_pipeline.ipynb
    plugins/
    uncommitted/
        config_variables.yml
        data_docs/
        samples/
        validations/
"""
    project_path = str(tmp_path_factory.mktemp('stuff'))
    ge_dir = os.path.join(project_path, "great_expectations")

    # First create() builds the full scaffold.
    DataContext.create(project_path)
    fixture = gen_directory_tree_str(ge_dir)
    assert fixture == expected

    # re-run create to simulate onboarding
    DataContext.create(project_path)
    obs = gen_directory_tree_str(ge_dir)
    assert obs == expected
def test_existing_local_data_docs_urls_returns_single_url_from_customized_local_site(
        tmp_path_factory):
    """A single customized local site should yield exactly one file:// URL
    pointing at that site's built index.html.
    """
    project_root = str(tmp_path_factory.mktemp("yo_yo"))
    DataContext.create(project_root)
    ge_dir = os.path.join(project_root, DataContext.GE_DIR)

    ctx = DataContext(ge_dir)
    ctx._project_config["data_docs_sites"] = {
        "my_rad_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/some/local/path/"
            }
        }
    }

    # TODO Workaround project config programmatic config manipulation
    # statefulness issues by writing to disk and re-upping a new context
    ctx._save_project_config()
    ctx = DataContext(ge_dir)
    ctx.build_data_docs()

    index_path = os.path.join(
        ge_dir, "uncommitted/data_docs/some/local/path/index.html")
    assert os.path.isfile(index_path)

    urls = ctx.get_docs_sites_urls()
    assert urls == ["file://{}".format(index_path)]
def test_data_context_do_all_uncommitted_dirs_exist(tmp_path_factory):
    """all_uncommitted_directories_exist() is True right after create() and
    False once some uncommitted subdirectories are removed.
    """
    # gen_directory_tree_str rendering of uncommitted/ after create()
    # (layout reconstructed — the literal was whitespace-mangled).
    expected = """\
uncommitted/
    config_variables.yml
    data_docs/
    samples/
    validations/
"""
    project_path = str(tmp_path_factory.mktemp('stuff'))
    ge_dir = os.path.join(project_path, "great_expectations")
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    DataContext.create(project_path)
    fixture = gen_directory_tree_str(uncommitted_dir)
    print(fixture)
    assert fixture == expected

    # Test that all exist
    assert DataContext.all_uncommitted_directories_exist(ge_dir)

    # remove a few
    shutil.rmtree(os.path.join(uncommitted_dir, "data_docs"))
    shutil.rmtree(os.path.join(uncommitted_dir, "validations"))

    # Test that not all exist
    # NOTE(review): this passes project_path, unlike the positive check above
    # which passes ge_dir — if the helper expects the GE dir this assertion is
    # trivially true. TODO confirm the intended argument.
    assert not DataContext.all_uncommitted_directories_exist(project_path)
def empty_context(tmp_path_factory):
    """Fixture: scaffold a fresh project in a temp dir and return a
    DataContext rooted at its great_expectations/ directory.
    """
    root = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(root)

    ge_path = os.path.join(root, "great_expectations")
    # Sanity-check the scaffold before handing the context to tests.
    assert os.path.isdir(ge_path)
    assert os.path.isfile(os.path.join(ge_path, DataContext.GE_YML))

    ctx = DataContext(ge_path)
    assert isinstance(ctx, DataContext)
    return ctx
def test_existing_local_data_docs_urls_returns_url_on_project_with_no_datasources_and_a_site_configured(tmp_path_factory):
    """
    This test ensures that a url will be returned for a default site even if a
    datasource is not configured, and docs are not built.
    """
    project_root = str(tmp_path_factory.mktemp("another_empty_project"))
    DataContext.create(project_root)
    ctx = DataContext(os.path.join(project_root, DataContext.GE_DIR))

    urls = ctx.get_docs_sites_urls()
    # Exactly one URL — the default local_site — even with no docs built.
    assert len(urls) == 1
    assert urls[0].endswith(
        "great_expectations/uncommitted/data_docs/local_site/index.html")
def test_data_context_create_raises_warning_and_leaves_existing_yml_untouched(
        tmp_path_factory):
    """A second create() must warn and must not clobber an edited
    great_expectations.yml.
    """
    root = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(root)
    yml_path = os.path.join(root, "great_expectations/great_expectations.yml")

    # Plant a marker so any rewrite of the file is detectable.
    with open(yml_path, "a") as handle:
        handle.write("# LOOK I WAS MODIFIED")

    with pytest.warns(UserWarning):
        DataContext.create(root)

    with open(yml_path, "r") as handle:
        contents = handle.read()
    assert "# LOOK I WAS MODIFIED" in contents
def init(target_directory):
    """Initialize a new Great Expectations project.

    This guided input walks the user through setting up a project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.

    Exits the process with a non-zero code if project creation fails, or with
    zero if the user declines to begin.
    """
    try:
        context = DataContext.create(target_directory)
    except DataContextError as err:
        # Creation failures are fatal for the CLI — report and bail out.
        logger.critical(err.message)
        sys.exit(-1)
    base_dir = context.root_directory

    six.print_(
        colored(figlet_format("Great Expectations", font="big"), color="cyan"))
    cli_message(greeting_1)

    if not click.confirm(msg_prompt_lets_begin, default=True):
        cli_message(
            "OK - run great_expectations init again when ready. Exiting...")
        # Fixed: was the builtin exit(), which is a site/REPL convenience and
        # may be absent when run with -S; sys.exit is the correct API.
        sys.exit(0)

    scaffold_directories_and_notebooks(base_dir)
    cli_message("\nDone.")

    add_datasource(context)
def test_data_context_create_makes_uncommitted_dirs_when_all_are_missing( tmp_path_factory): project_path = str(tmp_path_factory.mktemp('data_context')) DataContext.create(project_path) # mangle the existing setup ge_dir = os.path.join(project_path, "great_expectations") uncommitted_dir = os.path.join(ge_dir, "uncommitted") shutil.rmtree(uncommitted_dir) # re-run create to simulate onboarding DataContext.create(project_path) obs = gen_directory_tree_str(ge_dir) assert os.path.isdir(uncommitted_dir), "No uncommitted directory created" assert obs == """\
def _create_new_project(target_directory):
    """Create a project at target_directory and attach a datasource.

    Returns (context, datasource_name, datasource_type); exits the process
    on DataContextError.
    """
    try:
        ctx = DataContext.create(target_directory)
        ds_name, ds_type = add_datasource_impl(ctx)
    except ge_exceptions.DataContextError as err:
        logger.critical(err.message)
        sys.exit(-1)
    return ctx, ds_name, ds_type
def test_existing_local_data_docs_urls_returns_multiple_urls_from_customized_local_site(
        tmp_path_factory):
    """Two customized local sites yield one {site_name, site_url} entry each,
    in configuration order.
    """
    project_root = str(tmp_path_factory.mktemp("yo_yo_ma"))
    DataContext.create(project_root)
    ge_dir = os.path.join(project_root, DataContext.GE_DIR)

    ctx = DataContext(ge_dir)
    ctx._project_config["data_docs_sites"] = {
        "my_rad_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/some/path/"
            }
        },
        "another_just_amazing_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/another/path/"
            }
        }
    }

    # TODO Workaround project config programmatic config manipulation
    # statefulness issues by writing to disk and re-upping a new context
    ctx._save_project_config()
    ctx = DataContext(ge_dir)
    ctx.build_data_docs()

    docs_root = os.path.join(ge_dir, "uncommitted/data_docs/")
    first_index = os.path.join(docs_root, "some/path/index.html")
    second_index = os.path.join(docs_root, "another/path/index.html")
    for index_file in (first_index, second_index):
        assert os.path.isfile(index_file)

    urls = ctx.get_docs_sites_urls()
    assert urls == [
        {
            'site_name': 'my_rad_site',
            'site_url': "file://{}".format(first_index)
        },
        {
            'site_name': 'another_just_amazing_site',
            'site_url': "file://{}".format(second_index)
        },
    ]
def test_data_context_create_does_not_overwrite_existing_config_variables_yml(
        tmp_path_factory):
    """Re-running create() must warn and preserve user edits made to
    uncommitted/config_variables.yml.
    """
    root = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(root)
    config_vars_path = os.path.join(
        root, "great_expectations", "uncommitted", "config_variables.yml")

    # modify config variables
    with open(config_vars_path, "a") as handle:
        handle.write("# LOOK I WAS MODIFIED")

    # re-run create to simulate onboarding
    with pytest.warns(UserWarning):
        DataContext.create(root)

    with open(config_vars_path, "r") as handle:
        contents = handle.read()
    print(contents)
    assert "# LOOK I WAS MODIFIED" in contents
def test_data_context_create_does_nothing_if_all_uncommitted_dirs_exist(
        tmp_path_factory):
    """create() is idempotent: a re-run warns about the existing yml and
    leaves the scaffolded tree unchanged.
    """
    # Expected scaffold as rendered by gen_directory_tree_str (layout
    # reconstructed — the literal was whitespace-mangled; 4-space indent per
    # level assumed, TODO confirm).
    expected = """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
"""
    project_path = str(tmp_path_factory.mktemp('stuff'))
    ge_dir = os.path.join(project_path, "great_expectations")

    DataContext.create(project_path)
    fixture = gen_directory_tree_str(ge_dir)
    assert fixture == expected

    with pytest.warns(
            UserWarning,
            match="Warning. An existing `great_expectations.yml` was found"):
        # re-run create to simulate onboarding
        DataContext.create(project_path)

    obs = gen_directory_tree_str(ge_dir)
    assert obs == expected
def test_data_context_create_builds_base_directories(tmp_path_factory):
    """create() scaffolds every expected top-level directory under GE_DIR."""
    root = str(tmp_path_factory.mktemp("data_context"))
    ctx = DataContext.create(root)
    assert isinstance(ctx, DataContext)

    for subdir in (
        "expectations",
        "notebooks",
        "plugins",
        "checkpoints",
        "uncommitted",
    ):
        assert os.path.isdir(os.path.join(root, ctx.GE_DIR, subdir))
def test_render_full_static_site_from_empty_project(tmp_path_factory,
                                                    filesystem_csv_3):
    """End-to-end: build a project from scratch, profile two pandas
    datasources, build data docs, and assert the exact directory trees at
    each stage. Expected-tree literals were whitespace-mangled and are
    reconstructed with 4-space indent per level — TODO confirm.
    """
    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch
    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    # Lay out raw CSV data for the two datasources.
    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))
    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    # Sanity-check the raw-data layout before creating the project.
    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")

    # After profiling titanic, its suite and profiling validation appear.
    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        datasources/
        expectations/
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            create_expectations.ipynb
            integrate_validation_into_pipeline.ipynb
        plugins/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                profiling/
                    titanic/
                        default/
                            Titanic/
                                BasicDatasetProfiler.json
"""

    context.profile_datasource("random")
    context.build_data_docs()

    data_docs_dir = os.path.join(project_dir,
                                 "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    print(observed)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                default/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.html
        validations/
            profiling/
                random/
                    default/
                        f1/
                            BasicDatasetProfiler.html
                        f2/
                            BasicDatasetProfiler.html
                titanic/
                    default/
                        Titanic/
                            BasicDatasetProfiler.html
"""

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    # NOTE(review): the mkdir above is immediately undone here if present —
    # the rmtree+copytree alone would suffice.
    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")
def init(target_directory):
    """Initialize a new Great Expectations project.

    This guided input walks the user through setting up a project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    six.print_(
        colored(figlet_format("Great Expectations", font="big"), color="cyan"))
    cli_message(greeting_1)

    if not click.confirm(msg_prompt_lets_begin, default=True):
        cli_message(
            "OK - run great_expectations init again when ready. Exiting...")
        # NOTE(review): builtin exit() — sys.exit() is the conventional call
        # outside the REPL; confirm before changing.
        exit(0)

    try:
        context = DataContext.create(target_directory)
    except DataContextError as err:
        # Project creation failure is fatal for the CLI.
        logger.critical(err.message)
        sys.exit(-1)
    base_dir = context.root_directory

    scaffold_directories_and_notebooks(base_dir)
    cli_message("\nDone.", )

    data_source_name = add_datasource(context)

    # Offer to profile the new datasource. The message literals below were
    # whitespace-mangled; line breaks are reconstructed — TODO confirm exact
    # wrapping against the original file.
    cli_message("""
========== Profiling ==========

Would you like to profile '{0:s}' to create candidate expectations and documentation?

Please note: Profiling is still a beta feature in Great Expectations. The current profiler will evaluate the entire data source (without sampling), which may be very time consuming. As a rule of thumb, we recommend starting with data smaller than 100MB.

To learn more about profiling, visit <blue>https://docs.greatexpectations.io/en/latest/guides/profiling.html\
?utm_source=cli&utm_medium=init&utm_campaign={1:s}</blue>.
""".format(data_source_name, __version__.replace(".", "_")))
    if click.confirm("Proceed?", default=True):
        profiling_results = profile_datasource(context, data_source_name)
        cli_message("""
========== Data Documentation ==========

To generate documentation from the data you just profiled, the profiling results should be moved from great_expectations/uncommitted (ignored by git) to great_expectations/fixtures.

Before committing, please make sure that this data does not contain sensitive information!

To learn more: <blue>https://docs.greatexpectations.io/en/latest/guides/data_documentation.html\
?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue>
""".format(__version__.replace(".", "_")))
        if click.confirm(
                "Move the profiled data and build HTML documentation?",
                default=True):
            cli_message("\nMoving files...")
            # Each profiling_result is a tuple whose second element carries
            # the validation metadata used to locate the file to move.
            for profiling_result in profiling_results:
                data_asset_name = profiling_result[1]['meta'][
                    'data_asset_name']
                expectation_suite_name = profiling_result[1]['meta'][
                    'expectation_suite_name']
                run_id = profiling_result[1]['meta']['run_id']
                context.move_validation_to_fixtures(data_asset_name,
                                                    expectation_suite_name,
                                                    run_id)
            cli_message("\nDone.")

            cli_message("\nBuilding documentation...")
            build_documentation(context)
        else:
            # NOTE(review): message contains a stray "`." — present in the
            # original runtime string, left untouched.
            cli_message("Okay, skipping HTML documentation for now.`.")
    else:
        cli_message("Okay, skipping profiling for now. You can always do this "
                    "later by running `great_expectations profile`.")

    cli_message(msg_go_to_notebook)
def test_data_context_create_does_not_raise_error_or_warning_if_ge_dir_exists(
        tmp_path_factory):
    """create() on a brand-new directory must complete without raising or
    warning (pytest fails the test on unexpected errors)."""
    DataContext.create(str(tmp_path_factory.mktemp('data_context')))
def test_render_full_static_site_from_empty_project(tmp_path_factory,
                                                    filesystem_csv_3):
    """End-to-end: scaffold a project, profile two datasources configured
    with SubdirReaderBatchKwargsGenerator, build data docs, and assert the
    exact trees, including batch-id-derived validation file names.
    Expected-tree literals were whitespace-mangled and are reconstructed
    with 4-space indent per level — TODO confirm.
    """
    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch
    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    # Lay out raw CSV data for the two datasources.
    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy(file_relative_path(__file__, "../test_sets/Titanic.csv"),
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))
    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    # Sanity-check the raw-data layout before creating the project.
    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name":
                                   "SubdirReaderBatchKwargsGenerator",
                                   "base_directory":
                                   os.path.join(project_dir, "data/titanic/")
                               }
                           })
    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name":
                                   "SubdirReaderBatchKwargsGenerator",
                                   "base_directory":
                                   os.path.join(project_dir, "data/random/")
                               }
                           })

    context.profile_datasource("titanic")

    # Replicate the batch id of the batch that will be profiled in order to generate the file path of the
    # validation result
    titanic_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/titanic/Titanic.csv'),
        'datasource': 'titanic'
    }).to_id()

    tree_str = gen_directory_tree_str(project_dir)
    assert tree_str == """project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        expectations/
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            pandas/
                validation_playground.ipynb
            spark/
                validation_playground.ipynb
            sql/
                validation_playground.ipynb
        plugins/
            custom_data_docs/
                renderers/
                styles/
                    data_docs_custom_styles.css
                views/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                titanic/
                    subdir_reader/
                        Titanic/
                            BasicDatasetProfiler/
                                profiling/
                                    {}.json
""".format(titanic_profiled_batch_id)

    context.profile_datasource("random")
    context.build_data_docs()

    # Batch ids for the two random-datasource assets, used to predict the
    # rendered validation page file names.
    f1_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f1.csv'),
        'datasource': 'random'
    }).to_id()
    f2_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f2.csv'),
        'datasource': 'random'
    }).to_id()

    data_docs_dir = os.path.join(project_dir,
                                 "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.html
        static/
            fonts/
                HKGrotesk/
                    HKGrotesk-Bold.otf
                    HKGrotesk-BoldItalic.otf
                    HKGrotesk-Italic.otf
                    HKGrotesk-Light.otf
                    HKGrotesk-LightItalic.otf
                    HKGrotesk-Medium.otf
                    HKGrotesk-MediumItalic.otf
                    HKGrotesk-Regular.otf
                    HKGrotesk-SemiBold.otf
                    HKGrotesk-SemiBoldItalic.otf
            images/
                favicon.ico
                glossary_scroller.gif
                iterative-dev-loop.png
                logo-long-vector.svg
                logo-long.png
                short-logo-vector.svg
                short-logo.png
                validation_failed_unexpected_values.gif
            styles/
                data_docs_custom_styles_template.css
                data_docs_default_styles.css
        validations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler/
                            profiling/
                                {0:s}.html
                    f2/
                        BasicDatasetProfiler/
                            profiling/
                                {1:s}.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler/
                            profiling/
                                {2:s}.html
""".format(f1_profiled_batch_id, f2_profiled_batch_id,
           titanic_profiled_batch_id)

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")
    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")
def _complete_onboarding(target_dir):
    """Prompt the user; on confirmation scaffold the project at target_dir,
    otherwise tell them how to resume later.
    """
    if not click.confirm(COMPLETE_ONBOARDING_PROMPT, default=True):
        cli_message(RUN_INIT_AGAIN)
        return
    DataContext.create(target_dir)
    cli_message(ONBOARDING_COMPLETE)
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    """End-to-end: profile two datasources, copy profiling results into
    fixtures/, render the full static site, and assert every expected HTML
    artifact — including the run-id-stamped validation pages — exists.
    """
    project_dir = str(tmp_path_factory.mktemp("project_dir"))
    print(project_dir)

    # Lay out raw CSV data for the two datasources.
    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))
    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    # Profile titanic and copy its profiling result into fixtures/.
    context.profile_datasource("titanic")
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/titanic/default/Titanic/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)
    os.mkdir(os.path.join(ge_directory, "fixtures/validations"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/titanic"))
    os.mkdir(os.path.join(ge_directory,
                          "fixtures/validations/titanic/default"))
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/titanic/default/Titanic/")
    os.mkdir(full_fixture_path)
    shutil.copy(glob_result[0],
                full_fixture_path + "BasicDatasetProfiler.json")

    # Profile random and copy the f1/f2 results into fixtures/.
    context.profile_datasource("random")
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random"))
    os.mkdir(os.path.join(ge_directory,
                          "fixtures/validations/random/default"))
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/random/default/f*/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f1/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[0],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f2/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[1],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")

    context.render_full_static_site()

    # Titanic
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))
    with open(
            os.path.join(
                ge_directory,
                "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"
            ), "r") as infile:
        titanic_validation = json.load(infile)
    titanic_run_id = titanic_validation['meta']['run_id']
    # The rendered validation page is named "<run_id>-BasicDatasetProfiler.html"
    # with colons stripped from the run id.
    titanic_validation_html_filename = \
        "{run_id}-BasicDatasetProfiler.html".format(
            run_id=titanic_run_id.replace(':', ''))
    # BUG FIX: the path literal had lost its {filename} placeholder, so
    # .format(filename=...) was a no-op and the assert checked a bogus path.
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/titanic/default/Titanic/{filename}".
            format(filename=titanic_validation_html_filename)))

    # f1
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/random/default/f1/BasicDatasetProfiler.json")
    )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f1/BasicDatasetProfiler.html"
        ))
    with open(
            os.path.join(
                ge_directory,
                "fixtures/validations/random/default/f1/BasicDatasetProfiler.json"
            ), "r") as infile:
        f1_validation = json.load(infile)
    f1_run_id = f1_validation['meta']['run_id']
    f1_validation_html_filename = \
        "{run_id}-BasicDatasetProfiler.html".format(
            run_id=f1_run_id.replace(':', ''))
    # BUG FIX: restored {filename} placeholder (see titanic above).
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f1/{filename}".format(
                filename=f1_validation_html_filename)))

    # f2
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/random/default/f2/BasicDatasetProfiler.json")
    )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"
        ))
    with open(
            os.path.join(
                ge_directory,
                "fixtures/validations/random/default/f2/BasicDatasetProfiler.json"
            ), "r") as infile:
        f2_validation = json.load(infile)
    f2_run_id = f2_validation['meta']['run_id']
    f2_validation_html_filename = \
        "{run_id}-BasicDatasetProfiler.html".format(
            run_id=f2_run_id.replace(':', ''))
    # BUG FIX: restored {filename} placeholder (see titanic above).
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f2/{filename}".format(
                filename=f2_validation_html_filename)))

    # full site
    assert os.path.exists(
        os.path.join(ge_directory, "uncommitted/documentation/index.html"))

    # save documentation locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/documentation")
    safe_mmkdir("./tests/data_context/output/documentation/titanic")
    try:
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/titanic/default"),
            "./tests/data_context/output/documentation/titanic/default")
    except FileExistsError:
        shutil.rmtree(
            "./tests/data_context/output/documentation/titanic/default")
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/titanic/default"),
            "./tests/data_context/output/documentation/titanic/default")
    safe_mmkdir("./tests/data_context/output/documentation/random")
    try:
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/random/default"),
            "./tests/data_context/output/documentation/random/default")
    except FileExistsError:
        shutil.rmtree(
            "./tests/data_context/output/documentation/random/default")
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/random/default"),
            "./tests/data_context/output/documentation/random/default")
    shutil.copy(
        os.path.join(ge_directory, "uncommitted/documentation/index.html"),
        "./tests/data_context/output/documentation")
def ge_data_context(tmp_path: str) -> DataContext:
    """Fixture: scaffold and return a DataContext rooted at the test's tmp dir.

    NOTE(review): pytest's tmp_path fixture is a pathlib.Path, not str — the
    annotation looks inaccurate; presumably DataContext.create accepts either.
    TODO confirm.
    """
    return DataContext.create(tmp_path)
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    """End-to-end: profile two datasources, copy the profiling results into
    fixtures/, render the full static site, and assert the expected HTML
    artifacts exist.
    """
    project_dir = str(tmp_path_factory.mktemp("project_dir"))
    print(project_dir)

    # Lay out raw CSV data for the two datasources.
    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    curdir = os.path.abspath(os.getcwd())  # NOTE(review): unused
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))
    os.makedirs(os.path.join(project_dir, "data/random"))
    curdir = os.path.abspath(os.getcwd())  # NOTE(review): unused
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    # Profile titanic and copy its profiling result into fixtures/ so the
    # renderer can pick it up.
    context.profile_datasource("titanic")
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/titanic/default/Titanic/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)
    os.mkdir(os.path.join(ge_directory, "fixtures/validations"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/titanic"))
    os.mkdir(os.path.join(ge_directory,
                          "fixtures/validations/titanic/default"))
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/titanic/default/Titanic/")
    os.mkdir(full_fixture_path)
    shutil.copy(glob_result[0],
                full_fixture_path + "BasicDatasetProfiler.json")

    # Profile random and copy the f1/f2 results into fixtures/.
    context.profile_datasource("random")
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random"))
    os.mkdir(os.path.join(ge_directory,
                          "fixtures/validations/random/default"))
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/random/default/f*/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f1/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[0],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f2/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[1],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")

    # for g in glob_result:
    #     shutil.copy(
    #         g,
    #         full_fixture_path+"BasicDatasetProfiler.json"
    #     )
    # os.mkdir(os.path.join(ge_directory,"fixtures")

    context.render_full_static_site()

    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f1/BasicDatasetProfiler.html"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"
        ))

    # Store output files locally
    # shutil.copy(
    #     os.path.join(
    #         ge_directory,
    #         "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"
    #     ),
    #     "test_output/f2_BasicDatasetProfiler.html"
    # )

    # NOTE(review): this open/pass only proves the file is readable; the
    # existence assert above already covers presence.
    with open(
            os.path.join(
                ge_directory,
                "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"
            ), 'r') as f:
        # print(f.read())
        pass

    assert os.path.exists(
        os.path.join(ge_directory, "uncommitted/documentation/index.html"))
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    """End-to-end: profile two datasources, build data documentation, and
    assert that local_site and team_site artifacts were written where
    expected.
    """
    project_dir = str(tmp_path_factory.mktemp("project_dir"))
    print(project_dir)

    # Lay out raw CSV data for the two datasources.
    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))
    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")
    context.profile_datasource("random")
    context.build_data_documentation()

    # Titanic
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/validations/profiling/titanic/default/Titanic/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(  # profiling results HTML
            ge_directory,
            "uncommitted/documentation/local_site/profiling/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))
    assert os.path.exists(
        os.path.join(  # profiling expectations HTML
            ge_directory,
            "uncommitted/documentation/local_site/expectations/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))

    # f1
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/validations/profiling/random/default/f1/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(  # profiling results HTML
            ge_directory,
            "uncommitted/documentation/local_site/profiling/random/default/f1/BasicDatasetProfiler.html"
        ))
    # BUG FIX: this assertion previously re-checked the profiling page even
    # though its comment promised the expectations page (cf. titanic and f2).
    assert os.path.exists(
        os.path.join(  # profiling expectations HTML
            ge_directory,
            "uncommitted/documentation/local_site/expectations/random/default/f1/BasicDatasetProfiler.html"
        ))

    # f2
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/validations/profiling/random/default/f2/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/local_site/profiling/random/default/f2/BasicDatasetProfiler.html"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/local_site/expectations/random/default/f2/BasicDatasetProfiler.html"
        ))

    # local_site index.html
    assert os.path.exists(
        os.path.join(ge_directory,
                     "uncommitted/documentation/local_site/index.html"))
    # team_site index.html
    assert os.path.exists(
        os.path.join(ge_directory,
                     "uncommitted/documentation/team_site/index.html"))

    # save documentation locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/documentation")
    if os.path.isdir("./tests/data_context/output/documentation"):
        shutil.rmtree("./tests/data_context/output/documentation")
    shutil.copytree(
        os.path.join(ge_directory, "uncommitted/documentation/"),
        "./tests/data_context/output/documentation")