def test_get_referencing_views(
    self,
    mock_normalized_config_fn: MagicMock,
    mock_unnormalized_config_fn: MagicMock,
) -> None:
    """Checks the tag -> referencing views mapping produced for a fake collector.

    Both patched raw file config factories return a fake "US_XX" region config.
    A fake ingest view collector ranked over tagA/tagB/tagC is then handed to
    `get_referencing_views`, and the result is compared against the expected
    mapping (in which "gatedTagNotInTagsList" also references tagA and tagB).
    """
    # Each patched factory gets its own fake config instance.
    for config_fn in (mock_normalized_config_fn, mock_unnormalized_config_fn):
        config_fn.return_value = FakeDirectIngestRegionRawFileConfig("US_XX")

    generator = DirectIngestDocumentationGenerator()
    collector = FakeDirectIngestPreProcessedIngestViewCollector(
        region=fake_region(),
        controller_tag_rank_list=["tagA", "tagB", "tagC"],
    )

    actual_views = generator.get_referencing_views(
        collector
    )  # pylint: disable=W0212

    self.assertEqual(
        actual_views,
        {
            "tagA": ["tagA", "gatedTagNotInTagsList"],
            "tagB": ["tagB", "gatedTagNotInTagsList"],
            "tagC": ["tagC"],
        },
    )
def generate_raw_data_documentation_for_region(region_code: str) -> bool:
    """
    Regenerates the Markdown raw data documentation for a single region.

    The docs are built by `DirectIngestDocumentationGenerator` from the files
    available under `recidiviz/ingest/direct/regions/{region_code}/raw_data/` and
    are written below `_INGEST_CATALOG_ROOT/{region_code}/` (region code
    lower-cased): the entry keyed by `STATE_RAW_DATA_FILE_HEADER_PATH` is written
    directly into that directory, every other entry into its `raw_data/`
    subdirectory, which is created when missing. Existing files are overwritten.

    Returns True if files were modified (i.e. `persist_file_contents` returned True
    for at least one file), False otherwise.
    """
    region_lower = region_code.lower()
    generator = DirectIngestDocumentationGenerator()
    docs_per_file = generator.generate_raw_file_docs_for_region(region_lower)

    region_dir = os.path.join(_INGEST_CATALOG_ROOT, region_lower)
    raw_data_dir = os.path.join(region_dir, "raw_data")
    os.makedirs(raw_data_dir, exist_ok=True)

    anything_modified = False
    for file_path, file_contents in docs_per_file.items():
        # Only the header file lives at the region root; per-file docs go in raw_data/.
        is_header = file_path == STATE_RAW_DATA_FILE_HEADER_PATH
        target_dir = region_dir if is_header else raw_data_dir
        anything_modified |= persist_file_contents(
            file_contents, os.path.join(target_dir, file_path)
        )

    return anything_modified
def test_generate_raw_file_docs_for_region_region_not_found(self) -> None:
    """Generating docs for a region without raw data configs raises ValueError.

    Requests documentation for the non-existent region "US_NOT_REAL" and checks
    both the exception type and the exception message.
    """
    documentation_generator = DirectIngestDocumentationGenerator()

    with pytest.raises(ValueError) as error:
        documentation_generator.generate_raw_file_docs_for_region("US_NOT_REAL")

    # The message check has to run after the `with` block: statements placed after
    # the raising call inside the block are never executed. It also has to compare
    # str(error.value) - `error.value` is the exception instance itself, which is
    # never equal to a plain string.
    self.assertEqual(
        str(error.value), "Missing raw data configs for region: US_NOT_REAL"
    )
def generate_raw_data_documentation_for_region(region_code: str) -> None:
    """
    Parses the files available under `recidiviz/ingest/direct/regions/{region_code}/raw_data/`
    to produce documentation which is suitable to be added to the region ingest
    specification. Overwrites or creates the markdown file
    `docs/ingest/{region_code}/raw_data.md` (region code lower-cased) for the given
    region and stages it with `git add`.

    Raises:
        OSError: if the markdown file cannot be written or the `git` executable
            cannot be launched.
    """
    region_lower = region_code.lower()
    documentation_generator = DirectIngestDocumentationGenerator()
    documentation = documentation_generator.generate_raw_file_docs_for_region(
        region_lower
    )

    ingest_docs_path = "docs/ingest"
    markdown_file_path = os.path.join(ingest_docs_path, f"{region_lower}/raw_data.md")

    # A region that is documented for the first time has no docs directory yet.
    os.makedirs(os.path.dirname(markdown_file_path), exist_ok=True)

    # Pin the encoding so the generated file does not depend on the platform default.
    with open(markdown_file_path, "w", encoding="utf-8") as raw_data_md_file:
        raw_data_md_file.write(documentation)

    # Use an argument list instead of a shell command string so the path is passed
    # to git verbatim (no shell interpretation of spaces or metacharacters).
    # stdout is captured and discarded; the exit status is not checked, so a failed
    # `git add` does not raise.
    subprocess.run(
        ["git", "add", markdown_file_path],
        stdout=subprocess.PIPE,
        check=False,
    )
def test_generate_raw_file_docs_for_region(
    self,
    mock_referencing_views: MagicMock,
    _mock_region: MagicMock,
    mock_last_updated: MagicMock,
    mock_updated_by: MagicMock,
    mock_raw_config: MagicMock,
) -> None:
    """Checks the full markdown generated for the US_XX fixture region.

    Reloads the `states` module, points the patched raw config factory at a
    `DirectIngestRegionRawFileConfig` built from the fixture yaml directory, stubs
    the "updated by", "last updated" and referencing views lookups, and compares
    the generated documentation against the expected markdown string.
    """
    importlib.reload(states)
    state_code = states.StateCode.US_XX.value.lower()

    mock_raw_config.return_value = DirectIngestRegionRawFileConfig(
        region_code=state_code,
        yaml_config_file_dir=fixtures.as_filepath(state_code),
    )
    mock_updated_by.return_value = "Julia Dressel"
    mock_last_updated.return_value = "2021-02-10"
    mock_referencing_views.return_value = {
        "multiLineDescription": ["view_one", "view_two"],
        "tagColumnsMissing": ["view_one"],
        "tagPrimaryKeyColsMissing": [],
    }

    generator = DirectIngestDocumentationGenerator()
    actual_markdown = generator.generate_raw_file_docs_for_region(state_code)

    # NOTE: the expected literal is whitespace-sensitive - keep it exactly as is.
    expected_markdown = """# Test State Raw Data Description All raw data can be found in append-only tables in the dataset `us_xx_raw_data`. Views on the raw data table that show the latest state of this table (i.e. select the most recently received row for each primary key) can be found in `us_xx_raw_data_up_to_date_views`. ## Table of Contents | **Table** | **Referencing Views** | **Last Updated** | **Updated By** | |-------------------------------------------------------|-------------------------|------------------|----------------| | [multiLineDescription](#multiLineDescription) | view_one,<br />view_two | 2021-02-10 | Julia Dressel | | [tagColumnsMissing](#tagColumnsMissing) | view_one | 2021-02-10 | Julia Dressel | | [tagPrimaryKeyColsMissing](#tagPrimaryKeyColsMissing) | | 2021-02-10 | Julia Dressel | ## multiLineDescription First raw file. | Column | Column Description | Part of Primary Key? 
| |---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------| | col_name_1a | First column. | YES | | col_name_1b | A column description that is long enough to take up multiple lines. This text block will be interpreted literally and trailing/leading whitespace is removed. | YES | | undocumented_column | <No documentation> | | ## tagColumnsMissing tagColumnsMissing file description | Column | Column Description | Part of Primary Key? | |--------|--------------------|----------------------| ## tagPrimaryKeyColsMissing tagPrimaryKeyColsMissing file description | Column | Column Description | Part of Primary Key? | |----------|----------------------|----------------------| | column_1 | column_1 description | | """

    self.assertIsNotNone(actual_markdown)
    self.assertEqual(expected_markdown, actual_markdown)
def test_generate_raw_file_docs_for_region(
    self,
    mock_referencing_views: MagicMock,
    _mock_region: MagicMock,
    mock_last_updated: MagicMock,
    mock_updated_by: MagicMock,
    mock_raw_config: MagicMock,
) -> None:
    """Checks the per-file markdown docs generated for the US_WW fake region.

    The patched raw config factory returns a `DirectIngestRegionRawFileConfig`
    backed by the `fake_regions` module; the "updated by", "last updated" and
    referencing views lookups are stubbed. The generator result is compared
    against a dict mapping each markdown file name to its expected contents.
    """
    state_code = states.StateCode.US_WW.value.lower()

    mock_raw_config.return_value = DirectIngestRegionRawFileConfig(
        region_code=state_code,
        region_module=fake_regions,
    )
    mock_updated_by.return_value = "Julia Dressel"
    mock_last_updated.return_value = "2021-02-10"
    mock_referencing_views.return_value = {
        "multiLineDescription": ["view_one", "view_two"],
        "tagColumnsMissing": ["view_one"],
        "tagPrimaryKeyColsMissing": [],
    }

    generator = DirectIngestDocumentationGenerator()
    actual_docs = generator.generate_raw_file_docs_for_region(state_code)

    # NOTE: the expected literals are whitespace-sensitive - keep them exactly as is.
    header_md = """# Test State Raw Data Description All raw data can be found in append-only tables in the dataset `us_ww_raw_data`. Views on the raw data table that show the latest state of this table (i.e. select the most recently received row for each primary key) can be found in `us_ww_raw_data_up_to_date_views`. ## Table of Contents | **Table** | **Referencing Views** |**Last Updated**|**Updated By**| |----------------------------------------------------------------|-----------------------|----------------|--------------| |[multiLineDescription](raw_data/multiLineDescription.md) |view_one,<br />view_two|2021-02-10 |Julia Dressel | |[tagColumnsMissing](raw_data/tagColumnsMissing.md) |view_one |2021-02-10 |Julia Dressel | |[tagPrimaryKeyColsMissing](raw_data/tagPrimaryKeyColsMissing.md)| |2021-02-10 |Julia Dressel | """
    multi_line_md = """## multiLineDescription First raw file. 
| Column | Column Description |Part of Primary Key?| Distinct Values | |-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|------------------------------------------------------------------------------| |col_name_1a |First column. |YES |<b>VAL1: </b> value 1, <br/><b>VAL2: </b> value 2, <br/><b>UNKWN: </b> Unknown| |col_name_1b |A column description that is long enough to take up multiple lines. This text block will be interpreted literally and trailing/leading whitespace is removed.|YES |N/A | |undocumented_column|<No documentation> | |N/A | """
    columns_missing_md = """## tagColumnsMissing tagColumnsMissing file description |Column|Column Description|Part of Primary Key?|Distinct Values| |------|------------------|--------------------|---------------| """
    primary_key_cols_missing_md = """## tagPrimaryKeyColsMissing tagPrimaryKeyColsMissing file description | Column | Column Description |Part of Primary Key?|Distinct Values| |--------|--------------------|--------------------|---------------| |column_1|column_1 description| |N/A | """

    expected_docs = {
        "multiLineDescription.md": multi_line_md,
        "raw_data.md": header_md,
        "tagColumnsMissing.md": columns_missing_md,
        "tagPrimaryKeyColsMissing.md": primary_key_cols_missing_md,
    }

    self.assertIsNotNone(actual_docs)
    self.assertEqual(expected_docs, actual_docs)