def test_get_referencing_views(
    self,
    mock_normalized_config_fn: MagicMock,
    mock_unnormalized_config_fn: MagicMock,
) -> None:
    """Check the tag -> referencing-views mapping built from a fake collector.

    Both patched config factories are set to return a fake US_XX raw file
    config. The expected mapping includes `gatedTagNotInTagsList`, a view
    name that is not part of the tag rank list handed to the collector.
    """
    # Each patched factory gets its own fake config instance.
    for config_fn in (mock_normalized_config_fn, mock_unnormalized_config_fn):
        config_fn.return_value = FakeDirectIngestRegionRawFileConfig("US_XX")

    generator = DirectIngestDocumentationGenerator()
    view_collector = FakeDirectIngestPreProcessedIngestViewCollector(
        region=fake_region(),
        controller_tag_rank_list=["tagA", "tagB", "tagC"],
    )

    actual_referencing_views = generator.get_referencing_views(view_collector)

    self.assertEqual(
        actual_referencing_views,
        {
            "tagA": ["tagA", "gatedTagNotInTagsList"],
            "tagB": ["tagB", "gatedTagNotInTagsList"],
            "tagC": ["tagC"],
        },
    )
Esempio n. 2
0
def generate_raw_data_documentation_for_region(region_code: str) -> bool:
    """Write the raw data Markdown documentation for one region.

    Documentation is generated from the raw data configs under
    `recidiviz/ingest/direct/regions/{region_code}/raw_data/`. One Markdown
    file is persisted per generated entry: the entry keyed by
    `STATE_RAW_DATA_FILE_HEADER_PATH` goes directly into the region's catalog
    directory, every other entry goes into its `raw_data/` subdirectory.

    Args:
        region_code: Region identifier; it is lower-cased before use.

    Returns:
        True if persisting any file reported a modification, False otherwise.
    """
    region_dir_name = region_code.lower()

    # Generate first so that nothing is created on disk if generation fails.
    generator = DirectIngestDocumentationGenerator()
    docs_per_file = generator.generate_raw_file_docs_for_region(region_dir_name)

    region_docs_dir = os.path.join(_INGEST_CATALOG_ROOT, region_dir_name)
    raw_data_docs_dir = os.path.join(region_docs_dir, "raw_data")
    os.makedirs(raw_data_docs_dir, exist_ok=True)

    anything_modified = False
    for relative_path, contents in docs_per_file.items():
        is_header = relative_path == STATE_RAW_DATA_FILE_HEADER_PATH
        target_dir = region_docs_dir if is_header else raw_data_docs_dir
        destination = os.path.join(target_dir, relative_path)
        # Every file is persisted; the flag only accumulates the results.
        anything_modified |= persist_file_contents(contents, destination)

    return anything_modified
    def test_generate_raw_file_docs_for_region_region_not_found(self) -> None:
        """Generating docs for an unknown region raises a descriptive ValueError."""
        documentation_generator = DirectIngestDocumentationGenerator()

        # Only the call expected to raise belongs inside the context manager:
        # any statement placed after it in the block is skipped once the
        # exception propagates, so an assertion there would never execute.
        with pytest.raises(ValueError) as error:
            documentation_generator.generate_raw_file_docs_for_region("US_NOT_REAL")

        # `error.value` is the exception instance; compare its message text.
        self.assertEqual(
            str(error.value), "Missing raw data configs for region: US_NOT_REAL"
        )
Esempio n. 4
0
def generate_raw_data_documentation_for_region(region_code: str) -> None:
    """Regenerate and stage the raw data Markdown documentation for a region.

    Parses the files available under
    `recidiviz/ingest/direct/regions/{region_code}/raw_data/` to produce
    documentation which is suitable to be added to the region ingest
    specification. Overwrites or creates `docs/ingest/{region_code}/raw_data.md`
    and then runs `git add` on that file.

    Args:
        region_code: Region identifier; it is lower-cased before use.

    Raises:
        OSError: If the Markdown file cannot be opened for writing, or if the
            `git` executable cannot be launched.
    """
    region_dir_name = region_code.lower()
    documentation_generator = DirectIngestDocumentationGenerator()
    documentation = documentation_generator.generate_raw_file_docs_for_region(
        region_dir_name
    )
    ingest_docs_path = "docs/ingest"
    markdown_file_path = os.path.join(
        ingest_docs_path, f"{region_dir_name}/raw_data.md"
    )
    # Explicit encoding so the written file does not depend on the locale.
    with open(markdown_file_path, "w", encoding="utf-8") as raw_data_md_file:
        raw_data_md_file.write(documentation)

    # Pass the command as an argument list (no shell) so the path is never
    # interpreted by a shell. stdout is captured to keep it off the console,
    # and check=False keeps staging best-effort: a non-zero git exit status
    # does not raise.
    subprocess.run(
        ["git", "add", markdown_file_path],
        stdout=subprocess.PIPE,
        check=False,
    )
    def test_generate_raw_file_docs_for_region(
        self,
        mock_referencing_views: MagicMock,
        _mock_region: MagicMock,
        mock_last_updated: MagicMock,
        mock_updated_by: MagicMock,
        mock_raw_config: MagicMock,
    ) -> None:
        importlib.reload(states)
        region_code = states.StateCode.US_XX.value.lower()
        region_config = DirectIngestRegionRawFileConfig(
            region_code=region_code,
            yaml_config_file_dir=fixtures.as_filepath(region_code),
        )
        mock_raw_config.return_value = region_config
        mock_updated_by.return_value = "Julia Dressel"
        mock_last_updated.return_value = "2021-02-10"
        mock_referencing_views.return_value = {
            "multiLineDescription": ["view_one", "view_two"],
            "tagColumnsMissing": ["view_one"],
            "tagPrimaryKeyColsMissing": [],
        }

        documentation_generator = DirectIngestDocumentationGenerator()
        documentation = documentation_generator.generate_raw_file_docs_for_region(
            region_code
        )

        expected_documentation = """# Test State Raw Data Description

All raw data can be found in append-only tables in the dataset `us_xx_raw_data`. Views on the raw data
table that show the latest state of this table (i.e. select the most recently received row for each primary key) can be
found in `us_xx_raw_data_up_to_date_views`.

## Table of Contents

|                       **Table**                       |  **Referencing Views**  | **Last Updated** | **Updated By** |
|-------------------------------------------------------|-------------------------|------------------|----------------|
| [multiLineDescription](#multiLineDescription)         | view_one,<br />view_two | 2021-02-10       | Julia Dressel  |
| [tagColumnsMissing](#tagColumnsMissing)               | view_one                | 2021-02-10       | Julia Dressel  |
| [tagPrimaryKeyColsMissing](#tagPrimaryKeyColsMissing) |                         | 2021-02-10       | Julia Dressel  |

## multiLineDescription

First raw file.

|       Column        |                                                                      Column Description                                                                       | Part of Primary Key? |
|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------|
| col_name_1a         | First column.                                                                                                                                                 | YES                  |
| col_name_1b         | A column description that is long enough to take up multiple lines. This text block will be interpreted literally and trailing/leading whitespace is removed. | YES                  |
| undocumented_column | <No documentation>                                                                                                                                            |                      |


## tagColumnsMissing

tagColumnsMissing file description

| Column | Column Description | Part of Primary Key? |
|--------|--------------------|----------------------|


## tagPrimaryKeyColsMissing

tagPrimaryKeyColsMissing file description

|  Column  |  Column Description  | Part of Primary Key? |
|----------|----------------------|----------------------|
| column_1 | column_1 description |                      |
"""

        self.assertIsNotNone(documentation)
        self.assertEqual(expected_documentation, documentation)
Esempio n. 6
0
    def test_generate_raw_file_docs_for_region(
        self,
        mock_referencing_views: MagicMock,
        _mock_region: MagicMock,
        mock_last_updated: MagicMock,
        mock_updated_by: MagicMock,
        mock_raw_config: MagicMock,
    ) -> None:
        region_code = states.StateCode.US_WW.value.lower()
        region_config = DirectIngestRegionRawFileConfig(
            region_code=region_code,
            region_module=fake_regions,
        )
        mock_raw_config.return_value = region_config
        mock_updated_by.return_value = "Julia Dressel"
        mock_last_updated.return_value = "2021-02-10"
        mock_referencing_views.return_value = {
            "multiLineDescription": ["view_one", "view_two"],
            "tagColumnsMissing": ["view_one"],
            "tagPrimaryKeyColsMissing": [],
        }

        documentation_generator = DirectIngestDocumentationGenerator()
        documentation = documentation_generator.generate_raw_file_docs_for_region(
            region_code)

        expected_raw_data = """# Test State Raw Data Description

All raw data can be found in append-only tables in the dataset `us_ww_raw_data`. Views on the raw data
table that show the latest state of this table (i.e. select the most recently received row for each primary key) can be
found in `us_ww_raw_data_up_to_date_views`.

## Table of Contents

|                           **Table**                            | **Referencing Views** |**Last Updated**|**Updated By**|
|----------------------------------------------------------------|-----------------------|----------------|--------------|
|[multiLineDescription](raw_data/multiLineDescription.md)        |view_one,<br />view_two|2021-02-10      |Julia Dressel |
|[tagColumnsMissing](raw_data/tagColumnsMissing.md)              |view_one               |2021-02-10      |Julia Dressel |
|[tagPrimaryKeyColsMissing](raw_data/tagPrimaryKeyColsMissing.md)|                       |2021-02-10      |Julia Dressel |
"""

        expected_multi_line = """## multiLineDescription

First raw file.

|      Column       |                                                                     Column Description                                                                      |Part of Primary Key?|                               Distinct Values                                |
|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|------------------------------------------------------------------------------|
|col_name_1a        |First column.                                                                                                                                                |YES                 |<b>VAL1: </b> value 1, <br/><b>VAL2: </b> value 2, <br/><b>UNKWN: </b> Unknown|
|col_name_1b        |A column description that is long enough to take up multiple lines. This text block will be interpreted literally and trailing/leading whitespace is removed.|YES                 |N/A                                                                           |
|undocumented_column|<No documentation>                                                                                                                                           |                    |N/A                                                                           |
"""

        expected_tag_columns_missing = """## tagColumnsMissing

tagColumnsMissing file description

|Column|Column Description|Part of Primary Key?|Distinct Values|
|------|------------------|--------------------|---------------|
"""

        expected_tag_primary_key_cols_missing = """## tagPrimaryKeyColsMissing

tagPrimaryKeyColsMissing file description

| Column | Column Description |Part of Primary Key?|Distinct Values|
|--------|--------------------|--------------------|---------------|
|column_1|column_1 description|                    |N/A            |
"""

        expected_documentation = {
            "multiLineDescription.md": expected_multi_line,
            "raw_data.md": expected_raw_data,
            "tagColumnsMissing.md": expected_tag_columns_missing,
            "tagPrimaryKeyColsMissing.md":
            expected_tag_primary_key_cols_missing,
        }

        self.assertIsNotNone(documentation)
        self.assertEqual(expected_documentation, documentation)