Esempio n. 1
0
    def generate_raw_file_docs_for_region(self, region_code: str) -> Dict[str, str]:
        """Builds the Markdown documentation for every raw file config in a region.

        The returned dict maps an output filename to its Markdown content. It
        holds one "<file_tag>.md" entry per raw file config, plus one entry keyed
        by STATE_RAW_DATA_FILE_HEADER_PATH that contains the header (empty when
        `region_code` is not a state code) followed by the raw file table.
        """
        region_config = DirectIngestRegionRawFileConfig(region_code=region_code)

        ordered_tags = sorted(region_config.raw_file_tags)

        # Only state regions get a templated header; everything else gets none.
        header = ""
        if StateCode.is_state_code(region_code):
            code = StateCode(region_code.upper())
            header = STATE_RAW_DATA_FILE_HEADER_TEMPLATE.format(
                state_name=code.get_state().name,
                state_code_lower=code.value.lower(),
            )

        # Configs in file-tag order, so the generated docs are ordered the same way.
        ordered_configs = [region_config.raw_file_configs[tag] for tag in ordered_tags]

        path_by_tag = {
            tag: config.file_path
            for tag, config in region_config.raw_file_configs.items()
        }

        documented_tags = [config.file_tag for config in ordered_configs]

        region = regions.get_region(region_code=region_code, is_direct_ingest=True)

        collector = DirectIngestPreProcessedIngestViewCollector(region, [])
        referencing_views = self.get_referencing_views(collector)
        touched = self._get_touched_raw_data_configs(
            region_config.yaml_config_file_dir
        )

        table = self._generate_raw_file_table(
            path_by_tag,
            documented_tags,
            referencing_views,
            touched,
        )

        docs: Dict[str, str] = {}
        for config in ordered_configs:
            doc_name = f"{config.file_tag}.md"
            docs[doc_name] = self._generate_docs_for_raw_config(config)

        # The header file is the table of contents for the per-file docs above.
        docs[STATE_RAW_DATA_FILE_HEADER_PATH] = "\n".join((header, table))

        return docs
Esempio n. 2
0
def get_data_folder(
    drive: Drive,
    state_code: states.StateCode,
    system: schema.System,
    base_drive_folder_id: str,
) -> DriveItem:
    """Resolves the "Data" folder for a state and system beneath a base Drive folder.

    Three nested lookups are made through `drive.get_folder`: the folder named
    after the state (under `base_drive_folder_id`), then the folder named after
    the title-cased system value, then the folder named "Data". The last
    lookup's result is returned.
    """
    state_name = state_code.get_state().name
    state_dir = drive.get_folder(state_name, base_drive_folder_id)

    system_dir_name = system.value.title()
    system_dir = drive.get_folder(system_dir_name, state_dir.id)

    data_dir = drive.get_folder("Data", system_dir.id)
    return data_dir
Esempio n. 3
0
    def generate_raw_file_docs_for_region(self, region_code: str) -> str:
        """Builds one Markdown document covering every raw file config in a region.

        The result is the header (empty when `region_code` is not a state code),
        a newline, the raw file table, a newline, and then the per-file docs
        separated from each other by blank lines.
        """
        region_config = DirectIngestRegionRawFileConfig(region_code=region_code)

        ordered_tags = sorted(region_config.raw_file_tags)

        # Only state regions get a templated header; everything else gets none.
        header = ""
        if StateCode.is_state_code(region_code):
            code = StateCode(region_code.upper())
            # The state object itself is handed to the template, as-is.
            state = code.get_state()
            header = STATE_RAW_DATA_FILE_HEADER_TEMPLATE.format(
                state_name=state, state_code_lower=code.value.lower()
            )

        # Configs in file-tag order, so the generated docs are ordered the same way.
        ordered_configs = [region_config.raw_file_configs[tag] for tag in ordered_tags]

        path_by_tag = {
            tag: config.file_path
            for tag, config in region_config.raw_file_configs.items()
        }

        documented_tags = [config.file_tag for config in ordered_configs]

        region = regions.get_region(region_code=region_code, is_direct_ingest=True)

        collector = DirectIngestPreProcessedIngestViewCollector(region, [])
        referencing_views = self.get_referencing_views(collector)

        table = self._generate_raw_file_table(
            path_by_tag, documented_tags, referencing_views
        )

        per_file_docs = []
        for config in ordered_configs:
            per_file_docs.append(self._generate_docs_for_raw_config(config))

        return "\n".join([header, table, "\n\n".join(per_file_docs)])
Esempio n. 4
0
def download_data(
    state_code: states.StateCode,
    system: schema.System,
    base_drive_folder_id: str,
    base_local_directory: str,
    credentials_directory: str,
) -> None:
    """Downloads the Drive "Data" folder for a state and system to local disk.

    The local target is `<base_local_directory>/<state code value>/<system value>`
    and is created if it does not exist. The Drive folder is found by looking up
    the state-name folder under `base_drive_folder_id`, then the title-cased
    system folder, then the "Data" folder inside it.
    """
    target_dir = os.path.join(base_local_directory, state_code.value, system.value)
    os.makedirs(target_dir, exist_ok=True)

    drive = Drive(credentials_directory)

    state_name = state_code.get_state().name
    state_dir = drive.get_folder(state_name, base_drive_folder_id)

    system_dir_name = system.value.title()
    system_dir = drive.get_folder(system_dir_name, state_dir.id)

    data_dir = drive.get_folder("Data", system_dir.id)
    drive.download_data(data_dir.id, local_directory=target_dir)
Esempio n. 5
0
def _create_ingest_catalog_summary() -> List[str]:
    """Builds the "State Ingest Catalog" section of SUMMARY.md as a list of lines.

    Every directory directly under _INGEST_CATALOG_ROOT is treated as a state
    folder (its name lower-cased), processed in sorted order. Each state
    contributes a top-level bullet, two fixed sub-bullets, and one nested bullet
    per file found in its "raw_data" subdirectory, if that directory exists.

    Raises:
        ValueError: if a directory name is not a valid state code.
    """
    state_dirs = sorted(
        entry.lower()
        for entry in listdir(_INGEST_CATALOG_ROOT)
        if isdir(join(_INGEST_CATALOG_ROOT, entry))
    )

    summary_lines = ["## State Ingest Catalog\n\n"]

    for state in state_dirs:
        if not StateCode.is_state_code(state):
            raise ValueError(
                f"Folder under {_INGEST_CATALOG_ROOT} named {state} is not a valid state code"
            )
        state_name = StateCode(state.upper()).get_state()

        summary_lines.append(f"- [{state_name}](ingest/{state}/{state}.md)\n")
        summary_lines.append(f"  - [Schema Mappings](ingest/{state}/schema_mappings.md)\n")
        summary_lines.append(f"  - [Raw Data Description](ingest/{state}/raw_data.md)\n")

        raw_data_dir = join(_INGEST_CATALOG_ROOT, state, "raw_data")
        if isdir(raw_data_dir):
            doc_files = sorted(
                name for name in listdir(raw_data_dir) if isfile(join(raw_data_dir, name))
            )
            # The link text is the file name minus its last three characters
            # (presumably the ".md" extension -- the suffix is not checked).
            summary_lines.extend(
                f"    - [{file_name[:-3]}](ingest/{state}/raw_data/{file_name})\n"
                for file_name in doc_files
            )

    return summary_lines
Esempio n. 6
0
def add_fips_to_state_df(df: pd.DataFrame) -> pd.DataFrame:
    """Adds a FIPS column to a copy of a single state's DataFrame.

    The state is taken from `df.name`, which must be a valid StateCode value
    (presumably set by a groupby/apply over state codes -- confirm against the
    caller). The input frame is not modified; the work is done on a copy, using
    the TEMP_COUNTY_NAME_COL column of that copy as the county names.
    """
    # Read the name before copying, exactly as the input frame carries it.
    code = StateCode(df.name)
    df_copy = df.copy()
    county_names = df_copy[TEMP_COUNTY_NAME_COL]
    return fips.add_column_to_df(df_copy, county_names, code.get_state())