예제 #1
0
def generate_raw_data_documentation_for_region(region_code: str) -> bool:
    """
    Writes the raw data Markdown documentation for one region.

    Asks a `DirectIngestDocumentationGenerator` for the raw file docs of the lower-cased region code and persists
    every returned document under `{_INGEST_CATALOG_ROOT}/{region_code}`: the document keyed by
    `STATE_RAW_DATA_FILE_HEADER_PATH` is written directly into the region directory, all others into its
    `raw_data` subdirectory (created if missing).

    Returns True if files were modified, False otherwise.
    """
    generator = DirectIngestDocumentationGenerator()
    region = region_code.lower()
    docs_by_path = generator.generate_raw_file_docs_for_region(region)

    region_docs_dir = os.path.join(_INGEST_CATALOG_ROOT, region)
    os.makedirs(os.path.join(region_docs_dir, "raw_data"), exist_ok=True)

    modified = False
    for doc_path, doc_contents in docs_by_path.items():
        # Only the header file lives at the region root; everything else is nested under raw_data/.
        is_header = doc_path == STATE_RAW_DATA_FILE_HEADER_PATH
        path_parts = (doc_path,) if is_header else ("raw_data", doc_path)
        destination = os.path.join(region_docs_dir, *path_parts)

        if persist_file_contents(doc_contents, destination):
            modified = True

    return modified
    def generate_metric_markdowns(self) -> bool:
        """Generate markdown files for each metric.

        Files are laid out as `<root_calc_docs_dir>/metrics/<generic type, lower-cased>/<table>.md`, where the
        table name is looked up in DATAFLOW_METRICS_TO_TABLES. Missing directories are created.

        Returns True if any persist_file_contents call returned True, False otherwise.
        """
        metrics_root = os.path.join(self.root_calc_docs_dir, "metrics")
        os.makedirs(metrics_root, exist_ok=True)

        modified = False
        # Walk the generic types in sorted order, one subdirectory per type.
        grouped_metrics = sorted(self.metrics_by_generic_types.items())
        for generic_type, metric_classes in grouped_metrics:
            type_dir = os.path.join(metrics_root, generic_type.lower())
            os.makedirs(type_dir, exist_ok=True)

            for metric in metric_classes:
                contents = self._get_metric_information(metric)
                file_name = f"{DATAFLOW_METRICS_TO_TABLES[metric]}.md"
                destination = os.path.join(type_dir, file_name)
                modified |= persist_file_contents(contents, destination)

        return modified
def generate_entity_documentation() -> bool:
    """
    Generates one Markdown file per table in `StateBase.metadata.sorted_tables`, built from the comment attached to
    the table and to each of its columns. Overwrites or creates `{ENTITY_DOCS_ROOT}/{table name}.md` for every
    table, creating `ENTITY_DOCS_ROOT` first if it does not exist.

    NOTE(review): the comments are presumably populated elsewhere (an earlier version of this docstring pointed at
    `persistence/entity/state/entities_docs.yaml`); this function itself only reads them off the table metadata.

    Returns True if files were modified, False otherwise.

    Raises:
        ValueError: if a table, or any column of a table, has no comment.
    """

    def _get_fields(fields: List[sqlalchemy.Column]) -> str:
        """Returns a table of the entity's fields and their corresponding descriptions.

        Returns the placeholder "No Fields" when `fields` is None and "<No columns>" when it is falsy (e.g. an
        empty list). Raises ValueError if any field has no comment.
        """
        if fields is None:
            return "No Fields"
        if not fields:
            return "<No columns>"

        table_matrix = []
        for field in fields:
            if field.comment is None:
                raise ValueError(
                    f"Every entity field must have an associated comment. "
                    f"Field {field.name} has no comment.")

            if hasattr(field.type, "enums"):
                # Enum-typed columns list every allowed value, separated by HTML line breaks.
                enum_values = "<br />".join(f"{e}" for e in field.type.enums)
                field_type = f"ENUM: <br />{enum_values}"
            else:
                field_type = field.type.python_type.__name__.upper()

            table_matrix.append([field.name, field.comment, field_type])

        writer = MarkdownTableWriter(
            headers=[
                "Entity Field",
                "Entity Description",
                "Entity Type",
            ],
            value_matrix=table_matrix,
            margin=0,
        )
        return writer.dumps()

    # Make sure the output directory exists before writing into it, as the
    # other documentation generators do for their own output directories.
    os.makedirs(ENTITY_DOCS_ROOT, exist_ok=True)

    anything_modified = False
    for t in StateBase.metadata.sorted_tables:
        if t.comment is None:
            raise ValueError(f"Every entity must have an associated comment. "
                             f"Entity {t.name} has no comment.")
        # Layout: "## <table>" heading, the table comment, then the fields table.
        documentation = f"## {t.name}\n\n"
        documentation += f"{t.comment}\n\n"
        documentation += f"{_get_fields(t.columns)}\n\n"

        markdown_file_path = os.path.join(ENTITY_DOCS_ROOT, f"{t.name}.md")
        anything_modified |= persist_file_contents(documentation,
                                                   markdown_file_path)

    return anything_modified
    def generate_states_markdowns(self) -> bool:
        """Generate markdown files for each state.

        One file is written per state code returned by `_get_dataflow_pipeline_enabled_states`, at
        `<root_calc_docs_dir>/states/<normalized state name>.md`. The `states` directory is created if missing.

        Returns True if any persist_file_contents call returned True, False otherwise.
        """
        states_root = os.path.join(self.root_calc_docs_dir, "states")
        os.makedirs(states_root, exist_ok=True)

        modified = False
        for state_code in self._get_dataflow_pipeline_enabled_states():
            state_name = str(state_code.get_state())
            contents = self._get_state_information(state_code, state_name)

            file_name = f"{self._normalize_string_for_path(state_name)}.md"
            destination = os.path.join(states_root, file_name)
            if persist_file_contents(contents, destination):
                modified = True

        return modified
    def generate_products_markdowns(self) -> bool:
        """Generates markdown files if necessary for the docs/calculation/products
        directories.

        Each product in `self.products` gets its own directory,
        `<root_calc_docs_dir>/products/<normalized product name>/` (created if missing), holding a
        `<normalized product name>_summary.md` file.

        Returns True if any persist_file_contents call returned True, False otherwise.
        """
        modified = False
        for product in self.products:
            contents = self._get_product_information(product)

            # The normalized name is used for both the directory and the file name.
            path_name = self._normalize_string_for_path(product.name)
            product_dir = os.path.join(self.root_calc_docs_dir, "products", path_name)
            os.makedirs(product_dir, exist_ok=True)

            destination = os.path.join(product_dir, f"{path_name}_summary.md")
            modified |= persist_file_contents(contents, destination)

        return modified
    def generate_view_markdowns(self) -> bool:
        """Generate markdown files for each view.

        Each key in `self.all_views_to_document` is written to
        `<root_calc_docs_dir>/views/<dataset_id>/<table_id>.md`; missing directories are created.

        Returns True if any persist_file_contents call returned True, False otherwise.
        """
        views_root = os.path.join(self.root_calc_docs_dir, "views")
        os.makedirs(views_root, exist_ok=True)

        modified = False
        for view_key in self.all_views_to_document:
            contents = self._get_view_information(view_key)

            # Views are grouped into one subdirectory per dataset.
            dataset_dir = os.path.join(views_root, view_key.dataset_id)
            os.makedirs(dataset_dir, exist_ok=True)

            destination = os.path.join(dataset_dir, f"{view_key.table_id}.md")
            if persist_file_contents(contents, destination):
                modified = True

        return modified