Esempio n. 1
0
    def from_yaml_dict(
        cls,
        file_tag: str,
        file_path: str,
        default_encoding: str,
        default_separator: str,
        default_ignore_quotes: bool,
        file_config_dict: YAMLDict,
        yaml_filename: str,
    ) -> "DirectIngestRawFileConfig":
        """Returns a DirectIngestRawFileConfig built from a YAMLDict.

        Every recognized key is popped from |file_config_dict|; any key that is
        still present afterwards is reported as an error.

        Args:
            file_tag: Tag of the raw file. Passed through to the config and
                used in error messages.
            file_path: Passed through to the config unchanged.
            default_encoding: Used when the YAML has no (non-empty) "encoding".
            default_separator: Used when the YAML has no (non-empty)
                "separator".
            default_ignore_quotes: Used only when the YAML does not set
                "ignore_quotes" at all; an explicit true/false in the YAML
                always wins.
            file_config_dict: The file's configuration. Keys read here:
                "primary_key_cols", "file_description", "columns" (required)
                and "supplemental_order_by_clause", "encoding", "separator",
                "ignore_quotes", "custom_line_terminator",
                "always_historical_export" (optional).
            yaml_filename: Only used in the primary-key error message.

        Returns:
            The populated DirectIngestRawFileConfig.

        Raises:
            ValueError: If two columns share a name, if a primary key column is
                not listed under "columns", or if the YAML contains keys other
                than the ones listed above.
            NOTE(review): YAMLDict.pop presumably raises on a missing or
                mistyped required key - confirm against YAMLDict.
        """
        primary_key_cols = file_config_dict.pop("primary_key_cols", list)
        file_description = file_config_dict.pop("file_description", str)
        columns = file_config_dict.pop("columns", list)

        column_names = [column["name"] for column in columns]
        unique_column_names = set(column_names)
        if len(column_names) != len(unique_column_names):
            raise ValueError(
                f"Found duplicate columns in raw_file [{file_tag}]")

        missing_columns = set(primary_key_cols) - unique_column_names
        if missing_columns:
            raise ValueError(
                f"Column(s) marked as primary keys not listed in"
                f" columns list for file [{yaml_filename}]: {missing_columns}")

        supplemental_order_by_clause = file_config_dict.pop_optional(
            "supplemental_order_by_clause", str)
        encoding = file_config_dict.pop_optional("encoding", str)
        separator = file_config_dict.pop_optional("separator", str)
        ignore_quotes = file_config_dict.pop_optional("ignore_quotes", bool)
        custom_line_terminator = file_config_dict.pop_optional(
            "custom_line_terminator", str)
        always_historical_export = file_config_dict.pop_optional(
            "always_historical_export", bool)

        # Everything recognized has been popped by now, so anything left over is
        # a key this parser does not understand.
        if len(file_config_dict) > 0:
            raise ValueError(f"Found unexpected config values for raw file"
                             f"[{file_tag}]: {repr(file_config_dict.get())}")

        return DirectIngestRawFileConfig(
            file_tag=file_tag,
            file_path=file_path,
            file_description=file_description,
            primary_key_cols=primary_key_cols,
            columns=[
                RawTableColumnInfo(
                    name=column["name"],
                    is_datetime=column.get("is_datetime", False),
                    description=column.get("description", None),
                    known_values=[
                        ColumnEnumValueInfo(
                            # Values are always stored as strings, whatever
                            # scalar type the YAML parsed them as.
                            value=str(x["value"]),
                            description=x.get("description", None),
                        ) for x in column["known_values"]
                    ] if "known_values" in column else None,
                ) for column in columns
            ],
            supplemental_order_by_clause=supplemental_order_by_clause or "",
            encoding=encoding or default_encoding,
            separator=separator or default_separator,
            custom_line_terminator=custom_line_terminator,
            # Compare against None rather than truthiness: an explicit
            # `ignore_quotes: False` must be able to override a True default.
            # Assumes pop_optional returns None for an absent key - TODO confirm.
            ignore_quotes=(default_ignore_quotes
                           if ignore_quotes is None else ignore_quotes),
            always_historical_export=bool(always_historical_export),
        )
    def from_yaml_dict(
        cls,
        region_code: str,
        file_tag: str,
        file_path: str,
        default_encoding: str,
        default_separator: str,
        file_config_dict: YAMLDict,
        yaml_filename: str,
    ) -> "DirectIngestRawFileConfig":
        """Returns a DirectIngestRawFileConfig built from a YAMLDict.

        Recognized keys are popped from |file_config_dict|; a ValueError is
        raised if column names repeat, if a primary key column is not listed
        under "columns" (not enforced for exempted legacy regions), or if any
        unrecognized key remains in the dict afterwards.
        """
        # Legacy regions are allowed to omit descriptions / column listings.
        is_legacy_region = region_code.upper() in {"US_PA"}

        primary_key_cols = file_config_dict.pop("primary_key_cols", list)

        # TODO(#5399): Migrate raw file configs for all legacy regions to have file descriptions
        if is_legacy_region:
            file_description = file_config_dict.pop_optional(
                "file_description", str)
            if not file_description:
                file_description = "LEGACY_FILE_MISSING_DESCRIPTION"
        else:
            file_description = file_config_dict.pop("file_description", str)

        # TODO(#5399): Migrate raw file configs for all legacy regions to have column descriptions
        if is_legacy_region:
            columns = file_config_dict.pop_optional("columns", list)
            if not columns:
                columns = []
        else:
            columns = file_config_dict.pop("columns", list)

        names = [col["name"] for col in columns]
        if len(set(names)) != len(names):
            raise ValueError(
                f"Found duplicate columns in raw_file [{file_tag}]")

        undeclared_keys = set(primary_key_cols).difference(names)
        # TODO(#5399): Remove exempted region codes once legacy primary keys are documented
        if undeclared_keys and not is_legacy_region:
            raise ValueError(
                f"Column(s) marked as primary keys not listed in"
                f" columns list for file [{yaml_filename}]: {undeclared_keys}")

        order_by = file_config_dict.pop_optional(
            "supplemental_order_by_clause", str)
        encoding = file_config_dict.pop_optional("encoding", str)
        separator = file_config_dict.pop_optional("separator", str)
        ignore_quotes = file_config_dict.pop_optional("ignore_quotes", bool)
        historical = file_config_dict.pop_optional(
            "always_historical_export", bool)

        # All known keys have been popped; leftovers are unsupported config.
        if len(file_config_dict) > 0:
            raise ValueError(f"Found unexpected config values for raw file"
                             f"[{file_tag}]: {repr(file_config_dict.get())}")

        column_infos = []
        for col in columns:
            column_infos.append(
                RawTableColumnInfo(
                    name=col["name"],
                    is_datetime=col.get("is_datetime", False),
                    description=col.get("description", None),
                ))

        return DirectIngestRawFileConfig(
            file_tag=file_tag,
            file_path=file_path,
            file_description=file_description,
            primary_key_cols=primary_key_cols,
            columns=column_infos,
            supplemental_order_by_clause=order_by or "",
            encoding=encoding or default_encoding,
            separator=separator or default_separator,
            ignore_quotes=ignore_quotes or False,
            always_historical_export=historical or False,
        )