def from_yaml_dict(
    cls,
    file_tag: str,
    file_path: str,
    default_encoding: str,
    default_separator: str,
    default_ignore_quotes: bool,
    file_config_dict: YAMLDict,
    yaml_filename: str,
) -> "DirectIngestRawFileConfig":
    """Returns a DirectIngestRawFileConfig built from a YAMLDict.

    Keys are popped from |file_config_dict| as they are read, so the dict is
    consumed by this call; anything left over afterwards is treated as an
    unexpected config value.

    Args:
        file_tag: Tag identifying the raw file; used in error messages and
            passed through to the resulting config.
        file_path: Passed through to the resulting config unchanged.
        default_encoding: Encoding used when the file config has no
            (non-empty) "encoding" value.
        default_separator: Separator used when the file config has no
            (non-empty) "separator" value.
        default_ignore_quotes: Value used for ignore_quotes only when the
            file config does not set "ignore_quotes" at all.
        file_config_dict: The parsed YAML for this raw file. Must provide
            "primary_key_cols", "file_description" and "columns"; may provide
            "supplemental_order_by_clause", "encoding", "separator",
            "ignore_quotes", "custom_line_terminator" and
            "always_historical_export".
        yaml_filename: Name of the YAML file, used in error messages.

    Returns:
        The DirectIngestRawFileConfig described by |file_config_dict|.

    Raises:
        ValueError: If two columns share a name, if a primary key column is
            not listed in "columns", or if |file_config_dict| still holds
            keys after all known ones have been read.
        Any error raised by the YAMLDict accessors (e.g. for a missing
        required key) propagates unchanged; that behavior is defined by
        YAMLDict and is not visible from here.
    """
    primary_key_cols = file_config_dict.pop("primary_key_cols", list)
    file_description = file_config_dict.pop("file_description", str)
    columns = file_config_dict.pop("columns", list)

    column_names = [column["name"] for column in columns]
    unique_column_names = set(column_names)
    if len(column_names) != len(unique_column_names):
        raise ValueError(f"Found duplicate columns in raw_file [{file_tag}]")

    missing_columns = set(primary_key_cols) - unique_column_names
    if missing_columns:
        raise ValueError(
            f"Column(s) marked as primary keys not listed in"
            f" columns list for file [{yaml_filename}]: {missing_columns}"
        )

    supplemental_order_by_clause = file_config_dict.pop_optional(
        "supplemental_order_by_clause", str
    )
    encoding = file_config_dict.pop_optional("encoding", str)
    separator = file_config_dict.pop_optional("separator", str)
    ignore_quotes = file_config_dict.pop_optional("ignore_quotes", bool)
    custom_line_terminator = file_config_dict.pop_optional(
        "custom_line_terminator", str
    )
    always_historical_export = file_config_dict.pop_optional(
        "always_historical_export", bool
    )

    # Every recognized key has been popped by now, so leftovers are typos or
    # unsupported settings.
    if len(file_config_dict) > 0:
        raise ValueError(
            f"Found unexpected config values for raw file"
            f"[{file_tag}]: {repr(file_config_dict.get())}"
        )

    return DirectIngestRawFileConfig(
        file_tag=file_tag,
        file_path=file_path,
        file_description=file_description,
        primary_key_cols=primary_key_cols,
        columns=[
            RawTableColumnInfo(
                name=column["name"],
                is_datetime=column.get("is_datetime", False),
                description=column.get("description", None),
                # known_values stays None (rather than an empty list) when the
                # column does not declare any.
                known_values=[
                    ColumnEnumValueInfo(
                        value=str(x["value"]),
                        description=x.get("description", None),
                    )
                    for x in column["known_values"]
                ]
                if "known_values" in column
                else None,
            )
            for column in columns
        ],
        supplemental_order_by_clause=supplemental_order_by_clause
        if supplemental_order_by_clause
        else "",
        encoding=encoding if encoding else default_encoding,
        separator=separator if separator else default_separator,
        custom_line_terminator=custom_line_terminator,
        # An explicit `ignore_quotes: False` must win over a True default, so
        # only fall back when the key was absent. Assumes pop_optional returns
        # None for a missing key - TODO confirm against YAMLDict.
        ignore_quotes=ignore_quotes
        if ignore_quotes is not None
        else default_ignore_quotes,
        always_historical_export=always_historical_export
        if always_historical_export
        else False,
    )
def from_yaml_dict(
    cls,
    region_code: str,
    file_tag: str,
    file_path: str,
    default_encoding: str,
    default_separator: str,
    file_config_dict: YAMLDict,
    yaml_filename: str,
) -> "DirectIngestRawFileConfig":
    """Builds a DirectIngestRawFileConfig from the YAML config of one raw file.

    Known keys are popped off |file_config_dict| one by one; if any key is
    still present once all known keys have been read, a ValueError is raised.

    Region codes in the legacy set (currently only "US_PA", compared
    case-insensitively) are exempt from some requirements: a missing
    "file_description" falls back to "LEGACY_FILE_MISSING_DESCRIPTION", a
    missing "columns" list falls back to an empty list, and primary keys that
    are absent from the columns list are tolerated.

    Raises:
        ValueError: On duplicate column names, on primary keys missing from
            the columns list (non-legacy regions only), or on unexpected
            leftover config keys.
    """
    primary_key_cols = file_config_dict.pop("primary_key_cols", list)

    # TODO(#5399): Migrate raw file configs for all legacy regions to have file
    #  descriptions, column descriptions and documented primary keys, then
    #  remove this exemption.
    is_legacy_region = region_code.upper() in {"US_PA"}

    if is_legacy_region:
        file_description = file_config_dict.pop_optional("file_description", str)
        if not file_description:
            file_description = "LEGACY_FILE_MISSING_DESCRIPTION"
        columns = file_config_dict.pop_optional("columns", list)
        if not columns:
            columns = []
    else:
        file_description = file_config_dict.pop("file_description", str)
        columns = file_config_dict.pop("columns", list)

    names = [col["name"] for col in columns]
    distinct_names = set(names)
    if len(distinct_names) != len(names):
        raise ValueError(f"Found duplicate columns in raw_file [{file_tag}]")

    missing_columns = set(primary_key_cols) - distinct_names
    if missing_columns and not is_legacy_region:
        raise ValueError(
            f"Column(s) marked as primary keys not listed in"
            f" columns list for file [{yaml_filename}]: {missing_columns}"
        )

    order_by = file_config_dict.pop_optional("supplemental_order_by_clause", str)
    encoding_override = file_config_dict.pop_optional("encoding", str)
    separator_override = file_config_dict.pop_optional("separator", str)
    ignore_quotes_override = file_config_dict.pop_optional("ignore_quotes", bool)
    historical_override = file_config_dict.pop_optional(
        "always_historical_export", bool
    )

    # Anything left in the dict at this point was not a recognized key.
    if len(file_config_dict) > 0:
        leftovers = file_config_dict.get()
        raise ValueError(
            f"Found unexpected config values for raw file"
            f"[{file_tag}]: {repr(leftovers)}"
        )

    column_infos = []
    for col in columns:
        column_infos.append(
            RawTableColumnInfo(
                name=col["name"],
                is_datetime=col.get("is_datetime", False),
                description=col.get("description", None),
            )
        )

    return DirectIngestRawFileConfig(
        file_tag=file_tag,
        file_path=file_path,
        file_description=file_description,
        primary_key_cols=primary_key_cols,
        columns=column_infos,
        supplemental_order_by_clause=order_by or "",
        encoding=encoding_override or default_encoding,
        separator=separator_override or default_separator,
        ignore_quotes=ignore_quotes_override or False,
        always_historical_export=historical_override or False,
    )