Esempio n. 1
0
    def are_raw_data_bq_imports_enabled_in_env(self) -> bool:
        """Returns True if this region supports raw data import to BQ in the current environment.

        What happens for a region once this is enabled:
        - On launch, a us_xx_raw_data BQ dataset is created (if it does not already exist) to store raw data tables
            for that region.
        - On launch, a us_xx_raw_data_up_to_date_views BQ dataset is created (if it does not already exist) to store
            raw data tables for that region.
        - On launch, <raw_data_table_name>_by_update_date and <raw_data_table_name>_latest are auto generated for
            every file tag in the region raw data config.
        - Every 'raw' file we encounter that also matches a tag in the region's raw data yaml config gets uploaded to
            a BQ raw data table (the table is auto-created if it does not exist).
        - Each upload of a raw file to BQ also updates the raw_file_metadata table with information about the file.
        - Raw files that have been uploaded to BQ are moved to the
            <storage-bucket>/<region-code>/raw/<year>/<month>/<day>/ subdirectory.
        - If are_ingest_view_exports_enabled_in_env() is not True, once the raw file is processed we save a copy of it
            with the 'ingest_view' type in the name to the ingest bucket (if the tag exists in the region's
            controller ingest tags).

        Preconditions for enabling this for a region:
        - is_raw_vs_ingest_file_name_detection_enabled() is already True for this environment w/ all preconditions
            met.
        - The region has a raw file yaml config listing all expected raw files, with all primary key / expected column
            configs completed.

        Returns False if file name detection is not enabled or if the |raw_data_bq_imports_enabled_env| config is
        unset. Otherwise, outside of GAE production any configured value enables imports (so a value of 'prod' also
        enables staging), while in GAE production the configured value must equal the current GAE environment.
        """
        # Raw data imports build on raw vs. ingest_view file name detection, so check that first.
        if not self.is_raw_vs_ingest_file_name_detection_enabled():
            return False

        configured_env = self.raw_data_bq_imports_enabled_env
        if configured_env is None:
            return False

        # Only production requires the configured value to match the environment we are running in.
        if not environment.in_gae_production():
            return True
        return configured_env == environment.get_gae_environment()
Esempio n. 2
0
    def is_raw_vs_ingest_file_name_detection_enabled(self) -> bool:
        """Returns True if ingest is ready to tell apart files with the 'raw' and 'ingest_view' file types in the
        file name.

        What happens for a region once this is enabled:
        - New, un-normalized files dropped in the region's ingest bucket get a normalized file name that now includes
            the file type 'raw'.
        - Split files are always created with normalized names with type 'ingest_view'.
        - The ingest file prioritizer only looks at 'ingest_view' type files. A file with 'raw' in the name is never
            moved through the pre-existing ingest flow.
        - Files with 'ingest_view' type that have been ingested to Postgres are moved to the
            <storage-bucket>/<region-code>/ingest_view/<year>/<month>/<day>/ subdirectory.
        - A 'raw' file that is not in the raw data yaml for this region is ignored after normalizing. Otherwise:
        - If are_raw_data_bq_imports_enabled_in_env() is not True, the file is left as 'unprocessed' in the region
            ingest bucket. If it is True, the raw file is uploaded to BQ raw tables.

        Preconditions for enabling this for a region:
        - Existing normalized files in storage or ingest buckets must be moved to include either the 'raw' or the
            'ingest_view' file type in their names.
        - Any "derived", ingest-ready files (i.e. based on a SQL query on several tables) that get manually uploaded
            to the bucket after this is enabled must have a pre-normalized name with the 'ingest_view' file type.
        - We are prepared to manually upload ingest-ready files (MO, ID, PA, any other new states) or we are ready to
            fully enable raw data imports (ND, other launched direct ingest counties).

        Returns False if the |raw_vs_ingest_file_name_differentiation_enabled_env| config is unset. Otherwise, outside
        of GAE production any configured value enables detection (so a value of 'prod' also enables staging), while
        in GAE production the configured value must equal the current GAE environment.
        """
        configured_env = self.raw_vs_ingest_file_name_differentiation_enabled_env
        if configured_env is None:
            return False

        # Only production requires the configured value to match the environment we are running in.
        if not environment.in_gae_production():
            return True
        return configured_env == environment.get_gae_environment()
def check_is_region_launched_in_env(region: Region) -> None:
    """Verifies that direct ingest has been launched for the provided |region| in the current GAE env.

    Returns normally when region.is_ingest_launched_in_env() is truthy. Otherwise logs an error naming the current
    GAE environment and the region code, then raises a DirectIngestError with the same message and the
    ENVIRONMENT_ERROR error type.
    """
    if region.is_ingest_launched_in_env():
        return

    gae_env = environment.get_gae_environment()
    message = f'Bad environment [{gae_env}] for region [{region.region_code}].'
    logging.error(message)
    raise DirectIngestError(
        msg=message,
        error_type=DirectIngestErrorType.ENVIRONMENT_ERROR,
    )
 def __init__(self,
              ingest_directory_path: Optional[str] = 'us-tx-brazos',
              storage_directory_path: Optional[str] = None,
              max_delay_sec_between_files: Optional[int] = None):
     """Initializes the controller for the 'us_tx_brazos' region at the COUNTY system level.

     When both |ingest_directory_path| and the current GAE environment are truthy, '-<gae environment>' is appended
     to the ingest directory path before it is handed to the parent initializer. The storage directory path and
     the max delay between files are passed through unchanged.
     """
     env_name = get_gae_environment()
     if ingest_directory_path and env_name:
         # Suffix the ingest directory with the environment name.
         ingest_directory_path = ingest_directory_path + f'-{env_name}'
     super().__init__(
         'us_tx_brazos',
         SystemLevel.COUNTY,
         ingest_directory_path,
         storage_directory_path,
         max_delay_sec_between_files)
Esempio n. 5
0
    def is_ingest_launched_in_env(self) -> bool:
        """Returns True if ingest can be launched for this region in the
        current environment.

        Outside of GAE production this is always True, so every region can be
        triggered to run in staging. In GAE production, the region's
        |environment| config must explicitly match the current GAE
        environment.
        """
        if environment.in_gae_production():
            return self.environment == environment.get_gae_environment()
        return True
 def __init__(self,
              ingest_directory_path: Optional[str] = 'us-nm-bernalillo',
              storage_directory_path: Optional[str] = None,
              max_delay_sec_between_files: Optional[int] = None):
     """Initializes the controller for the 'us_nm_bernalillo' region at the COUNTY system level.

     When both |ingest_directory_path| and the current GAE environment are truthy, '-<gae environment>' is appended
     to the ingest directory path before it is handed to the parent initializer. The storage directory path and
     the max delay between files are passed through unchanged.
     """
     env_name = get_gae_environment()
     if ingest_directory_path and env_name:
         # Suffix the ingest directory with the environment name.
         ingest_directory_path = ingest_directory_path + f'-{env_name}'
     super(UsNmBernalilloController, self).__init__(
         'us_nm_bernalillo',
         SystemLevel.COUNTY,
         ingest_directory_path,
         storage_directory_path,
         max_delay_sec_between_files)
Esempio n. 7
0
def _regions_matching_environment(region_codes: Set[str]) -> Set[str]:
    """Narrows |region_codes| down to the regions configured for the current GAE environment.

    When we are not running in GAE (e.g. running locally), the input set is returned as-is so that all supported
    regions are included. Otherwise a new set is returned containing only the codes whose region |environment|
    equals the current GAE environment.
    """
    if not environment.in_gae():
        return region_codes

    current_env = environment.get_gae_environment()
    matching_codes = set()
    for code in region_codes:
        if regions.get_region(code).environment == current_env:
            matching_codes.add(code)
    return matching_codes
Esempio n. 8
0
    def are_ingest_view_exports_enabled_in_env(self) -> bool:
        """Returns True if this region supports export of ingest views to the ingest bucket in the current
        environment.

        What happens for a region once this is enabled:
        - On launch, a us_xx_ingest_views BQ dataset is created (if it does not already exist) to store raw data
            tables for that region. NOTE(review): "raw data tables" is carried over from the earlier wording of this
            docstring; presumably ingest view tables are meant - confirm.
        - Once all raw BQ pre-processing is complete, we export a diff of all updated ingest views based on
            information in the latest_valid_ingest_file_by_view table in BQ.
        - Each exported view diff also updates the ingest_file_metadata table in BQ with information about the
            exported file.

        Preconditions for enabling this for a region:
        - are_raw_data_bq_imports_enabled_in_env() is already True for this environment w/ all preconditions met.
        - Ingest views are implemented in an ingest_views/ directory for all ingest file tags the controller expects
            to see.

        Returns False if file name detection or raw data BQ imports are not enabled, or if the
        |ingest_view_exports_enabled_env| config is unset. Otherwise, outside of GAE production any configured value
        enables exports (so a value of 'prod' also enables staging), while in GAE production the configured value
        must equal the current GAE environment.
        """
        # Both upstream features must be on before ingest view exports can be.
        if not self.is_raw_vs_ingest_file_name_detection_enabled():
            return False
        if not self.are_raw_data_bq_imports_enabled_in_env():
            return False

        configured_env = self.ingest_view_exports_enabled_env
        if configured_env is None:
            return False

        # Only production requires the configured value to match the environment we are running in.
        if not environment.in_gae_production():
            return True
        return configured_env == environment.get_gae_environment()