예제 #1
0
    def is_raw_vs_ingest_file_name_detection_enabled(self) -> bool:
        """Returns True if ingest should tell 'raw' and 'ingest_view' file types apart based on the file name, for
        this region in the current environment.

        Side effects when enabled:
        - New, un-normalized files dropped in the region's ingest bucket get a normalized name that now includes the
            'raw' file type.
        - Split files are always created with normalized names of type 'ingest_view'.
        - The ingest file prioritizer only looks at 'ingest_view' type files. A file with 'raw' in the name is never
            moved through the pre-existing ingest flow.
        - Files with 'ingest_view' type that have been ingested to Postgres are moved to the
            <storage-bucket>/<region-code>/ingest_view/<year>/<month>/<day>/ subdirectory.
        - A 'raw' file that is not in the raw data yaml for this region is ignored after normalizing. Otherwise:
        - If are_raw_data_bq_imports_enabled_in_env() is not True, the file is left as 'unprocessed' in the region
            ingest bucket. If it is True, the raw file is uploaded to BQ raw tables.

        Conditions to enable for region:
        - Existing normalized files in storage or ingest buckets must be moved to include either the 'raw' or the
            'ingest_view' file type in their names.
        - Any "derived", ingest-ready files (i.e. based on a SQL query on several tables) that get manually uploaded
            to the bucket after this is enabled must have a pre-normalized name with the 'ingest_view' file type.
        - We are prepared to manually upload ingest-ready files (MO, ID, PA, any other new states) or we are ready to
            fully enable raw data imports (ND, other launched direct ingest counties).

        Returns False when the |raw_vs_ingest_file_name_differentiation_enabled_env| config is unset. When it is set,
        this is enabled everywhere outside of GAE production; in production it is enabled only if the config matches
        the current GAE environment (so a value of 'prod' also enables this in staging).
        """
        enabled_env = self.raw_vs_ingest_file_name_differentiation_enabled_env

        # Unset config: the feature is off in every environment.
        if enabled_env is None:
            return False

        # Any configured value turns the feature on outside of production.
        if not environment.in_gae_production():
            return True

        # In production the config has to name the current environment.
        return enabled_env == environment.get_gae_environment()
예제 #2
0
    def are_raw_data_bq_imports_enabled_in_env(self) -> bool:
        """Returns True if this region supports raw data import to BQ in the current environment.

        Side effects when enabled:
        - For this region, we will create a us_xx_raw_data BQ dataset on launch to store raw data tables for that
            region (if it does not already exist).
        - For this region, we will create a us_xx_raw_data_up_to_date_views BQ dataset on launch to store raw data
            tables for that region (if it does not already exist).
        - For every file tag in the region raw data config, auto generate <raw_data_table_name>_by_update_date and
            <raw_data_table_name>_latest on launch.
        - Every 'raw' file we encounter that also matches a tag in the raw data yaml config for this region will get
            uploaded to a BQ raw data table (the table will be auto-created if it does not exist).
        - When a raw file is uploaded to BQ, we will update the raw_file_metadata table with information about this
            file.
        - Raw files that have been uploaded to BQ are moved to the
            <storage-bucket>/<region-code>/raw/<year>/<month>/<day>/ subdirectory.
        - If are_ingest_view_exports_enabled_in_env() is not True, we will create a copy of the 'raw' file with the
            'ingest_view' type in the name and save it to the ingest bucket once we're done processing the raw file
            (if the tag exists in the region's controller ingest tags).

        Conditions to enable for region:
        - is_raw_vs_ingest_file_name_detection_enabled() is already True for this environment w/ all preconditions
            met.
        - Region has a raw file yaml config with all expected raw files listed and all primary key / expected column
            configs completed.

        Returns False when file name detection is disabled or when the |raw_data_bq_imports_enabled_env| config is
        unset. If the config is set to 'prod', BQ import will also be enabled in staging.
        """
        # BQ import builds on raw-vs-ingest file name detection, so that must be on first.
        if not self.is_raw_vs_ingest_file_name_detection_enabled():
            return False

        configured_env = self.raw_data_bq_imports_enabled_env
        if configured_env is None:
            return False

        # Outside of production any configured value enables the feature; in production the value must match the
        # current GAE environment.
        return (not environment.in_gae_production()
                or configured_env == environment.get_gae_environment())
예제 #3
0
    def _get_file_tag_rank_list(self) -> List[str]:
        """Returns the file tags for this region, listed in the order in which they are ingested.

        NOTE: The order of ingest here is important! Do not change unless you know what you're doing!
        """
        # Elite - incarceration-focused
        elite_tags = [
            'elite_offenderidentifier',
            'elite_offenders',
            'elite_alias',
            'elite_offenderbookingstable',
            'elite_offendersentenceaggs',
            'elite_offendersentences',
            'elite_offendersentenceterms',
            'elite_offenderchargestable',
            'elite_orderstable',
            'elite_externalmovements',
        ]

        # Docstars - supervision-focused
        docstars_tags = [
            'docstars_offenders',
            'docstars_offendercasestable',
            'docstars_offensestable',
            'docstars_ftr_episode',
            'docstars_lsi_chronology',
            # TODO(1918): Integrate bed assignment / location history
        ]

        # TODO(2399): Once we are capable of handling historical and nightly ingest of
        #  'elite_offense_in_custody_and_pos_report_data', remove this check.
        # This tag is only ranked outside of GAE production and staging, and it sits between the Elite and
        # Docstars tags.
        if not (environment.in_gae_production() or environment.in_gae_staging()):
            elite_tags.append('elite_offense_in_custody_and_pos_report_data')

        return elite_tags + docstars_tags
예제 #4
0
    def perform_match_postprocessing(self, matched_persons: List[schema.StatePerson]):
        """Runs the ND specific postprocessing steps on the provided |matched_persons| directly after they have
        been entity matched. In order, this:
            - Moves IncarcerationIncidents onto IncarcerationPeriods based on date.
            - Transforms IncarcerationPeriods into periods of temporary custody (holds), when appropriate.
            - Associates SupervisionViolationResponses with IncarcerationPeriods based on date.
            - Moves supervising_officer from StatePerson onto open SupervisionPeriods. This last step only runs in
              GAE production.
        """
        logging.info("[Entity matching] Move incidents into periods")
        move_incidents_onto_periods(matched_persons)

        logging.info("[Entity matching] Transform incarceration periods into holds")
        update_temporary_holds(matched_persons, self.region)

        logging.info("[Entity matching] Associate revocation SVRs with IPs")
        associate_revocation_svrs_with_ips(matched_persons)

        # TODO(#3444): Clean up before launching SQL preprocessing in production
        if not environment.in_gae_production():
            # The supervising officer step is production-only for now.
            return

        logging.info('[Entity matching] Moving supervising officer onto open supervision periods')
        add_supervising_officer_to_open_supervision_periods(matched_persons)
예제 #5
0
    def is_ingest_launched_in_env(self) -> bool:
        """Returns True if ingest can be launched for this region in the current environment.

        Outside of GAE production this always returns True, i.e. all regions can be triggered to run in staging.
        In production, the region config's |environment| must explicitly match the current GAE environment.
        """
        if environment.in_gae_production():
            # Production launches require an explicit opt-in via the region config.
            return self.environment == environment.get_gae_environment()

        return True
def state_allows_multiple_ids_same_type(state_code: str) -> bool:
    """Returns True if the given state allows a person to have multiple external ids of the same type.

    The |state_code| comparison is case-insensitive. US_ND and US_PA always allow this. US_MO allows it only when
    running in GAE staging or production. By default, states don't allow multiple different ids of the same type.
    """
    normalized_code = state_code.upper()

    if normalized_code == 'US_MO':
        # TODO(#3427): Remove the staging check once #4639 has landed and we are ready to do a new rerun in staging
        # TODO(#4043): Remove the production check when we ship MO SQL preprocessing to production and drop all data
        #  for a rerun.
        in_deployed_env = environment.in_gae_staging() or environment.in_gae_production()
        return bool(in_deployed_env)

    return normalized_code in ('US_ND', 'US_PA')
예제 #7
0
    def _do_cleanup(self, args: IngestArgs):
        """Removes all rows in all tables for a single export time.

        Rows are only deleted when running in GAE production; in any other environment the deletion is skipped and
        a message is logged instead. The export time to clean up is read from |args.ingest_time|.
        """
        export_time = args.ingest_time

        if not environment.in_gae_production():
            logging.info(
                "Skipping row deletion for us_ma_middlesex tables and export time [%s] outside of prod environment.",
                export_time)
            return

        engine = self._create_engine()
        metadata = sqlalchemy.MetaData()
        metadata.reflect(bind=engine)

        # sorted_tables is ordered by foreign key dependency, so walking it in reverse deletes from dependent
        # tables before the tables they reference.
        for table in reversed(metadata.sorted_tables):
            delete_statement = table.delete().where(table.c.export_time == export_time)
            result = engine.execute(delete_statement)
            logging.info(
                "Deleted [%d] rows from table [%s] with export time [%s]",
                result.rowcount, table, export_time)
예제 #8
0
    def are_ingest_view_exports_enabled_in_env(self) -> bool:
        """Returns True if this region supports export of ingest views to the ingest bucket in the current
        environment.

        Side effects when enabled:
        - For this region, we will create a us_xx_ingest_views BQ dataset on launch to store raw data tables for that
            region (if it does not already exist).
        - Once all raw BQ pre-processing is complete, we will export a diff of all updated ingest views based on
            information in the latest_valid_ingest_file_by_view table in BQ.
        - When a view diff is exported, we will update the ingest_file_metadata table in BQ with information about
            the exported file.

        Conditions to enable for region:
        - are_raw_data_bq_imports_enabled_in_env() is already True for this environment w/ all preconditions met.
        - Ingest views are implemented in an ingest_views/ directory for all ingest file tags the controller expects
            to see.

        Returns False when file name detection or raw data BQ imports are disabled, or when the
        |ingest_view_exports_enabled_env| config is unset. If the config is set to 'prod', ingest view export will
        also be enabled in staging.
        """
        # Both prerequisite features must already be enabled in this environment.
        if not self.is_raw_vs_ingest_file_name_detection_enabled():
            return False
        if not self.are_raw_data_bq_imports_enabled_in_env():
            return False

        export_env = self.ingest_view_exports_enabled_env
        if export_env is None:
            return False

        # Any configured value enables exports outside of production.
        if not environment.in_gae_production():
            return True

        # In production the config has to name the current environment.
        return export_env == environment.get_gae_environment()