def test_create_raw_data_latest_view_update_tasks(
        self,
        mock_cloud_task_manager_fn: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """POSTs to the latest-view update endpoint and checks the queued tasks.

        The cloud task manager factory is replaced with an autospec'd
        DirectIngestRawUpdateCloudTaskManager. The test asserts a 200 response
        and that create_raw_data_latest_view_update_task was called with each
        name returned by get_existing_region_dir_names(), in that order.
        """
        with local_project_id_override("recidiviz-staging"):
            mock_environment.return_value = "staging"

            task_manager = create_autospec(DirectIngestRawUpdateCloudTaskManager)
            mock_cloud_task_manager_fn.return_value = task_manager

            response = self.client.post(
                "/create_raw_data_latest_view_update_tasks",
                query_string={},
                headers={"X-Appengine-Cron": "test-cron"},
            )

            self.assertEqual(200, response.status_code)

            # One expected call per region directory, in directory order.
            expected_calls = []
            for region_code in get_existing_region_dir_names():
                expected_calls.append(mock.call(region_code))

            create_task = task_manager.create_raw_data_latest_view_update_task
            create_task.assert_has_calls(expected_calls)
Beispiel #2
0
    def test_create_raw_data_latest_view_update_tasks(
            self, mock_cloud_task_manager_fn, mock_environment):
        """POSTs to the latest-view update endpoint and checks the queued tasks.

        Asserts a 200 response and that the autospec'd task manager received a
        create_raw_data_latest_view_update_task call for every region directory
        except the three regions hard-coded below.
        """
        with local_project_id_override('recidiviz-staging'):
            mock_environment.return_value = 'staging'

            task_manager = create_autospec(DirectIngestRawUpdateCloudTaskManager)
            mock_cloud_task_manager_fn.return_value = task_manager

            response = self.client.post(
                '/create_raw_data_latest_view_update_tasks',
                query_string={},
                headers={'X-Appengine-Cron': 'test-cron'})

            self.assertEqual(200, response.status_code)

            # Regions this test expects NOT to get an update task.
            excluded_regions = {
                'us_tx_brazos', 'us_ma_middlesex', 'us_nm_bernalillo'
            }
            expected_calls = []
            for region_code in get_existing_region_dir_names():
                if region_code in excluded_regions:
                    continue
                expected_calls.append(mock.call(region_code))

            create_task = task_manager.create_raw_data_latest_view_update_task
            create_task.assert_has_calls(expected_calls)
Beispiel #3
0
    def test_collect_ingest_views():
        """Collects ingest views for every region with a GCSFS ingest controller.

        Regions whose ingestor class is not a GcsfsDirectIngestController
        subclass are skipped. The collected views are discarded; the test
        passes as long as collection does not raise.
        """
        with local_project_id_override('project'):
            for region_code in get_existing_region_dir_names():
                region = get_region(region_code, is_direct_ingest=True)
                controller_cls = region.get_ingestor_class()

                if issubclass(controller_cls, GcsfsDirectIngestController):
                    file_tags = controller_cls.get_file_tag_rank_list()
                    collector = DirectIngestPreProcessedIngestViewCollector(
                        region, file_tags)
                    collector.collect_views()
Beispiel #4
0
    def test_direct_ingest_instance_status_contains_data_for_all_states(
            self) -> None:
        r'''Enforces that after all migrations the set of direct ingest instance statuses
        matches the list of known states.

        If this test fails, you will likely have to add a new migration because a new state
        was recently created. To do so, first run:
        ```
        python -m recidiviz.tools.migrations.autogenerate_migration \
            --database OPERATIONS \
            --message add_us_xx
        ```

        This will generate a blank migration. You should then modify the migration, changing
        the `upgrade` method to look like:
        ```
        def upgrade() -> None:
            op.execute("""
                INSERT INTO direct_ingest_instance_status (region_code, instance, is_paused) VALUES
                ('US_XX', 'PRIMARY', TRUE),
                ('US_XX', 'SECONDARY', TRUE);
            """)
        ```

        Afterwards, this test should ideally pass.
        '''

        with runner(self.default_config(), self.engine) as r:
            r.migrate_up_to("head")

            engine = create_engine(
                local_postgres_helpers.postgres_db_url_from_env_vars())

            # Maps each ingest instance to the set of region codes that have a
            # status row for it.
            instance_to_state_codes = defaultdict(set)
            try:
                # Scope the connection so it is released even if the query or
                # the enum conversion below raises.
                with engine.connect() as conn:
                    rows = conn.execute(
                        "SELECT region_code, instance FROM direct_ingest_instance_status;"
                    )
                    for row in rows:
                        instance_to_state_codes[DirectIngestInstance(
                            row[1])].add(row[0])
            finally:
                # This engine is created only for this check; dispose of its
                # connection pool so no connections outlive the test.
                engine.dispose()

            required_states = {
                name.upper()
                for name in get_existing_region_dir_names()
            }

            for instance in DirectIngestInstance:
                self.assertEqual(required_states,
                                 instance_to_state_codes[instance])
Beispiel #5
0
    def test_get_raw_data_configs(self) -> None:
        """Checks the "mittimus" raw data config for us_id and that every region loads.

        Verifies specific column, primary key, separator, encoding and quoting
        values on the us_id "mittimus" config, then asserts that
        get_raw_data_configs returns a non-None result for each region
        directory.
        """
        raw_data_configs = get_raw_data_configs("us_id")
        self.assertIsNotNone(raw_data_configs)

        # Take the first config tagged "mittimus"; next() raises StopIteration
        # if there is none.
        mittimus_matches = (
            config for config in raw_data_configs
            if config.file_tag == "mittimus"
        )
        mittimus = next(mittimus_matches)

        self.assertIn("mitt_srl", mittimus.columns)
        self.assertIn("mitt_srl", mittimus.primary_keys)
        self.assertEqual("|", mittimus.separator)
        self.assertEqual("ISO-8859-1", mittimus.encoding)
        self.assertEqual(csv.QUOTE_MINIMAL, mittimus.quoting)

        for region_code in get_existing_region_dir_names():
            region_configs = get_raw_data_configs(region_code)
            self.assertIsNotNone(region_configs)
Beispiel #6
0
    def test_raw_files_yaml_parses_all_regions(self):
        """Loads the raw file config for every region and checks file tags are unique.

        When a region's raw_data_bq_imports_enabled_env is set, the region must
        also have at least one raw file config.
        """
        for region_code in get_existing_region_dir_names():
            region = get_region(region_code, is_direct_ingest=True)
            region_raw_config = DirectIngestRegionRawFileConfig(
                region_code=region.region_code)

            if region.raw_data_bq_imports_enabled_env is not None:
                self.assertTrue(region_raw_config.raw_file_configs)

            # File tags seen so far for this region.
            seen_file_tags = set()
            for config in region_raw_config.raw_file_configs.values():
                is_first_occurrence = config.file_tag not in seen_file_tags
                self.assertTrue(
                    is_first_occurrence,
                    f"Multiple raw file configs defined with the same file_tag [{config.file_tag}]"
                )
                seen_file_tags.add(config.file_tag)
    def test_build_gcsfs_ingest_controller_all_regions(self) -> None:
        """Builds a controller for every region and ingest instance.

        The factory is called with allow_unlaunched=False. Each result must be
        a BaseDirectIngestController bound to the bucket path it was built
        from.
        """
        for region_code in get_existing_region_dir_names():
            region = get_region(region_code, is_direct_ingest=True)
            for instance in DirectIngestInstance:
                bucket_path = gcsfs_direct_ingest_bucket_for_region(
                    region_code=region_code,
                    system_level=SystemLevel.for_region(region),
                    ingest_instance=instance,
                )
                built_controller = DirectIngestControllerFactory.build(
                    ingest_bucket_path=bucket_path,
                    allow_unlaunched=False,
                )

                self.assertIsNotNone(built_controller)
                self.assertIsInstance(built_controller,
                                      BaseDirectIngestController)
                self.assertEqual(bucket_path,
                                 built_controller.ingest_bucket_path)
 def collect_view_builders(
         self) -> List[DirectIngestRawDataTableLatestViewBuilder]:
     """Builds one latest-view builder per eligible raw file config.

     Walks every region directory and its raw file configs. Configs that are
     undocumented or have no primary key columns are skipped. Each builder is
     given a table-exists predicate for the config's file tag in the
     "<region_code>_raw_data" dataset.
     """
     builders = []
     for region_code in get_existing_region_dir_names():
         region_config = DirectIngestRegionRawFileConfig(region_code)
         for file_config in region_config.raw_file_configs.values():
             if file_config.is_undocumented or not file_config.primary_key_cols:
                 continue

             table_checker = BigQueryTableChecker(
                 f"{region_config.region_code.lower()}_raw_data",
                 file_config.file_tag,
             )
             builders.append(
                 DirectIngestRawDataTableLatestViewBuilder(
                     region_code=region_code,
                     raw_file_config=file_config,
                     should_build_predicate=table_checker.
                     get_table_exists_predicate(),
                 ))
     return builders
    def test_build_gcsfs_ingest_controller_all_regions_do_not_allow_launched(
        self, ) -> None:
        """Builds a controller for every region and ingest instance.

        The factory is called with allow_unlaunched=True. Each result must be
        a BaseDirectIngestController bound to the bucket path it was built
        from.
        """
        for region_code in get_existing_region_dir_names():
            region = get_region(region_code, is_direct_ingest=True)
            for instance in DirectIngestInstance:
                bucket_path = gcsfs_direct_ingest_bucket_for_region(
                    region_code=region_code,
                    system_level=SystemLevel.for_region(region),
                    ingest_instance=instance,
                )
                built_controller = DirectIngestControllerFactory.build(
                    ingest_bucket_path=bucket_path,
                    allow_unlaunched=True,
                )

                # Should still succeed for all controllers in the test environment
                self.assertIsNotNone(built_controller)
                self.assertIsInstance(built_controller,
                                      BaseDirectIngestController)
                self.assertEqual(bucket_path,
                                 built_controller.ingest_bucket_path)
def create_raw_data_latest_view_update_tasks() -> Tuple[str, HTTPStatus]:
    """Queues a raw data latest view update task for each eligible region.

    Iterates over every region directory and, for regions where
    are_raw_data_bq_imports_enabled_in_env() is true, creates an update task
    via DirectIngestRawUpdateCloudTaskManager. Other regions are logged and
    skipped.

    Returns:
        An empty body and HTTPStatus.OK.
    """
    task_manager = DirectIngestRawUpdateCloudTaskManager(metadata.project_id())

    for region_code in get_existing_region_dir_names():
        with monitoring.push_region_tag(region_code):
            region = get_region(region_code, is_direct_ingest=True)

            if not region.are_raw_data_bq_imports_enabled_in_env():
                logging.info(
                    'Skipping raw data latest view update for region [%s] - raw data imports not enabled.',
                    region_code)
                continue

            logging.info(
                'Creating raw data latest view update task for region [%s]',
                region_code)
            task_manager.create_raw_data_latest_view_update_task(region_code)

    return '', HTTPStatus.OK
Beispiel #11
0
def create_raw_data_latest_view_update_tasks() -> Tuple[str, HTTPStatus]:
    """Queues a raw data latest view update task for each launched region.

    Iterates over every region directory and, for regions where
    is_ingest_launched_in_env() is true, creates an update task via
    DirectIngestRawUpdateCloudTaskManager. Other regions are logged and
    skipped.

    Returns:
        An empty body and HTTPStatus.OK.
    """
    task_manager = DirectIngestRawUpdateCloudTaskManager()

    for region_code in get_existing_region_dir_names():
        with monitoring.push_region_tag(region_code, ingest_instance=None):
            region = _region_for_region_code(region_code)

            if not region.is_ingest_launched_in_env():
                logging.info(
                    "Skipping raw data latest view update for region [%s] - ingest not enabled.",
                    region_code,
                )
                continue

            logging.info(
                "Creating raw data latest view update task for region [%s]",
                region_code,
            )
            task_manager.create_raw_data_latest_view_update_task(region_code)

    return "", HTTPStatus.OK
 def region_dir_paths(self) -> List[str]:
     """Returns the path under self.temp_dir for each existing region directory.

     Each path is self.temp_dir joined with REGIONS_DIR_PATH and one name from
     get_existing_region_dir_names(), in the order the names are returned.
     """
     paths = []
     for dir_name in get_existing_region_dir_names():
         paths.append(os.path.join(self.temp_dir, REGIONS_DIR_PATH, dir_name))
     return paths
 def region_dir_names(self) -> List[str]:
     """Returns the names reported by get_existing_region_dir_names()."""
     dir_names = get_existing_region_dir_names()
     return dir_names
Beispiel #14
0
 def test_region_dirname_matches_pattern(self):
     """Asserts that every existing region directory name matches _REGION_REGEX."""
     for region_dir_name in get_existing_region_dir_names():
         pattern_match = re.match(_REGION_REGEX, region_dir_name)
         self.assertIsNotNone(
             pattern_match,
             f'Region [{region_dir_name}] does not match expected region pattern.')