def test_do_upload_graceful_failures(self, mock_fs_factory: Mock) -> None:
     mock_fs = FakeGCSFileSystem()
     mock_fs.test_add_path(
         path=GcsfsFilePath.from_bucket_and_blob_name(
             "test-project-direct-ingest-state-us-xx", "raw_data/test_file.txt"
         ),
         local_path=None,
     )
     mock_fs_factory.return_value = mock_fs
     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[
             (
                 "test-project-direct-ingest-state-us-xx/raw_data/test_file.txt",
                 TODAY,
             ),
             (
                 "test-project-direct-ingest-state-us-xx/raw_data/non_existent_file.txt",
                 TODAY,
             ),
         ],
         project_id="test-project",
         region="us_xx",
     )
     uploaded_files, unable_to_upload_files = controller.do_upload()
     self.assertEqual(
         uploaded_files,
         ["test-project-direct-ingest-state-us-xx/raw_data/test_file.txt"],
     )
     self.assertEqual(
         unable_to_upload_files,
         ["test-project-direct-ingest-state-us-xx/raw_data/non_existent_file.txt"],
     )
def main(
    sandbox_dataset_prefix: str,
    schema_type: SchemaType,
    direct_ingest_instance: Optional[DirectIngestInstance],
) -> None:
    """Defines the main function responsible for moving data from Postgres to BQ."""
    logging.info("Prefixing all output datasets with [%s_].",
                 known_args.sandbox_dataset_prefix)
    fake_gcs = FakeGCSFileSystem()

    # We mock the export config to a version that does not have any paused regions.
    with mock.patch(
            f"{cloud_sql_to_bq_refresh_config.__name__}.GcsfsFactory.build",
            return_value=fake_gcs,
    ):
        fake_gcs.upload_from_string(
            path=CloudSqlToBQConfig.default_config_path(),
            contents=STANDARD_YAML_CONTENTS,
            content_type="text/yaml",
        )
        federated_bq_schema_refresh(
            schema_type=schema_type,
            direct_ingest_instance=direct_ingest_instance,
            dataset_override_prefix=sandbox_dataset_prefix,
        )
        config = CloudSqlToBQConfig.for_schema_type(schema_type)
        final_destination_dataset = config.unioned_multi_region_dataset(
            dataset_override_prefix=sandbox_dataset_prefix)

    logging.info("Load complete. Data loaded to dataset [%s].",
                 final_destination_dataset)
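For a local run, main would typically be invoked with parsed command-line arguments; a hypothetical call (argument values are illustrative only):

main(
    sandbox_dataset_prefix="my_sandbox",   # output datasets get a "my_sandbox_" prefix
    schema_type=SchemaType.STATE,          # illustrative schema choice
    direct_ingest_instance=None,
)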
Example #3
 def test_do_upload_succeeds(self, mock_fs_factory: Mock) -> None:
     mock_fs = FakeGCSFileSystem()
     mock_fs.test_add_path(
         path=GcsfsFilePath.from_bucket_and_blob_name(
             "recidiviz-456-direct-ingest-state-us-xx",
             "raw_data/test_file.txt"),
         local_path=None,
     )
     mock_fs_factory.return_value = mock_fs
     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[(
             "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
             TODAY,
         )],
         project_id="recidiviz-456",
         region="us_xx",
     )
     expected_result = [
         "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"
     ]
     result: MultiRequestResultWithSkipped[str, str, str] = controller.do_upload()
     self.assertEqual(result.successes, expected_result)
     self.assertEqual(len(result.failures), 0)
     self.assertEqual(len(controller.skipped_files), 0)
     self.assertFalse(self.us_xx_manager.is_instance_paused())
    def setUp(self) -> None:
        self.test_app = Flask(__name__)
        blueprint = Blueprint("data_discovery_test", __name__)
        self.test_client = self.test_app.test_client()
        self.fakeredis = fakeredis.FakeRedis()

        self.fs = FakeGCSFileSystem()
        self.gcs_factory_patcher = patch(
            "recidiviz.admin_panel.routes.data_discovery.GcsfsFactory.build")
        self.gcs_factory_patcher.start().return_value = self.fs

        self.project_number_patcher = patch(
            "recidiviz.utils.metadata.project_number", return_value=999)
        self.requires_gae_auth_patcher = patch(
            "recidiviz.admin_panel.routes.data_discovery.requires_gae_auth",
            side_effect=lambda route: route,
        )
        self.redis_patcher = patch("redis.Redis", return_value=self.fakeredis)

        self.project_number_patcher.start()
        self.redis_patcher.start()
        self.requires_gae_auth_patcher.start()

        add_data_discovery_routes(blueprint)
        self.test_app.register_blueprint(blueprint)
Example #5
    def setUp(self) -> None:
        self.metadata_patcher = mock.patch(
            "recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = "recidiviz-staging"

        test_secrets = {
            # pylint: disable=protected-access
            SQLAlchemyEngineManager._get_cloudsql_instance_id_key(schema_type):
            f"test-project:us-east2:{schema_type.value}-data"
            for schema_type in SchemaType
        }
        self.get_secret_patcher = mock.patch(
            "recidiviz.utils.secrets.get_secret")

        self.get_secret_patcher.start().side_effect = test_secrets.get

        self.gcs_factory_patcher = mock.patch(
            "recidiviz.admin_panel.dataset_metadata_store.GcsfsFactory.build")

        self.fake_fs = FakeGCSFileSystem()
        self.gcs_factory_patcher.start().return_value = self.fake_fs

        self.fake_config_path = GcsfsFilePath.from_absolute_path(
            "gs://recidiviz-staging-configs/cloud_sql_to_bq_config.yaml")
    def setUp(self) -> None:
        self.schema_types: List[SchemaType] = list(SchemaType)
        self.enabled_schema_types = [
            schema_type
            for schema_type in self.schema_types
            if CloudSqlToBQConfig.is_valid_schema_type(schema_type)
        ]
        self.mock_project_id = "fake-recidiviz-project"
        self.metadata_patcher = mock.patch(
            "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.metadata"
        )
        self.mock_metadata = self.metadata_patcher.start()
        self.mock_metadata.project_id.return_value = self.mock_project_id

        self.gcs_factory_patcher = mock.patch(
            "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.GcsfsFactory.build"
        )
        self.fake_gcs = FakeGCSFileSystem()
        self.gcs_factory_patcher.start().return_value = self.fake_gcs
        self.set_config_yaml(
            """
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
"""
        )
    def test_normalize_file_path(self, mock_fs_factory: mock.MagicMock,
                                 mock_environment: mock.MagicMock) -> None:

        mock_environment.return_value = "production"
        mock_fs = FakeGCSFileSystem()
        mock_fs_factory.return_value = mock_fs

        path = GcsfsFilePath.from_absolute_path("bucket-us-xx/file-tag.csv")

        mock_fs.test_add_path(path, local_path=None)

        request_args = {
            "bucket": path.bucket_name,
            "relative_file_path": path.blob_name,
        }

        headers = {"X-Appengine-Cron": "test-cron"}
        response = self.client.get("/normalize_raw_file_path",
                                   query_string=request_args,
                                   headers=headers)

        self.assertEqual(200, response.status_code)

        self.assertEqual(1, len(mock_fs.all_paths))
        registered_path = mock_fs.all_paths[0]
        if not isinstance(registered_path, GcsfsFilePath):
            self.fail(f"Unexpected type for path [{type(registered_path)}]")
        self.assertTrue(
            DirectIngestGCSFileSystem.is_normalized_file_path(registered_path))
Example #8
    def setUp(self) -> None:
        self.client = app.test_client()

        self.fs = FakeGCSFileSystem()
        self.gcs_factory_patcher = patch(
            "recidiviz.ingest.aggregate.scrape_aggregate_reports.GcsfsFactory.build"
        )
        self.gcs_factory_patcher.start().return_value = self.fs
Example #9
def add_direct_ingest_path(fs: FakeGCSFileSystem,
                           path: Union[GcsfsFilePath, GcsfsDirectoryPath],
                           has_fixture: bool = True,
                           fail_handle_file_call: bool = False) -> None:
    local_path = None
    if has_fixture and isinstance(path, GcsfsFilePath):
        local_path = _get_fixture_for_direct_ingest_path(path)
    fs.test_add_path(path, local_path, fail_handle_file_call)
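A hypothetical call, seeding a fake ingest bucket with a fixture-backed raw file (bucket and blob names are illustrative, borrowed from the other examples on this page):

fs = FakeGCSFileSystem()
add_direct_ingest_path(
    fs,
    GcsfsFilePath.from_bucket_and_blob_name(
        "recidiviz-456-direct-ingest-state-us-xx", "raw_data/test_file.txt"
    ),
    has_fixture=True,  # back the fake path with a local fixture file
)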
Example #10
def prepare_files(fs: FakeGCSFileSystem,
                  manifest_filepath: str) -> GcsfsFilePath:
    """Makes the file system aware of all files for the report and returns the manifest filepath."""
    directory = os.path.dirname(manifest_filepath)
    for file_name in os.listdir(directory):
        path = os.path.join(directory, file_name)
        fs.test_add_path(gcs_path(path), path)
    return gcs_path(manifest_filepath)
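Usage mirrors the Justice Counts runner further down this page: seed the fake filesystem from a local fixture directory, then hand the returned manifest path to the ingest routine (the fixture path below is illustrative):

fs = FakeGCSFileSystem()
manifest_path = prepare_files(fs, "fixtures/my_report/manifest.yaml")
manual_upload.ingest(fs, manifest_path)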
    def setUp(self) -> None:
        self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
        self.email_generation_patcher = patch(
            "recidiviz.reporting.email_generation.generate")
        self.gcs_file_system_patcher = patch(
            "recidiviz.cloud_storage.gcsfs_factory.GcsfsFactory.build")
        self.project_id_patcher.start().return_value = "recidiviz-test"
        self.mock_email_generation = self.email_generation_patcher.start()
        self.gcs_file_system = FakeGCSFileSystem()
        self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
        self.mock_gcs_file_system.return_value = self.gcs_file_system

        self.state_code = "US_ID"
        self.report_type = "po_monthly_report"
    def setUp(self) -> None:
        # Ensures StateCode.US_XX is properly loaded
        importlib.reload(states)

        self.mock_project_id = "recidiviz-staging"
        self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
        self.mock_metadata = self.metadata_patcher.start()
        self.mock_metadata.return_value = self.mock_project_id
        self.gcs_factory_patcher = mock.patch(
            "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.GcsfsFactory.build"
        )
        self.fake_gcs = FakeGCSFileSystem()
        self.gcs_factory_patcher.start().return_value = self.fake_gcs
        yaml_contents = """
    region_codes_to_exclude:
    - US_ND
    state_history_tables_to_include:
    - state_person_history
    county_columns_to_exclude:
    person:
    - full_name
    - birthdate_inferred_from_age
    """
        path = GcsfsFilePath.from_absolute_path(
            f"gs://{self.mock_project_id}-configs/cloud_sql_to_bq_config.yaml"
        )
        self.fake_gcs.upload_from_string(
            path=path, contents=yaml_contents, content_type="text/yaml"
        )

        self.mock_bq_client = create_autospec(BigQueryClientImpl)
        self.client_patcher = mock.patch(
            f"{FEDERATED_REFRESH_PACKAGE_NAME}.BigQueryClientImpl"
        )
        self.client_patcher.start().return_value = self.mock_bq_client
        self.view_update_client_patcher = mock.patch(
            "recidiviz.big_query.view_update_manager.BigQueryClientImpl"
        )
        self.view_update_client_patcher.start().return_value = self.mock_bq_client

        test_secrets = {
            # pylint: disable=protected-access
            SQLAlchemyEngineManager._get_cloudsql_instance_id_key(
                schema_type
            ): f"test-project:us-east2:{schema_type.value}-data"
            for schema_type in SchemaType
        }
        self.get_secret_patcher = mock.patch("recidiviz.utils.secrets.get_secret")

        self.get_secret_patcher.start().side_effect = test_secrets.get
    def test_upload_from_sftp(
        self,
        _mock_upload_controller: mock.MagicMock,
        _mock_download_controller: mock.MagicMock,
        mock_fs_factory: mock.MagicMock,
        mock_download_delegate_factory: mock.MagicMock,
        mock_sftp_auth: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:

        region_code = "us_xx"
        mock_environment.return_value = "staging"
        request_args = {"region": region_code, "date": "2021-01-01"}
        headers = {"X-Appengine-Cron": "test-cron"}

        mock_fs_factory.return_value = FakeGCSFileSystem()

        mock_download_delegate_factory.return_value = Mock(
            spec=BaseSftpDownloadDelegate,
            root_directory=lambda _, candidate_paths: ".",
            filter_paths=lambda _, candidate_paths: candidate_paths,
            post_process_downloads=lambda _, download_directory_path: None,
        )
        mock_sftp_auth.return_value = SftpAuth("host", "username", "password",
                                               CnOpts())

        response = self.client.post("/upload_from_sftp",
                                    query_string=request_args,
                                    headers=headers)
        self.assertEqual(200, response.status_code)
Example #14
 def create_export_manager(
     self,
     region: Region,
     is_detect_row_deletion_view: bool = False,
     materialize_raw_data_table_views: bool = False,
     controller_file_tags: Optional[List[str]] = None,
 ) -> DirectIngestIngestViewExportManager:
     metadata_manager = PostgresDirectIngestFileMetadataManager(
         region.region_code)
     controller_file_tags = (["ingest_view"] if controller_file_tags is None
                             else controller_file_tags)
     return DirectIngestIngestViewExportManager(
         region=region,
         fs=FakeGCSFileSystem(),
         ingest_directory_path=GcsfsDirectoryPath.from_absolute_path(
             "ingest_bucket"),
         big_query_client=self.mock_client,
         file_metadata_manager=metadata_manager,
         view_collector=_ViewCollector(  # type: ignore[arg-type]
             region,
             controller_file_tags=controller_file_tags,
             is_detect_row_deletion_view=is_detect_row_deletion_view,
              materialize_raw_data_table_views=materialize_raw_data_table_views,
         ),
         launched_file_tags=controller_file_tags,
     )
    def setUp(self) -> None:
        self.fake_fs = FakeGCSFileSystem()
        self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
        self.project_id_patcher.start().return_value = "recidiviz-456"

        self.blocking_locks = ["blocking_lock1", "blocking_lock2"]
        with patch(
            "recidiviz.cloud_storage.gcs_pseudo_lock_manager.GcsfsFactory.build",
            Mock(return_value=self.fake_fs),
        ):
            self.lock_manager = DirectIngestRegionLockManager(
                region_code=StateCode.US_XX.value,
                blocking_locks=self.blocking_locks,
                ingest_instance=DirectIngestInstance.PRIMARY,
            )

            self.lock_manager_secondary = DirectIngestRegionLockManager(
                region_code=StateCode.US_XX.value,
                blocking_locks=self.blocking_locks,
                ingest_instance=DirectIngestInstance.SECONDARY,
            )

            self.lock_manager_other_region = DirectIngestRegionLockManager(
                region_code=StateCode.US_WW.value,
                blocking_locks=[],
                ingest_instance=DirectIngestInstance.PRIMARY,
            )
 def setUp(self) -> None:
     self.fs = DirectIngestGCSFileSystem(FakeGCSFileSystem())
     self.prioritizer = GcsfsDirectIngestJobPrioritizer(
         self.fs,
         self._INGEST_BUCKET_PATH,
         ["tagA", "tagB"],
         file_type_filter=GcsfsDirectIngestFileType.INGEST_VIEW,
     )
 def setUp(self) -> None:
     self.project_id_patcher = mock.patch(
         "recidiviz.cloud_storage.gcs_pseudo_lock_manager.metadata")
     self.project_id_patcher.start().return_value = "recidiviz-123"
     self.gcs_factory_patcher = mock.patch(
         "recidiviz.cloud_storage.gcs_pseudo_lock_manager.GcsfsFactory.build"
     )
     fake_gcs = FakeGCSFileSystem()
     self.gcs_factory_patcher.start().return_value = fake_gcs
     self.fs = fake_gcs
 def setUp(self) -> None:
     self.project_id_patcher = patch(
         "recidiviz.admin_panel.admin_stores.metadata.project_id")
     self.project_id_patcher.start().return_value = "recidiviz-staging"
     self.gcs_factory_patcher = patch(
         "recidiviz.admin_panel.admin_stores.GcsfsFactory.build")
     fake_gcs = FakeGCSFileSystem()
     self.gcs_factory_patcher.start().return_value = fake_gcs
     self.fs = fake_gcs
     self.admin_stores = AdminStores()
    def setUp(self) -> None:
        self.gcs_factory_patcher = mock.patch(
            "recidiviz.admin_panel.dataset_metadata_store.GcsfsFactory.build")

        fake_gcs = FakeGCSFileSystem()
        fake_gcs.upload_from_string(
            path=GcsfsFilePath.from_absolute_path(
                "gs://recidiviz-456-configs/cloud_sql_to_bq_config.yaml"),
            contents="""
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
""",
            content_type="text/yaml",
        )
        fake_gcs.upload_from_string(
            path=GcsfsFilePath.from_absolute_path(
                "gs://recidiviz-456-ingest-metadata/ingest_metadata_latest_ingested_upper_bounds.json"
            ),
            contents="""
{"state_code":"US_PA","processed_date":"2020-11-25"}
{"state_code":"US_ID","processed_date":"2021-01-04"}
{"state_code":"US_MO","processed_date":"2020-12-21"}
{"state_code":"US_ND","processed_date":"2020-12-16"}
""",
            content_type="text/text",
        )

        fixture_folder = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "fixtures",
        )
        self.table_column_map: Dict[str, List[str]] = defaultdict(list)
        for f in os.listdir(fixture_folder):
            _, table, col = f.split("__")
            self.table_column_map[table].append(col[:-len(".json")])
            path = GcsfsFilePath.from_absolute_path(
                f"gs://recidiviz-456-ingest-metadata/{f}")
            fake_gcs.test_add_path(path,
                                   local_path=os.path.join(fixture_folder, f))

        self.gcs_factory_patcher.start().return_value = fake_gcs
        self.store = DatasetMetadataCountsStore(
            dataset_nickname="ingest",
            metadata_file_prefix="ingest_state_metadata",
            override_project_id="recidiviz-456",
        )
        self.store.recalculate_store()
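The fixture-loading loop above assumes file names of the form <prefix>__<table>__<column>.json. A worked example of the parsing, using a hypothetical fixture name:

f = "ingest_state_metadata__state_person__race.json"  # hypothetical fixture file
_, table, col = f.split("__")
# table == "state_person"; col[:-len(".json")] == "race"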
    def setUp(self) -> None:
        self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
        self.get_secret_patcher = patch("recidiviz.utils.secrets.get_secret")
        self.gcs_file_system_patcher = patch(
            "recidiviz.reporting.email_generation.GcsfsFactory.build"
        )
        test_secrets = {"po_report_cdn_static_IP": "123.456.7.8"}
        self.get_secret_patcher.start().side_effect = test_secrets.get
        self.project_id_patcher.start().return_value = "recidiviz-test"
        self.gcs_file_system = FakeGCSFileSystem()
        self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
        self.mock_gcs_file_system.return_value = self.gcs_file_system

        with open(self.fixture_file_path()) as fixture_file:
            self.recipient = Recipient.from_report_json(json.loads(fixture_file.read()))

        self.state_code = StateCode.US_ID
        self.mock_batch_id = "1"
        self.recipient.data["batch_id"] = self.mock_batch_id
        self.report_context = self.report_context_type(self.state_code, self.recipient)
def run_justice_counts_ingest_locally(manifest_file: str,
                                      clean_up_db: bool) -> None:
    tmp_db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
    local_postgres_helpers.use_on_disk_postgresql_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.JUSTICE_COUNTS))

    fs = FakeGCSFileSystem()
    try:
        manual_upload.ingest(fs, test_utils.prepare_files(fs, manifest_file))
    finally:
        cleanup_run(tmp_db_dir, clean_up_db)
    def test_load_files_from_storage(self, mock_gcs_factory: MagicMock) -> None:
        """Test that load_files_from_storage returns files for the current batch and bucket name"""
        bucket_name = "bucket-name"
        self.mock_utils.get_email_content_bucket_name.return_value = bucket_name

        email_path = GcsfsFilePath.from_absolute_path(
            f"gs://{bucket_name}/{self.state_code}/{self.batch_id}/{self.to_address}.html"
        )
        other_path = GcsfsFilePath.from_absolute_path(
            f"gs://{bucket_name}/excluded/exclude.json"
        )

        fake_gcs_file_system = FakeGCSFileSystem()
        fake_gcs_file_system.upload_from_string(
            path=email_path, contents="<html>", content_type="text/html"
        )
        fake_gcs_file_system.upload_from_string(
            path=other_path, contents="{}", content_type="text/json"
        )

        mock_gcs_factory.return_value = fake_gcs_file_system

        files = email_delivery.load_files_from_storage(
            bucket_name, f"{self.state_code}/{self.batch_id}"
        )

        self.assertEqual(files, {f"{self.to_address}": "<html>"})
def build_gcsfs_controller_for_tests(
    controller_cls,
    fixture_path_prefix: str,
    run_async: bool,
    fake_fs: Optional[FakeGCSFileSystem] = None,
    can_start_ingest: bool = True,
    **kwargs,
) -> GcsfsDirectIngestController:
    """Builds an instance of |controller_cls| for use in tests with several internal classes mocked properly. """
    if fake_fs is None:
        fake_fs = FakeGCSFileSystem()

    def mock_build_fs():
        return fake_fs

    if 'TestGcsfsDirectIngestController' in controller_cls.__name__:
        view_collector_cls: Type[BigQueryViewCollector] = \
            FakeDirectIngestPreProcessedIngestViewCollector
    else:
        view_collector_cls = DirectIngestPreProcessedIngestViewCollector

    with patch(
            f'{BaseDirectIngestController.__module__}.DirectIngestCloudTaskManagerImpl'
    ) as mock_task_factory_cls:
        with patch(
                f'{GcsfsDirectIngestController.__module__}.BigQueryClientImpl'
        ) as mock_big_query_client_cls:
            with patch(
                    f'{GcsfsDirectIngestController.__module__}.DirectIngestRawFileImportManager',
                    FakeDirectIngestRawFileImportManager):
                with patch(
                        f'{GcsfsDirectIngestController.__module__}.DirectIngestPreProcessedIngestViewCollector',
                        view_collector_cls):
                    task_manager = (
                        FakeAsyncDirectIngestCloudTaskManager()
                        if run_async
                        else FakeSynchronousDirectIngestCloudTaskManager()
                    )
                    mock_task_factory_cls.return_value = task_manager
                    mock_big_query_client_cls.return_value = FakeDirectIngestBigQueryClient(
                        project_id=metadata.project_id(), fs=fake_fs
                    )
                    with patch.object(GcsfsFactory, 'build',
                                      new=mock_build_fs):
                        controller = controller_cls(
                            ingest_directory_path=f'{fixture_path_prefix}/fixtures',
                            storage_directory_path='storage/path',
                            **kwargs)
                        controller.csv_reader = TestSafeGcsCsvReader(fake_fs)
                        controller.raw_file_import_manager.csv_reader = controller.csv_reader

                        task_manager.set_controller(controller)
                        fake_fs.test_set_delegate(
                            DirectIngestFakeGCSFileSystemDelegate(
                                controller, can_start_ingest=can_start_ingest))
                        return controller
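A hypothetical invocation (UsXxGcsfsDirectIngestController stands in for a region-specific controller class; any extra kwargs are forwarded to its constructor):

controller = build_gcsfs_controller_for_tests(
    UsXxGcsfsDirectIngestController,  # hypothetical controller class
    fixture_path_prefix='us_xx',
    run_async=False,
)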
    def setUp(self) -> None:
        self.project_id = "recidiviz-456"
        self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
        self.project_id_patcher.start().return_value = self.project_id
        self.test_region = fake_region(
            region_code="us_xx", are_raw_data_bq_imports_enabled_in_env=True)

        self.region_module_patcher = patch.object(
            direct_ingest_raw_table_migration_collector,
            "regions",
            new=controller_fixtures,
        )
        self.region_module_patcher.start()

        self.fs = DirectIngestGCSFileSystem(FakeGCSFileSystem())
        self.ingest_directory_path = GcsfsDirectoryPath(
            bucket_name="direct/controllers/fixtures")
        self.temp_output_path = GcsfsDirectoryPath(bucket_name="temp_bucket")

        self.region_raw_file_config = DirectIngestRegionRawFileConfig(
            region_code="us_xx",
            yaml_config_file_dir=fixtures.as_filepath("us_xx"),
        )

        self.mock_big_query_client = create_autospec(BigQueryClient)
        self.num_lines_uploaded = 0

        self.mock_big_query_client.insert_into_table_from_cloud_storage_async.side_effect = (
            self.mock_import_raw_file_to_big_query)

        self.import_manager = DirectIngestRawFileImportManager(
            region=self.test_region,
            fs=self.fs,
            ingest_directory_path=self.ingest_directory_path,
            temp_output_directory_path=self.temp_output_path,
            region_raw_file_config=self.region_raw_file_config,
            big_query_client=self.mock_big_query_client,
        )
        self.import_manager.csv_reader = _TestSafeGcsCsvReader(
            self.fs.gcs_file_system)

        self.time_patcher = patch(
            "recidiviz.ingest.direct.controllers.direct_ingest_raw_file_import_manager.time"
        )
        self.mock_time = self.time_patcher.start()

        def fake_get_dataset_ref(dataset_id: str) -> bigquery.DatasetReference:
            return bigquery.DatasetReference(project=self.project_id,
                                             dataset_id=dataset_id)

        self.mock_big_query_client.dataset_ref_for_id = fake_get_dataset_ref
    def setUp(self) -> None:
        self.gcs_file_system_patcher = patch(
            "recidiviz.cloud_storage.gcsfs_factory.GcsfsFactory.build")
        self.requires_gae_auth_patcher = patch(
            "recidiviz.admin_panel.routes.case_triage.requires_gae_auth",
            side_effect=lambda route: route,
        )

        self.requires_gae_auth_patcher.start()

        self.gcs_file_system = FakeGCSFileSystem()
        self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
        self.mock_gcs_file_system.return_value = self.gcs_file_system

        self.app = Flask(__name__)

        blueprint = Blueprint("email_reporting_test", __name__)
        self.app.config["TESTING"] = True

        self.client = self.app.test_client()

        add_case_triage_routes(blueprint, admin_stores)
        self.app.register_blueprint(blueprint)

        with self.app.test_request_context():
            self.state_code = StateCode.US_ID
            self.generate_emails_url = flask.url_for(
                "email_reporting_test._generate_emails",
                state_code_str=self.state_code.value,
            )

            self.send_emails_url = flask.url_for(
                "email_reporting_test._send_emails",
                state_code_str=self.state_code.value,
            )
            self.review_year = 2021
            self.review_month = 5
Example #26
    def setUp(self) -> None:
        self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
        self.get_secret_patcher = patch("recidiviz.utils.secrets.get_secret")
        self.gcs_file_system_patcher = patch(
            "recidiviz.reporting.email_generation.GcsfsFactory.build"
        )
        test_secrets = {"po_report_cdn_static_IP": "123.456.7.8"}
        self.get_secret_patcher.start().side_effect = test_secrets.get
        self.project_id_patcher.start().return_value = "recidiviz-test"
        self.gcs_file_system = FakeGCSFileSystem()
        self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
        self.mock_gcs_file_system.return_value = self.gcs_file_system

        with open(
            os.path.join(
                f"{os.path.dirname(__file__)}/context/po_monthly_report", FIXTURE_FILE
            )
        ) as fixture_file:
            self.recipient = Recipient.from_report_json(json.loads(fixture_file.read()))

        self.state_code = "US_ID"
        self.mock_batch_id = "1"
        self.recipient.data["batch_id"] = self.mock_batch_id
        self.report_context = PoMonthlyReportContext(self.state_code, self.recipient)
Example #27
    def setUp(self) -> None:
        self.schema_types: List[SchemaType] = list(SchemaType)
        self.disabled_schema_types = {
            SchemaType.JUSTICE_COUNTS, SchemaType.CASE_TRIAGE
        }
        self.enabled_schema_types = [
            schema_type for schema_type in self.schema_types
            if schema_type not in self.disabled_schema_types
        ]
        self.mock_project_id = "fake-recidiviz-project"
        self.environment_patcher = mock.patch(
            "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.environment"
        )
        self.metadata_patcher = mock.patch(
            "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.metadata"
        )
        self.mock_metadata = self.metadata_patcher.start()
        self.mock_metadata.project_id.return_value = self.mock_project_id
        self.mock_environment = self.environment_patcher.start()
        self.mock_environment.GCP_PROJECT_STAGING = self.mock_project_id

        self.gcs_factory_patcher = mock.patch(
            "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.GcsfsFactory.build"
        )
        self.fake_gcs = FakeGCSFileSystem()
        self.gcs_factory_patcher.start().return_value = self.fake_gcs
        self.set_config_yaml("""
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
""")
Example #28
def main(
    repo_directory: str,
    system: schema.System,
    base_drive_folder_id: str,
    credentials_directory: str,
    app_url: Optional[str],
    filter_type: Optional[FilterType],
    regions: Optional[List[str]],
) -> None:
    """
    Downloads, tests, and ingests specified regions
    """
    regions_to_ingest = _get_list_of_regions(filter_type, regions)

    logging.info("Starting ingest of regions...")
    logging.info(regions_to_ingest)

    tmp_db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
    local_postgres_helpers.use_on_disk_postgresql_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.JUSTICE_COUNTS)
    )
    fs = FakeGCSFileSystem()

    region_ingest_summary = []

    try:
        for region in regions_to_ingest:
            region_ingest_summary.append(
                _full_ingest_region(
                    fs,
                    region,
                    repo_directory,
                    system,
                    base_drive_folder_id,
                    credentials_directory,
                    app_url,
                )
            )
    finally:
        cleanup_run(tmp_db_dir, True)

    for ingest_result in region_ingest_summary:
        if ingest_result.success:
            logging.info("%s: success", ingest_result.region_code)
        else:
            logging.error(
                "%s: failed - %s", ingest_result.region_code, ingest_result.error
            )
Example #29
 def test_get_paths_to_upload_is_correct(
     self,
     mock_fs_factory: Mock,
 ) -> None:
     mock_fs = FakeGCSFileSystem()
     mock_fs.test_add_path(
         path=GcsfsFilePath.from_bucket_and_blob_name(
             "recidiviz-456-direct-ingest-state-us-xx",
             "raw_data/test_file.txt"),
         local_path=None,
     )
     mock_fs.test_add_path(
         path=GcsfsFilePath.from_bucket_and_blob_name(
             "recidiviz-456-direct-ingest-state-us-xx",
             "raw_data/subdir1/test_file.txt",
         ),
         local_path=None,
     )
     mock_fs.test_add_path(
         path=GcsfsDirectoryPath.from_bucket_and_blob_name(
             "recidiviz-456-direct-ingest-state-us-xx",
             "raw_data/subdir2/"),
         local_path=None,
     )
     mock_fs_factory.return_value = mock_fs
     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[
             ("recidiviz-456-direct-ingest-state-us-xx/raw_data/", TODAY),
         ],
         project_id="recidiviz-456",
         region="us_xx",
     )
     result = [
         ("recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
          TODAY),
         (
             "recidiviz-456-direct-ingest-state-us-xx/raw_data/subdir1/test_file.txt",
             TODAY,
         ),
     ]
     self.assertListEqual(result, controller.get_paths_to_upload())
     self.assertFalse(self.us_xx_manager.is_instance_paused())
 def setUp(self) -> None:
     self.fake_fs = FakeGCSFileSystem()
     self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
     self.project_id_patcher.start().return_value = "recidiviz-456"
     with patch(
             "recidiviz.cloud_storage.gcs_pseudo_lock_manager.GcsfsFactory.build",
             Mock(return_value=self.fake_fs),
     ):
         self.lock_manager = CloudSqlToBQLockManager()
         self.lock_bucket = self.lock_manager.lock_manager.bucket_name
         self.state_ingest_lock_manager = DirectIngestRegionLockManager(
             region_code=StateCode.US_XX.value,
             blocking_locks=[],
             ingest_instance=DirectIngestInstance.PRIMARY,
         )
         self.county_ingest_lock_manager = DirectIngestRegionLockManager(
             region_code="US_XX_YYYYY",
             blocking_locks=[],
             ingest_instance=DirectIngestInstance.PRIMARY,
         )