def test_raise_when_condition_fails(self) -> None: builder = SimpleBigQueryViewBuilder( dataset_id="fake_dataset", view_id="my_fake_view", view_query_template="SELECT NULL LIMIT 0", should_build_predicate=(lambda: False), ) with self.assertRaises(BigQueryViewBuilderShouldNotBuildError): builder.build()
def test_no_raise_when_condition_succeeds(self) -> None: builder = SimpleBigQueryViewBuilder( dataset_id="fake_dataset", view_id="my_fake_view", view_query_template="SELECT NULL LIMIT 0", should_build_predicate=(lambda: True), ) # Should be no raise builder.build()
class ViewCollectionExportManagerTest(unittest.TestCase): """Tests for view_export_manager.py.""" def setUp(self) -> None: self.app = Flask(__name__) self.app.register_blueprint(export_blueprint) self.app.config["TESTING"] = True self.headers: Dict[str, Dict[Any, Any]] = { "x-goog-iap-jwt-assertion": {} } self.client = self.app.test_client() self.mock_cloud_task_client_patcher = mock.patch( "google.cloud.tasks_v2.CloudTasksClient") self.mock_cloud_task_client_patcher.start() self.mock_uuid_patcher = mock.patch( f"{CLOUD_TASK_MANAGER_PACKAGE_NAME}.uuid") self.mock_uuid = self.mock_uuid_patcher.start() with self.app.test_request_context(): self.metric_view_data_export_url = flask.url_for( "export.metric_view_data_export") self.create_metric_view_data_export_tasks_url = flask.url_for( "export.create_metric_view_data_export_tasks") self.mock_state_code = "US_XX" self.mock_project_id = "test-project" self.mock_dataset_id = "base_dataset" self.mock_dataset = bigquery.dataset.DatasetReference( self.mock_project_id, self.mock_dataset_id) self.metadata_patcher = mock.patch( "recidiviz.utils.metadata.project_id") self.mock_project_id_fn = self.metadata_patcher.start() self.mock_project_id_fn.return_value = self.mock_project_id self.client_patcher = mock.patch( "recidiviz.metrics.export.view_export_manager.BigQueryClientImpl") self.mock_client = self.client_patcher.start().return_value self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset self.mock_view_builder = SimpleBigQueryViewBuilder( dataset_id=self.mock_dataset.dataset_id, view_id="test_view", description="test_view description", view_query_template="SELECT NULL LIMIT 0", ) self.mock_metric_view_builder = MetricBigQueryViewBuilder( dataset_id=self.mock_dataset.dataset_id, view_id="test_view", description="test_view description", view_query_template="SELECT NULL LIMIT 0", dimensions=tuple(), ) self.view_builders_for_dataset = [ self.mock_view_builder, self.mock_metric_view_builder, ] self.output_uri_template_for_dataset = { "dataset_id": "gs://{project_id}-dataset-location/subdirectory", } self.views_to_update = { self.mock_dataset_id: self.view_builders_for_dataset } self.mock_export_name = "MOCK_EXPORT_NAME" self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE self.metric_dataset_export_configs_index = { "EXPORT": ExportViewCollectionConfig( view_builders_to_export=[self.mock_view_builder], output_directory_uri_template= "gs://{project_id}-dataset-location/subdirectory", export_name="EXPORT", bq_view_namespace=self.mock_big_query_view_namespace, ), "OTHER_EXPORT": ExportViewCollectionConfig( view_builders_to_export=[self.mock_metric_view_builder], output_directory_uri_template= "gs://{project_id}-dataset-location/subdirectory", export_name="OTHER_EXPORT", bq_view_namespace=self.mock_big_query_view_namespace, ), self.mock_export_name: ExportViewCollectionConfig( view_builders_to_export=self.view_builders_for_dataset, output_directory_uri_template= "gs://{project_id}-dataset-location/subdirectory", export_name=self.mock_export_name, bq_view_namespace=self.mock_big_query_view_namespace, ), } export_config_values = { "OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT": self.output_uri_template_for_dataset, "VIEW_COLLECTION_EXPORT_INDEX": self.metric_dataset_export_configs_index, } self.export_config_patcher = mock.patch( # type: ignore[call-overload] "recidiviz.metrics.export.view_export_manager.export_config", **export_config_values, ) self.mock_export_config = self.export_config_patcher.start() self.gcs_factory_patcher = mock.patch( "recidiviz.metrics.export.view_export_manager.GcsfsFactory.build") self.gcs_factory_patcher.start().return_value = FakeGCSFileSystem() def tearDown(self) -> None: self.client_patcher.stop() self.export_config_patcher.stop() self.metadata_patcher.stop() self.gcs_factory_patcher.stop() self.mock_uuid_patcher.stop() self.mock_cloud_task_client_patcher.stop() @mock.patch("recidiviz.utils.environment.get_gcp_environment") def test_get_configs_for_export_name( self, mock_environment: mock.MagicMock) -> None: """Tests get_configs_for_export_name function to ensure that export names correctly match""" mock_environment.return_value = "production" export_configs_for_filter = view_export_manager.get_configs_for_export_name( export_name=self.mock_export_name, state_code=self.mock_state_code, project_id=self.mock_project_id, ) view = self.mock_view_builder.build() metric_view = self.mock_metric_view_builder.build() expected_view_config_list = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=view, view_filter_clause= f" WHERE state_code = '{self.mock_state_code}'", intermediate_table_name= f"{view.view_id}_table_{self.mock_state_code}", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code=self.mock_state_code, )), export_output_formats=[ExportOutputFormatType.JSON], ), ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=metric_view, view_filter_clause= f" WHERE state_code = '{self.mock_state_code}'", intermediate_table_name= f"{view.view_id}_table_{self.mock_state_code}", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code=self.mock_state_code, )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.METRIC, ], ), ] self.assertEqual(expected_view_config_list, export_configs_for_filter) # Test for case insensitivity export_configs_for_filter = view_export_manager.get_configs_for_export_name( export_name=self.mock_export_name.lower(), state_code=self.mock_state_code.lower(), project_id=self.mock_project_id, ) self.assertEqual(expected_view_config_list, export_configs_for_filter) @mock.patch("recidiviz.utils.environment.get_gcp_environment") def test_get_configs_for_export_name_state_agnostic( self, mock_environment: mock.MagicMock) -> None: """Tests get_configs_for_export_name function to ensure that export names correctly match""" mock_environment.return_value = "production" export_configs_for_filter = view_export_manager.get_configs_for_export_name( export_name=self.mock_export_name, project_id=self.mock_project_id) view = self.mock_view_builder.build() metric_view = self.mock_metric_view_builder.build() expected_view_config_list = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=view, view_filter_clause=None, intermediate_table_name=f"{view.view_id}_table", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory".format( project_id=self.mock_project_id, )), export_output_formats=[ExportOutputFormatType.JSON], ), ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=metric_view, view_filter_clause=None, intermediate_table_name=f"{view.view_id}_table", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory".format( project_id=self.mock_project_id, )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.METRIC, ], ), ] self.assertEqual(expected_view_config_list, export_configs_for_filter) # Test for case insensitivity export_configs_for_filter = view_export_manager.get_configs_for_export_name( export_name=self.mock_export_name.lower(), project_id=self.mock_project_id) self.assertEqual(expected_view_config_list, export_configs_for_filter) @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views") @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage( self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock) -> None: """Tests the table is created from the view and then extracted.""" view_export_manager.export_view_data_to_cloud_storage( self.mock_export_name, self.mock_state_code, mock_view_exporter) view = self.mock_view_builder.build() metric_view = self.mock_metric_view_builder.build() expected_view_config_list_1 = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=view, view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name=f"{view.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ExportOutputFormatType.JSON], ) ] expected_view_config_list_2 = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=metric_view, view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name=f"{view.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.METRIC, ], ) ] mock_view_update_manager_rematerialize.assert_called() mock_view_exporter.export_and_validate.assert_has_calls( [ mock.call([]), # CSV export mock.call([]), mock.call([ expected_view_config_list_1[0]. pointed_to_staging_subdirectory(), expected_view_config_list_2[0]. pointed_to_staging_subdirectory(), ]), # JSON exports mock.call([ expected_view_config_list_2[0]. pointed_to_staging_subdirectory() ]), # METRIC export ("OTHER_EXPORT") ], any_order=True, ) @mock.patch( "recidiviz.big_query.view_update_manager.create_managed_dataset_and_deploy_views_for_view_builders" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_raise_exception_no_export_matched( self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock) -> None: # pylint: disable=unused-argument """Tests the table is created from the view and then extracted.""" self.mock_export_config.NAMESPACE_TO_UPDATE_FOR_EXPORT_FILTER = { "US_YY": "NAMESPACE" } with self.assertRaises(ValueError) as e: view_export_manager.export_view_data_to_cloud_storage( export_job_name="JOBZZZ", override_view_exporter=mock_view_exporter) self.assertEqual( str(e.exception), "Export filter did not match any export configs:", " JOBZZZ", ) @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views") @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_state_agnostic( self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock) -> None: """Tests the table is created from the view and then extracted, where the export is not state-specific.""" state_agnostic_dataset_export_configs = { self.mock_export_name: ExportViewCollectionConfig( view_builders_to_export=self.view_builders_for_dataset, output_directory_uri_template= "gs://{project_id}-bucket-without-state-codes", export_name=self.mock_export_name, bq_view_namespace=self.mock_big_query_view_namespace, ), } self.mock_export_config.VIEW_COLLECTION_EXPORT_INDEX = ( state_agnostic_dataset_export_configs) view_export_manager.export_view_data_to_cloud_storage( export_job_name=self.mock_export_name, override_view_exporter=mock_view_exporter, ) view = self.mock_view_builder.build() metric_view = self.mock_metric_view_builder.build() view_export_configs = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=view, view_filter_clause=None, intermediate_table_name=f"{view.view_id}_table", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-bucket-without-state-codes".format( project_id=self.mock_project_id, )), export_output_formats=[ExportOutputFormatType.JSON], ), ExportBigQueryViewConfig( bq_view_namespace=self.mock_big_query_view_namespace, view=metric_view, view_filter_clause=None, intermediate_table_name=f"{view.view_id}_table", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-bucket-without-state-codes".format( project_id=self.mock_project_id, )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.METRIC, ], ), ] mock_view_update_manager_rematerialize.assert_called() mock_view_exporter.export_and_validate.assert_has_calls( [ mock.call([]), # CSV export mock.call([ view_export_configs[1].pointed_to_staging_subdirectory() ]), # JSON export mock.call([ conf.pointed_to_staging_subdirectory() for conf in view_export_configs ]), # METRIC export ], any_order=True, ) @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views") @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_value_error( self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock) -> None: """Tests the table is created from the view and then extracted.""" mock_view_exporter.export_and_validate.side_effect = ValueError with self.assertRaises(ValueError): view_export_manager.export_view_data_to_cloud_storage( self.mock_export_name, override_view_exporter=mock_view_exporter) # Just the metric export is attempted and then the raise stops subsequent checks from happening mock_view_update_manager_rematerialize.assert_called_once() @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views") @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_validation_error( self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock) -> None: """Tests the table is created from the view and then extracted.""" mock_view_exporter.export_and_validate.side_effect = ViewExportValidationError # Should not throw view_export_manager.export_view_data_to_cloud_storage( self.mock_export_name, override_view_exporter=mock_view_exporter) # Just the metric export is attempted and then the raise stops subsequent checks from happening mock_view_update_manager_rematerialize.assert_called_once() @mock.patch("recidiviz.metrics.export.view_export_manager.deployed_views") @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views") @mock.patch( "recidiviz.big_query.view_update_manager.create_managed_dataset_and_deploy_views_for_view_builders" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_update_all_views( self, mock_view_exporter: Mock, mock_view_update_manager_deploy: Mock, mock_view_update_manager_rematerialize: Mock, mock_deployed_views: Mock, ) -> None: """Tests that all views in the namespace are updated before the export when the export name is in export_config.NAMESPACES_REQUIRING_FULL_UPDATE.""" self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [ self.mock_big_query_view_namespace ] mock_deployed_views.DEPLOYED_VIEW_BUILDERS_BY_NAMESPACE = { self.mock_big_query_view_namespace: self.view_builders_for_dataset } view_export_manager.export_view_data_to_cloud_storage( self.mock_export_name, override_view_exporter=mock_view_exporter) mock_view_update_manager_deploy.assert_called_with( view_source_table_datasets=VIEW_SOURCE_TABLE_DATASETS, view_builders_to_update=self.view_builders_for_dataset, ) mock_view_update_manager_rematerialize.assert_called_once() @mock.patch("recidiviz.metrics.export.view_export_manager.deployed_views") @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views") @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_update_materialized_views_only( self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock, mock_deployed_views: Mock, ) -> None: """Tests that only materialized views in the namespace are updated before the export when the export name is not in export_config.NAMESPACES_REQUIRING_FULL_UPDATE.""" self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [ "OTHER_NAMESPACE" ] mock_deployed_views.DEPLOYED_VIEW_BUILDERS_BY_NAMESPACE = { self.mock_big_query_view_namespace: self.view_builders_for_dataset } view_export_manager.export_view_data_to_cloud_storage( self.mock_export_name, override_view_exporter=mock_view_exporter) mock_view_update_manager_rematerialize.assert_called_with( view_source_table_datasets=VIEW_SOURCE_TABLE_DATASETS, all_view_builders=DEPLOYED_VIEW_BUILDERS, views_to_update=[ view.build() for view in self.view_builders_for_dataset ], ) @mock.patch( "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage" ) def test_metric_view_data_export_valid_request( self, mock_export_view_data_to_cloud_storage: Mock) -> None: with self.app.test_request_context(): mock_export_view_data_to_cloud_storage.return_value = None response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="export_job_name=EXPORT&state_code=US_XX", ) self.assertEqual(HTTPStatus.OK, response.status_code) response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="export_job_name=export&state_code=us_xx", ) self.assertEqual(HTTPStatus.OK, response.status_code) @mock.patch( "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage" ) def test_metric_view_data_export_state_agnostic( self, mock_export_view_data_to_cloud_storage: Mock) -> None: with self.app.test_request_context(): mock_export_view_data_to_cloud_storage.return_value = None response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="export_job_name=MOCK_EXPORT_NAME", ) self.assertEqual(HTTPStatus.OK, response.status_code) # case insensitive response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="export_job_name=mock_export_name", ) self.assertEqual(HTTPStatus.OK, response.status_code) @mock.patch( "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage" ) def test_metric_view_data_export_missing_required_state_code( self, mock_export_view_data_to_cloud_storage: Mock) -> None: with self.app.test_request_context(): mock_export_view_data_to_cloud_storage.return_value = None response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="export_job_name=EXPORT", ) self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code) self.assertEqual( b"Missing required state_code parameter for export_job_name EXPORT", response.data, ) # case insensitive response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="export_job_name=export", ) self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code) self.assertEqual( b"Missing required state_code parameter for export_job_name EXPORT", response.data, ) @mock.patch( "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage" ) def test_metric_view_data_export_missing_export_job_name( self, mock_export_view_data_to_cloud_storage: Mock) -> None: with self.app.test_request_context(): mock_export_view_data_to_cloud_storage.return_value = None response = self.client.get( self.metric_view_data_export_url, headers=self.headers, query_string="state_code=US_XX", ) self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code) self.assertEqual(b"Missing required export_job_name URL parameter", response.data) @mock.patch( "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task" ) def test_create_metric_view_data_export_tasks_state_code_filter( self, mock_create_metric_view_data_export_task: Mock) -> None: with self.app.test_request_context(): mock_create_metric_view_data_export_task.return_value = None response = self.client.get( self.create_metric_view_data_export_tasks_url, headers=self.headers, query_string="export_job_filter=US_XX", ) self.assertEqual(HTTPStatus.OK, response.status_code) mock_create_metric_view_data_export_task.assert_has_calls( [ mock.call(export_job_name="EXPORT", state_code="US_XX"), mock.call(export_job_name="OTHER_EXPORT", state_code="US_XX"), ], any_order=True, ) response = self.client.get( self.create_metric_view_data_export_tasks_url, headers=self.headers, query_string="export_job_filter=us_xx", ) self.assertEqual(HTTPStatus.OK, response.status_code) mock_create_metric_view_data_export_task.assert_has_calls( [ mock.call(export_job_name="EXPORT", state_code="US_XX"), mock.call(export_job_name="OTHER_EXPORT", state_code="US_XX"), ], any_order=True, ) @mock.patch( "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task" ) def test_create_metric_view_data_export_tasks_export_name_filter_state_agnostic( self, mock_create_metric_view_data_export_task: Mock) -> None: with self.app.test_request_context(): mock_create_metric_view_data_export_task.return_value = None response = self.client.get( self.create_metric_view_data_export_tasks_url, headers=self.headers, query_string="export_job_filter=MOCK_EXPORT_NAME", ) self.assertEqual(HTTPStatus.OK, response.status_code) mock_create_metric_view_data_export_task.assert_has_calls( [ mock.call(export_job_name="MOCK_EXPORT_NAME", state_code=None), ], any_order=True, ) # case insensitive response = self.client.get( self.create_metric_view_data_export_tasks_url, headers=self.headers, query_string="export_job_filter=mock_export_name", ) self.assertEqual(HTTPStatus.OK, response.status_code) mock_create_metric_view_data_export_task.assert_has_calls( [ mock.call(export_job_name="MOCK_EXPORT_NAME", state_code=None), ], any_order=True, ) @mock.patch( "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task" ) def test_create_metric_view_data_export_tasks_export_name_filter( self, mock_create_metric_view_data_export_task: Mock) -> None: with self.app.test_request_context(): mock_create_metric_view_data_export_task.return_value = None response = self.client.get( self.create_metric_view_data_export_tasks_url, headers=self.headers, query_string="export_job_filter=EXPORT", ) self.assertEqual(HTTPStatus.OK, response.status_code) mock_create_metric_view_data_export_task.assert_has_calls( [ mock.call(export_job_name="EXPORT", state_code="US_XX"), mock.call(export_job_name="EXPORT", state_code="US_WW"), ], any_order=True, ) # case insensitive response = self.client.get( self.create_metric_view_data_export_tasks_url, headers=self.headers, query_string="export_job_filter=export", ) self.assertEqual(HTTPStatus.OK, response.status_code) mock_create_metric_view_data_export_task.assert_has_calls( [ mock.call(export_job_name="EXPORT", state_code="US_XX"), mock.call(export_job_name="EXPORT", state_code="US_WW"), ], any_order=True, )
class BigQueryViewExporterTest(unittest.TestCase): """Implements tests for BigQueryViewExporter.""" def setUp(self) -> None: self.mock_bq_client = mock.create_autospec(BigQueryClient) self.mock_validator = mock.create_autospec(BigQueryViewExportValidator) self.mock_project_id = "fake-project" self.metadata_patcher = mock.patch( "recidiviz.utils.metadata.project_id") self.mock_project_id_fn = self.metadata_patcher.start() self.mock_project_id_fn.return_value = self.mock_project_id self.mock_bq_view_namespace = BigQueryViewNamespace.STATE self.view_builder = SimpleBigQueryViewBuilder( dataset_id="test_dataset", view_id="test_view", description="test_view description", view_query_template="SELECT NULL LIMIT 0", ) self.second_view_builder = SimpleBigQueryViewBuilder( dataset_id="test_dataset", view_id="test_view_2", description="test_view_2 description", view_query_template="SELECT NULL LIMIT 0", ) self.view_export_configs = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_bq_view_namespace, view=self.view_builder.build(), view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name= f"{self.view_builder.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.HEADERLESS_CSV, ], ), ExportBigQueryViewConfig( bq_view_namespace=self.mock_bq_view_namespace, view=self.second_view_builder.build(), view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name= f"{self.second_view_builder.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.CSV, ], ), ] def tearDown(self) -> None: self.metadata_patcher.stop() def test_csv_export_format(self) -> None: exporter = CSVBigQueryViewExporter(self.mock_bq_client, self.mock_validator) export_config_and_paths = exporter.export(self.view_export_configs) self.assertEqual(len(export_config_and_paths), len(self.view_export_configs)) for _export_config, gcs_path in export_config_and_paths: self.assertTrue( gcs_path.file_name.endswith(".csv"), msg= f"GCS output file {gcs_path.abs_path()} is not a CSV as expected.", ) def test_csv_throws(self) -> None: exporter = CSVBigQueryViewExporter(self.mock_bq_client, self.mock_validator) view_export_configs = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_bq_view_namespace, view=self.view_builder.build(), view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name= f"{self.view_builder.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.HEADERLESS_CSV, ], ), ExportBigQueryViewConfig( bq_view_namespace=self.mock_bq_view_namespace, view=self.second_view_builder.build(), view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name= f"{self.second_view_builder.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ExportOutputFormatType.JSON], ), ] with self.assertRaises(ValueError): exporter.export(view_export_configs) def test_json_export_format(self) -> None: exporter = JsonLinesBigQueryViewExporter(self.mock_bq_client, self.mock_validator) export_config_and_paths = exporter.export(self.view_export_configs) self.assertEqual(len(export_config_and_paths), len(self.view_export_configs)) for _export_config, gcs_path in export_config_and_paths: self.assertTrue( gcs_path.file_name.endswith(".json"), msg= f"GCS output file {gcs_path.abs_path()} is not a .json file as expected.", ) def test_json_throws(self) -> None: exporter = JsonLinesBigQueryViewExporter(self.mock_bq_client, self.mock_validator) view_export_configs = [ ExportBigQueryViewConfig( bq_view_namespace=self.mock_bq_view_namespace, view=self.view_builder.build(), view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name= f"{self.view_builder.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.HEADERLESS_CSV, ], ), ExportBigQueryViewConfig( bq_view_namespace=self.mock_bq_view_namespace, view=self.second_view_builder.build(), view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name= f"{self.second_view_builder.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ExportOutputFormatType.METRIC], ), ] with self.assertRaises(ValueError): exporter.export(view_export_configs)
class ViewCollectionExportManagerTest(unittest.TestCase): """Tests for view_export_manager.py.""" def setUp(self) -> None: self.mock_state_code = "US_XX" self.mock_project_id = "fake-recidiviz-project" self.mock_dataset_id = "base_dataset" self.mock_dataset = bigquery.dataset.DatasetReference( self.mock_project_id, self.mock_dataset_id) self.metadata_patcher = mock.patch( "recidiviz.utils.metadata.project_id") self.mock_project_id_fn = self.metadata_patcher.start() self.mock_project_id_fn.return_value = self.mock_project_id self.client_patcher = mock.patch( "recidiviz.metrics.export.view_export_manager.BigQueryClientImpl") self.mock_client = self.client_patcher.start().return_value self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset self.mock_view_builder = SimpleBigQueryViewBuilder( dataset_id=self.mock_dataset.dataset_id, view_id="test_view", view_query_template="SELECT NULL LIMIT 0", ) self.mock_metric_view_builder = MetricBigQueryViewBuilder( dataset_id=self.mock_dataset.dataset_id, view_id="test_view", view_query_template="SELECT NULL LIMIT 0", dimensions=tuple(), ) self.view_buidlers_for_dataset = [ self.mock_view_builder, self.mock_metric_view_builder, ] self.output_uri_template_for_dataset = { "dataset_id": "gs://{project_id}-dataset-location/subdirectory", } self.views_to_update = { self.mock_dataset_id: self.view_buidlers_for_dataset } self.mock_export_name = "MOCK_EXPORT_NAME" self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE self.metric_dataset_export_configs = [ ExportViewCollectionConfig( view_builders_to_export=self.view_buidlers_for_dataset, output_directory_uri_template= "gs://{project_id}-dataset-location/subdirectory", state_code_filter=self.mock_state_code, export_name=self.mock_export_name, bq_view_namespace=self.mock_big_query_view_namespace, ) ] export_config_values = { "OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT": self.output_uri_template_for_dataset, "VIEW_COLLECTION_EXPORT_CONFIGS": self.metric_dataset_export_configs, } self.export_config_patcher = mock.patch( # type: ignore[call-overload] "recidiviz.metrics.export.view_export_manager.export_config", **export_config_values, ) self.mock_export_config = self.export_config_patcher.start() self.gcs_factory_patcher = mock.patch( "recidiviz.metrics.export.view_export_manager.GcsfsFactory.build") self.gcs_factory_patcher.start().return_value = FakeGCSFileSystem() def tearDown(self) -> None: self.client_patcher.stop() self.export_config_patcher.stop() self.metadata_patcher.stop() self.gcs_factory_patcher.stop() @mock.patch( "recidiviz.big_query.view_update_manager.rematerialize_views_for_namespace" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage( self, mock_view_exporter, mock_view_update_manager_rematerialize) -> None: """Tests the table is created from the view and then extracted.""" view_export_manager.export_view_data_to_cloud_storage( self.mock_state_code, mock_view_exporter) view = self.mock_view_builder.build() metric_view = self.mock_metric_view_builder.build() view_export_configs = [ ExportBigQueryViewConfig( view=view, view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name=f"{view.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ExportOutputFormatType.JSON], ), ExportBigQueryViewConfig( view=metric_view, view_filter_clause=" WHERE state_code = 'US_XX'", intermediate_table_name=f"{view.view_id}_table_US_XX", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-dataset-location/subdirectory/{state_code}" .format( project_id=self.mock_project_id, state_code="US_XX", )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.METRIC, ], ), ] mock_view_update_manager_rematerialize.assert_called() mock_view_exporter.export_and_validate.assert_has_calls( [ mock.call([]), # CSV export mock.call([ view_export_configs[1].pointed_to_staging_subdirectory() ]), # JSON export mock.call([ conf.pointed_to_staging_subdirectory() for conf in view_export_configs ]), # METRIC export ], any_order=True, ) @mock.patch( "recidiviz.big_query.view_update_manager.create_dataset_and_deploy_views_for_view_builders" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_raise_exception_no_export_matched( self, mock_view_exporter, mock_view_update_manager_rematerialize) -> None: # pylint: disable=unused-argument """Tests the table is created from the view and then extracted.""" self.mock_export_config.NAMESPACE_TO_UPDATE_FOR_EXPORT_FILTER = { "US_YY": "NAMESPACE" } with self.assertRaises(ValueError) as e: view_export_manager.export_view_data_to_cloud_storage( "US_YY", mock_view_exporter) self.assertEqual( str(e.exception), "Export filter did not match any export configs:", " US_YY", ) @mock.patch( "recidiviz.big_query.view_update_manager.rematerialize_views_for_namespace" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_state_agnostic( self, mock_view_exporter, mock_view_update_manager_rematerialize) -> None: """Tests the table is created from the view and then extracted, where the export is not state-specific.""" state_agnostic_dataset_export_configs = [ ExportViewCollectionConfig( view_builders_to_export=self.view_buidlers_for_dataset, output_directory_uri_template= "gs://{project_id}-bucket-without-state-codes", state_code_filter=None, export_name=self.mock_export_name, bq_view_namespace=self.mock_big_query_view_namespace, ), ] self.mock_export_config.VIEW_COLLECTION_EXPORT_CONFIGS = ( state_agnostic_dataset_export_configs) view_export_manager.export_view_data_to_cloud_storage( export_job_filter=self.mock_export_name, override_view_exporter=mock_view_exporter, ) view = self.mock_view_builder.build() metric_view = self.mock_metric_view_builder.build() view_export_configs = [ ExportBigQueryViewConfig( view=view, view_filter_clause=None, intermediate_table_name=f"{view.view_id}_table", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-bucket-without-state-codes".format( project_id=self.mock_project_id, )), export_output_formats=[ExportOutputFormatType.JSON], ), ExportBigQueryViewConfig( view=metric_view, view_filter_clause=None, intermediate_table_name=f"{view.view_id}_table", output_directory=GcsfsDirectoryPath.from_absolute_path( "gs://{project_id}-bucket-without-state-codes".format( project_id=self.mock_project_id, )), export_output_formats=[ ExportOutputFormatType.JSON, ExportOutputFormatType.METRIC, ], ), ] mock_view_update_manager_rematerialize.assert_called() mock_view_exporter.export_and_validate.assert_has_calls( [ mock.call([]), # CSV export mock.call([ view_export_configs[1].pointed_to_staging_subdirectory() ]), # JSON export mock.call([ conf.pointed_to_staging_subdirectory() for conf in view_export_configs ]), # METRIC export ], any_order=True, ) @mock.patch( "recidiviz.big_query.view_update_manager.rematerialize_views_for_namespace" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_value_error( self, mock_view_exporter, mock_view_update_manager_rematerialize) -> None: """Tests the table is created from the view and then extracted.""" mock_view_exporter.export_and_validate.side_effect = ValueError with self.assertRaises(ValueError): view_export_manager.export_view_data_to_cloud_storage( self.mock_state_code, mock_view_exporter) # Just the metric export is attempted and then the raise stops subsequent checks from happening mock_view_update_manager_rematerialize.assert_called_once() @mock.patch( "recidiviz.big_query.view_update_manager.rematerialize_views_for_namespace" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_validation_error( self, mock_view_exporter, mock_view_update_manager_rematerialize) -> None: """Tests the table is created from the view and then extracted.""" mock_view_exporter.export_and_validate.side_effect = ViewExportValidationError # Should not throw view_export_manager.export_view_data_to_cloud_storage( self.mock_state_code, mock_view_exporter) # Just the metric export is attempted and then the raise stops subsequent checks from happening mock_view_update_manager_rematerialize.assert_called_once() @mock.patch( "recidiviz.metrics.export.view_export_manager.view_update_manager.VIEW_BUILDERS_BY_NAMESPACE" ) @mock.patch( "recidiviz.big_query.view_update_manager.rematerialize_views_for_namespace" ) @mock.patch( "recidiviz.big_query.view_update_manager.create_dataset_and_deploy_views_for_view_builders" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_update_all_views( self, mock_view_exporter, mock_view_update_manager_deploy, mock_view_update_manager_rematerialize, mock_view_builders_by_namespace, ) -> None: """Tests that all views in the namespace are updated before the export when the export name is in export_config.NAMESPACES_REQUIRING_FULL_UPDATE.""" self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [ self.mock_big_query_view_namespace ] mock_view_builders_by_namespace.return_value = { self.mock_big_query_view_namespace: self.view_buidlers_for_dataset } view_export_manager.export_view_data_to_cloud_storage( self.mock_state_code, mock_view_exporter) mock_view_update_manager_deploy.assert_called_with( self.mock_big_query_view_namespace, mock_view_builders_by_namespace[ self.mock_big_query_view_namespace], ) mock_view_update_manager_rematerialize.assert_called_once() @mock.patch( "recidiviz.metrics.export.view_export_manager.view_update_manager.VIEW_BUILDERS_BY_NAMESPACE" ) @mock.patch( "recidiviz.big_query.view_update_manager.rematerialize_views_for_namespace" ) @mock.patch( "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter" ) def test_export_dashboard_data_to_cloud_storage_update_materialized_views_only( self, mock_view_exporter, mock_view_update_manager_rematerialize, mock_view_builders_by_namespace, ) -> None: """Tests that only materialized views in the namespace are updated before the export when the export name is not in export_config.NAMESPACES_REQUIRING_FULL_UPDATE.""" self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [ "OTHER_NAMESPACE" ] mock_view_builders_by_namespace.return_value = { self.mock_big_query_view_namespace: self.view_buidlers_for_dataset } view_export_manager.export_view_data_to_cloud_storage( self.mock_state_code, mock_view_exporter) mock_view_update_manager_rematerialize.assert_called_with( bq_view_namespace=self.mock_big_query_view_namespace, candidate_view_builders=mock_view_builders_by_namespace[ self.mock_big_query_view_namespace], )