def test_get_configs_for_export_name(
            self, mock_environment: mock.MagicMock) -> None:
        """Tests get_configs_for_export_name function to ensure that export names correctly match"""

        # Simulate running in the production environment.
        mock_environment.return_value = "production"
        export_configs_for_filter = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name,
            state_code=self.mock_state_code,
            project_id=self.mock_project_id,
        )
        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        # Expected result: one JSON-only config for the plain view and one
        # JSON+METRIC config for the metric view, both filtered by state code.
        expected_view_config_list = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=
                f" WHERE state_code = '{self.mock_state_code}'",
                intermediate_table_name=
                f"{view.view_id}_table_{self.mock_state_code}",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code=self.mock_state_code,
                    )),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=
                f" WHERE state_code = '{self.mock_state_code}'",
                # NOTE(review): uses view.view_id even though this config wraps
                # metric_view — presumably both mock builders share the same
                # view_id; confirm against the fixture definitions.
                intermediate_table_name=
                f"{view.view_id}_table_{self.mock_state_code}",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code=self.mock_state_code,
                    )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        self.assertEqual(expected_view_config_list, export_configs_for_filter)

        # Test for case insensitivity

        export_configs_for_filter = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name.lower(),
            state_code=self.mock_state_code.lower(),
            project_id=self.mock_project_id,
        )
        self.assertEqual(expected_view_config_list, export_configs_for_filter)
Beispiel #2
0
    def setUp(self) -> None:
        """Creates autospecced BQ collaborators, patches the project id, and
        prepares two canned export configs for the test views."""
        self.mock_bq_client = mock.create_autospec(BigQueryClient)
        self.mock_validator = mock.create_autospec(BigQueryViewExportValidator)

        self.mock_project_id = "fake-project"

        self.metadata_patcher = mock.patch(
            "recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = self.mock_project_id

        def _builder(view_id: str) -> SimpleBigQueryViewBuilder:
            # Both test views live in the same dataset with a trivial query.
            return SimpleBigQueryViewBuilder(
                dataset_id="test_dataset",
                view_id=view_id,
                view_query_template="SELECT NULL LIMIT 0",
            )

        self.view_builder = _builder("test_view")
        self.second_view_builder = _builder("test_view_2")

        def _config(builder, formats):
            # Wraps a builder's view in an export config targeting the shared
            # US_XX output subdirectory.
            return ExportBigQueryViewConfig(
                view=builder.build(),
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=f"{builder.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=formats,
            )

        self.view_export_configs = [
            _config(self.view_builder, [
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.HEADERLESS_CSV,
            ]),
            _config(self.second_view_builder, [
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.CSV,
            ]),
        ]
    def test_export_dashboard_data_to_cloud_storage(
            self, mock_view_exporter,
            mock_view_update_manager_rematerialize) -> None:
        """Tests the table is created from the view and then extracted."""
        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_state_code, mock_view_exporter)

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        # Configs the export is expected to have produced: JSON for the plain
        # view, JSON+METRIC for the metric view, both filtered to US_XX.
        view_export_configs = [
            ExportBigQueryViewConfig(
                view=view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=f"{view.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                view=metric_view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                # NOTE(review): view.view_id (not metric_view.view_id) —
                # presumably both mock builders share the same view_id; confirm.
                intermediate_table_name=f"{view.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        mock_view_update_manager_rematerialize.assert_called()
        # One export_and_validate call is expected per output format, each
        # receiving staging copies of the configs matching that format.
        mock_view_exporter.export_and_validate.assert_has_calls(
            [
                mock.call([]),  # CSV export
                mock.call([
                    view_export_configs[1].pointed_to_staging_subdirectory()
                ]),  # JSON export
                mock.call([
                    conf.pointed_to_staging_subdirectory()
                    for conf in view_export_configs
                ]),  # METRIC export
            ],
            any_order=True,
        )
    def _export_optimized_format(
        self,
        export_config: ExportBigQueryViewConfig,
        formatted: OptimizedMetricRepresentation,
        storage_client: storage.Client,
    ) -> GcsfsFilePath:
        """Uploads the optimized metric representation to Cloud Storage per the
        export configuration, returning the path the file was written to.
        """
        destination = export_config.output_path(extension="txt")

        logging.info(
            "Writing optimized metric file %s to GCS bucket %s...",
            destination.blob_name,
            destination.bucket_name,
        )

        target_blob = storage.Blob.from_string(destination.uri(),
                                               client=storage_client)
        # Metadata and payload are both produced with should_compress=True.
        self._set_format_metadata(formatted, target_blob, should_compress=True)
        payload = self._produce_transmission_format(formatted,
                                                    should_compress=True)
        target_blob.upload_from_string(payload, content_type="text/plain")

        logging.info(
            "Optimized metric file %s written to GCS bucket %s.",
            destination.blob_name,
            destination.bucket_name,
        )

        return destination
Beispiel #5
0
    def export_configs_for_views_to_export(
            self, project_id: str) -> Sequence[ExportBigQueryViewConfig]:
        """Builds a list of ExportBigQueryViewConfig that define how all views in
        view_builders_to_export should be exported to Google Cloud Storage."""
        if self.state_code_filter:
            view_filter_clause = (
                f" WHERE state_code = '{self.state_code_filter}'")
        else:
            view_filter_clause = None

        intermediate_table_name = "{export_view_name}_table"
        output_directory = self.output_directory_uri_template.format(
            project_id=project_id)

        # A state filter also scopes the intermediate table and output folder.
        if self.state_code_filter:
            intermediate_table_name += f"_{self.state_code_filter}"
            output_directory += f"/{self.state_code_filter}"

        # export_output_formats is only forwarded when explicitly configured,
        # so unset collections fall back to the config class's default.
        extra_kwargs = {}
        if self.export_output_formats is not None:
            extra_kwargs["export_output_formats"] = self.export_output_formats

        return [
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause=view_filter_clause,
                intermediate_table_name=intermediate_table_name.format(
                    export_view_name=built_view.view_id),
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    output_directory),
                **extra_kwargs,
            )
            for built_view in (builder.build()
                               for builder in self.view_builders_to_export)
        ]
 def test_noop_without_staging(self) -> None:
     """A path that never pointed at the staging subdirectory reverts to
     itself — only a 'staging/' directory prefix is stripped, not a file
     name that merely starts with 'staging'."""
     non_staging_file = GcsfsFilePath.from_directory_and_file_name(
         self.config_with_path("gnarly").output_directory,
         "staging_results.txt")
     reverted = ExportBigQueryViewConfig.revert_staging_path_to_original(
         non_staging_file)
     self.assertEqual(
         reverted,
         GcsfsFilePath.from_absolute_path(
             "gs://gnarly/staging_results.txt"),
     )
Beispiel #7
0
 def config_with_path(self, path: str) -> ExportBigQueryViewConfig:
     """Builds a minimal export config whose output bucket is ``path``."""
     placeholder_view = SimpleBigQueryViewBuilder(
         dataset_id="test_dataset",
         view_id="test_view",
         view_query_template="you know",
     ).build()
     return ExportBigQueryViewConfig(
         view=placeholder_view,
         view_filter_clause="WHERE state_code = 'US_XX'",
         intermediate_table_name="tubular",
         output_directory=GcsfsDirectoryPath.from_absolute_path(
             f"gs://{path}"),
     )
    def test_json_throws(self) -> None:
        """export() should raise ValueError for this config list — presumably
        because the second config requests only the METRIC format, which the
        JSON-lines exporter cannot produce; confirm against the exporter."""
        exporter = JsonLinesBigQueryViewExporter(self.mock_bq_client,
                                                 self.mock_validator)
        view_export_configs = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_bq_view_namespace,
                view=self.view_builder.build(),
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=
                f"{self.view_builder.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.HEADERLESS_CSV,
                ],
            ),
            # This config requests no JSON output at all.
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_bq_view_namespace,
                view=self.second_view_builder.build(),
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=
                f"{self.second_view_builder.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=[ExportOutputFormatType.METRIC],
            ),
        ]

        with self.assertRaises(ValueError):
            exporter.export(view_export_configs)
    def test_happy_path(self) -> None:
        """Round trip: a file under the staging subdirectory has the expected
        absolute path, and reverting it restores the original location."""
        staged_directory = self.config_with_path(
            "gnarly").pointed_to_staging_subdirectory().output_directory
        staged_file = GcsfsFilePath.from_directory_and_file_name(
            staged_directory, "foo.txt")
        self.assertEqual(staged_file.abs_path(), "gnarly/staging/foo.txt")

        reverted = ExportBigQueryViewConfig.revert_staging_path_to_original(
            staged_file)
        self.assertEqual(
            reverted,
            GcsfsFilePath.from_absolute_path("gs://gnarly/foo.txt"),
        )
    def export(
        self, export_configs: Sequence[ExportBigQueryViewConfig]
    ) -> List[GcsfsFilePath]:
        """Runs every delegate view exporter against staging copies of the
        given configs, then copies each staged result to its final location
        and deletes the staged copy.

        Returns the list of final (non-staging) output paths.
        """
        logging.info("Starting composite BigQuery view export.")

        # All delegates write into the staging subdirectory; results are
        # promoted to their final paths only after every delegate finishes.
        staging_configs = [
            c.pointed_to_staging_subdirectory() for c in export_configs
        ]

        all_staging_paths: List[GcsfsFilePath] = []
        for delegate in self.delegate_view_exporters:
            logging.info(
                "Beginning staged export of results for view exporter delegate [%s]",
                delegate.__class__)
            all_staging_paths.extend(
                delegate.export_and_validate(staging_configs))
            logging.info(
                "Completed staged export of results for view exporter delegate [%s]",
                delegate.__class__)

        logging.info("Copying staged export results to final location")

        final_paths = []
        for staged_path in all_staging_paths:
            destination = ExportBigQueryViewConfig.revert_staging_path_to_original(
                staged_path)
            self.fs.copy(staged_path, destination)
            final_paths.append(destination)

        logging.info("Deleting staged copies of the final output paths")
        for staged_path in all_staging_paths:
            self.fs.delete(staged_path)

        logging.info("Completed composite BigQuery view export.")
        return final_paths
    def setUp(self) -> None:
        """Patches the project id and prepares two staged export output paths
        built from metric-view export configs."""
        self.metadata_patcher = patch("recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = "project-id"

        self.mock_bq_view_namespace = BigQueryViewNamespace.STATE

        def _staging_config(view_id, description, query, dims, table, uri):
            # Builds a metric view and wraps it in a staging export config.
            built_view = MetricBigQueryViewBuilder(
                dataset_id="dataset",
                view_id=view_id,
                description=description,
                view_query_template=query,
                dimensions=dims,
            ).build()
            return ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_bq_view_namespace,
                view=built_view,
                view_filter_clause="WHERE state_code = 'US_XX'",
                intermediate_table_name=table,
                output_directory=GcsfsDirectoryPath.from_absolute_path(uri),
            )

        export_config_one_staging = _staging_config(
            "view1", "view1 description", "select * from table",
            ("a", "b", "c"), "intermediate_table",
            "gs://bucket1/staging/US_XX")
        export_config_two_staging = _staging_config(
            "view2", "view2 description", "select * from view2",
            ("d", "e", "f"), "intermediate_table2",
            "gs://bucket2/staging/US_XX")

        self.staging_paths = [
            export_config_one_staging.output_path("txt"),
            export_config_two_staging.output_path("txt"),
        ]
Beispiel #12
0
    def setUp(self) -> None:
        """Patches the project id and prepares two staged export output paths.

        NOTE(review): near-duplicate of another setUp in this file that uses
        double quotes, tuple dimensions, a description argument, and a
        bq_view_namespace — presumably an older copy; consider consolidating.
        """
        self.metadata_patcher = patch('recidiviz.utils.metadata.project_id')
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = 'project-id'

        metric_view_one = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view1',
            view_query_template='select * from table',
            dimensions=['a', 'b', 'c'],
        ).build()

        export_config_one_staging = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause='WHERE state_code = \'US_XX\'',
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/staging/US_XX'),
        )

        metric_view_two = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view2',
            view_query_template='select * from view2',
            dimensions=['d', 'e', 'f'],
        ).build()

        export_config_two_staging = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause='WHERE state_code = \'US_XX\'',
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/staging/US_XX'),
        )

        # Paths the exporter is expected to write its staged .txt output to.
        self.staging_paths = [
            export_config_one_staging.output_path('txt'),
            export_config_two_staging.output_path('txt')
        ]
Beispiel #13
0
    def test_convert_happy_path(self):
        """Converts a paged BigQuery query result into the optimized
        value-matrix representation and verifies both the converted output and
        the paging interactions with the (mocked) BigQuery client."""
        mock_bq_client = create_autospec(BigQueryClient)

        mock_dataset_ref = create_autospec(bigquery.DatasetReference)

        # A real Table with the schema the exporter will read column names from.
        table_ref = bigquery.TableReference(mock_dataset_ref, "test_view")
        schema_fields = [
            bigquery.SchemaField("district", "STRING"),
            bigquery.SchemaField("year", "STRING"),
            bigquery.SchemaField("month", "STRING"),
            bigquery.SchemaField("supervision_type", "STRING"),
            bigquery.SchemaField("total_revocations", "STRING"),
        ]
        table = bigquery.Table(table_ref, schema_fields)

        mock_bq_client.dataset_ref_for_id.return_value = mock_dataset_ref
        mock_bq_client.get_table.return_value = table

        all_rows = _transform_dicts_to_bq_row(_DATA_POINTS)

        # result() is stubbed twice because it is called twice — see the
        # assert_has_calls at the bottom of this test.
        mock_query_job = create_autospec(bigquery.QueryJob)
        mock_query_job.result.side_effect = [
            all_rows,
            all_rows,
        ]

        def fake_paged_process_fn(
            query_job: bigquery.QueryJob,
            _page_size: int,
            process_fn: Callable[[bigquery.table.Row], None],
        ) -> None:
            # Stand-in for BigQueryClient.paged_read_and_process: feeds every
            # row from the mocked job through the supplied callback.
            for row in query_job.result(
                    max_results=optimized_metric_big_query_view_exporter.
                    QUERY_PAGE_SIZE,
                    start_index=0,
            ):
                process_fn(row)

        mock_bq_client.paged_read_and_process.side_effect = fake_paged_process_fn
        mock_validator = create_autospec(
            OptimizedMetricBigQueryViewExportValidator)

        view_exporter = OptimizedMetricBigQueryViewExporter(
            mock_bq_client, mock_validator)

        export_config = ExportBigQueryViewConfig(
            view=MetricBigQueryViewBuilder(
                dataset_id="test_dataset",
                view_id="test_view",
                view_query_template="you know",
                dimensions=("district", "year", "month", "supervision_type"),
            ).build(),
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="tubular",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://gnarly/blob"),
        )

        optimized_representation = (
            view_exporter.convert_query_results_to_optimized_value_matrix(
                mock_query_job, export_config))
        # Expected fixtures (_DATA_VALUES etc.) are defined at module level.
        expected = OptimizedMetricRepresentation(
            value_matrix=_DATA_VALUES,
            dimension_manifest=_DIMENSION_MANIFEST,
            value_keys=_VALUE_KEYS,
        )

        self.assertEqual(expected, optimized_representation)

        mock_query_job.result.assert_has_calls([
            call(
                max_results=optimized_metric_big_query_view_exporter.
                QUERY_PAGE_SIZE,
                start_index=0,
            ),
            call(
                max_results=optimized_metric_big_query_view_exporter.
                QUERY_PAGE_SIZE,
                start_index=0,
            ),
        ])

        mock_bq_client.paged_read_and_process.assert_called()
        mock_bq_client.dataset_ref_for_id.assert_called()
        mock_bq_client.get_table.assert_called()
Beispiel #14
0
    def test_export_staging_delegate_validation_failed(self) -> None:
        """When one delegate exporter's export_and_validate raises
        ViewExportValidationError, export_views_with_exporters must propagate
        that error to the caller.

        Fix: the original created and configured delegate_one/delegate_two
        once, then immediately re-created both mocks, silently discarding the
        first configuration — that dead first setup block is removed.
        """
        metric_view_one = MetricBigQueryViewBuilder(
            dataset_id="dataset",
            view_id="view1",
            view_query_template="select * from table",
            dimensions=("a", "b", "c"),
        ).build()

        export_config_one = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket1/US_XX"),
        )
        export_config_one_staging = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket1/staging/US_XX"),
        )

        metric_view_two = MetricBigQueryViewBuilder(
            dataset_id="dataset",
            view_id="view2",
            view_query_template="select * from view2",
            dimensions=("d", "e", "f"),
        ).build()

        export_config_two = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table2",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket2/US_XX"),
        )
        export_config_two_staging = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table2",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket2/staging/US_XX"),
        )

        mock_fs = create_autospec(GCSFileSystem)

        # The JSON delegate succeeds and reports its staged output paths.
        delegate_one = create_autospec(BigQueryViewExporter)
        delegate_one_staging_paths = [
            export_config_one_staging.output_path("json"),
            export_config_two_staging.output_path("json"),
        ]
        delegate_one.export_and_validate.return_value = delegate_one_staging_paths

        # The METRIC delegate fails validation.
        delegate_two = create_autospec(BigQueryViewExporter)
        delegate_two.export_and_validate.side_effect = ViewExportValidationError(
            "Validation failed")

        # Make the actual call
        with pytest.raises(ViewExportValidationError) as e:
            export_views_with_exporters(
                mock_fs,
                [export_config_one, export_config_two],
                {
                    ExportOutputFormatType.JSON: delegate_one,
                    ExportOutputFormatType.METRIC: delegate_two,
                },
            )

        self.assertIn("Validation failed", str(e.value))
    def test_export_dashboard_data_to_cloud_storage_state_agnostic(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests the table is created from the view and then extracted, where the export is not state-specific."""
        # Override the export index with a collection whose output bucket has
        # no state-code placeholder and which declares no state filter.
        state_agnostic_dataset_export_configs = {
            self.mock_export_name:
            ExportViewCollectionConfig(
                view_builders_to_export=self.view_builders_for_dataset,
                output_directory_uri_template=
                "gs://{project_id}-bucket-without-state-codes",
                export_name=self.mock_export_name,
                bq_view_namespace=self.mock_big_query_view_namespace,
            ),
        }

        self.mock_export_config.VIEW_COLLECTION_EXPORT_INDEX = (
            state_agnostic_dataset_export_configs)

        view_export_manager.export_view_data_to_cloud_storage(
            export_job_name=self.mock_export_name,
            override_view_exporter=mock_view_exporter,
        )

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        # Expected configs: no view_filter_clause and no state-code suffix on
        # the intermediate table name or output directory.
        view_export_configs = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=None,
                intermediate_table_name=f"{view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-bucket-without-state-codes".format(
                        project_id=self.mock_project_id, )),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=None,
                # NOTE(review): view.view_id (not metric_view.view_id) —
                # presumably both mock builders share the same view_id; confirm.
                intermediate_table_name=f"{view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-bucket-without-state-codes".format(
                        project_id=self.mock_project_id, )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        mock_view_update_manager_rematerialize.assert_called()

        # One export_and_validate call per output format, over staging copies.
        mock_view_exporter.export_and_validate.assert_has_calls(
            [
                mock.call([]),  # CSV export
                mock.call([
                    view_export_configs[1].pointed_to_staging_subdirectory()
                ]),  # JSON export
                mock.call([
                    conf.pointed_to_staging_subdirectory()
                    for conf in view_export_configs
                ]),  # METRIC export
            ],
            any_order=True,
        )
Beispiel #16
0
    def test_export_happy_path(self) -> None:
        """End-to-end success path: both delegates export to the staging
        subdirectory, then every staged file is copied to its final location
        and the staged copy is deleted."""
        metric_view_one = MetricBigQueryViewBuilder(
            dataset_id="dataset",
            view_id="view1",
            view_query_template="select * from table",
            dimensions=("a", "b", "c"),
        ).build()

        export_config_one = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket1/US_XX"),
        )
        export_config_one_staging = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket1/staging/US_XX"),
        )

        metric_view_two = MetricBigQueryViewBuilder(
            dataset_id="dataset",
            view_id="view2",
            view_query_template="select * from view2",
            dimensions=("d", "e", "f"),
        ).build()

        export_config_two = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table2",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket2/US_XX"),
        )
        export_config_two_staging = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause="WHERE state_code = 'US_XX'",
            intermediate_table_name="intermediate_table2",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://bucket2/staging/US_XX"),
        )

        mock_fs = create_autospec(GCSFileSystem)

        # All staged paths report as existing on the filesystem.
        mock_fs.exists.return_value = True

        # JSON delegate produces the staged .json files...
        delegate_one = create_autospec(BigQueryViewExporter)
        delegate_one_staging_paths = [
            export_config_one_staging.output_path("json"),
            export_config_two_staging.output_path("json"),
        ]
        delegate_one.export_and_validate.return_value = delegate_one_staging_paths

        # ...and the METRIC delegate produces the staged .txt files.
        delegate_two = create_autospec(BigQueryViewExporter)
        delegate_two_staging_paths = [
            export_config_one_staging.output_path("txt"),
            export_config_two_staging.output_path("txt"),
        ]
        delegate_two.export_and_validate.return_value = delegate_two_staging_paths

        # Make the actual call
        export_views_with_exporters(
            mock_fs,
            [export_config_one, export_config_two],
            {
                ExportOutputFormatType.JSON: delegate_one,
                ExportOutputFormatType.METRIC: delegate_two,
            },
        )

        # Assert all mocks called as expected
        delegate_one.export_and_validate.assert_has_calls([
            call([export_config_one_staging, export_config_two_staging]),
        ])

        delegate_two.export_and_validate.assert_has_calls([
            call([export_config_one_staging, export_config_two_staging]),
        ])

        # Every staged file is promoted to the same path minus "staging/"...
        mock_fs.copy.assert_has_calls(
            [
                call(
                    GcsfsFilePath(bucket_name="bucket1",
                                  blob_name="staging/US_XX/view1.json"),
                    GcsfsFilePath(bucket_name="bucket1",
                                  blob_name="US_XX/view1.json"),
                ),
                call(
                    GcsfsFilePath(bucket_name="bucket2",
                                  blob_name="staging/US_XX/view2.json"),
                    GcsfsFilePath(bucket_name="bucket2",
                                  blob_name="US_XX/view2.json"),
                ),
                call(
                    GcsfsFilePath(bucket_name="bucket1",
                                  blob_name="staging/US_XX/view1.txt"),
                    GcsfsFilePath(bucket_name="bucket1",
                                  blob_name="US_XX/view1.txt"),
                ),
                call(
                    GcsfsFilePath(bucket_name="bucket2",
                                  blob_name="staging/US_XX/view2.txt"),
                    GcsfsFilePath(bucket_name="bucket2",
                                  blob_name="US_XX/view2.txt"),
                ),
            ],
            any_order=True,
        )

        # ...and the staged originals are deleted afterwards.
        mock_fs.delete.assert_has_calls(
            [
                call(
                    GcsfsFilePath(bucket_name="bucket1",
                                  blob_name="staging/US_XX/view1.json")),
                call(
                    GcsfsFilePath(bucket_name="bucket2",
                                  blob_name="staging/US_XX/view2.json")),
                call(
                    GcsfsFilePath(bucket_name="bucket1",
                                  blob_name="staging/US_XX/view1.txt")),
                call(
                    GcsfsFilePath(bucket_name="bucket2",
                                  blob_name="staging/US_XX/view2.txt")),
            ],
            any_order=True,
        )
# Beispiel #17
    def test_export_happy_path(self) -> None:
        """Exports two views through two delegate exporters and verifies that
        every staging file is copied to its final location, the staging copy is
        deleted, and the final output's existence is checked."""
        # Two metric views backed by distinct datasets/buckets.
        view_one = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view1',
            view_query_template='select * from table',
            dimensions=['a', 'b', 'c'],
        ).build()
        view_two = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view2',
            view_query_template='select * from view2',
            dimensions=['d', 'e', 'f'],
        ).build()

        filter_clause = 'WHERE state_code = \'US_XX\''

        # Final-destination configs and their staging-directory counterparts.
        config_one = ExportBigQueryViewConfig(
            view=view_one,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/US_XX'),
        )
        staging_config_one = ExportBigQueryViewConfig(
            view=view_one,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/staging/US_XX'),
        )
        config_two = ExportBigQueryViewConfig(
            view=view_two,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/US_XX'),
        )
        staging_config_two = ExportBigQueryViewConfig(
            view=view_two,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/staging/US_XX'),
        )

        bq_client = create_autospec(BigQueryClient)
        fs = create_autospec(GCSFileSystem)
        # All final outputs "exist", so the post-copy validation passes.
        fs.exists.return_value = True

        # Each delegate reports its own staging output paths.
        json_delegate = create_autospec(BigQueryViewExporter)
        json_delegate.export_and_validate.return_value = [
            staging_config_one.output_path('json'),
            staging_config_two.output_path('json'),
        ]
        txt_delegate = create_autospec(BigQueryViewExporter)
        txt_delegate.export_and_validate.return_value = [
            staging_config_one.output_path('txt'),
            staging_config_two.output_path('txt'),
        ]

        # Make the actual call
        exporter = CompositeBigQueryViewExporter(
            bq_client, fs, [json_delegate, txt_delegate])
        exporter.export_and_validate([config_one, config_two])

        # Both delegates should have been invoked with the staging configs.
        staging_call = call([staging_config_one, staging_config_two])
        json_delegate.export_and_validate.assert_has_calls([staging_call])
        txt_delegate.export_and_validate.assert_has_calls([staging_call])

        def _path(bucket: str, blob: str) -> GcsfsFilePath:
            # Small shorthand for building expected GCS paths.
            return GcsfsFilePath(bucket_name=bucket, blob_name=blob)

        # Every staging file is copied to its final location...
        fs.copy.assert_has_calls([
            call(_path('bucket1', 'staging/US_XX/view1.json'),
                 _path('bucket1', 'US_XX/view1.json')),
            call(_path('bucket2', 'staging/US_XX/view2.json'),
                 _path('bucket2', 'US_XX/view2.json')),
            call(_path('bucket1', 'staging/US_XX/view1.txt'),
                 _path('bucket1', 'US_XX/view1.txt')),
            call(_path('bucket2', 'staging/US_XX/view2.txt'),
                 _path('bucket2', 'US_XX/view2.txt')),
        ])

        # ...then removed from staging...
        fs.delete.assert_has_calls([
            call(_path('bucket1', 'staging/US_XX/view1.json')),
            call(_path('bucket2', 'staging/US_XX/view2.json')),
            call(_path('bucket1', 'staging/US_XX/view1.txt')),
            call(_path('bucket2', 'staging/US_XX/view2.txt')),
        ])

        # ...and the final outputs are existence-checked.
        fs.exists.assert_has_calls([
            call(_path('bucket1', 'US_XX/view1.json')),
            call(_path('bucket2', 'US_XX/view2.json')),
            call(_path('bucket1', 'US_XX/view1.txt')),
            call(_path('bucket2', 'US_XX/view2.txt')),
        ])
# Beispiel #18
    def test_export_final_existence_validation_failed(self) -> None:
        """Tests that when every delegate's validation passes but the final
        existence check fails, the composite exporter raises after copying and
        cleaning up staging files, and stops at the first failed check."""
        # Two metric views backed by distinct datasets/buckets.
        view_one = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view1',
            view_query_template='select * from table',
            dimensions=['a', 'b', 'c'],
        ).build()
        view_two = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view2',
            view_query_template='select * from view2',
            dimensions=['d', 'e', 'f'],
        ).build()

        filter_clause = 'WHERE state_code = \'US_XX\''

        # Final-destination configs and their staging-directory counterparts.
        config_one = ExportBigQueryViewConfig(
            view=view_one,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/US_XX'),
        )
        staging_config_one = ExportBigQueryViewConfig(
            view=view_one,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/staging/US_XX'),
        )
        config_two = ExportBigQueryViewConfig(
            view=view_two,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/US_XX'),
        )
        staging_config_two = ExportBigQueryViewConfig(
            view=view_two,
            view_filter_clause=filter_clause,
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/staging/US_XX'),
        )

        bq_client = create_autospec(BigQueryClient)
        fs = create_autospec(GCSFileSystem)

        # This should cause export_and_validate to raise a ValueError
        fs.exists.return_value = False

        # Each delegate reports its own staging output paths.
        json_delegate = create_autospec(BigQueryViewExporter)
        json_delegate.export_and_validate.return_value = [
            staging_config_one.output_path('json'),
            staging_config_two.output_path('json'),
        ]
        txt_delegate = create_autospec(BigQueryViewExporter)
        txt_delegate.export_and_validate.return_value = [
            staging_config_one.output_path('txt'),
            staging_config_two.output_path('txt'),
        ]

        # Make the actual call
        exporter = CompositeBigQueryViewExporter(
            bq_client, fs, [json_delegate, txt_delegate])

        with pytest.raises(ViewExportValidationError) as e:
            exporter.export_and_validate([config_one, config_two])

        # We get an error at the very end of the export chain because even though delegate validations passed, the
        # final validation failed
        self.assertIn(
            'Validation on path bucket1/US_XX/view1.json failed the metric file export. '
            'Stopping execution here.', str(e.value))

        # The delegate exporters validations all passed so we still copy from staging to final
        staging_call = call([staging_config_one, staging_config_two])
        json_delegate.export_and_validate.assert_has_calls([staging_call])
        txt_delegate.export_and_validate.assert_has_calls([staging_call])

        def _path(bucket: str, blob: str) -> GcsfsFilePath:
            # Small shorthand for building expected GCS paths.
            return GcsfsFilePath(bucket_name=bucket, blob_name=blob)

        fs.copy.assert_has_calls([
            call(_path('bucket1', 'staging/US_XX/view1.json'),
                 _path('bucket1', 'US_XX/view1.json')),
            call(_path('bucket2', 'staging/US_XX/view2.json'),
                 _path('bucket2', 'US_XX/view2.json')),
            call(_path('bucket1', 'staging/US_XX/view1.txt'),
                 _path('bucket1', 'US_XX/view1.txt')),
            call(_path('bucket2', 'staging/US_XX/view2.txt'),
                 _path('bucket2', 'US_XX/view2.txt')),
        ])

        fs.delete.assert_has_calls([
            call(_path('bucket1', 'staging/US_XX/view1.json')),
            call(_path('bucket2', 'staging/US_XX/view2.json')),
            call(_path('bucket1', 'staging/US_XX/view1.txt')),
            call(_path('bucket2', 'staging/US_XX/view2.txt')),
        ])

        # Only one call to the Exists validation made because the first one failed
        fs.exists.assert_has_calls([
            call(_path('bucket1', 'US_XX/view1.json')),
        ])
# Beispiel #19
    def test_export_staging_delegate_validation_failed(self) -> None:
        """Tests that a ViewExportValidationError raised by a delegate exporter
        propagates out of the composite exporter's export_and_validate call.

        Fix: the original body created and configured both delegate mocks and
        then immediately re-created them (discarding the first set-up entirely);
        that dead duplicate set-up has been removed.
        """
        metric_view_one = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view1',
            view_query_template='select * from table',
            dimensions=['a', 'b', 'c'],
        ).build()

        export_config_one = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause='WHERE state_code = \'US_XX\'',
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/US_XX'),
        )
        export_config_one_staging = ExportBigQueryViewConfig(
            view=metric_view_one,
            view_filter_clause='WHERE state_code = \'US_XX\'',
            intermediate_table_name='intermediate_table',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket1/staging/US_XX'),
        )

        metric_view_two = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view2',
            view_query_template='select * from view2',
            dimensions=['d', 'e', 'f'],
        ).build()

        export_config_two = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause='WHERE state_code = \'US_XX\'',
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/US_XX'),
        )
        export_config_two_staging = ExportBigQueryViewConfig(
            view=metric_view_two,
            view_filter_clause='WHERE state_code = \'US_XX\'',
            intermediate_table_name='intermediate_table2',
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                'gs://bucket2/staging/US_XX'),
        )

        mock_bq_client = create_autospec(BigQueryClient)
        mock_fs = create_autospec(GCSFileSystem)

        # First delegate succeeds and returns its staging output paths.
        delegate_one = create_autospec(BigQueryViewExporter)
        delegate_one_staging_paths = [
            export_config_one_staging.output_path('json'),
            export_config_two_staging.output_path('json')
        ]
        delegate_one.export_and_validate.return_value = delegate_one_staging_paths

        # Second delegate fails its validation, which should abort the export.
        delegate_two = create_autospec(BigQueryViewExporter)
        delegate_two.export_and_validate.side_effect = ViewExportValidationError(
            'Validation failed')

        # Make the actual call
        exporter = CompositeBigQueryViewExporter(mock_bq_client, mock_fs,
                                                 [delegate_one, delegate_two])

        with pytest.raises(ViewExportValidationError) as e:
            exporter.export_and_validate(
                [export_config_one, export_config_two])

        self.assertIn('Validation failed', str(e.value))