def test_empty_dataset(self, mock_build: Any) -> None:
     """extract() returns None when the mock client is built from ONE_DATASET and NO_TABLES."""
     mock_build.return_value = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)

     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)

     self.assertIsNone(bq_extractor.extract())
 def test_can_handle_datasets(self, mock_build: Any) -> None:
     """extract() returns None when the mock client is built from NO_DATASETS."""
     mock_build.return_value = MockBigQueryClient(NO_DATASETS, None, None)

     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)

     self.assertIsNone(bq_extractor.extract())
 def test_view(self, mock_build: Any) -> None:
     """The record extracted for the ONE_VIEW / VIEW_DATA fixtures is a TableMetadata flagged as a view."""
     mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_VIEW, VIEW_DATA)

     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)

     view_meta = bq_extractor.extract()
     self.assertIsInstance(view_meta, TableMetadata)
     self.assertEqual(view_meta.is_view, True)
    def test_accepts_dataset_filter_by_label(self, mock_build: Any) -> None:
        """With FILTER_KEY set to a label expression, extract() still returns a TableMetadata."""
        # Build a dedicated config instead of self.conf so the filter key is present.
        filtered_conf = ConfigFactory.from_dict({
            f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.PROJECT_ID_KEY}': 'your-project-here',
            f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.FILTER_KEY}': 'label.key:value'
        })

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=filtered_conf, scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        self.assertIsInstance(bq_extractor.extract(), TableMetadata)
    def test_keypath_and_pagesize_can_be_set(self, mock_build: Any) -> None:
        """init() raises FileNotFoundError when KEY_PATH_KEY is '/tmp/doesnotexist' and PAGE_SIZE_KEY is set."""
        custom_conf = ConfigFactory.from_dict({
            f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.PROJECT_ID_KEY}': 'your-project-here',
            f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.PAGE_SIZE_KEY}': 200,
            f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.KEY_PATH_KEY}': '/tmp/doesnotexist',
        })

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        bq_extractor = BigQueryMetadataExtractor()

        # Scoping and init both stay inside the context manager, as in the
        # original single-expression call.
        with self.assertRaises(FileNotFoundError):
            scoped_conf = Scoped.get_scoped_conf(conf=custom_conf, scope=bq_extractor.get_scope())
            bq_extractor.init(scoped_conf)
    def test_table_without_columns(self, mock_build: Any) -> None:
        """With the NO_COLS fixture, the record has the expected identifiers and an empty column list."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        table_meta = bq_extractor.extract()

        # Identifying fields of the extracted table.
        self.assertEqual(table_meta.database, 'bigquery')
        self.assertEqual(table_meta.cluster, 'your-project-here')
        self.assertEqual(table_meta.schema, 'fdgdfgh')
        self.assertEqual(table_meta.name, 'nested_recs')
        self.assertEqual(table_meta.description.text, "")

        # No columns, and not a view.
        self.assertEqual(table_meta.columns, [])
        self.assertEqual(table_meta.is_view, False)
    def test_table_part_of_table_date_range(self, mock_build: Any) -> None:
        """With the TABLE_DATE_RANGE fixture, exactly one record is extracted and it is named 'date_range_'."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        current = bq_extractor.extract()
        table_name = current.name

        # Drain the extractor, counting every truthy record it hands back.
        extracted = 0
        while current:
            extracted += 1
            current = bq_extractor.extract()

        self.assertEqual(extracted, 1)
        self.assertEqual(table_name, 'date_range_')
    def test_table_with_nested_records(self, mock_build: Any) -> None:
        """With the NESTED_DATA fixture, the first three columns carry dotted names for nested fields."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NESTED_DATA)

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        table_meta = bq_extractor.extract()

        # (name, type) expected at column positions 0, 1 and 2, in that order.
        expected_columns = (
            ('nested', 'RECORD'),
            ('nested.nested2', 'RECORD'),
            ('nested.nested2.ahah', 'STRING'),
        )
        for position, (expected_name, expected_type) in enumerate(expected_columns):
            # Index explicitly so a missing column raises instead of being skipped.
            column = table_meta.columns[position]
            self.assertEqual(column.name, expected_name)
            self.assertEqual(column.type, expected_type)
    def test_normal_table(self, mock_build: Any) -> None:
        """With the ONE_TABLE / TABLE_DATA fixtures, check table identifiers, the first column and is_view."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        table_meta = bq_extractor.extract()

        # Table-level fields.
        self.assertEqual(table_meta.database, 'bigquery')
        self.assertEqual(table_meta.cluster, 'your-project-here')
        self.assertEqual(table_meta.schema, 'fdgdfgh')
        self.assertEqual(table_meta.name, 'nested_recs')
        self.assertEqual(table_meta.description.text, "")

        # First column's fields.
        leading_column = table_meta.columns[0]
        self.assertEqual(leading_column.name, 'test')
        self.assertEqual(leading_column.type, 'STRING')
        self.assertEqual(leading_column.description.text, 'some_description')

        self.assertEqual(table_meta.is_view, False)
Ejemplo n.º 10
0
Archivo: db.py Proyecto: vrajat/dbcat
    def _create_big_query_extractor(
        source: CatSource,
    ) -> Tuple[BigQueryMetadataExtractor, Any]:
        """Create a BigQueryMetadataExtractor and a matching config for ``source``.

        Each config key is prefixed with the extractor's scope and takes its
        value from the corresponding attribute of ``source``.

        NOTE(review): there is no ``self``/``cls`` parameter; presumably a
        ``@staticmethod`` decorator sits just outside this view — confirm.

        Returns:
            A ``(extractor, conf)`` tuple; the extractor is not initialised here.
        """
        bq_extractor = BigQueryMetadataExtractor()
        prefix = bq_extractor.get_scope()

        # Un-scoped setting name -> value taken from the source.
        settings = {
            "connection_name": source.name,
            "key_path": source.key_path,
            "project_id": source.project_id,
            "project_credentials": source.project_credentials,
            "page_size": source.page_size,
            "filter_key": source.filter_key,
            "included_tables_regex": source.included_tables_regex,
        }
        scoped_settings = {f"{prefix}.{key}": value for key, value in settings.items()}

        return bq_extractor, ConfigFactory.from_dict(scoped_settings)