def test_email_filter_counted(self, mock_build: Any) -> None:
        """
        Test that a usage record is extracted and counted once when EMAIL_PATTERN
        is set to '.*@test.com.*' and the mocked client serves CORRECT_DATA
        """
        settings = ConfigFactory.from_dict({
            f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}':
            'your-project-here',
            f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.EMAIL_PATTERN}':
            '.*@test.com.*',
        })

        # The patched build function hands the extractor a canned logging client.
        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        usage_extractor = BigQueryTableUsageExtractor()
        scoped_settings = Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope())
        usage_extractor.init(scoped_settings)

        record = usage_extractor.extract()
        # Make sure a record came back before it is unpacked below.
        assert record is not None
        self.assertIsInstance(record, tuple)

        usage_key, read_count = record
        self.assertIsInstance(usage_key, TableColumnUsageTuple)
        self.assertIsInstance(read_count, int)

        self.assertEqual(usage_key.database, 'bigquery')
        self.assertEqual(usage_key.cluster, 'bigquery-public-data')
        self.assertEqual(usage_key.schema, 'austin_incidents')
        self.assertEqual(usage_key.table, 'incidents_2008')
        self.assertEqual(usage_key.email, '*****@*****.**')
        self.assertEqual(read_count, 1)
    def test_timestamp_pagesize_settings(self, mock_build: Any) -> None:
        """
        Test timestamp and pagesize can be set

        The extractor is only initialised (never extracted from); the test then
        inspects the keyword arguments of the call recorded on the mocked
        client's ``b.list`` and checks the request body built from the config.
        """
        timestamp = '2019-01-01T00:00:00.00Z'
        page_size = 215

        config_dict = {
            'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.PROJECT_ID_KEY):
            'your-project-here',
            'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.TIMESTAMP_KEY):
            timestamp,
            'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.PAGE_SIZE_KEY):
            page_size,
        }
        conf = ConfigFactory.from_dict(config_dict)

        client = MockLoggingClient(CORRECT_DATA)
        mock_build.return_value = client
        extractor = BigQueryTableUsageExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))

        # Only the keyword arguments of the recorded call matter here.
        _, kwargs = client.b.list.call_args
        body = kwargs['body']

        self.assertEqual(body['pageSize'], page_size)
        # assertIn reports both the timestamp and the filter text on failure,
        # unlike comparing a boolean against True.
        self.assertIn(timestamp, body['filter'])
Example #3
0
    def test_basic_extraction(self, mock_build):
        """
        Test that a single extract() call against the mock client yields one
        (TableColumnUsageTuple, int) pair with the expected fields and a count of 1
        """
        project_id_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
        settings = ConfigFactory.from_dict({project_id_key: 'your-project-here'})

        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        usage_extractor = BigQueryTableUsageExtractor()
        usage_extractor.init(
            Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope()),
        )

        record = usage_extractor.extract()
        self.assertIsInstance(record, tuple)

        usage_key, read_count = record
        self.assertIsInstance(usage_key, TableColumnUsageTuple)
        self.assertIsInstance(read_count, int)

        # Identity of the table that was read, and of the reader.
        self.assertEqual(usage_key.database, 'bigquery')
        self.assertEqual(usage_key.cluster, 'bigquery-public-data')
        self.assertEqual(usage_key.schema, 'austin_incidents')
        self.assertEqual(usage_key.table, 'incidents_2008')
        self.assertEqual(usage_key.email, '*****@*****.**')
        self.assertEqual(read_count, 1)
    def test_key_path(self, mock_build: Any) -> None:
        """
        Test key_path can be used

        A decoded key file is written to a temporary file whose path is passed
        via KEY_PATH_KEY; the credentials found on the ``http`` keyword argument
        of the mocked build call are then checked.
        """
        project_id_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
        key_path_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.KEY_PATH_KEY}'

        with tempfile.NamedTemporaryFile() as key_file:
            # The key is stored base64 encoded so that the many github scanners looking for
            # API / cloud keys do not raise false positives on it.
            # It is decoded into a tempfile here and that file is used for the test.
            key_file.write(base64.b64decode(KEYFILE_DATA))
            key_file.flush()

            settings = ConfigFactory.from_dict({
                project_id_key: 'your-project-here',
                key_path_key: key_file.name,
            })

            mock_build.return_value = MockLoggingClient(CORRECT_DATA)
            usage_extractor = BigQueryTableUsageExtractor()
            scoped_settings = Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope())
            usage_extractor.init(scoped_settings)

            # Only the keyword arguments of the build call are of interest.
            _, build_kwargs = mock_build.call_args
            credentials = build_kwargs['http'].credentials
            self.assertEqual(credentials.project_id, 'your-project-here')
            self.assertEqual(credentials.service_account_email, '*****@*****.**')
Example #5
0
    def test_counting_referenced_table_belonging_to_different_project(
            self, mock_build: Any) -> None:
        """
        Test that a referenced table from a project other than the extractor's PROJECT_ID_KEY
        is still returned and counted once when COUNT_READS_ONLY_FROM_PROJECT_ID_KEY is False
        """
        scope_prefix = 'extractor.bigquery_table_usage'
        settings = ConfigFactory.from_dict({
            f'{scope_prefix}.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}': 'your-project-here',
            f'{scope_prefix}.{BigQueryTableUsageExtractor.COUNT_READS_ONLY_FROM_PROJECT_ID_KEY}': False,
        })

        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        usage_extractor = BigQueryTableUsageExtractor()
        scoped_settings = Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope())
        usage_extractor.init(scoped_settings)

        record = usage_extractor.extract()
        # Make sure a record came back before it is unpacked below.
        assert record is not None
        self.assertIsInstance(record, tuple)

        usage_key, read_count = record
        self.assertIsInstance(usage_key, TableColumnUsageTuple)
        self.assertIsInstance(read_count, int)

        # The cluster differs from the configured project id ('your-project-here').
        self.assertEqual(usage_key.database, 'bigquery')
        self.assertEqual(usage_key.cluster, 'bigquery-public-data')
        self.assertEqual(usage_key.schema, 'austin_incidents')
        self.assertEqual(usage_key.table, 'incidents_2008')
        self.assertEqual(usage_key.email, '*****@*****.**')
        self.assertEqual(read_count, 1)
    def test_no_entries(self, mock_build: Any) -> None:
        """
        Test that extract() returns None when the mocked client serves NO_ENTRIES
        """
        project_id_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
        settings = ConfigFactory.from_dict({project_id_key: 'your-project-here'})

        mock_build.return_value = MockLoggingClient(NO_ENTRIES)
        usage_extractor = BigQueryTableUsageExtractor()
        scoped_settings = Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope())
        usage_extractor.init(scoped_settings)

        self.assertIsNone(usage_extractor.extract())
    def test_email_filter_not_counted(self, mock_build: Any) -> None:
        """
        Test that extract() returns None for CORRECT_DATA when EMAIL_PATTERN is 'emailFilter'
        """
        settings = ConfigFactory.from_dict({
            f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}':
            'your-project-here',
            f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.EMAIL_PATTERN}':
            'emailFilter',
        })

        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        usage_extractor = BigQueryTableUsageExtractor()
        usage_extractor.init(
            Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope()),
        )

        record = usage_extractor.extract()
        self.assertIsNone(record)
Example #8
0
    def test_failed_jobs_should_not_be_counted(self, mock_build: Any) -> None:
        """
        Test that extract() returns None when the mocked client serves the FAILURE data,
        with the extractor's project id set to 'bigquery-public-data'
        """
        scope_prefix = 'extractor.bigquery_table_usage'
        settings = ConfigFactory.from_dict({
            f'{scope_prefix}.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}': 'bigquery-public-data',
        })

        failing_client = MockLoggingClient(FAILURE)
        mock_build.return_value = failing_client
        usage_extractor = BigQueryTableUsageExtractor()
        scoped_settings = Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope())
        usage_extractor.init(scoped_settings)

        self.assertIsNone(usage_extractor.extract())
Example #9
0
    def test_failed_jobs_should_not_be_counted(self, mock_build):
        """
        Test that extract() returns None when the mocked client serves the FAILURE data
        """
        project_id_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
        settings = ConfigFactory.from_dict({project_id_key: 'your-project-here'})

        failing_client = MockLoggingClient(FAILURE)
        mock_build.return_value = failing_client
        usage_extractor = BigQueryTableUsageExtractor()
        usage_extractor.init(
            Scoped.get_scoped_conf(conf=settings, scope=usage_extractor.get_scope()),
        )

        record = usage_extractor.extract()
        self.assertIsNone(record)
Example #10
0
    def test_not_counting_referenced_table_belonging_to_different_project(
            self, mock_build: Any) -> None:
        """
        Test result when referenced table belongs to a project different from the PROJECT_ID_KEY of the extractor

        Only PROJECT_ID_KEY is configured here, and the test expects extract()
        to return None for CORRECT_DATA.
        """
        config_dict = {
            f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}':
            'your-project-here',
        }
        conf = ConfigFactory.from_dict(config_dict)

        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        extractor = BigQueryTableUsageExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))

        result = extractor.extract()
        # Use the unittest assertion rather than a bare assert: it is not
        # stripped under ``python -O`` and matches the other None-result tests.
        self.assertIsNone(result)