def test_keypath_can_be_set(self, mock_build: Any) -> None:
    """
    Initialising the extractor with KEY_PATH_KEY set to '/tmp/doesnotexist'
    is expected to raise FileNotFoundError.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    conf = ConfigFactory.from_dict({
        f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.PROJECT_ID_KEY}': 'your-project-here',
        f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.KEY_PATH_KEY}': '/tmp/doesnotexist',
    })

    fake_client = MockBigQueryClient(ONE_DATASET, ONE_TABLE, None)
    mock_build.return_value = fake_client

    extractor = BigQueryWatermarkExtractor()
    # Everything that belongs to initialisation stays inside the context manager
    # so the expected error is caught wherever it is raised during init.
    with self.assertRaises(FileNotFoundError):
        extractor.init(
            Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
        )
def test_table_part_of_table_date_range(self, mock_build: Any) -> None:
    """
    Check the low and high watermarks extracted for the TABLE_DATE_RANGE fixture.

    The first extract() call is expected to return the 'low_watermark' record and
    the second the 'high_watermark' record, both for table 'date_range_' with parts
    keyed on '__table__'.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, None)
    extractor = BigQueryWatermarkExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope()))

    # Both watermark records are expected to carry the same creation timestamp.
    expected_create_time = datetime.fromtimestamp(1557577779).strftime('%Y-%m-%d %H:%M:%S')

    result = extractor.extract()
    # Fail with a clear assertion (rather than an AttributeError) if nothing was extracted.
    assert result is not None
    self.assertEqual(result.part_type, 'low_watermark')
    self.assertEqual(result.database, 'bigquery')
    self.assertEqual(result.schema, 'fdgdfgh')
    self.assertEqual(result.table, 'date_range_')
    self.assertEqual(result.cluster, 'your-project-here')
    self.assertEqual(result.create_time, expected_create_time)
    self.assertEqual(result.parts, [('__table__', '20190101')])

    result = extractor.extract()
    self.assertEqual(result.part_type, 'high_watermark')
    self.assertEqual(result.database, 'bigquery')
    self.assertEqual(result.schema, 'fdgdfgh')
    self.assertEqual(result.table, 'date_range_')
    self.assertEqual(result.cluster, 'your-project-here')
    self.assertEqual(result.create_time, expected_create_time)
    self.assertEqual(result.parts, [('__table__', '20190102')])
def test_table_with_field_partitions(self, mock_build: Any) -> None:
    """
    Check the low and high watermarks extracted for the TIME_PARTITIONED_WITH_FIELD fixture.

    The first extract() call is expected to return the 'low_watermark' record and
    the second the 'high_watermark' record, both for table 'other' with parts keyed
    on 'processed_date'.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, TIME_PARTITIONED_WITH_FIELD, PARTITION_DATA)
    extractor = BigQueryWatermarkExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope()))

    # Both watermark records are expected to carry the same creation timestamp.
    expected_create_time = datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S')

    result = extractor.extract()
    # Fail with a clear assertion (rather than an AttributeError) if nothing was extracted.
    assert result is not None
    self.assertEqual(result.part_type, 'low_watermark')
    self.assertEqual(result.database, 'bigquery')
    self.assertEqual(result.schema, 'fdgdfgh')
    self.assertEqual(result.table, 'other')
    self.assertEqual(result.cluster, 'your-project-here')
    self.assertEqual(result.create_time, expected_create_time)
    self.assertEqual(result.parts, [('processed_date', '20180802')])

    result = extractor.extract()
    self.assertEqual(result.part_type, 'high_watermark')
    self.assertEqual(result.database, 'bigquery')
    self.assertEqual(result.schema, 'fdgdfgh')
    self.assertEqual(result.table, 'other')
    self.assertEqual(result.cluster, 'your-project-here')
    self.assertEqual(result.create_time, expected_create_time)
    self.assertEqual(result.parts, [('processed_date', '20180804')])
def test_table_without_partitions(self, mock_build: Any) -> None:
    """
    With the ONE_TABLE fixture and no partition data handed to the mock client,
    extract() is expected to return None.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    fake_client = MockBigQueryClient(ONE_DATASET, ONE_TABLE, None)
    mock_build.return_value = fake_client

    extractor = BigQueryWatermarkExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope())
    extractor.init(scoped_conf)

    self.assertIsNone(extractor.extract())
def test_empty_dataset(self, mock_build: Any) -> None:
    """
    With the NO_TABLES fixture supplied for the dataset, extract() is expected
    to return None.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    fake_client = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)
    mock_build.return_value = fake_client

    extractor = BigQueryWatermarkExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope())
    extractor.init(scoped_conf)

    self.assertIsNone(extractor.extract())
def test_can_handle_no_datasets(self, mock_build: Any) -> None:
    """
    With the NO_DATASETS fixture (and no table or partition data), extract()
    is expected to return None.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    fake_client = MockBigQueryClient(NO_DATASETS, None, None)
    mock_build.return_value = fake_client

    extractor = BigQueryWatermarkExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope())
    extractor.init(scoped_conf)

    self.assertIsNone(extractor.extract())
def test_table_creation_time_after_cutoff_time(self, mock_build: Any) -> None:
    """
    With CUTOFF_TIME_KEY set to '2019-05-10T20:10:22Z', extract() is expected to
    return None for the TIME_PARTITIONED fixture.

    NOTE(review): the fixture's creation time is not visible here; per the test
    name it is presumably later than the configured cutoff - confirm.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    conf = ConfigFactory.from_dict({
        f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.PROJECT_ID_KEY}': 'your-project-here',
        f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.CUTOFF_TIME_KEY}': '2019-05-10T20:10:22Z',
    })

    fake_client = MockBigQueryClient(ONE_DATASET, TIME_PARTITIONED, PARTITION_DATA)
    mock_build.return_value = fake_client

    extractor = BigQueryWatermarkExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
    extractor.init(scoped_conf)

    self.assertIsNone(extractor.extract())
def test_table_creation_time_before_cutoff_time(self, mock_build: Any) -> None:
    """
    With CUTOFF_TIME_KEY set to '2021-04-27T20:10:22Z', the TIME_PARTITIONED fixture
    is expected to yield a 'low_watermark' record followed by a 'high_watermark'
    record for table 'other', with parts keyed on '_PARTITIONTIME'.

    :param mock_build: mock whose return value is replaced by a MockBigQueryClient
        (presumably supplied by a patch decorator outside this view).
    """
    conf = ConfigFactory.from_dict({
        f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.PROJECT_ID_KEY}': 'your-project-here',
        f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.CUTOFF_TIME_KEY}': '2021-04-27T20:10:22Z',
    })

    fake_client = MockBigQueryClient(ONE_DATASET, TIME_PARTITIONED, PARTITION_DATA)
    mock_build.return_value = fake_client

    extractor = BigQueryWatermarkExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
    extractor.init(scoped_conf)

    # Both watermark records are compared against the same formatted timestamp.
    expected_create_time = datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S')

    low_mark = extractor.extract()
    assert low_mark is not None
    self.assertEqual(low_mark.part_type, 'low_watermark')
    self.assertEqual(low_mark.database, 'bigquery')
    self.assertEqual(low_mark.schema, 'fdgdfgh')
    self.assertEqual(low_mark.table, 'other')
    self.assertEqual(low_mark.cluster, 'your-project-here')
    self.assertEqual(low_mark.create_time, expected_create_time)
    self.assertEqual(low_mark.parts, [('_PARTITIONTIME', '20180802')])

    high_mark = extractor.extract()
    self.assertEqual(high_mark.part_type, 'high_watermark')
    self.assertEqual(high_mark.database, 'bigquery')
    self.assertEqual(high_mark.schema, 'fdgdfgh')
    self.assertEqual(high_mark.table, 'other')
    self.assertEqual(high_mark.cluster, 'your-project-here')
    self.assertEqual(high_mark.create_time, expected_create_time)
    self.assertEqual(high_mark.parts, [('_PARTITIONTIME', '20180804')])