Beispiel #1
0
    def test_keypath_can_be_set(self, mock_build: Any) -> None:
        """Expect ``init`` to raise ``FileNotFoundError`` for the key path ``/tmp/doesnotexist``.

        :param mock_build: mock whose ``return_value`` is replaced with a fake
            BigQuery client (presumably injected by a patch decorator that is
            not visible in this excerpt).
        """
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, None)

        # Build a dedicated config instead of using self.conf so the key path can be set.
        settings = ConfigFactory.from_dict({
            f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.PROJECT_ID_KEY}': 'your-project-here',
            f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.KEY_PATH_KEY}': '/tmp/doesnotexist',
        })
        watermark_extractor = BigQueryWatermarkExtractor()

        with self.assertRaises(FileNotFoundError):
            scoped_settings = Scoped.get_scoped_conf(conf=settings,
                                                     scope=watermark_extractor.get_scope())
            watermark_extractor.init(scoped_settings)
    def test_table_part_of_table_date_range(self, mock_build: Any) -> None:
        """Expect a low and then a high watermark for the ``TABLE_DATE_RANGE`` fixture.

        The first ``extract()`` call must yield the ``low_watermark`` record
        with part ``('__table__', '20190101')`` and the second the
        ``high_watermark`` record with part ``('__table__', '20190102')``;
        all other checked fields are identical for both records.

        :param mock_build: mock whose ``return_value`` is replaced with a fake
            BigQuery client (presumably injected by a patch decorator that is
            not visible in this excerpt).
        """
        mock_build.return_value = MockBigQueryClient(ONE_DATASET,
                                                     TABLE_DATE_RANGE, None)
        extractor = BigQueryWatermarkExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=self.conf,
                                   scope=extractor.get_scope()))

        expected_create_time = datetime.fromtimestamp(1557577779).strftime('%Y-%m-%d %H:%M:%S')
        # Records are expected in this order: low watermark first, then high.
        expected_watermarks = (
            ('low_watermark', '20190101'),
            ('high_watermark', '20190102'),
        )
        for expected_part_type, expected_suffix in expected_watermarks:
            result = extractor.extract()
            # Guard against a missing record so a failure is reported as an
            # assertion rather than an AttributeError on None.
            assert result is not None
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(result.part_type, expected_part_type)
            self.assertEqual(result.database, 'bigquery')
            self.assertEqual(result.schema, 'fdgdfgh')
            self.assertEqual(result.table, 'date_range_')
            self.assertEqual(result.cluster, 'your-project-here')
            self.assertEqual(result.create_time, expected_create_time)
            self.assertEqual(result.parts, [('__table__', expected_suffix)])
    def test_table_with_field_partitions(self, mock_build: Any) -> None:
        """Expect a low and then a high watermark for a field-partitioned table fixture.

        Using the ``TIME_PARTITIONED_WITH_FIELD`` and ``PARTITION_DATA``
        fixtures, the first ``extract()`` call must yield the
        ``low_watermark`` record with part ``('processed_date', '20180802')``
        and the second the ``high_watermark`` record with part
        ``('processed_date', '20180804')``; all other checked fields are
        identical for both records.

        :param mock_build: mock whose ``return_value`` is replaced with a fake
            BigQuery client (presumably injected by a patch decorator that is
            not visible in this excerpt).
        """
        mock_build.return_value = MockBigQueryClient(
            ONE_DATASET, TIME_PARTITIONED_WITH_FIELD, PARTITION_DATA)
        extractor = BigQueryWatermarkExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=self.conf,
                                   scope=extractor.get_scope()))

        expected_create_time = datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S')
        # Records are expected in this order: low watermark first, then high.
        expected_watermarks = (
            ('low_watermark', '20180802'),
            ('high_watermark', '20180804'),
        )
        for expected_part_type, expected_partition in expected_watermarks:
            result = extractor.extract()
            # Guard against a missing record so a failure is reported as an
            # assertion rather than an AttributeError on None.
            assert result is not None
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(result.part_type, expected_part_type)
            self.assertEqual(result.database, 'bigquery')
            self.assertEqual(result.schema, 'fdgdfgh')
            self.assertEqual(result.table, 'other')
            self.assertEqual(result.cluster, 'your-project-here')
            self.assertEqual(result.create_time, expected_create_time)
            self.assertEqual(result.parts, [('processed_date', expected_partition)])
Beispiel #4
0
 def test_table_without_partitions(self, mock_build: Any) -> None:
     """Expect ``extract()`` to return ``None`` for the ``ONE_TABLE`` fixture.

     :param mock_build: mock whose ``return_value`` is replaced with a fake
         BigQuery client (presumably injected by a patch decorator that is
         not visible in this excerpt).
     """
     mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, None)

     watermark_extractor = BigQueryWatermarkExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=watermark_extractor.get_scope())
     watermark_extractor.init(scoped_conf)

     self.assertIsNone(watermark_extractor.extract())
Beispiel #5
0
 def test_empty_dataset(self, mock_build: Any) -> None:
     """Expect ``extract()`` to return ``None`` when the client is built with ``NO_TABLES``.

     :param mock_build: mock whose ``return_value`` is replaced with a fake
         BigQuery client (presumably injected by a patch decorator that is
         not visible in this excerpt).
     """
     mock_build.return_value = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)

     watermark_extractor = BigQueryWatermarkExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=watermark_extractor.get_scope())
     watermark_extractor.init(scoped_conf)

     self.assertIsNone(watermark_extractor.extract())
Beispiel #6
0
 def test_can_handle_no_datasets(self, mock_build: Any) -> None:
     """Expect ``extract()`` to return ``None`` when the client is built with ``NO_DATASETS``.

     :param mock_build: mock whose ``return_value`` is replaced with a fake
         BigQuery client (presumably injected by a patch decorator that is
         not visible in this excerpt).
     """
     mock_build.return_value = MockBigQueryClient(NO_DATASETS, None, None)

     watermark_extractor = BigQueryWatermarkExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=watermark_extractor.get_scope())
     watermark_extractor.init(scoped_conf)

     self.assertIsNone(watermark_extractor.extract())
Beispiel #7
0
 def test_table_creation_time_after_cutoff_time(self,
                                                mock_build: Any) -> None:
     """Expect ``extract()`` to return ``None`` with a cutoff of ``2019-05-10T20:10:22Z``.

     Per the test name, the fixture table's creation time is assumed to lie
     after this cutoff; the fixture data itself is not visible in this
     excerpt.

     :param mock_build: mock whose ``return_value`` is replaced with a fake
         BigQuery client (presumably injected by a patch decorator that is
         not visible in this excerpt).
     """
     mock_build.return_value = MockBigQueryClient(ONE_DATASET,
                                                  TIME_PARTITIONED,
                                                  PARTITION_DATA)

     # Build a dedicated config instead of using self.conf so the cutoff time can be set.
     settings = ConfigFactory.from_dict({
         f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.PROJECT_ID_KEY}':
         'your-project-here',
         f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.CUTOFF_TIME_KEY}':
         '2019-05-10T20:10:22Z'
     })

     watermark_extractor = BigQueryWatermarkExtractor()
     scoped_settings = Scoped.get_scoped_conf(conf=settings,
                                              scope=watermark_extractor.get_scope())
     watermark_extractor.init(scoped_settings)

     self.assertIsNone(watermark_extractor.extract())
Beispiel #8
0
    def test_table_creation_time_before_cutoff_time(self,
                                                    mock_build: Any) -> None:
        """Expect both watermarks to be extracted with a cutoff of ``2021-04-27T20:10:22Z``.

        The first ``extract()`` call must yield the ``low_watermark`` record
        with part ``('_PARTITIONTIME', '20180802')`` and the second the
        ``high_watermark`` record with part ``('_PARTITIONTIME', '20180804')``.

        :param mock_build: mock whose ``return_value`` is replaced with a fake
            BigQuery client (presumably injected by a patch decorator that is
            not visible in this excerpt).
        """
        mock_build.return_value = MockBigQueryClient(ONE_DATASET,
                                                     TIME_PARTITIONED,
                                                     PARTITION_DATA)

        # Build a dedicated config instead of using self.conf so the cutoff time can be set.
        settings = ConfigFactory.from_dict({
            f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.PROJECT_ID_KEY}':
            'your-project-here',
            f'extractor.bigquery_watermarks.{BigQueryWatermarkExtractor.CUTOFF_TIME_KEY}':
            '2021-04-27T20:10:22Z'
        })

        watermark_extractor = BigQueryWatermarkExtractor()
        scoped_settings = Scoped.get_scoped_conf(conf=settings,
                                                 scope=watermark_extractor.get_scope())
        watermark_extractor.init(scoped_settings)

        # Both records are checked against the same formatted creation time.
        expected_create_time = datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S')

        low = watermark_extractor.extract()
        assert low is not None
        self.assertEqual(low.part_type, 'low_watermark')
        self.assertEqual(low.database, 'bigquery')
        self.assertEqual(low.schema, 'fdgdfgh')
        self.assertEqual(low.table, 'other')
        self.assertEqual(low.cluster, 'your-project-here')
        self.assertEqual(low.create_time, expected_create_time)
        self.assertEqual(low.parts, [('_PARTITIONTIME', '20180802')])

        high = watermark_extractor.extract()
        self.assertEqual(high.part_type, 'high_watermark')
        self.assertEqual(high.database, 'bigquery')
        self.assertEqual(high.schema, 'fdgdfgh')
        self.assertEqual(high.table, 'other')
        self.assertEqual(high.cluster, 'your-project-here')
        self.assertEqual(high.create_time, expected_create_time)
        self.assertEqual(high.parts, [('_PARTITIONTIME', '20180804')])