Beispiel #1
0
    def test_extraction_with_database_specified(self) -> None:
        """
        Check that the extracted record reports ``self.database_key`` as its db.

        The SQLAlchemy connection is replaced with a mock whose ``execute``
        returns a single row, so one record is expected followed by ``None``.
        """
        row = {
            'schema': 'test_schema',
            'table_name': 'test_table',
            'last_updated_time': 1000,
            'cluster': 'MY_CLUSTER',
        }

        with patch.object(SQLAlchemyExtractor, '_get_connection') as get_connection_mock:
            # Any statement executed through the patched connection yields the row above.
            fake_connection = MagicMock()
            get_connection_mock.return_value = fake_connection
            fake_connection.execute = MagicMock(return_value=[row])

            extractor = SnowflakeTableLastUpdatedExtractor()
            extractor.init(self.conf)
            first_record = extractor.extract()

            wanted = TableLastUpdated(
                schema='test_schema',
                table_name='test_table',
                last_updated_time_epoch=1000,
                db=self.database_key,
                cluster='MY_CLUSTER',
            )
            # Records are compared through their repr, as elsewhere in this suite.
            self.assertEqual(repr(wanted), repr(first_record))

            # The single mocked row has been consumed; nothing further is produced.
            self.assertIsNone(extractor.extract())
Beispiel #2
0
    def test_extraction_with_single_result(self) -> None:
        """
        Extract a single table using the cluster value read from ``self.conf``.

        The expected record hard-codes ``db='snowflake'`` and
        ``cluster='MY_CLUSTER'``, so the test also pins what the fixture
        configuration is presumed to contain.
        """
        with patch.object(SQLAlchemyExtractor, '_get_connection') as get_connection_mock:
            fake_connection = MagicMock()
            get_connection_mock.return_value = fake_connection

            # Look up the configured cluster so the mocked row mirrors the config.
            cluster_conf_key = 'extractor.snowflake_table_last_updated.{}'.format(
                SnowflakeTableLastUpdatedExtractor.CLUSTER_KEY)
            row = {
                'schema': 'test_schema',
                'table_name': 'test_table',
                'last_updated_time': 1000,
                'cluster': self.conf[cluster_conf_key],
            }
            fake_connection.execute = MagicMock(return_value=[row])

            extractor = SnowflakeTableLastUpdatedExtractor()
            extractor.init(self.conf)
            first_record = extractor.extract()

            wanted = TableLastUpdated(
                schema='test_schema',
                table_name='test_table',
                last_updated_time_epoch=1000,
                db='snowflake',
                cluster='MY_CLUSTER',
            )
            self.assertEqual(repr(wanted), repr(first_record))

            # Only one row was mocked, so the next call signals exhaustion.
            self.assertIsNone(extractor.extract())
    def test_extraction_with_partition_table_result(self) -> None:
        """
        Extract last-updated records for two partitioned tables.

        Both underlying SQLAlchemy extractors are replaced with mocks: the
        partitioned one yields two rows and the non-partitioned one yields
        nothing. Two ``TableLastUpdated`` records are expected, followed by
        ``None``.
        """
        config_dict = {
            'filesystem.{}'.format(FileSystem.DASK_FILE_SYSTEM): MagicMock()
        }
        conf = ConfigFactory.from_dict(config_dict)

        pt_alchemy_extractor_instance = MagicMock()
        non_pt_alchemy_extractor_instance = MagicMock()
        with patch.object(HiveTableLastUpdatedExtractor, '_get_partitioned_table_sql_alchemy_extractor',
                          return_value=pt_alchemy_extractor_instance),\
            patch.object(HiveTableLastUpdatedExtractor, '_get_non_partitioned_table_sql_alchemy_extractor',
                         return_value=non_pt_alchemy_extractor_instance):
            # The trailing None marks exhaustion. Without it the mock raises
            # StopIteration on the third call once its side_effect list runs
            # out; if that happens inside a generator, Python 3.7+ turns it
            # into RuntimeError (PEP 479).
            pt_alchemy_extractor_instance.extract = MagicMock(
                side_effect=[{
                    'schema': 'foo_schema',
                    'table_name': 'table_1',
                    'last_updated_time': 1
                }, {
                    'schema': 'foo_schema',
                    'table_name': 'table_2',
                    'last_updated_time': 2
                }, None])

            non_pt_alchemy_extractor_instance.extract = MagicMock(
                return_value=None)

            extractor = HiveTableLastUpdatedExtractor()
            extractor.init(conf)

            result = extractor.extract()
            expected = TableLastUpdated(schema='foo_schema',
                                        table_name='table_1',
                                        last_updated_time_epoch=1,
                                        db='hive',
                                        cluster='gold')
            self.assertEqual(repr(expected), repr(result))

            result = extractor.extract()
            expected = TableLastUpdated(schema='foo_schema',
                                        table_name='table_2',
                                        last_updated_time_epoch=2,
                                        db='hive',
                                        cluster='gold')
            self.assertEqual(repr(expected), repr(result))

            # Both mocked sources are drained, so the extractor reports completion.
            self.assertIsNone(extractor.extract())
Beispiel #4
0
    def test_extraction(self) -> None:
        """
        Extract the last-updated time of a non-partitioned table from file metadata.

        The filesystem mock lists two files with different timestamps; the
        expected epoch (1542168723) corresponds to ``newer``, the more recent of
        the two. The partitioned extractor is stubbed to return nothing.
        """
        older = datetime(2018, 8, 14, 4, 12, 3, tzinfo=UTC)
        newer = datetime(2018, 11, 14, 4, 12, 3, tzinfo=UTC)

        # Fake filesystem: two files under the table location, one per timestamp.
        fake_fs = MagicMock()
        fake_fs.ls.return_value = ['/foo/bar', '/foo/baz']
        fake_fs.is_file.return_value = True
        fake_fs.info.side_effect = [
            FileMetadata(path='/foo/bar', last_updated=older, size=15093),
            FileMetadata(path='/foo/baz', last_updated=newer, size=15094),
        ]

        partitioned_source = MagicMock()
        non_partitioned_source = MagicMock()

        patch_partitioned = patch.object(
            HiveTableLastUpdatedExtractor,
            '_get_partitioned_table_sql_alchemy_extractor',
            return_value=partitioned_source)
        patch_non_partitioned = patch.object(
            HiveTableLastUpdatedExtractor,
            '_get_non_partitioned_table_sql_alchemy_extractor',
            return_value=non_partitioned_source)
        patch_filesystem = patch.object(
            HiveTableLastUpdatedExtractor,
            '_get_filesystem',
            return_value=fake_fs)

        with patch_partitioned, patch_non_partitioned, patch_filesystem:
            # No partitioned tables at all.
            partitioned_source.extract.return_value = None

            # One non-partitioned table, wrapped by the null_iterator helper.
            table_rows = [
                {
                    'schema': 'foo_schema',
                    'table_name': 'table_1',
                    'location': '/foo/bar'
                },
            ]
            non_partitioned_source.extract.side_effect = null_iterator(table_rows)

            extractor = HiveTableLastUpdatedExtractor()
            extractor.init(ConfigFactory.from_dict({}))

            record = extractor.extract()
            wanted = TableLastUpdated(
                schema='foo_schema',
                table_name='table_1',
                last_updated_time_epoch=1542168723,
                db='hive',
                cluster='gold',
            )
            self.assertEqual(repr(record), repr(wanted))

            self.assertIsNone(extractor.extract())
Beispiel #5
0
    def test_extraction_with_multiple_result(self) -> None:
        """
        Extract three tables in order using the cluster value read from ``self.conf``.

        Each mocked row must come back as a ``TableLastUpdated`` with
        ``db='snowflake'`` and ``cluster='MY_CLUSTER'``, in the order the rows
        were supplied, after which the extractor returns ``None``.
        """
        # (schema, table_name, last_updated_time) for every mocked row, in order.
        table_specs = [
            ('test_schema1', 'test_table1', 1000),
            ('test_schema1', 'test_table2', 2000),
            ('test_schema2', 'test_table3', 3000),
        ]

        with patch.object(SQLAlchemyExtractor, '_get_connection') as get_connection_mock:
            fake_connection = MagicMock()
            get_connection_mock.return_value = fake_connection

            cluster_conf_key = 'extractor.snowflake_table_last_updated.{}'.format(
                SnowflakeTableLastUpdatedExtractor.CLUSTER_KEY)
            configured_cluster = self.conf[cluster_conf_key]

            rows = [
                {
                    'schema': schema,
                    'table_name': table_name,
                    'last_updated_time': updated_at,
                    'cluster': configured_cluster,
                }
                for schema, table_name, updated_at in table_specs
            ]
            fake_connection.execute = MagicMock(return_value=rows)

            extractor = SnowflakeTableLastUpdatedExtractor()
            extractor.init(self.conf)

            # One extract() call per mocked row, checked in the same order.
            for schema, table_name, updated_at in table_specs:
                wanted = TableLastUpdated(
                    schema=schema,
                    table_name=table_name,
                    last_updated_time_epoch=updated_at,
                    db='snowflake',
                    cluster='MY_CLUSTER',
                )
                self.assertEqual(repr(wanted), repr(extractor.extract()))

            # All rows consumed.
            self.assertIsNone(extractor.extract())