Example #1
0
    def test_extraction(self) -> None:
        """Check that one mocked usage row is extracted, then nothing more.

        ``SQLAlchemyExtractor._get_connection`` is patched so the connection's
        ``execute`` hands back a single canned row. The first ``extract()``
        result must match the expected ``TableColumnUsage`` (compared by repr)
        and the second ``extract()`` call must return ``None``.
        """
        # Canned result set served by the mocked ``connection.execute``.
        usage_rows = [{
            'database': 'gold',
            'schema': 'scm',
            'name': 'foo',
            'user_email': '*****@*****.**',
            'read_count': 1
        }]

        with patch.object(SQLAlchemyExtractor, '_get_connection') as get_connection_mock:
            fake_connection = MagicMock()
            fake_connection.execute = MagicMock(return_value=usage_rows)
            get_connection_mock.return_value = fake_connection

            # The row's 'database' value is expected in the ``cluster`` field;
            # ``database='snowflake'`` is presumably driven by self.conf -- TODO confirm.
            expected_reader = ColumnReader(database='snowflake',
                                           cluster='gold',
                                           schema='scm',
                                           table='foo',
                                           column='*',
                                           user_email='*****@*****.**',
                                           read_count=1)
            expected_usage = TableColumnUsage(col_readers=[expected_reader])

            usage_extractor = GenericUsageExtractor()
            usage_extractor.init(self.conf)
            first_result = usage_extractor.extract()

            self.assertEqual(repr(expected_usage), repr(first_result))
            # A second call must signal that there is nothing left to extract.
            self.assertIsNone(usage_extractor.extract())
    def test(self):
        # type: () -> None
        """Transform one SQL statement record into a ``TableColumnUsage``.

        ``HiveTableMetadataExtractor.extract`` and ``.init`` are patched; the
        patched ``extract`` yields one ``TableMetadata`` for ``test_table1``
        followed by ``None``. The transformer output is compared with the
        expected usage by repr.
        """
        transformer_conf = ConfigFactory.from_dict({
            SqlToTblColUsageTransformer.DATABASE_NAME: 'database',
            SqlToTblColUsageTransformer.USER_EMAIL_ATTRIBUTE_NAME: 'email',
            SqlToTblColUsageTransformer.SQL_STATEMENT_ATTRIBUTE_NAME: 'statement'
        })

        # (name, description, type) for each column; the list index is passed
        # as the fourth positional argument of ColumnMetadata.
        column_specs = [
            ('test_id1', 'description of test_table1', 'bigint'),
            ('test_id2', 'description of test_id2', 'bigint'),
            ('is_active', None, 'boolean'),
            ('source', 'description of source', 'varchar'),
            ('etl_created_at', 'description of etl_created_at', 'timestamp'),
            ('ds', None, 'varchar'),
        ]

        with patch.object(HiveTableMetadataExtractor, 'extract') as extract_mock:
            with patch.object(HiveTableMetadataExtractor, 'init'):
                columns = [
                    ColumnMetadata(col_name, col_description, col_type, position)
                    for position, (col_name, col_description, col_type)
                    in enumerate(column_specs)
                ]
                table_metadata = TableMetadata('hive', 'gold', 'test_schema1',
                                               'test_table1', 'test_table1',
                                               columns)
                # One table, then None as the final value handed out by the mock.
                extract_mock.side_effect = [table_metadata, None]

                usage_transformer = SqlToTblColUsageTransformer()
                usage_transformer.init(transformer_conf)

                record = Foo(email='*****@*****.**',
                             statement='SELECT foo, bar FROM test_table1')
                actual_usage = usage_transformer.transform(record)

                expected_reader = ColumnReader(database=u'database',
                                               cluster=u'gold',
                                               schema='test_schema1',
                                               table='test_table1',
                                               column='*',
                                               user_email='*****@*****.**')
                expected_usage = TableColumnUsage(col_readers=[expected_reader])

                self.assertEqual(repr(expected_usage), repr(actual_usage))
Example #3
0
    def test_aggregate(self):
        # type: () -> None
        """Aggregate several mocked usages into a single ``TableColumnUsage``.

        ``init`` and ``transform`` are patched on both
        ``RegexStrReplaceTransformer`` and ``SqlToTblColUsageTransformer``.
        The mocked raw extractor yields four records and then ``None``; the
        patched SQL transform returns three usages and then ``None``. The
        extractor's output is compared by repr with an expected usage holding
        ``test_table1`` at ``read_count=3`` and ``test_table2`` at
        ``read_count=5``.
        """
        def build_reader(table, **extra):
            # Every reader in this test shares everything except the table
            # name and (optionally) the read count.
            return ColumnReader(database='database',
                                cluster='gold',
                                schema='test_schema1',
                                table=table,
                                column='*',
                                user_email='*****@*****.**',
                                **extra)

        with patch.object(RegexStrReplaceTransformer, 'init'),\
                patch.object(SqlToTblColUsageTransformer, 'init'),\
                patch.object(RegexStrReplaceTransformer, 'transform'),\
                patch.object(SqlToTblColUsageTransformer, 'transform') as sql_transform_mock:

            # Just to iterate 5 times: four records, then None.
            extract_mock = MagicMock(
                side_effect=['foo', 'bar', 'foo', 'bar', None])

            raw_extractor = MagicMock()
            raw_extractor.extract = extract_mock
            raw_extractor.get_scope.return_value = 'foo'

            agg_conf = ConfigFactory.from_dict({RAW_EXTRACTOR: raw_extractor})

            # The first reader omits read_count and relies on ColumnReader's
            # default. NOTE(review): expected total of 3 for test_table1
            # suggests that default is 1 -- confirm against ColumnReader.
            sql_transform_mock.side_effect = [
                TableColumnUsage(col_readers=[build_reader('test_table1')]),
                TableColumnUsage(
                    col_readers=[build_reader('test_table1', read_count=2)]),
                TableColumnUsage(
                    col_readers=[build_reader('test_table2', read_count=5)]),
                None,
            ]

            agg_extractor = TblColUsgAggExtractor()
            agg_extractor.init(agg_conf)
            actual_usage = agg_extractor.extract()

            expected_usage = TableColumnUsage(col_readers=[
                build_reader('test_table1', read_count=3),
                build_reader('test_table2', read_count=5),
            ])

            self.assertEqual(repr(expected_usage), repr(actual_usage))