def test_extraction(self) -> None:
    """Verify that one mocked usage row is extracted as one ColumnReader.

    ``SQLAlchemyExtractor._get_connection`` is patched so that the
    connection's ``execute`` returns a single row dict. The first
    ``extract()`` result is compared (by ``repr``) with the expected
    ``TableColumnUsage``; the second ``extract()`` call must return None.
    """
    usage_row = {
        'database': 'gold',
        'schema': 'scm',
        'name': 'foo',
        'user_email': '*****@*****.**',
        'read_count': 1
    }
    expected_usage = TableColumnUsage(col_readers=[
        ColumnReader(database='snowflake',
                     cluster='gold',
                     schema='scm',
                     table='foo',
                     column='*',
                     user_email='*****@*****.**',
                     read_count=1)
    ])

    with patch.object(SQLAlchemyExtractor, '_get_connection') as get_connection_mock:
        # Fake connection whose execute() hands back the single row above.
        fake_connection = MagicMock()
        fake_connection.execute = MagicMock(return_value=[usage_row])
        get_connection_mock.return_value = fake_connection

        usage_extractor = GenericUsageExtractor()
        usage_extractor.init(self.conf)
        first_result = usage_extractor.extract()

        self.assertEqual(repr(expected_usage), repr(first_result))
        # Only one row was supplied, so a second extract() must yield None.
        self.assertIsNone(usage_extractor.extract())
def test(self):  # type: () -> None
    """Verify that a SQL statement record is transformed into a TableColumnUsage.

    ``HiveTableMetadataExtractor.extract`` is patched to return one
    ``TableMetadata`` for ``test_table1`` followed by None, and ``init`` is
    patched out. A ``Foo`` record carrying an email and a SELECT statement
    is transformed, and the result is compared (by ``repr``) with a single
    wildcard-column ``ColumnReader`` for ``test_table1``.
    """
    transformer_conf = ConfigFactory.from_dict({
        SqlToTblColUsageTransformer.DATABASE_NAME: 'database',
        SqlToTblColUsageTransformer.USER_EMAIL_ATTRIBUTE_NAME: 'email',
        SqlToTblColUsageTransformer.SQL_STATEMENT_ATTRIBUTE_NAME: 'statement'
    })

    # Positional arguments for each ColumnMetadata, in declaration order.
    column_specs = [
        ('test_id1', 'description of test_table1', 'bigint', 0),
        ('test_id2', 'description of test_id2', 'bigint', 1),
        ('is_active', None, 'boolean', 2),
        ('source', 'description of source', 'varchar', 3),
        ('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
        ('ds', None, 'varchar', 5),
    ]
    table_metadata = TableMetadata(
        'hive', 'gold', 'test_schema1', 'test_table1', 'test_table1',
        [ColumnMetadata(*spec) for spec in column_specs])

    expected_usage = TableColumnUsage(col_readers=[
        ColumnReader(database=u'database',
                     cluster=u'gold',
                     schema='test_schema1',
                     table='test_table1',
                     column='*',
                     user_email='*****@*****.**')
    ])

    with patch.object(HiveTableMetadataExtractor, 'extract') as extract_mock, \
            patch.object(HiveTableMetadataExtractor, 'init'):
        # One table, then None on the following call.
        extract_mock.side_effect = [table_metadata, None]

        usage_transformer = SqlToTblColUsageTransformer()
        usage_transformer.init(transformer_conf)

        record = Foo(email='*****@*****.**',
                     statement='SELECT foo, bar FROM test_table1')
        transformed = usage_transformer.transform(record)

        self.assertEqual(repr(expected_usage), repr(transformed))
def test_aggregate(self):  # type: () -> None
    """Verify that TblColUsgAggExtractor merges usages from the mocked transformer.

    The raw extractor and both transformers are mocked. The SQL transformer
    mock returns two usages for ``test_table1`` (default read count and
    ``read_count=2``), one usage for ``test_table2`` (``read_count=5``),
    then None. The single extracted result is expected to contain
    ``test_table1`` with ``read_count=3`` and ``test_table2`` with
    ``read_count=5``; the comparison is done on ``repr``.
    """
    def usage_for(table, **reader_kwargs):
        # Build a one-reader TableColumnUsage; only the table name and the
        # optional read_count differ between the fixtures in this test.
        return TableColumnUsage(col_readers=[
            ColumnReader(database='database',
                         cluster='gold',
                         schema='test_schema1',
                         table=table,
                         column='*',
                         user_email='*****@*****.**',
                         **reader_kwargs)
        ])

    with patch.object(RegexStrReplaceTransformer, 'init'), \
            patch.object(SqlToTblColUsageTransformer, 'init'), \
            patch.object(RegexStrReplaceTransformer, 'transform'), \
            patch.object(SqlToTblColUsageTransformer, 'transform') as sql_transform_mock:

        raw_source = MagicMock()
        # Successive extract() calls return four raw values and then None.
        raw_source.extract = MagicMock(
            side_effect=['foo', 'bar', 'foo', 'bar', None])
        raw_source.get_scope.return_value = 'foo'

        agg_conf = ConfigFactory.from_dict({RAW_EXTRACTOR: raw_source})

        sql_transform_mock.side_effect = [
            usage_for('test_table1'),
            usage_for('test_table1', read_count=2),
            usage_for('test_table2', read_count=5),
            None
        ]

        agg_extractor = TblColUsgAggExtractor()
        agg_extractor.init(agg_conf)
        aggregated = agg_extractor.extract()

        expected_usage = TableColumnUsage(col_readers=[
            ColumnReader(database='database',
                         cluster='gold',
                         schema='test_schema1',
                         table='test_table1',
                         column='*',
                         user_email='*****@*****.**',
                         read_count=3),
            ColumnReader(database='database',
                         cluster='gold',
                         schema='test_schema1',
                         table='test_table2',
                         column='*',
                         user_email='*****@*****.**',
                         read_count=5)
        ])

        self.assertEqual(repr(expected_usage), repr(aggregated))