def test_extraction_with_multiple_views(self) -> None:
    """Two view rows are extracted one per call, in the order the query returned them.

    Each row carries its column list as base64-encoded JSON in
    ``view_original_text``; the expected column sort order follows the
    position of each column inside that JSON payload.
    """
    # (schema, view name, tbl_id, [(column name, column type), ...]) in returned-row order.
    view_specs = [
        ('test_schema2', 'test_view2', 2,
         [('xyy', 'varchar'), ('ab', 'double'), ('aaa', 'int'), ('xyz', 'varchar')]),
        ('test_schema1', 'test_view1', 1,
         [('xyz', 'varchar'), ('xyy', 'double'), ('aaa', 'int'), ('ab', 'varchar')]),
    ]
    rows = [
        {
            'tbl_id': tbl_id,
            'schema': schema,
            'name': view_name,
            'tbl_type': 'virtual_view',
            'view_original_text': base64.b64encode(
                json.dumps(
                    {'columns': [{'name': col, 'type': col_type} for col, col_type in col_specs]}
                ).encode()
            ).decode("utf-8"),
        }
        for schema, view_name, tbl_id, col_specs in view_specs
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        connection.execute = MagicMock(return_value=rows)
        mock_connection.return_value = connection

        extractor = PrestoViewMetadataExtractor()
        extractor.init(self.conf)

        for schema, view_name, _tbl_id, col_specs in view_specs:
            actual_view = extractor.extract()
            expected_view = TableMetadata(
                'presto', 'gold', schema, view_name, None,
                [ColumnMetadata(col, None, col_type, order)
                 for order, (col, col_type) in enumerate(col_specs)],
                True)
            self.assertEqual(repr(expected_view), repr(actual_view))

        # Both rows consumed: the extractor must now signal exhaustion.
        self.assertIsNone(extractor.extract())
def test_extraction_one_object(self, mock_salesforce: Any) -> None:
    """Extract a single configured object name and verify the resulting table metadata.

    :param mock_salesforce: mock supplied to the test from outside this
        function (presumably by a patch decorator -- confirm); calling it
        yields a ``MockSalesForce`` instance.
    """
    # Configure the mock exactly once; the original assigned the same
    # return value twice with nothing reading it in between.
    mock_salesforce.return_value = MockSalesForce()

    config_dict: Dict = {
        f"extractor.salesforce_metadata.{SalesForceExtractor.OBJECT_NAMES_KEY}": [
            "Account"
        ],
        **self.config,
    }
    conf = ConfigFactory.from_dict(config_dict)

    extractor = SalesForceExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))

    result = extractor.extract()
    self.assertIsInstance(result, TableMetadata)

    expected = TableMetadata(
        "salesforce",
        "gold",
        "default",
        "Account",
        None,
        [
            ColumnMetadata("Id", "The Account Id", "id", 0, []),
            ColumnMetadata("isDeleted", "Deleted?", "bool", 1, []),
        ],
        False,
        [],
    )
    self.assertEqual(repr(expected), repr(result))

    # Only "Account" was requested, so a second call must return None.
    self.assertIsNone(extractor.extract())
def test_extraction_with_database_specified(self):
    # type: () -> None
    """A single-column row yields table metadata whose database is ``self.database_key``."""
    row = {
        'schema': 'test_schema',
        'name': 'test_table',
        'description': 'a table for testing',
        'cluster': 'MY_CLUSTER',
        'is_view': 'false',
        'col_name': 'ds',
        'col_type': 'varchar',
        'col_description': None,
        'col_sort_order': 0
    }

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        connection.execute = MagicMock(return_value=[row])
        mock_connection.return_value = connection

        extractor = SnowflakeMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected_columns = [ColumnMetadata('ds', None, 'varchar', 0)]
        expected = TableMetadata(
            self.database_key, 'MY_CLUSTER', 'test_schema', 'test_table',
            'a table for testing', expected_columns)

        self.assertEqual(repr(expected), repr(actual))
        # The single row is consumed; nothing further to extract.
        self.assertIsNone(extractor.extract())
def test_extraction_with_partition_badge(self) -> None:
    """With a partition badge label configured, the partition key column carries that badge."""
    badge_conf = ConfigFactory.from_dict({
        GlueExtractor.PARTITION_BADGE_LABEL_KEY: "partition_key",
    })
    # (name, description, type) of the columns expected without any badge.
    unbadged_columns = [
        ('col_id1', 'description of id1', 'bigint'),
        ('col_id2', 'description of id2', 'bigint'),
        ('is_active', None, 'boolean'),
        ('source', 'description of source', 'varchar'),
        ('etl_created_at', 'description of etl_created_at', 'timestamp'),
        ('ds', None, 'varchar'),
    ]

    with patch.object(GlueExtractor, '_search_tables') as mock_search:
        mock_search.return_value = [test_table]

        extractor = GlueExtractor()
        extractor.init(conf=badge_conf)
        actual = extractor.extract()

        expected_columns = [
            ColumnMetadata(name, description, col_type, order)
            for order, (name, description, col_type) in enumerate(unbadged_columns)
        ]
        expected_columns.append(
            ColumnMetadata(
                'partition_key1',
                'description of partition_key1',
                'string',
                6,
                ["partition_key"],
            )
        )
        expected = TableMetadata(
            'glue', 'gold', 'test_schema', 'test_table', 'a table for testing',
            expected_columns, False)

        self.assertEqual(repr(expected), repr(actual))
def test_extraction_with_single_result(self):
    # type: () -> None
    """Six column rows belonging to one table collapse into a single TableMetadata."""
    # (column name, column type, column description), listed in sort order.
    column_specs = [
        ('col_id1', 'bigint', 'description of id1'),
        ('col_id2', 'bigint', 'description of id2'),
        ('is_active', 'boolean', None),
        ('source', 'varchar', 'description of source'),
        ('etl_created_at', 'timestamp', 'description of etl_created_at'),
        ('ds', 'varchar', None),
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection

        table = {
            'schema': 'test_schema',
            'name': 'test_table',
            'description': 'a table for testing',
            'cluster': self.conf['extractor.postgres_metadata.{}'.format(PostgresMetadataExtractor.CLUSTER_KEY)],
        }
        # Each query row is the column fields merged with the shared table fields.
        rows = [
            self._union(
                {'col_name': name,
                 'col_type': col_type,
                 'col_description': description,
                 'col_sort_order': order},
                table)
            for order, (name, col_type, description) in enumerate(column_specs)
        ]
        connection.execute = MagicMock(return_value=rows)

        extractor = PostgresMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected_columns = [
            ColumnMetadata(name, description, col_type, order)
            for order, (name, col_type, description) in enumerate(column_specs)
        ]
        expected = TableMetadata('postgres', 'MY_CLUSTER', 'test_schema', 'test_table',
                                 'a table for testing', expected_columns)

        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self, mock_connect: MagicMock) -> None:
    """
    Extraction of a single table: six cursor rows become one TableMetadata.
    """
    # Table-level values appended to every row, matching the trailing
    # entries of the cursor description below.
    # Annotated explicitly to avoid an "Unsupported operand types for +"
    # error on the list concatenations.
    table: List[Any] = [
        'DREMIO',
        'Production',
        'test_schema',
        'test_table',
        'a table for testing',
        'false'
    ]
    # [column name, column description, column type], listed in sort order.
    column_specs: List[List[Any]] = [
        ['col_id1', 'description of id1', 'number'],
        ['col_id2', 'description of id2', 'number'],
        ['is_active', None, 'boolean'],
        ['source', 'description of source', 'varchar'],
        ['etl_created_at', 'description of etl_created_at', 'timestamp_ltz'],
        ['ds', None, 'varchar'],
    ]
    expected_input: List[List[Any]] = [
        spec + [order] + table for order, spec in enumerate(column_specs)
    ]

    cursor = MagicMock()
    cursor.description = [
        [field] for field in (
            'col_name', 'col_description', 'col_type', 'col_sort_order',
            'database', 'cluster', 'schema', 'name', 'description', 'is_view',
        )
    ]
    cursor.execute = MagicMock(return_value=expected_input)

    connection = MagicMock()
    connection.cursor.return_value = cursor
    mock_connect.return_value = connection

    extractor = DremioMetadataExtractor()
    extractor.init(self.conf)
    actual = extractor.extract()

    expected = TableMetadata(
        'DREMIO', 'Production', 'test_schema', 'test_table', 'a table for testing',
        [
            ColumnMetadata(name, description, col_type, order)
            for order, (name, description, col_type) in enumerate(column_specs)
        ])

    self.assertEqual(repr(expected), repr(actual))
    self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self):
    # type: () -> None
    """One table returned by the patched search becomes one TableMetadata.

    Columns lacking a ``Comment`` entry are expected to have a ``None``
    description.
    """
    # (column name, column type, comment or None), in storage-descriptor order.
    column_specs = [
        ('col_id1', 'bigint', 'description of id1'),
        ('col_id2', 'bigint', 'description of id2'),
        ('is_active', 'boolean', None),
        ('source', 'varchar', 'description of source'),
        ('etl_created_at', 'timestamp', 'description of etl_created_at'),
        ('ds', 'varchar', None),
    ]
    glue_columns = []
    for name, col_type, comment in column_specs:
        column = {'Name': name, 'Type': col_type}
        if comment is not None:
            # Only commented columns carry the 'Comment' key at all.
            column['Comment'] = comment
        glue_columns.append(column)

    with patch.object(GlueExtractor, '_search_tables') as mock_search:
        mock_search.return_value = [{
            'Name': 'test_table',
            'DatabaseName': 'test_schema',
            'Description': 'a table for testing',
            'StorageDescriptor': {
                'Columns': glue_columns
            }
        }]

        extractor = GlueExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected = TableMetadata(
            'glue', 'gold', 'test_schema', 'test_table', 'a table for testing',
            [
                ColumnMetadata(name, comment, col_type, order)
                for order, (name, col_type, comment) in enumerate(column_specs)
            ])

        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())
def test_extraction_with_default_conf(self, mock_columns, mock_tables, mock_keyspaces):
    # type: (...) -> None
    """Extract one keyspace/table pair with two columns using the default config.

    The three mock parameters are supplied from outside this function
    (presumably by patch decorators on the method -- confirm); their
    return values are set here to one keyspace, one table and an ordered
    pair of columns.

    NOTE: the signature comment uses the ellipsis form because the
    previous ``() -> None`` declared no arguments for a method that
    takes three besides ``self``.
    """
    mock_keyspaces.return_value = {'test_schema': None}
    mock_tables.return_value = {'test_table': None}

    # Insertion order defines the expected column sort order below.
    columns_dict = OrderedDict()
    columns_dict['id'] = CassandraColumnMetadata(None, 'id', 'int')
    columns_dict['txt'] = CassandraColumnMetadata(None, 'txt', 'text')
    mock_columns.return_value = columns_dict

    extractor = CassandraExtractor()
    extractor.init(self.default_conf)
    actual = extractor.extract()

    expected = TableMetadata(
        'cassandra', 'gold', 'test_schema', 'test_table', None,
        [
            ColumnMetadata('id', None, 'int', 0),
            ColumnMetadata('txt', None, 'text', 1)
        ])
    self.assertEqual(repr(expected), repr(actual))

    # Only one table was mocked, so the extractor is now exhausted.
    self.assertIsNone(extractor.extract())
def test_extractor_sorted_es_v7(self) -> None:
    """With the sorted config and v7 fixtures, exactly one index is extracted."""
    extractor = self._get_extractor(self.config_sorted)
    # Stub the version lookup and the index listing with the v7 fixtures.
    extractor._get_es_version = lambda: self.es_version_v7
    extractor.es.indices.get = MagicMock(return_value=self.indices_v7)

    expected_columns = [
        ColumnMetadata('keyword_property', '', 'keyword', 0, []),
        ColumnMetadata('long_property', '', 'long', 1, []),
    ]
    expected = TableMetadata('elasticsearch', 'cluster_name', 'schema_name', 'proper_index',
                             None, expected_columns, False, [])

    # Drain the extractor until it yields a falsy entry.
    extracted = []
    entry = extractor.extract()
    while entry:
        extracted.append(entry)
        entry = extractor.extract()

    self.assertEqual(1, len(extracted))
    self.assertEqual(repr(expected), repr(extracted[0]))
def test_feature_view_extraction(self) -> None:
    """The first extracted record describes the ``driver_hourly_stats`` feature view."""
    self._init_extractor(programmatic_description_enabled=False)
    extracted = self.extractor.extract()

    # (column name, description, type), in expected sort order.
    column_specs = [
        ("driver_id", "Internal identifier of the driver", "INT64"),
        ("conv_rate", None, "FLOAT"),
        ("acc_rate", None, "FLOAT"),
        ("avg_daily_trips", None, "INT64"),
    ]
    expected = TableMetadata(
        database="feast",
        cluster="local",
        schema="fs",
        name="driver_hourly_stats",
        description=None,
        columns=[
            ColumnMetadata(name, description, col_type, order)
            for order, (name, description, col_type) in enumerate(column_specs)
        ],
    )

    self.assertEqual(repr(expected), repr(extracted))
def test_extraction_with_resource_link_result(self) -> None:
    """A search result with a ``TargetTable`` entry produces no TableMetadata of its own.

    The search returns the regular test table followed by a resource-link
    entry; only the regular table is expected, then ``None``.
    """
    resource_link = {
        "Name": "test_resource_link",
        "DatabaseName": "test_schema",
        "TargetTable": {
            "CatalogId": "111111111111",
            "DatabaseName": "test_schema_external",
            "Name": "test_table"
        },
        "CatalogId": "222222222222"
    }
    # (column name, description, type), in expected sort order.
    column_specs = [
        ('col_id1', 'description of id1', 'bigint'),
        ('col_id2', 'description of id2', 'bigint'),
        ('is_active', None, 'boolean'),
        ('source', 'description of source', 'varchar'),
        ('etl_created_at', 'description of etl_created_at', 'timestamp'),
        ('ds', None, 'varchar'),
        ('partition_key1', 'description of partition_key1', 'string'),
    ]

    with patch.object(GlueExtractor, '_search_tables') as mock_search:
        mock_search.return_value = [test_table, resource_link]

        extractor = GlueExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected = TableMetadata(
            'glue', 'gold', 'test_schema', 'test_table', 'a table for testing',
            [
                ColumnMetadata(name, description, col_type, order)
                for order, (name, description, col_type) in enumerate(column_specs)
            ],
            False)

        self.assertEqual(repr(expected), repr(actual))
        # The resource link must not surface as a second record.
        self.assertIsNone(extractor.extract())
def test_feature_table_extraction(self) -> None:
    """A mocked feature table in project "default" is extracted as one TableMetadata.

    Also checks that the entity ``driver_id`` was looked up on the client
    for project "default".
    """
    self._init_extractor(programmatic_description_enabled=False)
    self.extractor._client.list_projects.return_value = ["default"]
    self._mock_feature_table()

    extracted = self.extractor.extract()
    self.extractor._client.get_entity.assert_called_with("driver_id", project="default")

    expected_columns = [
        ColumnMetadata("driver_id", "Internal identifier of the driver", "INT64", 0),
        ColumnMetadata("trips_today", None, "INT32", 1),
    ]
    expected = TableMetadata(
        database="feast",
        cluster="unittest-feast-instance",
        schema="default",
        name="driver_trips",
        description=None,
        columns=expected_columns,
    )

    self.assertEqual(repr(expected), repr(extracted))
    self.assertIsNone(self.extractor.extract())
def test_extraction_with_multiple_result(self):
    # type: () -> None
    """Rows spanning three tables are grouped into three TableMetadata records, in row order.

    Each expected record is tagged with its own schema name.
    """
    cluster = self.conf['extractor.mssql_metadata.{}'.format(
        MSSQLMetadataExtractor.CLUSTER_KEY)]

    # (schema, table name, table description,
    #  [(column name, column type, column description), ...]) in query-row order.
    table_specs = [
        ('test_schema1', 'test_table1', 'test table 1', [
            ('col_id1', 'bigint', 'description of col_id1'),
            ('col_id2', 'bigint', 'description of col_id2'),
            ('is_active', 'boolean', None),
            ('source', 'varchar', 'description of source'),
            ('etl_created_at', 'timestamp', 'description of etl_created_at'),
            ('ds', 'varchar', None),
        ]),
        ('test_schema1', 'test_table2', 'test table 2', [
            ('col_name', 'varchar', 'description of col_name'),
            ('col_name2', 'varchar', 'description of col_name2'),
        ]),
        ('test_schema2', 'test_table3', 'test table 3', [
            ('col_id3', 'varchar', 'description of col_id3'),
            ('col_name3', 'varchar', 'description of col_name3'),
        ]),
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection

        rows = []
        for schema, table_name, table_description, column_specs in table_specs:
            table = {
                'schema_name': schema,
                'name': table_name,
                'description': table_description,
                'cluster': cluster
            }
            for order, (col_name, col_type, col_description) in enumerate(column_specs):
                rows.append(self._union(
                    {
                        'col_name': col_name,
                        'col_type': col_type,
                        'col_description': col_description,
                        'col_sort_order': order
                    }, table))
        connection.execute = MagicMock(return_value=rows)

        extractor = MSSQLMetadataExtractor()
        extractor.init(self.conf)

        for schema, table_name, table_description, column_specs in table_specs:
            expected = TableMetadata(
                'mssql', cluster, schema, table_name, table_description,
                [
                    ColumnMetadata(col_name, col_description, col_type, order)
                    for order, (col_name, col_type, col_description) in enumerate(column_specs)
                ],
                False, [schema])
            actual = repr(extractor.extract())
            self.assertEqual(repr(expected), actual)

        # Checked twice: the extractor must keep returning None once exhausted.
        self.assertIsNone(extractor.extract())
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self) -> None:
    """Rows for one table yield one TableMetadata; the flagged partition column gets a badge.

    The row with ``is_partition_col`` set to 1 is expected to carry the
    'partition column' badge, all others none.
    """
    # (column name, column type, column description, is_partition_col), in sort order.
    column_specs = [
        ('col_id1', 'bigint', 'description of id1', 0),
        ('col_id2', 'bigint', 'description of id2', 0),
        ('is_active', 'boolean', None, 1),
        ('source', 'varchar', 'description of source', 0),
        ('etl_created_at', 'timestamp', 'description of etl_created_at', 0),
        ('ds', 'varchar', None, 0),
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection

        table = {
            'schema': 'test_schema',
            'name': 'test_table',
            'description': 'a table for testing',
            'is_view': 0
        }
        rows = [
            self._union(
                {
                    'col_name': name,
                    'col_type': col_type,
                    'col_description': description,
                    'col_sort_order': order,
                    'is_partition_col': is_partition
                }, table)
            for order, (name, col_type, description, is_partition) in enumerate(column_specs)
        ]
        connection.execute = MagicMock(return_value=rows)

        extractor = HiveTableMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected_columns = []
        for order, (name, col_type, description, is_partition) in enumerate(column_specs):
            if is_partition:
                column = ColumnMetadata(name, description, col_type, order, ['partition column'])
            else:
                column = ColumnMetadata(name, description, col_type, order)
            expected_columns.append(column)
        expected = TableMetadata(
            'hive', 'gold', 'test_schema', 'test_table', 'a table for testing',
            expected_columns,
            is_view=False)

        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())
def test_extraction_with_multiple_result(self) -> None:
    """Four search results (three tables and one view) are extracted in order.

    Covers a description taken from 'Description', a description taken
    from the 'comment' parameter, and a VIRTUAL_VIEW table type that is
    expected to be reported as a view.
    """
    def glue_columns(pairs):  # type: ignore
        # Build a fresh list of Glue-style varchar column dicts from (name, comment) pairs.
        return [{'Name': name, 'Type': 'varchar', 'Comment': comment} for name, comment in pairs]

    second_table_cols = [
        ('col_name', 'description of col_name'),
        ('col_name2', 'description of col_name2'),
    ]
    third_table_cols = [
        ('col_id3', 'description of col_id3'),
        ('col_name3', 'description of col_name3'),
    ]

    external_table = {
        'Name': 'test_table2',
        'DatabaseName': 'test_schema1',
        'Description': 'test table 2',
        'StorageDescriptor': {
            'Columns': glue_columns(second_table_cols)
        },
        'TableType': 'EXTERNAL_TABLE',
    }
    commented_table = {
        'Name': 'test_table3',
        'DatabaseName': 'test_schema2',
        'StorageDescriptor': {
            'Columns': glue_columns(third_table_cols)
        },
        'Parameters': {
            'comment': 'description of test table 3 from comment'
        },
        'TableType': 'EXTERNAL_TABLE',
    }
    view = {
        'Name': 'test_view1',
        'DatabaseName': 'test_schema1',
        'Description': 'test view 1',
        'StorageDescriptor': {
            'Columns': glue_columns(third_table_cols)
        },
        'TableType': 'VIRTUAL_VIEW',
    }

    # (schema, name, description, [(column name, description, type), ...], is_view)
    expectations = [
        ('test_schema', 'test_table', 'a table for testing', [
            ('col_id1', 'description of id1', 'bigint'),
            ('col_id2', 'description of id2', 'bigint'),
            ('is_active', None, 'boolean'),
            ('source', 'description of source', 'varchar'),
            ('etl_created_at', 'description of etl_created_at', 'timestamp'),
            ('ds', None, 'varchar'),
            ('partition_key1', 'description of partition_key1', 'string'),
        ], False),
        ('test_schema1', 'test_table2', 'test table 2',
         [(name, comment, 'varchar') for name, comment in second_table_cols], False),
        ('test_schema2', 'test_table3', 'description of test table 3 from comment',
         [(name, comment, 'varchar') for name, comment in third_table_cols], False),
        ('test_schema1', 'test_view1', 'test view 1',
         [(name, comment, 'varchar') for name, comment in third_table_cols], True),
    ]

    with patch.object(GlueExtractor, '_search_tables') as mock_search:
        mock_search.return_value = [test_table, external_table, commented_table, view]

        extractor = GlueExtractor()
        extractor.init(self.conf)

        for schema, name, description, column_specs, is_view in expectations:
            expected = TableMetadata(
                'glue', 'gold', schema, name, description,
                [
                    ColumnMetadata(col_name, col_description, col_type, order)
                    for order, (col_name, col_description, col_type) in enumerate(column_specs)
                ],
                is_view)
            self.assertEqual(repr(expected), repr(extractor.extract()))

        # Checked twice: the extractor must keep returning None once exhausted.
        self.assertIsNone(extractor.extract())
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self):
    # type: () -> None
    """Rows for one table yield one TableMetadata.

    The column whose row has no description but an ``extras`` value of
    'partition key' is expected to surface that value as its description.
    """
    catalog = self.conf['extractor.athena_metadata.{}'.format(AthenaMetadataExtractor.CATALOG_KEY)]

    # (column name, column type, column description, extras), in sort order.
    row_specs = [
        ('col_id1', 'bigint', 'description of id1', None),
        ('col_id2', 'bigint', 'description of id2', None),
        ('is_active', 'boolean', None, None),
        ('source', 'varchar', 'description of source', None),
        ('etl_created_at', 'timestamp', None, 'partition key'),
        ('ds', 'varchar', None, None),
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection

        table = {
            'schema_name': 'test_schema',
            'name': 'test_table',
            'description': '',
            'cluster': catalog,
        }
        rows = [
            self._union(
                {'col_name': name,
                 'col_type': col_type,
                 'col_description': description,
                 'col_sort_order': order,
                 'extras': extras},
                table)
            for order, (name, col_type, description, extras) in enumerate(row_specs)
        ]
        connection.execute = MagicMock(return_value=rows)

        extractor = AthenaMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        # Spelled out explicitly so the expectation does not mirror extractor logic.
        expected_columns = [
            ColumnMetadata('col_id1', 'description of id1', 'bigint', 0),
            ColumnMetadata('col_id2', 'description of id2', 'bigint', 1),
            ColumnMetadata('is_active', None, 'boolean', 2),
            ColumnMetadata('source', 'description of source', 'varchar', 3),
            ColumnMetadata('etl_created_at', 'partition key', 'timestamp', 4),
            ColumnMetadata('ds', None, 'varchar', 5),
        ]
        expected = TableMetadata('athena', catalog, 'test_schema', 'test_table', '',
                                 expected_columns)

        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())
def test_extraction_with_multiple_result(self):
    # type: () -> None
    """Rows spanning three tables are grouped into three TableMetadata records, in row order."""
    # (schema, table name, table description,
    #  [(column name, column type, column description), ...]) in query-row order.
    table_specs = [
        ('test_schema1', 'test_table1', 'test table 1', [
            ('col_id1', 'bigint', 'description of col_id1'),
            ('col_id2', 'bigint', 'description of col_id2'),
            ('is_active', 'boolean', None),
            ('source', 'varchar', 'description of source'),
            ('etl_created_at', 'timestamp', 'description of etl_created_at'),
            ('ds', 'varchar', None),
        ]),
        ('test_schema1', 'test_table2', 'test table 2', [
            ('col_name', 'varchar', 'description of col_name'),
            ('col_name2', 'varchar', 'description of col_name2'),
        ]),
        ('test_schema2', 'test_table3', 'test table 3', [
            ('col_id3', 'varchar', 'description of col_id3'),
            ('col_name3', 'varchar', 'description of col_name3'),
        ]),
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection

        rows = []
        for schema, table_name, table_description, column_specs in table_specs:
            table = {
                'schema': schema,
                'name': table_name,
                'description': table_description,
                'is_view': 0
            }
            for order, (col_name, col_type, col_description) in enumerate(column_specs):
                rows.append(self._union(
                    {
                        'col_name': col_name,
                        'col_type': col_type,
                        'col_description': col_description,
                        'col_sort_order': order
                    }, table))
        connection.execute = MagicMock(return_value=rows)

        extractor = HiveTableMetadataExtractor()
        extractor.init(self.conf)

        for schema, table_name, table_description, column_specs in table_specs:
            expected = TableMetadata(
                'hive', 'gold', schema, table_name, table_description,
                [
                    ColumnMetadata(col_name, col_description, col_type, order)
                    for order, (col_name, col_type, col_description) in enumerate(column_specs)
                ],
                is_view=False)
            self.assertEqual(repr(expected), repr(extractor.extract()))

        # Checked twice: the extractor must keep returning None once exhausted.
        self.assertIsNone(extractor.extract())
        self.assertIsNone(extractor.extract())
def test_extraction_with_multiple_result(self) -> None:
    """Rows spanning three tables are grouped into three TableMetadata records, in row order.

    The tables carry ``is_view`` values 'nottrue', 'false' and 'true';
    only the last one is expected to be reported as a view.
    """
    cluster = self.conf['extractor.snowflake_metadata.{}'.format(
        SnowflakeMetadataExtractor.CLUSTER_KEY)]

    # (schema, table name, table description, raw is_view value,
    #  [(column name, column type, column description), ...],
    #  extra positional args passed to the expected TableMetadata) in query-row order.
    table_specs = [
        ('test_schema1', 'test_table1', 'test table 1', 'nottrue', [
            ('col_id1', 'number', 'description of col_id1'),
            ('col_id2', 'number', 'description of col_id2'),
            ('is_active', 'boolean', None),
            ('source', 'varchar', 'description of source'),
            ('etl_created_at', 'timestamp_ltz', 'description of etl_created_at'),
            ('ds', 'varchar', None),
        ], ()),
        ('test_schema1', 'test_table2', 'test table 2', 'false', [
            ('col_name', 'varchar', 'description of col_name'),
            ('col_name2', 'varchar', 'description of col_name2'),
        ], ()),
        ('test_schema2', 'test_table3', 'test table 3', 'true', [
            ('col_id3', 'varchar', 'description of col_id3'),
            ('col_name3', 'varchar', 'description of col_name3'),
        ], (True,)),
    ]

    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection

        rows = []
        for schema, table_name, table_description, raw_is_view, column_specs, _extra in table_specs:
            table = {
                'schema': schema,
                'name': table_name,
                'description': table_description,
                'cluster': cluster,
                'is_view': raw_is_view
            }
            for order, (col_name, col_type, col_description) in enumerate(column_specs):
                rows.append(self._union(
                    {
                        'col_name': col_name,
                        'col_type': col_type,
                        'col_description': col_description,
                        'col_sort_order': order
                    }, table))
        connection.execute = MagicMock(return_value=rows)

        extractor = SnowflakeMetadataExtractor()
        extractor.init(self.conf)

        for schema, table_name, table_description, _raw, column_specs, extra_args in table_specs:
            expected = TableMetadata(
                'snowflake', cluster, schema, table_name, table_description,
                [
                    ColumnMetadata(col_name, col_description, col_type, order)
                    for order, (col_name, col_type, col_description) in enumerate(column_specs)
                ],
                *extra_args)
            self.assertEqual(repr(expected), repr(extractor.extract()))

        # Checked twice: the extractor must keep returning None once exhausted.
        self.assertIsNone(extractor.extract())
        self.assertIsNone(extractor.extract())