def test_table_metadata_extraction_with_single_result(self, mock1, mock2) -> None:
    """Extractor emits a TableMetadata matching the single mocked Presto column row."""
    extractor = PrestoLoopExtractor()
    conf = self.conf.copy()
    conf.put("is_table_metadata_enabled", True)
    extractor.init(conf)
    extractor.execute = MagicMock(
        side_effect=presto_engine_execute_side_effect)
    results = extractor.extract()
    # Fix: `True if ... else False` is redundant — the comparison is already a bool.
    is_partition_column = MOCK_COLUMN_RESULT[2] == "partition key"
    expected = TableMetadata(
        database=extractor._database,
        cluster=None,
        schema=MOCK_SCHEMA_NAME,
        name=MOCK_TABLE_NAME,
        columns=[
            ColumnMetadata(
                name=MOCK_COLUMN_RESULT[0],
                description=MOCK_COLUMN_RESULT[3],
                data_type=MOCK_COLUMN_RESULT[1],
                sort_order=0,
                is_partition_column=is_partition_column,
            )
        ],
    )
    # Compare reprs, as the original did, since the metadata types may lack __eq__.
    self.assertEqual(repr(results), repr(expected))
def test_get_all_table_metadata_from_information_schema(
        self, mock_settings) -> None:
    """Engine yields a TableMetadata built from the mocked information-schema rows."""
    self.engine.init(self.conf)
    self.engine.execute = MagicMock(
        side_effect=presto_engine_execute_side_effect)

    def _to_column(row):
        # Translate one mocked information-schema row into a ColumnMetadata.
        return ColumnMetadata(
            name=row['col_name'],
            description=row['col_description'],
            col_type=row['col_type'],
            sort_order=row['col_sort_order'],
            is_partition_column=None,
        )

    expected = TableMetadata(
        database=MOCK_DATABASE_NAME,
        cluster=MOCK_CLUSTER_NAME,
        schema=MOCK_SCHEMA_NAME,
        name=MOCK_TABLE_NAME,
        columns=[
            _to_column(MOCK_INFORMATION_SCHEMA_RESULT_1),
            _to_column(MOCK_INFORMATION_SCHEMA_RESULT_2),
        ],
        is_view=bool(MOCK_INFORMATION_SCHEMA_RESULT_1['is_view']),
    )

    metadata_iter = self.engine.get_all_table_metadata_from_information_schema(
        cluster=MOCK_CLUSTER_NAME)
    first = next(metadata_iter)
    self.maxDiff = None
    self.assertEqual(repr(first), repr(expected))
def test_extraction_with_single_result(self):
    # type: () -> None
    """Rows for one table are assembled into a single TableMetadata; the stream then ends."""
    with patch.object(SQLAlchemyExtractor, "_get_connection") as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection
        sql_execute = MagicMock()
        connection.execute = sql_execute

        table = {
            "schema": "test_schema",
            "name": "test_table",
            "description": "a table for testing",
            "cluster": self.conf[SnowflakeMetadataExtractor.CLUSTER_KEY],
            "is_view": "false",
        }

        # One tuple per column: (name, data type, description, sort order).
        column_rows = [
            ("col_id1", "number", "description of id1", 0),
            ("col_id2", "number", "description of id2", 1),
            ("is_active", "boolean", None, 2),
            ("source", "varchar", "description of source", 3),
            ("etl_created_at", "timestamp_ltz", "description of etl_created_at", 4),
            ("ds", "varchar", None, 5),
        ]
        sql_execute.return_value = [
            self._union(
                {
                    "col_name": name,
                    "data_type": dtype,
                    "col_description": desc,
                    "col_sort_order": order,
                },
                table,
            )
            for name, dtype, desc, order in column_rows
        ]

        extractor = SnowflakeMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected = TableMetadata(
            "prod",
            "MY_CLUSTER",
            "test_schema",
            "test_table",
            "a table for testing",
            [
                ColumnMetadata(name, desc, dtype, order)
                for name, dtype, desc, order in column_rows
            ],
        )
        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self) -> None:
    """One Glue table, including its partition key, maps onto a single TableMetadata."""
    with patch.object(GlueExtractor, "_search_tables") as mock_search:
        mock_search.return_value = [{
            "Name": "test_catalog_test_schema_test_table",
            "DatabaseName": "test_database",
            "Description": "a table for testing",
            "StorageDescriptor": {
                "Columns": [
                    {"Name": "col_id1", "Type": "bigint",
                     "Comment": "description of id1"},
                    {"Name": "col_id2", "Type": "bigint",
                     "Comment": "description of id2"},
                    {"Name": "is_active", "Type": "boolean"},
                    {"Name": "source", "Type": "varchar",
                     "Comment": "description of source"},
                    {"Name": "etl_created_at", "Type": "timestamp",
                     "Comment": "description of etl_created_at"},
                    {"Name": "ds", "Type": "varchar"},
                ],
                "Location": "test_catalog.test_schema.test_table",
            },
            "PartitionKeys": [
                {"Name": "partition_key1", "Type": "string",
                 "Comment": "description of partition_key1"},
            ],
            "TableType": "EXTERNAL_TABLE",
        }]

        extractor = GlueExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        # Regular columns first, then partition keys, in declaration order.
        expected_columns = [
            ColumnMetadata("col_id1", "description of id1", "bigint", 0),
            ColumnMetadata("col_id2", "description of id2", "bigint", 1),
            ColumnMetadata("is_active", None, "boolean", 2),
            ColumnMetadata("source", "description of source", "varchar", 3),
            ColumnMetadata("etl_created_at",
                           "description of etl_created_at", "timestamp", 4),
            ColumnMetadata("ds", None, "varchar", 5),
            ColumnMetadata("partition_key1",
                           "description of partition_key1", "string", 6),
        ]
        expected = TableMetadata(
            "test_database",
            None,
            None,
            "test_catalog_test_schema_test_table",
            "a table for testing",
            expected_columns,
            False,
        )
        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())
def test_extraction_with_multiple_result(self) -> None:
    """Rows spanning three tables are grouped into three TableMetadata records, in order."""
    with patch.object(SQLAlchemyExtractor, "_get_connection") as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection
        sql_execute = MagicMock()
        connection.execute = sql_execute

        # Hoisted: the cluster config value is read once instead of three times.
        cluster = self.conf[PostgresMetadataExtractor.CLUSTER_KEY]
        table = {
            "schema": "test_schema1",
            "name": "test_table1",
            "description": "test table 1",
            "is_view": 0,
            "cluster": cluster,
        }
        table1 = {
            "schema": "test_schema1",
            "name": "test_table2",
            "description": "test table 2",
            "is_view": 0,
            "cluster": cluster,
        }
        table2 = {
            "schema": "test_schema2",
            "name": "test_table3",
            "description": "test table 3",
            "is_view": 0,
            "cluster": cluster,
        }
        sql_execute.return_value = [
            self._union(
                {"col_name": "col_id1", "data_type": "bigint",
                 "col_description": "description of col_id1",
                 "col_sort_order": 0}, table),
            self._union(
                {"col_name": "col_id2", "data_type": "bigint",
                 "col_description": "description of col_id2",
                 "col_sort_order": 1}, table),
            self._union(
                {"col_name": "is_active", "data_type": "boolean",
                 "col_description": None, "col_sort_order": 2}, table),
            self._union(
                {"col_name": "source", "data_type": "varchar",
                 "col_description": "description of source",
                 "col_sort_order": 3}, table),
            self._union(
                {"col_name": "etl_created_at", "data_type": "timestamp",
                 "col_description": "description of etl_created_at",
                 "col_sort_order": 4}, table),
            self._union(
                {"col_name": "ds", "data_type": "varchar",
                 "col_description": None, "col_sort_order": 5}, table),
            self._union(
                {"col_name": "col_name", "data_type": "varchar",
                 "col_description": "description of col_name",
                 "col_sort_order": 0}, table1),
            self._union(
                {"col_name": "col_name2", "data_type": "varchar",
                 "col_description": "description of col_name2",
                 "col_sort_order": 1}, table1),
            # Fix: this description literal was broken across a line boundary
            # ("description of \ncol_id3"); restored to the single-line value the
            # expected ColumnMetadata below asserts against.
            self._union(
                {"col_name": "col_id3", "data_type": "varchar",
                 "col_description": "description of col_id3",
                 "col_sort_order": 0}, table2),
            self._union(
                {"col_name": "col_name3", "data_type": "varchar",
                 "col_description": "description of col_name3",
                 "col_sort_order": 1}, table2),
        ]

        extractor = PostgresMetadataExtractor()
        extractor.init(self.conf)

        expected = TableMetadata(
            "postgres",
            cluster,
            "test_schema1",
            "test_table1",
            "test table 1",
            [
                ColumnMetadata("col_id1", "description of col_id1", "bigint", 0),
                ColumnMetadata("col_id2", "description of col_id2", "bigint", 1),
                ColumnMetadata("is_active", None, "boolean", 2),
                ColumnMetadata("source", "description of source", "varchar", 3),
                ColumnMetadata(
                    "etl_created_at",
                    "description of etl_created_at",
                    "timestamp",
                    4,
                ),
                ColumnMetadata("ds", None, "varchar", 5),
            ],
            0,
        )
        self.assertEqual(repr(expected), repr(extractor.extract()))

        expected = TableMetadata(
            "postgres",
            cluster,
            "test_schema1",
            "test_table2",
            "test table 2",
            [
                ColumnMetadata("col_name", "description of col_name", "varchar", 0),
                ColumnMetadata("col_name2", "description of col_name2", "varchar", 1),
            ],
            0,
        )
        self.assertEqual(repr(expected), repr(extractor.extract()))

        expected = TableMetadata(
            "postgres",
            cluster,
            "test_schema2",
            "test_table3",
            "test table 3",
            [
                ColumnMetadata("col_id3", "description of col_id3", "varchar", 0),
                ColumnMetadata("col_name3", "description of col_name3", "varchar", 1),
            ],
            0,
        )
        self.assertEqual(repr(expected), repr(extractor.extract()))

        # Exhausted stream keeps returning None on repeated calls.
        self.assertIsNone(extractor.extract())
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self):
    # type: () -> None
    """Mocked rows for one table produce a single TableMetadata; the stream then ends."""
    with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection
        sql_execute = MagicMock()
        connection.execute = sql_execute

        table = {'schema': 'test_schema',
                 'name': 'test_table',
                 'description': 'a table for testing',
                 'cluster': self.conf[SnowflakeMetadataExtractor.CLUSTER_KEY],
                 'is_view': 'false'}

        # One tuple per column: (name, type, description, sort order).
        rows = [
            ('col_id1', 'number', 'description of id1', 0),
            ('col_id2', 'number', 'description of id2', 1),
            ('is_active', 'boolean', None, 2),
            ('source', 'varchar', 'description of source', 3),
            ('etl_created_at', 'timestamp_ltz', 'description of etl_created_at', 4),
            ('ds', 'varchar', None, 5),
        ]
        sql_execute.return_value = [
            self._union({'col_name': col,
                         'col_type': typ,
                         'col_description': desc,
                         'col_sort_order': order}, table)
            for col, typ, desc, order in rows
        ]

        extractor = SnowflakeMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()

        expected = TableMetadata(
            'prod', 'MY_CLUSTER', 'test_schema', 'test_table',
            'a table for testing',
            [ColumnMetadata(col, desc, typ, order)
             for col, typ, desc, order in rows])
        self.assertEqual(repr(expected), repr(actual))
        self.assertIsNone(extractor.extract())