def test_table_metadata_extraction_with_single_result(self, mock1,
                                                      mock2) -> None:
    """Check the ``TableMetadata`` built from a single mocked column row.

    ``mock1`` and ``mock2`` are not used in the body; presumably they are
    injected by ``patch`` decorators outside this view -- confirm before
    removing them.
    """
    extractor = PrestoLoopExtractor()
    conf = self.conf.copy()
    conf.put("is_table_metadata_enabled", True)
    extractor.init(conf)
    # Stub ``execute`` so every call is answered by the shared side-effect
    # function instead of the extractor's own implementation.
    extractor.execute = MagicMock(
        side_effect=presto_engine_execute_side_effect)

    results = extractor.extract()

    # The comparison already yields a bool, so no conditional expression
    # is needed around it.
    is_partition_column = MOCK_COLUMN_RESULT[2] == "partition key"
    expected = TableMetadata(
        database=extractor._database,
        cluster=None,
        schema=MOCK_SCHEMA_NAME,
        name=MOCK_TABLE_NAME,
        columns=[
            ColumnMetadata(
                name=MOCK_COLUMN_RESULT[0],
                description=MOCK_COLUMN_RESULT[3],
                data_type=MOCK_COLUMN_RESULT[1],
                sort_order=0,
                is_partition_column=is_partition_column,
            )
        ],
    )
    # The objects are compared through their textual representation
    # (presumably the models define no equality of their own -- TODO confirm).
    self.assertEqual(repr(results), repr(expected))
 def test_get_all_table_metadata_from_information_schema(
         self, mock_settings) -> None:
     """Check the first table yielded from the mocked information schema.

     ``mock_settings`` is not used in the body; presumably it is injected
     by a ``patch`` decorator outside this view -- confirm before removing.
     """
     self.engine.init(self.conf)
     # Every ``execute`` call is answered by the shared side-effect function.
     self.engine.execute = MagicMock(
         side_effect=presto_engine_execute_side_effect)

     # One expected column per mocked information-schema row, in row order.
     expected_columns = [
         ColumnMetadata(
             name=row['col_name'],
             description=row['col_description'],
             col_type=row['col_type'],
             sort_order=row['col_sort_order'],
             is_partition_column=None)
         for row in (MOCK_INFORMATION_SCHEMA_RESULT_1,
                     MOCK_INFORMATION_SCHEMA_RESULT_2)
     ]
     expected = TableMetadata(
         database=MOCK_DATABASE_NAME,
         cluster=MOCK_CLUSTER_NAME,
         schema=MOCK_SCHEMA_NAME,
         name=MOCK_TABLE_NAME,
         columns=expected_columns,
         is_view=bool(MOCK_INFORMATION_SCHEMA_RESULT_1['is_view']),
     )

     # Show the complete diff if the representations differ.
     self.maxDiff = None
     metadata_iter = (
         self.engine.get_all_table_metadata_from_information_schema(
             cluster=MOCK_CLUSTER_NAME))
     first_table = next(metadata_iter)
     self.assertEqual(repr(first_table), repr(expected))
Example #3
0
    def test_extraction_with_single_result(self):
        # type: () -> None
        """Check that six mocked rows of one table yield one ``TableMetadata``.

        The extractor's connection is patched so ``execute`` returns the
        rows built here; a second ``extract()`` call must return ``None``.
        """
        # (column name, data type, description); the position in this list
        # is used as the column's sort order.
        column_specs = [
            ("col_id1", "number", "description of id1"),
            ("col_id2", "number", "description of id2"),
            ("is_active", "boolean", None),
            ("source", "varchar", "description of source"),
            ("etl_created_at", "timestamp_ltz",
             "description of etl_created_at"),
            ("ds", "varchar", None),
        ]

        with patch.object(SQLAlchemyExtractor,
                          "_get_connection") as mock_connection:
            # Table-level fields merged into every column row.
            table = {
                "schema": "test_schema",
                "name": "test_table",
                "description": "a table for testing",
                "cluster": self.conf[SnowflakeMetadataExtractor.CLUSTER_KEY],
                "is_view": "false",
            }
            rows = [
                self._union(
                    {
                        "col_name": name,
                        "data_type": data_type,
                        "col_description": description,
                        "col_sort_order": sort_order,
                    },
                    table,
                )
                for sort_order, (name, data_type, description)
                in enumerate(column_specs)
            ]

            connection = MagicMock()
            connection.execute = MagicMock(return_value=rows)
            mock_connection.return_value = connection

            extractor = SnowflakeMetadataExtractor()
            extractor.init(self.conf)
            actual = extractor.extract()

            expected_columns = [
                ColumnMetadata(name, description, data_type, sort_order)
                for sort_order, (name, data_type, description)
                in enumerate(column_specs)
            ]
            expected = TableMetadata(
                "prod",
                "MY_CLUSTER",
                "test_schema",
                "test_table",
                "a table for testing",
                expected_columns,
            )

            self.assertEqual(repr(expected), repr(actual))
            # Only one table was mocked, so nothing is left to extract.
            self.assertIsNone(extractor.extract())
    def test_extraction_with_single_result(self) -> None:
        """Check the ``TableMetadata`` built from one mocked Glue table.

        ``GlueExtractor._search_tables`` is patched to return a single table
        with six storage columns and one partition key; the expected result
        lists the partition key after the storage columns. A second
        ``extract()`` call must return ``None``.
        """
        table_name = "test_catalog_test_schema_test_table"
        table_description = "a table for testing"
        # (name, type, comment); a ``None`` comment means the mocked Glue
        # column carries no "Comment" key at all.
        storage_columns = [
            ("col_id1", "bigint", "description of id1"),
            ("col_id2", "bigint", "description of id2"),
            ("is_active", "boolean", None),
            ("source", "varchar", "description of source"),
            ("etl_created_at", "timestamp", "description of etl_created_at"),
            ("ds", "varchar", None),
        ]
        partition_columns = [
            ("partition_key1", "string", "description of partition_key1"),
        ]

        def as_glue_column(name, col_type, comment):
            """Build one column dict for the mocked Glue response."""
            glue_column = {"Name": name, "Type": col_type}
            if comment is not None:
                glue_column["Comment"] = comment
            return glue_column

        with patch.object(GlueExtractor, "_search_tables") as mock_search:
            mock_search.return_value = [{
                "Name": table_name,
                "DatabaseName": "test_database",
                "Description": table_description,
                "StorageDescriptor": {
                    "Columns": [
                        as_glue_column(*spec) for spec in storage_columns
                    ],
                    "Location": "test_catalog.test_schema.test_table",
                },
                "PartitionKeys": [
                    as_glue_column(*spec) for spec in partition_columns
                ],
                "TableType": "EXTERNAL_TABLE",
            }]

            extractor = GlueExtractor()
            extractor.init(self.conf)
            actual = extractor.extract()

            # Sort order runs continuously across storage columns and then
            # partition keys.
            expected_columns = [
                ColumnMetadata(name, comment, col_type, sort_order)
                for sort_order, (name, col_type, comment)
                in enumerate(storage_columns + partition_columns)
            ]
            expected = TableMetadata(
                "test_database",
                None,
                None,
                table_name,
                table_description,
                expected_columns,
                False,
            )
            self.assertEqual(repr(expected), repr(actual))
            # Only one table was mocked, so nothing is left to extract.
            self.assertIsNone(extractor.extract())
    def test_extraction_with_multiple_result(self) -> None:
        """Check that rows spanning three tables yield three ``TableMetadata``.

        The extractor's connection is patched so ``execute`` returns column
        rows for three tables in sequence. Successive ``extract()`` calls
        must return the tables in that order and then ``None``.
        """
        # One entry per mocked table: (schema, table name, description,
        # columns), where each column is (name, data type, description) and
        # its position in the list is its sort order.
        table_specs = [
            ("test_schema1", "test_table1", "test table 1", [
                ("col_id1", "bigint", "description of col_id1"),
                ("col_id2", "bigint", "description of col_id2"),
                ("is_active", "boolean", None),
                ("source", "varchar", "description of source"),
                ("etl_created_at", "timestamp",
                 "description of etl_created_at"),
                ("ds", "varchar", None),
            ]),
            ("test_schema1", "test_table2", "test table 2", [
                ("col_name", "varchar", "description of col_name"),
                ("col_name2", "varchar", "description of col_name2"),
            ]),
            ("test_schema2", "test_table3", "test table 3", [
                ("col_id3", "varchar", "description of col_id3"),
                ("col_name3", "varchar", "description of col_name3"),
            ]),
        ]

        with patch.object(SQLAlchemyExtractor,
                          "_get_connection") as mock_connection:
            cluster = self.conf[PostgresMetadataExtractor.CLUSTER_KEY]

            rows = []
            for schema, table_name, description, columns in table_specs:
                # Table-level fields merged into each of this table's rows.
                table = {
                    "schema": schema,
                    "name": table_name,
                    "description": description,
                    "is_view": 0,
                    "cluster": cluster,
                }
                rows.extend(
                    self._union(
                        {
                            "col_name": col_name,
                            "data_type": data_type,
                            "col_description": col_description,
                            "col_sort_order": sort_order,
                        },
                        table,
                    )
                    for sort_order, (col_name, data_type, col_description)
                    in enumerate(columns))

            connection = MagicMock()
            connection.execute = MagicMock(return_value=rows)
            mock_connection.return_value = connection

            extractor = PostgresMetadataExtractor()
            extractor.init(self.conf)

            # Each extract() call must return the next table, in row order.
            for schema, table_name, description, columns in table_specs:
                expected = TableMetadata(
                    "postgres",
                    cluster,
                    schema,
                    table_name,
                    description,
                    [
                        ColumnMetadata(col_name, col_description, data_type,
                                       sort_order)
                        for sort_order, (col_name, data_type, col_description)
                        in enumerate(columns)
                    ],
                    0,
                )
                self.assertEqual(repr(expected), repr(extractor.extract()))

            # After the last table, repeated calls keep returning None.
            self.assertIsNone(extractor.extract())
            self.assertIsNone(extractor.extract())
    def test_extraction_with_single_result(self):
        # type: () -> None
        """Check that six mocked rows of one table yield one ``TableMetadata``.

        The extractor's connection is patched so ``execute`` returns the
        rows built here; a second ``extract()`` call must return ``None``.
        """
        # Keys of a mocked column row, in the order the values are listed below.
        row_keys = ('col_name', 'col_type', 'col_description', 'col_sort_order')
        column_values = [
            ('col_id1', 'number', 'description of id1', 0),
            ('col_id2', 'number', 'description of id2', 1),
            ('is_active', 'boolean', None, 2),
            ('source', 'varchar', 'description of source', 3),
            ('etl_created_at', 'timestamp_ltz', 'description of etl_created_at', 4),
            ('ds', 'varchar', None, 5),
        ]

        with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
            sql_execute = MagicMock()
            connection = MagicMock()
            connection.execute = sql_execute
            mock_connection.return_value = connection

            # Table-level fields merged into every column row.
            table = {
                'schema': 'test_schema',
                'name': 'test_table',
                'description': 'a table for testing',
                'cluster': self.conf[SnowflakeMetadataExtractor.CLUSTER_KEY],
                'is_view': 'false',
            }
            sql_execute.return_value = [
                self._union(dict(zip(row_keys, values)), table)
                for values in column_values
            ]

            extractor = SnowflakeMetadataExtractor()
            extractor.init(self.conf)
            actual = extractor.extract()

            expected_columns = [
                ColumnMetadata(name, description, col_type, sort_order)
                for name, col_type, description, sort_order in column_values
            ]
            expected = TableMetadata('prod', 'MY_CLUSTER', 'test_schema', 'test_table',
                                     'a table for testing', expected_columns)

            self.assertEqual(repr(expected), repr(actual))
            # Only one table was mocked, so nothing is left to extract.
            self.assertIsNone(extractor.extract())