Python SparkAdapter Examples

Programming Language: Python

Namespace/Package Name: dbt.adapters.spark

Class/Type: SparkAdapter

Examples at hotexamples.com: 2

Python SparkAdapter - 2 examples found. These are the top-rated real-world Python examples of dbt.adapters.spark.SparkAdapter, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

SparkAdapter(12)

acquire_connection(6)

_parse_relation(1)

Example #1

Show file

    def test_parse_columns_from_information_with_table_type_and_delta_provider(
            self):
        """Parse columns from a relation's ``information`` text (delta table).

        The information text describes a MANAGED table with the ``delta``
        provider, a ``Statistics`` line and a five-line schema in which one
        line is a field nested inside ``struct_col``. The test expects
        ``parse_columns_from_information`` to return four columns and checks
        the full column dict of the first and the last one, including the
        ``stats:bytes:*`` entries.
        """
        self.maxDiff = None
        table_type = SparkRelation.get_relation_type.Table

        # Mimics the output of Spark in the information column
        info_text = (
            "Database: default_schema\n"
            "Table: mytable\n"
            "Owner: root\n"
            "Created Time: Wed Feb 04 18:15:00 UTC 1815\n"
            "Last Access: Wed May 20 19:25:00 UTC 1925\n"
            "Created By: Spark 3.0.1\n"
            "Type: MANAGED\n"
            "Provider: delta\n"
            "Statistics: 123456789 bytes\n"
            "Location: /mnt/vo\n"
            "Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n"
            "InputFormat: org.apache.hadoop.mapred.SequenceFileInputFormat\n"
            "OutputFormat: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat\n"
            "Partition Provider: Catalog\n"
            "Partition Columns: [`dt`]\n"
            "Schema: root\n"
            " |-- col1: decimal(22,0) (nullable = true)\n"
            " |-- col2: string (nullable = true)\n"
            " |-- dt: date (nullable = true)\n"
            " |-- struct_col: struct (nullable = true)\n"
            " |    |-- struct_inner_col: string (nullable = true)\n")
        relation = SparkRelation.create(
            schema='default_schema',
            identifier='mytable',
            type=table_type,
            information=info_text,
        )

        adapter = SparkAdapter(self._get_target_http(self.project_cfg))
        parsed = adapter.parse_columns_from_information(relation)
        self.assertEqual(len(parsed), 4)

        # Entries expected to be the same for both checked columns: the
        # table-level fields and the byte statistics.
        shared = {
            'table_database': None,
            'table_schema': relation.schema,
            'table_name': relation.name,
            'table_type': table_type,
            'table_owner': 'root',
            'numeric_scale': None,
            'numeric_precision': None,
            'char_size': None,
            'stats:bytes:description': '',
            'stats:bytes:include': True,
            'stats:bytes:label': 'bytes',
            'stats:bytes:value': 123456789,
        }

        # Only the first and the last parsed column are inspected in detail.
        checked = (
            (0, 'col1', 'decimal(22,0)'),
            (3, 'struct_col', 'struct'),
        )
        for position, col_name, col_dtype in checked:
            expected = {
                **shared,
                'column': col_name,
                'column_index': position,
                'dtype': col_dtype,
            }
            self.assertEqual(
                parsed[position].to_column_dict(omit_none=False), expected)

Example #2

Show file

    def test_parse_relation(self):
        """Parse DESCRIBE TABLE EXTENDED-style rows into column objects.

        The input contains four column rows followed by partition and
        detailed-table-information rows. The test expects
        ``parse_describe_extended`` to return exactly four entries and checks
        the full column dict of each, with ``table_owner`` equal to ``'root'``.
        """
        self.maxDiff = None
        table_type = SparkRelation.get_relation_type.Table

        relation = SparkRelation.create(
            schema='default_schema',
            identifier='mytable',
            type=table_type,
        )
        assert relation.database is None

        # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
        describe_output = [
            ('col1', 'decimal(22,0)'),
            ('col2', 'string'),
            ('dt', 'date'),
            ('struct_col', 'struct<struct_inner_col:string>'),
            ('# Partition Information', 'data_type'),
            ('# col_name', 'data_type'),
            ('dt', 'date'),
            (None, None),
            ('# Detailed Table Information', None),
            ('Database', None),
            ('Owner', 'root'),
            ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'),
            ('Last Access', 'Wed May 20 19:25:00 UTC 1925'),
            ('Type', 'MANAGED'),
            ('Provider', 'delta'),
            ('Location', '/mnt/vo'),
            ('Serde Library',
             'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'),
            ('InputFormat',
             'org.apache.hadoop.mapred.SequenceFileInputFormat'),
            ('OutputFormat',
             'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'),
            ('Partition Provider', 'Catalog'),
        ]

        # Wrap each tuple in a Row, with a fresh keys list per row.
        describe_rows = [
            Row(keys=['col_name', 'data_type'], values=pair)
            for pair in describe_output
        ]

        adapter = SparkAdapter(self._get_target_http(self.project_cfg))
        parsed = adapter.parse_describe_extended(relation, describe_rows)
        self.assertEqual(len(parsed), 4)

        # Expected (column, dtype) pairs; the list position is the expected
        # column_index.
        expected_columns = [
            ('col1', 'decimal(22,0)'),
            ('col2', 'string'),
            ('dt', 'date'),
            ('struct_col', 'struct<struct_inner_col:string>'),
        ]
        for position, (col_name, col_dtype) in enumerate(expected_columns):
            self.assertEqual(
                parsed[position].to_column_dict(omit_none=False), {
                    'table_database': None,
                    'table_schema': relation.schema,
                    'table_name': relation.name,
                    'table_type': table_type,
                    'table_owner': 'root',
                    'column': col_name,
                    'column_index': position,
                    'dtype': col_dtype,
                    'numeric_scale': None,
                    'numeric_precision': None,
                    'char_size': None,
                })