def test_get_avro_schema_invalid_column_interface():
    """
    Verify that an incompatible custom column class without a mapping is rejected.

    The column objects expose ``name``, ``data_type``, ``udt_name`` and
    ``is_nullable``; no column mapping is supplied, so ``get_avro_schema``
    is expected to raise.
    """
    class Column:
        # Minimal column object whose attribute names the call should reject.
        def __init__(
            self,
            name: str,
            data_type: str,
            udt_name: str,
            is_nullable: bool,
        ):
            self.name, self.data_type = name, data_type
            self.udt_name, self.is_nullable = udt_name, is_nullable

    incompatible_columns = [
        Column(
            name="smallint",
            data_type="smallint",
            udt_name="int2",
            is_nullable=False,
        ),
    ]
    expected_message = "Assuming pg2avro compatible column interface"

    # No column mapping is passed on purpose; the call itself must raise.
    with pytest.raises(Exception, match=expected_message):
        get_avro_schema("test_table", "test_namespace", incompatible_columns)
def test_get_avro_schema_assumed_column_interface():
    """
    Check schema generation for column objects that need no integration mapping.

    The column objects expose ``name``, ``type`` and ``nullable`` directly.
    """
    class Column:
        def __init__(self, name: str, type: str, nullable: bool):
            self.name, self.type, self.nullable = name, type, nullable

    table, ns = "test_table", "test_namespace"
    compatible_columns = [
        Column(name="smallint", type="smallint", nullable=False),
    ]

    generated = get_avro_schema(table, ns, compatible_columns)

    wanted = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [{"name": "smallint", "type": "int"}],
    }
    assert wanted == generated
# Example #3
# 0
def test_get_avro_schema_assumed_column_interface():
    """
    Check schema generation for a plain dict column passed without any mapping.

    The dict uses the keys ``name``, ``type``, ``secondary_type`` and
    ``nullable``.
    """
    table, ns = "test_table", "test_namespace"
    dict_column = {
        "name": "smallint",
        "type": "smallint",
        "secondary_type": "int2",
        "nullable": False,
    }

    generated = get_avro_schema(table, ns, [dict_column])

    wanted = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [{"name": "smallint", "type": "int"}],
    }
    assert wanted == generated
# Example #4
# 0
def test_get_avro_schema_custom_mapping():
    """
    Check schema generation for a dict column resolved through a ColumnMapping.

    The column dict uses the keys ``c1``..``c4``. The mapping additionally
    names ``c5``/``c6`` for numeric precision/scale, keys the dict does not
    contain.
    """
    table, ns = "test_table", "test_namespace"
    raw_columns = [
        {"c1": "smallint", "c2": "smallint", "c3": "int2", "c4": False},
    ]
    key_mapping = ColumnMapping(
        name="c1",
        type="c2",
        nullable="c4",
        numeric_precision="c5",
        numeric_scale="c6",
    )

    generated = get_avro_schema(table, ns, raw_columns, key_mapping)

    wanted = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [{"name": "smallint", "type": "int"}],
    }
    assert wanted == generated
# Example #5
# 0
def test_get_avro_schema_invalid_column_interface():
    """
    Verify that an incompatible dict column without a mapping is rejected.

    The dict carries an ``incompatible`` key instead of ``name``; no column
    mapping is supplied, so ``get_avro_schema`` is expected to raise.
    """
    bad_column = {
        "incompatible": "smallint",
        "type": "smallint",
        "nullable": False,
    }
    expected_message = "Assuming pg2avro compatible column interface"

    # No column mapping is passed on purpose; the call itself must raise.
    with pytest.raises(Exception, match=expected_message):
        get_avro_schema("test_table", "test_namespace", [bad_column])
# Example #6
# 0
def test_get_avro_row_dict_special_data_types():
    """
    Check Avro row generation for ``json``, ``jsonb`` and ``_varchar`` columns.

    Input rows are tuples of two dicts plus either an empty list or ``None``.
    The dicts are expected back as their ``json.dumps`` strings, while the
    empty list and ``None`` are expected back unchanged.
    """
    column_specs = [
        {"name": col_name, "type": pg_type}
        for col_name, pg_type in (
            ("json_col", "json"),
            ("jsonb_col", "jsonb"),
            ("empty_list", "_varchar"),
        )
    ]
    schema = get_avro_schema("test_table", "test_namespace", column_specs)

    flat_doc = {"key1": "val1"}
    nested_doc = {"key2": "val2", "key3": [1, 2], "key4": {"key5": "val5"}}
    flat_text, nested_text = json.dumps(flat_doc), json.dumps(nested_doc)

    wanted_rows = [
        {"json_col": flat_text, "jsonb_col": nested_text, "empty_list": []},
        {"json_col": nested_text, "jsonb_col": flat_text, "empty_list": None},
    ]

    produced_rows = []
    for source_row in ((flat_doc, nested_doc, []), (nested_doc, flat_doc, None)):
        produced_rows.append(get_avro_row_dict(source_row, schema))

    assert wanted_rows == produced_rows
def test_get_avro_schema_sqlalchemy():
    """
    Check schema generation for sqlalchemy columns passed without a mapping.

    Covers a small integer, a boolean and an array-of-varchar column, all
    declared as not nullable.
    """
    table, ns = "test_table", "test_namespace"
    sa_columns = [
        Column(SMALLINT, name="smallint", nullable=False),
        Column(BOOLEAN, name="bool", nullable=False),
        Column(ARRAY(VARCHAR), name="array", nullable=False),
    ]

    generated = get_avro_schema(table, ns, sa_columns)

    wanted_fields = [
        {"name": "smallint", "type": "int"},
        {"name": "bool", "type": "boolean"},
        {"name": "array", "type": {"items": "string", "type": "array"}},
    ]
    wanted = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": wanted_fields,
    }
    assert wanted == generated
# Example #8
# 0
def test_get_avro_row_row_types():
    """
    Check Avro row generation from several kinds of source row.

    The same two records are fed in as attribute-bearing objects, plain
    dicts, ``OrderedDict`` instances and tuples; every variant must produce
    the same list of row dicts.

    TODO: Cover more than the simplest golden path.
    """
    field_names = ("name", "number", "list", "is_working")
    pg_types = ("varchar", "float4", "_varchar", "bool")
    column_specs = [
        {"name": col_name, "type": pg_type, "nullable": False}
        for col_name, pg_type in zip(field_names, pg_types)
    ]

    schema = get_avro_schema("test_table", "test_namespace", column_specs)

    wanted_rows = [
        {
            "name": "example-01",
            "number": 1.0,
            "list": ["list", "of", "strings"],
            "is_working": True,
        },
        {
            "name": "example-02",
            "number": 2.5,
            "list": ["another", "list", "of", "strings"],
            "is_working": False,
        },
    ]

    class Row:
        def __init__(
            self,
            name: str,
            number: float,
            items: List[str],
            is_working: bool,
        ):
            self.name, self.number = name, number
            self.list, self.is_working = items, is_working

    def raw_rows():
        # Build fresh tuples (and fresh lists) per call so that the row
        # sources below never share mutable state with each other.
        return [
            ("example-01", 1.0, ["list", "of", "strings"], True),
            ("example-02", 2.5, ["another", "list", "of", "strings"], False),
        ]

    row_sources = [
        # Compatible Row objects.
        [Row(*values) for values in raw_rows()],
        # Compatible dicts.
        [dict(zip(field_names, values)) for values in raw_rows()],
        # Compatible dicts, but of an extended class.
        [OrderedDict(zip(field_names, values)) for values in raw_rows()],
        # Compatible tuples.
        raw_rows(),
    ]

    for source in row_sources:
        produced_rows = []
        for source_row in source:
            produced_rows.append(get_avro_row_dict(source_row, schema))

        assert wanted_rows == produced_rows
# Example #9
# 0
def test_mapping_overrides():
    """
    Check that mapping overrides apply to both the schema and the row data.

    Five of the six columns have an override keyed by their name; the
    override named ``not_matching_override_name`` matches no column, and
    ``not_overriden`` has no override. The schema is asserted first, then
    rows are converted with the same overrides and compared.
    """

    from pg2avro.pg2avro import Column

    table, ns = "test_table", "test_namespace"

    source_columns = [
        Column(name=col_name, type=pg_type)
        for col_name, pg_type in (
            ("int_to_string", "int"),
            ("string_to_numeric", "string"),
            ("not_overriden", "int"),
            ("numeric_to_float", "numeric"),
            ("array_to_string", "_varchar"),
            ("string_to_array", "varchar"),
        )
    ]

    def override(pg_type, python_type):
        return {"pg_type": pg_type, "python_type": python_type}

    type_overrides = {
        "int_to_string": override("string", str),
        "string_to_numeric": override("numeric", float),
        "not_matching_override_name": override("int", int),
        "numeric_to_float": override("float8", float),
        "array_to_string": override("string", str),
        "string_to_array": override("_string", list),
    }

    def nullable_field(col_name, avro_type):
        return {"name": col_name, "type": ["null", avro_type]}

    wanted_schema = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [
            nullable_field("int_to_string", "string"),
            nullable_field(
                "string_to_numeric",
                {
                    "type": "bytes",
                    "logicalType": "decimal",
                    "precision": 38,
                    "scale": 9,
                },
            ),
            nullable_field("not_overriden", "int"),
            nullable_field("numeric_to_float", "double"),
            nullable_field("array_to_string", "string"),
            nullable_field(
                "string_to_array", {"type": "array", "items": "string"}
            ),
        ],
    }

    schema = get_avro_schema(
        table, ns, source_columns, mapping_overrides=type_overrides
    )

    assert wanted_schema == schema

    # Now data
    filled_input = {
        "int_to_string": 1,
        "string_to_numeric": "2.0",
        "not_overriden": 3,
        "numeric_to_float": 0.12345678910,
        "array_to_string": [1, 2, "a", "b"],
        "string_to_array": "asd",
    }
    filled_output = {
        "int_to_string": "1",
        "string_to_numeric": 2.0,
        "not_overriden": 3,
        "numeric_to_float": 0.12345678910,
        "array_to_string": "[1, 2, 'a', 'b']",
        "string_to_array": ["a", "s", "d"],
    }
    # The second row holds None for every column, on input and on output.
    source_rows = [filled_input, dict.fromkeys(filled_input)]
    wanted_rows = [filled_output, dict.fromkeys(filled_output)]

    produced_rows = []
    for source_row in source_rows:
        produced_rows.append(
            get_avro_row_dict(source_row, schema, type_overrides)
        )

    assert wanted_rows == produced_rows
# Example #10
# 0
def test_get_avro_schema_sqlalchemy():
    """
    Check schema generation for a broad set of sqlalchemy column types.

    Columns are passed without a mapping; the expected schema lists one
    field per column, in declaration order.

    TODO: Cover all sql/postgres types.
    """

    enum_values = ("value_1", "value_2")

    sa_columns = [
        Column(SMALLINT, name="smallint", nullable=False),
        Column(BIGINT, name="bigint", nullable=False),
        Column(INTEGER, name="integer", nullable=False),
        Column(NUMERIC(10, 2), name="numeric", nullable=False),
        Column(NUMERIC(10, 10), name="numeric_to_double", nullable=False),
        Column(NUMERIC, name="numeric_defaults", nullable=False),
        Column(NUMERIC, name="numeric_nullable", nullable=True),
        Column(DOUBLE_PRECISION, name="double_precision", nullable=False),
        Column(BOOLEAN, name="bool", nullable=False),
        Column(DATE, name="date", nullable=False),
        Column(TIME, name="time", nullable=False),
        Column(TIMESTAMP, name="timestamp", nullable=False),
        Column(CHAR, name="char", nullable=False),
        Column(TEXT, name="text", nullable=True),
        Column(VARCHAR(255), primary_key=True, name="varchar", nullable=False),
        Column(ARRAY(VARCHAR), name="array", nullable=False),
        Column(INTERVAL, name="interval", nullable=False),
        Column(
            ENUM(*enum_values, name="some_enum"),
            name="enum",
            nullable=False,
        ),
        Column(UUID, name="uuid", nullable=False),
        Column(JSONB, name="jsonb", nullable=False),
        Column(JSON, name="json", nullable=False),
    ]

    table, ns = "test_table", "test_namespace"

    def field(col_name, avro_type):
        return {"name": col_name, "type": avro_type}

    def decimal(precision, scale):
        return {
            "logicalType": "decimal",
            "type": "bytes",
            "precision": precision,
            "scale": scale,
        }

    wanted_fields = [
        field("smallint", "int"),
        field("bigint", "long"),
        field("integer", "int"),
        field("numeric", decimal(10, 2)),
        field("numeric_to_double", "double"),
        field("numeric_defaults", decimal(38, 9)),
        field("numeric_nullable", ["null", decimal(38, 9)]),
        field("double_precision", "double"),
        field("bool", "boolean"),
        field("date", {"logicalType": "date", "type": "int"}),
        field("time", {"logicalType": "timestamp-millis", "type": "int"}),
        field("timestamp", {"logicalType": "timestamp-millis", "type": "long"}),
        field("char", "string"),
        field("text", ["null", "string"]),
        field("varchar", "string"),
        field("array", {"items": "string", "type": "array"}),
        field("interval", "string"),
        field("enum", "string"),
        field("uuid", "string"),
        field("jsonb", "string"),
        field("json", "string"),
    ]
    wanted = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": wanted_fields,
    }

    generated = get_avro_schema(table, ns, sa_columns)

    assert wanted == generated
# Example #11
# 0
def test_get_avro_schema_custom_mapping():
    """
    Check schema generation for custom column objects resolved via ColumnMapping.

    The local column class uses the short attribute names ``n``, ``un``,
    ``nul``, ``np`` and ``ns``; the mapping passed to ``get_avro_schema``
    points each mapping field at the matching attribute.

    TODO: Cover all sql/postgres types.
    """
    class Col:
        def __init__(
            self,
            n: str,
            un: str,
            nul: bool,
            np: Optional[int] = None,
            ns: Optional[int] = None,
        ):
            self.n, self.un, self.nul = n, un, nul
            self.np, self.ns = np, ns

    custom_columns = [
        Col(n="smallint", un="int2", nul=False),
        Col(n="bigint", un="int8", nul=False),
        Col(n="integer", un="int4", nul=False),
        Col(n="numeric", un="numeric", nul=False, np=3, ns=7),
        Col(n="numeric_to_double", un="numeric", nul=False, np=10, ns=10),
        Col(n="numeric_defaults", un="numeric", nul=False),
        Col(n="numeric_nullable", un="numeric", nul=True),
        Col(n="double_precision", un="float8", nul=False),
        Col(n="real", un="float4", nul=False),
        Col(n="bool", un="bool", nul=False),
        Col(n="char", un="char", nul=False),
        Col(n="bpchar", un="bpchar", nul=False),
        Col(n="varchar", un="varchar", nul=False),
        Col(n="array", un="_varchar", nul=False),
        Col(n="array_n", un="_varchar", nul=True),
        Col(n="date", un="date", nul=False),
        Col(n="time", un="time", nul=False),
        Col(n="timestamp", un="timestamp", nul=False),
        Col(n="enum", un="custom_type", nul=False),
        Col(n="uuid", un="uuid", nul=False),
        Col(n="json", un="json", nul=False),
        Col(n="jsonb", un="jsonb", nul=False),
    ]

    table, namespace_name = "test_table", "test_namespace"

    def field(col_name, avro_type):
        return {"name": col_name, "type": avro_type}

    def decimal(precision, scale):
        return {
            "logicalType": "decimal",
            "type": "bytes",
            "precision": precision,
            "scale": scale,
        }

    def string_array():
        return {"items": "string", "type": "array"}

    wanted_fields = [
        field("smallint", "int"),
        field("bigint", "long"),
        field("integer", "int"),
        field("numeric", decimal(3, 7)),
        field("numeric_to_double", "double"),
        field("numeric_defaults", decimal(38, 9)),
        field("numeric_nullable", ["null", decimal(38, 9)]),
        field("double_precision", "double"),
        field("real", "float"),
        field("bool", "boolean"),
        field("char", "string"),
        field("bpchar", "string"),
        field("varchar", "string"),
        field("array", string_array()),
        field("array_n", ["null", string_array()]),
        field("date", {"logicalType": "date", "type": "int"}),
        field("time", {"logicalType": "timestamp-millis", "type": "int"}),
        field("timestamp", {"logicalType": "timestamp-millis", "type": "long"}),
        field("enum", "string"),
        field("uuid", "string"),
        field("json", "string"),
        field("jsonb", "string"),
    ]
    wanted = {
        "name": table,
        "namespace": namespace_name,
        "type": "record",
        "fields": wanted_fields,
    }

    attribute_mapping = ColumnMapping(
        name="n",
        type="un",
        nullable="nul",
        numeric_precision="np",
        numeric_scale="ns",
    )
    generated = get_avro_schema(
        table, namespace_name, custom_columns, attribute_mapping
    )

    assert wanted == generated
# Example #12
# 0
def test_get_avro_schema_custom_mapping():
    """
    Check schema generation for custom column objects resolved via ColumnMapping.

    Uses three columns (small integer, boolean, array of varchar) on a local
    class with the short attribute names ``n``, ``un``, ``nul``, ``np`` and
    ``ns``.

    TODO: Cover all sql/postgres types.
    """
    class Col:
        def __init__(
            self,
            n: str,
            un: str,
            nul: bool,
            np: Optional[int] = None,
            ns: Optional[int] = None,
        ):
            self.n, self.un, self.nul = n, un, nul
            self.np, self.ns = np, ns

    custom_columns = [
        Col(n="smallint", un="int2", nul=False),
        Col(n="bool", un="bool", nul=False),
        Col(n="array", un="_varchar", nul=False),
    ]

    table, namespace_name = "test_table", "test_namespace"

    wanted_fields = [
        {"name": "smallint", "type": "int"},
        {"name": "bool", "type": "boolean"},
        {"name": "array", "type": {"items": "string", "type": "array"}},
    ]
    wanted = {
        "name": table,
        "namespace": namespace_name,
        "type": "record",
        "fields": wanted_fields,
    }

    attribute_mapping = ColumnMapping(
        name="n",
        type="un",
        nullable="nul",
        numeric_precision="np",
        numeric_scale="ns",
    )
    generated = get_avro_schema(
        table, namespace_name, custom_columns, attribute_mapping
    )

    assert wanted == generated