예제 #1
0
 def test_presto_expand_data_with_simple_structural_columns(self):
     """Check expansion of a one-field ROW column next to an ARRAY column.

     Two input rows, each with a single-field row value and a
     three-element array, are expected to come back as six rows (one per
     array element). Only the first output row produced from each input
     row carries the row value; the others hold empty strings.
     """
     source_cols = [
         {"name": "row_column", "type": "ROW(NESTED_OBJ VARCHAR)"},
         {"name": "array_column", "type": "ARRAY(BIGINT)"},
     ]
     source_rows = [
         {"row_column": ["a"], "array_column": [1, 2, 3]},
         {"row_column": ["b"], "array_column": [4, 5, 6]},
     ]

     got_cols, got_rows, got_expanded = PrestoEngineSpec.expand_data(
         source_cols, source_rows
     )

     # Expected structural types are the bare names, without arguments.
     want_cols = [
         {"name": "row_column", "type": "ROW"},
         {"name": "row_column.nested_obj", "type": "VARCHAR"},
         {"name": "array_column", "type": "ARRAY"},
     ]

     # One expected row per array element; the row value appears only on
     # the first element of each source row.
     want_rows = []
     for letter, numbers in (("a", (1, 2, 3)), ("b", (4, 5, 6))):
         for position, number in enumerate(numbers):
             leading = position == 0
             want_rows.append(
                 {
                     "row_column": [letter] if leading else "",
                     "row_column.nested_obj": letter if leading else "",
                     "array_column": number,
                 }
             )

     want_expanded = [{"name": "row_column.nested_obj", "type": "VARCHAR"}]

     self.assertEqual(got_cols, want_cols)
     self.assertEqual(got_rows, want_rows)
     self.assertEqual(got_expanded, want_expanded)
예제 #2
0
 def test_presto_expand_data_with_complex_array_columns(self):
     """Check expansion of an array of rows that each hold a nested array.

     Two input rows are expected to become eight output rows, one per
     innermost value ("a" through "h"). The scalar ``int_column`` is
     expected only on the first output row of each input row, and
     ``array_column`` only on the first output row of each outer element.
     """
     outer_type = "ARRAY(ROW(NESTED_ARRAY ARRAY(ROW(NESTED_OBJ VARCHAR))))"
     inner_type = "ARRAY(ROW(NESTED_OBJ VARCHAR))"

     source_cols = [
         {"name": "int_column", "type": "BIGINT"},
         {"name": "array_column", "type": outer_type},
     ]
     source_rows = [
         {"int_column": 1, "array_column": [[[["a"], ["b"]]], [[["c"], ["d"]]]]},
         {"int_column": 2, "array_column": [[[["e"], ["f"]]], [[["g"], ["h"]]]]},
     ]

     got_cols, got_rows, got_expanded = PrestoEngineSpec.expand_data(
         source_cols, source_rows
     )

     def want_row(letter, array_value="", int_value=""):
         # Cells not supplied default to the empty-string padding.
         return {
             "int_column": int_value,
             "array_column": array_value,
             "array_column.nested_array": [letter],
             "array_column.nested_array.nested_obj": letter,
         }

     want_cols = [
         {"name": "int_column", "type": "BIGINT"},
         {"name": "array_column", "type": outer_type},
         {"name": "array_column.nested_array", "type": inner_type},
         {"name": "array_column.nested_array.nested_obj", "type": "VARCHAR"},
     ]
     want_rows = [
         want_row("a", [[["a"], ["b"]]], 1),
         want_row("b"),
         want_row("c", [[["c"], ["d"]]]),
         want_row("d"),
         want_row("e", [[["e"], ["f"]]], 2),
         want_row("f"),
         want_row("g", [[["g"], ["h"]]]),
         want_row("h"),
     ]
     want_expanded = [
         {"name": "array_column.nested_array", "type": inner_type},
         {"name": "array_column.nested_array.nested_obj", "type": "VARCHAR"},
     ]

     self.assertEqual(got_cols, want_cols)
     self.assertEqual(got_rows, want_rows)
     self.assertEqual(got_expanded, want_expanded)
예제 #3
0
    def test_presto_expand_data_array(self):
        """Check expansion of a ROW column whose value arrives as a string.

        The ``user`` cell is a JSON-encoded string. The assertions expect it
        to be decoded into a list and spread over the ``user.id``,
        ``user.first_name`` and ``user.last_name`` columns, while the scalar
        columns (and any extra column keys such as ``is_date``) pass through
        unchanged.
        """
        user_type = "ROW(ID BIGINT, FIRST_NAME VARCHAR, LAST_NAME VARCHAR)"
        cols = [
            {"name": "event_id", "type": "VARCHAR", "is_date": False},
            {"name": "timestamp", "type": "BIGINT", "is_date": False},
            {"name": "user", "type": user_type, "is_date": False},
        ]
        data = [
            {
                "event_id": "abcdef01-2345-6789-abcd-ef0123456789",
                "timestamp": "1595895506219",
                # Must be the JSON text of the row asserted below; a masked
                # placeholder such as '******' cannot yield the expected
                # [1, "JOHN", "DOE"] value.
                "user": '[1, "JOHN", "DOE"]',
            }
        ]

        actual_cols, actual_data, actual_expanded_cols = PrestoEngineSpec.expand_data(
            cols, data
        )

        expected_cols = [
            {"name": "event_id", "type": "VARCHAR", "is_date": False},
            {"name": "timestamp", "type": "BIGINT", "is_date": False},
            {"name": "user", "type": user_type, "is_date": False},
            {"name": "user.id", "type": "BIGINT"},
            {"name": "user.first_name", "type": "VARCHAR"},
            {"name": "user.last_name", "type": "VARCHAR"},
        ]
        expected_data = [
            {
                "event_id": "abcdef01-2345-6789-abcd-ef0123456789",
                "timestamp": "1595895506219",
                "user": [1, "JOHN", "DOE"],
                "user.id": 1,
                "user.first_name": "JOHN",
                "user.last_name": "DOE",
            }
        ]
        expected_expanded_cols = [
            {"name": "user.id", "type": "BIGINT"},
            {"name": "user.first_name", "type": "VARCHAR"},
            {"name": "user.last_name", "type": "VARCHAR"},
        ]

        self.assertEqual(actual_cols, expected_cols)
        self.assertEqual(actual_data, expected_data)
        self.assertEqual(actual_expanded_cols, expected_expanded_cols)
예제 #4
0
    def test_presto_expand_data_with_complex_row_columns(self):
        """Check expansion of a ROW column that contains another ROW.

        Each input row is expected to stay a single output row, gaining one
        column per nested field: the scalar field, the inner row itself,
        and the inner row's scalar field.
        """
        outer_type = "ROW(NESTED_OBJ1 VARCHAR, NESTED_ROW ROW(NESTED_OBJ2 VARCHAR))"
        inner_type = "ROW(NESTED_OBJ2 VARCHAR)"

        source_cols = [{"name": "row_column", "type": outer_type}]
        source_rows = [
            {"row_column": ["a1", ["a2"]]},
            {"row_column": ["b1", ["b2"]]},
        ]

        got_cols, got_rows, got_expanded = PrestoEngineSpec.expand_data(
            source_cols, source_rows
        )

        want_cols = [
            {"name": "row_column", "type": outer_type},
            {"name": "row_column.nested_obj1", "type": "VARCHAR"},
            {"name": "row_column.nested_row", "type": inner_type},
            {"name": "row_column.nested_row.nested_obj2", "type": "VARCHAR"},
        ]
        want_rows = [
            {
                "row_column": [first, [second]],
                "row_column.nested_obj1": first,
                "row_column.nested_row": [second],
                "row_column.nested_row.nested_obj2": second,
            }
            for first, second in (("a1", "a2"), ("b1", "b2"))
        ]
        # Expanded columns are the expected columns minus the original one.
        want_expanded = [
            {"name": "row_column.nested_obj1", "type": "VARCHAR"},
            {"name": "row_column.nested_row", "type": inner_type},
            {"name": "row_column.nested_row.nested_obj2", "type": "VARCHAR"},
        ]

        self.assertEqual(got_cols, want_cols)
        self.assertEqual(got_rows, want_rows)
        self.assertEqual(got_expanded, want_expanded)
예제 #5
0
    def test_presto_expand_data_with_complex_row_columns_and_null_values(self):
        """Check nested ROW expansion when nulls occur at every depth.

        The inputs are JSON-encoded strings with a null at the leaf, at the
        inner row, and at the outer row. The assertions expect a null found
        in the data to come back as ``None``, and any column below that
        null to hold an empty string. The ``is_dttm`` key of the source
        column is expected on every expanded column as well.
        """

        def column(name, col_type):
            # Every column in this test carries the same is_dttm flag.
            return {"name": name, "type": col_type, "is_dttm": False}

        outer_type = "ROW(NESTED_ROW ROW(NESTED_OBJ VARCHAR))"
        inner_type = "ROW(NESTED_OBJ VARCHAR)"
        names = (
            "row_column",
            "row_column.nested_row",
            "row_column.nested_row.nested_obj",
        )

        source_cols = [column(names[0], outer_type)]
        source_rows = [
            {"row_column": encoded}
            for encoded in ('[["a"]]', "[[null]]", "[null]", "null")
        ]

        got_cols, got_rows, got_expanded = PrestoEngineSpec.expand_data(
            source_cols, source_rows
        )

        want_cols = [
            column(names[0], outer_type),
            column(names[1], inner_type),
            column(names[2], "VARCHAR"),
        ]
        # Cell values per output row, in the same order as ``names``.
        want_cells = [
            ([["a"]], ["a"], "a"),
            ([[None]], [None], None),
            ([None], None, ""),
            (None, "", ""),
        ]
        want_rows = [dict(zip(names, cells)) for cells in want_cells]
        want_expanded = [
            column(names[1], inner_type),
            column(names[2], "VARCHAR"),
        ]

        self.assertEqual(got_cols, want_cols)
        self.assertEqual(got_rows, want_rows)
        self.assertEqual(got_expanded, want_expanded)
예제 #6
0
 def test_presto_expand_data_with_complex_array_columns(self):
     """Check expansion of nested arrays of rows into flat rows.

     Two input rows are expected to become eight output rows, one per
     innermost value ('a' through 'h'). The full ``array_column`` value
     and ``int_column`` are expected only on the first output row of each
     input row, and each nested array only on the first output row it
     produces; every other cell is an empty string. Expected structural
     column types are the bare name 'ARRAY'.
     """
     source_cols = [
         {'name': 'int_column', 'type': 'BIGINT'},
         {
             'name': 'array_column',
             'type': 'ARRAY(ROW(NESTED_ARRAY ARRAY(ROW(NESTED_OBJ VARCHAR))))',
         },
     ]
     source_rows = [
         {'int_column': 1, 'array_column': [[[['a'], ['b']]], [[['c'], ['d']]]]},
         {'int_column': 2, 'array_column': [[[['e'], ['f']]], [[['g'], ['h']]]]},
     ]

     got_cols, got_rows, got_expanded = PrestoEngineSpec.expand_data(
         source_cols, source_rows
     )

     def want_row(leaf, nested='', outer='', number=''):
         # Cells not supplied default to the empty-string padding.
         return {
             'int_column': number,
             'array_column': outer,
             'array_column.nested_array': nested,
             'array_column.nested_array.nested_obj': leaf,
         }

     want_cols = [
         {'name': 'int_column', 'type': 'BIGINT'},
         {'name': 'array_column', 'type': 'ARRAY'},
         {'name': 'array_column.nested_array', 'type': 'ARRAY'},
         {'name': 'array_column.nested_array.nested_obj', 'type': 'VARCHAR'},
     ]
     want_rows = [
         want_row(
             'a',
             nested=[['a'], ['b']],
             outer=[[[['a'], ['b']]], [[['c'], ['d']]]],
             number=1,
         ),
         want_row('b'),
         want_row('c', nested=[['c'], ['d']]),
         want_row('d'),
         want_row(
             'e',
             nested=[['e'], ['f']],
             outer=[[[['e'], ['f']]], [[['g'], ['h']]]],
             number=2,
         ),
         want_row('f'),
         want_row('g', nested=[['g'], ['h']]),
         want_row('h'),
     ]
     want_expanded = [
         {'name': 'array_column.nested_array', 'type': 'ARRAY'},
         {'name': 'array_column.nested_array.nested_obj', 'type': 'VARCHAR'},
     ]

     self.assertEqual(got_cols, want_cols)
     self.assertEqual(got_rows, want_rows)
     self.assertEqual(got_expanded, want_expanded)