Beispiel #1
0
    def test_transform_array_struct_nested_type(self) -> None:
        """Parse an array whose element type is a struct of two int fields.

        The expected ``ArrayTypeMetadata`` tree is assembled by hand and
        compared with the result of ``parse_hive_type`` for the same column.
        """
        hive_type = 'array<struct<nest1:int,nest2:int>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Expected tree: col1 (array) -> _inner_ (struct) -> nest1, nest2.
        expected = ArrayTypeMetadata(name='col1',
                                     parent=column,
                                     type_str=hive_type)
        element_struct = StructTypeMetadata(
            name='_inner_',
            parent=expected,
            type_str='struct<nest1:int,nest2:int>')
        field_nest1 = ScalarTypeMetadata(
            name='nest1', parent=element_struct, type_str='int')
        field_nest2 = ScalarTypeMetadata(
            name='nest2', parent=element_struct, type_str='int')

        expected.array_inner_type = element_struct
        element_struct.struct_items = {
            'nest1': field_nest1,
            'nest2': field_nest2,
        }
        # Struct members carry their position within the struct.
        field_nest1.sort_order = 0
        field_nest2.sort_order = 1

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #2
0
    def test_transform_map_struct_nested_type(self) -> None:
        """Parse a map from string keys to a struct of two int fields.

        The expected ``MapTypeMetadata`` tree is assembled by hand and
        compared with the result of ``parse_hive_type`` for the same column.
        """
        hive_type = 'map<string,struct<nest1:int,nest2:int>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Expected tree: col1 (map) -> _map_key (string),
        #                              _map_value (struct) -> nest1, nest2.
        expected = MapTypeMetadata(name='col1',
                                   parent=column,
                                   type_str=hive_type)
        key_type = ScalarTypeMetadata(
            name='_map_key', parent=expected, type_str='string')
        value_struct = StructTypeMetadata(
            name='_map_value',
            parent=expected,
            type_str='struct<nest1:int,nest2:int>')
        field_nest1 = ScalarTypeMetadata(
            name='nest1', parent=value_struct, type_str='int')
        field_nest2 = ScalarTypeMetadata(
            name='nest2', parent=value_struct, type_str='int')

        expected.map_key_type = key_type
        expected.map_value_type = value_struct
        value_struct.struct_items = {
            'nest1': field_nest1,
            'nest2': field_nest2,
        }
        # Struct members carry their position within the struct.
        field_nest1.sort_order = 0
        field_nest2.sort_order = 1

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #3
0
    def test_transform_union_as_nested_type(self) -> None:
        """Expect ``uniontype`` struct members to come back as scalar leaves.

        Both struct members are uniontypes (one of them wrapping a struct);
        the expected tree models each of them as a ``ScalarTypeMetadata``
        holding the full uniontype string.
        """
        hive_type = ('struct<nest1:uniontype<string,struct<c1:int,c2:string>>,'
                     'nest2:uniontype<string,int>>')
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        expected = StructTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=hive_type)
        union_nest1 = ScalarTypeMetadata(
            name='nest1',
            parent=expected,
            type_str='uniontype<string,struct<c1:int,c2:string>>')
        union_nest2 = ScalarTypeMetadata(
            name='nest2',
            parent=expected,
            type_str='uniontype<string,int>')

        expected.struct_items = {
            'nest1': union_nest1,
            'nest2': union_nest2,
        }
        # Struct members carry their position within the struct.
        union_nest1.sort_order = 0
        union_nest2.sort_order = 1

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #4
0
    def test_transform_struct_map_array_nested_type(self) -> None:
        """Parse a struct holding a map-of-arrays member and an array member.

        Note that no ``array_inner_type`` is assigned on either expected
        array node in this test.
        """
        hive_type = 'struct<nest1:map<string,array<int>>,nest2:array<string>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Expected tree: col1 (struct) -> nest1 (map) -> _map_key, _map_value
        #                              -> nest2 (array)
        expected = StructTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=hive_type)
        map_member = MapTypeMetadata(
            name='nest1',
            parent=expected,
            type_str='map<string,array<int>>')
        map_member_key = ScalarTypeMetadata(
            name='_map_key', parent=map_member, type_str='string')
        map_member_value = ArrayTypeMetadata(
            name='_map_value', parent=map_member, type_str='array<int>')
        array_member = ArrayTypeMetadata(
            name='nest2', parent=expected, type_str='array<string>')

        expected.struct_items = {
            'nest1': map_member,
            'nest2': array_member,
        }
        map_member.map_key_type = map_member_key
        map_member.map_value_type = map_member_value
        # Struct members carry their position within the struct.
        map_member.sort_order = 0
        array_member.sort_order = 1

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #5
0
    def test_transform_map_type(self) -> None:
        """Parse a map whose value type is itself a map of string to int."""
        hive_type = 'map<string,map<string,int>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Outer map and its key / value nodes.
        expected = MapTypeMetadata(name='col1',
                                   parent=column,
                                   type_str=hive_type)
        outer_key = ScalarTypeMetadata(
            name='_map_key', parent=expected, type_str='string')
        outer_value = MapTypeMetadata(
            name='_map_value', parent=expected, type_str='map<string,int>')

        # The nested map hangs off the outer map's value node.
        nested_key = ScalarTypeMetadata(
            name='_map_key', parent=outer_value, type_str='string')
        nested_value = ScalarTypeMetadata(
            name='_map_value', parent=outer_value, type_str='int')

        expected.map_key_type = outer_key
        expected.map_value_type = outer_value
        outer_value.map_key_type = nested_key
        outer_value.map_value_type = nested_value

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #6
0
    def test_transform_invalid_struct_inner_type(self) -> None:
        """Expect ``ParseException`` for a struct with malformed member types.

        The first member type has a stray non-ASCII character appended and
        the second member has the placeholder text
        ``<derived from deserializer>`` in place of a type.
        """
        malformed_type = ('struct<nest1:varchar(256)å,'
                          'nest2:<derived from deserializer>>')
        column = ColumnMetadata('col1', None, malformed_type, 0)
        column.set_column_key(self.column_key)

        with self.assertRaises(ParseException):
            parse_hive_type(column.type, column.name, column)
Beispiel #7
0
    def test_transform_no_complex_type(self) -> None:
        """Parse a plain ``int`` column into a single scalar node."""
        hive_type = 'int'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        expected = ScalarTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=hive_type)

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #8
0
    def test_transform_non_alpha_only_types(self) -> None:
        """Parse member types containing digits, parentheses, spaces, underscores.

        Covers ``decimal(10,2)``, ``double precision``, ``varchar(32)``,
        a map built from those, and ``interval_day_time`` as struct members.
        """
        hive_type = ('struct<nest1:decimal(10,2),nest2:double precision,'
                     'nest3:varchar(32),nest4:map<varchar(32),decimal(10,2)>,'
                     'nest5:interval_day_time>')
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        expected = StructTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=hive_type)
        decimal_member = ScalarTypeMetadata(
            name='nest1', parent=expected, type_str='decimal(10,2)')
        double_member = ScalarTypeMetadata(
            name='nest2', parent=expected, type_str='double precision')
        varchar_member = ScalarTypeMetadata(
            name='nest3', parent=expected, type_str='varchar(32)')
        map_member = MapTypeMetadata(
            name='nest4',
            parent=expected,
            type_str='map<varchar(32),decimal(10,2)>')
        map_member_key = ScalarTypeMetadata(
            name='_map_key', parent=map_member, type_str='varchar(32)')
        map_member_value = ScalarTypeMetadata(
            name='_map_value', parent=map_member, type_str='decimal(10,2)')
        interval_member = ScalarTypeMetadata(
            name='nest5', parent=expected, type_str='interval_day_time')

        expected.struct_items = {
            'nest1': decimal_member,
            'nest2': double_member,
            'nest3': varchar_member,
            'nest4': map_member,
            'nest5': interval_member,
        }
        map_member.map_key_type = map_member_key
        map_member.map_value_type = map_member_value
        # Struct members carry their position within the struct.
        decimal_member.sort_order = 0
        double_member.sort_order = 1
        varchar_member.sort_order = 2
        map_member.sort_order = 3
        interval_member.sort_order = 4

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #9
0
    def test_transform_union_as_scalar_type(self) -> None:
        """Expect a top-level ``uniontype`` column to parse as one scalar node.

        The uniontype wraps a struct, yet the expected result is a single
        ``ScalarTypeMetadata`` carrying the whole type string.
        """
        hive_type = 'uniontype<string,struct<c1:int,c2:string>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        expected_scalar = ScalarTypeMetadata(name='col1',
                                             parent=column,
                                             type_str=hive_type)

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected_scalar)
Beispiel #10
0
    def test_transform_array_type(self) -> None:
        """Parse an array of arrays of int.

        The expected inner ``array<int>`` node has no ``array_inner_type``
        assigned in this test.
        """
        hive_type = 'array<array<int>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        expected = ArrayTypeMetadata(name='col1',
                                     parent=column,
                                     type_str=hive_type)
        nested_array = ArrayTypeMetadata(
            name='_inner_', parent=expected, type_str='array<int>')

        expected.array_inner_type = nested_array

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #11
0
    def setUp(self) -> None:
        """Reset serialization bookkeeping and build two table fixtures.

        ``self.table_metadata`` and ``self.table_metadata2`` are created from
        identical arguments. Each gets its own freshly built plain columns,
        while the column with nested type metadata is the same instance in
        both tables.
        """
        super(TestTableMetadata, self).setUp()

        # Rebind the class-level key sets on TableMetadata to empty sets.
        TableMetadata.serialized_nodes_keys = set()
        TableMetadata.serialized_rels_keys = set()

        nested_column = ColumnMetadata('has_nested_type',
                                       'column with nested types',
                                       'array<array<array<string>>>', 6)
        nested_column.set_column_key(
            'hive://gold.test_schema1/test_table1/has_nested_type')
        nested_type_metadata = self._set_up_type_metadata(nested_column)
        nested_column.set_type_metadata(nested_type_metadata)

        def build_plain_columns() -> list:
            # New ColumnMetadata objects on every call, so the two tables do
            # not share their plain column instances.
            return [
                ColumnMetadata('test_id1', 'description of test_table1',
                               'bigint', 0),
                ColumnMetadata('test_id2', 'description of test_id2',
                               'bigint', 1),
                ColumnMetadata('is_active', None, 'boolean', 2),
                ColumnMetadata('source', 'description of source',
                               'varchar', 3),
                ColumnMetadata('etl_created_at',
                               'description of etl_created_at',
                               'timestamp', 4),
                ColumnMetadata('ds', None, 'varchar', 5),
            ]

        self.table_metadata = TableMetadata(
            'hive', 'gold', 'test_schema1', 'test_table1', 'test_table1',
            build_plain_columns() + [nested_column])

        self.table_metadata2 = TableMetadata(
            'hive', 'gold', 'test_schema1', 'test_table1', 'test_table1',
            build_plain_columns() + [nested_column])
Beispiel #12
0
    def test_transform_array_map_nested_type(self) -> None:
        """Parse an array whose element type is a map of string to int."""
        hive_type = 'array<map<string,int>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Expected tree: col1 (array) -> _inner_ (map) -> _map_key, _map_value.
        expected = ArrayTypeMetadata(name='col1',
                                     parent=column,
                                     type_str=hive_type)
        element_map = MapTypeMetadata(
            name='_inner_', parent=expected, type_str='map<string,int>')
        element_key = ScalarTypeMetadata(
            name='_map_key', parent=element_map, type_str='string')
        element_value = ScalarTypeMetadata(
            name='_map_value', parent=element_map, type_str='int')

        expected.array_inner_type = element_map
        element_map.map_key_type = element_key
        element_map.map_value_type = element_value

        parsed = parse_hive_type(column.type, column.name, column)
        self.assertEqual(parsed, expected)
Beispiel #13
0
    def test_transform_invalid_array_inner_type(self) -> None:
        """Expect ``ParseException`` when the innermost array type is ``int*``."""
        malformed_type = 'array<array<int*>>'
        column = ColumnMetadata('col1', None, malformed_type, 0)
        column.set_column_key(self.column_key)

        with self.assertRaises(ParseException):
            parse_hive_type(column.type, column.name, column)
Beispiel #14
0
    def test_serialize_struct_type_metadata(self) -> None:
        """Serialize a three-level nested struct and check nodes and relations.

        The type tree is built by hand (two of the scalar members carry a
        description), then drained through ``next_node`` / ``next_relation``
        and serialized with ``neo4_serializer``. Each serialized row is
        compared, position by position, with the expected row.
        """
        hive_type = (
            'struct<c1:struct<c2:struct<c3:string,c4:string>>,c5:string>')
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Tree: col1 -> c1 -> c2 -> (c3, c4), and col1 -> c5.
        root_struct = StructTypeMetadata(name='col1',
                                         parent=column,
                                         type_str=hive_type)
        struct_c1 = StructTypeMetadata(
            name='c1',
            parent=root_struct,
            type_str='struct<c2:struct<c3:string,c4:string>>')
        struct_c2 = StructTypeMetadata(
            name='c2',
            parent=struct_c1,
            type_str='struct<c3:string,c4:string>')
        scalar_c3 = ScalarTypeMetadata(name='c3',
                                       parent=struct_c2,
                                       type_str='string',
                                       description='description of c3')
        scalar_c4 = ScalarTypeMetadata(name='c4',
                                       parent=struct_c2,
                                       type_str='string')
        scalar_c5 = ScalarTypeMetadata(name='c5',
                                       parent=root_struct,
                                       type_str='string',
                                       description='description of c5')

        root_struct.struct_items = {'c1': struct_c1, 'c5': scalar_c5}
        struct_c1.struct_items = {'c2': struct_c2}
        struct_c2.struct_items = {'c3': scalar_c3, 'c4': scalar_c4}
        # Position of each member within its enclosing struct.
        struct_c1.sort_order = 0
        scalar_c5.sort_order = 1
        struct_c2.sort_order = 0
        scalar_c3.sort_order = 0
        scalar_c4.sort_order = 1

        expected_nodes = [
            {
                'kind': 'struct',
                'name': 'col1',
                'data_type': 'struct<c1:struct<c2:struct<c3:string,c4:string>>,c5:string>',
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            },
            {
                'kind': 'struct',
                'name': 'c1',
                'data_type': 'struct<c2:struct<c3:string,c4:string>>',
                'LABEL': 'Type_Metadata',
                'sort_order:UNQUOTED': 0,
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1',
            },
            {
                'kind': 'struct',
                'name': 'c2',
                'data_type': 'struct<c3:string,c4:string>',
                'LABEL': 'Type_Metadata',
                'sort_order:UNQUOTED': 0,
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
            },
            {
                'kind': 'scalar',
                'name': 'c3',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'sort_order:UNQUOTED': 0,
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3',
            },
            {
                'description': 'description of c3',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3/_description',
                'LABEL': 'Description',
                'description_source': 'description',
            },
            {
                'kind': 'scalar',
                'name': 'c4',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'sort_order:UNQUOTED': 1,
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c4',
            },
            {
                'kind': 'scalar',
                'name': 'c5',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'sort_order:UNQUOTED': 1,
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5',
            },
            {
                'description': 'description of c5',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5/_description',
                'LABEL': 'Description',
                'description_source': 'description',
            },
        ]
        expected_rels = [
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Column',
                'TYPE': 'TYPE_METADATA',
                'REVERSE_TYPE': 'TYPE_METADATA_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3/_description',
                'START_LABEL': 'Type_Metadata',
                'END_LABEL': 'Description',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3',
                'TYPE': 'DESCRIPTION',
                'REVERSE_TYPE': 'DESCRIPTION_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c4',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5/_description',
                'START_LABEL': 'Type_Metadata',
                'END_LABEL': 'Description',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5',
                'TYPE': 'DESCRIPTION',
                'REVERSE_TYPE': 'DESCRIPTION_OF',
            },
        ]

        def drain(fetch_next, serialize):
            # Pull rows until the producer returns a falsy value, serializing
            # each row as it arrives.
            serialized_rows = []
            row = fetch_next()
            while row:
                serialized_rows.append(serialize(row))
                row = fetch_next()
            return serialized_rows

        # NOTE(review): only the first len(expected) rows are compared below;
        # extra trailing rows in the actual output would go unnoticed.
        actual_nodes = drain(root_struct.next_node,
                             neo4_serializer.serialize_node)
        for position, expected_node in enumerate(expected_nodes):
            self.assertEqual(actual_nodes[position], expected_node)

        actual_rels = drain(root_struct.next_relation,
                            neo4_serializer.serialize_relationship)
        for position, expected_rel in enumerate(expected_rels):
            self.assertEqual(actual_rels[position], expected_rel)
Beispiel #15
0
    def test_serialize_map_struct_type_metadata(self) -> None:
        """Serialize a map-of-struct type tree and check nodes and relations.

        The map value is a struct that itself contains a map member and a
        string member. The tree is built by hand, drained through
        ``next_node`` / ``next_relation``, serialized with
        ``neo4_serializer`` and compared row by row with the expected output.
        """
        hive_type = 'map<string,struct<c1:map<string,string>,c2:string>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Tree: col1 (map) -> _map_key
        #                  -> _map_value (struct) -> c1 (map) -> key, value
        #                                         -> c2
        root_map = MapTypeMetadata(name='col1',
                                   parent=column,
                                   type_str=hive_type)
        root_key = ScalarTypeMetadata(
            name='_map_key', parent=root_map, type_str='string')
        value_struct = StructTypeMetadata(
            name='_map_value',
            parent=root_map,
            type_str='struct<c1:map<string,string>,c2:string>')
        member_map = MapTypeMetadata(
            name='c1', parent=value_struct, type_str='map<string,string>')
        member_map_key = ScalarTypeMetadata(
            name='_map_key', parent=member_map, type_str='string')
        member_map_value = ScalarTypeMetadata(
            name='_map_value', parent=member_map, type_str='string')
        member_scalar = ScalarTypeMetadata(
            name='c2', parent=value_struct, type_str='string')

        root_map.map_key_type = root_key
        root_map.map_value_type = value_struct
        value_struct.struct_items = {
            'c1': member_map,
            'c2': member_scalar,
        }
        member_map.map_key_type = member_map_key
        member_map.map_value_type = member_map_value
        # Position of each member within the struct.
        member_map.sort_order = 0
        member_scalar.sort_order = 1

        expected_nodes = [
            {
                'kind': 'map',
                'name': 'col1',
                'data_type': 'map<string,struct<c1:map<string,string>,c2:string>>',
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            },
            {
                'kind': 'scalar',
                'name': '_map_key',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_key',
            },
            {
                'kind': 'struct',
                'name': '_map_value',
                'data_type': 'struct<c1:map<string,string>,c2:string>',
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
            },
            {
                'kind': 'map',
                'name': 'c1',
                'data_type': 'map<string,string>',
                'sort_order:UNQUOTED': 0,
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
            },
            {
                'kind': 'scalar',
                'name': '_map_key',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_key',
            },
            {
                'kind': 'scalar',
                'name': '_map_value',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_value',
            },
            {
                'kind': 'scalar',
                'name': 'c2',
                'data_type': 'string',
                'LABEL': 'Type_Metadata',
                'sort_order:UNQUOTED': 1,
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c2',
            },
        ]
        expected_rels = [
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Column',
                'TYPE': 'TYPE_METADATA',
                'REVERSE_TYPE': 'TYPE_METADATA_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_key',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_key',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_value',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c2',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
        ]

        # NOTE(review): only the first len(expected) rows are compared below;
        # extra trailing rows in the actual output would go unnoticed.
        actual_nodes = []
        while True:
            node_row = root_map.next_node()
            if not node_row:
                break
            actual_nodes.append(neo4_serializer.serialize_node(node_row))
        for position, expected_node in enumerate(expected_nodes):
            self.assertEqual(actual_nodes[position], expected_node)

        actual_rels = []
        while True:
            relation_row = root_map.next_relation()
            if not relation_row:
                break
            actual_rels.append(
                neo4_serializer.serialize_relationship(relation_row))
        for position, expected_rel in enumerate(expected_rels):
            self.assertEqual(actual_rels[position], expected_rel)
Beispiel #16
0
    def test_serialize_array_type_metadata(self) -> None:
        """Serialize a triple-nested array type and check nodes and relations.

        The tree is built by hand, drained through ``next_node`` /
        ``next_relation``, serialized with ``neo4_serializer`` and compared
        row by row with the expected output.
        """
        hive_type = 'array<array<array<string>>>'
        column = ColumnMetadata('col1', None, hive_type, 0)
        column.set_column_key(self.column_key)

        # Chain: col1 -> _inner_ -> _inner_ -> _inner_ (scalar string).
        root_array = ArrayTypeMetadata(name='col1',
                                       parent=column,
                                       type_str=hive_type)
        array_level1 = ArrayTypeMetadata(
            name='_inner_',
            parent=root_array,
            type_str='array<array<string>>')
        array_level2 = ArrayTypeMetadata(
            name='_inner_',
            parent=array_level1,
            type_str='array<string>')
        scalar_level3 = ScalarTypeMetadata(
            name='_inner_',
            parent=array_level2,
            type_str='string')

        root_array.array_inner_type = array_level1
        array_level1.array_inner_type = array_level2
        array_level2.array_inner_type = scalar_level3

        # Only the three array levels are listed as expected rows; there is
        # no expected row for the innermost scalar.
        expected_nodes = [
            {
                'kind': 'array',
                'name': 'col1',
                'LABEL': 'Type_Metadata',
                'data_type': 'array<array<array<string>>>',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            },
            {
                'kind': 'array',
                'name': '_inner_',
                'LABEL': 'Type_Metadata',
                'data_type': 'array<array<string>>',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_',
            },
            {
                'kind': 'array',
                'name': '_inner_',
                'LABEL': 'Type_Metadata',
                'data_type': 'array<string>',
                'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_/_inner_',
            },
        ]
        expected_rels = [
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Column',
                'TYPE': 'TYPE_METADATA',
                'REVERSE_TYPE': 'TYPE_METADATA_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_/_inner_',
                'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_',
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
        ]

        def drain(fetch_next, serialize):
            # Pull rows until the producer returns a falsy value, serializing
            # each row as it arrives.
            serialized_rows = []
            row = fetch_next()
            while row:
                serialized_rows.append(serialize(row))
                row = fetch_next()
            return serialized_rows

        # NOTE(review): only the first len(expected) rows are compared below;
        # extra trailing rows in the actual output (for example one for the
        # innermost scalar, if the serializer emits it) would go unnoticed.
        actual_nodes = drain(root_array.next_node,
                             neo4_serializer.serialize_node)
        for position, expected_node in enumerate(expected_nodes):
            self.assertEqual(actual_nodes[position], expected_node)

        actual_rels = drain(root_array.next_relation,
                            neo4_serializer.serialize_relationship)
        for position, expected_rel in enumerate(expected_rels):
            self.assertEqual(actual_rels[position], expected_rel)