def test_transform_map_type(self) -> None:
    # A map whose value is itself a map should parse into nested
    # MapTypeMetadata objects, each with a scalar key.
    hive_type = 'map<string,map<string,int>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    # Build the expected tree top-down: outer map, then the inner map value.
    expected = MapTypeMetadata(name='col1', parent=column, type_str=hive_type)
    outer_key = ScalarTypeMetadata(name='_map_key', parent=expected, type_str='string')
    outer_value = MapTypeMetadata(name='_map_value', parent=expected, type_str='map<string,int>')
    nested_key = ScalarTypeMetadata(name='_map_key', parent=outer_value, type_str='string')
    nested_value = ScalarTypeMetadata(name='_map_value', parent=outer_value, type_str='int')

    expected.map_key_type = outer_key
    expected.map_value_type = outer_value
    outer_value.map_key_type = nested_key
    outer_value.map_value_type = nested_value

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def transform(self, record: Any) -> TableMetadata:
    """
    Attach type metadata to every column of a table record.

    For each column the column key is set and the configured parsing
    function is applied to the column's type string. When anything in that
    step raises, the column receives a plain ScalarTypeMetadata built from
    its raw type string instead, and the failure is counted and logged.

    :param record: the record to transform; must be a TableMetadata
    :return: the same record, with type metadata set on each column
    :raises Exception: if record is not a TableMetadata instance
    """
    if not isinstance(record, TableMetadata):
        raise Exception(
            f"ComplexTypeTransformer expects record of type TableMetadata, received {type(record)}"
        )

    for column in record.columns:
        try:
            column.set_column_key(record._get_col_key(column))
            parsed_metadata = self._parsing_function(column.type, column.name, column)
            column.set_type_metadata(parsed_metadata)
        except Exception as e:
            # Default to scalar type if the type string cannot be parsed
            fallback_metadata = ScalarTypeMetadata(name=column.name,
                                                   parent=column,
                                                   type_str=column.type)
            column.set_type_metadata(fallback_metadata)
            self.failure_count += 1
            LOGGER.warning(
                f"Could not parse type for column {column.name} in table {record.name}: {e}"
            )
            continue

        # Only reached when the column was parsed without an exception.
        self.success_count += 1

    return record
def test_transform_struct_map_array_nested_type(self) -> None:
    # A struct holding a map-of-arrays field and an array field. The
    # expected array nodes carry no inner type because their elements
    # are scalars.
    hive_type = 'struct<nest1:map<string,array<int>>,nest2:array<string>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = StructTypeMetadata(name='col1', parent=column, type_str=hive_type)
    nest1_map = MapTypeMetadata(name='nest1', parent=expected, type_str='map<string,array<int>>')
    nest1_key = ScalarTypeMetadata(name='_map_key', parent=nest1_map, type_str='string')
    nest1_value = ArrayTypeMetadata(name='_map_value', parent=nest1_map, type_str='array<int>')
    nest2_array = ArrayTypeMetadata(name='nest2', parent=expected, type_str='array<string>')

    expected.struct_items = {
        'nest1': nest1_map,
        'nest2': nest2_array,
    }
    nest1_map.map_key_type = nest1_key
    nest1_map.map_value_type = nest1_value
    # Struct fields are ordered by their position in the type string.
    nest1_map.sort_order = 0
    nest2_array.sort_order = 1

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_hive_parser_with_failures(self) -> None:
    # When the parsing function raises, the transformer should count one
    # failure and give the column a scalar type built from the raw string.
    config = ConfigFactory.from_dict({
        PARSING_FUNCTION: 'databuilder.utils.hive_complex_type_parser.parse_hive_type',
    })
    transformer = ComplexTypeTransformer()
    transformer.init(conf=config)

    column = ColumnMetadata('col1', 'array type', 'array<array<int>>', 0)
    table_metadata = TableMetadata('hive',
                                   'gold',
                                   'test_schema',
                                   'test_table',
                                   'test_table',
                                   [column])

    expected_fallback = ScalarTypeMetadata(name='col1',
                                           parent=column,
                                           type_str='array<array<int>>')

    with patch.object(transformer, '_parsing_function') as parser_mock:
        # Force every parse attempt to fail.
        parser_mock.side_effect = MagicMock(
            side_effect=Exception('Could not parse'))

        result = transformer.transform(table_metadata)

        self.assertEqual(transformer.success_count, 0)
        self.assertEqual(transformer.failure_count, 1)

        for transformed_column in result.columns:
            self.assertEqual(transformed_column.get_type_metadata(),
                             expected_fallback)
def test_transform_map_struct_nested_type(self) -> None:
    # A map whose values are structs: the struct hangs off the map as
    # '_map_value' and its fields keep their declaration order.
    hive_type = 'map<string,struct<nest1:int,nest2:int>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = MapTypeMetadata(name='col1', parent=column, type_str=hive_type)
    key_type = ScalarTypeMetadata(name='_map_key', parent=expected, type_str='string')
    value_struct = StructTypeMetadata(name='_map_value',
                                      parent=expected,
                                      type_str='struct<nest1:int,nest2:int>')
    nest1 = ScalarTypeMetadata(name='nest1', parent=value_struct, type_str='int')
    nest2 = ScalarTypeMetadata(name='nest2', parent=value_struct, type_str='int')

    expected.map_key_type = key_type
    expected.map_value_type = value_struct
    value_struct.struct_items = {
        'nest1': nest1,
        'nest2': nest2,
    }
    nest1.sort_order = 0
    nest2.sort_order = 1

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_transform_no_complex_type(self) -> None:
    # A plain 'int' column should come back as a single scalar node.
    hive_type = 'int'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = ScalarTypeMetadata(name='col1', parent=column, type_str=hive_type)

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_transform_array_struct_nested_type(self) -> None:
    # An array of structs: the struct is attached as the array's
    # '_inner_' type and its fields keep their declaration order.
    hive_type = 'array<struct<nest1:int,nest2:int>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = ArrayTypeMetadata(name='col1', parent=column, type_str=hive_type)
    element_struct = StructTypeMetadata(name='_inner_',
                                        parent=expected,
                                        type_str='struct<nest1:int,nest2:int>')
    nest1 = ScalarTypeMetadata(name='nest1', parent=element_struct, type_str='int')
    nest2 = ScalarTypeMetadata(name='nest2', parent=element_struct, type_str='int')

    expected.array_inner_type = element_struct
    element_struct.struct_items = {
        'nest1': nest1,
        'nest2': nest2,
    }
    nest1.sort_order = 0
    nest2.sort_order = 1

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_transform_union_as_nested_type(self) -> None:
    # uniontype fields inside a struct are expected to stay scalar nodes,
    # even when the union itself mentions a struct.
    hive_type = ('struct<nest1:uniontype<string,struct<c1:int,c2:string>>,'
                 'nest2:uniontype<string,int>>')
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = StructTypeMetadata(name='col1', parent=column, type_str=hive_type)
    nest1_union = ScalarTypeMetadata(name='nest1',
                                     parent=expected,
                                     type_str='uniontype<string,struct<c1:int,c2:string>>')
    nest2_union = ScalarTypeMetadata(name='nest2',
                                     parent=expected,
                                     type_str='uniontype<string,int>')

    expected.struct_items = {
        'nest1': nest1_union,
        'nest2': nest2_union,
    }
    nest1_union.sort_order = 0
    nest2_union.sort_order = 1

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_transform_union_as_scalar_type(self) -> None:
    # A top-level uniontype column is expected to parse as one scalar
    # node carrying the full type string.
    hive_type = 'uniontype<string,struct<c1:int,c2:string>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = ScalarTypeMetadata(name='col1', parent=column, type_str=hive_type)

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_transform_array_map_nested_type(self) -> None:
    # An array of maps: the map is attached as the array's '_inner_'
    # type, with scalar key and value children.
    hive_type = 'array<map<string,int>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = ArrayTypeMetadata(name='col1', parent=column, type_str=hive_type)
    element_map = MapTypeMetadata(name='_inner_', parent=expected, type_str='map<string,int>')
    element_key = ScalarTypeMetadata(name='_map_key', parent=element_map, type_str='string')
    element_value = ScalarTypeMetadata(name='_map_value', parent=element_map, type_str='int')

    expected.array_inner_type = element_map
    element_map.map_key_type = element_key
    element_map.map_value_type = element_value

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def parse_hive_type(type_str: str, name: str, parent: Union[ColumnMetadata, TypeMetadata]) -> TypeMetadata:
    """
    Recursively convert a Hive type string into a TypeMetadata tree.

    The type string is lower-cased and matched in full against the
    ``complex_type`` grammar. Scalars become ScalarTypeMetadata; arrays,
    maps and structs become their matching metadata class, with children
    parsed recursively and parented to the new node.

    :param type_str: the Hive type string to parse
    :param name: name given to the node created for this type
    :param parent: the column or type metadata that owns the new node
    :return: the root TypeMetadata for the parsed type
    :raises Exception: if the parse result matches none of the known kinds
        (NOTE(review): ``complex_type.parseString`` presumably raises its
        own error on input the grammar rejects -- confirm)
    """
    type_str = type_str.lower()
    parsed_type = complex_type.parseString(type_str, parseAll=True)

    if parsed_type.scalar_type:
        return ScalarTypeMetadata(name=name, parent=parent, type_str=type_str)

    results = parsed_type[0]

    if parsed_type.array_type:
        array_node = ArrayTypeMetadata(name=name, parent=parent, type_str=type_str)
        element_type = parse_hive_type(results.type, '_inner_', array_node)
        # Scalar element types are intentionally not attached to the array.
        if isinstance(element_type, ScalarTypeMetadata):
            return array_node
        array_node.array_inner_type = element_type
        return array_node

    if parsed_type.map_type:
        map_node = MapTypeMetadata(name=name, parent=parent, type_str=type_str)
        map_node.map_key_type = parse_hive_type(results.key, '_map_key', map_node)
        map_node.map_value_type = parse_hive_type(results.type, '_map_value', map_node)
        return map_node

    if parsed_type.struct_type:
        struct_node = StructTypeMetadata(name=name, parent=parent, type_str=type_str)
        fields = {}
        for position, field in enumerate(results):
            field_type = parse_hive_type(field.type, field.name, struct_node)
            # Record each field's position within the struct definition.
            field_type.sort_order = position
            fields[field.name] = field_type
        struct_node.struct_items = fields
        return struct_node

    raise Exception(f"Unrecognized type: {type_str}")
def test_transform_non_alpha_only_types(self) -> None:
    # Scalar type names containing parentheses, digits, commas, spaces or
    # underscores must be kept intact as scalar nodes, including when
    # they appear as map keys and values.
    hive_type = ('struct<nest1:decimal(10,2),nest2:double precision,'
                 'nest3:varchar(32),nest4:map<varchar(32),decimal(10,2)>,'
                 'nest5:interval_day_time>')
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    expected = StructTypeMetadata(name='col1', parent=column, type_str=hive_type)
    nest1 = ScalarTypeMetadata(name='nest1', parent=expected, type_str='decimal(10,2)')
    nest2 = ScalarTypeMetadata(name='nest2', parent=expected, type_str='double precision')
    nest3 = ScalarTypeMetadata(name='nest3', parent=expected, type_str='varchar(32)')
    nest4 = MapTypeMetadata(name='nest4',
                            parent=expected,
                            type_str='map<varchar(32),decimal(10,2)>')
    nest4_key = ScalarTypeMetadata(name='_map_key', parent=nest4, type_str='varchar(32)')
    nest4_value = ScalarTypeMetadata(name='_map_value', parent=nest4, type_str='decimal(10,2)')
    nest5 = ScalarTypeMetadata(name='nest5', parent=expected, type_str='interval_day_time')

    expected.struct_items = {
        'nest1': nest1,
        'nest2': nest2,
        'nest3': nest3,
        'nest4': nest4,
        'nest5': nest5,
    }
    nest4.map_key_type = nest4_key
    nest4.map_value_type = nest4_value

    # Struct fields are ordered by their position in the type string.
    nest1.sort_order = 0
    nest2.sort_order = 1
    nest3.sort_order = 2
    nest4.sort_order = 3
    nest5.sort_order = 4

    parsed = parse_hive_type(column.type, column.name, column)
    self.assertEqual(parsed, expected)
def test_serialize_struct_type_metadata(self) -> None:
    # Serialize a three-level struct tree (with descriptions on c3 and c5)
    # and compare the emitted nodes and relationships, in order, against
    # the expected neo4j rows.
    hive_type = 'struct<c1:struct<c2:struct<c3:string,c4:string>>,c5:string>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    root_struct = StructTypeMetadata(name='col1', parent=column, type_str=hive_type)
    c1_struct = StructTypeMetadata(name='c1',
                                   parent=root_struct,
                                   type_str='struct<c2:struct<c3:string,c4:string>>')
    c2_struct = StructTypeMetadata(name='c2',
                                   parent=c1_struct,
                                   type_str='struct<c3:string,c4:string>')
    c3_scalar = ScalarTypeMetadata(name='c3',
                                   parent=c2_struct,
                                   type_str='string',
                                   description='description of c3')
    c4_scalar = ScalarTypeMetadata(name='c4', parent=c2_struct, type_str='string')
    c5_scalar = ScalarTypeMetadata(name='c5',
                                   parent=root_struct,
                                   type_str='string',
                                   description='description of c5')

    root_struct.struct_items = {
        'c1': c1_struct,
        'c5': c5_scalar,
    }
    c1_struct.struct_items = {
        'c2': c2_struct,
    }
    c2_struct.struct_items = {
        'c3': c3_scalar,
        'c4': c4_scalar,
    }
    c1_struct.sort_order = 0
    c5_scalar.sort_order = 1
    c2_struct.sort_order = 0
    c3_scalar.sort_order = 0
    c4_scalar.sort_order = 1

    expected_nodes = [
        {
            'kind': 'struct',
            'name': 'col1',
            'data_type': 'struct<c1:struct<c2:struct<c3:string,c4:string>>,c5:string>',
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
        },
        {
            'kind': 'struct',
            'name': 'c1',
            'data_type': 'struct<c2:struct<c3:string,c4:string>>',
            'LABEL': 'Type_Metadata',
            'sort_order:UNQUOTED': 0,
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1',
        },
        {
            'kind': 'struct',
            'name': 'c2',
            'data_type': 'struct<c3:string,c4:string>',
            'LABEL': 'Type_Metadata',
            'sort_order:UNQUOTED': 0,
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
        },
        {
            'kind': 'scalar',
            'name': 'c3',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'sort_order:UNQUOTED': 0,
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3',
        },
        {
            'description': 'description of c3',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3/_description',
            'LABEL': 'Description',
            'description_source': 'description',
        },
        {
            'kind': 'scalar',
            'name': 'c4',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'sort_order:UNQUOTED': 1,
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c4',
        },
        {
            'kind': 'scalar',
            'name': 'c5',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'sort_order:UNQUOTED': 1,
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5',
        },
        {
            'description': 'description of c5',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5/_description',
            'LABEL': 'Description',
            'description_source': 'description',
        },
    ]

    expected_rels = [
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Column',
            'TYPE': 'TYPE_METADATA',
            'REVERSE_TYPE': 'TYPE_METADATA_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3/_description',
            'START_LABEL': 'Type_Metadata',
            'END_LABEL': 'Description',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c3',
            'TYPE': 'DESCRIPTION',
            'REVERSE_TYPE': 'DESCRIPTION_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2/c4',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c1/c2',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5/_description',
            'START_LABEL': 'Type_Metadata',
            'END_LABEL': 'Description',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/c5',
            'TYPE': 'DESCRIPTION',
            'REVERSE_TYPE': 'DESCRIPTION_OF',
        },
    ]

    # Drain the node iterator until it yields a falsy row.
    actual_nodes = []
    while True:
        node_row = root_struct.next_node()
        if not node_row:
            break
        actual_nodes.append(neo4_serializer.serialize_node(node_row))
    # Rows are compared position by position against the expected list.
    for index, expected_node in enumerate(expected_nodes):
        self.assertEqual(actual_nodes[index], expected_node)

    # Same procedure for relationships.
    actual_rels = []
    while True:
        relation_row = root_struct.next_relation()
        if not relation_row:
            break
        actual_rels.append(neo4_serializer.serialize_relationship(relation_row))
    for index, expected_rel in enumerate(expected_rels):
        self.assertEqual(actual_rels[index], expected_rel)
def test_serialize_map_struct_type_metadata(self) -> None:
    # Serialize a map whose value is a struct containing another map, and
    # compare the emitted nodes and relationships, in order, against the
    # expected neo4j rows.
    hive_type = 'map<string,struct<c1:map<string,string>,c2:string>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    root_map = MapTypeMetadata(name='col1', parent=column, type_str=hive_type)
    root_key = ScalarTypeMetadata(name='_map_key', parent=root_map, type_str='string')
    value_struct = StructTypeMetadata(name='_map_value',
                                      parent=root_map,
                                      type_str='struct<c1:map<string,string>,c2:string>')
    c1_map = MapTypeMetadata(name='c1', parent=value_struct, type_str='map<string,string>')
    c1_key = ScalarTypeMetadata(name='_map_key', parent=c1_map, type_str='string')
    c1_value = ScalarTypeMetadata(name='_map_value', parent=c1_map, type_str='string')
    c2_scalar = ScalarTypeMetadata(name='c2', parent=value_struct, type_str='string')

    root_map.map_key_type = root_key
    root_map.map_value_type = value_struct
    value_struct.struct_items = {
        'c1': c1_map,
        'c2': c2_scalar,
    }
    c1_map.map_key_type = c1_key
    c1_map.map_value_type = c1_value
    c1_map.sort_order = 0
    c2_scalar.sort_order = 1

    expected_nodes = [
        {
            'kind': 'map',
            'name': 'col1',
            'data_type': 'map<string,struct<c1:map<string,string>,c2:string>>',
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
        },
        {
            'kind': 'scalar',
            'name': '_map_key',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_key',
        },
        {
            'kind': 'struct',
            'name': '_map_value',
            'data_type': 'struct<c1:map<string,string>,c2:string>',
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
        },
        {
            'kind': 'map',
            'name': 'c1',
            'data_type': 'map<string,string>',
            'sort_order:UNQUOTED': 0,
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
        },
        {
            'kind': 'scalar',
            'name': '_map_key',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_key',
        },
        {
            'kind': 'scalar',
            'name': '_map_value',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_value',
        },
        {
            'kind': 'scalar',
            'name': 'c2',
            'data_type': 'string',
            'LABEL': 'Type_Metadata',
            'sort_order:UNQUOTED': 1,
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c2',
        },
    ]

    expected_rels = [
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Column',
            'TYPE': 'TYPE_METADATA',
            'REVERSE_TYPE': 'TYPE_METADATA_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_key',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_key',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1/_map_value',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value/c2',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_map_value',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
    ]

    # Drain the node iterator until it yields a falsy row.
    actual_nodes = []
    while True:
        node_row = root_map.next_node()
        if not node_row:
            break
        actual_nodes.append(neo4_serializer.serialize_node(node_row))
    # Rows are compared position by position against the expected list.
    for index, expected_node in enumerate(expected_nodes):
        self.assertEqual(actual_nodes[index], expected_node)

    # Same procedure for relationships.
    actual_rels = []
    while True:
        relation_row = root_map.next_relation()
        if not relation_row:
            break
        actual_rels.append(neo4_serializer.serialize_relationship(relation_row))
    for index, expected_rel in enumerate(expected_rels):
        self.assertEqual(actual_rels[index], expected_rel)
def test_serialize_array_type_metadata(self) -> None:
    # Serialize a triple-nested array and compare the emitted nodes and
    # relationships, in order, against the expected neo4j rows. Only the
    # three array levels are listed in the expectations.
    hive_type = 'array<array<array<string>>>'
    column = ColumnMetadata('col1', None, hive_type, 0)
    column.set_column_key(self.column_key)

    root_array = ArrayTypeMetadata(name='col1', parent=column, type_str=hive_type)
    level1_array = ArrayTypeMetadata(name='_inner_',
                                     parent=root_array,
                                     type_str='array<array<string>>')
    level2_array = ArrayTypeMetadata(name='_inner_',
                                     parent=level1_array,
                                     type_str='array<string>')
    level3_scalar = ScalarTypeMetadata(name='_inner_',
                                       parent=level2_array,
                                       type_str='string')

    root_array.array_inner_type = level1_array
    level1_array.array_inner_type = level2_array
    level2_array.array_inner_type = level3_scalar

    expected_nodes = [
        {
            'kind': 'array',
            'name': 'col1',
            'LABEL': 'Type_Metadata',
            'data_type': 'array<array<array<string>>>',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
        },
        {
            'kind': 'array',
            'name': '_inner_',
            'LABEL': 'Type_Metadata',
            'data_type': 'array<array<string>>',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_',
        },
        {
            'kind': 'array',
            'name': '_inner_',
            'LABEL': 'Type_Metadata',
            'data_type': 'array<string>',
            'KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_/_inner_',
        },
    ]

    expected_rels = [
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Column',
            'TYPE': 'TYPE_METADATA',
            'REVERSE_TYPE': 'TYPE_METADATA_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
        {
            'END_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_/_inner_',
            'START_KEY': 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_',
            'END_LABEL': 'Type_Metadata',
            'START_LABEL': 'Type_Metadata',
            'TYPE': 'SUBTYPE',
            'REVERSE_TYPE': 'SUBTYPE_OF',
        },
    ]

    # Drain the node iterator until it yields a falsy row.
    actual_nodes = []
    while True:
        node_row = root_array.next_node()
        if not node_row:
            break
        actual_nodes.append(neo4_serializer.serialize_node(node_row))
    # Rows are compared position by position against the expected list.
    for index, expected_node in enumerate(expected_nodes):
        self.assertEqual(actual_nodes[index], expected_node)

    # Same procedure for relationships.
    actual_rels = []
    while True:
        relation_row = root_array.next_relation()
        if not relation_row:
            break
        actual_rels.append(neo4_serializer.serialize_relationship(relation_row))
    for index, expected_rel in enumerate(expected_rels):
        self.assertEqual(actual_rels[index], expected_rel)