def _array_field(field: dict,
                 parent_namespace: str = None,
                 queue: list = None,
                 references: list = None) -> "Field":
    """ helper function for adding information to array fields

    If array contains references to embedded enum or record,
    will add that as a new file in the queue.

    Note that ``field['type']['items']`` is normalised in place: a bare
    string item type is replaced by ``{'type': <string>}`` and, for
    primitive items, a ``'name': None`` entry is added to it.

    Parameters
    ----------
        field: dict
            array field to extract information from; must provide
            ``field['name']`` and ``field['type']['items']``
        parent_namespace: str
            namespace of the parent file
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file; forwarded to the
            helper parsers. A new list is created when omitted.

    Returns
    -------
        Field
            field object with ``fieldtype='array'`` whose
            ``array_item_type`` describes the array items

    Raises
    ------
        ValueError
            if the item type is not primitive, record, enum or reference
    """
    # A fresh list per call: a shared ``[]`` default would be reused by
    # every call that omits the argument.
    if references is None:
        references = []

    kwargs = {
        'name': field['name'],
        'fieldtype': 'array',
        'avrotype': None,
        'default': None,
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None
    }

    # normalise the shorthand "items": "string" to "items": {"type": ...}
    if isinstance(field['type']['items'], str):
        field['type']['items'] = {'type': field['type']['items']}
    items = field['type']['items']

    field_item_type = _get_field_type(items, references)

    # handle primitive types
    if field_item_type == 'primitive':
        # add None name to primitive type
        items['name'] = None
        kwargs['array_item_type'] = _primitive_type(items)

    # handle complex types
    elif field_item_type == 'record':
        # array items don't have names, so a placeholder name is used and
        # the item schema is nested under 'type'
        # NOTE(review): namespace is read from the array schema itself,
        # not from the item schema -- confirm this is intended.
        kwargs['array_item_type'] = _record_field(
            field={
                'name': 'arrayfield',
                'type': items,
                'namespace': field['type'].get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue,
            references=references
        )

    elif field_item_type == 'enum':
        # array items don't have names, so a placeholder name is used and
        # the item schema is nested under 'type'
        kwargs['array_item_type'] = _enum_field(
            field={
                'name': 'arrayfield',
                'type': items,
                'namespace': field['type'].get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue,
            references=references
        )

    # handle reference types
    elif field_item_type == 'reference':
        kwargs['array_item_type'] = _reference_type(
            field={'name': items['type']},
            references=references
        )

    else:
        # The item schema lives under field['type']['items']; looking it
        # up at field['items'] would raise KeyError and hide this error.
        unsupported = items.get('type') if isinstance(items, dict) else items
        raise ValueError(
            f"avro type {unsupported} is not supported")

    return Field(**kwargs)
def _union_field(field: dict,
                 parent_namespace: str = None,
                 queue: list = None,
                 references: list = None) -> "Field":
    """ helper function for adding information to union fields

    If union contains references to embedded enum or record,
    will add that as a new file in the queue.

    Parameters
    ----------
        field: dict
            union field to extract information from; ``field['type']``
            is iterated as the collection of union member types
        parent_namespace: str
            name of parent file namespace
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file; forwarded to the
            helper parsers. A new list is created when omitted.

    Returns
    -------
        Field
            field object with ``fieldtype='union'`` whose ``union_types``
            holds one parsed entry per union member, in schema order

    Raises
    ------
        ValueError
            if a union member is not a primitive, record, array, enum,
            map or reference type
    """
    # A fresh list per call: a shared ``[]`` default would be reused by
    # every call that omits the argument.
    if references is None:
        references = []

    union_types = []
    kwargs = {
        'name': field['name'],
        'fieldtype': 'union',
        'avrotype': None,
        'default': field.get('default', None),
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None,
        'union_types': union_types
    }

    # iterate through possible types
    for typ in field['type']:
        field_type = _get_field_type(field={'type': typ},
                                     references=references)

        # primitive types
        if field_type == 'primitive':
            union_types.append(
                _primitive_type({'name': 'uniontype', 'type': typ}))

        # nested complex record
        elif field_type == 'record':
            union_types.append(
                _record_field(
                    field={'name': 'uniontype', 'type': typ},
                    parent_namespace=_get_namespace(typ, parent_namespace),
                    queue=queue,
                    references=references))

        elif field_type == 'array':
            union_types.append(
                _array_field(
                    field={'name': 'arraytype', 'type': typ},
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        # nested complex enum
        elif field_type == 'enum':
            union_types.append(
                _enum_field(
                    field={'name': 'uniontype', 'type': typ},
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        elif field_type == 'map':
            union_types.append(
                _map_field(
                    field={'name': 'uniontype', 'type': typ},
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        # references to previously defined complex types
        elif field_type == 'reference':
            union_types.append(
                _reference_type(
                    field={'name': 'uniontype', 'type': typ},
                    references=references))

        else:
            # Report the offending member directly; a union field has no
            # 'items' key, so indexing it would raise KeyError instead.
            raise ValueError(
                f"avro type {typ} is not supported")

    return Field(**kwargs)
def _record_file(file: File, item: dict, queue: List[dict]) -> None:
    """ function for adding information for record files

    Parses every entry of ``item['fields']`` into a field object, stores
    it in ``file.fields`` under the parsed field's name, then appends the
    references gathered while parsing to ``file.imports`` and dedupes them.

    Parameters
    ----------
        file: File
            file object to fill in; its ``namespace`` is read and its
            ``fields`` and ``imports`` are updated
        item: dict
            record schema object to be turned into a file
        queue: list
            array of file objects to be processed

    Returns
    -------
        None

    Raises
    ------
        ValueError
            if a field's type is not one of the handled kinds
    """
    gathered = []

    for raw_field in item['fields']:
        kind = _get_field_type(field=raw_field, references=gathered)

        # simple cases first: nothing is queued for these
        if kind == 'primitive':
            parsed = _primitive_type(raw_field)

        elif kind == 'reference':
            parsed = _reference_type(field=raw_field, references=gathered)

        # nested complex types resolve their own namespace
        elif kind == 'record':
            parsed = _record_field(
                field=raw_field,
                parent_namespace=_get_namespace(raw_field['type'],
                                                file.namespace),
                queue=queue,
                references=gathered,
            )

        elif kind == 'enum':
            parsed = _enum_field(
                field=raw_field,
                parent_namespace=_get_namespace(raw_field['type'],
                                                file.namespace),
                queue=queue,
                references=gathered,
            )

        # container types are parsed under the file's own namespace
        elif kind == 'array':
            parsed = _array_field(
                field=raw_field,
                parent_namespace=file.namespace,
                queue=queue,
                references=gathered,
            )

        elif kind == 'map':
            parsed = _map_field(
                field=raw_field,
                parent_namespace=file.namespace,
                queue=queue,
                references=gathered,
            )

        elif kind == 'union':
            parsed = _union_field(
                field=raw_field,
                parent_namespace=file.namespace,
                queue=queue,
                references=gathered,
            )

        else:
            raise ValueError('fieldtype is not supported...')

        file.fields[parsed.name] = parsed

    file.imports += gathered
    file.imports = dedupe_imports(file.imports)
def _map_field(field: dict,
               parent_namespace: str = None,
               queue: list = None,
               references: list = None) -> "Field":
    """ helper function for adding information to map fields

    If map contains references to embedded enum or record,
    will add that as a new file in the queue.

    Parameters
    ----------
        field: dict
            map field to extract information from; must provide
            ``field['name']`` and ``field['type']['values']``
        parent_namespace: str
            name of parent file namespace
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file; forwarded to the
            helper parsers. A new list is created when omitted.

    Returns
    -------
        Field
            field object with ``fieldtype='map'`` whose ``map_type``
            describes the map values

    Raises
    ------
        ValueError
            if the value type is not primitive, record, enum, map, array
            or reference
    """
    # A fresh list per call: a shared ``[]`` default would be reused by
    # every call that omits the argument.
    if references is None:
        references = []

    kwargs = {
        'name': field['name'],
        'fieldtype': 'map',
        'avrotype': None,
        'default': field.get('default', None),
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None,
        'union_types': [],
        'map_type': None
    }

    values = field['type']['values']

    # a bare string value type is wrapped so it can be classified like a
    # regular schema object
    if isinstance(values, str):
        map_type = _get_field_type({'type': values}, references)
    else:
        map_type = _get_field_type(values, references)

    # handle primitive types
    if map_type == 'primitive':
        kwargs['map_type'] = _primitive_type(
            {'name': 'maptype', 'type': values}
        )

    # handle complex types
    elif map_type == 'record':
        # map values don't have names, so a placeholder name is used and
        # the value schema is nested under 'type'
        kwargs['map_type'] = _record_field(
            field={'name': 'mapfield',
                   'type': values,
                   'namespace': values.get('namespace', None)},
            parent_namespace=parent_namespace,
            queue=queue,
            references=references
        )

    elif map_type == 'enum':
        # map values don't have names, so a placeholder name is used and
        # the value schema is nested under 'type'
        kwargs['map_type'] = _enum_field(
            field={'name': 'mapfield',
                   'type': values,
                   'namespace': values.get('namespace', None)},
            parent_namespace=parent_namespace,
            queue=queue,
            references=references
        )

    elif map_type == 'map':
        # handle nested maps
        # The key must be exactly 'namespace'; two adjacent string
        # literals would be concatenated into 'namespacenamespace'.
        kwargs['map_type'] = _map_field(
            field={'name': 'nestedMap',
                   'type': values,
                   'namespace': values.get('namespace', None)},
            parent_namespace=parent_namespace,
            queue=queue,
            references=references
        )

    elif map_type == 'array':
        # handle nested arrays
        kwargs['map_type'] = _array_field(
            field={'name': 'nestedMap',
                   'type': values,
                   'namespace': values.get('namespace', None)},
            parent_namespace=parent_namespace,
            queue=queue,
            references=references
        )

    # handle reference types
    elif map_type == 'reference':
        kwargs['map_type'] = _reference_type(
            field={'name': values},
            references=references
        )

    else:
        raise ValueError(
            f"avro type {values} is not supported"
        )

    return Field(**kwargs)