def test_namespace_retrieval(self):
    """ checks the three resolution paths of the _get_namespace helper

    an explicit namespace on the object, a namespace inherited from the
    parent, and the empty-string fallback when neither is given.
    """
    own_namespace = 'test.test'
    inherited_namespace = 'test.test'

    # resolve all three cases up front, then assert on each result
    from_object = _get_namespace({'namespace': 'test.test'})
    from_parent = _get_namespace({}, parent_namespace=inherited_namespace)
    from_nothing = _get_namespace({})

    self.assertEqual(
        own_namespace,
        from_object,
        'an object with a namespace should return the namespace')

    self.assertEqual(
        inherited_namespace,
        from_parent,
        'an object with no namespace but a parent_namespace should return the parent_namespace'  # NOQA
    )

    self.assertEqual(
        '',
        from_nothing,
        'an object with no namespace or parent_namespace should return an empty string'  # NOQA
    )
def _enum_field(field: dict,
                parent_namespace: str = None,
                queue: list = None,
                references: list = None) -> 'Field':
    """ helper function for adding information to nested enum field

    will add field as a new file in the queue and will be referenced.

    Parameters
    ----------
        field: dict
            field object to extract information from. Its 'type' entry is
            mutated in place: a 'namespace' key is written to it.
        parent_namespace: str
            namespace handed to _get_namespace together with the enum type
        queue: list
            queue of files to add to project; the enum type is appended
            to it when a queue is supplied
        references: list
            references already made in file; the new reference is
            appended to it when a list is supplied

    Returns
    -------
        Field
            a field of fieldtype 'reference' pointing at the enum type
    """
    enum_type = field['type']

    # impute the namespace on the nested type so the queued file carries it
    enum_type['namespace'] = _get_namespace(
        obj=enum_type,
        parent_namespace=parent_namespace
    )

    reference = _create_reference(enum_type)

    # both collections default to None, so only record when provided
    if references is not None:
        references.append(reference)
    if queue is not None:
        queue.append(enum_type)

    # build the keyword arguments locally so no state is shared between
    # calls and the function does not rely on a name defined elsewhere
    kwargs = {
        'name': field['name'],
        'reference_name': reference.name,
        'reference_namespace': reference.namespace,
        'fieldtype': 'reference',
        'default': field.get('default', None)
    }
    return Field(**kwargs)
def _build_namespace_tree(self) -> None:
    """ builds tree structure on namespace

    Works through a deep copy of self.obj['avsc'] as a queue, creates a
    File for every item, attaches it to the node returned by
    self._traverse_tree for the item's namespace, and finally stores the
    root node on self.file_tree.

    Returns
    -------
        None

    Raises
    ------
        ValueError
            if an item's type is neither 'record' nor 'enum'
    """
    # initialize empty node with empty string name
    root_node = Node(name='')

    # populate queue prior to tree building; deep copy so the schemas
    # held on self.obj are not mutated by the namespace imputation below
    queue = copy.deepcopy(self.obj['avsc'])

    while queue:
        # get first item in queue
        item = queue.pop(0)

        # impute namespace
        item['namespace'] = _get_namespace(item)

        # traverse to namespace starting from root_node
        current_node = self._traverse_tree(
            root_node=root_node,
            namespace=item['namespace']
        )

        # initialize empty file obj for mutation
        # NOTE(review): `enum_sumbols` looks like a misspelling of
        # `enum_symbols`; it is kept because it must match File's
        # constructor, which is not visible here -- confirm.
        file = File(
            name=item['name'],
            avrotype=item['type'],
            namespace=item['namespace'],
            schema=item,
            fields={},
            imports=[],
            enum_sumbols=[]
        )

        # handle record type; the queue is passed along so the helper
        # can add further items to be processed by this loop
        if file.avrotype == 'record':
            _record_file(file, item, queue)

        # handle enum type file
        elif file.avrotype == 'enum':
            _enum_file(file, item)

        else:
            # file is accessed through attributes everywhere else, so
            # report the type via the attribute rather than subscripting
            raise ValueError(
                f"{file.avrotype} is currently not supported.")

        current_node.files[item['name']] = file

    self.file_tree = root_node
def _record_file(file: File, item: dict, queue: List[dict]) -> None:
    """ populates a record file with its fields and imports

    Parameters
    ----------
        file: dict
            file object containing information from the avro schema;
            its fields and imports are filled in place
        item: dict
            object to be turned into a file
        queue: list
            array of file objects to be processed

    Returns
    -------
        None
    """
    collected_refs = []

    for raw_field in item['fields']:
        kind = _get_field_type(
            field=raw_field,
            references=collected_refs
        )

        # container-like types resolve against the file's own namespace
        if kind in ('array', 'map', 'union'):
            if kind == 'array':
                builder = _array_field
            elif kind == 'map':
                builder = _map_field
            else:
                builder = _union_field
            parsed = builder(
                field=raw_field,
                parent_namespace=file.namespace,
                queue=queue,
                references=collected_refs
            )

        # nested complex types resolve their namespace before delegating
        elif kind in ('record', 'enum'):
            builder = _record_field if kind == 'record' else _enum_field
            parsed = builder(
                field=raw_field,
                parent_namespace=_get_namespace(raw_field['type'],
                                                file.namespace),
                queue=queue,
                references=collected_refs
            )

        # references to previously defined types
        elif kind == 'reference':
            parsed = _reference_type(
                field=raw_field,
                references=collected_refs
            )

        # primitive types
        elif kind == 'primitive':
            parsed = _primitive_type(raw_field)

        else:
            raise ValueError('fieldtype is not supported...')

        file.fields[parsed.name] = parsed

    # merge the references gathered above, then drop duplicates
    file.imports += collected_refs
    file.imports = dedupe_imports(file.imports)
def _union_field(field: dict,
                 parent_namespace: str = None,
                 queue: list = None,
                 references: list = None) -> 'Field':
    """ helper function for adding information to union fields

    If union contains references to embedded enum or record,
    will add that as a new file in the queue.

    Parameters
    ----------
        field: dict
            union field to extract information from
        parent_namespace: str
            name of parent file namespace
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file. A fresh list is
            used when omitted.

    Returns
    -------
        Field
            field of fieldtype 'union' whose union_types holds one entry
            per member type of the union

    Raises
    ------
        ValueError
            if a member type of the union is not supported
    """
    # a fresh list per call; a mutable default would be shared by every
    # call that omits the argument
    if references is None:
        references = []

    # python is annoying with mutability of this dict
    kwargs = {
        'name': field['name'],
        'fieldtype': 'union',
        'avrotype': None,
        'default': field.get('default', None),
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None,
        'union_types': []
    }
    union_types = kwargs['union_types']

    # iterate through possible types
    for typ in field['type']:
        field_type = _get_field_type(field={'type': typ},
                                     references=references)

        # primitive types
        if field_type == 'primitive':
            union_types.append(
                _primitive_type({'name': 'uniontype', 'type': typ}))

        # nested complex record
        elif field_type == 'record':
            union_types.append(
                _record_field(
                    field={'name': 'uniontype', 'type': typ},
                    parent_namespace=_get_namespace(typ, parent_namespace),
                    queue=queue,
                    references=references))

        elif field_type == 'array':
            union_types.append(
                _array_field(
                    field={'name': 'arraytype', 'type': typ},
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        # nested complex enum
        elif field_type == 'enum':
            union_types.append(
                _enum_field(
                    field={'name': 'uniontype', 'type': typ},
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        elif field_type == 'map':
            union_types.append(
                _map_field(
                    field={'name': 'uniontype', 'type': typ},
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        # references to previously defined complex types
        elif field_type == 'reference':
            union_types.append(
                _reference_type(
                    field={'name': 'uniontype', 'type': typ},
                    references=references))

        else:
            # report the offending member type itself; a union field has
            # no 'items' entry to look the type up in
            raise ValueError(f"avro type {typ} is not supported")

    return Field(**kwargs)