def test_enum_type(self):
    """ enum schemas, flat or nested, are reported as enum """
    flat_enum = {'type': 'enum'}
    nested_enum = {'type': {'type': 'enum'}}
    nested_array = {'type': {'type': 'array'}}

    # both spellings of an enum schema must be recognised
    for schema in (flat_enum, nested_enum):
        self.assertEqual('enum', _get_field_type(schema))

    # an array schema must not be reported as enum
    self.assertNotEqual('enum', _get_field_type(nested_array))
 def test_record_type(self):
     """ record schemas, nested or flat, are reported as record """
     nested_record = {'type': {'type': 'record'}}
     flat_record = {'type': 'record'}
     nested_array = {'type': {'type': 'array'}}

     # both spellings of a record schema must be recognised
     for schema in (nested_record, flat_record):
         self.assertEqual('record', _get_field_type(schema))

     # an array schema must not be reported as record
     self.assertNotEqual('record', _get_field_type(nested_array))
    def test_array_field(self):
        """ tests array fields return array type

        A nested ``{'type': {'type': 'array'}}`` schema must be reported as
        'array'; a flat record schema and a union (list of types) must not.
        """
        array = {'type': {'type': 'array'}}
        not_array = {'type': 'record'}
        also_not_array = {'type': ['this', 'is', 'a', 'union']}

        self.assertEqual(
            'array',
            _get_field_type(array),
            'field type should return array'
        )
        # a flat record schema is not an array
        self.assertNotEqual(
            'array',
            _get_field_type(not_array),
            'field type should not return array'
        )
        # a union (list of types) is not an array either
        self.assertNotEqual(
            'array',
            _get_field_type(also_not_array),
            'field type should not return array'
        )
 def test_union_type(self):
     """ a list of types is reported as union, a nested array is not """
     union_schema = {'type': ['this is a union', 'yay']}
     array_schema = {'type': {'type': 'array'}}

     self.assertEqual('union', _get_field_type(union_schema))
     self.assertNotEqual('union', _get_field_type(array_schema))
 def test_primitive_type(self):
     """ every entry of PRIMITIVE_TYPES is primitive, an array is not """
     for type_name in PRIMITIVE_TYPES:
         self.assertEqual(
             'primitive',
             _get_field_type({'type': type_name})
         )

     # an array schema must not be reported as primitive
     nested_array = {'type': {'type': 'array'}}
     self.assertNotEqual('primitive', _get_field_type(nested_array))
예제 #6
0
def _array_field(field: dict,
                 parent_namespace: str = None,
                 queue: list = None,
                 references: list = None) -> 'Field':
    """ helper function for adding information to array fields

    The item schema of the array is classified with `_get_field_type`.
    Embedded record or enum items are delegated to `_record_field` /
    `_enum_field`, which receive `queue` and `references`.

    Parameters
    ----------
        field: dict
            array field to extract information from. When
            ``field['type']['items']`` is a bare string it is replaced
            in place by ``{'type': <that string>}``.
        parent_namespace: str
            namespace of the parent file
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file; a fresh list is
            created when omitted

    Returns
    -------
        Field

    Raises
    ------
        ValueError
            if the item type is not primitive, record, enum or reference
    """
    # a list literal as default would be shared between calls
    if references is None:
        references = []

    kwargs = {
        'name': field['name'],
        'fieldtype': 'array',
        'avrotype': None,
        'default': None,
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None
    }

    array_schema = field['type']

    # wrap bare-string item types so every branch below sees a dict
    # (this mutates the caller's schema)
    if isinstance(array_schema['items'], str):
        array_schema['items'] = {'type': array_schema['items']}
    items = array_schema['items']

    item_kind = _get_field_type(items, references)

    # handle primitive types
    if item_kind == 'primitive':
        # add None name to primitive type
        items['name'] = None
        kwargs['array_item_type'] = _primitive_type(items)

    # handle complex types
    elif item_kind == 'record':
        # array items have no name of their own, so a placeholder name is
        # used and the item schema is nested under 'type'
        kwargs['array_item_type'] = _record_field(
            field={
                'name': 'arrayfield',
                'type': items,
                'namespace': array_schema.get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue,
            references=references)

    elif item_kind == 'enum':
        # same placeholder wrapping as for records
        kwargs['array_item_type'] = _enum_field(
            field={
                'name': 'arrayfield',
                'type': items,
                'namespace': array_schema.get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue,
            references=references)

    # handle reference types
    elif item_kind == 'reference':
        kwargs['array_item_type'] = _reference_type(
            field={'name': items['type']},
            references=references)

    else:
        # the item schema lives under field['type']['items']; looking it up
        # under field['items'] would raise KeyError instead of this error
        raise ValueError(
            f"avro type {items['type']} is not supported")

    return Field(**kwargs)
예제 #7
0
def _record_file(file: File, item: dict, queue: List[dict]) -> None:
    """ function for adding information for record files

    Every entry of ``item['fields']`` is classified with `_get_field_type`
    and converted by the matching helper; the result is stored in
    ``file.fields`` under its ``name``. References collected along the way
    are added to ``file.imports``, which is then passed through
    `dedupe_imports`.

    Parameters
    ----------
        file: File
            file object containing information from the avro schema
        item: dict
            object to be turned into a file
        queue: list
            array of file objects to be processed

    Returns
    -------
        None

    Raises
    ------
        ValueError
            if a field's type is not one of the handled kinds
    """
    # one list shared by every helper call for this file
    references = []

    for field in item['fields']:

        fieldtype = _get_field_type(
            field=field,
            references=references
        )

        if fieldtype == 'array':
            parsed = _array_field(
                field=field,
                parent_namespace=file.namespace,
                queue=queue,
                references=references
            )

        elif fieldtype == 'map':
            parsed = _map_field(
                field=field,
                parent_namespace=file.namespace,
                queue=queue,
                references=references
            )

        # nested complex record
        elif fieldtype == 'record':
            parsed = _record_field(
                field=field,
                parent_namespace=_get_namespace(field['type'], file.namespace),
                queue=queue,
                references=references
            )

        # nested complex enum
        elif fieldtype == 'enum':
            parsed = _enum_field(
                field=field,
                parent_namespace=_get_namespace(field['type'], file.namespace),
                queue=queue,
                references=references
            )

        # handle union type
        elif fieldtype == 'union':
            parsed = _union_field(
                field=field,
                parent_namespace=file.namespace,
                queue=queue,
                references=references
            )

        elif fieldtype == 'reference':
            parsed = _reference_type(
                field=field,
                references=references
            )

        # handle primitive types
        elif fieldtype == 'primitive':
            parsed = _primitive_type(field)

        else:
            raise ValueError('fieldtype is not supported...')

        file.fields[parsed.name] = parsed

    # `references` is cumulative across fields, so it is added once here
    # rather than after every field (which re-appended earlier entries each
    # time). NOTE(review): assumes the helpers only append to `references`.
    file.imports += references
    file.imports = dedupe_imports(file.imports)
예제 #8
0
def _union_field(field: dict,
                 parent_namespace: str = None,
                 queue: list = None,
                 references: list = None) -> 'Field':
    """ helper function for adding information to union fields

    Each member of ``field['type']`` is classified with `_get_field_type`
    and converted by the matching helper; the results are collected, in
    order, in the ``union_types`` of the returned field. Embedded
    record, enum, array and map members are delegated to their helpers,
    which receive `queue` and `references`.

    Parameters
    ----------
        field: dict
            union field to extract information from
        parent_namespace: str
            name of parent file namespace
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file; a fresh list is
            created when omitted

    Returns
    -------
        Field

    Raises
    ------
        ValueError
            if a member type of the union is not supported
    """
    # a list literal as default would be shared between calls
    if references is None:
        references = []

    kwargs = {
        'name': field['name'],
        'fieldtype': 'union',
        'avrotype': None,
        'default': field.get('default', None),
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None,
        'union_types': []
    }
    union_types = kwargs['union_types']

    # iterate through the possible types
    for typ in field['type']:
        field_type = _get_field_type(field={'type': typ},
                                     references=references)

        # primitive types
        if field_type == 'primitive':
            union_types.append(
                _primitive_type({
                    'name': 'uniontype',
                    'type': typ
                }))

        # nested complex record
        elif field_type == 'record':
            union_types.append(
                _record_field(
                    field={
                        'name': 'uniontype',
                        'type': typ
                    },
                    parent_namespace=_get_namespace(typ, parent_namespace),
                    queue=queue,
                    references=references))

        elif field_type == 'array':
            union_types.append(
                _array_field(
                    field={
                        'name': 'arraytype',
                        'type': typ
                    },
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        # nested complex enum
        # NOTE(review): record members resolve their namespace through
        # _get_namespace while enum members get parent_namespace unchanged;
        # confirm this asymmetry is intended
        elif field_type == 'enum':
            union_types.append(
                _enum_field(
                    field={
                        'name': 'uniontype',
                        'type': typ
                    },
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        elif field_type == 'map':
            union_types.append(
                _map_field(
                    field={
                        'name': 'uniontype',
                        'type': typ
                    },
                    parent_namespace=parent_namespace,
                    queue=queue,
                    references=references))

        # references to previously defined complex types
        elif field_type == 'reference':
            union_types.append(
                _reference_type(
                    field={
                        'name': 'uniontype',
                        'type': typ
                    },
                    references=references))

        else:
            # report the offending member itself; a union field has no
            # 'items' key, so indexing it would raise KeyError instead
            raise ValueError(
                f"avro type {typ} is not supported")

    return Field(**kwargs)
예제 #9
0
def _map_field(field: dict,
               parent_namespace: str = None,
               queue: list = None,
               references: list = None) -> 'Field':
    """ helper function for adding information to map fields

    The value schema of the map (``field['type']['values']``) is
    classified with `_get_field_type`. Embedded record, enum, map and
    array values are delegated to their helpers, which receive `queue`
    and `references`.

    Parameters
    ----------
        field: dict
            map field to extract information from
        parent_namespace: str
            name of parent file namespace
        queue: list
            queue of files to add to project
        references: list
            list of references already made in file; a fresh list is
            created when omitted

    Returns
    -------
        Field

    Raises
    ------
        ValueError
            if the value type of the map is not supported
    """
    # a list literal as default would be shared between calls
    if references is None:
        references = []

    kwargs = {
        'name': field['name'],
        'fieldtype': 'map',
        'avrotype': None,
        'default': field.get('default', None),
        'reference_name': None,
        'reference_namespace': None,
        'array_item_type': None,
        'union_types': [],
        'map_type': None
    }

    values = field['type']['values']

    # a bare-string value type is wrapped for classification only; the
    # helpers below still receive the original `values`
    value_schema = {'type': values} if isinstance(values, str) else values
    map_type = _get_field_type(value_schema, references)

    # handle primitive types
    if map_type == 'primitive':
        kwargs['map_type'] = _primitive_type(
            {'name': 'maptype', 'type': values}
        )

    # handle complex types
    elif map_type == 'record':
        # map values have no name of their own, so a placeholder name is
        # used and the value schema is nested under 'type'
        kwargs['map_type'] = _record_field(
            field={
                'name': 'mapfield',
                'type': values,
                'namespace': values.get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue, references=references)

    elif map_type == 'enum':
        # same placeholder wrapping as for records
        kwargs['map_type'] = _enum_field(
            field={
                'name': 'mapfield',
                'type': values,
                'namespace': values.get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue, references=references)

    elif map_type == 'map':
        # handle nested maps; the key is 'namespace' (two adjacent string
        # literals would be concatenated into 'namespacenamespace')
        kwargs['map_type'] = _map_field(
            field={
                'name': 'nestedMap',
                'type': values,
                'namespace': values.get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue, references=references
        )

    elif map_type == 'array':
        # handle nested arrays
        # NOTE(review): the placeholder name is 'nestedMap' here as well;
        # left unchanged
        kwargs['map_type'] = _array_field(
            field={
                'name': 'nestedMap',
                'type': values,
                'namespace': values.get('namespace', None)
            },
            parent_namespace=parent_namespace,
            queue=queue, references=references
        )

    # handle reference types
    elif map_type == 'reference':
        kwargs['map_type'] = _reference_type(
            field={'name': values},
            references=references)

    else:
        raise ValueError(
            f"avro type {values} is not supported"
        )

    return Field(**kwargs)