Example #1
0
def schemaless_reader(fo, schema):
    """Read a single record written using the schemaless_writer.

    Parameters
    ----------
    fo: file like
        Input stream
    schema: dict
        Reader schema

    Returns
    -------
    The value produced by ``read_data(fo, schema)``.
    """
    # The schema is passed through acquaint_schema together with READERS
    # before any data is read from the stream.
    acquaint_schema(schema, READERS)
    record = read_data(fo, schema)
    return record
Example #2
0
    def __init__(self, fo, reader_schema=None):
        """Create a new iterator over the records of an avro stream.

        The header is read from `fo` immediately; its metadata, writer
        schema and codec are exposed as attributes on the instance.

        Parameters
        ----------
        fo: file like
            Input stream
        reader_schema: dict, optional
            Reader schema

        Raises
        ------
        ValueError
            If reading the header raises StopIteration.

        Example
        -------
        >>> with open('some-file.avro', 'rb') as fo:
        ...     avro = iter_avro(fo)
        ...     schema = avro.schema
        ...     for record in avro:
        ...         process_record(record)
        """
        self.fo = fo

        try:
            header = read_data(fo, HEADER_SCHEMA)
        except StopIteration:
            raise ValueError('cannot read header - is it an avro file?')
        self._header = header

        # `meta` values are bytes, so each one is decoded with btou here
        # before being stored.
        decoded_meta = {}
        for key, raw_value in iteritems(header['meta']):
            decoded_meta[key] = btou(raw_value)
        self.metadata = decoded_meta

        # The writer schema is stored as JSON text in the header metadata;
        # `schema` and `writer_schema` refer to the same parsed object.
        writer_schema = json.loads(decoded_meta['avro.schema'])
        self.schema = writer_schema
        self.writer_schema = writer_schema

        # Fall back to 'null' when the header names no codec.
        codec = decoded_meta.get('avro.codec', 'null')
        self.codec = codec
        self.reader_schema = reader_schema

        acquaint_schema(writer_schema, READERS)
        if reader_schema:
            populate_schema_defs(reader_schema)

        self._records = _iter_avro(
            fo, header, codec, writer_schema, reader_schema)