Example #1
0
def read_data(fo, writer_schema, reader_schema=None):
    """Decode one datum from ``fo`` with the reader registered for its type.

    The type name is derived from ``writer_schema`` through
    ``extract_record_type``. When a ``reader_schema`` is supplied and the
    type is listed in ``AVRO_TYPES``, ``match_schemas`` is called on the two
    schemas before decoding. The result of the selected ``READERS`` entry is
    returned unchanged.
    """
    kind = extract_record_type(writer_schema)
    needs_match = reader_schema and kind in AVRO_TYPES
    if needs_match:
        match_schemas(writer_schema, reader_schema)

    decode = READERS[kind]
    return decode(fo, writer_schema, reader_schema)
Example #2
0
def write_union(fo, datum, schema):
    """Encode ``datum`` as a member of the union ``schema``.

    The zero-based position of the chosen branch is written first as a long,
    followed by the value encoded with that branch's schema.

    A tuple ``datum`` is unpacked as ``(name, value)`` and selects the record
    branch whose ``"name"`` equals ``name``. Any other datum selects the
    first branch for which ``validate`` succeeds.

    Raises ``ValueError`` when no branch can be selected.
    """
    position = None

    if isinstance(datum, tuple):
        # Explicit branch selection: only record branches are matched by name.
        name, datum = datum
        for i, branch in enumerate(schema):
            is_record = extract_record_type(branch) == 'record'
            if is_record and name == branch["name"]:
                position = i
                break
        if position is None:
            raise ValueError(
                'provided union type name %s not found in schema %s'
                % (name, schema))
    else:
        # Implicit selection: first branch the value validates against.
        for i, branch in enumerate(schema):
            if validate(datum, branch):
                position = i
                break
        if position is None:
            raise ValueError(
                '%r (type %s) do not match %s' % (datum, type(datum), schema))

    # Branch index first, then the payload under that branch's schema.
    write_long(fo, position)
    write_data(fo, datum, schema[position])
Example #3
0
def read_data(fo, writer_schema, reader_schema=None):
    """Decode one datum from ``fo`` with the reader registered for its type.

    ``match_schemas`` is called first when a ``reader_schema`` is given and
    the writer's type is one of ``AVRO_TYPES``. A ``StructError`` raised
    while decoding is re-raised as ``EOFError``.
    """
    kind = extract_record_type(writer_schema)
    if reader_schema and kind in AVRO_TYPES:
        match_schemas(writer_schema, reader_schema)

    try:
        decode = READERS[kind]
        return decode(fo, writer_schema, reader_schema)
    except StructError:
        raise EOFError('cannot read %s from %s' % (kind, fo))
Example #4
0
def read_data(fo, writer_schema, reader_schema=None):
    """Read a single value from ``fo`` as described by ``writer_schema``.

    If ``reader_schema`` is truthy and the writer's type appears in
    ``AVRO_TYPES``, the two schemas are passed to ``match_schemas`` before
    anything is read. ``StructError`` from the underlying reader is
    converted to ``EOFError``.
    """
    type_name = extract_record_type(writer_schema)
    if reader_schema and type_name in AVRO_TYPES:
        match_schemas(writer_schema, reader_schema)

    try:
        value = READERS[type_name](fo, writer_schema, reader_schema)
    except StructError:
        raise EOFError('cannot read %s from %s' % (type_name, fo))
    return value
Example #5
0
def write_data(fo, datum, schema):
    """Write a single datum to the output stream.

    Parameters
    ----------
    fo: file like
        Output file
    datum: object
        Data to write
    schema: dict
        Schema to use
    """
    # Pick the writer registered for this schema's type and delegate to it.
    kind = extract_record_type(schema)
    encode = WRITERS[kind]
    return encode(fo, datum, schema)
Example #6
0
def write_data(fo, datum, schema):
    """Encode ``datum`` to ``fo`` using the writer for ``schema``'s type.

    Parameters
    ----------
    fo: file like
        Output file
    datum: object
        Data to write
    schema: dict
        Schema to use
    """
    type_name = extract_record_type(schema)
    writer = WRITERS[type_name]
    return writer(fo, datum, schema)
Example #7
0
def read_data(fo, writer_schema, reader_schema=None):
    """Read data from file object according to schema.

    The raw value is decoded by the ``READERS`` entry for the writer's type.
    If ``writer_schema`` carries a ``logicalType`` and a converter for it is
    registered in ``LOGICAL_READERS``, the converter's result is returned;
    otherwise the raw decoded value is returned.

    ``match_schemas`` is called first when a ``reader_schema`` is given and
    the writer's type is one of ``AVRO_TYPES``.

    Raises ``EOFError`` when a ``StructError`` occurs while reading.
    """

    record_type = extract_record_type(writer_schema)
    logical_type = extract_logical_type(writer_schema)

    if reader_schema and record_type in AVRO_TYPES:
        match_schemas(writer_schema, reader_schema)
    try:
        data = READERS[record_type](fo, writer_schema, reader_schema)
        if 'logicalType' in writer_schema:
            # The Avro spec requires unknown logical types to be ignored on
            # read, so fall back to the underlying value instead of failing
            # with KeyError when no converter is registered.
            fn = LOGICAL_READERS.get(logical_type)
            if fn is not None:
                return fn(data, writer_schema, reader_schema)

        return data
    except StructError:
        raise EOFError('cannot read %s from %s' % (record_type, fo))
Example #8
0
def write_data(fo, datum, schema):
    """Write a single datum to the output stream.

    When ``extract_logical_type`` yields a logical type for ``schema``, the
    datum is first passed through the matching ``LOGICAL_WRITERS`` entry and
    the prepared value is what gets written.

    Parameters
    ----------
    fo: file like
        Output file
    datum: object
        Data to write
    schema: dict
        Schema to use
    """
    kind = extract_record_type(schema)
    logical = extract_logical_type(schema)
    encode = WRITERS[kind]

    if not logical:
        return encode(fo, datum, schema)

    # Convert the Python value to its underlying representation first.
    prepared = LOGICAL_WRITERS[logical](datum, schema)
    return encode(fo, prepared, schema)
Example #9
0
def validate(datum, schema):
    """Determine if a python datum is an instance of a schema.

    The schema's type name comes from ``extract_record_type``. Besides the
    plain Python types, this variant also accepts ``decimal.Decimal`` for
    ``bytes`` and ``datetime.time`` / ``datetime.datetime`` for ``int`` and
    ``long``.

    For a union, a tuple datum is unpacked as ``(name, value)`` and checked
    only against the record branch whose ``"name"`` equals ``name``; any
    other datum is valid if it validates against at least one branch.

    Returns a truthy value when ``datum`` matches, a falsy one otherwise.
    Raises ``ValueError`` for a type name that is neither handled here nor
    present in ``SCHEMA_DEFS``.
    """
    record_type = extract_record_type(schema)

    if record_type == 'null':
        return datum is None

    if record_type == 'boolean':
        return isinstance(datum, bool)

    if record_type == 'string':
        return is_str(datum)

    if record_type == 'bytes':
        return isinstance(datum, (bytes, decimal.Decimal))

    if record_type == 'int':
        return ((isinstance(datum, (int, long))
                 and INT_MIN_VALUE <= datum <= INT_MAX_VALUE)
                or isinstance(datum, (datetime.time, datetime.datetime)))

    if record_type == 'long':
        # Avro ``long`` is a 64-bit signed integer. The bounds are spelled
        # out here because this branch previously reused the 32-bit INT_*
        # limits and rejected valid long values.
        return ((isinstance(datum, (int, long))
                 and -(1 << 63) <= datum <= (1 << 63) - 1)
                or isinstance(datum, (datetime.time, datetime.datetime)))

    if record_type in ['float', 'double']:
        return isinstance(datum, (int, long, float))

    if record_type == 'fixed':
        return isinstance(datum, bytes) and len(datum) == schema['size']

    if record_type == 'union':
        if isinstance(datum, tuple):
            # (name, value): validate only against the named record branch.
            (name, datum) = datum
            for candidate in schema:
                if (extract_record_type(candidate) == 'record'
                        and name == candidate["name"]):
                    return validate(datum, candidate)
            return False
        return any(validate(datum, s) for s in schema)

    # dict-y types from here on.
    if record_type == 'enum':
        return datum in schema['symbols']

    if record_type == 'array':
        return (isinstance(datum, Iterable)
                and all(validate(d, schema['items']) for d in datum))

    if record_type == 'map':
        return (isinstance(datum, Mapping)
                and all(is_str(k) for k in datum.keys())
                and all(validate(v, schema['values']) for v in datum.values()))

    if record_type in ('record', 'error', 'request'):
        # A missing field is validated using the field's declared default.
        return (isinstance(datum, Mapping) and all(
            validate(datum.get(f['name'], f.get('default')), f['type'])
            for f in schema['fields']))

    # Named type: validate against the definition stored in SCHEMA_DEFS.
    if record_type in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[record_type])

    raise ValueError('unkown record type - %s' % record_type)
Example #10
0
def validate(datum, schema):
    """Check whether ``datum`` is an acceptable value for ``schema``.

    The schema's type name is obtained from ``extract_record_type`` and each
    supported type has its own check below. Container types (union, array,
    map, record) validate their members recursively. A type name found in
    ``SCHEMA_DEFS`` is validated against the definition stored there.

    Raises ``ValueError`` if the type name is not recognised.
    """
    kind = extract_record_type(schema)

    if kind == 'null':
        return datum is None

    if kind == 'boolean':
        return isinstance(datum, bool)

    if kind == 'string':
        return is_str(datum)

    if kind == 'bytes':
        return isinstance(datum, bytes)

    if kind == 'int':
        if not isinstance(datum, (int, long)):
            return False
        return INT_MIN_VALUE <= datum <= INT_MAX_VALUE

    if kind == 'long':
        if not isinstance(datum, (int, long)):
            return False
        return LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE

    if kind in ['float', 'double']:
        return isinstance(datum, (int, long, float))

    if kind == 'fixed':
        if not isinstance(datum, bytes):
            return False
        return len(datum) == schema['size']

    if kind == 'union':
        # Valid as soon as one branch accepts the value.
        for branch in schema:
            if validate(datum, branch):
                return True
        return False

    # dict-y types from here on.
    if kind == 'enum':
        return datum in schema['symbols']

    if kind == 'array':
        if not isinstance(datum, Iterable):
            return False
        for element in datum:
            if not validate(element, schema['items']):
                return False
        return True

    if kind == 'map':
        if not isinstance(datum, Mapping):
            return False
        # All keys are checked before any value is validated.
        for key in datum.keys():
            if not is_str(key):
                return False
        for value in datum.values():
            if not validate(value, schema['values']):
                return False
        return True

    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, Mapping):
            return False
        for field in schema['fields']:
            # A missing field is validated using the field's default.
            value = datum.get(field['name'], field.get('default'))
            if not validate(value, field['type']):
                return False
        return True

    if kind in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[kind])

    raise ValueError('unkown record type - %s' % kind)
Example #11
0
def write_data(fo, datum, schema):
    """Write ``datum`` to ``fo`` with the writer registered for ``schema``.

    The writer is looked up in ``WRITERS`` by the type name that
    ``extract_record_type`` returns for ``schema``; its result is returned.
    """
    kind = extract_record_type(schema)
    encode = WRITERS[kind]
    return encode(fo, datum, schema)