def read_data(fo, writer_schema, reader_schema=None):
    """Decode one datum from ``fo`` as described by ``writer_schema``.

    If a reader schema is given and the writer's type is listed in
    ``AVRO_TYPES``, ``match_schemas`` is called on the pair before decoding.
    The actual decoding is delegated to the ``READERS`` entry registered for
    the writer's type, and its result is returned unchanged.
    """
    kind = extract_record_type(writer_schema)

    needs_match = reader_schema and kind in AVRO_TYPES
    if needs_match:
        match_schemas(writer_schema, reader_schema)

    decode = READERS[kind]
    return decode(fo, writer_schema, reader_schema)
def write_union(fo, datum, schema):
    """Write a union value: branch position first, then the value itself.

    The zero-based index of the selected branch of ``schema`` is written as a
    long, followed by ``datum`` encoded with that branch's schema.

    ``datum`` may be a ``(name, value)`` tuple, in which case the branch is
    the first record schema whose ``"name"`` equals ``name``. Otherwise the
    branch is the first one for which ``validate(datum, branch)`` is true.

    Raises ``ValueError`` when no branch of the union is selected.
    """
    if isinstance(datum, tuple):
        # Explicitly tagged value: pick the record branch by its name.
        name, datum = datum
        position = next(
            (
                i for i, branch in enumerate(schema)
                if extract_record_type(branch) == 'record'
                and name == branch["name"]
            ),
            None,
        )
        if position is None:
            raise ValueError(
                'provided union type name %s not found in schema %s'
                % (name, schema)
            )
    else:
        # Untagged value: pick the first branch the datum validates against.
        pytype = type(datum)
        position = next(
            (i for i, branch in enumerate(schema) if validate(datum, branch)),
            None,
        )
        if position is None:
            raise ValueError(
                '%r (type %s) do not match %s' % (datum, pytype, schema)
            )

    write_long(fo, position)
    write_data(fo, datum, schema[position])
def read_data(fo, writer_schema, reader_schema=None):
    """Decode one datum from ``fo`` as described by ``writer_schema``.

    If a reader schema is given and the writer's type is listed in
    ``AVRO_TYPES``, ``match_schemas`` is called on the pair before decoding.
    Decoding is delegated to the ``READERS`` entry for the writer's type.

    Raises ``EOFError`` when the reader raises ``StructError``.
    """
    kind = extract_record_type(writer_schema)

    if reader_schema and kind in AVRO_TYPES:
        match_schemas(writer_schema, reader_schema)

    try:
        result = READERS[kind](fo, writer_schema, reader_schema)
    except StructError:
        # Report a failed low-level read as end-of-file to the caller.
        raise EOFError('cannot read %s from %s' % (kind, fo))
    return result
def write_data(fo, datum, schema):
    """Encode ``datum`` onto the output stream according to ``schema``.

    Parameters
    ----------
    fo: file like
        Output file
    datum: object
        Data to write
    schema: dict
        Schema to use

    Returns whatever the writer registered in ``WRITERS`` for the schema's
    type returns.
    """
    kind = extract_record_type(schema)
    encode = WRITERS[kind]
    return encode(fo, datum, schema)
def read_data(fo, writer_schema, reader_schema=None):
    """Read data from file object according to schema.

    If a reader schema is given and the writer's type is listed in
    ``AVRO_TYPES``, ``match_schemas`` is called on the pair before decoding.
    The raw value is decoded by the ``READERS`` entry for the writer's type.
    When the writer schema carries a ``logicalType`` and a converter is
    registered for it in ``LOGICAL_READERS``, the converted value is
    returned; otherwise the raw decoded value is returned.

    Raises ``EOFError`` when ``StructError`` is raised while reading.
    """
    record_type = extract_record_type(writer_schema)
    logical_type = extract_logical_type(writer_schema)

    if reader_schema and record_type in AVRO_TYPES:
        match_schemas(writer_schema, reader_schema)

    try:
        data = READERS[record_type](fo, writer_schema, reader_schema)
        if 'logicalType' in writer_schema:
            # Unknown logical types must be ignored on read (Avro spec):
            # fall back to the underlying type's value instead of raising
            # KeyError for an unregistered converter.
            fn = LOGICAL_READERS.get(logical_type)
            if fn is not None:
                return fn(data, writer_schema, reader_schema)
        return data
    except StructError:
        raise EOFError('cannot read %s from %s' % (record_type, fo))
def write_data(fo, datum, schema):
    """Write a datum of data to output stream.

    Parameters
    ----------
    fo: file like
        Output file
    datum: object
        Data to write
    schema: dict
        Schema to use

    If the schema has a logical type with a converter registered in
    ``LOGICAL_WRITERS``, the datum is passed through that converter before
    being handed to the writer for the underlying type. A logical type with
    no registered converter is ignored and the datum is written as-is.
    """
    record_type = extract_record_type(schema)
    logical_type = extract_logical_type(schema)

    fn = WRITERS[record_type]

    if logical_type:
        # Do not fail with KeyError on an unregistered logical type; write
        # the value using the underlying type instead.
        prepare = LOGICAL_WRITERS.get(logical_type)
        if prepare is not None:
            datum = prepare(datum, schema)

    return fn(fo, datum, schema)
def validate(datum, schema):
    """Determine if a python datum is an instance of a schema.

    Returns a boolean for every known schema type. A union datum may be a
    ``(name, value)`` tuple, in which case only the record branch whose
    ``"name"`` equals ``name`` is considered. Types not handled directly are
    looked up in ``SCHEMA_DEFS`` and validated against that definition.

    Raises ``ValueError`` for a type that is neither built in nor present in
    ``SCHEMA_DEFS``.
    """
    record_type = extract_record_type(schema)

    if record_type == 'null':
        return datum is None

    if record_type == 'boolean':
        return isinstance(datum, bool)

    if record_type == 'string':
        return is_str(datum)

    if record_type == 'bytes':
        # Decimal is accepted as well as bytes -- presumably for a decimal
        # logical type layered on bytes; confirm against the logical writers.
        return isinstance(datum, (bytes, decimal.Decimal))

    if record_type == 'int':
        # time/datetime objects are accepted as well -- presumably for
        # date/time logical types layered on int; confirm.
        return (
            (isinstance(datum, (int, long,)) and
             INT_MIN_VALUE <= datum <= INT_MAX_VALUE) or
            isinstance(datum, (datetime.time, datetime.datetime))
        )

    if record_type == 'long':
        # A long must be range-checked against the long bounds, not the int
        # bounds, otherwise valid values outside the int range are rejected.
        return (
            (isinstance(datum, (int, long,)) and
             LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE) or
            isinstance(datum, (datetime.time, datetime.datetime))
        )

    if record_type in ['float', 'double']:
        return isinstance(datum, (int, long, float))

    if record_type == 'fixed':
        return isinstance(datum, bytes) and len(datum) == schema['size']

    if record_type == 'union':
        if isinstance(datum, tuple):
            # Tagged value: validate only against the named record branch.
            (name, datum) = datum
            for candidate in schema:
                if extract_record_type(candidate) == 'record':
                    if name == candidate["name"]:
                        return validate(datum, candidate)
            # No record branch carries the requested name.
            return False
        return any(validate(datum, s) for s in schema)

    # dict-y types from here on.
    if record_type == 'enum':
        return datum in schema['symbols']

    if record_type == 'array':
        return (
            isinstance(datum, Iterable) and
            all(validate(d, schema['items']) for d in datum)
        )

    if record_type == 'map':
        return (
            isinstance(datum, Mapping) and
            all(is_str(k) for k in datum.keys()) and
            all(validate(v, schema['values']) for v in datum.values())
        )

    if record_type in ('record', 'error', 'request',):
        # A field missing from the datum is validated using its 'default'
        # entry (None when the field declares no default).
        return (
            isinstance(datum, Mapping) and
            all(
                validate(datum.get(f['name'], f.get('default')), f['type'])
                for f in schema['fields']
            )
        )

    if record_type in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[record_type])

    raise ValueError('unkown record type - %s' % record_type)
def validate(datum, schema):
    """Report whether ``datum`` conforms to ``schema``.

    Returns a boolean for every known schema type. Types not handled
    directly are looked up in ``SCHEMA_DEFS`` and validated against that
    definition.

    Raises ``ValueError`` for a type that is neither built in nor present in
    ``SCHEMA_DEFS``.
    """
    kind = extract_record_type(schema)

    # --- primitive types -------------------------------------------------
    if kind == 'null':
        return datum is None
    if kind == 'boolean':
        return isinstance(datum, bool)
    if kind == 'string':
        return is_str(datum)
    if kind == 'bytes':
        return isinstance(datum, bytes)
    if kind == 'int':
        if not isinstance(datum, (int, long)):
            return False
        return INT_MIN_VALUE <= datum <= INT_MAX_VALUE
    if kind == 'long':
        if not isinstance(datum, (int, long)):
            return False
        return LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE
    if kind in ('float', 'double'):
        return isinstance(datum, (int, long, float))
    if kind == 'fixed':
        if not isinstance(datum, bytes):
            return False
        return len(datum) == schema['size']

    # --- union: any branch may accept the datum --------------------------
    if kind == 'union':
        for branch in schema:
            if validate(datum, branch):
                return True
        return False

    # --- complex (dict-described) types ----------------------------------
    if kind == 'enum':
        return datum in schema['symbols']
    if kind == 'array':
        if not isinstance(datum, Iterable):
            return False
        for item in datum:
            if not validate(item, schema['items']):
                return False
        return True
    if kind == 'map':
        if not isinstance(datum, Mapping):
            return False
        if not all(is_str(key) for key in datum.keys()):
            return False
        for value in datum.values():
            if not validate(value, schema['values']):
                return False
        return True
    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, Mapping):
            return False
        for field in schema['fields']:
            # A field missing from the datum is checked using its 'default'
            # entry (None when the field declares no default).
            value = datum.get(field['name'], field.get('default'))
            if not validate(value, field['type']):
                return False
        return True

    # --- named types registered elsewhere ---------------------------------
    if kind in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[kind])

    raise ValueError('unkown record type - %s' % kind)
def write_data(fo, datum, schema):
    """Write ``datum`` to ``fo`` using the writer registered for ``schema``.

    The schema's type selects an entry in ``WRITERS``; that writer is called
    with ``(fo, datum, schema)`` and its result is returned.
    """
    writer = WRITERS[extract_record_type(schema)]
    return writer(fo, datum, schema)