def __init__(self, name, namespace=None, types=None, messages=None):
    """Validate the constructor arguments and build the protocol's props.

    Args:
      name: Non-empty protocol name string.
      namespace: Optional namespace string.
      types: Optional list of type descriptors.
      messages: Optional JSON-object-like mapping of message descriptors.

    Raises:
      ProtocolParseException: if any argument fails validation.
    """
    # Guard clauses: every branch raises, so independent checks are
    # equivalent to the original if/elif chain.
    if not name:
        raise ProtocolParseException('Protocols must have a non-empty name.')
    if not isinstance(name, six.string_types):
        raise ProtocolParseException('The name property must be a string.')
    if namespace is not None and not isinstance(namespace, six.string_types):
        raise ProtocolParseException('The namespace property must be a string.')
    if types is not None and not isinstance(types, list):
        raise ProtocolParseException('The types property must be a list.')
    if messages is not None and not (hasattr(messages, 'get')
                                     and callable(messages.get)):
        raise ProtocolParseException('The messages property must be a JSON object.')

    self._props = {}
    self.set_prop('name', name)
    type_names = schema.Names()
    if namespace is not None:
        self.set_prop('namespace', namespace)
        type_names.default_namespace = namespace
    if types is not None:
        self.set_prop('types', self._parse_types(types, type_names))
    if messages is not None:
        self.set_prop('messages', self._parse_messages(messages, type_names))
    # Fingerprint of this protocol's canonical string form.
    self._md5 = md5(str(self).encode('US-ASCII')).digest()
def test_deep_column_filter(self):
    """Flattening nested records honors include/exclude column filters."""
    names = schema.Names()
    ia, ib, oa, ob = 'ia', 'ib', 'oa', 'ob'
    inner_a = make_field(ia, primitive_schemas.INT, names=names)
    inner_b = make_field(ib, primitive_schemas.INT, names=names)
    inner_a_record = schema.RecordSchema(
        'inner_a', 'test', [inner_a, inner_b], names=names)
    inner_b_record = schema.RecordSchema(
        'inner_b', 'test', [inner_a, inner_b], names=names)
    outer_a = make_field(oa, inner_a_record, names=names)
    outer_b = make_field(ob, inner_b_record, names=names)
    outer_record = schema.RecordSchema(
        'outer', 'test', [outer_a, outer_b], names=schema.Names())

    def fresh_datum():
        # A new datum per scenario, matching the original test's rebuilds.
        return object(outer_record, {oa: {ia: 1, ib: 2}, ob: {ia: 3, ib: 4}})

    # No filters: every leaf appears, prefixed by its outer field name.
    df = from_avro(fresh_datum()).flatten()
    df_equality(self, {oa + '_' + ia: [1], oa + '_' + ib: [2],
                       ob + '_' + ia: [3], ob + '_' + ib: [4]}, df)

    # Excluding 'oa' drops that subtree; surviving names need no prefix.
    df = from_avro(fresh_datum(), exclude='oa').flatten()
    df_equality(self, {ia: [3], ib: [4]}, df)

    # Include wins back the single leaf 'oa.ia' despite the exclude.
    df = from_avro(fresh_datum(), exclude='oa', include=[{}, 'oa.ia']).flatten()
    df_equality(self, {oa + '_' + ia: [1], ob + '_' + ia: [3], ib: [4]}, df)
def test_fixed(self):
    """A fixed-size Avro value converts into a one-element list column."""
    fixed_schema = schema.FixedSchema("test", "test", 3, names=schema.Names())
    payload = b'abc'
    converted = bamboo_cpp.convert_avro(object(fixed_schema, payload))
    self.assertListEqual(
        converted.get_list().get_values().tolist(), [payload])
def __init__(self, schemadir=".."):
    """Load every schema named in self.SCHEMAS into one Names registry.

    Args:
      schemadir: Directory containing the ``<name>.av`` schema files.
    """
    self.names = schema.Names()
    for sf in self.SCHEMAS:
        # print() function form: the original used a Python-2-only print
        # statement; this form emits identical output on both 2 and 3.
        print("Loading %s" % sf)
        sfname = os.path.join(schemadir, sf.lower() + ".av")
        with open(sfname, "r") as fp:
            obj = json.load(fp)
        # Parsing registers the named schema into self.names; the returned
        # schema object was bound to an unused local, now dropped.
        schema.make_avsc_object(obj, self.names)
def to_json(self, names=None):
    """Render this message as a JSON-serializable dict.

    Args:
      names: Optional schema.Names registry; a fresh one is used if omitted.
    """
    if names is None:
        names = schema.Names()
    result = {
        'request': self.request.to_json(names),
        'response': self.response.to_json(names),
    }
    # 'errors' is emitted only when present, matching the wire format.
    if self.errors:
        result['errors'] = self.errors.to_json(names)
    return result
def test_fixed(self):
    """Fixed values round-trip unchanged through the JSON converter."""
    test_schema = schema.FixedSchema('test_enum', None, 5, schema.Names())
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(
        self.converter.to_json_object(('A' * 5).encode('utf-8'), test_schema),
        ('A' * 5).encode('utf-8'))
    self.assertEqual(
        self.converter.from_json_object(('B' * 5).encode('utf-8'), test_schema),
        ('B' * 5).encode('utf-8'))
def create_field_schema(self, name, type_schema, has_default=False,
                        default_value=None, aliases=None):
    """Build a schema.Field named *name* wrapping *type_schema*."""
    # The default is only forwarded when the caller declared one.
    default = default_value if has_default else None
    return schema.Field(
        type_schema.to_json(),
        name,
        has_default,
        default=default,
        names=schema.Names(),
        other_props={'aliases': aliases},
    )
def __init__(
    self,
    name,
    namespace=None,
    types=tuple(),
    messages=tuple(),
):
    """Initializes a new protocol object.

    Args:
      name: Protocol name (absolute or relative).
      namespace: Optional explicit namespace (if name is relative).
      types: Collection of types in the protocol.
      messages: Collection of messages in the protocol.

    Raises:
      ProtocolParseException: if two messages share a simple name.
    """
    self._avro_name = schema.Name(name=name, namespace=namespace)
    self._fullname = self._avro_name.fullname
    self._name = self._avro_name.simple_name
    self._namespace = self._avro_name.namespace

    self._props = {}
    self._props['name'] = self._name
    if self._namespace:
        self._props['namespace'] = self._namespace

    self._names = schema.Names(default_namespace=self._namespace)

    self._types = tuple(types)
    # Map: type full name -> type schema.
    # Loop variable renamed from `type`, which shadowed the builtin.
    self._type_map = MappingProxyType(
        {type_schema.fullname: type_schema for type_schema in self._types})
    # This assertion cannot fail unless we don't track named schemas properly:
    assert (len(self._types) == len(self._type_map)), (
        'Type list %r does not match type map: %r'
        % (self._types, self._type_map))
    # TODO: set props['types']

    self._messages = tuple(messages)
    # Map: message name -> Message
    # Note that message names are simple names unique within the protocol.
    self._message_map = MappingProxyType(
        {message.name: message for message in self._messages})
    if len(self._messages) != len(self._message_map):
        raise ProtocolParseException(
            'Invalid protocol %s with duplicate message name: %r'
            % (self._avro_name, self._messages))
    # TODO: set props['messages']

    # Fingerprint of this protocol's canonical string form.
    self._md5 = hashlib.md5(str(self).encode('utf-8')).digest()
def create_fixed_decimal_schema(self, size, name, precision, scale=0,
                                aliases=None):
    """Build a FixedDecimalSchema in the test namespace."""
    props = {'aliases': aliases}
    return schema.FixedDecimalSchema(
        size,
        name,
        precision,
        scale,
        self.test_namespace,
        names=schema.Names(),
        other_props=props,
    )
def to_json(self):
    """Render this protocol as a JSON-serializable dict."""
    names = schema.Names(default_namespace=self.namespace)
    result = {'protocol': self.name}
    if self.namespace:
        result['namespace'] = self.namespace
    if self.types:
        result['types'] = [each_type.to_json(names) for each_type in self.types]
    if self.messages:
        # Serialize each message body keyed by its simple name.
        result['messages'] = {
            msg_name: msg_body.to_json(names)
            for msg_name, msg_body in self.messages.items()
        }
    return result
def ProtocolFromJSONData(json_data):
    """Builds an Avro Protocol from its JSON descriptor.

    Args:
      json_data: JSON data representing the descriptor of the Avro protocol.
    Returns:
      The Avro Protocol parsed from the JSON descriptor.
    Raises:
      ProtocolParseException: if the descriptor is invalid.
    """
    # isinstance instead of an exact type() comparison; the stray debug
    # print of the offending type has been removed.
    if not isinstance(json_data, dict):
        raise ProtocolParseException(
            'Invalid JSON descriptor for an Avro protocol: %r' % json_data)

    name = json_data.get('protocol')
    if name is None:
        raise ProtocolParseException(
            'Invalid protocol descriptor with no "name": %r' % json_data)

    # Namespace is optional
    namespace = json_data.get('namespace')
    avro_name = schema.Name(name=name, namespace=namespace)
    names = schema.Names(default_namespace=avro_name.namespace)

    type_desc_list = json_data.get('types', tuple())
    types = tuple(
        Protocol._ParseTypeDesc(desc, names=names) for desc in type_desc_list)

    message_desc_map = json_data.get('messages', dict())
    messages = tuple(
        Protocol._ParseMessageDescMap(message_desc_map, names=names))

    return Protocol(
        name=name,
        namespace=namespace,
        types=types,
        messages=messages,
    )
def register_message(self, obj: dict, type_identifier: int = None) -> int:
    """
    :param obj: A message object to register.
    :param type_identifier: An optional message type identifier to use for
        the object. If not specified then a number will be automatically
        assigned.
    """
    if isinstance(obj, dict):
        parsed = schema.SchemaFromJSONData(obj, schema.Names())
    else:
        # Already a parsed schema object; register it as-is.
        parsed = obj
    if type_identifier is None:
        self._id += 1
        type_identifier = self._id
    self.id2schema[type_identifier] = parsed
    return type_identifier
def test_enum(self):
    """Enum symbols pass through the JSON converter unchanged."""
    test_schema = schema.EnumSchema('test_enum', None, ['A', 'B'],
                                    schema.Names())
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(self.converter.to_json_object('A', test_schema), 'A')
    self.assertEqual(self.converter.from_json_object('B', test_schema), 'B')
def testSymbolsInReverseOrder(self):
    """Declared symbol order is preserved, not sorted."""
    declared = ['B', 'A']
    enum = schema.EnumSchema('Test', '', declared, schema.Names(), '', {})
    self.assertEqual('B', enum.symbols[0])
"""
@author Stephen Dawson-Haggerty <*****@*****.**>
"""

# NOTE(review): cStringIO exists only on Python 2, so this module appears
# to target Python 2 — confirm before porting.
import os
import sys
import json
import uuid
from cStringIO import StringIO
from avro import schema, io

import util
import pkgutil

# Default namespace applied to every schema registered below.
NAMESPACE = "edu.berkeley.cs.local"
# Shared registry of named schemas, keyed under NAMESPACE by default.
SCHEMA_NAMES = schema.Names(default_namespace=NAMESPACE)

# Basenames of the schema definition files to load, grouped by role.
SCHEMAS = [
    # new extension types
    "uuid", "UnitofTime", "Duration", "ReadingType", "ReadingValue",
    'InstrumentMetadata', 'LocationMetadata', 'OperatorMetadata',
    # timeseries subobjects
    "Actuator", "Properties", "Metadata",
    "TimeSeries", "Collection", "Reporting",
]
def generate_schema(schema_json, use_logical_types=False, custom_imports=None,
                    avro_json_converter=None):
    """
    Generate file containing concrete classes for RecordSchemas in given avro schema json
    :param str schema_json: JSON representing avro schema
    :param list[str] custom_imports: Add additional import modules
    :param str avro_json_converter: AvroJsonConverter type to use for default values
    :return: Tuple of (generated source text, list of generated class full names)
    """
    if avro_json_converter is None:
        avro_json_converter = 'avrojson.AvroJsonConverter'

    # A bare type name gets a default constructor-call suffix appended.
    if '(' not in avro_json_converter:
        avro_json_converter += '(use_logical_types=%s, schema_types=__SCHEMA_TYPES)' % use_logical_types

    custom_imports = custom_imports or []
    names = schema.Names()
    make_avsc_object(json.loads(schema_json), names)

    # Keep only named record/enum schemas, sorted by full name so the
    # generated file is deterministic.
    names = [
        k for k in six.iteritems(names.names)
        if isinstance(k[1], (schema.RecordSchema, schema.EnumSchema))
    ]
    names = sorted(names, key=lambda x: x[0])

    main_out = StringIO()
    writer = TabbedWriter(main_out)

    write_preamble(writer, use_logical_types, custom_imports, schema_json)
    write_schema_preamble(writer)
    write_get_schema(writer)
    write_populate_schemas(writer)

    # All generated classes are nested under a single SchemaClasses holder.
    writer.write('\n\n\nclass SchemaClasses(object):')
    writer.tab()
    writer.write('\n\n')

    current_namespace = tuple()

    for name, field_schema in names:  # type: str, schema.Schema
        name = clean_fullname(name)
        namespace = tuple(name.split('.')[:-1])
        # Open a new nested namespace block whenever the namespace changes.
        if namespace != current_namespace:
            start_namespace(current_namespace, namespace, writer)
            current_namespace = namespace
        if isinstance(field_schema, schema.RecordSchema):
            logger.debug('Writing schema: %s', clean_fullname(field_schema.fullname))
            write_schema_record(field_schema, writer, use_logical_types)
        elif isinstance(field_schema, schema.EnumSchema):
            logger.debug('Writing enum: %s', field_schema.fullname)
            write_enum(field_schema, writer)
    writer.write('\npass\n')

    # Emit the fullname -> generated-class lookup table at module level.
    writer.set_tab(0)
    writer.write('\n__SCHEMA_TYPES = {\n')
    writer.tab()
    for name, field_schema in names:
        writer.write("'%s': SchemaClasses.%sClass,\n" % (clean_fullname(
            field_schema.fullname), clean_fullname(field_schema.fullname)))
    writer.untab()
    writer.write('\n}\n')

    writer.write('_json_converter = %s\n\n' % avro_json_converter)

    value = main_out.getvalue()
    main_out.close()
    return value, [clean_fullname(name[0]) for name in names]
def create_array_schema(self, items_schema):
    """Wrap *items_schema* in an ArraySchema."""
    items_json = items_schema.to_json()
    return schema.ArraySchema(items_json, names=schema.Names())
def make_array_schema(element_schema):
    """Return an ArraySchema whose items follow *element_schema*."""
    return schema.ArraySchema(element_schema, schema.Names())
def test_enum(self):
    """An enum value converts into a one-element list column."""
    enum_schema = schema.EnumSchema("test", "test", ['a', 'b'],
                                    names=schema.Names())
    converted = bamboo_cpp.convert_avro(object(enum_schema, 'b'))
    self.assertListEqual(
        converted.get_list().get_values().tolist(), ['b'])
def create_union_schema(self, *avro_schemas):
    """Build a UnionSchema over the given branch schemas."""
    branches = [branch.to_json() for branch in avro_schemas]
    return schema.UnionSchema(branches, names=schema.Names())
def create_record_schema(self, name, fields, aliases=None):
    """Build a RecordSchema in the test namespace from *fields*."""
    field_json = [each_field.to_json() for each_field in fields]
    return schema.RecordSchema(
        name,
        self.test_namespace,
        field_json,
        names=schema.Names(),
        other_props={'aliases': aliases},
    )
def create_fixed_schema(self, name, size, aliases=None):
    """Build a FixedSchema of *size* bytes in the test namespace."""
    props = {'aliases': aliases}
    return schema.FixedSchema(
        name, self.test_namespace, size,
        names=schema.Names(), other_props=props)
def create_enum_schema(self, name, symbols, aliases=None):
    """Build an EnumSchema over *symbols* in the test namespace."""
    props = {'aliases': aliases}
    return schema.EnumSchema(
        name, self.test_namespace, symbols,
        names=schema.Names(), other_props=props)
def simple_schema(field_name, field_schema):
    """Wrap a single field in a record schema named 'test'."""
    only_field = make_field(field_name, field_schema)
    return schema.RecordSchema(
        'test', 'test', [only_field], names=schema.Names())
def generate_schema(schema_json, use_logical_types=False, custom_imports=None,
                    avro_json_converter=None):
    """
    Generate file containing concrete classes for RecordSchemas in given avro schema json
    :param str schema_json: JSON representing avro schema
    :param list[str] custom_imports: Add additional import modules
    :param str avro_json_converter: AvroJsonConverter type to use for default values
    :return: Tuple of (generated source text, list of generated class full names)
    """
    if avro_json_converter is None:
        avro_json_converter = 'avrojson.AvroJsonConverter'

    # A bare type name gets a default constructor-call suffix appended.
    if '(' not in avro_json_converter:
        avro_json_converter += f'(use_logical_types={use_logical_types}, schema_types=__SCHEMA_TYPES)'

    custom_imports = custom_imports or []
    names = schema.Names()
    make_avsc_object(json.loads(schema_json), names)

    # Keep only named record/enum schemas, sorted by full name so the
    # generated file is deterministic.
    names = [k for k in six.iteritems(names.names)
             if isinstance(k[1], (schema.RecordSchema, schema.EnumSchema))]
    names = sorted(names, key=lambda x: x[0])

    main_out = StringIO()
    writer = TabbedWriter(main_out)

    write_preamble(writer, use_logical_types, custom_imports)
    write_schema_preamble(writer)
    write_get_schema(writer)
    write_populate_schemas(writer)

    current_namespace = tuple()

    for name, field_schema in names:  # type: str, schema.Schema
        name = clean_fullname(name)
        namespace = tuple(name.split('.')[:-1])
        if namespace != current_namespace:
            current_namespace = namespace
        if isinstance(field_schema, schema.RecordSchema):
            logger.debug('Writing schema: %s', clean_fullname(field_schema.fullname))
            write_schema_record(field_schema, writer, use_logical_types)
        elif isinstance(field_schema, schema.EnumSchema):
            # BUG FIX: the original passed field_schema.fullname as an extra
            # positional arg alongside an f-string with no %-placeholder,
            # which raises TypeError when the record is actually formatted.
            # Use lazy %-style logging args instead.
            logger.debug('Writing enum: %s', field_schema.fullname)
            write_enum(field_schema, writer)

    # Emit the class lookup table at module level.
    writer.set_tab(0)
    writer.write('\n__SCHEMA_TYPES = {')
    writer.tab()

    # Lookup table for fullname.
    for name, field_schema in names:
        n = clean_fullname(field_schema.name)
        full = field_schema.fullname
        writer.write(f"\n'{full}': {n}Class,")

    # Lookup table for names without namespace.
    for name, field_schema in names:
        n = clean_fullname(field_schema.name)
        writer.write(f"\n'{n}': {n}Class,")

    writer.untab()
    writer.write('\n}\n\n')

    writer.write(f'_json_converter = {avro_json_converter}\n\n')

    value = main_out.getvalue()
    main_out.close()
    return value, [clean_fullname(name[0]) for name in names]
def __get_names_and_schema(file_name):
    """Parse the schema JSON in *file_name*; return (names registry, schema)."""
    names = avro_schema.Names()
    # Local renamed from `schema` to avoid shadowing the usual module name.
    parsed = make_avsc_object(json.loads(__read_file(file_name)), names)
    return names, parsed
def __get_names_and_schema():
    """Parse _STRING_SCHEMA_JSON; return (names registry, schema)."""
    names = avro_schema.Names()
    # Local renamed from `schema` to avoid shadowing the usual module name.
    parsed = make_avsc_object(json.loads(_STRING_SCHEMA_JSON), names)
    return names, parsed
def create_map_schema(self, values_schema):
    """Wrap *values_schema* in a MapSchema."""
    values_json = values_schema.to_json()
    return schema.MapSchema(values_json, names=schema.Names())
def __str__(self):
    """JSON text of this object, rendered with a fresh name registry."""
    json_obj = self.to_json(schema.Names())
    return json.dumps(json_obj)
def make_union_schema(element_schemas):
    """Return a UnionSchema over *element_schemas*."""
    return schema.UnionSchema(element_schemas, names=schema.Names())