def encode(schema_id):
    """Return the employee record as prefixed, schemaless Avro bytes.

    The result is laid out as one zero byte, then ``schema_id`` packed as a
    big-endian unsigned 32-bit integer, then the Avro encoding of the
    module-level ``employee_json_data`` written with
    ``employee_parsed_schema``.
    """
    with BytesIO() as buffer:
        # ">" selects standard sizes with no padding, so this yields the same
        # five bytes as packing the zero byte and the id separately.
        buffer.write(struct.pack(">bI", 0, schema_id))
        fastavro.schemaless_writer(
            buffer, employee_parsed_schema, employee_json_data
        )
        return buffer.getvalue()
def avro_test():
    """Benchmark fastavro schemaless encoding and decoding.

    The module-level ``message`` is encoded once to obtain a sample payload.
    ``timeit`` then runs ``loops`` encode iterations and ``loops`` decode
    iterations; the timings are appended to ``enc_table`` and ``dec_table``.
    """
    schema = {
        'doc': 'test',
        'name': 'test',
        'namespace': 'test',
        'type': 'record',
        'fields': [
            {'name': 'words', 'type': 'string'},
            {'name': 'list', 'type': {'type': 'array', 'items': 'int'}},
            {'name': 'dict', "type": {"type": "map", "values": "string"}},
            {'name': 'int', 'type': 'int'},
            {'name': 'float', 'type': 'float'},
        ],
    }

    # Sample payload; only its reported size ends up in the result table.
    sample = BytesIO()
    fastavro.schemaless_writer(sample, schema, message)
    src = sample.getvalue()

    # The timed statements run in timeit's own namespace, so the message and
    # schema are injected through their repr().
    setup = (
        'd={}; schema={}; import fastavro; from io import BytesIO; '
        'bytes_writer = BytesIO(); '
        'fastavro.schemaless_writer(bytes_writer, schema, d); '
        'src = bytes_writer.getvalue()'
    ).format(message, schema)
    encode_stmt = (
        'bytes_writer = BytesIO(); '
        'fastavro.schemaless_writer(bytes_writer, schema, d); '
        'bytes_writer.getvalue()'
    )
    decode_stmt = (
        'bytes_writer = BytesIO(); bytes_writer.write(src); '
        'bytes_writer.seek(0); '
        'data = fastavro.schemaless_reader(bytes_writer, schema)'
    )

    encode_time = timeit(setup=setup, stmt=encode_stmt, number=loops)
    enc_table.append(['Avro serialization', encode_time, sys.getsizeof(src)])

    decode_time = timeit(setup=setup, stmt=decode_stmt, number=loops)
    dec_table.append(['Avro deserialization', decode_time])

    print("Avro")
def encode(self, data: dict, **kwargs) -> Optional[bytes]:
    """
    Encode a record into an Avro byte string.

    :param data: record to encode; any falsy value (``None``, ``{}``)
        short-circuits and nothing is encoded
    :type data: dict
    :param schema: Avro schema to encode with (required keyword argument)
    :type schema: Optional[dict]
    :param schemaless: when true (the default) the record is written with
        ``fastavro.schemaless_writer``; otherwise ``fastavro.writer`` is
        used with a one-element record list
    :type schemaless: Optional[bool]
    :return: the encoded bytes, or ``None`` when ``data`` is falsy
    :rtype: Optional[bytes]
    :raises AttributeError: if no schema keyword argument is supplied
    :raises fastavro.schema.SchemaParseException: if the supplied schema
        is rejected by ``fastavro.parse_schema``
    """
    if not data:
        return None

    raw_schema = kwargs.get('schema')
    if not raw_schema:
        raise AttributeError("Missing schema named argument")
    use_schemaless = kwargs.get('schemaless', True)

    parsed = fastavro.parse_schema(raw_schema)
    buffer = io.BytesIO()
    if use_schemaless:
        fastavro.schemaless_writer(buffer, parsed, data)
    else:
        fastavro.writer(buffer, parsed, [data])
    return buffer.getvalue()
def publish_aggregate_measurement(self, measurement):
    """Avro-encode an aggregate measurement and publish it over MQTT.

    The record is written with ``self._aggregate_schema`` and sent to the
    ``kit/<serial>/measurement/aggregate`` topic.
    """
    record = {
        # Filled on the backend-side for security reasons.
        'kit_serial': '',
        'peripheral': measurement.peripheral.get_name(),
        'physical_quantity': measurement.physical_quantity,
        'physical_unit': measurement.physical_unit,
        # Timestamps are sent as whole milliseconds.
        'start_datetime': round(measurement.start_datetime.timestamp() * 1000),
        'end_datetime': round(measurement.end_datetime.timestamp() * 1000),
        'type': measurement.aggregate_type,
        'value': measurement.value,
    }

    payload = BytesIO()
    fastavro.schemaless_writer(payload, self._aggregate_schema, record)

    self._mqtt_client.publish(
        topic=f"kit/{self.serial}/measurement/aggregate",
        payload=payload.getvalue(),
        # Deliver exactly once. Maybe downgrade to `1`: deliver at least once.
        qos=2,
    )
def fastavro(self):
    """Round-trip every entry of ``self.values`` through fastavro.

    Each encoded value is decoded with the schema parsed from ``SCHEMA`` and
    then re-encoded into a throwaway buffer; nothing is returned.
    """
    # NOTE(review): the body relies on the global name ``fastavro`` being the
    # library module; presumably this is a method, so the def does not shadow
    # it. Confirm against the enclosing class.
    parsed = fastavro.schema.parse_schema(json.loads(SCHEMA))
    for raw in self.values:
        record = fastavro.schemaless_reader(BytesIO(raw), parsed)
        fastavro.schemaless_writer(BytesIO(), parsed, record)
def serialize_alert(alert, schema=latest_schema, schema_id=0):
    """Turn an alert into the byte sequence that is sent to Kafka.

    Parameters
    ----------
    alert : `dict`
        The alert payload to serialize.
    schema : `dict`, optional
        Avro schema definition used to encode `alert`. Defaults to the
        latest schema.
    schema_id : `int`, optional
        Confluent Schema Registry ID of the schema. The default of 0 is an
        invalid ID and signals that the schema is not registered.

    Returns
    -------
    serialized : `bytes`
        The Confluent Wire Format prefix followed by the Avro-encoded alert.
    """
    stream = io.BytesIO()
    # TODO: Use a proper schema versioning system
    header = serialize_confluent_wire_header(schema_id)
    stream.write(header)
    fastavro.schemaless_writer(stream, schema, alert)
    return stream.getvalue()
def build(cls, data: Any) -> bytes:
    """Wrap ``data`` in a typed envelope and Avro-encode the envelope.

    The envelope is ``{'type': <tag>, 'data': <bytes>}`` where the tag
    records how the payload was produced:

    * ``'json'``   - dict or list, dumped as JSON and encoded with
      ``cls.ENCODING``
    * ``'bytes'``  - bytes, passed through untouched
    * ``'str'``    - str, encoded with ``cls.ENCODING``
    * ``'object'`` - anything else, pickled
    """
    if isinstance(data, (dict, list)):
        kind = 'json'
        text = json.dumps(data, ensure_ascii=False, cls=JSONEncoder)
        payload = text.encode(encoding=cls.ENCODING)
    elif isinstance(data, bytes):
        kind, payload = 'bytes', data
    elif isinstance(data, str):
        kind, payload = 'str', data.encode(encoding=cls.ENCODING)
    else:
        kind, payload = 'object', pickle.dumps(data)

    buffer = io.BytesIO()
    schemaless_writer(
        buffer, parsed_record_schema, {'type': kind, 'data': payload}
    )
    return buffer.getvalue()
def test_schema_is_custom_dict_type():
    """https://github.com/tebeka/fastavro/issues/168"""

    class CustomDict(dict):
        pass

    string_array = {"type": "array", "items": "string"}
    schema = {
        'type': 'record',
        'fields': [{
            'name': 'description',
            "type": ["null", string_array, "string"],
        }],
        "name": "description",
        "doc": "A description of the thing.",
    }
    # Same schema content, but held in a dict subclass for the read side.
    reader_schema = CustomDict(schema)

    record = {'description': 'value'}

    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, schema, record)
    buffer.seek(0)
    decoded = fastavro.schemaless_reader(buffer, reader_schema)

    assert record == decoded
async def produce_for_topic(*, loop, producer_settings, topic_name, schema,
                            schema_id, period):
    """Continuously produce generated Avro messages to one Kafka topic.

    Each message from ``generate_message`` is prefixed with the packed
    ``MAGIC_BYTE`` and ``schema_id``, Avro-encoded, and sent with
    ``send_and_wait``. The coroutine sleeps ``period`` seconds between
    messages and always stops the producer on exit.
    """
    log = structlog.get_logger().bind(topic=topic_name)

    # Preparse schema
    parsed_schema = fastavro.parse_schema(schema)
    log.info('Preparsed schema')

    # Start up the producer
    producer = aiokafka.AIOKafkaProducer(loop=loop, **producer_settings)
    await producer.start()
    log.info('Started producer')

    # Generate and write messages
    try:
        for message in generate_message(parsed_schema):
            log.debug('New message', message=message)

            buffer = BytesIO()
            buffer.write(struct.pack('>bI', MAGIC_BYTE, schema_id))
            fastavro.schemaless_writer(buffer, parsed_schema, message)

            await producer.send_and_wait(topic_name, value=buffer.getvalue())

            # increment prometheus production counter
            PRODUCED.inc()
            log.debug('Sent message')

            # naive message period; need to correct for production time
            await asyncio.sleep(period)
    finally:
        await producer.stop()
def record(self):
    """Record audio forever, sending each frame and printing stats.

    Every 256-sample block is wrapped in a frame dict with millisecond
    start/end timestamps, Avro-encoded with the ``AudioFrame`` schema and
    sent through ``self.producer``. Once more than a second has elapsed
    since the last report, summary statistics for the frames gathered in
    that window are printed and the window is reset. The loop never
    terminates on its own.
    """
    schema = fastavro.schema.load_schema('avsc/AudioFrame.avsc')
    window_start = time.time()
    window = []
    with self.loopback.recorder(self.samplingFreq, blocksize=256) as rec:
        while True:
            started_ms = time.time() * 1000.0
            samples = rec.record(256)
            frame = {
                'data': samples.tolist(),
                'start_ts': started_ms,
                'end_ts': time.time() * 1000.0,
            }

            buffer = io.BytesIO()
            fastavro.schemaless_writer(buffer, schema, frame)
            self.producer.send(self.topic, buffer.getvalue())
            window.append(frame)

            if time.time() - window_start > 1:
                payloads = [f['data'] for f in window]
                no_of_frames = len(window)
                no_of_samples = sum(len(p) for p in payloads)
                total_volume = numpy.sum(numpy.abs(payloads))
                print(
                    'stats per 1s: no_of_frames={}, total_no_of_samples={}, total_volume={}'
                    .format(no_of_frames, no_of_samples, total_volume))
                window_start = time.time()
                window = []
def encode_into_avro(alert: dict, schema_file: str) -> bytes:
    """Encode a dict record into avro bytes

    Parameters
    ----------
    alert: dict
        A Dictionary of alert data
    schema_file: str
        Path of avro schema file (a JSON document)

    Returns
    ----------
    value: bytes
        The schemaless avro encoding of the alert data

    Examples
    ----------
    >>> r = AlertReader(avro_single_alert)
    >>> alert = r.to_list(size=1)[0]
    >>> avro_encoded = encode_into_avro(alert, schema_path)
    """
    # Read the schema as UTF-8 explicitly so the result does not depend on
    # the platform's default text encoding.
    with open(schema_file, encoding="utf-8") as f:
        schema = json.load(f)

    parsed_schema = fastavro.parse_schema(schema)
    b = io.BytesIO()
    fastavro.schemaless_writer(b, parsed_schema, alert)
    return b.getvalue()
def test_single_record():
    """Round-trip one record through schemaless_writer / schemaless_reader."""
    # To send with producer
    message = {
        "id": 10000,
        "title": "[FastAVRO] 테스트 공지 제목",
        "date": "20.12.23",
        "link": "https://somelink",
        "writer": "alfex4936",
    }
    # Independent literal, so the check does not rely on `message` itself.
    expected = {
        "id": 10000,
        "title": "[FastAVRO] 테스트 공지 제목",
        "date": "20.12.23",
        "link": "https://somelink",
        "writer": "alfex4936",
    }

    # How producer produces single data
    outgoing = BytesIO()
    schemaless_writer(outgoing, parsed_schema, message)  # write one record
    wire_bytes = outgoing.getvalue()

    # How consumer reads single record
    incoming = BytesIO(wire_bytes)
    decoded = schemaless_reader(incoming, parsed_schema)  # read one record

    assert decoded == expected
def encode(self, obj):
    """Validate ``obj``, convert it with ``todict`` and Avro-encode it.

    Returns the schemaless Avro bytes written with ``self._schema``.
    """
    self._validate_object_type(obj)
    record = todict(obj)
    out = io.BytesIO()
    fastavro.schemaless_writer(out, self._schema, record)
    return out.getvalue()
def writeavrodata(json_data: dict, json_schema: dict) -> io._io.BytesIO:
    """ Write a record into an in-memory Avro stream using a given schema.

    Parameters
    ----------
    json_data : `dict`
        The message content to encode.
    json_schema : `dict`
        The Avro writer schema used for encoding.

    Returns
    -------
    `_io.BytesIO`
        The stream holding the encoded data. It is returned as-is after
        writing (no seek is performed), so use ``getvalue()`` or rewind it
        before reading.

    Examples
    ----------
    >>> with open(ztf_alert_sample, mode='rb') as file_data:
    ...   data = readschemadata(file_data)
    ...   # Read the schema
    ...   schema = data.schema
    ...   for record in data:
    ...     bytes = writeavrodata(record, schema)
    >>> print(type(bytes))
    <class '_io.BytesIO'>
    """
    stream = io.BytesIO()
    fastavro.schemaless_writer(stream, json_schema, json_data)
    return stream
def test_fastavro_compatibility_deserialize(
        schema_root: str,
        schema_identifier: str,
        schemata: cerializer.schemata.CerializerSchemata) -> None:
    """Check that Cerializer decodes fastavro output like fastavro does.

    Every document in ``<schema_root>/example.yaml`` is encoded with fastavro
    using ``<schema_root>/schema.yaml``; the bytes are then decoded by both
    the Cerializer codec and fastavro and the results must be equal. The test
    fails if either fixture file is missing.

    :param schema_root: directory holding ``example.yaml`` and ``schema.yaml``
    :param schema_identifier: ``"<namespace>.<schema_name>"``
    :param schemata: schemata handed to the Cerializer codec
    """
    # patch for not working avro codec
    cerializer.tests.dev_utils.init_fastavro()
    namespace = schema_identifier.split('.')[0]
    schema_name = schema_identifier.split('.')[1]
    cerializer_codec = cerializer.cerializer.Cerializer(
        cerializer_schemata=schemata,
        namespace=namespace,
        schema_name=schema_name,
    )
    try:
        # Both fixtures are opened in context managers so the handles are
        # released even when an assertion fails. unsafe_load_all is lazy, so
        # the example file has to stay open for the whole loop.
        with open(os.path.join(schema_root, 'example.yaml')) as example_file:
            with open(os.path.join(schema_root, 'schema.yaml')) as schema_file:
                # NOTE(review): unsafe YAML loaders can construct arbitrary
                # objects; only acceptable while these fixtures are trusted.
                # mypy thinks yaml has no attribute unsafe_load_all, which is
                # not true
                data_all = yaml.unsafe_load_all(example_file)  # type: ignore
                SCHEMA_FAVRO = yaml.load(schema_file, Loader=yaml.Loader)
                for data in data_all:
                    output_fastavro = io.BytesIO()
                    fastavro.schemaless_writer(
                        output_fastavro, SCHEMA_FAVRO, data)
                    output_fastavro.seek(0)
                    deserialized = cerializer_codec.deserialize(
                        output_fastavro.getvalue())
                    output_fastavro.seek(0)
                    assert deserialized == fastavro.schemaless_reader(
                        output_fastavro, SCHEMA_FAVRO)
    except FileNotFoundError:
        logging.warning(
            'Missing schema or Example file for schema == %s',
            schema_name,
        )
        assert False
def test_schemaless_writer_and_reader_with_union():
    """Round-trip a union-of-records field with return_record_name enabled.

    The payload is given as a ``(record name, value)`` tuple and the reader
    is expected to hand back the same tuple form.
    """
    def application_record(name):
        # Both union branches share the same two string fields.
        return {
            "name": name,
            "type": "record",
            "fields": [
                {"name": "applicationId", "type": "string"},
                {"name": "data", "type": "string"},
            ],
        }

    schema = {
        "name": "Message",
        "type": "record",
        "namespace": "test",
        "fields": [
            {"name": "id", "type": "long"},
            {
                "name": "payload",
                "type": [
                    application_record("ApplicationCreated"),
                    application_record("ApplicationSubmitted"),
                ],
            },
        ],
    }
    record = {
        "id": 123,
        "payload": (
            "test.ApplicationSubmitted",
            {"applicationId": "123456789UT", "data": "..."},
        ),
    }

    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, schema, record)
    buffer.seek(0)
    # Positional arguments: no reader schema, return_record_name=True.
    decoded = fastavro.schemaless_reader(buffer, schema, None, True)

    assert record == decoded
def get_people_using_avro_protocol():
    """Return ``people`` Avro-encoded and decoded to text as ISO-8859-1.

    ISO-8859-1 maps every byte value to exactly one character, so the binary
    payload survives the conversion to ``str`` unchanged.
    """
    stream = BytesIO()
    schemaless_writer(stream, people_parsed_schema, people)
    encoded = stream.getvalue()
    return encoded.decode("ISO-8859-1")
def avro_encoder(it, schema):
    """Lazily Avro-encode each item of ``it`` with ``schema``.

    ``None`` items are passed through as ``None``; every other item is
    yielded as its schemaless Avro bytes.
    """
    for item in it:
        if item is None:
            yield None
            continue
        buffer = io.BytesIO()
        fastavro.schemaless_writer(buffer, schema, item)
        yield buffer.getvalue()
def add_person_using_avro_protocol():
    """Decode a person from the request body, store it, and echo it back.

    The request payload is read as schemaless Avro, appended to ``people``,
    and the same record is re-encoded and returned as bytes.
    """
    incoming = BytesIO(request.get_data())
    person = schemaless_reader(incoming, person_parsed_schema)
    people.append(person)

    outgoing = BytesIO()
    schemaless_writer(outgoing, person_parsed_schema, person)
    return outgoing.getvalue()
def test_int_binary(value, binary):
    """Check ``value`` encodes to ``binary`` as an Avro long, and back."""
    long_schema = {"type": "long"}

    out = BytesIO()
    fastavro.schemaless_writer(out, long_schema, value)
    encoded = out.getvalue()
    assert encoded == binary, "Invalid integer encoding."

    decoded = fastavro.schemaless_reader(BytesIO(binary), long_schema)
    assert decoded == value, "Invalid integer decoding."
def serialize_to_avro(rows, schema):
    """Serializes specified rows into avro format.

    ``schema`` is a JSON string. The rows are written back to back into one
    buffer with no framing between them, and the combined bytes are returned.
    """
    parsed = json.loads(schema)
    with BytesIO() as buffer:
        for record in rows:
            fastavro.schemaless_writer(buffer, parsed, record)
        return buffer.getvalue()
def encode(self, data: dict, output_stream: BytesIO):
    """Write ``data`` to ``output_stream`` as schemaless Avro.

    :param data: record to encode with ``self.schema``
    :param output_stream: stream the record is written to
    :return: the full contents of ``output_stream`` after the write
    :raises EncodingError: if the writer raises ``ValueError`` for the data
    :raises InvalidWriterStream: if the write raises ``TypeError`` or
        ``AttributeError``
    """
    try:
        fastavro.schemaless_writer(output_stream, self.schema, data)
        return output_stream.getvalue()
    except ValueError as e:
        # Chain explicitly so the original failure is reported as the cause.
        raise EncodingError(f"Data is not valid: {data}\n{e}") from e
    except (TypeError, AttributeError) as e:
        raise InvalidWriterStream(
            f"Expected BytesIO type, found {output_stream}: {e}") from e
def __call__(self, obj, ctx):
    """
    Serialize an object to the Confluent Schema Registry's Avro binary format.

    Args:
        obj (object): The object instance to serialize.

        ctx (SerializationContext): Metadata pertaining to the serialization
            operation.

    Note:
        None objects are represented as Kafka Null.

    Raises:
        SerializerError: If any error occurs serializing obj.

    Returns:
        bytes: Confluent Schema Registry formatted Avro bytes, or None when
        obj is None.
    """
    if obj is None:
        return None

    subject = self._subject_name_func(ctx, self._schema_name)

    # Resolve the schema id once per subject; later calls reuse it.
    if subject not in self._known_subjects:
        if self._use_latest_version:
            self._schema_id = self._registry.get_latest_version(
                subject).schema_id
        elif self._auto_register:
            # The schema name will always be the same. We can't however
            # register a schema without a subject so we set the schema_id
            # here to handle the initial registration.
            self._schema_id = self._registry.register_schema(
                subject, self._schema)
        else:
            # Check to ensure this schema has been registered under
            # subject_name.
            self._schema_id = self._registry.lookup_schema(
                subject, self._schema).schema_id
        self._known_subjects.add(subject)

    value = obj if self._to_dict is None else self._to_dict(obj, ctx)

    with _ContextStringIO() as fo:
        # Write the magic byte and schema ID in network byte order
        # (big endian)
        fo.write(pack('>bI', _MAGIC_BYTE, self._schema_id))
        # write the record to the rest of the buffer
        schemaless_writer(fo, self._parsed_schema, value)
        return fo.getvalue()
def json2binary(schema, record):
    """Encode ``record`` as schemaless Avro and return the bytes.

    :param schema: Avro schema passed to ``schemaless_writer``
    :param record: the record to encode
    :return: the encoded record as ``bytes``
    """
    # The previous version timed the write in milliseconds but never used or
    # reported the result, so the dead measurement has been dropped.
    iostream = BytesIO()
    schemaless_writer(iostream, schema, record)
    return iostream.getvalue()
def test_schemaless_write_read():
    """Round-trip the first sample record through the schemaless API."""
    expected = records[0]

    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, parsed_schema, expected)

    # Convert to bytes? (open question carried over from the original note)
    buffer.seek(0)
    decoded = fastavro.schemaless_reader(buffer, parsed_schema)

    assert expected == decoded
def compressed_avro_dump(
    data: tp.Any,
    *,
    schema_name: str,
    schema_version: int,
) -> bytes:
    """Avro-encode ``data`` and compress it as a raw LZMA2 stream.

    The schema is looked up with ``schema(schema_name, schema_version)``.
    The output uses ``lzma.FORMAT_RAW`` with a single LZMA2 filter at
    preset 5.
    """
    avro_schema = schema(schema_name, schema_version)
    with io.BytesIO() as buffer:
        fastavro.schemaless_writer(buffer, avro_schema, data)
        raw = buffer.getvalue()

    lzma2_filter = {'id': lzma.FILTER_LZMA2, 'preset': 5}
    return lzma.compress(raw, format=lzma.FORMAT_RAW, filters=[lzma2_filter])
def serialize(self, data: Data) -> Optional[bytes]:
    """Serialize ``data`` as a schema-id prefix plus its Avro payload.

    Returns ``None`` when the data type is ``NoData``. Otherwise the Avro
    type's id is obtained (or created) through the registry client and the
    payload is encoded with that type's fastavro schema.
    """
    if isinstance(data.data_type, NoData):
        return None

    avro_type = ensure_avro_type(data.data_type)
    schema_id = self._registry_client.get_or_create_id_for_avro_type(
        avro_type)

    body = io.BytesIO()
    fastavro.schemaless_writer(body, avro_type.fastavro_schema, data.payload)

    prefix = create_schema_id_prefix(schema_id)
    return prefix + body.getvalue()
def _dumps(self, obj):
    """Serialize ``obj`` to schemaless Avro bytes using ``self.schema_dict``.

    When ``self.encoding_method`` is set, ``obj`` is first converted with it;
    otherwise ``obj`` itself is written. The previous version referenced an
    unassigned ``datum`` in that second case and raised
    ``UnboundLocalError``.

    :param obj: object to serialize
    :return: the encoded bytes
    """
    datum = self.encoding_method(obj) if self.encoding_method else obj
    bytes_writer = io.BytesIO()
    schemaless_writer(bytes_writer, self.schema_dict, datum)
    return bytes_writer.getvalue()
def write_data(data):
    """
    Avro-encode a record with fastavro using the module-level ``schema``.

    :param data: record to encode
    :return: the encoded bytes
    """
    buffer = io.BytesIO()
    schemaless_writer(buffer, schema, data)
    encoded = buffer.getvalue()
    return encoded
def _make_message(*, schema_id: int, schema: Dict[str, Any],
                  data: Any) -> bytes:
    """Build a Confluent Wire Format message: prefix, then the Avro body.
    """
    buffer = BytesIO()
    # Confluent Wire Format prefix comes first ...
    buffer.write(pack_wire_format_prefix(schema_id))
    # ... followed by the Avro-encoded message.
    fastavro.schemaless_writer(buffer, schema, data)
    # getvalue() returns the whole buffer regardless of the stream position,
    # which matches rewinding and reading to the end.
    return buffer.getvalue()