Example #1
def encode(schema_id):
    with BytesIO() as out_stream:
        out_stream.write(struct.pack("b", 0))
        out_stream.write(struct.pack(">I", schema_id))
        fastavro.schemaless_writer(out_stream, employee_parsed_schema,
                                   employee_json_data)
        return out_stream.getvalue()
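The prefix written here is the Confluent wire format: one magic byte (0) plus a big-endian 4-byte schema ID. A minimal decode counterpart, sketched under the assumption that the same imports and employee_parsed_schema are in scope:

def decode(payload):
    # Sketch only: peel off the 5-byte Confluent prefix, then read the record.
    with BytesIO(payload) as in_stream:
        magic = struct.unpack("b", in_stream.read(1))[0]       # magic byte, always 0
        schema_id = struct.unpack(">I", in_stream.read(4))[0]  # big-endian schema ID
        record = fastavro.schemaless_reader(in_stream, employee_parsed_schema)
    return schema_id, record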
Example #2
def avro_test():
    schema = {
        'doc': 'test',
        'name': 'test',
        'namespace': 'test',
        'type': 'record',
        'fields': [
            {'name': 'words', 'type': 'string'},
            {'name': 'list', 'type': {
                'type': 'array',
                'items': 'int'
            }},
            {'name': 'dict', "type": {
                "type": "map",
                "values": "string"
            }},
            {'name': 'int', 'type': 'int'},
            {'name': 'float', 'type': 'float'}
        ],
    }
    bytes_writer = BytesIO()
    fastavro.schemaless_writer(bytes_writer, schema, message)
    src = bytes_writer.getvalue()
    setup = 'd={}; schema={}; import fastavro; from io import BytesIO; bytes_writer = BytesIO(); fastavro.schemaless_writer(bytes_writer, schema, d); src = bytes_writer.getvalue()'.format(message, schema)
    result = timeit(setup=setup, stmt='bytes_writer = BytesIO(); fastavro.schemaless_writer(bytes_writer, schema, d); bytes_writer.getvalue()', number=loops)
    enc_table.append(['Avro serialization', result, sys.getsizeof(src)])
    result = timeit(setup=setup, stmt='bytes_writer = BytesIO(); bytes_writer.write(src); bytes_writer.seek(0); data = fastavro.schemaless_reader(bytes_writer, schema)', number=loops)
    dec_table.append(['Avro deserialization', result])
    print("Avro")
Example #3
    def encode(self, data: dict, **kwargs) -> Optional[bytes]:
        """ Encode the data into an avro byte stream
            
            :param data dict: information to be encoded into avro byte stream
   
            :param schema: schema to use in decode algorithm
            :type schema: Optional[dict]

            :param schemaless: encode without schmea defaults to true
            :type schemaless: Optional[dict]

            :return: bytes so long as data is not None
            :rtype: Optional[bytes]

            :raises AttributeError: schema must be provide with this implementation
            :rases fastavro.schema.SchemaParseException: incorrect schema provided, please verify the scheam is correct
        """
        if not data:
            return None
        _schema: dict = kwargs.get('schema', None)
        _schemaless: bool = kwargs.get('schemaless', True)
        if not _schema:
            raise AttributeError("Missing schema named argument")

        schema: dict = fastavro.parse_schema(_schema)
        stream: io.BytesIO = io.BytesIO()
        if _schemaless:
            fastavro.schemaless_writer(stream, schema, data)
            return stream.getvalue()
        fastavro.writer(stream, schema, [data])
        return stream.getvalue()
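The two branches produce different layouts: schemaless_writer emits only the record bytes (the schema must travel out of band), while fastavro.writer emits an Avro object container file with the schema embedded. A reading sketch for both forms, assuming the same parsed schema:

import io
import fastavro

def decode(blob: bytes, schema: dict, schemaless: bool = True):
    stream = io.BytesIO(blob)
    if schemaless:
        # Schemaless payloads need the writer schema supplied by the caller.
        return fastavro.schemaless_reader(stream, schema)
    # Container files embed the schema, so fastavro.reader recovers it itself.
    return list(fastavro.reader(stream))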
Example #4
    def publish_aggregate_measurement(self, measurement):
        """
        Publish an aggregate measurement.
        """
        msg = BytesIO()
        fastavro.schemaless_writer(
            msg,
            self._aggregate_schema,
            {
                'kit_serial': '',  # Filled in on the backend side for security reasons.
                'peripheral': measurement.peripheral.get_name(),
                'physical_quantity': measurement.physical_quantity,
                'physical_unit': measurement.physical_unit,
                'start_datetime': round(measurement.start_datetime.timestamp() * 1000),
                'end_datetime': round(measurement.end_datetime.timestamp() * 1000),
                'type': measurement.aggregate_type,
                'value': measurement.value,
            })

        self._mqtt_client.publish(
            topic=f"kit/{self.serial}/measurement/aggregate",
            payload=msg.getvalue(),
            qos=2,  # Deliver exactly once. Maybe downgrade to `1`: deliver at least once.
        )
Example #5
    def fastavro(self):
        schema = fastavro.schema.parse_schema(json.loads(SCHEMA))
        for encoded_value in self.values:
            value_buf = BytesIO(encoded_value)
            decoded = fastavro.schemaless_reader(value_buf, schema)
            output_buf = BytesIO()
            fastavro.schemaless_writer(output_buf, schema, decoded)
Example #6
def serialize_alert(alert, schema=latest_schema, schema_id=0):
    """Serialize an alert to a byte sequence for sending to Kafka.

    Parameters
    ----------
    alert : `dict`
        An alert payload to be serialized.
    schema : `dict`, optional
        An Avro schema definition describing how to encode `alert`. By default,
        the latest schema is used.
    schema_id : `int`, optional
        The Confluent Schema Registry ID of the schema. By default, 0 (an
        invalid ID) is used, indicating that the schema is not registered.

    Returns
    -------
    serialized : `bytes`
        The byte sequence describing the alert, including the Confluent Wire
        Format prefix.
    """
    buf = io.BytesIO()
    # TODO: Use a proper schema versioning system
    buf.write(serialize_confluent_wire_header(schema_id))
    fastavro.schemaless_writer(buf, schema, alert)
    return buf.getvalue()
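serialize_confluent_wire_header is not shown in this example; a plausible sketch (an assumption, consistent with the struct.pack('>bI', MAGIC_BYTE, schema_id) prefix used in Example #9):

import struct

_MAGIC_BYTE = 0  # Confluent wire format marker byte

def serialize_confluent_wire_header(schema_id: int) -> bytes:
    # Pack the magic byte plus the 4-byte big-endian schema ID.
    return struct.pack(">bI", _MAGIC_BYTE, schema_id)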
Example #7
    def build(cls, data: Any) -> bytes:
        if isinstance(data, (dict, list)):
            dtype = 'json'
            data = json.dumps(
                data,
                ensure_ascii=False,
                cls=JSONEncoder,
            ).encode(encoding=cls.ENCODING)
        elif isinstance(data, bytes):
            dtype = 'bytes'
        elif isinstance(data, str):
            dtype = 'str'
            data = data.encode(encoding=cls.ENCODING)
        else:
            dtype = 'object'
            data = pickle.dumps(data)

        d = {
            'type': dtype,
            'data': data,
        }
        wio = io.BytesIO()
        schemaless_writer(wio, parsed_record_schema, d)
        return wio.getvalue()
Example #8
def test_schema_is_custom_dict_type():
    """https://github.com/tebeka/fastavro/issues/168"""
    class CustomDict(dict):
        pass

    schema = {
        'type': 'record',
        'fields': [{
            'name': 'description',
            'type': ['null', {
                'type': 'array',
                'items': 'string'
            }, 'string'],
        }],
        'name': 'description',
        'doc': 'A description of the thing.',
    }
    other_type_schema = CustomDict(schema)

    record = {
        'description': 'value',
    }

    new_file = MemoryIO()
    fastavro.schemaless_writer(new_file, schema, record)
    new_file.seek(0)
    new_record = fastavro.schemaless_reader(new_file, other_type_schema)
    assert record == new_record
Example #9
async def produce_for_topic(*, loop, producer_settings, topic_name, schema,
                            schema_id, period):
    logger = structlog.get_logger().bind(topic=topic_name)

    # Preparse schema
    schema = fastavro.parse_schema(schema)
    logger.info('Preparsed schema')

    # Start up the producer
    producer = aiokafka.AIOKafkaProducer(loop=loop, **producer_settings)
    await producer.start()
    logger.info('Started producer')

    # Generate and write messages
    try:
        for message in generate_message(schema):
            logger.debug('New message', message=message)
            message_fh = BytesIO()
            message_fh.write(struct.pack('>bI', MAGIC_BYTE, schema_id))
            fastavro.schemaless_writer(
                message_fh,
                schema,
                message
            )
            message_fh.seek(0)
            await producer.send_and_wait(
                topic_name, value=message_fh.read())
            # increment prometheus production counter
            PRODUCED.inc()
            logger.debug('Sent message')
            # naive message period; need to correct for production time
            await asyncio.sleep(period)
    finally:
        await producer.stop()
Example #10
    def record(self):
        schema = fastavro.schema.load_schema('avsc/AudioFrame.avsc')

        timestamp = time.time()
        frames = []

        with self.loopback.recorder(self.samplingFreq, blocksize=256) as rec:
            while True:
                start = time.time() * 1000.0
                samples = rec.record(256)
                frame = {
                    'data': samples.tolist(),
                    'start_ts': start,
                    'end_ts': time.time() * 1000.0
                }
                fo = io.BytesIO()
                fastavro.schemaless_writer(fo, schema, frame)

                self.producer.send(self.topic, fo.getvalue())

                frames.append(frame)
                if time.time() - timestamp > 1:
                    no_of_frames = len(frames)
                    no_of_samples = sum(map(lambda f: len(f['data']), frames))
                    total_volume = numpy.sum(
                        numpy.abs(list(map(lambda x: x['data'], frames))))
                    print(
                        'stats per 1s: no_of_frames={}, total_no_of_samples={}, total_volume={}'
                        .format(no_of_frames, no_of_samples, total_volume))
                    timestamp = time.time()
                    frames = []
Example #11
def encode_into_avro(alert: dict, schema_file: str) -> bytes:
    """Encode a dict record into avro bytes

    Parameters
    ----------
    alert: dict
        A Dictionary of alert data
    schema_file: str
        Path of avro schema file

    Returns
    -------
    value: bytes
        a byte string with Avro-encoded alert data

    Examples
    --------
    >>> r = AlertReader(avro_single_alert)
    >>> alert = r.to_list(size=1)[0]
    >>> avro_encoded = encode_into_avro(alert, schema_path)
    """
    with open(schema_file) as f:
        schema = json.load(f)

    parsed_schema = fastavro.parse_schema(schema)
    b = io.BytesIO()
    fastavro.schemaless_writer(b, parsed_schema, alert)

    return b.getvalue()
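A matching decoder (not part of the source; sketched with the same schema file and imports) reverses the process with schemaless_reader:

def decode_from_avro(avro_bytes: bytes, schema_file: str) -> dict:
    # Sketch: parse the same schema, then decode the schemaless payload.
    with open(schema_file) as f:
        parsed_schema = fastavro.parse_schema(json.load(f))
    return fastavro.schemaless_reader(io.BytesIO(avro_bytes), parsed_schema)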
Example #12
def test_single_record():
    # To send with producer
    message = {
        "id": 10000,
        "title": "[FastAVRO] 테스트 공지 제목",
        "date": "20.12.23",
        "link": "https://somelink",
        "writer": "alfex4936",
    }

    # How producer produces single data
    producer_rb = BytesIO()
    schemaless_writer(producer_rb, parsed_schema, message)  # write one record
    produced_data = producer_rb.getvalue()

    # How consumer reads single record
    consumer_rb = BytesIO(produced_data)
    decoded = schemaless_reader(consumer_rb, parsed_schema)  # read one record
    assert decoded == {
        "id": 10000,
        "title": "[FastAVRO] 테스트 공지 제목",
        "date": "20.12.23",
        "link": "https://somelink",
        "writer": "alfex4936",
    }
Example #13
    def encode(self, obj):
        self._validate_object_type(obj)
        buffer = io.BytesIO()
        m = todict(obj)
        # m = {k: self._get_serialized_value(v) for k, v in obj.__dict__.items()}
        fastavro.schemaless_writer(buffer, self._schema, m)
        return buffer.getvalue()
Example #14
def writeavrodata(json_data: dict, json_schema: dict) -> io.BytesIO:
    """ Encode json into Avro format given a schema.

    Parameters
    ----------
    json_data : `dict`
        The JSON data containing message content.
    json_schema : `dict`
        The writer Avro schema for encoding data.

    Returns
    -------
    `_io.BytesIO`
        Encoded data.

    Examples
    --------
    >>> with open(ztf_alert_sample, mode='rb') as file_data:
    ...   data = readschemadata(file_data)
    ...   # Read the schema
    ...   schema = data.schema
    ...   for record in data:
    ...     bytes = writeavrodata(record, schema)
    >>> print(type(bytes))
    <class '_io.BytesIO'>
    """
    bytes_io = io.BytesIO()
    fastavro.schemaless_writer(bytes_io, json_schema, json_data)
    return bytes_io
Example #15
def test_fastavro_compatibility_deserialize(
        schema_root: str, schema_identifier: str,
        schemata: cerializer.schemata.CerializerSchemata) -> None:
    # patch for not working avro codec
    cerializer.tests.dev_utils.init_fastavro()
    namespace = schema_identifier.split('.')[0]
    schema_name = schema_identifier.split('.')[1]
    cerializer_codec = cerializer.cerializer.Cerializer(
        cerializer_schemata=schemata,
        namespace=namespace,
        schema_name=schema_name,
    )
    try:
        # mypy thinks yaml has no attribute unsafe_load_all, which is not true
        data_all = yaml.unsafe_load_all(  # type: ignore
            open(os.path.join(schema_root, 'example.yaml')))
        SCHEMA_FAVRO = yaml.load(open(os.path.join(schema_root,
                                                   'schema.yaml')),
                                 Loader=yaml.Loader)
        for data in data_all:
            output_fastavro = io.BytesIO()
            fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
            output_fastavro.seek(0)
            deserialized = cerializer_codec.deserialize(
                output_fastavro.getvalue())
            output_fastavro.seek(0)
            assert deserialized == fastavro.schemaless_reader(
                output_fastavro, SCHEMA_FAVRO)
    except FileNotFoundError:
        logging.warning(
            'Missing schema or Example file for schema == %s',
            schema_name,
        )
        assert False
Example #16
def test_schemaless_writer_and_reader_with_union():
    """Testing basic functionality of reader with union when option to return_record_name is true.
    """
    schema = {
        "name": "Message",
        "type": "record",
        "namespace": "test",
        "fields": [{
            "name": "id",
            "type": "long"
        }, {
            "name": "payload",
            "type": [
                {
                    "name": "ApplicationCreated",
                    "type": "record",
                    "fields": [{
                        "name": "applicationId",
                        "type": "string"
                    }, {
                        "name": "data",
                        "type": "string"
                    }]
                },
                {
                    "name": "ApplicationSubmitted",
                    "type": "record",
                    "fields": [{
                        "name": "applicationId",
                        "type": "string"
                    }, {
                        "name": "data",
                        "type": "string"
                    }]
                },
            ]
        }]
    }
    record = input_record = {
        "id": 123,
        "payload": ("test.ApplicationSubmitted", {
            "applicationId": "123456789UT",
            "data": "..."
        })
    }
    new_file = MemoryIO()
    fastavro.schemaless_writer(new_file, schema, record)
    new_file.seek(0)
    new_record = fastavro.schemaless_reader(
        new_file, schema, None, return_record_name=True)
    assert record == new_record
Example #17
def get_people_using_avro_protocol():
    buff = BytesIO()
    schemaless_writer(buff, people_parsed_schema, people)
    message = buff.getvalue()

    # app.logger.info('%s logged in successfully', message)

    return message.decode("ISO-8859-1")
Example #18
def avro_encoder(it, schema):
    for i in it:
        if i is None:
            yield None
        else:
            data = io.BytesIO()
            fastavro.schemaless_writer(data, schema, i)
            yield data.getvalue()
Example #19
def add_person_using_avro_protocol():
    bytes_reader = BytesIO(request.get_data())
    person = schemaless_reader(bytes_reader, person_parsed_schema)
    people.append(person)
    buf = BytesIO()
    schemaless_writer(buf, person_parsed_schema, person)
    message = buf.getvalue()
    return message
Example #20
def test_int_binary(value, binary):
    schema = {"type": "long"}
    buffer = BytesIO()

    fastavro.schemaless_writer(buffer, schema, value)
    assert buffer.getvalue() == binary, "Invalid integer encoding."

    deserialized = fastavro.schemaless_reader(BytesIO(binary), schema)
    assert deserialized == value, "Invalid integer decoding."
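The value/binary pairs are presumably supplied by test parametrization; Avro encodes longs as zigzag varints, so a spot-check like the following (a sketch, not from the source) should pass:

from io import BytesIO
import fastavro

# zigzag(n) = (n << 1) ^ (n >> 63), then varint-encoded 7 bits at a time.
for value, binary in [(0, b"\x00"), (-1, b"\x01"), (1, b"\x02"),
                      (-2, b"\x03"), (64, b"\x80\x01")]:
    buffer = BytesIO()
    fastavro.schemaless_writer(buffer, {"type": "long"}, value)
    assert buffer.getvalue() == binary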
Example #21
def serialize_to_avro(rows, schema):
    """Serializes specified rows into avro format."""
    string_output = BytesIO()
    json_schema = json.loads(schema)
    for row in rows:
        fastavro.schemaless_writer(string_output, json_schema, row)
    avro_output = string_output.getvalue()
    string_output.close()
    return avro_output
Example #22
    def encode(self, data: dict, output_stream: BytesIO):
        try:
            fastavro.schemaless_writer(output_stream, self.schema, data)
            return output_stream.getvalue()
        except ValueError as e:
            raise EncodingError(f"Data is not valid: {data}\n{e}")
        except (TypeError, AttributeError) as e:
            raise InvalidWriterStream(
                f"Expected BytesIO type, found {output_stream}: {e}")
Example #23
    def __call__(self, obj, ctx):
        """
        Serializes an object to the Confluent Schema Registry's Avro binary
        format.

        Args:
            obj (object): object instance to serialize.

            ctx (SerializationContext): Metadata pertaining to the serialization operation.

        Note:
            None objects are represented as Kafka Null.

        Raises:
            SerializerError: if any error occurs serializing obj

        Returns:
            bytes: Confluent Schema Registry formatted Avro bytes

        """
        if obj is None:
            return None

        subject = self._subject_name_func(ctx, self._schema_name)

        if subject not in self._known_subjects:
            if self._use_latest_version:
                latest_schema = self._registry.get_latest_version(subject)
                self._schema_id = latest_schema.schema_id

            else:
                # Check to ensure this schema has been registered under subject_name.
                if self._auto_register:
                    # The schema name will always be the same. We can't however register
                    # a schema without a subject so we set the schema_id here to handle
                    # the initial registration.
                    self._schema_id = self._registry.register_schema(
                        subject, self._schema)
                else:
                    registered_schema = self._registry.lookup_schema(
                        subject, self._schema)
                    self._schema_id = registered_schema.schema_id
            self._known_subjects.add(subject)

        if self._to_dict is not None:
            value = self._to_dict(obj, ctx)
        else:
            value = obj

        with _ContextStringIO() as fo:
            # Write the magic byte and schema ID in network byte order (big endian)
            fo.write(pack('>bI', _MAGIC_BYTE, self._schema_id))
            # write the record to the rest of the buffer
            schemaless_writer(fo, self._parsed_schema, value)

            return fo.getvalue()
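The consuming side peels the same 5-byte prefix off before decoding; a minimal sketch (parse_confluent_message is a hypothetical name, and a parsed reader schema is assumed):

import io
import struct
from fastavro import schemaless_reader

def parse_confluent_message(payload: bytes, parsed_schema):
    # First 5 bytes: magic byte + big-endian schema ID, as written above.
    magic, schema_id = struct.unpack('>bI', payload[:5])
    if magic != 0:
        raise ValueError("Unexpected magic byte")
    return schema_id, schemaless_reader(io.BytesIO(payload[5:]), parsed_schema)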
Example #24
def json2binary(schema, record):
    iostream = BytesIO()
    start = int(time.time() * 1000)
    schemaless_writer(iostream, schema, record)
    end = int(time.time() * 1000)
    # Run time of the program, in milliseconds
    run_time = end - start
    # print('Running time: %s Milliseconds' % run_time)
    serialized = iostream.getvalue()
    return serialized
Example #25
def test_schemaless_write_read():
    new_file = MemoryIO()
    fastavro.schemaless_writer(new_file, parsed_schema, records[0])
    new_file.seek(0)

    # convert to bytes?
    new_record = fastavro.schemaless_reader(new_file, parsed_schema)
    assert records[0] == new_record
Example #26
def compressed_avro_dump(
    data: tp.Any,
    *,
    schema_name: str,
    schema_version: int,
) -> bytes:
    with io.BytesIO() as f:
        fastavro.schemaless_writer(f, schema(schema_name, schema_version), data)
        blob = f.getvalue()
    return lzma.compress(
        blob,
        format=lzma.FORMAT_RAW,
        filters=[dict(id=lzma.FILTER_LZMA2, preset=5)],
    )
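A matching loader sketch (an assumption, mirroring the function above): raw LZMA streams carry no header, so decompression must restate the identical filter chain.

def compressed_avro_load(
    blob: bytes,
    *,
    schema_name: str,
    schema_version: int,
) -> tp.Any:
    # FORMAT_RAW stores no filter metadata; repeat the chain used to compress.
    raw = lzma.decompress(
        blob,
        format=lzma.FORMAT_RAW,
        filters=[dict(id=lzma.FILTER_LZMA2, preset=5)],
    )
    with io.BytesIO(raw) as f:
        return fastavro.schemaless_reader(f, schema(schema_name, schema_version))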
Example #27
    def serialize(self, data: Data) -> Optional[bytes]:
        if isinstance(data.data_type, NoData):
            return None
        avro_type = ensure_avro_type(data.data_type)
        schema_id = self._registry_client.get_or_create_id_for_avro_type(
            avro_type)
        buffer = io.BytesIO()
        fastavro.schemaless_writer(buffer, avro_type.fastavro_schema,
                                   data.payload)
        return create_schema_id_prefix(schema_id) + buffer.getvalue()
Example #28
    def _dumps(self, obj):
        bytes_writer = io.BytesIO()

        if self.encoding_method:
            datum = self.encoding_method(obj)
            schemaless_writer(bytes_writer, self.schema_dict, datum)
        else:
            # No encoding method configured: write the object as-is.
            schemaless_writer(bytes_writer, self.schema_dict, obj)

        return bytes_writer.getvalue()
Example #29
def write_data(data):
    """
    Encode JSON data into Avro format with the fastavro module, given a schema.

    :param data: data to encode
    :return: data encoded
    """
    raw_data = io.BytesIO()
    schemaless_writer(raw_data, schema, data)
    return raw_data.getvalue()
Example #30
def _make_message(*, schema_id: int, schema: Dict[str, Any],
                  data: Any) -> bytes:
    """Make a message in the Confluent Wire Format.
    """
    message_fh = BytesIO()
    # Write the Confluent Wire Format prefix.
    message_fh.write(pack_wire_format_prefix(schema_id))
    # Write the Avro-encoded message
    fastavro.schemaless_writer(message_fh, schema, data)
    message_fh.seek(0)
    return message_fh.read()