/
avroconsumer.py
218 lines (170 loc) · 7.98 KB
/
avroconsumer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""
Rejected Consumers for automatic deserialization (and serialization) of
Avro datum in RabbitMQ messages.
"""
import io
import json
from os import path
import fastavro
from rejected import consumer
import requests
# Content type that marks a message body as an Avro datum; the consumer
# compares message / publish properties against this value to decide whether
# to (de)serialize with Avro.
DATUM_MIME_TYPE = 'application/vnd.apache.avro.datum'
# Version string of this module.
__version__ = '1.2.0'
class Consumer(consumer.SmartConsumer):
    """Automatically deserialize Avro datum from RabbitMQ messages that have
    the ``content-type`` of ``application/vnd.apache.avro.datum``.

    Schemas are looked up by message type through :meth:`_load_schema`,
    which subclasses must implement, and are cached on the instance so each
    schema is only loaded once per consumer.

    """

    def __init__(self, *args, **kwargs):
        super(Consumer, self).__init__(*args, **kwargs)
        # Loaded Avro schemas, keyed by message type
        self._avro_schemas = {}

    @property
    def body(self):
        """Return the message body, deserialized if the content-type is
        set properly.

        When the message has the Avro datum content-type and a message type,
        the body returned by the parent class is decoded with the schema for
        that message type and the decoded value is stored in
        ``self._message_body``.

        :rtype: any

        """
        if self._message_body:
            self.logger.debug('Returning %r', self._message_body)
            return self._message_body
        # NOTE(review): the parent property is presumed to populate
        # ``self._message_body`` for non-Avro content -- confirm against
        # rejected.consumer.SmartConsumer.
        body = super(Consumer, self).body
        if self.content_type == DATUM_MIME_TYPE and self.message_type:
            self.logger.debug('Deserializing %r', body)
            self._message_body = self._deserialize(
                self._avro_schema(self.message_type), body)
        return self._message_body

    def publish_message(self, exchange, routing_key, properties, body,
                        no_serialization=False, no_encoding=False,
                        channel=None):
        """Publish a message to RabbitMQ on the same channel the original
        message was received on.

        By default, if you pass a non-string object to the body and the
        properties have a supported content-type set, the body will be
        auto-serialized in the specified content-type.

        If the ``content_type`` property is set to
        ``application/vnd.apache.avro.datum`` and the ``type`` property is
        set, the body will be serialized as an Avro datum using the schema
        for that type before it is handed to the parent class.

        If the properties do not have a timestamp set, it will be set to the
        current time.

        If you specify a content-encoding in the properties and the encoding
        is supported, the body will be auto-encoded.

        Both of these behaviors can be disabled by setting no_serialization
        or no_encoding to True.

        :param str exchange: The exchange to publish to
        :param str routing_key: The routing key to publish with
        :param dict properties: The message properties
        :param mixed body: The message body to publish
        :param bool no_serialization: Turn off auto-serialization of the body
        :param bool no_encoding: Turn off auto-encoding of the body
        :param str channel: The channel to publish on

        """
        if properties is None:
            properties = {}
        if not no_serialization and properties.get('type') and \
                properties.get('content_type') == DATUM_MIME_TYPE:
            body = self._serialize(
                self._avro_schema(properties['type']), body)
        super(Consumer, self).publish_message(
            exchange, routing_key, properties, body,
            no_serialization, no_encoding, channel)

    def _avro_schema(self, message_type):
        """Return the cached Avro schema for the specified message type,
        loading and caching it first if it has not been seen yet.

        :param str message_type: The message type to get the schema for
        :rtype: dict

        """
        # Membership is tested on the dict itself; ``.keys()`` is redundant
        if message_type not in self._avro_schemas:
            self.logger.debug('Fetching %s schema', message_type)
            self._avro_schemas[message_type] = self._load_schema(message_type)
        self.logger.debug('Returning %s schema', message_type)
        return self._avro_schemas[message_type]

    def _load_schema(self, message_type=None):  # pragma: nocover
        """Return the schema for the message type. Must be implemented by
        subclasses.

        :param str message_type: Optional message type to load the schema for
        :rtype: dict
        :raises: NotImplementedError

        """
        raise NotImplementedError

    @staticmethod
    def _deserialize(avro_schema, data):
        """Deserialize an Avro datum with the specified schema.

        :param dict avro_schema: The parsed Avro schema
        :param bytes data: The Avro datum to deserialize
        :rtype: dict

        """
        return fastavro.schemaless_reader(io.BytesIO(data), avro_schema)

    @staticmethod
    def _serialize(avro_schema, data):
        """Serialize a data structure into an Avro datum.

        :param dict avro_schema: The parsed Avro schema
        :param dict data: The value to turn into an Avro datum
        :rtype: bytes

        """
        stream = io.BytesIO()
        fastavro.schemaless_writer(stream, avro_schema, data)
        return stream.getvalue()
class LocalSchemaConsumer(Consumer):
    """Consumer that loads schema files from disk. The schema file path is
    comprised of the ``schema_path`` configuration setting and the
    message type, appending the file type ``.avsc`` to the end.

    """

    def initialize(self):
        """Ensure the ``schema_path`` setting is set and refers to an
        existing directory, storing its normalized form back in the settings.

        :raises: RuntimeError

        """
        self.require_setting('schema_path',
                             'avroconsumer.LocalSchemaConsumer')
        self.settings['schema_path'] = path.normpath(
            self.settings['schema_path'])
        # isdir() is already False for a path that does not exist, so a
        # separate exists() check is not needed
        if not path.isdir(self.settings['schema_path']):
            raise RuntimeError(
                'schema_path {!r} is invalid'.format(
                    self.settings['schema_path']))
        super(LocalSchemaConsumer, self).initialize()

    def _load_schema(self, message_type=None):
        """Load the schema file from the file system, raising a
        ``rejected.consumer.ConsumerException`` if the schema file can
        not be found or if the message type would resolve to a file outside
        of ``schema_path``. The schema file path is comprised of the
        ``schema_path`` configuration setting and the message type,
        appending the file type ``.avsc`` to the end.

        :param str message_type: Optional message type to load the schema for
        :rtype: dict
        :raises: rejected.consumer.ConsumerException

        """
        message_type = message_type or self.message_type
        schema_root = self.settings['schema_path']
        file_path = path.normpath(path.join(
            schema_root, '{0}.avsc'.format(message_type)))

        # The message type comes from the message properties, so it must not
        # be able to escape the schema directory (``../`` segments or an
        # absolute path, which os.path.join would honor). Compare absolute
        # paths; join(root, '') guarantees a trailing separator so that
        # ``/schemas-other`` is not mistaken for a child of ``/schemas``.
        root_prefix = path.join(path.abspath(schema_root), '')
        if not path.abspath(file_path).startswith(root_prefix):
            raise consumer.ConsumerException(
                'Invalid schema file path: {0}'.format(file_path))

        if not path.exists(file_path):
            raise consumer.ConsumerException(
                'Missing schema file: {0}'.format(file_path))
        with open(file_path, 'r') as handle:
            return json.load(handle)
class RemoteSchemaConsumer(Consumer):
    """Consumer class that implements Avro Datum decoding that loads Avro
    schemas from a remote URI. The URI format for requests is configured
    in the rejected configuration for the consumer with the
    ``schema_uri_format`` parameter:

    .. code:: yaml

        config:
          schema_uri_format: http://schema-server/avro/{0}.avsc

    The ``{0}`` value is the placeholder for the message type value.

    """

    # Seconds to wait for the schema server before giving up. Without a
    # timeout ``requests.get`` can block indefinitely on an unresponsive
    # server. Subclasses may override this value.
    SCHEMA_REQUEST_TIMEOUT = 10

    def initialize(self):
        """Ensure the ``schema_uri_format`` setting is set."""
        self.require_setting(
            'schema_uri_format', 'avroconsumer.RemoteSchemaConsumer')
        super(RemoteSchemaConsumer, self).initialize()

    def _load_schema(self, message_type=None):
        """Load the schema over HTTP from the URL built from the
        ``schema_uri_format`` setting and the message type, raising a
        ``rejected.consumer.ConsumerException`` if the server does not
        respond with a successful status.

        The request is bounded by ``SCHEMA_REQUEST_TIMEOUT`` seconds;
        exceptions raised by ``requests`` itself (connection errors,
        timeouts) are not caught here and propagate to the caller.

        :param str message_type: Optional message type to load the schema for
        :rtype: dict
        :raises: rejected.consumer.ConsumerException

        """
        message_type = message_type or self.message_type
        url = self._schema_url(message_type)
        self.logger.debug('Loading schema for %s from %s', message_type, url)
        response = requests.get(url, timeout=self.SCHEMA_REQUEST_TIMEOUT)
        if not response.ok:
            self.logger.error('Could not fetch Avro schema for %s (%s)',
                              message_type, response.status_code)
            raise consumer.ConsumerException('Error fetching avro schema')
        return response.json()

    def _schema_url(self, message_type):
        """Return the schema URL for the message type by substituting it
        into the ``schema_uri_format`` setting.

        :param str message_type: The message type to build the URL for
        :rtype: str

        """
        return self.settings['schema_uri_format'].format(message_type)