async def test_predict_ce_avro_binary(self, http_server_client):
    schema = avro.schema.parse(test_avsc_schema)
    msg = {"name": "foo", "favorite_number": 1, "favorite_color": "pink"}

    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(msg, encoder)
    data = bytes_writer.getvalue()

    event = dummy_cloud_event(data, set_contenttype=True)
    # Creates the HTTP request representation of the CloudEvent in binary content mode
    headers, body = to_binary(event)

    resp = await http_server_client.fetch('/v1/models/TestModel:predict',
                                          method="POST",
                                          headers=headers,
                                          body=body)
    assert resp.code == 200
    assert resp.headers['content-type'] == "application/json"
    assert resp.headers['ce-specversion'] == "1.0"
    assert resp.headers["ce-id"] != "36077800-0c23-4f38-a0b4-01f4369f670a"
    assert resp.headers['ce-source'] == "io.kserve.kfserver.TestModel"
    assert resp.headers['ce-type'] == "io.kserve.inference.response"
    assert resp.headers['ce-time'] > "2021-01-28T21:04:43.144141+00:00"
    assert resp.body == b'{"predictions": [["foo", 1, "pink"]]}'
def sendPackageTimeout(self, accountId):
    message = {
        "accountId": accountId,
        "host": None,
        "item": None,
        "severity": "ERROR",
        "description": "account %s workflow timeout" % accountId
    }
    all = {
        "timestamp": 1L,
        "src": "rundeck",
        "host_ip": "10.74.113.101",
        "rawdata": json.dumps(message)
    }
    schema = avro.schema.parse(avro_schema)
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(all, encoder)
    try:
        self.producer.send_messages(b"%s" % self.zabbix_alert,
                                    bytes_writer.getvalue())
        logger.info("send to zabbix sa successfully")
    except:
        logger.error(
            "occur error when send package timeout message to zabbix alert topic"
        )
def senddata():
    producer = KafkaProducer(bootstrap_servers="localhost:9092")
    schema = avro.schema.parse(open("schema/flood.json").read())
    data = json.load(open("data/sample.json", "r"))
    for item in data:
        a_item = {
            "construction": str(item.get("construction")),
            "county": str(item.get("county")),
            "eq_site_deductible": item.get("eq_site_deductible"),
            "eq_site_limit": item.get("eq_site_limit"),
            "fl_site_deductible": item.get("fl_site_deductible"),
            "fl_site_limit": item.get("fl_site_limit"),
            "fr_site_deductible": item.get("fr_site_deductible"),
            "fr_site_limit": item.get("fr_site_limit"),
            "hu_site_deductible": item.get("hu_site_deductible"),
            "hu_site_limit": item.get("hu_site_limit"),
            "line": str(item.get("line")),
            "point_granularity": item.get("point_granularity"),
            "point_latitude": item.get("point_latitude"),
            "point_longitude": item.get("point_longitude"),
            "policyID": item.get("policyID"),
            "statecode": str(item.get("statecode")),
            "tiv_2011": item.get("tiv_2011"),
            "tiv_2012": item.get("tiv_2012")
        }
        # defines encoding format
        writer = avro.io.DatumWriter(schema)
        bytes_writes = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writes)
        writer.write(a_item, encoder)
        raw_bytes = bytes_writes.getvalue()
        producer.send("floodinfo", raw_bytes)
    producer.flush()
def deserialize(self, test_out, exp_out):
    logger = logging.getLogger()
    if self.sink["serialize"]:
        if not isinstance(exp_out, str):
            raise TypeError("'exp_out' must be of type 'str'")
        if not isinstance(test_out, bytes):
            raise TypeError("'test_out' must be of type 'bytes'")
        if self.sink["type"] == "Avro":
            py_obj = json.loads(exp_out)
            self.tc_drv.store_exp_one(py_obj)
            value = bytearray(test_out)
            bytes_reader = io.BytesIO(value)
            decoder = avro.io.BinaryDecoder(bytes_reader)
            py_obj = self.sink["avro_reader"].read(decoder)
            self.tc_drv.store_rx_one(py_obj)
            return py_obj
        elif self.sink["type"] == "Binary":
            self.tc_drv.store_exp_one(exp_out)
            self.tc_drv.store_rx_one(test_out)
            return test_out.decode("utf-8")
    else:
        self.tc_drv.store_exp_one(exp_out)
        return test_out
def handle_avro_client_print_to_file(connection, address):
    schema = avro.schema.Parse(open("schema/addressbook.avsc", "rb").read())
    data = connection.recv(4)
    message_length, = struct.unpack('>I', data)
    message = connection.recv(message_length)
    message_buf = io.BytesIO(message)
    reader = avro.datafile.DataFileReader(message_buf, avro.io.DatumReader())

    # Create a data file using DataFileWriter
    dataFile = open("schema/addressbook.avro", "wb")
    writer = DataFileWriter(dataFile, DatumWriter(), schema)
    for thing in reader:
        writer.append(thing)
    reader.close()
    writer.close()
    return (len(message))
def serialize_data(self, data, schema):
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(data, encoder)
    return bytes_writer.getvalue()
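# A minimal, self-contained round-trip sketch of the DatumWriter/BinaryEncoder
# pattern used by serialize_data() above and the DatumReader/BinaryDecoder readers
# further down. The schema string and the record are illustrative assumptions, not
# taken from any of the surrounding snippets.
import io
import avro.io
import avro.schema

EXAMPLE_SCHEMA = avro.schema.parse("""
{"type": "record", "name": "Example",
 "fields": [{"name": "name", "type": "string"},
            {"name": "value", "type": "int"}]}
""")

def roundtrip_example():
    # encode a record to raw Avro binary
    writer = avro.io.DatumWriter(EXAMPLE_SCHEMA)
    buf = io.BytesIO()
    writer.write({"name": "foo", "value": 1}, avro.io.BinaryEncoder(buf))
    raw_bytes = buf.getvalue()

    # decode it back with the same schema
    reader = avro.io.DatumReader(EXAMPLE_SCHEMA)
    decoder = avro.io.BinaryDecoder(io.BytesIO(raw_bytes))
    return reader.read(decoder)  # -> {'name': 'foo', 'value': 1}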
def consumer(self, consumer_group="flume_test_group"):
    simple_consumer = self.collect_topic.get_balanced_consumer(
        reset_offset_on_start=False,
        auto_commit_enable=True,
        auto_commit_interval_ms=1000,
        consumer_group=consumer_group,
        consumer_timeout_ms=10000,
        zookeeper_connect=ZOOKEEPER_HOST,
    )
    # simple_consumer = self.collect_topic.get_simple_consumer(
    #     reset_offset_on_start=False,
    #     auto_commit_enable=True,
    #     auto_commit_interval_ms=1000,
    #     consumer_group="flume_test_group",
    #     consumer_timeout_ms=1000,
    # )
    count = 0
    consumer = []
    for message in simple_consumer:
        # print 'offset: %s' % message.offset, 'data: ' + message.value
        bytes_msg = io.BytesIO(message.value[5:])
        decode_msg = avro.io.BinaryDecoder(bytes_msg)
        recode_msg = self.avro_reader.read(decode_msg)
        # print message.offset, recode_msg
        # simple_consumer.commit_offsets()
        consumer.append(recode_msg)
        count += 1
    print(count)
    return consumer
def convert(self, obj_map):
    writer = avro.io.DatumWriter(self.schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(obj_map, encoder)
    val = bytes_writer.getvalue()
    return val
def _decodemsg(self, msg):
    value = bytearray(msg.value)
    bytes_reader = io.BytesIO(value[5:])
    decoder = avro.io.BinaryDecoder(bytes_reader)
    reader = avro.io.DatumReader(self.schema)
    message = reader.read(decoder)
    return message
def divoltecall():
    # get configuration parameters from configuration file
    conf = Configuration("default.yml")
    host, username, password, dbName = conf.getMySQLDetails()
    kafka_host, kafka_port = conf.getBrokerDetails()
    topic, consumergroup = conf.getConsumerDetails()
    schemaAvro = conf.getAvroSchema()

    # Kafka Broker Configuration
    broker_config = kafka_host + ":" + str(kafka_port)

    # To consume messages
    consumer = KafkaConsumer(topic,
                             group_id=consumergroup,
                             bootstrap_servers=[broker_config])

    # read Avro schema
    schema = avro.schema.parse(open(schemaAvro).read())

    # Open database connection
    db = MySQLdb.connect(host, username, password, dbName)

    # prepare a cursor object using cursor() method
    cursor = db.cursor()

    for msg in consumer:
        bytes_reader = io.BytesIO(msg.value)
        decoder = avro.io.BinaryDecoder(bytes_reader)
        reader = avro.io.DatumReader(schema)
        user1 = reader.read(decoder)
        insertIntoDatabase(user1)

    # disconnect from server
    db.close()
def run(self):
    ctx = ServiceContext()
    config = ctx.getConfigService()
    queue = ctx.getQueueService()
    self.schema = avro.schema.parse(avro_schema)

    constructor = "KafkaConsumer(%s,group_id=%s,bootstrap_servers=%s)"
    topics = config.get("Input Plugin: kafka_collector", "kafka_topics")
    group_id = config.get("Input Plugin: kafka_collector", "kafka_groupid")
    bootstrap_server = config.get("Message", "kafka_broker")
    # keep the constructor call in its own name so the built-in str() is not shadowed
    consumer_expr = constructor % (topics, group_id, bootstrap_server)
    self.consumer = eval(consumer_expr)

    for msg in self.consumer:
        value = bytearray(msg.value)
        topic = msg.topic
        bytes_reader = io.BytesIO(value[5:])
        decoder = avro.io.BinaryDecoder(bytes_reader)
        reader = avro.io.DatumReader(self.schema)
        kafkamsg = reader.read(decoder)
        try:
            jsondata = json.loads(kafkamsg['rawdata'])
            eventType = jsondata["eventName"]
            jsondata['topic'] = topic
            queue.put(EventFactory.getEvent(eventType, jsondata))
        except InputError as e:
            self.error(str(e))
        except Exception:
            # the original snippet is truncated here; ignore other decoding errors
            pass
def avro_decode_message(self, message):
    if message:
        bytes_from_message = bytearray(message)
        # Check ID for coherency
        message_id = int.from_bytes(bytes_from_message[1:5], byteorder='big')
        if self._schema_id != message_id:
            logging.warning("Possible incoherence between message's id (%d) and schema's id (%d), for topic (%s)",
                            message_id, self._schema_id, self.topic)
        # Remove the 5-byte header: the first byte is reserved for the future,
        # the next 4 bytes are a 32-bit number holding the schema ID
        message = bytes(bytes_from_message[5:])
        # Parse the rest of the message using the schema
        bytes_reader = io.BytesIO(message)
        decoder = avro.io.BinaryDecoder(bytes_reader)
        reader = avro.io.DatumReader(self.avro_schema)
        decoded_messages = []
        # We iterate in case there is more than one message
        while bytes_reader.tell() < len(message):
            try:
                # Here is where the messages are read
                decoded_messages.append(reader.read(decoder))
                sys.stdout.flush()
            except Exception as e:
                logging.error(e)
        return decoded_messages
def get_tweet(msg):
    bytes_reader = io.BytesIO(msg)
    decoder = avro.io.BinaryDecoder(bytes_reader)
    reader = avro.io.DatumReader(schema)
    tweet = reader.read(decoder)
    return tweet
def deserializeBinaryFromStream(schemaFile, binaryData):
    bytes_reader = io.BytesIO(binaryData)
    decoder = avro.io.BinaryDecoder(bytes_reader)
    schema = parse_schema(schemaFile)
    reader = avro.io.DatumReader(schema)
    data = reader.read(decoder)
    return data
def deserializeBinaryFromFile(schemaFile, inputFile):
    bytes_reader = io.BytesIO(open(inputFile, "rb").read())
    decoder = avro.io.BinaryDecoder(bytes_reader)
    schema = parse_schema(schemaFile)
    reader = avro.io.DatumReader(schema)
    data = reader.read(decoder)
    return data
def send_inventory(self, account_id, module, operation, result, data):
    message = {
        "accountId": account_id,
        "module": module,
        "operation": operation,
        "result": result,
        "data": data
    }
    all = {
        "timestamp": 1L,
        "src": "sa_inventory",
        "host_ip": "10.74.113.101",
        "rawdata": json.dumps(message)
    }
    schema = avro.schema.parse(avro_schema)
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(all, encoder)
    try:
        self.producer.send_messages(b"%s" % self.inventory_notify,
                                    bytes_writer.getvalue())
        logger.info("send to redis successfully")
    except Exception as exp:
        logger.error("occur error when send package inventory message to "
                     "sa inventory(dms.log.inventory) topic" + exp.message)
def create_avro_message(log_device, writer, id):
    """Create message bytes using Avro schema"""
    # Initialise with magic byte = 0 and 4 byte schema id
    # TODO use id rather than hardcoding id
    kafka_magic = io.BytesIO(b'\x00\x00\x00\x00\x01')
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(
        {
            "name": log_device.get_name(),
            "value": log_device.get_value(),
            "time": log_device.get_time(),
            "datetime": str(datetime.datetime.utcnow()).split('.')[0]
        },
        encoder)
    return kafka_magic.getvalue() + bytes_writer.getvalue()
def _parserequest(self, request):
    schema = avro.schema.parse(test_avsc_schema)
    raw_bytes = request
    bytes_reader = io.BytesIO(raw_bytes)
    decoder = avro.io.BinaryDecoder(bytes_reader)
    reader = avro.io.DatumReader(schema)
    record1 = reader.read(decoder)
    return record1
def commonToAvroBinarySchema(schema, dictContent):
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(dictContent, encoder)
    raw_bytes = bytes_writer.getvalue()
    b = bytearray()
    b.extend(raw_bytes)
    return b
def writeMessageWithId(schema_name, message):
    schema = avro.schema.Parse(schema_cache[schema_name])
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    bytes_writer.write(bytes([0]))
    bytes_writer.write(id_cache[schema_name].to_bytes(4, byteorder="big"))
    writer.write(message, encoder)
    return bytes_writer.getvalue()
def retrainClassifier(session, db_config):
    '''
    Gathers available labeled faces from the database for personas, sub-personas
    to train a new keras classifier
    '''
    # Get table names from the config
    (keyspace, personaTableName, subPersonaTableName, subPersonaFaceEdgeTableName,
     faceSubPersonaEdgeTableName, rawImageTableName, faceImageTableName) = getTables(db_config)

    # Grab the list of personas to retrieve their labels
    persona_list = list(session.execute("SELECT persona_name FROM " + personaTableName))
    persona_list = list(map(lambda x: x.persona_name, persona_list))

    features_list = []
    labels_list = []
    classes = len(persona_list)
    logging.info("Found {0} personas".format(len(persona_list)))

    schema = avro.schema.Parse(open("./VGGFaceFeatures.avsc", "r").read())

    for persona in persona_list:
        image_id_list = list(session.execute(
            "SELECT sub_persona_name, assoc_face_id, label_v_predict_assoc_flag FROM {0} WHERE sub_persona_name='{1}'".format(
                subPersonaFaceEdgeTableName, persona), timeout=60))
        logging.info("{0} features retrieved for {1}".format(len(image_id_list), persona))

        for image_id in image_id_list:
            image_features = None
            while image_features is None:
                try:
                    image_features = list(session.execute(
                        "SELECT face_id, face_bytes, feature_bytes FROM {0} WHERE face_id='{1}'".format(
                            faceImageTableName, image_id.assoc_face_id)))
                except:
                    time.sleep(60)
                    pass

            for image_byte in image_features:
                bytes_reader = io.BytesIO(image_byte.feature_bytes)
                decoder = avro.io.BinaryDecoder(bytes_reader)
                reader = avro.io.DatumReader(schema)
                features = reader.read(decoder)
                features_list.append(features['features'])
                labels_list.append(persona)
        # time.sleep(60)

    # Convert the persona labels into integers for keras, produce reversal dictionary
    homogenized_label_list = list(map(lambda x: persona_list.index(x), labels_list))
    label_persona_dictionary = dict(map(lambda x: (persona_list.index(x), x), persona_list))
    # label_persona_dictionary = dict(zip(homogenized_label_list, persona_list))
    logging.info("Generated conversion dictionary")
    logging.info(label_persona_dictionary)

    model = Sequential()
    model.add(Dense(1024, input_dim=512, activation='relu'))  # dense layers so the model can learn more complex functions and classify for better results
    model.add(Dense(1024, activation='relu'))  # dense layer 2
    model.add(Dense(512, activation='relu'))   # dense layer 3
    model.add(Dense(1, activation='softmax'))  # final layer with softmax activation
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(x=np.array(features_list), y=np.array(homogenized_label_list), epochs=2, batch_size=1)
    scores = model.evaluate(np.array(features_list), np.array(homogenized_label_list))
    logging.info("Generated model")

    return (model, label_persona_dictionary)
def avro_encoder(schema, value: dict):
    """
    Encode dictionary to avro format with designated schema
    """
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(value, encoder)
    raw_bytes = bytes_writer.getvalue()
    return raw_bytes
def getBlobPayload(self, blob, block_blob_service):
    container_name = self.getPipelineContainer()
    blob_bytes = block_blob_service.get_blob_to_bytes(container_name, blob.name)
    avro_content = blob_bytes.content
    last_modified = blob_bytes.properties.last_modified.isoformat()
    content_bytes = io.BytesIO(avro_content)
    all_payloads = avro.datafile.DataFileReader(content_bytes, avro.io.DatumReader())
    return all_payloads, last_modified
def avro_encode_messages(self, json_messages):
    bytes_writer = io.BytesIO()
    writer = avro.io.DatumWriter(self.avro_schema)
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(json_messages, encoder)
    raw_bytes = bytes_writer.getvalue()
    # Add the 5-byte header: the first byte is reserved for the future,
    # the next 4 bytes are a 32-bit number holding the schema ID
    return bytes(bytearray(b'\x00') +
                 bytearray(self._schema_id.to_bytes(4, byteorder='big')) +
                 bytearray(raw_bytes))
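# A small, self-contained sketch of the 5-byte framing that avro_encode_messages()
# prepends and avro_decode_message() strips: one reserved byte, then a 4-byte
# big-endian schema ID, then the raw Avro payload. The schema_id value and payload
# bytes below are illustrative assumptions.
def split_framed_message(framed: bytes):
    # returns (schema_id, avro_payload) for a message framed as described above
    schema_id = int.from_bytes(framed[1:5], byteorder='big')
    return schema_id, framed[5:]

# usage: build a frame by hand and take it apart again
_frame = b'\x00' + (42).to_bytes(4, byteorder='big') + b'avro-payload-bytes'
assert split_framed_message(_frame) == (42, b'avro-payload-bytes')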
def handle_avro_client(connection):
    message = connection.recv()
    message_buf = io.BytesIO(message)
    reader = avro.datafile.DataFileReader(message_buf, avro.io.DatumReader())
    for thing in reader:
        print(thing)
    reader.close()
    return (len(message))
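# A self-contained sketch of the Avro object container file format that
# handle_avro_client(), handle_avro_client_print_to_file() and getBlobPayload()
# consume through DataFileReader: unlike the raw DatumWriter/BinaryEncoder payloads
# elsewhere in this section, a container file embeds its own schema. The schema and
# records here are illustrative assumptions.
import io
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

def container_file_roundtrip():
    schema = avro.schema.parse("""
    {"type": "record", "name": "Example",
     "fields": [{"name": "name", "type": "string"},
                {"name": "value", "type": "int"}]}
    """)
    buf = io.BytesIO()
    writer = DataFileWriter(buf, DatumWriter(), schema)
    writer.append({"name": "foo", "value": 1})
    writer.append({"name": "bar", "value": 2})
    writer.flush()
    data = buf.getvalue()   # grab the bytes before close() closes the buffer
    writer.close()

    reader = DataFileReader(io.BytesIO(data), DatumReader())
    records = [record for record in reader]
    reader.close()
    return records  # [{'name': 'foo', 'value': 1}, {'name': 'bar', 'value': 2}]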
def predictImage(image_features):
    schema = avro.schema.Parse(open("./VGGFaceFeatures.avsc", "rb").read())
    bytes_reader = io.BytesIO(image_features.feature_bytes)
    decoder = avro.io.BinaryDecoder(bytes_reader)
    reader = avro.io.DatumReader(schema)
    features = reader.read(decoder)
    # model is assumed to be available in the enclosing scope
    prediction = model.predict(features)
    logging.info(prediction)
    return prediction
def encode(self, item, writers_schema=None):
    """Returns encoded data

    - ``item``: item to be encoded according to schema
    - ``writers_schema``: avro writers schema
    """
    writer = avro.io.DatumWriter(writers_schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(item, encoder)
    encoded = bytes_writer.getvalue()
    return encoded
def deserializeBinaryFromStreamWithHeader(headerSchemaFile, dataSchema, binaryData):
    bytes_reader = io.BytesIO(binaryData)
    decoder = avro.io.BinaryDecoder(bytes_reader)
    headerSchema = parse_schema(headerSchemaFile)
    dataSchema = parse_schema(dataSchema)
    reader = avro.io.DatumReader(headerSchema)
    header = reader.read(decoder)
    datareader = avro.io.DatumReader(dataSchema)
    data = datareader.read(decoder)
    return {'header': header, 'data': data}
def serializeDataToBinaryFile(schemaFile, outputFile, dataToSerialize):
    writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(writer)
    schema = parse_schema(schemaFile)
    datum_writer = avro.io.DatumWriter(schema)
    datum_writer.write(dataToSerialize, encoder)
    raw_bytes = writer.getvalue()
    newFile = open(outputFile, "wb")
    newFile.write(raw_bytes)
    newFile.close()
    logging.debug("Binary data written to:" + outputFile)
def deserialize_msg(msg, serializer, schema=None):
    if serializer == "Avro":
        bytes_reader = io.BytesIO(msg.value)
        decoder = avro.io.BinaryDecoder(bytes_reader)
        reader = avro.io.DatumReader(schema)
        msg_data = reader.read(decoder)
        return_val = msg_data
    elif serializer == "JSON":
        return_val = json.loads(msg)
    else:
        return_val = msg
    return return_val