def test_multi_register(self):
    basic = avro.loads(data_gen.BASIC_SCHEMA)
    adv = avro.loads(data_gen.ADVANCED_SCHEMA)
    subject = 'test'
    client = self.client

    id1 = client.register(subject, basic)
    latest1 = client.get_latest_schema(subject)
    v1 = client.get_version(subject, basic)
    self.assertLatest(latest1, id1, basic, v1)

    id2 = client.register(subject, adv)
    latest2 = client.get_latest_schema(subject)
    v2 = client.get_version(subject, adv)
    self.assertLatest(latest2, id2, adv, v2)

    self.assertNotEqual(id1, id2)
    self.assertNotEqual(latest1, latest2)
    # ensure version is higher
    self.assertTrue(latest1[2] < latest2[2])

    client.register(subject, basic)
    latest3 = client.get_latest_schema(subject)
    # latest should not change with a re-reg
    self.assertEqual(latest2, latest3)
def test_encode_record_with_schema(self):
    topic = 'test'
    basic = avro.loads(data_gen.BASIC_SCHEMA)
    subject = 'test-value'
    schema_id = self.client.register(subject, basic)
    records = data_gen.BASIC_ITEMS
    for record in records:
        message = self.ms.encode_record_with_schema(topic, basic, record)
        self.assertMessageIsSame(message, record, schema_id)
def test_encode_with_schema_id(self):
    adv = avro.loads(data_gen.ADVANCED_SCHEMA)
    basic = avro.loads(data_gen.BASIC_SCHEMA)
    subject = 'test'
    schema_id = self.client.register(subject, basic)

    records = data_gen.BASIC_ITEMS
    for record in records:
        message = self.ms.encode_record_with_schema_id(schema_id, record)
        self.assertMessageIsSame(message, record, schema_id)

    subject = 'test_adv'
    adv_schema_id = self.client.register(subject, adv)
    self.assertNotEqual(adv_schema_id, schema_id)

    records = data_gen.ADVANCED_ITEMS
    for record in records:
        message = self.ms.encode_record_with_schema_id(adv_schema_id, record)
        self.assertMessageIsSame(message, record, adv_schema_id)
def test_multi_subject_register(self):
    parsed = avro.loads(data_gen.BASIC_SCHEMA)
    client = self.client
    schema_id = client.register('test', parsed)
    self.assertTrue(schema_id > 0)

    # register again under a different subject
    dupe_id = client.register('other', parsed)
    self.assertEqual(schema_id, dupe_id)
    self.assertEqual(len(client.id_to_schema), 1)
def test_dupe_register(self):
    parsed = avro.loads(data_gen.BASIC_SCHEMA)
    subject = 'test'
    client = self.client
    schema_id = client.register(subject, parsed)
    self.assertTrue(schema_id > 0)
    latest = client.get_latest_schema(subject)

    # register again under the same subject
    dupe_id = client.register(subject, parsed)
    self.assertEqual(schema_id, dupe_id)
    dupe_latest = client.get_latest_schema(subject)
    self.assertEqual(latest, dupe_latest)
def __init__(self, server, schema_registry, topic, emails):
    # key schema definition for a pageview
    self.key = avro.loads("""
        {
            "namespace": "pageview",
            "name": "key",
            "type": "record",
            "fields": [
                {"name": "pageview_id", "type": "string"}
            ]
        }
    """)
    # value schema definition for a pageview
    self.value = avro.loads("""
        {
            "namespace": "pageview",
            "name": "value",
            "type": "record",
            "fields": [
                {"name": "email", "type": "string"},
                {"name": "url", "type": "string"},
                {"name": "timestamp", "type": "string"},
                {"name": "pageview_id", "type": "string"}
            ]
        }
    """)
    # define a pageview producer with Avro serialization
    self.producer = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry
        },
        default_key_schema=self.key,
        default_value_schema=self.value)
    self.topic = topic
    self.emails = emails
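# A minimal send sketch for the pageview producer above. This method is not
# part of the original class; it is a hypothetical illustration of how the
# default key/value schemas line up with AvroProducer.produce().
def send_pageview(self, email, url, timestamp):
    pageview_id = str(uuid4())  # assumes `from uuid import uuid4`
    self.producer.produce(
        topic=self.topic,
        key={'pageview_id': pageview_id},
        value={'email': email, 'url': url,
               'timestamp': timestamp, 'pageview_id': pageview_id})
    self.producer.flush()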
def producer(self):
    """Create and return an Avro producer object."""
    try:
        schema = Schema(self.registry_url, self.topic)
        schema_json = schema.get_latest_schema()
        avroProducer = AvroProducer(
            {
                'bootstrap.servers': '{}'.format(self.servers),
                'schema.registry.url': 'http://{}'.format(self.registry_url)
            },
            default_value_schema=avro.loads(schema_json['schema']))
        return avroProducer
    except Exception as e:
        return str(e)
def get_kafka_producer(self):
    """Return a producer instance built from the stored configuration."""
    self._properties["error_cb"] = self.error_cb
    self._properties["bootstrap.servers"] = self._config.get('bootstrap.servers')
    # self._properties["schema.registry.url"] = self._config.get('schema.registry')
    # if self._config.get('security_protocol') != 'None':
    #     self.add_property("security.protocol", self._config.get('security_protocol'))
    #     self.add_property("ssl.key.password", self._config.get('kafka-cert-password'))
    if self._config.get('avro_producer') and self._config.get('schema_registry') is not None:
        self.add_property("schema.registry.url", self._config.get('schema_registry'))
        key_schema = avro.loads(self._AVRO_SCHEMA_KEY)
        value_schema = avro.loads(self._AVRO_SCHEMA_VALUE)
        producer = AvroProducer(self._properties,
                                default_key_schema=key_schema,
                                default_value_schema=value_schema)
    else:
        producer = Producer(self._properties)
    return producer
def __init__(self, avro_schema_path, bootsrap_servers='localhost:29092',
             schema_registry='http://localhost:8081',
             topic_subscribe='quickstart-elastic-news'):
    # Remember: if you are running this code outside Docker, set
    # bootstrap.servers = 'localhost:29092' and schema.registry.url = 'http://localhost:8081';
    # otherwise set bootstrap.servers = 'kafka:9092' and
    # schema.registry.url = 'http://schema-registry:8081'.
    self.conf = {
        'schema.registry.url': schema_registry,
        'bootstrap.servers': bootsrap_servers,
        # "debug": "all",
    }
    with open(avro_schema_path, "r") as f:
        self.SCHEMA = avro.loads(f.read())
    self.topic_subscribe = topic_subscribe
    self.logger = logging.getLogger(__name__)
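# A minimal instantiation sketch for the consumer above; `NewsAvroConsumer`
# is a hypothetical class name (the real name is not shown in this excerpt)
# and "news.avsc" is an illustrative schema path. The defaults match the
# outside-Docker setup described in the comments.
app = NewsAvroConsumer(avro_schema_path="news.avsc")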
class Purchase:
    username: str = field(default_factory=faker.user_name)
    currency: str = field(default_factory=faker.currency_code)
    amount: int = field(default_factory=lambda: random.randint(100, 200000))

    schema = avro.loads("""{
        "type": "record",
        "name": "purchase",
        "namespace": "com.udacity.lesson3.sample3",
        "fields": [
            {"name": "username", "type": "string"},
            {"name": "currency", "type": "string"},
            {"name": "amount", "type": "int"}
        ]
    }""")
def __init__(self, driver, nameSalt):
    self.driver = driver
    self.topic = "travis_correct_string_avrosr" + nameSalt

    ValueSchemaStr = """
    {
        "type": "record",
        "name": "value_schema",
        "fields": [
            {"name": "id", "type": "int"},
            {"name": "firstName", "type": "string"},
            {"name": "time", "type": "int"}
        ]
    }
    """
    self.valueSchema = avro.loads(ValueSchemaStr)
def generate_records():
    avro_producer_settings = {
        'bootstrap.servers': "localhost:19092",
        'group.id': 'groupid',
        'schema.registry.url': "http://127.0.0.1:8081"
    }
    producer = AvroProducer(avro_producer_settings)

    key_schema = loads('"string"')      # parse a schema from a string
    value_schema = load("schema.avsc")  # load a schema from a file

    i = 1
    while True:
        row = {"int_field": int(i), "string_field": str(i)}
        producer.produce(topic="avro_topic",
                         key="key-{}".format(i),
                         value=row,
                         key_schema=key_schema,
                         value_schema=value_schema)
        print(row)
        sleep(1)
        i += 1
class ClickEvent:
    email: str = field(default_factory=faker.email)
    timestamp: str = field(default_factory=faker.iso8601)
    uri: str = field(default_factory=faker.uri)
    number: int = field(default_factory=lambda: random.randint(0, 999))

    schema = avro.loads("""{
        "type": "record",
        "name": "click_event",
        "namespace": "rest-proxy-avro",
        "fields": [
            {"name": "email", "type": "string"},
            {"name": "timestamp", "type": "string"},
            {"name": "uri", "type": "string"},
            {"name": "number", "type": "int"}
        ]
    }""")
def produce(schema_json, data):
    print('schema:\n')
    pprint.pprint(schema_json)
    print('\n')
    print('message:\n')
    pprint.pprint(data)
    print('\n')

    schema_avro = avro.loads(json.dumps(schema_json))
    producer = AvroProducer({'bootstrap.servers': broker},
                            default_value_schema=schema_avro,
                            schema_registry=schema_registry)
    producer.poll(0)
    producer.produce(topic=topic, value=data)
    producer.flush()
def save_new_key_schema_in_SR(SCHEMA_REGISTRY_URL, topic):
    # Another way of creating a schema for use with a message; earlier we
    # created the schema in the Schema Registry directly.
    key_schema_str = """
    {
        "namespace": "my.test",
        "name": "key",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"}
        ]
    }
    """
    key_schema = avro.loads(key_schema_str)
    return key_schema
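# A minimal usage sketch for the function above. The broker/registry addresses
# and topic name are illustrative assumptions; with AvroProducer, the key
# schema is registered under the subject '<topic>-key' on the first produce.
key_schema = save_new_key_schema_in_SR("http://localhost:8081", "my-topic")
producer = AvroProducer(
    {'bootstrap.servers': 'localhost:9092',
     'schema.registry.url': 'http://localhost:8081'},
    default_key_schema=key_schema)
producer.produce(topic="my-topic", key={"name": "Key"})
producer.flush()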
def test_getters(self):
    parsed = avro.loads(data_gen.BASIC_SCHEMA)
    client = self.client
    subject = 'test'
    version = client.get_version(subject, parsed)
    self.assertEqual(version, None)
    schema = client.get_by_id(1)
    self.assertEqual(schema, None)
    latest = client.get_latest_schema(subject)
    self.assertEqual(latest, (None, None, None))

    # register
    schema_id = client.register(subject, parsed)
    latest = client.get_latest_schema(subject)
    version = client.get_version(subject, parsed)
    self.assertLatest(latest, schema_id, parsed, version)

    fetched = client.get_by_id(schema_id)
    self.assertEqual(fetched, parsed)
def produce_dataset_mce(mce, kafka_config):
    """Produces a MetadataChangeEvent to Kafka"""
    conf = {
        'bootstrap.servers': kafka_config.bootstrap_server,
        'on_delivery': delivery_report,
        'schema.registry.url': kafka_config.schema_registry
    }
    key_schema = avro.loads('{"type": "string"}')
    record_schema = avro.load(kafka_config.avsc_path)
    producer = AvroProducer(conf,
                            default_key_schema=key_schema,
                            default_value_schema=record_schema)
    producer.produce(topic=kafka_config.kafka_topic,
                     key=mce['proposedSnapshot'][1]['urn'],
                     value=mce)
    producer.flush()
def fly_avro_drones(bootstrap_servers, schema_registry_url, nmessages,
                    default_value_schema_str=drone_schema_str,
                    producer_dict_kwargs=None, topic_name="drones_raw",
                    time_delay=0, drones=None):
    """
    A simple example of sending structured messages from drones to a message broker.

    Args:
        bootstrap_servers (str): Comma-separated string of Kafka servers
        schema_registry_url (str): Schema registry URL
        nmessages (int): Number of messages to send
        default_value_schema_str (str): String Avro schema compatible with drone messages
        producer_dict_kwargs (dict): Optional keyword arguments for the producer
        topic_name (str): Topic name to which drone messages will be sent
        time_delay (int): Delay time between cycles when producing messages
        drones (iterable): Iterable of drones from which to generate messages

    Tip:
        Schemas should match the messages sent by drones.
    """
    pdk = {
        'bootstrap.servers': bootstrap_servers,
        'schema.registry.url': schema_registry_url
    }
    if isinstance(producer_dict_kwargs, dict):
        pdk.update(producer_dict_kwargs)
    producer = avro.AvroProducer(
        pdk, default_value_schema=avro.loads(default_value_schema_str))

    z = len(str(nmessages))  # pretty-print cycle number for logging
    for i in range(nmessages):
        print("====MESSAGE SET {}====".format(str(i).zfill(z)))
        for drone in drones:
            msg = drone.message()
            print(msg)
            producer.produce(topic=topic_name,
                             value={k: getattr(msg, k) for k in msg._fields})
        time.sleep(time_delay)
    producer.flush()
def setupTopic_routerHsBridge(server, schema_registry_url):
    global regexp_routerHSBridge
    global avroProducer_routerHSBridge
    global count_routerHSBridge
    count_routerHSBridge = 0
    # Topic routerHSBridge scans for a regex that returns this pattern:
    # <31>Apr 1 06:51:39 KKM-WiFi24K-CCR10 hs-bridge_CCO_243: new host detected 54:9F:13:6F:3C:3A/10.243.52.180 by TCP :50872 -> 203.113.34.26:80
    avro_schema = """
    {"namespace": "weblog.kkr.avro",
     "type": "record",
     "name": "routerHSBridge",
     "fields": [
         {"name": "month",     "type": ["null","string"], "default": null}
        ,{"name": "day",       "type": ["null","string"], "default": null}
        ,{"name": "time",      "type": ["null","string"], "default": null}
        ,{"name": "kkm",       "type": ["null","string"], "default": null}
        ,{"name": "hs_bridge", "type": ["null","string"], "default": null}
        ,{"name": "message",   "type": ["null","string"], "default": null}
     ]
    }"""
    schema = avro.loads(avro_schema)
    avroProducer_routerHSBridge = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=schema)
    regexp_routerHSBridge = re.compile(('(<27>|<28>|<30>|<31>)'
                                        r'(?P<month>\D{3})'
                                        r'(?P<whitespace1>\s{1,2})'
                                        r'(?P<day>\d{1,2})'
                                        r'(?P<whitespace2>\s{1,2})'
                                        r'(?P<time>\d{2}:\d{2}:\d{2})'
                                        r'(?P<whitespace3>\s{1,2})'
                                        r'(?P<kkm>\D{3}-.{7}-\D{3}\d{1,2})'
                                        r'(?P<whitespace4>\s{1})'
                                        r'(?P<hs_bridge>[^\s]+:)'
                                        r'(?P<whitespace5>\s{1})'
                                        r'(?P<message>[^\s]+)'
                                        '.*'), re.IGNORECASE)
def __init__(self):
    # This is the Avro schema for messages
    self.value_schema_str = """
    {
        "name": "value",
        "type": "record",
        "fields": [
            {"name": "network",   "type": "float"},
            {"name": "disk",      "type": "float"},
            {"name": "cpu",       "type": "float"},
            {"name": "timestamp", "type": "long"}
        ]
    }"""
    self.value_schema = avro.loads(self.value_schema_str)
    self.avroProducer = AvroProducer(
        {
            'bootstrap.servers': 'broker:29092',
            'on_delivery': self.delivery_report,
            'schema.registry.url': 'http://schema-registry:8081'
        },
        default_value_schema=self.value_schema)
def setupTopic_routerElse(server, schema_registry_url):
    global regexp_routerElse
    global avroProducer_routerElse
    global count_routerElse
    count_routerElse = 0
    # Topic routerElse scans for a regex that returns this pattern:
    # <31>Apr 1 07:00:06 CWT-WiFi24K-CCR05 .*
    # for example:
    # <31>Apr 1 07:00:06 CWT-WiFi24K-CCR05 already 15 logins in progress\n
    avro_schema = """
    {"namespace": "weblog.kkr.avro",
     "type": "record",
     "name": "routerElse",
     "fields": [
         {"name": "month",   "type": ["null","string"], "default": null}
        ,{"name": "day",     "type": ["null","string"], "default": null}
        ,{"name": "time",    "type": ["null","string"], "default": null}
        ,{"name": "kkm",     "type": ["null","string"], "default": null}
        ,{"name": "message", "type": ["null","string"], "default": null}
     ]
    }"""
    schema = avro.loads(avro_schema)
    avroProducer_routerElse = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=schema)
    regexp_routerElse = re.compile(('(<30>|<31>)'
                                    r'(?P<month>\D{3})'
                                    r'(?P<whitespace1>\s{1,2})'
                                    r'(?P<day>\d{1,2})'
                                    r'(?P<whitespace2>\s{1,2})'
                                    r'(?P<time>\d{2}:\d{2}:\d{2})'
                                    r'(?P<whitespace3>\s{1,2})'
                                    r'(?P<kkm>\D{3}-.{7}-\D{3}\d{1,2})'
                                    r'(?P<whitespace4>\s{1,3})'
                                    r'(?P<message>[^\s]+)'
                                    '.*'), re.IGNORECASE)
class ClickEvent:
    email: str = field(default_factory=faker.email)
    timestamp: str = field(default_factory=faker.iso8601)
    uri: str = field(default_factory=faker.uri)
    number: int = field(default_factory=lambda: random.randint(0, 999))
    attributes: dict = field(default_factory=ClickAttribute.attributes)

    # Load the schema using the Confluent avro loader
    # See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/load.py#L23
    schema = avro.loads(
        """{
            "type": "record",
            "name": "click_event",
            "namespace": "com.udacity.lesson3.exercise4",
            "fields": [
                {"name": "email", "type": "string"},
                {"name": "timestamp", "type": "string"},
                {"name": "uri", "type": "string"},
                {"name": "number", "type": "int"},
                {
                    "name": "attributes",
                    "type": {
                        "type": "map",
                        "values": {
                            "type": "record",
                            "name": "attribute",
                            "fields": [
                                {"name": "element", "type": "string"},
                                {"name": "content", "type": "string"}
                            ]
                        }
                    }
                }
            ]
        }"""
    )
def send(self, topic=None, msg="{'foo':'bar'}", lang='json', schema=None):
    '''
    Kafka send message.
    Sends JSON and Avro messages.
    '''
    log.debug("[KafkaDriver][send] producer start: " + str(self.server))
    log.debug("[KafkaDriver][send] send message: " + str(msg))
    if topic is None:
        topic = self.topic
    log.debug("[KafkaDriver][send] topic: " + str(topic))
    if lang == 'json':
        producer = KafkaProducer(bootstrap_servers=self.server + ':9092')
        log.debug("[KafkaDriver][send] json msg")
        res = producer.send(topic, key=None, value=msg)
        log.debug("[KafkaDriver][send] produce result: " + str(res.get()))
        time.sleep(1)
        producer.close()
        log.debug("[KafkaDriver][send] end")
    elif lang == 'avro':
        log.debug("[KafkaDriver][send] avro msg")
        log.debug("[KafkaDriver][send] schema: " + str(schema))
        value_schema = avro.loads(schema)
        avroProducer = AvroProducer(
            {
                'bootstrap.servers': self.server,
                'schema.registry.url': 'http://' + self.schema_registry + ':8081'
            },
            default_value_schema=value_schema)
        res = avroProducer.produce(topic=topic, value=msg)
        log.debug("[KafkaDriver][send] produce result: " + str(res))
        time.sleep(1)
        avroProducer.flush()
        log.debug("[KafkaDriver][send] end")
def __init__(self, broker, schema_registry, schema=None, logging_enabled=False):
    """
    Initialization of the Producer, which instantiates an AvroProducer.

    Parameters
    ----------
    broker: str
        The URL of the broker (example: 'localhost:9092')
    schema_registry: str
        The URL of the Confluent Schema Registry endpoint (example: 'http://localhost:8081')
    schema: str, optional
        The default Avro schema used to serialize messages
    logging_enabled: bool, optional
        If True, a logger is created and used to log messages
    """
    if schema is not None:
        self.schema = avro.loads(schema)
    else:
        self.schema = None
    self.__producer = AvroProducer(
        {
            "bootstrap.servers": broker,
            "schema.registry.url": schema_registry
        },
        default_key_schema=self.schema)
    if logging_enabled:
        self.logger = logging.getLogger(__name__)
    else:
        self.logger = None
    self.produce_flag = True
    self.production_last_stoped = 0
    self.total_time_producing_stoped = 0
    self.__msg_queue = PriorityQueue()
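# A minimal construction sketch for the wrapper above, assuming the class is
# named Producer (its name is not shown in this excerpt); the broker/registry
# addresses and the primitive key schema string are illustrative assumptions.
key_schema_str = '{"type": "string"}'
wrapper = Producer('localhost:9092', 'http://localhost:8081',
                   schema=key_schema_str, logging_enabled=True)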
logging.info(ipo_data)

# publish to kafka if config is specified
if options.kafka_config is not None:
    from confluent_kafka import avro
    from confluent_kafka.avro import AvroProducer

    config = None
    with open(options.kafka_config) as f:
        try:
            config = yaml.safe_load(f)
        except yaml.YAMLError as exc:
            logging.error(exc)
            exit(1)

    value_schema = avro.loads(json.dumps(config['value-schema']))
    avroProducer = AvroProducer(
        {
            'bootstrap.servers': f"{config['connection']['kafka-host']}:{config['connection']['kafka-port']}",
            'schema.registry.url': f"http://{config['connection']['schema-registry-host']}:{config['connection']['schema-registry-port']}"
        },
        default_value_schema=value_schema)

    for ipo_record in ipo_data:
        # sample ipo record:
        # {'expiration_date': '11/4/2019',
        #  'priced_date': '5/7/2019',
        #  'company_name': 'LANDCADIA HOLDINGS II, INC.',
# import argparse
from uuid import uuid4

from six.moves import input

from confluent_kafka import avro

# Parse Schema used for serializing the User class
record_schema = avro.loads("""
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
""")


class User(object):
    """User stores the deserialized user Avro record."""

    # Use __slots__ to explicitly declare all data members.
    __slots__ = ["name", "favorite_number", "favorite_color", "id"]
from math import ceil

from calculation import calculate_average, calculate_ranking
from schema import (key_schema_avg_str, key_schema_rank_str,
                    value_schema_avg_str, value_schema_rank_str)
import datetime

c = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'join-ksql',
    'schema.registry.url': 'http://0.0.0.0:8081'
})
# c.assign([Partition])
c.subscribe(['students_result_source'])

value_schema_avg = avro.loads(value_schema_avg_str)
key_schema_avg = avro.loads(key_schema_avg_str)
value_schema_rank = avro.loads(value_schema_rank_str)
key_schema_rank = avro.loads(key_schema_rank_str)

producer_avg = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://0.0.0.0:8081'
}, default_key_schema=key_schema_avg, default_value_schema=value_schema_avg)

producer_rank = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://0.0.0.0:8081'
}, default_key_schema=key_schema_rank, default_value_schema=value_schema_rank)

while True:
StructField("CREATE_DATE", StringType(), nullable=True), StructField("ACCOUNTING_IDENT", StringType(), nullable=True) ]) #---------------------------------------- # 00003 - Get the Schema of Source Topic : #---------------------------------------- from schema import getting_value_schema, getting_key_schema var_val_schema = getting_value_schema(var_cassandra_conn_host, var_topic_src_name, var_schema_url_port) var_key_schema = getting_key_schema(var_cassandra_conn_host, var_topic_src_name, var_schema_url_port) value_schema = avro.loads(var_val_schema) key_schema = avro.loads(var_key_schema) from df import getting_df_value_schema var_df_schema = getting_df_value_schema(var_val_schema) #-------------------------------------------- # 00004 - Processing the Each Kafka Messages : #-------------------------------------------- # This part of Code writing the messages into compact topic : def handler(message): records = message.collect()
from confluent_kafka import avro

key_schema = avro.loads("""
{
  "doc": "Sample schema to help you get started.",
  "fields": [
    {
      "doc": "",
      "name": "prefix",
      "type": "string"
    },
    {
      "doc": "",
      "name": "sensorId",
      "type": "string"
    }
  ],
  "name": "QRSComplexKey",
  "namespace": "com.cinvestav",
  "type": "record"
}
""")
key_schema_str = """ { "namespace": "my.test", "name": "key", "type": "record", "fields" : [ { "name" : "name", "type" : "string" } ] } """ value_schema = avro.loads(value_schema_str) key_schema = avro.loads(key_schema_str) value = {"name": "Value"} key = {"name": "Key"} def delivery_report(err, msg): """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """ if err is not None: print('Message delivery failed: {}'.format(err)) else: print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
def main(argv):
    brokers = argv[0]
    schemaRegistryUrl = argv[1]
    timestamp = argv[2]
    id = argv[3]
    firstName = argv[4]
    lastName = argv[5]

    value_schema_str = """
    {
       "namespace": "my.test",
       "name": "Person",
       "type": "record",
       "fields" : [
         {"name" : "id", "type" : "int"},
         {"name" : "firstName", "type" : "string"},
         {"name" : "lastName", "type" : "string"}
       ]
    }
    """
    key_schema_str = """
    {
       "namespace": "my.test",
       "name": "PersonKey",
       "type": "record",
       "fields" : [
         {"name" : "id", "type" : "string"}
       ]
    }
    """
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)
    value = {"id": int(id), "firstName": firstName, "lastName": lastName}
    key = {"id": id}

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': brokers,
            'schema.registry.url': schemaRegistryUrl,
            'compression.codec': 'snappy'
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)
    avroProducer.produce(topic='person-v1',
                         value=value,
                         key=key,
                         timestamp=int(timestamp))
    avroProducer.flush()
def main(argv): """The main function runs when the script is called """ # parser = argparse.ArgumentParser(description="Feed data from Pi 3 Sense Hat board into a Kafka topic.") # parser.add_argument("--target", # help="IP address of target Kafka broker.", # action="store", dest="target", type=str, required=True) # parser.add_argument("--topic", help="Kafka topic name to send message to.", action="store", dest="topic", type=str, # required=True) # # args = parser.parse_args() # Set up the configured Schema registry Avro schema for the test Kafka topic. value_schema_str = """ { "type": "record", "name": "base_unprocessed_data", "namespace": "push_im_subsystem.im_data", "doc": "DRAFT Apache AVRO data value schema for push of real-time unprocessed data from MWCC sub-systems. AIMS UAID used as the Kafka key for each value record. Note this requires all associated sensor data (accuracy, range, etc) to be established from attributes of the AIMS UAID related data", "fields": [ { "name": "SOSAobservedProperty", "type": "string", "doc": "https://www.w3.org/TR/vocab-ssn/#SOSAobservedProperty", "default": "SOSAobservedProperty" }, { "name": "SOSAhasResult", "type": { "type": "record", "doc": "http://qudt.org/schema/qudt#QuantityValue", "name": "QuantityValue", "namespace": "qudt", "fields": [ { "name": "numericValue", "type": "double", "doc": "http://qudt.org/schema/qudt#numericValue", "default": 0.00 }, { "name": "unit", "type": "string", "doc": "http://qudt.org/schema/qudt#unit", "default": "http://qudt.org/1.1/vocab/unit#" } ] }, "doc": "https://www.w3.org/TR/vocab-ssn/#SOSAhasResult" }, { "name": "timestamp_clock_sync", "type": { "name": "ptp_sync_status", "type": "record", "doc": "The status of the PTP client clock sync", "fields": [ { "name": "ptp_clock_status", "type": "string", "doc": "The PTP client daemon PTP_Clock_status of the clock responsible for the SOSAresultTime", "default": "ptp_clock_status" }, { "name": "ptp_best_master_id", "type": "string", "doc": "The PTP client daemon Best_master_ID of the PTP client master clock providing current time sync for the SOSAresultTime", "default": "ptp_best_master_id" }, { "name": "ptp_offset_from_master", "type": "float", "doc": "The PTP client daemon Offset_from_Master clock. https://www.w3.org/TR/2017/REC-owl-time-20171019/#time:Duration https://www.w3.org/TR/2017/REC-owl-time-20171019/#time:unitSecond", "default": 0.00 } ] }, "doc": "The status of the acquisition system clock responsible for the SOSAresultTime" }, { "name": "SOSAresultTime", "type": "string", "doc": "https://www.w3.org/TR/vocab-ssn/#SOSAresultTime https://www.w3.org/TR/2017/REC-owl-time-20171019/#time:Instant https://www.w3.org/TR/xmlschema11-2/#dateTimeStamp", "default": "SOSAresultTime" } ] } """ key_schema_str = """ { "type": "record", "name": "key", "namespace": "push_im_subsystem.im_data", "fields": [ { "name": "aims_asset_id", "type": "string" } ] } """ print("running avro loads on schemas") value_schema = avro.loads(value_schema_str) key_schema = avro.loads(key_schema_str) # Set a example key for the message which controls which partition the message ends up in Kafka. key = {"aims_asset_id": "HS2-000024H7L"} # Run a scheduled infinite loop to read from sensor. 
def send_to_kafka():
    Timer(10.0, send_to_kafka).start()
    try:
        print("running")
        avro_producer = AvroProducer(
            {
                'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                'schema.registry.url': 'http://up04:8081'
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
        value = read_from_sense_hat()
        print(value)
        avro_producer.poll(0)
        avro_producer.produce(topic='test_avro_2',
                              value=value,
                              key=key,
                              callback=delivery_report)
        avro_producer.flush()
    except Exception:
        logging.error(traceback.format_exc())

send_to_kafka()
,{"name": "ACTOR2GEO_LONG" ,"type": ["null","string"],"default":null} ,{"name": "ACTOR2GEO_FEATUREID" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_TYPE" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_FULLNAME" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_COUNTRYCODE" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_ADM1CODE" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_ADM2CODE" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_LAT" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_LONG" ,"type": ["null","string"],"default":null} ,{"name": "ACTIONGEO_FEATUREID" ,"type": ["null","string"],"default":null} ,{"name": "DATEADDED" ,"type": ["null","string"],"default":null} ,{"name": "SOURCEURL" ,"type": ["null","string"],"default":null} ,{"name": "SITE" ,"type": ["null","string"],"default":null} ] }""".replace("REPLACEME_TOPIC", topic) key_schema = avro.loads(key_schema) value_schema = avro.loads(schema_values_str) avroProducer_gdeltEvent = AvroProducer( { 'bootstrap.servers': server, 'schema.registry.url': schema_registry_url }, default_value_schema=value_schema, default_key_schema=key_schema) load(datafile, topic, server) avroProducer_gdeltEvent.flush() #if __name__ == "__main__": # main()
#!/usr/bin/env python
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

key_schema = open("./schemas/aduss-user-key.avsc", "rb").read()
value_schema = open("./schemas/aduss-user-values.avsc", "rb").read()
value_schema = avro.loads(value_schema)
key_schema = avro.loads(key_schema)

key = {"project": "prj-users"}
topic = "tpc-aduss-users"

first_user = {
    "user": {
        "id": 1,
        "first_name": "John",
        "last_name": "Steinbeck",
        "tzid": "CA",
        "website_url": "foobar.com",
        "manager": {
            "id": 1000,
            "code": 12345
        }
    }
}

producer = AvroProducer(
    {
        'bootstrap.servers': 'localhost:9092',
def test_register(self):
    parsed = avro.loads(data_gen.BASIC_SCHEMA)
    client = self.client
    schema_id = client.register('test', parsed)
    self.assertTrue(schema_id > 0)
    self.assertEqual(len(client.id_to_schema), 1)
def test_schema_from_string(self):
    parsed = avro.loads(data_gen.BASIC_SCHEMA)
    self.assertTrue(isinstance(parsed, schema.Schema))
from uuid import uuid4

from confluent_kafka import avro

# Parse Schema used for serializing the User class
record_schema = avro.loads("""
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
""")

deloitte_kafka_schema = avro.loads("""
    {"namespace": "be.deloitte.kafka",
     "type": "record",
     "name": "Image",
     "fields": [
         {"name": "imageId",       "type": "string"},
         {"name": "timestamp",     "type": {"type": "long", "logicalType": "timestamp-millis"}},
         {"name": "numOfBoats",    "type": "int"},
         {"name": "occupancyRate", "type": "double"},
         {"name": "image",         "type": "string"}
     ]
    }
""")
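# A minimal produce sketch for the Image schema above; the AvroProducer
# import, broker/registry addresses, topic name, and field values are
# illustrative assumptions, not part of the original file.
from confluent_kafka.avro import AvroProducer

producer = AvroProducer(
    {'bootstrap.servers': 'localhost:9092',
     'schema.registry.url': 'http://localhost:8081'},
    default_value_schema=deloitte_kafka_schema)
producer.produce(topic='images', value={
    'imageId': str(uuid4()),
    'timestamp': 1700000000000,  # epoch millis, per the timestamp-millis logical type
    'numOfBoats': 3,
    'occupancyRate': 0.42,
    'image': 'base64-or-path-placeholder'})
producer.flush()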
def test_context(self):
    with self.client as c:
        parsed = avro.loads(data_gen.BASIC_SCHEMA)
        schema_id = c.register('test', parsed)
        self.assertTrue(schema_id > 0)
        self.assertEqual(len(c.id_to_schema), 1)