    def test_multi_register(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        subject = 'test'
        client = self.client

        id1 = client.register(subject, basic)
        latest1 = client.get_latest_schema(subject)
        v1 = client.get_version(subject, basic)
        self.assertLatest(latest1, id1, basic, v1)

        id2 = client.register(subject, adv)
        latest2 = client.get_latest_schema(subject)
        v2 = client.get_version(subject, adv)
        self.assertLatest(latest2, id2, adv, v2)

        self.assertNotEqual(id1, id2)
        self.assertNotEqual(latest1, latest2)
        # ensure version is higher
        self.assertTrue(latest1[2] < latest2[2])

        client.register(subject, basic)
        latest3 = client.get_latest_schema(subject)
        # latest should not change with a re-reg
        self.assertEqual(latest2, latest3)
    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)
    def test_encode_with_schema_id(self):
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)
    def test_multi_subject_register(self):
        parsed = avro.loads(data_gen.BASIC_SCHEMA)
        client = self.client
        schema_id = client.register('test', parsed)
        self.assertTrue(schema_id > 0)

        # register again under different subject
        dupe_id = client.register('other', parsed)
        self.assertEqual(schema_id, dupe_id)
        self.assertEqual(len(client.id_to_schema), 1)
    def test_dupe_register(self):
        parsed = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        client = self.client
        schema_id = client.register(subject, parsed)
        self.assertTrue(schema_id > 0)
        latest = client.get_latest_schema(subject)

        # register again under same subject
        dupe_id = client.register(subject, parsed)
        self.assertEqual(schema_id, dupe_id)
        dupe_latest = client.get_latest_schema(subject)
        self.assertEqual(latest, dupe_latest)
Example #7
    def __init__(self, server, schema_registry, topic, emails):
        # key schema definition for a pageview
        self.key = avro.loads("""
        {
            "namespace": "pageview",
            "name":"key",
            "type":"record",
            "fields" : [
                {"name" : "pageview_id", "type" : "string"}
            ]
        }
        """)

        # Value Schema definition for a pageview
        self.value = avro.loads("""
        {
           "namespace": "pageview",
           "name": "value",
           "type": "record",
           "fields" : [
                {"name" : "email", "type" : "string"},
                {"name" : "url", "type" : "string"},
                {"name" : "timestamp", "type" : "string"},
                {"name" : "pageview_id", "type" : "string"}
           ]
        }
        """)

        # define pageview producer with avro serialization
        self.producer = AvroProducer(
            {
                'bootstrap.servers': server,
                'schema.registry.url': schema_registry
            },
            default_key_schema=self.key,
            default_value_schema=self.value)
        self.topic = topic
        self.emails = emails
    def producer(self):
        """
        Create and return an Avro producer object.
        """
        try:
            schema = Schema(self.registry_url, self.topic)
            schema_json = schema.get_latest_schema()
            avroProducer = AvroProducer(
                {
                    'bootstrap.servers': '{}'.format(self.servers),
                    'schema.registry.url': 'http://{}'.format(self.registry_url)
                },
                default_value_schema=avro.loads(schema_json['schema']))
            return avroProducer
        except Exception as e:
            return format(e)
    def get_kafka_producer(self):
        """Return a Producer or AvroProducer instance based on the loaded configuration."""
        self._properties["error_cb"] = self.error_cb
        self._properties["bootstrap.servers"] = self._config.get(
            'bootstrap.servers')
        # self._properties["schema.registry.url"] = self._config.get('schema.registry')
        # if self._config.get('security_protocol') != 'None':
        #     self.add_property("security.protocol", self._config.get('security_protocol'))
        #     self.add_property("ssl.key.password", self._config.get('kafka-cert-password'))

        if self._config.get('avro_producer') and self._config.get(
                'schema_registry') is not None:
            self.add_property("schema.registry.url",
                              self._config.get('schema_registry'))
            key_schema = avro.loads(self._AVRO_SCHEMA_KEY)
            value_schema = avro.loads(self._AVRO_SCHEMA_VALUE)
            producer = AvroProducer(self._properties,
                                    default_key_schema=key_schema,
                                    default_value_schema=value_schema)
        else:
            producer = Producer(self._properties)

        return producer
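
The `_AVRO_SCHEMA_KEY` and `_AVRO_SCHEMA_VALUE` strings referenced above are not shown in this snippet; a minimal sketch of what such class attributes might look like (the field names here are assumptions, not the original schemas):

    _AVRO_SCHEMA_KEY = """
    {
        "name": "key",
        "type": "record",
        "fields": [
            {"name": "id", "type": "string"}
        ]
    }
    """

    _AVRO_SCHEMA_VALUE = """
    {
        "name": "value",
        "type": "record",
        "fields": [
            {"name": "payload", "type": "string"}
        ]
    }
    """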
    def __init__(self,
                 avro_schema_path,
                 bootstrap_servers='localhost:29092',
                 schema_registry='http://localhost:8081',
                 topic_subscribe='quickstart-elastic-news'):
        # Remember: if you are running this code outside Docker, set
        # bootstrap.servers to 'localhost:29092' and schema.registry.url to
        # 'http://localhost:8081'; otherwise use 'kafka:9092' and
        # 'http://schema-registry:8081'.
        # To enable verbose client logging, add "debug": "all" to the config below.
        self.conf = {
            'schema.registry.url': schema_registry,
            'bootstrap.servers': bootstrap_servers
        }
        with open(avro_schema_path, "r") as schema_file:
            self.SCHEMA = avro.loads(schema_file.read())
        self.topic_subscribe = topic_subscribe
        self.logger = logging.getLogger(__name__)
class Purchase:
    username: str = field(default_factory=faker.user_name)
    currency: str = field(default_factory=faker.currency_code)
    amount: int = field(default_factory=lambda: random.randint(100, 200000))

    schema = avro.loads("""{
        "type": "record",
        "name": "purchase",
        "namespace": "com.udacity.lesson3.sample3",
        "fields": [
            {"name": "username", "type": "string"},
            {"name": "currency", "type": "string"},
            {"name": "amount", "type": "int"}
        ]
    }""")
    def __init__(self, driver, nameSalt):
        self.driver = driver
        self.topic = "travis_correct_string_avrosr" + nameSalt

        ValueSchemaStr = """
        {
            "type":"record",
            "name":"value_schema",
            "fields":[
                {"name":"id","type":"int"},
                {"name":"firstName","type":"string"},
                {"name":"time","type":"int"}
            ]
        }
        """
        self.valueSchema = avro.loads(ValueSchemaStr)
def generate_records():
	avro_producer_settings = {
		'bootstrap.servers': "localhost:19092",
		'group.id': 'groupid',
		'schema.registry.url': "http://127.0.0.1:8081"
	}
	producer = AvroProducer(avro_producer_settings)
	key_schema = loads('"string"')
	value_schema = load("schema.avsc")
	i = 1
	while True:
		row = {"int_field": int(i), "string_field": str(i)}
		producer.produce(topic="avro_topic", key="key-{}".format(i), value=row, key_schema=key_schema, value_schema=value_schema)
		print(row)
		sleep(1)
		i+=1
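
The `schema.avsc` file loaded above is not included in this snippet; a value schema consistent with the `row` dict being produced might look like the following (a sketch, not the original file):

{
    "type": "record",
    "name": "avro_record",
    "fields": [
        {"name": "int_field", "type": "int"},
        {"name": "string_field", "type": "string"}
    ]
}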
Example #14
class ClickEvent:
    email: str = field(default_factory=faker.email)
    timestamp: str = field(default_factory=faker.iso8601)
    uri: str = field(default_factory=faker.uri)
    number: int = field(default_factory=lambda: random.randint(0, 999))

    schema = avro.loads("""{
        "type": "record",
        "name": "click_event",
        "namespace": "rest-proxy-avro",
        "fields": [
            {"name": "email", "type": "string"},
            {"name": "timestamp", "type": "string"},
            {"name": "uri", "type": "string"},
            {"name": "number", "type": "int"}
        ]
    }""")
def produce(schema_json, data):

    print('schema:\n')
    pprint.pprint(schema_json)
    print('\n')

    print('message:\n')
    pprint.pprint(data)
    print('\n')

    schema_avro = avro.loads(json.dumps(schema_json))
    producer = AvroProducer({'bootstrap.servers': broker},
                            default_value_schema=schema_avro,
                            schema_registry=schema_registry)

    producer.poll(0)
    producer.produce(topic=topic, value=data)
    producer.flush()
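
A usage sketch for the helper above; the module-level `broker`, `schema_registry`, and `topic` it relies on are assumed to be configured elsewhere, and the schema and record here are made up for illustration:

example_schema = {
    "type": "record",
    "name": "example_value",
    "fields": [
        {"name": "message", "type": "string"}
    ]
}
produce(example_schema, {"message": "hello"})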
def save_new_key_schema_in_SR(SCHEMA_REGISTRY_URL, topic):

    # Another way of creating a schema to use with a message; earlier we created the schema directly in the Schema Registry.
    key_schema_str = """
        {
           "namespace": "my.test",
           "name": "key",
           "type": "record",
           "fields" : [
             {
               "name" : "name",
               "type" : "string"
             }
           ]
        }
        """

    key_schema = avro.loads(key_schema_str)
    return key_schema
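
A sketch of how the returned key schema might then be used with a producer; the broker address and the key value are placeholders:

key_schema = save_new_key_schema_in_SR(SCHEMA_REGISTRY_URL, topic)
producer = AvroProducer(
    {
        'bootstrap.servers': 'localhost:9092',   # assumed broker address
        'schema.registry.url': SCHEMA_REGISTRY_URL
    },
    default_key_schema=key_schema)
producer.produce(topic=topic, key={"name": "example-key"})
producer.flush()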
    def test_getters(self):
        parsed = avro.loads(data_gen.BASIC_SCHEMA)
        client = self.client
        subject = 'test'
        version = client.get_version(subject, parsed)
        self.assertEqual(version, None)
        schema = client.get_by_id(1)
        self.assertEqual(schema, None)
        latest = client.get_latest_schema(subject)
        self.assertEqual(latest, (None, None, None))

        # register
        schema_id = client.register(subject, parsed)
        latest = client.get_latest_schema(subject)
        version = client.get_version(subject, parsed)
        self.assertLatest(latest, schema_id, parsed, version)

        fetched = client.get_by_id(schema_id)
        self.assertEqual(fetched, parsed)
Example #18
def produce_dataset_mce(mce, kafka_config):
    """
    Produces a MetadataChangeEvent to Kafka
    """
    conf = {
        'bootstrap.servers': kafka_config.bootstrap_server,
        'on_delivery': delivery_report,
        'schema.registry.url': kafka_config.schema_registry
    }
    key_schema = avro.loads('{"type": "string"}')
    record_schema = avro.load(kafka_config.avsc_path)
    producer = AvroProducer(conf,
                            default_key_schema=key_schema,
                            default_value_schema=record_schema)

    producer.produce(topic=kafka_config.kafka_topic,
                     key=mce['proposedSnapshot'][1]['urn'],
                     value=mce)
    producer.flush()
Example #19
def fly_avro_drones(bootstrap_servers,
                    schema_registry_url,
                    nmessages,
                    default_value_schema_str=drone_schema_str,
                    producer_dict_kwargs=None,
                    topic_name="drones_raw",
                    time_delay=0,
                    drones=None):
    """
    A simple example of sending structured messages from drones to a message broker.

    Args:
        bootstrap_servers (str): Comma separated string of Kafka servers
        schema_registry_url (str): Schema registry URL
        nmessages (int): Number of messages to send
        default_value_schema_str (str): String Avro schema compatible with drone messages
        producer_dict_kwargs (dict): Optional keyword arguments for producer
        topic_name (str): Topic name to which drone messages will be sent
        time_delay (int): Delay time between cycles when producing messages
        drones (iterable): Iterable of drones from which to generate messages

    Tip:
        Schemas should match the messages sent by drones.
    """
    pdk = {
        'bootstrap.servers': bootstrap_servers,
        'schema.registry.url': schema_registry_url
    }
    if isinstance(producer_dict_kwargs, dict):
        pdk.update(producer_dict_kwargs)
    producer = avro.AvroProducer(
        pdk, default_value_schema=avro.loads(default_value_schema_str))
    z = len(str(nmessages))  # Pretty print cycle number for logging
    for i in range(nmessages):
        print("====MESSAGE SET {}====".format(str(i).zfill(z)))
        for drone in drones:
            msg = drone.message()
            print(msg)
            producer.produce(topic=topic_name,
                             value={k: getattr(msg, k)
                                    for k in msg._fields})
        time.sleep(time_delay)
    producer.flush()
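
A hedged usage sketch for `fly_avro_drones`: the real `drone_schema_str` and drone objects are not shown here, so this example supplies a toy drone whose namedtuple fields match a matching toy schema.

import collections

ToyMessage = collections.namedtuple("ToyMessage", ["drone_id", "altitude"])


class ToyDrone:
    def message(self):
        return ToyMessage(drone_id="drone-1", altitude=123.4)


toy_schema_str = """
{
    "type": "record",
    "name": "toy_drone",
    "fields": [
        {"name": "drone_id", "type": "string"},
        {"name": "altitude", "type": "double"}
    ]
}
"""

fly_avro_drones("localhost:9092",          # assumed broker
                "http://localhost:8081",   # assumed schema registry
                nmessages=3,
                default_value_schema_str=toy_schema_str,
                time_delay=1,
                drones=[ToyDrone()])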
Example #21
def setupTopic_routerHsBridge(server, schema_registry_url):
    global regexp_routerHSBridge
    global avroProducer_routerHSBridge
    global count_routerHSBridge
    count_routerHSBridge = 0

    # Topic routerHSBridge scans for a regex that returns this pattern:
    #<31>Apr  1 06:51:39 KKM-WiFi24K-CCR10 hs-bridge_CCO_243: new host detected 54:9F:13:6F:3C:3A/10.243.52.180 by TCP :50872 -> 203.113.34.26:80
    avro_schema = """
  {"namespace": "weblog.kkr.avro",
  "type": "record",
  "name": "routerHSBridge",
  "fields": [
     {"name": "month", "type": ["null","string"],"default":null}
    ,{"name": "day", "type": ["null","string"],"default":null}
    ,{"name": "time", "type": ["null","string"],"default":null}
    ,{"name": "kkm", "type": ["null","string"],"default":null}
    ,{"name": "hs_bridge"  , "type": ["null","string"],"default":null}
    ,{"name": "message"  , "type": ["null","string"],"default":null}
  ]
  }"""
    schema = avro.loads(avro_schema)
    avroProducer_routerHSBridge = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=schema)

    regexp_routerHSBridge = re.compile((r'(<27>|<28>|<30>|<31>)'
                                        r'(?P<month>\D{3})'
                                        r'(?P<whitespace1>\s{1,2})'
                                        r'(?P<day>\d{1,2})'
                                        r'(?P<whitespace2>\s{1,2})'
                                        r'(?P<time>\d{2}:\d{2}:\d{2})'
                                        r'(?P<whitespace3>\s{1,2})'
                                        r'(?P<kkm>\D{3}-.{7}-\D{3}\d{1,2})'
                                        r'(?P<whitespace4>\s{1})'
                                        r'(?P<hs_bridge>[^\s]+:)'
                                        r'(?P<whitespace5>\s{1})'
                                        r'(?P<message>[^\s]+)'
                                        r'.*'), re.IGNORECASE)
    def __init__(self):

        # This is the Avro Schema for messages
        self.value_schema_str = """
        {  "name": "value",
           "type": "record",
           "fields" : [
             {"name" : "network", "type" : "float"},
             {"name" : "disk", "type" : "float"},
             {"name" : "cpu", "type" : "float"},
             {"name" : "timestamp", "type" : "long"}
           ]
        }"""
        self.value_schema = avro.loads(self.value_schema_str)

        self.avroProducer = AvroProducer({
            'bootstrap.servers': 'broker:29092',
            'on_delivery': self.delivery_report,
            'schema.registry.url': 'http://schema-registry:8081'
            }, default_value_schema=self.value_schema)
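
A sketch of a method that another part of this class could use to publish a record matching the schema above; the method name and the topic are hypothetical:

    def send_metrics(self, network, disk, cpu, timestamp):
        self.avroProducer.produce(
            topic='system-metrics',  # hypothetical topic name
            value={'network': network, 'disk': disk, 'cpu': cpu, 'timestamp': timestamp})
        self.avroProducer.poll(0)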
Example #23
def setupTopic_routerElse(server, schema_registry_url):
    global regexp_routerElse
    global avroProducer_routerElse
    global count_routerElse
    count_routerElse = 0

    # Topic routerElse scans for a regex that returns this pattern:
    #<31>Apr  1 07:00:06 CWT-WiFi24K-CCR05 .*
    # for example :
    #<31>Apr  1 07:00:06 CWT-WiFi24K-CCR05 already 15 logins in progress\n'
    avro_schema = """
  {"namespace": "weblog.kkr.avro",
  "type": "record",
  "name": "routerElse",
  "fields": [
     {"name": "month", "type": ["null","string"],"default":null}
    ,{"name": "day", "type": ["null","string"],"default":null}
    ,{"name": "time", "type": ["null","string"],"default":null}
    ,{"name": "kkm", "type": ["null","string"],"default":null}
    ,{"name": "message"  , "type": ["null","string"],"default":null}
  ]
  }"""
    schema = avro.loads(avro_schema)
    avroProducer_routerElse = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=schema)

    regexp_routerElse = re.compile((r'(<30>|<31>)'
                                    r'(?P<month>\D{3})'
                                    r'(?P<whitespace1>\s{1,2})'
                                    r'(?P<day>\d{1,2})'
                                    r'(?P<whitespace2>\s{1,2})'
                                    r'(?P<time>\d{2}:\d{2}:\d{2})'
                                    r'(?P<whitespace3>\s{1,2})'
                                    r'(?P<kkm>\D{3}-.{7}-\D{3}\d{1,2})'
                                    r'(?P<whitespace4>\s{1,3})'
                                    r'(?P<message>[^\s]+)'
                                    r'.*'), re.IGNORECASE)
class ClickEvent:
    email: str = field(default_factory=faker.email)
    timestamp: str = field(default_factory=faker.iso8601)
    uri: str = field(default_factory=faker.uri)
    number: int = field(default_factory=lambda: random.randint(0, 999))
    attributes: dict = field(default_factory=ClickAttribute.attributes)

    #
    # TODO: Load the schema using the Confluent avro loader
    #       See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/load.py#L23
    #
    schema = avro.loads(
      """{
        "type": "record",
        "name": "click_event",
        "namespace": "com.udacity.lesson3.exercise4",
        "fields": [
            {"name": "email", "type": "string"},
            {"name": "timestamp", "type": "string"},
            {"name": "uri", "type": "string"},
            {"name": "number", "type": "int"},
            {
                "name": "attributes",
                "type": {
                    "type": "map",
                    "values": {
                        "type": "record",
                        "name": "attribute",
                        "fields": [
                            {"name": "element", "type": "string"},
                            {"name": "content", "type": "string"}
                        ]
                    }
                }
            }
        ]
    }"""
    )
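
`ClickAttribute` is referenced above but not defined in this fragment; a sketch of a dataclass that would satisfy the `attributes` map-of-record schema (in the original file it would be defined before ClickEvent, and the field factories and counts here are assumptions):

@dataclass
class ClickAttribute:
    element: str = field(default_factory=lambda: random.choice(["div", "a", "button"]))
    content: str = field(default_factory=faker.bs)

    @classmethod
    def attributes(cls):
        # Return a map of attribute name -> record, matching the Avro "map" type above.
        return {faker.uri_page(): ClickAttribute() for _ in range(random.randint(1, 5))}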
Example #25
    def send(self, topic=None, msg="{'foo':'bar'}", lang='json', schema=None):

        '''
        Send a message to Kafka.

        Supports both JSON and Avro messages.
        '''


        log.debug("[KafkaDriver][send] producer start: " + str(self.server))
        log.debug("[KafkaDriver][send] send message: " + str(msg))
        if (topic is None):
            topic = self.topic
        log.debug("[KafkaDriver][send] topic: " + str(topic))
        if (lang == 'json'):
            producer = KafkaProducer(bootstrap_servers=self.server + ':9092')
            log.debug("[KafkaDriver][send] json msg")
            res = producer.send(topic, key=None, value=msg)
            log.debug("[KafkaDriver][send] produce result: " + str(res.get()))
            time.sleep(1)
            producer.close()
            log.debug("[KafkaDriver][send] end")
        elif (lang == 'avro'):
            log.debug("[KafkaDriver][send] avro msg")
            log.debug("[KafkaDriver][send] schema: " + str(schema))
            value_schema = avro.loads(schema)
            avroProducer = AvroProducer({
                 'bootstrap.servers': self.server,
                 'schema.registry.url': 'http://' + self.schema_registry + ':8081'
                 }, default_value_schema=value_schema)

            res = avroProducer.produce(topic=topic, value=msg)
            log.debug("[KafkaDriver][send] produce result: " + str(res))
            time.sleep(1)
            avroProducer.flush()
            log.debug("[KafkaDriver][send] end")
Example #26
    def __init__(self,
                 broker,
                 schema_registry,
                 schema=None,
                 logging_enabled=False):
        """
        Initialization of the Producer, which instantiates an AvroProducer.

        Parameters
        ----------
        broker: str
            The URL of the broker (example: 'localhost:9092')
        schema_registry: str
            The URL of the Confluent Schema Registry endpoint (example: 'http://localhost:8081')
        schema: str
            The default Avro schema used to serialize messages
        logging_enabled: bool, optional
            If True, a module-level logger is attached and used to log messages
        """
        if schema is not None:
            self.schema = avro.loads(schema)
        else:
            self.schema = None
        self.__producer = AvroProducer(
            {
                "bootstrap.servers": broker,
                "schema.registry.url": schema_registry
            },
            default_key_schema=self.schema)
        if logging_enabled:
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = None
        self.produce_flag = True
        self.production_last_stoped = 0
        self.total_time_producing_stoped = 0
        self.__msg_queue = PriorityQueue()
    logging.info(ipo_data)

    # publish to kafka if config is specified
    if options.kafka_config is not None:
        from confluent_kafka import avro
        from confluent_kafka.avro import AvroProducer

        config = None
        with open(options.kafka_config) as f:
            try:
                config = yaml.safe_load(f)
            except yaml.YAMLError as exc:
                logging.error(exc)
                exit(1)

        value_schema = avro.loads(json.dumps(config['value-schema']))

        avroProducer = AvroProducer(
            {
                'bootstrap.servers':
                f"{config['connection']['kafka-host']}:{config['connection']['kafka-port']}",
                'schema.registry.url':
                f"http://{config['connection']['schema-registry-host']}:{config['connection']['schema-registry-port']}"
            },
            default_value_schema=value_schema)

        for ipo_record in ipo_data:
            # sample ipo record
            # {'expiration_date': '11/4/2019',
            #  'priced_date': '5/7/2019',
            #  'company_name': 'LANDCADIA HOLDINGS II, INC.',
#

import argparse
from uuid import uuid4

from six.moves import input

from confluent_kafka import avro

# Parse Schema used for serializing User class
record_schema = avro.loads("""
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
""")


class User(object):
    """
        User stores the deserialized user Avro record.
    """

    # Use __slots__ to explicitly declare all data members.
    __slots__ = ["name", "favorite_number", "favorite_color", "id"]
from math import ceil
from calculation import calculate_average, calculate_ranking
from schema import key_schema_avg_str, key_schema_rank_str, value_schema_avg_str, value_schema_rank_str
import datetime

c = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'join-ksql',
    'schema.registry.url': 'http://0.0.0.0:8081'
    })

# c.assign([Partition])

c.subscribe(['students_result_source'])

value_schema_avg = avro.loads(value_schema_avg_str)
key_schema_avg = avro.loads(key_schema_avg_str)
value_schema_rank = avro.loads(value_schema_rank_str)
key_schema_rank = avro.loads(key_schema_rank_str)

producer_avg = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://0.0.0.0:8081'
}, default_key_schema=key_schema_avg, default_value_schema=value_schema_avg)

producer_rank = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://0.0.0.0:8081'
}, default_key_schema=key_schema_rank, default_value_schema=value_schema_rank)

while True:
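    # The loop body is cut off in this snippet; a rough sketch of what such a
    # consume/compute/produce cycle might look like. The topic names, key shape,
    # and the calculate_* call signatures below are assumptions, not the original code.
    msg = c.poll(1.0)
    if msg is None or msg.error():
        continue
    record = msg.value()
    avg = calculate_average(record)    # assumed to accept the consumed record
    rank = calculate_ranking(record)   # assumed to accept the consumed record
    producer_avg.produce(topic='students_result_avg',     # hypothetical topic
                         key={'id': record.get('id')},    # hypothetical key shape
                         value=avg)
    producer_rank.produce(topic='students_result_rank',   # hypothetical topic
                          key={'id': record.get('id')},   # hypothetical key shape
                          value=rank)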
Example #30
    StructField("CREATE_DATE", StringType(), nullable=True),
    StructField("ACCOUNTING_IDENT", StringType(), nullable=True)
])

#----------------------------------------
# 00003 - Get the Schema of the Source Topic:
#----------------------------------------

from schema import getting_value_schema, getting_key_schema

var_val_schema = getting_value_schema(var_cassandra_conn_host,
                                      var_topic_src_name, var_schema_url_port)
var_key_schema = getting_key_schema(var_cassandra_conn_host,
                                    var_topic_src_name, var_schema_url_port)

value_schema = avro.loads(var_val_schema)
key_schema = avro.loads(var_key_schema)

from df import getting_df_value_schema

var_df_schema = getting_df_value_schema(var_val_schema)

#--------------------------------------------
# 00004 - Process Each Kafka Message:
#--------------------------------------------

# This part of the code writes the messages into a compacted topic:


def handler(message):
    records = message.collect()
Example #31
from confluent_kafka import avro

key_schema = avro.loads("""
{
  "doc": "Sample schema to help you get started.",
  "fields": [
    {
      "doc": "",
      "name": "prefix",
      "type": "string"
    },
    {
      "doc": "",
      "name": "sensorId",
      "type": "string"
    }
  ],
  "name": "QRSComplexKey",
  "namespace": "com.cinvestav",
  "type": "record"
}
""")
key_schema_str = """
{
   "namespace": "my.test",
   "name": "key",
   "type": "record",
   "fields" : [
     {
       "name" : "name",
       "type" : "string"
     }
   ]
}
"""

value_schema = avro.loads(value_schema_str)
key_schema = avro.loads(key_schema_str)
value = {"name": "Value"}
key = {"name": "Key"}


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))

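A sketch of how the schemas, sample key/value, and delivery callback above are typically wired together; the broker address, registry URL, and topic name are placeholders:

avroProducer = AvroProducer(
    {
        'bootstrap.servers': 'localhost:9092',           # assumed broker
        'schema.registry.url': 'http://localhost:8081',  # assumed registry
        'on_delivery': delivery_report
    },
    default_key_schema=key_schema,
    default_value_schema=value_schema)
avroProducer.produce(topic='my.test.topic', value=value, key=key)  # hypothetical topic
avroProducer.flush()
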
Example #33
def main(argv):

    brokers = argv[0]
    schemaRegistryUrl = argv[1]
    timestamp = argv[2]
    id = argv[3]
    firstName = argv[4]
    lastName = argv[5]

    value_schema_str = """
	{
	   "namespace": "my.test",
	   "name": "Person",
	   "type": "record",
	   "fields" : [
		 {
		   "name" : "id",
		   "type" : "int"
		 },
		 {
		   "name" : "firstName",
		   "type" : "string"
		 },
		 {
		   "name" : "lastName",
		   "type" : "string"
		 }
	   ]
	}
	"""

    key_schema_str = """
	{
	   "namespace": "my.test",
	   "name": "PersonKey",
	   "type": "record",
	   "fields" : [
		 {
		   "name" : "id",
		   "type" : "string"
		 }
	   ]
	}
	"""

    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)
    value = {"id": int(id), "firstName": firstName, "lastName": lastName}
    key = {"id": id}

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': brokers,
            'schema.registry.url': schemaRegistryUrl,
            'compression.codec': 'snappy'
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    avroProducer.produce(topic='person-v1',
                         value=value,
                         key=key,
                         timestamp=int(timestamp))
    avroProducer.flush()
Example #34
def main(argv):
    """The main function runs when the script is called """
    # parser = argparse.ArgumentParser(description="Feed data from Pi 3 Sense Hat board into a Kafka topic.")
    # parser.add_argument("--target",
    #                     help="IP address of target Kafka broker.",
    #                     action="store", dest="target", type=str, required=True)
    # parser.add_argument("--topic", help="Kafka topic name to send message to.", action="store", dest="topic", type=str,
    #                     required=True)
    #
    # args = parser.parse_args()

    # Set up the configured Schema registry Avro schema for the test Kafka topic.
    value_schema_str = """
    {
      "type": "record",
      "name": "base_unprocessed_data",
      "namespace": "push_im_subsystem.im_data",
      "doc": "DRAFT Apache AVRO data value schema for push of real-time unprocessed data from MWCC sub-systems. AIMS UAID used as the Kafka key for each value record. Note this requires all associated sensor data (accuracy, range, etc) to be established from attributes of the AIMS UAID related data",
      "fields": [
        {
          "name": "SOSAobservedProperty",
          "type": "string",
          "doc": "https://www.w3.org/TR/vocab-ssn/#SOSAobservedProperty",
          "default": "SOSAobservedProperty"
        },
        {
          "name": "SOSAhasResult",
          "type": {
            "type": "record",
            "doc": "http://qudt.org/schema/qudt#QuantityValue",
            "name": "QuantityValue",
            "namespace": "qudt",
            "fields": [
              {
                "name": "numericValue",
                "type": "double",
                "doc": "http://qudt.org/schema/qudt#numericValue",
                "default": 0.00
              },
              {
                "name": "unit",
                "type": "string",
                "doc": "http://qudt.org/schema/qudt#unit",
                "default": "http://qudt.org/1.1/vocab/unit#"
              }
            ]
          },
          "doc": "https://www.w3.org/TR/vocab-ssn/#SOSAhasResult"
        },
        {
          "name": "timestamp_clock_sync",
          "type": {
            "name": "ptp_sync_status",
            "type": "record",
            "doc": "The status of the PTP client clock sync",
            "fields": [
              {
                "name": "ptp_clock_status",
                "type": "string",
                "doc": "The PTP client daemon PTP_Clock_status of the clock responsible for the SOSAresultTime",
                "default": "ptp_clock_status"
              },
              {
                "name": "ptp_best_master_id",
                "type": "string",
                "doc": "The PTP client daemon Best_master_ID of the PTP client master clock providing current time sync for the SOSAresultTime",
                "default": "ptp_best_master_id"
              },
              {
                "name": "ptp_offset_from_master",
                "type": "float",
                "doc": "The PTP client daemon Offset_from_Master clock. https://www.w3.org/TR/2017/REC-owl-time-20171019/#time:Duration https://www.w3.org/TR/2017/REC-owl-time-20171019/#time:unitSecond",
                "default": 0.00
              }
            ]
          },
          "doc": "The status of the acquisition system clock responsible for the SOSAresultTime"
        },
        {
          "name": "SOSAresultTime",
          "type": "string",
          "doc": "https://www.w3.org/TR/vocab-ssn/#SOSAresultTime https://www.w3.org/TR/2017/REC-owl-time-20171019/#time:Instant https://www.w3.org/TR/xmlschema11-2/#dateTimeStamp",
          "default": "SOSAresultTime"
        }
      ]
    }    
    """

    key_schema_str = """
    {
      "type": "record",
      "name": "key",
      "namespace": "push_im_subsystem.im_data",
      "fields": [
        {
          "name": "aims_asset_id",
          "type": "string"
        }
      ]
    }
    """
    print("running avro loads on schemas")
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)

    # Set a example key for the message which controls which partition the message ends up in Kafka.
    key = {"aims_asset_id": "HS2-000024H7L"}

    # Run a scheduled infinite loop to read from sensor.
    def send_to_kafka():
        Timer(10.0, send_to_kafka).start()
        try:
            print("running")
            avro_producer = AvroProducer(
                {
                    'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                    'schema.registry.url': 'http://up04:8081'
                },
                default_key_schema=key_schema,
                default_value_schema=value_schema)

            value = read_from_sense_hat()

            print(value)

            avro_producer.poll(0)

            avro_producer.produce(topic='test_avro_2',
                                  value=value,
                                  key=key,
                                  callback=delivery_report)
            avro_producer.flush()

        except Exception as e:
            logging.error(traceback.format_exc())

    send_to_kafka()
,{"name": "ACTOR2GEO_LONG" ,"type": ["null","string"],"default":null}
,{"name": "ACTOR2GEO_FEATUREID" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_TYPE" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_FULLNAME" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_COUNTRYCODE" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_ADM1CODE" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_ADM2CODE" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_LAT" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_LONG" ,"type": ["null","string"],"default":null}
,{"name": "ACTIONGEO_FEATUREID" ,"type": ["null","string"],"default":null}
,{"name": "DATEADDED" ,"type": ["null","string"],"default":null}
,{"name": "SOURCEURL" ,"type": ["null","string"],"default":null}
,{"name": "SITE" ,"type": ["null","string"],"default":null}
]
}""".replace("REPLACEME_TOPIC", topic)

key_schema = avro.loads(key_schema)
value_schema = avro.loads(schema_values_str)

avroProducer_gdeltEvent = AvroProducer(
    {
        'bootstrap.servers': server,
        'schema.registry.url': schema_registry_url
    },
    default_value_schema=value_schema,
    default_key_schema=key_schema)
load(datafile, topic, server)
avroProducer_gdeltEvent.flush()

#if __name__ == "__main__":
#    main()
#!/usr/bin/env python

from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

key_schema = open("./schemas/aduss-user-key.avsc", "rb").read()
value_schema = open("./schemas/aduss-user-values.avsc", "rb").read()

value_schema = avro.loads(value_schema)
key_schema = avro.loads(key_schema)

key = {"project": "prj-users"}
topic = "tpc-aduss-users"

first_user = {
    "user": {
        "id": 1,
        "first_name": "John",
        "last_name": "Steinbeck",
        "tzid": "CA",
        "website_url": "foobar.com",
        "manager": {
            "id": 1000,
            "code": 12345
        }
    }
}

producer = AvroProducer(
    {
        'bootstrap.servers': 'localhost:9092',
    def test_register(self):
        parsed = avro.loads(data_gen.BASIC_SCHEMA)
        client = self.client
        schema_id = client.register('test', parsed)
        self.assertTrue(schema_id > 0)
        self.assertEqual(len(client.id_to_schema), 1)

    def test_schema_from_string(self):
        parsed = avro.loads(data_gen.BASIC_SCHEMA)
        self.assertTrue(isinstance(parsed, schema.Schema))
Example #39
from uuid import uuid4
from confluent_kafka import avro

# Parse Schema used for serializing User class
record_schema = avro.loads("""
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
""")

deloitte_kafka_schema = avro.loads("""
{"namespace": "be.deloitte.kafka",
  "type": "record",
  "name": "Image",
  "fields": [
      {"name": "imageId" , "type": "string"},
      {"name": "timestamp", "type": { "type": "long", "logicalType": "timestamp-millis" }},
      {"name": "numOfBoats", "type": "int"},
      {"name": "occupancyRate", "type": "double"},
      {"name": "image", "type" : "string"}
  ]
 }
""")
    def test_context(self):
        with self.client as c:
            parsed = avro.loads(data_gen.BASIC_SCHEMA)
            schema_id = c.register('test', parsed)
            self.assertTrue(schema_id > 0)
            self.assertEqual(len(c.id_to_schema), 1)