def test_produce_with_custom_registry(self):
    schema_registry = MockSchemaRegistryClient()
    value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    producer = AvroProducer({}, schema_registry=schema_registry)
    producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey',
                     key_schema=key_schema)

def test_produce_primitive_string_key(self):
    value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
    with self.assertRaises(ConnectionError):  # Nonexistent schema registry
        producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey',
                         key_schema=key_schema)

def test_produce_with_empty_key_value_with_schema(self):
    key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    schema_registry = MockSchemaRegistryClient()
    producer = AvroProducer({}, schema_registry=schema_registry,
                            default_key_schema=key_schema,
                            default_value_schema=value_schema)
    producer.produce(topic='test', value=0.0, key='')

def test_produce_with_empty_key_no_schema(self):
    value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    schema_registry = MockSchemaRegistryClient()
    producer = AvroProducer({}, schema_registry=schema_registry,
                            default_value_schema=value_schema)
    with self.assertRaises(KeySerializerError):
        producer.produce(topic='test', value=0.0, key='')

def verify_schema_registry_client():
    from confluent_kafka import avro

    sr_conf = {'url': schema_registry_url}
    sr = avro.CachedSchemaRegistryClient(sr_conf)

    subject = str(uuid.uuid4())

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')
    schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))

    schema_id = sr.register(subject, schema)
    assert schema == sr.get_by_id(schema_id)
    latest_id, latest_schema, latest_version = sr.get_latest_schema(subject)
    assert schema == latest_schema
    assert sr.get_version(subject, schema) == latest_version
    sr.update_compatibility("FULL", subject)
    assert sr.get_compatibility(subject) == "FULL"
    assert sr.test_compatibility(subject, schema)
    assert sr.delete_subject(subject) == [1]
Example #6
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        # https://dattell.com/data-architecture-blog/kafka-optimization-how-many-partitions-are-needed/
        # I liked the calculation in the article above. I will start with these
        # values because I do not know the technical limitations. With 8
        # stations and 12 arrivals an hour, at 10 MB per topic, we would need
        # to process 10 MB x 8 stations x 12 arrivals x 60 seconds x 60 minutes
        # = 3,456,000 MB per hour (see the rough sizing check after this class).
        # Topic name principles: business name (Chicago Transit Authority, CTA),
        # name of the Python program, name of the class.
        # TODO: Come up with a better topic name
        topic_name = "CTAProducersStation"
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        #self.topic.name = topic_name
        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)
        self.line = color.name
        #self.train_status = train_status.name

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""

        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        # Call producer.produce and catch any errors; the print below is for
        # testing purposes.

        try:
            self.producer.produce(topic=self.topic_name,
                                  key={"timestamp": self.time_millis()},
                                  value={
                                      "station_id": self.station_id,
                                      "train_id": train.train_id,
                                      "direction": direction,
                                      "line": self.line,
                                      "train_status": train.status.name,
                                      "prev_station_id": prev_station_id,
                                      "prev_direction": prev_direction,
                                  })

        except Exception as e:
            logger.error("failed to produce arrival event to kafka")
            print(
                f"Exception while producing record value to topic - {self.topic_name}: {e}"
            )
        #else:
        #    print(f"Successfully producing record value to topic - {self.topic_name}")

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
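
A quick check of the sizing arithmetic in the comment inside __init__ above; the figures are the assumptions stated there, not measurements.

mb_per_topic = 10
stations = 8
arrivals_per_hour = 12
print(mb_per_topic * stations * arrivals_per_hour * 60 * 60)  # 3456000 MB per hour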
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with explicit reader schema works."""
    from confluent_kafka import avro
    base_conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'schema.registry.url': schema_registry_url
    }

    consumer_conf = dict(
        base_conf, **{
            'group.id': 'test.py',
            'session.timeout.ms': 6000,
            'enable.auto.commit': False,
            'on_commit': print_commit_result,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': schema_registry_url
        })

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')
    writer_schema = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    reader_schema = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))

    user_value1 = {"name": " Rogers Nelson"}

    user_value2 = {"name": "Kenny Loggins"}

    combinations = [
        dict(key=user_value1,
             key_schema=writer_schema,
             value=user_value2,
             value_schema=writer_schema),
        dict(key=user_value2,
             key_schema=writer_schema,
             value=user_value1,
             value_schema=writer_schema)
    ]
    avro_topic = topic + str(uuid.uuid4())

    p = avro.AvroProducer(base_conf)
    for i, combo in enumerate(combinations):
        p.produce(topic=avro_topic, **combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf,
                          reader_key_schema=reader_schema,
                          reader_value_schema=reader_schema)
    c.subscribe([avro_topic])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)

        if msg is None:
            continue
        if msg.error():
            print("Consumer error {}".format(msg.error()))
            continue

        msgcount += 1
        # Avro schema projection should return the two fields not present in the writer schema
        try:
            assert (msg.key().get('favorite_number') == 42)
            assert (msg.key().get('favorite_color') == "purple")
            assert (msg.value().get('favorite_number') == 42)
            assert (msg.value().get('favorite_color') == "purple")
            print(
                "success: schema projection worked for explicit reader schema")
        except KeyError:
            raise confluent_kafka.avro.SerializerError(
                "Schema projection failed when setting reader schema.")
Example #8
def verify_avro():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests',
                            'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=float_value,
             key_schema=prim_float),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=str_value,
             key_schema=prim_string),
        dict(value=float_value,
             value_schema=prim_float,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=float_value,
             value_schema=prim_float,
             key=str_value,
             key_schema=prim_string),
        dict(value=str_value,
             value_schema=prim_string,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=str_value,
             value_schema=prim_string,
             key=float_value,
             key_schema=prim_float),
        # Verify identity check allows Falsy object values(e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string, key=0.,
             key_schema=prim_float),
        dict(value=0., value_schema=prim_float, key='',
             key_schema=prim_string),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry())
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(), msg.key(),
                   msg.value(), tstype, timestamp))

            # omit empty Avro fields from payload for comparison
            record_key = msg.key()
            record_value = msg.value()
            if isinstance(msg.key(), dict):
                record_key = {
                    k: v
                    for k, v in msg.key().items() if v is not None
                }

            if isinstance(msg.value(), dict):
                record_value = {
                    k: v
                    for k, v in msg.value().items() if v is not None
                }

            assert combo.get('key') == record_key
            assert combo.get('value') == record_value

            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
from time import sleep
import os
import atexit

from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

DRIVER_FILE_PREFIX = "./drivers/"
KAFKA_TOPIC = "driver-positions-pyavro"
# Load a driver id from an environment variable
# if it isn't present use "driver-3"
DRIVER_ID = os.getenv("DRIVER_ID", "driver-3")

print("Starting Python Avro producer.")

value_schema = avro.load("position_value.avsc")
key_schema = avro.load("position_key.avsc")

# Configure the location of the bootstrap server, Confluent interceptors
# and a partitioner compatible with Java, and key/value schemas
# see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
producer = AvroProducer(
    {
        'bootstrap.servers': 'kafka:9092',
        'plugin.library.paths': 'monitoring-interceptor',
        'partitioner': 'murmur2_random',
        'schema.registry.url': 'http://schema-registry:8081'
    },
    default_key_schema=key_schema,
    default_value_schema=value_schema)
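
The script above stops after configuring the producer; a minimal produce loop might look like the following sketch. The CSV layout and the "latitude"/"longitude"/"key" field names are assumptions and must match the actual position_key.avsc/position_value.avsc schemas.

# Hypothetical produce loop; field names must match the loaded schemas.
atexit.register(producer.flush)  # flush pending messages on exit
with open(DRIVER_FILE_PREFIX + DRIVER_ID + ".csv") as f:
    for line in f:
        latitude, longitude = line.strip().split(",")[:2]
        producer.produce(
            topic=KAFKA_TOPIC,
            key={"key": DRIVER_ID},
            value={"latitude": float(latitude), "longitude": float(longitude)})
        producer.poll(0)
        sleep(1)  # uses the `sleep` imported at the top of the script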
def test_schema_from_file(self):
    parsed = avro.load(data_gen.get_schema_path('adv_schema.avsc'))
    self.assertTrue(isinstance(parsed, schema.Schema))
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        topic_name = station_name  # TODO: Come up with a better topic name
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=3,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        #
        ##logger.info("arrival kafka integration incomplete - skipping")
        #self.producer.produce(
        #    topic=self.topic_name,
        #    key={"timestamp": self.time_millis()},
        #    value={
        #        #
        #        #
        #        # TODO: Configure this
        #        #
        #        #
        #    },
        #)
        station_key = {"timestamp": self.time_millis()}
        station_val = {
            "station_id": self.station_id,
            "train_id": train.train_id,
            "direction": direction,
            "line": self.color,
            "train_status": train.status.name,
            "prev_station_id": prev_station_id,
            "prev_direction": prev_direction
        }

        print('Topic: ', self.topic_name)
        print('Producer: ', self.producer.produce)
        print('key_schema: ', self.key_schema)
        print('Key: ', station_key)
        print('value_schema: ', self.value_schema)
        print('Val: ', station_val)
        self.producer.produce(topic=self.topic_name,
                              key_schema=self.key_schema,
                              key=station_key,
                              value_schema=self.value_schema,
                              value=station_val)
        print('Stations issa go')

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with explicit reader schema works."""
    from confluent_kafka import avro

    base_conf = {'bootstrap.servers': bootstrap_servers,
                 'error_cb': error_cb,
                 'schema.registry.url': schema_registry_url}

    consumer_conf = dict(base_conf, **{
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'on_commit': print_commit_result,
        'auto.offset.reset': 'earliest',
        'schema.registry.url': schema_registry_url})

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')
    writer_schema = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    reader_schema = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))

    user_value1 = {
        "name": " Rogers Nelson"
    }

    user_value2 = {
        "name": "Kenny Loggins"
    }

    combinations = [
        dict(key=user_value1, key_schema=writer_schema, value=user_value2, value_schema=writer_schema),
        dict(key=user_value2, key_schema=writer_schema, value=user_value1, value_schema=writer_schema)
    ]
    avro_topic = topic + str(uuid.uuid4())

    p = avro.AvroProducer(base_conf)
    for i, combo in enumerate(combinations):
        p.produce(topic=avro_topic, **combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf, reader_key_schema=reader_schema, reader_value_schema=reader_schema)
    c.subscribe([avro_topic])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)

        if msg is None:
            continue
        if msg.error():
            print("Consumer error {}".format(msg.error()))
            continue

        msgcount += 1
        # Avro schema projection should return the two fields not present in the writer schema
        try:
            assert(msg.key().get('favorite_number') == 42)
            assert(msg.key().get('favorite_color') == "purple")
            assert(msg.value().get('favorite_number') == 42)
            assert(msg.value().get('favorite_color') == "purple")
            print("success: schema projection worked for explicit reader schema")
        except KeyError:
            raise confluent_kafka.avro.SerializerError("Schema projection failed when setting reader schema.")
def test_produce_value_and_key_schemas(self):
    value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, default_value_schema=value_schema,
                            default_key_schema=value_schema)
    with self.assertRaises(ConnectionError):  # Nonexistent schema registry
        producer.produce(topic='test', value={"name": 'abc"'}, key={"name": 'abc"'})
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        topic_name = station_name  # TODO: Come up with a better topic name
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            # https://www.confluent.io/blog/how-choose-number-topics-partitions-kafka-cluster/
            # p : throughput of a single producer to one partition
            # c : throughput of a single consumer from one partition
            # t : target throughput
            # choose at least max(t/p, t/c), i.e.
            # partitions = max(throughput/#producers, throughput/#consumers)
            #
            # Example from the video, with 3 producers and 5 consumers, each
            # operating at 10 MB/s per single producer/consumer:
            # max(100 MB/s / (3 * 10 MB/s), 100 MB/s / (5 * 10 MB/s)) = max(3.33, 2) ~= 4 partitions needed
            # (see the suggested_partitions helper after this class)
            num_partitions=2,  # more partitions give higher throughput but also higher latency
            num_replicas=1,  # replicas are shared between brokers
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        #
        #logger.info("arrival kafka integration incomplete - skipping")

        # make sure the arrival events to kafka are paired with Avro key and value schemas

        # look at train.py and line.py to get the properties of those instances (train and line)
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                "station_id": self.station_id,
                "train_id": train.train_id,  # see `self.train_id` in train.py
                "direction": direction,
                "line": self.color.name,  # see `self.color.name` in line.py
                "train_status": train.status.name,  # see `self.status.name` in train.py
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction
            },
        )
        logger.info("producing arrival event to kafka is complete")

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
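
The partition-count rule of thumb from the comments above, expressed as a small helper (a sketch; the function name and signature are mine, not part of the project):

import math

def suggested_partitions(target_mb_s, producer_mb_s, consumer_mb_s,
                         num_producers, num_consumers):
    """At least max(t/p, t/c); see the Confluent blog post linked above."""
    return math.ceil(max(target_mb_s / (num_producers * producer_mb_s),
                         target_mb_s / (num_consumers * consumer_mb_s)))

print(suggested_partitions(100, 10, 10, 3, 5))  # 4, matching the video example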
Example #15
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        topic_name = f"org.chicago.cta.station.arrivals.{station_name}"
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=5,
            num_replicas=3,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        #
        arrival_data = Arrival(self.station_id, direction, prev_station_id,
                               prev_direction, train.train_id,
                               train.status.name, self.color.name)
        print(f"Arrival data: {asdict(arrival_data)}")

        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                "station_id": self.station_id,
                "direction": direction,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
                "train_id": train.train_id,
                "train_status": train.status.name,
                "line": self.color.name
                #
                # TODO: Check if line config is right
                #
                #
            },
        )

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
Example #16
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        # Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        super().__init__(
            topic_name=f"org.chicago.cta.station.{station_name}.arrivals",
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=5,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        # Complete this function by producing an arrival message to Kafka
        try:
            self.producer.produce(
                topic=self.topic_name,
                key={"timestamp": self.time_millis()},
                key_schema=self.key_schema,
                value_schema=self.value_schema,
                value={
                    "station_id": self.station_id,
                    "train_id": train.train_id,
                    "direction": direction,
                    "line": self.color.name,
                    "train_status": train.status.name,
                    "prev_station_id": prev_station_id,
                    "prev_direction": prev_direction,
                },
            )
        except Exception as e:
            logger.fatal(e)
            raise e

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
Example #17
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json`, then uncomment the below, done
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of

        topic_name = f"station_topic_{station_name}_{color.name}"  # TODO: Come up with a better topic name, done
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        # TODO: Complete this function by producing an arrival message to Kafka, done

        logger.info(
            f"{self.topic_name}: train {train.train_id} arrived from direction "
            f"{direction} and prev_direction {prev_direction} "
            f"and prev_station_id {prev_station_id}")

        #print(f"station_id: {self.station_id}")
        #print(f"trainid: {train.train_id}")
        #print(f"dir: {direction}")
        #print(f"colorname: {self.color.name}")
        #print(f"statusname: {train.status.name}")
        #print(f"prevstid: {prev_station_id}")
        #print(f"prevdir: {prev_direction}")

        if not prev_station_id:
            prev_station_id = 0

        if not prev_direction:
            prev_direction = 'None'

        self.producer.produce(topic=self.topic_name,
                              key={"timestamp": self.time_millis()},
                              value={
                                  "station_id": self.station_id,
                                  "train_id": train.train_id,
                                  "direction": direction,
                                  "line": self.color.name,
                                  "train_status": train.status.name,
                                  "prev_station_id": prev_station_id,
                                  "prev_direction": prev_direction,
                              })

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
def run_avro_loop(producer_conf, consumer_conf):
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')

    p = avro.AvroProducer(producer_conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float, key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string, key=float_value, key_schema=prim_float),
        # Verify identity check allows Falsy object values(e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string, key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float, key='', key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf)
    c.subscribe([(t['topic']) for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)

        if msg is None:
            continue
        if msg.error():
            print(msg.error())
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), tstype, timestamp))

        # omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])

        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items() if v is not None}

        if isinstance(msg.value(), dict):
            record_value = {k: v for k, v in msg.value().items() if v is not None}

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
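
run_avro_loop expects ready-made producer and consumer configs; a plausible invocation looks like this (broker and registry addresses are placeholders, not values from the original test):

# Placeholder configs for calling run_avro_loop.
producer_conf = {
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://localhost:8081',
}
consumer_conf = dict(producer_conf, **{
    'group.id': str(uuid.uuid4()),
    'session.timeout.ms': 6000,
    'enable.auto.commit': False,
    'auto.offset.reset': 'earliest',
})
run_avro_loop(producer_conf, consumer_conf)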
Example #19
        self.message = self.getattribute(Timing_data, 'message')


if __name__ == "__main__":
    if len(sys.argv) != 5:
        sys.stderr.write('Usage: %s <bootstrap-brokers> <schema-registry-url> '
                         '<avro-topic> <json-topic>\n' % sys.argv[0])
        sys.exit(1)

    #config
    broker = sys.argv[1]
    schema_registry_url = sys.argv[2]

    #config avroProducer
    avrotopic = sys.argv[3]
    value_schema = avro.load(
        '/home/silence/PycharmProjects/test/Avro/Timing.avsc')
    key_schema = avro.load('/home/silence/PycharmProjects/test/Avro/Id.avsc')
    avroconf = {
        'bootstrap.servers': broker,
        'schema.registry.url': schema_registry_url
    }

    #config jsonComsumer
    jsontopic = sys.argv[4]
    jsonconf = {'bootstrap.servers': broker}

    #create avroProducer
    avroProducer = AvroProducer(avroconf,
                                default_key_schema=key_schema,
                                default_value_schema=value_schema)
Example #20
class KafkaStream(metaclass=IterateStream):

    CONFIG = {
        'start': {
            'group.id': 'groupid',
            'default.topic.config': {
                'auto.offset.reset': 'beginning',
                'auto.commit.enable': 'false'
            }
        },
        'end': {
            'group.id': 'groupid'
        }
    }

    OFFSETS = {
        'start': confluent_kafka.OFFSET_BEGINNING,
        'end': confluent_kafka.OFFSET_END
    }

    KEY_SCHEMA = avro.load(os.path.join(SCHEMAS, 'keyschema.avsc'))
    VALUE_SCHEMA = {
        'gdax': avro.load(os.path.join(SCHEMAS, 'gdax.avsc')),
        'reddit': avro.load(os.path.join(SCHEMAS, 'reddit.avsc')),
        'twitter': avro.load(os.path.join(SCHEMAS, 'twitter.avsc'))
    }

    @classmethod
    def producer(cls, topic='gdax'):
        ip = cls.determine_ip()
        return AvroProducer(
            {
                'bootstrap.servers': ip + ':9092',
                'schema.registry.url': 'http://' + ip + ':8081'
            },
            # KEY_SCHEMA is a single schema; VALUE_SCHEMA is keyed by topic
            default_key_schema=cls.KEY_SCHEMA,
            default_value_schema=cls.VALUE_SCHEMA[topic])

    @classmethod
    def consumer(cls, topic='gdax', offset='start'):
        ip = cls.determine_ip()
        try:
            _offset = cls.OFFSETS[offset]
            _config = cls.CONFIG[offset]
        except KeyError:
            _config = cls.CONFIG['end']
            _offset = offset

        print(_offset, _config)
        cls.avro_consumer = AvroConsumer(
            dict(
                {
                    'bootstrap.servers': ip + ':9092',
                    'schema.registry.url': 'http://' + ip + ':8081'
                }, **{
                    'group.id': str(uuid.uuid1()).split('-')[0],
                    'default.topic.config': {
                        'auto.offset.reset': 'beginning',
                        'auto.commit.enable': 'false'
                    }
                }))
        cls.avro_consumer.assign(
            [TopicPartition(topic, partition=0, offset=_offset)])

        return cls

    @staticmethod
    def determine_ip():
        try:
            return os.environ['KAFKA_SERVER_IP']
        except KeyError:
            return 'localhost'
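
Typical use of KafkaStream (a sketch assuming a broker on port 9092 and a schema registry on port 8081, reachable via KAFKA_SERVER_IP or localhost):

# Produce with the class-level schemas, then read the topic from the start.
producer = KafkaStream.producer('gdax')
# producer.produce(key={...}, value={...})  # payloads must match the schemas

stream = KafkaStream.consumer('gdax', offset='start')
msg = stream.avro_consumer.poll(1.0)
if msg is not None and not msg.error():
    print(msg.key(), msg.value())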
Example #21
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #

        # TODO: Come up with a better topic name
        topic_name = f"org.chicago.cta.station.arrivals.{station_name}"

        # TODO: Include/fill the following in the call to super.__init__():
        #       value_schema=Station.value_schema,
        #       num_partitions=???,
        #       num_replicas=???,

        # call the super to instantiate super's vars also incl. self.producer
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=3,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        #

        # schemas have already been set in instance creation hence commented out
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            # key_schema=Station.key_schema,
            # value_schema=Station.value_schema,
            value={
                "station_id": self.station_id,
                "train_id": train.train_id,
                "direction": direction,
                "line": self.color.name,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            },
        )

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | " \
               "departing to {:<30} | ".format(
                self.station_id,
                self.name,
                self.a_train.train_id if self.a_train is not None else "---",
                self.dir_a.name if self.dir_a is not None else "---",
                self.b_train.train_id if self.b_train is not None else "---",
                self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
def test_schema_load_parse_error(self):
    with pytest.raises(avro.ClientError) as excinfo:
        avro.load(data_gen.get_schema_path("invalid_scema.avsc"))
    assert 'Schema parse failed:' in str(excinfo.value)
Example #23
class Turnstile(Producer):
    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_key.json")

    #
    # TODO: Define this value schema in `schemas/turnstile_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_value.json")

    def __init__(self, station):
        """Create the Turnstile"""
        station_name = (station.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        super().__init__(
            station_name,  # TODO: Come up with a better topic name
            key_schema=Turnstile.key_schema,
            value_schema=Turnstile.value_schema,
            num_partitions=3,
            num_replicas=1,
        )
        self.station = station
        self.turnstile_hardware = TurnstileHardware(station)

    def run(self, timestamp, time_step):
        """Simulates riders entering through the turnstile."""
        num_entries = self.turnstile_hardware.get_entries(timestamp, time_step)
        ##logger.info("turnstile kafka integration incomplete - skipping")
        #
        #
        # TODO: Complete this function by emitting a message to the turnstile topic for the number
        # of entries that were calculated
        #
        #

        turnstile_key = {"timestamp": self.time_millis()}
        turnstile_val = {
            "station_id": self.station.station_id,
            "station_name": self.station.name,
            "line": self.station.color
        }
        print('\n\n\n\nTopic: ', self.topic_name)

        print('Producer: ', self.producer.produce)
        print('key_schema: ', self.key_schema)
        print('Key: ', turnstile_key)
        print('value_schema: ', self.value_schema)
        print('Val: ', turnstile_val)
        self.producer.produce(topic=self.topic_name,
                              key_schema=self.key_schema,
                              key=turnstile_key,
                              value_schema=self.value_schema,
                              value=turnstile_val)

        print('Producers issa go!\n\n\n\n')
Example #24
MESSAGE_INTERVAL_SECONDS = 5
topic = "ingester"

def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))

conf = {
    "bootstrap.servers": "10.227.52.245:31090,10.227.52.246:31091,10.227.52.247:31092",
    "on_delivery": delivery_report,
    "schema.registry.url": "http://10.227.52.247:30553"
}
key_schema = avro.load("./schemas/{}-key.avsc".format(topic))
value_schema = avro.load("./schemas/{}-value.avsc".format(topic))

avroProducer = AvroProducer(conf, default_key_schema=key_schema, default_value_schema=value_schema)

with open('./data/example_ingest_messages.json', 'r') as json_file:
    messages = json.load(json_file)
    # assuming the JSON file is an object mapping keys to values
    for key, value in messages.items():
        print(topic, key, value)
        avroProducer.produce(topic=topic, key=key, value=value)
        avroProducer.poll(0)
        time.sleep(MESSAGE_INTERVAL_SECONDS)
avroProducer.flush()
Example #25
import asyncio
import random
from datetime import datetime
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
import numpy as np
from scipy.stats import t, norm, multinomial

event_1_schema = avro.load('/Users/mbarak/projects/github/showcase/core/src/main/resources/Event.avsc')
key_schema = avro.load('/Users/mbarak/projects/github/showcase/core/src/main/resources/UserKey.avsc')
event_2_schema = avro.load('/Users/mbarak/projects/github/showcase/core/src/main/resources/Event2.avsc')

users = [i for i in range(100)]

event_1_producer = AvroProducer(
    {'bootstrap.servers': 'localhost:9092',
     'schema.registry.url': 'http://localhost:8081'},
    default_value_schema=event_1_schema,
    default_key_schema=key_schema)
event_2_producer = AvroProducer(
    {'bootstrap.servers': 'localhost:9092',
     'schema.registry.url': 'http://localhost:8081'},
    default_value_schema=event_2_schema,
    default_key_schema=key_schema)


async def generate_event_1(users, producer):
    n11 = norm(10, 2)
    n12 = norm(20, 5)
    rv = multinomial(1, [0.3, 0.2, 0.5])

    def gen_event(user):
        return (
            "user_%s" % user,
            {
                "userId": "user_%s" % user,
                "userValue1": round(n11.rvs() if user % 4 == 0 else n12.rvs(), 2),
                "userValue2": int(np.argmax(rv.rvs())),
                "timestamp": int((datetime.utcnow() - datetime(1970, 1, 1)).total_seconds() * 1000)
            })

    # produce one event per user; the "event-1" topic name is an assumption
    for user in users:
        key, value = gen_event(user)
        producer.produce(topic="event-1", key=key, value=value)
    producer.flush()
    await asyncio.sleep(random.random())
Example #26
def verify_avro_https():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests',
                            'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request
    }

    conf.update(testconf.get('schema_registry_https', {}))

    p = avro.AvroProducer(conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=float_value,
             key_schema=prim_float),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=str_value,
             key_schema=prim_string),
        dict(value=float_value,
             value_schema=prim_float,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=float_value,
             value_schema=prim_float,
             key=str_value,
             key_schema=prim_string),
        dict(value=str_value,
             value_schema=prim_string,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=str_value,
             value_schema=prim_string,
             key=float_value,
             key_schema=prim_float),
        # Verify identity check allows Falsy object values(e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='',
             value_schema=prim_string,
             key=0.0,
             key_schema=prim_float),
        dict(value=0.0,
             value_schema=prim_float,
             key='',
             key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': generate_group_id(),
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }

    conf.update(testconf.get('schema_registry_https', {}))

    c = avro.AvroConsumer(conf)
    c.subscribe([(t['topic']) for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(0)

        if msg is None or msg.error():
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(), msg.key(),
               msg.value(), tstype, timestamp))

        # omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])

        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items() if v is not None}

        if isinstance(msg.value(), dict):
            record_value = {
                k: v
                for k, v in msg.value().items() if v is not None
            }

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
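
# For reference, a sketch of the shape the testconf 'schema_registry_https'
# entry used above might take; the URL and file paths below are assumptions,
# not values from the original source:
#
# testconf = {
#     'schema_registry_https': {
#         'schema.registry.url': 'https://localhost:8082',
#         'schema.registry.ssl.ca.location': '/path/to/ca.pem',
#         'schema.registry.ssl.certificate.location': '/path/to/client.pem',
#         'schema.registry.ssl.key.location': '/path/to/client.key',
#     }
# }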
Example #27
from typing import Tuple

from avro.schema import Schema  # schema type returned by confluent_kafka's avro.load
from confluent_kafka import avro


def load_avro_schema_from_file(
        key_schema_file: str, value_schema_file: str) -> Tuple[Schema, Schema]:
    key_schema = avro.load(key_schema_file)
    value_schema = avro.load(value_schema_file)

    return key_schema, value_schema
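
# A minimal usage sketch for load_avro_schema_from_file; the .avsc paths,
# topic, and broker/registry addresses are assumptions, not from the original
# source:
from confluent_kafka.avro import AvroProducer

key_schema, value_schema = load_avro_schema_from_file(
    "avro/primitive_string.avsc", "avro/basic_schema.avsc")
producer = AvroProducer(
    {'bootstrap.servers': 'localhost:9092',          # assumed broker address
     'schema.registry.url': 'http://localhost:8081'},  # assumed registry address
    default_key_schema=key_schema,
    default_value_schema=value_schema)
producer.produce(topic='test', key='mykey', value={'name': 'abc'})
producer.flush()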
def run_avro_loop(producer_conf, consumer_conf):
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')

    p = avro.AvroProducer(producer_conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=float_value,
             key_schema=prim_float),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=str_value,
             key_schema=prim_string),
        dict(value=float_value,
             value_schema=prim_float,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=float_value,
             value_schema=prim_float,
             key=str_value,
             key_schema=prim_string),
        dict(value=str_value,
             value_schema=prim_string,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=str_value,
             value_schema=prim_string,
             key=float_value,
             key_schema=prim_float),
        # Verify the identity check allows falsy object values (e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='',
             value_schema=prim_string,
             key=0.0,
             key_schema=prim_float),
        dict(value=0.0,
             value_schema=prim_float,
             key='',
             key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf)
    c.subscribe([t['topic'] for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)

        if msg is None:
            continue
        if msg.error():
            print(msg.error())
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(), msg.key(),
               msg.value(), tstype, timestamp))

        # omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])

        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items() if v is not None}

        if isinstance(msg.value(), dict):
            record_value = {
                k: v
                for k, v in msg.value().items() if v is not None
            }

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
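
# A minimal driver sketch for run_avro_loop; the broker and schema-registry
# addresses are assumptions, not from the original source:
import uuid

producer_conf = {'bootstrap.servers': 'localhost:9092',
                 'schema.registry.url': 'http://localhost:8081'}
consumer_conf = dict(producer_conf,
                     **{'group.id': str(uuid.uuid4()),
                        'auto.offset.reset': 'earliest'})
run_avro_loop(producer_conf, consumer_conf)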
Example #29
#!/usr/bin/env python
# coding: utf-8

from bs4 import BeautifulSoup
import requests
import re
from time import sleep
import json
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

url = input("geef de url van de video pagina: ")

topic_name = input("geef de naam van de topic: ")

value_schema = avro.load('schema/ValueSchema.avsc')
key_schema = avro.load('schema/KeySchema.avsc')


avroProducer = AvroProducer(
    {'message.max.bytes' : '15728640', 'bootstrap.servers': '127.0.0.1:9092', 'schema.registry.url': 'http://127.0.0.1:8081'}, 
    default_key_schema=key_schema, default_value_schema=value_schema
)


def getM3U8_1(json_obj):
    data = json.loads(json_obj)  # parse once instead of twice
    return data.get("text"), data.get("video").get("video_url")



def getM3U8_2():
    ...  # body truncated in the original source

class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))
        topic_name = f"{constants.STATION_TOPIC_PREFIX}.{station_name}"
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color.name
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        logger.info("arrival kafka integration")
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                "station_id": self.station_id,
                "train_id": train.train_id,
                "direction": direction,
                "line": self.color,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            },
        )

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
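
# A usage sketch for Station; the Producer base class, Line colors, and train
# objects come from the surrounding project, so everything below is an
# assumption rather than code from the original source:
#
# station = Station(station_id=40530, name="Addison", color=Line.colors.blue)
# station.arrive_a(train, prev_station_id=40520, prev_direction="a")
# station.close()
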
class Turnstile(Producer):
    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_key.json")

    #
    # TODO: Define this value schema in `schemas/turnstile_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_value.json")

    def __init__(self, station):
        """Create the Turnstile"""
        station_name = (station.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        super().__init__(
            "org.chicago.cta.turnstile",  # TODO: Come up with a better topic name
            key_schema=Turnstile.key_schema,
            value_schema=Turnstile.value_schema,
            num_partitions=3,
            num_replicas=1)
        self.station = station
        self.turnstile_hardware = TurnstileHardware(station)

    def run(self, timestamp, time_step):
        """Simulates riders entering through the turnstile."""
        try:
            num_entries = self.turnstile_hardware.get_entries(
                timestamp, time_step)
            #logger.info("turnstile kafka integration incomplete - skipping")
            #
            #
            # TODO: Complete this function by emitting a message to the turnstile topic for the number
            # of entries that were calculated
            #
            #

            # make sure the arrival events to kafka are paired with Avro key and value schemas
            logger.info(" %s people entered this station %s ", num_entries,
                        self.station.name)
            #
            for _ in range(num_entries):
                self.producer.produce(
                    topic=self.topic_name,
                    key={"timestamp": self.time_millis()},
                    value={
                        # TODO: Configure this
                        "station_id": self.station.station_id,
                        "station_name": self.station.name,
                        "line": self.station.color.name
                    },
                )
        except Exception as e:
            logger.error(
                "Turnstile failed to write to topic {} with exception {}".
                format(self.topic_name, e))
            logger.error("schema : {}".format(Turnstile.value_schema))
            logger.error("value : {}, {}, {}".format(self.station.station_id,
                                                     self.station.name,
                                                     self.station.color.name))
Example #32
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        topic_name = 'station.arrivals.fav'  # TODO: Come up with a better topic name
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        # TODO: Complete this function by producing an arrival message to Kafka
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                'station_id': self.station_id,
                'train_id': train.train_id,
                'direction': direction,
                'train_status': train.status.name,
                'line': self.color.name,
                'prev_station_id': prev_station_id,
                'prev_direction': prev_direction
            },
        )

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        topic_name = "org.chicago.cta.station.arrivals.v1"  # old "com.udacity.cta.station.arrival"
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        #
        logger.info("arrival kafka integration complete !!")
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                'station_id': self.station_id,
                'train_id': train.train_id,
                'direction': direction,
                'line': self.color.name,
                'train_status': train.status.name,
                'prev_station_id': prev_station_id,
                'prev_direction': prev_direction,
            },
        )

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
class Station(Producer):
    """Defines a single station"""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")

    #
    # TODO: Define this value schema in `schemas/station_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self,
                 station_id,
                 name,
                 color,
                 direction_a=None,
                 direction_b=None):
        self.name = name
        station_name = (self.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        topic_name = f"{ARRIVALS_TOPIC_PREFIX}{station_name}"  # TODO: Come up with a better topic name
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Simulates train arrivals at this station"""
        #
        #
        # TODO: Complete this function by producing an arrival message to Kafka
        #
        #
        value = {
            "station_id": int(self.station_id),
            "train_id": str(train.train_id),
            "direction": str(direction),
            "line": str(self.color.name),
            "train_status": str(train.status.value),
            "prev_station_id": str(prev_station_id),
            "prev_direction": str(prev_direction),
        }

        self.producer.produce(topic=self.topic_name,
                              key={"timestamp": self.time_millis()},
                              value=value)

    def __str__(self):
        return "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | Direction B: | {:^5} | departing to {:<30} | ".format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.turnstile.close()
        super(Station, self).close()
Example #35
class Turnstile(Producer):
    """Defines a turnstile in a train station."""

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_key.json")

    value_schema = avro.load(
       f"{Path(__file__).parents[0]}/schemas/turnstile_value.json"
    )

    curve_df = pd.read_csv(
        f"{Path(__file__).parents[1]}/data/ridership_curve.csv"
    )
    seed_df = pd.read_csv(
        f"{Path(__file__).parents[1]}/data/ridership_seed.csv"
    )

    def __init__(self, station_id: int, station_name: str, color: str):
        super().__init__(config["TOPIC"]["TURNSTILE"],
                         key_schema=self.key_schema,
                         value_schema=self.value_schema,
                         num_partitions=1,
                         num_replicas=1)

        self._station_name = station_name
        self._station_id = station_id
        self._color = color

        self._metrics_df = self.seed_df[
            self.seed_df["station_id"] == station_id]
        self._weekday_ridership = int(
            round(self._metrics_df.iloc[0]["avg_weekday_rides"])
        )
        self._saturday_ridership = int(
            round(self._metrics_df.iloc[0]["avg_saturday_rides"])
        )
        self._sunday_ridership = int(
            round(self._metrics_df.iloc[0]["avg_sunday-holiday_rides"])
        )

        self._steps_per_hour = \
            float(config['PARAM']['TIMER_UPDATE_TIME_INTERVAL']) / \
            float(config['PARAM']['CTA_LINE_UPDATE_INTERVAL'])

    def _get_entries(self):
        """Returns the number of turnstile entries."""
        dow = timer.weekday
        if 0 <= dow < 5:
            num_riders = self._weekday_ridership
        elif dow == 6:
            num_riders = self._saturday_ridership
        else:
            num_riders = self._sunday_ridership

        hour_curve = self.curve_df[self.curve_df["hour"] == timer.hour]
        hour_ratio = hour_curve.iloc[0]["ridership_ratio"]

        num_entries = num_riders * hour_ratio / self._steps_per_hour
        num_entries *= random.uniform(0.8, 1.2)
        return round(num_entries)

    async def _produce(self):
        self._producer.produce(
            topic=self._topic_name,
            key={"timestamp": self.time_millis()},
            key_schema=self._key_schema,
            value={
                "station_id": self._station_id,
                "station_name": self._station_name,
                "line": self._color
            },
            value_schema=self._value_schema
        )

    async def run(self):
        """Override."""
        n_entries = self._get_entries()
        ret = asyncio.create_task(asyncio.sleep(0))
        if n_entries > 0:
            ret = asyncio.gather(*[asyncio.create_task(self._produce())
                                 for _ in range(n_entries)])

        logger.debug(f"{n_entries} entries in {self._station_name}")
        return ret
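
# A sketch of driving the async run(); run() returns the gathered produce
# tasks, so the caller awaits them. The constructor arguments and event-loop
# wiring below are assumptions, not from the original source:
#
# async def main():
#     turnstile = Turnstile(station_id=40530, station_name="Addison", color="blue")
#     while True:
#         pending = await turnstile.run()
#         await pending
#         await asyncio.sleep(1)
#
# asyncio.run(main())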
Example #36
class Turnstile(Producer):
    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_key.json")

    #
    # TODO: Define this value schema in `schemas/turnstile_value.json`, then uncomment the below
    #
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/turnstile_value.json")

    def __init__(self, station):
        """Create the Turnstile"""
        station_name = (station.name.lower().replace("/", "_and_").replace(
            " ", "_").replace("-", "_").replace("'", ""))

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #
        super().__init__(
            topic_name="org.chicago.cta.station.turnstile.v1",
            key_schema=Turnstile.key_schema,
            value_schema=Turnstile.value_schema,
            num_partitions=4,
            num_replicas=1,
        )
        self.station = station
        self.turnstile_hardware = TurnstileHardware(station)

    def run(self, timestamp, time_step):
        """Simulates riders entering through the turnstile."""
        num_entries = self.turnstile_hardware.get_entries(timestamp, time_step)
        logger.info("turnstile kafka integration incomplete - skipping")
        #
        #
        # TODO: Complete this function by emitting a message to the turnstile topic for the number
        # of entries that were calculated
        #
        #
        logger.debug(
            "%s riders have entered station %s at %s",
            num_entries,
            self.station.name,
            timestamp.isoformat(),
        )

        for _ in range(num_entries):
            try:
                self.producer.produce(
                    topic=self.topic_name,
                    key={"timestamp": self.time_millis()},
                    value={
                        "station_id": self.station.station_id,
                        "station_name": self.station.name,
                        "line": self.station.color.name,
                    },
                )
            except Exception as e:
                logger.fatal(e)
                raise e
class Turnstile(Producer):
    key_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/turnstile_key.json")

    #
    # TODO: Define this value schema in `schemas/turnstile_value.json`, then uncomment the below
    #
    value_schema = avro.load(
       f"{Path(__file__).parents[0]}/schemas/turnstile_value.json"
    )

    def __init__(self, station):
        """Create the Turnstile"""
        station_name = (
            station.name.lower()
            .replace("/", "_and_")
            .replace(" ", "_")
            .replace("-", "_")
            .replace("'", "")
        )

        #
        #
        # TODO: Complete the below by deciding on a topic name, number of partitions, and number of
        # replicas
        #
        #        
        super().__init__(
            "org.chicago.cta.turnstile", # TODO: Come up with a better topic name
            key_schema=Turnstile.key_schema,
            value_schema=Turnstile.value_schema,
            num_partitions=1,
            num_replicas=1,
        )
        self.station = station
        self.turnstile_hardware = TurnstileHardware(station)

    def run(self, timestamp, time_step):
        """Simulates riders entering through the turnstile."""
        num_entries = self.turnstile_hardware.get_entries(timestamp, time_step)
        logger.info("turnstile kafka integration data")
        #
        #
        # TODO: Complete this function by emitting a message to the turnstile topic for the number
        # of entries that were calculated
        #
        #   
        logger.info(f"Start emitting message to turnstile topic {self.topic_name}")
        for _ in range (num_entries):
            self.producer.produce(
                topic=self.topic_name,
                key={"timestamp": self.time_millis()},
                value={
                    "station_id": self.station.station_id,
                    "station_name":self.station.name,
                    "line": self.station.color.name,
                },
                value_schema=self.value_schema,
                key_schema=self.key_schema
            )
            
        logger.info(f"Turnstile info emmited for topic {self.topic_name}")
            
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
from lipsum import generate_words
import os
import random

SCHEMA_REGISTRY_URL = 'http://172.17.0.5:8081'
BOOTSTRAP_SERVERS = '172.17.0.4'

AVSC_DIR = os.path.dirname(os.path.realpath(__file__))
KEY_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'primitive_string.avsc'))
VALUE_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'basic_schema.avsc'))

TOPIC = 'avrotopic'
KEY = "mykey"

avroProducer = AvroProducer({'bootstrap.servers': BOOTSTRAP_SERVERS,
                             'schema.registry.url': SCHEMA_REGISTRY_URL},
                            default_key_schema=KEY_SCHEMA,
                            default_value_schema=VALUE_SCHEMA)


for i in range(100):
    value = {"name": generate_words(count=1),
             "surname": generate_words(count=2),
             "number": random.randint(0, 100)}

    print(value)

    avroProducer.produce(topic=TOPIC,
                         value=value,
                         key=KEY)  # key=KEY assumed; the original snippet is truncated here
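
# A flush would normally follow the produce loop to guarantee delivery before
# exit (assumed; the original snippet is truncated):
avroProducer.flush()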
Example #39
def verify_avro():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests', 'avro')

    # Producer config
    conf = {'bootstrap.servers': bootstrap_servers,
            'error_cb': error_cb,
            'api.version.request': api_version_request,
            'default.topic.config': {'produce.offset.report': True}}

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float, key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string, key=float_value, key_schema=prim_float),
    ]

    # Consumer config
    cons_conf = {'bootstrap.servers': bootstrap_servers,
                 'group.id': 'test.py',
                 'session.timeout.ms': 6000,
                 'enable.auto.commit': False,
                 'api.version.request': api_version_request,
                 'on_commit': print_commit_result,
                 'error_cb': error_cb,
                 'default.topic.config': {
                     'auto.offset.reset': 'earliest'
                 }}

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf, schema_registry=InMemorySchemaRegistry())
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), tstype, timestamp))

            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()