def _make_offset_consumer(self) -> DeserializingConsumer:
        """
        Creates the underlying instance of :class:`confluent_kafka.avro.AvroConsumer` which is used to fetch the last
        committed producer offsets.
        """

        key_deserializer = AvroDeserializer(self.schema_registry_client)
        value_deserializer = AvroDeserializer(self.schema_registry_client)

        config = {
            "bootstrap.servers": self.config["bootstrap.servers"],
            "key.deserializer": key_deserializer,
            "value.deserializer": value_deserializer,
            "enable.partition.eof": True,
            "group.id": f'{self.config["offset_topic"]}_fetcher',
            "default.topic.config": {
                "auto.offset.reset": "latest"
            },
            **self.config["kafka_opts"],
            **self.config["kafka_consumer_opts"],
        }

        offset_consumer = DeserializingConsumer(config)

        logger.info(
            f"Offset Consumer created with config: {pformat(config, indent=2)}"
        )
        return offset_consumer
Example #2
    def _make_consumer(self) -> DeserializingConsumer:
        schema_registry_client = SchemaRegistryClient(
            {"url": self.config["schema_registry"]})
        key_deserializer = AvroDeserializer(schema_registry_client)
        value_deserializer = AvroDeserializer(schema_registry_client)

        config = {
            "bootstrap.servers": ",".join(self.config["bootstrap_servers"]),
            "key.deserializer": key_deserializer,
            "value.deserializer": value_deserializer,
            "enable.auto.commit": False,
            "enable.partition.eof": True,
            "group.id": self.config["group_id"],
            "default.topic.config": {
                "auto.offset.reset": "earliest"
            },
            **self.config["kafka_opts"],
        }

        hash_sensitive_values = self.config["hash_sensitive_values"]
        consumer = DeserializingConsumer(config)
        hidden_config = hide_sensitive_values(
            config, hash_sensitive_values=hash_sensitive_values)
        logger.info(
            f"AvroConsumer created with config: {pformat(hidden_config, indent=2)}"
        )
        # noinspection PyArgumentList
        consumer.subscribe(self.config["topics"],
                           on_assign=self._on_assign,
                           on_revoke=self._on_revoke)
        return consumer
Example #3
def main(args):
    topic = args.topic

    key_schema_str = open('schema/KeySchema.avsc', "r").read()
    value_schema_str = open('schema/ValueSchema.avsc', "r").read()

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_key_deserializer = AvroDeserializer(key_schema_str,
                                             schema_registry_client,
                                             dict_to_user_quote_key)
    avro_value_deserializer = AvroDeserializer(value_schema_str,
                                               schema_registry_client,
                                               dict_to_user_quote_value)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': avro_key_deserializer,
        'value.deserializer': avro_value_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user_quote = msg.value()
            if user_quote is not None:
                print("User {} Quote record: product_id: {}\n"
                      "\tquoted_price: {}\n"
                      "\tquoted_quantity: {}\n"
                      "\tuser_note: {}\n".format(msg.key().user_id,
                                                 user_quote.product_id,
                                                 user_quote.quoted_price,
                                                 user_quote.quoted_quantity,
                                                 user_quote.user_note))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #4
def run_consumer(shutdown_flag, clients, lock):
    print("Starting Kafka Consumer.")
    schema_registry_client = SchemaRegistryClient(
        {"url": "http://localhost:8081"})
    deserializer = AvroDeserializer(schema_registry_client)
    config = {
        "bootstrap.servers": "localhost:9092",
        "group.id": "dashboard-demo",
        "value.deserializer": deserializer
    }

    consumer = DeserializingConsumer(config)
    consumer.subscribe(["DASHBOARD"])

    while not shutdown_flag.done():
        msg = consumer.poll(0.2)

        if msg is None:
            print("Waiting...")
        elif msg.error():
            print(f"ERROR: {msg.error()}")
        else:
            value = msg.value()
            formatted = simplejson.dumps(value)
            print(f"Sending {formatted} to {clients}")

            with lock:
                websockets.broadcast(clients, formatted)

    print("Closing Kafka Consumer")
    consumer.close()
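
# A minimal launch sketch (assumed, not from the original source): run_consumer
# expects a future-like shutdown_flag plus a shared set of websocket clients and
# a lock guarding it, so it can run in a background thread while the websocket
# server fills the client set. The names below are illustrative assumptions.
import threading
from concurrent.futures import Future

clients = set()           # connected websocket clients, registered elsewhere
lock = threading.Lock()   # guards concurrent access to the clients set
shutdown_flag = Future()  # run_consumer loops until shutdown_flag.done()

consumer_thread = threading.Thread(
    target=run_consumer, args=(shutdown_flag, clients, lock), daemon=True)
consumer_thread.start()

# ...later, to stop the consumer loop:
# shutdown_flag.set_result(True)
# consumer_thread.join()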
def test_avro_record_serialization_custom(kafka_cluster):
    """
    Tests basic Avro serializer to_dict and from_dict object hook functionality.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    user = User('Bowie', 47, 'purple')
    value_serializer = AvroSerializer(sr, User.schema_str,
                                      lambda user, ctx:
                                      dict(name=user.name,
                                           favorite_number=user.favorite_number,
                                           favorite_color=user.favorite_color))

    value_deserializer = AvroDeserializer(sr, User.schema_str,
                                          lambda user_dict, ctx:
                                          User(**user_dict))

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    producer.produce(topic, value=user, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    user2 = msg.value()

    assert user2 == user
Example #6
 def _make_deserializer(self):
     return {
         SchemaType.AVRO:
         AvroDeserializer(self.sr_client,
                          AVRO_SCHEMA,
                          from_dict=lambda d, _: AvroPayload(d['val'])),
         SchemaType.PROTOBUF:
         ProtobufDeserializer(ProtobufPayloadClass)
     }[self.schema_type]
Example #7
 def create_deserializer(self):
     self.deserializer = {}
     if self.in_topic is not None:
         for topic in self.in_topic:
             if self.in_schema[topic] is None:
                 self.deserializer[topic] = StringDeserializer("utf_8")
             else:
                 schema_str = self.in_schema[topic].schema_str
                 self.deserializer[topic] = AvroDeserializer(
                     schema_str, self.schema_registry)
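
 # A minimal usage sketch (assumed, not part of the original class): the
 # per-topic deserializers built above are plain callables, so they can be
 # applied to raw message bytes with a SerializationContext naming the topic
 # and the field being decoded.
 def deserialize_value(self, msg):
     from confluent_kafka.serialization import MessageField, SerializationContext

     ctx = SerializationContext(msg.topic(), MessageField.VALUE)
     return self.deserializer[msg.topic()](msg.value(), ctx)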
Example #8
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})

    avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")

                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(
                            tenant, AIRQO_BASE_URL)

                        group_measurements = list(
                            group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements),
                                       int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[
                                i:i + int(REQUEST_BODY_SIZE)]

                            device_registry.insert_events(measurements_list)

                except Exception as ex:
                    print(ex)

        except KeyboardInterrupt:
            break

    consumer.close()
Example #9
def test_delivery_report_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file name
        data (object): data to be serialized

    Raises:
        AssertionError on test failure

    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()
    schema_str = load_avsc(avsc)

    value_serializer = AvroSerializer(sr, schema_str)

    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        actual = value_deserializer(SerializationContext(topic,
                                                         MessageField.VALUE),
                                    msg.value())

        if record_type == "record":
            assert all(v == actual[k] for k, v in data.items())
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # schema may include default which need not exist in the original
    if record_type == 'record':
        assert all(v == actual[k] for k, v in data.items())
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
Example #10
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #11
def plain_avro_consumer(running_cluster_config: Dict[str, str],
                        topic_and_partitions: Tuple[str, int]):
    topic_id, _ = topic_and_partitions
    schema_registry_client = SchemaRegistryClient(
        {"url": running_cluster_config["schema-registry"]})
    key_deserializer = AvroDeserializer(schema_registry_client)
    value_deserializer = AvroDeserializer(schema_registry_client)
    config = {
        "bootstrap.servers": running_cluster_config["broker"],
        "group.id": f"{topic_id}_consumer",
        "key.deserializer": key_deserializer,
        "value.deserializer": value_deserializer,
        "enable.partition.eof": False,
        "default.topic.config": {
            "auto.offset.reset": "earliest"
        },
        "allow.auto.create.topics": True,
    }
    consumer = DeserializingConsumer(config)
    consumer.subscribe([topic_id])
    consumer.list_topics()
    return consumer
Example #12
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 group: str,
                 callback: Callable[[Message], None],
                 schema_registry_url,
                 schema,
                 poll_timeout: float = 1.0,
                 config=None):

        super().__init__(
            bootstrap_servers, topic, group, callback,
            AvroDeserializer(
                schema, SchemaRegistryClient({"url": schema_registry_url})),
            poll_timeout, config)
Example #13
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager,
                                           msg.value())).start()
            except ConsumeError as e:
                print(
                    f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}'
                )
    finally:
        consumer.close()
def process_row(serialized_data):
    schema = '''
    {
    "namespace": "org.mddarr.rides.event.dto",
     "type": "record",
     "name": "AvroRideCoordinate",
     "fields": [
         {"name": "dataID", "type": "string"},
         {"name": "value", "type": "double"}
     ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient(
        {"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
    serializationContext = SerializationContext("time-series", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    return str(deserialized_row['value'])
Example #15
def main(args):
    topic = args.topic
    schema_str = MetricSchema
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])
    client = InfluxDBClient(host=args.host_influx,
                            port=8086,
                            username='******',
                            password='******')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            timespent = msg.value()
            if timespent is not None:
                print("time ==>", timespent)
                print(timespent["metricName"])
                print(timespent["time"])
                client.switch_database('datascience')
                json_body = [{
                    "measurement": "metric",
                    "fields": {
                        "name": timespent["metricName"],
                        "value": timespent["time"]
                    }
                }]
                client.write_points(json_body)
        except KeyboardInterrupt:
            break
    consumer.close()
Example #16
    def create_consumer(self, registry_client):
        """
        Subscribes to topic defined in configs and creates a consumer to deserialize messages from topic

        :param registry_client: SchemaRegistryClient object
            get this from register_client()

        :return: DeserializingConsumer object
        """
        metadata_schema = None
        topic = None
        if self.metadata_type == "COLLECTION":
            metadata_schema = registry_client.get_latest_version(
                self.collection_topic + '-value').schema.schema_str
            topic = self.collection_topic

        if self.metadata_type == "GRANULE":
            metadata_schema = registry_client.get_latest_version(
                self.granule_topic + '-value').schema.schema_str
            topic = self.granule_topic

        metadata_deserializer = AvroDeserializer(metadata_schema,
                                                 registry_client)

        consumer_conf = {'bootstrap.servers': self.brokers}

        if self.security:
            consumer_conf['security.protocol'] = 'SSL'
            consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
            consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
            consumer_conf['ssl.certificate.location'] = self.conf['security'][
                'certLoc']

        meta_consumer_conf = consumer_conf
        meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
        meta_consumer_conf['value.deserializer'] = metadata_deserializer
        meta_consumer_conf['group.id'] = self.group_id
        meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset

        metadata_consumer = DeserializingConsumer(meta_consumer_conf)
        metadata_consumer.subscribe([topic])
        return metadata_consumer
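
    # A minimal consumption sketch (assumed, not part of the original class):
    # the DeserializingConsumer returned by create_consumer() delivers already
    # deserialized keys and values, so a poll loop only needs to hand them to a
    # callback. handle_message is a hypothetical callable.
    def consume(self, registry_client, handle_message):
        metadata_consumer = self.create_consumer(registry_client)
        try:
            while True:
                msg = metadata_consumer.poll(1.0)
                if msg is None:
                    continue
                handle_message(msg.key(), msg.value())
        except KeyboardInterrupt:
            pass
        finally:
            metadata_consumer.close()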
def process_row(serialized_data):
    schema = '''
    {
    "namespace": "org.mddarr.rides.event.dto",
     "type": "record",
     "name": "AvroRideCoordinate",
     "fields": [
         {"name": "eventime", "type": "long"},
         {"name": "latitude", "type": "double"},
         {"name": "longitude", "type": "double"}
     ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient({"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
    serializationContext = SerializationContext("coordinates", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    print("THE DESERIALIZED ROW LOOKS LIKE " + str(deserialized_row))

    return [deserialized_row['latitude'], deserialized_row['longitude'], float(deserialized_row['eventime'])]
    def __init__(self, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')
        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)
        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
    def __init__(self, consumer_name, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Consumer name for logging purposes
        self.logging_prefix = '['+ consumer_name + '][KafkaAvroConsumer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
 
 
        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')

         # Get Schema for the value
        self.schema_id_value = self.schema_registry_client.get_latest_version(topic_name + "-value").schema_id
        # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
        self.value_schema = self.schema_registry_client.get_schema(self.schema_id_value).schema_str
        print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
        print(self.logging_prefix + ' - Value Schema:')
        print(self.logging_prefix + ' - -------------\n')
        print(self.logging_prefix + ' - ' + self.value_schema + '\n')

        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(self.value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(groupID, autocommit, 
                                                                        self.key_deserializer,
                                                                        self.value_deserializer)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)

        # Print consumer configuration
        EventBackboneConfig.printConsumerConfiguration(self.logging_prefix,self.consumer_conf,self.schema_registry_conf['url'])

        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
Example #20
def test_avro_record_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file name
        data (object): data to be serialized

    Raises:
        AssertionError on test failure

    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    schema_str = load_avsc(avsc)
    value_serializer = AvroSerializer(sr, schema_str)

    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    producer.produce(topic, value=data, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    if record_type == 'record':
        assert all(v == actual[k] for k, v in data.items())
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
Example #21
def receive_record(args):
    """ Receives Record using a DeserializingConsumer & AvroDeserializer """
    topics = [args.topic.rstrip()]

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_deserializer = AvroDeserializer(schema_registry_client, DATA_SCHEMA,
                                         dict_to_data)

    string_deserializer = StringDeserializer('utf_8')

    consumer_config = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': 'earliest'
    }

    consumer = DeserializingConsumer(consumer_config)
    consumer.subscribe(topics)

    print(f'Consuming data records from topic(s) {topics}. ^C to exit.')
    while True:
        try:
            # SIGINT can't be handled while polling; keep the poll timeout bounded.
            msg = consumer.poll(10.0)
            if msg is None:
                print('\t---Waiting. . .')
                continue

            data = msg.value()
            if data is not None:
                print(f'Data record {msg.key()}:\n' f'\tValues: {data}')
        except KeyboardInterrupt:
            break
    print('\nClosing consumer.')
    consumer.close()
Example #22
def main(args):
    topic = args.topic

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    schema_obj = schema_registry_client.get_latest_version(subject_name='example_serde_avro-value')

    avro_deserializer = AvroDeserializer(schema_obj.schema.schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_color}\n"
                      f"\tfavorite_color: {user.favorite_number}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
Example #23
if __name__ == '__main__':

    # Read arguments and configurations and initialize
    args = ccloud_lib.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib.read_ccloud_config(config_file)

    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['basic.auth.user.info']
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    name_avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client,
        schema_str=ccloud_lib.name_schema,
        from_dict=ccloud_lib.Name.dict_to_name)
    count_avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client,
        schema_str=ccloud_lib.count_schema,
        from_dict=ccloud_lib.Count.dict_to_count)

    # for full list of configurations, see:
    #   https://docs.confluent.io/platform/current/clients/confluent-kafka-python/#deserializingconsumer
    consumer_conf = ccloud_lib.pop_schema_registry_params_from_config(conf)
    consumer_conf['key.deserializer'] = name_avro_deserializer
    consumer_conf['value.deserializer'] = count_avro_deserializer
    consumer_conf['group.id'] = 'python_example_group_2'
    consumer_conf['auto.offset.reset'] = 'earliest'
    consumer = DeserializingConsumer(consumer_conf)
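
    # A minimal poll loop sketch (assumed; the example above stops after
    # creating the consumer). Keys deserialize to ccloud_lib.Name objects and
    # values to ccloud_lib.Count objects via the configured deserializers; the
    # attribute names used below are assumptions about those helper classes.
    consumer.subscribe([topic])
    try:
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            name = msg.key()
            count = msg.value()
            if count is not None:
                print(f"Consumed record with key {name.name} and value {count.count}")
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()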
Example #24
import logging

from confluent_kafka import DeserializingConsumer
from confluent_kafka.avro import SerializerError
from confluent_kafka.schema_registry.avro import AvroDeserializer

from avro_schemas.key_schema import key_schema_str
from avro_schemas.value_schema import value_schema_str
from constants import SCHEMA_REGISTRY_CLIENT, Topics, BOOTSTRAP_SERVERS
from utils import reset_to_beginning_on_assign, convert_epoch_to_datetime, reset_to_end_on_assign

consumer = DeserializingConsumer({
    "bootstrap.servers":
    BOOTSTRAP_SERVERS,
    "key.deserializer":
    AvroDeserializer(schema_str=key_schema_str,
                     schema_registry_client=SCHEMA_REGISTRY_CLIENT),
    "value.deserializer":
    AvroDeserializer(schema_str=value_schema_str,
                     schema_registry_client=SCHEMA_REGISTRY_CLIENT),
    "group.id":
    "consumer",
    "auto.offset.reset":
    "earliest"
})

consumer.subscribe(
    topics=[Topics.TOPIC_1.value],
    on_assign=reset_to_beginning_on_assign,
    # on_assign=reset_to_end_on_assign,
)
Example #25
class TestMessages:
    test_messages = [
        'test message 1', 'test message 2', 'test message 3', 'test message 4'
    ]
    topic = 'christian_test'
    conf = kafka_utils.read_config('producer_google_chicago_1.config',
                                   'producer_google_chicago_1')
    schema_registry_conf = {'url': conf['schema.registry.url']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    key_schema_file = portfolio_path + "/kafka" + conf['google.key.schema.file']
    value_schema_file = portfolio_path + "/kafka" + conf[
        'google.value.schema.file']
    key_schema, value_schema = kafka_utils.load_avro_schema_from_file(
        key_schema_file, value_schema_file)
    key_avro_serializer = AvroSerializer(key_schema, schema_registry_client,
                                         google.Key.key_to_dict)
    value_avro_serializer = AvroSerializer(value_schema,
                                           schema_registry_client,
                                           google.Value.value_to_dict)
    key_avro_deserializer = AvroDeserializer(key_schema,
                                             schema_registry_client,
                                             google.Key.dict_to_key)
    value_avro_deserializer = AvroDeserializer(value_schema,
                                               schema_registry_client,
                                               google.Value.dict_to_value)

    def test_producer(self):
        # Read arguments and configurations and initialize
        producer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.serializer': self.key_avro_serializer,
            'value.serializer': self.value_avro_serializer
        }
        producer = SerializingProducer(producer_config)

        delivered_records = 0
        for text in self.test_messages:
            url = 'www.test.com'
            scraper_dt = datetime.now(pytz.timezone('America/Denver'))
            scraper_dt = scraper_dt.strftime("%Y/%m/%d %H:%M:%S %z")
            value_obj = google.Value(text=text, scraper_dt=scraper_dt)
            key_obj = google.Key(url=(url))
            producer.produce(topic=self.topic,
                             key=key_obj,
                             value=value_obj,
                             on_delivery=kafka_utils.acked)
            delivered_records += producer.poll()
        producer.flush()

        assert delivered_records == len(self.test_messages)

    def test_consumer(self):
        consumer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.deserializer': self.key_avro_deserializer,
            'value.deserializer': self.value_avro_deserializer,
            'group.id': '1',
            'auto.offset.reset': 'earliest'
        }
        offset = kafka_utils.offset - len(self.test_messages) + 1
        consumer = DeserializingConsumer(consumer_config)
        partitions = []
        partition = TopicPartition(topic=self.topic,
                                   partition=0,
                                   offset=offset)
        partitions.append(partition)
        consumer.assign(partitions)
        # Process messages
        result = []
        attempt = 0
        while len(result) < len(self.test_messages):
            try:
                msg = consumer.poll(1.0)
                attempt += 1
                if msg is None:
                    print("no message received")
                    if attempt < 10:
                        pass
                    else:
                        break
                elif msg.error():
                    break
                else:
                    value_object = msg.value()
                    text = value_object.text
                    print("adding {} to result".format(text))
                    result.append(text)
            except KeyboardInterrupt:
                break
            except SerializerError as e:
                break
        # Leave group and commit final offsets
        consumer.close()

        assert result == self.test_messages
Example #26
def main():

    sr_conf = {'url': SCHEMA_REGISTRY_URL}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    schema_str = """
    {
        "namespace": "io.confluent.ksql.avro_schemas",
        "name": "User",
        "type": "record",
        "fields":[
        {"name":"DATESTAMP","type":"string"},
        {"name":"TIMESTAMP","type":"string"},
        {"name":"MILLISEC","type":"string"},
        {"name":"LOGLEVEL","type":"string"},
        {"name":"REQUESTID","type":"string"},
        {"name":"RECORDFORMATVERSION","type":"string"},
        {"name":"SOURCEIP","type":"string"},
        {"name":"DNSDOMAIN","type":"string"},
        {"name":"MESSAGETYPE","type":"string"},
        {"name":"OPERATION","type":"string"},
        {"name":"AUTHUSER","type":"string"},
        {"name":"AUTHDOMAIN","type":"string"},
        {"name":"HTTPCODE","type":"string"},
        {"name":"SOURCEBYTES","type":"string"},
        {"name":"RESPONSEBYTES","type":"string"},
        {"name":"ELAPSEDTIME","type":"string"},
        {"name":"DOMAIN","type":"string"},
        {"name":"BUCKET","type":"string"},
        {"name":"OBJECT","type":"string"}
        ]
    }
    """

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            record = msg.value()
            if record is not None:
                if record['OPERATION'] == "POST" and record[
                        'DOMAIN'] != "%28none%29":
                    urllistraw = "http://" + record['DOMAIN'] + "/" + record[
                        'BUCKET'] + "/" + record['OBJECT']
                    urllist = urllistraw[:-1]
                    print(urllist)
                    r = requests.head(urllist)
                    print(r.headers)
                else:
                    continue
        except KeyboardInterrupt:
            break

    consumer.close()
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import MessageField, SerializationContext

schemaRegistryClient = SchemaRegistryClient({"url": "http://*****:*****@'
)
serializationContext = SerializationContext("time-series", schema)

deserialized_message = avroDeserializer(message, serializationContext)
if __name__ == '__main__':

    # Read arguments and configurations and initialize
    args = ccloud_lib.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib.read_ccloud_config(config_file)

    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['schema.registry.basic.auth.user.info']
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    name_avro_deserializer = AvroDeserializer(ccloud_lib.name_schema,
                                              schema_registry_client,
                                              ccloud_lib.Name.dict_to_name)
    count_avro_deserializer = AvroDeserializer(ccloud_lib.count_schema,
                                               schema_registry_client,
                                               ccloud_lib.Count.dict_to_count)

    # for full list of configurations, see:
    #   https://docs.confluent.io/current/clients/confluent-kafka-python/#deserializingconsumer
    consumer_conf = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'sasl.mechanisms': conf['sasl.mechanisms'],
        'security.protocol': conf['security.protocol'],
        'sasl.username': conf['sasl.username'],
        'sasl.password': conf['sasl.password'],
        'key.deserializer': name_avro_deserializer,
        'value.deserializer': count_avro_deserializer,
 def create_avro_deserializer(self, topic_name):
     schema_string = self.load_avro_schema_string(topic_name)
     return AvroDeserializer(schema_string, self.registry_client)
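
 # A minimal wiring sketch (assumed, not part of the original class): the
 # deserializer returned by create_avro_deserializer() can be plugged straight
 # into a DeserializingConsumer configuration. The bootstrap_servers attribute
 # and group_id parameter are illustrative assumptions.
 def create_consumer(self, topic_name, group_id):
     consumer = DeserializingConsumer({
         "bootstrap.servers": self.bootstrap_servers,
         "group.id": group_id,
         "value.deserializer": self.create_avro_deserializer(topic_name),
         "auto.offset.reset": "earliest",
     })
     consumer.subscribe([topic_name])
     return consumer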
if __name__ == '__main__':

    # Read arguments and configurations and initialize
    args = ccloud_lib_rssfeeds.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib_rssfeeds.read_ccloud_config(config_file)

    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['schema.registry.basic.auth.user.info']
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    # schema for value
    value_avro_deserializer = AvroDeserializer(
        ccloud_lib_rssfeeds.value_schema, schema_registry_client,
        ccloud_lib_rssfeeds.Value.dict_to_value)

    # for full list of configurations, see:
    #   https://docs.confluent.io/current/clients/confluent-kafka-python/#deserializingconsumer
    consumer_conf = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'sasl.mechanisms': conf['sasl.mechanisms'],
        'security.protocol': conf['security.protocol'],
        'sasl.username': conf['sasl.username'],
        'sasl.password': conf['sasl.password'],
        #        'key.deserializer': name_avro_deserializer,
        'value.deserializer': value_avro_deserializer,
        'group.id': 'rssfeed-consumer-1',
        'auto.offset.reset': 'earliest'
    }