Example #1
    def _make_consumer(self) -> DeserializingConsumer:
        schema_registry_client = SchemaRegistryClient(
            {"url": self.config["schema_registry"]})
        key_deserializer = AvroDeserializer(schema_registry_client)
        value_deserializer = AvroDeserializer(schema_registry_client)

        config = {
            "bootstrap.servers": ",".join(self.config["bootstrap_servers"]),
            "key.deserializer": key_deserializer,
            "value.deserializer": value_deserializer,
            "enable.auto.commit": False,
            "enable.partition.eof": True,
            "group.id": self.config["group_id"],
            "default.topic.config": {
                "auto.offset.reset": "earliest"
            },
            **self.config["kafka_opts"],
        }

        hash_sensitive_values = self.config["hash_sensitive_values"]
        consumer = DeserializingConsumer(config)
        hidden_config = hide_sensitive_values(
            config, hash_sensitive_values=hash_sensitive_values)
        logger.info(
            f"AvroConsumer created with config: {pformat(hidden_config, indent=2)}"
        )
        # noinspection PyArgumentList
        consumer.subscribe(self.config["topics"],
                           on_assign=self._on_assign,
                           on_revoke=self._on_revoke)
        return consumer
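The subscribe() call above registers rebalance callbacks that are not part of this snippet. A minimal sketch of what hypothetical _on_assign/_on_revoke handlers could look like (the method names come from the snippet, the bodies below are assumptions):

    def _on_assign(self, consumer, partitions):
        # Hypothetical handler: log the partitions this consumer was just assigned.
        logger.info("Assigned partitions: %s", partitions)

    def _on_revoke(self, consumer, partitions):
        # Hypothetical handler: log the partitions being revoked before the rebalance.
        logger.info("Revoked partitions: %s", partitions)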
Example #2
    def __init__(self, consumer=None):
        super().__init__()
        self.logger = logging.getLogger(__package__)
        self.logger.debug("Initializing the consumer")

        self.consumer = consumer
        self.message_handler = HandleMessage()
        self._stop_event = threading.Event()

        # Track whether a message is currently being processed. A plain bool is
        # enough because the subscription is configured to prefetch one message
        # at a time, i.e. this handler should NOT run in parallel.
        self._processing = False

        while self.consumer is None:
            try:
                self.logger.debug("Getting the kafka consumer")

                config = kafka_config_from_env()

                config['key.deserializer'] = StringDeserializer('utf_8')
                config['value.deserializer'] = StringDeserializer('utf_8')
                config['on_commit'] = self.on_commit
                config['group.id'] = GROUP_ID
                config['auto.offset.reset'] = 'earliest'
                self.consumer = DeserializingConsumer(config)
            except KafkaException as err:
                self.logger.error("Could not initialize the consumer: %s", err)
                raise ConnectionException(
                    "Could not initialize the consumer") from err

        self.consumer.subscribe([TRANSACTIONS_TOPIC])
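kafka_config_from_env() is not shown above. A minimal sketch of what such a helper could look like, assuming hypothetical KAFKA_BOOTSTRAP_SERVERS and KAFKA_SECURITY_PROTOCOL environment variables:

import os

def kafka_config_from_env() -> dict:
    # Hypothetical helper: build the base librdkafka config from environment variables.
    config = {'bootstrap.servers': os.environ['KAFKA_BOOTSTRAP_SERVERS']}
    security_protocol = os.environ.get('KAFKA_SECURITY_PROTOCOL')
    if security_protocol:
        config['security.protocol'] = security_protocol
    return config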
Example #3
    def __init__(self, consumer_topic, producer_topic, client_id,
                 bootstrap_servers, consumer_proto_class, producer_proto_class,
                 processor, max_thread_calls):
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.client_id = client_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_proto_class = consumer_proto_class
        self.producer_proto_class = producer_proto_class
        self.processor = processor
        self.max_thread_calls = max_thread_calls

        self.kafka_consumer = DeserializingConsumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.client_id,
            'auto.offset.reset': "earliest",
            'value.deserializer': self.derializer
        })
        self.kafka_consumer.subscribe([self.consumer_topic])

        self.kafka_producer = SerializingProducer({
            'bootstrap.servers': self.bootstrap_servers,
            'queue.buffering.max.messages': 500000,
            'value.serializer': self.serialize
        })

        self.thread_queue = deque(maxlen=self.max_thread_calls)
        self.latest_thread_queue_id = 1
Example #4
def receive():
    json_deserializer = JSONDeserializer(USER_SCHEMA, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')
    consumer_conf = {
        'bootstrap.servers': 'localhost:9092',
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': 'django-kafka',
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([USER_TOPIC])
    """
    The idea is to start the Kafka consumer when the message is sent to the Kafka producer.
    Resulting in two queues: Task Queue and Message/Content Queue.
    Multi-threading might be an overkill for a simple application, hence the for loop (Temporary). 
    """
    for x in range(200):
        try:
            msg = consumer.poll(timeout=5.0)
            if msg is not None:
                user = msg.value()
                if user is not None:
                    print("User record {}: username: {}\n"
                          "\tdata: {}\n".format(msg.key(), user.username,
                                                user.data))

        except Exception as e:
            print('An exception occurred: {}'.format(e))
            logging.error(traceback.format_exc())
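dict_to_user and USER_SCHEMA are defined elsewhere. A minimal sketch of the from_dict conversion the JSONDeserializer expects, assuming a hypothetical User class with the username and data fields printed above:

class User:
    # Hypothetical value object matching USER_SCHEMA.
    def __init__(self, username=None, data=None):
        self.username = username
        self.data = data

def dict_to_user(obj, ctx):
    # from_dict callbacks receive the decoded dict and a SerializationContext.
    if obj is None:
        return None
    return User(username=obj['username'], data=obj['data'])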
Example #5
def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": [ "name", "favorite_number", "favorite_color" ]
    }
    """
    json_deserializer = JSONDeserializer(schema_str, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_color,
                                                      user.favorite_number))
        except KeyboardInterrupt:
            break
Example #6
def set_offsets_to_time(start_from_seconds_ago: int, consumer: confluent_kafka.DeserializingConsumer,
                        partitions: List[confluent_kafka.TopicPartition]) -> None:
    start_from = datetime.now(timezone.utc) - timedelta(seconds=start_from_seconds_ago)
    logger.info('Setting consumer offsets to start from %s', start_from)
    for p in partitions:
        p.offset = int(start_from.timestamp() * 1000)  # offsets_for_times() expects the target timestamp (in ms) in the offset field
    consumer.assign(partitions)
    for p in consumer.offsets_for_times(partitions):
        logger.debug('Topic %s partition %s SEEKing to offset %s', p.topic, p.partition, p.offset)
        consumer.seek(p)
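A hedged usage sketch: building the partition list for a topic via list_topics() and rewinding it by ten minutes (the topic name 'events' is an assumption):

# Hypothetical usage: rewind all partitions of 'events' by ten minutes.
metadata = consumer.list_topics('events', timeout=10)
partitions = [confluent_kafka.TopicPartition('events', pid)
              for pid in metadata.topics['events'].partitions]
set_offsets_to_time(600, consumer, partitions)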
Example #7
def _init_consumer(topics: List[str], config: Dict) -> Consumer:
    """config must contain:
        `bootstrap.servers`
        `group.id`
    but may contain any other Kafka setting as well
    """
    assert "bootstrap.servers" in config
    assert "group.id" in config
    consumer = DeserializingConsumer(config)
    consumer.subscribe(topics)
    return consumer
Example #8
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_registry_client, schema_str,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_color,
                                                      user.favorite_number))
        except KeyboardInterrupt:
            break
Example #9
    def consumer(self,
                 conf=None,
                 key_deserializer=None,
                 value_deserializer=None):
        """
        Returns a consumer bound to this cluster.

        Args:
            conf (dict): Consumer config overrides

            key_deserializer (Deserializer): deserializer to apply to
                message key

            value_deserializer (Deserializer): deserializer to apply to
                message value

        Returns:
            Consumer: A new DeserializingConsumer instance

        """
        consumer_conf = self.client_conf({
            'group.id': str(uuid1()),
            'auto.offset.reset': 'earliest'
        })

        if conf is not None:
            consumer_conf.update(conf)

        if key_deserializer is not None:
            consumer_conf['key.deserializer'] = key_deserializer

        if value_deserializer is not None:
            consumer_conf['value.deserializer'] = value_deserializer

        return DeserializingConsumer(consumer_conf)
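A hedged usage sketch against such a cluster fixture (the topic name and deserializer choice are assumptions):

# Hypothetical usage: consume UTF-8 strings from a test topic with a fresh group id.
consumer = cluster.consumer(
    conf={'auto.offset.reset': 'latest'},
    value_deserializer=StringDeserializer('utf_8'),
)
consumer.subscribe(['test-topic'])
msg = consumer.poll(1.0)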
Example #10
    def _make_offset_consumer(self) -> DeserializingConsumer:
        """
        Creates the underlying :class:`confluent_kafka.DeserializingConsumer` which is used to fetch the last
        committed producer offsets.
        """

        key_deserializer = AvroDeserializer(self.schema_registry_client)
        value_deserializer = AvroDeserializer(self.schema_registry_client)

        config = {
            "bootstrap.servers": self.config["bootstrap.servers"],
            "key.deserializer": key_deserializer,
            "value.deserializer": value_deserializer,
            "enable.partition.eof": True,
            "group.id": f'{self.config["offset_topic"]}_fetcher',
            "default.topic.config": {
                "auto.offset.reset": "latest"
            },
            **self.config["kafka_opts"],
            **self.config["kafka_consumer_opts"],
        }

        offset_consumer = DeserializingConsumer(config)

        logger.info(
            f"Offset Consumer created with config: {pformat(config, indent=2)}"
        )
        return offset_consumer
Example #11
def kafpubsub(args):
    publisher = pubsub.PublisherClient()
    project_id = args.project
    kafka_topic = args.topic
    pubsub_topic = f'projects/{project_id}/topics/{kafka_topic}'

    try:
        publisher.create_topic(pubsub_topic)
    except AlreadyExists:
        pass  # No need to raise if the topic already exists.

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_server,
        'group.id': args.group_id,
        'auto.offset.reset': args.auto_offset_reset
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([kafka_topic])

    logging.info(
        f'Publish Kafka ({args.bootstrap_server}) values to pubsub...')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            logging.debug(f'> {datetime.today()} | {msg.key()}\n')

            publisher.publish(pubsub_topic, msg.value())
        except KeyboardInterrupt:
            break

    consumer.close()
Example #12
def run_consumer(shutdown_flag, clients, lock):
    print("Starting Kafka Consumer.")
    schema_registry_client = SchemaRegistryClient(
        {"url": "http://localhost:8081"})
    deserializer = AvroDeserializer(schema_registry_client)
    config = {
        "bootstrap.servers": "localhost:9092",
        "group.id": "dashboard-demo",
        "value.deserializer": deserializer
    }

    consumer = DeserializingConsumer(config)
    consumer.subscribe(["DASHBOARD"])

    while not shutdown_flag.done():
        msg = consumer.poll(0.2)

        if msg is None:
            print("Waiting...")
        elif msg.error():
            print(f"ERROR: {msg.error()}")
        else:
            value = msg.value()
            formatted = simplejson.dumps(value)
            print(f"Sending {formatted} to {clients}")

            with lock:
                websockets.broadcast(clients, formatted)

    print("Closing Kafka Consumer")
    consumer.close()
Example #13
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_color,
                              user.favorite_number))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #14
    def _consume(self, on_consume):
        if isinstance(on_consume, types.FunctionType):
            callback = on_consume
        else:
            callback_cls = on_consume()
            callback = callback_cls.on_message

        consumer = DeserializingConsumer(self.kafka_config)
        consumer.subscribe([self.topic])
        q = Queue(maxsize=self.num_threads)

        msg = None
        while True:
            try:
                # Poll for the next message (q.put below blocks when all worker slots are busy)
                msg = consumer.poll(1)
                if msg is None:
                    continue
                if msg.error():
                    logger.error(
                        f'Worker for topic {self.topic} error: {msg.error()}')
                    continue

                q.put(msg)
                t = threading.Thread(
                    target=_process_msg,
                    args=(q, consumer, callback, self.topic),
                )
                t.start()
            except Exception as err:
                logger.error(
                    f'Worker for topic {self.topic} terminated: {err}')
                logger.error(msg)
                consumer.close()
                break
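_process_msg is not shown above. A minimal sketch of a worker that takes one message from the queue, invokes the callback, and commits its offset (the commit strategy is an assumption):

def _process_msg(q, consumer, callback, topic):
    # Hypothetical worker body: hand the queued message to the callback,
    # then commit its offset and free the queue slot.
    msg = q.get(timeout=60)
    try:
        callback(msg)
        consumer.commit(msg)
    finally:
        q.task_done()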
Example #15
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_color}\n"
                      f"\tfavorite_color: {user.favorite_number}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
Example #16
    def consume(self, count: int):
        consumer = DeserializingConsumer({
            'bootstrap.servers': self.brokers,
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': self._make_deserializer(),
            'group.id': self.group,
            'auto.offset.reset': "earliest"
        })
        consumer.subscribe([self.topic])

        self.logger.info("Consuming %d %s records from topic %s with group %s",
                         count, self.schema_type.name, self.topic, self.group)
        while self.consumed < count:
            msg = consumer.poll(1)
            if msg is None:
                continue
            payload = msg.value()
            self.logger.debug("Consumed %d at %d", payload.val, msg.offset())
            assert payload.val == self.consumed
            self.consumed += 1

        consumer.close()
Example #17
def main():
    string_deserializer = StringDeserializer('utf_8')
    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }

    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            msg = consumer.poll(timeout=1000)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    messages = sorted(messages,
                                      key=lambda x: x.price,
                                      reverse=True)[0:10]
                    print(messages)
                consumer.commit(asynchronous=False)

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
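dict_to_transaction is defined elsewhere. A minimal sketch consistent with the sort key used above; only the price field is actually required, the other field names are assumptions:

from collections import namedtuple

# Hypothetical record type; only `price` is used by the sort above.
Transaction = namedtuple('Transaction', ['price', 'volume', 'timestamp'])

def dict_to_transaction(obj: dict) -> Transaction:
    return Transaction(price=float(obj['price']),
                       volume=float(obj.get('volume', 0)),
                       timestamp=obj.get('timestamp'))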
Example #18
    def create_consumer(self, registry_client):
        """
        Subscribes to the topic defined in the configs and creates a consumer to deserialize messages from that topic

        :param registry_client: SchemaRegistryClient object
            get this from register_client()

        :return: DeserializingConsumer object
        """
        metadata_schema = None
        topic = None
        if self.metadata_type == "COLLECTION":
            metadata_schema = registry_client.get_latest_version(
                self.collection_topic + '-value').schema.schema_str
            topic = self.collection_topic

        if self.metadata_type == "GRANULE":
            metadata_schema = registry_client.get_latest_version(
                self.granule_topic + '-value').schema.schema_str
            topic = self.granule_topic

        metadata_deserializer = AvroDeserializer(metadata_schema,
                                                 registry_client)

        consumer_conf = {'bootstrap.servers': self.brokers}

        if self.security:
            consumer_conf['security.protocol'] = 'SSL'
            consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
            consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
            consumer_conf['ssl.certificate.location'] = self.conf['security'][
                'certLoc']

        meta_consumer_conf = consumer_conf
        meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
        meta_consumer_conf['value.deserializer'] = metadata_deserializer
        meta_consumer_conf['group.id'] = self.group_id
        meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset

        metadata_consumer = DeserializingConsumer(meta_consumer_conf)
        metadata_consumer.subscribe([topic])
        return metadata_consumer
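A hedged usage sketch, assuming the register_client() helper mentioned in the docstring returns a configured SchemaRegistryClient:

# Hypothetical usage inside the same class.
registry_client = self.register_client()
metadata_consumer = self.create_consumer(registry_client)
msg = metadata_consumer.poll(1.0)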
Example #19
class Consumer:
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 group: str,
                 callback: Callable[[Message], None],
                 value_deserializer=None,
                 poll_timeout: float = 1.0,
                 config=None):

        consumer_config = {
            "bootstrap.servers": bootstrap_servers,
            "group.id": group,
            "value.deserializer": value_deserializer
        }
        if config:
            consumer_config.update(config)

        self.consumer = DeserializingConsumer(consumer_config)
        self.topic = topic
        self.callback = callback
        self.poll_timeout = poll_timeout

    def start(self):
        logger.info("Starting Kafka consumer")
        self.consumer.subscribe([self.topic])

        while True:
            message = self.consumer.poll(self.poll_timeout)

            if message is None:
                continue

            if message.error():
                print(f"Consumer error: {message.error()}")
                continue

            self.callback(message)

    def close(self):
        logger.info("Closing Kafka consumer")
        self.consumer.close()
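A hedged usage sketch of this wrapper (the topic, group, and deserializer are assumptions):

# Hypothetical usage: print every value from the 'orders' topic.
consumer = Consumer(
    bootstrap_servers='localhost:9092',
    topic='orders',
    group='order-printers',
    callback=lambda message: print(message.value()),
    value_deserializer=StringDeserializer('utf_8'),
)
try:
    consumer.start()
except KeyboardInterrupt:
    consumer.close()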
Example #20
    def __init__(self, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')
        # Value Deserializer
        # Passing the schema to the Avro Deserializer is still required at the moment; this might change in the future
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)
        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
Example #21
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager,
                                           msg.value())).start()
            except ConsumeError as e:
                print(
                    f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}'
                )
    finally:
        consumer.close()
Example #22
def consume(consumer: DeserializingConsumer, timeout) -> Iterator[Message]:
    while True:
        # Wait up to `timeout` seconds for a message; poll() returns None
        # if nothing arrives within that window.
        message = consumer.poll(timeout)
        # print('[kafka] polling...')
        if message is None:
            continue
        if message.error():
            print('Consumer error: {}'.format(message.error()))
            continue
        yield message
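A hedged usage sketch of the generator (the topic name and the handling of each message are assumptions):

# Hypothetical usage: iterate over the generator and handle each message.
consumer = DeserializingConsumer({'bootstrap.servers': 'localhost:9092',
                                  'group.id': 'example-group'})
consumer.subscribe(['example-topic'])
for message in consume(consumer, 1.0):
    print(message.key(), message.value())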
Example #23
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 group: str,
                 callback: Callable[[Message], None],
                 value_deserializer=None,
                 poll_timeout: float = 1.0,
                 config=None):

        consumer_config = {
            "bootstrap.servers": bootstrap_servers,
            "group.id": group,
            "value.deserializer": value_deserializer
        }
        if config:
            consumer_config.update(config)

        self.consumer = DeserializingConsumer(consumer_config)
        self.topic = topic
        self.callback = callback
        self.poll_timeout = poll_timeout
Example #24
    def __init__(self, consumer_name, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Consumer name for logging purposes
        self.logging_prefix = '['+ consumer_name + '][KafkaAvroConsumer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
 
 
        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')

         # Get Schema for the value
        self.schema_id_value = self.schema_registry_client.get_latest_version(topic_name + "-value").schema_id
        # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
        self.value_schema = self.schema_registry_client.get_schema(self.schema_id_value).schema_str
        print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
        print(self.logging_prefix + ' - Value Schema:')
        print(self.logging_prefix + ' - -------------\n')
        print(self.logging_prefix + ' - ' + self.value_schema + '\n')

        # Value Deserializer
        # Passing the schema to the Avro Deserializer is still required at the moment; this might change in the future
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(self.value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(groupID, autocommit, 
                                                                        self.key_deserializer,
                                                                        self.value_deserializer)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)

        # Print consumer configuration
        EventBackboneConfig.printConsumerConfiguration(self.logging_prefix,self.consumer_conf,self.schema_registry_conf['url'])

        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
Example #25
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)

        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
Example #26
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})

    avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")

                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(
                            tenant, AIRQO_BASE_URL)

                        group_measurements = list(
                            group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements),
                                       int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[
                                i:i + int(REQUEST_BODY_SIZE)]

                            device_registry.insert_events(measurements_list)

                except Exception as ex:
                    print(ex)

        except KeyboardInterrupt:
            break

    consumer.close()
Example #27
    def __new__(cls):
        # Consumer configuration. Must match Strimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'group.id': "text-preprocessor",
            'auto.offset.reset': "earliest",
            'session.timeout.ms': 10000,
            'enable.auto.commit': True,  # default
            'auto.commit.interval.ms': 5000,  # default
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': StringDeserializer('utf_8')
        }
        return DeserializingConsumer(config)
Example #28
    def __init__(self, db: SummaryDAOFactory):
        super(ConsumerLoop, self).__init__()

        logging.basicConfig(
            format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
            level=logging.DEBUG,
            datefmt='%d/%m/%Y %I:%M:%S %p')
        self.logger = logging.getLogger("DispatcherConsumerLoop")

        # Consumer configuration. Must match Strimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'group.id': "dispatcher",
            'auto.offset.reset': "earliest",
            'session.timeout.ms': 10000,
            'enable.auto.commit': True,  # default
            'auto.commit.interval.ms': 5000,  # default
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': StringDeserializer('utf_8')
        }
        self.consumer = DeserializingConsumer(config)
        self.db = db
        self.consumed_msg_schema = TextPostprocessingConsumedMsgSchema()
Example #29
def main(args):
    topic = args.topic

    with open('schema/KeySchema.avsc', "r") as f:
        key_schema_str = f.read()
    with open('schema/ValueSchema.avsc', "r") as f:
        value_schema_str = f.read()

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_key_deserializer = AvroDeserializer(key_schema_str,
                                             schema_registry_client,
                                             dict_to_user_quote_key)
    avro_value_deserializer = AvroDeserializer(value_schema_str,
                                               schema_registry_client,
                                               dict_to_user_quote_value)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': avro_key_deserializer,
        'value.deserializer': avro_value_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user_quote = msg.value()
            if user_quote is not None:
                print("User {} Quote record: product_id: {}\n"
                      "\tquoted_price: {}\n"
                      "\tquoted_quantity: {}\n"
                      "\tuser_note: {}\n".format(msg.key().user_id,
                                                 user_quote.product_id,
                                                 user_quote.quoted_price,
                                                 user_quote.quoted_quantity,
                                                 user_quote.user_note))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #30
def main():
    top = 20
    consumer = DeserializingConsumer({
        'bootstrap.servers': os.environ['KAFKA_BROKERS'],
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'SCRAM-SHA-512',
        'sasl.password': os.environ['KAFKA_PASS'],
        'sasl.username': os.environ['KAFKA_USER'],
        'ssl.ca.location': '/usr/local/share/ca-certificates/Yandex/YandexCA.crt',
        'group.id': 'group1',
        'key.deserializer': StringDeserializer(),
        'value.deserializer': LongDeserializer(),
    })

    consumer.subscribe(['streams-wordcount-output'])

    try:
        frequencies = []
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                if frequencies:
                    print('==============================================')
                    print(f'Current list of top {top} most frequent words:')
                    frequencies = sorted(frequencies,
                                         key=lambda x: x[1],
                                         reverse=True)
                    for frequency in frequencies[0:top]:
                        print(f'{frequency[0]}: {frequency[1]}')
                    frequencies.clear()
                continue
            elif msg.error():
                print('error: {}'.format(msg.error()))
            else:
                frequencies.append((msg.key(), msg.value()))
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()