Example #1
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_color}\n"
                      f"\tfavorite_color: {user.favorite_number}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
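This snippet assumes an args object built elsewhere; a minimal argparse sketch that would supply the fields read above (the flag names are assumptions, not part of the original) is:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Protobuf deserializing consumer")
    parser.add_argument('-b', '--bootstrap-servers', dest='bootstrap_servers',
                        required=True, help="Kafka bootstrap servers")
    parser.add_argument('-t', '--topic', required=True, help="Topic to consume")
    parser.add_argument('-g', '--group', required=True, help="Consumer group id")
    main(parser.parse_args())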
Example #2
    def _consume(self, on_consume):
        if isinstance(on_consume, types.FunctionType):
            callback = on_consume
        else:
            callback_cls = on_consume()
            callback = callback_cls.on_message

        consumer = DeserializingConsumer(self.kafka_config)
        consumer.subscribe([self.topic])
        q = Queue(maxsize=self.num_threads)

        msg = None
        while True:
            try:
                # Check if we should rate limit
                msg = consumer.poll(1)
                if msg is None:
                    continue
                if msg.error():
                    logger.error(
                        f'Worker for topic {self.topic} error: {msg.error()}')
                    continue

                q.put(msg)
                t = threading.Thread(
                    target=_process_msg,
                    args=(q, consumer, callback, self.topic),
                )
                t.start()
            except Exception as err:
                logger.error(
                    f'Worker for topic {self.topic} terminated: {err}')
                logger.error(msg)
                consumer.close()
                break
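The method refers to a _process_msg helper that is not shown in the example; a plausible sketch matching the thread arguments above (an assumption, not the original code) is:

def _process_msg(q, consumer, callback, topic):
    # Take the message this worker thread was started for.
    msg = q.get(timeout=60)
    try:
        callback(msg)
    except Exception as err:
        logger.error(f'Error while processing message from topic {topic}: {err}')
    finally:
        # Commit only after the callback has run, then free a queue slot.
        consumer.commit(msg)
        q.task_done()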
Example #3
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #4
def kafpubsub(args):
    publisher = pubsub.PublisherClient()
    project_id = args.project
    kafka_topic = args.topic
    pubsub_topic = f'projects/{project_id}/topics/{kafka_topic}'

    try:
        publisher.create_topic(pubsub_topic)
    except AlreadyExists:
        pass  # I don't need an error if topic already created.

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_server,
        'group.id': args.group_id,
        'auto.offset.reset': args.auto_offset_reset
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([kafka_topic])

    logging.info(
        f'Publish Kafka ({args.bootstrap_server}) values to pubsub...')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            logging.debug(f'> {datetime.today()} | {msg.key()}\n')

            publisher.publish(pubsub_topic, msg.value())
        except KeyboardInterrupt:
            break

    consumer.close()
Example #5
    def consume(self, count: int):
        consumer = DeserializingConsumer({
            'bootstrap.servers':
            self.brokers,
            'key.deserializer':
            StringDeserializer('utf_8'),
            'value.deserializer':
            self._make_deserializer(),
            'group.id':
            self.group,
            'auto.offset.reset':
            "earliest"
        })
        consumer.subscribe([self.topic])

        self.logger.info("Consuming %d %s records from topic %s with group %s",
                         count, self.schema_type.name, self.topic, self.group)
        while self.consumed < count:
            msg = consumer.poll(1)
            if msg is None:
                continue
            payload = msg.value()
            self.logger.debug("Consumed %d at %d", payload.val, msg.offset())
            assert payload.val == self.consumed
            self.consumed += 1

        consumer.close()
Example #6
def run_consumer(shutdown_flag, clients, lock):
    print("Starting Kafka Consumer.")
    schema_registry_client = SchemaRegistryClient(
        {"url": "http://localhost:8081"})
    deserializer = AvroDeserializer(schema_registry_client)
    config = {
        "bootstrap.servers": "localhost:9092",
        "group.id": "dashboard-demo",
        "value.deserializer": deserializer
    }

    consumer = DeserializingConsumer(config)
    consumer.subscribe(["DASHBOARD"])

    while not shutdown_flag.done():
        msg = consumer.poll(0.2)

        if msg is None:
            print("Waiting...")
        elif msg.error():
            print(f"ERROR: {msg.error()}")
        else:
            value = msg.value()
            formatted = simplejson.dumps(value)
            print(f"Sending {formatted} to {clients}")

            with lock:
                websockets.broadcast(clients, formatted)

    print("Closing Kafka Consumer")
    consumer.close()
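run_consumer expects a shutdown flag exposing done(), a shared set of websocket clients, and a lock; one hypothetical way to wire it up (the names and server setup are assumptions) is:

import threading
from concurrent.futures import Future

shutdown_flag = Future()
clients = set()      # populated by the websocket server as connections open
lock = threading.Lock()

consumer_thread = threading.Thread(target=run_consumer,
                                   args=(shutdown_flag, clients, lock))
consumer_thread.start()
# ... run the websocket server and register connections in `clients` ...
shutdown_flag.set_result(True)   # shutdown_flag.done() now returns True
consumer_thread.join()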
Example #7
def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": [ "name", "favorite_number", "favorite_color" ]
    }
    """
    json_deserializer = JSONDeserializer(schema_str, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_number,
                                                      user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
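The JSONDeserializer above relies on a dict_to_user callback and a User class that are not shown; a sketch consistent with the printed fields (an assumption) is:

class User(object):
    def __init__(self, name, favorite_number, favorite_color):
        self.name = name
        self.favorite_number = favorite_number
        self.favorite_color = favorite_color


def dict_to_user(obj, ctx):
    # Called by JSONDeserializer with the decoded dict and a SerializationContext.
    if obj is None:
        return None
    return User(name=obj['name'],
                favorite_number=obj['favorite_number'],
                favorite_color=obj['favorite_color'])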
Example #8
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.random()),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)

        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
Example #9
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})

    avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")

                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(
                            tenant, AIRQO_BASE_URL)

                        group_measurements = list(
                            group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements),
                                       int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[
                                i:i + int(REQUEST_BODY_SIZE)]

                            device_registry.insert_events(measurements_list)

                except Exception as ex:
                    print(ex)

        except KeyboardInterrupt:
            break

    consumer.close()
Example #10
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #11
def main():
    top = 20
    consumer = DeserializingConsumer({
        'bootstrap.servers':
        os.environ['KAFKA_BROKERS'],
        'security.protocol':
        'SASL_SSL',
        'sasl.mechanism':
        'SCRAM-SHA-512',
        'sasl.password':
        os.environ['KAFKA_PASS'],
        'sasl.username':
        os.environ['KAFKA_USER'],
        'ssl.ca.location':
        '/usr/local/share/ca-certificates/Yandex/YandexCA.crt',
        'group.id':
        'group1',
        'key.deserializer':
        StringDeserializer(),
        'value.deserializer':
        LongDeserializer(),
    })

    consumer.subscribe(['streams-wordcount-output'])

    try:
        frequencies = []
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                if frequencies:
                    print('==============================================')
                    print(f'Current list of top {top} most frequent words:')
                    frequencies = sorted(frequencies,
                                         key=lambda x: x[1],
                                         reverse=True)
                    for frequency in frequencies[0:top]:
                        print(f'{frequency[0]}: {frequency[1]}')
                    frequencies.clear()
                continue
            elif msg.error():
                print('error: {}'.format(msg.error()))
            else:
                frequencies.append((msg.key(), msg.value()))
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()
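confluent-kafka-python does not ship a LongDeserializer, so this example presumably defines its own; a sketch that decodes the 8-byte big-endian longs written by Kafka Streams' Long serde (an assumption, not the original class) is:

import struct
from confluent_kafka.serialization import Deserializer


class LongDeserializer(Deserializer):
    def __call__(self, value, ctx=None):
        # Word counts from the streams application arrive as 8-byte big-endian integers.
        if value is None:
            return None
        return struct.unpack('>q', value)[0]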
Example #12
def main(args):
    topic = args.topic

    with open('schema/KeySchema.avsc', 'r') as f:
        key_schema_str = f.read()
    with open('schema/ValueSchema.avsc', 'r') as f:
        value_schema_str = f.read()

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_key_deserializer = AvroDeserializer(key_schema_str,
                                             schema_registry_client,
                                             dict_to_user_quote_key)
    avro_value_deserializer = AvroDeserializer(value_schema_str,
                                               schema_registry_client,
                                               dict_to_user_quote_value)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': avro_key_deserializer,
        'value.deserializer': avro_value_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user_quote = msg.value()
            if user_quote is not None:
                print("User {} Quote record: product_id: {}\n"
                      "\tquoted_price: {}\n"
                      "\tquoted_quantity: {}\n"
                      "\tuser_note: {}\n".format(msg.key().user_id,
                                                 user_quote.product_id,
                                                 user_quote.quoted_price,
                                                 user_quote.quoted_quantity,
                                                 user_quote.user_note))
        except KeyboardInterrupt:
            break

    consumer.close()
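The two from_dict callbacks used above are not shown; sketches consistent with the attributes accessed in the print statement (field names are assumptions) are:

from collections import namedtuple

UserQuoteKey = namedtuple('UserQuoteKey', ['user_id'])
UserQuoteValue = namedtuple('UserQuoteValue',
                            ['product_id', 'quoted_price',
                             'quoted_quantity', 'user_note'])


def dict_to_user_quote_key(obj, ctx):
    return None if obj is None else UserQuoteKey(user_id=obj['user_id'])


def dict_to_user_quote_value(obj, ctx):
    if obj is None:
        return None
    return UserQuoteValue(product_id=obj['product_id'],
                          quoted_price=obj['quoted_price'],
                          quoted_quantity=obj['quoted_quantity'],
                          user_note=obj['user_note'])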
Example #13
def main():
    string_deserializer = StringDeserializer('utf_8')
    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }

    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            msg = consumer.poll(timeout=1000)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    messages = sorted(messages,
                                      key=lambda x: x.price,
                                      reverse=True)[0:10]
                    print(messages)
                consumer.commit(asynchronous=False)

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
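dict_to_transaction is not shown; only the price attribute is needed by the sort above, so a minimal sketch (the other fields are assumptions) is:

from collections import namedtuple

Transaction = namedtuple('Transaction', ['price', 'amount', 'timestamp'])


def dict_to_transaction(obj):
    # Map the JSON payload of a bitcoin-transaction message onto a namedtuple.
    return Transaction(price=float(obj.get('price', 0)),
                       amount=float(obj.get('amount', 0)),
                       timestamp=obj.get('timestamp'))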
Example #14
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager,
                                           msg.value())).start()
            except ConsumeError as e:
                print(
                    f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}'
                )
    finally:
        consumer.close()
Example #15
    def test_consumer(self):
        consumer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.deserializer': self.key_avro_deserializer,
            'value.deserializer': self.value_avro_deserializer,
            'group.id': '1',
            'auto.offset.reset': 'earliest'
        }
        offset = kafka_utils.offset - len(self.test_messages) + 1
        consumer = DeserializingConsumer(consumer_config)
        partitions = []
        partition = TopicPartition(topic=self.topic,
                                   partition=0,
                                   offset=offset)
        partitions.append(partition)
        consumer.assign(partitions)
        # Process messages
        result = []
        attempt = 0
        while len(result) < len(self.test_messages):
            try:
                msg = consumer.poll(1.0)
                attempt += 1
                if msg is None:
                    print("no message received")
                    if attempt < 10:
                        pass
                    else:
                        break
                elif msg.error():
                    break
                else:
                    value_object = msg.value()
                    text = value_object.text
                    print("adding {} to result".format(text))
                    result.append(text)
            except KeyboardInterrupt:
                break
            except SerializerError as e:
                break
        # Leave group and commit final offsets
        consumer.close()

        assert result == self.test_messages
Example #16
def main(args):
    topic = args.topic
    schema_str = MetricSchema
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])
    client = InfluxDBClient(host=args.host_influx,
                            port=8086,
                            username='******',
                            password='******')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            timespent = msg.value()
            if timespent is not None:
                print("time ==>", timespent)
                print(timespent["metricName"])
                print(timespent["time"])
                client.switch_database('datascience')
                json_body = [{
                    "measurement": "metric",
                    "fields": {
                        "name": timespent["metricName"],
                        "value": timespent["time"]
                    }
                }]
                client.write_points(json_body)
        except KeyboardInterrupt:
            break
    consumer.close()
Example #17
class Consumer:
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 group: str,
                 callback: Callable[[Message], None],
                 value_deserializer=None,
                 poll_timeout: float = 1.0,
                 config=None):

        consumer_config = {
            "bootstrap.servers": bootstrap_servers,
            "group.id": group,
            "value.deserializer": value_deserializer
        }
        if config:
            consumer_config.update(config)

        self.consumer = DeserializingConsumer(consumer_config)
        self.topic = topic
        self.callback = callback
        self.poll_timeout = poll_timeout

    def start(self):
        logger.info("Starting Kafka consumer")
        self.consumer.subscribe([self.topic])

        while True:
            message = self.consumer.poll(self.poll_timeout)

            if message is None:
                continue

            if message.error():
                print(f"Consumer error: {message.error()}")
                continue

            self.callback(message)

    def close(self):
        logger.info("Closing Kafka consumer")
        self.consumer.close()
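A hypothetical way to use this wrapper (the topic, group, and handler are placeholders, not part of the original):

from confluent_kafka.serialization import StringDeserializer


def handle(message):
    print(message.key(), message.value())


consumer = Consumer(bootstrap_servers="localhost:9092",
                    topic="events",
                    group="example-group",
                    callback=handle,
                    value_deserializer=StringDeserializer('utf_8'),
                    config={"auto.offset.reset": "earliest"})
try:
    consumer.start()
except KeyboardInterrupt:
    consumer.close()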
Example #18
def consume():
    reusableConsumer = DeserializingConsumer(getConfigs())
    reusableConsumer.subscribe(["myprototopic"])
    while (True):
        try:
            msg = reusableConsumer.poll(0.1)

            if msg is None:
                continue
            else:
                key = msg.key()
                value = msg.value()
                print("Received msg name: {}, fav food: {}, times eaten: {}".
                      format(value.name, value.favoriteFood, value.timesEaten))

        except KeyboardInterrupt:
            break

    print("Closing Consumer")
    reusableConsumer.close()
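getConfigs() is not shown; a sketch wiring a ProtobufDeserializer for a hypothetical meal_pb2.Meal message (matching the name, favoriteFood and timesEaten fields printed above) might look like this:

from confluent_kafka.serialization import StringDeserializer
from confluent_kafka.schema_registry.protobuf import ProtobufDeserializer
import meal_pb2  # hypothetical generated protobuf module


def getConfigs():
    return {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'proto-consumer-group',
        'auto.offset.reset': 'earliest',
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': ProtobufDeserializer(meal_pb2.Meal),
    }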
Example #19
def receive_record(args):
    """ Receives Record using a DeserializingConsumer & AvroDeserializer """
    topics = [args.topic.rstrip()]

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_deserializer = AvroDeserializer(schema_registry_client, DATA_SCHEMA,
                                         dict_to_data)

    string_deserializer = StringDeserializer('utf_8')

    consumer_config = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': 'earliest'
    }

    consumer = DeserializingConsumer(consumer_config)
    consumer.subscribe(topics)

    print(f'Consuming data records from topic(s) {topics}. ^C to exit.')
    while True:
        try:
            # SIGINT can't be handled when polling; keep the poll timeout bounded.
            msg = consumer.poll(10.0)
            if msg is None:
                print('\t---Waiting. . .')
                continue

            data = msg.value()
            if data is not None:
                print(f'Data record {msg.key()}:\n' f'\tValues: {data}')
        except KeyboardInterrupt:
            break
    print('\nClosing consumer.')
    consumer.close()
Example #20
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    schema_obj = schema_registry_client.get_latest_version(
        subject_name='example_serde_json-value')

    json_deserializer = JSONDeserializer(schema_obj.schema.schema_str,
                                         from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_color}\n"
                      f"\tfavorite_color: {user.favorite_number}\n")
        except KeyboardInterrupt:
            break
    consumer.close()
Example #21
class KafkaAvroConsumer:

    def __init__(self, consumer_name, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Consumer name for logging purposes
        self.logging_prefix = '['+ consumer_name + '][KafkaAvroConsumer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
 
 
        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')

         # Get Schema for the value
        self.schema_id_value = self.schema_registry_client.get_latest_version(topic_name + "-value").schema_id
        # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
        self.value_schema = self.schema_registry_client.get_schema(self.schema_id_value).schema_str
        print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
        print(self.logging_prefix + ' - Value Schema:')
        print(self.logging_prefix + ' - -------------\n')
        print(self.logging_prefix + ' - ' + self.value_schema + '\n')

        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(self.value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(groupID, autocommit, 
                                                                        self.key_deserializer,
                                                                        self.value_deserializer)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)

        # Print consumer configuration
        EventBackboneConfig.printConsumerConfiguration(self.logging_prefix,self.consumer_conf,self.schema_registry_conf['url'])

        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
    
    def traceResponse(self, msg):
        print(self.logging_prefix + ' - New event received\n\tTopic: {}\n\tPartition: {}\n\tOffset: {}\n\tkey: {}\n\tvalue: {}\n'
                    .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    # Polls for next event
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=POLL_TIMEOUT)
        anEvent = {}
        # Validate the returned message
        if msg is None:
            print(self.logging_prefix + ' - [INFO] - No new messages on the topic')
            return None
        elif msg.error():
            if ("PARTITION_EOF" in msg.error()):
                print(self.logging_prefix + ' - [INFO] - End of partition')
            else:
                print(self.logging_prefix + ' - [ERROR] - Consumer error: {}'.format(msg.error()))
            return None
        else:
            # Print the message
            self.traceResponse(msg)
        return msg.value()

   
    
    # Polls for the next event but returns the raw event
    def pollNextRawEvent(self):
        records = self.consumer.poll(timeout=POLL_TIMEOUT)
        if records is None:
            return None
        if records.error():
            # Stop reading if we find end of partition in the error message
            if ("PARTITION_EOF" in records.error()):
                return None
            else:
                print(self.logging_prefix + ' - [ERROR] - Consumer error: {}'.format(records.error()))
                return None
        else:
            self.traceResponse(records)
        return records


    def commitEvent(self,event):
        self.consumer.commit(event)

    def close(self):
        self.consumer.close()
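A hypothetical way to drive this wrapper (consumer name and topic are placeholders; the value schema is fetched from the registry in __init__, so None can be passed for it):

consumer = KafkaAvroConsumer('OrderEventsConsumer',
                             value_schema=None,
                             topic_name='kafka-avro-producer',
                             groupID='KafkaAvroConsumer',
                             autocommit=True)
event = consumer.pollNextEvent()
if event is not None:
    print(event)
consumer.close()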
Example #22
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_enriched_event_str = EnrichedEventSchema
    schema_dict = ast.literal_eval(schema_enriched_event_str)
    schema_metrics = MetricSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_metrics, schema_registry_client)
    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    avro_deserializer = AvroDeserializer(schema_enriched_event_str,
                                         schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")

    cluster.register_user_type('datascience', 'datafield', Datafield)

    client_influxdb = InfluxDBClient('35.181.155.182', 8086, "dbsaleh2")
    # client_influxdb = InfluxDBClient(url="http://35.181.155.182:8086 , "mydb")

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()

            query = f"""
            insert into eventenrich (
                        "eventId" ,
                        "dateTimeRef",
                        "nomenclatureEv",
                        "canal",
                        "media",
                        "schemaVersion",
                        "headerVersion",
                        "serveur",
                        "adresseIP",
                        "idTelematique",
                        "idPersonne",
                        "dateNaissance",
                        "paysResidence",
                        "paysNaissance",
                        "revenusAnnuel",
                        "csp",
                        "eventBC",
                        "eventContent"
                        )
                        VALUES (%s, %s, %s, %s,%s, %s, %s, %s,%s, %s, %s, %s,%s, %s, %s, %s,%s, %s)
                    """

            #eventBc = evt["EventBusinessContext"][0].replace("com.bnpparibas.dsibddf.event.","")
            eventBc = evt["eventBC"].replace("com.bnpparibas.dsibddf.event.",
                                             "")
            eventContent = evt["EventBusinessContext"][1]

            transformed_event = transform_enriched_event_to_cassandra_model(
                evt, eventBc, schema_dict, eventContent)

            insert_enriched_event_to_cassandra(transformed_event, session,
                                               query)

            elapsed_time = (time.time() - start)

        except Exception as e:
            print(f"Exception => {e}")
            continue

        query = 'SELECT * FROM metrics'
        result = client_influxdb.query(query, database="dbsaleh2")
        print(result)

        data = []

        print(elapsed_time)
        metrics = [{
            "measurement": "metrics",
            "fields": {
                "metricName": "hystorize",
                "timeforhystorize": elapsed_time
            }
        }]
        data.append(metrics)

        # client_influxdb.write_points("hystorize",elapsed_time, database="dbsaleh2")
        client_influxdb.write_points(metrics, database="dbsaleh2")
        producer.produce(topic=outputtopic,
                         value={
                             'metricName': "hystorize",
                             'time': elapsed_time
                         },
                         on_delivery=delivery_report)
        producer.flush()

    consumer.close()
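The on_delivery callback references a delivery_report function that is not shown; the usual shape, as in the Confluent client examples, is:

def delivery_report(err, msg):
    # Invoked once per produced message on the producer's poll/flush thread.
    if err is not None:
        print(f"Delivery failed for record {msg.key()}: {err}")
        return
    print(f"Record {msg.key()} delivered to {msg.topic()} "
          f"[{msg.partition()}] at offset {msg.offset()}")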
Example #23
def main():

    sr_conf = {'url': SCHEMA_REGISTRY_URL}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    schema_str = """
    {
        "namespace": "io.confluent.ksql.avro_schemas",
        "name": "User",
        "type": "record",
        "fields":[
        {"name":"DATESTAMP","type":"string"},
        {"name":"TIMESTAMP","type":"string"},
        {"name":"MILLISEC","type":"string"},
        {"name":"LOGLEVEL","type":"string"},
        {"name":"REQUESTID","type":"string"},
        {"name":"RECORDFORMATVERSION","type":"string"},
        {"name":"SOURCEIP","type":"string"},
        {"name":"DNSDOMAIN","type":"string"},
        {"name":"MESSAGETYPE","type":"string"},
        {"name":"OPERATION","type":"string"},
        {"name":"AUTHUSER","type":"string"},
        {"name":"AUTHDOMAIN","type":"string"},
        {"name":"HTTPCODE","type":"string"},
        {"name":"SOURCEBYTES","type":"string"},
        {"name":"RESPONSEBYTES","type":"string"},
        {"name":"ELAPSEDTIME","type":"string"},
        {"name":"DOMAIN","type":"string"},
        {"name":"BUCKET","type":"string"},
        {"name":"OBJECT","type":"string"}
        ]
    }
    """

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            record = msg.value()
            if record is not None:
                if record['OPERATION'] == "POST" and record[
                        'DOMAIN'] != "%28none%29":
                    urllistraw = "http://" + record['DOMAIN'] + "/" + record[
                        'BUCKET'] + "/" + record['OBJECT']
                    urllist = urllistraw[:-1]
                    print(urllist)
                    r = requests.head(urllist)
                    print(r.headers)
                else:
                    continue
        except KeyboardInterrupt:
            break

    consumer.close()
Example #24
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    loop = asyncio.get_event_loop()

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            print("msg=>", evt)

            def enrich(evt):
                print("evt", evt)
                if evt is not None:
                    print("récupérer dans kafka")
                    row = session.execute(
                        GET_ENRICHED_DATA_QUERY,
                        (evt["EventHeader"]["acteurDeclencheur"]["idPersonne"],
                         )).one()

                    if row:
                        evt['EnrichedData'] = row
                        # evt['EventBusinessContext'] = evt["EventBusinessContext"][1]
                        EnrichedEvent = {
                            "eventId":
                            evt["EventHeader"]["eventId"],
                            "dateTimeRef":
                            evt["EventHeader"]["dateTimeRef"],
                            "nomenclatureEv":
                            evt["EventHeader"]["nomenclatureEv"],
                            "canal":
                            evt["EventHeader"]["canal"],
                            "media":
                            evt["EventHeader"]["media"],
                            "schemaVersion":
                            evt["EventHeader"]["schemaVersion"],
                            "headerVersion":
                            evt["EventHeader"]["headerVersion"],
                            "serveur":
                            evt["EventHeader"]["serveur"],
                            "adresseIP":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["adresseIP"],
                            "idTelematique":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["idTelematique"],
                            "idPersonne":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["idPersonne"],
                            "dateNaissance":
                            row["dateNaissance"],
                            "paysResidence":
                            row["paysResidence"],
                            "paysNaissance":
                            row["paysNaissance"],
                            "revenusAnnuel":
                            row["revenusAnnuel"],
                            "csp":
                            row["csp"],
                            "EventBusinessContext":
                            evt["EventBusinessContext"]
                        }

                        producer.produce(topic=outputtopic,
                                         key=str(uuid4()),
                                         value=EnrichedEvent,
                                         on_delivery=delivery_report)
                        producer.flush()

            async_enrich = async_wrap(enrich)
            loop.run_until_complete(async_enrich(evt))

        except Exception:
            print('Exception')
            continue

    consumer.close()
Example #25
class KafkaAvroConsumer:

    def __init__(self, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')
        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)
        # Subscribe to the topic
        self.consumer.subscribe([topic_name])


    def getSchemaRegistryConf(self):
        try:
            # For IBM Event Streams on IBM Cloud and on OpenShift, the Schema Registry URL is some sort of
            # https://KAFKA_USER:KAFKA_PASSWORD@SCHEMA_REGISTRY_URL
            # Make sure the SCHEMA_REGISTRY_URL your provide is in the form described above.
            url = os.environ['SCHEMA_REGISTRY_URL']
            # If we are talking to ES on prem, it uses an SSL self-signed certificate.
            # Therefore, we need the CA public certificate for the SSL connection to happen.
            if (os.path.isfile(os.getenv('KAFKA_CERT','/certs/es-cert.pem'))):
                ssl = os.getenv('KAFKA_CERT','/certs/es-cert.pem')
                return {'url': url, 'ssl.ca.location': ssl}
            return {'url': url}
        except KeyError:
            print('[KafkaAvroConsumer] - [ERROR] - There is no SCHEMA_REGISTRY_URL environment variable')
            exit(1)

    def getConsumerConfiguration(self, groupID, autocommit):
        try:
            options ={
                    'bootstrap.servers': os.environ['KAFKA_BROKERS'],
                    'group.id': groupID,
                    'key.deserializer': self.key_deserializer,
                    'value.deserializer': self.value_deserializer,
                    'auto.offset.reset': "earliest",
                    'enable.auto.commit': autocommit,
            }
            if (os.getenv('KAFKA_PASSWORD','') != ''):
                # Set security protocol common to ES on prem and on IBM Cloud
                options['security.protocol'] = 'SASL_SSL'
                # Depending on the Kafka User, we will know whether we are talking to ES on prem or on IBM Cloud
                # If we are connecting to ES on IBM Cloud, the SASL mechanism is plain
                if (os.getenv('KAFKA_USER','') == 'token'):
                    options['sasl.mechanisms'] = 'PLAIN'
                # If we are connecting to ES on OCP, the SASL mechanism is scram-sha-512
                else:
                    options['sasl.mechanisms'] = 'SCRAM-SHA-512'
                # Set the SASL username and password
                options['sasl.username'] = os.getenv('KAFKA_USER','')
                options['sasl.password'] = os.getenv('KAFKA_PASSWORD','')
            # If we are talking to ES on prem, it uses an SSL self-signed certificate.
            # Therefore, we need the CA public certificate for the SSL connection to happen.
            if (os.path.isfile(os.getenv('KAFKA_CERT','/certs/es-cert.pem'))):
                options['ssl.ca.location'] = os.getenv('KAFKA_CERT','/certs/es-cert.pem')
            
            # Print out the producer configuration
            self.printConsumerConfiguration(options)

            return options

        except KeyError as error:
            print('[KafkaAvroConsumer] - [ERROR] - A required environment variable does not exist: ' + str(error))
            exit(1)

    def printConsumerConfiguration(self,options):
        # Printing out consumer config for debugging purposes        
        print("[KafkaAvroConsumer] - This is the configuration for the consumer:")
        print("[KafkaAvroConsumer] - -------------------------------------------")
        print('[KafkaAvroConsumer] - Bootstrap Server:      {}'.format(options['bootstrap.servers']))
        print('[KafkaAvroConsumer] - Schema Registry url:   {}'.format(self.schema_registry_conf['url'].split('@')[-1]))
        if (os.getenv('KAFKA_PASSWORD','') != ''):
            # Obfuscate password
            if (len(options['sasl.password']) > 3):
                obfuscated_password = options['sasl.password'][0] + "*****" + options['sasl.password'][len(options['sasl.password'])-1]
            else:
                obfuscated_password = "******"
            print('[KafkaAvroConsumer] - Security Protocol:     {}'.format(options['security.protocol']))
            print('[KafkaAvroConsumer] - SASL Mechanism:        {}'.format(options['sasl.mechanisms']))
            print('[KafkaAvroConsumer] - SASL Username:         {}'.format(options['sasl.username']))
            print('[KafkaAvroConsumer] - SASL Password:         {}'.format(obfuscated_password))
            if (os.path.isfile(os.getenv('KAFKA_CERT','/certs/es-cert.pem'))): 
                print('[KafkaAvroConsumer] - SSL CA Location:       {}'.format(options['ssl.ca.location']))
        print('[KafkaAvroConsumer] - Offset Reset:          {}'.format(options['auto.offset.reset']))
        print('[KafkaAvroConsumer] - Autocommit:            {}'.format(options['enable.auto.commit']))
        print("[KafkaAvroConsumer] - -------------------------------------------")

    
    def traceResponse(self, msg):
        print('[KafkaConsumer] - Topic {} partition [{}] at offset {}:\n\tkey: {}\n\tvalue: {}'
                    .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value() ))

    # Polls for next event
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=10.0)
        anEvent = {}
        # Validate the returned message
        if msg is None:
            print("[KafkaAvroConsumer] - [INFO] - No new messages on the topic")
            return None
        elif msg.error():
            if ("PARTITION_EOF" in msg.error()):
                print("[KafkaAvroConsumer] - [INFO] - End of partition")
            else:
                print("[KafkaAvroConsumer] - [ERROR] - Consumer error: {}".format(msg.error()))
            return None
        else:
            # Print the message
            self.traceResponse(msg)
        return msg.value()

    # Polls for events until it finds an event where keyId=keyname
    def pollNextEventKeyIdKeyName(self, keyID, keyname):
        gotIt = False
        while not gotIt:
            msg = self.consumer.poll(timeout=10.0)
            # Continue if we have not received a message yet
            if msg is None:
                continue
            if msg.error():
                print("[KafkaAvroConsumer] - [ERROR] - Consumer error: {}".format(msg.error()))
                # Stop reading if we find end of partition in the error message
                if ("PARTITION_EOF" in msg.error()):
                    gotIt= True
                continue
            self.traceResponse(msg)
            # If we've found our event based on keyname and keyID, stop reading messages
            if (msg.value()[keyname] == keyID):
                gotIt = True
        return msg.value()

    # Polls for events until it finds an event with same key
    def pollNextEventByKey(self, keyID):
        if (str(keyID) == ""):
            print("[KafkaAvroConsumer] - [ERROR] - Consumer error: Key is an empty string")
            return None
        gotIt = False
        while not gotIt:
            msg = self.consumer.poll(timeout=10.0)
            # Continue if we have not received a message yet
            if msg is None:
                continue
            if msg.error():
                print("[KafkaAvroConsumer] - [ERROR] - Consumer error: {}".format(msg.error()))
                # Stop reading if we find end of partition in the error message
                if ("PARTITION_EOF" in msg.error()):
                    gotIt= True
                continue
            self.traceResponse(msg)
            # If we've found our event based on keyname and keyID, stop reading messages
            if (msg.key() == keyID):
                gotIt = True
        return msg.value()
    
    # Polls for the next event but returns the raw event
    def pollNextRawEvent(self):
        msg = self.consumer.poll(timeout=5.0)
        if msg is None:
            return None
        if msg.error():
            # Stop reading if we find end of partition in the error message
            if ("PARTITION_EOF" in msg.error()):
                return None
            else:
                print("[KafkaAvroConsumer] - [ERROR] - Consumer error: {}".format(msg.error()))
                return None
        return msg

    # Polls for events endlessly
    def pollEvents(self):
        gotIt = False
        while not gotIt:
            msg = self.consumer.poll(timeout=10.0)
            if msg is None:
                continue
            if msg.error():
                print("[KafkaAvroConsumer] - [ERROR] - Consumer error: {}".format(msg.error()))
                if ("PARTITION_EOF" in msg.error()):
                    gotIt= True
                continue
            self.traceResponse(msg)


    def close(self):
        self.consumer.close()
Example #26

def consume(consumer: DeserializingConsumer, timeout) -> iter:
    while True:
        # Waiting for message until timeout reached if there is no message.
        # If message exists, message will be returned.
        message = consumer.poll(timeout)
        # print('[kafka] polling...')
        if message is None:
            continue
        if message.error():
            print('Consumer error: {}'.format(message.error()))
            continue
        yield message


if __name__ == '__main__':
    c = DeserializingConsumer({
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'my-consumer-group',
        'auto.offset.reset': 'earliest',
        'key.deserializer': IntegerDeserializer()
    })
    c.subscribe(topics=['sample-topic'])
    try:
        for msg in consume(c, timeout=1.0):
            print(f'{msg.key()},{msg.value().decode("utf-8")}')
    finally:
        print('Consumer will be closed')
        c.close()
Example #27
class ConsumerLoop(StoppableThread):
    """Kafka consumer loop.

    This class implements a :class:`confluent_kafka.Consumer`.

    For more information, see the official Confluent Kafka
    `Consumer documentation
    <https://docs.confluent.io/platform/current/clients/confluent-kafka-python/#pythonclient-consumer>`__.
    """
    def __init__(self, db: SummaryDAOFactory):
        super(ConsumerLoop, self).__init__()

        logging.basicConfig(
            format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
            level=logging.DEBUG,
            datefmt='%d/%m/%Y %I:%M:%S %p')
        self.logger = logging.getLogger("DispatcherConsumerLoop")

        # Consumer configuration. Must match Stimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'group.id': "dispatcher",
            'auto.offset.reset': "earliest",
            'session.timeout.ms': 10000,
            'enable.auto.commit': True,  # default
            'auto.commit.interval.ms': 5000,  # default
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': StringDeserializer('utf_8')
        }
        self.consumer = DeserializingConsumer(config)
        self.db = db
        self.consumed_msg_schema = TextPostprocessingConsumedMsgSchema()

    def run(self):
        try:
            topics_to_subscribe = [KafkaTopic.READY.value]
            self.consumer.subscribe(topics_to_subscribe)
            self.logger.debug(f'Consumer subscribed to topic(s): '
                              f'{topics_to_subscribe}')

            while not self.stopped():
                msg = self.consumer.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        self.logger.error(
                            f'{msg.topic()} in partition {msg.partition()} '
                            f'reached end at offset {msg.offset()}')
                    elif msg.error():
                        self.logger.error("Undefined error in consumer loop")
                        raise KafkaException(msg.error())
                else:
                    self.logger.debug(f'Message consumed: [key]: {msg.key()}, '
                                      f'[value]: "{msg.value()[:500]} [...]"')

                    data = self.consumed_msg_schema.loads(msg.value())
                    summary = self.db.update_summary(
                        id_=msg.key(),
                        ended_at=datetime.now(),
                        status=SummaryStatus.COMPLETED.value,
                        summary=data['text_postprocessed'],
                        params=data['params']  # validated params
                    )  # important: keys must match DB columns
                    self.logger.debug(f"Consumer message processed. "
                                      f"Summary updated: {summary}")
        finally:
            self.logger.debug("Consumer loop stopped. Closing consumer...")
            # Close down the consumer to commit final offsets.
            self.consumer.close()
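# The ConsumerLoop above relies on a StoppableThread base class that is not
# shown in the snippet. A minimal Event-based implementation commonly looks
# like this (a sketch, not the original project's code):
import threading

class StoppableThread(threading.Thread):
    """Thread with a stop() method; run() loops should poll stopped()."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._stop_event = threading.Event()

    def stop(self):
        self._stop_event.set()

    def stopped(self):
        return self._stop_event.is_set()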
Ejemplo n.º 28
0
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_enriched_event_str = EnrichedEventSchema
    schema_metrics = MetricSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_metrics, schema_registry_client)
    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    avro_deserializer = AvroDeserializer(schema_enriched_event_str,
                                         schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        # Use a random suffix so each run gets its own consumer group.
        'group.id': args.group + str(random.randint(0, 2**31)),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    client_influxdb = InfluxDBClient(host='35.181.155.182', port=8086,
                                     database="dbsaleh2")

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()

            idPersonne = evt["idPersonne"]

            rows = session.execute(GET_ENRICHED_EVENT_QUERY, (idPersonne, ))
            if rows:
                # print(idPersonne, f"rows={rows.all().__len__()}")
                # stat_process(idPersonne, rows)
                # som = rec_process(rows,0,0)
                # print("some", som)

                # row["csp"] = get_value_column_enriched_data(row, "csp")
                # row["paysNaissance"] = get_value_column_enriched_data(row, "paysNaissance")
                #
                #
                # #get_value_column_event_content
                # row['appVersion'] = get_value_column_event_content(row, "appVersion")
                # row['montant'] = get_value_column_event_content(row, "montant")
                # row['androidID'] = get_value_column_event_content(row, "androidID")

                # del rows[0]['eventContent']

                elapsed_time = time.time() - start

                # producer.produce(topic=outputtopic, key=str(uuid4()), value={'metricName': "hystorize", 'time': elapsed_time}, on_delivery=delivery_report)
                # producer.flush()
            else:
                # No enriched event was found; skip the metrics for this message.
                continue

        except Exception as err:
            print(f'Exception in consumer loop: {err}')
            continue

        metrics = [{
            "measurement": "metrics",
            "fields": {
                "metricName": "score",
                "timeforscore": elapsed_time
            }
        }]
        print(elapsed_time)

        client_influxdb.write_points(metrics, database="dbsaleh2")
        producer.produce(topic=outputtopic,
                         value={
                             'metricName': "score",
                             'time': elapsed_time
                         },
                         on_delivery=delivery_report)
        producer.flush()

    consumer.close()
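# The snippet above passes a delivery_report callback to producer.produce()
# without defining it. A typical implementation (an assumption, not the
# original code) just logs per-message delivery success or failure:
def delivery_report(err, msg):
    if err is not None:
        print(f'Delivery failed for record {msg.key()}: {err}')
    else:
        print(f'Record {msg.key()} delivered to {msg.topic()} '
              f'[{msg.partition()}] at offset {msg.offset()}')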
Ejemplo n.º 29
0
from confluent_kafka import DeserializingConsumer

if __name__ == '__main__':
    consumer_conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'kafka-client',
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe(['dbserver1.inventory.customers'])

    print('Kafka Client Listening...')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            print(msg.value())
            print()
        except KeyboardInterrupt:
            break

    consumer.close()
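# If the topic is known to carry UTF-8 text (for instance JSON change events),
# deserializers can be added so that msg.value() returns str instead of raw
# bytes. The payload format here is an assumption, not stated in the example.
from confluent_kafka.serialization import StringDeserializer

consumer_conf = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'kafka-client',
    'auto.offset.reset': "earliest",
    'key.deserializer': StringDeserializer('utf_8'),
    'value.deserializer': StringDeserializer('utf_8')
}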
Ejemplo n.º 30
0
class Broker:
    def __init__(self, consumer_topic, producer_topic, client_id,
                 bootstrap_servers, consumer_proto_class, producer_proto_class,
                 processor, max_thread_calls):
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.client_id = client_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_proto_class = consumer_proto_class
        self.producer_proto_class = producer_proto_class
        self.processor = processor
        self.max_thread_calls = max_thread_calls

        self.kafka_consumer = DeserializingConsumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.client_id,
            'auto.offset.reset': "earliest",
            'value.deserializer': self.deserializer
        })
        self.kafka_consumer.subscribe([self.consumer_topic])

        self.kafka_producer = SerializingProducer({
            'bootstrap.servers': self.bootstrap_servers,
            'queue.buffering.max.messages': 500000,
            'value.serializer': self.serialize
        })

        self.thread_queue = deque(maxlen=self.max_thread_calls)
        self.latest_thread_queue_id = 1

    def deserializer(self, bytes_message, _):
        # Deserialize raw Kafka bytes into an ImageInfo protobuf message.
        message = image_pb2.ImageInfo()
        message.ParseFromString(bytes_message)
        return message

    def serialize(self, message, _):
        return message.SerializeToString()

    def get_thread_id(self):
        result = self.latest_thread_queue_id
        if result == self.max_thread_calls:
            self.latest_thread_queue_id = 1
        else:
            self.latest_thread_queue_id += 1
        return result

    def is_thread_queue_full(self):
        return len(self.thread_queue) == self.max_thread_calls

    def produce_when_ready(self, thread_id, message):
        # Preserve output ordering: block until this thread reaches the tail
        # of the queue, sleeping briefly instead of spinning at full speed.
        while self.thread_queue[-1] != thread_id:
            logging.warning("Thread {} is waiting for its turn in the queue".format(thread_id))
            time.sleep(0.01)
        self.kafka_producer.poll(0.0)
        self.kafka_producer.produce(topic=self.producer_topic, value=message)
        self.thread_queue.pop()

    def call_processor(self, thread_id, value, start_time):
        result = self.processor.process(value)
        self.produce_when_ready(thread_id, result)
        logging.debug("Total time for thead" + str(thread_id) + " is " +
                      str(time.time() - start_time / 1000))

    def run(self):
        while True:
            try:
                if self.is_thread_queue_full():
                    logging.warning(
                        "Thread queue is full, waiting for previous threads to finish"
                    )
                    continue

                msg = self.kafka_consumer.poll(1.0)
                if msg is None or msg.value() is None:
                    logging.warning("No messages from kafka")
                    continue

                caller_thread_id = self.get_thread_id()
                caller_thread = threading.Thread(target=self.call_processor,
                                                 args=(caller_thread_id,
                                                       msg.value(),
                                                       msg.timestamp()[1]))
                self.thread_queue.appendleft(caller_thread_id)
                caller_thread.start()

            except KeyboardInterrupt:
                break

        self.kafka_consumer.close()
        self.kafka_producer.flush()
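# A possible way to wire up the Broker class above. The processor class,
# topic names, and broker address are placeholders, not taken from the
# original snippet; only the constructor signature comes from the class.
class EchoProcessor:
    def process(self, value):
        # A real processor would transform the incoming ImageInfo message.
        return value

broker = Broker(consumer_topic='images-in',
                producer_topic='images-out',
                client_id='image-broker',
                bootstrap_servers='localhost:9092',
                consumer_proto_class=image_pb2.ImageInfo,
                producer_proto_class=image_pb2.ImageInfo,
                processor=EchoProcessor(),
                max_thread_calls=8)
broker.run()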