Example #1
    def __init__(self, consumer=None):
        super().__init__()
        self.logger = logging.getLogger(__package__)
        self.logger.debug("Initializing the consumer")

        self.consumer = consumer
        self.message_handler = HandleMessage()
        self._stop_event = threading.Event()

        # Track whether there is currently a message being processed. Just a raw
        # bool is OK because the subscription is configured to prefetch 1
        # message at a time - i.e. this function should NOT run in parallel
        self._processing = False

        while self.consumer is None:
            try:
                self.logger.debug("Getting the kafka consumer")

                config = kafka_config_from_env()

                config['key.deserializer'] = StringDeserializer('utf_8')
                config['value.deserializer'] = StringDeserializer('utf_8')
                config['on_commit'] = self.on_commit
                config['group.id'] = GROUP_ID
                config['auto.offset.reset'] = 'earliest'
                self.consumer = DeserializingConsumer(config)
            except KafkaException as err:
                self.logger.error("Could not initialize the consumer: %s", err)
                raise ConnectionException(
                    "Could not initialize the consumer") from err

        self.consumer.subscribe([TRANSACTIONS_TOPIC])
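The configuration above registers self.on_commit as the consumer's commit callback, but the method itself is not part of this excerpt. A minimal sketch of what it could look like, assuming the standard confluent-kafka commit-callback signature of (err, partitions):

    def on_commit(self, err, partitions):
        # err is a KafkaError (or None on success); partitions is the list of
        # TopicPartition objects whose offsets were just committed.
        if err is not None:
            self.logger.error("Offset commit failed: %s", err)
        else:
            self.logger.debug("Committed offsets: %s", partitions)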
Example #2
@dataclass  # required for the field(default_factory=...) defaults and __post_init__ below
class Configuration:

    commit_log_topic: str
    snapshot_topic: str
    bootstrap_servers: str
    group_id: str
    batch_timeout_sec: int = 5
    messages_per_transaction: int = 2000

    store_consumer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'group.id': None,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
            'enable.auto.offset.store': False,
            'enable.partition.eof': True,
            'key.deserializer': StringDeserializer(),
            'value.deserializer': JSONDeserializer(),
            # 'stats_cb': publish_statistics,
            # 'statistics.interval.ms': 15000,
        })

    consumer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'group.id': None,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
            'enable.auto.offset.store': False,
            'enable.partition.eof': False,
            'key.deserializer': StringDeserializer(),
            'value.deserializer': JSONDeserializer(),
            # 'value.deserializer': DummyDeserializer(),
            # 'stats_cb': publish_statistics,
            # 'statistics.interval.ms': 15000,
        })

    producer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'transactional.id': None,
            'transaction.timeout.ms': 60000,
            'enable.idempotence': True,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': JSONSerializer(),
            'debug': 'broker,eos',
        })

    def __post_init__(self):
        self.store_consumer['bootstrap.servers'] = \
            self.consumer['bootstrap.servers'] = \
            self.producer['bootstrap.servers'] = \
            self.bootstrap_servers

        self.store_consumer['group.id'] = \
            self.consumer['group.id'] = \
            self.producer['transactional.id'] = \
            self.group_id
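A brief usage sketch for the dataclass above (the broker address, group id, and topic names are placeholders): __post_init__ copies bootstrap_servers and group_id into the nested consumer and producer dictionaries so the three librdkafka configurations stay in sync.

config = Configuration(
    commit_log_topic='commit-log',
    snapshot_topic='snapshots',
    bootstrap_servers='localhost:9092',
    group_id='example-group')
assert config.consumer['bootstrap.servers'] == 'localhost:9092'
assert config.producer['transactional.id'] == 'example-group'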
Example #3
    def __new__(cls):
        # Consumer configuration. Must match Strimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'group.id': "text-preprocessor",
            'auto.offset.reset': "earliest",
            'session.timeout.ms': 10000,
            'enable.auto.commit': True,  # default
            'auto.commit.interval.ms': 5000,  # default
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': StringDeserializer('utf_8')
        }
        return DeserializingConsumer(config)
Example #4
def receive():
    json_deserializer = JSONDeserializer(USER_SCHEMA, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')
    consumer_conf = {
        'bootstrap.servers': 'localhost:9092',
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': 'django-kafka',
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([USER_TOPIC])
    """
    The idea is to start the Kafka consumer when the message is sent to the Kafka producer.
    Resulting in two queues: Task Queue and Message/Content Queue.
    Multi-threading might be an overkill for a simple application, hence the for loop (Temporary). 
    """
    for x in range(200):
        try:
            msg = consumer.poll(timeout=5.0)
            if msg is not None:
                user = msg.value()
                if user is not None:
                    print("User record {}: username: {}\n"
                          "\tdata: {}\n".format(msg.key(), user.username,
                                                user.data))

        except Exception as e:
            print('An exception occurred: {}'.format(e))
            logging.error(traceback.format_exc())
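The docstring above notes that the temporary for loop stands in for a threaded consumer. A minimal sketch of running receive() on a background thread instead (the helper name and daemon flag are illustrative, not part of the original):

import threading

def start_receiver():
    # Run the blocking receive() loop on a daemon thread so the request
    # that triggered it is not blocked while messages are consumed.
    consumer_thread = threading.Thread(target=receive, name='kafka-consumer', daemon=True)
    consumer_thread.start()
    return consumer_thread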
Example #5
    @classmethod
    def from_file(cls, config_file_path, **kwargs):
        """
        config_file_path = path to the config file to use in defining this consumer

        Possible keyword arguments:
        logger = the logger object to use
        Any other keyword arguments are added to the configuration, with underscores replaced by dots.
        """
        parser = ConfigFileParser(config_file_path, **kwargs)
        configs = parser.get_config_dict_for_groups(['cluster', 'consumer'])
        for argname, arg in kwargs.items():
            if argname == 'logger':
                continue
            configs[argname.replace('_', '.')] = arg
        # if the group.id has been set to "new", generate a new group ID
        if 'group.id' in configs and configs['group.id'].lower() == 'new':
            configs['group.id'] = str(uuid.uuid1())
        # if one of the recognized deserializers has been given by name as the key/value
        # deserializer config parameter, replace the name with the actual class instance
        names_to_classes = {
            'DoubleDeserializer': DoubleDeserializer(),
            'IntegerDeserializer': IntegerDeserializer(),
            'StringDeserializer': StringDeserializer(),
            'DataFileChunkDeserializer': DataFileChunkDeserializer(),
        }
        configs_to_check = ['key.deserializer', 'value.deserializer']
        for cfg in configs_to_check:
            if cfg in configs.keys():
                if configs[cfg] in names_to_classes:
                    configs[cfg] = names_to_classes[configs[cfg]]
        return cls(configs)
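As the docstring explains, extra keyword arguments are folded into the configuration with underscores replaced by dots. A hypothetical call might therefore look like this (the class name, file path, and logger are placeholders):

consumer = MyConsumer.from_file(
    'test.config',
    logger=logger,
    group_id='new',                                  # becomes 'group.id' and is replaced by a fresh UUID
    value_deserializer='DataFileChunkDeserializer')  # name is swapped for the actual deserializer instance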
Example #6
    def consume(self, count: int):
        consumer = DeserializingConsumer({
            'bootstrap.servers': self.brokers,
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': self._make_deserializer(),
            'group.id': self.group,
            'auto.offset.reset': "earliest"
        })
        consumer.subscribe([self.topic])

        self.logger.info("Consuming %d %s records from topic %s with group %s",
                         count, self.schema_type.name, self.topic, self.group)
        while self.consumed < count:
            msg = consumer.poll(1)
            if msg is None:
                continue
            payload = msg.value()
            self.logger.debug("Consumed %d at %d", payload.val, msg.offset())
            assert payload.val == self.consumed
            self.consumed += 1

        consumer.close()
Example #7
def test_string_serialization(kafka_cluster, data, codec):
    """
    Tests basic unicode serialization/deserialization functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        data (unicode): input data

        codec (str): encoding type

    """
    topic = kafka_cluster.create_topic("serialization-string")

    producer = kafka_cluster.producer(value_serializer=StringSerializer(codec))

    producer.produce(topic, value=data)
    producer.flush()

    consumer = kafka_cluster.consumer(
        value_deserializer=StringDeserializer(codec))

    consumer.subscribe([topic])

    msg = consumer.poll()

    assert msg.value() == data

    consumer.close()
Example #8
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_color}\n"
                      f"\tfavorite_color: {user.favorite_number}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
Example #9
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #10
def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": [ "name", "favorite_number", "favorite_color" ]
    }
    """
    json_deserializer = JSONDeserializer(schema_str, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_number,
                                                      user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #11
    def create_deserializer(self):
        self.deserializer = {}
        if self.in_topic is not None:
            for topic in self.in_topic:
                if self.in_schema[topic] is None:
                    self.deserializer[topic] = StringDeserializer("utf_8")
                else:
                    schema_str = self.in_schema[topic].schema_str
                    self.deserializer[topic] = AvroDeserializer(
                        schema_str, self.schema_registry)
Example #12
File: enrich.py, Project: MED-SALAH/lclfpy
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)

        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
Example #13
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})

    avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")

                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(
                            tenant, AIRQO_BASE_URL)

                        group_measurements = list(
                            group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements),
                                       int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[
                                i:i + int(REQUEST_BODY_SIZE)]

                            device_registry.insert_events(measurements_list)

                except Exception as ex:
                    print(ex)

        except KeyboardInterrupt:
            break

    consumer.close()
Example #14
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
Example #15
    def __init__(self, db: SummaryDAOFactory):
        super(ConsumerLoop, self).__init__()

        logging.basicConfig(
            format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
            level=logging.DEBUG,
            datefmt='%d/%m/%Y %I:%M:%S %p')
        self.logger = logging.getLogger("DispatcherConsumerLoop")

        # Consumer configuration. Must match Strimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'group.id': "dispatcher",
            'auto.offset.reset': "earliest",
            'session.timeout.ms': 10000,
            'enable.auto.commit': True,  # default
            'auto.commit.interval.ms': 5000,  # default
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': StringDeserializer('utf_8')
        }
        self.consumer = DeserializingConsumer(config)
        self.db = db
        self.consumed_msg_schema = TextPostprocessingConsumedMsgSchema()
Example #16
def main():
    top = 20
    consumer = DeserializingConsumer({
        'bootstrap.servers': os.environ['KAFKA_BROKERS'],
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'SCRAM-SHA-512',
        'sasl.password': os.environ['KAFKA_PASS'],
        'sasl.username': os.environ['KAFKA_USER'],
        'ssl.ca.location': '/usr/local/share/ca-certificates/Yandex/YandexCA.crt',
        'group.id': 'group1',
        'key.deserializer': StringDeserializer(),
        'value.deserializer': LongDeserializer(),
    })

    consumer.subscribe(['streams-wordcount-output'])

    try:
        frequencies = []
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                if frequencies:
                    print('==============================================')
                    print(f'Current list of top {top} most frequent words:')
                    frequencies = sorted(frequencies,
                                         key=lambda x: x[1],
                                         reverse=True)
                    for frequency in frequencies[0:top]:
                        print(f'{frequency[0]}: {frequency[1]}')
                    frequencies.clear()
                continue
            elif msg.error():
                print('error: {}'.format(msg.error()))
            else:
                frequencies.append((msg.key(), msg.value()))
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()
Example #17
def getConfigs():
    value_deserializer = ProtobufDeserializer(FoodPreferences_pb2.PersonFood)

    configs = {
        'bootstrap.servers': '<CCLOUD_DNS>',
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'PLAIN',
        'sasl.username': '******',
        'sasl.password': '******',
        'group.id': 'consumingPythonWorld',
        'client.id': 'pythonConsumption',
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': value_deserializer
    }

    return configs
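A short usage sketch for the helper above, assuming DeserializingConsumer is imported as in the other examples (the topic name is a placeholder):

consumer = DeserializingConsumer(getConfigs())
consumer.subscribe(['food-preferences'])
msg = consumer.poll(1.0)
if msg is not None and msg.value() is not None:
    print(msg.key(), msg.value())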
Example #18
def main():
    string_deserializer = StringDeserializer('utf_8')
    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }

    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            msg = consumer.poll(timeout=1000)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    messages = sorted(messages,
                                      key=lambda x: x.price,
                                      reverse=True)[0:10]
                    print(messages)
                consumer.commit(asynchronous=False)

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
Example #19
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager,
                                           msg.value())).start()
            except ConsumeError as e:
                print(
                    f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}'
                )
    finally:
        consumer.close()
Example #20
def main(args):
    topic = args.topic
    schema_str = MetricSchema
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])
    client = InfluxDBClient(host=args.host_influx,
                            port=8086,
                            username='******',
                            password='******')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            timespent = msg.value()
            if timespent is not None:
                print("time ==>", timespent)
                print(timespent["metricName"])
                print(timespent["time"])
                client.switch_database('datascience')
                json_body = [{
                    "measurement": "metric",
                    "fields": {
                        "name": timespent["metricName"],
                        "value": timespent["time"]
                    }
                }]
                client.write_points(json_body)
        except KeyboardInterrupt:
            break
    consumer.close()
Example #21
    def create_consumer(self, registry_client):
        """
        Subscribes to the topic defined in the configs and creates a consumer to deserialize messages from that topic.

        :param registry_client: SchemaRegistryClient object
            get this from register_client()

        :return: DeserializingConsumer object
        """
        metadata_schema = None
        topic = None
        if self.metadata_type == "COLLECTION":
            metadata_schema = registry_client.get_latest_version(
                self.collection_topic + '-value').schema.schema_str
            topic = self.collection_topic

        if self.metadata_type == "GRANULE":
            metadata_schema = registry_client.get_latest_version(
                self.granule_topic + '-value').schema.schema_str
            topic = self.granule_topic

        metadata_deserializer = AvroDeserializer(metadata_schema,
                                                 registry_client)

        consumer_conf = {'bootstrap.servers': self.brokers}

        if self.security:
            consumer_conf['security.protocol'] = 'SSL'
            consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
            consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
            consumer_conf['ssl.certificate.location'] = self.conf['security'][
                'certLoc']

        meta_consumer_conf = consumer_conf
        meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
        meta_consumer_conf['value.deserializer'] = metadata_deserializer
        meta_consumer_conf['group.id'] = self.group_id
        meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset

        metadata_consumer = DeserializingConsumer(meta_consumer_conf)
        metadata_consumer.subscribe([topic])
        return metadata_consumer
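A usage sketch for the method above, assuming the surrounding class also provides the register_client() helper mentioned in the docstring (the wrapper method below is hypothetical):

    def consume_one_record(self):
        # Build the registry client, create the deserializing consumer,
        # and poll a single metadata record (None if nothing arrived).
        registry_client = self.register_client()
        metadata_consumer = self.create_consumer(registry_client)
        msg = metadata_consumer.poll(1.0)
        return None if msg is None else msg.value()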
Example #22
    def __init__(self, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')
        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)
        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
Example #23
    def __init__(self, consumer_name, value_schema, topic_name = "kafka-avro-producer", groupID = 'KafkaAvroConsumer', autocommit = True):

        # Consumer name for logging purposes
        self.logging_prefix = '['+ consumer_name + '][KafkaAvroConsumer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
 
 
        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')

         # Get Schema for the value
        self.schema_id_value = self.schema_registry_client.get_latest_version(topic_name + "-value").schema_id
        # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
        self.value_schema = self.schema_registry_client.get_schema(self.schema_id_value).schema_str
        print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
        print(self.logging_prefix + ' - Value Schema:')
        print(self.logging_prefix + ' - -------------\n')
        print(self.logging_prefix + ' - ' + self.value_schema + '\n')

        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(self.value_schema,self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(groupID, autocommit, 
                                                                        self.key_deserializer,
                                                                        self.value_deserializer)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)

        # Print consumer configuration
        EventBackboneConfig.printConsumerConfiguration(self.logging_prefix,self.consumer_conf,self.schema_registry_conf['url'])

        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
Example #24
def receive_record(args):
    """ Receives Record using a DeserializingConsumer & AvroDeserializer """
    topics = [args.topic.rstrip()]

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_deserializer = AvroDeserializer(schema_registry_client, DATA_SCHEMA,
                                         dict_to_data)

    string_deserializer = StringDeserializer('utf_8')

    consumer_config = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': 'earliest'
    }

    consumer = DeserializingConsumer(consumer_config)
    consumer.subscribe(topics)

    print(f'Consuming data records from topic(s) {topics}. ^C to exit.')
    while True:
        try:
            # SIGINT can't be handled when polling; keep the poll timeout bounded (10 seconds here).
            msg = consumer.poll(10.0)
            if msg is None:
                print('\t---Waiting. . .')
                continue

            data = msg.value()
            if data is not None:
                print(f'Data record {msg.key()}:\n' f'\tValues: {data}')
        except KeyboardInterrupt:
            break
    print('\nClosing consumer.')
    consumer.close()
Example #25
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    schema_obj = schema_registry_client.get_latest_version(
        subject_name='example_serde_json-value')

    json_deserializer = JSONDeserializer(schema_obj.schema.schema_str,
                                         from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_color}\n"
                      f"\tfavorite_color: {user.favorite_number}\n")
        except KeyboardInterrupt:
            break
    consumer.close()
Example #26
    def __init__(
        self,
        topic,
        value_deserializer,
        num_workers,
        num_threads,
        kafka_config,
        schema_registry_url,
        callback,
    ):
        logger.debug(
            f'Create a kafka consumer for topic {topic} with {num_workers} workers and {num_threads} threads',
        )
        self.topic = topic
        self.num_threads = num_threads
        self.num_workers = num_workers

        self.kafka_config = kafka_config
        self.kafka_config = {
            'key.deserializer': StringDeserializer('utf_8'),
            'enable.auto.commit': False,
            'auto.offset.reset': 'latest',
            'value.deserializer': make_deserializer(
                topic=self.topic,
                from_dict=value_deserializer.from_dict,
                schema_registry_url=schema_registry_url,
            ),
        }
        self.kafka_config.update(kafka_config)

        self.workers = []
        self.callback = callback
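The constructor above only stores the configuration. A hypothetical sketch of how such a class might be driven, with one consumer per worker thread and an assumed callback(key, value) signature (threading is presumed imported; none of this is part of the original class):

    def start(self):
        def _run():
            consumer = DeserializingConsumer(self.kafka_config)
            consumer.subscribe([self.topic])
            try:
                while True:
                    msg = consumer.poll(1.0)
                    if msg is None or msg.error():
                        continue
                    self.callback(msg.key(), msg.value())
                    # enable.auto.commit is False above, so commit explicitly
                    consumer.commit(asynchronous=False)
            finally:
                consumer.close()

        for _ in range(self.num_workers):
            worker = threading.Thread(target=_run, daemon=True)
            worker.start()
            self.workers.append(worker)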
Example #27
File: test_io.py, Project: MED-SALAH/lclfpy
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    loop = asyncio.get_event_loop()

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            print("msg=>", evt)

            def enrich(evt):
                print("evt", evt)
                if evt is not None:
                    print("récupérer dans kafka")
                    row = session.execute(
                        GET_ENRICHED_DATA_QUERY,
                        (evt["EventHeader"]["acteurDeclencheur"]["idPersonne"],
                         )).one()

                    if row:
                        evt['EnrichedData'] = row
                        # evt['EventBusinessContext'] = evt["EventBusinessContext"][1]
                        EnrichedEvent = {
                            "eventId":
                            evt["EventHeader"]["eventId"],
                            "dateTimeRef":
                            evt["EventHeader"]["dateTimeRef"],
                            "nomenclatureEv":
                            evt["EventHeader"]["nomenclatureEv"],
                            "canal":
                            evt["EventHeader"]["canal"],
                            "media":
                            evt["EventHeader"]["media"],
                            "schemaVersion":
                            evt["EventHeader"]["schemaVersion"],
                            "headerVersion":
                            evt["EventHeader"]["headerVersion"],
                            "serveur":
                            evt["EventHeader"]["serveur"],
                            "adresseIP":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["adresseIP"],
                            "idTelematique":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["idTelematique"],
                            "idPersonne":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["idPersonne"],
                            "dateNaissance":
                            row["dateNaissance"],
                            "paysResidence":
                            row["paysResidence"],
                            "paysNaissance":
                            row["paysNaissance"],
                            "revenusAnnuel":
                            row["revenusAnnuel"],
                            "csp":
                            row["csp"],
                            "EventBusinessContext":
                            evt["EventBusinessContext"]
                        }

                        producer.produce(topic=outputtopic,
                                         key=str(uuid4()),
                                         value=EnrichedEvent,
                                         on_delivery=delivery_report)
                        producer.flush()

            async_enrich = async_wrap(enrich)
            loop.run_until_complete(async_enrich(evt))

        except Exception:
            print('Exception')
            continue

    consumer.close()
Example #28
import os
import time

from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.serialization import StringDeserializer
from jlab_jaws.avro.subject_schemas.serde import ActiveAlarmSerde
from jlab_jaws.eventsource.table import EventSourceTable
from tabulate import tabulate

from common import get_row_header

bootstrap_servers = os.environ.get('BOOTSTRAP_SERVERS', 'localhost:9092')

sr_conf = {'url': os.environ.get('SCHEMA_REGISTRY', 'http://localhost:8081')}
schema_registry_client = SchemaRegistryClient(sr_conf)

key_deserializer = StringDeserializer()
value_deserializer = ActiveAlarmSerde.deserializer(schema_registry_client)


def get_row(msg):
    timestamp = msg.timestamp()
    headers = msg.headers()
    key = msg.key()
    value = msg.value()

    if value is None:
        row = [key, None]
    else:
        row = [key, value]
Example #29
    consumer = kafka_cluster.consumer(
        value_deserializer=StringDeserializer(codec))

    consumer.subscribe([topic])

    msg = consumer.poll()

    assert msg.value() == data

    consumer.close()


@pytest.mark.parametrize(
    "key_serializer, value_serializer, key_deserializer, value_deserializer, key, value",  # noqa: E501
    [(DoubleSerializer(), StringSerializer('utf_8'), DoubleDeserializer(),
      StringDeserializer(), -31.2168215450814477, u'Jämtland'),
     (StringSerializer('utf_16'), DoubleSerializer(),
      StringDeserializer('utf_16'), DoubleDeserializer(), u'Härjedalen',
      1.2168215450814477)])
def test_mixed_serialization(kafka_cluster, key_serializer, value_serializer,
                             key_deserializer, value_deserializer, key, value):
    """
    Tests basic mixed serializer/deserializer functionality.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        key_serializer (Serializer): serializer to test

        key_deserializer (Deserializer): deserializer to validate serializer
Example #30
# Home Assistant. Control a light via home assistant REST API.
# The Hops/Kafka part is inspired by Ahmad Al-Shishtawy's blog at
# https://www.logicalclocks.com/blog/using-an-external-python-kafka-client-to-interact-with-a-hopsworks-cluster
#
#

from confluent_kafka import DeserializingConsumer
from confluent_kafka.serialization import StringDeserializer
import toml, json
import requests

# Load HopsWorks Kafka configuration - and Home Assistant API token
conf = toml.load("config.toml")

# Initialize a simple String deserializer for the key and value
string_deserializer = StringDeserializer('utf_8')

# Initialize the consumer
consumer_conf = {
    'bootstrap.servers': conf['hops']['url'] + ':' + conf['kafka']['port'],
    'security.protocol': 'SSL',
    'ssl.ca.location': conf['project']['ca_file'],
    'ssl.certificate.location': conf['project']['certificate_file'],
    'ssl.key.location': conf['project']['key_file'],
    'ssl.key.password': conf['project']['key_password'],
    'key.deserializer': string_deserializer,
    'value.deserializer': string_deserializer,
    'group.id': conf['kafka']['consumer']['group_id'],
    'auto.offset.reset': conf['kafka']['consumer']['auto_offset_reset'],
}
consumer = DeserializingConsumer(consumer_conf)
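The example ends once the consumer is constructed. A minimal sketch of the remaining poll loop that drives a light through the Home Assistant REST API could look like this (the 'hass' section of config.toml, its keys, the topic key, the message payload format, and the entity id are assumptions, not part of the original):

consumer.subscribe([conf['kafka']['topic']])  # topic key in config.toml is an assumption

headers = {
    'Authorization': 'Bearer ' + conf['hass']['token'],
    'Content-Type': 'application/json',
}

while True:
    msg = consumer.poll(1.0)
    if msg is None or msg.value() is None:
        continue
    command = json.loads(msg.value())  # e.g. {"state": "on"}
    service = 'turn_on' if command.get('state') == 'on' else 'turn_off'
    requests.post(conf['hass']['url'] + '/api/services/light/' + service,
                  headers=headers,
                  json={'entity_id': 'light.living_room'})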