Example 1
import json
import sys

from confluent_kafka import DeserializingConsumer, KafkaError, KafkaException
from confluent_kafka.serialization import StringDeserializer


def main():
    string_deserializer = StringDeserializer('utf_8')
    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }

    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            msg = consumer.poll(timeout=1.0)  # poll timeout is in seconds, not milliseconds
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    messages = sorted(messages,
                                      key=lambda x: x.price,
                                      reverse=True)[0:10]
                    print(messages)
                consumer.commit(asynchronous=False)

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
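
The example relies on a dict_to_transaction helper that is not shown. A minimal sketch of what it might look like, assuming a simple transaction type whose only field documented by the snippet is price (used in the sort above):

from dataclasses import dataclass


# Hypothetical helper and type for illustration; the original version may differ.
@dataclass
class Transaction:
    price: float


def dict_to_transaction(obj):
    # Only `price` is known from the example; real transactions likely carry more fields.
    return Transaction(price=float(obj['price']))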
Example 2
import threading

from confluent_kafka import DeserializingConsumer, KafkaException
from confluent_kafka.error import ConsumeError
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer

# config, schemas and handlers are project-specific modules from the surrounding application.


def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager,
                                           msg.value())).start()
            except ConsumeError as e:
                print(
                    f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}'
                )
    finally:
        consumer.close()
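
run_consumer reads its settings from project-level config and schemas modules that are not included above. Purely as an assumption about their shape (the key names come from the snippet; the values are placeholders):

# Hypothetical stand-ins for the project-specific `config` and `schemas` modules.
config = {
    'kafka': {
        'servers': 'localhost:9092',                 # bootstrap.servers
        'schema_registry': 'http://localhost:8081',  # Schema Registry URL
        'runs-topic': 'runs'                         # topic the consumer subscribes to
    }
}

# schemas.run_record_schema would be the Avro schema string for run records, e.g.:
run_record_schema = """
{
  "type": "record",
  "name": "RunRecord",
  "fields": [
    {"name": "run_id", "type": "string"},
    {"name": "image", "type": "string"}
  ]
}
"""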
Example 3
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer

# EventBackboneConfig and POLL_TIMEOUT come from the surrounding project
# (a sketch of both follows the class).


class KafkaAvroConsumer:

    def __init__(self, consumer_name, value_schema, topic_name="kafka-avro-producer",
                 groupID='KafkaAvroConsumer', autocommit=True):

        # Consumer name for logging purposes
        self.logging_prefix = '['+ consumer_name + '][KafkaAvroConsumer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
 
 
        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')

         # Get Schema for the value
        self.schema_id_value = self.schema_registry_client.get_latest_version(topic_name + "-value").schema_id
        # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
        self.value_schema = self.schema_registry_client.get_schema(self.schema_id_value).schema_str
        print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name + '-value'))
        print(self.logging_prefix + ' - Value Schema:')
        print(self.logging_prefix + ' - -------------\n')
        print(self.logging_prefix + ' - ' + self.value_schema + '\n')

        # Value Deserializer
        # The Avro deserializer currently requires the schema to be provided up front;
        # this may change in future client versions:
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(self.value_schema, self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(groupID, autocommit, 
                                                                        self.key_deserializer,
                                                                        self.value_deserializer)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)

        # Print consumer configuration
        EventBackboneConfig.printConsumerConfiguration(self.logging_prefix, self.consumer_conf, self.schema_registry_conf['url'])

        # Subscribe to the topic
        self.consumer.subscribe([topic_name])
    
    def traceResponse(self, msg):
        print(self.logging_prefix + ' - New event received\n\tTopic: {}\n\tPartition: {}\n\tOffset: {}\n\tkey: {}\n\tvalue: {}\n'
                    .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    # Polls for next event
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=POLL_TIMEOUT)
        # Validate the returned message
        if msg is None:
            print(self.logging_prefix + ' - [INFO] - No new messages on the topic')
            return None
        elif msg.error():
            if ("PARTITION_EOF" in msg.error()):
                print(self.logging_prefix + ' - [INFO] - End of partition')
            else:
                print(self.logging_prefix + ' - [ERROR] - Consumer error: {}'.format(msg.error()))
            return None
        else:
            # Print the message
            self.traceResponse(msg)
        return msg.value()

   
    
    # Polls for the next event but returns the raw event
    def pollNextRawEvent(self):
        records = self.consumer.poll(timeout=POLL_TIMEOUT)
        if records is None:
            return None
        if records.error():
            # Stop reading if we find end of partition in the error message
            if ("PARTITION_EOF" in records.error()):
                return None
            else:
                print(self.logging_prefix + ' - [ERROR] - Consumer error: {}'.format(records.error()))
                return None
        else:
            self.traceResponse(records)
        return records


    def commitEvent(self, event):
        self.consumer.commit(event)

    def close(self):
        self.consumer.close()
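
KafkaAvroConsumer depends on an EventBackboneConfig helper and a POLL_TIMEOUT constant that are not part of the snippet. A minimal sketch of what they might provide, purely as an assumption about the surrounding project:

# Hypothetical stand-ins for the project-specific pieces the class relies on.
POLL_TIMEOUT = 10.0  # seconds


class EventBackboneConfig:

    @staticmethod
    def getSchemaRegistryConf():
        # 'url' is the only setting SchemaRegistryClient strictly requires
        return {'url': 'http://localhost:8081'}

    @staticmethod
    def getConsumerConfiguration(groupID, autocommit, key_deserializer, value_deserializer):
        return {
            'bootstrap.servers': 'localhost:9092',
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': autocommit,
            'key.deserializer': key_deserializer,
            'value.deserializer': value_deserializer
        }

    @staticmethod
    def printConsumerConfiguration(prefix, conf, schema_registry_url):
        print(prefix + ' - Consumer configuration: ' + str(conf))
        print(prefix + ' - Schema registry URL: ' + schema_registry_url)

A usage example, assuming the topic's value schema is registered in the Schema Registry:

consumer = KafkaAvroConsumer('my-app', None, topic_name='kafka-avro-producer')
event = consumer.pollNextEvent()
consumer.close()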
Example 4
    # Excerpt from a larger consumer setup function: conf, key_avro_deserializer,
    # value_avro_deserializer, topic and r are defined earlier in the original source.
    consumer_config = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'key.deserializer': key_avro_deserializer,
        'value.deserializer': value_avro_deserializer,
        'group.id': '1'
    }
    consumer = DeserializingConsumer(consumer_config)

    # Create the SQL interface (note: newer SQLAlchemy versions require the "postgresql://" scheme)
    db = create_engine("postgres://*****:*****@postgres:5432/ngrams")

    # Wait until the Kafka topic is up before proceeding
    kafka_utils.wait_topic(consumer, topic)

    # Assign partitions to the consumer
    partitions = []
    partition = TopicPartition(topic=topic, partition=0, offset=0)
    partitions.append(partition)
    consumer.assign(partitions)
    # Maybe this should be changed to subscribe() so the on_assign and on_revoke callbacks can be used

    # consume_messages loops indefinitely and only returns on a keyboard interrupt
    msg = kafka_utils.consume_messages(consumer, db,
                                       kafka_utils.send_ngrams_postgres)

    # After exiting the poll loop, commit the final offsets and close the consumer
    consumer.commit(message=msg, asynchronous=False)
    consumer.close()
    # Finally, disconnect from the Redis server
    r.connection_pool.disconnect()
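
The fragment above also leans on a kafka_utils module that is not shown. As an illustration only, a wait_topic helper along these lines could be built on Consumer.list_topics(), which DeserializingConsumer inherits:

import time


# Hypothetical sketch of kafka_utils.wait_topic; the real helper may differ.
def wait_topic(consumer, topic, interval=5.0):
    """Block until `topic` appears in the cluster metadata."""
    while True:
        metadata = consumer.list_topics(timeout=10.0)
        if topic in metadata.topics:
            return
        print('Topic {} not available yet, retrying...'.format(topic))
        time.sleep(interval)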