Example #1
def consume(ctx, group, client, topics):
    """Consume an Avro-serialized message.
    """
    topics = [str(t) for t in topics]
    print('Starting consumer\n\tGroup: {group}\n\tClient: {client}\n\t'
          'Topic(s): {topics}'.format(group=group,
                                      client=client,
                                      topics=', '.join(topics)))

    settings = {
        'bootstrap.servers': get_broker_url(ctx.parent),
        'schema.registry.url': get_registry_url(ctx.parent),
        # Identify the consumer group
        'group.id': group,
        # Identify the client within the consumer group
        'client.id': client,
        # Automatically commit the current offset for this consumer
        'enable.auto.commit': True,
        'session.timeout.ms': 6000,
        'default.topic.config': {
            # "smallest" means that old messages aren't ignored if an offset
            # hasn't been committed yet. Otherwise the default is "latest",
            # which has the consumer only pick up new messages.
            'auto.offset.reset': 'smallest'
        }
    }
    c = avro.AvroConsumer(settings)
    c.subscribe(topics)

    try:
        while True:
            try:
                msg = c.poll(0.1)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(
                    msg, e))
                break

            if msg is None:
                continue

            elif not msg.error():
                print('Received message (topic={topic}, key={key}):'
                      '\n\t{message}'.format(topic=msg.topic(),
                                             key=msg.key(),
                                             message=msg.value()))

            elif msg.error().code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))

            else:
                print('Error occurred: {0}'.format(msg.error().str()))

    except KeyboardInterrupt:
        pass

    finally:
        # Close the consumer so that the consumer group can rebalance
        c.close()
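
For reference, a minimal producer counterpart that this consumer could read from might look like the sketch below (the broker/registry URLs, topic name, and schema here are assumptions, not part of the example above):

from confluent_kafka import avro

value_schema = avro.loads('{"type": "string"}')  # assumed value schema
p = avro.AvroProducer({
    'bootstrap.servers': 'localhost:9092',          # assumed broker
    'schema.registry.url': 'http://localhost:8081'  # assumed registry
}, default_value_schema=value_schema)
p.produce(topic='my-topic', value='hello')  # assumed topic name
p.flush()
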
Example #2
    def connect(self):
        logger = logging.getLogger()

        if self.type == "None":
            return

        if self.type == "Kafka":
            logger.debug("brokers: {}, group: {}, topic: {}".format(
                self.brokers, self.group, self.topic))
            self.cons = Consumer({
                'bootstrap.servers': self.brokers,
                'group.id': self.group,
                'default.topic.config': {
                    'auto.offset.reset': 'smallest',
                }
            })

            self.cons.subscribe([self.topic])
        elif self.type == "CFKafka":
            logger.debug(
                "brokers: {}, schema_reg: {}, group: {}, topic: {}".format(
                    self.brokers, self.schema_reg, self.group, self.topic))
            self.cons = avro.AvroConsumer({
                'bootstrap.servers': self.brokers,
                'schema.registry.url': self.schema_reg,
                'group.id': self.group,
                'default.topic.config': {
                    'auto.offset.reset': 'smallest',
                }
            })

            self.cons.subscribe([self.topic])
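
The method above only creates the consumer and subscribes; a companion read loop for the same class might look like this sketch (the method name read and the one-second timeout are assumptions):

    def read(self):
        # Poll briefly; return the decoded value, or None on timeout/error.
        msg = self.cons.poll(1.0)
        if msg is None or msg.error():
            return None
        return msg.value()
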
Example #3
def verify_avro():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests',
                            'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
        'default.topic.config': {
            'produce.offset.report': True
        }
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=float_value,
             key_schema=prim_float),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=str_value,
             key_schema=prim_string),
        dict(value=float_value,
             value_schema=prim_float,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=float_value,
             value_schema=prim_float,
             key=str_value,
             key_schema=prim_string),
        dict(value=str_value,
             value_schema=prim_string,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=str_value,
             value_schema=prim_string,
             key=float_value,
             key_schema=prim_float),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry())
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(), msg.key(),
                   msg.value(), tstype, timestamp))

            # "async" is a reserved word in Python 3.7+; the parameter is
            # named "asynchronous"
            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
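
The configs in this test refer to error_cb and print_commit_result, which are defined elsewhere in the test module; minimal stand-ins with the expected callback signatures would be (a sketch, not the originals):

def error_cb(err):
    # Global error callback, invoked by the client for e.g. broker failures.
    print('Error: %s' % err)

def print_commit_result(err, partitions):
    # on_commit callback: err is None when the offsets were committed.
    if err is not None:
        print('Commit failed: %s' % err)
    else:
        print('Committed: %s' % partitions)
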
Example #4
def verify_avro_https():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests',
                            'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request
    }

    conf.update(testconf.get('schema_registry_https', {}))

    p = avro.AvroProducer(conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=float_value,
             key_schema=prim_float),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=str_value,
             key_schema=prim_string),
        dict(value=float_value,
             value_schema=prim_float,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=float_value,
             value_schema=prim_float,
             key=str_value,
             key_schema=prim_string),
        dict(value=str_value,
             value_schema=prim_string,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=str_value,
             value_schema=prim_string,
             key=float_value,
             key_schema=prim_float),
        # Verify the identity check allows falsy object values (e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='',
             value_schema=prim_string,
             key=0.0,
             key_schema=prim_float),
        dict(value=0.0,
             value_schema=prim_float,
             key='',
             key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': generate_group_id(),
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }

    conf.update(testconf.get('schema_registry_https', {}))

    c = avro.AvroConsumer(conf)
    c.subscribe([t['topic'] for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(0)

        if msg is None or msg.error():
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(), msg.key(),
               msg.value(), tstype, timestamp))

        # omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])

        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items() if v is not None}

        if isinstance(msg.value(), dict):
            record_value = {
                k: v
                for k, v in msg.value().items() if v is not None
            }

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
Example #5
def verify_avro():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests',
                            'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=float_value,
             key_schema=prim_float),
        dict(value={'name': 'abc'},
             value_schema=basic,
             key=str_value,
             key_schema=prim_string),
        dict(value=float_value,
             value_schema=prim_float,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=float_value,
             value_schema=prim_float,
             key=str_value,
             key_schema=prim_string),
        dict(value=str_value,
             value_schema=prim_string,
             key={'name': 'abc'},
             key_schema=basic),
        dict(value=str_value,
             value_schema=prim_string,
             key=float_value,
             key_schema=prim_float),
        # Verify the identity check allows falsy object values (e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string, key=0.,
             key_schema=prim_float),
        dict(value=0., value_schema=prim_float, key='',
             key_schema=prim_string),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry())
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(), msg.key(),
                   msg.value(), tstype, timestamp))

            # omit empty Avro fields from payload for comparison
            record_key = msg.key()
            record_value = msg.value()
            if isinstance(msg.key(), dict):
                record_key = {
                    k: v
                    for k, v in msg.key().items() if v is not None
                }

            if isinstance(msg.value(), dict):
                record_value = {
                    k: v
                    for k, v in msg.value().items() if v is not None
                }

            assert combo.get('key') == record_key
            assert combo.get('value') == record_value

            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
Example #6
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with an explicit reader schema works."""
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests',
                            'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
        'default.topic.config': {
            'produce.offset.report': True
        }
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    key_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    schema1 = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    schema2 = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))
    float_value = 32.
    val = {"name": "abc", "favorite_number": 42, "favorite_colo": "orange"}
    val1 = {"name": "abc"}

    combinations = [
        dict(value=val,
             value_schema=schema2,
             key=float_value,
             key_schema=key_schema,
             reader_value_schema=schema1,
             reader_key_schema=key_schema),
        dict(value=val1,
             value_schema=schema1,
             key=float_value,
             key_schema=key_schema,
             reader_value_schema=schema2,
             reader_key_schema=key_schema),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    for i, combo in enumerate(combinations):
        reader_key_schema = combo.pop("reader_key_schema")
        reader_value_schema = combo.pop("reader_value_schema")
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf,
                                  reader_key_schema=reader_key_schema,
                                  reader_value_schema=reader_value_schema)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry(),
                                  reader_key_schema=reader_key_schema,
                                  reader_value_schema=reader_value_schema)

        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(), msg.key(),
                   msg.value(), tstype, timestamp))

            # omit empty Avro fields from payload for comparison
            record_key = msg.key()
            record_value = msg.value()
            if isinstance(msg.key(), dict):
                record_key = {
                    k: v
                    for k, v in msg.key().items() if v is not None
                }

            if isinstance(msg.value(), dict):
                record_value = {
                    k: v
                    for k, v in msg.value().items() if v is not None
                }

            assert combo.get('key') == record_key
            assert combo.get('value')['name'] == record_value['name']
            c.commit(msg, asynchronous=False)
        # Close consumer
        c.close()
Example #7
def run_avro_loop(producer_conf, consumer_conf):
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')

    p = avro.AvroProducer(producer_conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float, key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string, key=float_value, key_schema=prim_float),
        # Verify the identity check allows falsy object values (e.g., 0, empty string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string, key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float, key='', key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf)
    c.subscribe([t['topic'] for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)

        if msg is None:
            continue
        if msg.error():
            print(msg.error())
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), tstype, timestamp))

        # omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])

        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items() if v is not None}

        if isinstance(msg.value(), dict):
            record_value = {k: v for k, v in msg.value().items() if v is not None}

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
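
run_avro_loop receives both config dicts from its caller; a plausible invocation looks like this sketch (the URLs are assumptions):

producer_conf = {
    'bootstrap.servers': 'localhost:9092',          # assumed broker
    'schema.registry.url': 'http://localhost:8081'  # assumed registry
}
consumer_conf = dict(producer_conf, **{
    'group.id': str(uuid.uuid4()),  # fresh group for a clean read
    'auto.offset.reset': 'earliest'
})
run_avro_loop(producer_conf, consumer_conf)
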
Example #8
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with an explicit reader schema works."""
    from confluent_kafka import avro

    base_conf = {'bootstrap.servers': bootstrap_servers,
                 'error_cb': error_cb,
                 'schema.registry.url': schema_registry_url}

    consumer_conf = dict(base_conf, **{
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'on_commit': print_commit_result,
        'auto.offset.reset': 'earliest'})

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')
    writer_schema = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    reader_schema = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))

    user_value1 = {
        "name": " Rogers Nelson"
    }

    user_value2 = {
        "name": "Kenny Loggins"
    }

    combinations = [
        dict(key=user_value1, key_schema=writer_schema, value=user_value2, value_schema=writer_schema),
        dict(key=user_value2, key_schema=writer_schema, value=user_value1, value_schema=writer_schema)
    ]
    avro_topic = topic + str(uuid.uuid4())

    p = avro.AvroProducer(base_conf)
    for i, combo in enumerate(combinations):
        p.produce(topic=avro_topic, **combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf, reader_key_schema=reader_schema, reader_value_schema=reader_schema)
    c.subscribe([avro_topic])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)

        if msg is None:
            continue
        if msg.error():
            print("Consumer error {}".format(msg.error()))
            continue

        msgcount += 1
        # Avro schema projection should fill in the two fields that are not
        # present in the writer schema
        try:
            assert msg.key().get('favorite_number') == 42
            assert msg.key().get('favorite_color') == "purple"
            assert msg.value().get('favorite_number') == 42
            assert msg.value().get('favorite_color') == "purple"
            print("success: schema projection worked for explicit reader schema")
        except AssertionError:
            # .get() returns None for missing fields (it never raises
            # KeyError), so catch the assertion failure instead
            raise confluent_kafka.avro.SerializerError(
                "Schema projection failed when setting reader schema.")
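
The asserts above rely on Avro schema resolution: fields present only in the reader schema are filled from their declared defaults. A user_v2.avsc consistent with those defaults could look like this sketch (an assumption about the fixture, not its verbatim contents):

from confluent_kafka import avro

reader_schema = avro.loads("""
{
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "favorite_number", "type": "int", "default": 42},
    {"name": "favorite_color", "type": "string", "default": "purple"}
  ]
}
""")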