def consume_messages(cls, **kwargs):
    """Relay Kafka messages onto a Redis pub/sub channel, forever.

    Connection settings (Kafka broker/topic, Redis channel/host/port) are
    read from ``kwargs`` and fall back to the module-level defaults.
    """
    def forward(consumer_, redis_client_, redis_channel_):
        # One scheduler tick: drain the poll batch and republish each message.
        for record in consumer_.poll():
            payload = record.value
            logger.info(ujson.loads(payload))
            redis_client_.publish(redis_channel_, payload)

    def cleanup(consumer_):
        consumer_.shut_down()

    # Kafka consumer, configured from kwargs with defaults.
    broker = kwargs.get(KAFKA_BROKER) or DEFAULT_KAFKA_BROKER
    topic = kwargs.get(KAFKA_OUTPUT_TOPIC) or DEFAULT_KAFKA_OUTPUT_TOPIC
    consumer = Consumer(broker, topic)

    # Redis client, configured the same way.
    channel = kwargs.get(REDIS_CHANNEL) or DEFAULT_REDIS_CHANNEL
    host = kwargs.get(REDIS_HOST) or DEFAULT_REDIS_HOST
    port = kwargs.get(REDIS_PORT) or DEFAULT_REDIS_PORT
    client = redis.StrictRedis(host=host, port=port)

    # Ensure the Kafka consumer is shut down on interpreter exit.
    atexit.register(cleanup, consumer)

    scheduler = Scheduler(1, forward, consumer, client, channel)
    scheduler.run()
Example #2
0
    def consumer_instance(self, consumer_group_name, force_payload_decode,
                          topic, team_name, pre_rebalance_callback,
                          post_rebalance_callback, registered_schema,
                          registered_compatible_schema,
                          registered_non_compatible_schema):
        """Yield a Consumer driven by a fixed-schemas source, with its
        region-topic lookup patched to return each source topic singly.
        """
        source = FixedSchemas(
            registered_schema.schema_id,
            registered_non_compatible_schema.schema_id,
        )

        consumer = Consumer(
            consumer_name=consumer_group_name,
            team_name=team_name,
            expected_frequency_seconds=ExpectedFrequency.constantly,
            topic_to_consumer_topic_state_map=None,
            consumer_source=source,
            force_payload_decode=force_payload_decode,
            auto_offset_reset='largest',  # start from the tail of the topic
            pre_rebalance_callback=pre_rebalance_callback,
            post_rebalance_callback=post_rebalance_callback,
        )
        per_topic_regions = [[name] for name in source.get_topics()]
        patcher = mock.patch.object(
            consumer,
            '_get_topics_in_region_from_topic_name',
            side_effect=per_topic_regions)
        with patcher:
            yield consumer
Example #3
0
def _get_message_and_alter_range(
    start_timestamp,
    topic_to_consumer_topic_state_map,
    topic_to_range_map,
    result_topic_to_consumer_topic_state_map
):
    """Fetch one message and adjust the topic ranges/maps from its timestamp.

    Creates a throwaway Consumer from ``topic_to_consumer_topic_state_map``,
    fetches a single message, and — based on that message's timestamp —
    adjusts the topic ranges and maps with ``_update_ranges_for_message``
    and ``_move_finished_topics_to_result_map``.

    Returns early (without touching any map) when no message arrives
    within the poll timeout.
    """
    # We create a new consumer each time since it would otherwise require
    # refactoring how we consume from KafkaConsumerGroups (currently use an
    # iterator, which doesn't support big jumps in offset).
    with Consumer(
        # Random suffix keeps the consumer group name unique per call.
        'data_pipeline_tailer_starting_offset_getter-{}'.format(
            str(UUID(bytes=FastUUID().uuid4()).hex)
        ),
        'bam',
        ExpectedFrequency.constantly,
        topic_to_consumer_topic_state_map
    ) as consumer:
        message = consumer.get_message(timeout=0.1, blocking=True)
        if message is None:
            return
        _update_ranges_for_message(
            message,
            start_timestamp,
            topic_to_consumer_topic_state_map,
            topic_to_range_map
        )
    # Runs after the consumer is closed; only reached when a message arrived.
    _move_finished_topics_to_result_map(
        topic_to_consumer_topic_state_map,
        topic_to_range_map,
        result_topic_to_consumer_topic_state_map
    )
Example #4
0
    def run(self):
        """Tail the configured Kafka topics and print each message.

        Consumes from ``self.topic_to_offsets_map``, printing every formatted
        message until ``keep_running`` returns False or a message's timestamp
        reaches ``--end-timestamp`` (when one was supplied).
        """
        logger.info(
            "Starting to consume from {}".format(self.topic_to_offsets_map)
        )

        with Consumer(
            # The tailer name should be unique - if it's not, partitions will
            # be split between multiple tailer instances
            'data_pipeline_tailer-{}'.format(
                str(UUID(bytes=FastUUID().uuid4()).hex)
            ),
            'bam',
            ExpectedFrequency.constantly,
            self.topic_to_offsets_map,
            auto_offset_reset=self.options.offset_reset_location,
            cluster_name=self.options.cluster_name
        ) as consumer:
            message_count = 0
            while self.keep_running(message_count):
                message = consumer.get_message(blocking=True, timeout=0.1)
                if message is None:
                    continue
                if self.options.end_timestamp is None or message.timestamp < self.options.end_timestamp:
                    # Parenthesized so the statement is valid under both
                    # Python 2 and Python 3 (was a bare py2 print statement).
                    print(self._format_message(message))
                    message_count += 1
                else:
                    self._running = False
                    logger.info(
                        "Latest message surpasses --end-timestamp. Stopping tailer..."
                    )
Example #5
0
 def test_consumer_initial_registration_message(self, topic):
     """
     Assert that an initial RegistrationMessage is sent upon starting
     the Consumer with a non-empty topic_to_consumer_topic_state_map.
     """
     with attach_spy_on_func(clog, 'log_line') as log_spy:
         initial_state = ConsumerTopicState({}, 23)
         state_map = {topic: initial_state}
         consumer_ctx = Consumer(
             consumer_name='test_consumer',
             team_name='bam',
             expected_frequency_seconds=ExpectedFrequency.constantly,
             topic_to_consumer_topic_state_map=state_map)
         with consumer_ctx:
             # Exactly one log line: the registration message.
             assert log_spy.call_count == 1
Example #6
0
 def consumer_instance(self, consumer_group_name, team_name,
                       consumer_source, pre_rebalance_callback,
                       post_rebalance_callback):
     """Build a dynamic-source Consumer that refreshes its topic list
     every half second and reads from the earliest available offset.
     """
     consumer_kwargs = dict(
         consumer_name=consumer_group_name,
         team_name=team_name,
         expected_frequency_seconds=ExpectedFrequency.constantly,
         consumer_source=consumer_source,
         topic_refresh_frequency_seconds=0.5,
         auto_offset_reset='smallest',
         pre_rebalance_callback=pre_rebalance_callback,
         post_rebalance_callback=post_rebalance_callback,
     )
     return Consumer(**consumer_kwargs)
Example #7
0
 def consumer_two_instance(self, topic, pii_topic, consumer_init_kwargs):
     """Yield a second Consumer over both topics, with the region-topic
     lookup patched to return each topic on its own.
     """
     state_map = {topic: None, pii_topic: None}
     consumer_two = Consumer(
         topic_to_consumer_topic_state_map=state_map,
         **consumer_init_kwargs)
     # One singleton region list per consumed topic, in map order.
     per_topic_regions = [
         [name]
         for name in consumer_two.topic_to_consumer_topic_state_map.keys()
     ]
     with mock.patch.object(
             consumer_two,
             '_get_topics_in_region_from_topic_name',
             side_effect=per_topic_regions):
         yield consumer_two
Example #8
0
 def log_consumer_instance(
     self,
     log_topic,
     consumer_init_kwargs,
 ):
     """Yield a Consumer over one scribe log topic, starting from the
     topic tail, with scribe-topic resolution routed through
     FakeScribeKafka.
     """
     log_consumer = Consumer(
         topic_to_consumer_topic_state_map={log_topic: None},
         auto_offset_reset='largest',  # start from the tail of the topic
         **consumer_init_kwargs)
     scribe_topic_lists = [
         [FakeScribeKafka().get_scribe_kafka_topic_from_logname(logname)]
         for logname in
         log_consumer.topic_to_consumer_topic_state_map.keys()
     ]
     with mock.patch.object(
             log_consumer,
             '_get_scribe_topics_from_topic_name',
             side_effect=scribe_topic_lists):
         yield log_consumer
Example #9
0
 def consumer_instance(
     self,
     topic,
     pii_topic,
     consumer_init_kwargs,
 ):
     """Yield a tail-offset Consumer over the plain and PII topics, with
     the region-topic lookup patched to return each topic singly.
     """
     state_map = {topic: None, pii_topic: None}
     consumer = Consumer(
         topic_to_consumer_topic_state_map=state_map,
         auto_offset_reset='largest',  # start from the tail of the topic
         **consumer_init_kwargs)
     region_lists = [
         [name]
         for name in consumer.topic_to_consumer_topic_state_map.keys()
     ]
     with mock.patch.object(
             consumer,
             '_get_topics_in_region_from_topic_name',
             side_effect=region_lists):
         yield consumer
Example #10
0
 def consumer_instance(self, topic, team_name):
     """Build a constantly-running test Consumer bound to one topic."""
     state_map = {topic: None}
     return Consumer(
         consumer_name='test_consumer',
         team_name=team_name,
         expected_frequency_seconds=ExpectedFrequency.constantly,
         topic_to_consumer_topic_state_map=state_map)