def consume_messages(cls, **kwargs):
    """Poll Kafka forever and republish every message to a Redis channel.

    Connection settings are read from **kwargs, falling back to the
    module-level DEFAULT_* constants when a value is absent or falsy.
    """
    def _forward_batch(consumer_, redis_client_, redis_channel_):
        # One scheduler tick: drain the current poll and fan the raw
        # payloads out to Redis (logging the decoded form for visibility).
        for msg in consumer_.poll():
            payload = msg.value
            logger.info(ujson.loads(payload))
            redis_client_.publish(redis_channel_, payload)

    def _close_consumer(consumer_):
        consumer_.shut_down()

    # Kafka side: consumer over the configured broker/topic, closed at exit.
    broker = kwargs.get(KAFKA_BROKER) or DEFAULT_KAFKA_BROKER
    topic = kwargs.get(KAFKA_OUTPUT_TOPIC) or DEFAULT_KAFKA_OUTPUT_TOPIC
    consumer = Consumer(broker, topic)
    atexit.register(_close_consumer, consumer)

    # Redis side: client for the publish target.
    channel = kwargs.get(REDIS_CHANNEL) or DEFAULT_REDIS_CHANNEL
    host = kwargs.get(REDIS_HOST) or DEFAULT_REDIS_HOST
    port = kwargs.get(REDIS_PORT) or DEFAULT_REDIS_PORT
    client = redis.StrictRedis(host=host, port=port)

    # Run the forwarding job on a 1-second schedule, indefinitely.
    scheduler = Scheduler(1, _forward_batch, consumer, client, channel)
    scheduler.run()
def consumer_instance(self, consumer_group_name, force_payload_decode, topic, team_name, pre_rebalance_callback, post_rebalance_callback, registered_schema, registered_compatible_schema, registered_non_compatible_schema):
    """Yield a Consumer wired to a fixed pair of schema ids.

    The consumer's region/topic resolution is patched so that each topic
    reported by the consumer source maps to itself.
    """
    source = FixedSchemas(
        registered_schema.schema_id,
        registered_non_compatible_schema.schema_id
    )
    consumer = Consumer(
        consumer_name=consumer_group_name,
        team_name=team_name,
        expected_frequency_seconds=ExpectedFrequency.constantly,
        topic_to_consumer_topic_state_map=None,
        consumer_source=source,
        force_payload_decode=force_payload_decode,
        auto_offset_reset='largest',  # begin at the tail of the topic
        pre_rebalance_callback=pre_rebalance_callback,
        post_rebalance_callback=post_rebalance_callback,
    )
    region_lookup = mock.patch.object(
        consumer,
        '_get_topics_in_region_from_topic_name',
        side_effect=[[topic_name] for topic_name in source.get_topics()]
    )
    with region_lookup:
        yield consumer
def _get_message_and_alter_range( start_timestamp, topic_to_consumer_topic_state_map, topic_to_range_map, result_topic_to_consumer_topic_state_map ): """Create a consumer based on our topic_to_consumer_state_map, get a message, and based on that message's timestamp, adjust our topic ranges and maps with _update_ranges_for_message and _move_finisehd_topics_to_result_map""" # We create a new consumer each time since it would otherwise require refactoring how # we consume from KafkaConsumerGroups (currently use an iterator, which doesn't support big jumps in offset) with Consumer( 'data_pipeline_tailer_starting_offset_getter-{}'.format( str(UUID(bytes=FastUUID().uuid4()).hex) ), 'bam', ExpectedFrequency.constantly, topic_to_consumer_topic_state_map ) as consumer: message = consumer.get_message(timeout=0.1, blocking=True) if message is None: return _update_ranges_for_message( message, start_timestamp, topic_to_consumer_topic_state_map, topic_to_range_map ) _move_finished_topics_to_result_map( topic_to_consumer_topic_state_map, topic_to_range_map, result_topic_to_consumer_topic_state_map )
def run(self):
    """Tail the configured topics, printing each formatted message.

    Loops while ``keep_running(message_count)`` is truthy; additionally
    stops as soon as a message's timestamp reaches ``--end-timestamp``
    (when one was supplied).
    """
    logger.info(
        "Starting to consume from {}".format(self.topic_to_offsets_map)
    )
    with Consumer(
        # The tailer name should be unique - if it's not, partitions will
        # be split between multiple tailer instances
        'data_pipeline_tailer-{}'.format(
            str(UUID(bytes=FastUUID().uuid4()).hex)
        ),
        'bam',
        ExpectedFrequency.constantly,
        self.topic_to_offsets_map,
        auto_offset_reset=self.options.offset_reset_location,
        cluster_name=self.options.cluster_name
    ) as consumer:
        message_count = 0
        while self.keep_running(message_count):
            # Short timeout so keep_running() is re-checked frequently.
            message = consumer.get_message(blocking=True, timeout=0.1)
            if message is not None:
                if self.options.end_timestamp is None or message.timestamp < self.options.end_timestamp:
                    # Python 2 print statement (this codebase targets py2).
                    print self._format_message(message)
                    message_count += 1
                else:
                    # Past the requested window: stop the tailer loop.
                    self._running = False
                    logger.info(
                        "Latest message surpasses --end-timestamp. Stopping tailer..."
                    )
def test_consumer_initial_registration_message(self, topic):
    """A Consumer started with a non-empty topic_to_consumer_topic_state_map
    must emit exactly one initial RegistrationMessage, observed as a single
    clog.log_line call.
    """
    with attach_spy_on_func(clog, 'log_line') as func_spy:
        initial_state = ConsumerTopicState({}, 23)
        state_map = {topic: initial_state}
        with Consumer(
            consumer_name='test_consumer',
            team_name='bam',
            expected_frequency_seconds=ExpectedFrequency.constantly,
            topic_to_consumer_topic_state_map=state_map
        ):
            assert func_spy.call_count == 1
def consumer_instance(self, consumer_group_name, team_name, consumer_source, pre_rebalance_callback, post_rebalance_callback):
    """Build a Consumer that reads from the head of its topics and
    refreshes its topic list twice per second.
    """
    consumer_kwargs = dict(
        consumer_name=consumer_group_name,
        team_name=team_name,
        expected_frequency_seconds=ExpectedFrequency.constantly,
        consumer_source=consumer_source,
        topic_refresh_frequency_seconds=0.5,
        auto_offset_reset='smallest',  # begin at the earliest offset
        pre_rebalance_callback=pre_rebalance_callback,
        post_rebalance_callback=post_rebalance_callback,
    )
    return Consumer(**consumer_kwargs)
def consumer_two_instance(self, topic, pii_topic, consumer_init_kwargs):
    """Yield a second Consumer over both the plain and PII topics, with
    region/topic resolution patched to be the identity mapping.
    """
    state_map = {topic: None, pii_topic: None}
    consumer_two = Consumer(
        topic_to_consumer_topic_state_map=state_map,
        **consumer_init_kwargs
    )
    identity_lookup = mock.patch.object(
        consumer_two,
        '_get_topics_in_region_from_topic_name',
        side_effect=[
            [name]
            for name in consumer_two.topic_to_consumer_topic_state_map.keys()
        ]
    )
    with identity_lookup:
        yield consumer_two
def log_consumer_instance(self, log_topic, consumer_init_kwargs):
    """Yield a Consumer over a single scribe log topic, patched so the
    logname resolves through FakeScribeKafka.
    """
    log_consumer = Consumer(
        topic_to_consumer_topic_state_map={log_topic: None},
        auto_offset_reset='largest',  # begin at the tail of the topic
        **consumer_init_kwargs
    )
    scribe_lookup = mock.patch.object(
        log_consumer,
        '_get_scribe_topics_from_topic_name',
        side_effect=[
            [FakeScribeKafka().get_scribe_kafka_topic_from_logname(logname)]
            for logname in log_consumer.topic_to_consumer_topic_state_map.keys()
        ]
    )
    with scribe_lookup:
        yield log_consumer
def consumer_instance(self, topic, pii_topic, consumer_init_kwargs):
    """Yield a Consumer over both the plain and PII topics, starting at the
    tail, with region/topic resolution patched to be the identity mapping.
    """
    state_map = {topic: None, pii_topic: None}
    consumer = Consumer(
        topic_to_consumer_topic_state_map=state_map,
        auto_offset_reset='largest',  # begin at the tail of the topic
        **consumer_init_kwargs
    )
    identity_lookup = mock.patch.object(
        consumer,
        '_get_topics_in_region_from_topic_name',
        side_effect=[
            [name]
            for name in consumer.topic_to_consumer_topic_state_map.keys()
        ]
    )
    with identity_lookup:
        yield consumer
def consumer_instance(self, topic, team_name):
    """Build a minimal Consumer over a single topic with no saved state."""
    state_map = {topic: None}
    return Consumer(
        consumer_name='test_consumer',
        team_name=team_name,
        expected_frequency_seconds=ExpectedFrequency.constantly,
        topic_to_consumer_topic_state_map=state_map
    )