def insert_data_to_database(self, data):
    # we might be passed LazyData
    if isinstance(data, LazyData):
        data = dict(data.items())
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
def process_message(self, message):
    message = msgpack.unpackb(message.value(), use_list=False)
    body = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = message["project_id"]

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event"
            " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed, do not reprocess

    cache_key = cache_key_from_project_id_and_event_id(
        project_id=project_id, event_id=event_id
    )
    cache_timeout = 3600
    default_cache.set(cache_key, body, cache_timeout, raw=True)

    # queue the event for processing
    preprocess_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)

    # remember for 1 hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", 3600)
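# cache_key_from_project_id_and_event_id is referenced above but not shown in this
# excerpt. A minimal sketch, assuming the key keeps the earlier "e:{event_id}:{project}"
# shape used by insert_data_to_database (hypothetical reconstruction, not the actual helper):
def cache_key_from_project_id_and_event_id(project_id, event_id):
    return "e:{}:{}".format(event_id, project_id)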
def insert_data_to_database(data):
    preprocess_event.delay(data=data)
def insert_data_to_database(self, data):
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
def insert_data_to_database(data): cache_key = "e:{0}".format(data["event_id"]) cache.set(cache_key, data, timeout=3600) preprocess_event.delay(cache_key=cache_key)
def insert_data_to_database(data):
    cache_key = 'e:{0}'.format(data['event_id'])
    cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key)
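# Illustrative call into the simplest variants above (hypothetical data; the real
# call sites in the store endpoint are not part of this excerpt):
data = {"event_id": "9fd1ab7962424810a0f10fb09bd08a73", "message": "it broke"}
insert_data_to_database(data)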
def run_ingest_consumer(
    commit_batch_size,
    consumer_group,
    consumer_type,
    max_fetch_time_seconds,
    initial_offset_reset="latest",
    is_shutdown_requested=lambda: False,
):
    """
    Handles events coming via a kafka queue.

    The events should have already been processed (normalized...) upstream (by Relay).

    :param commit_batch_size: the number of messages the consumer will try to process/commit in one loop
    :param consumer_group: kafka consumer group name
    :param consumer_type: an enumeration defining the types of ingest messages, see `ConsumerType`
    :param max_fetch_time_seconds: the maximum number of seconds a consume operation will be blocked waiting
        for the specified commit_batch_size number of messages to appear in the queue before it returns. At
        the end of the specified time the consume operation will return however many messages it has
        (including an empty array if no new messages are available).
    :param initial_offset_reset: offset reset policy when there's no available offset for the consumer
    :param is_shutdown_requested: Callable[[], bool] predicate checked after each loop; if it returns True
        the forwarder stops (by default it is lambda: False). In normal operation this should be left at the
        default. For unit testing it offers a way to cleanly stop the forwarder after some particular
        condition is achieved.
    """
    logger.debug("Starting ingest-consumer...")
    consumer = _create_consumer(consumer_group, consumer_type, initial_offset_reset)

    consumer.subscribe([ConsumerType.get_topic_name(consumer_type)])

    # setup a flag to mark termination signals received, see below why we use an array
    termination_signal_received = [False]

    def termination_signal_handler(_sig_id, _frame):
        """
        Function to use as a hook for SIGINT and SIGTERM.

        This signal handler only remembers that the signal was emitted.
        The batch processing loop detects that the signal was emitted
        and stops once the whole batch is processed.
        """
        # We need to use an array so that termination_signal_received is not a
        # local variable assignment, but a lookup in the closure's outer scope.
        termination_signal_received[0] = True

    with set_termination_request_handlers(termination_signal_handler):
        while not (is_shutdown_requested() or termination_signal_received[0]):
            # get up to commit_batch_size messages
            messages = consumer.consume(
                num_messages=commit_batch_size, timeout=max_fetch_time_seconds
            )

            for message in messages:
                message_error = message.error()
                if message_error is not None:
                    logger.error(
                        "Received message with error on %s, error:'%s'",
                        consumer_type,
                        message_error,
                    )
                    raise ValueError(
                        "Bad message received from consumer", consumer_type, message_error
                    )

                message = msgpack.unpackb(message.value(), use_list=False)
                body = message["payload"]
                start_time = float(message["start_time"])
                event_id = message["event_id"]
                project_id = message["project_id"]

                # check that we haven't already processed this event (a previous instance of the
                # forwarder died before it could commit the event queue offset)
                deduplication_key = "ev:{}:{}".format(project_id, event_id)
                if cache.get(deduplication_key) is not None:
                    logger.warning(
                        "pre-process-forwarder detected a duplicated event"
                        " with id:%s for project:%s.",
                        event_id,
                        project_id,
                    )
                    continue  # message already processed, do not reprocess

                cache_key = cache_key_from_project_id_and_event_id(
                    project_id=project_id, event_id=event_id
                )
                cache_timeout = 3600
                default_cache.set(cache_key, body, cache_timeout, raw=True)

                # queue the event for processing
                preprocess_event.delay(
                    cache_key=cache_key, start_time=start_time, event_id=event_id
                )

                # remember for 1 hour that we saved this event (deduplication protection)
                cache.set(deduplication_key, "", 3600)

            if len(messages) > 0:
                # we have read some messages in the previous consume, commit the offset
                consumer.commit(asynchronous=False)

    logger.debug("Closing ingest-consumer %s...", consumer_type)
    consumer.close()
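# set_termination_request_handlers is used above but not defined in this excerpt.
# A minimal sketch, assuming it is a context manager that installs the given handler
# for SIGINT and SIGTERM and restores the previous handlers on exit (hypothetical
# reconstruction, not the actual implementation):
import signal
from contextlib import contextmanager

@contextmanager
def set_termination_request_handlers(handler):
    # install the handler for both termination signals, remembering the old ones
    old_int = signal.signal(signal.SIGINT, handler)
    old_term = signal.signal(signal.SIGTERM, handler)
    try:
        yield
    finally:
        # restore whatever handlers were in place before
        signal.signal(signal.SIGINT, old_int)
        signal.signal(signal.SIGTERM, old_term)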