Example #1
def insert_data_to_database(self, data):
    # we might be passed LazyData
    if isinstance(data, LazyData):
        data = dict(data.items())
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
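
Note that the positional indices in the format string are swapped relative to the argument order, so the resulting key is e:<event_id>:<project>, not e:<project>:<event_id>. A quick illustration with made-up values:

data = {'project': 42, 'event_id': 'abc123'}  # hypothetical values, for illustration only
cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
# cache_key == 'e:abc123:42'  -- event_id first, then project
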
Example #2
def insert_data_to_database(self, data):
    # we might be passed LazyData
    if isinstance(data, LazyData):
        data = dict(data.items())
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
Example #3
    def process_message(self, message):
        message = msgpack.unpackb(message.value(), use_list=False)
        body = message["payload"]
        start_time = float(message["start_time"])
        event_id = message["event_id"]
        project_id = message["project_id"]

        # check that we haven't already processed this event (a previous instance of the forwarder
        # died before it could commit the event queue offset)
        deduplication_key = "ev:{}:{}".format(project_id, event_id)
        if cache.get(deduplication_key) is not None:
            logger.warning(
                "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.",
                event_id,
                project_id,
            )
            return  # message already processed, do not reprocess

        cache_key = cache_key_from_project_id_and_event_id(project_id=project_id, event_id=event_id)
        cache_timeout = 3600
        default_cache.set(cache_key, body, cache_timeout, raw=True)

        # queue the event for processing
        preprocess_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)

        # remember for 1 hour that we saved this event (deduplication protection)
        cache.set(deduplication_key, "", 3600)
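
For context, the wire format consumed above is a msgpack map with the fields payload, start_time, event_id and project_id. Below is a minimal sketch of how such a message could be built for a local test, assuming msgpack >= 1.0 defaults; make_test_message and all values are made up for illustration.

import time

import msgpack


def make_test_message(project_id, event_id, payload):
    # Hypothetical test helper: packs a map with the same fields that
    # process_message() reads back out above.
    return msgpack.packb(
        {
            "payload": payload,
            "start_time": time.time(),
            "event_id": event_id,
            "project_id": project_id,
        }
    )


raw = make_test_message(42, "abc123", b'{"message": "hello"}')
decoded = msgpack.unpackb(raw, use_list=False)
assert decoded["event_id"] == "abc123"
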
Example #4
def insert_data_to_database(data):
    preprocess_event.delay(data=data)
Example #5
def insert_data_to_database(self, data):
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
Example #6
def insert_data_to_database(data):
    preprocess_event.delay(data=data)
Example #7
def insert_data_to_database(data):
    cache_key = "e:{0}".format(data["event_id"])
    cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key)
Example #8
def insert_data_to_database(data):
    cache_key = 'e:{0}'.format(data['event_id'])
    cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key)
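
The receiving end of this handoff is the preprocess_event task, which is keyed only by cache_key. A minimal sketch of the pattern, not Sentry's actual task: the in-memory cache stand-in and both function bodies here are illustrative assumptions.

cache = {}  # stand-in for the shared cache used by both sides


def insert_data_to_database(data):
    cache_key = "e:{0}".format(data["event_id"])
    cache[cache_key] = data      # cache.set(cache_key, data, timeout=3600) in the real code
    preprocess_event(cache_key)  # preprocess_event.delay(cache_key=cache_key) in the real code


def preprocess_event(cache_key):
    data = cache.pop(cache_key, None)  # fetch the payload saved by the producer and drop it
    if data is None:
        return  # payload expired or was already consumed
    print("processing event", data["event_id"])


insert_data_to_database({"event_id": "abc123", "message": "hello"})
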
Example #9
def insert_data_to_database(self, data):
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
Example #10
def run_ingest_consumer(
    commit_batch_size,
    consumer_group,
    consumer_type,
    max_fetch_time_seconds,
    initial_offset_reset="latest",
    is_shutdown_requested=lambda: False,
):
    """
    Handles events coming via a kafka queue.

    The events should have already been processed (normalized... ) upstream (by Relay).

    :param commit_batch_size: the number of messages the consumer will try to process/commit in one loop
    :param consumer_group: kafka consumer group name
    :param consumer_type: an enumeration defining the types of ingest messages; see `ConsumerType`
    :param max_fetch_time_seconds: the maximum number of seconds a consume operation will be blocked waiting
        for the specified commit_batch_size number of messages to appear in the queue before it returns. At the
        end of the specified time the consume operation will return however many messages it has (including
        an empty list if no new messages are available).
    :param initial_offset_reset: offset reset policy when there's no available offset for the consumer
    :param is_shutdown_requested: Callable[[], bool] predicate checked after each loop; if it returns
        True the forwarder stops (defaults to lambda: False). In normal operation this should be left at
        its default. For unit testing it offers a way to cleanly stop the forwarder once a particular
        condition is reached.
    """

    logger.debug("Starting ingest-consumer...")
    consumer = _create_consumer(consumer_group, consumer_type,
                                initial_offset_reset)

    consumer.subscribe([ConsumerType.get_topic_name(consumer_type)])
    # set up a flag to mark that a termination signal was received; see below for why we use a list
    termination_signal_received = [False]

    def termination_signal_handler(_sig_id, _frame):
        """
        Signal handler installed for SIGINT and SIGTERM.

        This signal handler only remembers that the signal was emitted.
        The batch processing loop detects that the signal was emitted
        and stops once the whole batch is processed.
        """
        # We need to use a list so that termination_signal_received is not a
        # local variable assignment, but a lookup in the closure's outer scope.
        termination_signal_received[0] = True

    with set_termination_request_handlers(termination_signal_handler):
        while not (is_shutdown_requested() or termination_signal_received[0]):
            # get up to commit_batch_size messages
            messages = consumer.consume(num_messages=commit_batch_size,
                                        timeout=max_fetch_time_seconds)

            for message in messages:
                message_error = message.error()
                if message_error is not None:
                    logger.error(
                        "Received message with error on %s, error:'%s'",
                        consumer_type,
                        message_error,
                    )
                    raise ValueError("Bad message received from consumer",
                                     consumer_type, message_error)

                message = msgpack.unpackb(message.value(), use_list=False)
                body = message["payload"]
                start_time = float(message["start_time"])
                event_id = message["event_id"]
                project_id = message["project_id"]

                # check that we haven't already processed this event (a previous instance of the forwarder
                # died before it could commit the event queue offset)
                deduplication_key = "ev:{}:{}".format(project_id, event_id)
                if cache.get(deduplication_key) is not None:
                    logger.warning(
                        "pre-process-forwarder detected a duplicated event"
                        " with id:%s for project:%s.",
                        event_id,
                        project_id,
                    )
                    continue

                cache_key = cache_key_from_project_id_and_event_id(
                    project_id=project_id, event_id=event_id)
                cache_timeout = 3600
                default_cache.set(cache_key, body, cache_timeout, raw=True)
                preprocess_event.delay(cache_key=cache_key,
                                       start_time=start_time,
                                       event_id=event_id)

                # remember for 1 hour that we saved this event (deduplication protection)
                cache.set(deduplication_key, "", 3600)

            if len(messages) > 0:
                # we have read some messages in the previous consume, commit the offset
                consumer.commit(asynchronous=False)

    logger.debug("Closing ingest-consumer %s...", consumer_type)
    consumer.close()
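
As the docstring notes, is_shutdown_requested is the hook for stopping the consume loop cleanly from a test. A minimal sketch of such a predicate and how it might be passed in; stop_after is a made-up helper, and ConsumerType.Events is assumed here (the actual enum members may differ).

import itertools


def stop_after(n):
    # Made-up test helper: a zero-argument predicate that becomes True after n calls.
    counter = itertools.count(1)
    return lambda: next(counter) > n


# In a test one might then let the forwarder run a few loops and exit:
# run_ingest_consumer(
#     commit_batch_size=10,
#     consumer_group="test-ingest-consumer",
#     consumer_type=ConsumerType.Events,   # assumption: check the actual enum
#     max_fetch_time_seconds=0.1,
#     is_shutdown_requested=stop_after(3),
# )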