Code Example #1
File: queue_processors.py  Project: zag/zulip
class QueueProcessingWorker(object):
    def __init__(self):
        self.q = SimpleQueueClient()

    def consume_wrapper(self, data):
        try:
            self.consume(data)
        except Exception:
            self._log_problem()
            if not os.path.exists(settings.QUEUE_ERROR_DIR):
                os.mkdir(settings.QUEUE_ERROR_DIR)
            fname = '%s.errors' % (self.queue_name, )
            fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
            line = '%s\t%s\n' % (time.asctime(), ujson.dumps(data))
            lock_fn = fn + '.lock'
            with lockfile(lock_fn):
                with open(fn, 'a') as f:
                    f.write(line)
        reset_queries()

    def _log_problem(self):
        logging.exception("Problem handling data on queue %s" %
                          (self.queue_name, ))

    def start(self):
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self):
        self.q.stop_consuming()
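
To make this early interface concrete, here is a minimal sketch of a worker subclass; EchoWorker and its log message are hypothetical and rely only on what the excerpt shows (a queue_name attribute read by consume_wrapper/start, and a consume method called once per event).

class EchoWorker(QueueProcessingWorker):
    # Hypothetical subclass for illustration; not part of Zulip.
    queue_name = 'echo'  # used by start() when registering the JSON consumer

    def consume(self, data):
        # Each JSON event from the queue arrives here as a dict.
        logging.info("echo worker received: %s", data)

worker = EchoWorker()
worker.start()  # blocks; every event goes through consume_wrapper -> consume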
Code Example #2
File: queue_processors.py  Project: bgupta/zulip
class QueueProcessingWorker(object):
    def __init__(self):
        self.q = SimpleQueueClient()

    def consume_wrapper(self, data):
        try:
            self.consume(data)
        except Exception:
            self._log_problem()
            if not os.path.exists(settings.QUEUE_ERROR_DIR):
                os.mkdir(settings.QUEUE_ERROR_DIR)
            fname = '%s.errors' % (self.queue_name,)
            fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
            line = '%s\t%s\n' % (time.asctime(), ujson.dumps(data))
            lock_fn = fn + '.lock'
            with lockfile(lock_fn):
                with open(fn, 'a') as f:
                    f.write(line)
        reset_queries()

    def _log_problem(self):
        logging.exception("Problem handling data on queue %s" % (self.queue_name,))

    def start(self):
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self):
        self.q.stop_consuming()
Code Example #3
class QueueProcessingWorker(ABC):
    queue_name = None  # type: str

    def __init__(self) -> None:
        self.q = None  # type: SimpleQueueClient
        if self.queue_name is None:
            raise WorkerDeclarationException(
                "Queue worker declared without queue_name")

    @abstractmethod
    def consume(self, data: Dict[str, Any]) -> None:
        pass

    def do_consume(self, consume_func: Callable[[List[Dict[str, Any]]], None],
                   events: List[Dict[str, Any]]) -> None:
        try:
            consume_func(events)
        except Exception:
            self._handle_consume_exception(events)
        finally:
            flush_per_request_caches()
            reset_queries()

    def consume_wrapper(self, data: Dict[str, Any]) -> None:
        consume_func = lambda events: self.consume(events[0])
        self.do_consume(consume_func, [data])

    def _handle_consume_exception(self, events: List[Dict[str, Any]]) -> None:
        self._log_problem()
        if not os.path.exists(settings.QUEUE_ERROR_DIR):
            os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage
        fname = '%s.errors' % (self.queue_name, )
        fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
        line = '%s\t%s\n' % (time.asctime(), ujson.dumps(events))
        lock_fn = fn + '.lock'
        with lockfile(lock_fn):
            with open(fn, 'ab') as f:
                f.write(line.encode('utf-8'))
        check_and_send_restart_signal()

    def _log_problem(self) -> None:
        logging.exception("Problem handling data on queue %s" %
                          (self.queue_name, ))

    def setup(self) -> None:
        self.q = SimpleQueueClient()

    def start(self) -> None:
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self) -> None:  # nocoverage
        self.q.stop_consuming()
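
Example #3 is the first variant that routes all consumption through do_consume, which takes the list of events plus the function that should process it; consume_wrapper merely adapts the single-event case. A hedged sketch of feeding a batch through the same hook, assuming a worker instance and an events list already exist:

# Illustrative only: push a whole batch through the same exception-handling
# and flush/reset path that consume_wrapper uses for single events.
def consume_each(evts: List[Dict[str, Any]]) -> None:
    for event in evts:
        worker.consume(event)

worker.do_consume(consume_each, events)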
Code Example #4
File: queue_processors.py  Project: vishnudut/zulip
class QueueProcessingWorker:
    queue_name = None  # type: str

    def __init__(self):
        # type: () -> None
        self.q = None  # type: SimpleQueueClient
        if self.queue_name is None:
            raise WorkerDeclarationException(
                "Queue worker declared without queue_name")

    def consume(self, data):
        # type: (Dict[str, Any]) -> None
        raise WorkerDeclarationException("No consumer defined!")

    def consume_wrapper(self, data):
        # type: (Dict[str, Any]) -> None
        try:
            self.consume(data)
        except Exception:
            self._log_problem()
            if not os.path.exists(settings.QUEUE_ERROR_DIR):
                os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage
            fname = '%s.errors' % (self.queue_name, )
            fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
            line = '%s\t%s\n' % (time.asctime(), ujson.dumps(data))
            lock_fn = fn + '.lock'
            with lockfile(lock_fn):
                with open(fn, 'ab') as f:
                    f.write(line.encode('utf-8'))
            check_and_send_restart_signal()
        finally:
            reset_queries()

    def _log_problem(self):
        # type: () -> None
        logging.exception("Problem handling data on queue %s" %
                          (self.queue_name, ))

    def setup(self):
        # type: () -> None
        self.q = SimpleQueueClient()

    def start(self):
        # type: () -> None
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self):  # nocoverage
        # type: () -> None
        self.q.stop_consuming()
Code Example #5
File: queue_processors.py  Project: yhl-python/zulip
class QueueProcessingWorker(object):
    queue_name = None  # type: str

    def __init__(self):
        # type: () -> None
        self.q = None  # type: SimpleQueueClient
        if self.queue_name is None:
            raise WorkerDeclarationException("Queue worker declared without queue_name")

    def consume(self, data):
        # type: (Mapping[str, Any]) -> None
        raise WorkerDeclarationException("No consumer defined!")

    def consume_wrapper(self, data):
        # type: (Mapping[str, Any]) -> None
        try:
            self.consume(data)
        except Exception:
            self._log_problem()
            if not os.path.exists(settings.QUEUE_ERROR_DIR):
                os.mkdir(settings.QUEUE_ERROR_DIR)
            fname = '%s.errors' % (self.queue_name,)
            fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
            line = u'%s\t%s\n' % (time.asctime(), ujson.dumps(data))
            lock_fn = fn + '.lock'
            with lockfile(lock_fn):
                with open(fn, 'ab') as f:
                    f.write(line.encode('utf-8'))
            check_and_send_restart_signal()
        finally:
            reset_queries()

    def _log_problem(self):
        # type: () -> None
        logging.exception("Problem handling data on queue %s" % (self.queue_name,))

    def setup(self):
        # type: () -> None
        self.q = SimpleQueueClient()

    def start(self):
        # type: () -> None
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self):
        # type: () -> None
        self.q.stop_consuming()
Code Example #6
File: queue_processors.py  Project: SummerBulb/zulip
class QueueProcessingWorker(object):
    queue_name = None

    def __init__(self):
        self.q = SimpleQueueClient()
        if self.queue_name is None:
            raise WorkerDeclarationException("Queue worker declared without queue_name")

    def consume(self, data):
        raise WorkerDeclarationException("No consumer defined!")

    def consume_wrapper(self, data):
        try:
            self.consume(data)
        except Exception:
            self._log_problem()
            if not os.path.exists(settings.QUEUE_ERROR_DIR):
                os.mkdir(settings.QUEUE_ERROR_DIR)
            fname = "%s.errors" % (self.queue_name,)
            fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
            line = "%s\t%s\n" % (time.asctime(), ujson.dumps(data))
            lock_fn = fn + ".lock"
            with lockfile(lock_fn):
                with open(fn, "a") as f:
                    f.write(line.encode("utf-8"))
        reset_queries()

    def _log_problem(self):
        logging.exception("Problem handling data on queue %s" % (self.queue_name,))

    def start(self):
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self):
        self.q.stop_consuming()
Code Example #7
File: queue_processors.py  Project: pastewka/zulip
class QueueProcessingWorker(ABC):
    queue_name: str
    MAX_CONSUME_SECONDS: Optional[int] = 30
    ENABLE_TIMEOUTS = False
    CONSUME_ITERATIONS_BEFORE_UPDATE_STATS_NUM = 50
    MAX_SECONDS_BEFORE_UPDATE_STATS = 30

    def __init__(self) -> None:
        self.q: Optional[SimpleQueueClient] = None
        if not hasattr(self, "queue_name"):
            raise WorkerDeclarationException(
                "Queue worker declared without queue_name")

        self.initialize_statistics()

    def initialize_statistics(self) -> None:
        self.queue_last_emptied_timestamp = time.time()
        self.consumed_since_last_emptied = 0
        self.recent_consume_times: MutableSequence[Tuple[int, float]] = deque(
            maxlen=50)
        self.consume_iteration_counter = 0
        self.idle = True
        self.last_statistics_update_time = 0.0

        self.update_statistics(0)

    def update_statistics(self, remaining_local_queue_size: int) -> None:
        total_seconds = sum(seconds
                            for _, seconds in self.recent_consume_times)
        total_events = sum(events_number
                           for events_number, _ in self.recent_consume_times)
        if total_events == 0:
            recent_average_consume_time = None
        else:
            recent_average_consume_time = total_seconds / total_events
        stats_dict = dict(
            update_time=time.time(),
            recent_average_consume_time=recent_average_consume_time,
            current_queue_size=remaining_local_queue_size,
            queue_last_emptied_timestamp=self.queue_last_emptied_timestamp,
            consumed_since_last_emptied=self.consumed_since_last_emptied,
        )

        os.makedirs(settings.QUEUE_STATS_DIR, exist_ok=True)

        fname = f"{self.queue_name}.stats"
        fn = os.path.join(settings.QUEUE_STATS_DIR, fname)
        with lockfile(fn + ".lock"):
            tmp_fn = fn + ".tmp"
            with open(tmp_fn, "wb") as f:
                f.write(
                    orjson.dumps(stats_dict,
                                 option=orjson.OPT_APPEND_NEWLINE
                                 | orjson.OPT_INDENT_2))
            os.rename(tmp_fn, fn)
        self.last_statistics_update_time = time.time()

    def get_remaining_local_queue_size(self) -> int:
        if self.q is not None:
            return self.q.local_queue_size()
        else:
            # This is a special case that will happen if we're operating without
            # using RabbitMQ (e.g. in tests). In that case there's no queuing to speak of
            # and the only reasonable size to return is 0.
            return 0

    @abstractmethod
    def consume(self, data: Dict[str, Any]) -> None:
        pass

    def do_consume(self, consume_func: Callable[[List[Dict[str, Any]]], None],
                   events: List[Dict[str, Any]]) -> None:
        consume_time_seconds: Optional[float] = None
        with configure_scope() as scope:
            scope.clear_breadcrumbs()
            add_breadcrumb(
                type="debug",
                category="queue_processor",
                message=f"Consuming {self.queue_name}",
                data={
                    "events": events,
                    "local_queue_size": self.get_remaining_local_queue_size()
                },
            )
        try:
            if self.idle:
                # We're reactivating after having gone idle due to emptying the queue.
                # We should update the stats file to keep it fresh and to make it clear
                # that the queue started processing, in case the event we're about to process
                # makes us freeze.
                self.idle = False
                self.update_statistics(self.get_remaining_local_queue_size())

            time_start = time.time()
            if self.MAX_CONSUME_SECONDS and self.ENABLE_TIMEOUTS:
                try:
                    signal.signal(
                        signal.SIGALRM,
                        functools.partial(self.timer_expired,
                                          self.MAX_CONSUME_SECONDS, events),
                    )
                    try:
                        signal.alarm(self.MAX_CONSUME_SECONDS * len(events))
                        consume_func(events)
                    finally:
                        signal.alarm(0)
                finally:
                    signal.signal(signal.SIGALRM, signal.SIG_DFL)
            else:
                consume_func(events)
            consume_time_seconds = time.time() - time_start
            self.consumed_since_last_emptied += len(events)
        except Exception as e:
            self._handle_consume_exception(events, e)
        finally:
            flush_per_request_caches()
            reset_queries()

            if consume_time_seconds is not None:
                self.recent_consume_times.append(
                    (len(events), consume_time_seconds))

            remaining_local_queue_size = self.get_remaining_local_queue_size()
            if remaining_local_queue_size == 0:
                self.queue_last_emptied_timestamp = time.time()
                self.consumed_since_last_emptied = 0
                # We've cleared all the events from the queue, so we don't
                # need to worry about the small overhead of doing a disk write.
                # We take advantage of this to update the stats file to keep it fresh,
                # especially since the queue might go idle until new events come in.
                self.update_statistics(0)
                self.idle = True
                return

            self.consume_iteration_counter += 1
            if (self.consume_iteration_counter >=
                    self.CONSUME_ITERATIONS_BEFORE_UPDATE_STATS_NUM
                    or time.time() - self.last_statistics_update_time >=
                    self.MAX_SECONDS_BEFORE_UPDATE_STATS):
                self.consume_iteration_counter = 0
                self.update_statistics(remaining_local_queue_size)

    def consume_single_event(self, event: Dict[str, Any]) -> None:
        consume_func = lambda events: self.consume(events[0])
        self.do_consume(consume_func, [event])

    def timer_expired(self, limit: int, events: List[Dict[str, Any]],
                      signal: int, frame: FrameType) -> None:
        raise WorkerTimeoutException(self.queue_name, limit, len(events))

    def _handle_consume_exception(self, events: List[Dict[str, Any]],
                                  exception: Exception) -> None:
        if isinstance(exception, InterruptConsumeException):
            # The exception signals that no further error handling
            # is needed and the worker can proceed.
            return

        with configure_scope() as scope:
            scope.set_context(
                "events",
                {
                    "data": events,
                    "queue_name": self.queue_name,
                },
            )
            if isinstance(exception, WorkerTimeoutException):
                with sentry_sdk.push_scope() as scope:
                    scope.fingerprint = ["worker-timeout", self.queue_name]
                    logging.exception(exception, stack_info=True)
            else:
                logging.exception("Problem handling data on queue %s",
                                  self.queue_name,
                                  stack_info=True)
        if not os.path.exists(settings.QUEUE_ERROR_DIR):
            os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage
        # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
        # flow. 'queue_name' is always a constant string.
        fname = mark_sanitized(f"{self.queue_name}.errors")
        fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
        line = f"{time.asctime()}\t{orjson.dumps(events).decode()}\n"
        lock_fn = fn + ".lock"
        with lockfile(lock_fn):
            with open(fn, "a") as f:
                f.write(line)
        check_and_send_restart_signal()

    def setup(self) -> None:
        self.q = SimpleQueueClient()

    def start(self) -> None:
        assert self.q is not None
        self.initialize_statistics()
        self.q.start_json_consumer(
            self.queue_name,
            lambda events: self.consume_single_event(events[0]),
        )

    def stop(self) -> None:  # nocoverage
        assert self.q is not None
        self.q.stop_consuming()
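
A usage sketch against this newer interface, where setup() creates the connection and start() drives the statistics machinery and start_json_consumer; SlowWorker and process_one_event are placeholders rather than Zulip names, and the timeout flags are just the knobs declared at the top of the class above.

class SlowWorker(QueueProcessingWorker):
    # Hypothetical worker for illustration; not part of Zulip.
    queue_name = "slow_work"
    ENABLE_TIMEOUTS = True  # with the inherited MAX_CONSUME_SECONDS = 30, a stuck
                            # consume() triggers timer_expired via SIGALRM

    def consume(self, data: Dict[str, Any]) -> None:
        process_one_event(data)  # placeholder for the real per-event work

worker = SlowWorker()
worker.setup()  # creates the SimpleQueueClient connection
worker.start()  # initializes statistics, then consumes events one at a time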
Code Example #8
class QueueProcessingWorker(ABC):
    queue_name: str
    CONSUME_ITERATIONS_BEFORE_UPDATE_STATS_NUM = 50

    def __init__(self) -> None:
        self.q: Optional[SimpleQueueClient] = None
        if not hasattr(self, "queue_name"):
            raise WorkerDeclarationException("Queue worker declared without queue_name")

        self.initialize_statistics()

    def initialize_statistics(self) -> None:
        self.queue_last_emptied_timestamp = time.time()
        self.consumed_since_last_emptied = 0
        self.recent_consume_times: MutableSequence[Tuple[int, float]] = deque(maxlen=50)
        self.consume_interation_counter = 0

        self.update_statistics(0)

    def update_statistics(self, remaining_queue_size: int) -> None:
        total_seconds = sum([seconds for _, seconds in self.recent_consume_times])
        total_events = sum([events_number for events_number, _ in self.recent_consume_times])
        if total_events == 0:
            recent_average_consume_time = None
        else:
            recent_average_consume_time = total_seconds / total_events
        stats_dict = dict(
            update_time=time.time(),
            recent_average_consume_time=recent_average_consume_time,
            current_queue_size=remaining_queue_size,
            queue_last_emptied_timestamp=self.queue_last_emptied_timestamp,
            consumed_since_last_emptied=self.consumed_since_last_emptied,
        )

        os.makedirs(settings.QUEUE_STATS_DIR, exist_ok=True)

        fname = f'{self.queue_name}.stats'
        fn = os.path.join(settings.QUEUE_STATS_DIR, fname)
        with lockfile(fn + '.lock'):
            tmp_fn = fn + '.tmp'
            with open(tmp_fn, 'wb') as f:
                f.write(
                    orjson.dumps(stats_dict, option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2)
                )
            os.rename(tmp_fn, fn)

    @abstractmethod
    def consume(self, data: Dict[str, Any]) -> None:
        pass

    def do_consume(self, consume_func: Callable[[List[Dict[str, Any]]], None],
                   events: List[Dict[str, Any]]) -> None:
        consume_time_seconds: Optional[float] = None
        try:
            time_start = time.time()
            consume_func(events)
            consume_time_seconds = time.time() - time_start
            self.consumed_since_last_emptied += len(events)
        except Exception:
            self._handle_consume_exception(events)
        finally:
            flush_per_request_caches()
            reset_queries()

            if consume_time_seconds is not None:
                self.recent_consume_times.append((len(events), consume_time_seconds))

            if self.q is not None:
                remaining_queue_size = self.q.queue_size()
            else:
                remaining_queue_size = 0

            if remaining_queue_size == 0:
                self.queue_last_emptied_timestamp = time.time()
                self.consumed_since_last_emptied = 0

            self.consume_interation_counter += 1
            if self.consume_interation_counter >= self.CONSUME_ITERATIONS_BEFORE_UPDATE_STATS_NUM:
                self.consume_interation_counter = 0
                self.update_statistics(remaining_queue_size)

    def consume_wrapper(self, data: Dict[str, Any]) -> None:
        consume_func = lambda events: self.consume(events[0])
        self.do_consume(consume_func, [data])

    def _handle_consume_exception(self, events: List[Dict[str, Any]]) -> None:
        self._log_problem()
        if not os.path.exists(settings.QUEUE_ERROR_DIR):
            os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage
        # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
        # flow. 'queue_name' is always a constant string.
        fname = mark_sanitized(f'{self.queue_name}.errors')
        fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
        line = f'{time.asctime()}\t{orjson.dumps(events).decode()}\n'
        lock_fn = fn + '.lock'
        with lockfile(lock_fn):
            with open(fn, 'ab') as f:
                f.write(line.encode('utf-8'))
        check_and_send_restart_signal()

    def _log_problem(self) -> None:
        logging.exception("Problem handling data on queue %s", self.queue_name, stack_info=True)

    def setup(self) -> None:
        self.q = SimpleQueueClient()

    def start(self) -> None:
        assert self.q is not None
        self.initialize_statistics()
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self) -> None:  # nocoverage
        assert self.q is not None
        self.q.stop_consuming()
Code Example #9
class QueueProcessingWorker(ABC):
    queue_name: str = None
    CONSUME_ITERATIONS_BEFORE_UPDATE_STATS_NUM = 50

    def __init__(self) -> None:
        self.q: SimpleQueueClient = None
        if self.queue_name is None:
            raise WorkerDeclarationException(
                "Queue worker declared without queue_name")

        self.initialize_statistics()

    def initialize_statistics(self) -> None:
        self.queue_last_emptied_timestamp = time.time()
        self.consumed_since_last_emptied = 0
        self.recent_consume_times: MutableSequence[Tuple[int, float]] = deque(
            maxlen=50)
        self.consume_interation_counter = 0

        self.update_statistics(0)

    def update_statistics(self, remaining_queue_size: int) -> None:
        total_seconds = sum(
            [seconds for _, seconds in self.recent_consume_times])
        total_events = sum(
            [events_number for events_number, _ in self.recent_consume_times])
        if total_events == 0:
            recent_average_consume_time = None
        else:
            recent_average_consume_time = total_seconds / total_events
        stats_dict = dict(
            update_time=time.time(),
            recent_average_consume_time=recent_average_consume_time,
            current_queue_size=remaining_queue_size,
            queue_last_emptied_timestamp=self.queue_last_emptied_timestamp,
            consumed_since_last_emptied=self.consumed_since_last_emptied,
        )

        os.makedirs(settings.QUEUE_STATS_DIR, exist_ok=True)

        fname = '%s.stats' % (self.queue_name, )
        fn = os.path.join(settings.QUEUE_STATS_DIR, fname)
        with lockfile(fn + '.lock'):
            tmp_fn = fn + '.tmp'
            with open(tmp_fn, 'w') as f:
                serialized_dict = ujson.dumps(stats_dict, indent=2)
                serialized_dict += '\n'
                f.write(serialized_dict)
            os.rename(tmp_fn, fn)

    @abstractmethod
    def consume(self, data: Dict[str, Any]) -> None:
        pass

    def do_consume(self, consume_func: Callable[[List[Dict[str, Any]]], None],
                   events: List[Dict[str, Any]]) -> None:
        try:
            time_start = time.time()
            consume_func(events)
            consume_time_seconds: Optional[float] = time.time() - time_start
            self.consumed_since_last_emptied += len(events)
        except Exception:
            self._handle_consume_exception(events)
            consume_time_seconds = None
        finally:
            flush_per_request_caches()
            reset_queries()

            if consume_time_seconds is not None:
                self.recent_consume_times.append(
                    (len(events), consume_time_seconds))

            if self.q is not None:
                remaining_queue_size = self.q.queue_size()
            else:
                remaining_queue_size = 0

            if remaining_queue_size == 0:
                self.queue_last_emptied_timestamp = time.time()
                self.consumed_since_last_emptied = 0

            self.consume_interation_counter += 1
            if self.consume_interation_counter >= self.CONSUME_ITERATIONS_BEFORE_UPDATE_STATS_NUM:
                self.consume_interation_counter = 0
                self.update_statistics(remaining_queue_size)

    def consume_wrapper(self, data: Dict[str, Any]) -> None:
        consume_func = lambda events: self.consume(events[0])
        self.do_consume(consume_func, [data])

    def _handle_consume_exception(self, events: List[Dict[str, Any]]) -> None:
        self._log_problem()
        if not os.path.exists(settings.QUEUE_ERROR_DIR):
            os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage
        fname = '%s.errors' % (self.queue_name, )
        fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
        line = '%s\t%s\n' % (time.asctime(), ujson.dumps(events))
        lock_fn = fn + '.lock'
        with lockfile(lock_fn):
            with open(fn, 'ab') as f:
                f.write(line.encode('utf-8'))
        check_and_send_restart_signal()

    def _log_problem(self) -> None:
        logging.exception("Problem handling data on queue %s" %
                          (self.queue_name, ))

    def setup(self) -> None:
        self.q = SimpleQueueClient()

    def start(self) -> None:
        self.initialize_statistics()
        self.q.register_json_consumer(self.queue_name, self.consume_wrapper)
        self.q.start_consuming()

    def stop(self) -> None:  # nocoverage
        self.q.stop_consuming()
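
Across every version above, the failure path appends one line per error to <queue_name>.errors under settings.QUEUE_ERROR_DIR: an asctime timestamp, a tab, and the JSON-serialized event (newer versions serialize the whole event list). A rough recovery sketch assuming that format; the helper name and file path are placeholders, not part of the excerpts.

import json

def read_failed_events(path):
    # Hypothetical helper for re-reading an .errors file; not part of Zulip.
    events = []
    with open(path) as f:
        for raw in f:
            _timestamp, payload = raw.rstrip('\n').split('\t', 1)
            events.append(json.loads(payload))  # one event dict, or a list in newer versions
    return events

failed = read_failed_events('/path/to/queue_error/example_queue.errors')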