Example 1
    def __init__(self, client, group, topic,
                 partitions=None,
                 auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL,
                 num_procs=1,
                 partitions_per_proc=0,
                 **simple_consumer_options):
        """Start a pool of consumer child processes for *topic*.

        Args:
            client: Kafka client instance (copied once per child process).
            group: Consumer group name.
            topic: Topic to consume from.
            partitions: Optional explicit partition list; defaults to all
                partitions known for the topic.
            auto_commit / auto_commit_every_n / auto_commit_every_t:
                Offset auto-commit configuration, forwarded to the base class.
            num_procs: Number of consumer processes to spawn (ignored when
                partitions_per_proc is non-zero).
            partitions_per_proc: If non-zero, derive the process count from
                the partition count instead of using num_procs.
            **simple_consumer_options: Extra keyword options forwarded to each
                child consumer ('partitions' is stripped — it is managed here).
        """

        # Initiate the base consumer class
        super(MultiProcessConsumer, self).__init__(
            client, group, topic,
            partitions=partitions,
            auto_commit=auto_commit,
            auto_commit_every_n=auto_commit_every_n,
            auto_commit_every_t=auto_commit_every_t)

        # Variables for managing and controlling the data flow from
        # consumer child process to master
        manager = MPManager()
        self.queue = manager.Queue(1024)  # Child consumers dump messages into this
        self.events = Events(
            start = manager.Event(),        # Indicates the consumers to start fetch
            exit  = manager.Event(),        # Requests the consumers to shutdown
            pause = manager.Event())        # Requests the consumers to pause fetch
        self.size = manager.Value('i', 0)   # Indicator of number of messages to fetch

        # dict.keys() returns a view in py3 + it's not a thread-safe operation
        # http://blog.labix.org/2008/06/27/watch-out-for-listdictkeys-in-python-3
        # It's safer to copy dict as it only runs during the init.
        partitions = list(self.offsets.copy().keys())

        # By default, start one consumer process for all partitions
        # The logic below ensures that
        # * we do not cross the num_procs limit
        # * we have an even distribution of partitions among processes

        if partitions_per_proc:
            # BUGFIX: use floor division — true division returns a float on
            # Python 3, which would make range(num_procs) below raise TypeError.
            num_procs = len(partitions) // partitions_per_proc
            if num_procs * partitions_per_proc < len(partitions):
                num_procs += 1  # round up so every partition is covered

        # The final set of chunks: round-robin assignment of partitions,
        # e.g. proc 0 gets partitions 0, num_procs, 2*num_procs, ...
        chunks = [partitions[proc::num_procs] for proc in range(num_procs)]

        self.procs = []
        for chunk in chunks:
            options = {'partitions': list(chunk)}
            if simple_consumer_options:
                # 'partitions' is managed per-chunk here; never let the
                # caller's option override it.
                simple_consumer_options.pop('partitions', None)
                options.update(simple_consumer_options)

            # Each child gets its own client copy plus the shared queue,
            # size counter and control events.
            args = (client.copy(), self.group, self.topic, self.queue,
                    self.size, self.events)
            proc = Process(target=_mp_consume, args=args, kwargs=options)
            proc.daemon = True  # don't block interpreter exit on children
            proc.start()
            self.procs.append(proc)
    def __init__(self,
                 use_confluent_kafka=False,
                 num_procs=1,
                 report_interval=5,
                 json_logging=False,
                 log_level='INFO',
                 verbose=False,
                 **consumer_options):
        """Spawn a pool of Kafka consumer child processes.

        Args:
            use_confluent_kafka: If True, children run the confluent-kafka
                consumer loop; otherwise the kafka-python loop.
            num_procs: Number of consumer processes to start.
            report_interval: Seconds between progress reports in children.
            json_logging: Emit child logs as JSON when True.
            log_level: Logging level name passed to each child.
            verbose: Enable verbose output in children.
            **consumer_options: Extra keyword options forwarded verbatim to
                each child consumer function.
        """

        # Variables for managing and controlling the data flow from
        # consumer child process to master
        manager = MPManager()
        self.queue = manager.Queue(
            1024)  # Child consumers dump messages into this
        self.events = Events(
            start=manager.Event(),  # Indicates the consumers to start fetch
            stop=manager.Event(
            ),  # Indicates to stop fetching and pushing data till start is set back
            exit=manager.Event()  # Requests the consumers to shutdown
        )
        self.report_interval = report_interval
        self.num_procs = num_procs
        self.consumer_options = consumer_options
        self.json_logging = json_logging
        self.verbose = verbose
        self.log_level = log_level

        # All children share the same arguments and target; both are
        # loop-invariant, so compute them once before spawning.
        target = (_mp_consume_confluent_kafka if use_confluent_kafka
                  else _mp_consume_kafka_python)
        args = (self.queue, self.report_interval, self.json_logging,
                self.log_level, self.verbose, self.events)

        self.procs = []
        for _ in range(self.num_procs):
            proc = Process(target=target, args=args, kwargs=consumer_options)
            proc.daemon = True  # don't block interpreter exit on children
            proc.start()
            self.procs.append(proc)
Example 3
    def __init__(self, *args, **kwargs):
        """
        .. note::

           All args are optional unless otherwise noted.

        Args:
            name (str): unique name of this instance. By default a uuid will be
                 generated.
            queues (tuple): List of queue names to listen on.
            skip_signal (bool): Don't register the signal handlers. Useful for
                 testing.
            concurrent_jobs (int): Override ``conf.CONCURRENT_JOBS`` for this
                 instance.
        """
        super(JobManager, self).__init__(*args, **kwargs)

        setup_logger("eventmq")

        #: Define the name of this JobManager instance. Useful to know when
        #: referring to the logs.
        self.name = kwargs.pop('name', None) or generate_device_name()
        logger.info('EventMQ Version {}'.format(__version__))
        logger.info('Initializing JobManager {}...'.format(self.name))

        #: keep track of workers — a caller-supplied value overrides the
        #: global config; otherwise the configured default stands.
        concurrent_jobs = kwargs.pop('concurrent_jobs', None)
        if concurrent_jobs is not None:
            conf.CONCURRENT_JOBS = concurrent_jobs

        #: List of queues that this job manager is listening on
        self.queues = kwargs.pop('queues', None)

        if not kwargs.pop('skip_signal', False):
            # handle any sighups by reloading config
            signal.signal(signal.SIGHUP, self.sighup_handler)
            # graceful shutdown on the usual termination signals
            signal.signal(signal.SIGTERM, self.sigterm_handler)
            signal.signal(signal.SIGINT, self.sigterm_handler)
            signal.signal(signal.SIGQUIT, self.sigterm_handler)
            # SIGUSR1 drops into pdb for live debugging
            signal.signal(signal.SIGUSR1, self.handle_pdb)

        #: JobManager starts out by INFORMing the router of its existence,
        #: then telling the router that it is READY. The reply will be the unit
        #: of work.
        # Despite the name, jobs are received on this socket
        self.outgoing = Sender(name=self.name)

        self.poller = Poller()

        #: Stats and monitoring information

        #: Jobs in flight tracks all jobs currently executing.
        #: Key: msgid, Value: The message with all the details of the job
        self.jobs_in_flight = {}

        #: Running total number of REQUEST messages received on the broker
        self.total_requests = 0
        #: Running total number of READY messages sent to the broker
        self.total_ready_sent = 0
        #: Keep track of what pids are servicing our requests
        #: Key: pid, Value: # of jobs completed on the process with that pid
        self.pid_distribution = {}

        #: Setup worker queues shared with the worker subprocesses:
        #: requests flow out via request_queue, results return on finished_queue.
        self._mp_manager = MPManager()
        self.request_queue = self._mp_manager.Queue()
        self.finished_queue = self._mp_manager.Queue()
        self._setup()