Example #1
def pusher(task_queue, event, broker=None):
    if not broker:
        broker = get_broker()
    name = current_process().name
    pid = current_process().pid
    logger.info('{} pushing tasks at {}'.format(name, pid))
    while 1:
        try:
            task_set = broker.dequeue()
        except BaseException as e:
            logger.error(e)
            # broker probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task_set:
            for task in task_set:
                try:
                    task = signing.PickleSerializer.loads(task)
                except (TypeError, KeyError, signing.BadSerializer) as e:
                    logger.error(e)
                    continue
                task_queue.put(task)
            logger.debug('queueing from {}'.format(broker.list_key))
        else:
            sleep(.1)
        if event.is_set():
            break
    logger.info('{} stopped pushing tasks'.format(name))
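The pusher is the producer half of a producer/consumer pair: it drains the broker and feeds the internal task_queue until the stop event is set. A minimal, self-contained sketch of the same loop shape follows; the toy_pusher function and the plain-list source standing in for broker.dequeue() are invented for illustration:

from multiprocessing import Event, Process, Queue
from time import sleep

def toy_pusher(task_queue, event, source):
    # Drain 'source' (a stand-in for broker.dequeue) into the shared
    # queue until the stop event is set.
    while not event.is_set():
        if source:
            task_queue.put(source.pop(0))
        else:
            sleep(0.1)

if __name__ == '__main__':
    q, stop = Queue(), Event()
    p = Process(target=toy_pusher, args=(q, stop, ['a', 'b', 'c']))
    p.start()
    for _ in range(3):
        print(q.get())  # prints a, b, c
    stop.set()
    p.join()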
Example #2
 def guard(self):
     logger.info('{} guarding cluster at {}'.format(current_process().name,
                                                    self.pid))
     self.start_event.set()
     Stat(self).save()
     logger.info('DTC-Cluster-{} running.'.format(self.parent_pid))
     #scheduler(broker=self.broker)
     counter = 0
     cycle = 0.5  # guard loop sleep in seconds
     # guard loop. Runs at least once
     while not self.stop_event.is_set() or not counter:
         # Check Workers
         for p in self.pool:
             # Are you alive?
             if not p.is_alive() or (self.timeout and p.timer.value == 0):
                 self.reincarnate(p)
                 continue
             # Decrement the timer if work is being done
             if self.timeout and p.timer.value > 0:
                 p.timer.value -= cycle
         # Check Monitor
         if not self.monitor.is_alive():
             self.reincarnate(self.monitor)
         # Check Pusher
         if not self.pusher.is_alive():
             self.reincarnate(self.pusher)
         # Call the scheduler once a minute (or so)
         counter += cycle
         if counter == 30 and Conf.SCHEDULER:
             counter = 0
             #scheduler(broker=self.broker)
         # Save current status
         Stat(self).save()
         sleep(cycle)
     self.stop()
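The guard is a watchdog: every half-second cycle it polls each child with is_alive(), counts down the shared timers, and respawns anything dead or timed out. A minimal sketch of the respawn-on-death idiom, with an invented flaky worker that dies on its own:

from multiprocessing import Process
from time import sleep

def flaky():
    # A stand-in worker that dies shortly after starting.
    sleep(0.3)

def spawn():
    p = Process(target=flaky)
    p.start()
    return p

if __name__ == '__main__':
    worker = spawn()
    for _ in range(5):              # a few guard cycles
        sleep(0.5)
        if not worker.is_alive():   # sudden death?
            worker = spawn()        # reincarnate it
            print('reincarnated as pid', worker.pid)
    worker.terminate()
    worker.join()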
Example #3
 def reincarnate(self, process):
     """
     :param process: the process to reincarnate
     :type process: Process or None
     """
     if process == self.monitor:
         self.monitor = self.spawn_monitor()
         logger.error('reincarnated monitor {} after sudden death'.format(
             process.name))
     elif process == self.pusher:
         self.pusher = self.spawn_pusher()
         logger.error('reincarnated pusher {} after sudden death'.format(
             process.name))
     else:
         self.pool.remove(process)
         self.spawn_worker()
         if self.timeout and int(process.timer.value) == 0:
             # only need to terminate on timeout, otherwise we risk destabilizing the queues
             process.terminate()
             logger.warning('reincarnated worker {} after timeout'.format(
                 process.name))
         elif int(process.timer.value) == -2:
             logger.info('recycled worker {}'.format(process.name))
         else:
             logger.warning(
                 'reincarnated worker {} after death with timer value {}'.
                 format(process.name, int(process.timer.value)))
     self.reincarnations += 1
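The timer consulted here is a multiprocessing.Value shared between a worker and the guard, which is how the sentinel can tell a timeout (0) from a deliberate recycle (-2). A minimal sketch of sharing such a float across processes; the work function and its values are invented:

from multiprocessing import Process, Value
from time import sleep

def work(timer):
    timer.value = 5.0    # Busy: announce a 5 second budget
    sleep(0.2)
    timer.value = -1.0   # Idle again

if __name__ == '__main__':
    timer = Value('f', -1.0)   # 'f' = shared single-precision float
    p = Process(target=work, args=(timer,))
    p.start()
    sleep(0.1)
    print('while busy:', timer.value)   # 5.0
    p.join()
    print('after exit:', timer.value)   # -1.0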
Example #4
 def stop(self):
     if not self.sentinel.is_alive():
         return False
     logger.info('DTC-Cluster-{} stopping.'.format(self.pid))
     self.stop_event.set()
     self.sentinel.join()
     logger.info('DTC-Cluster-{} has stopped.'.format(self.pid))
     self.start_event = None
     self.stop_event = None
     return True
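Note that stop() returns False if the sentinel has already died; otherwise setting stop_event is what breaks the guard loop in Example #2, and join() blocks until the sentinel's own stop() (Example #6) has finished tearing down the pusher, workers and monitor.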
Example #5
 def start(self):
     # start Sentinel
     self.stop_event = Event()
     self.start_event = Event()
     self.sentinel = Process(target=Sentinel,
                             args=(self.stop_event, self.start_event,
                                   self.broker, self.timeout))
     self.sentinel.start()
     logger.info('DTC-Cluster-{} starting.'.format(self.pid))
     while not self.start_event.is_set():
         sleep(0.1)
     return self.pid
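start() blocks by polling start_event until the Sentinel signals that it is up; stop() (Example #4) mirrors it with stop_event and join(). A minimal sketch of that two-Event handshake with an invented child body; Event.wait() is the blocking alternative to the sleep-poll loop used above:

from multiprocessing import Event, Process
from time import sleep

def child(start_event, stop_event):
    start_event.set()                 # signal the parent: we are up
    while not stop_event.is_set():
        sleep(0.1)                    # pretend to work

if __name__ == '__main__':
    start, stop = Event(), Event()
    p = Process(target=child, args=(start, stop))
    p.start()
    start.wait()                      # block until the child reports ready
    print('child ready at pid', p.pid)
    stop.set()                        # ask it to stop
    p.join()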
Example #6
 def stop(self):
     Stat(self).save()
     name = current_process().name
     logger.info('{} stopping cluster processes'.format(name))
     # Stopping pusher
     self.event_out.set()
     # Wait for it to stop
     while self.pusher.is_alive():
         sleep(0.1)
         Stat(self).save()
     # Put poison pills in the queue
     for __ in range(len(self.pool)):
         self.task_queue.put('STOP')
     self.task_queue.close()
     # Wait for the task queue to empty
     self.task_queue.join_thread()
     # Wait for all the workers to exit
     while len(self.pool):
         for p in self.pool:
             if not p.is_alive():
                 self.pool.remove(p)
         sleep(0.1)
         Stat(self).save()
     # Finally stop the monitor
     self.result_queue.put('STOP')
     self.result_queue.close()
     # Wait for the result queue to empty
     self.result_queue.join_thread()
     logger.info('{} Waiting for the monitor.'.format(name))
     # Wait for everything to close or timeout
     count = 0
     if not self.timeout:
         self.timeout = 30
     while self.status() == Conf.STOPPING and count < self.timeout * 10:
         sleep(0.1)
         Stat(self).save()
         count += 1
     # Final status
     Stat(self).save()
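The teardown is ordered: silence the pusher first, then feed one 'STOP' poison pill per worker, drain the pool, and finally stop the monitor the same way. A minimal sketch of the one-pill-per-consumer idiom, with invented names and sizes:

from multiprocessing import Process, Queue

def consumer(q):
    for item in iter(q.get, 'STOP'):   # exit when the pill arrives
        print('consumed', item)

if __name__ == '__main__':
    q = Queue()
    pool = [Process(target=consumer, args=(q,)) for _ in range(2)]
    for p in pool:
        p.start()
    for item in ('a', 'b', 'c', 'd'):
        q.put(item)
    for _ in pool:                     # exactly one pill per consumer
        q.put('STOP')
    for p in pool:
        p.join()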
Example #7
def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to DB or Cache
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    pid = current_process().pid
    logger.info('{} monitoring at {}'.format(name, pid))
    for task in iter(result_queue.get, 'STOP'):
        ackid = task.get('id', None)
        if ackid:
            broker.acknowledge(ackid)
        # save the result
        save_cached(task, broker)
        # log the result
        if task['success']:
            logger.debug("Processed [{}]".format(task['name']))
        else:
            logger.error("Failed [{}] - {}".format(task['name'],
                                                   task['result']))
    logger.info("{} stopped monitoring results".format(name))
Example #8
def set_cpu_affinity(n, process_ids, actual=not Conf.TESTING):
    """
    Sets the cpu affinity for the supplied processes.
    Requires the optional psutil module.
    :param int n: affinity
    :param list process_ids: a list of pids
    :param bool actual: Test workaround for Travis not supporting cpu affinity
    """
    # check if we have the psutil module
    if not psutil:
        logger.warning('Skipping cpu affinity because psutil was not found.')
        return
    # check if the platform supports cpu_affinity
    if actual and not hasattr(psutil.Process(process_ids[0]), 'cpu_affinity'):
        logger.warning(
            'Faking cpu affinity because it is not supported on this platform')
        actual = False
    # get the available processors
    cpu_list = list(range(psutil.cpu_count()))
    # an affinity of 0, or one >= the cpu count, means no affinity
    if not n or n >= len(cpu_list):
        return
    # spread the workers over the available processors.
    index = 0
    for pid in process_ids:
        affinity = []
        for k in range(n):
            if index == len(cpu_list):
                index = 0
            affinity.append(cpu_list[index])
            index += 1
        if psutil.pid_exists(pid):
            p = psutil.Process(pid)
            if actual:
                p.cpu_affinity(affinity)
            logger.info('{} will use cpu {}'.format(pid, affinity))
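cpu_affinity() is a psutil method available only on platforms that support it (e.g. Linux, Windows, FreeBSD), which is why the code above probes with hasattr. A minimal check of the read and write forms, assuming psutil is installed:

import psutil

p = psutil.Process()                       # the current process
print('cpus:', psutil.cpu_count())
if hasattr(p, 'cpu_affinity'):             # not available on e.g. macOS
    print('affinity:', p.cpu_affinity())   # read: list of allowed cpus
    p.cpu_affinity([0])                    # write: pin to cpu 0
    print('pinned:', p.cpu_affinity())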
Example #9
def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info('{} ready for work at {}'.format(name, current_process().pid))
    pool = ThreadPool(Conf.THREADERS)
    task_count = 0

    # task result callback
    def worker_callback(request, response):
        task = request.task
        result = (response, not request.exception)
        # Process result
        task['result'] = result[0]
        task['success'] = result[1]
        task['stopped'] = datetime.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            task_queue.put('STOP')

    # task dispatch loop
    for task in iter(task_queue.get, 'STOP'):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # Get the function from the task
        logger.debug('{} processing [{}]'.format(name, task['name']))
        f = task['func']
        # if it's not an instance try to get it from the string
        if not callable(task['func']):
            try:
                module, func = f.rsplit('.', 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
        # We're still going
        if not result:
            # execute the payload
            timer.value = task['kwargs'].pop('timeout', timeout or 0)  # Busy
            # res = f(*task['args'], **task['kwargs'])
            # result = (res, True)
            request = WorkRequest(f,
                                  args=task['args'],
                                  kwargs=task['kwargs'],
                                  callback=worker_callback)
            request.task = task
            pool.putRequest(request)
        else:
            # Process result
            task['result'] = result[0]
            task['success'] = result[1]
            task['stopped'] = datetime.now()
            result_queue.put(task)
            timer.value = -1  # Idle
    pool.stop()
    logger.info('{} stopped doing work'.format(name))
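When task['func'] arrives as a dotted string instead of a callable, the worker resolves it with rsplit plus importlib. The same resolution in isolation, with an invented resolve helper:

import importlib

def resolve(dotted):
    # Split 'package.module.attr' on the last dot and import the module part.
    module, attr = dotted.rsplit('.', 1)
    return getattr(importlib.import_module(module), attr)

join = resolve('os.path.join')
print(join('tasks', 'results'))   # tasks/results (tasks\results on Windows)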