Example 1
    def __init__(self, home='.'):
        """Creates a new task processor operating out of the current working
        directory.  Uses processor.cfg as the config file.

        home -- The directory to run the processor out of.
        """
        self._home = os.path.abspath(home)

        # Once we have home set up, do everything in a try and log the error if
        # we get one
        try:
            self.config = Config(self.getPath("processor.cfg"))['processor']
            allowed = [ 'taskDatabase', 'pythonPath'
                    , 'threaded', 'rconsole' 
            ]
            for k in self.config.keys():
                if k not in allowed:
                    raise ValueError(
                            "Processor parameter '{0}' unrecognized".format(k))

            connection = Connection(self.config['taskDatabase'])
            connection._ensureIndexes()
            self.taskConnection = connection
            
            # Add other path elements before trying imports
            for i,path in enumerate(self.config.get('pythonPath', [])):
                sys.path.insert(
                    i
                    , os.path.abspath(os.path.join(home, path))
                )
            
            self._tasksAvailable = self._getTasksAvailable(self._home)
            self._monitors = {}

            self._cleanupThread = None
            self._cleanupThread_doc = """Thread that cleans up the _LOG_DIR"""

            self._startTaskQueue = Queue()
            self._stopOnNoTasks = False

            self._useRConsole = self.config.get('rconsole', False)
        except:
            self.error('During init')
            raise
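
For orientation, here is a minimal driver sketch. It assumes this __init__ belongs to a Processor class exposed by the lgTask package (the class statement and imports are not part of this excerpt) and that a processor.cfg with the allowed keys (taskDatabase, pythonPath, threaded, rconsole) already exists in the home directory; it sketches intended usage, not a confirmed API.

# Hypothetical usage; the lgTask.Processor name is an assumption drawn from
# the snippet above and from the 'lgTaskProcessor' process title used below.
import lgTask

if __name__ == '__main__':
    # The constructor resolves `home`, loads <home>/processor.cfg, rejects
    # unrecognized config keys, opens the task database connection, and
    # prepends the configured pythonPath entries to sys.path.
    proc = lgTask.Processor(home='/var/lib/myTasks')
    # How the processor is started afterwards is outside this excerpt.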
Example 2

# Standard-library imports used by this excerpt; Connection,
# InterruptableThread, KillTaskError and _runTask are defined elsewhere in
# the lgTask package and are not shown here.
import multiprocessing
import os
import signal
import thread
import time
import traceback
from Queue import Empty

class _ProcessorSlave(multiprocessing.Process):

    MAX_TIME = 3600
    MAX_TIME_doc = """Max time, in seconds, to accept tasks."""

    def __init__(self, processor):
        multiprocessing.Process.__init__(self)

        self._processor = processor
        self._connection = Connection(processor.taskConnection)
        self._processorHome = self._processor._home
        self._taskClasses = self._processor._tasksAvailable
        self._processorPid = os.getpid()

        self._queue = multiprocessing.Queue()
        self._running = []
        self._running_doc = """List of running task threads"""
        self._isAccepting = multiprocessing.Value('b', True)
        self._runningCount = multiprocessing.Value('i', 0)
        self._startTime = time.time()
        # No need to check kill right away, after all, we would have just 
        # accepted something
        self._lastKillCheck = self._startTime


    def execute(self, taskData):
        """Queue the task to be ran; this method is ONLY called by the 
        Processor, not the ProcessorSlave.

        It's a little sloppy to increment _runningCount here, but the worst
        case scenario is that we'll overwrite immediately after they update
        their running count (in which case we'll overestimate) or immediately
        before, in which case they'll pick up the queued item from Queue.qsize()
        anyway.
        """
        self._queue.put(taskData)
        self._runningCount.value += 1


    def getTaskCount(self):
        """Returns the number of running tasks from either the Processor or
        the _ProcessorSlave."""
        return self._runningCount.value


    def isAccepting(self):
        """Called by Processor to see if we're still accepting"""
        return self._isAccepting.value


    def run(self):
        # If we can, replace lgTaskProcessor with lgTaskSlave in our title
        try:
            import setproctitle
            title = setproctitle.getproctitle()
            if 'lgTaskProcessor' in title:
                title = title.replace('lgTaskProcessor', 'lgTaskSlave')
            else:
                title += ' --slave'
            setproctitle.setproctitle(title)
        except ImportError:
            pass
        # We're in our own process now, so disconnect the processor's 
        # pymongo connection to make sure we don't hold those sockets open
        self._processor.taskConnection.close()

        # Also, ensure that the global talk variables weren't copied over.
        # This only affects testing situations - that is, the normal processor
        # process won't use talk.
        import lgTask.talk
        lgTask.talk.talkConnection.resetFork()
        
        canQsize = True
        try:
            self._queue.qsize()
        except NotImplementedError:
            # Oh Mac OS X, how silly you are sometimes - Queue.qsize() is
            # not implemented there
            canQsize = False

        self._fixSigTerm()

        # rconsole?
        if self._processor._useRConsole:
            import lgTask.lib.rfooUtil as rfooUtil
            rfooUtil.spawnServer()

        # Any tasks that we start only need a small stack (1 MB rather than
        # the platform default)
        thread.stack_size(1024 * 1024)
        try:
            while True:
                try:
                    # See if we should stop being marked as accepting new
                    # tasks from the Processor
                    if self._isAccepting.value:
                        self._checkAccepting()

                    # Check on the tasks that are running
                    self._checkRunning()

                    # Get new task
                    taskData = self._queue.get(
                        timeout = self._processor.KILL_INTERVAL
                    )
                    taskThread = InterruptableThread(
                        target = self._runTaskThreadMain
                        , args = (taskData,)
                    )
                    # Remember the ID so that we can check for "kill" states
                    taskThread.taskId = taskData['_id']
                    taskThread.start()
                    self._running.append(taskThread)
                    
                    # Update running count
                    newCount = len(self._running)
                    if canQsize:
                        newCount += self._queue.qsize()
                    self._runningCount.value = newCount

                except Empty:
                    pass
                except Exception:
                    self._processor.log("Slave error {0}: {1}".format(
                        self.pid, traceback.format_exc()
                    ))

                # After each iteration, see if we should keep running
                if not self._shouldContinue():
                    break
        except:
            self._processor.log("Slave error {0}: {1}".format(
                self.pid, traceback.format_exc()
            ))


    def start(self):
        """We override multiprocessing.Process.start() so that the Processor
        can gracefully exit without waiting for its child process to exit.
        The default python multiprocessing behavior is to wait until all
        child processes have exited before exiting the main process; we don't
        want this.
        """
        result = multiprocessing.Process.start(self)
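        # Detach ourselves from the parent's child-process bookkeeping so the
        # parent's exit-time cleanup will not join() this slave.  This reaches
        # into a private CPython multiprocessing attribute, so it is tied to
        # the interpreter version this code targets.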
        multiprocessing.current_process()._children.remove(self)
        return result


    def _checkAccepting(self):
        """Stop accepting new tasks to execute if:

        1. We've run for too long
        2. Our parent process is no longer our processor (this means that
            the processor has exited, so we won't get any more tasks)
        """
        if (
                time.time() - self._startTime >= self.MAX_TIME
                or os.getppid() != self._processorPid
            ):
            self._isAccepting.value = False


    def _checkRunning(self):
        """Check on running threads"""
        now = time.time()
        if self._lastKillCheck + self._processor.KILL_INTERVAL <= now:
            self._lastKillCheck = now
            allIds = [ t.taskId for t in self._running ]
            killIds = self._connection.getTasksToKill(allIds)
            for t in self._running:
                if t.taskId in killIds:
                    t.raiseException(KillTaskError)
                    # And prevent us from trying to kill again for a slightly
                    # longer interval
                    self._lastKillCheck = now + self._processor.KILL_TOLERANCE

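        # Reap finished task threads; iterate in reverse so that pop(i) does
        # not shift the indices of entries we have not looked at yet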
        for i in reversed(range(len(self._running))):
            t = self._running[i]
            if not t.is_alive():
                self._running.pop(i)


    def _fixSigTerm(self):
        """Register our SIGTERM handler - that is, convert a sigterm on 
        ourselves into a KillTaskError on all of our tasks, and stop accepting
        once we get a sigterm.
        """
        def handleSigTerm(signum, frame):
            for t in self._running:
                t.raiseException(KillTaskError)
            self._isAccepting.value = False
        signal.signal(signal.SIGTERM, handleSigTerm)


    def _runTaskThreadMain(self, taskData):
        """Ran as the main method of a spawned thread; responsible for
        running the task passed.
        """
        taskCls = self._taskClasses[taskData['taskClass']]
        try:
            _runTask(
                taskCls
                , taskData
                , self._connection
                , self._processorHome
                , isThread = True
            )
        except Exception:
            self._processor.error('In or after _runTask')


    def _shouldContinue(self):
        """A slave should stop running if it is not currently running any
        tasks and it is no longer accepting new tasks.
        """
        if len(self._running) == 0 and not self._isAccepting.value:
            return False
        return True
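
The t.raiseException(KillTaskError) calls in _checkRunning and _fixSigTerm rely on InterruptableThread, which is not shown in this excerpt. A common way to implement that kind of asynchronous interruption in CPython is the PyThreadState_SetAsyncExc recipe; the sketch below is an assumption about how it could work, not lgTask's actual implementation.

# Hypothetical sketch; lgTask's real InterruptableThread may differ.
import ctypes
import threading

class InterruptableThreadSketch(threading.Thread):
    def raiseException(self, excType):
        """Schedule excType to be raised in this thread the next time it
        executes Python bytecode.  The thread must already have started."""
        tid = ctypes.c_long(self.ident)
        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
                tid, ctypes.py_object(excType))
        if res > 1:
            # More than one thread state was affected, meaning the id was
            # stale; clear the pending exception and report the failure.
            ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)
            raise SystemError("PyThreadState_SetAsyncExc failed")

Note that an exception raised this way only lands while the target thread is executing Python bytecode, so a thread blocked inside a C call will not be interrupted until that call returns.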