Esempio n. 1
0
    def __init__(self, home='.'):
        """Set up a task processor rooted at the given home directory.

        Loads the 'processor' section of processor.cfg (resolved through
        getPath()), rejects any setting that is not recognized, connects to
        the task database, prepends the configured pythonPath entries to
        sys.path and initializes the processor's bookkeeping attributes.

        home -- The directory to run the processor out of.  Defaults to the
                current working directory.

        Any exception raised during setup is reported via self.error() and
        then re-raised unchanged.
        """
        self._home = os.path.abspath(home)

        # Everything after this point runs under one try so that a failure
        # is reported through self.error() before it propagates.
        try:
            settings = Config(self.getPath("processor.cfg"))['processor']
            self.config = settings

            recognized = ('taskDatabase', 'pythonPath', 'threaded', 'rconsole')
            for name in settings.keys():
                if name in recognized:
                    continue
                raise ValueError(
                        "Processor parameter '{0}' unrecognized".format(name))

            db = Connection(settings['taskDatabase'])
            db._ensureIndexes()
            self.taskConnection = db

            # Extra import locations go at the front of sys.path, in the
            # order they are configured, before trying imports.
            extraPaths = settings.get('pythonPath', [])
            for position, entry in enumerate(extraPaths):
                resolved = os.path.abspath(os.path.join(home, entry))
                sys.path.insert(position, resolved)

            self._tasksAvailable = self._getTasksAvailable(self._home)
            self._monitors = {}

            self._cleanupThread = None
            self._cleanupThread_doc = """Thread that cleans up the _LOG_DIR"""

            self._startTaskQueue = Queue()
            self._stopOnNoTasks = False

            self._useRConsole = settings.get('rconsole', False)
        except:
            # Deliberately broad: log whatever went wrong, then re-raise.
            self.error('During init')
            raise
Esempio n. 2
0
class Processor(object):
    """Processes tasks for the given db.  Also performs certain administrative
    actions (or ensures that they are performed) on the tasks database.

    The class-level settings below are tunables.  Each setting NAME is
    followed by a NAME_doc string attribute that describes it.
    """

    # File name of the processor's own log; log() appends to
    # _LOG_DIR + LOGFILE underneath the home directory.
    LOGFILE = 'processor.log'

    LOAD_SLEEP_SCALE = 0.1
    LOAD_SLEEP_SCALE_doc = """Seconds to wait for a running task to finish
            before starting a parallel task.  Scaled by system load divided
            by number of cores.  Set to 0 for tests, which will disable both
            this and LOAD_SLEEP_SCALE_NO_TASK."""

    LOAD_SLEEP_SCALE_NO_TASK = 4.0
    LOAD_SLEEP_SCALE_NO_TASK_doc = """LOAD_SLEEP_SCALE for when we tried to
            get a task but none were waiting"""

    # 3600 seconds = one hour
    CLEANUP_INTERVAL = 3600
    CLEANUP_INTERVAL_doc = """Seconds between cleaning up logs"""

    # 30 days, expressed in seconds
    KEEP_LOGS_FOR = 30*24*60*60
    KEEP_LOGS_FOR_doc = """Seconds to keep logs around for"""

    _LOG_DIR = "logs/"
    _LOG_DIR_doc = """Directory used for logs; only for testing.  Must end
            in slash."""

    KILL_INTERVAL = 5
    KILL_INTERVAL_doc = """Seconds between checking if we should kill a task"""

    KILL_TOLERANCE = 10
    KILL_TOLERANCE_doc = """Min seconds between sending a kill message to a 
            task.  Used to prevent killing cleanup code."""

    MONITOR_CHECK_INTERVAL = 0.01
    MONITOR_CHECK_INTERVAL_doc = """Minimum seconds between checking if tasks
            have died."""
    
    def __init__(self, home='.'):
        """Set up a task processor rooted at the given home directory.

        Loads the 'processor' section of processor.cfg (resolved through
        getPath()), rejects any setting that is not recognized, connects to
        the task database, prepends the configured pythonPath entries to
        sys.path and initializes the processor's bookkeeping attributes.

        home -- The directory to run the processor out of.  Defaults to the
                current working directory.

        Any exception raised during setup is reported via self.error() and
        then re-raised unchanged.
        """
        self._home = os.path.abspath(home)

        # Everything after this point runs under one try so that a failure
        # is reported through self.error() before it propagates.
        try:
            settings = Config(self.getPath("processor.cfg"))['processor']
            self.config = settings

            recognized = ('taskDatabase', 'pythonPath', 'threaded', 'rconsole')
            for name in settings.keys():
                if name in recognized:
                    continue
                raise ValueError(
                        "Processor parameter '{0}' unrecognized".format(name))

            db = Connection(settings['taskDatabase'])
            db._ensureIndexes()
            self.taskConnection = db

            # Extra import locations go at the front of sys.path, in the
            # order they are configured, before trying imports.
            extraPaths = settings.get('pythonPath', [])
            for position, entry in enumerate(extraPaths):
                resolved = os.path.abspath(os.path.join(home, entry))
                sys.path.insert(position, resolved)

            self._tasksAvailable = self._getTasksAvailable(self._home)
            self._monitors = {}

            self._cleanupThread = None
            self._cleanupThread_doc = """Thread that cleans up the _LOG_DIR"""

            self._startTaskQueue = Queue()
            self._stopOnNoTasks = False

            self._useRConsole = settings.get('rconsole', False)
        except:
            # Deliberately broad: log whatever went wrong, then re-raise.
            self.error('During init')
            raise

    def error(self, message):
        error = traceback.format_exc()
        self.log("{0} - {1}".format(message, error))

    @classmethod
    def fork(cls, home='.', killExisting=True):
        """Forks a new subprocess to run a Processor instance out of the
        given home directory.  Useful for e.g. debug environments, where the
        main script should also spawn a processor but perhaps does something
        else, like serving webpages.

        The fork is automatically registered with an atexit to terminate the
        forked Processor.  Look at lamegame_tasking/bin/lgTaskProcessor for
        a standalone script.

        Automatically forwards site.ENABLE_USER_SITE to forked interpreter.

        home -- Directory handed to the lgTaskProcessor script as its first
          argument.

        killExisting, if True, will kill any processor holding
          the lock that this processor will need (thus freeing the lock).  Since
          fork() is primarily meant for debugging code that expects the fork
          to always be running with the latest version, this defaults to True.

        Returns the function that is already registered with atexit, but may
        be called manually if you need to kill the fork.
        """
        # abspath() collapses the '..' segments: the first one cancels this
        # module's file name, the second steps out of the module's
        # directory, so the script is looked up in the sibling bin/ folder.
        runProcess = os.path.abspath(os.path.join(
            __file__
            , '../../bin/lgTaskProcessor'
        ))
        args = [ sys.executable ]
        # site.ENABLE_USER_SITE tells us if, for instance, -s was passed
        if not site.ENABLE_USER_SITE:
            args.append('-s')
        args.extend([ runProcess, home ])
        if killExisting:
            args.append('-killExisting')

        proc = subprocess.Popen(tuple(args), close_fds = True)
        def terminateProc():
            # We have to both terminate AND wait, or we'll get defunct
            # processes lying around
            if proc.poll() is None:
                # Process is still running
                proc.terminate()
            proc.wait()
        atexit.register(terminateProc)
        return terminateProc
        
    def getPath(self, path):
        """Returns the absolute path for path, taking into account our
        home directory.
        """
        return self._home + '/' + path

    def log(self, message):
        now = datetime.datetime.utcnow().isoformat()
        print(message)
        open(self.getPath(self._LOG_DIR + self.LOGFILE), 'a').write(
            "[{0}] {1}\n".format(now, message)
        )
        
    def run(self, killExisting=False):
        """Run indefinitely or (for debugging) until no tasks are available.

        If killExisting is specified, then forcibly break the lock by killing
        the process that currently has the lock.

        Outline:
        1. Acquire the processor lock (released again in all cases on exit).
        2. Make sure the logs/ and pids/ directories exist under home.
        3. Decide between multiprocessing, threading (slave processes) or
           neither, based on the interpreter version, the current thread and
           the 'threaded' config setting.
        4. Loop: run the periodic audits, wait for a task start token (with
           a load-scaled timeout), then call _consume().

        The loop ends when _ProcessorStop is raised, when another Exception
        escapes the loop (both are logged here, not re-raised), or when
        _stopOnNoTasks is set, _consume() returned a false value and no
        monitors remain.
        """

        # .lock is automatically appended to FileLock (processor.lock)
        self._lock = ProcessorLock(self._home + '/.processor.lock')
        # raises ProcessorAlreadyRunningError on fail
        self._lock.acquire(killExisting=killExisting)
        try:
            # Directory creation is best-effort: OSError (e.g. the directory
            # already exists) is ignored.
            try:
                os.makedirs(self._home + '/logs')
            except OSError:
                pass
            try:
                os.makedirs(self._home + '/pids')
            except OSError:
                pass

            self.log("Tasks loaded: {0}".format(self._tasksAvailable.keys()))

            # Can we use psutil?
            try:
                import psutil
                self._psutil = psutil
            except ImportError:
                self.log("No stats, install psutil for stats")
                self._psutil = None

            # The advantage to multiprocessing is that since we are forking
            # the process, our libraries don't need to load again.  This
            # means that the startup time for new tasks is substantially
            # (~ 0.5 sec in my tests) faster.
            # The disadvantage is that there's a bug in python 2.6 that
            # prohibits it from working from non-main threads.
            self._useMultiprocessing = (
                sys.version_info[0] >= 3
                or sys.version_info[1] >= 7
                or threading.current_thread().name == 'MainThread'
            )
            # Threaded tasks execute in a slave process to the processor itself.
            # There are a few reasons for this:
            # 1. If code changes, and we need to reboot the processor, the
            #    slave process can finish running its tasks before exiting
            #    but the processor can restart and keep going with new tasks.
            # 2. If a task misbehaves and wreaks havoc, the processor itself
            #    will not be affected.
            self._useThreading = (
                self._useMultiprocessing 
                and self.config.get('threaded', False)
            )
            # Threading and multiprocessing are mutually exclusive; threading
            # wins when both are possible and 'threaded' is configured.
            if self._useThreading:
                self._useMultiprocessing = False
                self.log("Using threading")
                # One slave slot per CPU core, initially empty
                self._slaves = [ None ] * multiprocessing.cpu_count()
            elif self._useMultiprocessing:
                self.log("Using multiprocessing")
            else:
                self.log("Not using multiprocessing - detected non-main thread")

            self.log("Processor started - pid " + str(os.getpid()))

            # Start monitoring our starting pids
            self._monitorCurrentPids()

            # Run the scheduler loop; start with 1 task running at a time
            self._startTaskQueue.put('any')
            # Each last* value is passed to the matching *Audit method, which
            # returns the value to carry into the next iteration.
            lastScheduler = time.time()
            lastMonitor = 0.0
            lastMonitorCheck = 0.0
            self._lastMonitorCheckKill = 0.0
            lastStats = 0.0
            loadMult = self.LOAD_SLEEP_SCALE
            lastCleanup = 0.0
            while True:
                lastScheduler = self._schedulerAudit(lastScheduler)
                lastMonitor = self._monitorAudit(lastMonitor)
                lastMonitorCheck = self._monitorCheckAudit(lastMonitorCheck)
                lastStats = self._statsAudit(lastStats)
                lastCleanup = self._cleanupAudit(lastCleanup)
                try:
                    # 1-minute load average per core, scaled by loadMult and
                    # capped at 10 seconds.
                    load = os.getloadavg()[0] / multiprocessing.cpu_count()
                    loadSleep = min(10.0, loadMult * load)
                    if loadSleep > 0:
                        # We need to wait for either a running task to stop
                        # or our timeout to be met
                        self._startTaskQueue.get(timeout=loadSleep)
                except Empty:
                    # Timeout, no task start tokens are available.
                    # Time to run scheduler again.
                    # For now, we can safely assume that this always means that
                    # any tasks running are long-running, and can grab a new 
                    # token.
                    pass

                # We got a token or timed out, OK to start a new task
                loadMult = self.LOAD_SLEEP_SCALE
                try:
                    result = self._consume()
                except _ProcessorStop:
                    raise
                # NOTE(review): OSError is already a subclass of Exception,
                # so listing it here is redundant.
                except (Exception, OSError):
                    # _consume has its own error logging; just remove our 
                    # extra sleep bonus and try again
                    continue

                if not result:
                    # Failed to get a new task, avoid using cpu
                    if self.LOAD_SLEEP_SCALE > 0:
                        # Otherwise it's a test, so don't sleep
                        loadMult = self.LOAD_SLEEP_SCALE_NO_TASK
                    if len(self._monitors) == 0:
                        if self._stopOnNoTasks:
                            # This is a test, we're not running anything,
                            # so done
                            break
        except _ProcessorStop:
            self.error("Received _ProcessorStop")
        except Exception:
            self.error("Unhandled error, exiting")
        finally:
            # Always give the lock back, however the loop ended
            self._lock.release()

    def start(self):
        """Launch run() on a background thread; intended for test cases.

        LOAD_SLEEP_SCALE is set to 0 on this instance before the thread
        starts, and the thread is kept on self._thread.
        """
        self.LOAD_SLEEP_SCALE = 0.0
        worker = InterruptableThread(target=self.run)
        self._thread = worker
        worker.start()
        # To profile the run loop instead, replace the thread target above
        # with something along these lines:
        #
        #   def withProfile(self):
        #       import cProfile
        #       p = cProfile.Profile(builtins = False, subcalls = False)
        #       p.runctx('self.run()', globals(), locals())
        #       from io import BytesIO
        #       buffer = BytesIO()
        #       import pstats
        #       pr = pstats.Stats(p, stream = buffer)
        #       pr.sort_stats("cumulative")
        #       pr.print_stats()
        #       open('processor.profile', 'w').write(buffer.getvalue())
        #   self._thread = InterruptableThread(target=withProfile, args=(self,))
        #   self._thread.start()

    def stop(self, timeout = 5.0):
        """Halt an asynchronously started processor (only use this for tests!).

        Also kills all tasks.

        timeout -- Max # of seconds to run
        """
        self._stopOnNoTasks = True
        self._thread.join(timeout)
        if self._thread.is_alive():
            self._thread.raiseException(_ProcessorStop)
        while self._thread.is_alive():
            time.sleep(0.1)
        # Now kill our tasks
        pids = [ d[1] for d in self._monitors.itervalues() ]
        for p in pids:
            try:
                os.kill(p, signal.SIGKILL)
            except OSError, e:
                if e.errno == 3:
                    # No such process
                    continue
                raise