def __init__(self, home='.'): """Creates a new task processor operating out of the current working directory. Uses processor.cfg as the config file. home -- The directory to run the processor out of. """ self._home = os.path.abspath(home) # Once we have home set up, do everything in a try and log the error if # we get one try: self.config = Config(self.getPath("processor.cfg"))['processor'] allowed = [ 'taskDatabase', 'pythonPath' , 'threaded', 'rconsole' ] for k in self.config.keys(): if k not in allowed: raise ValueError( "Processor parameter '{0}' unrecognized".format(k)) connection = Connection(self.config['taskDatabase']) connection._ensureIndexes() self.taskConnection = connection # Add other path elements before trying imports for i,path in enumerate(self.config.get('pythonPath', [])): sys.path.insert( i , os.path.abspath(os.path.join(home, path)) ) self._tasksAvailable = self._getTasksAvailable(self._home) self._monitors = {} self._cleanupThread = None self._cleanupThread_doc = """Thread that cleans up the _LOG_DIR""" self._startTaskQueue = Queue() self._stopOnNoTasks = False self._useRConsole = self.config.get('rconsole', False) except: self.error('During init') raise
class Processor(object): """Processes tasks for the given db. Also performs certain administrative actions (or ensures that they are performed) on the tasks database. """ LOGFILE = 'processor.log' LOAD_SLEEP_SCALE = 0.1 LOAD_SLEEP_SCALE_doc = """Seconds to wait for a running task to finish before starting a parallel task. Scaled by system load divided by number of cores. Set to 0 for tests, which will disable both this and LOAD_SLEEP_SCALE_NO_TASK.""" LOAD_SLEEP_SCALE_NO_TASK = 4.0 LOAD_SLEEP_SCALE_NO_TASK_doc = """LOAD_SLEEP_SCALE for when we tried to get a task but none were waiting""" CLEANUP_INTERVAL = 3600 CLEANUP_INTERVAL_doc = """Seconds between cleaning up logs""" KEEP_LOGS_FOR = 30*24*60*60 KEEP_LOGS_FOR_doc = """Seconds to keep logs around for""" _LOG_DIR = "logs/" _LOG_DIR_doc = """Directory used for logs; only for testing. Must end in slash.""" KILL_INTERVAL = 5 KILL_INTERVAL_doc = """Seconds between checking if we should kill a task""" KILL_TOLERANCE = 10 KILL_TOLERANCE_doc = """Min seconds between sending a kill message to a task. Used to prevent killing cleanup code.""" MONITOR_CHECK_INTERVAL = 0.01 MONITOR_CHECK_INTERVAL_doc = """Minimum seconds between checking if tasks have died.""" def __init__(self, home='.'): """Creates a new task processor operating out of the current working directory. Uses processor.cfg as the config file. home -- The directory to run the processor out of. """ self._home = os.path.abspath(home) # Once we have home set up, do everything in a try and log the error if # we get one try: self.config = Config(self.getPath("processor.cfg"))['processor'] allowed = [ 'taskDatabase', 'pythonPath' , 'threaded', 'rconsole' ] for k in self.config.keys(): if k not in allowed: raise ValueError( "Processor parameter '{0}' unrecognized".format(k)) connection = Connection(self.config['taskDatabase']) connection._ensureIndexes() self.taskConnection = connection # Add other path elements before trying imports for i,path in enumerate(self.config.get('pythonPath', [])): sys.path.insert( i , os.path.abspath(os.path.join(home, path)) ) self._tasksAvailable = self._getTasksAvailable(self._home) self._monitors = {} self._cleanupThread = None self._cleanupThread_doc = """Thread that cleans up the _LOG_DIR""" self._startTaskQueue = Queue() self._stopOnNoTasks = False self._useRConsole = self.config.get('rconsole', False) except: self.error('During init') raise def error(self, message): error = traceback.format_exc() self.log("{0} - {1}".format(message, error)) @classmethod def fork(cls, home='.', killExisting=True): """Forks a new subprocess to run a Processor instance out of the given home directory. Useful for e.g. debug environments, where the main script should also spawn a processor but perhaps does something else, like serving webpages. The fork is automatically registered with an atexit to terminate the forked Processor. Look at lamegame_tasking/bin/lgTaskProcessor for a standalone script. Automatically forwards site.ENABLE_USER_SITE to forked interpreter. Returns the function that is already registered with atexit, but may be called manually if you need to kill the fork. killExisting, if True, will kill any processor holding the lock that this processor will need (thus freeing the lock). Since fork() is primarily meant for debugging code that expects the fork to always be running with the latest version, this defaults to True. """ hasS = ('-s' in sys.argv) runProcess = os.path.abspath(os.path.join( __file__ , '../../bin/lgTaskProcessor' )) args = [ sys.executable ] # site.ENABLE_USER_SITE tells us if, for instance, -s was passed if not site.ENABLE_USER_SITE: args.append('-s') args.extend([ runProcess, home ]) if killExisting: args.append('-killExisting') args = tuple(args) proc = subprocess.Popen(args, close_fds = True) def terminateProc(): # We have to both terminate AND wait, or we'll get defunct # processes lying around if proc.poll() is None: # Process is still running proc.terminate() proc.wait() atexit.register(terminateProc) return terminateProc def getPath(self, path): """Returns the absolute path for path, taking into account our home directory. """ return self._home + '/' + path def log(self, message): now = datetime.datetime.utcnow().isoformat() print(message) open(self.getPath(self._LOG_DIR + self.LOGFILE), 'a').write( "[{0}] {1}\n".format(now, message) ) def run(self, killExisting=False): """Run indefinitely or (for debugging) until no tasks are available. If killExisting is specified, then forcibly break the lock by killing the process that currently has the lock. """ # .lock is automatically appended to FileLock (processor.lock) self._lock = ProcessorLock(self._home + '/.processor.lock') # raises ProcessorAlreadyRunningError on fail self._lock.acquire(killExisting=killExisting) try: try: os.makedirs(self._home + '/logs') except OSError: pass try: os.makedirs(self._home + '/pids') except OSError: pass self.log("Tasks loaded: {0}".format(self._tasksAvailable.keys())) # Can we use psutil? try: import psutil self._psutil = psutil except ImportError: self.log("No stats, install psutil for stats") self._psutil = None # The advantage to multiprocessing is that since we are forking # the process, our libraries don't need to load again. This # means that the startup time for new tasks is substantially # (~ 0.5 sec in my tests) faster. # The disadvantage is that there's a bug in python 2.6 that # prohibits it from working from non-main threads. self._useMultiprocessing = ( sys.version_info[0] >= 3 or sys.version_info[1] >= 7 or threading.current_thread().name == 'MainThread' ) # Threaded tasks execute in a slave process to the processor itself. # There are a few reasons for this: # 1. If code changes, and we need to reboot the processor, the # slave process can finish running its tasks before exiting # but the processor can restart and keep going with new tasks. # 2. If a task misbehaves and wreaks havoc, the processor itself # will not be affected. self._useThreading = ( self._useMultiprocessing and self.config.get('threaded', False) ) if self._useThreading: self._useMultiprocessing = False self.log("Using threading") self._slaves = [ None ] * multiprocessing.cpu_count() elif self._useMultiprocessing: self.log("Using multiprocessing") else: self.log("Not using multiprocessing - detected non-main thread") self.log("Processor started - pid " + str(os.getpid())) # Start monitoring our starting pids self._monitorCurrentPids() # Run the scheduler loop; start with 1 task running at a time self._startTaskQueue.put('any') lastScheduler = time.time() lastMonitor = 0.0 lastMonitorCheck = 0.0 self._lastMonitorCheckKill = 0.0 lastStats = 0.0 loadMult = self.LOAD_SLEEP_SCALE lastCleanup = 0.0 while True: lastScheduler = self._schedulerAudit(lastScheduler) lastMonitor = self._monitorAudit(lastMonitor) lastMonitorCheck = self._monitorCheckAudit(lastMonitorCheck) lastStats = self._statsAudit(lastStats) lastCleanup = self._cleanupAudit(lastCleanup) try: load = os.getloadavg()[0] / multiprocessing.cpu_count() loadSleep = min(10.0, loadMult * load) if loadSleep > 0: # We need to wait for either a running task to stop # or our timeout to be met self._startTaskQueue.get(timeout=loadSleep) except Empty: # Timeout, no task start tokens are available. # Time to run scheduler again. # For now, we can safely assume that this always means that # any tasks running are long-running, and can grab a new # token. pass # We got a token or timed out, OK to start a new task loadMult = self.LOAD_SLEEP_SCALE try: result = self._consume() except _ProcessorStop: raise except (Exception, OSError): # _consume has its own error logging; just remove our # extra sleep bonus and try again continue if not result: # Failed to get a new task, avoid using cpu if self.LOAD_SLEEP_SCALE > 0: # Otherwise it's a test, so don't sleep loadMult = self.LOAD_SLEEP_SCALE_NO_TASK if len(self._monitors) == 0: if self._stopOnNoTasks: # This is a test, we're not running anything, # so done break except _ProcessorStop: self.error("Received _ProcessorStop") except Exception: self.error("Unhandled error, exiting") finally: self._lock.release() def start(self): """Run the Processor asynchronously for test cases. """ self.LOAD_SLEEP_SCALE = 0.0 self._thread = InterruptableThread(target=self.run) self._thread.start() ''' def withProfile(self): import cProfile p = cProfile.Profile(builtins = False, subcalls = False) p.runctx('self.run()', globals(), locals()) from io import BytesIO buffer = BytesIO() import pstats pr = pstats.Stats(p, stream = buffer) pr.sort_stats("cumulative") pr.print_stats() open('processor.profile', 'w').write(buffer.getvalue()) self._thread = InterruptableThread(target=withProfile, args=(self,)) self._thread.start() ''' def stop(self, timeout = 5.0): """Halt an asynchronously started processor (only use this for tests!). Also kills all tasks. timeout -- Max # of seconds to run """ self._stopOnNoTasks = True self._thread.join(timeout) if self._thread.is_alive(): self._thread.raiseException(_ProcessorStop) while self._thread.is_alive(): time.sleep(0.1) # Now kill our tasks pids = [ d[1] for d in self._monitors.itervalues() ] for p in pids: try: os.kill(p, signal.SIGKILL) except OSError, e: if e.errno == 3: # No such process continue raise