def add_step(self, spec, user):
    """ Add a step to the running list """
    try:
        output_dir = spec["output_dir"]
        self.count += 1
        run_id = spec.get('run_id', self.count)
        pretty_print("Queuing step: id=%d, name=%s, user=%s, dir=%s"
                     % (run_id, spec['name'], user, output_dir))
        p_dict = {
            'id': run_id,
            'label': spec['name'],
            'output_dir': output_dir,
            'user': user,
            'spec': spec,
            'result': self.pool['steps'].apply_async(
                submit, (spec, user, run_id, self.pids))
        }
        self.submitted.append(p_dict)
        return "Pipeline %s (ID %d) has been queued" % (p_dict['label'], run_id)
    except Exception as e:
        return "Exception caught creating the pipeline: %s" % e
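# Illustrative only: the minimal spec shape add_step expects, inferred from the
# keys read above ('output_dir' and 'name' are required; 'run_id' is optional
# and falls back to the manager's internal counter). All values here are
# hypothetical.
example_spec = {
    'name': 'fastqc',                       # hypothetical step name
    'output_dir': '/scratch/runs/fastqc',   # hypothetical output location
    'run_id': 42,                           # optional; omit to use self.count
}
# manager.add_step(example_spec, user='jdoe')   # 'manager' is an assumed instance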
def apply_custom(config, custom):
    """
    Replace/add custom values to those in config.
    Config is a dictionary and is expected to have a 'config' section.
    Custom is a list of custom parameters of the form 'a.b.c=value'
    """
    ut.pretty_print("Setting custom params: %s" % custom)
    for c in custom:
        path, v = c.split('=')
        keys = path.split('.')
        if 'config' not in config:
            config['config'] = {}
        param = config['config']
        for key in keys[:-1]:
            if key not in param:
                ut.pretty_print(
                    '*** WARNING: creating new parameter %s (a typo?)' % key)
                param[key] = {}
            param = param[key]
        name = keys[-1]
        if name in param:
            # if already set, preserve type
            ptype = type(param[name])
        else:
            ptype = type(v)
        param.update({name: ptype(v)})
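# A minimal, self-contained sketch of apply_custom's dotted-path semantics
# (sample values are hypothetical). Note the type preservation: 'port' already
# exists as an int, so the string '27018' is cast back to int; 'log.level' is
# a new branch and stays a string (after a "creating new parameter" warning).
def _demo_apply_custom():
    demo = {'config': {'db': {'host': 'localhost', 'port': 27017}}}
    apply_custom(demo, ['db.port=27018', 'log.level=debug'])
    assert demo['config']['db']['port'] == 27018
    assert demo['config']['log']['level'] == 'debug'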
def stop(self):
    """ Stop the processing of the pipelines """
    pretty_print('Stopping pool')
    self.lock.acquire()
    self.pool['pipelines'].terminate()
    self.pool['pipelines'].join()
    self.running = False
    self.lock.release()
def get_db():
    """ Connect to the db if it is not already connected """
    global db
    if not db:
        client = MongoClient(cfg.MONGODB_HOST, cfg.MONGODB_PORT)
        db = client[cfg.MONGODB_NAME]
        ut.pretty_print("Connected to MONGO_HOST %s on PORT %s"
                        % (cfg.MONGODB_HOST, cfg.MONGODB_PORT))
    return db
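# Usage sketch: get_db() opens one MongoClient on the first call and caches it
# in the module-level 'db'; later calls reuse the connection. The helper and
# collection name below are hypothetical, not part of the original module.
def find_run(run_id):
    """ Illustrative helper: look up a pipeline run by its id """
    return get_db()['pipelines'].find_one({'run_id': run_id})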
def delete_pipeline(self, run_id, user, output_dir, work_dir):
    """ Wipe out a pipeline directory """
    try:
        pretty_print("Deleting pipeline: id=%d, user=%s, output_dir=%s, work_dir=%s"
                     % (run_id, user, output_dir, work_dir))
        p_dict = {
            'id': run_id,
            'user': user,
            'label': 'deletion',
            'result': self.pool['pipelines'].apply_async(
                delete, [user, output_dir, work_dir])
        }
        self.actions.append(p_dict)
    except Exception as e:
        return "Exception caught queuing pipeline for deletion: %s" % e
def resume_pipeline(self, run_id, user, work_dir):
    """ Resume a pipeline residing in given directory """
    try:
        config = os.path.join(work_dir, 'pipeline.cfg')
        pretty_print("Resuming pipeline: id=%d, user=%s, work_dir=%s"
                     % (run_id, user, work_dir))
        p_dict = {
            'id': run_id,
            'user': user,
            'label': 'resuming',
            'result': self.pool['pipelines'].apply_async(
                resume, (user, config, run_id, self.pids))
        }
        self.actions.append(p_dict)
    except Exception as e:
        return "Exception caught queuing pipeline for resuming: %s" % e
def accumulate(self):
    """ Accumulate process resources information """
    global scheduler
    self.nsamples += 1
    try:
        parent = psutil.Process(self.pid)
        procs = [parent]
        procs.extend(parent.children(recursive=True))
        self.stats['cpuusr'] = 0
        self.stats['iowrite'] = 0
        self.stats['ioread'] = 0
        self.stats['names'] = []
        self.stats['pids'] = []
        memtot = 0
        cpuperc = 0
        for proc in procs:
            self.stats['names'].append(proc.name())
            self.stats['pids'].append(proc.pid)
            self.stats['cpuusr'] += proc.cpu_times().user
            cpuperc += proc.cpu_percent()
            memtot += proc.memory_info().rss
            io_count = self.get_iocounts(proc.pid)
            self.stats['ioread'] += io_count[0]
            self.stats['iowrite'] += io_count[1]
        if memtot > self.stats['memmax']:
            self.stats['memmax'] = memtot
        if cpuperc > self.stats['cpupmax']:
            self.stats['cpupmax'] = cpuperc
        self.average('memavg', memtot)
        self.average('cpupavg', cpuperc)
        self.stats['time'] = time.time() - self.sttime
        # Print stats and schedule next accumulation
        if self.verbose:
            pretty_print(format_dict(self.stats))
        scheduler.enter(self.interval, 1, self.accumulate, [])
    except psutil.NoSuchProcess:
        pretty_print('Process exited: dumping statistics')
        print(json.dumps(self.stats, sort_keys=True))
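# A standalone sketch of the sampling pattern used above: walk a process and
# its children with psutil and aggregate memory and CPU time. Illustrative
# only; the real accumulate() also tracks I/O counters, rolling averages and
# reschedules itself through the sched-based scheduler.
import os
import psutil

def sample_tree(pid=None):
    parent = psutil.Process(pid or os.getpid())
    procs = [parent] + parent.children(recursive=True)
    return {
        'nprocs': len(procs),
        'rss_bytes': sum(p.memory_info().rss for p in procs),
        'cpu_user_s': sum(p.cpu_times().user for p in procs),
    }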
def put(self):
    """ Queue the specific pipeline """
    data = request.get_json(force=True)
    config = data.get('config')
    user = data.get('user')
    step = Step.load_step(config)
    errors = step.validate_config(config)
    if not step.output_dir:
        errors['output_dir'] = 'missing value'
    if not errors:
        # Get id from DB
        db_info = dbmodel.PipelineDb(config['name'], config, [step.name],
                                     user, output_dir=step.output_dir)
        config['run_id'] = db_info.run_id
        ut.pretty_print("Submitting step %s (ID %d) for user %s"
                        % (config['name'], config['run_id'], user))
        return pm.add_step(config, user)
    else:
        return errors, 400
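# Client-side sketch of the payload this endpoint expects; only the 'config'
# and 'user' keys are taken from the handler above. The URL and all values
# are hypothetical.
import requests

payload = {
    'user': 'jdoe',
    'config': {
        'name': 'fastqc',                 # hypothetical step config
        'output_dir': '/scratch/runs/1',
    },
}
# requests.put('http://localhost:5000/api/steps', json=payload)  # URL assumed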
def process(self):
    """ Process all the pipelines in the list """
    while self.running:
        ###############################################
        # Pipeline handling
        ###############################################
        for p_dict in copy.copy(self.submitted):
            if p_dict['result'].ready():
                try:
                    self.check_result(p_dict)
                except Exception as e:
                    pretty_print('*** Pipeline %s with id %d terminated with EXCEPTION:'
                                 % (p_dict['label'], p_dict['id']))
                    print('-----\n%s\n-----' % e)
                finally:
                    self.submitted.remove(p_dict)
                    if p_dict['id'] in self.pids:
                        self.pids.pop(p_dict['id'])
        for p_dict in copy.copy(self.actions):
            if p_dict['result'].ready():
                try:
                    self.check_result(p_dict)
                except Exception as e:
                    pretty_print('*** Pipeline %s %s terminated with EXCEPTION:'
                                 % (p_dict['id'], p_dict['label']))
                    print('-----\n%s\n-----' % e)
                finally:
                    self.actions.remove(p_dict)
        # Log roughly once a minute (the loop sleeps 10 s per pass)
        if (int(time.time() / 10)) % 6 == 0:
            npipes = len(self.submitted)
            if npipes:
                pretty_print('INFO: %d pipeline%s currently running:'
                             % (npipes, ['', 's'][npipes > 1]))
                self.print_pipelines()
            else:
                pretty_print('INFO: no pipeline currently running')
        ###############################################
        # Step handling
        ###############################################
        time.sleep(10)
def pause_pipeline(self, run_id, user):
    """ Interrupt pipeline by sending signal to corresponding worker's children """
    pid = self.pids.get(run_id)
    if pid:
        pretty_print("Pausing pipeline: id=%d, user=%s" % (run_id, user))
        try:
            parent = psutil.Process(pid)
            children = parent.children(recursive=True)
            for child in children:
                # pids must be strings when passed as command arguments
                run_as(cmd=['kill', str(child.pid)], user=user)
        except psutil.NoSuchProcess:
            pretty_print("Error pausing pipeline: no process with ID %d" % int(pid))
    else:
        pretty_print("Error pausing pipeline: ID %d not found" % run_id)
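# The handler above shells out to `kill` via run_as so the signal is delivered
# as the owning user. A SIGSTOP-based variant (illustrative, no user switch)
# for the case where the daemon owns the processes itself:
import os
import signal
import psutil

def suspend_tree(pid):
    for child in psutil.Process(pid).children(recursive=True):
        os.kill(child.pid, signal.SIGSTOP)   # SIGCONT resumes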
def check_result(self, p_dict):
    """ Retrieve result of pipeline and dump some information """
    (err, out) = p_dict['result'].get()  # Re-raises exception if submit failed
    if len(err):
        pretty_print('*** Pipeline [%03d] %s terminated with ERROR:'
                     % (p_dict['id'], p_dict['label']))
        print('-----\n%s-----' % err)
        if len(out):
            pretty_print('Pipeline OUTPUT:')
            print('-----\n%s-----' % out)
    elif len(out):
        pretty_print('Pipeline [%03d] %s terminated SUCCESSFULLY:'
                     % (p_dict['id'], p_dict['label']))
        print('-----\n%s-----' % out)
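# A minimal sketch of the AsyncResult contract relied on above: get() returns
# whatever the worker returned -- here the (err, out) tuple -- or re-raises any
# exception the worker left unhandled. The worker function is illustrative.
from multiprocessing import Pool

def _worker():
    return ('', 'all good\n')              # (err, out), as submit() returns

if __name__ == '__main__':
    pool = Pool(1)
    res = pool.apply_async(_worker)
    err, out = res.get()                   # blocks; re-raises worker exceptions
    pool.close()
    pool.join()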
You should have received a copy of the GNU General Public License
along with Pypers. If not, see <http://www.gnu.org/licenses/>.
"""

import os
import argparse
from datetime import datetime

from pypers.utils.utils import pretty_print

parser = argparse.ArgumentParser(description='Start API server.')
parser.add_argument('-r', '--enable-reload',
                    dest='reloader',
                    action='store_true',
                    help='enable hot reload of server.py on any source code change')

if __name__ == '__main__':
    pretty_print('===========================')
    pretty_print('STARTING SERVER')

    from pypers.api import app
    from pypers.config import ACME_PROD, SERVICE_PORT, HOST_NAME

    now = datetime.utcnow()
    if ACME_PROD:
        pretty_print('Flask API server start at port %s in PRODUCTION mode' % SERVICE_PORT)
        app.run(host=HOST_NAME, port=SERVICE_PORT)
    else:
        args = parser.parse_args()
        use_reloader = bool(args.reloader)
        pretty_print('Flask API server start at port %s in DEVELOPMENT mode (with%s reloader)'
                     % (SERVICE_PORT, '' if use_reloader else 'out'))
        app.run(host=HOST_NAME, port=SERVICE_PORT, debug=True, use_reloader=use_reloader)
def stop_pipeline(signum, frame):
    global pi
    ut.pretty_print("Signal received: terminating pipeline")
    pi.stop()
    sys.exit()
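# Registration sketch (illustrative): wiring the handler above to SIGINT and
# SIGTERM so an interrupted run shuts the pipeline down cleanly.
import signal
signal.signal(signal.SIGINT, stop_pipeline)
signal.signal(signal.SIGTERM, stop_pipeline)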
        config['sys_path'] = os.path.dirname(os.path.realpath(args.config_file))
    except Exception as e1:
        raise e1

    config = Step.load_cfg(args.config_file)
    if 'sys_path' not in config:
        config['sys_path'] = os.path.dirname(os.path.realpath(args.config_file))
    step = Step.load_step(config)
    is_step = True

    if args.custom:
        apply_custom(config, args.custom)

    if args.local:
        ut.pretty_print("Instantiating the Pipeline...")
        p = Pipeline(config, user=args.user, db=args.db, schedname='SCHED_LOCAL')
        ut.pretty_print("Running the pipeline...")
        p.run(local=True, verbose=args.verbose)
    elif args.interactive:
        global pi
        ut.pretty_print("Instantiating the Pipeline...")
        pi = Pipeline(config, user=args.user)
        ut.pretty_print("Running pipeline %s" % pi.db.run_id)
        tb = None
        try:
            pi.run(verbose=args.verbose)
        except Exception as e:
            ex_type, ex, tb = sys.exc_info()
            ut.pretty_print("FAILED: %s" % e)
            traceback.print_tb(tb)
def handler(signum, frame):
    global pm
    pretty_print('Received signal %d' % signum)
    pm.stop()
    pretty_print('Collecting garbage and exiting')
    sys.exit()