def start():
    """Initialize agent config and run the tornado event loop until terminated.

    Reads required config (``agent_id``, ``supervisor_uri``) plus the fastcgi
    socket directory and an optional ``start_delay``. In internal-test channel
    with a nonzero delay, sleeps before starting. Installs SIGTERM/SIGINT
    handlers that schedule ``_terminate`` on the IOLoop, spawns the
    ``_Dispatcher`` loop, and blocks in ``IOLoop.start()``.
    """
    #TODO(robnagler) commands need their own init hook like the server has
    job.init()
    global cfg
    cfg = pkconfig.init(
        agent_id=pkconfig.Required(str, 'id of this agent'),
        fastcgi_sock_dir=(
            pkio.py_path('/tmp'),
            pkio.py_path,
            # fix: help text previously said "fastcfgi"
            'directory of fastcgi socket, must be less than 50 chars'),
        start_delay=(0, pkconfig.parse_seconds, 'delay startup in internal_test mode'),
        supervisor_uri=pkconfig.Required(
            str,
            'how to connect to the supervisor',
        ),
    )
    pkdlog('{}', cfg)
    # start_delay only honored in the internal test channel
    if pkconfig.channel_in_internal_test() and cfg.start_delay:
        pkdlog('start_delay={}', cfg.start_delay)
        time.sleep(cfg.start_delay)
    i = tornado.ioloop.IOLoop.current()
    d = _Dispatcher()

    def s(*args):
        # signal-safe: defer _terminate onto the IOLoop thread
        return i.add_callback_from_signal(_terminate, d)

    signal.signal(signal.SIGTERM, s)
    signal.signal(signal.SIGINT, s)
    i.spawn_callback(d.loop)
    i.start()
def default_command(in_file):
    """Reads `in_file` passes to `msg.jobCmd`

    Must be called in run_dir

    Writes its output on stdout.

    Args:
        in_file (str): json parsed to msg
    Returns:
        str: json output of command, e.g. status msg
    """
    try:
        job.init()
        path = pkio.py_path(in_file)
        msg = pkjson.load_any(path)
        #TODO(e-carlin): find common place to serialize/deserialize paths
        msg.runDir = pkio.py_path(msg.runDir)
        path.remove()
        # dispatch to the module-level _do_<jobCmd> handler
        handler = globals()['_do_' + msg.jobCmd]
        result = handler(
            msg,
            sirepo.template.import_module(msg.simulationType),
        )
        if result is None:
            # handler produced no reply; emit nothing
            return
        reply = PKDict(result).pksetdefault(state=job.COMPLETED)
    except Exception as exc:
        # UserAlert carries a user-facing error; anything else is stringified
        if isinstance(exc, sirepo.util.UserAlert):
            error = exc.sr_args.error
        else:
            error = str(exc)
        reply = PKDict(
            state=job.ERROR,
            error=error,
            stack=pkdexc(),
        )
    return pkjson.dump_pretty(reply, pretty=False)
def init():
    """One-time module initialization: config, job db dir, and purge task.

    Idempotent: returns immediately once ``_DB_DIR`` is set. Initializes the
    driver layer, reads config, seeds the per-kind polling intervals, runs the
    one-time py2 runner-db migration when needed, and schedules the free-user
    purge callback on the IOLoop.
    """
    global _DB_DIR, cfg, _NEXT_REQUEST_SECONDS, job_driver
    if _DB_DIR:
        # already initialized
        return
    job.init()
    from sirepo import job_driver
    job_driver.init(pkinspect.this_module())
    _DB_DIR = sirepo.srdb.root().join(_DB_SUBDIR)
    cfg = pkconfig.init(
        job_cache_secs=(300, int, 'when to re-read job state from disk'),
        max_hours=dict(
            analysis=(.04, float, 'maximum run-time for analysis job',),
            parallel=(
                1,
                float,
                'maximum run-time for parallel job (except sbatch)',
            ),
            parallel_premium=(
                2,
                float,
                'maximum run-time for parallel job for premium user (except sbatch)',
            ),
            sequential=(.1, float, 'maximum run-time for sequential job'),
        ),
        purge_free_after_days=(
            1000,
            int,
            'how many days to wait before purging a free users simulation',
        ),
        purge_free_start=(
            '02:00:00',
            str,
            'time to first start purging free users simulations (%H:%M:%S)',
        ),
        purge_free_period=(
            '01:00:00',
            str,
            'how often to purge free users simulations after start time (%H:%M:%S)',
        ),
        sbatch_poll_secs=(15, int, 'how often to poll squeue and parallel status'),
    )
    _NEXT_REQUEST_SECONDS = PKDict()
    _NEXT_REQUEST_SECONDS[job.PARALLEL] = 2
    _NEXT_REQUEST_SECONDS[job.SBATCH] = cfg.sbatch_poll_secs
    _NEXT_REQUEST_SECONDS[job.SEQUENTIAL] = 1
    sirepo.auth_db.init(sirepo.srdb.root(), migrate_db_file=False)
    if not sirepo.simulation_db.user_dir_name().exists():
        # fresh install: just create the job db dir
        pykern.pkio.mkdir_parent(_DB_DIR)
    elif not _DB_DIR.exists():
        # existing users but no job db: migrate from the py2 runner db
        pkdlog('calling upgrade_runner_to_job_db path={}', _DB_DIR)
        import subprocess
        subprocess.check_call(
            (
                'pyenv',
                'exec',
                'sirepo',
                'db',
                'upgrade_runner_to_job_db',
                _DB_DIR,
            ),
            env=PKDict(os.environ).pkupdate(
                PYENV_VERSION='py2',
                SIREPO_AUTH_LOGGED_IN_USER='******',
            ),
        )
    tornado.ioloop.IOLoop.current().add_callback(
        _ComputeJob.purge_free_simulations,
        init=True,
    )
def init():
    """Initialize supervisor state once: config, db dir, purge callback.

    No-op on repeat calls (guarded by ``_DB_DIR``). Sets up the driver layer,
    reads run-time limit and purge config, seeds per-kind request intervals,
    performs the legacy runner-db migration when required, and schedules the
    free-simulation purge on the current IOLoop.
    """
    global _DB_DIR, cfg, _NEXT_REQUEST_SECONDS, job_driver
    if _DB_DIR:
        # already initialized
        return
    job.init()
    from sirepo import job_driver
    job_driver.init(pkinspect.this_module())
    _DB_DIR = sirepo.srdb.root().join(_DB_SUBDIR)
    cfg = pkconfig.init(
        job_cache_secs=(300, int, 'when to re-read job state from disk'),
        max_secs=dict(
            analysis=(
                144,
                pkconfig.parse_seconds,
                'maximum run-time for analysis job',
            ),
            parallel=(
                3600,
                pkconfig.parse_seconds,
                'maximum run-time for parallel job (except sbatch)',
            ),
            parallel_premium=(
                3600*2,
                pkconfig.parse_seconds,
                'maximum run-time for parallel job for premium user (except sbatch)',
            ),
            sequential=(
                360,
                pkconfig.parse_seconds,
                'maximum run-time for sequential job',
            ),
        ),
        purge_non_premium_after_secs=(
            0,
            pkconfig.parse_seconds,
            'how long to wait before purging non-premium users simulations',
        ),
        purge_non_premium_task_secs=(
            None,
            pkconfig.parse_seconds,
            'when to clean up simulation runs of non-premium users (%H:%M:%S)',
        ),
        sbatch_poll_secs=(15, int, 'how often to poll squeue and parallel status'),
    )
    s = PKDict()
    s[job.PARALLEL] = 2
    s[job.SBATCH] = cfg.sbatch_poll_secs
    s[job.SEQUENTIAL] = 1
    _NEXT_REQUEST_SECONDS = s
    sirepo.auth_db.init()
    if not sirepo.simulation_db.user_path().exists():
        # no users yet: create an empty job db dir
        pykern.pkio.mkdir_parent(_DB_DIR)
    elif not _DB_DIR.exists():
        # users exist without a job db: migrate the legacy runner db
        pkdlog('calling upgrade_runner_to_job_db path={}', _DB_DIR)
        import subprocess
        subprocess.check_call(
            (
                'pyenv',
                'exec',
                'sirepo',
                'db',
                'upgrade_runner_to_job_db',
                _DB_DIR,
            ),
            env=PKDict(os.environ).pkupdate(
                SIREPO_AUTH_LOGGED_IN_USER='******',
            ),
        )
    tornado.ioloop.IOLoop.current().add_callback(
        _ComputeJob.purge_free_simulations,
    )
def init():
    """Initialize the supervisor once: driver, config, db dir, purge task.

    Guarded by ``cfg`` so repeat calls return immediately. Unlike older
    variants, the db directory comes from ``sirepo.srdb.supervisor_dir()``
    and no migration step is performed.
    """
    global cfg, _DB_DIR, _NEXT_REQUEST_SECONDS, job_driver
    if cfg:
        # already initialized
        return
    job.init()
    from sirepo import job_driver
    job_driver.init(pkinspect.this_module())
    cfg = pkconfig.init(
        job_cache_secs=(300, int, 'when to re-read job state from disk'),
        max_secs=dict(
            analysis=(
                144,
                pkconfig.parse_seconds,
                'maximum run-time for analysis job',
            ),
            parallel=(
                3600,
                pkconfig.parse_seconds,
                'maximum run-time for parallel job (except sbatch)',
            ),
            parallel_premium=(
                3600 * 2,
                pkconfig.parse_seconds,
                'maximum run-time for parallel job for premium user (except sbatch)',
            ),
            sequential=(
                360,
                pkconfig.parse_seconds,
                'maximum run-time for sequential job',
            ),
        ),
        purge_non_premium_after_secs=(
            0,
            pkconfig.parse_seconds,
            'how long to wait before purging non-premium users simulations',
        ),
        purge_non_premium_task_secs=(
            None,
            pkconfig.parse_seconds,
            'when to clean up simulation runs of non-premium users (%H:%M:%S)',
        ),
        sbatch_poll_secs=(15, int, 'how often to poll squeue and parallel status'),
    )
    _DB_DIR = sirepo.srdb.supervisor_dir()
    _NEXT_REQUEST_SECONDS = PKDict()
    _NEXT_REQUEST_SECONDS[job.PARALLEL] = 2
    _NEXT_REQUEST_SECONDS[job.SBATCH] = cfg.sbatch_poll_secs
    _NEXT_REQUEST_SECONDS[job.SEQUENTIAL] = 1
    sirepo.auth_db.init()
    tornado.ioloop.IOLoop.current().add_callback(
        _ComputeJob.purge_free_simulations,
    )
def init():
    """Initialize supervisor config, run-time limits, and the job db dir.

    Idempotent via the ``_DB_DIR`` guard. Converts configured ``max_hours``
    per job kind into ``_MAX_RUN_SECS`` and runs the legacy runner-db
    migration when an existing install has no job db yet.
    """
    global _DB_DIR, cfg, _NEXT_REQUEST_SECONDS
    if _DB_DIR:
        # already initialized
        return
    job.init()
    job_driver.init()
    _DB_DIR = sirepo.srdb.root().join(_DB_SUBDIR)
    cfg = pkconfig.init(
        parallel=dict(
            max_hours=(
                1,
                float,
                'maximum run-time for parallel job (except sbatch)',
            ),
        ),
        sbatch_poll_secs=(60, int, 'how often to poll squeue and parallel status'),
        sequential=dict(
            max_hours=(.1, float, 'maximum run-time for sequential job'),
        ),
    )
    # hours -> whole seconds, per job kind
    for kind in job.KINDS:
        _MAX_RUN_SECS[kind] = int(cfg[kind].max_hours * 3600)
    _NEXT_REQUEST_SECONDS = PKDict()
    _NEXT_REQUEST_SECONDS[job.PARALLEL] = 2
    _NEXT_REQUEST_SECONDS[job.SBATCH] = cfg.sbatch_poll_secs
    _NEXT_REQUEST_SECONDS[job.SEQUENTIAL] = 1
    if not sirepo.simulation_db.user_dir_name().exists():
        # fresh install: only need an empty job db dir
        pykern.pkio.mkdir_parent(_DB_DIR)
    elif not _DB_DIR.exists():
        # users exist but no job db: migrate from the py2 runner db
        pkdlog('calling upgrade_runner_to_job_db path={}', _DB_DIR)
        import subprocess
        subprocess.check_call(
            (
                'pyenv',
                'exec',
                'sirepo',
                'db',
                'upgrade_runner_to_job_db',
                _DB_DIR,
            ),
            env=PKDict(os.environ).pkupdate(
                PYENV_VERSION='py2',
                SIREPO_AUTH_LOGGED_IN_USER='******',
            ),
        )
def start():
    """Configure the agent and block in the tornado IOLoop.

    Reads required ``agent_id`` and ``supervisor_uri`` config, wires
    SIGTERM/SIGINT to ``_terminate`` via the IOLoop, spawns the
    ``_Dispatcher`` loop, and starts the IOLoop (does not return until
    the loop stops).
    """
    #TODO(robnagler) commands need their own init hook like the server has
    job.init()
    global cfg
    cfg = pkconfig.init(
        agent_id=pkconfig.Required(str, 'id of this agent'),
        supervisor_uri=pkconfig.Required(
            str,
            'how to connect to the supervisor',
        ),
    )
    pkdlog('{}', cfg)
    loop = tornado.ioloop.IOLoop.current()
    dispatcher = _Dispatcher()

    def _on_signal(*args):
        # runs in signal context; hand off to the IOLoop thread safely
        return loop.add_callback_from_signal(_terminate, dispatcher)

    signal.signal(signal.SIGTERM, _on_signal)
    signal.signal(signal.SIGINT, _on_signal)
    loop.spawn_callback(dispatcher.loop)
    loop.start()