def process_queues(opts, parts, whoami, sys_type):
    """Update the queue associations for the given partitions.

    If --rmq or --appq was used, compute the new queue list per partition
    and update each partition individually; otherwise assign opts.queue to
    all named partitions in a single call.

    Fixes: '!= None' -> 'is not None'; removed the dead '_parts = None'
    assignment that was immediately overwritten.
    """
    if opts.rmq is not None or opts.appq is not None:
        part_queues = partition_queues(opts, parts, sys_type)
        _parts = []
        for part in part_queues:
            args = ([{'tag': 'partition', 'name': part}],
                    {'queue': part_queues[part]}, whoami)
            _parts.append(client_utils.component_call(SYSMGR, False,
                                                      'set_partitions', args))
    else:
        # Bulk update: assign the same queue string to every partition.
        args = ([{'tag': 'partition', 'name': partname} for partname in parts],
                {'queue': opts.queue}, whoami)
        _parts = client_utils.component_call(SYSMGR, False,
                                             'set_partitions', args)
    return _parts
def main():
    """Node administration main.

    NOTE(review): the original docstring read 'setres main', but the option
    set handled here (--down/--up/--list_nstates/--queue) matches nodeadm's
    validate_args -- docstring corrected; confirm against the file name.
    """
    # Set up client logging before doing anything else.
    client_utils.setup_logging(logging.INFO)

    # List of callbacks with their arguments: <cb function> <cb args>
    callbacks = [[cb_debug, ()]]

    # Splice the version information into the usage/option text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    whoami = client_utils.getuid()
    parser.parse_it()  # parse the command line
    validate_args(parser)

    opt = parser.options
    args = parser.args

    if opt.down:
        delta = client_utils.component_call(SYSMGR, False, 'nodes_down',
                                            (args, whoami))
        client_utils.logger.info("nodes marked down:")
        for node in delta:
            client_utils.logger.info("   %s" % node)
        client_utils.logger.info("")
        client_utils.logger.info("unknown nodes:")
        for node in args:
            if node not in delta:
                client_utils.logger.info("   %s" % node)
    elif opt.up:
        delta = client_utils.component_call(SYSMGR, False, 'nodes_up',
                                            (args, whoami))
        client_utils.logger.info("nodes marked up:")
        for node in delta:
            client_utils.logger.info("   %s" % node)
        client_utils.logger.info('')
        client_utils.logger.info("nodes that weren't in the down list:")
        for node in args:
            if node not in delta:
                client_utils.logger.info("   %s" % node)
    elif opt.list_nstates:
        header, output = client_utils.cluster_display_node_info()
        client_utils.printTabular(header + output)
    elif opt.queue:
        data = client_utils.component_call(SYSMGR, False,
                                           'set_queue_assignments',
                                           (opt.queue, args, whoami))
        client_utils.logger.info(data)
def run_interactive_job(jobid, user, disable_preboot, nodes, procs):
    """Wait for an interactive job to start, then open the user session.

    Returns deljob: True on cluster systems (the caller must delete the job
    on exit), False otherwise (the job completes normally).

    BUG FIX: 'continue' added to the except handler.  Previously execution
    fell through after a component error and read 'response', which is
    unbound if the very first get_jobs call fails (NameError).
    """
    not_exit_on_interrupt()
    # Save whether we are running on a cluster system.
    impl = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    exit_on_interrupt()
    deljob = (impl == "cluster_system")

    def start_session(loc, resid, nodes, procs):
        """Start the ssh or shell session with Cobalt env vars exported."""
        # Create necessary env vars for the session.
        os.putenv("COBALT_NODEFILE", "/var/tmp/cobalt.%s" % (jobid))
        os.putenv("COBALT_JOBID", "%s" % (jobid))
        if resid:
            os.putenv("COBALT_RESID", "%s" % (resid))
        os.putenv("COBALT_PARTNAME", loc)
        os.putenv("COBALT_BLOCKNAME", loc)
        os.putenv("COBALT_JOBSIZE", str(procs))
        os.putenv("COBALT_BLOCKSIZE", str(nodes))
        os.putenv("COBALT_PARTSIZE", str(nodes))
        client_utils.logger.info("Opening interactive session to %s", loc)
        if deljob:
            os.system("/usr/bin/ssh -o \"SendEnv COBALT_NODEFILE COBALT_JOBID\" %s" % (loc))
        else:
            os.system(os.environ['SHELL'])

    # Wait for the job to start.
    query = [{'tag': 'job', 'jobid': jobid, 'location': '*', 'state': "*",
              'resid': "*"}]
    client_utils.logger.info("Wait for job %s to start...", str(jobid))
    while True:
        # On an ssl timeout or component lookup error, try again.
        try:
            not_exit_on_interrupt()
            response = client_utils.component_call(QUEMGR, False, 'get_jobs',
                                                   (query, ), False)
            exit_on_interrupt()
            if not response:
                # jobid not found: flag an error and exit.
                client_utils.logger.error("Jobid %s not found after submission",
                                          str(jobid))
                sys.exit()
        except (xmlrpclib.Fault, ComponentProxy) as fault:
            # This can happen if the component is down, so try again.
            client_utils.logger.error('Error getting job info: %s. Try again',
                                      fault)
            sleep(2)
            continue  # response may be unbound here -- do not fall through
        state = response[0]['state']
        location = response[0]['location']
        resid = response[0]['resid']
        if state == 'running' and location:
            start_session(location[0], resid, nodes, procs)
            break
        client_utils.logger.debug('Current State "%s" for job %s',
                                  str(state), str(jobid))
        sleep(2)
    return deljob
def run_interactive_job(jobid, user, disable_preboot):
    """Wait for the interactive job to start, then open the user session.

    Returns deljob: True on cluster systems (caller deletes the job on
    exit), False otherwise.

    BUG FIX: 'continue' added to the except handler -- previously the loop
    fell through after a component error and read 'response', which is
    unbound if the very first get_jobs call fails (NameError).
    NOTE(review): disable_preboot is accepted but unused in this block.
    """
    not_exit_on_interrupt()
    # Save whether we are running on a cluster system.
    impl = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    exit_on_interrupt()
    deljob = (impl == "cluster_system")

    def start_session(loc):
        """Start the ssh or shell session."""
        # Create necessary env vars for the session.
        os.putenv("COBALT_NODEFILE", "/var/tmp/cobalt.%s" % (jobid))
        os.putenv("COBALT_JOBID", "%s" % (jobid))
        os.putenv("COBALT_PARTNAME", loc)
        os.putenv("COBALT_BLOCKNAME", loc)
        client_utils.logger.info("Opening interactive session to %s", loc)
        if deljob:
            os.system("/usr/bin/ssh -o \"SendEnv COBALT_NODEFILE COBALT_JOBID\" %s" % (loc))
        else:
            os.system(os.environ['SHELL'])

    # Wait for the job to start.
    query = [{'tag': 'job', 'jobid': jobid, 'location': '*', 'state': "*"}]
    client_utils.logger.info("Wait for job %s to start...", str(jobid))
    while True:
        # On an ssl timeout or component lookup error, try again.
        try:
            not_exit_on_interrupt()
            response = client_utils.component_call(QUEMGR, False, 'get_jobs',
                                                   (query, ), False)
            exit_on_interrupt()
            if not response:
                # jobid not found: flag an error and exit.
                client_utils.logger.error("Jobid %s not found after submission",
                                          str(jobid))
                sys.exit()
        except (xmlrpclib.Fault, ComponentProxy) as fault:
            # This can happen if the component is down, so try again.
            client_utils.logger.error('Error getting job info: %s. Try again',
                                      fault)
            sleep(2)
            continue  # response may be unbound here -- do not fall through
        state = response[0]['state']
        location = response[0]['location']
        if state == 'running' and location:
            start_session(location[0])
            break
        client_utils.logger.debug('Current State "%s" for job %s',
                                  str(state), str(jobid))
        sleep(2)
    return deljob
def fetch_pgid(user, jobid, loc, pgid=None):
    """Confirm the ALPS reservation for an interactive job (Cray systems).

    Exits with status 1 if the reservation cannot be confirmed; a no-op on
    non-ALPS implementations.

    BUG FIX: the component args were passed as '(spec)', which is just
    'spec' (a list), not a one-element tuple; every other component_call in
    this file passes a tuple.  Changed to '(spec,)'.
    """
    if client_utils.component_call(SYSMGR, False, 'get_implementation',
                                   ()) == 'alps_system':
        # Cray is apparently using the session id for interactive jobs.
        spec = [{'user': user, 'jobid': jobid, 'pgid': pgid, 'location': loc}]
        if not client_utils.component_call(SYSMGR, False,
                                           'confirm_alps_reservation',
                                           (spec,)):
            client_utils.logger.error('Unable to confirm ALPS reservation. Exiting.')
            sys.exit(1)
    return
def set_res_id(parser):
    """Set the scheduler's next reservation id, forcing it if requested."""
    res_id = parser.options.res_id
    if parser.options.force_id:
        client_utils.component_call(SCHMGR, False, 'force_res_id', (res_id,))
        client_utils.logger.info("WARNING: Forcing res id to %s" % res_id)
    else:
        client_utils.component_call(SCHMGR, False, 'set_res_id', (res_id,))
        client_utils.logger.info("Setting res id to %s" % res_id)
def main():
    """Client entry point: mark nodes up/down, list node states, or set
    queue assignments, depending on the parsed options."""
    # Logging must be configured before any other client_utils call.
    client_utils.setup_logging(logging.INFO)

    # Option-parsing callbacks: <cb function> <cb args>
    callbacks = [[cb_debug, ()]]

    # Inject version info into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    whoami = client_utils.getuid()
    parser.parse_it()  # parse the command line
    validate_args(parser)

    opt, args = parser.options, parser.args

    def report(delta, marked_hdr, sep, other_hdr):
        """Log the changed nodes, then the requested nodes not in delta."""
        client_utils.logger.info(marked_hdr)
        for node in delta:
            client_utils.logger.info("   %s" % node)
        client_utils.logger.info(sep)
        client_utils.logger.info(other_hdr)
        for node in args:
            if node not in delta:
                client_utils.logger.info("   %s" % node)

    if opt.down:
        delta = client_utils.component_call(SYSMGR, False, 'nodes_down',
                                            (args, whoami))
        report(delta, "nodes marked down:", "", "unknown nodes:")
    elif opt.up:
        delta = client_utils.component_call(SYSMGR, False, 'nodes_up',
                                            (args, whoami))
        report(delta, "nodes marked up:", '',
               "nodes that weren't in the down list:")
    elif opt.list_nstates:
        header, output = client_utils.cluster_display_node_info()
        client_utils.printTabular(header + output)
    elif opt.queue:
        data = client_utils.component_call(SYSMGR, False,
                                           'set_queue_assignments',
                                           (opt.queue, args, whoami))
        client_utils.logger.info(data)
def set_cycle_id(parser):
    """Set (or force) the scheduler's cycle id.

    (Docstring fixed: it previously read 'set res id'.)
    """
    cycle_id = parser.options.cycle_id
    if parser.options.force_id:
        client_utils.component_call(SCHMGR, False, 'force_cycle_id', (cycle_id,))
        client_utils.logger.info("WARNING: Forcing cycle id to %s" % str(cycle_id))
    else:
        client_utils.component_call(SCHMGR, False, 'set_cycle_id', (cycle_id,))
        client_utils.logger.info("Setting cycle_id to %s" % str(cycle_id))
def set_res_id(parser):
    """Apply the requested reservation id to the scheduler (forced or not)."""
    new_id = parser.options.res_id
    if not parser.options.force_id:
        client_utils.component_call(SCHMGR, False, 'set_res_id', (new_id, ))
        client_utils.logger.info("Setting res id to %s" % new_id)
    else:
        client_utils.component_call(SCHMGR, False, 'force_res_id', (new_id, ))
        client_utils.logger.info("WARNING: Forcing res id to %s" % new_id)
def add_reservation(parser, spec, user):
    """Add a reservation, then run the scheduler's reservation check.

    Fix: 'parser.options.block_passthrough == None' -> 'is None'.
    """
    validate_starttime(parser)
    # '*' means every user; the component represents that as None.
    spec['users'] = None if parser.options.users == '*' else parser.options.users
    spec['cycle'] = parser.options.cycle
    spec['project'] = parser.options.project
    if parser.options.block_passthrough is None:
        # Default: do not block passthrough unless explicitly requested.
        spec['block_passthrough'] = False
    client_utils.logger.info(client_utils.component_call(
        SCHMGR, False, 'add_reservations', ([spec], user)))
    client_utils.logger.info(client_utils.component_call(
        SCHMGR, False, 'check_reservations', ()))
def set_cycle_id(parser):
    """Set (or force) the scheduler cycle id.

    (Docstring fixed: it previously read 'set res id'.)
    """
    cycle_id = parser.options.cycle_id
    if parser.options.force_id:
        client_utils.component_call(SCHMGR, False, 'force_cycle_id', (cycle_id, ))
        client_utils.logger.info("WARNING: Forcing cycle id to %s" % str(cycle_id))
    else:
        client_utils.component_call(SCHMGR, False, 'set_cycle_id', (cycle_id, ))
        client_utils.logger.info("Setting cycle_id to %s" % str(cycle_id))
def exit_interactive_job(deljob, jobid, user):
    """Finish an interactive job: delete it (deljob=True) or mark it
    complete with the system component."""
    not_exit_on_interrupt()
    if not jobid:
        # No jobid was ever assigned; nothing to clean up.
        return
    if deljob:
        client_utils.logger.info("Deleting interactive job %s", str(jobid))
        spec = [{'tag': 'job', 'jobid': jobid, 'user': user}]
        client_utils.component_call(QUEMGR, False, 'del_jobs',
                                    (spec, False, user))
    else:
        client_utils.logger.info("Exiting interactive job %d", int(jobid))
        client_utils.component_call(SYSMGR, False, 'interactive_job_complete',
                                    (jobid,))
def main():
    """nodelist main.

    (Docstring fixed: it previously read 'qmove main'; the body implements
    nodelist, per its own error message below.)

    Fix: the original rebound the name 'status' inside the loop that was
    iterating it; renamed the per-node value to avoid the shadowing.
    """
    # Set up client logging before doing anything else.
    client_utils.setup_logging(logging.INFO)

    # List of callbacks with their arguments: <cb function> <cb args>
    callbacks = [[cb_debug, ()]]

    # Splice the version information into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    # Set required default values: None
    parser.parse_it()  # parse the command line
    if not parser.no_args():
        client_utils.logger.error("No arguments needed")

    impl = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    # Make sure we're on a cluster-system or orcm-system.
    if ("cluster_system" != impl) and ("orcm_system" != impl):
        client_utils.logger.error("nodelist is only supported on cluster and orcm systems. Try partlist instead.")
        sys.exit(0)

    status = client_utils.component_call(SYSMGR, False, 'get_node_status', ())
    queue_data = client_utils.component_call(SYSMGR, False,
                                             'get_queue_assignments', ())

    header = [['Host', 'Queue', 'State']]
    # Build the output list: one row per node with its queues joined by ':'.
    output = []
    for entry in status:
        host_name = entry[0]
        node_state = entry[1]
        queues = [q for q in queue_data if host_name in queue_data[q]]
        output.append([host_name, ":".join(queues), node_state])
    client_utils.printTabular(header + output)
def handle_list_io_option(sys_type):
    """Print bulk IO block information; only BG/Q systems have IO blocks,
    so other system types just get a warning."""
    if sys_type != 'bgq':
        client_utils.logger.error("WARNING: IO Block information only exists on BG/Q-type systems.")
        return
    # Fetch and print bulk IO Block data.
    query = [{'name': '*', 'size': '*', 'status': '*', 'state': '*',
              'block_computes_for_reboot': '*', 'autoreboot': '*',
              'current_kernel': '*', 'current_kernel_options': '*'}]
    io_block_info = client_utils.component_call(SYSMGR, False,
                                                'get_io_blocks', (query, ))
    rows = [['Name', 'Size', 'State', 'CS Status', 'BlockComputes',
             'Autoreboot']]
    for io_block in io_block_info:
        rows.append([io_block['name'],
                     io_block['size'],
                     io_block['state'],
                     io_block['status'],
                     'x' if io_block['block_computes_for_reboot'] else '-',
                     'x' if io_block['autoreboot'] else '-'])
    client_utils.printTabular(rows, centered=[4])
def process_queues(opts, parts, whoami, sys_type):
    """Get the new list of queues associated with the given partitions and
    push the updates to the system component.

    Fixes: '!= None' -> 'is not None'; dropped the pointless '_parts = None'
    that was overwritten on the next statement.
    """
    if opts.rmq is not None or opts.appq is not None:
        # Per-partition updates computed from --rmq/--appq.
        part_queues = partition_queues(opts, parts, sys_type)
        _parts = []
        for name in part_queues:
            args = ([{'tag': 'partition', 'name': name}],
                    {'queue': part_queues[name]}, whoami)
            _parts.append(client_utils.component_call(SYSMGR, False,
                                                      'set_partitions', args))
    else:
        # One bulk call assigning opts.queue to every named partition.
        args = ([{'tag': 'partition', 'name': partname} for partname in parts],
                {'queue': opts.queue}, whoami)
        _parts = client_utils.component_call(SYSMGR, False,
                                             'set_partitions', args)
    return _parts
def partition_queues(opts, parts, sys_type):
    """Compute the new queue string for each partition.

    Returns a dict mapping partition name -> colon-joined queue list after
    applying --rmq (remove) and/or --appq (append) for each queue named in
    opts.queue.

    Fixes: the empty-string filter used "q is not ''" -- an identity test
    that only works via CPython string interning -- now "q != ''"; None
    comparisons use 'is not None'.
    NOTE(review): if sys_type is neither 'bgq' nor 'bgp', 'args' is unbound
    and the component call raises; preserved from the original.
    """
    if sys_type == 'bgq':
        args = ([{'name': partname, 'size': '*', 'state': '*',
                  'scheduled': '*', 'functional': '*', 'queue': '*',
                  'relatives': '*', 'passthrough_blocks': '*',
                  'node_geometry': '*'} for partname in parts], )
    if sys_type == 'bgp':
        args = ([{'name': partname, 'size': '*', 'state': '*',
                  'scheduled': '*', 'functional': '*', 'queue': '*',
                  'parents': '*', 'children': '*'} for partname in parts], )
    _parts = client_utils.component_call(SYSMGR, False, 'get_partitions', args)
    queues_dict = {}
    for part in _parts:
        current_queues = part['queue'].split(':')
        requested = opts.queue.split(':')
        new_queues = [q for q in current_queues if q != '']
        for q in requested:
            if q in new_queues and opts.rmq is not None:
                new_queues.remove(q)
            if q not in new_queues and opts.appq is not None:
                new_queues.append(q)
        if len(new_queues) == 1:
            queues_dict[part['name']] = new_queues[0]
        else:
            queues_dict[part['name']] = ':'.join(new_queues)
    return queues_dict
def validate_args(parser):
    """Validate nodeadm arguments: require some input, require a cluster
    system, and reject conflicting options."""
    spec = {}       # map of destination option strings and parsed values
    opts = {}       # old map
    opt2spec = {}
    opt_count = client_utils.get_options(spec, opts, opt2spec, parser)

    no_input = parser.no_args() and not parser.options.list_nstates
    if no_input or opt_count == 0:
        client_utils.print_usage(parser)
        sys.exit(1)

    # nodeadm only makes sense on cluster systems.
    impl = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    if impl != "cluster_system":
        client_utils.logger.error("nodeadm is only supported on cluster systems. Try partlist instead.")
        sys.exit(0)

    # These options cannot be combined.
    exclusive = [['down', 'up', 'list_nstates', 'queue']]
    if opt_count > 1:
        client_utils.validate_conflicting_options(parser, exclusive)
def get_output_for_queues(parser, hinfo):
    """Return tabular rows for the queues named in parser.args ('*' when no
    names were given), ordered by hinfo.header."""
    names = ['*'] if parser.no_args() else parser.args
    query = [{'name': qname, 'users': '*', 'mintime': '*', 'maxtime': '*',
              'maxrunning': '*', 'maxqueued': '*', 'maxusernodes': '*',
              'maxnodehours': '*', 'totalnodes': '*', 'state': '*'}
             for qname in names]
    response = client_utils.component_call(QUEMGR, True, 'get_queues',
                                           (query, ))
    # Explicitly-named queues that matched nothing are a hard error.
    if not parser.no_args() and not response:
        sys.exit(1)
    for que in response:
        # Render the minute counts as hh:mm:00 for display.
        if que['maxtime'] is not None:
            que['maxtime'] = "%02d:%02d:00" % (divmod(int(que['maxtime']), 60))
        if que['mintime'] is not None:
            que['mintime'] = "%02d:%02d:00" % (divmod(int(que['mintime']), 60))
    fields = [column.lower() for column in hinfo.header]
    return [[que[field] for field in fields] for que in response]
def getq(info):
    """Query queues matching info, print them as a table, and return the
    raw component response."""
    response = client_utils.component_call(QUEMGR, True, 'get_queues',
                                           (info, ))
    # Convert minute counts to hh:mm:00 display form.
    for que in response:
        for key in ('maxtime', 'mintime'):
            if que[key] is not None:
                que[key] = "%02d:%02d:00" % (divmod(int(que.get(key)), 60))
    header = [('Queue', 'Users', 'Groups', 'MinTime', 'MaxTime',
               'MaxRunning', 'MaxQueued', 'MaxUserNodes', 'MaxNodeHours',
               'TotalNodes', 'AdminEmail', 'State', 'Cron', 'Policy',
               'Priority')]
    fields = ('name', 'users', 'groups', 'mintime', 'maxtime', 'maxrunning',
              'maxqueued', 'maxusernodes', 'maxnodehours', 'totalnodes',
              'adminemail', 'state', 'cron', 'policy', 'priority')
    datatoprint = [tuple(que[f] for f in fields) for que in response]
    datatoprint.sort()
    client_utils.print_tabular(header + datatoprint)
    return response
def run_job(parser, user, spec, opts):
    """Submit the job; for interactive mode wait for start and open the
    user session, always cleaning the interactive job up on the way out."""
    jobid = None
    deljob = True
    try:
        not_exit_on_interrupt()
        jobs = client_utils.component_call(QUEMGR, False, 'add_jobs',
                                           ([spec], ), False)
        jobid = jobs[0]['jobid']
        exit_on_interrupt()
        if parser.options.envs:
            client_utils.logger.debug("Environment Vars: %s",
                                      parser.options.envs)
        if parser.options.mode == 'interactive':
            # Wait for the job to start, then open the user's shell/ssh.
            logjob(jobid, spec, False)
            deljob = run_interactive_job(jobid, user, opts['disable_preboot'])
        else:
            logjob(jobid, spec, True)
    finally:
        if parser.options.mode == 'interactive':
            exit_interactive_job(deljob, jobid, user)
def run_job(parser, user, spec, opts):
    """Submit the job; for interactive mode wait for start and open the
    user session.

    Fix: Python-2-only 'except Exception, e' comma syntax replaced with the
    'as' form, valid on Python 2.6+ and Python 3.
    NOTE(review): exc_occurred is set but not read within this block.
    """
    jobid = None
    deljob = True
    exc_occurred = False
    try:
        not_exit_on_interrupt()
        jobs = client_utils.component_call(QUEMGR, False, 'add_jobs',
                                           ([spec], ), False)
        jobid = jobs[0]['jobid']
        exit_on_interrupt()
        if parser.options.envs:
            client_utils.logger.debug("Environment Vars: %s",
                                      parser.options.envs)
        # If this is an interactive job, wait for it to start, then start
        # the user shell.
        if parser.options.mode == 'interactive':
            logjob(jobid, spec, False, spec['ttysession'])
            deljob = run_interactive_job(jobid, user, opts['disable_preboot'],
                                         opts['nodecount'], opts['proccount'])
        else:
            logjob(jobid, spec, True, spec['ttysession'])
    except Exception as e:
        client_utils.logger.error(e)
        exc_occurred = True
def get_interactive_command(parser, spec, opts, opt2spec, def_spec):
    '''Interactive job checks and command update.  Set the sleeper job up
    to be submitted for the walltime.
    '''
    # Interactive jobs must not specify a command and require a cluster or
    # orcm system; on success the command becomes a sleep for the walltime.
    if not parser.options.interactive:
        return
    try:
        impl = client_utils.component_call(SYSMGR, False,
                                           'get_implementation', ())
    except xmlrpclib.Fault:
        client_utils.logger.error("Error: unable to connect to the system component")
        sys.exit(1)
    if impl != "cluster_system" and impl != "orcm_system":
        client_utils.logger.error("Interactive jobs are only supported on cluster or orcm systems")
        sys.exit(1)
    else:
        spec['command'] = "/bin/sleep"
        spec['args'] = [str(int(parser.options.walltime) * 60), ]
def main():
    """get-bootable-blocks main"""
    # Configure client logging before anything else.
    client_utils.setup_logging(logging.INFO)

    # Option callbacks: debug, greater-than-zero int check, BG/Q geometry.
    callbacks = [[cb_debug, ()],
                 [cb_gtzero, (True,)],   # return int
                 [cb_bgq_geo, ()]]

    # Splice version info into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)
    parser.parse_it()  # parse the command line

    opts, args = parser.options, parser.args
    if parser.no_args():
        client_utils.print_usage(parser)
        sys.exit(1)

    block_loc = args[0]
    idle_blocks = client_utils.component_call(
        SYSMGR, False, 'get_idle_blocks',
        (block_loc, opts.query_size, opts.geo_list))
    client_utils.logger.info("\n".join(idle_blocks))
def run_job(parser, user, spec, opts):
    """Submit the job; for interactive jobs run the user session.

    Fix: Python-2-only 'except Exception, e' replaced with 'except ... as e'
    (valid on Python 2.6+ and Python 3).
    NOTE(review): interactive_remote_host and exc_occurred are set but not
    read within this block.
    """
    jobid = None
    deljob = True
    exc_occurred = False
    interactive_remote_host = opts.get('ssh_host', None)
    try:
        not_exit_on_interrupt()
        jobs = client_utils.component_call(QUEMGR, False, 'add_jobs',
                                           ([spec],), False)
        jobid = jobs[0]['jobid']
        exit_on_interrupt()
        if parser.options.envs:
            client_utils.logger.debug("Environment Vars: %s",
                                      parser.options.envs)
        # If this is an interactive job, wait for it to start, then start
        # the user shell.
        if parser.options.mode == 'interactive':
            logjob(jobid, spec, False, spec['ttysession'])
            deljob = run_interactive_job(jobid, user, opts['disable_preboot'],
                                         opts['nodecount'], opts['proccount'])
        else:
            logjob(jobid, spec, True, spec['ttysession'])
    except Exception as e:
        client_utils.logger.error(e)
        exc_occurred = True
def validate_args(parser):
    """Validate nodeadm arguments; exit on missing input, unsupported
    system implementations, or conflicting options."""
    spec = {}       # map of destination option strings and parsed values
    opts = {}       # old map
    opt2spec = {}
    opt_count = client_utils.get_options(spec, opts, opt2spec, parser)

    if (parser.no_args() and not parser.options.list_nstates) or opt_count == 0:
        client_utils.print_usage(parser)
        sys.exit(1)

    # Supported only on cluster and ALPS (Cray) system implementations.
    impl = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    if impl not in ['cluster_system', 'alps_system']:
        client_utils.logger.error("nodeadm is only supported on cluster systems. Try partlist instead.")
        sys.exit(0)

    # These options are mutually exclusive.
    exclusive = [['down', 'up', 'list_nstates', 'list_details', 'queue']]
    if opt_count > 1:
        client_utils.validate_conflicting_options(parser, exclusive)
def main():
    """qdel main"""
    # Set up client logging first.
    client_utils.setup_logging(logging.INFO)

    # Option callbacks: <cb function> <cb args>
    callbacks = [[cb_debug, ()]]

    # Insert version information into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    user = client_utils.getuid()
    # Set required default values: None
    parser.parse_it()  # parse the command line

    jobids = client_utils.validate_jobid_args(parser)
    jobs = [{'tag': 'job', 'user': user, 'jobid': jid} for jid in jobids]
    deleted_jobs = client_utils.component_call(QUEMGR, True, 'del_jobs',
                                               (jobs, False, user))
    time.sleep(1)
    if deleted_jobs:
        data = [('JobID', 'User')]
        data += [(job.get('jobid'), job.get('user')) for job in deleted_jobs]
        client_utils.logger.info("      Deleted Jobs")
        client_utils.print_tabular(data)
def process_cqadm_options(jobs, parser, spec, user):
    """Dispatch the cqadm command options to the queue manager.

    Exactly one action option is handled per invocation; falls through to
    setjobs() when no action option was given.

    Fix: all '!= None' comparisons replaced with 'is not None'.
    """
    force = parser.options.force  # force flag.
    # Wildcard queue query used by the add/get queue actions.
    info = [{'tag': 'queue', 'name': '*', 'state': '*', 'users': '*',
             'groups': '*', 'maxtime': '*', 'mintime': '*', 'maxuserjobs': '*',
             'maxusernodes': '*', 'maxqueued': '*', 'maxrunning': '*',
             'maxtotaljobs': '*', 'maxnodehours': '*', 'adminemail': '*',
             'totalnodes': '*', 'cron': '*', 'policy': '*', 'priority': '*'}]
    response = []
    if parser.options.setjobid is not None:
        response = client_utils.component_call(QUEMGR, True, 'set_jobid',
                                               (parser.options.setjobid, user))
    elif parser.options.savestate is not None:
        response = client_utils.component_call(QUEMGR, True, 'save',
                                               (parser.options.savestate,))
    elif parser.options.kill is not None:
        response = client_utils.component_call(QUEMGR, False, 'del_jobs',
                                               (jobs, force, user))
    elif parser.options.run is not None:
        response = client_utils.run_jobs(jobs, parser.options.run, user)
    elif parser.options.addq is not None:
        response = client_utils.add_queues(jobs, parser, user, info)
    elif parser.options.getq is not None:
        response = getq(info)
    elif parser.options.delq is not None:
        response = client_utils.del_queues(jobs, force, user)
    elif parser.options.qdata is not None:
        response = client_utils.component_call(QUEMGR, True, 'set_queues',
                                               (jobs, parser.options.qdata,
                                                user))
    elif parser.options.preempt is not None:
        response = client_utils.component_call(QUEMGR, True, 'preempt_jobs',
                                               (jobs, user, force))
    else:
        response = setjobs(jobs, parser, spec, user)
    if not response:
        client_utils.logger.error("Failed to match any jobs or queues")
    else:
        client_utils.logger.debug(response)
def exit_interactive_job(deljob, jobid, user):
    """Clean up an interactive job on exit: delete it on cluster systems,
    otherwise tell the system component the job completed."""
    not_exit_on_interrupt()
    if not jobid:
        # The job was never created; nothing to do.
        return
    if deljob:
        client_utils.logger.info("Deleting interactive job %s", str(jobid))
        client_utils.component_call(
            QUEMGR, False, 'del_jobs',
            ([{'tag': 'job', 'jobid': jobid, 'user': user}], False, user))
    else:
        client_utils.logger.info("Exiting interactive job %d", int(jobid))
        client_utils.component_call(SYSMGR, False,
                                    'interactive_job_complete', (jobid, ))
def main():
    """qmove main"""
    # Configure client logging first.
    client_utils.setup_logging(logging.INFO)

    # Option callbacks: <cb function> <cb args>
    callbacks = [[cb_debug, ()]]

    # Insert version information into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    user = client_utils.getuid()
    # Set required default values: None
    parser.parse_it()  # parse the command line

    queue, jobs = validate_args(parser, user)
    filters = client_utils.get_filters()
    jobdata = client_utils.component_call(QUEMGR, False, 'get_jobs', (jobs, ))

    response = []
    for job in jobdata:
        # Keep the original spec for matching; retarget the copy's queue.
        orig_job = job.copy()
        job.update({'queue': queue})
        client_utils.process_filters(filters, job)
        [moved] = client_utils.component_call(QUEMGR, False, 'set_jobs',
                                              ([orig_job], job, user))
        response.append("moved job %d to queue '%s'"
                        % (moved.get('jobid'), moved.get('queue')))
    if response:
        for line in response:
            client_utils.logger.info(line)
    else:
        client_utils.logger.error("Failed to match any jobs or queues")
def main():
    """userres main.

    Fix: dropped the unused assignment of update_start_time()'s result
    (the original bound it to 'updates' and never used it).
    """
    # Set up client logging before doing anything else.
    client_utils.setup_logging(logging.INFO)

    # List of callbacks with their arguments.
    callbacks = [(cb_debug, ())]

    # Splice version information into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)
    parser.parse_it()  # parse the command line

    args = parser.args
    if parser.no_args():
        client_utils.print_usage(parser)
        sys.exit(1)

    # Check that the named reservations exist.
    spec = [{'name': rname, 'users': "*", 'start': '*', 'cycle': '*',
             'duration': '*'} for rname in args]
    result = client_utils.component_call(SCHMGR, False, 'get_reservations',
                                         (spec,))
    if len(result) and len(result) != len(args):
        client_utils.logger.error("Reservation subset matched")
    elif not result:
        client_utils.logger.error("No Reservations matched")
        sys.exit(1)

    user_name = client_utils.getuid()
    for spec in result:
        if not spec['users'] or user_name not in spec['users'].split(":"):
            client_utils.logger.error("You are not a user of reservation '%s' and so cannot alter it.",
                                      spec['name'])
            continue
        if spec['cycle']:
            # Cyclic reservation: release this occurrence by advancing start.
            update_start_time(spec, user_name)
        else:
            client_utils.component_call(SCHMGR, False, 'del_reservations',
                                        ([{'name': spec['name']}], user_name))
            client_utils.logger.info("Releasing reservation '%s'",
                                     spec['name'])
def verify_locations(partitions):
    """Verify that all given partitions are valid; exit(1) naming any
    missing ones.

    BUG FIX: the original looped over 'partitions' and issued the identical
    'verify_locations' component call (always with the full list) once per
    partition -- O(n) redundant RPCs with an unused loop variable.  One
    call suffices.
    """
    test_parts = client_utils.component_call(SYSMGR, False,
                                             'verify_locations',
                                             (partitions,))
    if len(test_parts) != len(partitions):
        missing = [p for p in partitions if p not in test_parts]
        client_utils.logger.error("Missing partitions: %s" % (" ".join(missing)))
        sys.exit(1)
def add_reservation(parser, spec, user):
    """Add a reservation, then run the scheduler's consistency check.

    Fix: 'parser.options.block_passthrough == None' -> 'is None'.
    """
    validate_starttime(parser)
    # '*' means all users; the component encodes that as None.
    spec['users'] = None if parser.options.users == '*' else parser.options.users
    spec['cycle'] = parser.options.cycle
    spec['project'] = parser.options.project
    if parser.options.block_passthrough is None:
        # Default to not blocking passthrough when the flag was not given.
        spec['block_passthrough'] = False
    client_utils.logger.info(
        client_utils.component_call(SCHMGR, False, 'add_reservations',
                                    ([spec], user)))
    client_utils.logger.info(
        client_utils.component_call(SCHMGR, False, 'check_reservations', ()))
def main():
    """qmove main"""
    # Logging must be set up before other client_utils calls.
    client_utils.setup_logging(logging.INFO)

    # Option-parsing callbacks: <cb function> <cb args>
    callbacks = [[cb_debug, ()]]

    # Inject the version information into the option definitions.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    user = client_utils.getuid()
    # Set required default values: None
    parser.parse_it()  # parse the command line

    queue, jobs = validate_args(parser, user)
    filters = client_utils.get_filters()
    jobdata = client_utils.component_call(QUEMGR, False, 'get_jobs', (jobs,))

    messages = []
    # Move each matched job into the target queue.
    for job in jobdata:
        original = job.copy()
        job.update({'queue': queue})
        client_utils.process_filters(filters, job)
        [updated] = client_utils.component_call(QUEMGR, False, 'set_jobs',
                                                ([original], job, user))
        messages.append("moved job %d to queue '%s'"
                        % (updated.get('jobid'), updated.get('queue')))
    if not messages:
        client_utils.logger.error("Failed to match any jobs or queues")
    else:
        for line in messages:
            client_utils.logger.info(line)
def main():
    """releaseres main"""
    # Configure client logging before anything else.
    client_utils.setup_logging(logging.INFO)

    # Option-parsing callbacks.
    callbacks = [(cb_debug, ())]

    # Splice version info into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)
    parser.parse_it()  # parse the command line

    args = parser.args
    if parser.no_args():
        client_utils.print_usage(parser)
        sys.exit(1)

    # Make sure every named reservation exists before releasing.
    spec = [{'name': arg, 'partitions': '*'} for arg in args]
    result = client_utils.component_call(SCHMGR, False, 'get_reservations',
                                         (spec, ))
    if len(result) and len(result) != len(args):
        client_utils.logger.error("Reservation subset matched")
    elif not result:
        client_utils.logger.error("No Reservations matched")
        sys.exit(1)

    result = client_utils.component_call(SCHMGR, False,
                                         'release_reservations',
                                         (spec, client_utils.getuid()))
    for resinfo in result:
        partitions = resinfo['partitions'].split(':')
        client_utils.logger.info("Released reservation '%s' for partitions: %s",
                                 resinfo['name'], str(partitions))
def getq(info):
    """Fetch the queues matching info, print a table of them, and return
    the raw response."""
    response = client_utils.component_call(QUEMGR, True, "get_queues",
                                           (info,))
    # Render min/max times (minutes) as hh:mm:00 for display.
    for que in response:
        if que["maxtime"] is not None:
            que["maxtime"] = "%02d:%02d:00" % (divmod(int(que.get("maxtime")), 60))
        if que["mintime"] is not None:
            que["mintime"] = "%02d:%02d:00" % (divmod(int(que.get("mintime")), 60))
    columns = ("name", "users", "groups", "mintime", "maxtime", "maxrunning",
               "maxqueued", "maxusernodes", "maxnodehours", "totalnodes",
               "adminemail", "state", "cron", "policy", "priority")
    header = [("Queue", "Users", "Groups", "MinTime", "MaxTime", "MaxRunning",
               "MaxQueued", "MaxUserNodes", "MaxNodeHours", "TotalNodes",
               "AdminEmail", "State", "Cron", "Policy", "Priority")]
    datatoprint = [tuple(que[col] for col in columns) for que in response]
    datatoprint.sort()
    client_utils.print_tabular(header + datatoprint)
    return response
def main():
    """qselect main"""
    # Configure client logging before anything else.
    client_utils.setup_logging(logging.INFO)

    opts = {}       # old map
    opt2spec = {}

    # Callbacks: debug; nodes returned as string; time with no delta,
    # minutes, returned as string.
    callbacks = [[cb_debug, ()],
                 [cb_nodes, (False, )],
                 [cb_time, (False, False, False)]]

    # Splice version info into the usage text.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    # Default query: every job field wildcarded.
    query = {'tag': 'job', 'jobid': '*', 'nodes': '*', 'walltime': '*',
             'mode': '*', 'project': '*', 'state': '*', 'queue': '*'}

    parser.parse_it()  # parse the command line
    if not parser.no_args():
        client_utils.logger.error("qselect takes no arguments")
        sys.exit(1)

    client_utils.get_options(query, opts, opt2spec, parser)
    response = client_utils.component_call(QUEMGR, False, 'get_jobs',
                                           ([query], ))
    if not response:
        client_utils.logger.error("Failed to match any jobs")
    else:
        client_utils.logger.debug(response)
        client_utils.logger.info("   The following jobs matched your query:")
        for job in response:
            client_utils.logger.info("      %d" % job.get('jobid'))
def setjobs(jobs, parser, spec, user):
    """Toggle any requested hold flags on the jobs and push the update to the queue manager."""
    # Each hold option, when present on the parsed options, flips the flag on every job.
    for flag in ("admin_hold", "user_hold"):
        if hasattr(parser.options, flag):
            toggled = not getattr(parser.options, flag)
            for job in jobs:
                job.update({flag: toggled})
    return client_utils.component_call(QUEMGR, False, "set_jobs", (jobs, spec, user))
def update_start_time(spec, user_name):
    """ will update the start time for the cyclic reservation """
    start = spec['start']
    duration = spec['duration']
    cycle = float(spec['cycle'])
    now = time.time()
    # Whole cycles elapsed since the reservation's original start.
    periods = math.floor((now - start) / cycle)
    if periods < 0:
        # Reservation has not started yet: advance by a single cycle.
        start += cycle
    elif (now - start) % cycle < duration:
        # Inside the current active window: next start is one period ahead.
        start += (periods + 1) * cycle
    else:
        # Current window already passed: skip an extra period ahead.
        start += (periods + 2) * cycle
    updates = {'start': start}
    client_utils.component_call(SCHMGR, False, 'set_reservations',
                                ([{'name': spec['name']}], updates, user_name))
    newstart = time.strftime("%c", time.localtime(start))
    client_utils.logger.info("Setting new start time for for reservation '%s': %s",
                             spec['name'], newstart)
def verify_locations(partitions):
    """Verify that the named partitions are known to the system component.

    Logs the missing partition names and exits with status 1 if any
    partition fails verification.
    """
    if not partitions:
        # Nothing to verify; also preserves the original's behavior of
        # making no component call for an empty list.
        return
    # One call verifies the entire list.  The original looped over the
    # partitions and repeated this identical call once per partition,
    # issuing len(partitions) redundant RPCs for the same answer.
    test_parts = client_utils.component_call(SYSMGR, False, 'verify_locations', (partitions, ))
    if len(test_parts) != len(partitions):
        missing = [p for p in partitions if p not in test_parts]
        client_utils.logger.error("Missing partitions: %s" % (" ".join(missing)))
        sys.exit(1)
def handle_blockinfo_option(parts, sys_type, print_block):
    """ function to handle the block info option

    Queries the system component for detailed block/partition data for each
    name in `parts`, renders it with `print_block`, and exits the process.

    parts       -- iterable of block/partition names to query
    sys_type    -- 'bgq' or 'bgp'; any other value falls through with no
                   output and no exit
    print_block -- callable used to render each result list
    """
    if sys_type == 'bgq':
        # Wildcard query spec: fetch every listed field for each named block.
        # NOTE(review): 'funcitonal' below looks like a typo for 'functional'
        # -- confirm against the system component's field names before fixing,
        # since these keys are matched server-side.
        args = ([{'name':part,'node_card_list':'*','subblock_parent':'*',
                  'midplane_list':'*','node_list':'*', 'scheduled':'*',
                  'funcitonal':'*','queue':'*','parents':'*','children':'*',
                  'reserved_until':'*','reserved_by':'*','used_by':'*','freeing':'*',
                  'block_type':'*','corner_node':'*', 'extents':'*', 'cleanup_pending':'*',
                  'state':'*','size':'*','draining':'*','backfill_time':'*','wire_list':'*',
                  'wiring_conflict_list':'*', 'midplane_geometry':'*', 'node_geometry':'*',
                  'passthrough_blocks':'*', 'passthrough_midplane_list':'*',
                  'io_node_list':'*', 'current_kernel':'*', 'current_kernel_options':'*'}
                 for part in parts], )
        info = client_utils.component_call(SYSMGR, False, 'get_blocks', args)
        print_block(info)
        # BG/Q also has IO blocks; fetch and print those as a second table.
        args = ([{'name':part, 'status':'*', 'state':'*', 'size':'*',
                  'io_drawer_list':'*', 'io_node_list':'*',
                  'block_computes_for_reboot':'*', 'autoreboot':'*',
                  'current_kernel':'*', 'current_kernel_options':'*'}
                 for part in parts], )
        info = client_utils.component_call(SYSMGR, False, 'get_io_blocks', args)
        print_block(info)
        sys.exit(0)
    elif sys_type == 'bgp':
        # BG/P partition query; same wildcard-spec pattern as above
        # (including the 'funcitonal' spelling -- see note above).
        args = ([{'name':part,'node_card_list':'*','wire_list':'*','switch_list':'*',
                  'scheduled':'*', 'funcitonal':'*','queue':'*','parents':'*',
                  'children':'*','reserved_until':'*','reserved_by':'*','used_by':'*',
                  'freeing':'*','block_type':'*','cleanup_pending':'*', 'state':'*',
                  'wiring_conflicts':'*','size':'*','draining':'*','backfill_time':'*'}
                 for part in parts], )
        info = client_utils.component_call(SYSMGR, False, 'get_partitions', args)
        print_block(info)
        sys.exit(0)
def setjobs(jobs, parser, spec, user):
    """Flip the admin/user hold flags requested on the command line, then apply via the queue manager."""
    options = parser.options
    if hasattr(options, 'admin_hold'):
        new_admin = not options.admin_hold
        for job in jobs:
            job['admin_hold'] = new_admin
    if hasattr(options, 'user_hold'):
        new_user = not options.user_hold
        for job in jobs:
            job['user_hold'] = new_user
    return client_utils.component_call(QUEMGR, False, 'set_jobs', (jobs, spec, user))
def verify_locations(partitions):
    """Verify that the requested partitions/nodes exist on the system.

    On ALPS systems the arguments arrive as compact node lists and are
    expanded before verification; on other systems the names are checked
    as-is.  Logs the missing entries and exits with status 1 on mismatch.
    """
    check_partitions = partitions
    system_type = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    # if we have args then verify the args (partitions)
    if system_type in ['alps_system']:
        # Nodes come in as compact lists (e.g. "1-4,7"); expand them so
        # verification happens in a single flat query.
        check_partitions = []
        for num_list in partitions:
            check_partitions.extend(expand_num_list(num_list))
        test_parts = client_utils.component_call(SYSMGR, False, 'verify_locations',
                                                 (check_partitions,))
        # On Cray we will be a little looser to make setting reservations easier.
        client_utils.logger.info('Found Nodes: %s', compact_num_list(test_parts))
        missing_nodes = set(check_partitions) - set(test_parts)
        if len(missing_nodes) != 0:
            # TODO: relax this, we should allow for this to occur, but
            # reservation-queue data amalgamation will need a fix to get
            # this to work. --PMR
            client_utils.logger.error("Missing partitions: %s" % (",".join([str(nid) for nid in missing_nodes])))
            client_utils.logger.error("Aborting reservation setup.")
            sys.exit(1)
    else:
        # One call verifies the whole list.  The original repeated this
        # identical call once per partition, issuing redundant RPCs.
        test_parts = client_utils.component_call(SYSMGR, False, 'verify_locations',
                                                 (check_partitions,))
        if len(test_parts) != len(check_partitions):
            missing = [p for p in check_partitions if p not in test_parts]
            client_utils.logger.error("Missing partitions: %s" % (" ".join(missing)))
            sys.exit(1)
def main():
    """ releaseres main """
    # Configure client logging before any other client activity.
    client_utils.setup_logging(logging.INFO)

    callbacks = [(cb_debug, ())]

    # Fold the version information into the option definitions.
    opt_def = __doc__.replace("__revision__", __revision__)
    opt_def = opt_def.replace("__version__", __version__)
    parser = ArgParse(opt_def, callbacks)
    parser.parse_it()  # parse the command line
    args = parser.args

    if parser.no_args():
        client_utils.print_usage(parser)
        sys.exit(1)

    # Confirm the named reservations exist before releasing anything.
    spec = [{"name": arg, "partitions": "*"} for arg in args]
    result = client_utils.component_call(SCHMGR, False, "get_reservations", (spec,))
    if len(result) and len(result) != len(args):
        # Partial match: warn but proceed with the release below.
        client_utils.logger.error("Reservation subset matched")
    elif not result:
        client_utils.logger.error("No Reservations matched")
        sys.exit(1)

    result = client_utils.component_call(SCHMGR, False, "release_reservations",
                                         (spec, client_utils.getuid()))
    for resinfo in result:
        partitions = resinfo["partitions"].split(":")
        client_utils.logger.info("Released reservation '%s' for partitions: %s",
                                 resinfo["name"], str(partitions))
def on_interrupt(sig, func=None):
    '''Handler to cleanup the queued 'dummy' job if the user interrupts
    qsub -I forcibly
    '''
    # 'jobs' and 'user' are names defined elsewhere; if the interrupt arrives
    # before they exist there is nothing to clean up -- just exit.
    # (The original also had a bare "except: raise" clause, which is a no-op:
    # any non-NameError would propagate identically without it.)
    try:
        spec = [{'tag':'job', 'jobid':jobs[0]['jobid'], 'user':user}]
    except NameError:
        sys.exit(1)
    client_utils.logger.info("Deleting job %d", (jobs[0]['jobid']))
    del_jobs = client_utils.component_call(QUEMGR, False, 'del_jobs', (spec, False, user))
    client_utils.logger.info("%s", del_jobs)
    sys.exit(1)
def validate_queues(opts):
    """Validate that every queue named in opts.queue exists.

    opts.queue is a colon-separated list of queue names.  Logs one error
    per unknown queue and exits with status 1 if any are missing.
    """
    args = ([{'tag':'queue', 'name':'*'}], )
    queues = client_utils.component_call(QUEMGR, True, 'get_queues', args)
    # Set membership is O(1) per lookup; also fixes the unidiomatic
    # "not queue in ..." test (PEP 8 prefers "not in").
    existing_queues = set(q.get('name') for q in queues)
    error_messages = ["'%s' is not an existing queue" % queue
                      for queue in opts.queue.split(':')
                      if queue not in existing_queues]
    if error_messages:
        for err in error_messages:
            client_utils.logger.error(err)
        sys.exit(1)
def validate_queues(opts):
    """Check that each queue listed (colon-separated) in opts.queue exists; exit on failure."""
    args = ([{'tag': 'queue', 'name': '*'}], )
    queues = client_utils.component_call(QUEMGR, True, 'get_queues', args)
    existing_queues = [q.get('name') for q in queues]
    error_messages = []
    for queue in opts.queue.split(':'):
        if queue not in existing_queues:
            error_messages.append('\'' + queue + '\' is not an existing queue')
    if not error_messages:
        return
    for err in error_messages:
        client_utils.logger.error(err)
    sys.exit(1)
def recursive(args):
    """Expand partition names to include all of their children.

    Returns a new list containing every name in args plus every child
    partition reported by the system component, without duplicates.
    The caller's list is left unmodified (the original aliased `args`
    and appended children into it, mutating the caller's argument; it
    also had a dead initial `parts = []` assignment).
    """
    comp_args = ([{'tag':'partition', 'name':name, 'children':'*'} for name in args], )
    partdata = client_utils.component_call('system', False, 'get_partitions', comp_args)
    parts = list(args)  # copy so the input list is not mutated
    for part in partdata:
        if 'children' not in part:
            continue
        for child in part['children']:
            if child not in parts:
                parts.append(child)
    return parts
def on_interrupt(sig, func=None):
    '''Handler to cleanup the queued 'dummy' job if the user interrupts
    qsub -I forcibly
    '''
    # If the global 'jobs'/'user' names are not defined yet, the dummy job
    # was never queued -- nothing to delete.  The original's trailing bare
    # "except: raise" was removed: it re-raised exactly what would have
    # propagated anyway, so it was dead code.
    try:
        spec = [{'tag': 'job', 'jobid': jobs[0]['jobid'], 'user': user}]
    except NameError:
        sys.exit(1)
    client_utils.logger.info("Deleting job %d", (jobs[0]['jobid']))
    del_jobs = client_utils.component_call(QUEMGR, False, 'del_jobs', (spec, False, user))
    client_utils.logger.info("%s", del_jobs)
    sys.exit(1)
def main():
    """nodelist main."""
    # Configure client logging before any other client activity.
    client_utils.setup_logging(logging.INFO)

    callbacks = [[cb_debug, ()]]

    # Fold the version information into the option definitions.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)

    parser.parse_it()  # parse the command line
    opt = parser.options
    args = parser.args

    impl = client_utils.component_call(SYSMGR, False, 'get_implementation', ())
    # nodelist only applies to cluster-style systems.
    if impl not in ['cluster_system', 'alps_system']:
        client_utils.logger.error("nodelist is only supported on cluster systems. Try partlist instead.")
        sys.exit(0)

    if impl == 'alps_system':
        if opt.list_details:
            # Arguments are assumed to be a comma-separated,
            # hyphen-condensed node list.
            client_utils.print_node_details(args)
        else:
            client_utils.print_node_list()
        return

    header, output = client_utils.cluster_display_node_info()
    if parser.options.noheader is not None:
        client_utils.printTabular(header + output, with_header_info=False)
    else:
        client_utils.printTabular(header + output)
def partition_queues(opts, parts, sys_type):
    """Compute the new queue assignment string for each partition.

    Builds each partition's queue list from its current queues, removing
    the queues named in opts.queue when --rmq was given and appending them
    when --appq was given.

    Returns a dict mapping partition name -> colon-joined queue string.
    """
    if sys_type == 'bgq':
        args = ([{'name': partname, 'size': '*', 'state': '*', 'scheduled': '*',
                  'functional': '*', 'queue': '*', 'relatives': '*',
                  'passthrough_blocks': '*', 'node_geometry': '*'}
                 for partname in parts], )
    if sys_type == 'bgp':
        args = ([{'name': partname, 'size': '*', 'state': '*', 'scheduled': '*',
                  'functional': '*', 'queue': '*', 'parents': '*', 'children': '*'}
                 for partname in parts], )
    _parts = client_utils.component_call(SYSMGR, False, 'get_partitions', args)
    queues_dict = {}
    for p in _parts:
        qs_1 = p['queue'].split(':')
        qs_2 = opts.queue.split(':')
        # Drop empty strings left by stray ':' separators.  The original used
        # "q is not ''" -- an identity comparison with a literal that only
        # worked through CPython string interning (and warns on modern
        # Pythons); compare by value instead.
        new_queues = [q for q in qs_1 if q != '']
        for q in qs_2:
            if q in new_queues and opts.rmq is not None:
                new_queues.remove(q)
            if q not in new_queues and opts.appq is not None:
                new_queues.append(q)
        if len(new_queues) == 1:
            queues_dict[p['name']] = new_queues[0]
        else:
            queues_dict[p['name']] = ':'.join(new_queues)
    return queues_dict
def handle_list_io_option(sys_type):
    """Print a table of all IO blocks (meaningful on BG/Q systems only)."""
    if sys_type != 'bgq':
        client_utils.logger.error("WARNING: IO Block information only exists on BG/Q-type systems.")
    # Fetch and print bulk IO Block data.
    if sys_type == 'bgq':
        spec = {'name': '*', 'size': '*', 'status': '*', 'state': '*',
                'block_computes_for_reboot': '*', 'autoreboot': '*',
                'current_kernel': '*', 'current_kernel_options': '*'}
        io_block_info = client_utils.component_call(SYSMGR, False, 'get_io_blocks', ([spec],))
        data = [['Name', 'Size', 'State', 'CS Status', 'BlockComputes', 'Autoreboot']]
        for io_block in io_block_info:
            data.append([io_block['name'],
                         io_block['size'],
                         io_block['state'],
                         io_block['status'],
                         'x' if io_block['block_computes_for_reboot'] else '-',
                         'x' if io_block['autoreboot'] else '-'])
        client_utils.printTabular(data, centered=[4])
def main():
    """ qstat main """
    # Configure client logging before any other client activity.
    client_utils.setup_logging(logging.INFO)
    delim = ':'

    # Parser callbacks paired with their argument tuples.
    callbacks = [
        (cb_debug, ()),
        (cb_split, (delim, )),
    ]

    # Fold the version information into the option definitions.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)
    parser.parse_it()  # parse the command line

    # Header layout information for the requested output format.
    hinfo = client_utils.header_info(parser)

    # Queue names/states are needed to resolve the jobs to display.
    queues = client_utils.component_call(QUEMGR, True, 'get_queues',
                                         ([{'name': '*', 'state': '*'}], ))

    if parser.options.Q != None:
        # -Q given: report on the queues themselves.
        output = get_output_for_queues(parser, hinfo)
    else:
        # Otherwise build the query from the long header (all fields)
        # and report on jobs.
        output = get_output_for_jobs(parser, hinfo, queues)

    process_the_output(output, parser, hinfo)
def handle_clean_block_option(parts, whoami, sys_type):
    """Initiate forced cleanup on each named block, then exit.

    parts    -- iterable of block names to clean
    whoami   -- user name recorded with the cleaning request
    sys_type -- system flavor; BG/P is unsupported (notice logged, exit 0)
    """
    if sys_type == 'bgp':
        # Fixed typo in the user-facing message ("clenaing" -> "cleaning").
        client_utils.logger.info("Force cleaning not available for BG/P systems")
        sys.exit(0)
    # Results are discarded -- presumably these calls act as a liveness/status
    # check on the scheduler and system components before requesting cleanup;
    # confirm against the component implementations.
    client_utils.component_call(SCHMGR, False, 'sched_status', ())
    client_utils.component_call(SYSMGR, False, 'booting_status', ())
    for part in parts:
        client_utils.component_call(SYSMGR, False, 'set_cleaning', (part, None, whoami))
        client_utils.logger.info("Initiating cleanup on block %s" % part)
    sys.exit(0)
def main():
    """ slpstat main """
    # Configure client logging before any other client activity.
    client_utils.setup_logging(logging.INFO)

    callbacks = [[cb_debug, ()]]

    # Fold the version information into the option definitions.
    opt_def = __doc__.replace('__revision__', __revision__)
    opt_def = opt_def.replace('__version__', __version__)
    parser = ArgParse(opt_def, callbacks)
    parser.parse_it()  # parse the command line

    if not parser.no_args():
        client_utils.logger.error('No arguments needed')

    query = [{'tag': 'service', 'name': '*', 'stamp': '*', 'location': '*'}]
    services = client_utils.component_call(SLPMGR, False, 'get_services', (query, ))
    if not services:
        client_utils.logger.info("no services registered")
        return
    header = [('Name', 'Location', 'Update Time')]
    rows = [(service['name'],
             service['location'],
             time.strftime("%c", time.localtime(service['stamp'])))
            for service in services]
    client_utils.print_tabular(header + rows)
def recursive(args):
    """Return args plus all children of the named partitions, deduplicated.

    Queries the system component for each partition's children and returns
    a fresh list; the caller's `args` list is not modified.  (The original
    did `parts = args`, so its `parts.append(child)` calls mutated the
    caller's argument in place; the initial `parts = []` was dead code.)
    """
    comp_args = ([{'tag': 'partition', 'name': name, 'children': '*'}
                  for name in args], )
    partdata = client_utils.component_call('system', False, 'get_partitions', comp_args)
    parts = list(args)  # copy: never mutate the input
    for part in partdata:
        if 'children' not in part:
            continue
        for child in part['children']:
            if child not in parts:
                parts.append(child)
    return parts
def get_jobdata(jobids, parser, user):
    """Fetch job data for the given job ids; exit if disallowed options are used while a job runs."""
    spec = [{'tag':'job', 'user':user, 'jobid':jobid, 'project':'*', 'notify':'*',
             'walltime':'*', 'mode':'*', 'procs':'*', 'nodes':'*', 'is_active':"*",
             'queue':'*'} for jobid in jobids]
    jobdata = client_utils.component_call(QUEMGR, False, 'get_jobs', (spec,))
    # Certain options are only valid when no matched job is active.
    job_running = any(job['is_active'] for job in jobdata)
    if job_running and options_disallowed(parser):
        sys.exit(1)
    return jobdata