def cleanup(llqid, options, logger, tempdir):
    """Remove temporary artifacts and optionally cancel a submitted job.

    Args:
        llqid: LoadLeveler job id to cancel, or ``None`` when no job needs
            to be cancelled.
        options: Parsed options; reads ``debug``, ``generate_only`` and
            ``job_name``.
        logger: Logger used for progress messages.
        tempdir: Temporary working directory to delete.

    When ``options.debug`` is set nothing is deleted or cancelled; the
    skipped actions are only logged at debug level.
    """
    if options.debug:
        logger.debug("not deleting temporary working directory: " + tempdir)
    else:
        logger.info("deleting temporary files")
        shutil.rmtree(tempdir)

    submit_file = '%s_submit.ll' % options.job_name
    if options.debug or options.generate_only:
        logger.debug("not deleting " + submit_file)
    else:
        # Best effort: the submit file may never have been written.  Only
        # filesystem errors are ignored, so KeyboardInterrupt and
        # programming errors are no longer silently swallowed.
        try:
            os.remove(submit_file)
        except OSError as err:
            logger.debug("could not remove %s: %s", submit_file, err)

    if llqid is None:
        return

    if options.debug:
        logger.debug("not cancelling job: " + llqid)
    else:
        print("***\nattempting to cancel job: " + llqid + "\n***")
        (llout, _llerr) = util.call_command("llcancel " + llqid)
        logger.info(llout)
def main():
    """Entry point: build a LoadLeveler submission script and submit it.

    Options come from the per-host config file (via
    ``util.get_file_config``) and are then overridden by the command line.
    Depending on the options this prints the version, runs ``configure``,
    only generates the submission script, performs a dry run, or submits
    the job with ``llsubmit``.  Unless ``options.no_std_redirect`` is set,
    it then waits on a ``job_done`` marker file and hands the job's
    output/error files to ``handle_output``.

    If an exception (including ``KeyboardInterrupt``) escapes after the
    job id is known, ``cleanup`` is called with that id so the job can be
    cancelled, and the exception is re-raised.

    Raises:
        AssertionError: if the job id cannot be determined from the
            ``llsubmit`` output.
    """
    # mode depends on if we are running on neser or shaheen
    host_arch = os.uname()[4]
    parser = build_parser(host_arch, 'kslrun')

    # Config-file values are parsed first, then used as the namespace that
    # the real command line is parsed into.
    config_tuple = util.get_file_config(host_arch, 'kslrun')
    config_strings = ['--' + arg + '=' + value for arg, value in config_tuple]
    file_options = parser.parse_args(config_strings)
    options = parser.parse_args(namespace=file_options)
    util.setup_logging(options)

    if options.version:
        print("kslrun: " + ksl.process.__version__)
        return
    if options.configure:
        configure(host_arch, options)
        return

    logger = logging.getLogger('kslrun')
    logger.debug("options")
    logger.debug(options)

    if options.interactive or options.generate_only:
        options.no_std_redirect = True
    elif not options.command:
        # Truthiness instead of `is ''`: identity comparison against a
        # string literal is implementation-dependent and never matches an
        # empty list.
        print("Missing command argument to mpirun, e.g. kslrun ./a.out")
        parser.print_usage()
        return

    # Created only after validation so the early return above does not
    # leave an orphaned temporary directory behind.
    tempdir = tempfile.mkdtemp()

    llqid = None
    try:
        if not options.no_std_redirect:
            job_out_name = os.path.join(tempdir, "job_out")
            job_err_name = os.path.join(tempdir, "job_err")
            # NOTE(review): purpose of this one-second pause is not evident
            # from this function -- kept as in the original.
            time.sleep(1)
            job_done_name = os.path.join(tempdir, "job_done")
            done_command = 'touch ' + job_done_name
        else:
            job_out_name = options.prefix + '.out'
            job_err_name = options.prefix + '.err'
            done_command = ''

        logger.info("setting up LoadLeveler submission script")
        ll_dict = dict(inspect.getmembers(options))
        # NOTE(review): joins without a separator; if options.command is a
        # list of arguments they are concatenated -- confirm intended.
        ll_dict['command'] = ''.join(options.command)
        ll_dict['job_out_name'] = job_out_name
        ll_dict['job_err_name'] = job_err_name
        ll_dict['done_command'] = done_command
        ll_dict['notification'] = 'never' if options.no_notify else 'always'

        llfilename = setup_ll_file(options, host_arch, ll_dict, tempdir)

        # NOTE(review): the early returns below skip cleanup(), so tempdir
        # is left on disk -- kept as in the original because it is not
        # visible here what setup_ll_file stores in tempdir.
        if options.generate_only:
            logger.info("Generate only -- returning")
            return

        logger.info("submitting to LoadLeveler")
        if options.dry_run:
            util.call_command("llsubmit " + llfilename, options)
            logger.info("Dry run -- returning")
            return

        (llout, llerr) = util.call_command("llsubmit " + llfilename, options)
        logger.info('llout: ' + str(llout))
        logger.info('llerr: ' + str(llerr))

        if not llout:
            # Empty output is treated as a submission failure.
            if isinstance(llerr, bytes):
                llerr = llerr.decode('utf-8')
            logger.info("error from LoadLeveler")
            sys.stderr.write(llerr)
            if llerr.startswith('invalid account'):
                sys.stderr.write("********************************************************************************\n")
                sys.stderr.write("try configuring (kslrun -c) or setting a valid account with kslrun -a account_no\n")
                sys.stderr.write("********************************************************************************\n")
            # silent ignore currently
            return

        longllqid = str(llout).split(' ')[1]
        # Explicit raise instead of `assert` so the check still runs under
        # `python -O`; the exception type is unchanged.
        if 'shaheen.kaust.edu.sa' not in longllqid:
            raise AssertionError(
                "Unable to determine llq job id from output, %s" % llout)
        llqid = longllqid.replace("shaheen.kaust.edu.sa.", "") + ".0"
        logger.info('identified job as ' + llqid)

        if not options.no_std_redirect:
            logger.info("watching for job_done file")
            watch_hangup(job_done_name, llqid)
            handle_output(job_out_name, job_err_name)
        else:
            logger.info("job submitted")
            print(llqid + " submitted to LoadLeveler")
    except BaseException:
        # Deliberately broad (covers KeyboardInterrupt): pass the job id to
        # cleanup so a submitted job can be cancelled, then propagate.
        cleanup(llqid, options, logger, tempdir)
        raise

    cleanup(None, options, logger, tempdir)