Beispiel #1
0
def cleanup(llqid, options, logger, tempdir):
    """Remove temporary artefacts and optionally cancel a submitted job.

    Args:
        llqid: Job id passed to ``llcancel``, or ``None`` when no job
            should be cancelled.
        options: Parsed options object; ``debug``, ``generate_only`` and
            ``job_name`` are read.
        logger: Logger used for progress and diagnostic messages.
        tempdir: Path of the temporary working directory to delete.

    When ``options.debug`` is set nothing is deleted or cancelled; each
    skipped step is logged instead.  The submit file is also kept when
    ``options.generate_only`` is set.  File removal is best-effort so that
    a failure there never prevents the job cancellation below.
    """
    if options.debug:
        logger.debug("not deleting temporary working directory: " + tempdir)
    else:
        logger.info("deleting temporary files")
        try:
            shutil.rmtree(tempdir)
        except OSError as err:
            # This function is also called from an exception handler; raising
            # here would hide the original error and skip the cancel step.
            logger.warning(
                "could not delete temporary working directory %s: %s",
                tempdir, err)

    if options.debug or options.generate_only:
        logger.debug("not deleting %s_submit.ll" % options.job_name)
    else:
        try:
            os.remove('%s_submit.ll' % options.job_name)
        except OSError:
            # Best-effort: the submit file may never have been written.
            pass

    if llqid is not None:
        if options.debug:
            logger.debug("not cancelling job: " + llqid)
        else:
            print("***\nattempting to cancel job: " + llqid + "\n***")
            (llout, llerr) = util.call_command("llcancel " + llqid)
            logger.info(llout)
Beispiel #2
0
def main():
    """Entry point: build a LoadLeveler submit script and submit it.

    Options come from the per-host config file first and are then
    overridden by the command line.  ``--version`` and ``--configure`` are
    handled and return early.  Otherwise a submit script is generated and,
    unless generate-only or dry-run is requested, handed to ``llsubmit``.
    When stdout/stderr are redirected the function waits for the job's
    "done" marker file and then processes the captured output.

    If anything raises after the temporary directory exists, ``cleanup`` is
    called with the job id (if one was identified) and the exception is
    re-raised.

    Raises:
        AssertionError: if the expected host name is not found in the
            ``llsubmit`` output.
    """
    # mode depends on if we are running on neser or shaheen
    host_arch = os.uname()[4]

    parser = build_parser(host_arch, 'kslrun')
    config_tuple = util.get_file_config(host_arch, 'kslrun')
    config_strings = ['--'+arg+'='+value for arg,value in config_tuple]
    # Parse the config-file values first, then let the real command line
    # override them by parsing into the same namespace.
    file_options = parser.parse_args(config_strings)
    options = parser.parse_args(namespace=file_options)

    util.setup_logging(options)

    if options.version:
        print("kslrun: "+ksl.process.__version__)
        return

    if options.configure:
        configure(host_arch, options)
        return

    logger = logging.getLogger('kslrun')
    logger.debug("options")
    logger.debug(options)

    if options.interactive or options.generate_only:
        options.no_std_redirect = True
    elif not options.command:
        # Truthiness instead of `is ''`: identity comparison against a
        # literal is unreliable and never matches an empty list.
        print("Missing command argument to mpirun, e.g. kslrun ./a.out")
        parser.print_usage()
        return

    # Created only after validation so the usage-error path above does not
    # leave an orphaned temporary directory behind.
    tempdir = tempfile.mkdtemp()
    llqid = None
    # NOTE(review): the generate-only, dry-run and submission-error paths
    # below return without calling cleanup(), so tempdir is left in place on
    # those paths -- confirm whether that is intended.
    try:
        if not options.no_std_redirect:
            job_out_name = os.path.join(tempdir, "job_out")
            job_err_name = os.path.join(tempdir, "job_err")

            time.sleep(1)
            job_done_name = os.path.join(tempdir, "job_done")
            done_command = 'touch ' + job_done_name
        else:
            job_out_name = options.prefix+'.out'
            job_err_name = options.prefix+'.err'
            done_command = ''

        logger.info("setting up LoadLeveler submission script")

        # Start from every attribute of the options namespace, then add the
        # values computed above.
        ll_dict = dict(inspect.getmembers(options))
        ll_dict['command'] = ''.join(options.command)
        ll_dict['job_out_name'] = job_out_name
        ll_dict['job_err_name'] = job_err_name
        ll_dict['done_command'] = done_command

        if options.no_notify:
            ll_dict['notification'] = 'never'
        else:
            ll_dict['notification'] = 'always'

        llfilename = setup_ll_file(options, host_arch, ll_dict, tempdir)

        if options.generate_only:
            logger.info("Generate only -- returning")
            return

        logger.info("submitting to LoadLeveler")

        if options.dry_run:
            util.call_command("llsubmit " + llfilename, options)
            logger.info("Dry run -- returning")
            return

        (llout, llerr) = util.call_command("llsubmit " + llfilename, options)
        logger.info('llout: ' + str(llout))
        logger.info('llerr: ' + str(llerr))

        if not llout:
            # No output on stdout is treated as a failed submission.
            llerr = llerr.decode('utf-8')
            logger.info("error from LoadLeveler")
            sys.stderr.write(llerr)
            if llerr.startswith('invalid account'):
                sys.stderr.write("********************************************************************************\n")
                sys.stderr.write("try configuring (kslrun -c) or setting a valid account with kslrun -a account_no\n")
                sys.stderr.write("********************************************************************************\n")
            return
        # silent ignore currently
        #        logger.error(llerr)

        longllqid = str(llout).split(' ')[1]

        # Raised explicitly (same exception type as the former assert) so the
        # check is not stripped when Python runs with -O.
        if 'shaheen.kaust.edu.sa' not in longllqid:
            raise AssertionError(
                "Unable to determine llq job id from output, %s" % llout)

        llqid = longllqid.replace("shaheen.kaust.edu.sa.","") + ".0"
        logger.info('identified job as ' + llqid)

        if not options.no_std_redirect:
            logger.info("watching for job_done file")
            watch_hangup(job_done_name, llqid)
            handle_output(job_out_name, job_err_name)
        else:
            logger.info("job submitted")
            print(llqid + " submitted to LoadLeveler")
    except BaseException:
        # Deliberately broad (includes KeyboardInterrupt): clean up and
        # cancel the job if one was identified, then re-raise.
        cleanup(llqid, options, logger, tempdir)
        raise
    cleanup(None, options, logger, tempdir)