def explore(self, cmdl): """ Starts exploration of a pickled job folder. Usage: The most standard form is to simply load a job folder. All other job-dictionary magic functions will then use it. >>> explore path/to/job_folder_pickle If you have created a job-folder directly (rather than save it to disk), you can also load it as >>> explore jobfolder_variable In case of conflict between a pathname and a variable name, you can use the more explicit version. >>> explore --file jobfolder >>> explore --expression jobfolder You can load a dictionary and filter out successfull or unsuccessfull runs. To explore errors only, use: >>> explore errors path/to/job_pickle To explore only successful results, use: >>> explore results path/to/job_pickle """ import argparse from os.path import join, dirname from pylada import interactive from pylada.misc import bugLev # options supported by all. parser = argparse.ArgumentParser(prog='%explore', description='Opens a job-folder from file on disk.') group = parser.add_mutually_exclusive_group() group.add_argument( '--file', action="store_true", dest="is_file", help='JOBFOLDER is a path to a job-dictionary stored on disk.' ) group.add_argument( '--expression', action="store_true", dest="is_expression", help='JOBFOLDER is a python expression.' ) parser.add_argument( 'type', metavar='TYPE', type=str, default="", nargs='?', help="Optional. Specifies what kind of job folders will be explored. "\ "Can be one of results, errors, all, running. " \ "\"results\" are those job folders which have completed. " \ "\"errors\" are those job folders which are not \"running\" " \ "at the time of invokation and failed somehow. \"all\" means " \ "all job folders. By default, the dictionary is read as it was " \ "saved. The modified job-folder is not saved to disk." ) parser.add_argument( 'jobfolder', metavar='JOBFOLDER', type=str, default="", nargs='?', help='Job-dictionary variable or path to job folder saved to disk.' ) # parse arguments try: args = parser.parse_args(cmdl.split()) except SystemExit: return None else: if len(args.jobfolder) == 0 \ and (args.type not in ["results", "errors", "all", "running"]): args.jobfolder = args.type args.type = "" if len(args.jobfolder) == 0 \ and (not args.is_file) \ and (not args.is_expression) \ and len(args.type) == 0 \ and len(args.jobfolder) == 0: if interactive.jobfolder is None: print "No current job folders." elif interactive.jobfolder_path is None: print "Current position in job folder:", interactive.jobfolder.name else: print "Current position in job folder:", interactive.jobfolder.name print "Path to job folder: ", interactive.jobfolder_path return options = ['', "errors", "results", "all", 'running'] if hasattr(self, "magic_qstat"): options.append("running") if args.type not in options: print "Unknown TYPE argument {0}.\nTYPE can be one of {1}." \ .format(args.type, options) return # tries to open dictionary try: _explore_impl(self, args) except: return # now does special stuff if requested. # First checks for errors. Errors are jobs which cannot be determined as # running and have failed. if args.type == "errors": if interactive.jobfolder_path is None: print "No known path/file for current job-folder.\n"\ "Please save to file first." return for name, job in interactive.jobfolder.iteritems(): if job.is_tagged: continue directory = join(dirname(interactive.jobfolder_path), name) extract = job.functional.Extract(directory) # successful jobs are not errors. if extract.success: job.tag() # running jobs are not errors either. else: is_run = getattr(extract, 'is_running', False) if is_run: job.tag() # Vladan changed Jun 23,2014: jobs which are in the queue are not errors either. from pylada.ipython import qstat qstuff=qstat(self,name) is_inqueue = len(qstuff)>0 if is_inqueue: job.tag() # what's left is an error. else: job.untag() if bugLev >= 5: print 'ipython/explore errors: dir: %s is_run: %s' \ % (directory, is_run,) # Look only for jobs which are successfull. if args.type == "results": if interactive.jobfolder_path is None: print "No known path/file for current job-folder.\n"\ "Please save to file first." return directory = dirname(interactive.jobfolder_path) for name, job in interactive.jobfolder.iteritems(): if not job.functional.Extract(join(directory,name)).success: job.tag() else: job.untag() # Look only for jobs which are running (and can be determined as such). elif args.type == "running": if interactive.jobfolder_path is None: print "No known path/file for current job-folder.\n"\ "Please save to file first." return for name, job in interactive.jobfolder.iteritems(): directory = join(dirname(interactive.jobfolder_path), name) extract = job.functional.Extract(directory) is_run = getattr(extract, 'is_running', False) if is_run: # exploremod: # import subprocess # print job.jobNumber, job.jobId # proc = subprocess.Popen( # ['checkjob', str(job.jobNumber)], # shell=False, # cwd=wkDir, # stdin=subprocess.PIPE, # stdout=subprocess.PIPE, # stderr=subprocess.PIPE, # bufsize=10*1000*1000) # (stdout, stderr) = proc.communicate() # parse stdout to get status. May be 'not found'. # if idle or active: job.untag() # else: job.tag() job.untag() else: job.tag() if bugLev >= 5: print 'ipython/explore running: dir: %s is_run: %s' \ % (directory, is_run,) # All jobs without restriction. elif args.type == "all": if interactive.jobfolder_path is None: return for job in interactive.jobfolder.itervalues(): job.untag()
def launch(self, event, jobfolders): """ Launch scattered jobs: one job = one pbs script. """ from copy import deepcopy import os, re import subprocess from os.path import join, dirname, exists, basename from os import remove from .. import get_shell from ...misc import Changedir from ... import pbs_string, default_pbs, qsub_exe, default_comm from . import get_walltime, get_mppalloc, get_queues, scattered_script from pylada.misc import bugLev from pylada.misc import testValidProgram if bugLev >= 1: print "launch/scattered: event: %s" % (event,) shell = get_shell(self) pbsargs = deepcopy(dict(default_comm)) pbsargs.update(default_pbs) pbsargs['ppn'] = event.ppn mppalloc = get_mppalloc(shell, event) if mppalloc is None: return # Set pbsargs['walltime'] to a string like '03:59:59' if not get_walltime(shell, event, pbsargs): return # Set pbsargs['queue'], pbsargs['account'] if not get_queues(shell, event, pbsargs): return if bugLev >= 1: print "launch/scattered: pbsargs: %s" % (pbsargs,) # gets python script to launch in pbs. pyscript = scattered_script.__file__ if bugLev >= 1: print "launch/scattered: pyscript: %s" % (pyscript,) if pyscript[-1] == 'c': pyscript = pyscript[:-1] # change .pyc to .py # creates file names. hasprefix = getattr(event, "prefix", None) def pbspaths(directory, jobname, suffix): """ creates filename paths. """ return join( join(directory,jobname), '{0}-pbs{1}'.format(event.prefix, suffix) if hasprefix \ else 'pbs{0}'.format(suffix) ) # now loop over jobfolders pbsscripts = [] for current, path in jobfolders: if bugLev >= 1: print "launch/scattered: current: %s path: %s" \ % (current, path,) # creates directory. directory = dirname(path) with Changedir(directory) as pwd: pass # loop over executable folders in current jobfolder for name, job in current.root.iteritems(): if bugLev >= 1: # if True: print 'launch/scattered: current: %s' % (current,) print 'launch/scattered: current.root: %s' % (current.root,) print 'launch/scattered: name: %s' % (name,) print 'launch/scattered: job: %s' % (job,) print 'launch/scattered: job.is_tagged: %s' % (job.is_tagged,) # avoid jobfolder which are off if job.is_tagged: continue ###### added by Peter Graf # avoid jobfolder which is already in the queue: from pylada.ipython import qstat qstuff = qstat(self, name) if (len(qstuff) > 0 and not event.force): status = [x.split()[2] for x in qstuff] # status is a list like ['Q'], ['R'], ['H'], ['C'], ['R', 'C'], etc # 'RHQ' is the status that the job is indeed in the queue, 'C' job completed and being removed from the queue # if needed, a prefix can be used to distinguish two jobs with the same name if len(set(status)&set('RHQ')) > 0: print "Job %s is in the queue, will not be re-queued" % name continue ####### # avoid successful jobs.unless specifically requested if hasattr(job.functional, 'Extract') and not event.force: p = join(directory, name) extract = job.functional.Extract(p) if extract.success: print "Job {0} completed successfully. " \ "It will not be relaunched.".format(name) continue # setup parameters for launching/running jobs pbsargs['n'] = mppalloc(job) if hasattr(mppalloc, "__call__") \ else mppalloc pbsargs['nnodes'] = (pbsargs['n'] + pbsargs['ppn'] - 1) \ // pbsargs['ppn'] pbsargs['err'] = pbspaths(directory, name, 'err') pbsargs['out'] = pbspaths(directory, name, 'out') pbsargs['name'] = name if len(name) \ else "{0}-root".format(basename(path)) pbsargs['directory'] = directory pbsargs['bugLev'] = bugLev pbsargs['testValidProgram'] = testValidProgram pbsargs['scriptcommand'] \ = "{0} --bugLev {bugLev} --testValidProgram {testValidProgram} --nbprocs {n} --ppn {ppn} --jobid={1} {2}" \ .format(pyscript, name, path, **pbsargs) ppath = pbspaths(directory, name, 'script') if bugLev >=1: print "launch/scattered: ppath: \"%s\"" % (ppath,) print "launch/scattered: pbsargs: \"%s\"" % (pbsargs,) pbsscripts.append( ppath) # write pbs scripts with Changedir(join(directory, name)) as pwd: pass if exists(pbsscripts[-1]): remove(pbsscripts[-1]) with open(pbsscripts[-1], "w") as file: string = pbs_string(**pbsargs) if hasattr(pbs_string, '__call__') \ else pbs_string.format(**pbsargs) # peregrine takes back the option of "anynode" string = string.replace("#PBS -l feature=anynode", "##PBS -l feature=anynode") if bugLev >= 1: print "launch/scattered: ===== start pbsscripts[-1]: %s =====" \ % (pbsscripts[-1],) print '%s' % (string,) print "launch/scattered: ===== end pbsscripts[-1]: %s =====" \ % (pbsscripts[-1],) lines = string.split('\n') omitTag = '# omitted for testValidProgram: ' for line in lines: if testValidProgram != None \ and (re.match('^ *module ', line) \ or re.match('^\. .*/bin/activate$', line)): line = omitTag + line file.write( line + '\n') assert exists(pbsscripts[-1]) # exploremod # import subprocess # if not event.nolaunch: # move launch here: # # if bugLev >= 1: # print ... # # proc = subprocess.Popen( # [qsub_exe, pbsscripts[-1]], # shell=False, # cwd=wkDir, # stdin=subprocess.PIPE, # stdout=subprocess.PIPE, # stderr=subprocess.PIPE, # bufsize=10*1000*1000) # (stdout, stderr) = proc.communicate() # parse stdout to get jobNumber # job.jobNumber = jobNumber # # if bugLev >= 1: # print ... print "Created {0} scattered jobs from {1}.".format(len(pbsscripts), path) if event.nolaunch: return # otherwise, launch. for script in pbsscripts: if bugLev >= 1: print "launch/scattered: launch: shell: %s" % (shell,) print "launch/scattered: launch: qsub_exe: %s" % (qsub_exe,) print "launch/scattered: launch: script: \"%s\"" % (script,) if testValidProgram != None: cmdLine = '/bin/bash ' + script else: # qsub pbsscript (template is in config/mpi.py: pbs_string), # which sets up modules and invokes: python {scriptcommand} cmdLine = "{0} {1}".format(qsub_exe, script) nmerr = script + '.stderr' nmout = script + '.stdout' with open( nmerr, 'w') as ferr: with open( nmout, 'w') as fout: subprocess.call( cmdLine, shell=True, stderr=ferr, stdout=fout) # xxx: all subprocess: set stderr, stdout if os.path.getsize( nmerr) != 0: with open( nmerr) as fin: print 'launch/scattered: stderr: %s' % (fin.read(),) with open( nmout) as fin: print 'launch/scattered: stdout: %s' % (fin.read(),)