def main():
    """Print the status of selected DIRAC jobs.

    Job IDs come from the positional arguments, from a file (-f/--File),
    and/or from a job group (-g/--JobGroup, restricted to jobs at most
    30 days old). Exits 0 on success, 2 if the status query fails.
    """
    Script.registerSwitch("f:", "File=", "Get status for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get status for jobs in the given group")
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    from DIRAC import exit as DIRACExit
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

    dirac = Dirac()
    exitCode = 0

    jobs = []
    for key, value in Script.getUnprocessedSwitches():
        if key.lower() in ('f', 'file'):
            if os.path.exists(value):
                # Context manager guarantees the file is closed even if read() raises
                with open(value) as jFile:
                    jobs += jFile.read().split()
        elif key.lower() in ('g', 'jobgroup'):
            jobDate = toString(date() - 30 * day)  # Choose jobs no more than 30 days old
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result['OK']:
                print("Error:", result['Message'])
                DIRACExit(-1)
            jobs += result['Value']

    # Nothing to do: neither arguments nor switches selected any job
    if len(args) < 1 and not jobs:
        Script.showHelp(exitCode=1)

    if len(args) > 0:
        jobs += parseArguments(args)

    result = dirac.getJobStatus(jobs)
    if result['OK']:
        for job in result['Value']:
            print('JobID=' + str(job), end=' ')
            for status in result['Value'][job].items():
                print('%s=%s;' % status, end=' ')
            print()
    else:
        exitCode = 2
        print("ERROR: %s" % result['Message'])

    DIRACExit(exitCode)
def main():
    """Print the status of selected DIRAC jobs.

    Job IDs come from the positional arguments, from a file (-f/--File),
    and/or from a job group (-g/--JobGroup, restricted to jobs at most
    30 days old). Exits 0 on success, 2 if the status query fails.
    """
    Script.registerSwitch("f:", "File=", "Get status for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get status for jobs in the given group")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID: DIRAC Job ID"], mandatory=False)
    sws, args = Script.parseCommandLine(ignoreErrors=True)

    from DIRAC import exit as DIRACExit
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

    dirac = Dirac()
    exitCode = 0

    jobs = []
    for key, value in sws:
        if key.lower() in ("f", "file"):
            if os.path.exists(value):
                # Context manager guarantees the file is closed even if read() raises
                with open(value) as jFile:
                    jobs += jFile.read().split()
        elif key.lower() in ("g", "jobgroup"):
            jobDate = toString(date() - 30 * day)  # Choose jobs no more than 30 days old
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result["OK"]:
                print("Error:", result["Message"])
                DIRACExit(-1)
            jobs += result["Value"]

    # Nothing to do: neither arguments nor switches selected any job
    if len(args) < 1 and not jobs:
        Script.showHelp(exitCode=1)

    if len(args) > 0:
        jobs += parseArguments(args)

    result = dirac.getJobStatus(jobs)
    if result["OK"]:
        for job in result["Value"]:
            print("JobID=" + str(job), end=" ")
            for status in result["Value"][job].items():
                print("%s=%s;" % status, end=" ")
            print()
    else:
        exitCode = 2
        print("ERROR: %s" % result["Message"])

    DIRACExit(exitCode)
def main():
    """Delete selected DIRAC jobs.

    Job IDs come from the positional arguments, from a file (-f/--File),
    and/or from a job group (-g/--JobGroup, restricted to jobs at most
    30 days old). Exits 0 on success, 2 if deletion fails.
    """
    Script.registerSwitch("f:", "File=", "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get output for jobs in the given group")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID: DIRAC Job ID"], mandatory=False)
    sws, args = Script.parseCommandLine(ignoreErrors=True)

    import DIRAC
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.TimeUtilities import toString, day

    dirac = Dirac()
    jobs = []
    for sw, value in sws:
        if sw.lower() in ("f", "file"):
            if os.path.exists(value):
                # Context manager guarantees the file is closed even if read() raises
                with open(value) as jFile:
                    jobs += jFile.read().split()
        elif sw.lower() in ("g", "jobgroup"):
            # Only consider jobs from the last 30 days
            jobDate = toString(datetime.datetime.utcnow().date() - 30 * day)
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result["OK"]:
                # "No jobs selected" is an expected, benign outcome
                if "No jobs selected" not in result["Message"]:
                    print("Error:", result["Message"])
                    DIRAC.exit(-1)
            else:
                jobs += result["Value"]

    jobs.extend(parseArguments(args))

    if not jobs:
        print("Warning: no jobs selected")
        Script.showHelp()
        DIRAC.exit(0)

    result = dirac.deleteJob(jobs)
    if result["OK"]:
        print("Deleted jobs %s" % ",".join([str(j) for j in result["Value"]]))
        exitCode = 0
    else:
        print(result["Message"])
        exitCode = 2

    DIRAC.exit(exitCode)
def main():
    """Delete selected DIRAC jobs.

    Job IDs come from the positional arguments, from a file (-f/--File),
    and/or from a job group (-g/--JobGroup, restricted to jobs at most
    30 days old). Exits 0 on success, 2 if deletion fails.
    """
    Script.registerSwitch("f:", "File=", "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get output for jobs in the given group")
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    import DIRAC
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.Time import toString, date, day

    dirac = Dirac()
    jobs = []
    for sw, value in Script.getUnprocessedSwitches():
        if sw.lower() in ('f', 'file'):
            if os.path.exists(value):
                # Context manager guarantees the file is closed even if read() raises
                with open(value) as jFile:
                    jobs += jFile.read().split()
        elif sw.lower() in ('g', 'jobgroup'):
            jobDate = toString(date() - 30 * day)  # only jobs from the last 30 days
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result['OK']:
                # "No jobs selected" is an expected, benign outcome
                if "No jobs selected" not in result['Message']:
                    print("Error:", result['Message'])
                    DIRAC.exit(-1)
            else:
                jobs += result['Value']

    jobs.extend(parseArguments(args))

    if not jobs:
        print("Warning: no jobs selected")
        Script.showHelp()
        DIRAC.exit(0)

    result = dirac.deleteJob(jobs)
    if result['OK']:
        print('Deleted jobs %s' % ','.join([str(j) for j in result['Value']]))
        exitCode = 0
    else:
        print(result['Message'])
        exitCode = 2

    DIRAC.exit(exitCode)
def get_job_list(owner, job_group, n_hours):
    """Return the list of job IDs matching *owner* and *job_group*
    submitted within the last *n_hours* hours.

    Exits the process (DIRAC.exit(-1)) when the selection fails or
    returns nothing.
    """
    from DIRAC.Interfaces.API.Dirac import Dirac

    dirac = Dirac()
    now = datetime.datetime.now()
    onehour = datetime.timedelta(hours=1)
    results = dirac.selectJobs(jobGroup=job_group, owner=owner, date=now - n_hours * onehour)

    # Check the DIRAC return structure explicitly ('OK' flag), consistent
    # with the convention used by the other scripts, instead of probing
    # only for the presence of the 'Value' key.
    if not results['OK'] or 'Value' not in results:
        DIRAC.gLogger.error(
            "No job found for group \"%s\" and owner \"%s\" in the past %s hours"
            % (job_group, owner, n_hours))
        DIRAC.exit(-1)

    # Found some jobs, return the list of IDs
    jobs_list = results['Value']
    return jobs_list
def main():
    """Download the output sandboxes of selected jobs.

    Job IDs come from the positional arguments, from a file (-f/--File),
    and/or from a job group (-g/--JobGroup: Done/Failed jobs at most 30
    days old). Sandboxes are stored under -D/--Dir (and a per-group
    subdirectory when -g is used); jobs whose directory already exists
    are skipped. Exits 0 on success, 2 if any retrieval failed.
    """
    Script.registerSwitch("D:", "Dir=", "Store the output in this directory")
    Script.registerSwitch("f:", "File=", "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get output for jobs in the given group")
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Core.Utilities.File import mkDir

    dirac = Dirac()
    exitCode = 0
    errorList = []
    outputDir = None
    group = None
    jobs = []

    for sw, value in Script.getUnprocessedSwitches():
        if sw in ('D', 'Dir'):
            outputDir = value
        elif sw.lower() in ('f', 'file'):
            if os.path.exists(value):
                # Context manager guarantees the file is closed even if read() raises
                with open(value) as jFile:
                    jobs += jFile.read().split()
        elif sw.lower() in ('g', 'jobgroup'):
            group = value
            jobDate = toString(date() - 30 * day)
            # Choose jobs in final state, no more than 30 days old
            # (single loop instead of duplicated Done/Failed code)
            for finalStatus in ('Done', 'Failed'):
                result = dirac.selectJobs(jobGroup=value, date=jobDate, status=finalStatus)
                if not result['OK']:
                    # "No jobs selected" is an expected, benign outcome
                    if "No jobs selected" not in result['Message']:
                        print("Error:", result['Message'])
                        DIRAC.exit(-1)
                else:
                    jobs += result['Value']

    for arg in parseArguments(args):
        if os.path.isdir(arg):
            print("Output for job %s already retrieved, remove the output directory to redownload" % arg)
        else:
            jobs.append(arg)

    if not jobs:
        print("No jobs selected")
        DIRAC.exit(0)

    # Group selections get their own subdirectory under the output directory
    if group:
        if outputDir:
            outputDir = os.path.join(outputDir, group)
        else:
            outputDir = group

    if outputDir:
        mkDir(outputDir)
    else:
        outputDir = os.getcwd()

    # Skip jobs whose sandbox directory already exists in outputDir
    jobs = [str(job) for job in jobs]
    doneJobs = os.listdir(outputDir)
    todoJobs = [job for job in jobs if job not in doneJobs]

    for job in todoJobs:
        result = dirac.getOutputSandbox(job, outputDir=outputDir)
        jobDir = str(job)
        if outputDir:
            jobDir = os.path.join(outputDir, job)
        if result['OK']:
            if os.path.exists(jobDir):
                print('Job output sandbox retrieved in %s/' % (jobDir))
        else:
            # Remove a partially-created directory so a rerun retries the job
            if os.path.exists('%s' % jobDir):
                shutil.rmtree(jobDir)
            errorList.append((job, result['Message']))
            exitCode = 2

    for error in errorList:
        print("ERROR %s: %s" % error)

    DIRAC.exit(exitCode)
def main():
    """Summarise jobs at a (BOINC) site: counts per status, per worker node,
    and optionally the full job list.

    Fixes relative to the previous revision:
    - ``sorted(..., cmp=...)`` does not exist in Python 3; replaced with an
      equivalent ``key`` sorting nodes by decreasing job count.
    - ``except BaseException`` (which swallows Ctrl-C/SystemExit) narrowed to
      ``ValueError``, the only exception int() raises here.
    - The comprehension no longer shadows the builtin ``id``.
    """
    site = 'BOINC.World.org'
    status = ["Running"]
    minorStatus = None
    workerNodes = None
    since = None
    date = 'today'
    full = False
    until = None
    batchIDs = None

    Script.registerSwitch('', 'Site=', ' Select site (default: %s)' % site)
    Script.registerSwitch('', 'Status=', ' Select status (default: %s)' % status)
    Script.registerSwitch('', 'MinorStatus=', ' Select minor status')
    Script.registerSwitch('', 'WorkerNode=', ' Select WN')
    Script.registerSwitch('', 'BatchID=', ' Select batch jobID')
    Script.registerSwitch(
        '', 'Since=',
        ' Date since when to select jobs, or number of days (default: today)')
    Script.registerSwitch('', 'Date=', ' Specify the date (check for a full day)')
    Script.registerSwitch(
        '', 'Full', ' Printout full list of job (default: False except if --WorkerNode)')
    Script.parseCommandLine()

    from DIRAC import gLogger
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient

    switches = Script.getUnprocessedSwitches()
    for switch in switches:
        if switch[0] == 'Site':
            site = switch[1]
        elif switch[0] == 'MinorStatus':
            minorStatus = switch[1]
        elif switch[0] == 'Status':
            if switch[1].lower() == 'all':
                status = [None]
            else:
                status = switch[1].split(',')
        elif switch[0] == 'WorkerNode':
            workerNodes = switch[1].split(',')
        elif switch[0] == 'BatchID':
            try:
                # int() raises ValueError on bad input; do not catch wider
                batchIDs = [int(batchID) for batchID in switch[1].split(',')]
            except ValueError:
                gLogger.error('Invalid jobID', switch[1])
                DIRAC.exit(1)
        elif switch[0] == 'Full':
            full = True
        elif switch[0] == 'Date':
            since = switch[1].split()[0]
            until = str(datetime.datetime.strptime(since, '%Y-%m-%d') +
                        datetime.timedelta(days=1)).split()[0]
        elif switch[0] == 'Since':
            date = switch[1].lower()
            if date == 'today':
                since = None
            elif date == 'yesterday':
                since = 1
            elif date == 'ever':
                since = 2 * 365
            elif date.isdigit():
                since = int(date)
                date += ' days'
            else:
                since = date
            # A day count gets converted to an absolute 'YYYY-MM-DD' string
            if isinstance(since, int):
                since = str(datetime.datetime.now() - datetime.timedelta(days=since)).split()[0]

    # Filtering by WN or batch ID implies listing the jobs
    if workerNodes or batchIDs:
        # status = [None]
        full = True

    monitoring = JobMonitoringClient()
    dirac = Dirac()

    # Get jobs according to selection
    jobs = set()
    for stat in status:
        res = dirac.selectJobs(site=site, date=since, status=stat, minorStatus=minorStatus)
        if not res['OK']:
            gLogger.error('Error selecting jobs', res['Message'])
            DIRAC.exit(1)
        allJobs = set(int(job) for job in res['Value'])
        if until:
            # --Date mode: exclude jobs still matching on the following day
            res = dirac.selectJobs(site=site, date=until, status=stat)
            if not res['OK']:
                gLogger.error('Error selecting jobs', res['Message'])
                DIRAC.exit(1)
            allJobs -= set(int(job) for job in res['Value'])
        jobs.update(allJobs)

    if not jobs:
        gLogger.always('No jobs found...')
        DIRAC.exit(0)

    allJobs = set()
    result = {}
    wnJobs = {}
    gLogger.always('%d jobs found' % len(jobs))

    # Get host name and local batch ID of each job; apply WN/batchID filters
    for job in jobs:
        res = monitoring.getJobParameter(job, 'HostName')
        node = res.get('Value', {}).get('HostName', 'Unknown')
        res = monitoring.getJobParameter(job, 'LocalJobID')
        batchID = res.get('Value', {}).get('LocalJobID', 'Unknown')
        if workerNodes:
            if not [wn for wn in workerNodes if node.startswith(wn)]:
                continue
            allJobs.add(job)
        if batchIDs:
            if batchID not in batchIDs:
                continue
            allJobs.add(job)
        if full or status == [None]:
            allJobs.add(job)
        result.setdefault(job, {})['Status'] = status
        result[job]['Node'] = node
        result[job]['LocalJobID'] = batchID
        wnJobs[node] = wnJobs.setdefault(node, 0) + 1

    # If necessary get jobs' status
    statusCounters = {}
    if allJobs:
        allJobs = sorted(allJobs, reverse=True)
        res = monitoring.getJobsStatus(allJobs)
        if res['OK']:
            jobStatus = res['Value']
            res = monitoring.getJobsMinorStatus(allJobs)
            if res['OK']:
                jobMinorStatus = res['Value']
                res = monitoring.getJobsApplicationStatus(allJobs)
                if res['OK']:
                    jobApplicationStatus = res['Value']
        if not res['OK']:
            gLogger.error('Error getting job parameter', res['Message'])
        else:
            for job in allJobs:
                stat = jobStatus.get(job, {}).get('Status', 'Unknown') + '; ' + \
                    jobMinorStatus.get(job, {}).get('MinorStatus', 'Unknown') + '; ' + \
                    jobApplicationStatus.get(job, {}).get('ApplicationStatus', 'Unknown')
                result[job]['Status'] = stat
                statusCounters[stat] = statusCounters.setdefault(stat, 0) + 1
    elif not workerNodes and not batchIDs:
        allJobs = sorted(jobs, reverse=True)

    # Print out result
    if workerNodes or batchIDs:
        gLogger.always('Found %d jobs at %s, WN %s (since %s):' % (len(allJobs), site, workerNodes, date))
        if allJobs:
            gLogger.always('List of jobs:', ','.join([str(job) for job in allJobs]))
    else:
        if status == [None]:
            gLogger.always('Found %d jobs at %s (since %s):' % (len(allJobs), site, date))
            for stat in sorted(statusCounters):
                gLogger.always('%d jobs %s' % (statusCounters[stat], stat))
        else:
            gLogger.always('Found %d jobs %s at %s (since %s):' % (len(allJobs), status, site, date))
        # Python 3: sorted() has no cmp argument; sort nodes by decreasing
        # job count with an equivalent key function
        gLogger.always(
            'List of WNs:',
            ','.join(['%s (%d)' % (node, wnJobs[node])
                      for node in sorted(wnJobs, key=lambda n: -wnJobs[n])]))

    if full:
        if workerNodes or batchIDs:
            # Group the jobs per (short) worker-node name before printing
            nodeJobs = {}
            for job in allJobs:
                status = result[job]['Status']
                node = result[job]['Node'].split('.')[0]
                jobID = result[job].get('LocalJobID')
                nodeJobs.setdefault(node, []).append((jobID, job, status))
            if not workerNodes:
                workerNodes = sorted(nodeJobs)
            for node in workerNodes:
                for job in nodeJobs.get(node.split('.')[0], []):
                    gLogger.always('%s ' % node + '(%s): %s - %s' % job)
        else:
            for job in allJobs:
                status = result[job]['Status']
                node = result[job]['Node']
                jobID = result[job].get('LocalJobID')
                gLogger.always('%s (%s): %s - %s' % (node, jobID, job, status))
group = None jobs = [] for sw, value in Script.getUnprocessedSwitches(): if sw in ( 'D', 'Dir' ): outputDir = value elif sw.lower() in ( 'f', 'file' ): if os.path.exists( value ): jFile = open( value ) jobs += jFile.read().split() jFile.close() elif sw.lower() in ( 'g', 'jobgroup' ): group = value jobDate = toString( date() - 30 * day ) # Choose jobs in final state, no more than 30 days old result = dirac.selectJobs( jobGroup = value, date = jobDate, status = 'Done' ) if not result['OK']: if not "No jobs selected" in result['Message']: print "Error:", result['Message'] DIRAC.exit( -1 ) else: jobs += result['Value'] result = dirac.selectJobs( jobGroup = value, date = jobDate, status = 'Failed' ) if not result['OK']: if not "No jobs selected" in result['Message']: print "Error:", result['Message'] DIRAC.exit( -1 ) else: jobs += result['Value'] for arg in parseArguments( args ):
from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments dirac = Dirac() exitCode = 0 jobs = [] for key, value in Script.getUnprocessedSwitches(): if key.lower() in ( 'f', 'file' ): if os.path.exists( value ): jFile = open( value ) jobs += jFile.read().split() jFile.close() elif key.lower() in ( 'g', 'jobgroup' ): jobDate = toString( date() - 30 * day ) # Choose jobs no more than 30 days old result = dirac.selectJobs( jobGroup = value, date = jobDate ) if not result['OK']: print "Error:", result['Message'] DIRACExit( -1 ) jobs += result['Value'] if len( args ) < 1 and not jobs: Script.showHelp() if len( args ) > 0: jobs += parseArguments( args ) result = dirac.getJobStatus( jobs ) if result['OK']: for job in result['Value']: print 'JobID=' + str( job ),
def main():
    """Download the output sandboxes of selected jobs.

    Job IDs come from the positional arguments, from a file (-f/--File),
    and/or from a job group (-g/--JobGroup: Done/Failed jobs at most 30
    days old). Sandboxes are stored under -D/--Dir (and a per-group
    subdirectory when -g is used); jobs whose directory already exists
    are skipped. Exits 0 on success, 2 if any retrieval failed.
    """
    Script.registerSwitch("D:", "Dir=", "Store the output in this directory")
    Script.registerSwitch("f:", "File=", "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get output for jobs in the given group")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID: DIRAC Job ID or a name of the file with JobID per line"], mandatory=False)
    sws, args = Script.parseCommandLine(ignoreErrors=True)

    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Core.Utilities.File import mkDir

    dirac = Dirac()
    exitCode = 0
    errorList = []
    outputDir = None
    group = None
    jobs = []

    for sw, value in sws:
        if sw in ("D", "Dir"):
            outputDir = value
        elif sw.lower() in ("f", "file"):
            if os.path.exists(value):
                # Context manager guarantees the file is closed even if read() raises
                with open(value) as jFile:
                    jobs += jFile.read().split()
        elif sw.lower() in ("g", "jobgroup"):
            group = value
            jobDate = toString(date() - 30 * day)
            # Choose jobs in final state, no more than 30 days old
            # (single loop instead of duplicated Done/Failed code)
            for finalStatus in ("Done", "Failed"):
                result = dirac.selectJobs(jobGroup=value, date=jobDate, status=finalStatus)
                if not result["OK"]:
                    # "No jobs selected" is an expected, benign outcome
                    if "No jobs selected" not in result["Message"]:
                        print("Error:", result["Message"])
                        DIRAC.exit(-1)
                else:
                    jobs += result["Value"]

    for arg in parseArguments(args):
        if os.path.isdir(arg):
            print("Output for job %s already retrieved, remove the output directory to redownload" % arg)
        else:
            jobs.append(arg)

    if not jobs:
        print("No jobs selected")
        DIRAC.exit(0)

    # Group selections get their own subdirectory under the output directory
    if group:
        if outputDir:
            outputDir = os.path.join(outputDir, group)
        else:
            outputDir = group

    if outputDir:
        mkDir(outputDir)
    else:
        outputDir = os.getcwd()

    # Skip jobs whose sandbox directory already exists in outputDir
    jobs = [str(job) for job in jobs]
    doneJobs = os.listdir(outputDir)
    todoJobs = [job for job in jobs if job not in doneJobs]

    for job in todoJobs:
        result = dirac.getOutputSandbox(job, outputDir=outputDir)
        jobDir = str(job)
        if outputDir:
            jobDir = os.path.join(outputDir, job)
        if result["OK"]:
            if os.path.exists(jobDir):
                print("Job output sandbox retrieved in %s/" % (jobDir))
        else:
            # Remove a partially-created directory so a rerun retries the job
            if os.path.exists("%s" % jobDir):
                shutil.rmtree(jobDir)
            errorList.append((job, result["Message"]))
            exitCode = 2

    for error in errorList:
        print("ERROR %s: %s" % error)

    DIRAC.exit(exitCode)
'Owner': owner, 'JobGroup': ','.join(str(jg) for jg in jobGroups), 'Date': selDate } from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() jobs = [] if jobGroups: for jobGroup in jobGroups: res = dirac.selectJobs(status=status, minorStatus=minorStatus, applicationStatus=appStatus, site=site, owner=owner, jobGroup=jobGroup, date=date, printErrors=False) if res['OK']: jobs.extend(res['Value']) else: gLogger.error("Can't select jobs: ", res['Message']) else: res = dirac.selectJobs(status=status, minorStatus=minorStatus, applicationStatus=appStatus, site=site, owner=owner, date=date, printErrors=False)
# ## ## ## ## ## ### ## ### ## ## ### ## ## # ## ## ####### ## ## ## ## #### ## ## ###### # get jobs from today and yesterday... days = [] for i in range(2): # how many days do you want to look back? days.append((datetime.date.today() - datetime.timedelta(days=i)).isoformat()) # get list of run_tokens that are currently running / waiting running_ids = set() running_names = [] for status in ["Waiting", "Running", "Checking"]: for day in days: try: [running_ids.add(id) for id in dirac.selectJobs( status=status, date=day, owner=username)['Value']] except KeyError: pass n_jobs = len(running_ids) if n_jobs > 0: print("getting names from {} running/waiting jobs... please wait..." .format(n_jobs)) for i, id in enumerate(running_ids): if ((100 * i) / n_jobs) % 5 == 0: print("\r{} %".format(((20 * i) / n_jobs) * 5)), jobname = dirac.attributes(id)["Value"]["JobName"] running_names.append(jobname) else: print("\n... done")
# ## ## ## ## ## ### ## ### ## ## ### ## ## # ## ## ####### ## ## ## ## #### ## ## ###### # get jobs from today and yesterday... days = [] for i in range(2): # how many days do you want to look back? days.append((datetime.date.today() - datetime.timedelta(days=i)).isoformat()) # get list of run_tokens that are currently running / waiting running_ids = set() running_names = [] for status in ["Waiting", "Running", "Checking"]: for day in days: try: [running_ids.add(id) for id in dirac.selectJobs( status=status, date=day, owner="tmichael")['Value']] except KeyError: pass n_jobs = len(running_ids) if n_jobs > 0: print("getting names from {} running/waiting jobs... please wait..." .format(n_jobs)) for i, id in enumerate(running_ids): if ((100 * i) / n_jobs) % 5 == 0: print("\r{} %".format(((20 * i) / n_jobs) * 5)), jobname = dirac.attributes(id)["Value"]["JobName"] running_names.append(jobname) else: print("\n... done")
def main():
    """Grep a string in the output sandbox of selected jobs.

    The single positional argument is the search string; switches select
    the jobs. For every selected job the output sandbox is downloaded,
    the chosen file (default std.out) is scanned, and matching jobs are
    printed with their (last) matching line. Exits 0 on success, 2 on
    selection or retrieval errors.

    Fixes relative to the previous revision:
    - the sandbox file is opened with a context manager (it was never closed);
    - collected errors in ``errorList`` are now reported (they were
      silently dropped);
    - dead local ``selDate`` removed.
    """
    Script.registerSwitch("", "Status=", "Primary status")
    Script.registerSwitch("", "MinorStatus=", "Secondary status")
    Script.registerSwitch("", "ApplicationStatus=", "Application status")
    Script.registerSwitch("", "Site=", "Execution site")
    Script.registerSwitch("", "Owner=", "Owner (DIRAC nickname)")
    Script.registerSwitch("", "JobGroup=", "Select jobs for specified job group")
    Script.registerSwitch(
        "", "Date=", "Date in YYYY-MM-DD format, if not specified default is today")
    Script.registerSwitch("", "File=", "File name,if not specified default is std.out ")
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    # Default values
    status = None
    minorStatus = None
    appStatus = None
    site = None
    owner = None
    jobGroup = None
    date = None
    filename = 'std.out'

    # Exactly one positional argument (the search string) is required
    if len(args) != 1:
        Script.showHelp()

    searchstring = str(args[0])

    for switch in Script.getUnprocessedSwitches():
        if switch[0].lower() == "status":
            status = switch[1]
        elif switch[0].lower() == "minorstatus":
            minorStatus = switch[1]
        elif switch[0].lower() == "applicationstatus":
            appStatus = switch[1]
        elif switch[0].lower() == "site":
            site = switch[1]
        elif switch[0].lower() == "owner":
            owner = switch[1]
        elif switch[0].lower() == "jobgroup":
            jobGroup = switch[1]
        elif switch[0].lower() == "date":
            date = switch[1]
        elif switch[0].lower() == "file":
            filename = switch[1]

    from DIRAC.Interfaces.API.Dirac import Dirac
    dirac = Dirac()
    exitCode = 0
    errorList = []
    resultDict = {}

    result = dirac.selectJobs(status=status, minorStatus=minorStatus,
                              applicationStatus=appStatus, site=site,
                              owner=owner, jobGroup=jobGroup, date=date)
    if result['OK']:
        jobs = result['Value']
    else:
        print("Error in selectJob", result['Message'])
        DIRAC.exit(2)

    for job in jobs:
        result = dirac.getOutputSandbox(job)
        if result['OK']:
            if os.path.exists('%s' % job):
                lines = []
                try:
                    # Context manager guarantees the file is closed
                    with open(os.path.join(job, filename)) as sandboxFile:
                        lines = sandboxFile.readlines()
                except Exception as x:
                    errorList.append((job, x))
                for line in lines:
                    if line.count(searchstring):
                        resultDict[job] = line
                # Clean up the downloaded sandbox directory
                rmtree("%s" % (job))
        else:
            errorList.append((job, result['Message']))
            exitCode = 2

    # Report collected errors instead of silently discarding them
    for error in errorList:
        print("ERROR %s: %s" % error)

    for result in resultDict.items():
        print(result)

    DIRAC.exit(exitCode)
def main():
    """Summarise jobs at a (BOINC) site.

    Prints job counts per status and per worker node, and optionally the
    full job list. Jobs can be filtered by site, status, minor status,
    worker node, local batch ID and date/interval.
    """
    # --- defaults --------------------------------------------------------
    site = "BOINC.World.org"
    status = ["Running"]
    minorStatus = None
    workerNodes = None
    since = None
    date = "today"
    full = False
    until = None
    batchIDs = None

    # --- command-line declaration ---------------------------------------
    Script.registerSwitch("", "Site=", " Select site (default: %s)" % site)
    Script.registerSwitch("", "Status=", " Select status (default: %s)" % status)
    Script.registerSwitch("", "MinorStatus=", " Select minor status")
    Script.registerSwitch("", "WorkerNode=", " Select WN")
    Script.registerSwitch("", "BatchID=", " Select batch jobID")
    Script.registerSwitch(
        "", "Since=",
        " Date since when to select jobs, or number of days (default: today)")
    Script.registerSwitch("", "Date=", " Specify the date (check for a full day)")
    Script.registerSwitch(
        "", "Full", " Printout full list of job (default: False except if --WorkerNode)")
    Script.parseCommandLine()

    from DIRAC import gLogger
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient

    # --- command-line parsing -------------------------------------------
    for name, val in Script.getUnprocessedSwitches():
        if name == "Site":
            site = val
        elif name == "MinorStatus":
            minorStatus = val
        elif name == "Status":
            status = [None] if val.lower() == "all" else val.split(",")
        elif name == "WorkerNode":
            workerNodes = val.split(",")
        elif name == "BatchID":
            try:
                batchIDs = [int(oneID) for oneID in val.split(",")]
            except Exception:
                gLogger.error("Invalid jobID", val)
                DIRAC.exit(1)
        elif name == "Full":
            full = True
        elif name == "Date":
            since = val.split()[0]
            until = str(datetime.datetime.strptime(since, "%Y-%m-%d") +
                        datetime.timedelta(days=1)).split()[0]
        elif name == "Since":
            date = val.lower()
            if date == "today":
                since = None
            elif date == "yesterday":
                since = 1
            elif date == "ever":
                since = 2 * 365
            elif date.isdigit():
                since = int(date)
                date += " days"
            else:
                since = date
            # turn a day count into an absolute 'YYYY-MM-DD' string
            if isinstance(since, int):
                since = str(datetime.datetime.now() - datetime.timedelta(days=since)).split()[0]

    # Filtering by WN or batch ID implies printing the job list
    if workerNodes or batchIDs:
        # status = [None]
        full = True

    monitoring = JobMonitoringClient()
    dirac = Dirac()

    # --- select the jobs -------------------------------------------------
    jobs = set()
    for selStatus in status:
        reply = dirac.selectJobs(site=site, date=since, status=selStatus, minorStatus=minorStatus)
        if not reply["OK"]:
            gLogger.error("Error selecting jobs", reply["Message"])
            DIRAC.exit(1)
        allJobs = {int(job) for job in reply["Value"]}
        if until:
            # --Date mode: drop the jobs still matching on the next day
            reply = dirac.selectJobs(site=site, date=until, status=selStatus)
            if not reply["OK"]:
                gLogger.error("Error selecting jobs", reply["Message"])
                DIRAC.exit(1)
            allJobs -= {int(job) for job in reply["Value"]}
        jobs.update(allJobs)

    if not jobs:
        gLogger.always("No jobs found...")
        DIRAC.exit(0)

    allJobs = set()
    result = {}
    wnJobs = {}
    gLogger.always("%d jobs found" % len(jobs))

    # --- fetch host name / local batch ID, apply WN and batchID filters --
    for job in jobs:
        reply = monitoring.getJobParameter(job, "HostName")
        node = reply.get("Value", {}).get("HostName", "Unknown")
        reply = monitoring.getJobParameter(job, "LocalJobID")
        batchID = reply.get("Value", {}).get("LocalJobID", "Unknown")
        if workerNodes:
            if not [wn for wn in workerNodes if node.startswith(wn)]:
                continue
            allJobs.add(job)
        if batchIDs:
            if batchID not in batchIDs:
                continue
            allJobs.add(job)
        if full or status == [None]:
            allJobs.add(job)
        result.setdefault(job, {})["Status"] = status
        result[job]["Node"] = node
        result[job]["LocalJobID"] = batchID
        wnJobs[node] = wnJobs.setdefault(node, 0) + 1

    # --- if necessary, fetch the jobs' detailed states -------------------
    statusCounters = {}
    if allJobs:
        allJobs = sorted(allJobs, reverse=True)
        reply = monitoring.getJobsStates(allJobs)
        if not reply["OK"]:
            gLogger.error("Error getting job parameter", reply["Message"])
        else:
            jobStates = reply["Value"]
            for job in allJobs:
                stateLine = (
                    jobStates.get(job, {}).get("Status", "Unknown")
                    + "; "
                    + jobStates.get(job, {}).get("MinorStatus", "Unknown")
                    + "; "
                    + jobStates.get(job, {}).get("ApplicationStatus", "Unknown"))
                result[job]["Status"] = stateLine
                statusCounters[stateLine] = statusCounters.setdefault(stateLine, 0) + 1
    elif not workerNodes and not batchIDs:
        allJobs = sorted(jobs, reverse=True)

    # --- report ----------------------------------------------------------
    if workerNodes or batchIDs:
        gLogger.always("Found %d jobs at %s, WN %s (since %s):" % (len(allJobs), site, workerNodes, date))
        if allJobs:
            gLogger.always("List of jobs:", ",".join([str(job) for job in allJobs]))
    else:
        if status == [None]:
            gLogger.always("Found %d jobs at %s (since %s):" % (len(allJobs), site, date))
            for stateLine in sorted(statusCounters):
                gLogger.always("%d jobs %s" % (statusCounters[stateLine], stateLine))
        else:
            gLogger.always("Found %d jobs %s at %s (since %s):" % (len(allJobs), status, site, date))
        # nodes listed by decreasing job count
        gLogger.always(
            "List of WNs:",
            ",".join([
                "%s (%d)" % (node, wnJobs[node])
                for node in sorted(
                    wnJobs, key=cmp_to_key(lambda n1, n2: (wnJobs[n2] - wnJobs[n1])))
            ]),
        )

    if full:
        if workerNodes or batchIDs:
            # group jobs by short worker-node name before printing
            nodeJobs = {}
            for job in allJobs:
                status = result[job]["Status"]
                node = result[job]["Node"].split(".")[0]
                jobID = result[job].get("LocalJobID")
                nodeJobs.setdefault(node, []).append((jobID, job, status))
            if not workerNodes:
                workerNodes = sorted(nodeJobs)
            for node in workerNodes:
                for job in nodeJobs.get(node.split(".")[0], []):
                    gLogger.always("%s " % node + "(%s): %s - %s" % job)
        else:
            for job in allJobs:
                status = result[job]["Status"]
                node = result[job]["Node"]
                jobID = result[job].get("LocalJobID")
                gLogger.always("%s (%s): %s - %s" % (node, jobID, job, status))
def main():
    """Submit protopipe TRAINING or DL2 analysis jobs to the DIRAC grid.

    Reads the general and analysis configuration files, builds the pilot
    command line for either ``data_training.py`` (TRAINING) or
    ``write_dl2.py`` (DL2), groups the input simtel files into bunches of
    ``n_file_per_job`` via ``Dirac.splitInputData``, and submits one DIRAC
    ``Job`` per bunch — skipping bunches whose output already exists on the
    grid SE or whose job is still running/waiting.  Finally uploads the
    analysis configuration file for provenance.

    Relies on module-level state defined elsewhere in this file:
    ``switches`` (parsed CLI options), ``load_config``, the ``Dirac`` and
    ``Job`` APIs, plus ``os``, ``sys``, ``re``, ``datetime``, ``subprocess``.
    """
    # This object pilots everything related to the GRID.
    dirac = Dirac()

    # NOTE(review): `in` is a substring test here ("" or "TRAIN" would also
    # match "TRAINING") — presumably output_type is always one of the exact
    # literals; confirm upstream validation.
    if switches["output_type"] in "TRAINING":
        print("Preparing submission for TRAINING data")
    elif switches["output_type"] in "DL2":
        print("Preparing submission for DL2 data")
    else:
        print("You have to choose either TRAINING or DL2 as output type!")
        sys.exit()

    # Read the general configuration file.
    cfg = load_config(switches["config_file"])

    # Analysis settings.
    config_path = cfg["General"]["config_path"]
    config_file = cfg["General"]["config_file"]
    mode = cfg["General"]["mode"]  # only one cleaning mode handled per call
    particle = cfg["General"]["particle"]
    estimate_energy = cfg["General"]["estimate_energy"]
    force_tailcut_for_extended_cleaning = cfg["General"][
        "force_tailcut_for_extended_cleaning"]

    # Take parameters from the analysis configuration file.
    ana_cfg = load_config(os.path.join(config_path, config_file))
    config_name = ana_cfg["General"]["config_name"]
    cam_id_list = ana_cfg["General"]["cam_id_list"]

    # Regressor and classifier methods.
    regressor_method = ana_cfg["EnergyRegressor"]["method_name"]
    classifier_method = ana_cfg["GammaHadronClassifier"]["method_name"]

    # DL2 may be produced without a score and/or energy estimation.
    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # GRID settings.
    outdir = os.path.join(cfg["GRID"]["outdir"], config_name)
    n_file_per_job = cfg["GRID"]["n_file_per_job"]
    n_jobs_max = cfg["GRID"]["n_jobs_max"]
    model_dir = cfg["GRID"]["model_dir"]
    training_dir_energy = cfg["GRID"]["training_dir_energy"]
    training_dir_classification = cfg["GRID"]["training_dir_classification"]
    dl2_dir = cfg["GRID"]["dl2_dir"]
    home_grid = cfg["GRID"]["home_grid"]
    user_name = cfg["GRID"]["user_name"]
    banned_sites = cfg["GRID"]["banned_sites"]

    # HACK: forced tail cuts are propagated to the script arguments below.
    if force_tailcut_for_extended_cleaning is True:
        print("Force tail cuts for extended cleaning!!!")

    # Command the pilot runs to set up the software environment.
    source_ctapipe = "source /cvmfs/cta.in2p3.fr/software/conda/dev/setupConda.sh"
    source_ctapipe += " && conda activate ctapipe_v0.11.0"

    # Build the analysis-script argument list; the {outfile}, {infile_name}
    # and {mode} placeholders are filled in per job further down.
    if switches["output_type"] in "TRAINING":
        execute = "data_training.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--estimate_energy={}".format(str(estimate_energy)),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "TRAINING"
    elif switches["output_type"] in "DL2":
        execute = "write_dl2.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--classifier_config={}.yaml".format(classifier_method),
            "--classifier_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--force_tailcut_for_extended_cleaning={}".format(
                force_tailcut_for_extended_cleaning),
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "DL2"

    # Make the script save also the full calibrated images if required.
    if switches["save_images"] is True:
        script_args.append("--save_images")

    # Make the script print debug information if required.
    if switches["debug_script"] is True:
        script_args.append("--debug")

    cmd = [source_ctapipe, "&&", "./" + execute]
    cmd += script_args

    pilot_args_write = " ".join(cmd)

    # Table-merging command, used when a job processes multiple runs.
    pilot_args_merge = " ".join([
        source_ctapipe,
        "&&",
        "./merge_tables.py",
        "--template_file_name",
        "{in_name}",
        "--outfile",
        "{out_name}",
    ])

    # Pick the input file lists matching the requested production step.
    prod3b_filelist = dict()
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["EnergyRegressor"]["gamma_list"]
    elif estimate_energy is True and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["GammaHadronClassifier"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["GammaHadronClassifier"]["proton_list"]
    elif switches["output_type"] in "DL2":
        prod3b_filelist["gamma"] = cfg["Performance"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["Performance"]["proton_list"]
        prod3b_filelist["electron"] = cfg["Performance"]["electron_list"]

    # Split the list of input files into bunches of n_file_per_job files,
    # grouped by storage element.
    with open(prod3b_filelist[particle]) as f:
        filelist = f.readlines()
    filelist = ["{}".format(_.replace("\n", "")) for _ in filelist]
    res = dirac.splitInputData(filelist, n_file_per_job)
    list_run_to_loop_on = res["Value"]

    # Define a template name for the file that's going to be written out;
    # the placeholder braces are set during the file loop below.
    output_filename = output_filename_template
    output_path = outdir
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_energy)
        step = "energy"
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_classification)
        step = "classification"
    if switches["output_type"] in "DL2":
        if force_tailcut_for_extended_cleaning is False:
            output_path += "/{}/".format(dl2_dir)
        else:
            output_path += "/{}_force_tc_extended_cleaning/".format(dl2_dir)
        step = ""
    output_filename += "_{}.h5"

    # Local files uploaded with every job; names starting with `LFN:` are
    # fetched from the GRID instead.
    input_sandbox = [
        # utility that maps one job to one command
        os.path.expandvars("$GRID/pilot.sh"),
        os.path.expandvars("$PROTOPIPE/protopipe/"),
        os.path.expandvars("$GRID/merge_tables.py"),
        # the analysis script being run
        os.path.expandvars("$PROTOPIPE/protopipe/scripts/" + execute),
        # configuration file
        os.path.expandvars(os.path.join(config_path, config_file)),
    ]

    # Collect the model/config LFNs each job must download as input data.
    models_to_upload = []
    configs_to_upload = []
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        # Classification training needs the pre-trained energy regressor.
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        config_to_upload = config_path_template.format(regressor_method)
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "regressor_{}_{}.pkl.gz")
        for cam_id in cam_id_list:
            model_to_upload = model_path_template.format(
                cam_id, regressor_method)  # TBC
            print("The following model(s) will be uploaded to the GRID:")
            print(model_to_upload)
            models_to_upload.append(model_to_upload)
        print(
            "The following configs(s) for such models will be uploaded to the GRID:"
        )
        print(config_to_upload)
        configs_to_upload.append(config_to_upload)
    elif estimate_energy is False and switches["output_type"] in "TRAINING":
        # Energy-regressor training needs no pre-trained model.
        pass
    else:
        # DL2 additionally needs the gamma/hadron classifier.
        model_type_list = ["regressor", "classifier"]
        model_method_list = [regressor_method, classifier_method]
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "{}_{}_{}.pkl.gz")
        if force_tailcut_for_extended_cleaning is True:
            # NOTE(review): force_mode is computed but never read below —
            # looks vestigial; confirm before removing.
            force_mode = mode.replace("wave", "tail")
            print("################")
            print(force_mode)
        else:
            force_mode = mode
        for idx, model_type in enumerate(model_type_list):
            print(
                "The following configuration file will be uploaded to the GRID:"
            )
            config_to_upload = config_path_template.format(
                model_method_list[idx])
            print(config_to_upload)
            configs_to_upload.append(config_to_upload)  # upload only 1 copy
            print(
                "The following model(s) related to such configuration file will be uploaded to the GRID:"
            )
            for cam_id in cam_id_list:
                # Skip models the user chose not to apply.
                if model_type in "regressor" and use_regressor is False:
                    print("Do not upload regressor model on GRID!!!")
                    continue
                if model_type in "classifier" and use_classifier is False:
                    print("Do not upload classifier model on GRID!!!")
                    continue
                model_to_upload = model_path_template.format(
                    model_type_list[idx], cam_id, model_method_list[idx])
                print(model_to_upload)
                models_to_upload.append(model_to_upload)

    # Summary before submitting.
    print("\nDEBUG> running as:")
    print(pilot_args_write)
    print("\nDEBUG> with input_sandbox:")
    print(input_sandbox)
    print("\nDEBUG> with output file:")
    print(output_filename.format("{job_name}"))
    print("\nDEBUG> Particles:")
    print(particle)
    print("\nDEBUG> Energy estimation:")
    print(estimate_energy)

    # ------------------------- submission loop -------------------------

    # List files already on the GRID SE space so jobs whose output exists
    # are not re-submitted.
    batcmd = "dirac-dms-user-lfns --BaseDir {}".format(
        os.path.join(home_grid, output_path))
    result = subprocess.check_output(batcmd, shell=True)
    try:
        grid_filelist = open(result.split()[-1]).read()
    except IOError:
        raise IOError("ERROR> cannot read GRID filelist...")

    # Get jobs from today and yesterday...
    days = []
    for i in range(2):  # how many days do you want to look back?
        days.append(
            (datetime.date.today() - datetime.timedelta(days=i)).isoformat())

    # Collect names of jobs currently running / waiting so they are not
    # submitted twice.
    running_ids = set()
    running_names = []
    for status in ["Waiting", "Running", "Checking"]:
        for day in days:
            try:
                [
                    running_ids.add(id) for id in dirac.selectJobs(
                        status=status, date=day, owner=user_name)["Value"]
                ]
            except KeyError:
                # selectJobs returned no "Value" key (no matching jobs).
                pass

    n_jobs = len(running_ids)
    if n_jobs > 0:
        print("Scanning {} running/waiting jobs... please wait...".format(
            n_jobs))
        for i, id in enumerate(running_ids):
            # Crude progress indicator (the trailing comma after print is a
            # Python 2 leftover; harmless in Python 3).
            if ((100 * i) / n_jobs) % 5 == 0:
                print("\r{} %".format(((20 * i) / n_jobs) * 5)),
            jobname = dirac.getJobAttributes(id)["Value"]["JobName"]
            running_names.append(jobname)
        else:
            # NOTE(review): for/else — runs once the scan completes;
            # confirm against the original indentation.
            print("\n... done")

    for bunch in list_run_to_loop_on:
        # Select the `runxxx` part of the first and last file in the bunch
        # and join them with a dash to build a readable identifier for the
        # output file name (a single-file bunch keeps just its own token).
        run_token = re.split("_", bunch[0])[3]
        if len(bunch) > 1:
            run_token = "-".join([run_token, re.split("_", bunch[-1])[3]])

        print("-" * 50)
        print("-" * 50)

        # Set the output name.
        output_filenames = dict()
        if switches["output_type"] in "DL2":
            job_name = "protopipe_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], particle, run_token,
                mode)
            output_filenames[mode] = output_filename.format("_".join(
                [particle, mode, run_token]))
        else:
            job_name = "protopipe_{}_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], step, particle,
                run_token, mode)
            output_filenames[mode] = output_filename.format("_".join(
                [step, particle, mode, run_token]))

        # If the job is already running / waiting, skip it.
        if job_name in running_names:
            print("\n WARNING> {} still running\n".format(job_name))
            continue

        print("Output file name: {}".format(output_filenames[mode]))

        # If the file is already in GRID storage, skip the job (it cannot
        # be overwritten there: delete it and resubmit).
        # NOTE(review): already_exist is assigned but never read.
        already_exist = False
        file_on_grid = os.path.join(output_path, output_filenames[mode])
        print("DEBUG> check for existing file on GRID...")
        if file_on_grid in grid_filelist:
            print("\n WARNING> {} already on GRID SE\n".format(job_name))
            continue

        # Respect the configured cap on the number of submitted jobs.
        if n_jobs_max == 0:
            print("WARNING> maximum number of jobs to submit reached")
            print("WARNING> breaking loop now")
            break
        else:
            n_jobs_max -= 1

        j = Job()
        # Runtime in seconds times 8 (CPU normalisation factor).
        j.setCPUTime(6 * 3600 * 8)
        j.setName(job_name)
        j.setInputSandbox(input_sandbox)

        if banned_sites:
            j.setBannedSites(banned_sites)

        # Add the simtel files as input data.
        j.setInputData(bunch)

        # The pilot consecutively downloads each data file, processes it
        # and deletes it before moving on; per-file outputs are merged
        # afterwards.
        for run_file in bunch:
            file_token = re.split("_", run_file)[3]

            if switches["output_type"] in "DL2":
                output_filename_temp = output_filename.format("_".join(
                    [particle, mode, file_token]))
            if switches["output_type"] in "TRAINING":
                output_filename_temp = output_filename.format("_".join(
                    [step, particle, mode, file_token]))

            # Source the conda ctapipe environment and run the analysis
            # script with all its arguments.
            j.setExecutable(
                "./pilot.sh",
                pilot_args_write.format(
                    outfile=output_filename_temp,
                    infile_name=os.path.basename(run_file),
                    mode=mode,
                ),
            )

            # Remove the current file to clear space.
            j.setExecutable("rm", os.path.basename(run_file))

        # Simple `ls` for good measure.
        j.setExecutable("ls", "-lh")

        # If there is more than one file per job, merge the output tables.
        if len(bunch) > 1:
            names = []
            names.append(("*_{}_".format(particle), output_filenames[mode]))
            for in_name, out_name in names:
                print("in_name: {}, out_name: {}".format(in_name, out_name))
                j.setExecutable(
                    "./pilot.sh",
                    pilot_args_merge.format(in_name=in_name,
                                            out_name=out_name),
                )
                print("DEBUG> args append: {}".format(
                    pilot_args_merge.format(in_name=in_name,
                                            out_name=out_name)))

        # Models and configs are delivered through the input data as well.
        # NOTE(review): this second setInputData call overrides the earlier
        # one with the extended bunch — the first call looks redundant.
        bunch.extend(models_to_upload)
        bunch.extend(configs_to_upload)
        j.setInputData(bunch)
        print("Input data set to job = {}".format(bunch))

        outputs = []
        outputs.append(output_filenames[mode])
        print("Output file path: {}{}".format(output_path,
                                              output_filenames[mode]))
        j.setOutputData(outputs, outputSE=None, outputPath=output_path)

        # Check whether we should stop here (dry run: nothing submitted).
        if switches["dry"] is True:
            print("\nThis is a DRY RUN! -- NO job has been submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # This sends the job to the GRID and uploads all the input-sandbox
        # files in the process.
        print("\nSUBMITTING job with the following INPUT SANDBOX:")
        print(input_sandbox)
        print("Submission RESULT: {}\n".format(dirac.submitJob(j)["Value"]))

        # Break if this is only a test submission.
        if switches["test"] is True:
            print("This is a TEST RUN! -- Only ONE job will be submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # Since there are two nested loops, need to break again.
        # NOTE(review): only one submission loop is visible here, so this
        # second break can never fire after the one above — confirm against
        # the original indentation.
        if switches["test"] is True:
            break

    # Best-effort cleanup of temporary tarballs.
    try:
        os.remove("datapipe.tar.gz")
        os.remove("modules.tar.gz")
    except:  # noqa: E722 — deliberate best-effort cleanup
        pass

    # Upload the analysis configuration file for provenance.
    SE_LIST = ['CC-IN2P3-USER', 'DESY-ZN-USER', 'CNAF-USER', 'CEA-USER']
    analysis_config_local = os.path.join(config_path, config_file)
    # The configuration file is uploaded to the data directory because the
    # training samples (as well as their cleaning settings) are independent.
    analysis_config_dirac = os.path.join(home_grid, output_path, config_file)
    print("Uploading {} to {}...".format(analysis_config_local,
                                         analysis_config_dirac))

    if switches["dry"] is False:
        # Upload this file to all DIRAC storage elements in SE_LIST.
        for se in SE_LIST:
            # The uploaded config file overwrites any old copy.
            ana_cfg_upload_cmd = "dirac-dms-add-file -f {} {} {}".format(
                analysis_config_dirac, analysis_config_local, se)
            ana_cfg_upload_result = subprocess.check_output(
                ana_cfg_upload_cmd, shell=True)
            print(ana_cfg_upload_result)
    else:
        print("This is a DRY RUN! -- analysis.yaml has NOT been uploaded.")

    print("\nall done -- exiting now")
    exit()
def main():
    """Select DIRAC jobs matching the given conditions and print their IDs.

    Registers the selection switches (status, minor/application status,
    site, owner, job group, date), queries the DIRAC job database through
    ``Dirac.selectJobs`` — once per job group when groups were given — and
    reports the selected job IDs together with the conditions used.  At
    most ``Maximum`` jobs are shown (default 100; 0 means all).

    Exits via ``DIRAC.exit``; exit code 1 on an invalid ``Maximum`` value.
    """
    maxJobs = 100
    Script.registerSwitch("", "Status=", "Primary status")
    Script.registerSwitch("", "MinorStatus=", "Secondary status")
    Script.registerSwitch("", "ApplicationStatus=", "Application status")
    Script.registerSwitch("", "Site=", "Execution site")
    Script.registerSwitch("", "Owner=", "Owner (DIRAC nickname)")
    Script.registerSwitch("", "JobGroup=", "Select jobs for specified job group")
    Script.registerSwitch("", "Date=", "Date in YYYY-MM-DD format, if not specified default is today")
    Script.registerSwitch("", "Maximum=", "Maximum number of jobs shown (default %d, 0 means all)" % maxJobs)
    switches, args = Script.parseCommandLine(ignoreErrors=True)

    # Default values
    status = None
    minorStatus = None
    appStatus = None
    site = None
    owner = None
    jobGroups = []
    date = None

    # This script takes no positional arguments.
    if args:
        Script.showHelp()

    exitCode = 0

    for switch in switches:
        if switch[0].lower() == "status":
            status = switch[1]
        elif switch[0].lower() == "minorstatus":
            minorStatus = switch[1]
        elif switch[0].lower() == "applicationstatus":
            appStatus = switch[1]
        elif switch[0].lower() == "site":
            site = switch[1]
        elif switch[0].lower() == "owner":
            owner = switch[1]
        elif switch[0].lower() == "jobgroup":
            # Purely numeric job groups are zero-padded to 8 digits.
            for jg in switch[1].split(","):
                if jg.isdigit():
                    jobGroups.append("%08d" % int(jg))
                else:
                    jobGroups.append(jg)
        elif switch[0].lower() == "date":
            date = switch[1]
        elif switch[0].lower() == "maximum":
            # BUGFIX: this branch compared the raw switch name ("Maximum")
            # while every other branch lowercases, and caught TypeError
            # although int() raises ValueError on a bad literal — so a
            # malformed value crashed instead of producing the fatal log.
            try:
                maxJobs = int(switch[1])
            except (ValueError, TypeError):
                gLogger.fatal("Invalid max number of jobs", switch[1])
                DIRAC.exit(1)

    # "Today" is only used in the final report; selectJobs itself treats a
    # None date as today.
    selDate = date
    if not date:
        selDate = "Today"
    conditions = {
        "Status": status,
        "MinorStatus": minorStatus,
        "ApplicationStatus": appStatus,
        "Owner": owner,
        "JobGroup": ",".join(str(jg) for jg in jobGroups),
        "Date": selDate,
    }

    from DIRAC.Interfaces.API.Dirac import Dirac

    dirac = Dirac()
    jobs = []

    if jobGroups:
        # One query per job group; failures are logged but do not abort
        # the remaining queries.
        for jobGroup in jobGroups:
            res = dirac.selectJobs(
                status=status,
                minorStatus=minorStatus,
                applicationStatus=appStatus,
                site=site,
                owner=owner,
                jobGroup=jobGroup,
                date=date,
                printErrors=False,
            )
            if res["OK"]:
                jobs.extend(res["Value"])
            else:
                gLogger.error("Can't select jobs: ", res["Message"])
    else:
        res = dirac.selectJobs(
            status=status,
            minorStatus=minorStatus,
            applicationStatus=appStatus,
            site=site,
            owner=owner,
            date=date,
            printErrors=False,
        )
        if res["OK"]:
            jobs.extend(res["Value"])
        else:
            gLogger.error("Can't select jobs: ", res["Message"])

    # Human-readable description of the non-empty selection conditions.
    conds = ["%s = %s" % (n, v) for n, v in conditions.items() if v]

    # Truncate the report to maxJobs entries (maxJobs == 0 shows all).
    if maxJobs and len(jobs) > maxJobs:
        jobs = jobs[:maxJobs]
        constrained = " (first %d shown) " % maxJobs
    else:
        constrained = " "

    if jobs:
        gLogger.notice(
            "==> Selected %s jobs%swith conditions: %s\n%s"
            % (len(jobs), constrained, ", ".join(conds), ",".join(jobs))
        )
    else:
        gLogger.notice("No jobs were selected with conditions:", ", ".join(conds))

    DIRAC.exit(exitCode)
'JobGroup': ','.join(str(jg) for jg in jobGroups), 'Date': selDate} from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() jobs = [] if jobGroups: for jobGroup in jobGroups: res = dirac.selectJobs(status=status, minorStatus=minorStatus, applicationStatus=appStatus, site=site, owner=owner, jobGroup=jobGroup, date=date, printErrors=False) if res['OK']: jobs.extend(res['Value']) else: gLogger.error("Can't select jobs: ", res['Message']) else: res = dirac.selectJobs(status=status, minorStatus=minorStatus, applicationStatus=appStatus, site=site, owner=owner, date=date, printErrors=False)
exitCode = 0 iKey = "FailureReason" args = Script.getPositionalArgs() if len(args): iKey = args[0] print '*INFO* looking for key %s'%iKey # first, get all jobs which are marked as stalled status = "Failed" minor_stat = "Job stalled: pilot not running" owner = "zimmer" dirac = Dirac() jobs = [] conditions = {"Status":status,"MinorStatus":minor_stat,"Owner":owner} res = dirac.selectJobs( status = status, minorStatus = minor_stat, owner = owner) if not res['OK']: gLogger("ERROR retrieving jobs") gLogger(res["Message"]) exitCode = 2 else: conds = [] for n, v in conditions.items(): if v: conds.append( '%s = %s' % ( n, v ) ) jobs = res['Value'] pilot_refs = {} for job in jobs: # next get pilot refs key = job
group = None jobs = [] for sw, value in Script.getUnprocessedSwitches(): if sw in ('D', 'Dir'): outputDir = value elif sw.lower() in ('f', 'file'): if os.path.exists(value): jFile = open(value) jobs += jFile.read().split() jFile.close() elif sw.lower() in ('g', 'jobgroup'): group = value jobDate = toString(date() - 30 * day) # Choose jobs in final state, no more than 30 days old result = dirac.selectJobs(jobGroup=value, date=jobDate, status='Done') if not result['OK']: if not "No jobs selected" in result['Message']: print "Error:", result['Message'] DIRAC.exit(-1) else: jobs += result['Value'] result = dirac.selectJobs(jobGroup=value, date=jobDate, status='Failed') if not result['OK']: if not "No jobs selected" in result['Message']: print "Error:", result['Message'] DIRAC.exit(-1) else: jobs += result['Value']
#print owner, jobGroup, nHours # Start doing something # import Dirac here (and not on the top of the file) if you don't want to get into trouble from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() # dirac.selectJobs( status='Failed', owner='paterson', site='LCG.CERN.ch') # owner=owner, date=jobDate onehour = datetime.timedelta(hours = 1) now=datetime.datetime.now() Script.gLogger.notice(now) results=dirac.selectJobs(jobGroup=jobGroup, owner=owner, date=now-nHours*onehour) if not results.has_key('Value'): Script.gLogger.notice("No job found for group \"%s\" and owner \"%s\" in the past %s hours"% (jobGroup, owner, nHours)) Script.sys.exit(0) # Found some jobs, print information jobsList=results['Value'] Script.gLogger.notice("%s jobs found for group \"%s\" and owner \"%s\" in the past %s hours\n"% (len(jobsList), jobGroup, owner, nHours)) status=dirac.status(jobsList) # for details #print dirac.getJobSummary(3075536)
# get jobs from today and yesterday... days = [] for i in range(2): # how many days do you want to look back? days.append( (datetime.date.today() - datetime.timedelta(days=i)).isoformat()) # get list of run_tokens that are currently running / waiting running_ids = set() running_names = [] for status in ["Waiting", "Running", "Checking"]: for day in days: try: [ running_ids.add(id) for id in dirac.selectJobs( status=status, date=day, #owner="tmichael")['Value']] owner="jlefaucheur")['Value'] ] except KeyError: pass n_jobs = len(running_ids) if n_jobs > 0: print( "getting names from {} running/waiting jobs... please wait...".format( n_jobs)) for i, id in enumerate(running_ids): if ((100 * i) / n_jobs) % 5 == 0: print("\r{} %".format(((20 * i) / n_jobs) * 5)), jobname = dirac.attributes(id)["Value"]["JobName"]
from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments from DIRAC.Core.Utilities.Time import toString, date, day dirac = Dirac() jobs = [] for sw, value in Script.getUnprocessedSwitches(): if sw.lower() in ('f', 'file'): if os.path.exists(value): jFile = open(value) jobs += jFile.read().split() jFile.close() elif sw.lower() in ('g', 'jobgroup'): group = value jobDate = toString(date() - 30 * day) result = dirac.selectJobs(jobGroup=value, date=jobDate) if not result['OK']: if "No jobs selected" not in result['Message']: print("Error:", result['Message']) DIRAC.exit(-1) else: jobs += result['Value'] for arg in parseArguments(args): jobs.append(arg) if not jobs: print("Warning: no jobs selected") Script.showHelp() DIRAC.exit(0)
from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() exitCode = 0 errorList = [] jobs = [] for sw, value in Script.getUnprocessedSwitches(): if sw.lower() in ("f", "file"): if os.path.exists(value): jFile = open(value) jobs += jFile.read().split() jFile.close() elif sw.lower() in ("g", "jobgroup"): group = value result = dirac.selectJobs(jobGroup=value) if not result["OK"]: if not "No jobs selected" in result["Message"]: print "Error:", result["Message"] DIRAC.exit(-1) else: jobs += result["Value"] for arg in args: jobs.append(arg) if not jobs: print "Warning: no jobs selected" Script.showHelp() DIRAC.exit(0)
date = switch[1] elif switch[0].lower() == "file": filename = switch[1] selDate = date if not date: selDate = 'Today' from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() exitCode = 0 errorList = [] resultDict = {} result = dirac.selectJobs( status = status, minorStatus = minorStatus, applicationStatus = appStatus, site = site, owner = owner, jobGroup = jobGroup, date = date ) if result['OK']: jobs = result['Value'] else: print("Error in selectJob", result['Message']) DIRAC.exit( 2 ) for job in jobs: result = dirac.getOutputSandbox( job ) if result['OK']: if os.path.exists( '%s' % job ): lines = [] try: lines = open( os.path.join( job, filename ) ).readlines()
if isinstance(since, int): since = str(datetime.datetime.now() - datetime.timedelta(days=since)).split()[0] if workerNodes or batchIDs: # status = [None] full = True monitoring = JobMonitoringClient() dirac = Dirac() # Get jobs according to selection jobs = set() for stat in status: res = dirac.selectJobs(site=site, date=since, status=stat, minorStatus=minorStatus) if not res['OK']: gLogger.error('Error selecting jobs', res['Message']) DIRAC.exit(1) allJobs = set(int(job) for job in res['Value']) if until: res = dirac.selectJobs(site=site, date=until, status=stat) if not res['OK']: gLogger.error('Error selecting jobs', res['Message']) DIRAC.exit(1) allJobs -= set(int(job) for job in res['Value']) jobs.update(allJobs) if not jobs: gLogger.always('No jobs found...') DIRAC.exit(0)
selDate = date if not date: selDate = 'Today' conditions = { 'Status':status, 'MinorStatus':minorStatus, 'ApplicationStatus':appStatus, 'Owner':owner, 'JobGroup':jobGroup, 'Date':selDate } from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() result = dirac.selectJobs( status = status, minorStatus = minorStatus, applicationStatus = appStatus, site = site, owner = owner, jobGroup = jobGroup, date = date ) if not result['OK']: print 'ERROR %s' % result['Message'] exitCode = 2 else: conds = [] for n, v in conditions.items(): if v: conds.append( '%s = %s' % ( n, v ) ) jobs = result['Value'] constrained = ' ' if len( jobs ) > 100: jobs = jobs[:100]
selDate = date if not date: selDate = 'Today' conditions = { 'Status':status, 'MinorStatus':minorStatus, 'ApplicationStatus':appStatus, 'Owner':owner, 'JobGroup':jobGroup, 'Date':selDate } from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() result = dirac.selectJobs( Status = status, MinorStatus = minorStatus, ApplicationStatus = appStatus, Site = site, Owner = owner, JobGroup = jobGroup, Date = date ) if not result['OK']: print 'ERROR %s' % result['Message'] exitCode = 2 else: conds = [] for n, v in conditions.items(): if v: conds.append( '%s = %s' % ( n, v ) ) jobs = result['Value'] constrained = ' ' if len( jobs ) > 100: jobs = jobs[:100]