import copy

import DIRAC
from DIRAC.Interfaces.API.Dirac import Dirac


def parse_jobs_list(jobs_list):
    """ Parse a jobs list by first getting the status of all jobs.
        BASE_STATUS_DIR is a module-level template dict of counters. """
    dirac = Dirac()
    # status of all jobs
    status = dirac.status(jobs_list)
    # parse it
    sites_dict = {}
    status_dict = copy.copy(BASE_STATUS_DIR)
    for job in jobs_list:
        site = status['Value'][int(job)]['Site']
        minstatus = status['Value'][int(job)]['MinorStatus']  # collected but not used below
        majstatus = status['Value'][int(job)]['Status']
        if majstatus not in status_dict:
            DIRAC.gLogger.notice('Add %s to BASE_STATUS_DIR' % majstatus)
            DIRAC.exit(1)
        status_dict[majstatus] += 1
        status_dict['Total'] += 1
        # unresolved sites have no dot in their name; fold them under ' None'
        # before the membership check so their counts are not reset on every
        # job (the leading blank keeps the entry first when keys are sorted)
        if site.find('.') == -1:
            site = ' None'
        if site not in sites_dict:
            sites_dict[site] = copy.copy(BASE_STATUS_DIR)
            sites_dict[site][majstatus] = 1
            sites_dict[site]['Total'] = 1
        else:
            sites_dict[site]['Total'] += 1
            if majstatus not in sites_dict[site]:
                sites_dict[site][majstatus] = 1
            else:
                sites_dict[site][majstatus] += 1
    return status_dict, sites_dict
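# A minimal usage sketch for parse_jobs_list. The job IDs are made up, and the
# BASE_STATUS_DIR template shown here is an assumption about which major
# statuses the caller expects (the function exits on any status not listed):
#
# BASE_STATUS_DIR = {'Received': 0, 'Checking': 0, 'Waiting': 0, 'Running': 0,
#                    'Done': 0, 'Failed': 0, 'Total': 0}

status_dict, sites_dict = parse_jobs_list(['1001', '1002', '1003'])
print 'Overall:', status_dict
for site in sorted(sites_dict):
    print site, '->', sites_dict[site]['Total'], 'jobs'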
import pprint

from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job

dirac = Dirac()
j = Job()
j.setCPUTime(500)
j.setExecutable('/bin/echo hello')
j.setExecutable('/bin/hostname')
j.setExecutable('/bin/echo hello again')
j.setName('API')

result = dirac.submitJob(j)
print 'Submission Result: '
pprint.pprint(result)

jobid = result['JobID']

# print job id to file for future reference
joblog = open("jobid.log", "a")
joblog.write(str(jobid) + '\n')
joblog.close()

# to interactively check on job status do:
# dirac-wms-job-status -f jobid.log
print "\nThe current status of this job is:"
pprint.pprint(dirac.status(jobid))

joblog = open("jobid.log", "r")
# list comprehension :-D
all_jobids = [jobid.strip() for jobid in joblog.readlines()]
joblog.close()

print "\nThe current status of all jobs is:"
all_status = dirac.status(all_jobids)
pprint.pprint(all_status)
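# A possible continuation (a sketch, not part of the original script): poll
# dirac.status until the freshly submitted job reaches a terminal state. The
# terminal-state names are an assumption about the DIRAC major-status
# vocabulary.
import time

TERMINAL_STATES = ('Done', 'Failed', 'Killed', 'Deleted')
job_to_watch = result['JobID']
state = None
while state not in TERMINAL_STATES:
    time.sleep(30)
    state = dirac.status(job_to_watch)['Value'][job_to_watch]['Status']
print "Job", job_to_watch, "finished in state:", state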
# visit, insidename, startsensor, numsensors, idx, instcatname, site, dirac
# and joblistfile are defined earlier in the surrounding script
args = visit + ' ' + insidename + ' ' + str(startsensor) + ' ' + str(numsensors) + ' ' + str(idx)
outputname = 'fits_' + visit + '_' + str(idx) + '.tar'

j.setCPUTime(1209600)
j.setExecutable('runimsim2.1.sh', arguments=args)
j.stderr = "std.err"
j.stdout = "std.out"
# !!! May need the 2.1i directory here depending on visit number !!!
j.setInputSandbox(["runimsim2.1.sh", "run_imsim_nersc.py",
                   "LFN:/lsst/user/j/james.perry/instcats/2.1i/" + instcatname])
j.setOutputSandbox(["std.out", "std.err"])
j.setTag(["8Processors"])
#j.setOutputData([visit + "/" + outputname], outputPath="", outputSE=["IN2P3-CC-disk"])
j.setOutputData([visit + "/" + outputname], outputPath="", outputSE=["UKI-NORTHGRID-LANCS-HEP-disk"])
j.setPlatform("AnyPlatform")
j.setDestination(site)

jobID = dirac.submitJob(j)
print("Submitted job to " + site + " as ID " + str(jobID))
print "Status is:", dirac.status(jobID['JobID'])

joblistfile.write(str(jobID['JobID']) + '\n')
joblistfile.close()
import os
from datetime import datetime, timedelta

from DIRAC import S_OK
from DIRAC.Core.DISET.RPCClient import RPCClient
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job

# TestBase, BESUtils and the module-level LOCK come from the surrounding
# BESDIRAC package


class CEBaseTest( TestBase ):
  """
    CEBaseTest is the base class for all the CE test classes.
    A real CE test should implement its _judge method.
  """

  def __init__( self, args = None, apis = None ):
    super( CEBaseTest, self ).__init__( args, apis )

    self.timeout = self.args.get( 'timeout', 1800 )
    self.vo = self.args.get( 'VO' )
    self.testType = self.args[ 'TestType' ]
    self.executable = self.args[ 'executable' ]
    self.__logPath = '/opt/dirac/pro/BESDIRAC/ResourceStatusSystem/SAM/log'
    self.__scriptPath = '/opt/dirac/pro/BESDIRAC/ResourceStatusSystem/SAM/sam_script'

    if 'WMSAdministrator' in self.apis:
      self.wmsAdmin = self.apis[ 'WMSAdministrator' ]
    else:
      self.wmsAdmin = RPCClient( 'WorkloadManagement/WMSAdministrator' )

    if 'Dirac' in self.apis:
      self.dirac = self.apis[ 'Dirac' ]
    else:
      self.dirac = Dirac()

  def doTest( self, elementDict ):
    """ Submit a test job to the specified CE or cloud. """
    elementName = elementDict[ 'ElementName' ]
    elementType = elementDict[ 'ElementType' ]
    vos = elementDict[ 'VO' ]

    site = None
    ce = None
    if elementType == 'ComputingElement':
      ce = elementName
    if elementType == 'CLOUD':
      site = elementName

    if self.vo:
      submitVO = self.vo
    elif vos:
      submitVO = vos[ 0 ]
    else:
      submitVO = 'bes'

    submissionTime = datetime.utcnow().replace( microsecond = 0 )
    sendRes = self.__submit( site, ce, submitVO )
    if not sendRes[ 'OK' ]:
      return sendRes
    jobID = sendRes[ 'Value' ]

    result = { 'Result' : { 'JobID' : jobID,
                            'VO' : submitVO,
                            'SubmissionTime' : submissionTime },
               'Finish' : False }
    return S_OK( result )

  def __submit( self, site, CE, vo ):
    """ Set up the job and submit it. """
    job = Job()
    job.setName( self.testType )
    job.setJobGroup( 'CE-Test' )
    job.setExecutable( self.executable )
    job.setInputSandbox( '%s/%s' % ( self.__scriptPath, self.executable ) )
    if site and not CE:
      job.setDestination( site )
    if CE:
      job.setDestinationCE( CE )

    # swap in the VO proxy for the submission, then restore the previous one
    LOCK.acquire()
    proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
    if not proxyPath[ 'OK' ]:
      LOCK.release()
      return proxyPath
    proxyPath = proxyPath[ 'Value' ]
    oldProxy = os.environ.get( 'X509_USER_PROXY' )
    os.environ[ 'X509_USER_PROXY' ] = proxyPath

    result = self.dirac.submit( job )

    if oldProxy is None:
      del os.environ[ 'X509_USER_PROXY' ]
    else:
      os.environ[ 'X509_USER_PROXY' ] = oldProxy
    LOCK.release()

    return result

  def getTestResult( self, elementName, vo, jobID, submissionTime ):
    """ Download the output sandbox and judge the test status from the log file. """
    isFinish = False

    res = self.__getJobOutput( jobID, vo )
    if not res[ 'OK' ]:
      return res
    output = res[ 'Value' ]
    status = res[ 'Status' ]

    resDict = { 'CompletionTime' : None,
                'Status' : None,
                'Log' : None,
                'ApplicationTime' : None }
    utcNow = datetime.utcnow().replace( microsecond = 0 )

    if output:
      isFinish = True
      resDict[ 'CompletionTime' ] = utcNow
      log = output[ 'Log' ]
      if not output[ 'Download' ]:
        resDict[ 'Status' ] = 'Unknown'
        resDict[ 'Log' ] = 'Fail to download log file for job %s: %s' % ( jobID, log )
      else:
        resDict[ 'Log' ] = log
        resDict[ 'Status' ] = self._judge( log )
        resDict[ 'ApplicationTime' ] = self.__getAppRunningTime( log )
    else:
      if utcNow - submissionTime >= timedelta( seconds = self.timeout ):
        isFinish = True
        if elementName.split( '.' )[ 0 ] == 'CLOUD':
          site = elementName
        else:
          site = BESUtils.getSiteForCE( elementName )
        jobCount = self.wmsAdmin.getSiteSummaryWeb( { 'Site' : site }, [], 0, 0 )
        if not jobCount[ 'OK' ]:
          return jobCount
        params = jobCount[ 'Value' ][ 'ParameterNames' ]
        records = jobCount[ 'Value' ][ 'Records' ][ 0 ]
        run = records[ params.index( 'Running' ) ]
        done = records[ params.index( 'Done' ) ]
        if status == 'Waiting' and run == 0 and done == 0:
          resDict[ 'Status' ] = 'Bad'
          resDict[ 'Log' ] = 'The test job has been waiting for %d seconds, but there are no running or done jobs at this site.' % self.timeout
        else:
          if run != 0:
            resDict[ 'Status' ] = 'Busy'
            resDict[ 'Log' ] = 'Site %s is too busy to execute this test job, job status is %s' % ( site, status )
          else:
            resDict[ 'Status' ] = 'Unknown'
            resDict[ 'Log' ] = 'Test did not complete within the timeout of %d seconds, job status is %s' % ( self.timeout, status )
        self.dirac.kill( jobID )

    if not isFinish:
      return S_OK()
    else:
      return S_OK( resDict )

  def __getJobOutput( self, jobID, vo ):
    status = self.dirac.status( jobID )
    if not status[ 'OK' ]:
      return status
    status = status[ 'Value' ][ jobID ][ 'Status' ]

    if status in ( 'Done', 'Failed' ):
      # swap in the VO proxy to fetch the sandbox, then restore the previous one
      LOCK.acquire()
      proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
      if not proxyPath[ 'OK' ]:
        LOCK.release()
        return proxyPath
      proxyPath = proxyPath[ 'Value' ]
      oldProxy = os.environ.get( 'X509_USER_PROXY' )
      os.environ[ 'X509_USER_PROXY' ] = proxyPath

      outputRes = self.dirac.getOutputSandbox( jobID, self.__logPath )

      if oldProxy is None:
        del os.environ[ 'X509_USER_PROXY' ]
      else:
        os.environ[ 'X509_USER_PROXY' ] = oldProxy
      LOCK.release()

      if not outputRes[ 'OK' ]:
        ret = S_OK( { 'Download' : False, 'Log' : outputRes[ 'Message' ] } )
      else:
        logfile = open( '%s/%d/Script1_CodeOutput.log' % ( self.__logPath, jobID ), 'r' )
        log = logfile.read()
        logfile.close()
        os.system( 'rm -rf %s/%d' % ( self.__logPath, jobID ) )
        ret = S_OK( { 'Download' : True, 'Log' : log } )
    else:
      # Reconstructed ending (the source snippet is truncated here): when the
      # job is not yet Done/Failed there is no sandbox to fetch, so return an
      # empty value; getTestResult also expects the major status attached.
      ret = S_OK()
    ret[ 'Status' ] = status
    return ret
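# The submit and sandbox-download paths above both swap X509_USER_PROXY by
# hand under LOCK. A minimal sketch of the same pattern as a context manager
# (a hypothetical helper, not part of BESDIRAC; it raises instead of
# returning the error dict):
from contextlib import contextmanager

@contextmanager
def voProxy( user, vo ):
  """ Temporarily point X509_USER_PROXY at the proxy for ( user, vo ). """
  LOCK.acquire()
  try:
    res = BESUtils.getProxyByVO( user, vo )
    if not res[ 'OK' ]:
      raise RuntimeError( res[ 'Message' ] )
    oldProxy = os.environ.get( 'X509_USER_PROXY' )
    os.environ[ 'X509_USER_PROXY' ] = res[ 'Value' ]
    try:
      yield
    finally:
      # always restore the previous proxy setting
      if oldProxy is None:
        del os.environ[ 'X509_USER_PROXY' ]
      else:
        os.environ[ 'X509_USER_PROXY' ] = oldProxy
  finally:
    LOCK.release()

# usage inside the class above:
#   with voProxy( 'zhangxm', vo ):
#     result = self.dirac.submit( job )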
# (the snippet opens mid-way through the script's Script.setUsageMessage call;
# DIRAC and Script are imported in the truncated header)
]))

Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()
exitCode = 0

try:
    jobs = [int(job) for job in args]
except Exception, x:
    print 'Expected integer for jobID'
    exitCode = 2
    DIRAC.exit(exitCode)

result = dirac.status(jobs)

if result['OK']:
    for job in result['Value']:
        print 'JobID=' + str(job),
        for status in result['Value'][job]:
            print status + '=' + result['Value'][job][status] + ';',
        print
else:
    exitCode = 2
    print "ERROR: %s" % result['Message']

DIRAC.exit(exitCode)
# (continuation of a similar status script: dirac, args, exitCode, jobs and
# the selectJobs result come from the truncated part above)
if not result['OK']:
    print "Error:", result['Message']
    DIRACExit(-1)
jobs += result['Value']

if len(args) < 1 and not jobs:
    Script.showHelp()

if len(args) > 0:
    jobs += args

try:
    jobs = [int(job) for job in jobs]
except Exception, x:
    print 'Expected integer for jobID'
    exitCode = 2
    DIRAC.exit(exitCode)

result = dirac.status(jobs)

if result['OK']:
    for job in result['Value']:
        print 'JobID=' + str(job),
        for status in result['Value'][job]:
            print status + '=' + result['Value'][job][status] + ';',
        print
else:
    exitCode = 2
    print "ERROR: %s" % result['Message']

DIRAC.exit(exitCode)
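# A hypothetical invocation of the script above; the IDs and field values are
# made up, but the output shape follows directly from the print loop:
#
#   $ dirac-wms-job-status 1001 1002
#   JobID=1001 Status=Done; MinorStatus=Execution Complete; Site=LCG.CERN.ch;
#   JobID=1002 Status=Running; MinorStatus=Job Initialization; Site=LCG.UKI-LT2-IC-HEP.uk;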
import json
import os
import sys

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

if len(sys.argv) < 2:
    print 'the input file with job ids should be specified'
    print "if the optional second argument is 'get_output' the job output sandbox will be downloaded"
    sys.exit(os.EX_USAGE)

id_list_path = sys.argv[1]

get_output = False
if len(sys.argv) > 2:
    if sys.argv[2] == 'get_output':
        get_output = True

dirac = Dirac()
id_list_file = open(id_list_path, 'r')
for line in id_list_file:
    # each line holds the repr() of a DIRAC result dict; rewrite it as JSON
    ## line = line.strip().decode("utf-8").replace("True","true").replace("False","false")
    line = line.replace("True", "true").replace("False", "false")
    line = line.replace("'", "\"")
    j = json.loads(line)
    print dirac.status(j['Value'])
    if get_output:
        print dirac.getOutputSandbox(j['Value'])
id_list_file.close()
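# For reference, a hypothetical line of the input file: the repr() of a DIRAC
# submission result, which the replace() calls above turn into valid JSON:
sample = "{'OK': True, 'Value': 1234567}"
print json.loads(sample.replace("True", "true").replace("'", "\""))
# -> {u'OK': True, u'Value': 1234567}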
#!/bin/env python
import sys

from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
jobid = sys.argv[1]

print dirac.status(jobid)

summary_file = str(jobid) + "_summary.txt"
dirac.getJobSummary(jobid, outputFile=summary_file, printOutput=True)

print dirac.getJobDebugOutput(jobid)
print dirac.getJobLoggingInfo(jobid, printOutput=False)
# Fragment from a loop over a job-list file; jobids, sitefailures and
# minorstatuses are initialised (and dirac constructed) earlier in the script.
line = line.strip()
bits = line.split(' ')  # visit, idx, job ID, site
jobid = int(bits[2])
site = bits[3]
jobids.append(jobid)

# tally up failures by site
if site in sitefailures:
    sitefailures[site] = sitefailures[site] + 1
else:
    sitefailures[site] = 1

# get statuses
statuslist = dirac.status(jobids)
if 'Value' not in statuslist:
    print "Error getting job status from DIRAC!"
    sys.exit(1)

cvmfsProblemCount = 0

# tally up minor status
for i in jobids:
    minorstatus = statuslist['Value'][i]['MinorStatus']
    if minorstatus in minorstatuses:
        minorstatuses[minorstatus] = minorstatuses[minorstatus] + 1
    else:
        minorstatuses[minorstatus] = 1
    if minorstatus == "Application Finished With Errors":
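# A small reporting sketch for the tallies built above (separate from the
# truncated loop; it assumes only the names already used in this snippet),
# printed most frequent first:
for minorstatus, count in sorted(minorstatuses.items(), key=lambda kv: -kv[1]):
    print "%6d  %s" % (count, minorstatus)
for site, count in sorted(sitefailures.items(), key=lambda kv: -kv[1]):
    print "%6d  %s" % (count, site)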
# jobGroup, owner, nHours and dirac are defined earlier in the script
onehour = datetime.timedelta(hours=1)
now = datetime.datetime.now()
Script.gLogger.notice(now)

results = dirac.selectJobs(jobGroup=jobGroup, owner=owner, date=now - nHours * onehour)
if 'Value' not in results:
    Script.gLogger.notice('No job found for group "%s" and owner "%s" in the past %s hours'
                          % (jobGroup, owner, nHours))
    Script.sys.exit(0)

# Found some jobs, print information
jobsList = results['Value']
Script.gLogger.notice('%s jobs found for group "%s" and owner "%s" in the past %s hours\n'
                      % (len(jobsList), jobGroup, owner, nHours))

status = dirac.status(jobsList)
# for details:
# print dirac.getJobSummary(3075536)

# print out my favourite tables
SitesDict = {}
for job in jobsList:
    # print job, status['Value'][int(job)]
    site = status['Value'][int(job)]['Site']
    # site = status['Value'][int(job)]['CE']
    minstatus = status['Value'][int(job)]['MinorStatus']
    majstatus = status['Value'][int(job)]['Status']
    if majstatus not in {'Done', 'Failed'}:
# Tail of launch_batch_pict(pitch, step, n_pict): j, EXEC, out_bmp and
# out_bmp_list are set up earlier in the function; time is imported at the
# top of the script and P_START, P_STEP, N_PICT, N_PICT_BATCH are constants.
    j.setExecutable(EXEC, arguments="-W 600 -H 600 -X -0.77568377 -Y -0.13646737 -P %f -M 500 %s" % (pitch, out_bmp))
    pitch += step
    j.setOutputSandbox(out_bmp_list + ["StdOut"] + ["StdErr"])
    result = dirac.submit(j)
    print 'Submission Result: ', result
    return result


n_produced_pict = 0
jobinfo_list = []
while n_produced_pict < N_PICT:
    pitch_start = n_produced_pict * P_STEP + P_START
    jobinfo = launch_batch_pict(pitch_start, P_STEP, N_PICT_BATCH)
    jobinfo_list.append(jobinfo)
    n_produced_pict += N_PICT_BATCH

# poll until every job has reached 'Done'
job_state_dict = dict()
job_in_flight = ['start']
while len(job_in_flight) != 0:
    for jobinfo in jobinfo_list:
        jobid = jobinfo['Value']
        status = dirac.status(jobid)
        state = status['Value'][jobid]['Status']
        job_state_dict[jobid] = state
    job_in_flight = [k for k, v in job_state_dict.items() if v != 'Done']
    print "Jobs still running:"
    print job_in_flight
    time.sleep(2)
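# A gentler polling variant (a sketch; the terminal-state tuple is an
# assumption about the DIRAC status vocabulary): one dirac.status call for
# all jobs per pass, stopping on any terminal state rather than only 'Done',
# with a longer sleep between passes.
TERMINAL = ('Done', 'Failed', 'Killed')
pending = [info['Value'] for info in jobinfo_list]
while pending:
    result = dirac.status(pending)
    if result['OK']:
        pending = [jid for jid, rec in result['Value'].items()
                   if rec['Status'] not in TERMINAL]
        print "Jobs still in flight:", pending
    time.sleep(30)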
# w is a WMSAdministrator RPC client and d a Dirac() instance; specialOptions,
# my_dict, do_xml and doLogging come from the script's option handling.
local_time = datetime.datetime.utcnow()
cutoff = local_time - datetime.timedelta(seconds=86400)
# The if/else grouping below is inferred from the flattened source: jobs are
# selected by time window unless an explicit JobID list was given.
if "dayspassed" in specialOptions:
    cutoff = local_time - datetime.timedelta(seconds=float(specialOptions["dayspassed"]) * 3600)
    res = w.getJobs(my_dict, cutoff.strftime('%Y-%m-%d %H:%M:%S'))
    if not res['OK']:
        gLogger.error("Could not get list of running jobs.", res['Message'])
        dexit(1)
    job_list = res['Value']
else:
    job_list = specialOptions["JobID"].split(",")
    doLogging = True

res = d.status(job_list)
if not res['OK']:
    gLogger.error("Could not get status of job_list,", res['Message'])
    dexit(1)
status = res['Value']

# get sites info
sites = None
res = w.getJobsSites(job_list)
if not res['OK']:
    gLogger.error("Could not get sites;", res['Message'])
else:
    sites = res['Value']

if not do_xml:
import os
from SimpleXMLRPCServer import SimpleXMLRPCServer

# Daemonize comes from the daemonize package (or a local wrapper around it)
from daemonize import Daemonize
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job


class DiracDaemon(Daemonize):
    """Dirac daemon exposing the DIRAC API over XML-RPC."""

    def __init__(self, address, **kwargs):
        """Initialise."""
        super(DiracDaemon, self).__init__(action=self.main, **kwargs)
        self._address = address
        self._dirac_api = Dirac()

    def main(self):
        """Daemon main."""
        # Defer creation of the server to inside the daemon context, otherwise
        # the socket will be closed when daemonising.
        dirac_server = SimpleXMLRPCServer(self._address)
        dirac_server.register_introspection_functions()
        dirac_server.register_instance(self._dirac_api)
        # override Dirac().status to make sure that the keys are strings.
        dirac_server.register_function(self.status)
        dirac_server.register_function(self.submit_job)
        dirac_server.serve_forever()

    def status(self, ids):
        """
        Return the status of the Dirac jobs with the given ids.

        This method overrides Dirac().status to ensure that the dict keys
        (the ids of the jobs) are cast to strings so that they can be sent
        over the XML-RPC socket.
        """
        return {str(k): v
                for k, v in self._dirac_api.status(ids).get("Value", {}).iteritems()}

    def submit_job(self, request_id, executable, macro,
                   starting_seed=8000000, njobs=10, platform='ANY',
                   output_data_site='UKI-LT2-IC-HEP-disk',
                   output_log='lzproduction_output.log'):
        """
        Submit an LZProduction job to DIRAC.

        Args:
            request_id (int): The id number of the associated request
            executable (str): The full path to the executable job script
            macro (str): The full path to the macro for this job
            starting_seed (int): The random seed for the first of the parametric jobs
            njobs (int): The number of parametric jobs to create
            platform (str): The required platform
            output_data_site (str): The name of the grid site to store the output data at
            output_log (str): The file name for the output log file

        Returns:
            dict: The statuses of the created parametric DIRAC jobs, keyed by
                  their string job ids
        """
        j = Job()
        j.setName(os.path.splitext(os.path.basename(macro))[0] + '%(args)s')
        j.setExecutable(os.path.basename(executable),
                        os.path.basename(macro) + ' %(args)s',
                        output_log)
        j.setInputSandbox([executable, macro])
        j.setOutputData('*.root', output_data_site, str(request_id))
        j.setParameterSequence("args",
                               [str(i) for i in xrange(starting_seed, starting_seed + njobs)],
                               addToWorkflow=True)
        j.setPlatform(platform)
        return self.status(self._dirac_api.submit(j).get("Value", []))
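# A client-side sketch for talking to the daemon above; the host/port pair is
# hypothetical (whatever address tuple DiracDaemon was started with), and
# xmlrpclib is the Python 2 standard-library XML-RPC client. The string
# job-id keys in the reply are exactly why the daemon overrides status().
import xmlrpclib

server = xmlrpclib.ServerProxy('http://localhost:8000')
job_statuses = server.submit_job(42, '/path/to/job.sh', '/path/to/macro.mac')
print server.status(job_statuses.keys())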
if specialOptions.has_key("dayspassed"): timedelta = local_time-datetime.timedelta(seconds=float(specialOptions["dayspassed"])*3600) res = w.getJobs(my_dict,timedelta.strftime( '%Y-%m-%d %H:%M:%S' )) if not res['OK']: gLogger.error("Could not get list of running jobs.",res['Message']) dexit(1) job_list = res['Value'] else: job_list = specialOptions["JobID"].split(",") doLogging = True status = {} sites = {} for chunk in breakListIntoChunks(job_list,1000): res = d.status(chunk) if not res['OK']: gLogger.error("Could not get status of job list chunk,",res['Message']) continue status.update(res['Value']) # get sites info res = w.getJobsSites(chunk) if not res['OK']: gLogger.error("Could not get sites;",res['Message']) sites.update(res['Value']) if not do_xml: print('# ID\thostname\tStatus\tSubmitted\tStarted\tEnded\tCPUtime\tMemory') for j in job_list: status_j=status[int(j)] if doLogging: