def test_execute( self ):
  job = Job()
  job.setName( "helloWorld-test" )
  job.setExecutable( find_all( "helloWorld.py", '.', 'Integration' )[0],
                     arguments = "This is an argument",
                     logFile = "aLogFileForTest.txt",
                     parameters = [( 'executable', 'string', '', "Executable Script" ),
                                   ( 'arguments', 'string', '', 'Arguments for executable Script' ),
                                   ( 'applicationLog', 'string', '', "Log file name" ),
                                   ( 'someCustomOne', 'string', '', "boh" )],
                     paramValues = [( 'someCustomOne', 'aCustomValue' )] )
  job.setBannedSites( ['LCG.SiteA.com', 'DIRAC.SiteB.org'] )
  job.setOwner( 'ownerName' )
  job.setOwnerGroup( 'ownerGroup' )
  job.setName( 'jobName' )
  job.setJobGroup( 'jobGroup' )
  job.setType( 'jobType' )
  job.setDestination( 'DIRAC.someSite.ch' )
  job.setCPUTime( 12345 )
  job.setLogLevel( 'DEBUG' )
  res = job.runLocal( self.d )
  self.assertTrue( res['OK'] )
def helloWorldJob():
  job = Job()
  job.setName( "helloWorld" )
  exeScriptLocation = find_all( 'exe-script.py', '.', 'WorkloadManagementSystem' )[0]
  job.setInputSandbox( exeScriptLocation )
  job.setExecutable( exeScriptLocation, "", "helloWorld.log" )
  return job
def submitProbeJobs(self, ce):
  """ Submit some jobs to the CEs """
  # need credentials, should be there since the initialize
  from DIRAC.Interfaces.API.Dirac import Dirac
  from DIRAC.Interfaces.API.Job import Job
  from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
  import DIRAC

  d = Dirac()
  ops = Operations()
  scriptname = ops.getValue("ResourceStatus/SofwareManagementScript", self.script)

  j = Job()
  j.setDestinationCE(ce)
  j.setCPUTime(1000)
  j.setName("Probe %s" % ce)
  j.setJobGroup("SoftwareProbe")
  j.setExecutable("%s/GlastDIRAC/ResourceStatusSystem/Client/%s" % (DIRAC.rootPath, scriptname),
                  logFile='SoftwareProbe.log')
  j.setOutputSandbox('*.log')

  res = d.submit(j)
  if not res['OK']:
    return res
  return S_OK()
def __submit( self, site, CE, vo ):
  """ set the job and submit. """
  job = Job()
  job.setName( self.testType )
  job.setJobGroup( 'CE-Test' )
  job.setExecutable( self.executable )
  job.setInputSandbox( '%s/%s' % ( self.__scriptPath, self.executable ) )
  if site and not CE:
    job.setDestination( site )
  if CE:
    job.setDestinationCE( CE )

  LOCK.acquire()
  proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
  if not proxyPath[ 'OK' ]:
    LOCK.release()
    return proxyPath
  proxyPath = proxyPath[ 'Value' ]
  oldProxy = os.environ.get( 'X509_USER_PROXY' )
  os.environ[ 'X509_USER_PROXY' ] = proxyPath
  result = self.dirac.submit( job )
  if oldProxy is None:
    del os.environ[ 'X509_USER_PROXY' ]
  else:
    os.environ[ 'X509_USER_PROXY' ] = oldProxy
  LOCK.release()

  return result
def test_basicJob():
  job = Job()
  job.setOwner('ownerName')
  job.setOwnerGroup('ownerGroup')
  job.setName('jobName')
  job.setJobGroup('jobGroup')
  job.setExecutable('someExe')
  job.setType('jobType')
  job.setDestination('ANY')
  xml = job._toXML()

  try:
    with open('./DIRAC/Interfaces/API/test/testWF.xml') as fd:
      expected = fd.read()
  except IOError:
    with open('./Interfaces/API/test/testWF.xml') as fd:
      expected = fd.read()
  assert xml == expected

  try:
    with open('./DIRAC/Interfaces/API/test/testWFSIO.jdl') as fd:
      expected = fd.read()
  except IOError:
    with open('./Interfaces/API/test/testWFSIO.jdl') as fd:
      expected = fd.read()
  jdlSIO = job._toJDL(jobDescriptionObject=StringIO.StringIO(job._toXML()))
  assert jdlSIO == expected
def helloWorldJob():
  job = Job()
  job.setName("helloWorld")
  exeScriptLocation = find_all('exe-script.py', '..', '/DIRAC/tests/Integration')[0]
  job.setInputSandbox(exeScriptLocation)
  job.setExecutable(exeScriptLocation, "", "helloWorld.log")
  return job
def test_execute( self ):
  j = Job()
  j.setName( "helloWorld-test" )
  j.setExecutable( self.exeScriptLocation )
  res = j.runLocal( self.d )
  self.assertTrue( res['OK'] )
def parametricJob():
  job = Job()
  job.setName("parametric_helloWorld_%n")
  exeScriptLocation = find_all('exe-script.py', '..', '/DIRAC/tests/Integration')[0]
  job.setInputSandbox(exeScriptLocation)
  job.setParameterSequence("args", ['one', 'two', 'three'])
  job.setParameterSequence("iargs", [1, 2, 3])
  job.setExecutable(exeScriptLocation, arguments=": testing %(args)s %(iargs)s",
                    logFile='helloWorld_%n.log')
  return job
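# A hedged usage sketch (not part of the original snippet): submitting the
# parametric job built above. On submission the WMS expands it into one child
# job per entry of the parameter sequences, substituting %n (the parameter
# index) and %(args)s / %(iargs)s per job. Dirac().submitJob() is assumed
# available as in the other examples here.
def submitParametricJob():
    from DIRAC.Interfaces.API.Dirac import Dirac
    res = Dirac().submitJob(parametricJob())  # one JobID per sequence entry
    if not res['OK']:
        print(res['Message'])
    return res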
def test_execute( self ):
  """ just testing unix "ls" """
  job = Job()
  job.setName( "ls-test" )
  job.setExecutable( "/bin/ls", '-l' )
  res = job.runLocal( self.d )
  self.assertTrue( res['OK'] )
def test_execute( self ):
  """ this one tests that I can execute a job that requires multi-processing """
  j = Job()
  j.setName( "MP-test" )
  j.setExecutable( self.mpExe )
  j.setInputSandbox( find_all( 'mpTest.py', '.', 'Utilities' )[0] )
  j.setTag( 'MultiProcessor' )
  res = j.runLocal( self.d )
  self.assertTrue( res['OK'] )
def _submitJob(self, result_id, executable, test_name, site_name):
    executable = executable.split('&')
    j = Job()
    j.setExecutable('python', arguments=executable[0] + " " + str(result_id))
    sandBox = []
    for file_name in executable:
        sandBox.append(SAM_TEST_DIR + file_name)
    j.setInputSandbox(sandBox)
    j.setName(test_name)
    j.setJobGroup('sam_test')
    j.setDestination(site_name)
    result = self.dirac.submit(j)
    return result
def test_execute(self):
  """ this one tests that I can execute a job that requires multi-processing """
  j = Job()
  j.setName("MP-test")
  j.setExecutable(self.mpExe)
  j.setInputSandbox(find_all('mpTest.py', rootPath, 'DIRAC/tests/Utilities')[0])
  j.setTag('MultiProcessor')
  j.setLogLevel('DEBUG')
  res = j.runLocal(self.d)
  if multiprocessing.cpu_count() > 1:
    self.assertTrue(res['OK'])
  else:
    self.assertFalse(res['OK'])
def do_installonsite(self, argss):
  """ Install a release on a grid site :
      installonsite tag site
  """
  args = argss.split()
  if len(args) < 2:
    print self.do_installonsite.__doc__
    return
  tag = args[0]
  site = args[1]

  # print "Check if the software with the tag '"+tag+"' exists on the rsync server..."
  # res = self.client.getSitesForTag(tag)
  # if not res['OK']:
  #   print res['Message']
  #   return
  # print "tag found !"

  from DIRAC.Interfaces.API.Dirac import Dirac
  from DIRAC.Interfaces.API.Job import Job
  from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
  import os

  d = Dirac()
  ops = Operations()
  scriptname = "InstallSoftware.py"

  j = Job()
  j.setDestination(site)
  j.setCPUTime(1000)
  j.setName("Installation " + tag)
  j.setExecutable(os.environ['DIRAC'] + "/GlastDIRAC/ResourceStatusSystem/Client/" + scriptname,
                  logFile='SoftwareInstallation.log')
  j.setOutputSandbox('*.log')
  res = d.submit(j)
  if not res['OK']:
    print "Could not submit the installation at site %s, message %s" % (site, res['Message'])
    return

  print "Job submitted, id = " + str(res['Value'])
  print "Add tag :"
  res = self.client.addTagAtSite(tag, site)
  if not res['OK']:
    print "Could not register tag %s at site %s, message %s" % (tag, site, res['Message'])
    return
  print "Added %s to %i CEs" % (tag, len(res['Value'][tag]))
def submitJob(jobPara):
    dirac = Dirac()
    j = Job()
    j.setName(jobPara['jobName'])
    j.setJobGroup(jobPara['jobGroup'])
    j.setExecutable(jobPara['jobScript'], logFile=jobPara['jobScriptLog'])
    j.setInputSandbox(jobPara['inputSandbox'])
    j.setOutputSandbox(jobPara['outputSandbox'])
    j.setOutputData(jobPara['outputData'], jobPara['SE'])
    j.setDestination(jobPara['sites'])
    j.setCPUTime(jobPara['CPUTime'])
    result = dirac.submit(j)
    if result['OK']:
        print 'Job %s submitted successfully. ID = %d' % (jobPara['jobName'], result['Value'])
    else:
        print 'Job %s submission failed' % jobPara['jobName']
    return result
def launch_batch_pict( pitch_start, step, n_pict ):
    j = Job()
    j.setCPUTime(500)
    j.setName('%s_%f' % (EXEC, pitch_start))
    j.setJobGroup(JOBGROUP)
    j.setInputSandbox([EXEC])

    out_bmp_list = []
    pitch = pitch_start
    for i in range(n_pict):
        out_bmp = 'out_%f.bmp' % pitch
        out_bmp_list.append(out_bmp)
        j.setExecutable(EXEC, arguments="-W 600 -H 600 -X -0.77568377 -Y -0.13646737 -P %f -M 500 %s" % (pitch, out_bmp))
        pitch += step

    j.setOutputSandbox(out_bmp_list + ["StdOut"] + ["StdErr"])
    result = dirac.submit(j)
    print 'Submission Result: ', result
    return result
def submit(self, param):
    j = Job()
    j.setName(param['jobName'])
    j.setExecutable(param['jobScript'], logFile=param['jobScriptLog'])
    if self.site:
        j.setDestination(self.site)
    if self.jobGroup:
        j.setJobGroup(self.jobGroup)
    j.setInputSandbox(param['inputSandbox'])
    j.setOutputSandbox(param['outputSandbox'])
    j.setOutputData(param['outputData'], outputSE=self.outputSE, outputPath=self.outputPath)

    dirac = GridDirac()
    result = dirac.submit(j)

    status = {}
    status['submit'] = result['OK']
    if status['submit']:
        status['job_id'] = result['Value']

    return status
def test_execute_success(self):
  job = Job()
  job._siteSet = {'DIRAC.someSite.ch'}
  job.setName("helloWorld-test")
  job.setExecutable(self.helloWorld,
                    logFile="aLogFileForTest.txt",
                    parameters=[('executable', 'string', '', "Executable Script"),
                                ('arguments', 'string', '', 'Arguments for executable Script'),
                                ('applicationLog', 'string', '', "Log file name"),
                                ('someCustomOne', 'string', '', "boh")],
                    paramValues=[('someCustomOne', 'aCustomValue')])
  job.setBannedSites(['LCG.SiteA.com', 'DIRAC.SiteB.org'])
  job.setOwner('ownerName')
  job.setOwnerGroup('ownerGroup')
  job.setName('jobName')
  job.setJobGroup('jobGroup')
  job.setType('jobType')
  job.setDestination('DIRAC.someSite.ch')
  job.setCPUTime(12345)
  job.setLogLevel('DEBUG')
  try:
    # This is the standard location in Jenkins
    job.setInputSandbox(find_all('pilot.cfg', os.environ['WORKSPACE'] + '/PilotInstallDIR')[0])
  except (IndexError, KeyError):
    job.setInputSandbox(find_all('pilot.cfg', rootPath)[0])
  job.setConfigArgs('pilot.cfg')
  res = job.runLocal(self.d)
  self.assertTrue(res['OK'])
def test_min1(self):
  """ this test executes a job that requires at least 1 processor """
  j = Job()
  j.setName("MP-test-min1")
  j.setExecutable(self.mpExeFlex, arguments='2')
  j.setInputSandbox(find_all('mpTest-flexible.py', rootPath, 'DIRAC/tests/Utilities')[0])
  j.setNumberOfProcessors(minNumberOfProcessors=1)  # This requires 1 to infinite processors
  j.setLogLevel('DEBUG')
  try:
    # This is the standard location in Jenkins
    j.setInputSandbox(find_all('pilot.cfg', os.environ['WORKSPACE'] + '/PilotInstallDIR')[0])
  except (IndexError, KeyError):
    j.setInputSandbox(find_all('pilot.cfg', rootPath)[0])
  j.setConfigArgs('pilot.cfg')
  res = j.runLocal(self.d)
  self.assertTrue(res['OK'])
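# A hedged companion sketch: setNumberOfProcessors() also accepts an upper
# bound, so a job can ask for a bounded range of processors. The names
# self.mpExeFlex, self.d and the pilot.cfg handling are assumed from the
# test above; this is a sketch, not a test from the original suite.
def test_min2max4(self):
  """ sketch only: asks for between 2 and 4 processors """
  j = Job()
  j.setName("MP-test-min2max4")
  j.setExecutable(self.mpExeFlex, arguments='2')
  j.setInputSandbox(find_all('mpTest-flexible.py', rootPath, 'DIRAC/tests/Utilities')[0])
  j.setNumberOfProcessors(minNumberOfProcessors=2, maxNumberOfProcessors=4)
  j.setLogLevel('DEBUG')
  res = j.runLocal(self.d)
  self.assertTrue(res['OK'])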
def basicTest(self):
  j = Job()
  j.setCPUTime(50000)
  j.setExecutable('/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/myPythonScript.py')
  # j.setExecutable('/bin/echo hello')
  j.setOwner('paterson')
  j.setType('test')
  j.setName('MyJobName')
  # j.setAncestorDepth(1)
  j.setInputSandbox(['/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV.opts',
                     '/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV2.opts'])
  j.setOutputSandbox(['firstfile.txt', 'anotherfile.root'])
  j.setInputData(['/lhcb/production/DC04/v2/DST/00000742_00003493_11.dst',
                  '/lhcb/production/DC04/v2/DST/00000742_00003493_10.dst'])
  j.setOutputData(['my.dst', 'myfile.log'])
  j.setDestination('LCG.CERN.ch')
  j.setPlatform('LCG')
  j.setSystemConfig('x86_64-slc5-gcc43-opt')
  j.setSoftwareTags(['VO-lhcb-Brunel-v30r17', 'VO-lhcb-Boole-v12r10'])
  # print j._toJDL()
  # print j.printObj()
  xml = j._toXML()
  testFile = 'jobDescription.xml'
  if os.path.exists(testFile):
    os.remove(testFile)
  xmlfile = open(testFile, 'w')
  xmlfile.write(xml)
  xmlfile.close()
  print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Creating code for the workflow'
  print j.createCode()
  print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Executing the workflow'
  j.execute()
  print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Trying to run the same workflow from generated XML file'
  workflow = fromXMLFile(testFile)
  code = workflow.createCode()
  print code
  workflow.execute()
def softinstall(args=None):
  from DIRAC.Interfaces.API.Dirac import Dirac
  from DIRAC.Interfaces.API.Job import Job

  if len(args) < 2:
    Script.showHelp()

  version = args[0]
  site = args[1]

  if version not in ['18122013', 'v0r7p0']:
    Script.gLogger.error('Version not valid')
    Script.showHelp()

  j = Job()
  j.setInputSandbox(['cta-ctools-install.py', 'SoftwareInstallation.py'])
  j.setExecutable('./cta-ctools-install.py', version)
  j.setDestination([site])
  j.setName('ctoolsInstall')
  j.setCPUTime(100000)
  Script.gLogger.info(j._toJDL())

  if site in ['LCG.GRIF.fr', 'LCG.M3PEC.fr']:
    if site == 'LCG.GRIF.fr':
      ceList = ['apcce02.in2p3.fr', 'grid36.lal.in2p3.fr', 'lpnhe-cream.in2p3.fr',
                'llrcream.in2p3.fr', 'node74.datagrid.cea.fr']
    elif site == 'LCG.M3PEC.fr':
      # ceList = ['ce0.bordeaux.inra.fr', 'ce0.m3pec.u-bordeaux1.fr']
      ceList = ['ce0.bordeaux.inra.fr']
    for ce in ceList:
      j.setDestinationCE(ce)
      name = 'ctoolsInstall' + '_' + ce
      j.setName(name)
      res = Dirac().submit(j)
      print res
    DIRAC.exit()
  else:
    name = 'ctoolsInstall'
    j.setName(name)
    res = Dirac().submit(j)
    print res
def prepareTransformationTasks(self, transBody, taskDict, owner='', ownerGroup=''):
  if (not owner) or (not ownerGroup):
    res = getProxyInfo(False, False)
    if not res['OK']:
      return res
    proxyInfo = res['Value']
    owner = proxyInfo['username']
    ownerGroup = proxyInfo['group']

  oJob = Job(transBody)
  for taskNumber in sortList(taskDict.keys()):
    paramsDict = taskDict[taskNumber]
    transID = paramsDict['TransformationID']
    self.log.verbose('Setting job owner:group to %s:%s' % (owner, ownerGroup))
    oJob.setOwner(owner)
    oJob.setOwnerGroup(ownerGroup)
    transGroup = str(transID).zfill(8)
    self.log.verbose('Adding default transformation group of %s' % (transGroup))
    oJob.setJobGroup(transGroup)
    constructedName = str(transID).zfill(8) + '_' + str(taskNumber).zfill(8)
    self.log.verbose('Setting task name to %s' % constructedName)
    oJob.setName(constructedName)
    oJob._setParamValue('PRODUCTION_ID', str(transID).zfill(8))
    oJob._setParamValue('JOB_ID', str(taskNumber).zfill(8))
    inputData = None
    for paramName, paramValue in paramsDict.items():
      self.log.verbose('TransID: %s, TaskID: %s, ParamName: %s, ParamValue: %s'
                       % (transID, taskNumber, paramName, paramValue))
      if paramName == 'InputData':
        if paramValue:
          self.log.verbose('Setting input data to %s' % paramValue)
          oJob.setInputData(paramValue)
      elif paramName == 'Site':
        if paramValue:
          self.log.verbose('Setting allocated site to: %s' % (paramValue))
          oJob.setDestination(paramValue)
      elif paramValue:
        self.log.verbose('Setting %s to %s' % (paramName, paramValue))
        oJob._addJDLParameter(paramName, paramValue)

    hospitalTrans = [int(x) for x in gConfig.getValue("/Operations/Hospital/Transformations", [])]
    if int(transID) in hospitalTrans:
      hospitalSite = gConfig.getValue("/Operations/Hospital/HospitalSite", 'DIRAC.JobDebugger.ch')
      hospitalCEs = gConfig.getValue("/Operations/Hospital/HospitalCEs", [])
      oJob.setType('Hospital')
      oJob.setDestination(hospitalSite)
      oJob.setInputDataPolicy('download', dataScheduling=False)
      if hospitalCEs:
        oJob._addJDLParameter('GridRequiredCEs', hospitalCEs)

    taskDict[taskNumber]['TaskObject'] = ''
    res = self.getOutputData({'Job': oJob._toXML(), 'TransformationID': transID,
                              'TaskID': taskNumber, 'InputData': inputData})
    if not res['OK']:
      self.log.error("Failed to generate output data", res['Message'])
      continue
    for name, output in res['Value'].items():
      oJob._addJDLParameter(name, string.join(output, ';'))
    taskDict[taskNumber]['TaskObject'] = Job(oJob._toXML())
  return S_OK(taskDict)
""" simple hello world job """ from DIRAC.Interfaces.API.Job import Job from DIRAC.Interfaces.API.Dirac import Dirac from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers j = Job() j.setName( "helloWorld-test" ) j.setExecutable( "exe-script.py", "", "Executable.log" ) # <-- user settings j.setCPUTime( 172800 ) tier1s = DMSHelpers().getTiers( tier = ( 0, 1 ) ) j.setBannedSites( tier1s ) # user settings --> # print j.workflow # submit the job to dirac result = Dirac().submitJob(j) print result
def dirLUT(args=None):
  from DIRAC.Interfaces.API.Dirac import Dirac
  from DIRAC.Interfaces.API.Job import Job

  user_script = './dirLUT.sh'

  if len(args) != 3:
    Script.showHelp()

  direction = args[0]
  zenith = args[1]
  diffuse = args[2]

  if diffuse == "0":
    diffName = "point"
  elif diffuse == "1":
    diffName = "diff"
  else:
    print "Invalid extension definition!"
    Script.showHelp()
    return 1

  if zenith == "40":
    zenName = "40deg"
  elif zenith == "20":
    zenName = "20deg"
  else:
    print "Invalid zenith definition!"
    Script.showHelp()
    return 1

  if direction == "N":
    directionName = "north"
    # deg = "180"
  elif direction == "S":
    directionName = "south"
    # deg = "0"
  else:
    print 'Wrong direction. It can only be "N" or "S".'
    Script.showHelp()
    return 1

  listname = './training/gamma_trainLUT_%s_%s_%s.lfns' % (zenName, diffName, direction)
  with open(listname) as f:
    totalEntries = sum(1 for _ in f)

  # Number of files used per job
  runN = 20
  runMin = 0
  runMax = totalEntries / runN

  for i in range(runMin, runMax):
    jobName = "%s_%s_%s_%s_%s" % (user_script, direction, zenName, diffName, i)
    jobOut = "%s_%s_%s%s.out" % (user_script, directionName, diffName, i)
    script_args = "%s %s %s %s %s" % (direction, zenName, diffName, i, runN)

    j = Job()
    j.setInputSandbox([user_script, listname, "setupPackageMARS.sh", "CheckFileZombie.C"])
    j.setExecutable(user_script, script_args)
    j.setOutputSandbox([jobOut, "applicationLog.txt"])
    j.setName(jobName)
    j.setBannedSites(['LCG.MSFG.fr', 'LCG.M3PEC.fr', 'LCG.OBSPM.fr',
                      'LCG.UNI-DORTMUND.de', 'LCG.UNIV-LILLE.fr'])
    Script.gLogger.info(j._toJDL())
    print "Submitting job %s" % (script_args)
    Dirac().submit(j)
def submitTS():
  ########################################
  # Modify here with your dirac username
  owner = 'user02'
  ########################################

  ########################################
  # Job description
  ########################################
  job = Job()
  job.setName('mandelbrot raw')
  job.setOutputSandbox(['*log'])
  job.workflow.addParameter(Parameter("JOB_ID", "000000", "string", "", "",
                                      True, False, "Initialize JOB_ID"))

  ## define the job workflow in 3 steps
  # job step1: setup software
  job.setExecutable('git clone https://github.com/bregeon/mandel4ts.git')
  # job step2: run mandelbrot application
  job.setExecutable('./mandel4ts/mandelbrot.py',
                    arguments="-P 0.0005 -M 1000 -L @{JOB_ID} -N 200")

  outputPath = os.path.join('/vo.france-grilles.fr/user', owner[0], owner,
                            'mandelbrot/images/raw')
  outputPattern = 'data_*txt'
  outputSE = 'DIRAC-USER'
  outputMetadata = json.dumps({"application": "mandelbrot",
                               "image_format": "ascii",
                               "image_width": 7680,
                               "image_height": 200,
                               "owner": owner})

  # job step3: upload data and set metadata
  job.setExecutable('./mandel4ts/dirac-add-files.py',
                    arguments="%s '%s' %s '%s'" % (outputPath, outputPattern, outputSE, outputMetadata))

  ########################################
  # Transformation definition
  ########################################
  t = Transformation()
  t.setTransformationName(owner + '_step1')
  t.setType("MCSimulation")
  t.setDescription("Mandelbrot images production")
  t.setLongDescription("Mandelbrot images production")
  # set the job workflow to the transformation
  t.setBody(job.workflow.toXML())

  ########################################
  # Transformation submission
  ########################################
  res = t.addTransformation()
  if not res['OK']:
    print(res['Message'])
    DIRAC.exit(-1)
  t.setStatus("Active")
  t.setAgentType("Automatic")

  return res
def TrainERF(args=None):
  from DIRAC.Interfaces.API.Dirac import Dirac
  from DIRAC.Interfaces.API.Job import Job

  user_script = './trainERF.sh'

  if len(args) != 3:
    Script.showHelp()

  direction = args[0]
  zenith = args[1]
  diffuse = args[2]
  site = "PARANAL"

  if diffuse == "0":
    diffName = "point"
  elif diffuse == "1":
    diffName = "diff"
  else:
    print "Invalid extension definition!"
    Script.showHelp()
    return 1

  if zenith == "40":
    zenName = "40deg"
  elif zenith == "20":
    zenName = "20deg"
  else:
    print "Invalid zenith definition!"
    Script.showHelp()
    return 1

  if direction == "N":
    directionName = "north"
    deg = "180"
  elif direction == "S":
    directionName = "south"
    deg = "0"
  else:
    print 'Wrong direction. It can only be "N" or "S".'
    Script.showHelp()
    return 1

  # List of files over which the training should be done
  LFN_file = './training/gamma_trainERF_%s_%s_%s.lfns' % (zenName, diffName, direction)
  StatFile = './Statistic_train.txt'

  for telType in range(0, 6):
    jobName = "%s_%s_%s_%s_%s" % (user_script, directionName, telType, diffName, zenName)
    jobOut = "%s_%s_%s.out" % (user_script, directionName, telType)
    script_args = "%s %s %s %s %s" % (direction, site, diffName, zenName, telType)

    j = Job()
    # create LFN list
    # LFNList = []
    # f = open(LFN_file, 'r')
    # for line in f:
    #   infileLFN = line.strip()
    #   LFNList.append(infileLFN)
    j.setInputSandbox([user_script, "setupPackageMARS.sh", LFN_file, StatFile])
    j.setExecutable(user_script, script_args)
    j.setOutputSandbox([jobOut, "applicationLog.txt"])
    j.setName(jobName)
    Script.gLogger.info(j._toJDL())
    print "Launching %s %s" % (user_script, script_args)
    Dirac().submit(j)
for i in range(parts):
    if i < 10:
        inputdata_list.append(lfn + 'second/HPC_data.tar.gz.0' + str(i))
    else:
        inputdata_list.append(lfn + 'second/HPC_data.tar.gz.' + str(i))

# SitesList = ['LCG.UKI-NORTHGRID-MAN-HEP.uk', 'LCG.UKI-LT2-IC-HEP.uk', 'LCG.UKI-LT2-QMUL.uk', 'LCG.UKI-NORTHGRID-LANCS-HEP.uk']
# SEList = ['UKI-NORTHGRID-MAN-HEP-disk', 'UKI-LT2-IC-HEP-disk', 'UKI-LT2-QMUL2-disk', 'UKI-NORTHGRID-LANCS-HEP-disk']
SitesList = ['LCG.UKI-NORTHGRID-MAN-HEP.uk']
SEList = ['UKI-NORTHGRID-MAN-HEP-disk']

print("Input data list:\n")
print(inputdata_list)

for i in range(0, total_pixels, chunks):
    id_start = i
    id_end = i + chunks

    dirac = Dirac()
    j.setName('CS Faraday Rotation Measurement Reconstruction - Pixels from '
              + str(id_start) + ' to ' + str(id_end - 1))
    j.setPlatform('EL7')
    j.setTag([str(nprocs) + 'Processors'])
    j.setDestination(SitesList)
    j.setExecutable('RMSynthesis2.sh',
                    arguments=str(nprocs) + ' ' + str(id_start) + ' ' + str(id_end) + ' ' + str(expmnt))
    # Input data
    j.setInputData(inputdata_list)
    j.setInputSandbox(['RMSynthesis2.sh', 'run2.sh',
                       'prmon_1.0.1_x86_64-static-gnu72-opt.tar.gz'])
    # Output data
    j.setOutputSandbox(['StdOut', 'StdErr',
def submitTS():
  ########################################
  # Modify here with your dirac username
  owner = 'user02'
  ########################################

  ########################################
  # Job description
  ########################################
  job = Job()
  job.setName('merge mandelbrot')
  job.setOutputSandbox(['*log'])

  ## define the job workflow in 3 steps
  # job step1: setup software
  job.setExecutable('git clone https://github.com/bregeon/mandel4ts.git')
  # job step2: run mandelbrot merge
  job.setExecutable('./mandel4ts/merge_data.py')

  outputPath = os.path.join('/vo.france-grilles.fr/user', owner[0], owner,
                            'mandelbrot/images/merged')
  outputPattern = 'data_merged*txt'
  outputSE = 'DIRAC-USER'
  nb_input_files = 7
  outputMetadata = json.dumps({"application": "mandelbrot",
                               "image_format": "ascii",
                               "image_width": 7680,
                               "image_height": 200 * nb_input_files,
                               "owner": owner})

  # job step3: upload data and set metadata
  job.setExecutable('./mandel4ts/dirac-add-files.py',
                    arguments="%s '%s' %s '%s'" % (outputPath, outputPattern, outputSE, outputMetadata))

  ########################################
  # Transformation definition
  ########################################
  t = Transformation()
  t.setTransformationName(owner + '_step2')
  t.setType("DataReprocessing")
  t.setDescription("Merge mandelbrot images production")
  t.setLongDescription("Merge mandelbrot images production")
  t.setGroupSize(nb_input_files)  # group input files
  # set the job workflow to the transformation
  t.setBody(job.workflow.toXML())

  # define input data by metadata query
  inputMetaquery = json.dumps({"application": "mandelbrot",
                               "image_format": "ascii",
                               "image_width": 7680,
                               "image_height": 200,
                               "owner": owner})
  t.setFileMask(inputMetaquery)

  ########################################
  # Transformation submission
  ########################################
  res = t.addTransformation()
  if not res['OK']:
    print(res['Message'])
    DIRAC.exit(-1)
  t.setStatus("Active")
  t.setAgentType("Automatic")

  return res
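# A hedged verification sketch (not in the original): the same metadata that
# feeds the transformation's input query can be checked interactively against
# the file catalog. FileCatalogClient.findFilesByMetadata() is assumed to
# accept the metadata dictionary and return the matching LFNs.
def listInputLFNs(owner='user02'):
  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  fc = FileCatalogClient()
  res = fc.findFilesByMetadata({"application": "mandelbrot",
                                "image_format": "ascii",
                                "owner": owner})
  if res['OK']:
    print(res['Value'])  # the LFNs the transformation would pick up
  return res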
# (assumes tail and wave will always be written out together)
if '/'.join([output_path.strip('/'), output_filename_wave]) in GRID_filelist:
    print("\n{} already on GRID SE\n".format(job_name))
    continue

if NJobs == 0:
    print("maximum number of jobs to submit reached")
    print("breaking loop now")
    break
else:
    NJobs -= 1

j = Job()

# runtime in seconds times 8 (CPU normalisation factor)
j.setCPUTime(6 * 3600 * 8)
j.setName(job_name)
j.setInputSandbox(input_sandbox)

if banned_sites:
    j.setBannedSites(banned_sites)
# j.setDestination('LCG.IN2P3-CC.fr')

# mr_filter loses its executable property by uploading it to the GRID SE; reset
j.setExecutable('chmod', '+x mr_filter')

j.setExecutable('ls -lah')

for run_file in run_filelist:
    file_token = re.split('_', run_file)[3]

    # wait for a random number of seconds (up to five minutes) before starting
def submitTS():
  ########################################
  # Modify here with your dirac username
  owner = 'user02'
  ########################################

  ########################################
  # Job description
  ########################################
  job = Job()
  job.setName('mandelbrot raw')
  job.setOutputSandbox(['*log'])
  job.setType('MCSimulation')
  # this is so that the JOB_ID within the transformation can be evaluated on the fly in the job application, see below
  job.workflow.addParameter(Parameter("JOB_ID", "000000", "string", "", "",
                                      True, False, "Initialize JOB_ID"))

  ## define the job workflow in 3 steps
  # job step1: setup software
  job.setExecutable('git clone https://github.com/bregeon/mandel4ts.git')
  # job step2: run mandelbrot application
  # note how the JOB_ID (within the transformation) is passed as an argument and will be evaluated on the fly
  job.setExecutable('./mandel4ts/mandelbrot.py',
                    arguments="-P 0.0005 -M 1000 -L @{JOB_ID} -N 200")

  outputPath = os.path.join('/vo.france-grilles.fr/user', owner[0], owner,
                            'ts_mandelbrot/images/raw')
  outputPattern = 'data_*txt'
  outputSE = 'DIRAC-USER'
  outputMetadata = json.dumps({"application": "mandelbrot",
                               "image_format": "ascii",
                               "image_width": 7680,
                               "image_height": 200,
                               "owner": owner})

  # job step3: upload data and set metadata
  # pilot.cfg in arguments is necessary with pilot 3
  job.setExecutable('./mandel4ts/dirac-add-files.py',
                    arguments="pilot.cfg %s '%s' %s '%s'" % (outputPath, outputPattern, outputSE, outputMetadata))

  # job step4: mark input files as done with the FailoverRequest (and a few other things)
  job.setExecutable('/bin/ls -l', modulesList=['Script', 'FailoverRequest'])

  ########################################
  # Transformation definition
  ########################################
  t = Transformation()
  t.setTransformationName(owner + '_step1')
  t.setType("MCSimulation")
  t.setDescription("Mandelbrot images production")
  t.setLongDescription("Mandelbrot images production")
  # set the job workflow to the transformation
  t.setBody(job.workflow.toXML())

  ########################################
  # Transformation submission
  ########################################
  res = t.addTransformation()
  if not res['OK']:
    print(res['Message'])
    DIRAC.exit(-1)
  t.setStatus("Active")
  t.setAgentType("Automatic")

  return res
        input_sandbox_files.append(script)
        input_sandbox_files.append(pipeline_wrapper)
        executable = "bash %s" % pipeline_wrapper
    else:
        executable = args[0].replace("bash ", "").replace("./", "")
        if not os.path.isfile(executable):
            gLogger.error("file %s not found." % executable)
            dexit(1)
        os.chmod(executable, 0755)  # make file executable
        input_sandbox_files.append(executable)
        j.setExecutable(str(executable))
else:
    gLogger.error("No executable defined.")
    dexit(1)

j.setName("MC job")
if not opts.name is None:
    j.setName(opts.name)

j.setInputSandbox(input_sandbox_files)  # all input files in the sandbox
j.setOutputSandbox(output_sandbox_files)

j.setCPUTime(opts.cpu)
if not opts.site is None:
    j.setDestination(opts.site.split(","))  # can also be a list
if not opts.bannedSites is None:
    j.setBannedSites(opts.bannedSites.split(","))
if not opts.release is None:
    tag = opts.release
def helloWorldJob():
  job = Job()
  job.setName( "helloWorld" )
  job.setInputSandbox( '../../Integration/exe-script.py' )
  job.setExecutable( "exe-script.py", "", "helloWorld.log" )
  return job
        }
    file_dict["%s.txt" % (fn)] = metadata

# Update the user.
print("*")
print("* Uploading the following files:")
for fn in file_dict.keys():
    print("*-> '%s'" % (fn))
print("*")

## The DIRAC job to submit.
j = Job(stdout='StdOut', stderr='StdErr')

# Set the name of the job (viewable in the web portal).
j.setName(jobname)

# As we're just copying the input sandbox to the storage element
# via OutputData, we'll just list the files as a check for the
# output written to StdOut.
j.setExecutable('/bin/ls -l')

# Here we add the names of the temporary copies of the frame data
# files in the dataset to the input sandbox. These will be uploaded
# to the grid with the job...
j.setInputSandbox(file_dict.keys())

# ...and added to the desired storage element with the corresponding
# LFN via the job's OutputData. You may wish to change:
# * The Storage Element - by changing the outputSE parameter;
# * The LFN base name - by changing the outputPath parameter.
from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()

j = Job()
j.setCPUTime(500)
j.setExecutable('ls')
j.setName('testjob')

res = dirac.submitJob(j)
print 'Submission Result: ', res['Value']
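# A hedged follow-up sketch (not in the original): check the return status
# before using 'Value', then poll the job and, once it is Done, fetch its
# output sandbox. dirac.status() and dirac.getOutputSandbox() are assumed
# from the same Dirac API object.
if res['OK']:
    jobid = res['Value']
    print 'Status: ', dirac.status(jobid)
    # after the job reaches Done:
    # dirac.getOutputSandbox(jobid)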
j.setInputSandbox(input_file)
input_file_base = os.path.basename(input_file)

## prepare the list of output files
run_log = input_file_base + ".log"
dat = input_file_base.replace('aug', 'DAT')
datlong = dat + ".long"
output_files = [run_log, 'fluka11.out', 'fluka15.err', dat, datlong]

## prepare the output location in GRID storage; the input path will also be used for GRID storage
outdir = grid_basedir_output + path + "/" + str(runnr)

## ALWAYS, INFO, VERBOSE, WARN, DEBUG
j.setLogLevel('debug')

j.setName('AUGER test simulation')
j.setDestinationCE(ce1)
## j.setDestination(site_dirac)
j.setCPUTime(345600)  ## 4 days

## download the script for preparing corsika input file for usage with cvmfs
j.setExecutable('curl', arguments=' -fsSLkO http://issaf.spacescience.ro/adrian/AUGER/make_run4cvmfs',
                logFile='cmd_logs.log')
j.setExecutable('chmod', arguments=' +x make_run4cvmfs', logFile='cmd_logs.log')

## create the simulation script configured for use with cvmfs
## set the make_run4cvmfs arguments to include the corsika_version and corsika_bin
make_run4cvmfs_arg = input_file_base + " " + corsika_version + " " + corsika_bin
j.setExecutable('./make_run4cvmfs', arguments=make_run4cvmfs_arg, logFile='cmd_logs.log')
def main():
    """ Launch job on the GRID """
    # this thing pilots everything related to the GRID
    dirac = Dirac()

    if switches["output_type"] in "TRAINING":
        print("Preparing submission for TRAINING data")
    elif switches["output_type"] in "DL2":
        print("Preparing submission for DL2 data")
    else:
        print("You have to choose either TRAINING or DL2 as output type!")
        sys.exit()

    # Read configuration file
    cfg = load_config(switches["config_file"])

    # Analysis
    config_path = cfg["General"]["config_path"]
    config_file = cfg["General"]["config_file"]
    mode = cfg["General"]["mode"]  # One mode now
    particle = cfg["General"]["particle"]
    estimate_energy = cfg["General"]["estimate_energy"]
    force_tailcut_for_extended_cleaning = cfg["General"]["force_tailcut_for_extended_cleaning"]

    # Take parameters from the analysis configuration file
    ana_cfg = load_config(os.path.join(config_path, config_file))
    config_name = ana_cfg["General"]["config_name"]
    cam_id_list = ana_cfg["General"]["cam_id_list"]

    # Regressor and classifier methods
    regressor_method = ana_cfg["EnergyRegressor"]["method_name"]
    classifier_method = ana_cfg["GammaHadronClassifier"]["method_name"]

    # Someone might want to create DL2 without score or energy estimation
    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # GRID
    outdir = os.path.join(cfg["GRID"]["outdir"], config_name)
    n_file_per_job = cfg["GRID"]["n_file_per_job"]
    n_jobs_max = cfg["GRID"]["n_jobs_max"]
    model_dir = cfg["GRID"]["model_dir"]
    training_dir_energy = cfg["GRID"]["training_dir_energy"]
    training_dir_classification = cfg["GRID"]["training_dir_classification"]
    dl2_dir = cfg["GRID"]["dl2_dir"]
    home_grid = cfg["GRID"]["home_grid"]
    user_name = cfg["GRID"]["user_name"]
    banned_sites = cfg["GRID"]["banned_sites"]

    # HACK
    if force_tailcut_for_extended_cleaning is True:
        print("Force tail cuts for extended cleaning!!!")

    # Prepare command to launch script
    source_ctapipe = "source /cvmfs/cta.in2p3.fr/software/conda/dev/setupConda.sh"
    source_ctapipe += " && conda activate ctapipe_v0.11.0"

    if switches["output_type"] in "TRAINING":
        execute = "data_training.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--estimate_energy={}".format(str(estimate_energy)),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "TRAINING"
    elif switches["output_type"] in "DL2":
        execute = "write_dl2.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--classifier_config={}.yaml".format(classifier_method),
            "--classifier_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--force_tailcut_for_extended_cleaning={}".format(force_tailcut_for_extended_cleaning),
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "DL2"

    # Make the script save also the full calibrated images if required
    if switches["save_images"] is True:
        script_args.append("--save_images")

    # Make the script print debug information if required
    if switches["debug_script"] is True:
        script_args.append("--debug")

    cmd = [source_ctapipe, "&&", "./" + execute]
    cmd += script_args

    pilot_args_write = " ".join(cmd)

    # For table merging if multiple runs
    pilot_args_merge = " ".join([
        source_ctapipe,
        "&&",
        "./merge_tables.py",
        "--template_file_name",
        "{in_name}",
        "--outfile",
        "{out_name}",
    ])

    prod3b_filelist = dict()
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["EnergyRegressor"]["gamma_list"]
    elif estimate_energy is True and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["GammaHadronClassifier"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["GammaHadronClassifier"]["proton_list"]
    elif switches["output_type"] in "DL2":
        prod3b_filelist["gamma"] = cfg["Performance"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["Performance"]["proton_list"]
        prod3b_filelist["electron"] = cfg["Performance"]["electron_list"]

    # from IPython import embed
    # embed()

    # Split list of files according to storage elements
    with open(prod3b_filelist[particle]) as f:
        filelist = f.readlines()
    filelist = ["{}".format(_.replace("\n", "")) for _ in filelist]
    res = dirac.splitInputData(filelist, n_file_per_job)
    list_run_to_loop_on = res["Value"]

    # define a template name for the file that's going to be written out.
    # the placeholder braces are going to get set during the file-loop
    output_filename = output_filename_template
    output_path = outdir
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_energy)
        step = "energy"
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_classification)
        step = "classification"
    if switches["output_type"] in "DL2":
        if force_tailcut_for_extended_cleaning is False:
            output_path += "/{}/".format(dl2_dir)
        else:
            output_path += "/{}_force_tc_extended_cleaning/".format(dl2_dir)
        step = ""
    output_filename += "_{}.h5"

    # sets all the local files that are going to be uploaded with the job
    # plus the pickled classifier
    # if file name starts with `LFN:`, it will be copied from the GRID
    input_sandbox = [
        # Utility to assign one job to one command...
        os.path.expandvars("$GRID/pilot.sh"),
        os.path.expandvars("$PROTOPIPE/protopipe/"),
        os.path.expandvars("$GRID/merge_tables.py"),
        # python wrapper for the mr_filter wavelet cleaning
        # os.path.expandvars("$PYWI/pywi/"),
        # os.path.expandvars("$PYWICTA/pywicta/"),
        # script that is being run
        os.path.expandvars("$PROTOPIPE/protopipe/scripts/" + execute),
        # Configuration file
        os.path.expandvars(os.path.join(config_path, config_file)),
    ]

    models_to_upload = []
    configs_to_upload = []
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        config_path_template = "LFN:" + os.path.join(home_grid, outdir, model_dir, "{}.yaml")
        config_to_upload = config_path_template.format(regressor_method)
        model_path_template = "LFN:" + os.path.join(home_grid, outdir, model_dir, "regressor_{}_{}.pkl.gz")
        for cam_id in cam_id_list:
            model_to_upload = model_path_template.format(cam_id, regressor_method)  # TBC
            print("The following model(s) will be uploaded to the GRID:")
            print(model_to_upload)
            models_to_upload.append(model_to_upload)
        print("The following config(s) for such models will be uploaded to the GRID:")
        print(config_to_upload)
        configs_to_upload.append(config_to_upload)
        # input_sandbox.append(model_to_upload)
    elif estimate_energy is False and switches["output_type"] in "TRAINING":
        pass
    else:  # Charge also classifier for DL2
        model_type_list = ["regressor", "classifier"]
        model_method_list = [regressor_method, classifier_method]
        config_path_template = "LFN:" + os.path.join(home_grid, outdir, model_dir, "{}.yaml")
        model_path_template = "LFN:" + os.path.join(home_grid, outdir, model_dir, "{}_{}_{}.pkl.gz")
        if force_tailcut_for_extended_cleaning is True:
            force_mode = mode.replace("wave", "tail")
            print("################")
            print(force_mode)
        else:
            force_mode = mode

        for idx, model_type in enumerate(model_type_list):
            print("The following configuration file will be uploaded to the GRID:")
            config_to_upload = config_path_template.format(model_method_list[idx])
            print(config_to_upload)
            configs_to_upload.append(config_to_upload)  # upload only 1 copy

            print("The following model(s) related to such configuration file will be uploaded to the GRID:")
            for cam_id in cam_id_list:
                if model_type in "regressor" and use_regressor is False:
                    print("Do not upload regressor model on GRID!!!")
                    continue
                if model_type in "classifier" and use_classifier is False:
                    print("Do not upload classifier model on GRID!!!")
                    continue
                model_to_upload = model_path_template.format(model_type_list[idx], cam_id, model_method_list[idx])
                print(model_to_upload)
                models_to_upload.append(model_to_upload)
                # input_sandbox.append(model_to_upload)

    # summary before submitting
    print("\nDEBUG> running as:")
    print(pilot_args_write)
    print("\nDEBUG> with input_sandbox:")
    print(input_sandbox)
    print("\nDEBUG> with output file:")
    print(output_filename.format("{job_name}"))
    print("\nDEBUG> Particles:")
    print(particle)
    print("\nDEBUG> Energy estimation:")
    print(estimate_energy)

    # (ASCII-art "RUNNING" banner in the original source)

    # list of files on the GRID SE space
    # not submitting jobs where we already have the output
    batcmd = "dirac-dms-user-lfns --BaseDir {}".format(os.path.join(home_grid, output_path))
    result = subprocess.check_output(batcmd, shell=True)
    try:
        grid_filelist = open(result.split()[-1]).read()
    except IOError:
        raise IOError("ERROR> cannot read GRID filelist...")

    # get jobs from today and yesterday...
    days = []
    for i in range(2):  # how many days do you want to look back?
        days.append((datetime.date.today() - datetime.timedelta(days=i)).isoformat())

    # get list of run_tokens that are currently running / waiting
    running_ids = set()
    running_names = []
    for status in ["Waiting", "Running", "Checking"]:
        for day in days:
            try:
                [running_ids.add(id) for id in dirac.selectJobs(status=status, date=day, owner=user_name)["Value"]]
            except KeyError:
                pass

    n_jobs = len(running_ids)
    if n_jobs > 0:
        print("Scanning {} running/waiting jobs... please wait...".format(n_jobs))
        for i, id in enumerate(running_ids):
            if ((100 * i) / n_jobs) % 5 == 0:
                print("\r{} %".format(((20 * i) / n_jobs) * 5)),
            jobname = dirac.getJobAttributes(id)["Value"]["JobName"]
            running_names.append(jobname)
        else:
            print("\n... done")

    for bunch in list_run_to_loop_on:
        # for bunch in bunches_of_run:
        # from IPython import embed
        # embed()

        # this selects the `runxxx` part of the first and last file in the run
        # list and joins them with a dash so that we get a nice identifier in
        # the output file name.
        # if there is only one file in the list, use only that one
        # run_token = re.split('_', bunch[+0])[3]  # JLK JLK
        run_token = re.split("_", bunch[0])[3]
        if len(bunch) > 1:
            run_token = "-".join([run_token, re.split("_", bunch[-1])[3]])

        print("-" * 50)
        print("-" * 50)

        # setting output name
        output_filenames = dict()
        if switches["output_type"] in "DL2":
            job_name = "protopipe_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], particle, run_token, mode)
            output_filenames[mode] = output_filename.format("_".join([particle, mode, run_token]))
        else:
            job_name = "protopipe_{}_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], step, particle, run_token, mode)
            output_filenames[mode] = output_filename.format("_".join([step, particle, mode, run_token]))

        # if job already running / waiting, skip
        if job_name in running_names:
            print("\n WARNING> {} still running\n".format(job_name))
            continue

        print("Output file name: {}".format(output_filenames[mode]))

        # if file already in GRID storage, skip
        # (you cannot overwrite it there, delete it and resubmit)
        # (assumes tail and wave will always be written out together)
        already_exist = False
        file_on_grid = os.path.join(output_path, output_filenames[mode])
        print("DEBUG> check for existing file on GRID...")
        if file_on_grid in grid_filelist:
            print("\n WARNING> {} already on GRID SE\n".format(job_name))
            continue

        if n_jobs_max == 0:
            print("WARNING> maximum number of jobs to submit reached")
            print("WARNING> breaking loop now")
            break
        else:
            n_jobs_max -= 1

        j = Job()

        # runtime in seconds times 8 (CPU normalisation factor)
        j.setCPUTime(6 * 3600 * 8)
        j.setName(job_name)
        j.setInputSandbox(input_sandbox)

        if banned_sites:
            j.setBannedSites(banned_sites)

        # Add simtel files as input data
        j.setInputData(bunch)

        for run_file in bunch:
            file_token = re.split("_", run_file)[3]

            # wait for a random number of seconds (up to five minutes) before
            # starting to add a bit more entropy in the starting times of the
            # dirac queries.
            # if too many jobs try in parallel to access the SEs,
            # the interface crashes
            # #sleep = random.randint(0, 20)  # seconds
            # #j.setExecutable('sleep', str(sleep))
            # JLK: Try to stop doing that

            # consecutively downloads the data files, processes them,
            # deletes the input
            # and goes on to the next input file;
            # afterwards, the output files are merged
            # j.setExecutable('dirac-dms-get-file', "LFN:" + run_file)

            # source the miniconda ctapipe environment and
            # run the python script with all its arguments
            if switches["output_type"] in "DL2":
                output_filename_temp = output_filename.format("_".join([particle, mode, file_token]))
            if switches["output_type"] in "TRAINING":
                output_filename_temp = output_filename.format("_".join([step, particle, mode, file_token]))
            j.setExecutable(
                "./pilot.sh",
                pilot_args_write.format(
                    outfile=output_filename_temp,
                    infile_name=os.path.basename(run_file),
                    mode=mode,
                ),
            )

            # remove the current file to clear space
            j.setExecutable("rm", os.path.basename(run_file))

        # simple `ls` for good measure
        j.setExecutable("ls", "-lh")

        # if there is more than one file per job, merge the output tables
        if len(bunch) > 1:
            names = []
            names.append(("*_{}_".format(particle), output_filenames[mode]))
            for in_name, out_name in names:
                print("in_name: {}, out_name: {}".format(in_name, out_name))
                j.setExecutable(
                    "./pilot.sh",
                    pilot_args_merge.format(in_name=in_name, out_name=out_name),
                )
                print("DEBUG> args append: {}".format(
                    pilot_args_merge.format(in_name=in_name, out_name=out_name)))

        bunch.extend(models_to_upload)
        bunch.extend(configs_to_upload)
        j.setInputData(bunch)

        print("Input data set to job = {}".format(bunch))

        outputs = []
        outputs.append(output_filenames[mode])
        print("Output file path: {}{}".format(output_path, output_filenames[mode]))

        j.setOutputData(outputs, outputSE=None, outputPath=output_path)

        # check if we should somehow stop doing what we are doing
        if switches["dry"] is True:
            print("\nThis is a DRY RUN! -- NO job has been submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # this sends the job to the GRID and uploads all the
        # files into the input sandbox in the process
        print("\nSUBMITTING job with the following INPUT SANDBOX:")
        print(input_sandbox)
        print("Submission RESULT: {}\n".format(dirac.submitJob(j)["Value"]))

        # break if this is only a test submission
        if switches["test"] is True:
            print("This is a TEST RUN! -- Only ONE job will be submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # since there are two nested loops, need to break again
        if switches["test"] is True:
            break

    try:
        os.remove("datapipe.tar.gz")
        os.remove("modules.tar.gz")
    except:
        pass

    # Upload analysis configuration file for provenance
    SE_LIST = ['CC-IN2P3-USER', 'DESY-ZN-USER', 'CNAF-USER', 'CEA-USER']
    analysis_config_local = os.path.join(config_path, config_file)
    # the configuration file is uploaded to the data directory because
    # the training samples (as well as their cleaning settings) are independent
    analysis_config_dirac = os.path.join(home_grid, output_path, config_file)
    print("Uploading {} to {}...".format(analysis_config_local, analysis_config_dirac))

    if switches["dry"] is False:
        # Upload this file to all Dirac Storage Elements in SE_LIST
        for se in SE_LIST:
            # the uploaded config file overwrites any old copy
            ana_cfg_upload_cmd = "dirac-dms-add-file -f {} {} {}".format(
                analysis_config_dirac, analysis_config_local, se)
            ana_cfg_upload_result = subprocess.check_output(ana_cfg_upload_cmd, shell=True)
            print(ana_cfg_upload_result)
    else:
        print("This is a DRY RUN! -- analysis.yaml has NOT been uploaded.")

    print("\nall done -- exiting now")
    exit()
            already_exist = True
            break
    if already_exist:
        continue

    if NJobs == 0:
        print("maximum number of jobs to submit reached")
        print("breaking loop now")
        break
    else:
        NJobs -= 1

    j = Job()

    # runtime in seconds times 8 (CPU normalisation factor)
    j.setCPUTime(6 * 3600 * 8)
    j.setName(job_name)
    j.setInputSandbox(input_sandbox)

    if banned_sites:
        j.setBannedSites(banned_sites)

    # mr_filter loses its executable property by uploading it to the GRID SE; reset
    j.setExecutable('chmod', '+x mr_filter')

    j.setExecutable('ls -lah')

    for run_file in run_filelist:
        file_token = re.split('_', run_file)[3]

        # wait for a random number of seconds (up to five minutes) before starting
        # to add a bit more entropy in the starting times of the dirac queries.
#####################
##   PREPARE JOB   ##
#####################
if DO_NOT_SUBMIT:
    sys.exit(os.EX_USAGE)

### ALWAYS, INFO, VERBOSE, WARN, DEBUG
j.setLogLevel('debug')

j.setDestination(site_dirac)

JOB_IDX = first_job + 1 + idx
JOB_NAME = PROD_NAME + " IDX_" + str(JOB_IDX)
print '\nJOB NAME is : ', JOB_NAME

j.setName(JOB_NAME)
j.setCPUTime(JOB_CPUTIME)  ## 4 days

run_corsika_sim_args = input_file_base + " " + corsika_version + " " + corsika_bin
j.setExecutable('./run_corsika_sim', arguments=run_corsika_sim_args, logFile='run_sim.log')

if TEST_JOB:
    jobID = dirac.submit(j, mode='local')
else:
    jobID = dirac.submit(j)

print 'Submission Result: ', jobID
with open('jobids.list', 'a') as f_id_log:
    f_id_log.write(str(jobID) + '\n')
from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job

dirac = Dirac()

j = Job()
j.setName("compile_mrfilter")
j.setCPUTime(80)
j.setInputSandbox(["dirac_compile_mrfilter_pilot.sh"])
j.setExecutable("dirac_compile_mrfilter_pilot.sh", "")
j.setOutputData(["mr_filter"], outputSE=None,
                outputPath="cta/bin/mr_filter/v3_1/")
dirac.submit(j)
from DIRAC.tests.Utilities.utils import find_all

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac
# from tests.Workflow.Integration.Test_UserJobs import createJob

gLogger.setLevel( 'DEBUG' )

cwd = os.path.realpath( '.' )

dirac = Dirac()

# Simple Hello World job to DIRAC.Jenkins.ch
gLogger.info( "\n Submitting hello world job targeting DIRAC.Jenkins.ch" )
helloJ = Job()
helloJ.setName( "helloWorld-TEST-TO-Jenkins" )
helloJ.setInputSandbox( [find_all( 'exe-script.py', '..', '/DIRAC/tests/Workflow/' )[0]] )
helloJ.setExecutable( "exe-script.py", "", "helloWorld.log" )
helloJ.setCPUTime( 17800 )
helloJ.setDestination( 'DIRAC.Jenkins.ch' )
result = dirac.submit( helloJ )
gLogger.info( "Hello world job: ", result )
if not result['OK']:
  gLogger.error( "Problem submitting job", result['Message'] )
  exit( 1 )

# Simple Hello World job to DIRAC.Jenkins.ch, that needs to be matched by a MP WN
gLogger.info( "\n Submitting hello world job targeting DIRAC.Jenkins.ch and a MP WN" )
helloJMP = Job()
helloJMP.setName( "helloWorld-TEST-TO-Jenkins-MP" )
helloJMP.setInputSandbox( [find_all( 'exe-script.py', '..', '/DIRAC/tests/Workflow/' )[0]] )
def submitTS():
  ########################################
  # Modify here with your dirac username
  owner = 'user02'
  ########################################

  ########################################
  # Job description
  ########################################
  job = Job()
  job.setName('build mandelbrot')
  job.setOutputSandbox( ['*log'] )
  job.setType('DataReprocessing')

  ## define the job workflow in 3 steps
  # job step1: setup software
  job.setExecutable('git clone https://github.com/bregeon/mandel4ts.git')
  # job step2: run mandelbrot build image
  job.setExecutable('./mandel4ts/build_merged_img.py')

  outputPath = os.path.join('/vo.france-grilles.fr/user', owner[0], owner, 'mandelbrot/images/final')
  outputPattern = 'merged_image.bmp'
  outputSE = 'DIRAC-USER'
  outputMetadata = json.dumps( {"application": "mandelbrot", "image_format": "bmp",
                                "image_width": 7680, "image_height": 4200, "owner": owner} )

  # job step3: upload data and set metadata
  job.setExecutable( './mandel4ts/dirac-add-files.py',
                     arguments = "%s '%s' %s '%s'" % (outputPath, outputPattern, outputSE, outputMetadata) )

  # job step4: mark input files as done with the FailoverRequest (and a few other things)
  job.setExecutable('/bin/ls -l', modulesList=['Script', 'FailoverRequest'])

  ########################################
  # Transformation definition
  ########################################
  t = Transformation()
  t.setTransformationName( owner + '_step3' )
  t.setType( "DataReprocessing" )
  t.setDescription( "Merge mandelbrot images production" )
  t.setLongDescription( "Merge mandelbrot images production" )
  t.setGroupSize( 3 )  # group input files
  # set the job workflow to the transformation
  t.setBody( job.workflow.toXML() )

  # define input data by metadata query
  inputMetaquery = {"application": "mandelbrot", "image_format": "ascii",
                    "image_width": 7680, "image_height": 1400, "owner": owner}
  t.setInputMetaQuery(inputMetaquery)

  ########################################
  # Transformation submission
  ########################################
  res = t.addTransformation()
  if not res['OK']:
    print(res['Message'])
    DIRAC.exit( -1 )
  t.setStatus( "Active" )
  t.setAgentType( "Automatic" )

  return res
def main(dataset, chunksize, test):
    '''
    The DATASET argument is a list of paths to MC files on the grid, like the
    output of cta-prod3-dump-dataset for example. See also
    https://forge.in2p3.fr/projects/cta_dirac/wiki/CTA-DIRAC_MC_PROD3_Status

    Keep in mind that for some effing reason this needs to be executed within
    this weird 'dirac' environment which comes with its own glibc, python and
    pip. I guess the real Mr. Dirac would turn in his grave.
    '''
    dirac = Dirac()

    with open(dataset) as f:
        simtel_files = f.readlines()
        print('Analysing {}'.format(len(simtel_files)))

    server_list = ["TORINO-USER", "CYF-STORM-USER", "CYF-STORM-Disk", "M3PEC-Disk",
                   "OBSPM-Disk", "POLGRID-Disk", "FRASCATI-USER", "LAL-Disk",
                   "CIEMAT-Disk", "CIEMAT-USER", "CPPM-Disk", "LAL-USER",
                   "CYFRONET-Disk", "DESY-ZN-USER", "M3PEC-USER", "LPNHE-Disk",
                   "LPNHE-USER", "LAPP-USER", "LAPP-Disk"]
    desy_server = 'DESY-ZN-USER'

    servers_with_miniconda = ['LCG.IN2P3-CC.fr', 'LCG.DESY-ZEUTHEN.de', 'LCG.CNAF.it',
                              'LCG.GRIF.fr', 'LCG.CYFRONET.pl', 'LCG.Prague.cz',
                              'LCG.CIEMAT.es']

    chunks = np.array_split(sorted(simtel_files), int(len(simtel_files) / chunksize))
    print('Got a total of {} chunks'.format(len(chunks)))
    for c, simtel_filenames in tqdm(enumerate(chunks[0:2])):  # send just 2 jobs for now.
        # convert chunk to a list of strings, because this dirac thing can't take numpy arrays
        simtel_filenames = [str(s).strip() for s in simtel_filenames if 'SCT' not in s]
        print('Starting processing for chunk {}'.format(c))
        print(simtel_filenames)

        j = Job()
        # set runtime to 0.5h
        j.setCPUTime(30 * 60)
        j.setName('cta_preprocessing_{}'.format(c))
        j.setInputData(simtel_filenames)
        j.setOutputData(['./processing_output/*.hdf5'], outputSE=None,
                        outputPath='cta_preprocessing/')
        j.setInputSandbox(['../process_simtel.py', './install_dependencies.py'])
        j.setOutputSandbox(['cta_preprocessing.log'])
        j.setExecutable('./job_script.sh')
        # These servers seem to have miniconda installed
        # destination = np.random.choice(servers_with_miniconda)
        j.setDestination(servers_with_miniconda)

        value = dirac.submit(j)
        print('Number {} Submission Result: {}'.format(c, value))
"../tools/pexpect.py", "../tools/StorageElement.py" ] # these files are created by the job outputSandbox =\ [ stdout, stderr, logfile ] # the executible here '' is later set, so don't confuse users later on diracJob = Job('', stdout, stderr) # give a descriptive name diracJob.setName('ND280Custom') # set the program/executable, arguments, logFile, ... diracJob.setExecutable(exe, arguments=args, logFile=logfile) # set the job length diracJob.setCPUTime(3600) diracJob.setExecutionEnv(environmentDict) diracJob.setInputSandbox(inputSandbox) diracJob.setOutputSandbox(outputSandbox) print 'job being submitted...' dirac = Dirac() result = dirac.submit(diracJob) print 'Submission Result: ', result
# NOTE: excerpt from a longer script; sys, Job and Dirac are imported above,
# and 'sites' (the default list of candidate sites) is defined in the part
# that is not shown.
visit = sys.argv[1]
idx = int(sys.argv[2])
print "Submitting jobs for visit", visit, "index", idx

listfile = 'site_test_jobs.txt'
if len(sys.argv) == 4:
    sites = [sys.argv[3]]
    listfile = 'site_test_' + sys.argv[3] + '.txt'

# open a file to record a list of job IDs
joblistfile = open(listfile, 'w')

for site in sites:
    j = Job()
    j.setName("ImSim_" + visit + "_" + str(idx))
    instcatname = visit + ".tar.gz"
    insidename = 'phosim_cat_' + str(int(visit)) + '.txt'
    #startsensor = idx * 4
    #numsensors = 4
    #if idx == 47:
    #    numsensors = 1
    startsensor = idx
    numsensors = 1
    args = visit + ' ' + insidename + ' ' + str(startsensor) + ' ' \
        + str(numsensors) + ' ' + str(idx)
    outputname = 'fits_' + visit + '_' + str(idx) + '.tar'
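    # --- Hypothetical continuation (not in the original excerpt): the loop
    # --- presumably finished the job description and submitted it. Only names
    # --- already used above are reused; the wrapper script name is an
    # --- assumption made for illustration, and Dirac is assumed imported.
    j.setExecutable('./run_imsim.sh', arguments=args)  # assumed wrapper script
    j.setDestination(site)
    result = Dirac().submitJob(j)
    if result['OK']:
        joblistfile.write(str(result['Value']) + '\n')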
from DIRAC.Core.Base import Script
Script.parseCommandLine(ignoreErrors=True)
# end of DIRAC setup

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac
import pprint
import sys

dirac = Dirac()

j = Job()
j.setCPUTime(500)
j.setExecutable('/bin/echo hello')
j.setExecutable('/bin/hostname')
j.setExecutable('/bin/echo hello again')
j.setName('API')

result = dirac.submitJob(j)
print 'Submission Result: '
pprint.pprint(result)

# guard against a failed submission, which has no 'JobID' key
if not result['OK']:
    print 'Submission failed: ', result['Message']
    sys.exit(1)

jobid = result['JobID']

# print job id to file for future reference
joblog = open("jobid.log", "a")
joblog.write(str(jobid) + '\n')
joblog.close()

# to interactively check on job status do:
# dirac-wms-job-status -f jobid.log

print "\nThe current status of this job is:"
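# The excerpt ends mid-flow; in the standard DIRAC tutorial this print is
# followed by a status query through the same API object. dirac.status()
# takes a job ID (or list of IDs) and returns an S_OK structure; treat the
# exact signature as an assumption if your DIRAC version differs.
pprint.pprint(dirac.status(jobid))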
def Stereo(args=None):
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Interfaces.API.Job import Job

    user_script = './stereo.sh'
    macro = './CTAstereo.C'

    if (len(args) != 5):
        Script.showHelp()

    particle = args[0]
    typeofdata = args[1]
    direction = args[2]
    zenith = args[3]
    diffuse = args[4]

    if typeofdata == 'train':
        # The master layout with all the telescopes
        candidates = './Prod3_3HB9All_Candidates.txt'
    elif typeofdata == 'test':
        # Different layouts
        candidates = './Prod3_3HB9_Candidates_Full.txt'
    else:
        print "Invalid type of data definition!"
        Script.showHelp()
        return 1

    if diffuse == "0":
        diffName = "point"
    elif diffuse == "1":
        diffName = "diff"
    else:
        print "Invalid extension definition!"
        Script.showHelp()
        return 1

    if zenith == "40":
        zenName = "40deg"
    elif zenith == "20":
        zenName = "20deg"
    else:
        print "Invalid zenith definition!"
        Script.showHelp()
        return 1

    if direction == "N":
        directionName = "north"
        # deg = "180"
    elif direction == "S":
        directionName = "south"
        # deg = "0"
    else:
        print 'Wrong direction. It can only be "N" or "S".'
        Script.showHelp()
        return 1

    filesPerJob = 5
    site = "PARANAL"
    listname = './training/gamma_trainLUT_%s_%s_%s.lfns' % (zenName, diffName, direction)

    loop = 0
    iJob = 0
    # totalEntries /= (2*filesPerJob)
    # print totalEntries

    # count the input LFNs, then reopen the list for the submission loop
    f = open(listname, 'r')
    totalEntries = sum(1 for _ in f)
    f.close()
    f = open(listname, 'r')

    fileList = []
    text_file_name = "lfnFiles_%s_%s_%s_%s.txt" % (particle, direction, zenName, diffuse)
    text_file = open(text_file_name, "w")

    for line in f:
        loop = loop + 1
        infileLFN = line.strip()
        # filein = os.path.basename(infileLFN)
        fileList.append(infileLFN)
        text_file.write("%s\n" % infileLFN)
        remain = loop % filesPerJob
        if iJob == 10:
            break
        if loop == totalEntries:
            remain = 0
        if remain == 0:
            iJob = iJob + 1
            j = Job()
            text_file.close()
            j.setInputSandbox([user_script, "setupPackageMARS.sh", text_file_name,
                               candidates, macro])
            jobName = "%s %s %s %s %s %s %s %s" % (user_script, site, particle, typeofdata,
                                                   directionName, zenName, diffName, iJob)
            jobOut = "%s_%s_%s_%s_%s.out" % (user_script, site, typeofdata, directionName, iJob)
            script_args = "%s %s %s %s %s %s %s" % (particle, typeofdata, direction, zenName,
                                                    diffName, site, iJob)
            j.setExecutable(user_script, script_args)
            j.setOutputSandbox([jobOut, "applicationLog.txt"])
            j.setName(jobName)
            j.setBannedSites(['LCG.MSFG.fr', 'LCG.M3PEC.fr', 'LCG.OBSPM.fr',
                              'LCG.UNI-DORTMUND.de', 'LCG.UNIV-LILLE.fr',
                              'LCG.GRIF.fr', 'ARC.SE-SNIC-T2.se'])
            Script.gLogger.info(j._toJDL())
            print "Submitting job %s" % (jobName)
            Dirac().submit(j)
            fileList = []
            text_file = open(text_file_name, "w")
import os

from DIRAC import gLogger
from DIRAC.tests.Utilities.utils import find_all
from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac
#from tests.Workflow.Integration.Test_UserJobs import createJob

gLogger.setLevel('DEBUG')
cwd = os.path.realpath('.')

dirac = Dirac()

# Simple Hello World job to DIRAC.Jenkins.ch
gLogger.info("\n Submitting hello world job targeting DIRAC.Jenkins.ch")
helloJ = Job()
helloJ.setName("helloWorld-TEST-TO-Jenkins")
helloJ.setInputSandbox([find_all('exe-script.py', '..', '/DIRAC/tests/Workflow/')[0]])
helloJ.setExecutable("exe-script.py", "", "helloWorld.log")
helloJ.setCPUTime(1780)
helloJ.setDestination('DIRAC.Jenkins.ch')
helloJ.setLogLevel('DEBUG')
result = dirac.submitJob(helloJ)
gLogger.info("Hello world job: ", result)
if not result['OK']:
    gLogger.error("Problem submitting job", result['Message'])
    exit(1)

# Simple Hello World job to DIRAC.Jenkins.ch, that needs to be matched by a MP WN
gLogger.info("\n Submitting hello world job targeting DIRAC.Jenkins.ch and a MP WN")
helloJMP = Job()
helloJMP.setName("helloWorld-TEST-TO-Jenkins-MP")
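# The multi-processor job set-up is cut off above. It presumably mirrors the
# first job, plus a requirement that only multi-processor worker nodes match.
# A sketch, assuming a DIRAC version where Job.setNumberOfProcessors() is
# available:
helloJMP.setInputSandbox([find_all('exe-script.py', '..', '/DIRAC/tests/Workflow/')[0]])
helloJMP.setExecutable("exe-script.py", "", "helloWorld.log")
helloJMP.setCPUTime(1780)
helloJMP.setDestination('DIRAC.Jenkins.ch')
helloJMP.setNumberOfProcessors(2)  # assumption: ask for a 2-processor slot
result = dirac.submitJob(helloJMP)
gLogger.info("Hello world MP job: ", result)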
def TrainRF(args=None):
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Interfaces.API.Job import Job

    user_script = './trainRF.sh'

    if (len(args) != 3):
        Script.showHelp()

    direction = args[0]
    zenith = args[1]
    diffuse = args[2]
    site = "PARANAL"

    if diffuse == "0":
        diffName = "point"
    elif diffuse == "1":
        diffName = "diff"
    else:
        print "Invalid extension definition!"
        Script.showHelp()
        return 1

    if zenith == "40":
        zenName = "40deg"
    elif zenith == "20":
        zenName = "20deg"
    else:
        print "Invalid zenith definition!"
        Script.showHelp()
        return 1

    if direction == "N":
        directionName = "north"
        # deg = "180"
    elif direction == "S":
        directionName = "south"
        # deg = "0"
    else:
        print 'Wrong direction. It can only be "N" or "S".'
        Script.showHelp()
        return 1

    # Macro that fixes the file check before continuing
    ROOTmacro = "CTAtrain.C"

    # Lists of files over which the training should be done
    LFN_file_gammas = './training/gamma_ghtrain_%s_%s_%s.lfns' % (zenName, diffName, direction)
    LFN_file_protons = './training/proton_ghtrain_%s_%s_%s.lfns' % (zenName, diffName, direction)
    StatFile = './Statistic_train.txt'

    for telType in range(0, 6):
        jobName = "%s_%s_%s_%s_%s" % (user_script, directionName, diffName, telType, zenName)
        jobOut = "%s_%s_%s_%s.out" % (user_script, directionName, diffName, telType)
        script_args = "%s %s %s %s %s" % (direction, site, diffName, telType, zenName)
        j = Job()
        j.setInputSandbox([user_script, "setupPackageMARS.sh", LFN_file_gammas,
                           LFN_file_protons, ROOTmacro, StatFile])
        j.setExecutable(user_script, script_args)
        j.setOutputSandbox([jobOut, "applicationLog.txt"])
        j.setName(jobName)
        Script.gLogger.info(j._toJDL())
        print "Launching %s %s" % (user_script, script_args)
        Dirac().submit(j)
# DO NOT CHANGE THE ORDER OF THESE!
from DIRAC.Core.Base import Script
Script.parseCommandLine()
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job

jobName = 'HelloWorld'
executable = '/bin/echo'
stdout = 'std.out'
stderr = 'std.err'
logfile = 'HelloWorldjob.log'

# The executable '' passed to the Job constructor is set later via
# setExecutable(), so don't confuse users later on
diracJob = Job('', stdout, stderr)
diracJob.setName(jobName)

# Set the program/executable, arguments, logFile, ...
diracJob.setExecutable('echo', arguments='"Hello world!"')

# multiple executables can be set/appended
# diracJob.setExecutable('ls', arguments='-l')
# diracJob.setExecutable(executable, arguments='"hello again"')

# Set the job length, though it is not needed in this example
diracJob.setCPUTime(500)

print 'submitting job', jobName
dirac = Dirac()
result = dirac.submitJob(diracJob)
print 'Submission Result: ', result
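
# Once the job has run, the same Dirac API object can fetch its output; a
# short sketch using getOutputSandbox(), which downloads the sandbox into a
# directory named after the job ID (behaviour may vary across DIRAC versions):
if result['OK']:
    jobID = result['Value']
    print 'retrieving output sandbox for job', jobID
    dirac.getOutputSandbox(jobID)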
# dirac job created by ganga
from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

j = Job()
dirac = Dirac()

# default commands added by ganga
j.setName("helloWorld-test")
j.setInputSandbox(['/afs/cern.ch/user/f/fstagni/userJobs/_inputHello.tar.bz2',
                   '/afs/cern.ch/user/f/fstagni/userJobs/hello-script.py'])
j.setExecutable("exe-script.py", "", "Ganga_Executable.log")

# <-- user settings
j.setCPUTime(172800)
j.setBannedSites(['LCG.CERN.ch', 'LCG.CNAF.it', 'LCG.GRIDKA.de', 'LCG.IN2P3.fr',
                  'LCG.NIKHEF.nl', 'LCG.PIC.es', 'LCG.RAL.uk', 'LCG.SARA.nl'])
# user settings -->

#print j.workflow

# submit the job to dirac
result = dirac.submit(j)
print result
def Flux(args=None):
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Interfaces.API.Job import Job
    import time
    import os.path

    user_script = './flux.sh'
    modmacro = './CTAflux_speeed.C'
    site = "PARANAL"

    if (len(args) != 5):
        Script.showHelp()

    typeofdata = "test"
    particle = args[0]
    direction = args[1]
    MOD = args[2]
    exten = args[3]
    zenName = args[4]

    # List of files over which flux should be run
    LFN_file = "./stereofiles/lfn_%s_%s_%s_%s.lfns" % (particle, exten, zenName, direction)
    fileLength = sum(1 for line in open(LFN_file))
    f = open(LFN_file, 'r')

    if particle == "proton":
        filesPerJob = 10
    else:
        filesPerJob = 20

    fileList = []
    text_file_name = "lfnStereoFiles_%s_%s_%s_%s.txt" % (particle, exten, typeofdata, direction)
    text_file = open(text_file_name, "w")

    # File containing the id numbers of files already produced. The relaunch
    # of these jobs will be skipped
    done_file_name = "./stereofiles/done/done_%s_%s_%s_%s.lfns" % (particle, exten, zenName, direction)
    if os.path.exists(done_file_name):
        done_content = [int(line.strip()) for line in open(done_file_name, 'r')]
    else:
        done_content = []

    loop = 0
    iJob = 0
    for line in f:
        loop = loop + 1
        infileLFN = line.strip()
        fileList.append(infileLFN)
        text_file.write("%s\n" % infileLFN)
        remain = loop % filesPerJob
        if remain == 0 or loop == fileLength:
            iJob = iJob + 1
            # Skipping of already finished jobs
            if iJob in done_content:
                text_file.close()
                fileList = []
                text_file = open(text_file_name, "w")
                continue
            else:
                j = Job()
                text_file.close()
                j.setInputSandbox([user_script, "setupPackageMARS.sh", "CheckFileZombie.C",
                                   text_file_name, modmacro])
                jobName = "%s_%s_%s_%s_%s_%s_%s" % (user_script, site, particle, direction,
                                                    iJob, exten, zenName)
                jobOut = "%s_%s_%s_%s_%s.out" % (user_script, site, particle, direction, iJob)
                script_args = "%s %s %s %s %s %s %s" % (particle, site, iJob, direction,
                                                        MOD, exten, zenName)
                j.setExecutable(user_script, script_args)
                j.setOutputSandbox([jobOut, "applicationLog.txt"])
                j.setName(jobName)
                j.setBannedSites(['LCG.MSFG.fr', 'LCG.M3PEC.fr', 'LCG.OBSPM.fr',
                                  'LCG.UNI-DORTMUND.de', 'LCG.UNIV-LILLE.fr',
                                  'LCG.Prague.cz', 'LCG.GRIF.fr'])
                Script.gLogger.info(j._toJDL())
                print "Submitting job %s %s %s %s %s %s" % (user_script, zenName, particle,
                                                            direction, site, iJob)
                time.sleep(3)
                Dirac().submit(j)
                fileList = []
                text_file = open(text_file_name, "w")
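
# Bookkeeping sketch (hypothetical helper, not part of the original script):
# once a job is known to have finished successfully, appending its index to
# the done-file makes a re-run of Flux() skip it, matching the done_content
# check above.
def mark_done(done_file_name, iJob):
    with open(done_file_name, 'a') as done_file:
        done_file.write('%d\n' % iJob)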