def execute(arguments): """The only real function executed here""" global gJobReport jobID = arguments["Job"].get("JobID", 0) os.environ["JOBID"] = str(jobID) jobID = int(jobID) if "WorkingDirectory" in arguments: wdir = os.path.expandvars(arguments["WorkingDirectory"]) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs( wdir ) # this will raise an exception if wdir already exists (which is ~OK) if os.path.isdir(wdir): os.chdir(wdir) except OSError as osError: if osError.errno == errno.EEXIST and os.path.isdir(wdir): gLogger.exception( "JobWrapperTemplate found that the working directory already exists" ) rescheduleResult = rescheduleFailedJob( jobID, "Working Directory already exists") else: gLogger.exception( "JobWrapperTemplate could not create working directory" ) rescheduleResult = rescheduleFailedJob( jobID, "Could Not Create Working Directory") return 1 gJobReport = JobReport(jobID, "JobWrapper") try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) # initialize doesn't return S_OK/S_ERROR except Exception as exc: # pylint: disable=broad-except gLogger.exception("JobWrapper failed the initialization phase", lException=exc) rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION) return 1 if "InputSandbox" in arguments["Job"]: gJobReport.commit() try: result = job.transferInputSandbox(arguments["Job"]["InputSandbox"]) if not result["OK"]: gLogger.warn(result["Message"]) raise JobWrapperError(result["Message"]) except JobWrapperError: gLogger.exception("JobWrapper failed to download input sandbox") rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX) return 1 except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception while downloading input sandbox", lException=exc) rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX) return 1 else: gLogger.verbose("Job has no InputSandbox requirement") gJobReport.commit() if "InputData" in arguments["Job"]: if arguments["Job"]["InputData"]: try: result = job.resolveInputData() if not result["OK"]: gLogger.warn(result["Message"]) raise JobWrapperError(result["Message"]) except JobWrapperError: gLogger.exception("JobWrapper failed to resolve input data") rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION) return 1 except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception while resolving input data", lException=exc) rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION) return 1 else: gLogger.verbose("Job has a null InputData requirement:") gLogger.verbose(arguments) else: gLogger.verbose("Job has no InputData requirement") gJobReport.commit() try: result = job.execute() if not result["OK"]: gLogger.error("Failed to execute job", result["Message"]) raise JobWrapperError((result["Message"], result["Errno"])) except JobWrapperError as exc: if exc.value[1] == 0 or str(exc.value[0]) == "0": gLogger.verbose("JobWrapper exited with status=0 after execution") if exc.value[1] == DErrno.EWMSRESC: gLogger.warn("Asked to reschedule job") rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION) return 1 gLogger.exception("Job failed in execution phase") gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting(status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC) return 1 except Exception as exc: # pylint: disable=broad-except gLogger.exception("Job raised exception during execution phase", lException=exc) gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting(status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC) return 1 if "OutputSandbox" in arguments["Job"] or "OutputData" in arguments["Job"]: try: result = job.processJobOutputs() if not result["OK"]: gLogger.warn(result["Message"]) raise JobWrapperError(result["Message"]) except JobWrapperError as exc: gLogger.exception("JobWrapper failed to process output files") gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS) return 2 except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception while processing output files", lException=exc) gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS) return 2 else: gLogger.verbose("Job has no OutputData or OutputSandbox requirement") try: # Failed jobs will return !=0 / successful jobs will return 0 return job.finalize() except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception during the finalization phase", lException=exc) return 2
def execute(arguments): """ The only real function executed here """ global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) if 'WorkingDirectory' in arguments: wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs( wdir ) # this will raise an exception if wdir already exists (which is ~OK) if os.path.isdir(wdir): os.chdir(wdir) except OSError as osError: if osError.errno == errno.EEXIST and os.path.isdir(wdir): gLogger.exception( 'JobWrapperTemplate found that the working directory already exists' ) rescheduleResult = rescheduleFailedJob( jobID, 'Working Directory already exists') else: gLogger.exception( 'JobWrapperTemplate could not create working directory' ) rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory') return 1 gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) # initialize doesn't return S_OK/S_ERROR except Exception as exc: #pylint: disable=broad-except gLogger.exception('JobWrapper failed the initialization phase', lException=exc) rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) try: job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization') except Exception as exc: #pylint: disable=broad-except gLogger.exception('JobWrapper failed sending job accounting', lException=exc) return 1 if 'InputSandbox' in arguments['Job']: gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except JobWrapperError: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 except Exception as exc: #pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception while downloading input sandbox', lException=exc) rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if 'InputData' in arguments['Job']: if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except JobWrapperError: gLogger.exception('JobWrapper failed to resolve input data') rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 except Exception as exc: #pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception while resolving input data', lException=exc) rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments) else: gLogger.verbose('Job has no InputData requirement') gJobReport.commit() try: result = job.execute(arguments) if not result['OK']: gLogger.error('Failed to execute job', result['Message']) raise JobWrapperError((result['Message'], result['Errno'])) except JobWrapperError as exc: if exc.value[1] == 0 or str(exc.value[0]) == '0': gLogger.verbose('JobWrapper exited with status=0 after execution') if exc.value[1] == DErrno.EWMSRESC: gLogger.warn("Asked to reschedule job") rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport) job.sendJobAccounting(rescheduleResult, 'JobWrapper execution') return 1 gLogger.exception('Job failed in execution phase') gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False) job.sendFailoverRequest('Failed', 'Exception During Execution') return 1 except Exception as exc: #pylint: disable=broad-except gLogger.exception('Job raised exception during execution phase', lException=exc) gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False) job.sendFailoverRequest('Failed', 'Exception During Execution') return 1 if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']: try: result = job.processJobOutputs(arguments) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except JobWrapperError as exc: gLogger.exception('JobWrapper failed to process output files') gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False) job.sendFailoverRequest('Failed', 'Uploading Job Outputs') return 2 except Exception as exc: # pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception while processing output files', lException=exc) gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False) job.sendFailoverRequest('Failed', 'Uploading Job Outputs') return 2 else: gLogger.verbose('Job has no OutputData or OutputSandbox requirement') try: # Failed jobs will return 1 / successful jobs will return 0 return job.finalize() except Exception as exc: #pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception during the finalization phase', lException=exc) return 2
#!/usr/bin/env python import os import sys from DIRAC.Core.Base import Script Script.initialize(ignoreErrors=True) from DIRAC.Interfaces.API.Dirac import Dirac from DIRAC.Interfaces.API.Job import Job from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ.get('DIRACJOBID', '0') if not jobID: print 'DIRAC job ID not found' sys.exit(1) jobReport = JobReport(jobID, 'JUNO_JobScript') result = jobReport.setApplicationStatus(', '.join(sys.argv[1:])) if not result['OK']: print 'Set application status error: %s' % result
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("T:", "template=", "Corsika Template") Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig") Script.registerSwitch("p:", "run_number=", "Do not use: Run Number automatically set") Script.registerSwitch("E:", "executable=", "Executable (Use SetExecutable)") Script.registerSwitch("V:", "version=", "Version (Use setVersion)") Script.registerSwitch("M:", "mode=", "Mode (corsika_standalone/corsika_simtelarray)") Script.parseCommandLine(ignoreErrors=True) ## default values ############## run_number = None corsikaTemplate = None simtelConfig = None executable = None version = None mode = 'corsika_simtelarray' ### set switch values ### for switch in Script.getUnprocessedSwitches(): if switch[0] == "run_number" or switch[0] == "p": run_number = switch[1].split('ParametricParameters=')[1] elif switch[0] == "template" or switch[0] == "T": corsikaTemplate = switch[1] elif switch[0] == "simtelConfig" or switch[0] == "S": simtelConfig = switch[1] elif switch[0] == "executable" or switch[0] == "E": executable = switch[1] elif switch[0] == "version" or switch[0] == "V": version = switch[1] elif switch[0] == "mode" or switch[0] == "M": mode = switch[1] if version == None or executable == None or run_number == None or corsikaTemplate == None: Script.showHelp() jobReport.setApplicationStatus('Missing options') DIRAC.exit(-1) from CTADIRAC.Core.Workflow.Modules.CorsikaApp import CorsikaApp from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) createGlobalsFromConfigFiles(corsikaTemplate, version) ############ Producing Corsika File install_CorsikaSimtelPack(version) CorsikaSimtelPack = 'corsika_simhessarray/' + version + '/corsika_simhessarray' cs = CorsikaApp() cs.setSoftwarePackage(CorsikaSimtelPack) cs.csExe = executable cs.csArguments = [ '--run-number', run_number, '--run', 'corsika', corsikaTemplate ] corsikaReturnCode = cs.execute() if corsikaReturnCode != 0: DIRAC.gLogger.error('Corsika Application: Failed') jobReport.setApplicationStatus('Corsika Application: Failed') DIRAC.exit(-1) ###################### rename of corsika output file ####################### rundir = 'run' + run_number filein = rundir + '/' + corsikaOutputFileName corsikaFileName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.gz' mv_cmd = 'mv ' + filein + ' ' + corsikaFileName if (os.system(mv_cmd)): DIRAC.exit(-1) ### create corsika tar luisa #################### corsikaTarName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.tar.gz' filetar1 = rundir + '/' + 'input' filetar2 = rundir + '/' + 'DAT' + run_number + '.dbase' filetar3 = rundir + '/run' + str(int(run_number)) + '.log' cmdTuple = [ '/bin/tar', 'zcf', corsikaTarName, filetar1, filetar2, filetar3 ] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error('Failed to execute tar') DIRAC.exit(-1) if (mode == 'corsika_standalone'): DIRAC.exit() ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "NORTH": 'cta-prod2n', "SCMST": 'cta-prod2-sc3' } #if simtelConfig=="6INROW": # all_configs=["4MSST","SCSST","ASTRI","NSBX3","STD","SCMST"] #elif simtelConfig=="5INROW": # all_configs=["4MSST","SCSST","ASTRI","NSBX3","STD"] #else: # all_configs=[simtelConfig] all_configs = [simtelConfig] for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) current_version = version DIRAC.gLogger.notice('current version is', current_version) #if current_conf == "SCMST": # current_version = version + '_sc3' # DIRAC.gLogger.notice('current version is', current_version) # if os.path.isdir('sim_telarray'): # DIRAC.gLogger.notice('Package found in the local area. Removing package...') # cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' # if(os.system(cmd)): # DIRAC.exit( -1 ) # install_CorsikaSimtelPack(current_version) #else: # current_version = version # DIRAC.gLogger.notice('current version is', current_version) #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! /bin/sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2 export CORSIKA_IO_BUFFER=800MB ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, current_conf)) fd.close() #################################### os.system('chmod u+x run_sim.sh') os.system('chmod u+x grid_prod2-repro.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendSimtelOutput) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log') == 0): DIRAC.gLogger.error('Broken string found in simtel.log') jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## check simtel data/log/histo Output File exist cfg = cfg_dict[current_conf] simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/*/Data/*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) simtelLogFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/*/Log/*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/*/Histograms/*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) ################################################################ DIRAC.exit()
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("p:", "inputfile=", "Input File") Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig") Script.registerSwitch("V:", "version=", "Version (Use setVersion)") Script.registerSwitch("C:", "comp=", "Compile (True/False)") Script.parseCommandLine(ignoreErrors=True) ## default values ############## simtelConfig = None version = None comp = True ### set switch values ### for switch in Script.getUnprocessedSwitches(): if switch[0] == "inputfile" or switch[0] == "p": corsikaFileLFN = switch[1].split('ParametricInputData=LFN:')[1] elif switch[0] == "simtelConfig" or switch[0] == "S": simtelConfig = switch[1] elif switch[0] == "version" or switch[0] == "V": version = switch[1] elif switch[0] == "comp" or switch[0] == "C": comp = switch[1] if version == None: Script.showHelp() jobReport.setApplicationStatus('Missing options') DIRAC.exit(-1) from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) ############ Producing SimTel File if comp == True: install_CorsikaSimtelPack(version) else: CorsikaSimtelPack = 'corsika_simhessarray/' + version + '/corsika_simhessarray' res = installSoftwarePackage(CorsikaSimtelPack, workingArea()) if not res['OK']: DIRAC.gLogger.error('Failed to execute installSoftwarePackage', res) CorsikaSimtelPack = 'corsika_simhessarray/' + version + '/corsika_simhessarray' cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "NORTH": 'cta-prod2n', "ASTRISLN": 'cta-astri-sln' } #if simtelConfig=="6INROW": # all_configs=["4MSST","SCSST","ASTRI","NSBX3","STD","SCMST"] #elif simtelConfig=="5INROW": # all_configs=["4MSST","SCSST","ASTRI","NSBX3","STD"] #else: # all_configs=[simtelConfig] all_configs = [simtelConfig] for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) current_version = version DIRAC.gLogger.notice('current version is', current_version) #if current_conf == "SCMST": # current_version = version + '_sc3' # DIRAC.gLogger.notice('current version is', current_version) # if os.path.isdir('sim_telarray'): # DIRAC.gLogger.notice('Package found in the local area. Removing package...') # cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' # if(os.system(cmd)): # DIRAC.exit( -1 ) # install_CorsikaSimtelPack(current_version) #else: # current_version = version # DIRAC.gLogger.notice('current version is', current_version) #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! /bin/sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2 export CORSIKA_IO_BUFFER=800MB ./grid_prod2-repro.sh %s %s""" % (os.path.basename(corsikaFileLFN), current_conf)) fd.close() #################################### os.system('chmod u+x run_sim.sh') os.system('chmod u+x grid_prod2-repro.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendSimtelOutput) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log') == 0): DIRAC.gLogger.error('Broken string found in simtel.log') jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## check simtel data/log/histo Output File exist cfg = cfg_dict[current_conf] simtelFileName = os.path.basename(corsikaFileLFN).replace( 'corsika.gz', 'simtel.gz') cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) simtelLogFileName = simtelFileName.replace('simtel.gz', 'log.gz') cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelHistFileName = simtelFileName.replace('simtel.gz', 'hdata.gz') cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) ################################################################ DIRAC.exit()
def execute(self): """The JobAgent execution method. """ if self.jobCount: #Only call timeLeft utility after a job has been picked up self.log.info('Attempting to check CPU time left for filling mode') if self.fillingMode: if self.timeLeftError: self.log.warn(self.timeLeftError) return self.__finish(self.timeLeftError) self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft)) # Need to update the Configuration so that the new value is published in the next matching request result = self.computingElement.setCPUTimeLeft( cpuTimeLeft=self.timeLeft) if not result['OK']: return self.__finish(result['Message']) else: return self.__finish('Filling Mode is Disabled') self.log.verbose('Job Agent execution loop') available = self.computingElement.available() if not available['OK'] or not available['Value']: self.log.info('Resource is not available') self.log.info(available['Message']) return self.__finish('CE Not Available') self.log.info(available['Message']) result = self.computingElement.getDescription() if not result['OK']: return result ceDict = result['Value'] # Add pilot information gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown') if gridCE != 'Unknown': ceDict['GridCE'] = gridCE if not 'PilotReference' in ceDict: ceDict['PilotReference'] = str(self.pilotReference) ceDict['PilotBenchmark'] = self.cpuFactor ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag # Add possible job requirements result = gConfig.getOptionsDict('/AgentJobRequirements') if result['OK']: requirementsDict = result['Value'] ceDict.update(requirementsDict) self.log.verbose(ceDict) start = time.time() jobRequest = self.__requestJob(ceDict) matchTime = time.time() - start self.log.info('MatcherTime = %.2f (s)' % (matchTime)) self.stopAfterFailedMatches = self.am_getOption( 'StopAfterFailedMatches', self.stopAfterFailedMatches) if not jobRequest['OK']: if re.search('No match found', jobRequest['Message']): self.log.notice('Job request OK: %s' % (jobRequest['Message'])) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) elif jobRequest['Message'].find("seconds timeout") != -1: self.log.error(jobRequest['Message']) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) elif jobRequest['Message'].find( "Pilot version does not match") != -1: self.log.error(jobRequest['Message']) return S_ERROR(jobRequest['Message']) else: self.log.notice('Failed to get jobs: %s' % (jobRequest['Message'])) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) # Reset the Counter self.matchFailedCount = 0 matcherInfo = jobRequest['Value'] jobID = matcherInfo['JobID'] self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False) matcherParams = ['JDL', 'DN', 'Group'] for param in matcherParams: if not matcherInfo.has_key(param): self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param)) return self.__finish('Matcher Failed') elif not matcherInfo[param]: self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param)) return self.__finish('Matcher Failed') else: self.log.verbose('Matcher returned %s = %s ' % (param, matcherInfo[param])) jobJDL = matcherInfo['JDL'] jobGroup = matcherInfo['Group'] ownerDN = matcherInfo['DN'] optimizerParams = {} for key in matcherInfo.keys(): if not key in matcherParams: value = matcherInfo[key] optimizerParams[key] = value parameters = self.__getJDLParameters(jobJDL) if not parameters['OK']: self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters') self.log.warn(parameters['Message']) return self.__finish('JDL Problem') params = parameters['Value'] if not params.has_key('JobID'): msg = 'Job has not JobID defined in JDL parameters' self.__report(jobID, 'Failed', msg) self.log.warn(msg) return self.__finish('JDL Problem') else: jobID = params['JobID'] if not params.has_key('JobType'): self.log.warn('Job has no JobType defined in JDL parameters') jobType = 'Unknown' else: jobType = params['JobType'] if not params.has_key('SystemConfig'): self.log.warn( 'Job has no system configuration defined in JDL parameters') systemConfig = gConfig.getValue('/LocalSite/Architecture', '') self.log.info( 'Setting system config to /LocalSite/Architecture = %s since it was not specified' % systemConfig) if not systemConfig: self.log.warn('/LocalSite/Architecture is not defined') params['SystemConfig'] = systemConfig else: systemConfig = params['SystemConfig'] if systemConfig.lower() == 'any': systemConfig = gConfig.getValue('/LocalSite/Architecture', '') self.log.info( 'Setting SystemConfig = /LocalSite/Architecture =', '"%s" since it was set to "ANY" in the job description' % systemConfig) if not systemConfig: self.log.warn('/LocalSite/Architecture is not defined') params['SystemConfig'] = systemConfig if not params.has_key('CPUTime'): self.log.warn( 'Job has no CPU requirement defined in JDL parameters') self.log.verbose('Job request successful: \n %s' % (jobRequest['Value'])) self.log.info('Received JobID=%s, JobType=%s, SystemConfig=%s' % (jobID, jobType, systemConfig)) self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup)) self.jobCount += 1 try: jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName) jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False) if self.gridCEQueue: jobReport.setJobParameter('GridCEQueue', self.gridCEQueue, sendFlag=False) if os.environ.has_key('BOINC_JOB_ID'): # Report BOINC environment for p in [ 'BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName' ]: jobReport.setJobParameter(p, gConfig.getValue( '/LocalSite/%s' % p, 'Unknown'), sendFlag=False) jobReport.setJobStatus('Matched', 'Job Received by Agent') # self.__setJobSite( jobID, self.siteName ) if not self.pilotInfoReportedFlag: self.__reportPilotInfo(jobID) result = self.__setupProxy(ownerDN, jobGroup) if not result['OK']: return self.__rescheduleFailedJob( jobID, result['Message'], self.stopOnApplicationFailure) if 'Value' in result and result['Value']: proxyChain = result['Value'] # Is this necessary at all? saveJDL = self.__saveJobJDLRequest(jobID, jobJDL) #self.__report(jobID,'Matched','Job Prepared to Submit') #resourceParameters = self.__getJDLParameters( resourceJDL ) #if not resourceParameters['OK']: # return resourceParameters #resourceParams = resourceParameters['Value'] software = self.__checkInstallSoftware(jobID, params, ceDict) if not software['OK']: self.log.error('Failed to install software for job %s' % (jobID)) errorMsg = software['Message'] if not errorMsg: errorMsg = 'Failed software installation' return self.__rescheduleFailedJob( jobID, errorMsg, self.stopOnApplicationFailure) self.log.verbose('Before %sCE submitJob()' % (self.ceName)) submission = self.__submitJob(jobID, params, ceDict, optimizerParams, jobJDL, proxyChain) if not submission['OK']: self.__report(jobID, 'Failed', submission['Message']) return self.__finish(submission['Message']) elif 'PayloadFailed' in submission: # Do not keep running and do not overwrite the Payload error return self.__finish( 'Payload execution failed with error code %s' % submission['PayloadFailed'], self.stopOnApplicationFailure) self.log.verbose('After %sCE submitJob()' % (self.ceName)) except Exception: self.log.exception() return self.__rescheduleFailedJob( jobID, 'Job processing failed with exception', self.stopOnApplicationFailure) currentTimes = list(os.times()) for i in range(len(currentTimes)): currentTimes[i] -= self.initTimes[i] utime, stime, cutime, cstime, elapsed = currentTimes cpuTime = utime + stime + cutime + cstime result = self.timeLeftUtil.getTimeLeft(cpuTime) if result['OK']: self.timeLeft = result['Value'] else: if result['Message'] != 'Current batch system is not supported': self.timeLeftError = result['Message'] else: if self.cpuFactor: # if the batch system is not defined used the CPUNormalizationFactor # defined locally self.timeLeft = self.__getCPUTimeLeft() scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value'] self.__setJobParam(jobID, 'ScaledCPUTime', str(scaledCPUTime - self.scaledCPUTime)) self.scaledCPUTime = scaledCPUTime return S_OK('Job Agent cycle complete')
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig) Script.registerSwitch("V:", "version=", "Version", setVersion) from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient from DIRAC.Resources.Catalog.FileCatalog import FileCatalog Script.parseCommandLine() DIRAC.gLogger.setLevel('INFO') global fcc, fcL from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport global jobID jobID = os.environ['JOBID'] jobReport = JobReport(int(jobID)) ########### ## Checking MD coherence fc = FileCatalog('LcgFileCatalog') res = fc._getCatalogConfigDetails('DIRACFileCatalog') print 'DFC CatalogConfigDetails:', res res = fc._getCatalogConfigDetails('LcgFileCatalog') print 'LCG CatalogConfigDetails:', res fcc = FileCatalogClient() fcL = FileCatalog('LcgFileCatalog') from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() ############################ install_CorsikaSimtelPack(version) ############# # simtelConfigFile should be built from ??? #simtelConfigFilesPath = 'sim_telarray/multi' #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg' #createGlobalsFromConfigFiles(simtelConfigFile) #createGlobalsFromConfigFiles(current_version) ####################### ## files spread in 1000-runs subDirectories global corsikaFileLFN corsikaFileLFN = dirac.getJobJDL(jobID)['Value']['InputData'] print 'corsikaFileLFN is ' + corsikaFileLFN corsikaFileName = os.path.basename(corsikaFileLFN) run_number = corsikaFileName.split('run')[1].split('.corsika.gz')[ 0] # run001412.corsika.gz runNum = int(run_number) subRunNumber = '%03d' % runNum runNumModMille = runNum % 1000 runNumTrunc = (runNum - runNumModMille) / 1000 runNumSeriesDir = '%03dxxx' % runNumTrunc print 'runNumSeriesDir=', runNumSeriesDir f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') f.close() ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "SCMST": 'cta-prod2-sc3', "NORTH": 'cta-prod2n' } if simtelConfig == "6INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD", "SCMST"] elif simtelConfig == "5INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"] elif simtelConfig == "3INROW": all_configs = ["SCSST", "STD", "SCMST"] else: all_configs = [simtelConfig] for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) if current_conf == "SCMST": current_version = version + '_sc3' DIRAC.gLogger.notice('current version is', current_version) if os.path.isdir('sim_telarray'): DIRAC.gLogger.notice( 'Package found in the local area. Removing package...') cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' if (os.system(cmd)): DIRAC.exit(-1) install_CorsikaSimtelPack(current_version) else: current_version = version DIRAC.gLogger.notice('current version is', current_version) ######################################################## createGlobalsFromConfigFiles(current_version) resultCreateSimtelDirMD = createSimtelFileSystAndMD( current_conf, current_version) if not resultCreateSimtelDirMD['OK']: DIRAC.gLogger.error('Failed to create simtelArray Directory MD') jobReport.setApplicationStatus( 'Failed to create simtelArray Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no simtelArray File produced') DIRAC.exit(-1) else: print 'simtel Directory MD successfully created' ############## introduce file existence check here ######################## simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel.gz' simtelDirPath_conf = simtelDirPath + '_' + current_conf simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data', runNumSeriesDir) simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName) res = CheckCatalogCoherence(simtelOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Current conf already done', current_conf) continue #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! /bin/sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2 export CORSIKA_IO_BUFFER=800MB ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, current_conf)) fd.close() os.system('chmod u+x grid_prod2-repro.sh') os.system('chmod u+x run_sim.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendOutputSimTel) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log')): DIRAC.gLogger.notice('not broken') else: DIRAC.gLogger.notice('broken') jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## putAndRegister simtel data/log/histo Output File: cfg = cfg_dict[current_conf] cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) ############################################ simtelRunNumberSeriesDirExist = fcc.isDirectory( simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] newSimtelRunFileSeriesDir = ( simtelRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD simtelLogFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log', runNumSeriesDir) simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir, simtelLogFileName) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms', runNumSeriesDir) simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir, simtelHistFileName) ########### quality check on Histo Missing because it needs the NSHOW ############################################# ########## quality check on Log ############################# cmd = 'zcat %s | grep Finished.' % simtelLogFileName DIRAC.gLogger.notice('Executing system call:', cmd) if (os.system(cmd)): jobReport.setApplicationStatus('Log check Failed') DIRAC.exit(-1) ################################################ from DIRAC.Core.Utilities import List from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations opsHelper = Operations() global seList seList = opsHelper.getValue('ProductionOutputs/SimtelProd', []) seList = List.randomize(seList) DIRAC.gLogger.notice('SeList is:', seList) ######### Upload simtel data/log/histo ############################################## res = upload_to_seList(simtelOutFileLFN, simtelFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('OutputData Upload Error', simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutLogFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Log file already exists. Removing:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) res = upload_to_seList(simtelOutLogFileLFN, simtelLogFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Log Error', simtelOutLogFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutHistFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Histo file already exists. Removing:', simtelOutHistFileLFN) ret = dirac.removeFile(simtelOutHistFileLFN) res = upload_to_seList(simtelOutHistFileLFN, simtelHistFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Histo Error', simtelOutHistFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) DIRAC.gLogger.notice('Removing simtel log file:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) #################################################################### if newSimtelRunFileSeriesDir: insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc) ###### simtel File level metadata ############################################ simtelFileMD = {} simtelFileMD['runNumber'] = int(run_number) simtelFileMD['jobID'] = jobID simtelFileMD['simtelReturnCode'] = simtelReturnCode result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.addFileAncestors( {simtelOutFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutLogFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutHistFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) if not result['OK']: print 'ResultSetMetadata:', result['Message'] DIRAC.exit()
if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs(wdir) if os.path.isdir(wdir): os.chdir(wdir) except Exception, x: gLogger.exception( 'JobWrapperTemplate could not create working directory') rescheduleFailedJob(jobID, 'Could Not Create Working Directory') return 1 #root = arguments['CE']['Root'] jobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, jobReport) job.initialize(arguments) except Exception, x: gLogger.exception('JobWrapper failed the initialization phase') rescheduleFailedJob(jobID, 'Job Wrapper Initialization') job.sendWMSAccounting('Failed', 'Job Wrapper Initialization') return 1 if arguments['Job'].has_key('InputSandbox'): jobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']:
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("p:", "inputfile=", "Input File", setInputFile) Script.registerSwitch("E:", "simtelExecName=", "SimtelExecName", setExecutable) Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig) Script.registerSwitch("V:", "version=", "Version", setVersion) Script.registerSwitch("D:", "storage_element=", "Storage Element", setStorageElement) from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient from DIRAC.Resources.Catalog.FileCatalog import FileCatalog Script.parseCommandLine() DIRAC.gLogger.setLevel('INFO') global fcc, fcL, storage_element from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) ########### ## Checking MD coherence fc = FileCatalog('LcgFileCatalog') res = fc._getCatalogConfigDetails('DIRACFileCatalog') print 'DFC CatalogConfigDetails:', res res = fc._getCatalogConfigDetails('LcgFileCatalog') print 'LCG CatalogConfigDetails:', res fcc = FileCatalogClient() fcL = FileCatalog('LcgFileCatalog') from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() ############################ ############# # simtelConfigFile should be built from ??? #simtelConfigFilesPath = 'sim_telarray/multi' #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg' #createGlobalsFromConfigFiles(simtelConfigFile) #createGlobalsFromConfigFiles(current_version) ####################### ## files spread in 1000-runs subDirectories corsikaFileName = os.path.basename(corsikaFileLFN) run_number = corsikaFileName.split('run')[1].split('.corsika.gz')[ 0] # run001412.corsika.gz runNum = int(run_number) subRunNumber = '%03d' % runNum runNumModMille = runNum % 1000 runNumTrunc = (runNum - runNumModMille) / 1000 runNumSeriesDir = '%03dxxx' % runNumTrunc print 'runNumSeriesDir=', runNumSeriesDir f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') f.close() ##### If storage element is IN2P3-tape save simtel file on disk ############### if storage_element == 'CC-IN2P3-Tape': storage_element = 'CC-IN2P3-Disk' ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "SCMST": 'cta-prod2-sc3' } if simtelConfig == "6INROW": all_configs = ["SCMST", "4MSST", "SCSST", "ASTRI", "NSBX3", "STD"] elif simtelConfig == "5INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"] else: all_configs = [simtelConfig] for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) if current_conf == "SCMST": current_version = version + '_sc3' else: current_version = version if os.path.isdir('sim_telarray'): DIRAC.gLogger.notice( 'Package found in the local area. Removing package...') cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' if (os.system(cmd)): DIRAC.exit(-1) DIRAC.gLogger.notice('current version is', current_version) CorsikaSimtelPack = 'corsika_simhessarray/' + current_version + '/corsika_simhessarray' packs = [CorsikaSimtelPack] for package in packs: DIRAC.gLogger.notice('Checking:', package) if sharedArea: if checkSoftwarePackage(package, sharedArea())['OK']: DIRAC.gLogger.notice('Package found in Shared Area:', package) installSoftwareEnviron(package, workingArea()) packageTuple = package.split('/') corsika_subdir = sharedArea( ) + '/' + packageTuple[0] + '/' + current_version cmd = 'cp -u -r ' + corsika_subdir + '/* .' os.system(cmd) continue DIRAC.gLogger.error('Check Failed for software package:', package) DIRAC.gLogger.error('Software package not available') DIRAC.exit(-1) createGlobalsFromConfigFiles(current_version) resultCreateSimtelDirMD = createSimtelFileSystAndMD( current_conf, current_version) if not resultCreateSimtelDirMD['OK']: DIRAC.gLogger.error('Failed to create simtelArray Directory MD') jobReport.setApplicationStatus( 'Failed to create simtelArray Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no simtelArray File produced') DIRAC.exit(-1) else: print 'simtel Directory MD successfully created' ############## introduce file existence check here ######################## simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel.gz' simtelDirPath_conf = simtelDirPath + '_' + current_conf simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data', runNumSeriesDir) simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName) res = CheckCatalogCoherence(simtelOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Current conf already done', current_conf) continue #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! /bin/sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2_rev1869 export CORSIKA_IO_BUFFER=800MB ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, current_conf)) fd.close() os.system('chmod u+x run_sim.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendOutputSimTel) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log')): DIRAC.gLogger.notice('not broken') else: DIRAC.gLogger.notice('broken') # Tag corsika File if Broken Pipe corsikaTagMD = {} corsikaTagMD['CorsikaToReprocess'] = 'CorsikaToReprocess' result = fcc.setMetadata(corsikaFileLFN, corsikaTagMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## putAndRegister simtel data/log/histo Output File: cfg = cfg_dict[current_conf] cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) ############################################ simtelRunNumberSeriesDirExist = fcc.isDirectory( simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] newSimtelRunFileSeriesDir = ( simtelRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD simtelLogFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log', runNumSeriesDir) simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir, simtelLogFileName) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms', runNumSeriesDir) simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir, simtelHistFileName) ################################################ DIRAC.gLogger.notice('Put and register simtel File in LFC and DFC:', simtelOutFileLFN) ret = dirac.addFile(simtelOutFileLFN, simtelFileName, storage_element) res = CheckCatalogCoherence(simtelOutFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### res = CheckCatalogCoherence(simtelOutLogFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Log file already exists. Removing:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) DIRAC.gLogger.notice( 'Put and register simtel Log File in LFC and DFC:', simtelOutLogFileLFN) ret = dirac.addFile(simtelOutLogFileLFN, simtelLogFileName, storage_element) res = CheckCatalogCoherence(simtelOutLogFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### res = CheckCatalogCoherence(simtelOutHistFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Histo file already exists. Removing:', simtelOutHistFileLFN) ret = dirac.removeFile(simtelOutHistFileLFN) DIRAC.gLogger.notice( 'Put and register simtel Histo File in LFC and DFC:', simtelOutHistFileLFN) ret = dirac.addFile(simtelOutHistFileLFN, simtelHistFileName, storage_element) res = CheckCatalogCoherence(simtelOutHistFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### if newSimtelRunFileSeriesDir: insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc) ###### simtel File level metadata ############################################ simtelFileMD = {} simtelFileMD['runNumber'] = int(run_number) simtelFileMD['jobID'] = jobID simtelFileMD['simtelReturnCode'] = simtelReturnCode result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.addFileAncestors( {simtelOutFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutLogFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutHistFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) if not result['OK']: print 'ResultSetMetadata:', result['Message'] DIRAC.exit()
def main(): from DIRAC.Core.Base import Script #### eventio_cta options ########################################## Script.registerSwitch( "T:", "tellist=", "Tellist", setTellist ) Script.registerSwitch( "F:", "Nfirst_mcevt=", "Nfirst_mcevt", setNfirst_mcevt) Script.registerSwitch( "L:", "Nlast_mcevt=", "Nlast_mcevt", setNlast_mcevt) ## add other eventio_cta options ################################ # Script.registerSwitch( "N:", "num=", "Num", setNum) ## Script.registerSwitch( "L:", "limitmc=", "Limitmc", setLimitmc) # Script.registerSwitch( "S:", "telidoffset=", "Telidoffset", setTelidoffset) Script.registerSwitch( "P:", "pixelslices=", "setPixelslices (true/false)",setPixelslices) Script.registerSwitch( "p:", "run_number=", "Run Number (set automatically)", setRunNumber ) ### other options ############################################### Script.registerSwitch( "V:", "version=", "HAP version", setVersion ) Script.parseCommandLine( ignoreErrors = True ) args = Script.getPositionalArgs() if len( args ) < 1: Script.showHelp() if tellist == None or version == None: Script.showHelp() jobReport.setApplicationStatus('Options badly specified') DIRAC.exit( -1 ) from CTADIRAC.Core.Workflow.Modules.HapApplication import HapApplication from CTADIRAC.Core.Workflow.Modules.HapRootMacro import HapRootMacro from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int( jobID ) jobReport = JobReport( jobID ) HapPack = 'HAP/' + version + '/HAP' packs = ['HESS/v0.2/lib','HESS/v0.3/root',HapPack] for package in packs: DIRAC.gLogger.notice( 'Checking:', package ) if sharedArea: if checkSoftwarePackage( package, sharedArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Shared Area:', package ) continue if localArea: if checkSoftwarePackage( package, localArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Local Area:', package ) continue if installSoftwarePackage( package, localArea() )['OK']: continue DIRAC.gLogger.error( 'Check Failed for software package:', package ) DIRAC.gLogger.error( 'Software package not available') DIRAC.exit( -1 ) telconf = os.path.join( localArea(),'HAP/%s/config/%s' % (version,tellist)) ha = HapApplication() ha.setSoftwarePackage(HapPack) ha.hapExecutable = 'eventio_cta' fileout = 'raw_' + part_type + '_run' + run_number + '.root' infile = build_infile() ha.hapArguments = ['-file', infile, '-o', fileout, '-tellist', telconf] try: ha.hapArguments.extend(['-Nfirst_mcevt', Nfirst_mcevt, '-Nlast_mcevt', Nlast_mcevt]) except NameError: DIRAC.gLogger.info( 'Nfirst_mcevt/Nlast_mcevt options are not used' ) try: if(pixelslices == 'true'): ha.hapArguments.extend(['-pixelslices']) except NameError: DIRAC.gLogger.info( 'pixelslices option is not used' ) DIRAC.gLogger.notice( 'Executing Hap Converter Application' ) res = ha.execute() if not res['OK']: DIRAC.gLogger.error( 'Failed to execute eventio_cta Application') jobReport.setApplicationStatus('eventio_cta: Failed') DIRAC.exit( -1 ) if not os.path.isfile(fileout): error = 'raw file was not created:' DIRAC.gLogger.error( error, fileout ) jobReport.setApplicationStatus('eventio_cta: RawData not created') DIRAC.exit( -1 ) ###################### Check RAW DATA ####################### hr = HapRootMacro() hr.setSoftwarePackage(HapPack) DIRAC.gLogger.notice('Executing RAW check step0') hr.rootMacro = '/hapscripts/dst/Open_Raw.C+' outfilestr = '"' + fileout + '"' args = [outfilestr] DIRAC.gLogger.notice( 'Open_Raw macro Arguments:', args ) hr.rootArguments = args DIRAC.gLogger.notice( 'Executing Hap Open_Raw macro') res = hr.execute() if not res['OK']: DIRAC.gLogger.error( 'Open_Raw: Failed' ) DIRAC.exit( -1 ) #################Check stdout of 'Open_Raw.C macro ############################### DIRAC.gLogger.notice('Executing Raw Check step1') ret = getSoftwareEnviron(HapPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error( error, HapPack) DIRAC.exit( -1 ) hapEnviron = ret['Value'] hessroot = hapEnviron['HESSROOT'] check_script = hessroot + '/hapscripts/dst/check_raw.csh' cmdTuple = [check_script] ret = systemCall( 0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute RAW Check step1') jobReport.setApplicationStatus('Check_raw: Failed') DIRAC.exit( -1 ) status, stdout, stderr = ret['Value'] if status==1: jobReport.setApplicationStatus('RAW Check step1: Big problem during RAW production') DIRAC.gLogger.error( 'Check_raw: Big problem during RAW production' ) DIRAC.exit( -1 ) ############## DST production #######################" hr = HapRootMacro() hr.setSoftwarePackage(HapPack) infile = build_infile() infilestr = '"' + fileout + '"' telconfstr = '"' + telconf + '"' args = [str(int(run_number)), infilestr, telconfstr] try: args.extend([nevent]) except NameError: DIRAC.gLogger.info( 'nevent arg not used' ) DIRAC.gLogger.notice( 'make_CTA_DST macro Arguments:', args ) hr.rootMacro = '/hapscripts/dst/make_CTA_DST.C+' hr.rootArguments = args DIRAC.gLogger.notice( 'Executing Hap make_CTA_DST macro' ) res = hr.execute() if not res['OK']: DIRAC.gLogger.error( 'Failed to execute make_CTA_DST macro') jobReport.setApplicationStatus('Failure during make_CTA_DST') DIRAC.exit( -1 ) ############ check existance of output file #### filedst = 'dst_CTA_%08d' % int(run_number) + '.root' if not os.path.isfile(filedst): DIRAC.gLogger.error('dst file not found:', filedst ) jobReport.setApplicationStatus('make_CTA_DST.C: DST file not created') DIRAC.exit( -1 ) fileout = 'dst_' + part_type + '_run' + run_number + '.root' cmd = 'mv ' + filedst + ' ' + fileout os.system(cmd) #####################Check stdout ########################### DIRAC.gLogger.notice('Executing DST Check step0') check_script = hessroot + '/hapscripts/dst/check_dst0.csh' cmdTuple = [check_script] ret = systemCall( 0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute DST Check step0') jobReport.setApplicationStatus('Check_dst0: Failed') DIRAC.exit( -1 ) status, stdout, stderr = ret['Value'] if status==1: jobReport.setApplicationStatus('Check_dst0: Big problem during the DST production') DIRAC.gLogger.error( 'DST Check step0 reports: Big problem during the DST production' ) DIRAC.exit( -1 ) if status==2: jobReport.setApplicationStatus('Check_dst0: No triggered events') DIRAC.gLogger.notice( 'DST Check step0 reports: No triggered events' ) DIRAC.exit( ) ############# run the CheckDST macro ################# DIRAC.gLogger.notice('Executing DST check step1') hr.rootMacro = '/hapscripts/dst/CheckDST.C+' fileoutstr = '"' + fileout + '"' args = [fileoutstr] DIRAC.gLogger.notice( 'CheckDST macro Arguments:', args ) hr.rootArguments = args DIRAC.gLogger.notice( 'Executing Hap CheckDST macro') res = hr.execute() if not res['OK']: DIRAC.gLogger.error( 'Failure during DST Check step1' ) jobReport.setApplicationStatus('Check_dst1: Failed') DIRAC.exit( -1 ) #######################Check stdout of CheckDST.C macro ########################## DIRAC.gLogger.notice('Executing DST Check step2') check_script = hessroot + '/hapscripts/dst/check_dst2.csh' cmdTuple = [check_script] ret = systemCall( 0, cmdTuple, sendOutput ) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute DST Check step2') jobReport.setApplicationStatus('Check_dst2: Failed') DIRAC.exit( -1 ) status, stdout, stderr = ret['Value'] if status==1: jobReport.setApplicationStatus('DST Check step2: Big problem during the DST production') DIRAC.gLogger.error( 'DST Check step2 reports: Big problem during the DST production' ) DIRAC.exit( -1 ) if status==2: jobReport.setApplicationStatus('DST Check step2: No triggered events') DIRAC.gLogger.notice( 'DST Check step2 reports: No triggered events' ) DIRAC.exit( ) DIRAC.exit()
def main(): from DIRAC.Core.Base import Script Script.registerSwitch( "p:", "run_number=", "Run Number", setRunNumber ) Script.registerSwitch( "R:", "run=", "Run", setRun ) Script.registerSwitch( "P:", "config_path=", "Config Path", setConfigPath ) Script.registerSwitch( "T:", "template=", "Template", setTemplate ) Script.registerSwitch( "E:", "executable=", "Executable", setExecutable ) Script.registerSwitch( "V:", "version=", "Version", setVersion ) Script.registerSwitch( "M:", "mode=", "Mode", setMode ) Script.parseCommandLine( ignoreErrors = True ) args = Script.getPositionalArgs() if len( args ) < 1: Script.showHelp() if version == None or executable == None or run_number == None or run == None or template == None: Script.showHelp() jobReport.setApplicationStatus('Options badly specified') DIRAC.exit( -1 ) from CTADIRAC.Core.Workflow.Modules.CorsikaApp import CorsikaApp from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int( jobID ) jobReport = JobReport( jobID ) CorsikaSimtelPack = 'corsika_simhessarray/' + version + '/corsika_simhessarray' packs = [CorsikaSimtelPack] for package in packs: DIRAC.gLogger.notice( 'Checking:', package ) if sharedArea: if checkSoftwarePackage( package, sharedArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Shared Area:', package ) installSoftwareEnviron( package, workingArea() ) packageTuple = package.split('/') corsika_subdir = sharedArea() + '/' + packageTuple[0] + '/' + version cmd = 'cp -r ' + corsika_subdir + '/* .' os.system(cmd) continue if workingArea: if checkSoftwarePackage( package, workingArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Local Area:', package ) continue if installSoftwarePackage( package, workingArea() )['OK']: ############## compile ############################# if version == 'clean_23012012': cmdTuple = ['./build_all','ultra','qgs2'] elif version in ['prod-2_21122012','prod-2_08032013','prod-2_06052013']: cmdTuple = ['./build_all','prod2','qgs2'] ret = systemCall( 0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute build') DIRAC.exit( -1 ) continue DIRAC.gLogger.error( 'Check Failed for software package:', package ) DIRAC.gLogger.error( 'Software package not available') DIRAC.exit( -1 ) cs = CorsikaApp() cs.setSoftwarePackage(CorsikaSimtelPack) cs.csExe = executable cs.csArguments = ['--run-number',run_number,'--run',run,template] corsikaReturnCode = cs.execute() if corsikaReturnCode != 0: DIRAC.gLogger.error( 'Failed to execute corsika Application') jobReport.setApplicationStatus('Corsika Application: Failed') DIRAC.exit( -1 ) ###### rename corsika file ################################# rundir = 'run' + run_number corsikaKEYWORDS = ['TELFIL'] dictCorsikaKW = fileToKWDict(template,corsikaKEYWORDS) corsikafilename = rundir + '/' + dictCorsikaKW['TELFIL'][0] destcorsikafilename = 'corsika_run' + run_number + '.corsika.gz' cmd = 'mv ' + corsikafilename + ' ' + destcorsikafilename os.system(cmd) ### create corsika tar #################### corsika_tar = 'corsika_run' + run_number + '.tar.gz' filetar1 = rundir + '/'+'input' filetar2 = rundir + '/'+ 'DAT' + run_number + '.dbase' filetar3 = rundir + '/run' + str(int(run_number)) + '.log' cmdTuple = ['/bin/tar','zcf',corsika_tar, filetar1,filetar2,filetar3] DIRAC.gLogger.notice( 'Executing command tuple:', cmdTuple ) ret = systemCall( 0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute tar') DIRAC.exit( -1 ) DIRAC.exit()
def main(): from DIRAC.Core.Base import Script ### make_CTA_DST options ############################################### Script.registerSwitch( "R:", "run_number=", "Run Number", setRunNumber ) Script.registerSwitch( "I:", "infile=", "Input file", setInfile ) Script.registerSwitch( "T:", "tellist=", "Tellist", setTellist ) Script.registerSwitch( "N:", "nevent=", "Nevent", setNevent ) ### other options ############################################### Script.registerSwitch( "V:", "version=", "HAP version", setVersion ) Script.parseCommandLine( ignoreErrors = True ) args = Script.getPositionalArgs() if len( args ) < 1: Script.showHelp() if infile == None or tellist == None or version == None: Script.showHelp() jobReport.setApplicationStatus('Options badly specified') DIRAC.exit( -1 ) from CTADIRAC.Core.Workflow.Modules.HapRootMacro import HapRootMacro from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int( jobID ) jobReport = JobReport( jobID ) HapPack = 'HAP/' + version + '/HAP' packs = ['HESS/v0.2/lib','HESS/v0.3/root',HapPack] for package in packs: DIRAC.gLogger.notice( 'Checking:', package ) if sharedArea: if checkSoftwarePackage( package, sharedArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Shared Area:', package ) continue if localArea: if checkSoftwarePackage( package, localArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Local Area:', package ) continue if installSoftwarePackage( package, localArea() )['OK']: continue DIRAC.gLogger.error( 'Check Failed for software package:', package ) DIRAC.gLogger.error( 'Software package not available') DIRAC.exit( -1 ) hr = HapRootMacro() hr.setSoftwarePackage(HapPack) telconf = os.path.join( localArea(),'HAP/%s/config/%s' % (version,tellist)) infilestr = '"' + infile + '"' telconfstr = '"' + telconf + '"' args = [str(int(RunNum)), infilestr, telconfstr] try: args.extend([nevent]) except NameError: DIRAC.gLogger.info( 'nevent arg not used' ) DIRAC.gLogger.notice( 'make_CTA_DST macro Arguments:', args ) hr.rootMacro = '/hapscripts/dst/make_CTA_DST.C+' hr.rootArguments = args DIRAC.gLogger.notice( 'Executing Hap make_CTA_DST macro' ) res = hr.execute() if not res['OK']: DIRAC.gLogger.error( 'Failed to execute make_CTA_DST macro') jobReport.setApplicationStatus('Failure during make_CTA_DST') DIRAC.exit( -1 ) ############ check existance of output file #### filedst = 'dst_CTA_%08d' % int(RunNum) + '.root' if not os.path.isfile(filedst): DIRAC.gLogger.error('dst file not found:', filedst ) jobReport.setApplicationStatus('make_CTA_DST.C: DST file not created') DIRAC.exit( -1 ) ###################Check std out ############################# DIRAC.gLogger.notice('Executing DST Check step0') ret = getSoftwareEnviron(HapPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error( error, HapPack) DIRAC.exit( -1 ) hapEnviron = ret['Value'] hessroot = hapEnviron['HESSROOT'] check_script = hessroot + '/hapscripts/dst/check_dst0.csh' cmdTuple = [check_script] ret = systemCall( 0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute DST Check step0') jobReport.setApplicationStatus('Check_dst0: Failed') DIRAC.exit( -1 ) status, stdout, stderr = ret['Value'] if status==1: jobReport.setApplicationStatus('Check_dst0: Big problem during the DST production') DIRAC.gLogger.error( 'DST Check step0 reports: Big problem during the DST production' ) DIRAC.exit( -1 ) if status==2: jobReport.setApplicationStatus('Check_dst0: No triggered events') DIRAC.gLogger.notice( 'DST Check step0 reports: No triggered events' ) DIRAC.exit( ) ############# run the CheckDST macro ################# DIRAC.gLogger.notice('Executing DST check step1') hr.rootMacro = '/hapscripts/dst/CheckDST.C+' fileoutstr = '"' + filedst + '"' args = [fileoutstr] DIRAC.gLogger.notice( 'CheckDST macro Arguments:', args ) hr.rootArguments = args DIRAC.gLogger.notice( 'Executing Hap CheckDST macro') res = hr.execute() if not res['OK']: DIRAC.gLogger.error( 'Failure during DST Check step1' ) jobReport.setApplicationStatus('Check_dst1: Failed') DIRAC.exit( -1 ) ######################check stdout of CheckDST macro ########################### DIRAC.gLogger.notice('Executing DST Check step2') check_script = hessroot + '/hapscripts/dst/check_dst2.csh' cmdTuple = [check_script] ret = systemCall( 0, cmdTuple, sendOutput ) if not ret['OK']: DIRAC.gLogger.error( 'Failed to execute DST Check step2') jobReport.setApplicationStatus('Check_dst2: Failed') DIRAC.exit( -1 ) status, stdout, stderr = ret['Value'] if status==1: jobReport.setApplicationStatus('DST Check step2: Big problem during the DST production') DIRAC.gLogger.error( 'DST Check step2 reports: Big problem during the DST production' ) DIRAC.exit( -1 ) if status==2: jobReport.setApplicationStatus('DST Check step2: No triggered events') DIRAC.gLogger.notice( 'DST Check step2 reports: No triggered events' ) DIRAC.exit( ) DIRAC.exit()
def execute(arguments): global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) if arguments.has_key('WorkingDirectory'): wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs(wdir) if os.path.isdir(wdir): os.chdir(wdir) except Exception: gLogger.exception( 'JobWrapperTemplate could not create working directory') rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory') return 1 gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) except Exception: gLogger.exception('JobWrapper failed the initialization phase') rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization') return 1 if arguments['Job'].has_key('InputSandbox'): gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if arguments['Job'].has_key('InputData'): if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception, x: gLogger.exception('JobWrapper failed to resolve input data') rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments)
def main(): from DIRAC.Core.Base import Script Script.initialize() DIRAC.gLogger.notice('Platform is:') os.system('dirac-platform') from DIRAC.DataManagementSystem.Client.DataManager import DataManager from CTADIRAC.Core.Workflow.Modules.EvnDispApp import EvnDispApp from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) version = sys.argv[3] DIRAC.gLogger.notice('Version:', version) EvnDispPack = os.path.join('evndisplay', version, 'evndisplay') packs = [EvnDispPack] for package in packs: DIRAC.gLogger.notice('Checking:', package) if checkSoftwarePackage(package, sharedArea())['OK']: DIRAC.gLogger.notice('Package found in Shared Area:', package) installSoftwareEnviron(package, sharedArea()) continue else: installSoftwarePackage(package, workingArea()) DIRAC.gLogger.notice('Package found in workingArea:', package) continue DIRAC.gLogger.error('Check Failed for software package:', package) DIRAC.gLogger.error('Software package not available') DIRAC.exit(-1) ed = EvnDispApp() ed.setSoftwarePackage(EvnDispPack) dstFileLFNList = sys.argv[-1].split('ParametricParameters={')[1].split( '}')[0].replace(',', ' ') args = [] i = 0 for word in dstFileLFNList.split(): i = i + 1 dstfile = os.path.basename(word) ###### execute evndisplay stage1 ############### executable = sys.argv[5] logfileName = executable + '_' + str(i) + '.log' args = ['-sourcefile', dstfile, '-outputdirectory', 'outdir'] # add other arguments for evndisp specified by user ###### evndispparfile = open('evndisp.par', 'r').readlines() for line in evndispparfile: for word in line.split(): args.append(word) execute_module(ed, executable, args) for name in glob.glob('outdir/*.root'): evndispOutFile = name.split('.root')[0] + '_' + str( jobID) + '_evndisp.root' cmd = 'mv ' + name + ' ' + os.path.basename(evndispOutFile) if (os.system(cmd)): DIRAC.exit(-1) ########### quality check on Log ############################################# cmd = 'mv ' + executable + '.log' + ' ' + logfileName if (os.system(cmd)): DIRAC.exit(-1) fd = open('check_log.sh', 'w') fd.write("""#! /bin/sh if grep -i "error" %s; then exit 1 fi if grep "Final checks on result file (seems to be OK):" %s; then exit 0 else exit 1 fi """ % (logfileName, logfileName)) fd.close() os.system('chmod u+x check_log.sh') cmd = './check_log.sh' DIRAC.gLogger.notice('Executing system call:', cmd) if (os.system(cmd)): jobReport.setApplicationStatus('EvnDisp Log Check Failed') DIRAC.exit(-1) ################################################################## ########### remove the dst file ############################################# cmd = 'rm ' + dstfile if (os.system(cmd)): DIRAC.exit(-1) DIRAC.exit()
def execute(self): """The JobAgent execution method.""" self.log.verbose("Job Agent execution loop") queueDictItems = list(self.queueDict.items()) random.shuffle(queueDictItems) # Check that there is enough slots locally result = self._checkCEAvailability(self.computingElement) if not result["OK"] or result["Value"]: return result for queueName, queueDictionary in queueDictItems: # Make sure there is no problem with the queue before trying to submit if not self._allowedToSubmit(queueName): continue # Get a working proxy ce = queueDictionary["CE"] cpuTime = 86400 * 3 self.log.verbose( "Getting pilot proxy", "for %s/%s %d long" % (self.pilotDN, self.pilotGroup, cpuTime)) result = gProxyManager.getPilotProxyFromDIRACGroup( self.pilotDN, self.pilotGroup, cpuTime) if not result["OK"]: return result proxy = result["Value"] result = proxy.getRemainingSecs() # pylint: disable=no-member if not result["OK"]: return result lifetime_secs = result["Value"] ce.setProxy(proxy, lifetime_secs) # Check that there is enough slots in the remote CE to match a job result = self._checkCEAvailability(ce) if not result["OK"] or result["Value"]: self.failedQueues[queueName] += 1 continue # Get environment details and enhance them result = self._getCEDict(ce) if not result["OK"]: self.failedQueues[queueName] += 1 continue ceDictList = result["Value"] for ceDict in ceDictList: # Information about number of processors might not be returned in CE.getCEStatus() ceDict["NumberOfProcessors"] = ce.ceParameters.get( "NumberOfProcessors") self._setCEDict(ceDict) # Update the configuration with the names of the Site, CE and queue to target # This is used in the next stages self._updateConfiguration("Site", queueDictionary["Site"]) self._updateConfiguration("GridCE", queueDictionary["CEName"]) self._updateConfiguration("CEQueue", queueDictionary["QueueName"]) self._updateConfiguration("RemoteExecution", True) # Try to match a job jobRequest = self._matchAJob(ceDictList) while jobRequest["OK"]: # Check matcher information returned matcherParams = ["JDL", "DN", "Group"] matcherInfo = jobRequest["Value"] jobID = matcherInfo["JobID"] jobReport = JobReport(jobID, "PushJobAgent@%s" % self.siteName) result = self._checkMatcherInfo(matcherInfo, matcherParams, jobReport) if not result["OK"]: self.failedQueues[queueName] += 1 break jobJDL = matcherInfo["JDL"] jobGroup = matcherInfo["Group"] ownerDN = matcherInfo["DN"] ceDict = matcherInfo["CEDict"] matchTime = matcherInfo["matchTime"] optimizerParams = {} for key in matcherInfo: if key not in matcherParams: optimizerParams[key] = matcherInfo[key] # Get JDL paramters parameters = self._getJDLParameters(jobJDL) if not parameters["OK"]: jobReport.setJobStatus( status=JobStatus.FAILED, minorStatus="Could Not Extract JDL Parameters") self.log.warn("Could Not Extract JDL Parameters", parameters["Message"]) self.failedQueues[queueName] += 1 break params = parameters["Value"] result = self._extractValuesFromJobParams(params, jobReport) if not result["OK"]: self.failedQueues[queueName] += 1 break submissionParams = result["Value"] jobID = submissionParams["jobID"] jobType = submissionParams["jobType"] self.log.verbose("Job request successful: \n", jobRequest["Value"]) self.log.info( "Received", "JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s" % (jobID, jobType, ownerDN, jobGroup)) try: jobReport.setJobParameter(par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False) jobReport.setJobStatus(status=JobStatus.MATCHED, minorStatus="Job Received by Agent", sendFlag=False) # Setup proxy result_setupProxy = self._setupProxy(ownerDN, jobGroup) if not result_setupProxy["OK"]: result = self._rescheduleFailedJob( jobID, result_setupProxy["Message"]) self.failedQueues[queueName] += 1 break proxyChain = result_setupProxy.get("Value") # Check software and install them if required software = self._checkInstallSoftware( jobID, params, ceDict, jobReport) if not software["OK"]: self.log.error("Failed to install software for job", "%s" % (jobID)) errorMsg = software["Message"] if not errorMsg: errorMsg = "Failed software installation" result = self._rescheduleFailedJob(jobID, errorMsg) self.failedQueues[queueName] += 1 break # Submit the job to the CE self.log.debug("Before self._submitJob() (%sCE)" % (self.ceName)) result_submitJob = self._submitJob( jobID=jobID, jobParams=params, resourceParams=ceDict, optimizerParams=optimizerParams, proxyChain=proxyChain, jobReport=jobReport, processors=submissionParams["processors"], wholeNode=submissionParams["wholeNode"], maxNumberOfProcessors=submissionParams[ "maxNumberOfProcessors"], mpTag=submissionParams["mpTag"], ) # Committing the JobReport before evaluating the result of job submission res = jobReport.commit() if not res["OK"]: resFD = jobReport.generateForwardDISET() if not resFD["OK"]: self.log.error( "Error generating ForwardDISET operation", resFD["Message"]) elif resFD["Value"]: # Here we create the Request. op = resFD["Value"] request = Request() requestName = "jobAgent_%s" % jobID request.RequestName = requestName.replace('"', "") request.JobID = jobID request.SourceComponent = "JobAgent_%s" % jobID request.addOperation(op) # This might fail, but only a message would be printed. self._sendFailoverRequest(request) if not result_submitJob["OK"]: self.log.error("Error during submission", result_submitJob["Message"]) self.failedQueues[queueName] += 1 break elif "PayloadFailed" in result_submitJob: # Do not keep running and do not overwrite the Payload error message = "Payload execution failed with error code %s" % result_submitJob[ "PayloadFailed"] self.log.info(message) self.log.debug("After %sCE submitJob()" % (self.ceName)) # Check that there is enough slots locally result = self._checkCEAvailability(self.computingElement) if not result["OK"] or result["Value"]: return result # Check that there is enough slots in the remote CE to match a new job result = self._checkCEAvailability(ce) if not result["OK"] or result["Value"]: self.failedQueues[queueName] += 1 break # Try to match a new job jobRequest = self._matchAJob(ceDictList) except Exception as subExcept: # pylint: disable=broad-except self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True) result = self._rescheduleFailedJob( jobID, "Job processing failed with exception") self.failedQueues[queueName] += 1 break if not jobRequest["OK"]: self._checkMatchingIssues(jobRequest) self.failedQueues[queueName] += 1 continue return S_OK("Push Job Agent cycle complete")
def main(): from DIRAC.Core.Base import Script #### eventio_cta options ########################################## Script.registerSwitch("I:", "infile=", "Input file", setInfile) Script.registerSwitch("O:", "outfile=", "Output file", setOutfile) Script.registerSwitch("T:", "tellist=", "Tellist", setTellist) Script.registerSwitch("F:", "Nfirst_mcevt=", "Nfirst_mcevt", setNfirst_mcevt) Script.registerSwitch("L:", "Nlast_mcevt=", "Nlast_mcevt", setNlast_mcevt) Script.registerSwitch("P:", "pixelslices=", "setPixelslices (true/false)", setPixelslices) ### other options ############################################### Script.registerSwitch("V:", "version=", "HAP version", setVersion) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp() if outfile == None or infile == None or tellist == None or version == None: Script.showHelp() jobReport.setApplicationStatus('Options badly specified') DIRAC.exit(-1) from CTADIRAC.Core.Workflow.Modules.HapApplication import HapApplication from CTADIRAC.Core.Workflow.Modules.HapRootMacro import HapRootMacro from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) HapPack = 'HAP/' + version + '/HAP' packs = ['HESS/v0.2/lib', 'HESS/v0.3/root', HapPack] for package in packs: DIRAC.gLogger.notice('Checking:', package) if sharedArea: if checkSoftwarePackage(package, sharedArea())['OK']: DIRAC.gLogger.notice('Package found in Shared Area:', package) continue if localArea: if checkSoftwarePackage(package, localArea())['OK']: DIRAC.gLogger.notice('Package found in Local Area:', package) continue if installSoftwarePackage(package, localArea())['OK']: continue DIRAC.gLogger.error('Check Failed for software package:', package) DIRAC.gLogger.error('Software package not available') DIRAC.exit(-1) telconf = os.path.join(localArea(), 'HAP/%s/config/%s' % (version, tellist)) ha = HapApplication() ha.setSoftwarePackage(HapPack) ha.hapExecutable = 'eventio_cta' ha.hapArguments = ['-file', infile, '-o', outfile, '-tellist', telconf] try: ha.hapArguments.extend( ['-Nfirst_mcevt', Nfirst_mcevt, '-Nlast_mcevt', Nlast_mcevt]) except NameError: DIRAC.gLogger.info('Nfirst_mcevt/Nlast_mcevt options are not used') try: if (pixelslices == 'true'): ha.hapArguments.extend(['-pixelslices']) except NameError: DIRAC.gLogger.info('pixelslices option is not used') DIRAC.gLogger.notice('Executing Hap Converter Application') res = ha.execute() if not res['OK']: DIRAC.gLogger.error('Failed to execute eventio_cta Application') jobReport.setApplicationStatus('eventio_cta: Failed') DIRAC.exit(-1) if not os.path.isfile(outfile): error = 'raw file was not created:' DIRAC.gLogger.error(error, outfile) jobReport.setApplicationStatus('eventio_cta: RawData not created') DIRAC.exit(-1) ###################### Check RAW DATA ####################### hr = HapRootMacro() hr.setSoftwarePackage(HapPack) DIRAC.gLogger.notice('Executing RAW check step0') hr.rootMacro = '/hapscripts/dst/Open_Raw.C+' outfilestr = '"' + outfile + '"' args = [outfilestr] DIRAC.gLogger.notice('Open_Raw macro Arguments:', args) hr.rootArguments = args DIRAC.gLogger.notice('Executing Hap Open_Raw macro') res = hr.execute() if not res['OK']: DIRAC.gLogger.error('Open_Raw: Failed') DIRAC.exit(-1) #########################Quality Check for raw Output File: step1#################### DIRAC.gLogger.notice('Executing Raw Check step1') ret = getSoftwareEnviron(HapPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error(error, HapPack) DIRAC.exit(-1) hapEnviron = ret['Value'] hessroot = hapEnviron['HESSROOT'] check_script = hessroot + '/hapscripts/dst/check_raw.csh' cmdTuple = [check_script] ret = systemCall(0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error('Failed to execute RAW Check step1') jobReport.setApplicationStatus('Check_raw: Failed') DIRAC.exit(-1) status, stdout, stderr = ret['Value'] if status == 1: jobReport.setApplicationStatus( 'RAW Check step1: Big problem during RAW production') DIRAC.gLogger.error('Check_raw: Big problem during RAW production') DIRAC.exit(-1) DIRAC.exit()
def execute(arguments): global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) # Fix in the environment to get a reasonable performance from dCache, # until we move to a new version of root # os.environ['DCACHE_RAHEAD'] = str(1) # os.environ['DCACHE_RA_BUFFER'] = str(50*1024) if arguments.has_key('WorkingDirectory'): wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs(wdir) if os.path.isdir(wdir): os.chdir(wdir) except Exception: gLogger.exception( 'JobWrapperTemplate could not create working directory') rescheduleFailedJob(jobID, 'Could Not Create Working Directory') return 1 #root = arguments['CE']['Root'] gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) except Exception: gLogger.exception('JobWrapper failed the initialization phase') rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) job.sendWMSAccounting('Failed', 'Job Wrapper Initialization') return 1 if arguments['Job'].has_key('InputSandbox'): gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleFailedJob(jobID, 'Input Sandbox Download') job.sendWMSAccounting('Failed', 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if arguments['Job'].has_key('InputData'): if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception, x: gLogger.exception('JobWrapper failed to resolve input data') rescheduleFailedJob(jobID, 'Input Data Resolution') job.sendWMSAccounting('Failed', 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments)
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("p:", "run_number=", "Run Number", setRunNumber) Script.registerSwitch("T:", "template=", "Template", setCorsikaTemplate) Script.registerSwitch("E:", "executable=", "Executable", setExecutable) Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig) Script.registerSwitch("V:", "version=", "Version", setVersion) Script.registerSwitch("M:", "mode=", "Mode", setMode) Script.registerSwitch("C:", "savecorsika=", "Save Corsika", setSaveCorsika) from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient from DIRAC.Resources.Catalog.FileCatalog import FileCatalog Script.parseCommandLine() global fcc, fcL, storage_element from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from CTADIRAC.Core.Workflow.Modules.CorsikaApp import CorsikaApp from CTADIRAC.Core.Workflow.Modules.Read_CtaApp import Read_CtaApp from DIRAC.Core.Utilities.Subprocess import systemCall jobID = os.environ['JOBID'] jobID = int(jobID) global jobReport jobReport = JobReport(jobID) ########### ## Checking MD coherence fc = FileCatalog('LcgFileCatalog') res = fc._getCatalogConfigDetails('DIRACFileCatalog') print 'DFC CatalogConfigDetails:', res res = fc._getCatalogConfigDetails('LcgFileCatalog') print 'LCG CatalogConfigDetails:', res fcc = FileCatalogClient() fcL = FileCatalog('LcgFileCatalog') from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() ############# simtelConfigFilesPath = 'sim_telarray/multi' simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg' #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-prod1s.cfg' createGlobalsFromConfigFiles('prodConfigFile', corsikaTemplate, version) ######################Building prod Directory Metadata ####################### resultCreateProdDirMD = createProdFileSystAndMD() if not resultCreateProdDirMD['OK']: DIRAC.gLogger.error('Failed to create prod Directory MD') jobReport.setApplicationStatus('Failed to create prod Directory MD') DIRAC.gLogger.error('Metadata coherence problem, no file produced') DIRAC.exit(-1) else: print 'prod Directory MD successfully created' ######################Building corsika Directory Metadata ####################### resultCreateCorsikaDirMD = createCorsikaFileSystAndMD() if not resultCreateCorsikaDirMD['OK']: DIRAC.gLogger.error('Failed to create corsika Directory MD') jobReport.setApplicationStatus('Failed to create corsika Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no corsikaFile produced') DIRAC.exit(-1) else: print 'corsika Directory MD successfully created' ############ Producing Corsika File global CorsikaSimtelPack CorsikaSimtelPack = os.path.join('corsika_simhessarray', version, 'corsika_simhessarray') install_CorsikaSimtelPack(version, 'sim') cs = CorsikaApp() cs.setSoftwarePackage(CorsikaSimtelPack) cs.csExe = executable cs.csArguments = [ '--run-number', run_number, '--run', 'corsika', corsikaTemplate ] corsikaReturnCode = cs.execute() if corsikaReturnCode != 0: DIRAC.gLogger.error('Corsika Application: Failed') jobReport.setApplicationStatus('Corsika Application: Failed') DIRAC.exit(-1) ###################### rename of corsika output file ####################### rundir = 'run' + run_number filein = rundir + '/' + corsikaOutputFileName corsikaFileName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.gz' mv_cmd = 'mv ' + filein + ' ' + corsikaFileName if (os.system(mv_cmd)): DIRAC.exit(-1) ######################## ######################## ## files spread in 1000-runs subDirectories runNum = int(run_number) subRunNumber = '%03d' % runNum runNumModMille = runNum % 1000 runNumTrunc = (runNum - runNumModMille) / 1000 runNumSeriesDir = '%03dxxx' % runNumTrunc print 'runNumSeriesDir=', runNumSeriesDir ### create corsika tar luisa #################### corsikaTarName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.tar.gz' filetar1 = rundir + '/' + 'input' filetar2 = rundir + '/' + 'DAT' + run_number + '.dbase' filetar3 = rundir + '/run' + str(int(run_number)) + '.log' cmdTuple = [ '/bin/tar', 'zcf', corsikaTarName, filetar1, filetar2, filetar3 ] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error('Failed to execute tar') DIRAC.exit(-1) ###################################################### corsikaOutFileDir = os.path.join(corsikaDirPath, particle, 'Data', runNumSeriesDir) corsikaOutFileLFN = os.path.join(corsikaOutFileDir, corsikaFileName) corsikaRunNumberSeriesDirExist = fcc.isDirectory( corsikaOutFileDir)['Value']['Successful'][corsikaOutFileDir] newCorsikaRunNumberSeriesDir = ( corsikaRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD #### create a file to DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK ################ f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') f.close() if savecorsika == 'True': DIRAC.gLogger.notice('Put and register corsika File in LFC and DFC:', corsikaOutFileLFN) ret = dirac.addFile(corsikaOutFileLFN, corsikaFileName, storage_element) res = CheckCatalogCoherence(corsikaOutFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) # put and register corsikaTarFile: corsikaTarFileDir = os.path.join(corsikaDirPath, particle, 'Log', runNumSeriesDir) corsikaTarFileLFN = os.path.join(corsikaTarFileDir, corsikaTarName) ##### If storage element is IN2P3-tape save simtel file on disk ############### if storage_element == 'CC-IN2P3-Tape': storage_element = 'CC-IN2P3-Disk' DIRAC.gLogger.notice( 'Put and register corsikaTar File in LFC and DFC:', corsikaTarFileLFN) ret = dirac.addFile(corsikaTarFileLFN, corsikaTarName, storage_element) ####Checking and restablishing catalog coherence ##################### res = CheckCatalogCoherence(corsikaTarFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### if newCorsikaRunNumberSeriesDir: insertRunFileSeriesMD(corsikaOutFileDir, runNumTrunc) insertRunFileSeriesMD(corsikaTarFileDir, runNumTrunc) ###### insert corsika File Level metadata ############################################ corsikaFileMD = {} corsikaFileMD['runNumber'] = int(run_number) corsikaFileMD['jobID'] = jobID corsikaFileMD['corsikaReturnCode'] = corsikaReturnCode corsikaFileMD['nbShowers'] = nbShowers result = fcc.setMetadata(corsikaOutFileLFN, corsikaFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(corsikaTarFileLFN, corsikaFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] ##### Exit now if only corsika simulation required if (mode == 'corsika_standalone'): DIRAC.exit() ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "SCMST": 'cta-prod2-sc3', "NORTH": 'cta-prod2n' } if simtelConfig == "6INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD", "SCMST"] elif simtelConfig == "5INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"] elif simtelConfig == "3INROW": all_configs = ["SCSST", "STD", "SCMST"] else: all_configs = [simtelConfig] ############################################ #for current_conf in all_configs: #DIRAC.gLogger.notice('current conf is',current_conf) #if current_conf == "SCMST": #current_version = version + '_sc3' #DIRAC.gLogger.notice('current version is', current_version) #if os.path.isdir('sim_telarray'): #DIRAC.gLogger.notice('Package found in the local area. Removing package...') #cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' #if(os.system(cmd)): #DIRAC.exit( -1 ) #install_CorsikaSimtelPack(current_version) #else: #current_version = version #DIRAC.gLogger.notice('current version is', current_version) ############################################################# for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) if current_conf == "SCMST": current_version = version + '_sc3' DIRAC.gLogger.notice('current version is', current_version) installSoftwareEnviron(CorsikaSimtelPack, workingArea(), 'sim-sc3') else: current_version = version DIRAC.gLogger.notice('current version is', current_version) ######################################################## global simtelDirPath global simtelProdVersion simtelProdVersion = current_version + '_simtel' simtelDirPath = os.path.join(corsikaParticleDirPath, simtelProdVersion) resultCreateSimtelDirMD = createSimtelFileSystAndMD(current_conf) if not resultCreateSimtelDirMD['OK']: DIRAC.gLogger.error('Failed to create simtelArray Directory MD') jobReport.setApplicationStatus( 'Failed to create simtelArray Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no simtelArray File produced') DIRAC.exit(-1) else: DIRAC.gLogger.notice('simtel Directory MD successfully created') ############## check simtel data file LFN exists ######################## simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel.gz' simtelDirPath_conf = simtelDirPath + '_' + current_conf simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data', runNumSeriesDir) simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName) res = CheckCatalogCoherence(simtelOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Current conf already done', current_conf) continue #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! /bin/sh source ./Corsika_simhessarrayEnv.sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2_rev10503 export CORSIKA_IO_BUFFER=800MB cp ../grid_prod2-repro.sh . ln -s ../%s ln -s ../$SVNTAG ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, corsikaFileName, current_conf)) fd.close() #################################### os.system('chmod u+x run_sim.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendOutputSimTel) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log') == 0): DIRAC.gLogger.error('Broken string found in simtel.log') jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## check simtel data/log/histo Output File exist cfg = cfg_dict[current_conf] #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if current_conf == "SCMST": cmdprefix = 'mv sim-sc3/Data/sim_telarray/' + cfg + '/0.0deg/' else: cmdprefix = 'mv sim/Data/sim_telarray/' + cfg + '/0.0deg/' cmd = cmdprefix + 'Data/*' + cfg + '_*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) ############################################ simtelRunNumberSeriesDirExist = fcc.isDirectory( simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] newSimtelRunFileSeriesDir = ( simtelRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD simtelLogFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName cmd = cmdprefix + 'Log/*' + cfg + '_*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log', runNumSeriesDir) simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir, simtelLogFileName) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata.gz' #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName cmd = cmdprefix + 'Histograms/*' + cfg + '_*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms', runNumSeriesDir) simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir, simtelHistFileName) ########### quality check on Histo ############################################# fd = open('check_histo.sh', 'w') fd.write("""#! /bin/sh nsim=$(list_histograms %s|fgrep 'Histogram 6 '|sed 's/^.*contents: //'| sed 's:/.*$::') nevents=%d if [ $nsim -lt $(( $nevents - 20 )) ]; then echo 'nsim found:' $nsim echo 'nsim expected:' $nevents exit 1 else echo 'nsim found:' $nsim echo 'nsim expected:' $nevents fi """ % (simtelHistFileName, int(nbShowers) * int(cscat))) fd.close() ret = getSoftwareEnviron(CorsikaSimtelPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error(error, CorsikaSimtelPack) DIRAC.exit(-1) corsikaEnviron = ret['Value'] os.system('chmod u+x check_histo.sh') cmdTuple = ['./check_histo.sh'] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput, env=corsikaEnviron) checkHistoReturnCode, stdout, stderr = ret['Value'] if not ret['OK']: DIRAC.gLogger.error('Failed to execute check_histo.sh') DIRAC.gLogger.error('check_histo.sh status is:', checkHistoReturnCode) DIRAC.exit(-1) if (checkHistoReturnCode != 0): DIRAC.gLogger.error('Failure during check_histo.sh') DIRAC.gLogger.error('check_histo.sh status is:', checkHistoReturnCode) jobReport.setApplicationStatus('Histo check Failed') DIRAC.exit(-1) ########## quality check on Log ############################# cmd = 'zcat %s | grep Finished.' % simtelLogFileName DIRAC.gLogger.notice('Executing system call:', cmd) if (os.system(cmd)): jobReport.setApplicationStatus('Log check Failed') DIRAC.exit(-1) ################################################ from DIRAC.Core.Utilities import List from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations opsHelper = Operations() global seList seList = opsHelper.getValue('ProductionOutputs/SimtelProd', []) seList = List.randomize(seList) DIRAC.gLogger.notice('SeList is:', seList) ######### Upload simtel data/log/histo ############################################## res = upload_to_seList(simtelOutFileLFN, simtelFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('OutputData Upload Error', simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutLogFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Log file already exists. Removing:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) res = upload_to_seList(simtelOutLogFileLFN, simtelLogFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Log Error', simtelOutLogFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutHistFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Histo file already exists. Removing:', simtelOutHistFileLFN) ret = dirac.removeFile(simtelOutHistFileLFN) res = upload_to_seList(simtelOutHistFileLFN, simtelHistFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Histo Error', simtelOutHistFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) DIRAC.gLogger.notice('Removing simtel log file:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) # simtelRunNumberSeriesDirExist = fcc.isDirectory(simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] # newSimtelRunFileSeriesDir = (simtelRunNumberSeriesDirExist != True) # if new runFileSeries, will need to add new MD if newSimtelRunFileSeriesDir: print 'insertRunFileSeriesMD' insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc) else: print 'NotinsertRunFileSeriesMD' ###### simtel File level metadata ############################################ simtelFileMD = {} simtelFileMD['runNumber'] = int(run_number) simtelFileMD['jobID'] = jobID simtelFileMD['simtelReturnCode'] = simtelReturnCode result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] if savecorsika == 'True': result = fcc.addFileAncestors( {simtelOutFileLFN: { 'Ancestors': [corsikaOutFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutLogFileLFN: { 'Ancestors': [corsikaOutFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutHistFileLFN: { 'Ancestors': [corsikaOutFileLFN] }}) print 'result addFileAncestor:', result ##### Exit now if only corsika simulation required if (mode == 'corsika_simtel'): continue ######### run read_cta ####################################### rcta = Read_CtaApp() rcta.setSoftwarePackage(CorsikaSimtelPack) rcta.rctaExe = 'read_cta' powerlaw_dict = { 'gamma': '-2.57', 'gamma_ptsrc': '-2.57', 'proton': '-2.70', 'electron': '-3.21' } dstFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel-dst0.gz' dstHistoFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata-dst0.gz' ## added some options starting from Armazones_2K prod. rcta.rctaArguments = [ '-r', '4', '-u', '--integration-scheme', '4', '--integration-window', '7,3', '--tail-cuts', '6,8', '--min-pix', '2', '--min-amp', '20', '--type', '1,0,0,400', '--tail-cuts', '9,12', '--min-amp', '20', '--type', '2,0,0,100', '--tail-cuts', '8,11', '--min-amp', '19', '--type', '3,0,0,40', '--tail-cuts', '6,9', '--min-amp', '15', '--type', '4,0,0,15', '--tail-cuts', '3.7,5.5', '--min-amp', '8', '--type', '5,0,0,70,5.6', '--tail-cuts', '2.4,3.2', '--min-amp', '5.6', '--dst-level', '0', '--dst-file', dstFileName, '--histogram-file', dstHistoFileName, '--powerlaw', powerlaw_dict[particle], simtelFileName ] rctaReturnCode = rcta.execute() if rctaReturnCode != 0: DIRAC.gLogger.error('read_cta Application: Failed') jobReport.setApplicationStatus('read_cta Application: Failed') DIRAC.exit(-1) ######## run dst quality checks ###################################### fd = open('check_dst_histo.sh', 'w') fd.write("""#! /bin/sh dsthistfilename=%s dstfile=%s n6="$(list_histograms -h 6 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" n12001="$(list_histograms -h 12001 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" if [ $n6 -ne $n12001 ]; then echo 'n6 found:' $n6 echo 'n12001 found:' $n12001 exit 1 else echo 'n6 found:' $n6 echo 'n12001 found:' $n12001 fi n12002="$(list_histograms -h 12002 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" nev="$(statio ${dstfile} | egrep '^2010' | cut -f2)" if [ -z "$nev" ]; then nev="0"; fi if [ $nev -ne $n12002 ]; then echo 'nev found:' $nev echo 'n12002 found:' $n12002 exit 1 else echo 'nev found:' $nev echo 'n12002 found:' $n12002 fi """ % (dstHistoFileName, dstFileName)) fd.close() os.system('chmod u+x check_dst_histo.sh') cmdTuple = ['./check_dst_histo.sh'] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput, env=corsikaEnviron) checkHistoReturnCode, stdout, stderr = ret['Value'] if not ret['OK']: DIRAC.gLogger.error('Failed to execute check_dst_histo.sh') DIRAC.gLogger.error('check_dst_histo.sh status is:', checkHistoReturnCode) DIRAC.exit(-1) if (checkHistoReturnCode != 0): DIRAC.gLogger.error('Failure during check_dst_histo.sh') DIRAC.gLogger.error('check_dst_histo.sh status is:', checkHistoReturnCode) jobReport.setApplicationStatus('Histo check Failed') DIRAC.exit(-1) ############create MD and upload dst data/histo ########################################################## global dstDirPath global dstProdVersion dstProdVersion = current_version + '_dst' dstDirPath = os.path.join(simtelDirPath_conf, dstProdVersion) dstOutFileDir = os.path.join(dstDirPath, 'Data', runNumSeriesDir) dstOutFileLFN = os.path.join(dstOutFileDir, dstFileName) resultCreateDstDirMD = createDstFileSystAndMD() if not resultCreateDstDirMD['OK']: DIRAC.gLogger.error('Failed to create Dst Directory MD') jobReport.setApplicationStatus('Failed to create Dst Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no Dst File produced') DIRAC.exit(-1) else: DIRAC.gLogger.notice('Dst Directory MD successfully created') ############################################################ res = CheckCatalogCoherence(dstOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('dst file already exists. Removing:', dstOutFileLFN) ret = dirac.removeFile(dstOutFileLFN) res = upload_to_seList(dstOutFileLFN, dstFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload dst Error', dstOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ############################################################## dstHistoFileDir = os.path.join(dstDirPath, 'Histograms', runNumSeriesDir) dstHistoFileLFN = os.path.join(dstHistoFileDir, dstHistoFileName) res = CheckCatalogCoherence(dstHistoFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('dst histo file already exists. Removing:', dstHistoFileLFN) ret = dirac.removeFile(dstHistoFileLFN) res = upload_to_seList(dstHistoFileLFN, dstHistoFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload dst Error', dstHistoFileName) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ########### Insert RunNumSeries MD ########################## dstRunNumberSeriesDirExist = fcc.isDirectory( dstOutFileDir)['Value']['Successful'][dstOutFileDir] newDstRunFileSeriesDir = ( dstRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD if newDstRunFileSeriesDir: insertRunFileSeriesMD(dstOutFileDir, runNumTrunc) insertRunFileSeriesMD(dstHistoFileDir, runNumTrunc) ####### dst File level metadata ############################################### dstFileMD = {} dstFileMD['runNumber'] = int(run_number) dstFileMD['jobID'] = jobID dstFileMD['rctaReturnCode'] = rctaReturnCode result = fcc.setMetadata(dstOutFileLFN, dstFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(dstHistoFileLFN, dstFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] ########## set the ancestors for dst ##################################### result = fcc.addFileAncestors( {dstOutFileLFN: { 'Ancestors': [simtelOutFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {dstHistoFileLFN: { 'Ancestors': [simtelOutFileLFN] }}) print 'result addFileAncestor:', result ###################################################### DIRAC.exit()
def execute(self): """The JobAgent execution method. """ if self.jobCount: # Temporary mechanism to pass a shutdown message to the agent if os.path.exists('/var/lib/dirac_drain'): return self.__finish('Node is being drained by an operator') # Only call timeLeft utility after a job has been picked up self.log.info('Attempting to check CPU time left for filling mode') if self.fillingMode: if self.timeLeftError: self.log.warn( "Disabling filling mode as errors calculating time left", self.timeLeftError) return self.__finish(self.timeLeftError) self.log.info('normalized CPU units remaining in slot', self.timeLeft) if self.timeLeft <= self.minimumTimeLeft: return self.__finish('No more time left') # Need to update the Configuration so that the new value is published in the next matching request result = self.computingElement.setCPUTimeLeft( cpuTimeLeft=self.timeLeft) if not result['OK']: return self.__finish(result['Message']) # Update local configuration to be used by submitted job wrappers localCfg = CFG() if self.extraOptions: localConfigFile = os.path.join('.', self.extraOptions) else: localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg") localCfg.loadFromFile(localConfigFile) if not localCfg.isSection('/LocalSite'): localCfg.createNewSection('/LocalSite') localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft) localCfg.writeToFile(localConfigFile) else: return self.__finish('Filling Mode is Disabled') self.log.verbose('Job Agent execution loop') result = self.computingElement.available() if not result['OK']: self.log.info('Resource is not available', result['Message']) return self.__finish('CE Not Available') ceInfoDict = result['CEInfoDict'] runningJobs = ceInfoDict.get("RunningJobs") availableSlots = result['Value'] if not availableSlots: if runningJobs: self.log.info('No available slots', '%d running jobs' % runningJobs) return S_OK('Job Agent cycle complete with %d running jobs' % runningJobs) else: self.log.info('CE is not available') return self.__finish('CE Not Available') result = self.computingElement.getDescription() if not result['OK']: return result # We can have several prioritized job retrieval strategies if isinstance(result['Value'], dict): ceDictList = [result['Value']] elif isinstance(result['Value'], list): # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy' ceDictList = result['Value'] for ceDict in ceDictList: # Add pilot information gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown') if gridCE != 'Unknown': ceDict['GridCE'] = gridCE if 'PilotReference' not in ceDict: ceDict['PilotReference'] = str(self.pilotReference) ceDict['PilotBenchmark'] = self.cpuFactor ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag # Add possible job requirements result = gConfig.getOptionsDict('/AgentJobRequirements') if result['OK']: requirementsDict = result['Value'] ceDict.update(requirementsDict) self.log.info('Requirements:', requirementsDict) self.log.verbose('CE dict', ceDict) # here finally calling the matcher start = time.time() jobRequest = MatcherClient().requestJob(ceDict) matchTime = time.time() - start self.log.info('MatcherTime', '= %.2f (s)' % (matchTime)) if jobRequest['OK']: break self.stopAfterFailedMatches = self.am_getOption( 'StopAfterFailedMatches', self.stopAfterFailedMatches) if not jobRequest['OK']: if re.search('No match found', jobRequest['Message']): self.log.notice('Job request OK, but no match found', ': %s' % (jobRequest['Message'])) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) elif jobRequest['Message'].find("seconds timeout") != -1: self.log.error('Timeout while requesting job', jobRequest['Message']) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) elif jobRequest['Message'].find( "Pilot version does not match") != -1: errorMsg = 'Pilot version does not match the production version' self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, '')) return S_ERROR(jobRequest['Message']) else: self.log.notice('Failed to get jobs', ': %s' % (jobRequest['Message'])) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) # Reset the Counter self.matchFailedCount = 0 matcherInfo = jobRequest['Value'] if not self.pilotInfoReportedFlag: # Check the flag after the first access to the Matcher self.pilotInfoReportedFlag = matcherInfo.get( 'PilotInfoReportedFlag', False) jobID = matcherInfo['JobID'] matcherParams = ['JDL', 'DN', 'Group'] for param in matcherParams: if param not in matcherInfo: self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param)) return self.__finish('Matcher Failed') elif not matcherInfo[param]: self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param)) return self.__finish('Matcher Failed') else: self.log.verbose('Matcher returned', '%s = %s ' % (param, matcherInfo[param])) jobJDL = matcherInfo['JDL'] jobGroup = matcherInfo['Group'] ownerDN = matcherInfo['DN'] optimizerParams = {} for key in matcherInfo: if key not in matcherParams: optimizerParams[key] = matcherInfo[key] parameters = self._getJDLParameters(jobJDL) if not parameters['OK']: self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters') self.log.warn('Could Not Extract JDL Parameters', parameters['Message']) return self.__finish('JDL Problem') params = parameters['Value'] if 'JobID' not in params: msg = 'Job has not JobID defined in JDL parameters' self.__report(jobID, 'Failed', msg) self.log.warn(msg) return self.__finish('JDL Problem') else: jobID = params['JobID'] if 'JobType' not in params: self.log.warn('Job has no JobType defined in JDL parameters') jobType = 'Unknown' else: jobType = params['JobType'] if 'CPUTime' not in params: self.log.warn( 'Job has no CPU requirement defined in JDL parameters') # Job requirements for determining the number of processors # the minimum number of processors requested processors = int( params.get('NumberOfProcessors', int(params.get('MinNumberOfProcessors', 1)))) # the maximum number of processors allowed to the payload maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0)) # need or not the whole node for the job wholeNode = 'WholeNode' in params mpTag = 'MultiProcessor' in params.get('Tags', []) if self.extraOptions: params['Arguments'] = (params.get('Arguments', '') + ' ' + self.extraOptions).strip() params['ExtraOptions'] = self.extraOptions self.log.verbose('Job request successful: \n', jobRequest['Value']) self.log.info( 'Received', 'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' % (jobID, jobType, ownerDN, jobGroup)) self.jobCount += 1 try: jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName) jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False) if 'BOINC_JOB_ID' in os.environ: # Report BOINC environment for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'): jobReport.setJobParameter(thisp, gConfig.getValue( '/LocalSite/%s' % thisp, 'Unknown'), sendFlag=False) jobReport.setJobStatus('Matched', 'Job Received by Agent') result = self._setupProxy(ownerDN, jobGroup) if not result['OK']: return self._rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure) proxyChain = result.get('Value') # Save the job jdl for external monitoring self.__saveJobJDLRequest(jobID, jobJDL) software = self._checkInstallSoftware(jobID, params, ceDict) if not software['OK']: self.log.error('Failed to install software for job', '%s' % (jobID)) errorMsg = software['Message'] if not errorMsg: errorMsg = 'Failed software installation' return self._rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure) self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName)) result = self._submitJob(jobID, params, ceDict, optimizerParams, proxyChain, processors, wholeNode, maxNumberOfProcessors, mpTag) if not result['OK']: return self.__finish(result['Message']) elif 'PayloadFailed' in result: # Do not keep running and do not overwrite the Payload error message = 'Payload execution failed with error code %s' % result[ 'PayloadFailed'] if self.stopOnApplicationFailure: return self.__finish(message, self.stopOnApplicationFailure) else: self.log.info(message) self.log.debug('After %sCE submitJob()' % (self.ceName)) except Exception as subExcept: # pylint: disable=broad-except self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True) return self._rescheduleFailedJob( jobID, 'Job processing failed with exception', self.stopOnApplicationFailure) # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?) cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1]) result = self.timeLeftUtil.getTimeLeft(cpuTime, processors) if result['OK']: self.timeLeft = result['Value'] else: if result['Message'] != 'Current batch system is not supported': self.timeLeftError = result['Message'] else: # if the batch system is not defined, use the process time and the CPU normalization defined locally self.timeLeft = self._getCPUTimeLeft() return S_OK('Job Agent cycle complete')
def main(): from DIRAC.Core.Base import Script ### DoCtaIrf options ########################################################## Script.registerSwitch("A:", "analysis=", "Analysis Type", setAnalysisType) Script.registerSwitch("C:", "cuts=", "Cuts Config", setCutsConfig) Script.registerSwitch("R:", "runlist=", "Runlist", setRunlist) Script.registerSwitch("Z:", "zenith=", "Zenith", setZenith) Script.registerSwitch("O:", "offset=", "Offset", setOffset) Script.registerSwitch("M:", "energy=", "Energy Method", setEnergyMethod) Script.registerSwitch("T:", "arrayconfig=", "Array Configuration", setArrayConfig) Script.registerSwitch("P:", "particle=", "Particle Type", setParticleType) ## other options Script.registerSwitch("V:", "version=", "HAP version", setVersion) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp() from CTADIRAC.Core.Workflow.Modules.HapApplication import HapApplication from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) ha = HapApplication() HapPack = 'HAP/' + version + '/HAP' packs = ['HESS/v0.2/lib', 'HESS/v0.3/root', HapPack] for package in packs: DIRAC.gLogger.notice('Checking:', package) if sharedArea: if checkSoftwarePackage(package, sharedArea())['OK']: DIRAC.gLogger.notice('Package found in Shared Area:', package) continue if localArea: if checkSoftwarePackage(package, localArea())['OK']: DIRAC.gLogger.notice('Package found in Local Area:', package) continue if installSoftwarePackage(package, localArea())['OK']: continue DIRAC.gLogger.error('Check Failed for software package:', package) DIRAC.gLogger.error('Software package not available') DIRAC.exit(-1) ha.setSoftwarePackage(HapPack) ha.hapExecutable = 'DoCtaIrf' runlistdir = os.environ['PWD'] build_infile(runlist) ha.hapArguments = [ analysistype, cutsconfig, runlistdir, runlist, zenith, offset, arrayconfig, energymethod, particle ] DIRAC.gLogger.notice('Executing Hap Application') res = ha.execute() if not res['OK']: DIRAC.gLogger.error('Failed to execute Hap Application') jobReport.setApplicationStatus('Hap Application: Failed') DIRAC.exit(-1) DIRAC.exit()
def execute(self): """The JobAgent execution method. """ if self.jobCount: # Only call timeLeft utility after a job has been picked up self.log.info('Attempting to check CPU time left for filling mode') if self.fillingMode: if self.timeLeftError: self.log.warn(self.timeLeftError) return self.__finish(self.timeLeftError) self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft)) if self.timeLeft <= self.minimumTimeLeft: return self.__finish('No more time left') # Need to update the Configuration so that the new value is published in the next matching request result = self.computingElement.setCPUTimeLeft( cpuTimeLeft=self.timeLeft) if not result['OK']: return self.__finish(result['Message']) # Update local configuration to be used by submitted job wrappers localCfg = CFG() if self.extraOptions: localConfigFile = os.path.join('.', self.extraOptions) else: localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg") localCfg.loadFromFile(localConfigFile) if not localCfg.isSection('/LocalSite'): localCfg.createNewSection('/LocalSite') localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft) localCfg.writeToFile(localConfigFile) else: return self.__finish('Filling Mode is Disabled') self.log.verbose('Job Agent execution loop') available = self.computingElement.available() if not available['OK'] or not available['Value']: self.log.info('Resource is not available') self.log.info(available['Message']) return self.__finish('CE Not Available') self.log.info(available['Message']) result = self.computingElement.getDescription() if not result['OK']: return result ceDict = result['Value'] # Add pilot information gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown') if gridCE != 'Unknown': ceDict['GridCE'] = gridCE if not 'PilotReference' in ceDict: ceDict['PilotReference'] = str(self.pilotReference) ceDict['PilotBenchmark'] = self.cpuFactor ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag # Add possible job requirements result = gConfig.getOptionsDict('/AgentJobRequirements') if result['OK']: requirementsDict = result['Value'] ceDict.update(requirementsDict) self.log.verbose(ceDict) start = time.time() jobRequest = self.__requestJob(ceDict) matchTime = time.time() - start self.log.info('MatcherTime = %.2f (s)' % (matchTime)) self.stopAfterFailedMatches = self.am_getOption( 'StopAfterFailedMatches', self.stopAfterFailedMatches) if not jobRequest['OK']: if re.search('No match found', jobRequest['Message']): self.log.notice('Job request OK: %s' % (jobRequest['Message'])) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) elif jobRequest['Message'].find("seconds timeout") != -1: self.log.error('Timeout while requesting job', jobRequest['Message']) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) elif jobRequest['Message'].find( "Pilot version does not match") != -1: errorMsg = 'Pilot version does not match the production version' self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, '')) return S_ERROR(jobRequest['Message']) else: self.log.notice('Failed to get jobs: %s' % (jobRequest['Message'])) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self.__finish( 'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches) return S_OK(jobRequest['Message']) # Reset the Counter self.matchFailedCount = 0 matcherInfo = jobRequest['Value'] if not self.pilotInfoReportedFlag: # Check the flag after the first access to the Matcher self.pilotInfoReportedFlag = matcherInfo.get( 'PilotInfoReportedFlag', False) jobID = matcherInfo['JobID'] matcherParams = ['JDL', 'DN', 'Group'] for param in matcherParams: if param not in matcherInfo: self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param)) return self.__finish('Matcher Failed') elif not matcherInfo[param]: self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param)) return self.__finish('Matcher Failed') else: self.log.verbose('Matcher returned %s = %s ' % (param, matcherInfo[param])) jobJDL = matcherInfo['JDL'] jobGroup = matcherInfo['Group'] ownerDN = matcherInfo['DN'] optimizerParams = {} for key in matcherInfo: if key not in matcherParams: optimizerParams[key] = matcherInfo[key] parameters = self.__getJDLParameters(jobJDL) if not parameters['OK']: self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters') self.log.warn(parameters['Message']) return self.__finish('JDL Problem') params = parameters['Value'] if 'JobID' not in params: msg = 'Job has not JobID defined in JDL parameters' self.__report(jobID, 'Failed', msg) self.log.warn(msg) return self.__finish('JDL Problem') else: jobID = params['JobID'] if 'JobType' not in params: self.log.warn('Job has no JobType defined in JDL parameters') jobType = 'Unknown' else: jobType = params['JobType'] if 'CPUTime' not in params: self.log.warn( 'Job has no CPU requirement defined in JDL parameters') if self.extraOptions: params['Arguments'] += ' ' + self.extraOptions params['ExtraOptions'] = self.extraOptions self.log.verbose('Job request successful: \n', jobRequest['Value']) self.log.info('Received JobID=%s, JobType=%s' % (jobID, jobType)) self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup)) self.jobCount += 1 try: jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName) jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False) if 'BOINC_JOB_ID' in os.environ: # Report BOINC environment for p in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'): jobReport.setJobParameter(p, gConfig.getValue( '/LocalSite/%s' % p, 'Unknown'), sendFlag=False) jobReport.setJobStatus('Matched', 'Job Received by Agent') result = self.__setupProxy(ownerDN, jobGroup) if not result['OK']: return self.__rescheduleFailedJob( jobID, result['Message'], self.stopOnApplicationFailure) proxyChain = result.get('Value') # Save the job jdl for external monitoring self.__saveJobJDLRequest(jobID, jobJDL) software = self.__checkInstallSoftware(jobID, params, ceDict) if not software['OK']: self.log.error('Failed to install software for job', '%s' % (jobID)) errorMsg = software['Message'] if not errorMsg: errorMsg = 'Failed software installation' return self.__rescheduleFailedJob( jobID, errorMsg, self.stopOnApplicationFailure) self.log.debug('Before %sCE submitJob()' % (self.ceName)) submission = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain) if not submission['OK']: self.__report(jobID, 'Failed', submission['Message']) return self.__finish(submission['Message']) elif 'PayloadFailed' in submission: # Do not keep running and do not overwrite the Payload error message = 'Payload execution failed with error code %s' % submission[ 'PayloadFailed'] if self.stopOnApplicationFailure: return self.__finish(message, self.stopOnApplicationFailure) else: self.log.info(message) self.log.debug('After %sCE submitJob()' % (self.ceName)) except Exception: self.log.exception() return self.__rescheduleFailedJob( jobID, 'Job processing failed with exception', self.stopOnApplicationFailure) # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?) cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1]) result = self.timeLeftUtil.getTimeLeft(cpuTime) if result['OK']: self.timeLeft = result['Value'] else: if result['Message'] != 'Current batch system is not supported': self.timeLeftError = result['Message'] else: # if the batch system is not defined, use the process time and the CPU normalization defined locally self.timeLeft = self.__getCPUTimeLeft() scaledCPUTime = self.timeLeftUtil.getScaledCPU() self.__setJobParam(jobID, 'ScaledCPUTime', str(scaledCPUTime - self.scaledCPUTime)) self.scaledCPUTime = scaledCPUTime return S_OK('Job Agent cycle complete')
def execute(arguments): global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) if arguments.has_key('WorkingDirectory'): wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs(wdir) if os.path.isdir(wdir): os.chdir(wdir) except Exception: gLogger.exception( 'JobWrapperTemplate could not create working directory') rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory') return 1 gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) except Exception as e: gLogger.exception('JobWrapper failed the initialization phase', lException=e) rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) try: job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization') except Exception as e: gLogger.exception('JobWrapper failed sending job accounting', lException=e) return 1 if arguments['Job'].has_key('InputSandbox'): gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if arguments['Job'].has_key('InputData'): if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception as x: gLogger.exception('JobWrapper failed to resolve input data') rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments) else: gLogger.verbose('Job has no InputData requirement') gJobReport.commit() try: result = job.execute(arguments) if not result['OK']: gLogger.error('Failed to execute job', result['Message']) raise JobWrapperError(result['Message']) except Exception as x: if str(x) == '0': gLogger.verbose('JobWrapper exited with status=0 after execution') else: gLogger.exception('Job failed in execution phase') gJobReport.setJobParameter('Error Message', str(x), sendFlag=False) gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False) job.sendFailoverRequest('Failed', 'Exception During Execution') return 1 if arguments['Job'].has_key('OutputSandbox') or arguments['Job'].has_key( 'OutputData'): try: result = job.processJobOutputs(arguments) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception as x: gLogger.exception('JobWrapper failed to process output files') gJobReport.setJobParameter('Error Message', str(x), sendFlag=False) gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False) job.sendFailoverRequest('Failed', 'Uploading Job Outputs') return 2 else: gLogger.verbose('Job has no OutputData or OutputSandbox requirement') try: # Failed jobs will return 1 / successful jobs will return 0 return job.finalize(arguments) except Exception: gLogger.exception('JobWrapper failed the finalization phase') return 2
def main(): from DIRAC.Core.Base import Script ### DoCtaIrf options ########################################################## Script.registerSwitch("A:", "analysis=", "Analysis Type", setAnalysisType) Script.registerSwitch("C:", "cuts=", "Cuts Config", setCutsConfig) Script.registerSwitch("R:", "runlist=", "Runlist", setRunlist) Script.registerSwitch("Z:", "zenith=", "Zenith", setZenith) Script.registerSwitch("O:", "offset=", "Offset", setOffset) Script.registerSwitch("M:", "energy=", "Energy Method", setEnergyMethod) Script.registerSwitch("T:", "arrayconfig=", "Array Configuration", setArrayConfig) Script.registerSwitch("P:", "particle=", "Particle Type", setParticleType) ## other options Script.registerSwitch("V:", "version=", "HAP version", setVersion) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp() from CTADIRAC.Core.Workflow.Modules.HapApplication import HapApplication from CTADIRAC.Core.Workflow.Modules.HapRootMacro import HapRootMacro from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) ha = HapApplication() HapPack = 'HAP/' + version + '/HAP' packs = ['HESS/v0.2/lib', 'HESS/v0.3/root', HapPack] for package in packs: DIRAC.gLogger.notice('Checking:', package) if sharedArea: if checkSoftwarePackage(package, sharedArea())['OK']: DIRAC.gLogger.notice('Package found in Shared Area:', package) continue if localArea: if checkSoftwarePackage(package, localArea())['OK']: DIRAC.gLogger.notice('Package found in Local Area:', package) continue if installSoftwarePackage(package, localArea())['OK']: continue DIRAC.gLogger.error('Check Failed for software package:', package) DIRAC.gLogger.error('Software package not available') DIRAC.exit(-1) ha.setSoftwarePackage(HapPack) ha.hapExecutable = 'DoCtaIrf' runlistdir = os.environ['PWD'] build_infile(runlist) ha.hapArguments = [ analysistype, cutsconfig, runlistdir, runlist, zenith, offset, arrayconfig, energymethod, particle ] DIRAC.gLogger.notice('Executing Hap Application') res = ha.execute() if not res['OK']: DIRAC.gLogger.error('Failed to execute Hap Application') jobReport.setApplicationStatus('Hap Application: Failed') DIRAC.exit(-1) ###################### Check TTree Output File ####################### outfile = 'MVAFile_' + runlist + ".root" if not os.path.isfile(outfile): error = 'TTree file was not created:' DIRAC.gLogger.error(error, outfile) jobReport.setApplicationStatus('DoCtaIrf: TTree file not created') DIRAC.exit(-1) ###################### Quality Check for TTree Output File: step0###################### hr = HapRootMacro() hr.setSoftwarePackage(HapPack) DIRAC.gLogger.notice('Executing TTree check step0') hr.rootMacro = '/hapscripts/mva/Open_TT.C+' outfilestr = '"' + outfile + '"' args = [outfilestr] DIRAC.gLogger.notice('Open_TT macro Arguments:', args) hr.rootArguments = args DIRAC.gLogger.notice('Executing Hap Open_TT macro') res = hr.execute() if not res['OK']: DIRAC.gLogger.error('Open_TT: Failed') DIRAC.exit(-1) #########################Quality Check for TTree Output File: step1#################### DIRAC.gLogger.notice('Executing TTree check step1') ret = getSoftwareEnviron(HapPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error(error, HapPack) DIRAC.exit(-1) hapEnviron = ret['Value'] hessroot = hapEnviron['HESSROOT'] check_script = hessroot + '/hapscripts/mva/check_TT.csh' cmdTuple = [check_script] ret = systemCall(0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error('Failed to execute TTree Check step1') jobReport.setApplicationStatus('Check_TTree: Failed') DIRAC.exit(-1) ############################################# DIRAC.exit()
def main(): from DIRAC.Core.Base import Script Script.initialize() DIRAC.gLogger.notice('Platform is:') os.system('dirac-platform') from CTADIRAC.Core.Workflow.Modules.Read_CtaApp import Read_CtaApp from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) version = sys.argv[3] DIRAC.gLogger.notice('Version:', version) install_CorsikaSimtelPack(version) ######### run read_cta ####################################### rcta = Read_CtaApp() CorsikaSimtelPack = os.path.join('corsika_simhessarray', version, 'corsika_simhessarray') rcta.setSoftwarePackage(CorsikaSimtelPack) rcta.rctaExe = 'read_cta' # add arguments for read_cta specified by user ###### args = [] rctaparfile = open('read_cta.par', 'r').readlines() for line in rctaparfile: for word in line.split(): args.append(word) simtelFileLFN = sys.argv[-1].split('ParametricInputData=LFN:')[1] simtelFileName = os.path.basename(simtelFileLFN) dstFileName = simtelFileName.replace('simtel.gz', 'simtel-dst0.gz') dstHistoFileName = simtelFileName.replace('simtel.gz', 'hdata-dst0.gz') args.extend([ '--dst-file', dstFileName, '--histogram-file', dstHistoFileName, simtelFileName ]) rcta.rctaArguments = args rctaReturnCode = rcta.execute() if rctaReturnCode != 0: DIRAC.gLogger.error('read_cta Application: Failed') jobReport.setApplicationStatus('read_cta Application: Failed') DIRAC.exit(-1) ################################################################# from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron ret = getSoftwareEnviron(CorsikaSimtelPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error(error, CorsikaSimtelPack) DIRAC.exit(-1) read_ctaEnviron = ret['Value'] ######## run dst quality checks ###################################### fd = open('check_dst_histo.sh', 'w') fd.write("""#! /bin/sh dsthistfilename=%s dstfile=%s n6="$(list_histograms -h 6 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" n12001="$(list_histograms -h 12001 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" if [ $n6 -ne $n12001 ]; then echo 'n6 found:' $n6 echo 'n12001 found:' $n12001 exit 1 else echo 'n6 found:' $n6 echo 'n12001 found:' $n12001 fi n12002="$(list_histograms -h 12002 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" nev="$(statio ${dstfile} | egrep '^2010' | cut -f2)" if [ -z "$nev" ]; then nev="0"; fi if [ $nev -ne $n12002 ]; then echo 'nev found:' $nev echo 'n12002 found:' $n12002 exit 1 else echo 'nev found:' $nev echo 'n12002 found:' $n12002 fi """ % (dstHistoFileName, dstFileName)) fd.close() os.system('chmod u+x check_dst_histo.sh') cmdTuple = ['./check_dst_histo.sh'] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput, env=read_ctaEnviron) checkHistoReturnCode, stdout, stderr = ret['Value'] if not ret['OK']: DIRAC.gLogger.error('Failed to execute check_dst_histo.sh') DIRAC.gLogger.error('check_dst_histo.sh status is:', checkHistoReturnCode) DIRAC.exit(-1) if (checkHistoReturnCode != 0): DIRAC.gLogger.error('Failure during check_dst_histo.sh') DIRAC.gLogger.error('check_dst_histo.sh status is:', checkHistoReturnCode) jobReport.setApplicationStatus('Histo check Failed') DIRAC.exit(-1) DIRAC.exit()
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("p:", "inputfile=", "Input File", setInputFile) Script.registerSwitch("E:", "simtelExecName=", "SimtelExecName", setExecutable) Script.registerSwitch("V:", "version=", "Version", setVersion) Script.registerSwitch("D:", "storage_element=", "Storage Element", setStorageElement) from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient from DIRAC.Resources.Catalog.FileCatalog import FileCatalog Script.parseCommandLine() global fcc, fcL, storage_element from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int(jobID) jobReport = JobReport(jobID) CorsikaSimtelPack = 'corsika_simhessarray/' + version + '/corsika_simhessarray' packs = [CorsikaSimtelPack] for package in packs: DIRAC.gLogger.notice('Checking:', package) if sharedArea: if checkSoftwarePackage(package, sharedArea())['OK']: DIRAC.gLogger.notice('Package found in Shared Area:', package) installSoftwareEnviron(package, workingArea()) packageTuple = package.split('/') corsika_subdir = sharedArea( ) + '/' + packageTuple[0] + '/' + version cmd = 'cp -u -r ' + corsika_subdir + '/* .' os.system(cmd) continue if workingArea: if checkSoftwarePackage(package, workingArea())['OK']: DIRAC.gLogger.notice('Package found in Local Area:', package) continue if installSoftwarePackage(package, workingArea())['OK']: ############## compile ############################# if version == 'prod-2_21122012': cmdTuple = ['./build_all', 'prod2', 'qgs2'] else: cmdTuple = ['./build_all', 'ultra', 'qgs2'] ret = systemCall(0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error('Failed to compile') DIRAC.exit(-1) continue DIRAC.gLogger.error('Check Failed for software package:', package) DIRAC.gLogger.error('Software package not available') DIRAC.exit(-1) ########### ## Checking MD coherence fc = FileCatalog('LcgFileCatalog') res = fc._getCatalogConfigDetails('DIRACFileCatalog') print 'DFC CatalogConfigDetails:', res res = fc._getCatalogConfigDetails('LcgFileCatalog') print 'LCG CatalogConfigDetails:', res fcc = FileCatalogClient() fcL = FileCatalog('LcgFileCatalog') from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() ############################ ############# # CLAUDIA: simtelConfigFile should be built from ??? simtelConfigFilesPath = 'sim_telarray/multi' simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg' createGlobalsFromConfigFiles(simtelConfigFile) ####################### ## files spread in 1000-runs subDirectories corsikaFileName = os.path.basename(corsikaFileLFN) run_number = corsikaFileName.split('run')[1].split('.corsika.gz')[ 0] # run001412.corsika.gz runNum = int(run_number) subRunNumber = '%03d' % runNum runNumModMille = runNum % 1000 runNumTrunc = (runNum - runNumModMille) / 1000 runNumSeriesDir = '%03dxxx' % runNumTrunc print 'runNumSeriesDir=', runNumSeriesDir f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') f.close() ##### If storage element is IN2P3-tape save simtel file on disk ############### if storage_element == 'CC-IN2P3-Tape': storage_element = 'CC-IN2P3-Disk' ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### resultCreateSimtelDirMD = createSimtelFileSystAndMD() if not resultCreateSimtelDirMD['OK']: DIRAC.gLogger.error('Failed to create simtelArray Directory MD') jobReport.setApplicationStatus( 'Failed to create simtelArray Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no simtelArray File produced') DIRAC.exit(-1) else: print 'simtel Directory MD successfully created' #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write( """#! /bin/sh echo "go for sim_telarray" . ./examples_common.sh export CORSIKA_IO_BUFFER=800MB zcat %s | $SIM_TELARRAY_PATH/run_sim_%s""" % (corsikaFileName, simtelExecName)) fd.close() os.system('chmod u+x run_sim.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendOutputSimTel) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log')): print 'not broken' else: print 'broken' # Tag corsika File if Broken Pipe corsikaTagMD = {} corsikaTagMD['CorsikaToReprocess'] = 'CorsikaToReprocess' result = fcc.setMetadata(corsikaFileLFN, corsikaTagMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## putAndRegister simtel data/log/histo Output File: simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.simtel.gz' cmd = 'mv Data/sim_telarray/' + simtelExecName + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutFileDir = os.path.join(simtelDirPath, 'Data', runNumSeriesDir) simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName) simtelRunNumberSeriesDirExist = fcc.isDirectory( simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] newSimtelRunFileSeriesDir = ( simtelRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD simtelLogFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' cmd = 'mv Data/sim_telarray/' + simtelExecName + '/0.0deg/Log/*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutLogFileDir = os.path.join(simtelDirPath, 'Log', runNumSeriesDir) simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir, simtelLogFileName) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.hdata.gz' cmd = 'mv Data/sim_telarray/' + simtelExecName + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutHistFileDir = os.path.join(simtelDirPath, 'Histograms', runNumSeriesDir) simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir, simtelHistFileName) ################################################ DIRAC.gLogger.notice('Put and register simtel File in LFC and DFC:', simtelOutFileLFN) ret = dirac.addFile(simtelOutFileLFN, simtelFileName, storage_element) res = CheckCatalogCoherence(simtelOutFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### DIRAC.gLogger.notice('Put and register simtel Log File in LFC and DFC:', simtelOutLogFileLFN) ret = dirac.addFile(simtelOutLogFileLFN, simtelLogFileName, storage_element) res = CheckCatalogCoherence(simtelOutLogFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### DIRAC.gLogger.notice('Put and register simtel Histo File in LFC and DFC:', simtelOutHistFileLFN) ret = dirac.addFile(simtelOutHistFileLFN, simtelHistFileName, storage_element) res = CheckCatalogCoherence(simtelOutHistFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### if newSimtelRunFileSeriesDir: insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc) ###### simtel File level metadata ############################################ simtelFileMD = {} simtelFileMD['runNumber'] = int(run_number) simtelFileMD['jobID'] = jobID simtelFileMD['simtelReturnCode'] = simtelReturnCode result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.addFileAncestors( {simtelOutFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutLogFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutHistFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) if not result['OK']: print 'ResultSetMetadata:', result['Message'] DIRAC.exit()
''' Created on 2015-05-19 21:45:37 @author: suo ''' import sys from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport from DIRAC.Core.Base import Script Script.parseCommandLine(ignoreErrors=False) jobID = sys.argv[1] experiment = sys.argv[2] message = sys.argv[3] jobReport = JobReport(jobID, experiment) result = jobReport.setApplicationStatus(message) if not result['OK']: try: with open('job.err', 'a') as errFile: print >> errFile, 'setJobStatus error: %s' % result except IOError: print 'IOError:', str(e)
def execute(self): """The JobAgent execution method.""" # Temporary mechanism to pass a shutdown message to the agent if os.path.exists("/var/lib/dirac_drain"): return self._finish("Node is being drained by an operator") self.log.verbose("Job Agent execution loop") # Check that there is enough slots to match a job result = self._checkCEAvailability(self.computingElement) if not result["OK"]: return self._finish(result["Message"]) if result["OK"] and result["Value"]: return result # Check that we are allowed to continue and that time left is sufficient if self.jobCount: cpuWorkLeft = self._computeCPUWorkLeft() result = self._checkCPUWorkLeft(cpuWorkLeft) if not result["OK"]: return result result = self._setCPUWorkLeft(cpuWorkLeft) if not result["OK"]: return result # Get environment details and enhance them result = self._getCEDict(self.computingElement) if not result["OK"]: return result ceDictList = result["Value"] for ceDict in ceDictList: self._setCEDict(ceDict) # Try to match a job jobRequest = self._matchAJob(ceDictList) self.stopAfterFailedMatches = self.am_getOption( "StopAfterFailedMatches", self.stopAfterFailedMatches) if not jobRequest["OK"]: res = self._checkMatchingIssues(jobRequest) if not res["OK"]: self._finish(res["Message"]) return res # if we don't match a job, independently from the reason, # we wait a bit longer before trying again time.sleep( int(self.am_getOption("PollingTime")) * (self.matchFailedCount + 1) * 2) return res # If we are, we matched a job # Reset the Counter self.matchFailedCount = 0 # Check matcher information returned matcherParams = ["JDL", "DN", "Group"] matcherInfo = jobRequest["Value"] jobID = matcherInfo["JobID"] jobReport = JobReport(jobID, "JobAgent@%s" % self.siteName) result = self._checkMatcherInfo(matcherInfo, matcherParams, jobReport) if not result["OK"]: return self._finish(result["Message"]) # Get matcher information if not self.pilotInfoReportedFlag: # Check the flag after the first access to the Matcher self.pilotInfoReportedFlag = matcherInfo.get( "PilotInfoReportedFlag", False) jobJDL = matcherInfo["JDL"] jobGroup = matcherInfo["Group"] ownerDN = matcherInfo["DN"] ceDict = matcherInfo["CEDict"] matchTime = matcherInfo["matchTime"] optimizerParams = {} for key in matcherInfo: if key not in matcherParams: optimizerParams[key] = matcherInfo[key] # Get JDL paramters parameters = self._getJDLParameters(jobJDL) if not parameters["OK"]: jobReport.setJobStatus( status=JobStatus.FAILED, minorStatus="Could Not Extract JDL Parameters") self.log.warn("Could Not Extract JDL Parameters", parameters["Message"]) return self._finish("JDL Problem") params = parameters["Value"] result = self._extractValuesFromJobParams(params, jobReport) if not result["OK"]: return self._finish(result["Value"]) submissionParams = result["Value"] jobID = submissionParams["jobID"] jobType = submissionParams["jobType"] self.log.verbose("Job request successful: \n", jobRequest["Value"]) self.log.info( "Received", "JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s" % (jobID, jobType, ownerDN, jobGroup)) self.jobCount += 1 try: jobReport.setJobParameter(par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False) if "BOINC_JOB_ID" in os.environ: # Report BOINC environment for thisp in ("BoincUserID", "BoincHostID", "BoincHostPlatform", "BoincHostName"): jobReport.setJobParameter(par_name=thisp, par_value=gConfig.getValue( "/LocalSite/%s" % thisp, "Unknown"), sendFlag=False) jobReport.setJobStatus(minorStatus="Job Received by Agent", sendFlag=False) result_setupProxy = self._setupProxy(ownerDN, jobGroup) if not result_setupProxy["OK"]: result = self._rescheduleFailedJob( jobID, result_setupProxy["Message"]) return self._finish(result["Message"], self.stopOnApplicationFailure) proxyChain = result_setupProxy.get("Value") # Save the job jdl for external monitoring self._saveJobJDLRequest(jobID, jobJDL) # Check software and install them if required software = self._checkInstallSoftware(jobID, params, ceDict, jobReport) if not software["OK"]: self.log.error("Failed to install software for job", "%s" % (jobID)) errorMsg = software["Message"] if not errorMsg: errorMsg = "Failed software installation" result = self._rescheduleFailedJob(jobID, errorMsg) return self._finish(result["Message"], self.stopOnApplicationFailure) gridCE = gConfig.getValue("/LocalSite/GridCE", "") if gridCE: jobReport.setJobParameter(par_name="GridCE", par_value=gridCE, sendFlag=False) queue = gConfig.getValue("/LocalSite/CEQueue", "") if queue: jobReport.setJobParameter(par_name="CEQueue", par_value=queue, sendFlag=False) self.log.debug("Before self._submitJob() (%sCE)" % (self.ceName)) result_submitJob = self._submitJob( jobID=jobID, jobParams=params, resourceParams=ceDict, optimizerParams=optimizerParams, proxyChain=proxyChain, jobReport=jobReport, processors=submissionParams["processors"], wholeNode=submissionParams["wholeNode"], maxNumberOfProcessors=submissionParams[ "maxNumberOfProcessors"], mpTag=submissionParams["mpTag"], ) # Committing the JobReport before evaluating the result of job submission res = jobReport.commit() if not res["OK"]: resFD = jobReport.generateForwardDISET() if not resFD["OK"]: self.log.error("Error generating ForwardDISET operation", resFD["Message"]) elif resFD["Value"]: # Here we create the Request. op = resFD["Value"] request = Request() requestName = "jobAgent_%s" % jobID request.RequestName = requestName.replace('"', "") request.JobID = jobID request.SourceComponent = "JobAgent_%s" % jobID request.addOperation(op) # This might fail, but only a message would be printed. self._sendFailoverRequest(request) if not result_submitJob["OK"]: return self._finish(result_submitJob["Message"]) elif "PayloadFailed" in result_submitJob: # Do not keep running and do not overwrite the Payload error message = "Payload execution failed with error code %s" % result_submitJob[ "PayloadFailed"] if self.stopOnApplicationFailure: return self._finish(message, self.stopOnApplicationFailure) else: self.log.info(message) self.log.debug("After %sCE submitJob()" % (self.ceName)) except Exception as subExcept: # pylint: disable=broad-except self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True) result = self._rescheduleFailedJob( jobID, "Job processing failed with exception", direct=True) return self._finish(result["Message"], self.stopOnApplicationFailure) return S_OK("Job Agent cycle complete")
def main(): from DIRAC.Core.Base import Script Script.initialize() DIRAC.gLogger.notice('Platform is:') os.system('dirac-platform') from DIRAC.DataManagementSystem.Client.DataManager import DataManager from CTADIRAC.Core.Workflow.Modules.EvnDispApp import EvnDispApp from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport jobID = os.environ['JOBID'] jobID = int( jobID ) jobReport = JobReport( jobID ) version = sys.argv[3] DIRAC.gLogger.notice( 'Version:', version ) EvnDispPack = os.path.join('evndisplay',version,'evndisplay') packs = [EvnDispPack] for package in packs: DIRAC.gLogger.notice( 'Checking:', package ) if checkSoftwarePackage( package, sharedArea() )['OK']: DIRAC.gLogger.notice( 'Package found in Shared Area:', package ) installSoftwareEnviron( package, sharedArea() ) # cmd = 'cp -r ' + os.path.join(sharedArea(),'evndisplay',version,'EVNDISP.CTA.runparameter') + ' .' # if(os.system(cmd)): # DIRAC.exit( -1 ) # cmd = 'cp -r ' + os.path.join(sharedArea(),'evndisplay',version,'Calibration') + ' .' # if(os.system(cmd)): # DIRAC.exit( -1 ) continue else: installSoftwarePackage( package, workingArea() ) DIRAC.gLogger.notice( 'Package found in workingArea:', package ) continue DIRAC.gLogger.error( 'Check Failed for software package:', package ) DIRAC.gLogger.error( 'Software package not available') DIRAC.exit( -1 ) ed = EvnDispApp() ed.setSoftwarePackage(EvnDispPack) ########## Use of trg mask file ####################### usetrgfile = sys.argv[7] DIRAC.gLogger.notice( 'Usetrgfile:', usetrgfile ) ####### Use of multiple inputs per job ### simtelFileLFNList = sys.argv[-1].split('ParametricParameters={')[1].split('}')[0].replace(',',' ') # first element of the list simtelFileLFN = simtelFileLFNList.split(' ')[0] ## convert the string into a list and get the basename simtelFileList = [] for word in simtelFileLFNList.split(): simtelFileList.append(os.path.basename(word)) #### Parse the Layout List ################# layoutList = parseLayoutList(sys.argv[9]) ############################################# #### Loop over the Layout List ################# for layout in layoutList: args = [] ########## download trg mask file ####################### if usetrgfile == 'True': trgmaskFileLFN = simtelFileLFN.replace( 'simtel.gz', 'trgmask.gz' ) DIRAC.gLogger.notice( 'Trying to download the trgmask File', trgmaskFileLFN ) result = DataManager().getFile( trgmaskFileLFN ) if not result['OK']: DIRAC.gLogger.error( 'Failed to download trgmakfile:', result ) jobReport.setApplicationStatus( 'Trgmakfile download Error' ) DIRAC.exit( -1 ) args.extend( ['-t', os.path.basename( trgmaskFileLFN )] ) ############################################################ ###### execute evndisplay converter ################## executable = sys.argv[5] ############ dst file Name ############################ run_number = simtelFileList[-1].split( 'run' )[1].split( '.simtel.gz' )[0] runNum = int( run_number ) subRunNumber = '%06d' % runNum particle = simtelFileList[-1].split( '_' )[0] if 'ptsrc' in simtelFileList[-1]: particle = particle + '_' + 'ptsrc' dstfile = particle + '_run' + subRunNumber + '_' + str( jobID ) + '_' + os.path.basename( layout ) + '_dst.root' ########################################### logfileName = executable + '_' + layout + '.log' layout = os.path.join( 'EVNDISP.CTA.runparameter/DetectorGeometry', layout ) DIRAC.gLogger.notice( 'Layout is:', layout ) # add other arguments for evndisplay converter specified by user ###### converterparfile = open( 'converter.par', 'r' ).readlines() for line in converterparfile: for word in line.split(): args.append( word ) ######################################################### args.extend( ['-a', layout] ) args.extend( ['-o', dstfile] ) args.extend( simtelFileList ) execute_module( ed, executable, args ) ########### check existence of DST file ############### if not os.path.isfile( dstfile ): DIRAC.gLogger.error( 'DST file Missing:', dstfile ) jobReport.setApplicationStatus( 'DST file Missing' ) DIRAC.exit( -1 ) ########### quality check on Log ############################################# cmd = 'mv ' + executable + '.log' + ' ' + logfileName if( os.system( cmd ) ): DIRAC.exit( -1 ) fd = open( 'check_log.sh', 'w' ) fd.write( """#! /bin/sh MCevts=$(grep writing %s | grep "MC events" | awk '{print $2}') if [ $MCevts -gt 0 ]; then exit 0 else echo "MCevts is zero" exit -1 fi """ % (logfileName)) fd.close() os.system( 'chmod u+x check_log.sh' ) cmd = './check_log.sh' DIRAC.gLogger.notice( 'Executing system call:', cmd ) if( os.system( cmd ) ): jobReport.setApplicationStatus( 'Converter Log Check Failed' ) DIRAC.exit( -1 ) #### Check the mode ################# mode = sys.argv[11] if( mode == 'convert_standalone' ): #DIRAC.exit() continue ###### execute evndisplay stage1 ############### executable = 'evndisp' logfileName = executable + '_' + os.path.basename( layout ) + '.log' args = ['-sourcefile', dstfile, '-outputdirectory', 'outdir'] # add other arguments for evndisp specified by user ###### evndispparfile = open( 'evndisp.par', 'r' ).readlines() for line in evndispparfile: for word in line.split(): args.append( word ) execute_module( ed, executable, args ) for name in glob.glob( 'outdir/*.root' ): evndispOutFile = name.split( '.root' )[0] + '_' + str( jobID ) + '_' + os.path.basename( layout ) + '_evndisp.root' cmd = 'mv ' + name + ' ' + os.path.basename( evndispOutFile ) if( os.system( cmd ) ): DIRAC.exit( -1 ) ########### quality check on Log ############################################# cmd = 'mv ' + executable + '.log' + ' ' + logfileName if( os.system( cmd ) ): DIRAC.exit( -1 ) fd = open( 'check_log.sh', 'w' ) fd.write( """#! /bin/sh if grep -i "error" %s; then exit 1 fi if grep "Final checks on result file (seems to be OK):" %s; then exit 0 else exit 1 fi """ % (logfileName,logfileName)) fd.close() os.system( 'chmod u+x check_log.sh' ) cmd = './check_log.sh' DIRAC.gLogger.notice( 'Executing system call:', cmd ) if( os.system( cmd ) ): jobReport.setApplicationStatus( 'EvnDisp Log Check Failed' ) DIRAC.exit( -1 ) ################################################################## ########### remove the converted dst file ############################################# cmd = 'rm ' + dstfile if( os.system( cmd ) ): DIRAC.exit( -1 ) DIRAC.exit()