def createJob(self):
    job = Job()
    job.setName(self.__stepName)
    job.setOutputSandbox(['*log'])
    job.setExecutable('/usr/bin/wget',
                      arguments='"{0}/{1}"'.format(URL_ROOT, self.__executable))
    job.setExecutable('/bin/chmod',
                      arguments='+x "{0}"'.format(self.__executable))
    arguments = '"{0}" "{1}" "{2}" "{3}" "{4}" "{5}" @{{JOB_ID}}'.format(
        self.__softwareVersion, self.__application, self.__outputPath,
        self.__outputPattern, self.__outputSE, self.__outputMode)
    if self.__extraArgs:
        arguments += ' ' + self.__extraArgs
    job.setExecutable(self.__executable, arguments=arguments)
    # failover for failed jobs
    job.setExecutable('/bin/ls -l', modulesList=['Script', 'FailoverRequest'])
    if self.__inputData:
        job.setInputData(self.__inputData)
    if self.__site:
        job.setDestination(self.__site)
    if self.__bannedsite:
        job.setBannedSites(self.__bannedsite)
    job.setOutputSandbox(['app.out', 'app.err', 'Script3_CodeOutput.log'])
    self.__job = job
def prepareTransformationTasks(self, transBody, taskDict, owner='', ownerGroup=''):
    if (not owner) or (not ownerGroup):
        res = getProxyInfo(False, False)
        if not res['OK']:
            return res
        proxyInfo = res['Value']
        owner = proxyInfo['username']
        ownerGroup = proxyInfo['group']
    oJob = Job(transBody)
    for taskNumber in sortList(taskDict.keys()):
        paramsDict = taskDict[taskNumber]
        transID = paramsDict['TransformationID']
        self.log.verbose('Setting job owner:group to %s:%s' % (owner, ownerGroup))
        oJob.setOwner(owner)
        oJob.setOwnerGroup(ownerGroup)
        transGroup = str(transID).zfill(8)
        self.log.verbose('Adding default transformation group of %s' % (transGroup))
        oJob.setJobGroup(transGroup)
        constructedName = str(transID).zfill(8) + '_' + str(taskNumber).zfill(8)
        self.log.verbose('Setting task name to %s' % constructedName)
        oJob.setName(constructedName)
        oJob._setParamValue('PRODUCTION_ID', str(transID).zfill(8))
        oJob._setParamValue('JOB_ID', str(taskNumber).zfill(8))
        inputData = None
        for paramName, paramValue in paramsDict.items():
            self.log.verbose('TransID: %s, TaskID: %s, ParamName: %s, ParamValue: %s' %
                             (transID, taskNumber, paramName, paramValue))
            if paramName == 'InputData':
                if paramValue:
                    self.log.verbose('Setting input data to %s' % paramValue)
                    oJob.setInputData(paramValue)
            elif paramName == 'Site':
                if paramValue:
                    self.log.verbose('Setting allocated site to: %s' % (paramValue))
                    oJob.setDestination(paramValue)
            elif paramValue:
                self.log.verbose('Setting %s to %s' % (paramName, paramValue))
                oJob._addJDLParameter(paramName, paramValue)
        hospitalTrans = [int(x) for x in gConfig.getValue("/Operations/Hospital/Transformations", [])]
        if int(transID) in hospitalTrans:
            hospitalSite = gConfig.getValue("/Operations/Hospital/HospitalSite", 'DIRAC.JobDebugger.ch')
            hospitalCEs = gConfig.getValue("/Operations/Hospital/HospitalCEs", [])
            oJob.setType('Hospital')
            oJob.setDestination(hospitalSite)
            oJob.setInputDataPolicy('download', dataScheduling=False)
            if hospitalCEs:
                oJob._addJDLParameter('GridRequiredCEs', hospitalCEs)
        taskDict[taskNumber]['TaskObject'] = ''
        res = self.getOutputData({'Job': oJob._toXML(), 'TransformationID': transID,
                                  'TaskID': taskNumber, 'InputData': inputData})
        if not res['OK']:
            self.log.error("Failed to generate output data", res['Message'])
            continue
        for name, output in res['Value'].items():
            oJob._addJDLParameter(name, string.join(output, ';'))
        taskDict[taskNumber]['TaskObject'] = Job(oJob._toXML())
    return S_OK(taskDict)
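# Illustrative sketch (not from the original source): a minimal taskDict of the
# shape prepareTransformationTasks() above iterates over. Keys are task numbers;
# each value carries at least 'TransformationID', plus optional 'InputData' and
# 'Site' entries. The transformation ID and LFNs below are made up.
exampleTaskDict = {
    1: {'TransformationID': 42,
        'InputData': ['/lhcb/data/example/00000001_00000001_1.dst'],
        'Site': 'LCG.CERN.ch'},
    2: {'TransformationID': 42,
        'InputData': ['/lhcb/data/example/00000001_00000002_1.dst']},
}
# After a successful call, each entry would also hold a 'TaskObject' (a Job
# rebuilt from the accumulated XML), e.g.:
#   res = taskManager.prepareTransformationTasks(transBody, exampleTaskDict)
#   if res['OK']:
#       job = res['Value'][1]['TaskObject']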
def read_hessjob(args=None):
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Interfaces.API.Job import Job
    if len(args) != 1:
        Script.showHelp()
    version = args[0]
    user_script = './read_hess2dst.sh'
    sim_file = 'simtel_file.list'
    infileLFNList = [
        '/vo.cta.in2p3.fr/MC/PROD2/Config_310113/prod-2_21122012_corsika/gamma/prod-2_06052013_simtel_STD/Data/002xxx/gamma_20.0_180.0_alt2662.0_run002997.simtel.gz',
        '/vo.cta.in2p3.fr/MC/PROD2/Config_310113/prod-2_21122012_corsika/gamma/prod-2_06052013_simtel_STD/Data/002xxx/gamma_20.0_180.0_alt2662.0_run002998.simtel.gz'
    ]
    f = open(sim_file, 'w')
    for infileLFN in infileLFNList:
        filein = os.path.basename(infileLFN)
        f.write(filein)
        f.write('\n')
    f.close()
    j = Job()
    j.setInputData(infileLFNList)
    options = [sim_file]
    executablestr = "%s %s %s" % (version, user_script, ' '.join(options))
    j.setExecutable('./cta-read_hess.py', executablestr)
    j.setInputSandbox(['cta-read_hess.py', user_script, sim_file])
    j.setOutputSandbox(['read_hess.log'])
    j.setOutputData(['*dst.gz'])
    j.setName(user_script)
    j.setCPUTime(100000)
    Script.gLogger.info(j._toJDL())
    Dirac().submit(j)
def run_test_job(args):
    simtel_files = load_files_from_list(args[0])
    # simtel_files = ["/vo.cta.in2p3.fr/MC/PROD3/LaPalma/proton/simtel/1260/Data/071xxx/proton_40deg_180deg_run71001___cta-prod3-lapalma3-2147m-LaPalma.simtel.gz",
    #                 "/vo.cta.in2p3.fr/MC/PROD3/LaPalma/proton/simtel/1260/Data/070xxx/proton_40deg_180deg_run70502___cta-prod3-lapalma3-2147m-LaPalma.simtel.gz"]
    dirac = Dirac()
    j = Job()
    j.setCPUTime(500)
    j.setInputData(simtel_files[0])
    j.setExecutable('echo', 'Hello World!')
    j.setName('Hello World')
    res = dirac.submit(j)
    print('Submission Result: {}'.format(res))
    return res
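# Hedged usage sketch (not part of the original snippet): after run_test_job()
# returns, the submission result normally carries the job ID in res['Value'],
# which can be fed back to the same Dirac instance, for example via
# getJobAttributes() as used further below in this section. The exact field
# names are assumptions based on the usual S_OK/S_ERROR return structure.
def check_test_job(dirac, res):
    if not res['OK']:
        print('Submission failed: {}'.format(res['Message']))
        return None
    job_id = res['Value']
    attrs = dirac.getJobAttributes(job_id)
    if attrs['OK']:
        print('Job {} is in status {}'.format(job_id, attrs['Value'].get('Status')))
    return job_id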
def simteljob(args=None):
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Interfaces.API.Job import Job
    if len(args) != 1:
        Script.showHelp()
    version = args[0]
    user_script = './run_simtel.sh'
    infileLFNList = [
        '/vo.cta.in2p3.fr/MC/PROD2/Config_120213/prod-2_21122012_corsika/proton/Data/044xxx/proton_20.0_180.0_alt2662.0_run044019.corsika.gz',
        '/vo.cta.in2p3.fr/MC/PROD2/Config_120213/prod-2_21122012_corsika/proton/Data/044xxx/proton_20.0_180.0_alt2662.0_run044085.corsika.gz'
    ]
    for infileLFN in infileLFNList:
        filein = os.path.basename(infileLFN)
        j = Job()
        j.setInputSandbox(['cta-simtel.py', user_script])
        j.setInputData(infileLFN)
        user_args = [filein]
        executablestr = "%s %s %s" % (version, user_script, ' '.join(user_args))
        j.setExecutable('./cta-simtel.py', executablestr)
        sim_out = 'Data/sim_telarray/cta-ultra5/0.0deg/Data/*.simtel.gz'
        log_out = 'Data/sim_telarray/cta-ultra5/0.0deg/Log/*.log.gz'
        hist_out = 'Data/sim_telarray/cta-ultra5/0.0deg/Histograms/*.hdata.gz'
        j.setOutputData([sim_out, log_out, hist_out])
        j.setOutputSandbox('simtel.log')
        j.setName(user_script)
        j.setCPUTime(100000)
        Script.gLogger.info(j._toJDL())
        Dirac().submit(j)
def basicTest(self):
    j = Job()
    j.setCPUTime(50000)
    j.setExecutable('/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/myPythonScript.py')
    # j.setExecutable('/bin/echo hello')
    j.setOwner('paterson')
    j.setType('test')
    j.setName('MyJobName')
    # j.setAncestorDepth(1)
    j.setInputSandbox(['/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV.opts',
                       '/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV2.opts'])
    j.setOutputSandbox(['firstfile.txt', 'anotherfile.root'])
    j.setInputData(['/lhcb/production/DC04/v2/DST/00000742_00003493_11.dst',
                    '/lhcb/production/DC04/v2/DST/00000742_00003493_10.dst'])
    j.setOutputData(['my.dst', 'myfile.log'])
    j.setDestination('LCG.CERN.ch')
    j.setPlatform('LCG')
    j.setSystemConfig('x86_64-slc5-gcc43-opt')
    j.setSoftwareTags(['VO-lhcb-Brunel-v30r17', 'VO-lhcb-Boole-v12r10'])
    # print j._toJDL()
    # print j.printObj()
    xml = j._toXML()
    testFile = 'jobDescription.xml'
    if os.path.exists(testFile):
        os.remove(testFile)
    xmlfile = open(testFile, 'w')
    xmlfile.write(xml)
    xmlfile.close()
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Creating code for the workflow'
    print j.createCode()
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Executing the workflow'
    j.execute()
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Trying to run the same workflow from generated XML file'
    workflow = fromXMLFile(testFile)
    code = workflow.createCode()
    print code
    workflow.execute()
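# Hedged sketch (not in the original test): instead of executing the workflow
# locally as basicTest() does, the same Job object could be handed to the WMS
# through the Dirac API, mirroring the Dirac().submit(...) calls used elsewhere
# in this section. Whether your installation exposes submit() or submitJob()
# depends on the DIRAC version, so treat the call name as an assumption.
def submitBasicTestJob(j):
    from DIRAC.Interfaces.API.Dirac import Dirac
    dirac = Dirac()
    res = dirac.submit(j)  # on recent DIRAC releases this may be dirac.submitJob(j)
    print 'Submission result: %s' % res
    return res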
def setInputData(self, lfns, bkClient=None, runNumber=None, persistencyType=None):
    """ Add the input data and the run number, if available """
    if not lfns:
        self.log.warn("no lfns passed in setInputData, was that intentional?")
        return S_OK("Nothing to do")
    res = Job.setInputData(self, lfns)
    if not res['OK']:
        return res

    if not runNumber or not persistencyType:
        if not bkClient:
            bkClient = BookkeepingClient()

    if not runNumber:
        res = bkClient.getFileMetadata(lfns)
        if not res['OK']:
            return res
        runNumbers = []
        for fileMeta in res['Value']['Successful'].itervalues():
            try:
                if fileMeta['RunNumber'] not in runNumbers and fileMeta['RunNumber'] is not None:
                    runNumbers.append(fileMeta['RunNumber'])
            except KeyError:
                continue
        if len(runNumbers) > 1:
            runNumber = 'Multiple'
        elif len(runNumbers) == 1:
            runNumber = str(runNumbers[0])
        else:
            runNumber = 'Unknown'

    if not persistencyType:
        res = bkClient.getFileTypeVersion(lfns)
        if not res['OK']:
            return res
        typeVersions = res['Value']
        if not typeVersions:
            self.log.verbose('The requested files do not exist in the BKK')
            typeVersion = ''
        else:
            self.log.verbose('Found file types %s for LFNs: %s' % (typeVersions.values(), typeVersions.keys()))
            typeVersionsList = list(set(typeVersions.values()))
            if len(typeVersionsList) == 1:
                typeVersion = typeVersionsList[0]
            else:
                typeVersion = ''
    else:
        # use the persistency type supplied by the caller, so typeVersion is
        # always defined when the Bookkeeping lookup is skipped
        typeVersion = persistencyType

    self._addParameter(self.workflow, 'runNumber', 'JDL', runNumber, 'Input run number')
    self._addParameter(self.workflow, 'persistency', 'String', typeVersion, 'Persistency type of the inputs')
    return S_OK()
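# Hedged usage sketch: the method above only queries the Bookkeeping when the
# run number or persistency type is not supplied, so a caller that already
# knows both can skip the BookkeepingClient round-trips. 'LHCbJob' is an
# assumption about the enclosing class name; the LFN and values are made up.
lhcb_job = LHCbJob()
result = lhcb_job.setInputData(
    ['/lhcb/data/example/00012345_00000001_1.dst'],
    runNumber='123456',
    persistencyType='ROOT')
if not result['OK']:
    print(result['Message'])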
def main(dataset, chunksize, test):
    '''
    The DATASET argument is a list of paths to MC files on the grid. Like the output
    of cta-prod3-dump-dataset for example. See also
    https://forge.in2p3.fr/projects/cta_dirac/wiki/CTA-DIRAC_MC_PROD3_Status

    Keep in mind that for some effing reason this needs to be executed within this
    weird 'dirac' environment which comes with its own glibc, python and pip.
    I guess the real Mr. Dirac would turn in his grave.
    '''
    dirac = Dirac()

    with open(dataset) as f:
        simtel_files = f.readlines()
    print('Analysing {}'.format(len(simtel_files)))

    server_list = [
        "TORINO-USER", "CYF-STORM-USER", "CYF-STORM-Disk", "M3PEC-Disk",
        "OBSPM-Disk", "POLGRID-Disk", "FRASCATI-USER", "LAL-Disk",
        "CIEMAT-Disk", "CIEMAT-USER", "CPPM-Disk", "LAL-USER",
        "CYFRONET-Disk", "DESY-ZN-USER", "M3PEC-USER", "LPNHE-Disk",
        "LPNHE-USER", "LAPP-USER", "LAPP-Disk"
    ]
    desy_server = 'DESY-ZN-USER'

    servers_with_miniconda = [
        'LCG.IN2P3-CC.fr', 'LCG.DESY-ZEUTHEN.de', 'LCG.CNAF.it',
        'LCG.GRIF.fr', 'LCG.CYFRONET.pl', 'LCG.Prague.cz', 'LCG.CIEMAT.es'
    ]

    chunks = np.array_split(sorted(simtel_files),
                            int(len(simtel_files) / chunksize))
    print('Got a total of {} chunks'.format(len(chunks)))

    for c, simtel_filenames in tqdm(enumerate(chunks[0:2])):  # send just 2 jobs for now.
        # convert chunk to a list of strings because this dirac thing can't take numpy arrays
        simtel_filenames = [
            str(s).strip() for s in simtel_filenames if 'SCT' not in s
        ]
        print('Starting processing for chunk {}'.format(c))
        print(simtel_filenames)

        j = Job()
        # set runtime to 0.5h
        j.setCPUTime(30 * 60)
        j.setName('cta_preprocessing_{}'.format(c))
        j.setInputData(simtel_filenames)
        j.setOutputData(['./processing_output/*.hdf5'],
                        outputSE=None,
                        outputPath='cta_preprocessing/')
        j.setInputSandbox(['../process_simtel.py', './install_dependencies.py'])
        j.setOutputSandbox(['cta_preprocessing.log'])
        j.setExecutable('./job_script.sh')
        # These servers seem to have miniconda installed
        # destination = np.random.choice(servers_with_miniconda)
        j.setDestination(servers_with_miniconda)

        value = dirac.submit(j)
        print('Number {} Submission Result: {}'.format(c, value))
print("Input data list:\n") print(inputdata_list) for i in range(0, total_pixels, chunks): id_start = i id_end = i + chunks dirac = Dirac() j.setName('CS Faraday Rotation Measurement Reconstruction - Pixels from ' + str(id_start) + ' to ' + str(id_end - 1)) j.setPlatform('EL7') j.setTag([str(nprocs) + 'Processors']) j.setDestination(SitesList) j.setExecutable('RMSynthesis2.sh', arguments=str(nprocs) + ' ' + str(id_start) + ' ' + str(id_end) + ' ' + str(expmnt)) # Input data j.setInputData(inputdata_list) j.setInputSandbox([ 'RMSynthesis2.sh', 'run2.sh', 'prmon_1.0.1_x86_64-static-gnu72-opt.tar.gz' ]) # Output data j.setOutputSandbox([ 'StdOut', 'StdErr', 'outputtxt_' + str(id_start) + '_' + str(id_end - 1) + '.txt', 'prmon' + str(id_start) + '_' + str(id_end - 1) + '.txt' ]) o_data_file = lfn + 'second/results_experiment_' + str( expmnt) + '/' + 'LOS_' + str(id_start) + '_to_' + str(id_end - 1) + '.npy' try: output_process = subprocess.check_output('dirac-dms-remove-files ' +
    dexit(1)
sites = result['Value']
j.setDestination(sites)

if opts.stagein is not None:
    input_stage_files = []
    # we do additional input staging
    files = opts.stagein.split(",")
    for f in files:
        if f.startswith("LFN"):
            input_stage_files.append(f)
        else:
            input_stage_files += extract_file(f)
    for f in input_stage_files:
        if not f.startswith("LFN"):
            gLogger.error("*ERROR* required inputfiles to be defined through LFN, could not find LFN in %s" % f)
            dexit(1)
    j.setInputData(input_stage_files)

if opts.debug:
    gLogger.notice('*DEBUG* just showing the JDL of the job to be submitted')
    gLogger.notice(j._toJDL())

d = Dirac(True, "myRepo.rep")
res = d.submit(j)
if not res['OK']:
    gLogger.error("Error during Job Submission ", res['Message'])
    dexit(1)
JobID = res['Value']
gLogger.notice("Your job %s (\"%s\") has been submitted." % (str(JobID), executable))
def main():
    """Launch job on the GRID"""
    # this thing pilots everything related to the GRID
    dirac = Dirac()

    if switches["output_type"] in "TRAINING":
        print("Preparing submission for TRAINING data")
    elif switches["output_type"] in "DL2":
        print("Preparing submission for DL2 data")
    else:
        print("You have to choose either TRAINING or DL2 as output type!")
        sys.exit()

    # Read configuration file
    cfg = load_config(switches["config_file"])

    # Analysis
    config_path = cfg["General"]["config_path"]
    config_file = cfg["General"]["config_file"]
    mode = cfg["General"]["mode"]  # One mode now
    particle = cfg["General"]["particle"]
    estimate_energy = cfg["General"]["estimate_energy"]
    force_tailcut_for_extended_cleaning = cfg["General"][
        "force_tailcut_for_extended_cleaning"]

    # Take parameters from the analysis configuration file
    ana_cfg = load_config(os.path.join(config_path, config_file))
    config_name = ana_cfg["General"]["config_name"]
    cam_id_list = ana_cfg["General"]["cam_id_list"]

    # Regressor and classifier methods
    regressor_method = ana_cfg["EnergyRegressor"]["method_name"]
    classifier_method = ana_cfg["GammaHadronClassifier"]["method_name"]

    # Someone might want to create DL2 without score or energy estimation
    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # GRID
    outdir = os.path.join(cfg["GRID"]["outdir"], config_name)
    n_file_per_job = cfg["GRID"]["n_file_per_job"]
    n_jobs_max = cfg["GRID"]["n_jobs_max"]
    model_dir = cfg["GRID"]["model_dir"]
    training_dir_energy = cfg["GRID"]["training_dir_energy"]
    training_dir_classification = cfg["GRID"]["training_dir_classification"]
    dl2_dir = cfg["GRID"]["dl2_dir"]
    home_grid = cfg["GRID"]["home_grid"]
    user_name = cfg["GRID"]["user_name"]
    banned_sites = cfg["GRID"]["banned_sites"]

    # HACK
    if force_tailcut_for_extended_cleaning is True:
        print("Force tail cuts for extended cleaning!!!")

    # Prepare command to launch script
    source_ctapipe = "source /cvmfs/cta.in2p3.fr/software/conda/dev/setupConda.sh"
    source_ctapipe += " && conda activate ctapipe_v0.11.0"

    if switches["output_type"] in "TRAINING":
        execute = "data_training.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--estimate_energy={}".format(str(estimate_energy)),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "TRAINING"
    elif switches["output_type"] in "DL2":
        execute = "write_dl2.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--classifier_config={}.yaml".format(classifier_method),
            "--classifier_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--force_tailcut_for_extended_cleaning={}".format(
                force_tailcut_for_extended_cleaning),
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "DL2"

    # Make the script save also the full calibrated images if required
    if switches["save_images"] is True:
        script_args.append("--save_images")

    # Make the script print debug information if required
    if switches["debug_script"] is True:
        script_args.append("--debug")

    cmd = [source_ctapipe, "&&", "./" + execute]
    cmd += script_args

    pilot_args_write = " ".join(cmd)

    # For table merging if multiple runs
    pilot_args_merge = " ".join([
        source_ctapipe,
        "&&",
        "./merge_tables.py",
        "--template_file_name",
        "{in_name}",
        "--outfile",
        "{out_name}",
    ])

    prod3b_filelist = dict()
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["EnergyRegressor"]["gamma_list"]
    elif estimate_energy is True and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["GammaHadronClassifier"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["GammaHadronClassifier"]["proton_list"]
    elif switches["output_type"] in "DL2":
        prod3b_filelist["gamma"] = cfg["Performance"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["Performance"]["proton_list"]
        prod3b_filelist["electron"] = cfg["Performance"]["electron_list"]

    # from IPython import embed
    # embed()

    # Split list of files according to storage elements
    with open(prod3b_filelist[particle]) as f:
        filelist = f.readlines()
    filelist = ["{}".format(_.replace("\n", "")) for _ in filelist]
    res = dirac.splitInputData(filelist, n_file_per_job)
    list_run_to_loop_on = res["Value"]

    # define a template name for the file that's going to be written out.
    # the placeholder braces are going to get set during the file-loop
    output_filename = output_filename_template
    output_path = outdir
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_energy)
        step = "energy"
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_classification)
        step = "classification"
    if switches["output_type"] in "DL2":
        if force_tailcut_for_extended_cleaning is False:
            output_path += "/{}/".format(dl2_dir)
        else:
            output_path += "/{}_force_tc_extended_cleaning/".format(dl2_dir)
        step = ""
    output_filename += "_{}.h5"

    # sets all the local files that are going to be uploaded with the job
    # plus the pickled classifier
    # if file name starts with `LFN:`, it will be copied from the GRID
    input_sandbox = [
        # Utility to assign one job to one command...
        os.path.expandvars("$GRID/pilot.sh"),
        os.path.expandvars("$PROTOPIPE/protopipe/"),
        os.path.expandvars("$GRID/merge_tables.py"),
        # python wrapper for the mr_filter wavelet cleaning
        # os.path.expandvars("$PYWI/pywi/"),
        # os.path.expandvars("$PYWICTA/pywicta/"),
        # script that is being run
        os.path.expandvars("$PROTOPIPE/protopipe/scripts/" + execute),
        # Configuration file
        os.path.expandvars(os.path.join(config_path, config_file)),
    ]

    models_to_upload = []
    configs_to_upload = []
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        config_to_upload = config_path_template.format(regressor_method)
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "regressor_{}_{}.pkl.gz")
        for cam_id in cam_id_list:
            model_to_upload = model_path_template.format(
                cam_id, regressor_method)  # TBC
            print("The following model(s) will be uploaded to the GRID:")
            print(model_to_upload)
            models_to_upload.append(model_to_upload)
        print("The following config(s) for such models will be uploaded to the GRID:")
        print(config_to_upload)
        configs_to_upload.append(config_to_upload)
        # input_sandbox.append(model_to_upload)
    elif estimate_energy is False and switches["output_type"] in "TRAINING":
        pass
    else:  # Load also the classifier for DL2
        model_type_list = ["regressor", "classifier"]
        model_method_list = [regressor_method, classifier_method]
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "{}_{}_{}.pkl.gz")
        if force_tailcut_for_extended_cleaning is True:
            force_mode = mode.replace("wave", "tail")
            print("################")
            print(force_mode)
        else:
            force_mode = mode

        for idx, model_type in enumerate(model_type_list):
            print("The following configuration file will be uploaded to the GRID:")
            config_to_upload = config_path_template.format(model_method_list[idx])
            print(config_to_upload)
            configs_to_upload.append(config_to_upload)  # upload only 1 copy

            print("The following model(s) related to such configuration file will be uploaded to the GRID:")
            for cam_id in cam_id_list:
                if model_type in "regressor" and use_regressor is False:
                    print("Do not upload regressor model on GRID!!!")
                    continue
                if model_type in "classifier" and use_classifier is False:
                    print("Do not upload classifier model on GRID!!!")
                    continue
                model_to_upload = model_path_template.format(
                    model_type_list[idx], cam_id, model_method_list[idx])
                print(model_to_upload)
                models_to_upload.append(model_to_upload)
                # input_sandbox.append(model_to_upload)

    # summary before submitting
    print("\nDEBUG> running as:")
    print(pilot_args_write)
    print("\nDEBUG> with input_sandbox:")
    print(input_sandbox)
    print("\nDEBUG> with output file:")
    print(output_filename.format("{job_name}"))
    print("\nDEBUG> Particles:")
    print(particle)
    print("\nDEBUG> Energy estimation:")
    print(estimate_energy)

    # RUNNING

    # list of files on the GRID SE space
    # not submitting jobs where we already have the output
    batcmd = "dirac-dms-user-lfns --BaseDir {}".format(
        os.path.join(home_grid, output_path))
    result = subprocess.check_output(batcmd, shell=True)
    try:
        grid_filelist = open(result.split()[-1]).read()
    except IOError:
        raise IOError("ERROR> cannot read GRID filelist...")

    # get jobs from today and yesterday...
    days = []
    for i in range(2):  # how many days do you want to look back?
        days.append(
            (datetime.date.today() - datetime.timedelta(days=i)).isoformat())

    # get list of run_tokens that are currently running / waiting
    running_ids = set()
    running_names = []
    for status in ["Waiting", "Running", "Checking"]:
        for day in days:
            try:
                [
                    running_ids.add(id) for id in dirac.selectJobs(
                        status=status, date=day, owner=user_name)["Value"]
                ]
            except KeyError:
                pass

    n_jobs = len(running_ids)
    if n_jobs > 0:
        print("Scanning {} running/waiting jobs... please wait...".format(n_jobs))
        for i, id in enumerate(running_ids):
            if ((100 * i) / n_jobs) % 5 == 0:
                print("\r{} %".format(((20 * i) / n_jobs) * 5)),
            jobname = dirac.getJobAttributes(id)["Value"]["JobName"]
            running_names.append(jobname)
        else:
            print("\n... done")

    for bunch in list_run_to_loop_on:
        # for bunch in bunches_of_run:

        # from IPython import embed
        # embed()

        # this selects the `runxxx` part of the first and last file in the run
        # list and joins them with a dash so that we get a nice identifier in
        # the output file name.
        # if there is only one file in the list, use only that one
        # run_token = re.split('_', bunch[+0])[3]  # JLK JLK
        run_token = re.split("_", bunch[0])[3]
        if len(bunch) > 1:
            run_token = "-".join([run_token, re.split("_", bunch[-1])[3]])

        print("-" * 50)
        print("-" * 50)

        # setting output name
        output_filenames = dict()
        if switches["output_type"] in "DL2":
            job_name = "protopipe_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], particle, run_token, mode)
            output_filenames[mode] = output_filename.format("_".join(
                [particle, mode, run_token]))
        else:
            job_name = "protopipe_{}_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], step, particle,
                run_token, mode)
            output_filenames[mode] = output_filename.format("_".join(
                [step, particle, mode, run_token]))

        # if job already running / waiting, skip
        if job_name in running_names:
            print("\n WARNING> {} still running\n".format(job_name))
            continue

        print("Output file name: {}".format(output_filenames[mode]))

        # if file already in GRID storage, skip
        # (you cannot overwrite it there, delete it and resubmit)
        # (assumes tail and wave will always be written out together)
        already_exist = False
        file_on_grid = os.path.join(output_path, output_filenames[mode])
        print("DEBUG> check for existing file on GRID...")
        if file_on_grid in grid_filelist:
            print("\n WARNING> {} already on GRID SE\n".format(job_name))
            continue

        if n_jobs_max == 0:
            print("WARNING> maximum number of jobs to submit reached")
            print("WARNING> breaking loop now")
            break
        else:
            n_jobs_max -= 1

        j = Job()

        # runtime in seconds times 8 (CPU normalisation factor)
        j.setCPUTime(6 * 3600 * 8)
        j.setName(job_name)
        j.setInputSandbox(input_sandbox)

        if banned_sites:
            j.setBannedSites(banned_sites)

        # Add simtel files as input data
        j.setInputData(bunch)

        for run_file in bunch:
            file_token = re.split("_", run_file)[3]

            # wait for a random number of seconds (up to five minutes) before
            # starting to add a bit more entropy in the starting times of the
            # dirac queries.
            # if too many jobs try in parallel to access the SEs,
            # the interface crashes
            # #sleep = random.randint(0, 20)  # seconds
            # #j.setExecutable('sleep', str(sleep))
            # JLK: Try to stop doing that

            # consecutively downloads the data files, processes them,
            # deletes the input
            # and goes on to the next input file;
            # afterwards, the output files are merged
            # j.setExecutable('dirac-dms-get-file', "LFN:" + run_file)

            # source the miniconda ctapipe environment and
            # run the python script with all its arguments
            if switches["output_type"] in "DL2":
                output_filename_temp = output_filename.format("_".join(
                    [particle, mode, file_token]))
            if switches["output_type"] in "TRAINING":
                output_filename_temp = output_filename.format("_".join(
                    [step, particle, mode, file_token]))
            j.setExecutable(
                "./pilot.sh",
                pilot_args_write.format(
                    outfile=output_filename_temp,
                    infile_name=os.path.basename(run_file),
                    mode=mode,
                ),
            )

            # remove the current file to clear space
            j.setExecutable("rm", os.path.basename(run_file))

        # simple `ls` for good measure
        j.setExecutable("ls", "-lh")

        # if there is more than one file per job, merge the output tables
        if len(bunch) > 1:
            names = []
            names.append(("*_{}_".format(particle), output_filenames[mode]))
            for in_name, out_name in names:
                print("in_name: {}, out_name: {}".format(in_name, out_name))
                j.setExecutable(
                    "./pilot.sh",
                    pilot_args_merge.format(in_name=in_name, out_name=out_name),
                )
                print("DEBUG> args append: {}".format(
                    pilot_args_merge.format(in_name=in_name, out_name=out_name)))

        bunch.extend(models_to_upload)
        bunch.extend(configs_to_upload)
        j.setInputData(bunch)
        print("Input data set to job = {}".format(bunch))

        outputs = []
        outputs.append(output_filenames[mode])
        print("Output file path: {}{}".format(output_path,
                                              output_filenames[mode]))
        j.setOutputData(outputs, outputSE=None, outputPath=output_path)

        # check if we should somehow stop doing what we are doing
        if switches["dry"] is True:
            print("\nThis is a DRY RUN! -- NO job has been submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # this sends the job to the GRID and uploads all the
        # files into the input sandbox in the process
        print("\nSUBMITTING job with the following INPUT SANDBOX:")
        print(input_sandbox)
        print("Submission RESULT: {}\n".format(dirac.submitJob(j)["Value"]))

        # break if this is only a test submission
        if switches["test"] is True:
            print("This is a TEST RUN! -- Only ONE job will be submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # since there are two nested loops, need to break again
        if switches["test"] is True:
            break

    try:
        os.remove("datapipe.tar.gz")
        os.remove("modules.tar.gz")
    except:
        pass

    # Upload analysis configuration file for provenance
    SE_LIST = ['CC-IN2P3-USER', 'DESY-ZN-USER', 'CNAF-USER', 'CEA-USER']
    analysis_config_local = os.path.join(config_path, config_file)
    # the configuration file is uploaded to the data directory because
    # the training samples (as well as their cleaning settings) are independent
    analysis_config_dirac = os.path.join(home_grid, output_path, config_file)
    print("Uploading {} to {}...".format(analysis_config_local,
                                         analysis_config_dirac))

    if switches["dry"] is False:
        # Upload this file to all Dirac Storage Elements in SE_LIST
        for se in SE_LIST:
            # the uploaded config file overwrites any old copy
            ana_cfg_upload_cmd = "dirac-dms-add-file -f {} {} {}".format(
                analysis_config_dirac, analysis_config_local, se)
            ana_cfg_upload_result = subprocess.check_output(ana_cfg_upload_cmd,
                                                            shell=True)
            print(ana_cfg_upload_result)
    else:
        print("This is a DRY RUN! -- analysis.yaml has NOT been uploaded.")

    print("\nall done -- exiting now")
    exit()
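# Hedged follow-up sketch (not part of the original script): after the upload
# loop above, the replicas of the analysis configuration could be listed to
# verify it landed on each storage element. 'dirac-dms-lfn-replicas' is assumed
# to be available in the same DIRAC client environment that provides
# 'dirac-dms-add-file'.
def check_config_replicas(analysis_config_dirac):
    import subprocess
    cmd = "dirac-dms-lfn-replicas {}".format(analysis_config_dirac)
    try:
        print(subprocess.check_output(cmd, shell=True))
    except subprocess.CalledProcessError as err:
        print("Replica lookup failed: {}".format(err))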