def doStartupChecks():

    global doStartFromPAT

    log.output("********** Checking environment **********")

    log.output("--> Checking if you have a " + options.cmssw_ver + " release.")
    if os.path.isdir(options.cmssw_ver):
        log.output(" ---> Ok, the release is present!")
        cmd = 'cd ' + options.cmssw_ver + '; cmsenv'
        pExe = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    else:
        log.output(" ---> ERROR: Please scram the proper release first! (Exiting)")
        dieOnError("Environment: requested CMSSW version is not found in the working directory")

    log.output("--> Checking if " + options.cmssw_ver + " contains the TopTreeProducer package.")
    if os.path.isdir(options.cmssw_ver + TopTreeProducerDir):
        log.output(" ---> Ok, the " + TopTreeProducerDir + " directory exists!")
    else:
        log.output(" ---> ERROR: Please ensure that you have the TopTreeProducer package installed! (Exiting)")
        dieOnError("Environment: requested CMSSW version does not contain the TopTreeProducer package")

    log.output("--> Checking if " + options.cmssw_ver + " contains the PatAlgos package.")
    if os.path.isdir(options.cmssw_ver + PatDir):
        log.output(" ---> Ok, the " + PatDir + " directory exists!")
    else:
        log.output(" ---> ERROR: Please ensure that you have the PatAlgos package installed! (Exiting)")
        dieOnError("Environment: requested CMSSW version does not contain the PatAlgos package")

    log.output("--> Checking DBS to see whether the requested Dataset exists")

    if dbsInst == "":
        dbsMgr = DBSHandler("cms_dbs_prod_global")
    else:
        dbsMgr = DBSHandler(dbsInst)

    if doStartFromPAT:
        dbsMgr.setDBSInst("cms_dbs_ph_analysis_02")

    if not dbsMgr.datasetExists(options.dataset):
        log.output(" ---> ERROR: " + options.dataset + " was not found in DBS! (Exiting)")
        dieOnError("Dataset: DBS query for your dataset returned an empty set.")
    else:
        log.output(" ---> Ok, Dataset was found!")

    log.output("--> Checking status of CRABServer (not yet implemented)")

    crab = CRABHandler("", "", "", log)
    crab.checkGridProxy(False)
    crab.checkCredentials(False)
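# Note: the Popen call in doStartupChecks() launches "cmsenv" in a subshell
# and never reads its output or exit code, so a broken release setup would go
# unnoticed. A minimal checked variant (a sketch; runShellChecked is a
# hypothetical helper, not part of the original scripts):
def runShellChecked(cmd, log):
    """Run a shell command, log its output, and flag a non-zero exit (sketch)."""
    pExe = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    out = pExe.stdout.read()
    log.output(out)
    if pExe.wait() != 0:
        log.output(" ---> ERROR: command failed: " + cmd)
    return out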
# log.sendErrorMails=bool(True) # FIXME

##################
## MAIN ROUTINE ##
##################

log.output("--------------------------------------------")
log.output("--> Automated FAST SIMULATION production <--")
log.output("--------------------------------------------")

# display input options and do consistency checks
inputSummary()

# check GRID proxy
crab = CRABHandler("", "", log)
crab.checkGridProxy(False)
crab.checkCredentials(False)

# create working directories
if not cmssw_sim == "":
    workingDir_sim = setupDirs(cmssw_sim + "/src", "GEN-FASTSIM_" + publish_sim)

processGENFASTSIM()

if not options.dryRun:
    updateTopDB("GENFASTSIM")

if not doDry:
def processPATandTOPTREE():

    global workingDir
    global dbsInst
    global dataType
    global doDry
    global nEventsDBS
    global nEventsTT
    global topTreeLocation
    global topCffName
    global patCffName
    global ttreeEventContent
    global options
    global jobEffPat
    global jobEffTT
    global CrabJSON

    log.output("********** Preparing to produce the PAT-tuple and TopTree in one go **********")

    startTime = gmtime()

    # create pat cfg
    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(options.dataset, options.GlobalTag, dataType,
                        options.doGenEvent, options.cmssw_ver,
                        options.cmssw_ver_sample, options.flavourHistoryFilterPath,
                        options.runOnMC, options.pat_config)
    patCffName = pat.getConfigFileName()

    # create toptree cfg
    top = TopTreeProducer(timestamp, workingDir, log)
    # top.createTopTreeConfig(options.dataset,dataType,options.doGenEvent,options.GlobalTag,options.cmssw_ver,options.cmssw_ver_sample)
    top.createTopTreeConfig(options.dataset, dataType, options.doGenEvent,
                            options.GlobalTag, options.cmssw_ver,
                            options.cmssw_ver_sample, options.top_config)
    topCffName = top.getConfigFileName()

    log.output(" ---> will expand the TopTree config before sending it with crab")

    cmd2 = 'cd ' + options.cmssw_ver + '; eval `scramv1 runtime -sh`; cd -; python ' \
           + workingDir + '/' + top.getConfigFileName() + '; mv -v expanded.py ' + workingDir + '/'

    if not workingDir.rfind("CMSSW_5_") == -1:
        log.output("Expanding TopTree config:: CMSSW_5_X_Y release detected, setting scram arch to slc5_amd64_gcc462")
        cmd2 = "export SCRAM_ARCH=\"slc5_amd64_gcc462\";" + cmd2

    pExe = Popen(cmd2, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    log.output(pExe.stdout.read())

    # create crab cfg
    #crab = CRABHandler(timestamp,workingDir,log)
    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if not dbsInst == "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance " + dbsInst + " to look for your data.")

    #crab.nEventsPerJob = "20000"
    #crab.nEventsPerJob = "500"

    if not doDry:
        # if there are too many jobs (>2500), a new cfg with 2500 jobs is created
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.runTwoConfigs(patCffName, topCffName)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg("crab_" + timestamp + ".cfg",
                       options.dataset,
                       pat.getConfigFileName(),
                       top.getOutputFileName(),
                       "TOPTREE",
                       bool(False),
                       options.CEBlacklist,
                       options.RunSelection,
                       options.forceStandAlone)

    topTreeLocation = crab.getOutputLocation().split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()
        crab.publishDataSet()

        nEventsDBS = crab.getnEventsDBS()
        nEventsTT = crab.checkFJR()
        CrabJSON = crab.getCrabJSON()

        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)

        jobEffPat = crab.getJobEff()  # same job-eff for pat & TT in case of duo-jobs
        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()

    log.output("--> The TopTree production took " + str((time.mktime(endTime) - time.mktime(startTime)) / 3600.0) + " hours.")

    log.appendToMSG("\n* TopTree production information: \n")

    if not crab.getOutputLocation() == "":
        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))
    log.appendToMSG("\n Note: This TopTree was created from PAT inside one single job, the PATtuple was not stored")
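# In this "duo-job" mode, crab.runTwoConfigs(patCffName, topCffName) chains
# the PAT and TopTree configs inside a single CRAB task. That is why one job
# efficiency is reused for both jobEffPat and jobEffTT above, and why the
# intermediate PAT-tuple is never stored (see the note appended to the
# message log at the end of the function).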
def processTOPTREE():

    log.output("********** Preparing to produce the TopTree **********")

    startTime = gmtime()

    global workingDir
    global dataType
    global doDry
    global nEventsDBS
    global nEventsTT
    global doPBS
    global topTreeLocation
    global topCffName
    global options
    global ttreeEventContent
    global jobEffTT
    global CrabJSON

    top = TopTreeProducer(timestamp, workingDir, log)
    top.createTopTreeConfig(options.dataset, dataType, options.doGenEvent,
                            options.GlobalTag, options.cmssw_ver,
                            options.cmssw_ver_sample, options.top_config)
    topCffName = top.getConfigFileName()

    #crab = CRABHandler(timestamp,workingDir,log)
    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    useDataSet = ""
    if doStartFromPAT:
        useDataSet = options.dataset
    else:
        useDataSet = patPublishName
        options.RunSelection = ""

    crab.setDBSInst("cms_dbs_ph_analysis_02")

    type = "TOPTREE"

    if not doDry:
        # if there are too many jobs (>2500), a new cfg with 2500 jobs is created
        crab.scaleJobsSize(useDataSet, options.RunSelection, 10)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    # empty run selection for the TopTree step
    crab.createCRABcfg("crab_toptree_" + timestamp + ".cfg",
                       useDataSet,
                       top.getConfigFileName(),
                       top.getOutputFileName(),
                       type,
                       bool(False),
                       options.CEBlacklist,
                       options.RunSelection,
                       options.forceStandAlone)

    topTreeLocation = crab.getOutputLocation().split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()

        #time.sleep(60) # to be sure the jobs are in 'done' status

        crab.publishDataSet()

        CrabJSON = crab.getCrabJSON()

        nEventsTT = crab.checkFJR()

        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)

        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()

    log.output("--> The TopTree production took " + str((time.mktime(endTime) - time.mktime(startTime)) / 3600.0) + " hours.")

    log.appendToMSG("\n* TopTree production information: \n")

    if not crab.getOutputLocation() == "":
        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))
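# Whether starting from an existing PAT-tuple (doStartFromPAT) or from the
# PAT-tuple just published by processPAT() (patPublishName), the input dataset
# lives in the analysis DBS instance, hence the unconditional
# setDBSInst("cms_dbs_ph_analysis_02") above; this mirrors the doStartFromPAT
# branch in doStartupChecks().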
def processPAT():

    global workingDir
    global dbsInst
    global dataType
    global doDry
    global patPublishName
    global nEventsPAT
    global nEventsDBS
    global patCffName
    global patLocation
    global patEventContent
    global jobEffPat
    global options
    global CrabJSON

    log.output("********** Preparing to produce the PAT-tuple **********")

    startTime = gmtime()

    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(options.dataset, options.GlobalTag, dataType,
                        options.doGenEvent, options.cmssw_ver,
                        options.cmssw_ver_sample, options.flavourHistoryFilterPath,
                        options.runOnMC, options.pat_config)
    patCffName = pat.getConfigFileName()

    #crab = CRABHandler(timestamp,workingDir,log)
    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)
    #print "**"+crab.baseDir

    if not dbsInst == "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance " + dbsInst + " to look for your data.")

    if not doDry:
        # if there are too many jobs (>2500), a new cfg with 2500 jobs is created
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg("crab_pat_" + timestamp + ".cfg",
                       options.dataset,
                       pat.getConfigFileName(),
                       pat.getOutputFileName(),
                       "PAT",
                       bool(True),
                       options.CEBlacklist,
                       options.RunSelection,
                       options.forceStandAlone)

    if not doDry:
        crab.submitJobs()

        nEventsDBS = crab.getnEventsDBS()

        crab.checkJobs()

        time.sleep(60)  # to be sure the jobs are in 'done' status

        patPublishName = crab.publishDataSet()

        nEventsPAT = crab.checkFJR()

        patLocation = crab.getOutputLocation()

        patEventContent = pat.dumpEventContent(patLocation)

        jobEffPat = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()

    log.output("--> The PAT production took " + str((time.mktime(endTime) - time.mktime(startTime)) / 3600.0) + " hours.")

    log.appendToMSG("\n* PAT production information: ")

    if not crab.getOutputLocation() == "":
        log.appendToMSG("\n\t-> Data location: " + patLocation + "\n")
        log.appendToMSG("\t-> DataSet was published in DBS as: " + patPublishName)

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsPAT))
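# Expected flow when not a dry run (as implemented above): submitJobs() ->
# checkJobs() blocks until the task is done -> publishDataSet() registers the
# PAT-tuple in the analysis DBS instance -> checkFJR() counts the processed
# events from the framework job reports. processTOPTREE() then picks up
# patPublishName as its input dataset.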
from logHandler import logHandler
from CrabHandler import CRABHandler

crab = CRABHandler("", ".", logHandler(""))

crab.createGridProxy()
crab.createMyProxyCredentials()

#print crab.pickProxy()
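# For comparison: the production scripts call crab.checkGridProxy(False) and
# crab.checkCredentials(False) to verify existing credentials, while this test
# snippet forces fresh ones via createGridProxy() and
# createMyProxyCredentials() (an assumption based on the method names).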
dbsPublish = []
CffFilePath = []

# toptree to remove
idTop = []
storagePathTop = []
mergedTopLocation = []

# logging
log = logHandler("")

# update grid-proxy for srm commands
crab = CRABHandler("", "", log)
crab.checkGridProxy(False)

#rmSRMdir("/pnfs/iihe/cms/store/user/dhondt/QCD_Pt-20to30_EMEnriched_TuneZ2_7TeV-pythia6/Spring11-PU_S1_START311_V1G1-v1/29032011_213110/TOPTREE")
#sys.exit(1)

#### Remove DataSet -> ALL associated PatTuples -> All associated TopTrees

if not options.cleanup and not options.rmDataSet == "None":

    log.output("--> Removing dataset " + options.rmDataSet + " and all associated PATtuples and TopTrees")

    sql.createQuery("SELECT", "datasets", "id",
## provide the desired logfile name to logHandler
## if you provide an empty string the output will be written on the stdOut
if not options.stdout:
    log = logHandler(logFileName)
else:
    log = logHandler("")

#################
## MAIN METHOD ##
#################

# create a CrabHandler
crab = CRABHandler(timestamp, options.dir, log)

# change idle time
crab.idleTime = float(options.idleTime)
crab.idleTimeResubmit = float(options.idleTimeResubmit)

# set the crab env
crab.crabSource = "source /etc/profile.d/set_globus_tcp_port_range.sh; export EDG_WL_LOCATION=/opt/edg "

# check GRID proxy
crab.checkGridProxy(0)
crab.checkCredentials(0)
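# Assumption: idleTime / idleTimeResubmit are thresholds (in seconds, hence
# the float() coercion) after which an idle job is reported and resubmitted,
# respectively; both arrive as command-line options of this script.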
def processGENFASTSIM():

    log.output(" ----> Preparing to produce the GEN-FASTSIM sample <----")

    startTime = gmtime()

    global workingDir_sim
    global doDry
    global options
    global cmssw_sim
    global gt_sim
    global publish_sim
    global GENFASTSIM_CFFPath
    global GENFASTSIM_PublishName
    global GENFASTSIM_nEvents
    global GENFASTSIM_PNFSLocation
    global GENFASTSIM_jobEff
    global GENFASTSIM_LHEFiles

    sim = GENFASTSIMProducer(timestamp, workingDir_sim, log, setarchitecture)
    sim.createConfig(publish_sim, options.configfile, gt_sim, options.lhedir,
                     options.nEvents, options.campaign)

    crab = CRABHandler(timestamp, workingDir_sim, log)

    if options.nEvents == "-1" or int(options.nEvents) > int(sim.getNLHEevents()):
        options.nEvents = sim.getNLHEevents()

    crab.nEvents = str(options.nEvents)

    if not str(options.nEvents) == "-1" and int(options.nEvents) < 500:
        crab.nEventsPerJob = crab.nEvents
    else:
        crab.nEventsPerJob = "500"

    crab.AdditionalCrabInput = sim.getlhefiles()

    # The 'publish' argument set to bool(False) does not work yet: CRABHandler
    # encounters a problem because it wants to split "None" (the dataset name
    # when doing GEN-FASTSIM) into several pieces divided by "/" (as in a
    # normal DAS dataset)...
    crab.createCRABcfg("crab_genfastsim_" + timestamp + ".cfg",
                       publish_sim + "_" + options.campaign,
                       sim.getConfigFileName(),
                       sim.getOutputFileName(),
                       "GENFASTSIM",
                       bool(True),
                       "",
                       "",
                       bool(False))

    crab.setForceWhiteList(bool(True))

    if not doDry:
        crab.submitJobs()

        nEventsDBS = crab.getnEventsDBS()

        ##for testing
        #crab.idleTime = int(60)
        #crab.idleTimeResubmit = int(120)

        crab.checkJobs()

        time.sleep(60)  # to be sure the jobs are in 'done' status

        GENFASTSIM_CFFPath = workingDir_sim + "/" + sim.getConfigFileName()
        GENFASTSIM_LHEFiles = sim.getlhefiles()
        GENFASTSIM_PublishName = crab.publishDataSet()
        GENFASTSIM_nEvents = crab.checkFJR()
        GENFASTSIM_PNFSLocation = crab.getOutputLocation()
        GENFASTSIM_jobEff = crab.getJobEff()

        # remove the sandbox (the LHE files are compressed, but can be sizable
        # when you have a lot of LHE files and tasks: better to clean up when
        # a task is done)
        log.output("--> Removing task sandbox ")
        Popen('rm ' + workingDir_sim + '/' + crab.UIWorkingDir + '/share/*.tgz',
              shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT,
              close_fds=True).stdout.read()

        # remove LHE files in the crab directory if they were copied when the
        # LHE files in the original LHE directory were gzipped
        log.output("--> Removing local copied LHE files in directory for crab")
        Popen('rm ' + workingDir_sim + '/*.lhe',
              shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT,
              close_fds=True).stdout.read()

        log.output("--> Job Efficiency: " + str(GENFASTSIM_jobEff))

    endTime = gmtime()

    log.output("--> The GEN-FASTSIM production took " + str((time.mktime(endTime) - time.mktime(startTime)) / 3600.0) + " hours.")

    log.appendToMSG("\n* GEN-FASTSIM production information: ")

    if not crab.getOutputLocation() == "":
        log.appendToMSG("\n\t-> Data location: " + GENFASTSIM_PNFSLocation + "\n")
        log.appendToMSG("\t-> DataSet was published in DBS as: " + GENFASTSIM_PublishName)

    log.appendToMSG("\t-> Number of events processed: " + str(GENFASTSIM_nEvents))
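# The per-job splitting above caps small samples at their own event count and
# everything else at 500 events per job. The same rule as a pure function
# (eventsPerJob is a hypothetical helper for illustration, not used by the
# script):
def eventsPerJob(nEvents, cap=500):
    """Return the events-per-job value chosen by processGENFASTSIM() (sketch)."""
    if nEvents != -1 and nEvents < cap:
        return nEvents
    return cap

# e.g. eventsPerJob(120) == 120, while eventsPerJob(100000) == 500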
# test suite
# for getting stuff from fjr files

#from fjrHandler import FJRHandler,GreenBoxHandler
#from xml.sax import make_parser
#from xml.sax.handler import ContentHandler

#file="CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711/res/crab_fjr_100.xml"

#parser = make_parser()
#handler = FJRHandler()
#parser.setContentHandler(handler)
#parser.parse(open(file))

#print handler.getEventsProcessed()
#print handler.getFrameworkExitCode().split("\n")[0]

from CrabHandler import CRABHandler
from logHandler import logHandler

crab = CRABHandler("1234567",
                   "CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/",
                   logHandler(""))

crab.UIWorkingDir = "TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711"

crab.checkFJR()
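# The commented-out block above parses a single crab_fjr_*.xml report with
# FJRHandler via xml.sax; crab.checkFJR() presumably wraps the same parsing
# and iterates it over every report in the task's res/ directory (an
# assumption based on this test's setup of UIWorkingDir).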