Python CRABHandler Examples, CrabHandler.CRABHandler Python Examples

Example #1

0

Show file

File: AutoMaticTopTreeProducer.py Project: TopBrussels/AutoMaticTopTreeProducer

def doStartupChecks():
    """Run the pre-flight checks before any production step is started.

    In order, this verifies that:
      * the directory named by options.cmssw_ver exists,
      * it contains the TopTreeProducerDir and PatDir sub-directories,
      * the requested dataset is known to DBS (queried through DBSHandler),
    and then asks a CRABHandler to check the grid proxy and the credentials.

    Every failed check is logged and reported through dieOnError().
    """

    global doStartFromPAT

    log.output("********** Checking environment **********")

    log.output("--> Checking if you have a "+options.cmssw_ver+" release.")

    if (os.path.isdir(options.cmssw_ver)):
        log.output(" ---> Ok, the release is present!")
        cmd ='cd '+options.cmssw_ver+'; cmsenv'
        pExe = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        # Drain the pipes and wait for the child shell: without this the
        # process is never reaped and its pipe descriptors stay open.
        pExe.communicate()
        # NOTE(review): the command runs in a child shell, so whatever it sets
        # up cannot reach this process's environment -- confirm this call is
        # still needed at all.
    else:
        log.output(" ---> ERROR: Please scram the proper release first! (Exiting)")
        dieOnError("Environment: resquested CMSSW version is not found in the working directory")

    log.output("--> Checking if "+options.cmssw_ver+" contains the TopTreeProducer package.")

    if (os.path.isdir(options.cmssw_ver+TopTreeProducerDir)):
        log.output(" ---> Ok, the "+TopTreeProducerDir+" directory exists!")
    else:
        log.output(" ---> ERROR: Please ensure that you have the TopTreeProducer package installed! (Exiting)")
        dieOnError("Environment: resquested CMSSW version does not contain the TopTreeProducer package")


    log.output("--> Checking if "+options.cmssw_ver+" contains the PatAlgos package.")

    if (os.path.isdir(options.cmssw_ver+PatDir)):
        log.output(" ---> Ok, the "+PatDir+" directory exists!")
    else:
        log.output(" ---> ERROR: Please ensure that you have the PatAlgos package installed! (Exiting)")
        dieOnError("Environment: resquested CMSSW version does not contain the PatAlgos package")


    log.output("--> Checking DBS to see wether the requested Dataset exists")

    # Fall back to the global production instance when none was configured.
    if dbsInst == "":
        dbsMgr = DBSHandler("cms_dbs_prod_global")
    else:
        dbsMgr = DBSHandler(dbsInst)

    # When starting from PAT the analysis instance is queried instead.
    if doStartFromPAT:

        dbsMgr.setDBSInst("cms_dbs_ph_analysis_02")

    if not dbsMgr.datasetExists(options.dataset):
        log.output(" ---> ERROR: "+options.dataset+" was not found in DBS! (Exiting)")
        dieOnError("Dataset: DBS query for your dataset returned an empty set.")

    else:
        log.output(" ---> Ok, Dataset was found!")

    log.output("--> Checking status of CRABServer (not yet implemented)")

    # This handler is only used for the proxy / credential checks below.
    crab = CRABHandler("","","",log)

    crab.checkGridProxy(False)

    crab.checkCredentials(False)

Example #2

0

Show file

File: AutoMaticSIMProducer.py Project: TopBrussels/AutoMaticTopTreeProducer

#    log.sendErrorMails=bool(True) # FIXME

# ------------------------------------------------------------------
# Main routine
# ------------------------------------------------------------------

log.output("--------------------------------------------")
log.output("--> Automated FAST SIMULATION production <--")
log.output("--------------------------------------------")

# Show the input options and run the consistency checks.
inputSummary()

# Check the GRID proxy and the credentials.
crab = CRABHandler("", "", log)
crab.checkGridProxy(False)
crab.checkCredentials(False)

# The working directory is only created when a release name was given.
if cmssw_sim != "":
    workingDir_sim = setupDirs(cmssw_sim + "/src",
                               "GEN-FASTSIM_" + publish_sim)

processGENFASTSIM()

# The TopDB update is skipped for dry runs.
if not options.dryRun:
    updateTopDB("GENFASTSIM")

if not doDry:

Example #3

0

Show file

File: AutoMaticTopTreeProducer.py Project: TopBrussels/AutoMaticTopTreeProducer

def processPATandTOPTREE():
    """Produce the PAT-tuple and the TopTree in one CRAB task.

    Creates the PAT and TopTree configurations, runs the TopTree
    configuration once in a shell to expand it, writes the CRAB
    configuration and, unless doDry is set, submits and monitors the jobs.
    The results are stored in the module-level globals declared below and a
    summary is appended to the log message.
    """
    global workingDir, dbsInst, dataType, doDry, options
    global nEventsDBS, nEventsTT, topTreeLocation, ttreeEventContent
    global topCffName, patCffName, jobEffPat, jobEffTT, CrabJSON

    log.output("********** Preparing to produce the PAT-tuple and TopTree in one go **********")
    startTime = gmtime()

    # PAT configuration
    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(
        options.dataset,
        options.GlobalTag,
        dataType,
        options.doGenEvent,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.flavourHistoryFilterPath,
        options.runOnMC,
        options.pat_config,
    )
    patCffName = pat.getConfigFileName()

    # TopTree configuration
    top = TopTreeProducer(timestamp, workingDir, log)
    top.createTopTreeConfig(
        options.dataset,
        dataType,
        options.doGenEvent,
        options.GlobalTag,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.top_config,
    )
    topCffName = top.getConfigFileName()

    # Run the TopTree config inside the release environment and move the
    # resulting expanded.py into the working directory.
    log.output(" ---> will expand the TopTree config before sending it with crab ")
    expandCmd = ('cd '+options.cmssw_ver+'; eval `scramv1 runtime -sh`; cd -; python '
                 + workingDir+'/'+top.getConfigFileName()
                 + '; mv -v expanded.py '+workingDir+'/')
    if "CMSSW_5_" in workingDir:
        log.output("Expanding TopTree config:: CMSSW_5_X_Y release detected, setting scram arch to slc5_amd64_gcc462")
        expandCmd = "export SCRAM_ARCH=\"slc5_amd64_gcc462\";"+expandCmd

    expander = Popen(expandCmd, shell=True, stdin=PIPE, stdout=PIPE,
                     stderr=STDOUT, close_fds=True)
    log.output(expander.stdout.read())

    # CRAB configuration
    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if dbsInst != "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance "+dbsInst+" to look for your data.")

    if not doDry:
        # if to much jobs (>2500) we create new cfg with 2500 jobs
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.runTwoConfigs(patCffName, topCffName)
    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg(
        "crab_"+timestamp+".cfg",
        options.dataset,
        pat.getConfigFileName(),
        top.getOutputFileName(),
        "TOPTREE",
        False,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    # Only the first line of the reported output location is kept.
    outputLocation = crab.getOutputLocation()
    topTreeLocation = outputLocation.split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()
        crab.publishDataSet()

        nEventsDBS = crab.getnEventsDBS()
        nEventsTT = crab.checkFJR()
        CrabJSON = crab.getCrabJSON()

        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)

        # same job-eff for pat & TT in case of duo-jobs
        jobEffPat = crab.getJobEff()
        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: "+str(crab.getJobEff()))

    endTime = gmtime()
    elapsedHours = (time.mktime(endTime)-time.mktime(startTime))/3600.0
    log.output("--> The TopTree production took "+str(elapsedHours)+" hours.")

    # Summary for the log message
    log.appendToMSG("\n* TopTree production information: \n")
    if crab.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: "+topTreeLocation+"\n")
    log.appendToMSG("\t-> Number of events processed: "+str(nEventsTT))
    log.appendToMSG("\n Note: This TopTree was created from PAT inside one single job, the PATtuple was not stored")

Example #4

0

Show file

File: AutoMaticTopTreeProducer.py Project: TopBrussels/AutoMaticTopTreeProducer

def processTOPTREE():
    """Produce the TopTree through CRAB.

    The input dataset is options.dataset when doStartFromPAT is set and
    patPublishName otherwise.  options.RunSelection is reset to an empty
    string and the DBS instance is fixed to "cms_dbs_ph_analysis_02".
    Unless doDry is set the jobs are submitted and monitored; the results
    are stored in the module-level globals declared below and a summary is
    appended to the log message.
    """
    global workingDir, dataType, doDry, doPBS, options
    global nEventsDBS, nEventsTT, topTreeLocation, topCffName
    global ttreeEventContent, jobEffTT, CrabJSON

    log.output("********** Preparing to produce the TopTree **********")
    startTime = gmtime()

    # TopTree configuration
    top = TopTreeProducer(timestamp, workingDir, log)
    top.createTopTreeConfig(
        options.dataset,
        dataType,
        options.doGenEvent,
        options.GlobalTag,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.top_config,
    )
    topCffName = top.getConfigFileName()

    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    # Dataset the TopTree jobs read from.
    useDataSet = options.dataset if doStartFromPAT else patPublishName

    # empty runselection for top
    options.RunSelection = ""

    crab.setDBSInst("cms_dbs_ph_analysis_02")

    jobType = "TOPTREE"

    if not doDry:
        # if to much jobs (>2500) we create new cfg with 2500 jobs
        crab.scaleJobsSize(useDataSet, options.RunSelection, 10)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg(
        "crab_toptree_"+timestamp+".cfg",
        useDataSet,
        top.getConfigFileName(),
        top.getOutputFileName(),
        jobType,
        False,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    # Only the first line of the reported output location is kept.
    outputLocation = crab.getOutputLocation()
    topTreeLocation = outputLocation.split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()

        #time.sleep(60) # to be shure the jobs are in done status

        crab.publishDataSet()

        CrabJSON = crab.getCrabJSON()
        nEventsTT = crab.checkFJR()

        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)
        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: "+str(crab.getJobEff()))

    endTime = gmtime()
    elapsedHours = (time.mktime(endTime)-time.mktime(startTime))/3600.0
    log.output("--> The TopTree production took "+str(elapsedHours)+" hours.")

    # Summary for the log message
    log.appendToMSG("\n* TopTree production information: \n")
    if crab.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: "+topTreeLocation+"\n")
    log.appendToMSG("\t-> Number of events processed: "+str(nEventsTT))

Example #5

0

Show file

File: AutoMaticTopTreeProducer.py Project: TopBrussels/AutoMaticTopTreeProducer

def processPAT():
    """Produce the PAT-tuple through CRAB.

    Creates the PAT configuration and the CRAB configuration and, unless
    doDry is set, submits and monitors the jobs and publishes the dataset.
    The results are stored in the module-level globals declared below and a
    summary is appended to the log message.
    """
    global workingDir, dbsInst, dataType, doDry, options
    global patPublishName, patCffName, patLocation, patEventContent
    global nEventsPAT, nEventsDBS, jobEffPat, CrabJSON

    log.output("********** Preparing to produce the PAT-tuple **********")
    startTime = gmtime()

    # PAT configuration
    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(
        options.dataset,
        options.GlobalTag,
        dataType,
        options.doGenEvent,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.flavourHistoryFilterPath,
        options.runOnMC,
        options.pat_config,
    )
    patCffName = pat.getConfigFileName()

    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    #print "**"+crab.baseDir
    if dbsInst != "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance "+dbsInst+" to look for your data.")

    if not doDry:
        # if to much jobs (>2500) we create new cfg with 2500 jobs
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg(
        "crab_pat_"+timestamp+".cfg",
        options.dataset,
        pat.getConfigFileName(),
        pat.getOutputFileName(),
        "PAT",
        True,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    if not doDry:
        crab.submitJobs()
        nEventsDBS = crab.getnEventsDBS()
        crab.checkJobs()

        # to be shure the jobs are in done status
        time.sleep(60)

        patPublishName = crab.publishDataSet()
        nEventsPAT = crab.checkFJR()
        patLocation = crab.getOutputLocation()
        patEventContent = pat.dumpEventContent(patLocation)
        jobEffPat = crab.getJobEff()

        log.output("--> Job Efficiency: "+str(crab.getJobEff()))

    endTime = gmtime()
    elapsedHours = (time.mktime(endTime)-time.mktime(startTime))/3600.0
    log.output("--> The PAT production took "+str(elapsedHours)+" hours.")

    # Summary for the log message
    log.appendToMSG("\n* PAT production information: ")
    if crab.getOutputLocation() != "":
        log.appendToMSG("\n\t-> Data location: "+patLocation+"\n")
    log.appendToMSG("\t-> DataSet was published in DBS as: "+patPublishName)
    log.appendToMSG("\t-> Number of events processed: "+str(nEventsPAT))

Example #6

0

Show file

File: AutoMaticTopTreeProducer.py Project: ivanpari/AutoMaticTopTreeProducer

def doStartupChecks():
    """Run the pre-flight checks before any production step is started.

    In order, this verifies that:
      * the directory named by options.cmssw_ver exists,
      * it contains the TopTreeProducerDir and PatDir sub-directories,
      * the requested dataset is known to DBS (queried through DBSHandler),
    and then asks a CRABHandler to check the grid proxy and the credentials.

    Every failed check is logged and reported through dieOnError().
    """

    global doStartFromPAT

    log.output("********** Checking environment **********")

    log.output("--> Checking if you have a " + options.cmssw_ver + " release.")

    if (os.path.isdir(options.cmssw_ver)):
        log.output(" ---> Ok, the release is present!")
        cmd = 'cd ' + options.cmssw_ver + '; cmsenv'
        pExe = Popen(cmd,
                     shell=True,
                     stdin=PIPE,
                     stdout=PIPE,
                     stderr=STDOUT,
                     close_fds=True)
        # Drain the pipes and wait for the child shell: without this the
        # process is never reaped and its pipe descriptors stay open.
        pExe.communicate()
        # NOTE(review): the command runs in a child shell, so whatever it sets
        # up cannot reach this process's environment -- confirm this call is
        # still needed at all.
    else:
        log.output(
            " ---> ERROR: Please scram the proper release first! (Exiting)")
        dieOnError(
            "Environment: resquested CMSSW version is not found in the working directory"
        )

    log.output("--> Checking if " + options.cmssw_ver +
               " contains the TopTreeProducer package.")

    if (os.path.isdir(options.cmssw_ver + TopTreeProducerDir)):
        log.output(" ---> Ok, the " + TopTreeProducerDir +
                   " directory exists!")
    else:
        log.output(
            " ---> ERROR: Please ensure that you have the TopTreeProducer package installed! (Exiting)"
        )
        dieOnError(
            "Environment: resquested CMSSW version does not contain the TopTreeProducer package"
        )

    log.output("--> Checking if " + options.cmssw_ver +
               " contains the PatAlgos package.")

    if (os.path.isdir(options.cmssw_ver + PatDir)):
        log.output(" ---> Ok, the " + PatDir + " directory exists!")
    else:
        log.output(
            " ---> ERROR: Please ensure that you have the PatAlgos package installed! (Exiting)"
        )
        dieOnError(
            "Environment: resquested CMSSW version does not contain the PatAlgos package"
        )

    log.output("--> Checking DBS to see wether the requested Dataset exists")

    # Fall back to the global production instance when none was configured.
    if dbsInst == "":
        dbsMgr = DBSHandler("cms_dbs_prod_global")
    else:
        dbsMgr = DBSHandler(dbsInst)

    # When starting from PAT the analysis instance is queried instead.
    if doStartFromPAT:

        dbsMgr.setDBSInst("cms_dbs_ph_analysis_02")

    if not dbsMgr.datasetExists(options.dataset):
        log.output(" ---> ERROR: " + options.dataset +
                   " was not found in DBS! (Exiting)")
        dieOnError(
            "Dataset: DBS query for your dataset returned an empty set.")

    else:
        log.output(" ---> Ok, Dataset was found!")

    log.output("--> Checking status of CRABServer (not yet implemented)")

    # This handler is only used for the proxy / credential checks below.
    crab = CRABHandler("", "", "", log)

    crab.checkGridProxy(False)

    crab.checkCredentials(False)

Example #7

0

Show file

File: refreshProxy.py Project: TopBrussels/AutoMaticTopTreeProducer

from logHandler import logHandler
from CrabHandler import CRABHandler

# Handler rooted in the current directory, logging through a logHandler
# that was created with an empty file name.
proxyLog = logHandler("")
crab = CRABHandler("", ".", proxyLog)

# Create the grid proxy first, then the MyProxy credentials.
crab.createGridProxy()
crab.createMyProxyCredentials()

#print crab.pickProxy()

Example #8

0

Show file

File: removetool.py Project: ivanpari/AutoMaticTopTreeProducer

# Bookkeeping lists, all starting out empty.
dbsPublish, CffFilePath = [], []

# toptree to remove
idTop, storagePathTop, mergedTopLocation = [], [], []

# logging
log = logHandler("")

# update grid-proxy for srm commands
crab = CRABHandler("", "", log)
crab.checkGridProxy(False)

#rmSRMdir("/pnfs/iihe/cms/store/user/dhondt/QCD_Pt-20to30_EMEnriched_TuneZ2_7TeV-pythia6/Spring11-PU_S1_START311_V1G1-v1/29032011_213110/TOPTREE")

#sys.exit(1)

#### Remove DataSet -> ALL associated PatTuples -> All associated TopTrees

if not options.cleanup and not options.rmDataSet == "None":

    log.output("--> Removing dataset " + options.rmDataSet +
               " and all associated PATtuples and TopTrees")

    sql.createQuery("SELECT", "datasets", "id",

Example #9

0

Show file

File: CrabBabysitter.py Project: TopBrussels/AutoMaticTopTreeProducer

## logHandler takes the desired logfile name; with an empty string the
## output is written on the stdOut instead

log = logHandler("" if options.stdout else logFileName)


#################
## MAIN METHOD ##
#################

# create a CrabHandler for the directory given in the options
crab = CRABHandler(timestamp, options.dir, log)

# change idle time
crab.idleTime = float(options.idleTime)
crab.idleTimeResubmit = float(options.idleTimeResubmit)

# set the crab env
crab.crabSource = "source /etc/profile.d/set_globus_tcp_port_range.sh; export EDG_WL_LOCATION=/opt/edg "

# check GRID proxy and credentials
crab.checkGridProxy(0)
crab.checkCredentials(0)

Example #10

0

Show file

## logHandler takes the desired logfile name; with an empty string the
## output is written on the stdOut instead

log = logHandler("" if options.stdout else logFileName)

#################
## MAIN METHOD ##
#################

# create a CrabHandler for the directory given in the options
crab = CRABHandler(timestamp, options.dir, log)

# change idle time
crab.idleTime = float(options.idleTime)
crab.idleTimeResubmit = float(options.idleTimeResubmit)

# set the crab env
crab.crabSource = "source /etc/profile.d/set_globus_tcp_port_range.sh; export EDG_WL_LOCATION=/opt/edg "

# check GRID proxy and credentials
crab.checkGridProxy(0)
crab.checkCredentials(0)

Example #11

0

Show file

File: AutoMaticTopTreeProducer.py Project: ivanpari/AutoMaticTopTreeProducer

def processPATandTOPTREE():
    """Produce the PAT-tuple and the TopTree in one CRAB task.

    Creates the PAT and TopTree configurations, runs the TopTree
    configuration once in a shell to expand it, writes the CRAB
    configuration and, unless doDry is set, submits and monitors the jobs.
    The results are stored in the module-level globals declared below and a
    summary is appended to the log message.
    """
    global workingDir, dbsInst, dataType, doDry, options
    global nEventsDBS, nEventsTT, topTreeLocation, ttreeEventContent
    global topCffName, patCffName, jobEffPat, jobEffTT, CrabJSON

    log.output("********** Preparing to produce the PAT-tuple and TopTree in one go **********")
    startTime = gmtime()

    # PAT configuration
    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(
        options.dataset,
        options.GlobalTag,
        dataType,
        options.doGenEvent,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.flavourHistoryFilterPath,
        options.runOnMC,
        options.pat_config,
    )
    patCffName = pat.getConfigFileName()

    # TopTree configuration
    top = TopTreeProducer(timestamp, workingDir, log)
    top.createTopTreeConfig(
        options.dataset,
        dataType,
        options.doGenEvent,
        options.GlobalTag,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.top_config,
    )
    topCffName = top.getConfigFileName()

    # Run the TopTree config inside the release environment and move the
    # resulting expanded.py into the working directory.
    log.output(" ---> will expand the TopTree config before sending it with crab ")
    expandCmd = ('cd ' + options.cmssw_ver
                 + '; eval `scramv1 runtime -sh`; cd -; python '
                 + workingDir + '/' + top.getConfigFileName()
                 + '; mv -v expanded.py ' + workingDir + '/')
    if "CMSSW_5_" in workingDir:
        log.output("Expanding TopTree config:: CMSSW_5_X_Y release detected, setting scram arch to slc5_amd64_gcc462")
        expandCmd = "export SCRAM_ARCH=\"slc5_amd64_gcc462\";" + expandCmd

    expander = Popen(expandCmd, shell=True, stdin=PIPE, stdout=PIPE,
                     stderr=STDOUT, close_fds=True)
    log.output(expander.stdout.read())

    # CRAB configuration
    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if dbsInst != "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance " + dbsInst + " to look for your data.")

    if not doDry:
        # if to much jobs (>2500) we create new cfg with 2500 jobs
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.runTwoConfigs(patCffName, topCffName)
    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg(
        "crab_" + timestamp + ".cfg",
        options.dataset,
        pat.getConfigFileName(),
        top.getOutputFileName(),
        "TOPTREE",
        False,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    # Only the first line of the reported output location is kept.
    outputLocation = crab.getOutputLocation()
    topTreeLocation = outputLocation.split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()
        crab.publishDataSet()

        nEventsDBS = crab.getnEventsDBS()
        nEventsTT = crab.checkFJR()
        CrabJSON = crab.getCrabJSON()

        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)

        # same job-eff for pat & TT in case of duo-jobs
        jobEffPat = crab.getJobEff()
        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()
    elapsedHours = (time.mktime(endTime) - time.mktime(startTime)) / 3600.0
    log.output("--> The TopTree production took " + str(elapsedHours) + " hours.")

    # Summary for the log message
    log.appendToMSG("\n* TopTree production information: \n")
    if crab.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")
    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))
    log.appendToMSG("\n Note: This TopTree was created from PAT inside one single job, the PATtuple was not stored")

Example #12

0

Show file

File: AutoMaticTopTreeProducer.py Project: ivanpari/AutoMaticTopTreeProducer

def processTOPTREE():
    """Produce the TopTree through CRAB.

    The input dataset is options.dataset when doStartFromPAT is set and
    patPublishName otherwise.  options.RunSelection is reset to an empty
    string and the DBS instance is fixed to "cms_dbs_ph_analysis_02".
    Unless doDry is set the jobs are submitted and monitored; the results
    are stored in the module-level globals declared below and a summary is
    appended to the log message.
    """
    global workingDir, dataType, doDry, doPBS, options
    global nEventsDBS, nEventsTT, topTreeLocation, topCffName
    global ttreeEventContent, jobEffTT, CrabJSON

    log.output("********** Preparing to produce the TopTree **********")
    startTime = gmtime()

    # TopTree configuration
    top = TopTreeProducer(timestamp, workingDir, log)
    top.createTopTreeConfig(
        options.dataset,
        dataType,
        options.doGenEvent,
        options.GlobalTag,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.top_config,
    )
    topCffName = top.getConfigFileName()

    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    # Dataset the TopTree jobs read from.
    useDataSet = options.dataset if doStartFromPAT else patPublishName

    # empty runselection for top
    options.RunSelection = ""

    crab.setDBSInst("cms_dbs_ph_analysis_02")

    jobType = "TOPTREE"

    if not doDry:
        # if to much jobs (>2500) we create new cfg with 2500 jobs
        crab.scaleJobsSize(useDataSet, options.RunSelection, 10)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg(
        "crab_toptree_" + timestamp + ".cfg",
        useDataSet,
        top.getConfigFileName(),
        top.getOutputFileName(),
        jobType,
        False,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    # Only the first line of the reported output location is kept.
    outputLocation = crab.getOutputLocation()
    topTreeLocation = outputLocation.split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()

        #time.sleep(60) # to be shure the jobs are in done status

        crab.publishDataSet()

        CrabJSON = crab.getCrabJSON()
        nEventsTT = crab.checkFJR()

        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)
        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()
    elapsedHours = (time.mktime(endTime) - time.mktime(startTime)) / 3600.0
    log.output("--> The TopTree production took " + str(elapsedHours) + " hours.")

    # Summary for the log message
    log.appendToMSG("\n* TopTree production information: \n")
    if crab.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")
    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))

Example #13

0

Show file

File: AutoMaticTopTreeProducer.py Project: ivanpari/AutoMaticTopTreeProducer

def processPAT():
    """Produce the PAT-tuple through CRAB.

    Creates the PAT configuration and the CRAB configuration and, unless
    doDry is set, submits and monitors the jobs and publishes the dataset.
    The results are stored in the module-level globals declared below and a
    summary is appended to the log message.
    """
    global workingDir, dbsInst, dataType, doDry, options
    global patPublishName, patCffName, patLocation, patEventContent
    global nEventsPAT, nEventsDBS, jobEffPat, CrabJSON

    log.output("********** Preparing to produce the PAT-tuple **********")
    startTime = gmtime()

    # PAT configuration
    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(
        options.dataset,
        options.GlobalTag,
        dataType,
        options.doGenEvent,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.flavourHistoryFilterPath,
        options.runOnMC,
        options.pat_config,
    )
    patCffName = pat.getConfigFileName()

    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    #print "**"+crab.baseDir
    if dbsInst != "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance " + dbsInst + " to look for your data.")

    if not doDry:
        # if to much jobs (>2500) we create new cfg with 2500 jobs
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg(
        "crab_pat_" + timestamp + ".cfg",
        options.dataset,
        pat.getConfigFileName(),
        pat.getOutputFileName(),
        "PAT",
        True,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    if not doDry:
        crab.submitJobs()
        nEventsDBS = crab.getnEventsDBS()
        crab.checkJobs()

        # to be shure the jobs are in done status
        time.sleep(60)

        patPublishName = crab.publishDataSet()
        nEventsPAT = crab.checkFJR()
        patLocation = crab.getOutputLocation()
        patEventContent = pat.dumpEventContent(patLocation)
        jobEffPat = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()
    elapsedHours = (time.mktime(endTime) - time.mktime(startTime)) / 3600.0
    log.output("--> The PAT production took " + str(elapsedHours) + " hours.")

    # Summary for the log message
    log.appendToMSG("\n* PAT production information: ")
    if crab.getOutputLocation() != "":
        log.appendToMSG("\n\t-> Data location: " + patLocation + "\n")
    log.appendToMSG("\t-> DataSet was published in DBS as: " + patPublishName)
    log.appendToMSG("\t-> Number of events processed: " + str(nEventsPAT))

Example #14

0

Show file

File: AutoMaticSIMProducer.py Project: TopBrussels/AutoMaticTopTreeProducer

def processGENFASTSIM():
    """Produce the GEN-FASTSIM sample through CRAB.

    Creates the simulation configuration, caps options.nEvents at the number
    of LHE events reported by the producer, writes the CRAB configuration
    and, unless doDry is set, submits and monitors the jobs, publishes the
    dataset and removes the task sandbox and the copied LHE files.  The
    results are stored in the GENFASTSIM_* module-level globals and a
    summary is appended to the log message.
    """

    log.output(" ----> Preparing to produce the GEN-FASTSIM sample <----")

    startTime = gmtime()

    global workingDir_sim
    global doDry
    global options

    global cmssw_sim
    global gt_sim
    global publish_sim

    global GENFASTSIM_CFFPath
    global GENFASTSIM_PublishName
    global GENFASTSIM_nEvents
    global GENFASTSIM_PNFSLocation
    global GENFASTSIM_jobEff
    global GENFASTSIM_LHEFiles

    sim = GENFASTSIMProducer(timestamp, workingDir_sim, log, setarchitecture)

    sim.createConfig(publish_sim, options.configfile, gt_sim, options.lhedir, options.nEvents, options.campaign)

    crab = CRABHandler(timestamp, workingDir_sim, log)

    # "-1" or a request larger than what the LHE files hold both fall back to
    # the number of LHE events.
    if options.nEvents == "-1" or int(options.nEvents) > int(sim.getNLHEevents()):
        options.nEvents = sim.getNLHEevents()

    crab.nEvents = str(options.nEvents)

    # Jobs process 500 events each, unless the whole request is smaller.
    if not str(options.nEvents) == "-1" and int(options.nEvents) < 500:
        crab.nEventsPerJob = crab.nEvents
    else:
        crab.nEventsPerJob = "500"

    crab.AdditionalCrabInput = sim.getlhefiles()

    crab.createCRABcfg(
        "crab_genfastsim_" + timestamp + ".cfg",
        publish_sim + "_" + options.campaign,
        sim.getConfigFileName(),
        sim.getOutputFileName(),
        "GENFASTSIM",
        bool(True),
        "",
        "",
        bool(False),
    )
    # the 'publish' argument set to bool(False) does not work yet, crabhandler encounters a problem because it wants to split "None" (the dataset when doing GEN-FASTSIM) into several pieces divided by "/" (as in a normal DAS dataset)...

    crab.setForceWhiteList(bool(True))

    if not doDry:

        crab.submitJobs()

        # The result used to be bound to a local that nothing read; the call
        # itself is kept in case the handler relies on it being made here.
        crab.getnEventsDBS()

        ##for testing
        # crab.idleTime = int(60)
        # crab.idleTimeResubmit = int(120)

        crab.checkJobs()

        time.sleep(60)  # to be sure the jobs are in done status

        GENFASTSIM_CFFPath = workingDir_sim + "/" + sim.getConfigFileName()

        GENFASTSIM_LHEFiles = sim.getlhefiles()

        GENFASTSIM_PublishName = crab.publishDataSet()

        GENFASTSIM_nEvents = crab.checkFJR()

        GENFASTSIM_PNFSLocation = crab.getOutputLocation()

        GENFASTSIM_jobEff = crab.getJobEff()

        # remove sandbox (lhe files are compressed, but can be sizable when you have a lot of lhe files and tasks: better clean up when a task is done)
        log.output("--> Removing task sandbox ")
        # communicate() drains the output and also waits for the child, so
        # the shell is reaped and its pipes are closed.
        Popen(
            "rm " + workingDir_sim + "/" + crab.UIWorkingDir + "/share/*.tgz",
            shell=True,
            stdin=PIPE,
            stdout=PIPE,
            stderr=STDOUT,
            close_fds=True,
        ).communicate()
        # remove lhe files in crab directory if they were copied when the lhe files in the original lhe directory were gzipped
        log.output("--> Removing local copied LHE files in directory for crab")
        Popen(
            "rm " + workingDir_sim + "/*.lhe", shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True
        ).communicate()

        log.output("--> Job Efficiency: " + str(GENFASTSIM_jobEff))

    endTime = gmtime()

    log.output(
        "--> The GEN-FASTSIM production took "
        + str((time.mktime(endTime) - time.mktime(startTime)) / 3600.0)
        + " hours."
    )

    log.appendToMSG("\n* GEN-FASTSIM production information: ")

    if not crab.getOutputLocation() == "":

        log.appendToMSG("\n\t-> Data location: " + GENFASTSIM_PNFSLocation + "\n")

    log.appendToMSG("\t-> DataSet was published in DBS as: " + GENFASTSIM_PublishName)

    log.appendToMSG("\t-> Number of events processed: " + str(GENFASTSIM_nEvents))

Example #15

0

Show file

def processGENFASTSIM():
    """Run one GEN-FASTSIM production through CRAB and log a summary.

    The function creates the simulation configuration with
    ``GENFASTSIMProducer``, prepares a CRAB task with ``CRABHandler`` and,
    unless the module-level ``doDry`` flag is set, submits the task, calls
    ``checkJobs()``, publishes the dataset and stores the results in the
    module-level ``GENFASTSIM_*`` variables.  Afterwards the task sandbox and
    the locally copied LHE files are deleted.  In all cases the elapsed time
    is logged and a short report is appended to the log message.

    Takes no arguments and returns ``None``; all inputs and outputs travel
    through module-level globals.
    """
    global workingDir_sim, doDry, options
    global cmssw_sim, gt_sim, publish_sim
    global GENFASTSIM_CFFPath, GENFASTSIM_PublishName, GENFASTSIM_nEvents
    global GENFASTSIM_PNFSLocation, GENFASTSIM_jobEff, GENFASTSIM_LHEFiles

    def run_and_drain(command):
        # Execute *command* through the shell and read its merged
        # stdout/stderr until EOF; the captured text is discarded.
        child = Popen(command,
                      shell=True,
                      stdin=PIPE,
                      stdout=PIPE,
                      stderr=STDOUT,
                      close_fds=True)
        child.stdout.read()

    log.output(" ----> Preparing to produce the GEN-FASTSIM sample <----")

    began = gmtime()

    sim = GENFASTSIMProducer(timestamp, workingDir_sim, log, setarchitecture)
    sim.createConfig(
        publish_sim,
        options.configfile,
        gt_sim,
        options.lhedir,
        options.nEvents,
        options.campaign,
    )

    crab = CRABHandler(timestamp, workingDir_sim, log)

    # "-1" means "all events"; a request larger than what the LHE files
    # provide is likewise replaced by the number reported by the producer.
    if options.nEvents == "-1" or int(options.nEvents) > int(sim.getNLHEevents()):
        options.nEvents = sim.getNLHEevents()

    crab.nEvents = str(options.nEvents)

    # Jobs are split in chunks of 500 events, except when fewer than 500
    # events are requested in total: then a single job takes them all.
    split_by_default = str(options.nEvents) == "-1" or int(options.nEvents) >= 500
    if split_by_default:
        crab.nEventsPerJob = "500"
    else:
        crab.nEventsPerJob = crab.nEvents

    crab.AdditionalCrabInput = sim.getlhefiles()

    cfg_name = "crab_genfastsim_" + timestamp + ".cfg"
    publish_name = publish_sim + "_" + options.campaign
    crab.createCRABcfg(
        cfg_name,
        publish_name,
        sim.getConfigFileName(),
        sim.getOutputFileName(),
        "GENFASTSIM",
        True,
        "",
        "",
        False,
    )
    # the 'publish' argument set to False does not work yet, crabhandler
    # encounters a problem because it wants to split "None" (the dataset when
    # doing GEN-FASTSIM) into several pieces divided by "/" (as in a normal
    # DAS dataset)...

    crab.setForceWhiteList(True)

    if not doDry:

        crab.submitJobs()

        # The returned value is not used anywhere in this function.
        crab.getnEventsDBS()

        ## for testing
        # crab.idleTime = int(60)
        # crab.idleTimeResubmit = int(120)

        crab.checkJobs()

        # to be sure the jobs are in done status
        time.sleep(60)

        # Bookkeeping for the final report; the order of the CRAB calls is
        # kept exactly as it was.
        GENFASTSIM_CFFPath = workingDir_sim + "/" + sim.getConfigFileName()
        GENFASTSIM_LHEFiles = sim.getlhefiles()
        GENFASTSIM_PublishName = crab.publishDataSet()
        GENFASTSIM_nEvents = crab.checkFJR()
        GENFASTSIM_PNFSLocation = crab.getOutputLocation()
        GENFASTSIM_jobEff = crab.getJobEff()

        # remove sandbox (lhe files are compressed, but can be sizable when
        # you have a lot of lhe files and tasks: better clean up when a task
        # is done)
        log.output("--> Removing task sandbox ")
        run_and_drain('rm ' + workingDir_sim + '/' + crab.UIWorkingDir +
                      '/share/*.tgz')

        # remove lhe files in crab directory if they were copied when the lhe
        # files in the original lhe directory were gzipped
        log.output("--> Removing local copied LHE files in directory for crab")
        run_and_drain('rm ' + workingDir_sim + '/*.lhe')

        log.output("--> Job Efficiency: " + str(GENFASTSIM_jobEff))

    finished = gmtime()
    elapsed_hours = (time.mktime(finished) - time.mktime(began)) / 3600.0

    log.output("--> The GEN-FASTSIM production took " + str(elapsed_hours) +
               " hours.")

    log.appendToMSG("\n* GEN-FASTSIM production information: ")

    output_location = crab.getOutputLocation()
    if not output_location == "":
        log.appendToMSG("\n\t-> Data location: " + GENFASTSIM_PNFSLocation +
                        "\n")

    log.appendToMSG("\t-> DataSet was published in DBS as: " +
                    GENFASTSIM_PublishName)

    log.appendToMSG("\t-> Number of events processed: " +
                    str(GENFASTSIM_nEvents))

Example #16

0

Show file

File: TESTFJR.py Project: TopBrussels/AutoMaticTopTreeProducer

# test suite

# for getting stuff from fjr files
# from fjrHandler import FJRHandler,GreenBoxHandler
# from xml.sax import make_parser

# from xml.sax.handler import ContentHandler

# file="CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711/res/crab_fjr_100.xml"

# parser = make_parser()
# handler = FJRHandler()
# parser.setContentHandler(handler)
# parser.parse(open(file))
# print handler.getEventsProcessed()
# print handler.getFrameworkExitCode().split("\n")[0]

from CrabHandler import CRABHandler
from logHandler import logHandler

# Point a CRABHandler at an existing task directory and run its FJR check.
task_parent_dir = "CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/"
test_log = logHandler("")

crab = CRABHandler("1234567", task_parent_dir, test_log)

# Name of the CRAB working directory to inspect.
crab.UIWorkingDir = "TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711"

crab.checkFJR()

Example #17

0

Show file

from logHandler import logHandler
from CrabHandler import CRABHandler

# Build a handler rooted in the current directory, then ask it to create
# the grid proxy followed by the MyProxy credentials.
proxy_log = logHandler("")
crab = CRABHandler("", ".", proxy_log)

crab.createGridProxy()
crab.createMyProxyCredentials()

#print crab.pickProxy()

Example #18

0

Show file

File: removetool.py Project: TopBrussels/AutoMaticTopTreeProducer

# Start with empty bookkeeping lists (each name gets its own list object).
dbsPublish, CffFilePath = [], []

# toptree to remove
idTop, storagePathTop, mergedTopLocation = [], [], []

# logging
log = logHandler("")

# update grid-proxy for srm commands
crab = CRABHandler("", "", log)
crab.checkGridProxy(False)



#rmSRMdir("/pnfs/iihe/cms/store/user/dhondt/QCD_Pt-20to30_EMEnriched_TuneZ2_7TeV-pythia6/Spring11-PU_S1_START311_V1G1-v1/29032011_213110/TOPTREE")

#sys.exit(1)

#### Remove DataSet -> ALL associated PatTuples -> All associated TopTrees

# Taken only when the cleanup option is not set and rmDataSet differs from
# the string "None" (presumably the option's "not given" default; confirm
# against the option parser).
if not options.cleanup and not options.rmDataSet == "None":

    # Announce which dataset is being removed before any work is done.
    log.output("--> Removing dataset "+options.rmDataSet+" and all associated PATtuples and TopTrees")

Example #19

0

Show file

File: TESTFJR.py Project: ivanpari/AutoMaticTopTreeProducer

# test suite

# for getting stuff from fjr files
#from fjrHandler import FJRHandler,GreenBoxHandler
#from xml.sax import make_parser

#from xml.sax.handler import ContentHandler

#file="CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711/res/crab_fjr_100.xml"

#parser = make_parser()
#handler = FJRHandler()
#parser.setContentHandler(handler)
#parser.parse(open(file))
#print handler.getEventsProcessed()
#print handler.getFrameworkExitCode().split("\n")[0]

from CrabHandler import CRABHandler
from logHandler import logHandler

# Point a CRABHandler at an existing task directory and run its FJR check.
task_parent_dir = "CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/"
test_log = logHandler("")

crab = CRABHandler("1234567", task_parent_dir, test_log)

# Name of the CRAB working directory to inspect.
crab.UIWorkingDir = "TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711"

crab.checkFJR()