Python CRABHandler.setDBSInst 예제들, CrabHandler.CRABHandler.setDBSInst Python 예제들

예제 #1

0

파일 보기

파일: AutoMaticTopTreeProducer.py 프로젝트: TopBrussels/AutoMaticTopTreeProducer

def processPATandTOPTREE():
    """Produce the PAT-tuple and the TopTree in one combined CRAB job.

    Steps, in order:
      1. Create the PAT and TopTree configs and remember their file names
         in the module-level ``patCffName`` / ``topCffName``.
      2. Run the TopTree config through ``python`` inside the CMSSW
         environment and move the resulting ``expanded.py`` into
         ``workingDir``; the command output is written to the log.
      3. Configure a CRABHandler (optional DBS instance, two-config mode,
         additional input files) and write the CRAB cfg.
      4. Unless ``doDry`` is set: submit, check and publish the jobs, then
         fill ``nEventsDBS``, ``nEventsTT``, ``CrabJSON``,
         ``ttreeEventContent``, ``jobEffPat`` and ``jobEffTT``.
      5. Log the elapsed time and append a summary to the log message.

    Takes no arguments and returns nothing; all results are published
    through module-level globals.
    """
    # Each global is declared exactly once (the original listed workingDir,
    # dataType and nEventsDBS twice).
    global workingDir
    global dbsInst
    global dataType
    global doDry
    global nEventsDBS
    global nEventsTT
    global topTreeLocation
    global topCffName
    global patCffName
    global ttreeEventContent
    global options
    global jobEffPat
    global jobEffTT
    global CrabJSON

    log.output("********** Preparing to produce the PAT-tuple and TopTree in one go **********")

    startTime = gmtime()

    # create pat cfg
    pat = PatProducer(timestamp, workingDir, log)
    pat.createPatConfig(options.dataset,
                        options.GlobalTag,
                        dataType,
                        options.doGenEvent,
                        options.cmssw_ver,
                        options.cmssw_ver_sample,
                        options.flavourHistoryFilterPath,
                        options.runOnMC,
                        options.pat_config)
    patCffName = pat.getConfigFileName()

    # create toptree cfg
    top = TopTreeProducer(timestamp, workingDir, log)
    top.createTopTreeConfig(options.dataset,
                            dataType,
                            options.doGenEvent,
                            options.GlobalTag,
                            options.cmssw_ver,
                            options.cmssw_ver_sample,
                            options.top_config)
    topCffName = top.getConfigFileName()

    log.output(" ---> will expand the TopTree config before sending it with crab " )
    # NOTE(review): the command line is assembled from options.cmssw_ver and
    # workingDir and executed through a shell; those values must be trusted.
    cmd2 = 'cd '+options.cmssw_ver+'; eval `scramv1 runtime -sh`; cd -; python '+workingDir+'/'+top.getConfigFileName()+'; mv -v expanded.py '+workingDir+'/'
    if "CMSSW_5_" in workingDir:
        log.output("Expanding TopTree config:: CMSSW_5_X_Y release detected, setting scram arch to slc5_amd64_gcc462")
        cmd2 = "export SCRAM_ARCH=\"slc5_amd64_gcc462\";"+cmd2

    pExe = Popen(cmd2, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    # communicate() closes the child's stdin, drains stdout (stderr is merged
    # into it) and waits for the process, so no unreaped child or open pipe
    # is left behind as happened with a bare stdout.read().
    expandOutput = pExe.communicate()[0]
    log.output(expandOutput)

    # create crab cfg
    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if dbsInst != "":
        crab.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance "+dbsInst+" to look for your data.")

    if not doDry:
        # original author's note: if too many jobs (>2500) a new cfg with
        # 2500 jobs is created
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.runTwoConfigs(patCffName, topCffName)

    crab.AdditionalCrabInput = getAdditionalInputFiles(crab.AdditionalCrabInput)

    crab.createCRABcfg("crab_"+timestamp+".cfg",
                       options.dataset,
                       pat.getConfigFileName(),
                       top.getOutputFileName(),
                       "TOPTREE",
                       False,
                       options.CEBlacklist,
                       options.RunSelection,
                       options.forceStandAlone)

    # Only the first line of the reported output location is kept.
    topTreeLocation = crab.getOutputLocation().split("\n")[0]

    if not doDry:
        crab.submitJobs()
        crab.checkJobs()
        crab.publishDataSet()

        nEventsDBS = crab.getnEventsDBS()
        nEventsTT = crab.checkFJR()
        CrabJSON = crab.getCrabJSON()

        # Kept from the original: repeats the DBS query made just above when
        # starting from PAT (doStartFromPAT is a module-level flag).
        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)

        # same job-eff for pat & TT in case of duo-jobs
        jobEffPat = crab.getJobEff()
        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: "+str(crab.getJobEff()))

    endTime = gmtime()

    log.output("--> The TopTree production took "+ str((time.mktime(endTime)-time.mktime(startTime))/3600.0)+" hours.")

    log.appendToMSG("\n* TopTree production information: \n")

    if crab.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: "+topTreeLocation+"\n")

    log.appendToMSG("\t-> Number of events processed: "+str(nEventsTT))

    log.appendToMSG("\n Note: This TopTree was created from PAT inside one single job, the PATtuple was not stored")

예제 #2

0

파일 보기

파일: AutoMaticTopTreeProducer.py 프로젝트: TopBrussels/AutoMaticTopTreeProducer

def processTOPTREE():
    """Produce the TopTree through its own CRAB job.

    Builds the TopTree config, picks the input dataset (``options.dataset``
    when ``doStartFromPAT`` is set, otherwise ``patPublishName``), clears
    ``options.RunSelection``, writes the CRAB cfg and, unless ``doDry`` is
    set, submits/checks/publishes the jobs and collects the results into the
    module-level globals. Finally logs the elapsed time and a summary.

    Takes no arguments and returns nothing.
    """
    global workingDir
    global dataType
    global doDry
    global nEventsDBS
    global nEventsTT
    global doPBS
    global topTreeLocation
    global topCffName
    global options
    global ttreeEventContent
    global jobEffTT
    global CrabJSON

    log.output("********** Preparing to produce the TopTree **********")

    began = gmtime()

    treeProducer = TopTreeProducer(timestamp, workingDir, log)
    treeProducer.createTopTreeConfig(
        options.dataset,
        dataType,
        options.doGenEvent,
        options.GlobalTag,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.top_config,
    )
    topCffName = treeProducer.getConfigFileName()

    handler = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    # Input dataset: the user-supplied one when starting from PAT, otherwise
    # the dataset name stored in patPublishName.
    inputDataSet = options.dataset if doStartFromPAT else patPublishName

    # empty runselection for top
    options.RunSelection = ""

    handler.setDBSInst("cms_dbs_ph_analysis_02")

    cfgKind = "TOPTREE"

    if not doDry:
        # original author's note: if too many jobs (>2500) a new cfg with
        # 2500 jobs is created
        handler.scaleJobsSize(inputDataSet, options.RunSelection, 10)

    handler.AdditionalCrabInput = getAdditionalInputFiles(handler.AdditionalCrabInput)

    handler.createCRABcfg(
        "crab_toptree_" + timestamp + ".cfg",
        inputDataSet,
        treeProducer.getConfigFileName(),
        treeProducer.getOutputFileName(),
        cfgKind,
        False,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    # Only the first line of the reported output location is kept.
    locationLines = handler.getOutputLocation().split("\n")
    topTreeLocation = locationLines[0]

    if not doDry:
        handler.submitJobs()
        handler.checkJobs()
        handler.publishDataSet()

        CrabJSON = handler.getCrabJSON()
        nEventsTT = handler.checkFJR()

        if doStartFromPAT:
            nEventsDBS = handler.getnEventsDBS()

        ttreeEventContent = treeProducer.dumpEventContent(topTreeLocation)

        jobEffTT = handler.getJobEff()
        efficiencyText = str(handler.getJobEff())
        log.output("--> Job Efficiency: " + efficiencyText)

    finished = gmtime()

    elapsedHours = (time.mktime(finished) - time.mktime(began)) / 3600.0
    log.output("--> The TopTree production took " + str(elapsedHours) + " hours.")

    log.appendToMSG("\n* TopTree production information: \n")

    if handler.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))

예제 #3

0

파일 보기

파일: AutoMaticTopTreeProducer.py 프로젝트: TopBrussels/AutoMaticTopTreeProducer

def processPAT():
    """Produce the PAT-tuple through its own CRAB job.

    Builds the PAT config, configures a CRABHandler (optionally with the
    DBS instance in ``dbsInst``), writes the CRAB cfg and, unless ``doDry``
    is set, submits/checks/publishes the jobs and stores the results in the
    module-level globals. Finally logs the elapsed time and a summary.

    Takes no arguments and returns nothing.
    """
    global workingDir
    global dbsInst
    global dataType
    global doDry
    global patPublishName
    global nEventsPAT
    global nEventsDBS
    global patCffName
    global patLocation
    global patEventContent
    global jobEffPat
    global options
    global CrabJSON

    log.output("********** Preparing to produce the PAT-tuple **********")

    began = gmtime()

    patProducer = PatProducer(timestamp, workingDir, log)
    patProducer.createPatConfig(
        options.dataset,
        options.GlobalTag,
        dataType,
        options.doGenEvent,
        options.cmssw_ver,
        options.cmssw_ver_sample,
        options.flavourHistoryFilterPath,
        options.runOnMC,
        options.pat_config,
    )
    patCffName = patProducer.getConfigFileName()

    handler = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if dbsInst != "":
        handler.setDBSInst(dbsInst)
        log.output(" ---> CRAB will use DBS instance " + dbsInst + " to look for your data.")

    if not doDry:
        # original author's note: if too many jobs (>2500) a new cfg with
        # 2500 jobs is created
        handler.scaleJobsSize(options.dataset, options.RunSelection, 1)

    handler.AdditionalCrabInput = getAdditionalInputFiles(handler.AdditionalCrabInput)

    handler.createCRABcfg(
        "crab_pat_" + timestamp + ".cfg",
        options.dataset,
        patProducer.getConfigFileName(),
        patProducer.getOutputFileName(),
        "PAT",
        True,
        options.CEBlacklist,
        options.RunSelection,
        options.forceStandAlone,
    )

    if not doDry:
        handler.submitJobs()

        nEventsDBS = handler.getnEventsDBS()

        handler.checkJobs()

        # to be sure the jobs are in done status
        time.sleep(60)

        patPublishName = handler.publishDataSet()
        nEventsPAT = handler.checkFJR()
        patLocation = handler.getOutputLocation()
        patEventContent = patProducer.dumpEventContent(patLocation)

        jobEffPat = handler.getJobEff()
        efficiencyText = str(handler.getJobEff())
        log.output("--> Job Efficiency: " + efficiencyText)

    finished = gmtime()

    elapsedHours = (time.mktime(finished) - time.mktime(began)) / 3600.0
    log.output("--> The PAT production took " + str(elapsedHours) + " hours.")

    log.appendToMSG("\n* PAT production information: ")

    # NOTE(review): patLocation, patPublishName and nEventsPAT are only
    # assigned in the non-dry branch above; in a dry run the lines below read
    # whatever the module-level values are - confirm they are initialised
    # elsewhere.
    if handler.getOutputLocation() != "":
        log.appendToMSG("\n\t-> Data location: " + patLocation + "\n")

    log.appendToMSG("\t-> DataSet was published in DBS as: " + patPublishName)

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsPAT))

예제 #4

0

파일 보기

파일: AutoMaticTopTreeProducer.py 프로젝트: ivanpari/AutoMaticTopTreeProducer

def processPATandTOPTREE():
    """Produce the PAT-tuple and the TopTree in one combined CRAB job.

    The function creates the PAT and TopTree configs, expands the TopTree
    config by running it with ``python`` inside the CMSSW environment
    (moving the resulting ``expanded.py`` into ``workingDir``), writes a
    two-config CRAB cfg and, unless ``doDry`` is set, submits, checks and
    publishes the jobs. Results are stored in module-level globals
    (``patCffName``, ``topCffName``, ``topTreeLocation`` and, for real runs,
    ``nEventsDBS``, ``nEventsTT``, ``CrabJSON``, ``ttreeEventContent``,
    ``jobEffPat``, ``jobEffTT``). A summary is appended to the log message.

    Takes no arguments and returns nothing.
    """
    # Each global is declared exactly once (the original listed workingDir,
    # dataType and nEventsDBS twice).
    global workingDir
    global dbsInst
    global dataType
    global doDry
    global nEventsDBS
    global nEventsTT
    global topTreeLocation
    global topCffName
    global patCffName
    global ttreeEventContent
    global options
    global jobEffPat
    global jobEffTT
    global CrabJSON

    log.output(
        "********** Preparing to produce the PAT-tuple and TopTree in one go **********"
    )

    startTime = gmtime()

    # create pat cfg

    pat = PatProducer(timestamp, workingDir, log)

    pat.createPatConfig(options.dataset, options.GlobalTag, dataType,
                        options.doGenEvent, options.cmssw_ver,
                        options.cmssw_ver_sample,
                        options.flavourHistoryFilterPath, options.runOnMC,
                        options.pat_config)

    patCffName = pat.getConfigFileName()

    # create toptree cfg

    top = TopTreeProducer(timestamp, workingDir, log)

    top.createTopTreeConfig(options.dataset, dataType, options.doGenEvent,
                            options.GlobalTag, options.cmssw_ver,
                            options.cmssw_ver_sample, options.top_config)

    topCffName = top.getConfigFileName()

    log.output(
        " ---> will expand the TopTree config before sending it with crab ")
    # NOTE(review): the command line is assembled from options.cmssw_ver and
    # workingDir and executed through a shell; those values must be trusted.
    cmd2 = ('cd ' + options.cmssw_ver +
            '; eval `scramv1 runtime -sh`; cd -; python ' + workingDir + '/' +
            top.getConfigFileName() + '; mv -v expanded.py ' + workingDir +
            '/')
    if "CMSSW_5_" in workingDir:
        log.output(
            "Expanding TopTree config:: CMSSW_5_X_Y release detected, setting scram arch to slc5_amd64_gcc462"
        )
        cmd2 = "export SCRAM_ARCH=\"slc5_amd64_gcc462\";" + cmd2

    pExe = Popen(cmd2,
                 shell=True,
                 stdin=PIPE,
                 stdout=PIPE,
                 stderr=STDOUT,
                 close_fds=True)
    # communicate() closes the child's stdin, drains stdout (stderr is merged
    # into it) and waits for the process, so no unreaped child or open pipe
    # is left behind as happened with a bare stdout.read().
    expandOutput, _ = pExe.communicate()
    log.output(expandOutput)

    # create crab cfg

    crab = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if dbsInst != "":

        crab.setDBSInst(dbsInst)

        log.output(" ---> CRAB will use DBS instance " + dbsInst +
                   " to look for your data.")

    if not doDry:

        # original author's note: if too many jobs (>2500) a new cfg with
        # 2500 jobs is created
        crab.scaleJobsSize(options.dataset, options.RunSelection, 1)

    crab.runTwoConfigs(patCffName, topCffName)

    crab.AdditionalCrabInput = getAdditionalInputFiles(
        crab.AdditionalCrabInput)

    crab.createCRABcfg("crab_" + timestamp + ".cfg", options.dataset,
                       pat.getConfigFileName(), top.getOutputFileName(),
                       "TOPTREE", False, options.CEBlacklist,
                       options.RunSelection, options.forceStandAlone)

    # Only the first line of the reported output location is kept.
    topTreeLocation = crab.getOutputLocation().split("\n")[0]

    if not doDry:

        crab.submitJobs()

        crab.checkJobs()

        crab.publishDataSet()

        nEventsDBS = crab.getnEventsDBS()

        nEventsTT = crab.checkFJR()

        CrabJSON = crab.getCrabJSON()

        # Kept from the original: repeats the DBS query made just above when
        # starting from PAT (doStartFromPAT is a module-level flag).
        if doStartFromPAT:
            nEventsDBS = crab.getnEventsDBS()

        ttreeEventContent = top.dumpEventContent(topTreeLocation)

        # same job-eff for pat & TT in case of duo-jobs
        jobEffPat = crab.getJobEff()

        jobEffTT = crab.getJobEff()

        log.output("--> Job Efficiency: " + str(crab.getJobEff()))

    endTime = gmtime()

    log.output("--> The TopTree production took " +
               str((time.mktime(endTime) - time.mktime(startTime)) / 3600.0) +
               " hours.")

    log.appendToMSG("\n* TopTree production information: \n")

    if crab.getOutputLocation() != "":

        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))

    log.appendToMSG(
        "\n Note: This TopTree was created from PAT inside one single job, the PATtuple was not stored"
    )

예제 #5

0

파일 보기

파일: AutoMaticTopTreeProducer.py 프로젝트: ivanpari/AutoMaticTopTreeProducer

def processTOPTREE():
    """Run the stand-alone TopTree production.

    Creates the TopTree config, selects the dataset to run on
    (``options.dataset`` if ``doStartFromPAT`` is set, else
    ``patPublishName``), empties ``options.RunSelection``, writes the CRAB
    cfg and - unless ``doDry`` is set - submits, checks and publishes the
    jobs, storing the outcome in module-level globals. Ends by logging the
    elapsed time and appending a summary to the log message.

    Takes no arguments and returns nothing.
    """
    global workingDir
    global dataType
    global doDry
    global nEventsDBS
    global nEventsTT
    global doPBS
    global topTreeLocation
    global topCffName
    global options
    global ttreeEventContent
    global jobEffTT
    global CrabJSON

    log.output("********** Preparing to produce the TopTree **********")

    t0 = gmtime()

    producer = TopTreeProducer(timestamp, workingDir, log)
    producer.createTopTreeConfig(options.dataset, dataType,
                                 options.doGenEvent, options.GlobalTag,
                                 options.cmssw_ver, options.cmssw_ver_sample,
                                 options.top_config)
    topCffName = producer.getConfigFileName()

    crabJob = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    # Dataset to run on: the user-supplied one when starting from PAT,
    # otherwise the dataset name stored in patPublishName.
    if doStartFromPAT:
        dataSetName = options.dataset
    else:
        dataSetName = patPublishName

    # empty runselection for top
    options.RunSelection = ""

    crabJob.setDBSInst("cms_dbs_ph_analysis_02")

    jobKind = "TOPTREE"

    if not doDry:
        # original author's note: if too many jobs (>2500) a new cfg with
        # 2500 jobs is created
        crabJob.scaleJobsSize(dataSetName, options.RunSelection, 10)

    extraInput = getAdditionalInputFiles(crabJob.AdditionalCrabInput)
    crabJob.AdditionalCrabInput = extraInput

    crabJob.createCRABcfg("crab_toptree_" + timestamp + ".cfg",
                          dataSetName,
                          producer.getConfigFileName(),
                          producer.getOutputFileName(),
                          jobKind,
                          False,
                          options.CEBlacklist,
                          options.RunSelection,
                          options.forceStandAlone)

    # Only the first line of the reported output location is kept.
    reportedLocation = crabJob.getOutputLocation()
    topTreeLocation = reportedLocation.split("\n")[0]

    if not doDry:
        crabJob.submitJobs()
        crabJob.checkJobs()
        crabJob.publishDataSet()

        CrabJSON = crabJob.getCrabJSON()
        nEventsTT = crabJob.checkFJR()

        if doStartFromPAT:
            nEventsDBS = crabJob.getnEventsDBS()

        ttreeEventContent = producer.dumpEventContent(topTreeLocation)

        jobEffTT = crabJob.getJobEff()
        log.output("--> Job Efficiency: " + str(crabJob.getJobEff()))

    t1 = gmtime()

    hoursTaken = (time.mktime(t1) - time.mktime(t0)) / 3600.0
    log.output("--> The TopTree production took " + str(hoursTaken) +
               " hours.")

    log.appendToMSG("\n* TopTree production information: \n")

    if crabJob.getOutputLocation() != "":
        log.appendToMSG("\t-> Data location: " + topTreeLocation + "\n")

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsTT))

예제 #6

0

파일 보기

파일: AutoMaticTopTreeProducer.py 프로젝트: ivanpari/AutoMaticTopTreeProducer

def processPAT():
    """Run the stand-alone PAT-tuple production.

    Creates the PAT config, sets up a CRABHandler (using the DBS instance
    in ``dbsInst`` when it is non-empty), writes the CRAB cfg and - unless
    ``doDry`` is set - submits, checks and publishes the jobs, storing the
    outcome in module-level globals. Ends by logging the elapsed time and
    appending a summary to the log message.

    Takes no arguments and returns nothing.
    """
    global workingDir
    global dbsInst
    global dataType
    global doDry
    global patPublishName
    global nEventsPAT
    global nEventsDBS
    global patCffName
    global patLocation
    global patEventContent
    global jobEffPat
    global options
    global CrabJSON

    log.output("********** Preparing to produce the PAT-tuple **********")

    t0 = gmtime()

    producer = PatProducer(timestamp, workingDir, log)
    producer.createPatConfig(options.dataset,
                             options.GlobalTag,
                             dataType,
                             options.doGenEvent,
                             options.cmssw_ver,
                             options.cmssw_ver_sample,
                             options.flavourHistoryFilterPath,
                             options.runOnMC,
                             options.pat_config)
    patCffName = producer.getConfigFileName()

    crabJob = CRABHandler(options.toptree_ver, timestamp, workingDir, log)

    if dbsInst != "":
        crabJob.setDBSInst(dbsInst)
        dbsNotice = (" ---> CRAB will use DBS instance " + dbsInst +
                     " to look for your data.")
        log.output(dbsNotice)

    if not doDry:
        # original author's note: if too many jobs (>2500) a new cfg with
        # 2500 jobs is created
        crabJob.scaleJobsSize(options.dataset, options.RunSelection, 1)

    extraInput = getAdditionalInputFiles(crabJob.AdditionalCrabInput)
    crabJob.AdditionalCrabInput = extraInput

    crabJob.createCRABcfg("crab_pat_" + timestamp + ".cfg",
                          options.dataset,
                          producer.getConfigFileName(),
                          producer.getOutputFileName(),
                          "PAT",
                          True,
                          options.CEBlacklist,
                          options.RunSelection,
                          options.forceStandAlone)

    if not doDry:
        crabJob.submitJobs()

        nEventsDBS = crabJob.getnEventsDBS()

        crabJob.checkJobs()

        # to be sure the jobs are in done status
        time.sleep(60)

        patPublishName = crabJob.publishDataSet()
        nEventsPAT = crabJob.checkFJR()
        patLocation = crabJob.getOutputLocation()
        patEventContent = producer.dumpEventContent(patLocation)

        jobEffPat = crabJob.getJobEff()
        log.output("--> Job Efficiency: " + str(crabJob.getJobEff()))

    t1 = gmtime()

    hoursTaken = (time.mktime(t1) - time.mktime(t0)) / 3600.0
    log.output("--> The PAT production took " + str(hoursTaken) + " hours.")

    log.appendToMSG("\n* PAT production information: ")

    # NOTE(review): patLocation, patPublishName and nEventsPAT are only
    # assigned in the non-dry branch above; in a dry run the lines below read
    # whatever the module-level values are - confirm they are initialised
    # elsewhere.
    if crabJob.getOutputLocation() != "":
        log.appendToMSG("\n\t-> Data location: " + patLocation + "\n")

    log.appendToMSG("\t-> DataSet was published in DBS as: " + patPublishName)

    log.appendToMSG("\t-> Number of events processed: " + str(nEventsPAT))