Esempio n. 1
0
def reSplitChunk(compname, splitFactor, options):
    """Re-split the component stored in a chunk directory into new chunks.

    The component pickled in ``<compname>/config.pck`` is loaded, the original
    directory is moved to ``<compname>.dir/source``, and one sub-directory per
    component returned by ``split`` is created below ``<compname>.dir``. Each
    sub-directory receives a copy of the job files found in the source
    directory plus its own ``config.pck``.

    Args:
        compname: path of the existing chunk directory.
        splitFactor: ``-1`` sets the component's splitFactor to its number of
            files; a value below ``-1`` sets splitFactor to 1 and
            fineSplitFactor to ``-splitFactor``; any other value is used as
            splitFactor directly (fineSplitFactor is then 1).
        options: not used by this function; kept for interface compatibility.

    Raises:
        RuntimeError: if the pickle file cannot be loaded, or if
            ``<compname>.dir`` already exists.
    """
    from PhysicsTools.HeppyCore.framework.heppy_loop import split

    try:
        with open("%s/config.pck" % compname) as fin:
            comp = cPickle.load(fin)
    except Exception:
        # Any failure to open or unpickle is reported the same way; unlike a
        # bare except this lets KeyboardInterrupt/SystemExit through.
        raise RuntimeError(
            "Pickle file %s/config.pck does not exist or can't be read" % compname)

    workDir = compname + ".dir"
    if os.path.exists(workDir):
        raise RuntimeError("directory %s exists" % workDir)
    os.mkdir(workDir)
    sourceDir = "%s/source" % workDir
    shutil.move(compname, sourceDir)

    if splitFactor == -1:
        comp.splitFactor = len(comp.files)
        comp.fineSplitFactor = 1
    elif splitFactor < -1:
        comp.splitFactor = 1
        comp.fineSplitFactor = -splitFactor
    else:
        comp.splitFactor = splitFactor
        comp.fineSplitFactor = 1

    for ci in split([comp]):
        print("Comp %s: file %s, fineSplit %s" % (
            ci.name, ci.files, getattr(ci, 'fineSplit', None)))
        chunkDir = "%s/%s" % (workDir, ci.name)
        os.mkdir(chunkDir)
        # Job files are optional: copy only those present in the source.
        for f in ("options.json", "batchScript.sh", "pycfg.py"):
            src = "%s/%s" % (sourceDir, f)
            if os.path.exists(src):
                shutil.copy(src, "%s/%s" % (chunkDir, f))
        # Written once per chunk (previously rewritten for every job file).
        with open("%s/config.pck" % chunkDir, 'w') as fout:
            cPickle.dump(ci, fout)
Esempio n. 2
0
# Import the configuration file as a module; the handle is only needed while
# the source is being loaded.
# NOTE(review): str.rstrip('py') strips trailing 'p'/'y' characters rather
# than a ".py" suffix, so "x.py" yields the module name "x." - confirm this
# is intended before changing it.
handle = open(cfgfile, 'r')
cfo = imp.load_source(cfgfile.rstrip('py'), cfgfile, handle)
handle.close()
# A cfg exposing 'configs' is indexed by cfgname; otherwise its single
# 'config' object is used.
config = cfo.configs[cfgname] if hasattr(cfo, 'configs') else cfo.config

from PhysicsTools.HeppyCore.framework.heppy_loop import split
# pick right component from dataset and file from jobID
selectedComponents = []
localPrefix = ""
for comp in config.components:
    if comp.name == dataset:
        # this selects the files and events and changes the name to _ChunkX according to fineSplitFactor and splitFactor
        newComp = split([comp])[job - 1]  # first job number is 1
        if useAAA == "full":
            newComp.files = [
                x.replace("root://eoscms.cern.ch//eos/cms",
                          "root://" + XrootdRedirector())
                for x in newComp.files
            ]
        elif useAAA == "local":
            if localPrefix == "" and len(newComp.files) > 0:
                myfile = newComp.files[0].replace(
                    "root://eoscms.cern.ch//eos/cms", "")  # == /store/...
                mycheck = subprocess.check_output([
                    "edmFileUtil", "-d", myfile
                ]).split('\n')[0]  # == root://storage/store/....root?...
                if len(mycheck) > 0:
                    localPrefix = mycheck.split('?')[0].replace(
Esempio n. 3
0
    else:
        components = [pickle.load(open(arg, 'r')) for arg in args[2:]]

    preprocessor = getattr(cfo, 'PREPROCESSOR', None)

    if options.single:
        if len(components) > 1:
            print(
                "WARNING: option --single specified but multiple components found"
            )
        for comp in components:
            _processOneComponent(copy(pp), comp, outdir, preprocessor, options)

    else:
        from PhysicsTools.HeppyCore.framework.heppy_loop import split
        components = split(components)
        if options.ntasks == 0 or len(
                components) == 1:  # single core, for debugging
            map(_processOneComponentAsync,
                [(copy(pp), comp, outdir, preprocessor, options)
                 for comp in components])
        else:
            pool = multiprocessing.Pool(processes=min(
                len(components), options.ntasks, multiprocessing.cpu_count()))
            pool.map(_processOneComponentAsync,
                     [(cfg, comp, outdir, preprocessor, options)
                      for comp in components])
            pool.close()
            pool.join()
            del pool
Esempio n. 4
0
# Publish the submission settings as environment variables, then run one
# "crab submit" per component and remove the temporary archives.
# NOTE(review): presumably these variables are read back by
# heppy_crab_config_env.py - confirm.
os.environ["CMG_VERSION"] = options.cmg_version
os.environ["USEAAA"] = options.AAAconfig
os.environ["STAGEOUTREMDIR"] = options.outputDir
os.environ["CFG_FILE"] = heppy_config
os.environ["OUTSITE"] = options.storageSite
# List-valued options are exported comma-separated, and only when non-empty.
for envName, values in (
        ("WHITESITES", options.siteWhitelist),
        ("BLACKSITES", options.siteBlacklist),
        ("FILESTOUNPACK", options.filesToUnpack),
        ("FILESTOSHIP", options.filesToShip),
):
    if len(values) > 0:
        os.environ[envName] = ','.join(values)
if options.maxevents > 0:
    os.environ["MAXNUMEVENTS"] = str(options.maxevents)
os.environ["ONLYUNPACKED"] = str(options.only_unpacked)

for comp in conf.components:
    # AAA is steered from the submission options, not per component.
    if getattr(comp, "useAAA", False):
        raise RuntimeError(
            'Components should have useAAA disabled in the cfg when running on crab. '
            'Tune the behaviour of AAA in the crab submission instead!')

    # DATASET and NJOBS change per component; NJOBS is the number of
    # components split() returns for this one.
    os.environ["DATASET"] = str(comp.name)
    os.environ["NJOBS"] = str(len(split([comp])))
    os.system("crab submit %s -c heppy_crab_config_env.py" %
              ("--dryrun" if options.dryrun else ""))

# Remove the temporary files from the working directory.
os.system("rm options.json")
os.system("rm python.tar.gz")
os.system("rm cmgdataset.tar.gz")
os.system("rm cafpython.tar.gz")
Esempio n. 5
0
    Job splitting is determined by your configuration file.
    """

    options, args = batchManager.ParseOptions()

    # Fold the extra options into heppy's global option table:
    # "key=value" stores the string value, a bare "flag" stores True.
    from PhysicsTools.HeppyCore.framework.heppy_loop import _heppyGlobalOptions
    for opt in options.extraOptions:
        if "=" not in opt:
            _heppyGlobalOptions[opt] = True
            continue
        key, val = opt.split("=", 1)
        _heppyGlobalOptions[key] = val
    batchManager.heppyOptions_ = _heppyGlobalOptions

    # The first positional argument is the configuration file; it is loaded
    # as a module named "pycfg" and its 'config' object is used.
    cfgFileName = args[0]
    handle = open(cfgFileName, 'r')
    cfo = imp.load_source("pycfg", cfgFileName, handle)
    handle.close()
    config = cfo.config

    # Components without input files are dropped before split() is applied.
    components = split(
        [comp for comp in config.components if len(comp.files) > 0])
    # One job per resulting component: the value is its index, the name is
    # the component name.
    listOfNames = [comp.name for comp in components]
    listOfValues = list(range(len(components)))

    waitingTime = 0.1
    batchManager.PrepareJobs(listOfValues, listOfNames)
    batchManager.SubmitJobs(waitingTime)
Esempio n. 6
0
    Job splitting is determined by your configuration file.
    """

    options, args = batchManager.ParseOptions()

    # Record every extra option in heppy's global option table; an option
    # without "=" is stored as a boolean flag.
    from PhysicsTools.HeppyCore.framework.heppy_loop import _heppyGlobalOptions
    for opt in options.extraOptions:
        if "=" in opt:
            key, val = opt.split("=", 1)
        else:
            key, val = opt, True
        _heppyGlobalOptions[key] = val
    batchManager.heppyOptions_ = _heppyGlobalOptions

    # Load the configuration file given as first positional argument.
    cfgFileName = args[0]
    handle = open(cfgFileName, 'r')
    cfo = imp.load_source("pycfg", cfgFileName, handle)
    handle.close()
    config = cfo.config

    # Keep only components that have input files, then apply split().
    components = split(
        [comp for comp in config.components if len(comp.files) > 0])
    listOfNames = [comp.name for comp in components]
    listOfValues = range(len(components))

    # Prepare one job per component (indexed by position) and submit them.
    waitingTime = 0.1
    batchManager.PrepareJobs(listOfValues, listOfNames)
    batchManager.SubmitJobs(waitingTime)

Esempio n. 7
0
if __name__ == '__main__':
    # Entry point: parse the options, load the cfg file, split its selected
    # components into jobs and submit them through the batch manager.
    batchManager = MyBatchManager()
    batchManager.parser_.usage = """
    %prog [options] <cfgFile>

    Run nanopy analysis system on the batch.
    Job splitting is determined by your configuration file.
    """

    options, args = batchManager.ParseOptions()

    # "key=value" extra options store the string value, bare ones store True.
    from PhysicsTools.HeppyCore.framework.heppy_loop import _heppyGlobalOptions
    for opt in options.extraOptions:
        if "=" in opt:
            (key, val) = opt.split("=", 1)
            _heppyGlobalOptions[key] = val
        else:
            _heppyGlobalOptions[opt] = True

    cfgFileName = args[0]
    # Close the cfg file as soon as the module is loaded; the handle used to
    # be opened inline and was never closed.
    with open(cfgFileName, 'r') as handle:
        cfo = imp.load_source("pycfg", cfgFileName, handle)

    # Components without input files are dropped before split() is applied.
    components = split(
        [comp for comp in cfo.selectedComponents if len(comp.files) > 0])
    # One job per resulting component, identified by its index and name.
    listOfValues = range(0, len(components))
    listOfNames = [comp.name for comp in components]

    batchManager.PrepareJobs(listOfValues, listOfNames)
    waitingTime = 0.1
    batchManager.SubmitJobs(waitingTime)
# Copy every entry of opts into heppy's global option table.
# NOTE(review): opts and jfile are set up before this point (not visible
# here) - presumably the job's JSON options file; confirm.
for k, v in opts.items():  # items() exists on both Python 2 and 3 dicts
    _heppyGlobalOptions[k] = v
jfile.close()

# Load the cfg file as a module; the with-block closes the handle even when
# loading the configuration raises.
# NOTE(review): str.rstrip('py') strips trailing 'p'/'y' characters rather
# than a ".py" suffix - module name left unchanged on purpose.
with open(cfgfile, 'r') as handle:
    cfo = imp.load_source(cfgfile.rstrip('py'), cfgfile, handle)
config = cfo.config

from PhysicsTools.HeppyCore.framework.heppy_loop import split
# pick right component from dataset and file from jobID
selectedComponents = []
localPrefix = ""
for comp in config.components:
    if comp.name != dataset:
        continue
    # this selects the files and events and changes the name to _ChunkX according to fineSplitFactor and splitFactor
    newComp = split([comp])[job - 1]  # first job number is 1
    if useAAA == "full":
        # Point every EOS file at the xrootd redirector instead.
        newComp.files = [
            x.replace("root://eoscms.cern.ch//eos/cms",
                      "root://" + XrootdRedirector())
            for x in newComp.files
        ]
    elif useAAA == "local":
        # The local prefix is looked up once, from the first file of the
        # first matching component that has files.
        if localPrefix == "" and len(newComp.files) > 0:
            myfile = newComp.files[0].replace(
                "root://eoscms.cern.ch//eos/cms", "")  # == /store/...
            mycheck = subprocess.check_output(
                ["edmFileUtil", "-d", myfile]
            ).split('\n')[0]  # == root://storage/store/....root?...
            if len(mycheck) > 0:
                localPrefix = mycheck.split('?')[0].replace(
                    myfile, "")  # == root://storage
                print('Will use %s as local file prefix' % localPrefix)
        newComp.files = [
            x.replace("root://eoscms.cern.ch//eos/cms", localPrefix)
            for x in newComp.files
        ]
    elif useAAA == "eos":
        pass  # file names are left untouched
    selectedComponents.append(newComp)

# check selectedComponents
if len(selectedComponents) == 0:
    print("No selected components found!!")
Esempio n. 9
0
# Script-level switches; both are exported to the environment below.
debug = False
useAAA = True

# update most recent DCS-only json
#os.system("cp -f /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions15/13TeV/DCSOnly/json_DCSONLY_Run2015B.txt ../../data/json/")

# Load heppy_config.py as a module; only its 'config' object is used here.
handle = open("heppy_config.py", 'r')
cfo = imp.load_source("heppy_config", "heppy_config.py", handle)
handle.close()
conf = cfo.config

# NOTE(review): os.system runs each command in its own subshell, so any
# environment these two commands set up is not inherited by this process -
# confirm whether they are expected to have an effect.
for setupCommand in ("scramv1 runtime -sh",
                     "source /cvmfs/cms.cern.ch/crab3/crab.sh"):
    os.system(setupCommand)

os.environ.update({
    "PROD_LABEL": production_label,
    "CMG_VERSION": cmg_version,
    "DEBUG": str(debug),
    "USEAAA": str(useAAA),
})

from PhysicsTools.HeppyCore.framework.heppy_loop import split
for comp in conf.components:
    # get splitting from config file according to splitFactor and fineSplitFactor (priority given to the latter)
    NJOBS = len(split([comp]))
    os.environ["DATASET"] = str(comp.name)
    os.environ["NJOBS"] = str(NJOBS)
    os.system("crab submit -c heppy_crab_config_env.py")

# Remove the temporary archives; -f keeps rm quiet if one is missing.
for tarball in ("python.tar.gz", "cmgdataset.tar.gz", "cafpython.tar.gz"):
    os.system("rm -f %s" % tarball)
Esempio n. 10
0
handle.close()

# Pack the python area, the dataset cache and the CAF python directory into
# tarballs in the current directory (removed again at the end).
os.system("tar czf python.tar.gz --dereference --directory $CMSSW_BASE python")
os.system("tar czf cmgdataset.tar.gz --directory $HOME .cmgdataset")
os.system("tar czf cafpython.tar.gz --directory /afs/cern.ch/cms/caf/ python")

# Publish the submission settings as environment variables.
# NOTE(review): presumably read back by heppy_crab_config_env.py - confirm.
os.environ["PROD_LABEL"] = options.production_label
os.environ["CMG_VERSION"] = options.cmg_version
os.environ["USEAAA"] = options.AAAconfig
os.environ["STAGEOUTREMDIR"] = options.outputDir
os.environ["CFG_FILE"] = options.cfg_file
os.environ["OUTSITE"] = options.storageSite
# Optional settings are exported only when they carry a value.
if len(options.siteWhitelist) > 0:
    os.environ["WHITESITES"] = ','.join(options.siteWhitelist)
if len(options.filesToUnpack) > 0:
    os.environ["FILESTOUNPACK"] = ','.join(options.filesToUnpack)
if len(options.filesToShip) > 0:
    os.environ["FILESTOSHIP"] = ','.join(options.filesToShip)
if options.maxevents > 0:
    os.environ["MAXNUMEVENTS"] = str(options.maxevents)
os.environ["ONLYUNPACKED"] = str(options.only_unpacked)

from PhysicsTools.HeppyCore.framework.heppy_loop import split
for comp in conf.components:
    # AAA is steered from the submission options, not per component.
    if getattr(comp, "useAAA", False):
        raise RuntimeError(
            'Components should have useAAA disabled in the cfg when running on crab - tune the behaviour of AAA in the crab submission instead!')
    # One crab submission per component; NJOBS is the number of components
    # split() returns for it.
    os.environ["DATASET"] = str(comp.name)
    os.environ["NJOBS"] = str(len(split([comp])))
    os.system("crab submit %s -c heppy_crab_config_env.py" %
              ("--dryrun" if options.dryrun else ""))

# Remove the temporary files from the working directory.
os.system("rm options.json")
os.system("rm python.tar.gz")
os.system("rm cmgdataset.tar.gz")
os.system("rm cafpython.tar.gz")