def reSplitChunk(compname, splitFactor, options):
    """Re-split an existing chunk directory into a new set of chunks.

    The component pickled in ``<compname>/config.pck`` is loaded, the original
    directory is moved to ``<compname>.dir/source``, and one sub-directory per
    component returned by ``split`` is created under ``<compname>.dir``. Each
    sub-directory receives its own ``config.pck`` plus copies of
    ``options.json``, ``batchScript.sh`` and ``pycfg.py`` when those exist in
    the source directory.

    Args:
        compname: path of the chunk directory to re-split.
        splitFactor: ``-1`` sets ``splitFactor`` to the number of files of the
            component; a value below ``-1`` sets ``splitFactor`` to 1 and
            ``fineSplitFactor`` to ``-splitFactor``; any other value is used
            directly as ``splitFactor`` (with ``fineSplitFactor`` 1).
        options: not used by this function.

    Raises:
        RuntimeError: if the pickle cannot be read, or if
            ``<compname>.dir`` already exists.
    """
    from PhysicsTools.HeppyCore.framework.heppy_loop import split

    try:
        # Context manager + binary mode: the handle is released even when
        # unpickling fails, and pickles are byte streams.
        with open("%s/config.pck" % compname, "rb") as fin:
            comp = cPickle.load(fin)
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
        raise RuntimeError(
            "Pickle file %s/config.pck does not exist or can't be read" %
            compname)

    workDir = compname + ".dir"
    if os.path.exists(workDir):
        raise RuntimeError("directory %s exists" % workDir)
    os.mkdir(workDir)
    shutil.move(compname, "%s/source" % workDir)

    if splitFactor == -1:
        comp.splitFactor = len(comp.files)
        comp.fineSplitFactor = 1
    elif splitFactor < -1:
        comp.splitFactor = 1
        comp.fineSplitFactor = -splitFactor
    else:
        comp.splitFactor = splitFactor
        comp.fineSplitFactor = 1

    for ci in split([comp]):
        # Single parenthesised argument: prints the same text on Python 2
        # and Python 3.
        print("Comp %s: file %s, fineSplit %s" %
              (ci.name, ci.files, getattr(ci, 'fineSplit', None)))
        chunkDir = "%s/%s" % (workDir, ci.name)
        os.mkdir(chunkDir)
        # Carry over the job support files that the original chunk had.
        for f in ["options.json", "batchScript.sh", "pycfg.py"]:
            src = "%s/source/%s" % (workDir, f)
            if os.path.exists(src):
                shutil.copy(src, "%s/%s" % (chunkDir, f))
        with open("%s/config.pck" % chunkDir, "wb") as fout:
            cPickle.dump(ci, fout)
# Excerpt (cut off mid-expression at its end; the surrounding script is outside this view).
# Visible steps:
#   - load the configuration module from `cfgfile` with imp.load_source and take
#     `cfo.configs[cfgname]` when the module defines `configs`, otherwise `cfo.config`;
#   - for each component whose name equals `dataset`, pick entry `job - 1` of split([comp])
#     (job numbers start at 1);
#   - useAAA == "full": replace the "root://eoscms.cern.ch//eos/cms" prefix of every file
#     with "root://" + XrootdRedirector();
#   - useAAA == "local": on first use, strip that prefix from the first file and run
#     `edmFileUtil -d` on it, keeping the first output line to derive `localPrefix`.
# NOTE(review): cfgfile.rstrip('py') strips trailing 'p'/'y' characters rather than a ".py"
# suffix (e.g. "cfg.py" -> "cfg.") — confirm the resulting module name is what is wanted.
handle = open(cfgfile, 'r') cfo = imp.load_source(cfgfile.rstrip('py'), cfgfile, handle) if hasattr(cfo, 'configs'): config = cfo.configs[cfgname] else: config = cfo.config handle.close() from PhysicsTools.HeppyCore.framework.heppy_loop import split # pick right component from dataset and file from jobID selectedComponents = [] localPrefix = "" for comp in config.components: if comp.name == dataset: # this selects the files and events and changes the name to _ChunkX according to fineSplitFactor and splitFactor newComp = split([comp])[job - 1] # first job number is 1 if useAAA == "full": newComp.files = [ x.replace("root://eoscms.cern.ch//eos/cms", "root://" + XrootdRedirector()) for x in newComp.files ] elif useAAA == "local": if localPrefix == "" and len(newComp.files) > 0: myfile = newComp.files[0].replace( "root://eoscms.cern.ch//eos/cms", "") # == /store/... mycheck = subprocess.check_output([ "edmFileUtil", "-d", myfile ]).split('\n')[0] # == root://storage/store/....root?... if len(mycheck) > 0: localPrefix = mycheck.split('?')[0].replace(
# Excerpt (starts at the `else:` of a conditional whose `if` is outside this view).
# Visible steps:
#   - else-branch: unpickle one component per path in args[2:];
#   - read the optional PREPROCESSOR attribute of `cfo` (None when absent);
#   - with options.single: warn when more than one component was found, then call
#     _processOneComponent on each component with a copy of `pp`;
#   - otherwise: split the components and run _processOneComponentAsync either through
#     map() in this process (options.ntasks == 0 or exactly one component) or through a
#     multiprocessing.Pool of min(len(components), options.ntasks, cpu_count()) workers.
# NOTE(review): the in-process path passes copy(pp) while the pool path passes `cfg` —
# confirm this difference is intended.
# NOTE(review): on Python 3 the bare map() call is lazy and would run nothing unless its
# result is consumed — confirm the target interpreter.
else: components = [pickle.load(open(arg, 'r')) for arg in args[2:]] preprocessor = getattr(cfo, 'PREPROCESSOR', None) if options.single: if len(components) > 1: print( "WARNING: option --single specified but multiple components found" ) for comp in components: _processOneComponent(copy(pp), comp, outdir, preprocessor, options) else: from PhysicsTools.HeppyCore.framework.heppy_loop import split components = split(components) if options.ntasks == 0 or len( components) == 1: # single core, for debugging map(_processOneComponentAsync, [(copy(pp), comp, outdir, preprocessor, options) for comp in components]) else: pool = multiprocessing.Pool(processes=min( len(components), options.ntasks, multiprocessing.cpu_count())) pool.map(_processOneComponentAsync, [(cfg, comp, outdir, preprocessor, options) for comp in components]) pool.close() pool.join() del pool
# Export the submission settings through the environment; they are read back
# by heppy_crab_config_env.py when `crab submit` is invoked below.
os.environ["CMG_VERSION"] = options.cmg_version
os.environ["USEAAA"] = options.AAAconfig
os.environ["STAGEOUTREMDIR"] = options.outputDir
os.environ["CFG_FILE"] = heppy_config
os.environ["OUTSITE"] = options.storageSite
# Optional settings are exported only when they carry a value.
if len(options.siteWhitelist) > 0:
    os.environ["WHITESITES"] = ','.join(options.siteWhitelist)
if len(options.siteBlacklist) > 0:
    os.environ["BLACKSITES"] = ','.join(options.siteBlacklist)
if len(options.filesToUnpack) > 0:
    os.environ["FILESTOUNPACK"] = ','.join(options.filesToUnpack)
if len(options.filesToShip) > 0:
    os.environ["FILESTOSHIP"] = ','.join(options.filesToShip)
if options.maxevents > 0:
    os.environ["MAXNUMEVENTS"] = str(options.maxevents)
os.environ["ONLYUNPACKED"] = str(options.only_unpacked)

# Validate every component before submitting anything, so that one
# misconfigured component cannot leave the task half-submitted.
componentsToSubmit = list(conf.components)
for comp in componentsToSubmit:
    if getattr(comp, "useAAA", False):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which leaked stray characters into the message.
        raise RuntimeError(
            "Components should have useAAA disabled in the cfg when running on crab. "
            "Tune the behaviour of AAA in the crab submission instead!")

# One crab task per component; the number of jobs is the number of chunks
# that split() produces for that component.
for comp in componentsToSubmit:
    os.environ["DATASET"] = str(comp.name)
    os.environ["NJOBS"] = str(len(split([comp])))
    os.system("crab submit %s -c heppy_crab_config_env.py" %
              ("--dryrun" if options.dryrun else ""))

# Remove the temporary files used for the submission.
os.system("rm options.json")
os.system("rm python.tar.gz")
os.system("rm cmgdataset.tar.gz")
os.system("rm cafpython.tar.gz")
Job splitting is determined by your configuration file. """ options, args = batchManager.ParseOptions() from PhysicsTools.HeppyCore.framework.heppy_loop import _heppyGlobalOptions for opt in options.extraOptions: if "=" in opt: (key, val) = opt.split("=", 1) _heppyGlobalOptions[key] = val else: _heppyGlobalOptions[opt] = True batchManager.heppyOptions_ = _heppyGlobalOptions cfgFileName = args[0] handle = open(cfgFileName, 'r') # import pdb; pdb.set_trace() cfo = imp.load_source("pycfg", cfgFileName, handle) config = cfo.config handle.close() components = split( [comp for comp in config.components if len(comp.files) > 0]) listOfValues = list(range(0, len(components))) listOfNames = [comp.name for comp in components] batchManager.PrepareJobs(listOfValues, listOfNames) waitingTime = 0.1 batchManager.SubmitJobs(waitingTime)
Job splitting is determined by your configuration file. """ options, args = batchManager.ParseOptions() from PhysicsTools.HeppyCore.framework.heppy_loop import _heppyGlobalOptions for opt in options.extraOptions: if "=" in opt: (key,val) = opt.split("=",1) _heppyGlobalOptions[key] = val else: _heppyGlobalOptions[opt] = True batchManager.heppyOptions_=_heppyGlobalOptions cfgFileName = args[0] handle = open(cfgFileName, 'r') # import pdb; pdb.set_trace() cfo = imp.load_source("pycfg", cfgFileName, handle) config = cfo.config handle.close() components = split( [comp for comp in config.components if len(comp.files)>0] ) listOfValues = range(0, len(components)) listOfNames = [comp.name for comp in components] batchManager.PrepareJobs( listOfValues, listOfNames ) waitingTime = 0.1 batchManager.SubmitJobs( waitingTime )
if __name__ == '__main__':
    # Script entry point: parse the command line, load the configuration
    # module named by the first positional argument, split its selected
    # components into chunks and submit one batch job per chunk.
    batchManager = MyBatchManager()
    batchManager.parser_.usage = """ %prog [options] <cfgFile> Run nanopy analysis system on the batch. Job splitting is determined by your configuration file. """
    options, args = batchManager.ParseOptions()

    from PhysicsTools.HeppyCore.framework.heppy_loop import _heppyGlobalOptions
    # Extra options are either "key=value" pairs or bare flags (stored as True).
    for opt in options.extraOptions:
        if "=" in opt:
            (key, val) = opt.split("=", 1)
            _heppyGlobalOptions[key] = val
        else:
            _heppyGlobalOptions[opt] = True

    cfgFileName = args[0]
    # Context manager so the configuration file handle is always closed.
    with open(cfgFileName, 'r') as handle:
        cfo = imp.load_source("pycfg", cfgFileName, handle)

    # Components without input files produce no jobs.
    components = split(
        [comp for comp in cfo.selectedComponents if len(comp.files) > 0])
    # Materialised list: same value on Python 2, still a list on Python 3.
    listOfValues = list(range(0, len(components)))
    listOfNames = [comp.name for comp in components]

    batchManager.PrepareJobs(listOfValues, listOfNames)
    waitingTime = 0.1
    batchManager.SubmitJobs(waitingTime)
# Excerpt (the statements that create `opts` and `jfile` are outside this view, and the
# script may continue after the last statement shown).
# Visible steps:
#   - copy every key/value of `opts` into _heppyGlobalOptions, then close `jfile`;
#   - load the configuration module from `cfgfile` with imp.load_source and read `cfo.config`;
#   - for each component whose name equals `dataset`, pick entry `job-1` of split([comp])
#     (job numbers start at 1) and, depending on `useAAA`, rewrite the
#     "root://eoscms.cern.ch//eos/cms" prefix of its files:
#       "full"  -> "root://" + XrootdRedirector()
#       "local" -> `localPrefix`, derived from the first line printed by `edmFileUtil -d`
#       "eos"   -> files left unchanged
#     and append the result to selectedComponents;
#   - print a message when selectedComponents ends up empty.
# NOTE(review): cfgfile.rstrip('py') strips trailing 'p'/'y' characters rather than a ".py"
# suffix (e.g. "cfg.py" -> "cfg.") — confirm the resulting module name is what is wanted.
for k,v in opts.iteritems(): _heppyGlobalOptions[k]=v jfile.close() handle = open(cfgfile, 'r') cfo = imp.load_source(cfgfile.rstrip('py'), cfgfile, handle) config = cfo.config handle.close() from PhysicsTools.HeppyCore.framework.heppy_loop import split # pick right component from dataset and file from jobID selectedComponents = [] localPrefix = "" for comp in config.components: if comp.name == dataset: # this selects the files and events and changes the name to _ChunkX according to fineSplitFactor and splitFactor newComp = split([comp])[job-1] # first job number is 1 if useAAA=="full": newComp.files = [x.replace("root://eoscms.cern.ch//eos/cms","root://" + XrootdRedirector()) for x in newComp.files] elif useAAA=="local": if localPrefix=="" and len(newComp.files)>0: myfile = newComp.files[0].replace("root://eoscms.cern.ch//eos/cms","") # == /store/... mycheck = subprocess.check_output(["edmFileUtil","-d",myfile]).split('\n')[0] # == root://storage/store/....root?... if len(mycheck)>0: localPrefix = mycheck.split('?')[0].replace(myfile,"") # == root://storage print 'Will use %s as local file prefix'%localPrefix newComp.files = [x.replace("root://eoscms.cern.ch//eos/cms",localPrefix) for x in newComp.files] elif useAAA=="eos": pass selectedComponents.append(newComp) # check selectedComponents if len(selectedComponents) == 0: print "No selected components found!!"
debug = False
useAAA = True

# update most recent DCS-only json
#os.system("cp -f /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions15/13TeV/DCSOnly/json_DCSONLY_Run2015B.txt ../../data/json/")

# Load the heppy configuration module; the context manager closes the file
# even when loading the module raises.
with open("heppy_config.py", 'r') as handle:
    cfo = imp.load_source("heppy_config", "heppy_config.py", handle)
conf = cfo.config

# NOTE(review): each os.system call runs in its own child shell, so these two
# commands presumably cannot change the environment of this process — confirm
# whether they are still needed.
os.system("scramv1 runtime -sh")
os.system("source /cvmfs/cms.cern.ch/crab3/crab.sh")

# Settings are handed to heppy_crab_config_env.py through the environment.
os.environ["PROD_LABEL"] = production_label
os.environ["CMG_VERSION"] = cmg_version
os.environ["DEBUG"] = str(debug)
os.environ["USEAAA"] = str(useAAA)

from PhysicsTools.HeppyCore.framework.heppy_loop import split
for comp in conf.components:
    # get splitting from config file according to splitFactor and fineSplitFactor (priority given to the latter)
    NJOBS = len(split([comp]))
    os.environ["NJOBS"] = str(NJOBS)
    os.environ["DATASET"] = str(comp.name)
    os.system("crab submit -c heppy_crab_config_env.py")

# Remove the tarballs once every component has been submitted.
os.system("rm -f python.tar.gz")
os.system("rm -f cmgdataset.tar.gz")
os.system("rm -f cafpython.tar.gz")
handle.close()

# Package the python area, the dataset cache and the CAF python directory
# into tarballs in the current directory.
os.system("tar czf python.tar.gz --dereference --directory $CMSSW_BASE python")
os.system("tar czf cmgdataset.tar.gz --directory $HOME .cmgdataset")
os.system("tar czf cafpython.tar.gz --directory /afs/cern.ch/cms/caf/ python")

# Export the submission settings through the environment; they are read back
# by heppy_crab_config_env.py when `crab submit` is invoked below.
os.environ["PROD_LABEL"] = options.production_label
os.environ["CMG_VERSION"] = options.cmg_version
os.environ["USEAAA"] = options.AAAconfig
os.environ["STAGEOUTREMDIR"] = options.outputDir
os.environ["CFG_FILE"] = options.cfg_file
os.environ["OUTSITE"] = options.storageSite
# Optional settings are exported only when they carry a value.
if len(options.siteWhitelist) > 0:
    os.environ["WHITESITES"] = ','.join(options.siteWhitelist)
if len(options.filesToUnpack) > 0:
    os.environ["FILESTOUNPACK"] = ','.join(options.filesToUnpack)
if len(options.filesToShip) > 0:
    os.environ["FILESTOSHIP"] = ','.join(options.filesToShip)
if options.maxevents > 0:
    os.environ["MAXNUMEVENTS"] = str(options.maxevents)
os.environ["ONLYUNPACKED"] = str(options.only_unpacked)

from PhysicsTools.HeppyCore.framework.heppy_loop import split

# Validate every component before submitting anything, so that one
# misconfigured component cannot leave the task half-submitted.
componentsToSubmit = list(conf.components)
for comp in componentsToSubmit:
    if getattr(comp, "useAAA", False):
        raise RuntimeError(
            "Components should have useAAA disabled in the cfg when running on crab - "
            "tune the behaviour of AAA in the crab submission instead!")

# One crab task per component; the number of jobs is the number of chunks
# that split() produces for that component.
for comp in componentsToSubmit:
    os.environ["DATASET"] = str(comp.name)
    os.environ["NJOBS"] = str(len(split([comp])))
    os.system("crab submit %s -c heppy_crab_config_env.py" %
              ("--dryrun" if options.dryrun else ""))

# Remove the temporary files used for the submission.
os.system("rm options.json")
os.system("rm python.tar.gz")
os.system("rm cmgdataset.tar.gz")
os.system("rm cafpython.tar.gz")