def replacepowheg(oldfilename, newfilename, powhegprocess, jhugen):
    """Swap newly built POWHEG (and optionally JHUGen) executables into a gridpack.

    A CMSSW project area is created with ``scram`` inside a temporary
    directory; the ``.py``/``.sh`` files and the ``patches`` and ``examples``
    entries of ``genproductions/bin/Powheg`` are symlinked into its ``src``
    folder and ``run_pwg_condor.py`` is run for ``powhegprocess`` with output
    folder ``tmp``.  The old tarball is then unpacked in a second temporary
    directory, the existing executables are kept under an ``original_`` prefix
    (unless such a backup is already there), the new executables are moved
    in, and everything is packed into ``newfilename``.

    Args:
        oldfilename: path of the existing gridpack tarball.
        newfilename: path of the tarball to create.
        powhegprocess: value passed to ``run_pwg_condor.py -m``.
        jhugen: if true, the JHUGen card is passed to the build (``-g``) and
            the ``JHUGen`` executable is replaced as well.

    Raises:
        KeyError: if ``CMSSW_VERSION`` is not set in the environment.
        subprocess.CalledProcessError: if scram, the build script or tar fails.
    """
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)

    exampledir = "examples/gg_H_quark-mass-effects_withJHUGen_NNPDF30_13TeV"

    with cdtemp():
        subprocess.check_call(["scram", "p", "CMSSW", os.environ["CMSSW_VERSION"]])
        with cd(os.path.join(os.environ["CMSSW_VERSION"], "src")):
            for filename in glob.iglob(os.path.join(genproductions, "bin", "Powheg", "*")):
                if filename.endswith((".py", ".sh", "/patches", "/examples")):
                    os.symlink(filename, os.path.basename(filename))

            card = exampledir + "/gg_H_quark-mass-effects_NNPDF30_13TeV.input"
            JHUGencard = exampledir + "/JHUGen.input"

            command = ["./run_pwg_condor.py", "-i", card, "-m", powhegprocess,
                       '-f', "tmp", "-p", "0", "-d", "1"]
            if jhugen:
                command += ["-g", JHUGencard]
            subprocess.check_call(command)

            with cd("tmp"):
                # Absolute path of the build output; it stays valid while the
                # outer temporary directory is alive.
                newcompiledfolder = os.getcwd()

        with cdtemp():
            subprocess.check_call(["tar", "xvaf", oldfilename])

            # Only back up the first time, so repeated patching keeps the
            # genuinely original executable.
            if not os.path.exists("original_pwhg_main"):
                shutil.move("pwhg_main", "original_pwhg_main")
            shutil.move(os.path.join(newcompiledfolder, "pwhg_main"), "pwhg_main")

            if jhugen:
                if not os.path.exists("original_JHUGen"):
                    shutil.move("JHUGen", "original_JHUGen")
                # The new JHUGen goes to "JHUGen"; the previous code moved it
                # onto "pwhg_main", overwriting the new POWHEG executable and
                # leaving the tarball without a JHUGen binary.
                shutil.move(os.path.join(newcompiledfolder, "JHUGen"), "JHUGen")

            subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))
def cardsurl(self):
    """Return the genproductions URL of the Phantom card after validating the gridpack.

    The card file name is assembled from ``self.signalbkgbsi`` and
    ``self.finalstate``.  The card is downloaded from genproductions at
    ``self.genproductionscommit`` and compared with the file of the same name
    found at the top level of ``self.cvmfstarball``.

    Returns:
        The raw.githubusercontent.com URL of the card.

    Raises:
        KeyError: if ``self.finalstate`` is not one of the known final states.
        ValueError: if the tarball contains a core dump, does not contain the
            card, or contains a card that differs from the one in git.  In
            the last case both versions are written to ``./cardcontents`` and
            ``./gitcardcontents`` inside the directory ``here``.
    """
    icard = "VBF_"
    if self.signalbkgbsi in ("SIG", "BSI"):
        icard += "H125"
    if self.signalbkgbsi in ("BKG", "BSI"):
        icard += "ZZcont"
    icard += "_NNPDF31_13TeV_"
    icard += {
        "4e": "ee_ee_",
        "4mu": "mumu_mumu_",
        "2e2mu": "ee_mumu_",
        "2e2nue": "ee_veve_",
        "2e2num": "ee_vmvm_",
        "2e2nut": "ee_vtvt_",
        "2mu2nue": "mumu_veve_",
        "2mu2num": "mumu_vmvm_",
        "2mu2nut": "mumu_vtvt_",
    }[self.finalstate]
    icard += ".py"

    card = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/",
        self.genproductionscommit,
        "bin/Phantom/cards/production/13TeV/HZZ_VBFoffshell_Phantom",
        icard)

    with cdtemp():
        wget(card)
        with open(os.path.basename(card)) as f:
            gitcardcontents = f.read()

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        if glob.glob("core.*"):
            raise ValueError(
                "There is a core dump in the tarball\n{}".format(self))
        cardnameintarball = icard
        try:
            with open(cardnameintarball) as f:
                cardcontents = f.read()
        except IOError:
            raise ValueError("no " + cardnameintarball +
                             " in the tarball\n{}".format(self))

    if cardcontents != gitcardcontents:
        with cd(here):
            with open("cardcontents", "w") as f:
                f.write(cardcontents)
            # Written to the file name the error message points to (this used
            # to be dumped to "powheggitcard").
            with open("gitcardcontents", "w") as f:
                f.write(gitcardcontents)
        raise ValueError(
            "cardcontents != gitcardcontents\n{}\nSee ./cardcontents and ./gitcardcontents"
            .format(self))

    return card
def cardsurl(self):
    """Return the POWHEG card URL(s) after checking the gridpack card against git.

    The reference card is either downloaded directly or, when
    ``self.powhegcardusesscript`` is set, produced by downloading and running
    ``makecards.py`` on the template card.  Both the reference card and the
    ``powheg.input`` in ``self.cvmfstarball`` are normalized (trailing
    ``#``/``!`` comments removed, empty lines and lines mentioning a fixed set
    of keywords dropped, spacing after a few keywords unified) before being
    compared.

    Returns:
        The card URL, or the script URL followed by ``"\\n# "`` and the card
        file name when the card comes from a script.

    Raises:
        ValueError: on a core dump in the tarball (one tarball is exempted by
            path), a missing ``powheg.input``, or a card mismatch; on a
            mismatch both normalized cards are written to ``./powhegcard``
            and ``./powheggitcard`` in ``here``.
    """
    rawbase = "https://raw.githubusercontent.com/cms-sw/genproductions/"
    skippedwords = ("pdfreweight", "storeinfo_rwgt", "withnegweights", "rwl_",
                    "lhapdf6maxsets", "xgriditeration", "fakevirt")

    def normalized(text):
        # Same two-pass cleanup for the git card and the tarball card.
        rows = []
        for row in text.split("\n"):
            row = re.sub(" *([#!].*)?$", "", row)
            if not row or any(word in row for word in skippedwords):
                continue
            rows.append(re.sub("(iseed|ncall2|fakevirt) *", r"\1 ", row))
        return "\n".join(rows)

    commit = self.genproductionscommit

    if self.powhegcardusesscript:
        carddir, cardname = os.path.split(self.powhegcard)
        localscript = os.path.join(carddir, "makecards.py")
        scripturl = os.path.join(rawbase, commit,
                                 localscript.split("genproductions/")[-1])
        result = scripturl + "\n" + "# " + cardname
    else:
        cardname = os.path.join(rawbase, commit,
                                self.powhegcard.split("genproductions/")[-1])
        result = cardname

    with cdtemp():
        if self.powhegcardusesscript:
            wget(scripturl)
            templatename = (cardname
                            .replace("M{}".format(self.mass), "template")
                            .replace("Wplus", "W")
                            .replace("Wminus", "W"))
            wget(os.path.join(os.path.dirname(scripturl), templatename))
            subprocess.check_call(["python", "makecards.py"])
        else:
            wget(cardname)
        with open(os.path.basename(cardname)) as f:
            gitcard = normalized(f.read())

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        hascoredump = bool(glob.glob("core.*"))
        if hascoredump and self.cvmfstarball != "/cvmfs/cms.cern.ch/phys_generator/gridpacks/2017/13TeV/powheg/V2/HJJ_M125_13TeV/HJJ_slc6_amd64_gcc630_CMSSW_9_3_0_HJJ_NNPDF31_13TeV_M125.tgz":
            raise ValueError("There is a core dump in the tarball\n{}".format(self))
        try:
            with open("powheg.input") as f:
                tarballcard = normalized(f.read())
        except IOError:
            raise ValueError("no powheg.input in the tarball\n{}".format(self))

    if tarballcard != gitcard:
        # Leave both versions behind for a manual diff.
        with cd(here):
            with open("powhegcard", "w") as f:
                f.write(tarballcard)
            with open("powheggitcard", "w") as f:
                f.write(gitcard)
        raise ValueError("powhegcard != powheggitcard\n{}\nSee ./powhegcard and ./powheggitcard".format(self))

    return result
def addJHUGentomadgraph(oldfilename, newfilename, JHUGenversion, decaycard):
    """Add a JHUGen decay step to a MadGraph gridpack.

    The tarball is unpacked in a temporary directory, ``runcmsgrid.sh`` is
    kept as ``original_runcmsgrid.sh`` and rewritten with the module-level
    ``JHUGenpart`` snippet inserted before its ``exit`` line (or appended if
    there is none).  JHUGen is then downloaded, built with ``linkMELA`` set
    to ``No``, and its executable and the decay card are added before the
    tarball is repacked as ``newfilename``.

    Args:
        oldfilename: path of the existing gridpack tarball.
        newfilename: path of the tarball to create.
        JHUGenversion: version string used in the JHUGenerator download URL.
        decaycard: path of the JHUGen card copied to
            ``InputCards/JHUGen.input``.

    Raises:
        IOError: if ``runcmsgrid.sh`` has several lines containing ``exit``
            or already mentions JHUGen.
        subprocess.CalledProcessError: if tar, wget or make fails.
    """
    # Resolve before changing directory: a relative oldfilename could not be
    # found from inside the temporary directory otherwise.
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", oldfilename])

        if not os.path.exists("original_runcmsgrid.sh"):
            shutil.move("runcmsgrid.sh", "original_runcmsgrid.sh")

        with open("original_runcmsgrid.sh") as f, open("runcmsgrid.sh", "w") as newf:
            sawexit = False
            for line in f:
                if "exit" in line:
                    if sawexit:
                        raise IOError("Multiple exit lines in runcmsgrid.sh")
                    # The decay step has to run before the script exits.
                    newf.write(JHUGenpart)
                    sawexit = True
                if "JHUGen" in line:
                    raise IOError("runcmsgrid.sh already has JHUGen decay")
                newf.write(line)
            if not sawexit:
                newf.write(JHUGenpart)
        os.chmod("runcmsgrid.sh", os.stat("original_runcmsgrid.sh").st_mode)

        tmpdir = os.getcwd()

        with cdtemp():
            archive = "JHUGenerator." + JHUGenversion + ".tar.gz"
            subprocess.check_call(
                ["wget", "http://spin.pha.jhu.edu/Generator/" + archive])
            subprocess.check_call(["tar", "xvzf", archive])
            with cd("JHUGenerator"):
                with open("makefile") as f:
                    oldmakefile = f.read()
                newmakefile = re.sub("(linkMELA *= *)Yes", r"\1No", oldmakefile)
                assert re.search("linkMELA *= *No", newmakefile)
                with open("makefile", "w") as f:
                    f.write(newmakefile)
                # check_call instead of os.system so that a failed build is
                # reported as such rather than ignored.
                subprocess.check_call(["make"])
                shutil.copy("JHUGen", tmpdir)

        # NOTE(review): decaycard is resolved relative to the unpacked tarball
        # directory if it is a relative path - confirm callers pass an
        # absolute path.
        shutil.copy(decaycard, "InputCards/JHUGen.input")

        subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))
def multiplepatches(oldfilename, newfilename, listofkwargs):
    """Apply a chain of patches, feeding each patch's output to the next one.

    Each element of ``listofkwargs`` is a dict of keyword arguments for
    ``dopatch``; ``oldfilename``/``newfilename`` are filled in here so that
    intermediate tarballs live in a temporary directory.  With an empty list
    the tarball is simply copied.

    Returns:
        A list with the return value of every ``dopatch`` call, in order.

    Raises:
        TypeError: if one of the dicts already contains ``oldfilename`` or
            ``newfilename``.
    """
    startdir = os.getcwd()
    outputs = []

    if len(listofkwargs) == 0:
        shutil.copy(oldfilename, newfilename)
        return outputs

    # cdtemp provides the scratch directory; cd(startdir) goes straight back
    # so that relative paths given by the caller keep working.
    with cdtemp() as tmpdir, cd(startdir):
        _, tail = os.path.split(oldfilename)
        intermediates = [
            os.path.join(tmpdir, "tmp{}.{}".format(step, tail))
            for step in range(1, len(listofkwargs))
        ]
        sources = [oldfilename] + intermediates
        targets = intermediates + [newfilename]

        for patchkwargs, source, target in izip(listofkwargs, sources, targets):
            patchkwargs = patchkwargs.copy()
            if "oldfilename" in patchkwargs or "newfilename" in patchkwargs:
                raise TypeError(
                    "can't provide oldfilename or newfilename in the individual kwargs for multiplepatches\n\n{}"
                    .format(patchkwargs))
            patchkwargs.update(oldfilename=source, newfilename=target)
            outputs.append(dopatch(**patchkwargs))

    return outputs
def tweakseed(oldfilename, newfilename, increaseby, verbose=False):
    """Offset the seed argument (``${2}``) used by a gridpack's runcmsgrid.sh.

    The tarball is unpacked, ``runcmsgrid.sh`` is kept as
    ``original-runcmsgrid.sh`` (unless that backup already exists) and
    rewritten with its single ``${2}`` replaced by
    ``$(expr ${2} + increaseby)``; the result is made executable and packed
    into ``newfilename``.  ``verbose`` is accepted but not used here.

    Raises:
        ValueError: if ``${2}`` does not appear exactly once in the script.
    """
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)
    executablebits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", oldfilename])

        backupexists = os.path.exists("original-runcmsgrid.sh")
        if not backupexists:
            shutil.move("runcmsgrid.sh", "original-runcmsgrid.sh")

        with open("original-runcmsgrid.sh") as f, open("runcmsgrid.sh", "w") as newf:
            script = f.read()
            occurrences = script.count("${2}")
            if occurrences != 1:
                raise ValueError(
                    "{}\n\n\n${{2}} appears {} times in ^^^ runcmsgrid.sh".
                    format(script, occurrences))
            replacement = "$(expr ${{2}} + {})".format(increaseby)
            newf.write(script.replace("${2}", replacement))

        currentmode = os.stat('runcmsgrid.sh').st_mode
        os.chmod('runcmsgrid.sh', currentmode | executablebits)

        subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))
def getxsec(self, error=False):
    """Read the cross section from the single output .dat file in the gridpack.

    The tarball ``self.cvmfstarball`` is unpacked in a temporary directory
    and the one ``*.dat`` file that is not in a fixed list of auxiliary files
    is searched for a ``Cross-section is: X +/- Y`` line.  The values are
    stored in ``self.xsec`` and ``self.xsecerror``.

    Args:
        error: if true, return the uncertainty instead of the central value.

    Raises:
        ValueError: if there is not exactly one candidate .dat file, or the
            cross section line is missing or appears more than once.
    """
    auxiliary = {"fferr.dat", "ffperm5.dat", "ffwarn.dat", "hto_output.dat"}

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])

        candidates = {name for name in glob.iglob("*.dat")
                      if name not in auxiliary}
        if len(candidates) != 1:
            raise ValueError(
                "Expected to find exactly 1 .dat in the tarball\n"
                "(besides fferr.dat, ffperm5.dat, ffwarn.dat, hto_output.dat)\n"
                "but found {}:\n{}\n\n{}".format(len(candidates),
                                                 ", ".join(candidates),
                                                 self.cvmfstarball))

        datname, = candidates
        with open(datname) as f:
            dattext = f.read()

    found = re.findall(
        r"Cross-section is:\s*([0-9.Ee+-]*)\s*[+]/-\s*([0-9.Ee+-]*)\s*",
        dattext)
    if not found:
        raise ValueError(
            "Didn't find the cross section in the dat\n\n" +
            self.cvmfstarball)
    if len(found) > 1:
        raise ValueError(
            "Found multiple cross section lines in the dat\n\n" +
            self.cvmfstarball)

    central, uncertainty = found[0]
    self.xsec = float(central)
    self.xsecerror = float(uncertainty)
    if error:
        return self.xsecerror
    return self.xsec
def cardsurl(self):
    """Extend the parent's cardsurl with the JHUGen card and validate that card.

    The JHUGen decay card is fetched from genproductions at
    ``self.genproductionscommit`` and compared byte for byte with
    ``InputCards/JHUGen.input`` inside ``self.cvmfstarball``.

    Returns:
        The parent class's ``cardsurl`` followed by ``"\\n# "`` and the
        JHUGen card URL.

    Raises:
        ValueError: on a core dump in the tarball (one tarball is exempted by
            path), a missing JHUGen card, or a card mismatch.
    """
    cardurl = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/",
        self.genproductionscommit,
        self.decaycard.rpartition("genproductions/")[2])

    result = super(MadGraphJHUGenMCSample, self).cardsurl + "\n# " + cardurl

    with contextlib.closing(urllib.urlopen(cardurl)) as f:
        gitversion = f.read()

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])

        coredumps = glob.glob("core.*")
        if coredumps and self.cvmfstarball != "/cvmfs/cms.cern.ch/phys_generator/gridpacks/2017/13TeV/madgraph/V2/HJJ_M125_13TeV/HJJ_slc6_amd64_gcc630_CMSSW_9_3_0_HJJ_NNPDF31_13TeV_M125.tgz":
            raise ValueError(
                "There is a core dump in the tarball\n{}".format(self))

        try:
            with open("InputCards/JHUGen.input") as f:
                tarballversion = f.read()
        except IOError:
            raise ValueError(
                "no InputCards/JHUGen.input in the tarball\n{}".format(self))

    if tarballversion != gitversion:
        raise ValueError("JHUGencard != JHUGengitcard\n{}\n{}\n{}".format(
            self, tarballversion, gitversion))

    return result
def patchmcfmgridpack(oldfilename, newfilename):
    """Replace an MCFM gridpack's runcmsgrid.sh with the current template.

    The ``scram_arch_version`` and ``cmssw_version`` values are read from the
    existing ``runcmsgrid.sh``; the script is then replaced by
    ``runcmsgrid_template.sh`` from ``genproductions/bin/MCFM`` with its
    placeholders filled in, ``adjlheevent.py`` is added, and the result is
    packed into ``newfilename``.

    Raises:
        AssertionError: if either version is not assigned exactly one
            distinct value in the existing script.
    """
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)

    def onlymatch(pattern, text):
        # The script must agree with itself: exactly one distinct value.
        values = set(re.findall(pattern, text))
        assert len(values) == 1, values
        return values.pop()

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", oldfilename])

        with open("runcmsgrid.sh") as f:
            oldscript = f.read()
        scramarch = onlymatch(r"scram_arch_version=([^$\s]+)", oldscript)
        cmsswversion = onlymatch(r"cmssw_version=([^$\s]+)", oldscript)

        mcfmdir = os.path.join(genproductions, "bin", "MCFM")
        shutil.copy(os.path.join(mcfmdir, "runcmsgrid_template.sh"), "runcmsgrid.sh")
        shutil.copy(os.path.join(mcfmdir, "adjlheevent.py"), ".")

        with open("runcmsgrid.sh") as f:
            newscript = f.read()
        # Order matters: the mcfm invocation is rewritten before the bare
        # INPUT.DAT occurrences.
        substitutions = (
            ("SCRAM_ARCH_VERSION_REPLACE", scramarch),
            ("CMSSW_VERSION_REPLACE", cmsswversion),
            ("./mcfm INPUT.DAT", "./Bin/mcfm readInput.DAT"),
            ("INPUT.DAT", "readInput.DAT"),
        )
        for placeholder, value in substitutions:
            newscript = newscript.replace(placeholder, value)
        with open("runcmsgrid.sh", "w") as f:
            f.write(newscript)

        mode = os.stat("runcmsgrid.sh").st_mode
        os.chmod("runcmsgrid.sh", mode | stat.S_IEXEC)

        subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))
def request_fragment_check(self): with cdtemp(): with open(os.path.join(genproductions, "bin", "utils", "request_fragment_check.py")) as f: contents = f.read() cookies = [line for line in contents.split("\n") if "os.system" in line and "cookie" in line.lower()] assert len(cookies) == 2 for cookie in cookies: contents = contents.replace(cookie, "#I already ate the cookie") with open("request_fragment_check.py", "w") as f: f.write(contents) pipe = subprocess.Popen(["python", "request_fragment_check.py", self.prepid], stdout=subprocess.PIPE, bufsize=1) output = "" with pipe.stdout: for line in iter(pipe.stdout.readline, b''): print line, output += line for line in output.split("\n"): if line.strip() == self.prepid: continue elif "cookie" in line: continue elif not line.strip().strip("*"): continue elif line.startswith("* [OK]"): continue elif line.startswith("* [ERROR]"): return "request_fragment_check gave an error!\n"+line elif line.startswith("* [WARNING]"): result = self.handle_request_fragment_check_warning(line) if result == "ok": continue return result+"\n"+line else: if line.strip() == "* as number of final state particles (BEFORE THE DECAYS)": continue if line.strip() == "* in the LHE other than emitted extra parton.": continue if line.strip() == "* which may not have all the necessary GEN code.": continue if line.strip() == "* 'JetMatching:nJetMax' is set correctly as number of partons": continue if line.strip() == "* in born matrix element for highest multiplicity.": continue if line.strip() == "* as number of partons in born matrix element for highest multiplicity.": continue return "Unknown line in request_fragment_check output!\n"+line
def createtarball(self):
    """Copy (and optionally patch) the original tarball to ``self.foreostarball``.

    A ``KeepWhileOpenFile`` on ``self.tmptarball + ".tmp"`` guards against
    two processes doing this at once.  If ``self.patchkwargs`` is non-empty
    the tarball is run through ``patches.dopatch`` in a temporary directory
    and the patched file is moved into place; otherwise it is copied as is.

    Returns:
        A status message: either the reason nothing was done or a note that
        the gridpack was copied.

    Raises:
        AssertionError: if ``self.patchkwargs`` already contains
            ``oldfilename``, ``newfilename`` or ``sample``.
    """
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(self.tmptarball + ".tmp") as kwof:
        if not kwof:
            return "another process is already copying the tarball"
        if not os.path.exists(self.originaltarball):
            return "original tarball does not exist"

        modified = datetime.datetime.fromtimestamp(
            os.path.getmtime(self.originaltarball))
        if modified <= self.modifiedafter:
            return "original tarball is an older version than we want"

        mkdir_p(os.path.dirname(self.foreostarball))

        if self.patchkwargs:
            # Work on a copy: adding oldfilename/newfilename to the object's
            # own dict would make the assertion below fail on the next call
            # whenever patchkwargs is a stored dict.
            kwargs = dict(self.patchkwargs)
            for _ in "oldfilename", "newfilename", "sample":
                assert _ not in kwargs, _
            with cdtemp():
                patchedname = os.path.basename(self.originaltarball)
                kwargs["oldfilename"] = self.originaltarball
                kwargs["newfilename"] = os.path.abspath(patchedname)
                #kwargs["sample"] = self #???
                patches.dopatch(**kwargs)
                shutil.move(patchedname, self.foreostarball)
        else:
            shutil.copy(self.originaltarball, self.foreostarball)

        return "gridpack is copied from " + self.originaltarball + " to this folder, to be copied to eos"
def cardsurl(self):
    """Extend the parent's cardsurl with the JHUGen card and validate that card.

    The JHUGen decay card is fetched from genproductions at
    ``self.genproductionscommit`` and compared byte for byte with
    ``JHUGen.input`` at the top level of ``self.cvmfstarball``.

    Returns:
        The parent class's ``cardsurl`` followed by ``"\\n# "`` and the
        JHUGen card URL.

    Raises:
        ValueError: on a core dump in the tarball, a missing ``JHUGen.input``
            or a card mismatch.
    """
    cardurl = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/",
        self.genproductionscommit,
        self.decaycard.rpartition("genproductions/")[2])

    result = super(POWHEGJHUGenMCSample, self).cardsurl + "\n# " + cardurl

    with contextlib.closing(urllib.urlopen(cardurl)) as f:
        gitversion = f.read()

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])

        if glob.glob("core.*"):
            raise ValueError(
                "There is a core dump in the tarball\n{}".format(self))

        try:
            with open("JHUGen.input") as f:
                tarballversion = f.read()
        except IOError:
            raise ValueError(
                "no JHUGen.input in the tarball\n{}".format(self))

    if tarballversion != gitversion:
        raise ValueError("JHUGencard != JHUGengitcard\n{}\n{}\n{}".format(
            self, tarballversion, gitversion))

    return result
def createtarball(self, *args, **kwargs):
    """Check the main sample's ncalls setting, then defer to the parent class.

    The main sample's tarball is unpacked in a temporary directory and every
    line of ``readInput.DAT`` that mentions ``ncalls`` must start with an
    integer below 1000000.

    Returns:
        Whatever the parent class's ``createtarball`` returns.

    Raises:
        AssertionError: if an ``ncalls`` line has a value of 1000000 or more.
    """
    with cdtemp():
        subprocess.check_output(
            ["tar", "xvaf", self.mainsample.cvmfstarball])
        with open("readInput.DAT") as f:
            for row in f:
                if "ncalls" not in row:
                    continue
                ncalls = int(row.split()[0])
                assert ncalls < 1000000, (
                    self, self.mainsample.cvmfstarball, row)

    parent = super(RedoMCFMMoreNcalls, self)
    return parent.createtarball(*args, **kwargs)
def parallelizeJHUGen(oldfilename, newfilename, overwrite=None):
    """Rewrite a gridpack's runcmsgrid.sh so that its JHUGen step is parallelized.

    The tarball is unpacked in a temporary directory, ``runcmsgrid.sh`` is
    kept as ``original_runcmsgrid.sh`` and regenerated with the contiguous
    run of lines containing ``./JHUGen`` replaced by the module-level
    ``parallelizationpart`` template, and the result is packed into
    ``newfilename``.

    Args:
        oldfilename: path of the existing gridpack tarball.
        newfilename: path of the tarball to create.
        overwrite: what to do when ``original_runcmsgrid.sh`` already exists:
            ``"runcmsgrid.sh"`` replaces the backup with the current script,
            ``"original_runcmsgrid.sh"`` reuses the backup, ``None`` raises.

    Raises:
        ValueError: if ``overwrite`` has any other value and the backup exists.
        IOError: if the backup exists and ``overwrite`` is None, if the script
            has no or several separate ``./JHUGen`` commands, or if it already
            mentions ``parallel`` or ``xargs``.
    """
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", oldfilename])

        if not os.path.exists(
                "original_runcmsgrid.sh") or overwrite == "runcmsgrid.sh":
            shutil.move("runcmsgrid.sh", "original_runcmsgrid.sh")
        elif overwrite == "original_runcmsgrid.sh":
            # Keep the existing backup and regenerate runcmsgrid.sh from it.
            pass
        elif overwrite is not None:
            raise ValueError(
                "overwrite has to be either None, runcmsgrid.sh, or original_runcmsgrid.sh"
            )
        else:
            raise IOError("original_runcmsgrid.sh already exists")

        with open("original_runcmsgrid.sh") as f, open("runcmsgrid.sh", "w") as newf:
            sawJHUGencommand = False  # a complete JHUGen command was replaced
            inJHUGencommand = False   # currently collecting ./JHUGen lines
            JHUGencommand = ""
            for line in f:
                if "./JHUGen" in line:
                    if sawJHUGencommand:
                        raise IOError(
                            "Multiple noncontiguous lines with ./JHUGen in runcmsgrid.sh"
                        )
                    # Collect the line instead of writing it.
                    inJHUGencommand = True
                    JHUGencommand += line
                elif inJHUGencommand:
                    # First line after the command: emit the replacement,
                    # then the line itself.
                    # NOTE(review): a ./JHUGen command on the very last line
                    # of the script never reaches this branch, so the
                    # "doesn't have ./JHUGen" error below is raised - confirm
                    # this cannot happen in practice.
                    newf.write(
                        parallelizationpart % {
                            "JHUGencommand": JHUGencommand,
                            "newJHUGencommand": JHUGencommand.replace(" ../", " ../../")
                        })
                    newf.write(line)
                    sawJHUGencommand = True
                    inJHUGencommand = False
                else:
                    newf.write(line)

                # NOTE(review): these two checks are placed at loop level
                # here; the collapsed source does not show whether they
                # belonged to the else branch only - confirm.
                if "parallel" in line:
                    raise IOError("runcmsgrid.sh already has parallel")
                if "xargs" in line:
                    raise IOError("runcmsgrid.sh already has xargs")
            if not sawJHUGencommand:
                raise IOError("runcmsgrid.sh doesn't have ./JHUGen")
        # The new script gets the same permissions as the one it replaces.
        os.chmod("runcmsgrid.sh", os.stat("original_runcmsgrid.sh").st_mode)

        subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))
def getxsec(self):
    """Read the total cross section from the ``result`` file in the gridpack.

    Returns:
        An ``uncertainties.ufloat`` with the cross section and its error.

    Raises:
        NoXsecError: if ``self.cvmfstarball`` does not exist.
        ValueError: if the tarball has no ``result`` file, or the file has no
            or several ``total cross section=`` lines.
    """
    if not os.path.exists(self.cvmfstarball):
        raise NoXsecError

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        dats = set(glob.iglob("result"))
        if len(dats) != 1:
            # This used to call the misspelled ".foramt", which raised
            # AttributeError instead of the intended ValueError.
            raise ValueError("Expected to find result in the tarball {}\n".format(self.cvmfstarball))
        with open(dats.pop()) as f:
            matches = re.findall(
                r"total cross section=\s*([0-9.Ee+-]*)\s*[+]/-\s*([0-9.Ee+-]*)\s*",
                f.read())

    if not matches:
        raise ValueError("Didn't find the cross section in the result\n\n"+self.cvmfstarball)
    if len(matches) > 1:
        raise ValueError("Found multiple cross section lines in the result\n\n")

    xsec, xsecerror = matches[0]
    # re.findall yields strings; ufloat needs numbers.
    return uncertainties.ufloat(float(xsec), float(xsecerror))
def filterjobscript(self, jobindex):
    """Return the McM test script for this request with a per-job LHE seed.

    The test script for ``self.prepid`` is downloaded from the McM REST API
    and a ``sed`` command that sets
    ``externalLHEProducer.initialSeed`` is inserted right after the single
    line that contains ``cmsDriver.py``.

    Args:
        jobindex: integer added to the hash-derived base seed.

    Returns:
        The modified script as one string.

    Raises:
        AssertionError: if the script does not contain exactly one line with
            ``cmsDriver.py``.
    """
    with cdtemp():
        wget(os.path.join("https://cms-pdmv.cern.ch/mcm/public/restapi/requests/get_test/",
                          self.prepid,
                          str(self.neventsfortest) if self.neventsfortest else "").rstrip("/"),
             output=self.prepid)
        with open(self.prepid) as f:
            testjob = f.read()
        # SECURITY: eval() runs text downloaded from the network.  It is kept
        # because the response is sometimes a quoted Python string, but it is
        # only safe as long as the McM server is trusted.
        try:
            testjob = eval(testjob)  #sometimes it's a string within a string
        except SyntaxError:
            pass                     #sometimes it's not

    lines = testjob.split("\n")
    cmsdriverindex = {i for i, line in enumerate(lines) if "cmsDriver.py" in line}
    assert len(cmsdriverindex) == 1, cmsdriverindex
    cmsdriverindex = cmsdriverindex.pop()

    #The CLHEP::HepJamesRandom engine seed should be in the range 0 to 900000000.
    seed = abs(hash(self))%900000000 + jobindex
    lines.insert(cmsdriverindex+1,
                 'sed -i "/Services/aprocess.RandomNumberGeneratorService.externalLHEProducer.initialSeed = {}" *_cfg.py'.format(seed))
    return "\n".join(lines)
def dofilterjob(self, jobindex):
    """Download and run the McM test script, keeping its ``_rt.xml`` report.

    The script for ``self.prepid`` is fetched from the McM REST API into a
    temporary directory, unquoted, made executable and run; the resulting
    ``<prepid>_rt.xml`` is moved to the directory the caller was in.
    ``jobindex`` is accepted but not used here.

    Raises:
        subprocess.CalledProcessError: if the test script fails.
    """
    startdir = os.getcwd()
    scriptname = self.prepid

    with cdtemp():
        url = os.path.join(
            "https://cms-pdmv.cern.ch/mcm/public/restapi/requests/get_test/",
            self.prepid,
            str(self.neventsfortest) if self.neventsfortest else "").rstrip("/")
        wget(url, output=scriptname)

        with open(scriptname) as f:
            quotedscript = f.read()
        # SECURITY: eval() runs text downloaded from the network; this is
        # only safe as long as the McM server is trusted.
        script = eval(quotedscript)
        with open(scriptname, "w") as newf:
            newf.write(script)

        mode = os.stat(scriptname).st_mode
        os.chmod(scriptname, mode | stat.S_IEXEC)
        subprocess.check_call(["./" + scriptname], stderr=subprocess.STDOUT)

        shutil.move(scriptname + "_rt.xml", startdir)
def getxsec(self):
    """Run the gridpack once and read the cross section from the LHE ``<init>`` block.

    The tarball is unpacked in a temporary directory, ``runcmsgrid.sh`` is
    run with the arguments ``0 123456 1``, and the first four-column line
    after the first line following ``<init>`` in ``cmsgrid_final.lhe``
    supplies the cross section and its error.

    Returns:
        An ``uncertainties.ufloat`` with the cross section and its error.

    Raises:
        NoXsecError: if ``self.cvmfstarball`` does not exist.
        subprocess.CalledProcessError: if tar or ``runcmsgrid.sh`` fails (the
            output of the latter is printed first).
    """
    if not os.path.exists(self.cvmfstarball):
        raise NoXsecError

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        try:
            subprocess.check_output(
                ["./runcmsgrid.sh", "0", "123456", "1"],
                stderr=subprocess.STDOUT)
        # Qualified like everywhere else in this file; the bare name is only
        # defined if it happens to be imported separately.
        except subprocess.CalledProcessError as e:
            print(e.output)
            raise

        with open("cmsgrid_final.lhe") as f:
            for line in f:
                if "<init>" in line:
                    break
            # Skip the first line of the init block; the second one starts
            # with the cross section and its error.
            next(f)
            line = next(f)
            xsec, xsecerror, _, _ = line.split()
            return uncertainties.ufloat(float(xsec), float(xsecerror))
def checkandfixtarball(self):
    """Check whether the gridpack reads the vegas grid it ships, and repack it if not.

    The gridpack is run once; the grid file name it reports in its
    ``Reading in vegas grid from`` output line is compared with
    ``self.datasetname + '_grid'``.  If they agree an ``INTACT`` marker file
    is written to ``self.workdir``; otherwise the grid is copied to the
    expected name, the tarball is repacked as ``self.tmptarball`` and a
    ``FIXED`` marker is written.

    Returns:
        A status message, or None after the repacking branch.
    """
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(os.path.join(self.workdir, self.prepid + '.tmp'),
                           message=LSB_JOBID(),
                           deleteifjobdied=True) as kwof:
        if not kwof:
            return " check in progress"
        if not LSB_JOBID():
            # Not running inside a batch job: submit one and come back later.
            self.submitLSF()
            return "Check if the tarball needs fixing"
        with cdtemp():
            subprocess.call(['cp', self.cvmfstarball, '.'])
            subprocess.check_call(['tar', 'xzvf', self.cvmfstarball])
            # The backup is restored before repacking further down.
            subprocess.call(['cp', 'readInput.DAT', 'readInput.DAT_bak'])
            os.system('chmod 755 runcmsgrid.sh')
            try:
                output = subprocess.check_output(
                    ['bash', 'runcmsgrid.sh', '1', '31313', '12'],
                    stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError as e:
                # A failing run still produces the output that is parsed below.
                output = e.output
            for line in output.split('\n'):
                if not 'Reading in vegas grid from' in line: continue
                else:
                    line = line.split()[-2]
                    internalgridname = line.split('CMS_')[1]
            # NOTE(review): if no output line matched, internalgridname is
            # unbound here and this raises NameError.  The nesting of this
            # and the following statements relative to the loop is
            # reconstructed - confirm against the original layout.
            internalgridname = str(internalgridname)
            print "internal tarball name: " + internalgridname
            if self.datasetname + '_grid' == internalgridname:
                with open(os.path.join(self.workdir, 'INTACT'), 'w') as fout:
                    fout.write(LSB_JOBID())
                return str(self.identifiers) + "'s gridpack is intact"
            else:
                # Provide the grid under the name the executable looks for,
                # restore the input card, drop the copied tarball and the
                # CMSSW area, and repack.
                os.system('cp ' + self.datasetname + '_grid ' + internalgridname)
                os.system('mv readInput.DAT_bak readInput.DAT')
                os.system('rm -r *tgz CMSSW*')
                curdirpath = subprocess.check_output(['pwd'])  # not used afterwards
                os.system('tar cvaf ' + self.tmptarball + ' ./*')
                if os.path.exists(self.tmptarball):
                    with open(os.path.join(self.workdir, 'FIXED'), 'w') as fout:
                        fout.write(LSB_JOBID())
def prunepwgrwl(oldfilename, newfilename, filter, verbose=False):
    """Remove alternate weights from a POWHEG gridpack's pwg-rwl.dat.

    The tarball is unpacked in a temporary directory, ``pwg-rwl.dat`` is kept
    as ``original-pwg-rwl.dat`` (unless that backup already exists) and
    rewritten without the ``<weight id=...>`` lines rejected by ``filter``;
    the result is packed into ``newfilename``.

    Args:
        oldfilename: path of the existing gridpack tarball.
        newfilename: path of the tarball to create.
        filter: callable taking an ``AlternateWeight`` and returning true for
            weights to keep; a false value keeps everything.
        verbose: if true, print a per-PDF summary of kept and removed weights.

    Raises:
        ValueError: if a ``<weight id`` line does not have the expected form.
    """
    # Resolve before changing directory: a relative oldfilename could not be
    # found from inside the temporary directory otherwise.
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)

    def report(verb, counts):
        # One line per PDF name, with the multiplicity when there are several.
        print("{} {} alternate weights:".format(verb, sum(counts.values())))
        for name, n in counts.iteritems():
            if n > 1:
                print("  {} ({} variations)".format(name, n))
            else:
                print("  {}".format(name))

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", oldfilename])

        if not os.path.exists("original-pwg-rwl.dat"):
            shutil.move("pwg-rwl.dat", "original-pwg-rwl.dat")

        if verbose:
            keep = OrderedCounter()
            remove = OrderedCounter()

        with open("original-pwg-rwl.dat") as f, open("pwg-rwl.dat", "w") as newf:
            for line in f:
                if "<weight id" in line:
                    match = re.match(
                        r"^<weight id='[^']*'>((?:\s*\w*=[\w.]*\s*)*)</weight>$",
                        line.strip())
                    if not match:
                        raise ValueError("Bad pwg-rwl line:\n" + line)
                    kwargs = dict(_.split("=") for _ in match.group(1).split())
                    weight = AlternateWeight(**kwargs)
                    if filter and not filter(weight):
                        if verbose:
                            remove[weight.pdfname] += 1
                        continue
                    if verbose:
                        keep[weight.pdfname] += 1
                newf.write(line)

        subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))

    if verbose:
        report("Keeping", keep)
        print("")
        # Removed weights with a single variation are listed too; they were
        # counted in the total but missing from the list before.
        report("Removing", remove)
def parallelizepowheg(oldfilename, newfilename, overwrite=None):
    """Rewrite a gridpack's runcmsgrid.sh so that its POWHEG step is parallelized.

    The tarball is unpacked in a temporary directory, ``runcmsgrid.sh`` is
    kept as ``original_runcmsgrid.sh`` and regenerated with the line equal to
    the module-level ``powhegcommand`` replaced by the module-level
    ``parallelizationpart``; the result is packed into ``newfilename``.

    Args:
        oldfilename: path of the existing gridpack tarball.
        newfilename: path of the tarball to create.
        overwrite: what to do when ``original_runcmsgrid.sh`` already exists:
            ``"runcmsgrid.sh"`` replaces the backup with the current script,
            ``"original_runcmsgrid.sh"`` reuses the backup, ``None`` raises.

    Raises:
        ValueError: if ``overwrite`` has any other value and the backup exists.
        IOError: if the backup exists and ``overwrite`` is None, if the
            command line is missing or repeated, or if the script already
            mentions ``parallel`` or ``xargs``.
    """
    oldfilename = os.path.abspath(oldfilename)
    newfilename = os.path.abspath(newfilename)

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", oldfilename])

        makebackup = (not os.path.exists("original_runcmsgrid.sh")
                      or overwrite == "runcmsgrid.sh")
        if makebackup:
            shutil.move("runcmsgrid.sh", "original_runcmsgrid.sh")
        elif overwrite is None:
            raise IOError("original_runcmsgrid.sh already exists")
        elif overwrite != "original_runcmsgrid.sh":
            raise ValueError(
                "overwrite has to be either None, runcmsgrid.sh, or original_runcmsgrid.sh"
            )

        with open("original_runcmsgrid.sh") as f, open("runcmsgrid.sh", "w") as newf:
            replaced = False
            for row in f:
                if row.rstrip() != powhegcommand:
                    newf.write(row)
                elif replaced:
                    raise IOError(
                        "Multiple lines like this in runcmsgrid.sh:\n" +
                        powhegcommand)
                else:
                    newf.write(parallelizationpart)
                    replaced = True

                # NOTE(review): these checks are applied to every line; the
                # collapsed source does not show whether they belonged to the
                # non-matching branch only - confirm.
                if "parallel" in row:
                    raise IOError("runcmsgrid.sh already has parallel")
                if "xargs" in row:
                    raise IOError("runcmsgrid.sh already has xargs")

            if not replaced:
                raise IOError("runcmsgrid.sh doesn't have this line:\n" +
                              powhegcommand)

        originalmode = os.stat("original_runcmsgrid.sh").st_mode
        os.chmod("runcmsgrid.sh", originalmode)

        subprocess.check_call(["tar", "cvaf", newfilename] + glob.glob("*"))
def dofilterjob(self, jobindex):
    """Generate 1000 LHE events from the gridpack for the filter-efficiency job.

    When ``self.hasnonJHUGenfilter`` is set the parent implementation is used
    instead.  Otherwise the tarball is unpacked in a temporary directory, any
    ``powheg.input`` lines starting with ``rwl_`` or ``lhapdf6maxsets`` are
    commented out, ``runcmsgrid.sh`` is run with a seed derived from
    ``hash(self)`` and ``jobindex``, and ``cmsgrid_final.lhe`` is moved to
    the directory the caller was in.

    Raises:
        subprocess.CalledProcessError: if tar or ``runcmsgrid.sh`` fails.
    """
    if self.hasnonJHUGenfilter:
        return super(JHUGenFilter, self).dofilterjob(jobindex)

    # The trailing separator makes shutil.move treat this as a directory.
    destination = os.path.join(os.getcwd(), "")

    with cdtemp():
        subprocess.check_call(["tar", "xvaf", self.cvmfstarball])

        if os.path.exists("powheg.input"):
            with open("powheg.input") as f:
                card = f.read()
            card = re.sub("^(rwl_|lhapdf6maxsets)", r"#\1", card,
                          flags=re.MULTILINE)
            with open("powheg.input", "w") as f:
                f.write(card)

        seed = abs(hash(self)) % 2147483647 + jobindex
        subprocess.check_call(["./runcmsgrid.sh", "1000", str(seed), "1"])

        shutil.move("cmsgrid_final.lhe", destination)
def getsizeandtime(self):
    """Run the McM test job and extract the size and time per event from its report.

    The test script for ``self.prepid`` is downloaded and run inside an LSF
    job on ``self.timepereventqueue``; ``<prepid>_rt.xml`` is then parsed to
    set ``self.sizeperevent`` and ``self.timeperevent``.

    Returns:
        A status message describing what was done or why nothing was done.
    """
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(os.path.join(self.workdir, self.prepid+".tmp"), message=LSB_JOBID(), deleteifjobdied=True) as kwof:
        if not kwof: return "job to get the size and time is already running"
        if not LSB_JOBID(): return "need to get time and size per event, submitting to LSF" if submitLSF(self.timepereventqueue) else "need to get time and size per event, job is pending on LSF"
        if not queuematches(self.timepereventqueue): return "need to get time and size per event, but on the wrong queue"
        with cdtemp():
            wget(os.path.join("https://cms-pdmv.cern.ch/mcm/public/restapi/requests/get_test/", self.prepid, str(self.neventsfortest) if self.neventsfortest else "").rstrip("/"), output=self.prepid)
            with open(self.prepid) as f:
                testjob = f.read()
            # SECURITY: eval() runs text downloaded from the network; this is
            # only safe as long as the McM server is trusted.
            with open(self.prepid, "w") as newf:
                newf.write(eval(testjob))
            os.chmod(self.prepid, os.stat(self.prepid).st_mode | stat.S_IEXEC)
            subprocess.check_call(["./"+self.prepid], stderr=subprocess.STDOUT)
            with open(self.prepid+"_rt.xml") as f:
                nevents = totalsize = None
                for line in f:
                    line = line.strip()
                    match = re.match('<TotalEvents>([0-9]*)</TotalEvents>', line)
                    if match: nevents = int(match.group(1))
                    match = re.match('<Metric Name="Timing-tstoragefile-write-totalMegabytes" Value="([0-9.]*)"/>', line)
                    if match: totalsize = float(match.group(1))
                    # The report gives a throughput from 2017 on (inverted
                    # here) and an average event time before that.
                    if self.year >= 2017:
                        match = re.match('<Metric Name="EventThroughput" Value="([0-9.eE+-]*)"/>', line)
                        if match: self.timeperevent = 1/float(match.group(1))
                    else:
                        match = re.match('<Metric Name="AvgEventTime" Value="([0-9.eE+-]*)"/>', line)
                        if match: self.timeperevent = float(match.group(1))
                # Chained comparison: true only when both values were found.
                # The size is converted from megabytes to per-event units by
                # the factor 1024.
                # NOTE(review): placement of this statement relative to the
                # loop is reconstructed from a collapsed source - confirm.
                if nevents is not None is not totalsize:
                    self.sizeperevent = totalsize * 1024 / nevents

    # NOTE(review): the workdir is removed here, after the lock above has
    # been left; the nesting is reconstructed - confirm.
    shutil.rmtree(self.workdir)

    if not (self.sizeperevent and self.timeperevent): return "failed to get the size and time"
    # NOTE(review): LSB_JOBID() was already required to be true above, so the
    # updaterequest() call below looks unreachable unless LSB_JOBID() can
    # change between calls - confirm.
    if LSB_JOBID(): return "size and time per event are found to be {} and {}, run locally to send to McM".format(self.sizeperevent, self.timeperevent)
    self.updaterequest()
    return "size and time per event are found to be {} and {}, sent it to McM".format(self.sizeperevent, self.timeperevent)
def cardsurl(self):
    """Return the POWHEG card URL(s) after checking ``powheg.input`` against git.

    The reference card is either downloaded directly or, when
    ``self.powhegcardusesscript`` is set, produced by downloading and running
    ``makecards.py`` on the template card.  It is compared, after
    normalization, with ``powheg.input`` in the current directory.

    NOTE(review): nothing in this method unpacks the tarball, so
    ``powheg.input`` is presumably made available by the caller or a wrapper
    before this runs - confirm.

    Returns:
        The card URL (or script URL plus ``"\\n# "`` and the card name),
        followed by ``"\\n# "`` and the parent class's ``cardsurl`` when that
        is non-empty.

    Raises:
        ValueError: if ``powheg.input`` cannot be opened or differs from the
            git card; on a mismatch both normalized cards are written to
            ``./powhegcard`` and ``./powheggitcard`` in ``here``.
    """
    rawbase = "https://raw.githubusercontent.com/cms-sw/genproductions/"
    skippedwords = ("pdfreweight", "storeinfo_rwgt", "withnegweights", "rwl_",
                    "lhapdf6maxsets", "xgriditeration", "fakevirt")

    def normalized(text):
        # Same two-pass cleanup for the git card and the local card.
        rows = []
        for row in text.split("\n"):
            row = re.sub(" *([#!].*)?$", "", row)
            if not row or any(word in row for word in skippedwords):
                continue
            rows.append(re.sub("(iseed|ncall2|fakevirt) *", r"\1 ", row))
        return "\n".join(rows)

    commit = self.genproductionscommit

    if self.powhegcardusesscript:
        carddir, cardname = os.path.split(self.powhegcard)
        localscript = os.path.join(carddir, "makecards.py")
        scripturl = os.path.join(rawbase, commit,
                                 localscript.split("genproductions/")[-1])
        result = (scripturl + "\n" + "# " + cardname)
    else:
        cardname = os.path.join(rawbase, commit,
                                self.powhegcard.split("genproductions/")[-1])
        result = cardname

    with cdtemp():
        if self.powhegcardusesscript:
            wget(scripturl)
            templatename = (cardname
                            .replace("M{}".format(self.mass), "template")
                            .replace("Wplus", "W")
                            .replace("Wminus", "W"))
            wget(os.path.join(os.path.dirname(scripturl), templatename))
            subprocess.check_call(["python", "makecards.py"])
        else:
            wget(cardname)
        with open(os.path.basename(cardname)) as f:
            gitcard = normalized(f.read())

    try:
        with open("powheg.input") as f:
            localcard = normalized(f.read())
    except IOError:
        raise ValueError("no powheg.input in the tarball\n{}".format(self))

    if localcard != gitcard:
        # Leave both versions behind for a manual diff.
        with cd(here):
            with open("powhegcard", "w") as f:
                f.write(localcard)
            with open("powheggitcard", "w") as f:
                f.write(gitcard)
        raise ValueError(
            "powhegcard != powheggitcard\n{}\nSee ./powhegcard and ./powheggitcard"
            .format(self))

    inherited = super(POWHEGMCSample, self).cardsurl
    if inherited:
        result += "\n# " + inherited

    return result
def cardsurl(self):
    """Return the MCFM card description after validating the gridpack against git.

    Two things in ``self.cvmfstarball`` are compared with what genproductions
    at ``self.genproductionscommit`` would produce: ``readInput.DAT`` against
    the production card, and ``src/User/mdata.f`` against the output of
    ``ACmdataConfig.py`` run on the ``mdata.f`` of the MCFM commit named in
    ``run_mcfm_AC.py``.  Both comparisons are skipped when ``'BKG'`` is in
    ``self.identifiers``.

    Returns:
        The production card URL followed by comment lines naming the mdata
        script and its ``--coupling``/``--bsisigbkg`` arguments.

    Raises:
        ValueError: on a core dump in the tarball, a missing file, or a
            mismatch; on a mismatch both versions are written into ``here``.
    """
    commit = self.genproductionscommit
    productioncardurl = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/", commit,
        self.productioncard.split("genproductions/")[-1])
    mdatascript = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/", commit,
        "bin/MCFM/ACmdataConfig.py")

    with cdtemp():
        with contextlib.closing(urllib.urlopen(productioncardurl)) as f:
            productiongitcard = f.read()

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        if glob.glob("core.*"):
            raise ValueError(
                "There is a core dump in the tarball\n{}".format(self))
        # Disabled check for broken symlinks in the tarball:
        # for root, dirs, files in os.walk("."):
        #     for ifile in files:
        #         try:
        #             os.stat(ifile)
        #         except Exception as e:
        #             if e.args == 'No such file or directory': continue
        #             print ifile
        #             print e.message, e.args
        #             raise ValueError("There is a broken symlink in the tarball\n{}".format(self))
        try:
            with open("readInput.DAT") as f:
                productioncard = f.read()
        except IOError:
            raise ValueError(
                "no readInput.DAT in the tarball\n{}".format(self))
        try:
            with open("src/User/mdata.f") as f:
                mdatacard = f.read()
        except IOError:
            raise ValueError(
                "no src/User/mdata.f in the tarball\n{}".format(self))

    if differentproductioncards(
            productioncard,
            productiongitcard) and not 'BKG' in self.identifiers:
        # Leave both versions behind for a manual diff.
        with cd(here):
            with open("productioncard", "w") as f:
                f.write(productioncard)
            with open("productiongitcard", "w") as f:
                f.write(productiongitcard)
        raise ValueError(
            "productioncard != productiongitcard\n{}\nSee ./productioncard and ./productiongitcard"
            .format(self))

    # Find the MCFM commit: the last "git checkout <ref>" inside the
    # downloadmcfm function of run_mcfm_AC.py.
    # NOTE(review): mcfmcommit stays unbound (NameError further down) if no
    # such line is found - confirm this cannot happen.
    with contextlib.closing(
            urllib.urlopen(
                os.path.join(
                    "https://raw.githubusercontent.com/cms-sw/genproductions/"
                    + commit + "/bin/MCFM/run_mcfm_AC.py"))) as f:
        infunction = False
        for line in f:
            # Any def ends the current function; a def of downloadmcfm
            # (re)starts the region of interest.
            if re.match(r"^\s*def .*", line): infunction = False
            if re.match(r"^\s*def downloadmcfm.*", line): infunction = True
            if not infunction: continue
            match = re.search(r"git checkout ([\w.]*)", line)
            if match: mcfmcommit = match.group(1)

    # Regenerate mdata.f from the pristine MCFM source with the same
    # coupling settings.
    with cdtemp():
        mkdir_p("src/User")
        with cd("src/User"):
            wget(
                os.path.join(
                    "https://raw.githubusercontent.com/usarica/MCFM-7.0_JHUGen",
                    mcfmcommit, "src/User/mdata.f"))
        wget(mdatascript)
        subprocess.check_call([
            "python",
            os.path.basename(mdatascript), "--coupling", self.coupling,
            "--mcfmdir", ".", "--bsisigbkg", self.signalbkgbsi
        ])
        with open("src/User/mdata.f") as f:
            mdatagitcard = f.read()

    if mdatacard != mdatagitcard and not 'BKG' in self.identifiers:
        with cd(here):
            with open("mdatacard", "w") as f:
                f.write(mdatacard)
            with open("mdatagitcard", "w") as f:
                f.write(mdatagitcard)
        raise ValueError(
            "mdatacard != mdatagitcard\n{}\nSee ./mdatacard and ./mdatagitcard"
            .format(self))

    result = (productioncardurl + "\n" + "# " + mdatascript + "\n" +
              "# --coupling " + self.coupling + " --bsisigbkg " +
              self.signalbkgbsi)

    return result
def cardsurl(self):
    """Validate the MadGraph cards in the tarball against GitHub and return their URLs.

    The reference cards come from cms-sw/genproductions at
    ``self.genproductionscommit``: either downloaded directly (when
    ``self.madgraphcardscript`` is None) or produced by downloading and
    running the card script.  They are compared, after normalisation by
    ``getcontents``, with the cards found under ``InputCards/`` in
    ``self.cvmfstarball``.

    Each entry of ``self.madgraphcards`` is either a single path or, when its
    length is 2, a pair whose first item is the source card and whose second
    item gives the card's name in the tarball.

    Returns:
        The card URLs (or the script URL followed by the cards) joined with
        ``"\\n# "``.

    Raises:
        ValueError: on a core dump in the tarball, a missing card, an
            unexpected entry in ``InputCards``, or a card that differs from
            its reference (both versions are then written after ``cd(here)``).
    """
    def getcontents(f):
        # Normalise a card: drop "!" and "#" comments, surrounding
        # whitespace, blank lines and the "define p"/"define j" lines, and
        # put exactly one space on each side of every "=".
        kept = []
        for rawline in f:
            text = rawline.split("!")[0]
            text = text.split("#")[0]
            text = re.sub(" *= *", " = ", text.strip())
            if not text:
                continue
            if text.startswith("define p = "):
                continue
            if text.startswith("define j = "):
                continue
            kept.append(text + "\n")
        return "".join(kept)

    gitcardcontents = []

    if self.madgraphcardscript is None:
        # Cards are stored directly in genproductions: download each one.
        urllist = []
        for card in self.madgraphcards:
            cardpath = card[0] if len(card) == 2 else card
            urllist.append(os.path.join(
                "https://raw.githubusercontent.com/cms-sw/genproductions/",
                self.genproductionscommit,
                cardpath.replace(genproductions+"/", ""),
            ))
        cardurls = tuple(urllist)
        with cdtemp():
            for cardurl in cardurls:
                wget(cardurl)
                with open(os.path.basename(cardurl)) as f:
                    gitcardcontents.append(getcontents(f))
    else:
        # Cards are generated: fetch the script (first URL) plus its
        # auxiliary files, run it, then read the cards it wrote.
        urllist = []
        for scriptpath in self.madgraphcardscript:
            urllist.append(os.path.join(
                "https://raw.githubusercontent.com/cms-sw/genproductions/",
                self.genproductionscommit,
                scriptpath.replace(genproductions+"/", ""),
            ))
        scripturls = tuple(urllist)
        with cdtemp():
            wget(scripturls[0])
            for extraurl in scripturls[1:]:
                # Auxiliary files keep their location relative to the script.
                relpath = os.path.relpath(os.path.dirname(extraurl),
                                          os.path.dirname(scripturls[0]))
                assert ".." not in relpath, relpath
                mkdir_p(relpath)
                with cd(relpath):
                    wget(extraurl)
            mainscript = os.path.basename(scripturls[0])
            subprocess.check_call(["chmod", "u+x", mainscript])
            try:
                subprocess.check_output(["./"+mainscript], stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError as e:
                print(e.output)
                raise
            for card in self.madgraphcards:
                if len(card) == 2:
                    card = card[0]
                with open(card) as f:
                    gitcardcontents.append(getcontents(f))

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        if glob.glob("core.*"):
            raise ValueError("There is a core dump in the tarball\n{}".format(self))

        namelist = []
        for card in self.madgraphcards:
            nameintarball = card[1] if len(card) == 2 else card
            namelist.append(os.path.join("InputCards", os.path.basename(nameintarball)))
        cardnamesintarball = tuple(namelist)

        cardcontents = []
        for cardnameintarball in cardnamesintarball:
            try:
                with open(cardnameintarball) as f:
                    cardcontents.append(getcontents(f))
            except IOError:
                raise ValueError("no "+cardnameintarball+" in the tarball\n{}".format(self))

        # Anything else in InputCards must be a .tar.gz or explicitly allowed.
        for entry in glob.iglob("InputCards/*"):
            if entry in cardnamesintarball:
                continue
            if entry.endswith(".tar.gz"):
                continue
            if entry in self.otherthingsininputcards:
                continue
            raise ValueError("unknown thing "+entry+" in InputCards\n{}".format(self))

    for name, tarballcard, gitcard in itertools.izip(cardnamesintarball, cardcontents, gitcardcontents):
        if tarballcard == gitcard:
            continue
        suffix = os.path.splitext(os.path.basename(name))[1]
        with cd(here):
            with open("cardcontents"+suffix, "w") as f:
                f.write(tarballcard)
            with open("gitcardcontents"+suffix, "w") as f:
                f.write(gitcard)
        raise ValueError(name + " in tarball != " + name + " in git\n{}\nSee ./cardcontents{} and ./gitcardcontents{}".format(self, suffix, suffix))

    if self.madgraphcardscript:
        return "\n# ".join((scripturls[0],) + tuple(self.madgraphcards))
    return "\n# ".join(cardurls)
def createtarball(self):
    """Create the gridpack tarball in the work directory, or report on progress.

    Does nothing if the tarball already exists at ``self.cvmfstarball``,
    ``self.eostarball`` or ``self.foreostarball``.  Otherwise works inside
    ``self.workdir`` while holding ``self.tmptarball + ".tmp"`` through
    ``KeepWhileOpenFile``: cleans up after a dead job, runs
    ``self.makegridpackcommand`` if ``self.tmptarball`` does not exist yet,
    optionally patches the tarball, and finally moves it to
    ``self.foreostarball``.

    Returns:
        None if the tarball already exists, otherwise a human-readable
        status message describing what was done or what to do next.
    """
    import sys  # local import: only used to echo the command's output

    def removeanything(path):
        # Delete a file; fall back to a recursive delete if os.remove fails.
        try:
            os.remove(path)
        except OSError:
            shutil.rmtree(path)

    if (os.path.exists(self.cvmfstarball)
            or os.path.exists(self.eostarball)
            or os.path.exists(self.foreostarball)):
        return

    mkdir_p(self.workdir)
    with cd(self.workdir), KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
        if not kwof:
            # The marker file could not be taken.  It is expected to hold a
            # job id (presumably that of the job which created it -- see
            # message=LSB_JOBID() above).
            with open(self.tmptarball+".tmp") as f:
                try:
                    jobid = int(f.read().strip())
                except ValueError:
                    return "try running again, probably you just got really bad timing"
            if not jobended(str(jobid)):
                return "job to make the tarball is already running"
            if self.makinggridpacksubmitsjob:
                os.remove(self.tmptarball+".tmp")
                return "job died at a very odd time, cleaned it up. Try running again."
            # Delete everything in the folder, except the tarball if that
            # exists, and the marker file, which is removed last.
            tarballname = os.path.basename(self.tmptarball)
            for entry in os.listdir("."):
                if os.path.basename(entry) not in (tarballname, tarballname+".tmp"):
                    removeanything(entry)
            os.remove(tarballname+".tmp")
            return "gridpack job died, cleaned it up. run makegridpacks.py again."

        if self.gridpackjobsrunning:
            return "job to make the tarball is already running"

        if not os.path.exists(self.tmptarball):
            if not self.inthemiddleofmultistepgridpackcreation:
                # Start from a clean directory, keeping only *.tmp files.
                for entry in os.listdir("."):
                    if not entry.endswith(".tmp"):
                        removeanything(entry)
            if not self.makinggridpacksubmitsjob and self.creategridpackqueue is not None:
                if not LSB_JOBID():
                    if submitLSF(self.creategridpackqueue):
                        return "need to create the gridpack, submitting to LSF"
                    return "need to create the gridpack, job is pending on LSF"
                if not queuematches(self.creategridpackqueue):
                    return "need to create the gridpack, but on the wrong queue"
            for filename in self.makegridpackscriptstolink:
                os.symlink(filename, os.path.basename(filename))

            # Read this before running the command.
            # NOTE(review): presumably its value can change once the command
            # has run -- confirm.
            makinggridpacksubmitsjob = self.makinggridpacksubmitsjob

            # Echo the command's output line by line while also collecting it.
            # https://stackoverflow.com/a/17698359/5228524
            outputlines = []
            pipe = subprocess.Popen(self.makegridpackcommand, stdout=subprocess.PIPE, bufsize=1)
            with pipe.stdout:
                for line in iter(pipe.stdout.readline, b''):
                    sys.stdout.write(line)
                    outputlines.append(line)
            # Reap the child and make sure it has really exited before its
            # results are used.  The exit status is deliberately not checked.
            pipe.wait()
            self.processmakegridpackstdout("".join(outputlines))

            if makinggridpacksubmitsjob:
                return "submitted the gridpack creation job"
            if self.inthemiddleofmultistepgridpackcreation:
                return "ran one step of gridpack creation, run again to continue"

        mkdir_p(os.path.dirname(self.foreostarball))
        if self.patchkwargs:
            # Work on a copy so the filenames added below do not leak into
            # self.patchkwargs (which would trip the asserts on a later call).
            kwargs = dict(self.patchkwargs)
            for reserved in "oldfilename", "newfilename", "sample":
                assert reserved not in kwargs, reserved
            with cdtemp():
                kwargs["oldfilename"] = self.tmptarball
                kwargs["newfilename"] = os.path.abspath(os.path.basename(self.tmptarball))
                #kwargs["sample"] = self #???
                patches.dopatch(**kwargs)
                # Replace the original tarball with the patched one.
                shutil.move(os.path.basename(self.tmptarball), self.tmptarball)

        if self.timeperevent is not None:
            del self.timeperevent
        shutil.move(self.tmptarball, self.foreostarball)
        shutil.rmtree(os.path.dirname(self.tmptarball))
        return "tarball is created and moved to this folder, to be copied to eos"