def AlienCopy(source, destination, attempts=3, overwrite=False):
    """Copy ``source`` to the grid with ``alien_cp``, retrying on failure.

    If ``destination`` already exists on the grid it is deleted first when
    ``overwrite`` is set; otherwise nothing is copied and ``True`` is
    returned immediately. The copy command is issued at least once and is
    repeated until the destination is found on the grid or ``attempts``
    tries have been made.

    Returns ``True`` when the destination exists at the end, else ``False``.
    """
    if AlienFileExists(destination):
        if not overwrite:
            return True
        AlienDelete(destination)

    # alien_cp is given the grid side with the "alien://" scheme.
    if "alien://" in destination:
        target = destination
    else:
        target = "alien://{0}".format(destination)

    tries = 0
    copied = False
    while not copied:
        alisimtools.subprocess_call(["alien_cp", source, target])
        tries += 1
        copied = AlienFileExists(destination)
        if not copied and tries >= attempts:
            logging.info(
                "After {0} attempts I could not copy {1} to {2}".format(
                    tries, source, target))
            break
    return copied
def GenerateComments():
    """Return the comment header placed at the top of generated scripts.

    The header contains the current git branch name and commit hash, both
    obtained by running ``git rev-parse`` through ``alisimtools``.
    """
    branch_name = alisimtools.subprocess_checkoutput(
        ["git", "rev-parse", "--abbrev-ref", "HEAD"])
    commit_id = alisimtools.subprocess_checkoutput(
        ["git", "rev-parse", "HEAD"])
    # The literal is kept in its backslash-continuation form so the emitted
    # text stays exactly as before.
    header_template = "# This is the startup script \n\
# alice-yale-hfjet \n\
# Generated using branch {branch} ({hash}) \n\
"
    return header_template.format(branch=branch_name.strip('\n'),
                                  hash=commit_id.strip('\n'))
def AlienFileExists(fileName):
    """Report whether ``fileName`` exists on the grid.

    Existence is judged by ``alien_ls`` succeeding: a
    ``subprocess.CalledProcessError`` from the call means "does not exist".
    When the name contains ``alien://`` its first 8 characters are dropped
    before it is handed to ``alien_ls``.
    """
    path = fileName[8:] if "alien://" in fileName else fileName
    try:
        alisimtools.subprocess_checkcall(["alien_ls", path])
    except subprocess.CalledProcessError:
        return False
    return True
def CopyFilesToTheGrid(Files, AlienDest, LocalDest, Offline, GridUpdate):
    """Copy ``Files`` into ``LocalDest`` and, unless ``Offline``, to the grid.

    When not offline, ``AlienDest`` and ``AlienDest/output`` are created with
    ``alien_mkdir -p`` and every file is sent through ``AlienCopy`` (3
    attempts, overwriting existing grid files only when ``GridUpdate`` is
    set). The local copy is always made; ``LocalDest`` is created if missing.
    """
    online = not Offline

    if online:
        for grid_dir in (AlienDest, "{0}/output".format(AlienDest)):
            alisimtools.subprocess_call(["alien_mkdir", "-p", grid_dir])

    if not os.path.isdir(LocalDest):
        logging.info("Creating directory %s", LocalDest)
        os.makedirs(LocalDest)

    for path in Files:
        base = os.path.basename(path)
        if online:
            AlienCopy(path, "alien://{}/{}".format(AlienDest, base), 3,
                      GridUpdate)
        shutil.copy(path, os.path.join(LocalDest, base))
def GetLastTrainName(AlienPath, Gen, Proc):
    """Return the most recent train name found under ``AlienPath``.

    Train directories are named ``FastSim_<Gen>_<Proc>_<timestamp>``. The
    entries listed by ``alien_ls AlienPath`` that start with
    ``FastSim_<Gen>_<Proc>`` and carry an integer suffix are considered, and
    the name with the largest timestamp is returned.

    Returns ``None`` (after logging the listing) when no such entry exists.
    """
    prefix = "FastSim_{0}_{1}".format(Gen, Proc)
    listing = alisimtools.subprocess_checkoutput(
        ["alien_ls", AlienPath]).splitlines()

    timestamps = []
    for entry in listing:
        # Compare the prefix literally: Gen/Proc may contain characters such
        # as "+" that would change the meaning of an unescaped regex.
        if not entry.startswith(prefix):
            continue
        try:
            timestamps.append(int(entry[len(prefix) + 1:]))
        except ValueError:
            # Same prefix but no numeric timestamp (e.g. a different process
            # whose name merely starts with Proc): not one of our trains.
            continue

    if not timestamps:
        logging.error(
            "Could not find any train in the alien path {0} provided!".format(
                AlienPath))
        logging.error("\n".join(listing))
        logging.error("{0}.*".format(prefix))
        return None

    return "{0}_{1}".format(prefix, max(timestamps))
def submitJobs(self, repo, simtask, workdir, jobscriptbase, logfilebase, envscript, batchconfig, njobs, joboffset):
    """Write and submit one job script per job index.

    For every index in ``[joboffset, joboffset + njobs)`` the placeholder
    ``RANK`` in ``jobscriptbase`` and ``logfilebase`` is replaced by the
    zero-padded (4 digits) index, the script is written into ``workdir``,
    made executable and handed to the batch submission command. The
    submission output is logged.
    """
    for ijob in range(joboffset, joboffset + njobs):
        rank = "%04d" % ijob
        script_path = os.path.join(workdir, jobscriptbase.replace("RANK", rank))
        log_path = os.path.join(workdir, logfilebase.replace("RANK", rank))
        logging.info("Using jobscript {jobscript}, writing to {logfile}".format(jobscript=script_path, logfile=log_path))

        with open(script_path, 'w') as jobscriptwriter:
            jobscriptwriter.write("#!/bin/bash\n")
            jobscriptwriter.write(alipackagetools.GenerateComments())
            # Batch-system specific header (slurm or pbs).
            write_batch_header = self.get_batchhandler()
            write_batch_header(jobscriptwriter, batchconfig, log_path)
            serial_command = simtask.create_task_command_serial(ijob)
            self.writeSimCommand(repo, jobscriptwriter, envscript, workdir, serial_command)
            jobscriptwriter.write("cd %s\n" % workdir)
            jobscriptwriter.write("echo WD: $PWD\n")
            self.writeCleanCommand(jobscriptwriter, envscript, ijob)
            # Explicit close kept from the original; the close performed on
            # leaving the with-block is then a no-op.
            jobscriptwriter.close()

        os.chmod(script_path, 0o755)
        submit_output = alisimtools.subprocess_checkoutput([self.get_batchsub(), script_path])
        logging.info("%s", submit_output)
def DetermineMergingStage(AlienPath, TrainName):
    """Count the merging stages already present for a train on the grid.

    Returns ``-1`` when ``AlienPath/TrainName`` does not exist or its listing
    has no ``output`` entry (the listing is logged in the latter case).
    Otherwise returns the number of entries whose name matches ``stage_.``.
    """
    train_dir = "{0}/{1}".format(AlienPath, TrainName)
    if not aligridtools.AlienFileExists(train_dir):
        return -1

    raw_listing = alisimtools.subprocess_checkoutput(
        ["alien_ls", train_dir]).splitlines()
    # Keep only the last path component; with no "/" rfind gives -1 and the
    # slice starts at 0, i.e. the entry is kept unchanged.
    entries = [item[item.rfind("/") + 1:] for item in raw_listing]

    if "output" not in entries:
        logging.info("%s", entries)
        return -1

    stage_pattern = re.compile("stage_.")
    return sum(1 for item in entries if stage_pattern.match(item))
def SubmitProcessingJobs(TrainName, LocalPath, Events, Jobs, Gen, Proc,
                         yamlFileName, batchconfig, copy_files, PowhegStage,
                         XGridIter, HerwigTune):
    """Prepare the local working directory of a train and submit batch jobs.

    The working directory is ``LocalPath/TrainName``. When ``copy_files`` is
    set, the directory is created, generator inputs are produced, the source
    files are copied from ``repo`` and the code is built. Afterwards the jobs
    are submitted through ``SubmitParallelPowheg`` (if "powheg" is in ``Gen``)
    or ``SubmitParallel``.

    NOTE(review): ``repo`` is not a parameter; it is presumably a module-level
    global defined outside this block - confirm.
    NOTE(review): ``Proc`` is not used in this function.
    """
    logging.info("Submitting processing jobs for train {0}".format(TrainName))
    ExeFile = "runFastSim.py"
    LocalDest = "{0}/{1}".format(LocalPath, TrainName)
    # Environment script is selected by generator family.
    envscript = "std_env.sh"
    if "powheg" in Gen:
        envscript = "powheg_env.sh"
    elif "herwig" in Gen:
        envscript = "herwig_env.sh"
    # NOTE(review): FilesToDelete is only defined in this branch and is used
    # by the clean-up loop below, so the whole staging/build section is taken
    # to be guarded by copy_files - confirm against the original layout.
    if copy_files:
        # Raises if the directory already exists.
        os.makedirs(LocalDest)
        # Maps source path (under repo) -> destination path (under LocalDest).
        FilesToCopy = {}
        FilesToDelete = []
        FilesToCopy["%s/%s" % (repo, yamlFileName)] = "%s/%s" % (
            LocalDest, os.path.basename(yamlFileName))
        FilesToCopy["%s/%s" % (repo, ExeFile)] = "%s/%s" % (LocalDest, ExeFile)
        Sourcefiles = [
            "OnTheFlySimulationGenerator.cxx", "OnTheFlySimulationGenerator.h",
            "runJetSimulation.C", "start_simulation.C", "lhapdf_utils.py",
            "Makefile", "HepMC.tar", "THepMCParser_dev.h",
            "THepMCParser_dev.cxx", "AliGenExtFile_dev.h",
            "AliGenExtFile_dev.cxx", "AliGenReaderHepMC_dev.h",
            "AliGenReaderHepMC_dev.cxx", "AliGenEvtGen_dev.h",
            "AliGenEvtGen_dev.cxx", "AliGenPythia_dev.h",
            "AliGenPythia_dev.cxx", "AliPythia6_dev.h", "AliPythia6_dev.cxx",
            "AliPythia8_dev.h", "AliPythia8_dev.cxx", "AliPythiaBase_dev.h",
            "AliPythiaBase_dev.cxx"
        ]
        if "pythia8" in Gen:
            Sourcefiles.append("powheg_pythia8_conf.cmnd")
        if "powheg" in Gen:
            # One call per POWHEG stage (and per iteration for stage 1);
            # presumably each writes the corresponding input file into
            # LocalDest - verify against alipowhegtools.main.
            alipowhegtools.main(yamlFileName, LocalDest, Events, 1, 1)
            alipowhegtools.main(yamlFileName, LocalDest, Events, 1, 2)
            alipowhegtools.main(yamlFileName, LocalDest, Events, 1, 3)
            alipowhegtools.main(yamlFileName, LocalDest, Events, 2)
            alipowhegtools.main(yamlFileName, LocalDest, Events, 3)
            alipowhegtools.main(yamlFileName, LocalDest, Events, 4)
            # Seed file: one random seed per line, at least 20 of them.
            with open("{}/pwgseeds.dat".format(LocalDest), "w") as myfile:
                if Jobs > 20:
                    nseeds = Jobs
                else:
                    nseeds = 20
                for iseed in range(1, nseeds + 1):
                    rnd = random.randint(0, 1073741824)  # 2^30
                    myfile.write("{}\n".format(rnd))
        elif "herwig" in Gen:
            aliherwigtools.main(yamlFileName, "./", Events)
            Sourcefiles.extend([
                "herwig.in", "MB.in", "PPCollider.in", "SoftModel.in",
                "SoftTune.in"
            ])
            if HerwigTune:
                Sourcefiles.append(HerwigTune)
            # herwig.in is removed from the current directory after the build.
            FilesToDelete.append("herwig.in")
        for f in Sourcefiles:
            FilesToCopy["%s/%s" % (repo, f)] = "%s/%s" % (LocalDest, f)
        alisimtools.copy_to_workdir(FilesToCopy)
        logging.info("Compiling analysis code...")
        get_batchtools().run_build(repo, LocalDest, envscript)
        # Paths here are relative to the current working directory.
        for file in FilesToDelete:
            os.remove(file)
    if "powheg" in Gen:
        SubmitParallelPowheg(LocalDest, ExeFile, Events, Jobs, yamlFileName,
                             batchconfig, envscript, PowhegStage, XGridIter)
    else:
        SubmitParallel(LocalDest, ExeFile, Events, Jobs, yamlFileName,
                       batchconfig, envscript)
    logging.info("Done.")
def GenerateXMLCollection(Path, XmlName):
    """Return the output of ``alien_find -x`` for the results under ``Path``.

    The search pattern is ``*/AnalysisResults*.root``; ``XmlName`` is passed
    as the argument of ``-x``.
    """
    find_command = [
        "alien_find", "-x", XmlName, Path, "*/AnalysisResults*.root"
    ]
    return alisimtools.subprocess_checkoutput(find_command)
def DownloadResults(TrainName, LocalPath, AlienPath, Gen, Proc, PtHardList, MergingStage):
    """Download the ``AnalysisResults*.root`` files of a train from the grid.

    With more than one entry in ``PtHardList`` every pt-hard bin
    ``TrainName/<bin>`` is processed; otherwise only ``TrainName`` itself.
    A negative ``MergingStage`` is resolved with ``DetermineMergingStage`` on
    the first train directory visited and the resolved value is reused for
    the following bins. Stage 0 downloads from ``<train>/output``, stage N>0
    from ``<train>/stage_<N-1>/output``; the same layout is created under
    ``LocalPath``.

    Files already present locally are skipped. Each file is first downloaded
    to ``temp_<name>`` and renamed only if it is non-empty. The process exits
    with status 1 when no results can be found. ``Gen`` and ``Proc`` are
    accepted but not used.
    """
    if PtHardList and len(PtHardList) > 1:
        minPtHardBin = 0
        maxPtHardBin = len(PtHardList) - 1
    else:
        # Single pseudo-bin (-1): results sit directly under the train.
        minPtHardBin = -1
        maxPtHardBin = 0

    for ptHardBin in range(minPtHardBin, maxPtHardBin):
        if ptHardBin < 0:
            TrainPtHardName = TrainName
        else:
            TrainPtHardName = "{0}/{1}".format(TrainName, ptHardBin)
        logging.info(
            "Downloading results from train {0}".format(TrainPtHardName))

        if MergingStage < 0:
            MergingStage = DetermineMergingStage(AlienPath, TrainPtHardName)

        if MergingStage < 0:
            logging.error(
                "Could not find any results from train {0}! Aborting...".
                format(TrainPtHardName))
            # Non-zero status: this is a failure, not a normal termination.
            exit(1)
        elif MergingStage == 0:
            logging.warning(
                "Merging stage determined to be 0 (i.e. no grid merging has been performed)"
            )
            AlienOutputPath = "{0}/{1}/output".format(AlienPath,
                                                      TrainPtHardName)
            LocalDest = "{0}/{1}/output".format(LocalPath, TrainPtHardName)
        else:
            logging.info(
                "Merging stage determined to be {0}".format(MergingStage))
            AlienOutputPath = "{0}/{1}/stage_{2}/output".format(
                AlienPath, TrainPtHardName, MergingStage - 1)
            LocalDest = "{0}/{1}/stage_{2}/output".format(
                LocalPath, TrainPtHardName, MergingStage - 1)

        if not os.path.isdir(LocalDest):
            os.makedirs(LocalDest)

        AlienOuputContent = alisimtools.subprocess_checkoutput(
            ["alien_ls", AlienOutputPath]).splitlines()
        for SubDir in AlienOuputContent:
            # Keep the last path component only (no "/" -> unchanged).
            SubDir = SubDir[SubDir.rfind("/") + 1:]
            SubDirDest = "{0}/{1}".format(LocalDest, SubDir)
            SubDirOrig = "{0}/{1}".format(AlienOutputPath, SubDir)
            if not os.path.isdir(SubDirDest):
                os.makedirs(SubDirDest)

            FilesToDownload = alisimtools.subprocess_checkoutput(
                ["alien_ls",
                 "{0}/AnalysisResults*.root".format(SubDirOrig)]).splitlines()
            for FileName in FilesToDownload:
                FileName = FileName[FileName.rfind("/") + 1:]
                FileDest = "{0}/{1}".format(SubDirDest, FileName)
                if os.path.isfile(FileDest):
                    logging.warning(
                        "File {0} already exists, skipping...".format(
                            FileDest))
                    continue

                FileOrig = "{0}/{1}".format(SubDirOrig, FileName)
                FileDestTemp = "{0}/temp_{1}".format(SubDirDest, FileName)
                # Drop a stale temporary file left by an earlier attempt.
                if os.path.isfile(FileDestTemp):
                    os.remove(FileDestTemp)

                logging.info("Downloading from {0} to {1}".format(
                    FileOrig, FileDestTemp))
                alisimtools.subprocess_call(
                    ["alien_cp", "alien://{0}".format(FileOrig),
                     FileDestTemp])

                # alien_cp may fail without creating the file at all;
                # getsize would raise in that case.
                if not os.path.isfile(FileDestTemp):
                    logging.error(
                        "Downloading of {0} failed!".format(FileOrig))
                    continue

                if os.path.getsize(FileDestTemp) > 0:
                    logging.info("Renaming {0} to {1}".format(
                        FileDestTemp, FileDest))
                    os.rename(FileDestTemp, FileDest)
                else:
                    logging.error(
                        "Downloading of {0} failed!".format(FileOrig))
                    os.remove(FileDestTemp)
def SubmitProcessingJobs(TrainName, LocalPath, AlienPath, AliPhysicsVersion, Offline, GridUpdate, TTL, Events, Jobs, Gen, Proc, yamlFileName, PtHardList, OldPowhegInit, PowhegStage, HerwigTune, LoadPackagesSeparately):
    """Prepare, upload and submit the grid processing jobs of a train.

    The list of input files and the JDL package list are assembled according
    to ``Gen`` (pythia8 / powheg / herwig). Then, for every pt-hard bin (or
    once for the whole train when ``PtHardList`` has at most one entry), a
    JDL is generated and written, the files are copied to ``LocalPath`` and
    (unless ``Offline``) to ``AlienPath``, and the JDL is submitted with
    ``alien_submit``. Temporary files created in the current directory are
    removed at the end.

    With pt-hard bins, ``Jobs`` is indexed per bin (``Jobs[bin]``).
    The process exits with status 1 for unsupported POWHEG stages.
    """
    logging.info("Submitting processing jobs for train {0}".format(TrainName))

    ValidationScript = "FastSim_validation.sh"
    ExeFile = "runFastSim.py"
    JdlFile = "FastSim_{0}_{1}.jdl".format(Gen, Proc)

    FilesToDelete = [JdlFile]
    FilesToCopy = [
        yamlFileName, "OnTheFlySimulationGenerator.cxx",
        "OnTheFlySimulationGenerator.h", "runJetSimulation.C",
        "start_simulation.C", "lhapdf_utils.py", "Makefile", "HepMC.tar",
        "AliGenExtFile_dev.h", "AliGenExtFile_dev.cxx",
        "AliGenReaderHepMC_dev.h", "AliGenReaderHepMC_dev.cxx",
        "AliGenEvtGen_dev.h", "AliGenEvtGen_dev.cxx", "AliGenPythia_dev.h",
        "AliGenPythia_dev.cxx", "AliPythia6_dev.h", "AliPythia6_dev.cxx",
        "AliPythia8_dev.h", "AliPythia8_dev.cxx", "AliPythiaBase_dev.h",
        "AliPythiaBase_dev.cxx", "THepMCParser_dev.h", "THepMCParser_dev.cxx"
    ]

    # Every entry ends with ",\n"; the last separator is stripped below.
    Packages = "\"VO_ALICE@Python-modules::1.0-27\",\n"
    if not LoadPackagesSeparately:
        Packages += "\"VO_ALICE@AliPhysics::{aliphysics}\",\n".format(
            aliphysics=AliPhysicsVersion)

    if "pythia8" in Gen:
        FilesToCopy.append("powheg_pythia8_conf.cmnd")

    if "powheg" in Gen:
        if OldPowhegInit:
            if PowhegStage == 0:
                alipowhegtools.main(yamlFileName, "./", Events, 0)
                FilesToCopy.extend([
                    "data/{}/pwggrid.dat".format(OldPowhegInit),
                    "data/{}/pwgubound.dat".format(OldPowhegInit)
                ])
            elif PowhegStage == 4:
                alipowhegtools.main(yamlFileName, "./", Events, 4)
                os.rename(alipowhegtools.GetParallelInputFileName(4),
                          "powheg.input")
                EssentialFilesToCopy = [
                    "pwggrid-????.dat", "pwggridinfo-btl-xg?-????.dat",
                    "pwgubound-????.dat"
                ]
                for fpattern in EssentialFilesToCopy:
                    FilesToCopy.extend(
                        glob.glob("data/{}/{}".format(OldPowhegInit,
                                                      fpattern)))
                seed_file_name = "pwgseeds.dat"
                FilesToDelete.append(seed_file_name)
                FilesToCopy.append(seed_file_name)
                # Jobs + 1 random seeds, one per line.
                with open(seed_file_name, "w") as seed_file:
                    for iseed in range(0, Jobs + 1):
                        seed_file.write(str(random.randint(0, 1073741824)))
                        seed_file.write("\n")
            else:
                logging.error(
                    "Not implemented for POWHEG stage {}".format(PowhegStage))
                exit(1)
        else:
            if PowhegStage != 0:
                logging.error(
                    "Not implemented for POWHEG stage {}".format(PowhegStage))
                exit(1)
            else:
                alipowhegtools.main(yamlFileName, "./", Events, 0)
        FilesToCopy.append("powheg.input")
        FilesToDelete.append("powheg.input")
        if not LoadPackagesSeparately:
            Packages += "\"VO_ALICE@POWHEG::r3178-alice1-1\",\n"

    if "herwig" in Gen:
        aliherwigtools.main(yamlFileName, "./", Events)
        FilesToCopy.extend([
            "herwig.in", "MB.in", "PPCollider.in", "SoftModel.in",
            "SoftTune.in"
        ])
        if HerwigTune:
            FilesToCopy.append(HerwigTune)
        FilesToDelete.append("herwig.in")
        if not LoadPackagesSeparately:
            Packages += "\"VO_ALICE@Herwig::v7.1.2-alice1-3\",\n"

    if PtHardList and len(PtHardList) > 1:
        minPtHardBin = 0
        maxPtHardBin = len(PtHardList) - 1
    else:
        # Single pseudo-bin (-1): the train is not split in pt-hard bins.
        minPtHardBin = -1
        maxPtHardBin = 0

    Packages = Packages[:-2]  # remove trailing ",\n"

    for ptHardBin in range(minPtHardBin, maxPtHardBin):
        if ptHardBin < 0:
            AlienDest = "{0}/{1}".format(AlienPath, TrainName)
            LocalDest = "{0}/{1}".format(LocalPath, TrainName)
            minPtHard = -1
            maxPtHard = -1
            JobsPtHard = Jobs
        else:
            minPtHard = PtHardList[ptHardBin]
            maxPtHard = PtHardList[ptHardBin + 1]
            AlienDest = "{0}/{1}/{2}".format(AlienPath, TrainName, ptHardBin)
            LocalDest = "{0}/{1}/{2}".format(LocalPath, TrainName, ptHardBin)
            JobsPtHard = Jobs[ptHardBin]

        JdlContent = GenerateProcessingJDL(ExeFile, AlienDest, Packages,
                                           ValidationScript, FilesToCopy, TTL,
                                           Events, JobsPtHard, yamlFileName,
                                           minPtHard, maxPtHard, PowhegStage)
        with open(JdlFile, 'w') as jdl:
            jdl.write(JdlContent)

        # Build the upload list per bin instead of extending FilesToCopy in
        # place: extending inside the loop made the shared list grow on every
        # iteration, so later bins got a JDL listing extra (duplicated) files.
        FilesForThisBin = FilesToCopy + [JdlFile, ExeFile, ValidationScript]
        aligridtools.CopyFilesToTheGrid(FilesForThisBin, AlienDest, LocalDest,
                                        Offline, GridUpdate)
        if not Offline:
            alisimtools.subprocess_call(
                ["alien_submit", "alien://{0}/{1}".format(AlienDest, JdlFile)])

    # Clean up only after all bins are done: every bin copies these files.
    for tmpFile in FilesToDelete:
        os.remove(tmpFile)

    logging.info("Done.")
    alisimtools.subprocess_call(["ls", LocalDest])
def SubmitMergingJobs(TrainName, LocalPath, AlienPath, AliPhysicsVersion, Offline, GridUpdate, TTL, MaxFilesPerJob, Gen, Proc, PtHardList, MergingStage):
    """Generate, upload and submit the grid merging jobs of a train.

    With more than one entry in ``PtHardList`` every pt-hard bin
    ``TrainName/<bin>`` is merged; otherwise only ``TrainName`` itself.
    A negative ``MergingStage`` is resolved with ``DetermineMergingStage`` on
    the first train directory visited and reused for the following bins.
    Stage 0 merges ``<train>/output``; stage N>0 merges
    ``<train>/stage_<N-1>/output``. The result goes to ``<train>/stage_<N>``,
    which is removed from the grid first if it already exists.

    The process exits with status 1 when no results can be found.
    """
    if PtHardList and len(PtHardList) > 1:
        minPtHardBin = 0
        maxPtHardBin = len(PtHardList) - 1
    else:
        # Single pseudo-bin (-1): the train is not split in pt-hard bins.
        minPtHardBin = -1
        maxPtHardBin = 0

    ValidationScript = "FastSim_validation.sh"
    ExeFile = "runFastSimMerging.py"
    JdlFile = "FastSim_Merging_{0}_{1}.jdl".format(Gen, Proc)
    # The same split method is used for every merging stage.
    SplitMethod = "parentdirectory"

    for ptHardBin in range(minPtHardBin, maxPtHardBin):
        if ptHardBin < 0:
            TrainPtHardName = TrainName
        else:
            TrainPtHardName = "{0}/{1}".format(TrainName, ptHardBin)

        if MergingStage < 0:
            MergingStage = DetermineMergingStage(AlienPath, TrainPtHardName)

        if MergingStage < 0:
            # Report the directory that was actually inspected.
            logging.error(
                "Could not find any results from train {0}! Aborting...".
                format(TrainPtHardName))
            exit(1)
        elif MergingStage == 0:
            logging.info(
                "Merging stage determined to be 0 (i.e. first merging stage)")
            PreviousStagePath = "{0}/{1}/output".format(
                AlienPath, TrainPtHardName)
        else:
            logging.info(
                "Merging stage determined to be {0}".format(MergingStage))
            PreviousStagePath = "{0}/{1}/stage_{2}/output".format(
                AlienPath, TrainPtHardName, MergingStage - 1)

        AlienDest = "{0}/{1}/stage_{2}".format(AlienPath, TrainPtHardName,
                                               MergingStage)
        LocalDest = "{0}/{1}/stage_{2}".format(LocalPath, TrainPtHardName,
                                               MergingStage)

        # Start from a clean destination on the grid.
        if AlienFileExists(AlienDest):
            AlienDeleteDir(AlienDest)

        XmlFile = "FastSim_Merging_{0}_{1}_stage_{2}.xml".format(
            Gen, Proc, MergingStage)
        FilesToCopy = ["runJetSimulationMergingGrid.C", "start_merging.C"]

        JdlContent = GenerateMergingJDL(ExeFile, XmlFile, AlienDest,
                                        TrainName, AliPhysicsVersion,
                                        ValidationScript, FilesToCopy, TTL,
                                        MaxFilesPerJob, SplitMethod)
        with open(JdlFile, 'w') as jdl:
            jdl.write(JdlContent)

        XmlContent = GenerateXMLCollection(PreviousStagePath, XmlFile)
        with open(XmlFile, 'w') as xml:
            xml.write(XmlContent)

        FilesToCopy.extend([JdlFile, XmlFile, ExeFile, ValidationScript])
        CopyFilesToTheGrid(FilesToCopy, AlienDest, LocalDest, Offline,
                           GridUpdate)
        if not Offline:
            alisimtools.subprocess_call(
                ["alien_submit", "alien://{0}/{1}".format(AlienDest, JdlFile)])

        # The JDL and XML written in the current directory are temporary.
        os.remove(JdlFile)
        os.remove(XmlFile)

    logging.info("Done.")
    alisimtools.subprocess_call(["ls", LocalDest])
def submitJobs(self, repo, simtask, workdir, jobscriptbase, logfilebase, envscript, batchconfig, njobs, joboffset):
    """Write and submit the slurm job script(s) for a simulation task.

    ``batchconfig`` is a YAML file; when its ``qos`` entry is not "shared"
    one single MPI job covering all ``njobs`` jobs is submitted, sized from
    the number of tasks per node of the system named by the ``NERSC_HOST``
    environment variable ("edison" or "cori"). Otherwise one serial job per
    index in ``[joboffset, joboffset + njobs)`` is submitted.

    ``RANK`` in ``jobscriptbase`` / ``logfilebase`` is replaced by "MPI" /
    "ALL" for the MPI job and by the zero-padded (4 digits) job index for
    serial jobs. The submission output is logged.

    Raises ``KeyError`` in MPI mode when ``NERSC_HOST`` is unset or names an
    unknown system.
    """
    # Context manager: the handle is released even if YAML parsing fails.
    with open(batchconfig, "r") as breader:
        bcdata = yaml.load(breader, yaml.SafeLoader)
    ismpiqueue = bcdata["qos"] != "shared"

    if ismpiqueue:
        # The host is only needed here, so serial submission does not
        # require NERSC_HOST to be set.
        nerscsystem = os.environ["NERSC_HOST"]
        # determine nodes and number of CPUs
        taskspernode = {"edison": 24, "cori": 68}
        slotspernode = int(taskspernode[nerscsystem])
        nnodes = int(math.ceil(float(njobs) / float(slotspernode)))
        nslots = nnodes * slotspernode

        taskjobscriptname = os.path.join(
            workdir, jobscriptbase.replace("RANK", "MPI"))
        generallogfile = os.path.join(
            workdir, logfilebase.replace("RANK", "ALL"))

        # submit one single mpi job
        with open(taskjobscriptname, "w") as jobscriptwriter:
            jobscriptwriter.write("#!/bin/bash\n")
            jobscriptwriter.write(alipackagetools.GenerateComments())
            self.configbatch_slurm(jobscriptwriter, batchconfig, nnodes, 0, 0,
                                   generallogfile)
            # python with mpi, needed for srun
            jobscriptwriter.write("module load cray-python/2.7.15.1\n")
            self.writeSimCommandMPI(repo, jobscriptwriter, nslots, njobs,
                                    joboffset, envscript, workdir,
                                    simtask.create_task_command_mpi(),
                                    os.path.join(workdir, logfilebase))
        os.chmod(taskjobscriptname, 0o755)
        output = alisimtools.subprocess_checkoutput(
            [self.get_batchsub(), taskjobscriptname])
        logging.info("%s", output)
    else:
        # submit multiple serial jobs
        for ijob in range(joboffset, njobs + joboffset):
            # Same 4-digit padding for script and log file; the script name
            # previously used "%0d" (no padding), unlike the log file.
            rank = "%04d" % ijob
            taskjobscriptname = os.path.join(
                workdir, jobscriptbase.replace("RANK", rank))
            tasklogfile = os.path.join(
                workdir, logfilebase.replace("RANK", rank))

            with open(taskjobscriptname, "w") as jobscriptwriter:
                jobscriptwriter.write("#!/bin/bash\n")
                jobscriptwriter.write(alipackagetools.GenerateComments())
                self.configbatch_slurm(jobscriptwriter, batchconfig, 1, 1, 1,
                                       tasklogfile)
                self.writeSimCommand(
                    repo, jobscriptwriter, envscript, workdir,
                    simtask.create_task_command_serial(ijob))
                self.writeCleanCommand(jobscriptwriter, envscript, ijob)
            os.chmod(taskjobscriptname, 0o755)
            output = alisimtools.subprocess_checkoutput(
                [self.get_batchsub(), taskjobscriptname])
            logging.info("%s", output)
def get_batchsub(self):
    """Return the submission command: "sbatch" if ``alisimtools.test_slurm()`` is true, else "qsub"."""
    if alisimtools.test_slurm():
        return "sbatch"
    return "qsub"
def get_batchhandler(self):
    """Return the bound method that writes the batch header.

    ``self.configbatch_slurm`` is chosen when ``alisimtools.test_slurm()`` is
    true, ``self.configbatch_pbs`` otherwise.
    """
    if alisimtools.test_slurm():
        return self.configbatch_slurm
    return self.configbatch_pbs
def AlienDeleteDir(fileName):
    """Remove a grid directory with ``alien_rmdir``.

    When the name contains ``alien://`` its first 8 characters are dropped
    before it is passed to the command.
    """
    path = fileName[8:] if "alien://" in fileName else fileName
    alisimtools.subprocess_call(["alien_rmdir", path])