def GenerateComments():
    """Return a shell-comment header recording the current git branch and commit.

    Runs ``git rev-parse`` twice to obtain the branch name and commit hash of
    the working tree this script is executed from, so generated job scripts
    are traceable to the exact code revision.

    Returns:
        str: newline-terminated ``#`` comment lines for a startup script.
    """
    branch = alisimtools.subprocess_checkoutput(
        ["git", "rev-parse", "--abbrev-ref", "HEAD"])
    # renamed from `hash`, which shadowed the builtin
    commit_hash = alisimtools.subprocess_checkoutput(
        ["git", "rev-parse", "HEAD"])
    # implicit string concatenation instead of fragile backslash continuations
    comments = ("# This is the startup script \n"
                "# alice-yale-hfjet \n"
                "# Generated using branch {branch} ({hash}) \n").format(
                    branch=branch.strip('\n'), hash=commit_hash.strip('\n'))
    return comments
def GetLastTrainName(AlienPath, Gen, Proc):
    """Find the most recent FastSim train for a generator/process pair.

    Lists ``AlienPath`` on the grid, keeps the subdirectories matching
    ``FastSim_<Gen>_<Proc>*``, interprets the trailing suffix as an integer
    timestamp, and returns the train name with the newest timestamp.
    Returns None (after logging the listing) when no train matches.
    """
    base_name = "FastSim_{0}_{1}".format(Gen, Proc)
    listing = alisimtools.subprocess_checkoutput(
        ["alien_ls", AlienPath]).splitlines()
    pattern = re.compile("{0}.*".format(base_name))
    timestamps = []
    for entry in listing:
        if pattern.match(entry):
            # suffix after "<base_name>_" is the submission timestamp
            timestamps.append(int(entry[len(base_name) + 1:]))
    if not timestamps:
        logging.error(
            "Could not find any train in the alien path {0} provided!".format(
                AlienPath))
        logging.error("\n".join(listing))
        logging.error("{0}.*".format(base_name))
        return None
    return "{0}_{1}".format(base_name, max(timestamps))
def submitJobs(self, repo, simtask, workdir, jobscriptbase, logfilebase, envscript, batchconfig, njobs, joboffset):
    """Create and submit one batch job script per simulation rank.

    For each rank in [joboffset, joboffset + njobs) a job script is written
    to ``workdir`` (RANK placeholders in the script/log names replaced by the
    zero-padded rank), made executable, and submitted with the configured
    batch submit command.

    Args:
        repo: repository handle forwarded to writeSimCommand.
        simtask: task object providing create_task_command_serial(ijob).
        workdir: directory receiving the job scripts and logs.
        jobscriptbase/logfilebase: filename templates containing "RANK".
        envscript: environment setup script forwarded to the writers.
        batchconfig: batch system configuration forwarded to the handler.
        njobs: number of jobs to submit.
        joboffset: first rank index.
    """
    for ijob in range(joboffset, njobs + joboffset):
        rank = "%04d" % ijob
        taskjobscriptname = os.path.join(workdir, jobscriptbase.replace("RANK", rank))
        tasklogfile = os.path.join(workdir, logfilebase.replace("RANK", rank))
        logging.info("Using jobscript {jobscript}, writing to {logfile}".format(
            jobscript=taskjobscriptname, logfile=tasklogfile))
        with open(taskjobscriptname, 'w') as jobscriptwriter:
            jobscriptwriter.write("#!/bin/bash\n")
            jobscriptwriter.write(alipackagetools.GenerateComments())
            self.get_batchhandler()(jobscriptwriter, batchconfig, tasklogfile)
            self.writeSimCommand(repo, jobscriptwriter, envscript, workdir,
                                 simtask.create_task_command_serial(ijob))
            jobscriptwriter.write("cd %s\n" % workdir)
            jobscriptwriter.write("echo WD: $PWD\n")
            self.writeCleanCommand(jobscriptwriter, envscript, ijob)
        # the `with` block closes the file; the original redundant close() removed
        os.chmod(taskjobscriptname, 0o755)
        output = alisimtools.subprocess_checkoutput([self.get_batchsub(), taskjobscriptname])
        logging.info("%s", output)
def DetermineMergingStage(AlienPath, TrainName):
    """Determine how far grid merging has progressed for a train.

    Returns:
        int: -1 if the train output directory does not exist on alien or
        contains no "output" entry; otherwise the number of ``stage_*``
        subdirectories (0 means no grid merging was performed).
    """
    AlienOutput = "{0}/{1}".format(AlienPath, TrainName)
    if not aligridtools.AlienFileExists(AlienOutput):
        return -1
    listing = alisimtools.subprocess_checkoutput(
        ["alien_ls", AlienOutput]).splitlines()
    # alien_ls may return full paths; keep only the last path component.
    # rfind() == -1 yields p[0:] == p, so entries without "/" pass through.
    entries = [p[p.rfind("/") + 1:] for p in listing]
    if "output" not in entries:
        logging.info("%s", entries)
        return -1
    regex = re.compile("stage_.")
    merging_stages = [entry for entry in entries if re.match(regex, entry)]
    return len(merging_stages)
def GenerateXMLCollection(Path, XmlName):
    """Produce an XML collection of AnalysisResults ROOT files under *Path*.

    Delegates to ``alien_find -x`` which prints the collection named
    *XmlName*; the command output is returned unmodified.
    """
    find_command = ["alien_find", "-x", XmlName, Path, "*/AnalysisResults*.root"]
    return alisimtools.subprocess_checkoutput(find_command)
def DownloadResults(TrainName, LocalPath, AlienPath, Gen, Proc, PtHardList,
                    MergingStage):
    """Download merged AnalysisResults files for a train from alien to disk.

    Iterates over pt-hard bins (or a single pass when PtHardList has fewer
    than two entries), locates the correct output directory for the given
    merging stage, and copies each AnalysisResults*.root file via a temp
    file so partial downloads never overwrite good data.

    NOTE(review): Gen and Proc are accepted but never used in this body —
    confirm whether they are kept for interface compatibility.
    """
    if PtHardList and len(PtHardList) > 1:
        # bins are the intervals between consecutive PtHardList edges
        minPtHardBin = 0
        maxPtHardBin = len(PtHardList) - 1
    else:
        # single pass with ptHardBin == -1 meaning "no pt-hard binning"
        minPtHardBin = -1
        maxPtHardBin = 0
    for ptHardBin in range(minPtHardBin, maxPtHardBin):
        if ptHardBin < 0:
            TrainPtHardName = TrainName
        else:
            # NOTE(review): minPtHard/maxPtHard are computed but never used;
            # the subdirectory is named by bin index, not pt-hard values.
            minPtHard = PtHardList[ptHardBin]
            maxPtHard = PtHardList[ptHardBin + 1]
            TrainPtHardName = "{0}/{1}".format(TrainName, ptHardBin)
        logging.info(
            "Downloading results from train {0}".format(TrainPtHardName))
        if MergingStage < 0:
            # auto-detect; NOTE(review): the detected value persists into the
            # following pt-hard bins — confirm this is intended.
            MergingStage = DetermineMergingStage(AlienPath, TrainPtHardName)
        if MergingStage < 0:
            logging.error(
                "Could not find any results from train {0}! Aborting...".
                format(TrainPtHardName))
            # NOTE(review): exits with status 0 despite being an error path
            exit(0)
        elif MergingStage == 0:
            logging.warning(
                "Merging stage determined to be 0 (i.e. no grid merging has been performed)"
            )
            AlienOutputPath = "{0}/{1}/output".format(AlienPath, TrainPtHardName)
            LocalDest = "{0}/{1}/output".format(LocalPath, TrainPtHardName)
        else:
            logging.info(
                "Merging stage determined to be {0}".format(MergingStage))
            # stage directories are 0-based, so the last stage is MergingStage-1
            AlienOutputPath = "{0}/{1}/stage_{2}/output".format(
                AlienPath, TrainPtHardName, MergingStage - 1)
            LocalDest = "{0}/{1}/stage_{2}/output".format(
                LocalPath, TrainPtHardName, MergingStage - 1)
        if not os.path.isdir(LocalDest):
            os.makedirs(LocalDest)
        AlienOuputContent = alisimtools.subprocess_checkoutput(
            ["alien_ls", AlienOutputPath]).splitlines()
        for SubDir in AlienOuputContent:
            # strip any leading path returned by alien_ls
            i = SubDir.rfind("/")
            if i >= 0:
                SubDir = SubDir[i + 1:]
            SubDirDest = "{0}/{1}".format(LocalDest, SubDir)
            SubDirOrig = "{0}/{1}".format(AlienOutputPath, SubDir)
            if not os.path.isdir(SubDirDest):
                os.makedirs(SubDirDest)
            FilesToDownload = alisimtools.subprocess_checkoutput(
                ["alien_ls", "{0}/AnalysisResults*.root".format(SubDirOrig)]).splitlines()
            for FileName in FilesToDownload:
                i = FileName.rfind("/")
                if i >= 0:
                    FileName = FileName[i + 1:]
                FileDest = "{0}/{1}".format(SubDirDest, FileName)
                # never re-download files already present locally
                if os.path.isfile(FileDest):
                    logging.warning(
                        "File {0} already exists, skipping...".format(
                            FileDest))
                    continue
                FileOrig = "{0}/{1}".format(SubDirOrig, FileName)
                # download to temp_ name first; rename only on success
                FileDestTemp = "{0}/temp_{1}".format(SubDirDest, FileName)
                if os.path.isfile(FileDestTemp):
                    os.remove(FileDestTemp)
                logging.info("Downloading from {0} to {1}".format(
                    FileOrig, FileDestTemp))
                alisimtools.subprocess_call(
                    ["alien_cp", "alien://{0}".format(FileOrig), FileDestTemp])
                # zero-size file is treated as a failed transfer
                if os.path.getsize(FileDestTemp) > 0:
                    logging.info("Renaming {0} to {1}".format(
                        FileDestTemp, FileDest))
                    os.rename(FileDestTemp, FileDest)
                else:
                    logging.error(
                        "Downloading of {0} failed!".format(FileOrig))
                    os.remove(FileDestTemp)
def submitJobs(self, repo, simtask, workdir, jobscriptbase, logfilebase, envscript, batchconfig, njobs, joboffset):
    """Submit simulation jobs on a NERSC system (edison/cori) via SLURM.

    Reads the batch YAML config: when the QOS is not "shared" one single MPI
    job spanning all ranks is submitted; otherwise one serial job per rank.

    Args:
        repo: repository handle forwarded to the command writers.
        simtask: task object providing create_task_command_mpi()/serial().
        workdir: directory receiving the job scripts and logs.
        jobscriptbase/logfilebase: filename templates containing "RANK".
        envscript: environment setup script forwarded to the writers.
        batchconfig: path to the YAML batch configuration file.
        njobs: number of ranks to run.
        joboffset: first rank index.
    """
    # context manager instead of the original open/close pair
    with open(batchconfig, "r") as breader:
        bcdata = yaml.load(breader, yaml.SafeLoader)
    ismpiqueue = bcdata["qos"] != "shared"
    nerscsystem = os.environ["NERSC_HOST"]
    if ismpiqueue:
        # determine nodes and number of CPUs
        taskspernode = {"edison": 24, "cori": 68}
        nnodes = int(math.ceil(float(njobs) / float(taskspernode[nerscsystem])))
        nslots = int(nnodes) * int(taskspernode[nerscsystem])
        taskjobscriptname = os.path.join(workdir, jobscriptbase.replace("RANK", "MPI"))
        generallogfile = os.path.join(workdir, logfilebase.replace("RANK", "ALL"))
        # submit one single mpi job
        with open(taskjobscriptname, "w") as jobscriptwriter:
            jobscriptwriter.write("#!/bin/bash\n")
            jobscriptwriter.write(alipackagetools.GenerateComments())
            self.configbatch_slurm(jobscriptwriter, batchconfig, nnodes, 0, 0, generallogfile)
            # python with mpi, needed for srun
            jobscriptwriter.write("module load cray-python/2.7.15.1\n")
            self.writeSimCommandMPI(repo, jobscriptwriter, nslots, njobs, joboffset,
                                    envscript, workdir,
                                    simtask.create_task_command_mpi(),
                                    os.path.join(workdir, logfilebase))
        os.chmod(taskjobscriptname, 0o755)
        output = alisimtools.subprocess_checkoutput(
            [self.get_batchsub(), taskjobscriptname])
        logging.info("%s", output)
    else:
        # submit multiple serial jobs
        for ijob in range(joboffset, njobs + joboffset):
            # FIX: jobscript name used "%0d" (no padding) while the logfile
            # used "%04d"; both now zero-padded, consistent with the serial
            # submitJobs implementation.
            rank = "%04d" % ijob
            taskjobscriptname = os.path.join(workdir, jobscriptbase.replace("RANK", rank))
            tasklogfile = os.path.join(workdir, logfilebase.replace("RANK", rank))
            with open(taskjobscriptname, "w") as jobscriptwriter:
                jobscriptwriter.write("#!/bin/bash\n")
                jobscriptwriter.write(alipackagetools.GenerateComments())
                self.configbatch_slurm(jobscriptwriter, batchconfig, 1, 1, 1, tasklogfile)
                self.writeSimCommand(repo, jobscriptwriter, envscript, workdir,
                                     simtask.create_task_command_serial(ijob))
                self.writeCleanCommand(jobscriptwriter, envscript, ijob)
            os.chmod(taskjobscriptname, 0o755)
            output = alisimtools.subprocess_checkoutput(
                [self.get_batchsub(), taskjobscriptname])
            logging.info("%s", output)