def generate(self):
    """Generate the job script folders for all simple jobs.

    Iterates over the job indices ``0 .. nJobs-1``. For every index a fresh
    interpolation config is built and converted; files are only written for
    indices greater than ``jobIdxParent``. The converted config of every
    index is collected and handed to ``writeTemplates`` of later jobs.

    :raises ValueError: if ``jobIdxParent`` leaves no job index to generate.
    """
    cluster = self.cCluster
    lastJobIdx = cluster.nJobs - 1
    if not (cluster.jobIdxParent < lastJobIdx):
        message = "jobIdxParent=%i not feasible, since you generate %i jobs from idx 0 till %i !" % (
            cluster.jobIdxParent, cluster.nJobs, lastJobIdx)
        raise ValueError(message)

    # The leading None stands for "no config before job 0".
    previousConfigs = [None]

    for jobIdx in range(0, self.cCluster.nJobs):
        CE.printHeader("Generating Simple Job: ====================================")
        if jobIdx <= self.cCluster.jobIdxParent:
            parentNote = " (not files because parent job!)"
        else:
            parentNote = ""
        CE.printKeyMessage("JobIndex", "%i" % jobIdx + parentNote)

        # Every job index starts from a fresh config carrying its own index.
        self.config = self.makeInterpolationConfig()
        self.config["Job"]["jobIdx"] = str(jobIdx)

        # Final interpolation and conversion into self.configDict; the code
        # below reads self.cCluster / self.cJob only after this call.
        self.convertValues()

        # Only indices above the parent job get files written.
        isParentJob = not (jobIdx > self.cCluster.jobIdxParent)
        if not isParentJob:
            self.checkConfig(interact=self.cCluster.interact)
            self.printOptions()

            cF.makeDirectory(self.cJob.scriptDir,
                             name="Job script dir",
                             defaultMakeEmpty=False,
                             interact=self.cCluster.interact)
            argsFile = os.path.join(self.cJob.scriptDir, "configureScriptArgs.txt")
            self.writeJobScriptArgs(argsFile)

            self.writeTemplates(self, previousConfigs)

        # Remember this config for the following job index.
        previousConfigs.append(self.configDict)
        CE.printHeader("==========================================================")
def __checkConfig(self, interact):
    """Validate the cluster settings and make the configured paths absolute.

    Creates the job generator output directory via ``cF.makeDirectory``,
    rewrites ``jobGeneratorOutputDir`` and ``jobGeneratorConfig`` on
    ``self.cCluster`` as absolute paths and checks the numeric settings
    against their lower bounds.

    :param interact: accepted as part of the signature; not used by this
                     method.
    :raises NameError: if the job config file does not exist or a numeric
                       setting is below its lower bound.
    """
    cluster = self.cCluster

    cF.makeDirectory(cluster.jobGeneratorOutputDir, interact=False, name="Job output dir")
    cluster.jobGeneratorOutputDir = os.path.abspath(cluster.jobGeneratorOutputDir)

    if not os.path.exists(cluster.jobGeneratorConfig):
        raise NameError("Job config file: %s does not exist!" % cluster.jobGeneratorConfig)
    cluster.jobGeneratorConfig = os.path.abspath(cluster.jobGeneratorConfig)

    # The bound named in each message matches the comparison that guards it.
    if cluster.nProcesses < 1:
        raise NameError("Process number: %i wrong (>=1)!" % cluster.nProcesses)

    # A run time of zero is accepted; only negative values are rejected.
    if cluster.runTime < 0:
        raise NameError("Run time value: %i wrong (>=0)!" % cluster.runTime)

    if cluster.ramPerCore < 1:
        raise NameError("Ram per core value: %i wrong (>=1)!" % cluster.ramPerCore)

    if cluster.localScratchPerCore < 1:
        raise NameError("Local scratch per core value: %i wrong (>=1)!" % cluster.localScratchPerCore)

    if cluster.nJobs < 1:
        raise NameError("Number of jobs to generate: %i wrong (>=1)!" % cluster.nJobs)
def main():
    """Consolidate file validation infos and write a combined validation file.

    The new validation infos (loaded from ``--valFileInfoGlobNew`` and/or
    found by validating the files in ``--searchDirNew``) are compared to the
    old validation infos (``--valFileInfoGlobOld``). Old entries whose hash
    is not part of the new set are taken over; for hashes present in both,
    the new entry wins. The resulting list is written to ``--output`` and,
    if ``--statusFolder`` is given, symlinks to all files are created in
    its ``finished`` / ``recover`` sub folders.

    :returns: ``1`` if any exception occurred (it is printed together with
              the traceback and the parser help), otherwise ``None``.
    """
    parser = MyOptParser()

    parser.add_argument("-s", "--searchDirNew", dest="searchDirNew",
        help="""This is the search directory where it is looked for output files (.tiff,.exr,.rib.gz). """,
        metavar="<path>", default=None, required=False)

    parser.add_argument("--valFileInfoGlobNew", dest="valFileInfoGlobNew",
        help="""
        The globbing expression for all input xmls with file status which
        are consolidated into a new file info under --output. The found and validated files in --searchDir (if specified) are
        added to the set of new files.
        """,
        default=None, metavar="<glob>", required=False)

    parser.add_argument("--valFileInfoGlobOld", dest="valFileInfoGlobOld",
        help="""
        The globbing expression for all old input xmls with file status which
        are consolidated with the new files into a combined file info under --output.
        """,
        default=None, metavar="<glob>", required=False)

    parser.add_argument("--pipelineSpecs", dest="pipelineSpecs", default="",
        help="""Json file with info about the pipeline, fileValidation, fileValidationTools.
        """,
        metavar="<string>", required=True)

    parser.add_argument("--statusFolder", dest="statusFolder", default=None,
        help="""The output status folder which contains links to files which are finished, or can be recovered.
        """,
        metavar="<string>", required=False)

    parser.add_argument("--validateOnlyLastModified", dest="validateOnlyLastModified", type=cF.toBool, default=True,
        help="""The file with the moset recent modified time is only validated, all others are set to finished!.""",
        required=False)

    parser.add_argument("-o", "--output", dest="output",
        help="""The output xml which is written, which proivides validation info for each file found""",
        metavar="<path>", required=True)

    try:

        print("====================== FileValidation ===========================")

        opts = AttrMap(vars(parser.parse_args()))
        if not opts.searchDirNew and not opts.valFileInfoGlobNew:
            raise ValueError("You need to define either searchDirNew or valFileInfoGlobNew!")

        # An empty glob for the old infos means "no old infos".
        if opts.valFileInfoGlobOld == "":
            opts.valFileInfoGlobOld = None

        print("searchDir: %s" % opts.searchDirNew)
        print("valFileInfoGlobNew: %s" % opts.valFileInfoGlobNew)
        print("valFileInfoGlobOld: %s" % opts.valFileInfoGlobOld)
        print("output: %s" % opts.output)

        d = cF.jsonLoad(opts.pipelineSpecs)
        pipelineTools = d["pipelineTools"]
        fileValidationSpecs = d["fileValidationSpecs"]
        fileValidationTools = d["fileValidationTools"]

        # Validation infos of the new set, keyed by file hash.
        valDataAllNew = dict()
        # NOTE(review): superseded old files are collected here, but nothing
        # in this function acts on the list - confirm whether that is intended.
        deleteFiles = []

        # load new validation data
        if opts.valFileInfoGlobNew is not None:
            print("Load new validation files")
            valDataAllNew, _ = loadValidationFiles(opts.valFileInfoGlobNew)

            preferGlobalPaths(valDataAllNew)

        # add searchDir files to new set
        # search files ============================================================================
        if opts.searchDirNew is not None:
            print("Validate all files in: %s with pipeLineSpecs: %s" % (opts.searchDirNew, opts.pipelineSpecs))
            allFiles = searchFiles(opts.searchDirNew, opts, fileValidationSpecs, fileValidationTools, pipelineTools)
            for ha, fileInfo in allFiles.items():
                if ha in valDataAllNew:
                    # Entries loaded from the validation files take precedence.
                    print("""WARNING: File %s already found in validation data set from globbing expr.
                          %s """ % (fileInfo["absPath"], opts.valFileInfoGlobNew))
                else:
                    valDataAllNew[ha] = fileInfo
        # ===============================================================================================

        # load old validation data
        if opts.valFileInfoGlobOld is not None:
            print("Load old validation files")
            valDataAllOld, _ = loadValidationFiles(opts.valFileInfoGlobOld)
            preferGlobalPaths(valDataAllOld)

            # add old to new validation infos
            for ha, valInfo in valDataAllOld.items():

                if ha not in valDataAllNew:
                    # this old file hash is not in our current list, so add it!

                    # check absPath if it exists, otherwise try to extend the
                    # relPath with the dir of this validation file.
                    if not os.path.exists(valInfo["absPath"]):
                        absPath = os.path.join(os.path.dirname(valInfo["validatationInfoPath"]), valInfo["relPath"])
                        if not os.path.exists(absPath):
                            print(valInfo["validatationInfoPath"])
                            raise NameError("""File path in valid. info file: %s does not exist, extended rel. path to: %s does also not exist!""" % (valInfo["absPath"], absPath))
                        else:
                            # %-format the message; passing the values as extra
                            # print() arguments would leave the placeholders
                            # unfilled.
                            print("Replacing inexisting path %s with %s" % (valInfo["absPath"], absPath))
                            valInfo["absPath"] = absPath

                    # copy element to new file info
                    valDataAllNew[ha] = valInfo

                else:
                    # we have the same hash in the new info
                    # take our new one which is better!
                    # remember the old file if the new file does not resolve to it
                    if os.path.realpath(valDataAllNew[ha]["absPath"]) != os.path.realpath(valInfo["absPath"]):
                        deleteFiles.append(valInfo["absPath"])

        # make final list
        finalFiles = list(valDataAllNew.values())

        printSummary(finalFiles, pipelineTools, False)

        print("Make output validation file")
        # The context manager closes the file even if dumping fails.
        with open(opts.output, "w+") as outFile:
            cF.jsonDump(finalFiles, outFile, sort_keys=True)

        # Renew status folder, move over new xml info
        if opts.statusFolder is not None:

            print("Renew status folder:")
            finished = os.path.join(opts.statusFolder, "finished")
            recover = os.path.join(opts.statusFolder, "recover")

            cF.makeDirectory(finished, interact=False, defaultMakeEmpty=True)
            cF.makeDirectory(recover, interact=False, defaultMakeEmpty=True)
            # make symlinks for all files in the appropriate folder:
            paths = {"recover": recover, "finished": finished}

            for fileInfo in finalFiles:
                statusDir = paths[fileInfo["status"]]
                h = fileInfo["hash"]
                # Link relative to the status sub folder.
                p = os.path.relpath(fileInfo["absPath"], start=statusDir)
                filename = os.path.basename(p)
                head, ext = os.path.splitext(filename)

                os.symlink(p, os.path.join(statusDir, head + "-uuid-" + h + ext))

        print("=================================================================")

    except Exception as e:
        print("====================================================================")
        print("Exception occured: " + str(e))
        print("====================================================================")
        traceback.print_exc(file=sys.stdout)
        parser.print_help()
        return 1
def generate(self):
    """Generate (and optionally submit) the MPI SimpleExecutable jobs.

    Iterates over the job indices ``0 .. nJobs-1``. For every index a fresh
    interpolation config is built, its submit command is assembled and the
    config is converted; files are only written (and the job only submitted,
    if ``submitJobs`` is set) for indices greater than ``jobIdxParent``.
    Finally a ``submitAll.sh`` containing all submit commands is written
    into the script dir of the first config and made executable.

    :raises ValueError: if ``jobIdxParent`` leaves no job index to generate.
    """
    # Parent config dict is none
    configDicts = [None]

    if self.cCluster.jobIdxParent >= self.cCluster.nJobs-1:
        raise ValueError("jobIdxParent=%i not feasible, since you generate %i jobs from idx 0 till %i !" % (self.cCluster.jobIdxParent,self.cCluster.nJobs, self.cCluster.nJobs-1) )

    for jobIdx in range(0, self.cCluster.nJobs):

        print("Generating MPI Job: SimpleExecutable =================")
        print("-> JobIndex: %i" % jobIdx + (" (not files because parent job!)" if jobIdx <= self.cCluster.jobIdxParent else "") )

        # first make a new self.config
        self.config = self.makeInterpolationConfig()

        # setting the jobIdx
        self.config["Job"]["jobIdx"] = str(jobIdx)
        # Every job except the first gets the chain-job arguments of the
        # previous job's config inserted into its submit command.
        self.config["Job"]["submitCommand"] = " ".join([
            self.config["Cluster"]["submitCommand"],
            (configDicts[-1].Cluster.submitArgsChainJob if jobIdx != 0 else ""),
            self.config["Job"]["submitArgs"]])

        # final interpolation of all automatically generated options and
        # conversion to self.configDict; self.cCluster and self.cJob are
        # read only after this call.
        self.convertValues()

        # only generate files for job indices which are greater than jobIdxParent
        # jobIdxParent is the parent job, which we base our nJobs on
        if jobIdx > self.cCluster.jobIdxParent:

            self.checkConfig(interact=self.cCluster.interact)
            self.printOptions()

            # make job script dir
            cf.makeDirectory(self.cJob.scriptDir, name="Job script dir", defaultMakeEmpty=False, interact=self.cCluster.interact)
            self.writeJobScriptArgs(os.path.join(self.cJob.scriptDir, "submitScriptArgs.txt"))

            # write all templates
            self.writeTemplates(self, configDicts)

            # check if we submit the job
            if self.cCluster.submitJobs:
                print("Trying to submit job with command: \n%s" % self.cJob.submitCommand)
                cf.callProcess(self.cJob.submitCommand)

        # save configDict for next jobIdx (possibly)
        configDicts.append(self.configDict)
        print("==========================================================")

    # Write total submit file to first folder
    config0 = configDicts[1]
    filePath = os.path.join(config0.Job.scriptDir, "submitAll.sh")
    # Skip the leading None placeholder.
    commands = [c.Job.submitCommand for c in configDicts if c]
    # Close (and thereby flush) the file before it is made executable.
    with open(filePath, "w+") as f:
        f.write("\n".join(commands))
    cf.makeExecutable(filePath)
def generate(self):
    """Generate (and optionally submit) the MPI tool pipeline jobs.

    Iterates over the job indices ``0 .. nJobs-1``. For every index a fresh
    interpolation config is built, its submit command and the pre-process
    validation info file are set from the previous job's config, and the
    config is converted. Files are only written (and the job only submitted,
    if ``submitJobs`` is set) for indices greater than ``jobIdxParent``.
    Finally a ``submitAll.sh`` with the submit commands of all generated
    jobs is written into the script dir of the first generated job and made
    executable.

    :raises CE.MyValueError: if ``jobIdxParent`` leaves no job index to
                             generate.
    """
    # The leading None stands for "no config before job 0".
    configDicts = [None]

    if self.cCluster.jobIdxParent >= self.cCluster.nJobs-1:
        raise CE.MyValueError("jobIdxParent=%i not feasible, since you generate %i jobs from idx 0 till %i !" % (self.cCluster.jobIdxParent,self.cCluster.nJobs, self.cCluster.nJobs-1) )

    for jobIdx in range(0, self.cCluster.nJobs):

        CE.printHeader("Generating MPI Job: Tool Pipeline ========================")
        CE.printKeyMessage("JobIndex","%i" % jobIdx + (" (no generating, because parent job!)" if jobIdx <= self.cCluster.jobIdxParent else "") )

        # first make a new self.config
        self.config = self.makeInterpolationConfig()

        # setting the jobIdx
        self.config["Job"]["jobIdx"] = str(jobIdx)
        # Only jobs after the first generated one get the chain-job
        # arguments of the previous job's config.
        self.config["Job"]["submitCommand"] = " ".join([
            self.config["Cluster"]["submitCommand"],
            (configDicts[-1].Cluster.submitArgsChainJob if jobIdx > self.cCluster.jobIdxParent+1 else ""),
            self.config["Job"]["submitArgs"]])

        # The pre-process step of this job reads the validation info file
        # written by the post-process step of the previous job (if any).
        if configDicts[-1]:
            self.config["Pipeline-PreProcess"]["validationInfoFile"] = configDicts[-1]["Pipeline-PostProcess"]["validationInfoFile"]
        else:
            self.config["Pipeline-PreProcess"]["validationInfoFile"] = ""

        # final interpolation of all automatically generated options and
        # conversion to self.configDict; self.cCluster and self.cJob are
        # read only after this call.
        self.convertValues()

        # only make jobs which are greater than parent!
        if jobIdx > self.cCluster.jobIdxParent:

            self.checkConfig(interact=self.cCluster.interact)
            self.printOptions()

            # make job script dir
            cF.makeDirectory(self.cJob.scriptDir, name="Job script dir", defaultMakeEmpty=False, interact=self.cCluster.interact)
            self.writeJobScriptArgs(os.path.join(self.cJob.scriptDir, "configureScriptArgs.txt"))

            # write all templates
            self.writeTemplates(self, configDicts)

            # check if we submit the job
            if self.cCluster.submitJobs:
                print("Trying to submit job with command: \n%s" % self.cJob.submitCommand)
                cF.callProcess(self.cJob.submitCommand)

        # save configDict for next jobIdx (possibly)
        # TODO save config dict in job script folder (json file)
        configDicts.append(self.configDict)
        CE.printHeader("==========================================================")

    # Write total submit file to first folder.
    # configDicts[k+1] belongs to job index k, so the first generated job
    # (index jobIdxParent+1) sits at jobIdxParent+2.
    firstGenerated = self.cCluster.jobIdxParent+2
    config0 = configDicts[firstGenerated]
    filePath = os.path.join(config0.Job.scriptDir, "submitAll.sh")
    commands = [c.Job.submitCommand for c in configDicts[firstGenerated:] if c]
    # Close (and thereby flush) the file before it is made executable.
    with open(filePath, "w+") as f:
        f.write("\n".join(commands))
    cF.makeExecutable(filePath)