def _buildFileEntry(spec, match, pipelineTools):
    """Build the description dict of one file from its spec and regex match.

    The entry is assembled from the named regex groups, overlaid with a deep
    copy of ``spec`` (so every file owns an independent copy), marked with
    ``status == "finished"`` as an initial guess, and finally passed through
    ``cF.formatAll`` with the regex groups (the ``"regex"`` key is excluded).

    Raises:
        ValueError: if the entry has no ``"tool"`` key, the tool is not in
            ``pipelineTools``, ``spec`` has no ``"hashString"`` key, or the
            entry has no ``"frameIdx"`` key.
    """
    entry = {}
    entry.update(match.groupdict())
    entry.update(copy.deepcopy(spec))
    # initial guess only, the validation below fully determines this value
    entry["status"] = "finished"
    entry = cF.formatAll(entry, match.groupdict(), exceptKeys={"regex": None})

    if "tool" not in entry:
        raise ValueError("You need to define a 'tool' key for %s " % str(spec))
    if entry["tool"] not in pipelineTools:
        raise ValueError("The tool %s is not in %s!"
                         % (entry["tool"], str(pipelineTools.keys())))

    # The key that is actually required is 'hashString' (the message used to
    # ask for a 'hash' key, which is the *derived* value).
    if "hashString" not in spec:
        raise ValueError("You need to define a 'hashString' key for file %s " % str(spec))
    entry["hash"] = cF.makeUUID(spec["hashString"].format(**match.groupdict()))

    if "frameIdx" not in entry:
        raise ValueError("You need to define a 'frameIdx' key for %s (or in regex!) " % str(spec))
    entry["frameIdx"] = int(entry["frameIdx"])
    return entry


def _validateFile(fileInfo, fileValidationTools):
    """Run the validation command for one file and return its status.

    The command template is looked up in ``fileValidationTools`` by the file
    extension of ``fileInfo["absPath"]`` and its ``{file}`` placeholder is
    replaced by that path.

    Returns:
        ``"finished"`` or ``"recover"`` as reported by the command, or
        ``"invalid"`` if no command exists, the command fails, or it prints
        anything else. This function never raises for a single bad file.
    """
    try:
        absPath = fileInfo["absPath"]
        ext = os.path.splitext(absPath)[1]
        if ext not in fileValidationTools:
            print("No validation command found for extentsion of file: %s" % absPath)
            return "invalid"

        # Split the template first and format each token afterwards, so that
        # a file path containing spaces stays one single argument.
        cmdArgs = [token.format(file=absPath)
                   for token in fileValidationTools[ext].split(" ")]
        try:
            out = subprocess.check_output(cmdArgs).decode('utf-8')
        except Exception:
            print("Validation command %s failed!" % " ".join(cmdArgs))
            return "invalid"

        # Tools usually terminate their output with a newline; ignore
        # surrounding whitespace when comparing against the known states.
        out = out.strip()
        if out not in ("finished", "recover"):
            print("Validation output %s not in list ['finished','recover']" % out)
            return "invalid"
        return out
    except Exception as err:
        # Best effort: a file that cannot be validated counts as invalid.
        print("Could not validate file entry %s: %s" % (fileInfo, err))
        return "invalid"


def searchFiles(searchDir,opts,fileValidationSpecs,fileValidationTools,pipelineTools):
    """Collect all files below ``searchDir`` matching a validation spec and validate them.

    Every file found by walking ``searchDir`` (symlinks are followed) is
    matched, by its real path, against the ``"regex"`` of each spec in
    ``fileValidationSpecs``; the first matching spec wins. The regex must
    provide a ``processId`` group convertible to ``int``. Matching files are
    grouped per process and per tool and indexed by their hash.

    If ``opts.validateOnlyLastModified`` is true, only the most recently
    modified file of each tool and process is validated (all others keep the
    initial status ``"finished"``), otherwise every file is validated.

    NOTE(review): each entry is expected to carry an ``"absPath"`` key; it is
    read here but never set, so it presumably comes from the spec or from a
    regex group -- confirm against the pipeline specs.

    Args:
        searchDir: directory to walk.
        opts: object with attribute ``validateOnlyLastModified``.
        fileValidationSpecs: sequence of dicts with at least ``"regex"`` and
            ``"hashString"``.
        fileValidationTools: mapping file extension -> command template
            containing a ``{file}`` placeholder.
        pipelineTools: mapping of the known tool names.

    Returns:
        dict mapping hash -> file entry for all files whose status is not
        ``"invalid"`` (empty if nothing matched).

    Raises:
        ValueError: for a regex that does not compile, a non-integer
            ``processId``, an incomplete spec, or two files with equal hash.
    """
    # compile all regexes once, in spec order
    regexes = []
    for spec in fileValidationSpecs:
        try:
            regexes.append(re.compile(spec["regex"]))
        except Exception:
            # any compile problem is reported uniformly as ValueError
            raise ValueError("Could not compile regex: %s" % spec["regex"])

    allFiles = {}
    filesPerProc = {}

    for dirpath, _dirs, fileNames in os.walk(searchDir, followlinks=True):
        for fileName in fileNames:
            filePath = os.path.realpath(os.path.join(dirpath, fileName))

            # try all regexes till one matches
            for spec, regex in zip(fileValidationSpecs, regexes):
                m = regex.search(filePath)
                if not m:
                    continue

                try:
                    processId = int(m.group("processId"))
                except (IndexError, TypeError, ValueError):
                    raise ValueError("Non convertable processId found in filePath %s" % filePath)

                if processId not in filesPerProc:
                    filesPerProc[processId] = {
                        "allFiles": [],
                        "tools": {name: [] for name in pipelineTools},
                    }

                f = _buildFileEntry(spec, m, pipelineTools)

                if f["hash"] in allFiles:
                    raise ValueError("Found files with the same hash %s, %s, this should not happen!"
                                     % (f["absPath"], allFiles[f["hash"]]["absPath"]))

                # the same dict object is shared by all three containers, so
                # a later status update is visible through `allFiles` as well
                filesPerProc[processId]["allFiles"].append(f)
                filesPerProc[processId]["tools"][f["tool"]].append(f)
                allFiles[f["hash"]] = f
                break

    if not allFiles:
        print("We found no files in folder: %s to validate!" % searchDir)
        return allFiles

    # sort the files of each tool and process by modification time (oldest first)
    for procFiles in filesPerProc.values():
        for toolFiles in procFiles["tools"].values():
            toolFiles.sort(key=lambda entry: os.path.getmtime(entry["absPath"]))

    # determine files to validate
    filesToValidate = []
    for procFiles in filesPerProc.values():
        if opts.validateOnlyLastModified:
            # only the newest file per tool can be incomplete
            for toolFiles in procFiles["tools"].values():
                if toolFiles:
                    filesToValidate.append(toolFiles[-1])
        else:
            filesToValidate += procFiles["allFiles"]

    # validate all selected files with the appropriate command
    for fileInfo in filesToValidate:
        fileInfo["status"] = _validateFile(fileInfo, fileValidationTools)

    print("Validated last files of each tool in the pipeline: ",
          "\n".join([f["absPath"] + " --> " + f["status"] for f in filesToValidate]))

    # drop everything which turned out to be invalid
    return {h: f for h, f in allFiles.items() if f["status"] != "invalid"}
def main():
    """Prepare a cluster pipeline job from the command line.

    Loads the pipeline specs, links every tool with its parents, lets the
    configured frame generator produce the frames, drops the frames that are
    already finished, distributes the rest over the processes and writes the
    process (and optional info) files of every tool plus the file-mover
    process file.

    Returns:
        0 on success (also when there is nothing left to distribute),
        1 if an exception occurred (message, traceback and help are printed).
    """
    parser = MyOptParser()
    parser.add_argument(
        "--pipelineSpecs",
        dest="pipelineSpecs",
        default="",
        help="""Json file with info about the pipeline, fileValidation, fileValidationTools.""",
        metavar="<path>",
        required=True,
    )
    parser.add_argument(
        "--validationFileInfo",
        dest="validationFileInfo",
        default="",
        help="""XML file with info about render output files.""",
        metavar="<path>",
        required=False,
    )
    parser.add_argument(
        "-p", "--processes",
        type=int,
        dest="processes",
        default=1,
        help="The number of processes for the cluster render",
        metavar="<integer>",
        required=True,
    )

    try:
        print("================== Prepare for Cluster Pipeline Job============")

        opts = AttrMap(vars(parser.parse_args()))

        pipelineSpecs = cF.jsonLoad(opts.pipelineSpecs)
        pipelineTools = pipelineSpecs["pipelineTools"]

        # Every tool may name the tools it depends on. The reverse relation
        # is stored as "parents": a tool is a parent of each of its
        # dependencies. First give every tool both containers ...
        for toolSpec in pipelineTools.values():
            toolSpec.setdefault("dependencies", set())
            toolSpec["parents"] = set()
        # ... then register each tool as parent of all its dependencies.
        for parentName, toolSpec in pipelineTools.items():
            for depName in toolSpec["dependencies"]:
                pipelineTools[depName]["parents"].add(parentName)

        frameGenerator = pipelineSpecs["frameGenerator"]

        # job modules handed over to the frame generator and the file writers
        importantModules = {
            "importHelpers": iH,
            "commonFunctions": cF,
            "getSimFileInfos": getSimFileInfos,
        }

        # ---- generate frames -------------------------------------------
        # the class replaces its module string inside the specs
        _, generatorClass = iH.importClassFromModuleString(frameGenerator["generator"])
        frameGenerator["generator"] = generatorClass
        fgen = generatorClass(pipelineSpecs, jobGenModules=importantModules)
        allFrames, framesPerIdx, framesToDistribute = fgen(**frameGenerator["arguments"])

        # ---- format frames ---------------------------------------------
        # format the strings of every frame once more with the frame itself
        for idx, frame in enumerate(allFrames):
            allFrames[idx] = cF.formatAll(frame, frame, formatter=StrExpr)

        # ---- filter frames ---------------------------------------------
        recoverFrames(opts, allFrames, framesPerIdx, pipelineTools)

        def notcompleted(frame):
            # A frame is not completed as long as at least one tool without
            # parents (a last tool of the pipeline) is not finished.
            pending = [
                name
                for name, toolSpec in pipelineTools.items()
                if len(toolSpec["parents"]) == 0
                and frame["tools"][name]["status"] != STATUS_FINISHED
            ]
            return len(pending) > 0

        framesCount = len(allFrames)
        allFrames = [frame for frame in allFrames if notcompleted(frame)]
        framesToDistribute = [frame for frame in framesToDistribute if notcompleted(frame)]
        print("Removed %d finished frames!" % (framesCount - len(allFrames)))

        # count number of frames to render
        totalFrames = len(framesToDistribute)
        print("Number of frames to compute %i" % totalFrames)
        if totalFrames == 0:
            print("No frames to distribute -> exit")
            return 0

        # ---- distribute the frames over the processes -------------------
        processFrames = distributeFrames(opts, framesToDistribute)

        # ---- write the process file of every tool (one per process) -----
        for toolName, toolSpec in pipelineTools.items():
            print("Load processFileGenerator for tool: %s" % toolName)
            procGen = toolSpec["processFileGenerator"]
            _, procGen["generator"] = iH.importClassFromModuleString(procGen["generator"])
            procWriter = procGen["generator"](pipelineSpecs, jobGenModules=importantModules)
            procWriter.write(processFrames, **procGen["arguments"])

            # an info file generator is optional
            if "infoFileGenerator" in toolSpec:
                print("Load infoFileGenerator for tool: %s" % toolName)
                infoGen = toolSpec["infoFileGenerator"]
                _, infoGen["generator"] = iH.importClassFromModuleString(infoGen["generator"])
                infoWriter = infoGen["generator"](pipelineSpecs, jobGenModules=importantModules)
                infoWriter.write(processFrames, **infoGen["arguments"])

        # ---- write the FileMover process file ---------------------------
        writeFileMoverProcessFile(pipelineSpecs, processFrames)

        return 0

    except Exception as e:
        print("====================================================================")
        print("Exception occured: " + str(e))
        print("====================================================================")
        traceback.print_exc(file=sys.stdout)
        parser.print_help()
        return 1