Code example #1
import copy
import os
import re
import subprocess

# cF refers to the project's commonFunctions module (provides formatAll, makeUUID).

def searchFiles(searchDir, opts, fileValidationSpecs, fileValidationTools, pipelineTools):
    # compile all regexes up front so malformed patterns fail early
    regexes = {}
    for i, spec in enumerate(fileValidationSpecs):
        try:
            regexes[i] = re.compile(spec["regex"])
        except re.error as e:
            raise ValueError("Could not compile regex: %s (%s)" % (spec["regex"], e))
        
    allFiles = {}
    filesPerProc = {}
    # walk the directory tree and match every file against the validation specs
    for dirpath, dirs, files in os.walk(searchDir, followlinks=True):

        for file in files:

            filePath = os.path.realpath(os.path.join(dirpath, file))

            # try to match the path against the regexes until one matches
            for specIdx, spec in enumerate(fileValidationSpecs):

                m = regexes[specIdx].search(filePath)
                
                # we have a file match
                if m:

                    try:
                        processId = int(m.group("processId"))
                    except (IndexError, TypeError, ValueError):
                        raise ValueError("Non-convertible processId found in filePath %s" % filePath)

                    if processId not in filesPerProc:
                        # one record per process: all files plus a bucket per pipeline tool
                        filesPerProc[processId] = {"allFiles": [], "tools": {tool: [] for tool in pipelineTools.keys()}}
                    
                    # make a dict for this file
                    f = {}
                    # add the named regex groups
                    f.update(m.groupdict())

                    # add all values from the validation spec (deep copy, since every file needs its own copy)
                    f.update(copy.deepcopy(spec))

                    # set the status to "finished" as an initial guess; validation determines the final value
                    f.update({"status": "finished"})

                    # format all values again with the regex results
                    f = cF.formatAll(f, m.groupdict(), exceptKeys={"regex": None})

                    # get the tool this file belongs to
                    if "tool" in f:
                        tool = f["tool"]
                        if tool not in pipelineTools.keys():
                            raise ValueError("The tool %s is not in %s!" % (tool, str(pipelineTools.keys())))
                    else:
                        raise ValueError("You need to define a 'tool' key for %s" % str(spec))
                        
                    # make the file hash
                    if "hashString" in spec:
                        h = cF.makeUUID(spec["hashString"].format(**m.groupdict()))
                        f["hash"] = h
                    else:
                        raise ValueError("You need to define a 'hashString' key for file %s" % str(spec))
                        
                        
                    # convert frameIdx to an integer
                    if "frameIdx" in f:
                        f["frameIdx"] = int(f["frameIdx"])
                    else:
                        raise ValueError("You need to define a 'frameIdx' key for %s (or in the regex!)" % str(spec))
                    
                    # add the file to the per-process lists
                    filesPerProc[processId]["allFiles"].append(f)
                    filesPerProc[processId]["tools"][tool].append(f)

                    if f["hash"] not in allFiles:
                        allFiles[f["hash"]] = f
                    else:
                        raise ValueError("Found files with the same hash %s, %s, this should not happen!" % (f["absPath"], allFiles[f["hash"]]["absPath"]))

                    # stop at the first matching spec; each file is handled by at most one spec
                    break
                        
    if not allFiles:
        print("Found no files to validate in folder: %s!" % searchDir)
        return allFiles
     
    # for each process and each tool, sort the files by modification time of the output file
    for procId, procFiles in filesPerProc.items():
        for tool, files in procFiles["tools"].items():
            filesPerProc[procId]["tools"][tool] = sorted(files, key=lambda file: os.path.getmtime(file["absPath"]))

    # determine the files to validate
    filesToValidate = []
    for procId, procFiles in filesPerProc.items():
        if opts.validateOnlyLastModified:
            # validate only the last modified file of each tool per process; if that
            # one is ok, all older files are considered valid
            for tool, toolFiles in procFiles["tools"].items():
                if toolFiles:
                    filesToValidate.append(toolFiles[-1])
        else:
            filesToValidate += procFiles["allFiles"]

      
    # Validate each file with the command registered for its file extension;
    # a valid file makes the command print "finished" or "recover"

    for fIdx, file in enumerate(filesToValidate):
        try:
            ext = os.path.splitext(file["absPath"])[1]
            try:
                validateCmd = fileValidationTools[ext]
            except KeyError:
                print("No validation command found for extension of file: %s" % file["absPath"])
                raise

            validateCmd = validateCmd.format(**{"file": file["absPath"]})

            try:
                out = subprocess.check_output(validateCmd.split(" ")).decode('utf-8')
            except (subprocess.CalledProcessError, OSError):
                print("Validation command %s failed!" % validateCmd)
                raise

            # strip the trailing newline which check_output usually returns
            out = out.strip()
            if out not in ["finished", "recover"]:
                print("Validation output %s not in list ['finished','recover']" % out)
                raise ValueError("unexpected validation output")

            filesToValidate[fIdx].update({"status": out})

        except Exception:

            # the file is invalid; mark it so it gets filtered out below
            filesToValidate[fIdx]["status"] = "invalid"

    print("Validated last files of each tool in the pipeline: ", "\n".join([ f["absPath"] + " --> " + f["status"] for f in filesToValidate ]) ) 

    # drop all invalid files
    allFiles = dict(filter(lambda x: x[1]["status"] != "invalid", allFiles.items()))
    del filesPerProc
    return allFiles
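
For reference, a minimal usage sketch. The spec layout is inferred from the lookups above: a "regex" with named groups, a "tool" key that must exist in pipelineTools, and a "hashString" template. All concrete names, paths, and the checker command below are hypothetical; absPath is captured here as a regex group because searchFiles expects it in every file dict.

fileValidationSpecs = [{
    # capture the whole path as absPath, plus the processId/frameIdx groups
    "regex": r"^(?P<absPath>.*Process_(?P<processId>\d+)/frame_(?P<frameIdx>\d+)\.dat)$",
    "tool": "simulator",                         # must be a key of pipelineTools
    "hashString": "sim-{processId}-{frameIdx}",  # formatted with the regex groups
}]
fileValidationTools = {".dat": "python checkSimFile.py {file}"}  # hypothetical checker script
pipelineTools = {"simulator": {}}

# opts only needs the validateOnlyLastModified flag for this function
validFiles = searchFiles("./output", opts, fileValidationSpecs, fileValidationTools, pipelineTools)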
Code example #2
# Assumes module-level imports of sys and traceback plus the project helpers used
# below (MyOptParser, AttrMap, iH, cF, StrExpr, getSimFileInfos, recoverFrames,
# distributeFrames, writeFileMoverProcessFile, STATUS_FINISHED).
def main():

    parser = MyOptParser()

    parser.add_argument("--pipelineSpecs", dest="pipelineSpecs", default="" ,
            help="""Json file with info about the pipeline, fileValidation, fileValidationTools.""", metavar="<path>", required=True)    
    
    parser.add_argument("--validationFileInfo", dest="validationFileInfo", default="" ,
            help="""XML file with info about render output files.""", metavar="<path>", required=False)
                                                                
    parser.add_argument("-p", "--processes", type=int, dest="processes", default=int(1),
            help="The number of processes for the cluster render", metavar="<integer>", required=True)
    
            
    try:

        print("================== Prepare for Cluster Pipeline Job ===========")

        opts = AttrMap(vars(parser.parse_args()))
        
        pipelineSpecs = cF.jsonLoad(opts.pipelineSpecs)
        

        pipelineTools = pipelineSpecs["pipelineTools"]
        
        # Example pipeline: tool1 ---> tool2 ---> tool3
        # tool3 has a dependency on tool2; that makes tool3 a parent of tool2.

        # define parents and dependencies for all tools
        for toolName, tool in pipelineTools.items():
            if "dependencies" not in tool:
                tool["dependencies"] = set()

            tool["parents"] = set()

        for toolName, tool in pipelineTools.items():
            for dep in tool["dependencies"]:
                t = pipelineTools[dep]
                t["parents"].add(toolName)

        
        frameGenerator = pipelineSpecs["frameGenerator"]
        # fileValidationSpecs = pipelineSpecs["fileValidationSpecs"]
        # fileValidationTools = pipelineSpecs["fileValidationTools"]
        
        # Important job modules to hand over to frameGenerators and processFileWriters
        importantModules = {"importHelpers":iH, "commonFunctions" : cF, "getSimFileInfos" : getSimFileInfos}
        
        # Generate Frames =====================================================
        mod, frameGenerator["generator"] = iH.importClassFromModuleString(frameGenerator["generator"])
        # hand over some modules to the frame generator
        fgen = frameGenerator["generator"](pipelineSpecs, jobGenModules=importantModules)
        allFrames, framesPerIdx, framesToDistribute = fgen(**frameGenerator["arguments"])
        # =====================================================================
        
            
        # Format Frames =======================================================
        # format strings in all settings (where possible) in allFrames again with itself
        for i, fr in enumerate(allFrames):
            allFrames[i] = cF.formatAll(fr, fr, formatter=StrExpr)
        
        # Filter Frames =======================================================
        recoverFrames(opts, allFrames, framesPerIdx, pipelineTools)
        # =====================================================================

        # keep only frames that are not completely finished: a frame counts as
        # unfinished if at least one final tool (a tool with no parents, i.e. the
        # last one in the pipeline) does not have status STATUS_FINISHED
        notcompleted = lambda frame: sum(1 if frame["tools"][toolName]["status"] != STATUS_FINISHED
                                         else 0 for toolName, tool in pipelineTools.items() if len(tool["parents"]) == 0) > 0
        framesCount = len(allFrames)
        allFrames = list(filter(notcompleted, allFrames))
        framesToDistribute = list(filter(notcompleted, framesToDistribute))
        print("Removed %d finished frames!" % (framesCount - len(allFrames)))
        

        # count the number of frames to compute
        totalFrames = len(framesToDistribute)
        print("Number of frames to compute: %i" % totalFrames)
        if totalFrames == 0:
            print("No frames to distribute -> exit")
            return 0
        
        # Distribute the frames over the number of processes ==================
        processFrames = distributeFrames(opts, framesToDistribute)
        # =====================================================================

        
        # Write for each tool in the pipeline the process files, one separate file per process
        for toolName, tool in pipelineTools.items():

            # load the class and module for the tool's processFileWriter
            print("Load processFileGenerator for tool: %s" % toolName)
            mod, tool["processFileGenerator"]["generator"] = iH.importClassFromModuleString(tool["processFileGenerator"]["generator"])
            tool["processFileGenerator"]["generator"](pipelineSpecs, jobGenModules=importantModules).write(processFrames, **tool["processFileGenerator"]["arguments"])
            
            # if an info file generator is configured, produce its output as well
            if "infoFileGenerator" in tool:
                print("Load infoFileGenerator for tool: %s" % toolName)
                mod, tool["infoFileGenerator"]["generator"] = iH.importClassFromModuleString(tool["infoFileGenerator"]["generator"])
                tool["infoFileGenerator"]["generator"](pipelineSpecs, jobGenModules=importantModules).write(processFrames, **tool["infoFileGenerator"]["arguments"])
            
        
        # Write FileMover process file  =======================================
        writeFileMoverProcessFile(pipelineSpecs,processFrames)
        # =====================================================================
        return 0
         
    except Exception as e:
        print("====================================================================")
        print("Exception occurred: " + str(e))
        print("====================================================================")
        traceback.print_exc(file=sys.stdout)
        parser.print_help()
        return 1
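
The excerpt stops at main(); a minimal entry-point guard would look like the sketch below (the script name in the invocation comment is hypothetical, the flags are the ones defined above):

if __name__ == "__main__":
    sys.exit(main())

# Example invocation:
#   python preparePipelineJob.py --pipelineSpecs pipelineSpecs.json -p 8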