class EDObject(object):
    """
    Virtual base class for all EDNA objects (classes).

    Every instance receives a numeric identifier, a private semaphore that
    callers can use to serialize access to the instance, and two timestamps
    (start / end) from which a run time can be derived.
    """
    __semaphoreId = Semaphore()
    __iId_class = 0

    def __init__(self):
        """
        Constructor of the main pure virtual class.

        This constructor implements:
        - the allocation of the instance identifier (under the class semaphore)
        - the creation of the per-instance semaphore
        - the definition of the timer attributes (left unset as potentially not used)
        """
        object.__init__(self)
        klass = self.__class__
        # The counter is read through the concrete class and written back on
        # it, so a subclass starts from the inherited value and then keeps
        # its own count.
        klass.__semaphoreId.acquire()
        try:
            klass.__iId_class += 1
            self.__iId = klass.__iId_class
        finally:
            klass.__semaphoreId.release()
        self.__semaphore = Semaphore()
        self.__fTimeInit = None
        self.__fTimeEnd = None
        self.__classname = None

    def getId(self):
        """
        @return: the identifier allocated to this instance by the constructor
        @rtype: integer
        """
        return self.__iId

    def getClassName(self):
        """
        Retrieves the name of the class

        @return: the name of the class
        @rtype: string
        """
        return self.__class__.__name__

    def synchronizeOn(self):
        """
        Acquire the instance semaphore.

        Must be paired with a later call to synchronizeOff(); the code executed
        in between is protected against other holders of the same semaphore.
        """
        self.__semaphore.acquire()

    def synchronizeOff(self):
        """
        Release the instance semaphore acquired by synchronizeOn().
        """
        self.__semaphore.release()

    def locked(self):
        """
        @return: the instance semaphore itself, usable as "with obj.locked():"
        """
        return self.__semaphore

    def setTimeInit(self):
        """
        Initializes the timer for the object (only the first call has an effect)
        """
        if self.__fTimeInit is not None:
            return
        self.__fTimeInit = time.time()

    def getTimeInit(self):
        """
        Retrieves the time of initialization

        @return: number of seconds since epoch, or None when the timer was never started
        @rtype: float
        """
        return self.__fTimeInit

    def setTimeEnd(self):
        """
        Set the end of calculation time for the given object (only the first call has an effect)
        """
        if self.__fTimeEnd is not None:
            return
        self.__fTimeEnd = time.time()

    def getTimeEnd(self):
        """
        Retrieves the time of end of task

        @return: number of seconds since epoch, or None when the end was never recorded
        @rtype: float
        """
        return self.__fTimeEnd

    def getRunTime(self):
        """
        @returns: the run time for the given object: 0.0 when the timer was never
                  started, the time elapsed so far when it has not been stopped,
                  otherwise the difference between end and start
        @rtype: float
        """
        fStart = self.__fTimeInit
        if fStart is None:
            return 0.0
        fStop = self.__fTimeEnd
        if fStop is None:
            fStop = time.time()
        return fStop - fStart
class Reprocess(EDLogging):
    """
    Queue EDNA jobs for a single plugin and execute them with a bounded number
    of jobs running at the same time.
    """

    def __init__(self, strPluginName, iNbCpu=None):
        """
        @param strPluginName: name of the EDNA plugin instantiated for every job
        @type strPluginName: string
        @param iNbCpu: maximum number of jobs running in parallel. When the value
                       cannot be converted to a strictly positive integer, the
                       number returned by EDUtilsParallel.detectNumberOfCPUs() is used.
        @type iNbCpu: integer (or anything accepted by int())
        """
        EDLogging.__init__(self)
        self.pluginName = strPluginName
        self.startTime = time.time()
        try:
            iRequested = int(iNbCpu)
        except (TypeError, ValueError, OverflowError):
            # None, non numeric strings, infinities ... -> auto-detection
            iRequested = 0
        if iRequested <= 0:
            # Semaphore(0) would block the first job forever and a negative
            # value is rejected by Semaphore, so fall back to auto-detection.
            iRequested = EDUtilsParallel.detectNumberOfCPUs()
        self.iNbCpu = iRequested
        self.screen("Initializing Reprocess with max %i jobs in parallel." % self.iNbCpu)
        self.__semaphoreNbThreads = Semaphore(self.iNbCpu)
        EDUtilsParallel.initializeNbThread(self.iNbCpu)
        self.jobQueue = Queue()
        self.processingSem = Semaphore()
        self.statLock = Semaphore()
        self.lastStatistics = "No statistics collected yet, please use the 'collectStatistics' method first"
        self.lastFailure = "No job Failed (yet)"
        self.lastSuccess = "No job succeeded (yet)"

    @staticmethod
    def _semaphoreValue(semaphore):
        """
        Read the internal counter of a semaphore.

        The counter is a private attribute of the threading module: it is called
        "_Semaphore__value" on Python 2 and "_value" on Python 3.

        @param semaphore: a threading.Semaphore instance
        @return: current value of the internal counter
        @rtype: integer
        """
        iValue = getattr(semaphore, "_Semaphore__value", None)
        if iValue is None:
            iValue = semaphore._value
        return iValue

    def startJob(self, xsd):
        """
        Create a job for the plugin, queue it and make sure the queue is being processed.

        @param xsd: XML data structure as a string or path to a file containing it
        @return: jobID which is a string like Plugin-000001, or None when xsd is
                 an empty (blank) string
        """
        self.DEBUG("In %s.startJob()" % self.__class__.__name__)
        # types.StringTypes only exists on Python 2
        tStringTypes = getattr(types, "StringTypes", (str,))
        if isinstance(xsd, tStringTypes):
            if xsd.strip() == "":
                return
            if os.path.isfile(xsd):
                # "with" guarantees the file descriptor is closed
                with open(xsd, "rb") as xmlFile:
                    xsd = xmlFile.read()
        edJob = EDJob(self.pluginName)
        if edJob is None:
            return "Error in load Plugin"
        jobId = edJob.getJobId()
        edJob.setDataInput(xsd)
        self.jobQueue.put(edJob)
        # Only spawn a consumer thread when none currently holds processingSem
        if self._semaphoreValue(self.processingSem) > 0:
            t = threading.Thread(target=self.startProcessing)
            t.start()
        return jobId

    def startProcessing(self):
        """
        Process all jobs in the queue.

        Holds processingSem for the whole duration so that a single consumer is
        active; one slot of the job semaphore is acquired before each job is
        launched and released by the success / failure callback.
        """
        with self.processingSem:
            while not self.jobQueue.empty():
                self.__semaphoreNbThreads.acquire()
                edJob = self.jobQueue.get()
                edJob.connectSUCCESS(self.successJobExecution)
                edJob.connectFAILURE(self.failureJobExecution)
                edJob.execute()

    def successJobExecution(self, jobId):
        """
        Callback connected to the SUCCESS slot of every job: frees one job slot,
        cleans the job and records it as the last success.

        @param jobId: identifier of the finished job
        """
        self.DEBUG("In %s.successJobExecution(%s)" % (self.__class__.__name__, jobId))
        with self.locked():
            self.__semaphoreNbThreads.release()
            EDJob.cleanJobfromID(jobId, False)
            self.lastSuccess = jobId
            gc.collect()

    def failureJobExecution(self, jobId):
        """
        Callback connected to the FAILURE slot of every job: frees one job slot,
        cleans the job, records it as the last failure and flushes the standard streams.

        @param jobId: identifier of the finished job
        """
        self.DEBUG("In %s.failureJobExecution(%s)" % (self.__class__.__name__, jobId))
        with self.locked():
            self.__semaphoreNbThreads.release()
            EDJob.cleanJobfromID(jobId, False)
            self.lastFailure = jobId
            sys.stdout.flush()
            sys.stderr.flush()
            gc.collect()

    def getRunning(self):
        """
        retrieve the list of plugins currently under execution (with their plugin-Id)
        """
        return EDStatus.getRunning()

    def getSuccess(self):
        """
        retrieve the list of plugins finished with success (with their plugin-Id)
        """
        return EDStatus.getSuccess()

    def getFailure(self):
        """
        retrieve the list of plugins finished with failure (with their plugin-Id)
        """
        return EDStatus.getFailure()

    def collectStatistics(self):
        """
        Start the collection of statistics on all EDNA jobs in a background thread.

        Nothing is returned: the result is stored in self.lastStatistics and can
        be read with getStatistics() once the thread has finished.
        """
        t = threading.Thread(target=self.statistics)
        t.start()

    def statistics(self):
        """
        retrieve some statistics about past jobs.

        @return: the text returned by EDJob.stats() followed by a line giving the
                 collection date and duration
        """
        with self.statLock:
            fStartStat = time.time()
            self.lastStatistics = EDJob.stats()
            self.lastStatistics += os.linesep + "Statistics collected on %s, the collect took: %.3fs" % (time.asctime(), time.time() - fStartStat)
            return self.lastStatistics

    def getStatistics(self):
        """
        just return statistics previously calculated
        """
        return self.lastStatistics

    def getJobOutput(self, jobId):
        """
        Retrieve XML output form a job

        @param jobId: name of the job
        @return: output from a job
        """
        return EDJob.getDataOutputFromId(jobId)

    def getJobInput(self, jobId):
        """
        Retrieve XML input from a job

        @param jobId: name of the job
        @return: xml input from a job
        """
        return EDJob.getDataInputFromId(jobId)

    def join(self):
        """
        wait for all jobs to finish

        Polls once per second until the queue is empty, every job slot is free,
        no consumer holds processingSem and nothing is reported as running by
        EDUtilsParallel or EDStatus.
        """
        while not (self.jobQueue.empty()
                   and (self._semaphoreValue(self.__semaphoreNbThreads) == self.iNbCpu)
                   and (EDUtilsParallel.getNbRunning() == 0)
                   and (self._semaphoreValue(self.processingSem) == 1)
                   and (len(EDStatus.getRunning()) == 0)):
            time.sleep(1)
class EDObject(object):
    """
    Virtual base class for all EDNA Objects (classes).

    It offers some synchronization and locking capabilities to make the code
    thread safe, a start / end timer, and a class-level registry ("profiling")
    collecting the run times measured for every class name.
    """
    __semaphoreId = Semaphore()
    __iId_class = 0
    # class name -> list of measured run times (seconds); shared by all subclasses
    profiling = {}

    def __init__(self):
        """
        Constructor of the main pure virtual class.

        This constructor implements:
        - the allocation of the instance identifier
        - the registration of the class name in the profiling registry
        - the creation of the semaphore
        - definition of timer object (uninitialized as potentially not used)
        """
        object.__init__(self)
        with self.__class__.__semaphoreId:
            self.__class__.__iId_class += 1
            self.__iId = self.__class__.__iId_class
            if self.__class__.__name__ not in self.__class__.profiling:
                self.__class__.profiling[self.__class__.__name__] = []
        self.__semaphore = Semaphore()
        self.__fTimeInit = None
        self.__fTimeEnd = None
        self.__classname = None

    def getId(self):
        """
        @return: the identifier allocated to this instance by the constructor
        @rtype: integer
        """
        return self.__iId

    def getClassName(self):
        """
        Retrieves the name of the class

        @return: the name of the class
        @rtype: string
        """
        return self.__class__.__name__

    def synchronizeOn(self):
        """
        This method must be used together with the method synchronizeOff().
        This method makes the code thread-safe till the method synchronizeOff is called.
        """
        self.__semaphore.acquire()

    def synchronizeOff(self):
        """
        This method must be used together with the method synchronizeOn().
        """
        self.__semaphore.release()

    def getSemaphoreValue(self):
        """
        This method should only be used for debugging purpose...

        The counter is a private attribute of the threading module: it is named
        "_Semaphore__value" on Python 2 and "_value" on Python 3, both are supported.

        @return: the "internal" value of the semaphore
        @rtype: integer
        """
        iValue = getattr(self.__semaphore, "_Semaphore__value", None)
        if iValue is None:
            iValue = self.__semaphore._value
        return iValue

    def locked(self):
        """
        @return: the instance semaphore itself, usable as "with obj.locked():"
        """
        return self.__semaphore

    def setTimeInit(self):
        """
        Initializes the timer for the object (only the first call has an effect)
        """
        if self.__fTimeInit is None:
            self.__fTimeInit = time.time()

    def getTimeInit(self):
        """
        Retrieves the time of initialization

        @return: number of seconds since epoch
        @rtype: float
        """
        return self.__fTimeInit

    def setTimeEnd(self):
        """
        Set the end of calculation time for the given object.

        Only the first call has an effect; when the timer had been started the
        measured duration is appended to the profiling registry of the class.
        """
        if self.__fTimeEnd is None:
            self.__fTimeEnd = time.time()
            if self.__fTimeInit:
                # setdefault: the entry normally exists since __init__, but the
                # registry is public and may have been cleared in between
                self.__class__.profiling.setdefault(self.getClassName(), []).append(
                    self.__fTimeEnd - self.__fTimeInit)

    def getTimeEnd(self):
        """
        Retrieves the time of end of task

        @return: number of seconds since epoch
        @rtype: float
        """
        return self.__fTimeEnd

    def getRunTime(self):
        """
        @returns: the RunTime for the given object (0.0 when the timer was never started)
        @rtype: float
        """
        fRunTime = 0.0
        if self.__fTimeInit is not None:
            if self.__fTimeEnd is None:
                fRunTime = time.time() - self.__fTimeInit
            else:
                fRunTime = self.__fTimeEnd - self.__fTimeInit
        return fRunTime

    @classmethod
    def analyze_profiling(cls):
        """
        Analyse the profiling and return a list of strings to be printed out.

        One line is produced per registered class name, sorted by increasing
        total time (ties are ordered by name). Classes without any recorded
        timing are reported with zero calls and zero times.

        @return: header lines followed by one line per class name
        @rtype: list of strings
        """
        import numpy
        res = ["Analysis on: %s" % time.asctime(),
               " Calls | Mean (s) | Std dev | Total (s) | Plugin name",
               "-" * 80]
        rows = []
        # list(): take a snapshot, other threads may register classes meanwhile
        for name, lst_timings in list(cls.profiling.items()):
            npd = numpy.array(lst_timings, dtype=float)
            if npd.size:
                tot = float(npd.sum())
                mean = tot / npd.size
                std = float(npd.std())
            else:
                # avoid NaN (and numpy warnings) for classes never timed
                tot = mean = std = 0.0
            line = " %6d | %8.3f | %8.3f | %9.3f | %s " % (npd.size, mean, std, tot, name)
            # A list of tuples rather than a dict keyed by total time: two
            # classes with the same total must not overwrite each other.
            rows.append((tot, name, line))
        rows.sort()
        res.extend(line for _, _, line in rows)
        return res
class EDParallelExecute(EDLogging):
    """
    A class helping to make a multi-threaded application from a plugin name and a list of files.
    """

    def __init__(self, _strPluginName, _functXMLin,
                 _functXMLout=None, _functXMLerr=None,
                 _iNbThreads=None, _fDelay=1.0, _bVerbose=None, _bDebug=None):
        """
        This is the constructor of the edna plugin launcher.

        @param _strPluginName: the name of the EDNA plugin
        @type _strPluginName: python string

        @param _functXMLin: a function taking a path in input and returning the XML string for input in the EDNA plugin.
        @type _functXMLin: python function

        @param _functXMLout: a function to be called each time a plugin has finished its job successfully, it should take two arguments: strXMLin and strXMLout
        @type _functXMLout: python function

        @param _functXMLerr: a function to be called each time a plugin has finished its job and crashed, it should take ONE argument: strXMLin
        @type _functXMLerr: python function

        @param _iNbThreads: The number of parallel threads to be used by EDNA, usually the number of cores of the computer. If 0 or None, the number of cores will be auto-detected.
        @type _iNbThreads: python integer

        @param _fDelay: The delay in seconds between two directories analysis
        @type _fDelay: python float

        @param _bVerbose: Do you want the EDNA plugin execution to be verbose ?
        @type _bVerbose: boolean

        @param _bDebug: Do you want EDNA plugin execution debug output (OBS! very verbose) ?
        @type _bDebug: boolean
        """
        EDLogging.__init__(self)
        self.__iNbThreads = EDUtilsParallel.detectNumberOfCPUs(_iNbThreads)
        EDUtilsParallel.initializeNbThread(self.__iNbThreads)
        # We are not using the semaphore from EDUtilsParallel: it is left to
        # control the number of execPlugins.
        self.__semaphoreNbThreads = Semaphore(self.__iNbThreads)
        self.__strPluginName = _strPluginName
        self.__functXMLin = _functXMLin
        self.__functXMLout = _functXMLout
        self.__functXMLerr = _functXMLerr
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = None
        self.__listInputPaths = []
        self.__dictCurrentlyRunning = {}
        if _bVerbose is not None:
            if _bVerbose:
                # NOTE(review): verbose=True switches the *debug* verbosity on,
                # exactly like _bDebug=True below -- confirm this is intended.
                self.setVerboseDebugOn()
            else:
                self.setVerboseOff()
        if _bDebug is not None:
            if _bDebug:
                self.setVerboseDebugOn()
            else:
                self.setVerboseDebugOff()
        self.__fDelay = _fDelay  # default delay between two directory checks.
        self.__bQuit = False  # To check if we should quit the application
        self.__bIsFirstExecute = True
        signal.signal(signal.SIGTERM, self.handleKill)
        signal.signal(signal.SIGINT, self.handleKill)

    def runEDNA(self, _pyListInputPaths=None, _strMode="dirwatch", _bNewerOnly=False):
        """
        This method runs the parallel execution on the list of directories.

        @param _pyListInputPaths: the name of the directories to look after
                                  (default: the current directory). Relative paths are
                                  resolved against the working directory at call time.
        @type _pyListInputPaths: python list of strings
        @param _strMode: can be dirwatch, inotify, or OffLine (inotify being not yet implemented)
        @type _strMode: python string
        @param _bNewerOnly: in online mode, process only new files (appearing after the program has started), by default it will process all files then wait for newer files and process them.
        @type _bNewerOnly: boolean
        @raise NotImplementedError: when _strMode is "inotify"
        """
        if _pyListInputPaths is None:
            _pyListInputPaths = ["."]
        # Resolve the paths BEFORE moveToTempDir() changes the working
        # directory, otherwise "." would designate the fresh log directory.
        listAbsPaths = [os.path.abspath(onePath) for onePath in _pyListInputPaths]
        self.moveToTempDir()
        self.__listInputPaths = listAbsPaths
        if _strMode == "dirwatch":
            self.watch_directories(_bNewerOnly)
        elif _strMode == "inotify":
            print("inotify online notify mode not yet implemented")
            # A bare "raise" without active exception is itself an error
            raise NotImplementedError("inotify online notify mode not yet implemented")
        else:  # mode offline
            self.runEdnaFunction(self.__listInputPaths, _bIncludeSubdirs=True)
            self.waitForAllProcessToFinish()

    def moveToTempDir(self):
        """
        Create a temporary directory and put all logs there
        (the process working directory is changed to it).
        """
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = tempfile.mkdtemp(suffix='.log', prefix='edna-')
        self.screen("The log directory of EDNA will be in " + self.__strTempDir)
        os.chdir(self.__strTempDir)

    def start(self, _strXmlInput):
        """
        Launch EDNA with the given XML stream

        @param _strXmlInput: XML to be passed to the plugin
        @type _strXmlInput: python string representing the XML data structure
        @return: the identifier returned by the job execution, None when the
                 input is empty or the job could not be launched
        """
        jobid = None
        if _strXmlInput not in ["", None]:
            job = EDJob(self.__strPluginName)
            job.setDataInput(_strXmlInput)
            job.connectFAILURE(self.failureJobExecution)
            job.connectSUCCESS(self.successJobExecution)
            job.connectCallBack(self.unregisterJob)
            # Blocks until one of the job slots is free
            self.semaphoreNbThreadsAcquire()
            jobid = job.execute()
            self.DEBUG("Running Job id %s" % jobid)
            if jobid is None:
                # nothing was launched: give the slot back
                self.semaphoreNbThreadsRelease()
        return jobid

    def successJobExecution(self, _jobId):
        """
        Method called when the execution of the plugin finishes with success

        @param _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.successJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            if self.__functXMLout is not None:
                job = EDJob.getJobFromID(_jobId)
                self.__functXMLout(job.getPlugin().getDataInput(), job.getPlugin().getDataOutput())

    def failureJobExecution(self, _jobId):
        """
        Method called when the execution of the plugin finishes with failure

        @param _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.failureJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            if self.__functXMLerr is not None:
                self.__functXMLerr(EDJob.getJobFromID(_jobId).getPlugin().getDataInput())

    def unregisterJob(self, _jobid):
        """
        remove the filename from the list of files currently under processing

        @param _jobid: string of type EDPluginName-number
        """
        with self.locked():
            # iterate over a copy: entries are removed while looping
            for oneKey in self.__dictCurrentlyRunning.copy():
                if self.__dictCurrentlyRunning[oneKey] == _jobid:
                    self.__dictCurrentlyRunning.pop(oneKey)

    def runEdnaFunction(self, _listNewFiles, _bIncludeSubdirs=False):
        """
        This method is the launcher for new files found by watch_directories ; it is also called directly in offline mode.

        @param _listNewFiles: list of files newly created in the directory.
        @type _listNewFiles: python list of strings.
        @param _bIncludeSubdirs: should we include sub-directories ? yes for offline and no for online.
        @type _bIncludeSubdirs: boolean
        """
        for oneFile in _listNewFiles:
            if os.path.isdir(oneFile) and _bIncludeSubdirs == True:
                for root, _, onesubdirfiles in os.walk(oneFile):
                    for onesubdirfile in onesubdirfiles:
                        strFilename = os.path.abspath(os.path.join(root, onesubdirfile))
                        if self.__bQuit == True:
                            return
                        self.processOneFile(strFilename)
            elif os.path.isfile(oneFile):
                if self.__bQuit == True:
                    return
                self.processOneFile(oneFile)

    def processOneFile(self, _strFilename):
        """
        Process one file by calling subsequently the XML generator and the start method unless this file is
        already under process (can happen with the watch_directories method).

        @param _strFilename: filename to process
        @type _strFilename: string
        """
        if _strFilename not in self.__dictCurrentlyRunning:
            with self.locked():
                # placeholder value until the real job identifier is known
                self.__dictCurrentlyRunning[_strFilename] = self.__strPluginName
            strXmlData = self.__functXMLin(_strFilename)
            if strXmlData in [None, ""]:
                self.log("Not processing % s" % _strFilename)
                with self.locked():
                    self.__dictCurrentlyRunning.pop(_strFilename)
            else:
                self.screen("Processing % s" % _strFilename)
                jobid = self.start(strXmlData)
                with self.locked():
                    if jobid is None:
                        self.__dictCurrentlyRunning.pop(_strFilename)
                    else:
                        self.__dictCurrentlyRunning[_strFilename] = jobid

    def watch_directories(self, _bNewerOnly=False):
        """
        Continuously monitors the paths and their subdirectories
        for changes. If any files or directories are modified,
        the callable function ( here the method self.runEdnaFunction() ) is called
        with a list of the modified paths of both
        files and directories. This function can return a Boolean value
        for rescanning; if it returns True, the directory tree will be
        rescanned without calling the function for any found changes.
        (This is so this function can write changes into the tree and prevent itself
        from being immediately called again.)

        @param _bNewerOnly : Do you want to process only newer files
        @type _bNewerOnly : Boolean

        Basic principle: dictAllFiles is a dictionary mapping paths to
        modification times. We repeatedly crawl through the directory
        tree rooted at each input path, doing a stat() on each file and comparing
        the modification time.
        """
        dictAllFiles = {}
        dictRemainingFiles = {}
        listChangedFiles = []

        def internalUpdateDict(unused, dirname, files):
            "Traversal function for directories"
            for strFilename in files:
                path = os.path.join(dirname, strFilename)
                try:
                    tempStat = os.stat(path)
                except os.error:
                    # If a file has been deleted between the directory
                    # listing and now, we'll get an os.error here.
                    # Just ignore it -- the deletion shows up on the
                    # next pass through the main loop.
                    continue
                mtime = dictRemainingFiles.get(path)
                if mtime is not None:
                    # Record this file as having been seen
                    del dictRemainingFiles[path]
                    # File's mtime has been changed since we last looked at it.
                    if tempStat.st_mtime > mtime:
                        listChangedFiles.append(path)
                else:
                    # No recorded modification time, so it must be
                    # a brand new file.
                    listChangedFiles.append(path)
                # Record current mtime of file.
                dictAllFiles[path] = tempStat.st_mtime

        def scanAllInputPaths():
            "Crawl every input path and feed each directory listing to internalUpdateDict"
            # os.walk replaces os.path.walk, which no longer exists on Python 3;
            # sub-directories and files are both reported, as before.
            for onePath in self.__listInputPaths:
                for root, dirs, files in os.walk(onePath):
                    internalUpdateDict(None, root, dirs + files)

        if _bNewerOnly:
            # Prime the dictionary so that pre-existing files are not reported
            scanAllInputPaths()

        # Main loop
        rescan = False
        while not self.__bQuit:
            listChangedFiles = []
            dictRemainingFiles = dictAllFiles.copy()
            dictAllFiles = {}
            scanAllInputPaths()
            if rescan:
                rescan = False
            elif listChangedFiles:
                rescan = self.runEdnaFunction(listChangedFiles, _bIncludeSubdirs=False)
            time.sleep(self.__fDelay)
        print("Quitting the online mode.")

    def handleKill(self, signum, frame):
        """
        This method is launched when the program catches ctrl-c or get killed. It initialize the exit of the program

        @param signum: number of the received signal
        @param frame: interrupted stack frame
        """
        self.__bQuit = True
        sys.stderr.write("Exit requested by signal %s with frame %s.\n" % (signum, frame))
        self.waitForAllProcessToFinish()
        os.chdir(self.__strCurrWorkDir)

    def flush(self):
        """
        This method calls the functXMLin a few times with a flush=True argument or without arguments and finishes the work

        The supported signatures are tried in turn, a TypeError meaning "this
        signature is not supported": (None, flush=True), ("", flush=True),
        ("") and finally no argument at all. Every XML returned is started
        as a job; the loop stops at the first empty answer.
        """
        functXMLin = self.__functXMLin
        tupleAttempts = (
            lambda: functXMLin(None, flush=True),
            lambda: functXMLin("", flush=True),
            lambda: functXMLin(""),
            # last resort: the call without arguments announced above
            lambda: functXMLin(),
        )
        bFinished = False
        while not bFinished:
            xml = None
            for oneAttempt in tupleAttempts:
                try:
                    xml = oneAttempt()
                except TypeError:
                    xml = None
                    continue
                break
            if (xml is None) or (xml == ""):
                bFinished = True
            else:
                self.screen("Flushing data ...")
                self.start(xml)

    def waitForAllProcessToFinish(self):
        """
        as it names says, this method waits for all plug-ins which are currently running to finish before returning.
        """
        self.screen("Waiting for launched jobs to finish .")
        while (self.getNbRunning() > 0):
            time.sleep(1)
            sys.stderr.write(".")
        sys.stderr.write("Done.\n")
        EDJob.stats()

    def cleanUp(self, listMethods=None):
        """
        Final hook if you need to execute something after all processes finished (like killAllWorkers in SPD)

        @param listMethods: allows to finish some things in the plugin.
        @type listMethods: list of strings representing names of methods of the plugin to be called.
        """
        self.waitForAllProcessToFinish()
        for strOneMethod in (listMethods or []):
            try:
                print("calling edPlugin.%s" % strOneMethod)
                # NOTE(review): "edPlugin" is not defined in this scope, so this
                # always ends in the except branch; it also executes arbitrary
                # text -- only ever pass trusted method names here.
                exec("edPlugin.%s" % strOneMethod)
            except Exception:
                print("error in processing %s" % strOneMethod)

    ################################################################################
    # Nota: there are 2 levels of controls for the number of thread currently running:
    # * One here to limit the number of control plugin running at once
    # * One on the Exec plugin level for finer grain optimisation
    ################################################################################
    def semaphoreNbThreadsAcquire(self):
        """Method to acquire the semaphore that controls the number of plugins running concurrently"""
        self.__semaphoreNbThreads.acquire()

    def semaphoreNbThreadsRelease(self):
        """Method to release the semaphore that controls the number of plugins running concurrently"""
        self.__semaphoreNbThreads.release()

    def getNbRunning(self):
        """
        Getter for the number of job slots currently taken

        @return: the number of threads minus the current value of the job semaphore
        @rtype: integer
        """
        # The semaphore counter is private to the threading module:
        # "_Semaphore__value" on Python 2, "_value" on Python 3.
        iFree = getattr(self.__semaphoreNbThreads, "_Semaphore__value", None)
        if iFree is None:
            iFree = self.__semaphoreNbThreads._value
        return self.__iNbThreads - iFree
class EDParallelExecute(EDLogging):
    """
    A class helping to make a multi-threaded application from a plugin name and a list of files.
    """

    def __init__(self, _strPluginName, _functXMLin,
                 _functXMLout=None, _functXMLerr=None,
                 _iNbThreads=None, _fDelay=1.0, _bVerbose=None, _bDebug=None):
        """
        This is the constructor of the edna plugin launcher.

        @param _strPluginName: the name of the EDNA plugin
        @type _strPluginName: python string

        @param _functXMLin: a function taking a path in input and returning the XML string for input in the EDNA plugin.
        @type _functXMLin: python function

        @param _functXMLout: a function to be called each time a plugin has finished its job successfully, it should take two arguments: strXMLin and strXMLout
        @type _functXMLout: python function

        @param _functXMLerr: a function to be called each time a plugin has finished its job and crashed, it should take ONE argument: strXMLin
        @type _functXMLerr: python function

        @param _iNbThreads: The number of parallel threads to be used by EDNA, usually the number of cores of the computer. If 0 or None, the number of cores will be auto-detected.
        @type _iNbThreads: python integer

        @param _fDelay: The delay in seconds between two directories analysis
        @type _fDelay: python float

        @param _bVerbose: Do you want the EDNA plugin execution to be verbose ?
        @type _bVerbose: boolean

        @param _bDebug: Do you want EDNA plugin execution debug output (OBS! very verbose) ?
        @type _bDebug: boolean
        """
        EDLogging.__init__(self)
        self.__iNbThreads = EDUtilsParallel.detectNumberOfCPUs(_iNbThreads)
        EDUtilsParallel.initializeNbThread(self.__iNbThreads)
        # We are not using the semaphore from EDUtilsParallel: it is left to
        # control the number of execPlugins.
        self.__semaphoreNbThreads = Semaphore(self.__iNbThreads)
        self.__strPluginName = _strPluginName
        self.__functXMLin = _functXMLin
        self.__functXMLout = _functXMLout
        self.__functXMLerr = _functXMLerr
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = None
        self.__listInputPaths = []
        self.__dictCurrentlyRunning = {}
        if _bVerbose is not None:
            if _bVerbose:
                # NOTE(review): verbose=True switches the *debug* verbosity on,
                # exactly like _bDebug=True below -- confirm this is intended.
                self.setVerboseDebugOn()
            else:
                self.setVerboseOff()
        if _bDebug is not None:
            if _bDebug:
                self.setVerboseDebugOn()
            else:
                self.setVerboseDebugOff()
        self.__fDelay = _fDelay  # default delay between two directory checks.
        self.__bQuit = False  # To check if we should quit the application
        self.__bIsFirstExecute = True
        signal.signal(signal.SIGTERM, self.handleKill)
        signal.signal(signal.SIGINT, self.handleKill)

    def runEDNA(self, _pyListInputPaths=None, _strMode="dirwatch", _bNewerOnly=False):
        """
        This method runs the parallel execution on the list of directories.

        @param _pyListInputPaths: the name of the directories to look after
                                  (default: the current directory). Relative paths are
                                  resolved against the working directory at call time.
        @type _pyListInputPaths: python list of strings
        @param _strMode: can be dirwatch, inotify, or OffLine (inotify being not yet implemented)
        @type _strMode: python string
        @param _bNewerOnly: in online mode, process only new files (appearing after the program has started), by default it will process all files then wait for newer files and process them.
        @type _bNewerOnly: boolean
        @raise NotImplementedError: when _strMode is "inotify"
        """
        if _pyListInputPaths is None:
            _pyListInputPaths = ["."]
        # Resolve the paths BEFORE moveToTempDir() changes the working
        # directory, otherwise "." would designate the fresh log directory.
        listAbsPaths = [os.path.abspath(onePath) for onePath in _pyListInputPaths]
        self.moveToTempDir()
        self.__listInputPaths = listAbsPaths
        if _strMode == "dirwatch":
            self.watch_directories(_bNewerOnly)
        elif _strMode == "inotify":
            print("inotify online notify mode not yet implemented")
            # A bare "raise" without active exception is itself an error
            raise NotImplementedError("inotify online notify mode not yet implemented")
        else:  # mode offline
            self.runEdnaFunction(self.__listInputPaths, _bIncludeSubdirs=True)
            self.waitForAllProcessToFinish()

    def moveToTempDir(self):
        """
        Create a temporary directory and put all logs there
        (the process working directory is changed to it).
        """
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = tempfile.mkdtemp(suffix='.log', prefix='edna-')
        self.screen("The log directory of EDNA will be in " + self.__strTempDir)
        os.chdir(self.__strTempDir)

    def start(self, _strXmlInput):
        """
        Launch EDNA with the given XML stream

        @param _strXmlInput: XML to be passed to the plugin
        @type _strXmlInput: python string representing the XML data structure
        @return: the identifier returned by the job execution, None when the
                 input is empty or the job could not be launched
        """
        jobid = None
        if _strXmlInput not in ["", None]:
            job = EDJob(self.__strPluginName)
            job.setDataInput(_strXmlInput)
            job.connectFAILURE(self.failureJobExecution)
            job.connectSUCCESS(self.successJobExecution)
            job.connectCallBack(self.unregisterJob)
            # Blocks until one of the job slots is free
            self.semaphoreNbThreadsAcquire()
            jobid = job.execute()
            self.DEBUG("Running Job id %s" % jobid)
            if jobid is None:
                # nothing was launched: give the slot back
                self.semaphoreNbThreadsRelease()
        return jobid

    def successJobExecution(self, _jobId):
        """
        Method called when the execution of the plugin finishes with success

        @param _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.successJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            if self.__functXMLout is not None:
                job = EDJob.getJobFromID(_jobId)
                self.__functXMLout(job.getPlugin().getDataInput(), job.getPlugin().getDataOutput())

    def failureJobExecution(self, _jobId):
        """
        Method called when the execution of the plugin finishes with failure

        @param _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.failureJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            if self.__functXMLerr is not None:
                self.__functXMLerr(EDJob.getJobFromID(_jobId).getPlugin().getDataInput())

    def unregisterJob(self, _jobid):
        """
        remove the filename from the list of files currently under processing

        @param _jobid: string of type EDPluginName-number
        """
        with self.locked():
            # iterate over a copy: entries are removed while looping
            for oneKey in self.__dictCurrentlyRunning.copy():
                if self.__dictCurrentlyRunning[oneKey] == _jobid:
                    self.__dictCurrentlyRunning.pop(oneKey)

    def runEdnaFunction(self, _listNewFiles, _bIncludeSubdirs=False):
        """
        This method is the launcher for new files found by watch_directories ; it is also called directly in offline mode.

        @param _listNewFiles: list of files newly created in the directory.
        @type _listNewFiles: python list of strings.
        @param _bIncludeSubdirs: should we include sub-directories ? yes for offline and no for online.
        @type _bIncludeSubdirs: boolean
        """
        for oneFile in _listNewFiles:
            if os.path.isdir(oneFile) and _bIncludeSubdirs == True:
                for root, _, onesubdirfiles in os.walk(oneFile):
                    for onesubdirfile in onesubdirfiles:
                        strFilename = os.path.abspath(os.path.join(root, onesubdirfile))
                        if self.__bQuit == True:
                            return
                        self.processOneFile(strFilename)
            elif os.path.isfile(oneFile):
                if self.__bQuit == True:
                    return
                self.processOneFile(oneFile)

    def processOneFile(self, _strFilename):
        """
        Process one file by calling subsequently the XML generator and the start method unless this file is
        already under process (can happen with the watch_directories method).

        @param _strFilename: filename to process
        @type _strFilename: string
        """
        if _strFilename not in self.__dictCurrentlyRunning:
            with self.locked():
                # placeholder value until the real job identifier is known
                self.__dictCurrentlyRunning[_strFilename] = self.__strPluginName
            strXmlData = self.__functXMLin(_strFilename)
            if strXmlData in [None, ""]:
                self.log("Not processing % s" % _strFilename)
                with self.locked():
                    self.__dictCurrentlyRunning.pop(_strFilename)
            else:
                self.screen("Processing % s" % _strFilename)
                jobid = self.start(strXmlData)
                with self.locked():
                    if jobid is None:
                        self.__dictCurrentlyRunning.pop(_strFilename)
                    else:
                        self.__dictCurrentlyRunning[_strFilename] = jobid

    def watch_directories(self, _bNewerOnly=False):
        """
        Continuously monitors the paths and their subdirectories
        for changes. If any files or directories are modified,
        the callable function ( here the method self.runEdnaFunction() ) is called
        with a list of the modified paths of both
        files and directories. This function can return a Boolean value
        for rescanning; if it returns True, the directory tree will be
        rescanned without calling the function for any found changes.
        (This is so this function can write changes into the tree and prevent itself
        from being immediately called again.)

        @param _bNewerOnly : Do you want to process only newer files
        @type _bNewerOnly : Boolean

        Basic principle: dictAllFiles is a dictionary mapping paths to
        modification times. We repeatedly crawl through the directory
        tree rooted at each input path, doing a stat() on each file and comparing
        the modification time.
        """
        dictAllFiles = {}
        dictRemainingFiles = {}
        listChangedFiles = []

        def internalUpdateDict(unused, dirname, files):
            "Traversal function for directories"
            for strFilename in files:
                path = os.path.join(dirname, strFilename)
                try:
                    tempStat = os.stat(path)
                except os.error:
                    # If a file has been deleted between the directory
                    # listing and now, we'll get an os.error here.
                    # Just ignore it -- the deletion shows up on the
                    # next pass through the main loop.
                    continue
                mtime = dictRemainingFiles.get(path)
                if mtime is not None:
                    # Record this file as having been seen
                    del dictRemainingFiles[path]
                    # File's mtime has been changed since we last looked at it.
                    if tempStat.st_mtime > mtime:
                        listChangedFiles.append(path)
                else:
                    # No recorded modification time, so it must be
                    # a brand new file.
                    listChangedFiles.append(path)
                # Record current mtime of file.
                dictAllFiles[path] = tempStat.st_mtime

        def scanAllInputPaths():
            "Crawl every input path and feed each directory listing to internalUpdateDict"
            # os.walk replaces os.path.walk, which no longer exists on Python 3;
            # sub-directories and files are both reported, as before.
            for onePath in self.__listInputPaths:
                for root, dirs, files in os.walk(onePath):
                    internalUpdateDict(None, root, dirs + files)

        if _bNewerOnly:
            # Prime the dictionary so that pre-existing files are not reported
            scanAllInputPaths()

        # Main loop
        rescan = False
        while not self.__bQuit:
            listChangedFiles = []
            dictRemainingFiles = dictAllFiles.copy()
            dictAllFiles = {}
            scanAllInputPaths()
            if rescan:
                rescan = False
            elif listChangedFiles:
                rescan = self.runEdnaFunction(listChangedFiles, _bIncludeSubdirs=False)
            time.sleep(self.__fDelay)
        print("Quitting the online mode.")

    def handleKill(self, signum, frame):
        """
        This method is launched when the program catches ctrl-c or get killed. It initialize the exit of the program

        @param signum: number of the received signal
        @param frame: interrupted stack frame
        """
        self.__bQuit = True
        sys.stderr.write("Exit requested by signal %s with frame %s.\n" % (signum, frame))
        self.waitForAllProcessToFinish()
        os.chdir(self.__strCurrWorkDir)

    def flush(self):
        """
        This method calls the functXMLin a few times with a flush=True argument or without arguments and finishes the work

        The supported signatures are tried in turn, a TypeError meaning "this
        signature is not supported": (None, flush=True), ("", flush=True),
        ("") and finally no argument at all. Every XML returned is started
        as a job; the loop stops at the first empty answer.
        """
        functXMLin = self.__functXMLin
        tupleAttempts = (
            lambda: functXMLin(None, flush=True),
            lambda: functXMLin("", flush=True),
            lambda: functXMLin(""),
            # last resort: the call without arguments announced above
            lambda: functXMLin(),
        )
        bFinished = False
        while not bFinished:
            xml = None
            for oneAttempt in tupleAttempts:
                try:
                    xml = oneAttempt()
                except TypeError:
                    xml = None
                    continue
                break
            if (xml is None) or (xml == ""):
                bFinished = True
            else:
                self.screen("Flushing data ...")
                self.start(xml)

    def waitForAllProcessToFinish(self):
        """
        as it names says, this method waits for all plug-ins which are currently running to finish before returning.
        """
        self.screen("Waiting for launched jobs to finish .")
        while (self.getNbRunning() > 0):
            time.sleep(1)
            sys.stderr.write(".")
        sys.stderr.write("Done.\n")
        EDJob.stats()

    def cleanUp(self, listMethods=None):
        """
        Final hook if you need to execute something after all processes finished (like killAllWorkers in SPD)

        @param listMethods: allows to finish some things in the plugin.
        @type listMethods: list of strings representing names of methods of the plugin to be called.
        """
        self.waitForAllProcessToFinish()
        for strOneMethod in (listMethods or []):
            try:
                print("calling edPlugin.%s" % strOneMethod)
                # NOTE(review): "edPlugin" is not defined in this scope, so this
                # always ends in the except branch; it also executes arbitrary
                # text -- only ever pass trusted method names here.
                exec("edPlugin.%s" % strOneMethod)
            except Exception:
                print("error in processing %s" % strOneMethod)

    ################################################################################
    # Nota: there are 2 levels of controls for the number of thread currently running:
    # * One here to limit the number of control plugin running at once
    # * One on the Exec plugin level for finer grain optimisation
    ################################################################################
    def semaphoreNbThreadsAcquire(self):
        """Method to acquire the semaphore that controls the number of plugins running concurrently"""
        self.__semaphoreNbThreads.acquire()

    def semaphoreNbThreadsRelease(self):
        """Method to release the semaphore that controls the number of plugins running concurrently"""
        self.__semaphoreNbThreads.release()

    def getNbRunning(self):
        """
        Getter for the number of job slots currently taken

        @return: the number of threads minus the current value of the job semaphore
        @rtype: integer
        """
        # The semaphore counter is private to the threading module:
        # "_Semaphore__value" on Python 2, "_value" on Python 3.
        iFree = getattr(self.__semaphoreNbThreads, "_Semaphore__value", None)
        if iFree is None:
            iFree = self.__semaphoreNbThreads._value
        return self.__iNbThreads - iFree
class Reprocess(object):
    """
    Command line driver that builds one XML input per data file, wraps it in an
    EDJob running the plugin named by EDNAPluginName and limits the number of
    jobs in flight with a semaphore of nbcpu slots.
    """
    EDNAPluginName = "EDPluginBioSaxsProcessOneFilev1_4"
    # Conversion constant used as wavelength = hc / energy (keV.Angstrom, see --energy help)
    hc = 12.398419292004204

    def __init__(self):
        """
        Set the default XML template and the default processing options.
        """
        # Built-in template; the ${...} placeholders are filled in by fileName2xml
        self.XML = (
            "<XSDataInput>"
            "<normalizedImage><path><value>${FULLPATH}</value></path></normalizedImage>"
            "<correctedImage><path><value>${DIRDIRNAME}/2d/${BASENAME}.edf</value></path></correctedImage>"
            "<normalizedImageSize><value>4100000</value></normalizedImageSize>"
            "<integratedCurve><path><value>${DIRDIRNAME}/1d/${BASENAME}.edf</value></path></integratedCurve>"
            "<maskFile><path><value>${MASKFILE}</value></path></maskFile>"
            "<code><value>BSA</value></code>"
            "</XSDataInput>"
        )
        # Template actually used by fileName2xml (replaced by --template in parse).
        # It used to be left undefined, so fileName2xml failed with AttributeError
        # whenever no template file was given.
        self.xml = self.XML
        self.maskfile = None
        self.dataFiles = []
        self.wavelength = 1.0
        self.debug = False
        self.mode = "offline"
        self.newerOnly = False
        self.nbcpu = multiprocessing.cpu_count()
        # One slot per job in flight; released by XMLsuccess / XMLerr
        self.cpu_sem = Semaphore(self.nbcpu)
        # Held by the (single) thread currently emptying the queue
        self.process_sem = Semaphore()
        self.queue = Queue()

    def fileName2xml(self, filename):
        """Here we create the XML string to be passed to the EDNA plugin from the input filename

        This can / should be modified by the final user

        As a side effect the "1d" directory, sibling of the directory containing
        the file, is created when it does not exist yet.

        @param filename: full path of the input file
        @type filename: python string representing the path
        @rtype: XML string, or None when filename does not end with ".edf"
        @return: python string
        """
        if not filename.endswith(".edf"):
            return None
        strFullPath = os.path.abspath(filename)
        strDirName, strName = os.path.split(strFullPath)
        strDirDirName = os.path.dirname(strDirName)
        strBaseName, strExt = os.path.splitext(strName)
        strDir1d = os.path.join(strDirDirName, "1d")
        if not os.path.isdir(strDir1d):
            os.makedirs(strDir1d, int("775", 8))
        # Applied in this order. The mask placeholder is now spelled ${MASKFILE}
        # like in the template; the former "$MASKFILE" never matched.
        listSubstitutions = (
            ("${FULLPATH}", strFullPath),
            ("${DIRNAME}", strDirName),
            ("${NAME}", strName),
            ("${DIRDIRNAME}", strDirDirName),
            ("${BASENAME}", strBaseName),
            ("${EXT}", strExt),
            ("${MASKFILE}", self.maskfile or ""),
        )
        strXml = self.xml
        for strKey, strValue in listSubstitutions:
            strXml = strXml.replace(strKey, strValue)
        return strXml

    def XMLerr(self, strXMLin):
        """
        Failure callback connected to every job: frees one slot of the CPU
        semaphore and logs the input of the failed job as a warning.

        @param strXMLin: The XML string used to launch the job
        @type strXMLin: python string with the input XML, or an object offering marshal()
        @rtype: None
        @return: None
        """
        self.cpu_sem.release()
        if not isinstance(strXMLin, types.StringTypes):
            strXMLin = strXMLin.marshal()
        EDVerbose.WARNING("Error in the processing of :\n%s" % strXMLin)
        return None

    def XMLsuccess(self, strXMLin):
        """
        Success callback connected to every job: it only frees one slot of the
        CPU semaphore.

        @param strXMLin: The XML string used to launch the job
        @type strXMLin: python string with the input XML
        @rtype: None
        @return: None
        """
        self.cpu_sem.release()
        return None

    def parse(self):
        """
        Parse options from command line and store them in the instance.

        Exits after printing the version when --version is given and through the
        option parser's error path when --nbcpu is smaller than 1.

        @raise RuntimeError: when none of the positional arguments is an existing file
        """
        parser = optparse.OptionParser()
        parser.add_option("-V", "--version", dest="version", action="store_true",
                          help="print version of the program and quit", default=False)
        parser.add_option("-v", "--verbose", action="store_true", dest="debug", default=False,
                          help="switch to debug/verbose mode")
        parser.add_option("-m", "--mask", dest="mask",
                          help="file containing the mask (for image reconstruction)", default=None)
        parser.add_option("-M", "--mode", dest="mode",
                          help="Mode can be offline/online/dirwatch", default="offline")
        parser.add_option("-o", "--out", dest="output",
                          help="file for log", default=None)
        parser.add_option("-w", "--wavelength", dest="wavelength", type="float",
                          help="wavelength of the X-Ray beam in Angstrom", default=None)
        parser.add_option("-e", "--energy", dest="energy", type="float",
                          help="energy of the X-Ray beam in keV (hc=%skeV.A)" % self.hc, default=None)
        parser.add_option("-t", "--template", dest="template", type="str",
                          help="template XML file", default=None)
        parser.add_option("-n", "--nbcpu", dest="nbcpu", type="int",
                          help="number of jobs to run concurrently", default=self.nbcpu)
        (options, args) = parser.parse_args()

        # Analyse arguments and options
        if options.version:
            print("BioSaxs Azimuthal integration version %s" % __version__)
            sys.exit(0)
        if options.debug:
            EDVerbose.setVerboseDebugOn()
            self.debug = True
        if options.output:
            EDVerbose.setLogFileName(options.output)
        # A mask / template that is not an existing file is silently ignored
        if options.mask and os.path.isfile(options.mask):
            self.maskfile = options.mask
        if options.template and os.path.isfile(options.template):
            with open(options.template) as templateFile:
                self.xml = templateFile.read()
        # Values given on the command line are scaled by 1e-10 (Angstrom -> metre);
        # NOTE(review): the default of 1.0 set in __init__ is not scaled -- confirm units.
        if options.wavelength:
            self.wavelength = 1e-10 * options.wavelength
        elif options.energy:
            self.wavelength = 1e-10 * self.hc / options.energy

        # Command line mode -> (self.mode, self.newerOnly); other values keep the
        # defaults. NOTE(review): "dirwarch" looks like a typo of "dirwatch" but is
        # kept as the consumer of self.mode is not part of this class.
        dictModes = {
            "offline": ("offline", False),
            "online": ("dirwarch", True),
            "dirwatch": ("dirwarch", False),
        }
        if options.mode in dictModes:
            self.mode, self.newerOnly = dictModes[options.mode]

        if options.nbcpu < 1:
            # Semaphore(0) would make startProcessing wait forever
            parser.error("--nbcpu must be at least 1")
        self.cpu_sem = Semaphore(options.nbcpu)
        self.nbcpu = options.nbcpu
        self.dataFiles = [f for f in args if os.path.isfile(f)]
        if not self.dataFiles:
            raise RuntimeError("Please provide datafiles or read the --help")

    def process(self):
        """
        Create and queue one job per data file, make sure a thread is emptying
        the queue, then wait until every CPU slot has been given back before
        synchronizing the jobs and printing their statistics.
        """
        for fn in self.dataFiles:
            EDVerbose.screen("Processing file %s" % fn)
            strXml = self.fileName2xml(fn)
            if strXml is None:
                # fileName2xml only handles .edf files; a job without input is useless
                EDVerbose.WARNING("Skipping %s: not an .edf file" % fn)
                continue
            edj = EDJob(self.EDNAPluginName)
            edj.dataInput = strXml
            edj.connectSUCCESS(self.XMLsuccess)
            edj.connectFAILURE(self.XMLerr)
            self.queue.put(edj)
            # _Semaphore__value: private counter of the Python 2 threading semaphore.
            # A positive value means that no thread is emptying the queue right now.
            if self.process_sem._Semaphore__value > 0:
                t = threading.Thread(target=self.startProcessing)
                t.start()
        EDVerbose.screen("Back in main")
        # A worker that has already seen the queue empty but not yet released
        # process_sem makes the test above skip starting a new thread, leaving the
        # last job(s) queued forever. Emptying the queue from here (this blocks
        # until any such worker is gone) guarantees that every job has been
        # launched before the wait below.
        self.startProcessing()
        while self.cpu_sem._Semaphore__value < self.nbcpu:
            time.sleep(0.1)
        EDJob.synchronizeAll()
        EDJob.stats()

    def startProcessing(self):
        """
        Empty the job queue: each job is executed once a CPU slot is available.
        process_sem ensures that only one thread does this at a time.
        """
        with self.process_sem:
            while not self.queue.empty():
                self.cpu_sem.acquire()
                edj = self.queue.get()
                edj.execute()