Example #1
0
class EDObject(object):
    """
    Common ancestor of the EDNA classes.

    Every instance gets a numeric identifier at construction time, owns a
    private semaphore usable for explicit locking, and carries two optional
    timestamps (start / end) from which a run time can be computed.
    """
    # Guards the identifier counter below while a new instance is numbered.
    __semaphoreId = Semaphore()
    # Last identifier handed out; incremented through the instance's class.
    __iId_class = 0

    def __init__(self):
        """
        Number the instance, create its own semaphore and leave both
        timestamps unset (they are only filled in on request).
        """
        object.__init__(self)
        cls = self.__class__
        with cls.__semaphoreId:
            iNextId = cls.__iId_class + 1
            cls.__iId_class = iNextId
            self.__iId = iNextId
        self.__classname = None
        self.__fTimeEnd = None
        self.__fTimeInit = None
        self.__semaphore = Semaphore()

    def getId(self):
        """
        @return: the identifier assigned to this instance by the constructor
        @rtype: integer
        """
        return self.__iId

    def getClassName(self):
        """
        @return: the name of the actual (most derived) class of the instance
        @rtype: string
        """
        return self.__class__.__name__

    def synchronizeOn(self):
        """
        Acquire the instance semaphore. Must be paired with a later call to
        synchronizeOff(), which releases it.
        """
        self.__semaphore.acquire()

    def synchronizeOff(self):
        """
        Release the instance semaphore previously taken by synchronizeOn().
        """
        self.__semaphore.release()

    def locked(self):
        """
        @return: the instance semaphore itself (not a boolean), so that it
                 can be used in a "with" statement
        """
        return self.__semaphore

    def setTimeInit(self):
        """
        Record the start time, unless one has already been recorded.
        """
        if self.__fTimeInit is not None:
            return
        self.__fTimeInit = time.time()

    def getTimeInit(self):
        """
        @return: start time in seconds since the epoch, or None if unset
        @rtype: float
        """
        return self.__fTimeInit

    def setTimeEnd(self):
        """
        Record the end time, unless one has already been recorded.
        """
        if self.__fTimeEnd is not None:
            return
        self.__fTimeEnd = time.time()

    def getTimeEnd(self):
        """
        @return: end time in seconds since the epoch, or None if unset
        @rtype: float
        """
        return self.__fTimeEnd

    def getRunTime(self):
        """
        @return: 0.0 when no start time was recorded; otherwise the elapsed
                 time between the start and the end time (or "now" when the
                 end time has not been recorded yet)
        @rtype: float
        """
        fStart = self.__fTimeInit
        if fStart is None:
            return 0.0
        fStop = self.__fTimeEnd
        if fStop is None:
            fStop = time.time()
        return fStop - fStart
Example #2
0
class Reprocess(EDLogging):
    """
    Queue EDJob instances for a single plugin and execute them, limiting the
    number of jobs running at once with a counting semaphore.
    """

    def __init__(self, strPluginName, iNbCpu=None):
        """
        @param strPluginName: name of the plugin every job will be created with
        @param iNbCpu: maximum number of jobs to run in parallel; when it
                       cannot be converted to an integer (e.g. None) the
                       value from EDUtilsParallel.detectNumberOfCPUs() is used
        """
        EDLogging.__init__(self)
        self.pluginName = strPluginName
        self.startTime = time.time()
        try:
            self.iNbCpu = int(iNbCpu)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures trigger auto-detection; anything else
            # (KeyboardInterrupt, SystemExit, ...) must propagate.
            self.iNbCpu = EDUtilsParallel.detectNumberOfCPUs()

        self.screen("Initializing Reprocess with max %i jobs in parallel." % self.iNbCpu)
        # One slot per job allowed to run concurrently.
        self.__semaphoreNbThreads = Semaphore(self.iNbCpu)
        EDUtilsParallel.initializeNbThread(self.iNbCpu)
        self.jobQueue = Queue()
        # Held while a thread is draining the queue in startProcessing().
        self.processingSem = Semaphore()
        # Serializes concurrent calls to statistics().
        self.statLock = Semaphore()
        self.lastStatistics = "No statistics collected yet, please use the 'collectStatistics' method first"
        self.lastFailure = "No job Failed (yet)"
        self.lastSuccess = "No job succeeded (yet)"

    def startJob(self, xsd):
        """
        Create a job for the given input, queue it and make sure a processing
        thread is running.

        @param xsd: XML data structure as a string, or path to a file containing it
        @return: jobID which is a string: Plugin-000001 (None when xsd is a blank string)
        """
        self.DEBUG("In %s.startJob()" % self.__class__.__name__)
        if isinstance(xsd, types.StringTypes):
            if xsd.strip() == "":
                return
            if os.path.isfile(xsd):
                # Read the file content and close the handle deterministically.
                with open(xsd, "rb") as xmlFile:
                    xsd = xmlFile.read()
        edJob = EDJob(self.pluginName)
        if edJob is None:
            return "Error in load Plugin"
        jobId = edJob.getJobId()
        edJob.setDataInput(xsd)
        self.jobQueue.put(edJob)
        # Start a consumer thread only when none currently holds processingSem.
        if self.processingSem._Semaphore__value > 0:
            t = threading.Thread(target=self.startProcessing)
            t.start()
        return jobId

    def startProcessing(self):
        """
        Process all jobs in the queue.

        A slot of the job-limiting semaphore is acquired before each job is
        taken from the queue; it is released by the success/failure callbacks.
        """
        with self.processingSem:
            while not self.jobQueue.empty():
                self.__semaphoreNbThreads.acquire()
                edJob = self.jobQueue.get()
                edJob.connectSUCCESS(self.successJobExecution)
                edJob.connectFAILURE(self.failureJobExecution)
                edJob.execute()

    def successJobExecution(self, jobId):
        """
        Callback connected to a job's SUCCESS signal: free the job slot,
        clean the job and remember its identifier as the last success.

        @param jobId: identifier of the job that finished
        """
        self.DEBUG("In %s.successJobExecution(%s)" % (self.__class__.__name__, jobId))
        with self.locked():
            self.__semaphoreNbThreads.release()
            EDJob.cleanJobfromID(jobId, False)
            self.lastSuccess = jobId
            gc.collect()

    def failureJobExecution(self, jobId):
        """
        Callback connected to a job's FAILURE signal: free the job slot,
        clean the job, remember its identifier as the last failure and flush
        the standard streams.

        @param jobId: identifier of the job that finished
        """
        self.DEBUG("In %s.failureJobExecution(%s)" % (self.__class__.__name__, jobId))
        with self.locked():
            self.__semaphoreNbThreads.release()
            EDJob.cleanJobfromID(jobId, False)
            self.lastFailure = jobId
            sys.stdout.flush()
            sys.stderr.flush()
            gc.collect()

    def getRunning(self):
        """
        retrieve the list of plugins currently under execution (with their plugin-Id)
        """
        return EDStatus.getRunning()

    def getSuccess(self):
        """
        retrieve the list of plugins finished with success (with their plugin-Id)
        """
        return EDStatus.getSuccess()

    def getFailure(self):
        """
        retrieve the list of plugins finished with failure (with their plugin-Id)
        """
        return EDStatus.getFailure()

    def collectStatistics(self):
        """
        Start the collection of statistics on all EDNA jobs in a background
        thread. Nothing is returned; read the result with getStatistics().
        """
        t = threading.Thread(target=self.statistics)
        t.start()

    def statistics(self):
        """
        Retrieve some statistics about past jobs and store them as the
        "last statistics" together with the collection time.

        @return: the text stored in self.lastStatistics
        """
        with self.statLock:
            fStartStat = time.time()
            self.lastStatistics = EDJob.stats()
            self.lastStatistics += os.linesep + "Statistics collected on %s, the collect took: %.3fs" % (time.asctime(), time.time() - fStartStat)
        return self.lastStatistics

    def getStatistics(self):
        """
        just return statistics previously calculated
        """
        return self.lastStatistics

    def getJobOutput(self, jobId):
        """
        Retrieve XML output from a job
        @param jobId: name of the job
        @return: output from a job
        """
        return EDJob.getDataOutputFromId(jobId)

    def getJobInput(self, jobId):
        """
        Retrieve XML input from a job
        @param jobId: name of the job
        @return: xml input from a job
        """
        return EDJob.getDataInputFromId(jobId)

    def join(self):
        """
        Wait for all jobs to finish: poll once per second until the queue is
        empty, every job slot is free, no processing thread holds
        processingSem and neither EDUtilsParallel nor EDStatus reports a
        running job.
        """
        while not (self.jobQueue.empty() and
                (self.__semaphoreNbThreads._Semaphore__value == self.iNbCpu) and
                (EDUtilsParallel.getNbRunning() == 0) and
                (self.processingSem._Semaphore__value == 1) and
                (len(EDStatus.getRunning()) == 0)):
            time.sleep(1)
Example #3
0
class EDObject(object):
    """
    Root class of the EDNA object hierarchy.

    It provides three services to its instances: a numeric identifier
    assigned at construction, a private semaphore for explicit
    synchronization, and start / end timestamps with a derived run time.
    """
    # Protects the counter below while a new instance takes its number.
    __semaphoreId = Semaphore()
    # Most recently assigned identifier; updated through the instance's class.
    __iId_class = 0

    def __init__(self):
        """
        Take the next identifier under the class-level semaphore, then set
        up the per-instance semaphore and the (still unset) timestamps.
        """
        object.__init__(self)
        klass = self.__class__
        with klass.__semaphoreId:
            klass.__iId_class = klass.__iId_class + 1
            self.__iId = klass.__iId_class
        self.__semaphore = Semaphore()
        self.__fTimeInit = self.__fTimeEnd = None
        self.__classname = None

    def getId(self):
        """
        @return: the identifier given to this instance by the constructor
        @rtype: integer
        """
        return self.__iId

    def getClassName(self):
        """
        @return: the name of the instance's class
        @rtype: string
        """
        return self.__class__.__name__

    def synchronizeOn(self):
        """
        Take the instance semaphore; to be followed by synchronizeOff().
        """
        self.__semaphore.acquire()

    def synchronizeOff(self):
        """
        Give back the instance semaphore taken by synchronizeOn().
        """
        self.__semaphore.release()

    def locked(self):
        """
        @return: the instance semaphore object (not a boolean), which can be
                 used directly in a "with" statement
        """
        return self.__semaphore

    def setTimeInit(self):
        """
        Store the current time as start time; later calls are ignored.
        """
        if self.__fTimeInit is not None:
            return
        self.__fTimeInit = time.time()

    def getTimeInit(self):
        """
        @return: the start time as seconds since the epoch (None if unset)
        @rtype: float
        """
        return self.__fTimeInit

    def setTimeEnd(self):
        """
        Store the current time as end time; later calls are ignored.
        """
        if self.__fTimeEnd is not None:
            return
        self.__fTimeEnd = time.time()

    def getTimeEnd(self):
        """
        @return: the end time as seconds since the epoch (None if unset)
        @rtype: float
        """
        return self.__fTimeEnd

    def getRunTime(self):
        """
        @return: the time elapsed since the start time, measured up to the
                 end time when it is set and up to now otherwise; 0.0 when
                 the start time was never set
        @rtype: float
        """
        if self.__fTimeInit is None:
            return 0.0
        if self.__fTimeEnd is not None:
            return self.__fTimeEnd - self.__fTimeInit
        return time.time() - self.__fTimeInit
Example #4
0
File: EDObject.py Project: kif/edna
class EDObject(object):
    """
    Virtual base class for all EDNA Objects (classes).
    It offers some synchronization and locking capabilities to make the code thread safe,
    and records the run time of every timed instance for later profiling.
    """
    # Guards the identifier counter and the registration in `profiling`.
    __semaphoreId = Semaphore()
    # Last identifier handed out; incremented through the instance's class.
    __iId_class = 0
    # Maps a class name to the list of run times (in seconds) appended by setTimeEnd().
    profiling = {}

    def __init__(self):
        """
        Constructor of the main pure virtual class.
        This constructor implements:
        - the assignment of the instance identifier
        - the registration of the class name in the profiling table
        - the creation of the semaphore
        - definition of timer object (uninitialized as potentially not used)
        """
        object.__init__(self)
        with self.__class__.__semaphoreId:
            self.__class__.__iId_class += 1
            self.__iId = self.__class__.__iId_class
            self.__class__.profiling.setdefault(self.__class__.__name__, [])
        self.__semaphore = Semaphore()
        self.__fTimeInit = None
        self.__fTimeEnd = None
        self.__classname = None

    def getId(self):
        """
        @return: the identifier assigned to this instance by the constructor
        @rtype: integer
        """
        return self.__iId

    def getClassName(self):
        """
        Retrieves the name of the class
        @return: the name of the class
        @rtype: string
        """
        return self.__class__.__name__

    def synchronizeOn(self):
        """
        This method must be used together with the method synchronizeOff().
        This method makes the code thread-safe till the method synchronizeOff
        is called.
        """
        self.__semaphore.acquire()

    def synchronizeOff(self):
        """
        This method must be used together with the method synchronizeOn().
        """
        self.__semaphore.release()

    def getSemaphoreValue(self):
        """
        This method should only be used for debugging purpose...
        @return: the "internal" value of the semaphore
        @rtype: integer
        """
        try:
            # Attribute name used by the Python 2 implementation of Semaphore.
            iValue = self.__semaphore._Semaphore__value
        except AttributeError:
            # Attribute name used by the Python 3 implementation of Semaphore.
            iValue = self.__semaphore._value
        return iValue

    def locked(self):
        """
        @return: the instance semaphore itself (not a boolean), so that it
                 can be used in a "with" statement
        """
        return self.__semaphore

    def setTimeInit(self):
        """
        Initializes the timer for the object (only the first call has an effect)
        """
        if self.__fTimeInit is None:
            self.__fTimeInit = time.time()

    def getTimeInit(self):
        """
        Retrieves the time of initialization
        @return: number of seconds since epoch (None if the timer was not started)
        @rtype: float
        """
        return self.__fTimeInit

    def setTimeEnd(self):
        """
        Set the end of calculation time for the given object (only the first
        call has an effect). When the timer had been started, the run time is
        appended to the profiling table under the name of the class.
        """
        if self.__fTimeEnd is None:
            self.__fTimeEnd = time.time()
            if self.__fTimeInit is not None:
                # setdefault: keep working even if the table was reset after construction.
                timings = self.__class__.profiling.setdefault(self.getClassName(), [])
                timings.append(self.__fTimeEnd - self.__fTimeInit)

    def getTimeEnd(self):
        """
        Retrieves the time of end of task
        @return: number of seconds since epoch (None if not yet set)
        @rtype: float
        """
        return self.__fTimeEnd

    def getRunTime(self):
        """
        @returns: the RunTime for the given object: 0.0 if the timer was never
                  started, the time elapsed so far if it was not stopped
        @rtype: float
        """
        fRetrunRunTime = 0.0
        if self.__fTimeInit is not None:
            if self.__fTimeEnd is None:
                fRetrunRunTime = time.time() - self.__fTimeInit
            else:
                fRetrunRunTime = self.__fTimeEnd - self.__fTimeInit
        return fRetrunRunTime

    @classmethod
    def analyze_profiling(cls):
        """
        Analyse the profiling and return a list of strings to be printed out.

        The list starts with three header lines, followed by one line per
        class name found in the profiling table, sorted by increasing total
        time (ties are ordered by name). Classes without any recorded timing
        are reported with zero calls and zero times.

        @return: lines of the report
        @rtype: list of strings
        """
        import numpy
        res = ["Analysis on: %s" % time.asctime(),
               "  Calls | Mean (s) | Std dev  | Total (s) | Plugin name",
               "-" * 80]
        rows = []
        # Snapshot the items so that a registration from another thread
        # cannot change the dictionary size during the iteration.
        for name, lst_timings in list(cls.profiling.items()):
            npd = numpy.array(lst_timings, dtype=float)
            calls = npd.size
            tot = float(npd.sum())
            if calls:
                mean = tot / calls
                std = float(npd.std())
            else:
                # Avoid 0/0 (NaN plus a RuntimeWarning) for never-timed classes.
                mean = std = 0.0
            line = " %6d | %8.3f | %8.3f | %9.3f | %s " % \
                    (calls, mean, std, tot, name)
            # Keep (total, name) as sort key instead of using the total as a
            # dictionary key: equal totals must not overwrite each other.
            rows.append((tot, name, line))
        rows.sort()
        res.extend(line for _, _, line in rows)
        return res
Example #5
0
class EDParallelExecute(EDLogging):
    """
    A class helping to make a multi-threaded application from a plugin name and a list of files.
    """

    def __init__(self, _strPluginName, _functXMLin,
                 _functXMLout=None, _functXMLerr=None,
                 _iNbThreads=None, _fDelay=1.0, _bVerbose=None, _bDebug=None):
        """
        This is the constructor of the edna plugin launcher.
        It also installs handleKill() as handler for SIGTERM and SIGINT.

        @param _strPluginName: the name of the EDNA plugin
        @type  _strPluginName: python string

        @param _functXMLin: a function taking a path in input and returning the XML string for input in the EDNA plugin.
        @type  _functXMLin: python function

        @param _functXMLout: a function to be called each time a plugin has finished its job successfully, it should take two arguments: strXMLin and strXMLout
        @type  _functXMLout: python function

        @param _functXMLerr: a function to be called each time a plugin has finished its job and crashed, it should take ONE argument: strXMLin
        @type  _functXMLerr: python function

        @param _iNbThreads: The number of parallel threads to be used by EDNA, usually the number of Cores of the computer. If 0 or None, the number of cores  will be auto-detected.
        @type  _iNbThreads: python integer

        @param _fDelay: The delay in seconds between two directories analysis
        @type  _fDelay: python float

        @param _bVerbose:  Do you want the EDNA plugin execution to be verbose ?
        @type  _bVerbose: boolean

        @param _bDebug:  Do you want EDNA plugin execution debug output (OBS! very verbose) ?
        @type  _bDebug: boolean
        """
        EDLogging.__init__(self)
        self.__iNbThreads = EDUtilsParallel.detectNumberOfCPUs(_iNbThreads)
        EDUtilsParallel.initializeNbThread(self.__iNbThreads)
        ################################################################################
        # We are not using the one from EDUtilsParallel to leave it to control the number of execPlugins.
        ################################################################################
        self.__semaphoreNbThreads = Semaphore(self.__iNbThreads)
        self.__strPluginName = _strPluginName
        self.__functXMLin = _functXMLin
        self.__functXMLout = _functXMLout
        self.__functXMLerr = _functXMLerr
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = None
        self.__listInputPaths = []
        # Maps a file name to the id of the job processing it (or, for a
        # short while in processOneFile(), to the plugin name as placeholder).
        self.__dictCurrentlyRunning = {}
        if _bVerbose is not None:
            if _bVerbose:
                # NOTE(review): verbose=True switches the *debug* output on; confirm this is intended.
                self.setVerboseDebugOn()
            else:
                self.setVerboseOff()
        if _bDebug is not None:
            if _bDebug:
                self.setVerboseDebugOn()
            else:
                self.setVerboseDebugOff()
        self.__fDelay = _fDelay  # default delay between two directory checks.
        self.__bQuit = False  # To check if we should quit the application
        self.__bIsFirstExecute = True
        signal.signal(signal.SIGTERM, self.handleKill)
        signal.signal(signal.SIGINT, self.handleKill)

    def runEDNA(self,
                _pyListInputPaths=["."],
                _strMode="dirwatch",
                _bNewerOnly=False):
        """
        This method runs the parallel execution on the list of directories.

        @param _pyListInputPaths: the name of the directories to look after.
        @type  _pyListInputPaths: python list of strings

        @param _strMode: can be dirwatch, inotify, or OffLine (inotify being not yet implemented)
        @type _strMode: python string

        @param _bNewerOnly: in online mode, process only new files (appearing after the program has started), by default it will process all files then wait for newer files and process them.
        @type _bNewerOnly: boolean

        @raise NotImplementedError: when _strMode is "inotify"
        """
        self.moveToTempDir()
        self.__listInputPaths = _pyListInputPaths
        if _strMode == "dirwatch":
            self.watch_directories(_bNewerOnly)
        elif _strMode == "inotify":
            # A bare "raise" has no active exception to re-raise here; raise
            # an explicit, descriptive exception instead.
            raise NotImplementedError(
                "inotify online notify mode not yet implemented")
        else:  # mode offline
            self.runEdnaFunction(self.__listInputPaths, _bIncludeSubdirs=True)
            self.waitForAllProcessToFinish()

    def moveToTempDir(self):
        """
        Create a temporary directory and make it the current working
        directory, so that all logs are put there. The previous working
        directory is remembered (handleKill() goes back to it).
        """
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = tempfile.mkdtemp(suffix='.log', prefix='edna-')
        self.screen("The log directory of EDNA will be in " +
                    self.__strTempDir)
        os.chdir(self.__strTempDir)

    def start(self, _strXmlInput):
        """
        Launch EDNA with the given XML stream

        @param _strXmlInput:  XML to be passed to the plugin
        @type  _strXmlInput: python string representing the XML data structure
        @return: the value returned by EDJob.execute(), or None when the
                 input is empty or None
        """
        jobid = None
        if _strXmlInput not in ["", None]:
            job = EDJob(self.__strPluginName)
            job.setDataInput(_strXmlInput)
            job.connectFAILURE(self.failureJobExecution)
            job.connectSUCCESS(self.successJobExecution)
            job.connectCallBack(self.unregisterJob)
            # Blocks until one of the slots limiting the concurrency is free.
            self.semaphoreNbThreadsAcquire()
            jobid = job.execute()
            self.DEBUG("Running Job id %s" % jobid)
            if jobid is None:
                # No job id was returned: give the slot back immediately,
                # presumably because no callback will do it - TODO confirm.
                self.semaphoreNbThreadsRelease()
        return jobid

    def successJobExecution(self, _jobId):
        """
        Method called when the execution of the plugin finishes with success:
        frees the job slot, then calls functXMLout (if any) with the input
        and the output of the plugin.

        @param  _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.successJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            if self.__functXMLout is not None:
                job = EDJob.getJobFromID(_jobId)
                self.__functXMLout(job.getPlugin().getDataInput(),
                                   job.getPlugin().getDataOutput())

    def failureJobExecution(self, _jobId):
        """
        Method called when the execution of the plugin finishes with failure:
        frees the job slot, then calls functXMLerr (if any) with the input of
        the plugin.

        @param  _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.failureJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            if self.__functXMLerr is not None:
                self.__functXMLerr(
                    EDJob.getJobFromID(_jobId).getPlugin().getDataInput())

    def unregisterJob(self, _jobid):
        """
        remove the filename from the list of files currently under processing

        @param  _jobid: string of type EDPluginName-number
        """
        with self.locked():
            # Iterate over a copy: entries are removed while looping.
            for oneKey in self.__dictCurrentlyRunning.copy():
                if self.__dictCurrentlyRunning[oneKey] == _jobid:
                    self.__dictCurrentlyRunning.pop(oneKey)

    def runEdnaFunction(self, _listNewFiles, _bIncludeSubdirs=False):
        """
        This method is the launcher for new files found by watch_directories ; it is also called directly in offline mode.
        It stops early (returning None) as soon as a quit has been requested.

        @param  _listNewFiles: list of files newly created in the directory.
        @type   _listNewFiles: python list of strings.
        @param  _bIncludeSubdirs: should we include sub-directories ? yes for offline and no for online.
        @type   _bIncludeSubdirs: boolean
        """
        for oneFile in _listNewFiles:
            if os.path.isdir(oneFile) and _bIncludeSubdirs == True:
                for root, _, onesubdirfiles in os.walk(oneFile):
                    for onesubdirfile in onesubdirfiles:
                        strFilename = os.path.abspath(
                            os.path.join(root, onesubdirfile))
                        if self.__bQuit == True:
                            return
                        self.processOneFile(strFilename)
            elif os.path.isfile(oneFile):
                if self.__bQuit == True:
                    return
                self.processOneFile(oneFile)

    def processOneFile(self, _strFilename):
        """
        Process one file by calling subsequently the XML generator and the start method unless this file
        is already under process (can happen with the watch_directories method).

        @param  _strFilename: filename to process
        @type _strFilename: string
        """
        if _strFilename not in self.__dictCurrentlyRunning:
            with self.locked():
                # Placeholder value until the real job id is known.
                self.__dictCurrentlyRunning[
                    _strFilename] = self.__strPluginName
            strXmlData = self.__functXMLin(_strFilename)
            if strXmlData in [None, ""]:
                self.log("Not processing % s" % _strFilename)
                with self.locked():
                    self.__dictCurrentlyRunning.pop(_strFilename)
            else:
                self.screen("Processing % s" % _strFilename)
                jobid = self.start(strXmlData)
                with self.locked():
                    if jobid is None:
                        self.__dictCurrentlyRunning.pop(_strFilename)
                    else:
                        self.__dictCurrentlyRunning[_strFilename] = jobid

    def watch_directories(self, _bNewerOnly=False):
        """
        Continuously monitors the paths and their subdirectories
        for changes.  If any files or directories are modified,
        the callable function ( here the method self.runEdnaFunction() ) is called
        with a list of the modified paths of both
        files and directories.  The value returned by that call is used as
        "rescan" flag: when it is true, the directory tree is
        rescanned once without calling the function for any found changes.
        (This is so this function can write changes into the tree and prevent itself
        from being immediately called again.)
        The loop runs until a quit is requested (see handleKill()).

        @param _bNewerOnly : Do you want to process only newer files
        @type _bNewerOnly  : Boolean

        Basic principle: dictAllFiles is a dictionary mapping paths to
        modification times.  We repeatedly crawl through the directory
        tree rooted at 'path', doing a stat() on each file and comparing
        the modification time.

        """
        dictAllFiles = {}
        dictRemainingFiles = {}
        listChangedFiles = []

        def internalUpdateDict(unused, dirname, files):
            "Traversal function for directories"
            for strFilename in files:
                path = os.path.join(dirname, strFilename)

                try:
                    tempStat = os.stat(path)
                except os.error:
                    # If a file has been deleted between os.path.walk()
                    # scanning the directory and now, we'll get an
                    # os.error here.  Just ignore it -- we'll report
                    # the deletion on the next pass through the main loop.
                    continue

                mtime = dictRemainingFiles.get(path)
                if mtime is not None:
                    # Record this file as having been seen
                    del dictRemainingFiles[path]
                    # File's mtime has been changed since we last looked at it.
                    if tempStat.st_mtime > mtime:
                        listChangedFiles.append(path)
                else:
                    # No recorded modification time, so it must be
                    # a brand new file.
                    listChangedFiles.append(path)

                # Record current mtime of file.
                dictAllFiles[path] = tempStat.st_mtime

        if _bNewerOnly:
            # Preliminary scan: record the files already present so that the
            # main loop only reports files created or modified afterwards.
            for path in self.__listInputPaths:
                os.path.walk(path, internalUpdateDict, None)

        # Main loop
        rescan = False
        while not self.__bQuit:
            listChangedFiles = []
            dictRemainingFiles = dictAllFiles.copy()
            dictAllFiles = {}
            for path in self.__listInputPaths:
                os.path.walk(path, internalUpdateDict, None)
            if rescan:
                rescan = False
            elif listChangedFiles:
                rescan = self.runEdnaFunction(listChangedFiles,
                                              _bIncludeSubdirs=False)
            time.sleep(self.__fDelay)
        print("Quitting the online mode.")

    def handleKill(self, signum, frame):
        """
        This method is launched when the program catches ctrl-c or gets killed. It initializes the exit of the program:
        the quit flag is raised, the running jobs are waited for and the
        working directory saved by moveToTempDir() is restored.
        """
        self.__bQuit = True
        sys.stderr.write("Exit requested by signal %s with frame %s.\n" %
                         (signum, frame))
        self.waitForAllProcessToFinish()
        os.chdir(self.__strCurrWorkDir)

    def flush(self):
        """
        This method calls the functXMLin a few times with a flush=True argument or without arguments and finishes the work:
        every non-empty XML string obtained is started as a job; the method
        returns as soon as functXMLin yields None or an empty string.
        """
        bFinished = False
        while not bFinished:
            xml = self.__callFunctXMLinForFlush()
            # This test must be part of the loop body, otherwise the loop
            # never terminates and the flushed data are never processed.
            if (xml is None) or (xml == ""):
                bFinished = True
            else:
                self.screen("Flushing data ...")
                self.start(xml)

    def __callFunctXMLinForFlush(self):
        """
        Call functXMLin with the first calling convention it accepts.

        The conventions are tried in this order: (None, flush=True),
        ("", flush=True), ("") and finally no argument at all; a TypeError
        makes the next one be tried.

        @return: the value returned by functXMLin, or None when every
                 attempt raised TypeError
        """
        listAttempts = (((None,), {"flush": True}),
                        (("",), {"flush": True}),
                        (("",), {}),
                        ((), {}))
        for args, kwargs in listAttempts:
            try:
                return self.__functXMLin(*args, **kwargs)
            except TypeError:
                continue
        return None

    def waitForAllProcessToFinish(self):
        """
        as its name says, this method waits for all plug-ins which are currently running to finish before returning.
        One dot per second of waiting is written on the standard error.
        """
        self.screen("Waiting for launched jobs to finish .")
        while (self.getNbRunning() > 0):
            time.sleep(1)
            sys.stderr.write(".")
        sys.stderr.write("Done.\n")
        EDJob.stats()

    def cleanUp(self, listMethods=[]):
        """
        Final hook if you need to execute something after all processes finished (like killAllWorkers in SPD)
        @param listMethods: allows to finish some things in the plugin.
        @type listMethods: list of strings representing names of methods of the plugin to be called.
        """
        self.waitForAllProcessToFinish()
        for strOneMethod in listMethods:
            try:
                print("calling edPlugin.%s" % strOneMethod)
                # NOTE(review): "edPlugin" is not defined in this method nor
                # in this class, so unless the module defines it globally this
                # raises NameError and ends up in the except branch below.
                # NOTE(review): exec of a caller-supplied string; only pass
                # trusted values in listMethods.
                exec("edPlugin.%s" % strOneMethod)
            except Exception:
                print("error in processing %s" % strOneMethod)

################################################################################
# Nota: there are 2 levels of controls for the number of thread currently running:
# * One here to limit the number of control plugin running at once
# * One on the Exec plugin level for finer grain optimisation
################################################################################

    def semaphoreNbThreadsAcquire(self):
        """Method to acquire the semaphore that controls the number of plugins running concurrently"""
        self.__semaphoreNbThreads.acquire()

    def semaphoreNbThreadsRelease(self):
        """Method to release the semaphore that controls the number of plugins running concurrently"""
        self.__semaphoreNbThreads.release()

    def getNbRunning(self):
        """
        Getter for the number of jobs currently holding a slot of the
        semaphore limiting the concurrency (number of slots minus the
        current value of the semaphore).

        @return: the number of slots currently in use
        @rtype: integer
        """
        return self.__iNbThreads - self.__semaphoreNbThreads._Semaphore__value
Example #6
0
class EDParallelExecute(EDLogging):
    """ 
    A class helping to make a multi-threaded application from a plugin name and a list of files. 
    """

    def __init__(self, _strPluginName, _functXMLin,
                 _functXMLout=None, _functXMLerr=None,
                 _iNbThreads=None, _fDelay=1.0, _bVerbose=None, _bDebug=None):
        """
        Constructor of the EDNA plugin launcher.

        @param _strPluginName: the name of the EDNA plugin
        @type  _strPluginName: python string

        @param _functXMLin: a function taking a path in input and returning the XML string for input in the EDNA plugin.
        @type  _functXMLin: python function

        @param _functXMLout: a function to be called each time a plugin has finished its job successfully, it should take two arguments: strXMLin and strXMLout
        @type  _functXMLout: python function

        @param _functXMLerr: a function to be called each time a plugin has finished its job with a failure, it should take ONE argument: strXMLin
        @type  _functXMLerr: python function

        @param _iNbThreads: The number of parallel threads to be used by EDNA, usually the number of cores of the computer. If 0 or None, the number of cores will be auto-detected.
        @type  _iNbThreads: python integer

        @param _fDelay: The delay in seconds between two analyses of the directories
        @type  _fDelay: python float

        @param _bVerbose: Do you want the EDNA plugin execution to be verbose ?
        @type  _bVerbose: boolean

        @param _bDebug: Do you want EDNA plugin execution debug output (OBS! very verbose) ?
        @type  _bDebug: boolean
        """
        EDLogging.__init__(self)

        # Thread budget, shared with EDUtilsParallel
        iNbThreads = EDUtilsParallel.detectNumberOfCPUs(_iNbThreads)
        self.__iNbThreads = iNbThreads
        EDUtilsParallel.initializeNbThread(iNbThreads)
        # We are not using the semaphore from EDUtilsParallel, to leave it to control the number of execPlugins.
        self.__semaphoreNbThreads = Semaphore(iNbThreads)

        # Plugin to run and user callbacks
        self.__strPluginName = _strPluginName
        self.__functXMLin = _functXMLin
        self.__functXMLout = _functXMLout
        self.__functXMLerr = _functXMLerr

        # Working directories and bookkeeping of the files being processed
        self.__strCurrWorkDir = os.getcwd()
        self.__strTempDir = None
        self.__listInputPaths = []
        self.__dictCurrentlyRunning = {}

        # Default delay between two directory checks
        self.__fDelay = _fDelay
        # To check if we should quit the application
        self.__bQuit = False
        self.__bIsFirstExecute = True

        # Verbosity: None means "leave the current setting untouched"
        # NOTE(review): a true _bVerbose switches the *debug* verbosity on, exactly like _bDebug -- confirm this is intended
        if _bVerbose:
            self.setVerboseDebugOn()
        elif _bVerbose is not None:
            self.setVerboseOff()
        if _bDebug:
            self.setVerboseDebugOn()
        elif _bDebug is not None:
            self.setVerboseDebugOff()

        # Ask for a clean exit when the program is terminated or interrupted
        for iSignal in (signal.SIGTERM, signal.SIGINT):
            signal.signal(iSignal, self.handleKill)


    def runEDNA(self, _pyListInputPaths=[ "." ], _strMode="dirwatch", _bNewerOnly=False):
        """
        This method runs the parallel execution on the list of directories.

        @param _pyListInputPaths: the name of the directories to look after.
        @type  _pyListInputPaths: python list of strings

        @param _strMode: can be dirwatch, inotify, or OffLine (inotify being not yet implemented)
        @type _strMode: python string

        @param _bNewerOnly: in online mode, process only new files (appearing after the program has started), by default it will process all files then wait for newer files and process them.
        @type _bNewerOnly: boolean

        @raise NotImplementedError: when the "inotify" mode is requested
        """
        # Resolve the paths before moveToTempDir() changes the working directory, otherwise
        # relative paths (including the default ".") would designate the temporary log directory.
        # Building a new list also avoids keeping a reference to the shared default list.
        listInputPaths = [os.path.abspath(strPath) for strPath in _pyListInputPaths]
        self.moveToTempDir()
        self.__listInputPaths = listInputPaths
        if _strMode == "dirwatch":
            self.watch_directories(_bNewerOnly)
        elif _strMode == "inotify":
            strMessage = "inotify online notify mode not yet implemented"
            print(strMessage)
            # A bare "raise" is only valid while an exception is being handled
            raise NotImplementedError(strMessage)
        else:  # mode offline
            self.runEdnaFunction(self.__listInputPaths, _bIncludeSubdirs=True)
            self.waitForAllProcessToFinish()


    def moveToTempDir(self):
        """
        Remember the current working directory, create a temporary directory
        (named edna-*.log), announce it on screen and make it the working directory,
        so that all logs are put there.
        """
        self.__strCurrWorkDir = os.getcwd()
        strLogDir = tempfile.mkdtemp(prefix='edna-', suffix='.log')
        self.__strTempDir = strLogDir
        self.screen("The log directory of EDNA will be in " + strLogDir)
        os.chdir(strLogDir)


    def start(self, _strXmlInput):
        """
        Launch the EDNA plugin with the given XML stream.

        A slot of the thread-limiting semaphore is taken before the job is executed;
        it is given back immediately when no job identifier is obtained.

        @param _strXmlInput:  XML to be passed to the plugin
        @type  _strXmlInput: python string representing the XML data structure
        @return: the identifier returned by the job execution, None when the input is empty or None
        """
        # Nothing to launch without input data
        if _strXmlInput in ["", None]:
            return None

        edJob = EDJob(self.__strPluginName)
        edJob.setDataInput(_strXmlInput)
        edJob.connectFAILURE(self.failureJobExecution)
        edJob.connectSUCCESS(self.successJobExecution)
        edJob.connectCallBack(self.unregisterJob)

        self.semaphoreNbThreadsAcquire()
        strJobId = edJob.execute()
        self.DEBUG("Running Job id %s" % strJobId)
        if strJobId is None:
            # No job was started: free the slot taken above
            self.semaphoreNbThreadsRelease()
        return strJobId


    def successJobExecution(self, _jobId):
        """
        Callback for a plugin execution that finished with success: frees one
        slot of the thread-limiting semaphore, then, if a functXMLout was given,
        calls it (under the object lock) with the input and the output data of the plugin.

        @param  _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.successJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            functOut = self.__functXMLout
            if functOut is None:
                return
            edJob = EDJob.getJobFromID(_jobId)
            xsDataIn = edJob.getPlugin().getDataInput()
            xsDataOut = edJob.getPlugin().getDataOutput()
            functOut(xsDataIn, xsDataOut)


    def failureJobExecution(self, _jobId):
        """
        Callback for a plugin execution that finished with failure: frees one
        slot of the thread-limiting semaphore, then, if a functXMLerr was given,
        calls it (under the object lock) with the input data of the plugin.

        @param  _jobId: string of type EDPluginName-number
        """
        self.DEBUG("EDParallelExcecute.failureJobExecution for %s" % _jobId)
        self.semaphoreNbThreadsRelease()
        with self.locked():
            functErr = self.__functXMLerr
            if functErr is None:
                return
            edPlugin = EDJob.getJobFromID(_jobId).getPlugin()
            functErr(edPlugin.getDataInput())


    def unregisterJob(self, _jobid):
        """
        Remove from the dictionary of files currently under processing every
        filename that is registered with the given job identifier.

        @param  _jobid: string of type EDPluginName-number
        """
        with self.locked():
            dictRunning = self.__dictCurrentlyRunning
            # Collect first (on a snapshot of the keys), remove afterwards
            listFinished = []
            for strPath in dictRunning.copy():
                if dictRunning[strPath] == _jobid:
                    listFinished.append(strPath)
            for strPath in listFinished:
                dictRunning.pop(strPath)


    def runEdnaFunction(self, _listNewFiles, _bIncludeSubdirs=False):
        """
        Launcher for the new files found by watch_directories; it is also called directly in offline mode.
        Every regular file is handed over to processOneFile; directories are crawled
        recursively only when _bIncludeSubdirs is True. Processing stops as soon as a quit has been requested.

        @param  _listNewFiles: list of files newly created in the directory.
        @type   _listNewFiles: python list of strings.
        @param  _bIncludeSubdirs: should we include sub-directories ? yes for offline and no for online.
        @type   _bIncludeSubdirs: boolean
        """
        for strPath in _listNewFiles:
            if os.path.isdir(strPath) and _bIncludeSubdirs == True:
                # Directory: process every file found below it
                for strDir, _, listNames in os.walk(strPath):
                    for strName in listNames:
                        strFullName = os.path.abspath(os.path.join(strDir, strName))
                        if self.__bQuit == True:
                            return
                        self.processOneFile(strFullName)
                continue
            if not os.path.isfile(strPath):
                # Neither a directory to crawl nor a regular file: ignored
                continue
            if self.__bQuit == True:
                return
            self.processOneFile(strPath)


    def processOneFile(self, _strFilename):
        """
        Process one file by calling successively the XML generator and the start method, unless this file
        is already under processing (can happen with the watch_directories method).

        The file is registered in the dictionary of running files (first with the plugin name,
        then with the job identifier) and unregistered when there is nothing to process
        or when no job could be started.

        @param  _strFilename: filename to process
        @type _strFilename: string
        """
        dictRunning = self.__dictCurrentlyRunning
        if _strFilename in dictRunning:
            return

        # Book the file, with the plugin name as placeholder for the job identifier
        with self.locked():
            dictRunning[_strFilename] = self.__strPluginName

        strXmlData = self.__functXMLin(_strFilename)
        if strXmlData in [None, ""]:
            self.log("Not processing % s" % _strFilename)
            with self.locked():
                dictRunning.pop(_strFilename)
            return

        self.screen("Processing % s" % _strFilename)
        strJobId = self.start(strXmlData)
        with self.locked():
            if strJobId is None:
                dictRunning.pop(_strFilename)
            else:
                dictRunning[_strFilename] = strJobId

    def watch_directories(self, _bNewerOnly=False):
        """
        Continuously monitors the input paths and their subdirectories
        for changes.  If any files or directories are new or modified,
        the method self.runEdnaFunction() is called with the list of their paths.
        The loop runs until a quit has been requested, sleeping for the
        configured delay between two scans.

        @param _bNewerOnly : Do you want to process only newer files
        @type _bNewerOnly  : Boolean

        Basic principle: dictAllFiles is a dictionary mapping paths to
        modification times.  We repeatedly crawl through the directory
        trees rooted at the input paths, doing a stat() on each entry and comparing
        the modification time with the one recorded during the previous scan.
        """
        dictAllFiles = {}
        dictRemainingFiles = {}
        listChangedFiles = []

        def internalUpdateDict(dirname, names):
            "Record the mtime of every entry of one directory and collect the new or modified ones"
            for strFilename in names:
                path = os.path.join(dirname, strFilename)

                try:
                    tempStat = os.stat(path)
                except os.error:
                    # If an entry has been deleted between the listing of
                    # the directory and now, we get an os.error here.
                    # Just ignore it.
                    continue

                mtime = dictRemainingFiles.get(path)
                if mtime is not None:
                    # Record this entry as having been seen
                    del dictRemainingFiles[path]
                    # The mtime has changed since we last looked at it.
                    if tempStat.st_mtime > mtime:
                        listChangedFiles.append(path)
                else:
                    # No recorded modification time, so it must be
                    # a brand new entry.
                    listChangedFiles.append(path)

                # Record current mtime of the entry.
                dictAllFiles[path] = tempStat.st_mtime

        def internalScanTree(strRoot):
            "Crawl one directory tree; os.walk replaces os.path.walk, which no longer exists in Python 3"
            for dirname, subdirs, files in os.walk(strRoot):
                # Sub-directories are examined as well as files, like os.path.walk did
                internalUpdateDict(dirname, subdirs + files)

        if _bNewerOnly:
            # Register what already exists, so that only later changes are reported
            for path in self.__listInputPaths:
                internalScanTree(path)

        # Main loop
        rescan = False
        while not self.__bQuit:
            # The three containers are rebound here; the nested functions see the new objects
            listChangedFiles = []
            dictRemainingFiles = dictAllFiles.copy()
            dictAllFiles = {}
            for path in self.__listInputPaths:
                internalScanTree(path)
            if rescan:
                rescan = False
            elif listChangedFiles:
                # runEdnaFunction has no explicit return value in this class, so rescan stays false
                rescan = self.runEdnaFunction(listChangedFiles, _bIncludeSubdirs=False)
            time.sleep(self.__fDelay)
        print("Quitting the online mode.")


    def handleKill(self, signum, frame):
        """
        Signal handler, launched when the program catches ctrl-c or gets killed.
        It requests the exit of the program, waits for the launched jobs to finish
        and goes back to the initial working directory.

        @param signum: number of the signal received
        @param frame: stack frame that was interrupted by the signal
        """
        self.__bQuit = True
        strMessage = "Exit requested by signal %s with frame %s.\n" % (signum, frame)
        sys.stderr.write(strMessage)
        self.waitForAllProcessToFinish()
        strInitialDir = self.__strCurrWorkDir
        os.chdir(strInitialDir)


    def flush(self):
        """
        This method calls the functXMLin a few times with a flush=True argument or without arguments and finishes the work.

        At each round functXMLin is called with the first signature it accepts, tried in this order:
        (None, flush=True), ("", flush=True), ("") and finally no argument at all.
        A signature is considered as rejected when the call raises TypeError.
        A non empty result is started as a new job; None or "" ends the flushing.
        """
        tupleSignatures = (
            ((None,), {"flush": True}),
            (("",), {"flush": True}),
            (("",), {}),
            ((), {}),
        )
        bFinished = False
        while not bFinished:
            xml = None
            for tupleArgs, dictKwargs in tupleSignatures:
                try:
                    xml = self.__functXMLin(*tupleArgs, **dictKwargs)
                except TypeError:
                    # NOTE: a TypeError raised inside functXMLin itself cannot be told apart from a rejected signature
                    continue
                break
            # This test has to be inside the loop: otherwise bFinished is never updated and the loop cannot end
            if (xml is None) or (xml == ""):
                bFinished = True
            else:
                self.screen("Flushing data ...")
                self.start(xml)


    def waitForAllProcessToFinish(self):
        """
        As its name says, this method waits for all plug-ins which are currently running to finish before returning.
        The number of running plug-ins is polled every second, a dot being written on stderr each time;
        the job statistics are displayed at the end.
        """
        self.screen("Waiting for launched jobs to finish .")
        while True:
            if not self.getNbRunning() > 0:
                break
            time.sleep(1)
            sys.stderr.write(".")
        sys.stderr.write("Done.\n")
        EDJob.stats()


    def cleanUp(self, listMethods=[]):
        """
        Final hook if you need to execute something after all processes finished (like killAllWorkers in SPD).

        Waits for all launched jobs to finish, then executes every entry of listMethods
        as the statement "edPlugin.<entry>". An exception raised by one entry is caught
        and reported on stdout; the remaining entries are still executed.

        @param listMethods: allows to finish some things in the plugin.
        @type listMethods: list of strings representing names of methods of the plugin to be called.
        """
        self.waitForAllProcessToFinish()
        # The default list is shared between calls, but it is only iterated here, never modified.
        for strOneMethod in  listMethods:
            try:
                print("calling edPlugin.%s" % strOneMethod)
                # NOTE(review): exec runs arbitrary text, and `edPlugin` is not defined in this method;
                # presumably it is expected to exist as a global -- confirm, otherwise every entry ends in the except branch.
                exec("edPlugin.%s" % strOneMethod)
            except Exception:
                print("error in processing %s" % strOneMethod)


################################################################################
# Note: there are 2 levels of control over the number of threads currently running:
# * One here, to limit the number of control plugins running at once
# * One at the Exec plugin level, for finer-grained optimisation
################################################################################
    def semaphoreNbThreadsAcquire(self):
        """
        Take one slot of the semaphore that controls the number of plugins running concurrently.
        Simply delegates to acquire() of that semaphore.
        """
        semaphore = self.__semaphoreNbThreads
        semaphore.acquire()


    def semaphoreNbThreadsRelease(self):
        """
        Give back one slot of the semaphore that controls the number of plugins running concurrently.
        Simply delegates to release() of that semaphore.
        """
        semaphore = self.__semaphoreNbThreads
        semaphore.release()


    def getNbRunning(self):
        """
        Getter for the number of CPU-active threads running (this is an instance method).

        The value is the number of threads given to the semaphore at construction minus
        the current counter of that semaphore, i.e. the number of slots currently taken.

        @return: the number of CPU-active threads runnings
        @rtype: integer
        """
        semaphore = self.__semaphoreNbThreads
        # The counter is a private attribute of threading semaphores:
        # "_Semaphore__value" on Python 2, "_value" on Python 3.
        iFreeSlots = getattr(semaphore, "_Semaphore__value", None)
        if iFreeSlots is None:
            iFreeSlots = semaphore._value
        return self.__iNbThreads - iFreeSlots
Example #7
0
class Reprocess(object):
    EDNAPluginName = "EDPluginBioSaxsProcessOneFilev1_4"
    hc = 12.398419292004204
    def __init__(self):
        self.XML = "<XSDataInput>\
<normalizedImage><path><value>${FULLPATH}</value></path></normalizedImage>\
<correctedImage><path><value>${DIRDIRNAME}/2d/${BASENAME}.edf</value></path></correctedImage>\
<normalizedImageSize><value>4100000</value></normalizedImageSize>\
<integratedCurve><path><value>${DIRDIRNAME}/1d/${BASENAME}.edf</value></path></integratedCurve>\
<maskFile><path><value>${MASKFILE}</value></path></maskFile>\
<code><value>BSA</value></code>\
</XSDataInput>"
        self.maskfile = None
        self.dataFiles = []
        self.wavelength = 1.0
        self.debug = False
        self.mode = "offline"
        self.newerOnly = False
        self.nbcpu = multiprocessing.cpu_count()
        self.cpu_sem = Semaphore(self.nbcpu)
        self.process_sem = Semaphore()
        self.queue = Queue()
        
    def fileName2xml(self, filename):
        """Here we create the XML string to be passed to the EDNA plugin from the input filename
        This can / should be modified by the final user
        
        @param filename: full path of the input file
        @type filename: python string representing the path
        @rtype: XML string
        @return: python string  
        """
        if filename.endswith(".edf"):
            FULLPATH = os.path.abspath(filename)
            DIRNAME, NAME = os.path.split(FULLPATH)
            DIRDIRNAME = os.path.dirname(DIRNAME)
            BASENAME, EXT = os.path.splitext(NAME)
            if not os.path.isdir(os.path.join(DIRDIRNAME, "1d")):
                   os.makedirs(os.path.join(DIRDIRNAME, "1d"), int("775", 8))
            return self.xml.replace("${FULLPATH}", FULLPATH).\
                replace("${DIRNAME}", DIRNAME).replace("${NAME}", NAME).\
                replace("${DIRDIRNAME}", DIRDIRNAME).replace("${BASENAME}", BASENAME).\
                replace("${EXT}", EXT).replace("$MASKFILE", self.maskfile or "")

    def XMLerr(self, strXMLin):
        """
        This is an example of XMLerr function: it gives back one CPU slot and
        writes a warning with the input that could not be processed.

        @param strXMLin: The XML string used to launch the job, or an object providing a marshal() method
        @type strXMLin: python string with the input XML
        @rtype: None
        @return: None
        """
        self.cpu_sem.release()
        # Duck typing instead of comparing type() with types.StringTypes (which exists only in Python 2
        # and rejects subclasses of strings): whatever can be marshalled is marshalled.
        if hasattr(strXMLin, "marshal"):
            strXMLin = strXMLin.marshal()
        EDVerbose.WARNING("Error in the processing of :\n%s" % strXMLin)
        return None

    def XMLsuccess(self, strXMLin):
        """
        This is an example of XMLerr function ... it prints only the name of the file created
        @param srXMLin: The XML string used to launch the job
        @type strXMLin: python string with the input XML
        @rtype: None
        @return: None     
        """
        self.cpu_sem.release()
#        EDVerbose.WARNING("Error in the processing of :\n%s" % strXMLin)
        return None


    def parse(self):
        """
        parse options from command line
        """
        parser = optparse.OptionParser()
        parser.add_option("-V", "--version", dest="version", action="store_true",
                          help="print version of the program and quit", metavar="FILE", default=False)
        parser.add_option("-v", "--verbose",
                          action="store_true", dest="debug", default=False,
                          help="switch to debug/verbose mode")
        parser.add_option("-m", "--mask", dest="mask",
                      help="file containing the mask (for image reconstruction)", default=None)
        parser.add_option("-M", "--mode", dest="mode",
                      help="Mode can be online/offline/all", default="offline")
        parser.add_option("-o", "--out", dest="output",
                      help="file for log", default=None)
        parser.add_option("-w", "--wavelength", dest="wavelength", type="float",
                      help="wavelength of the X-Ray beam in Angstrom", default=None)
        parser.add_option("-e", "--energy", dest="energy", type="float",
                      help="energy of the X-Ray beam in keV (hc=%skeV.A)" % self.hc, default=None)
        parser.add_option("-t", "--template", dest="template", type="str",
                      help="template XML file", default=None)
        parser.add_option("-n", "--nbcpu", dest="nbcpu", type="int",
                      help="template XML file", default=self.nbcpu)


        (options, args) = parser.parse_args()

        # Analyse aruments and options
        if options.version:
            print("BioSaxs Azimuthal integration version %s" % __version__)
            sys.exit(0)
        if options.debug:
            EDVerbose.setVerboseDebugOn()
            self.debug = True
        if options.output:
            EDVerbose.setLogFileName(options.output)
        if options.mask and os.path.isfile(options.mask):
            self.maskfile = options.mask
        if options.template and os.path.isfile(options.template):
            self.xml = open(options.template).read()
        if options.wavelength:
            self.wavelength = 1e-10 * options.wavelength
        elif options.energy:
            self.wavelength = 1e-10 * self.hc / options.energy
        if options.mode=="offline":
            self.mode = "offline"
            self.newerOnly = False
        elif options.mode=="online":
            self.mode = "dirwarch"
            self.newerOnly = True
        elif options.mode=="dirwatch":
            self.mode = "dirwarch"
            self.newerOnly = False
        self.cpu_sem = Semaphore(options.nbcpu)
        self.nbcpu = options.nbcpu
        self.dataFiles = [f for f in args if os.path.isfile(f)]
        if not self.dataFiles:
            raise RuntimeError("Please provide datafiles or read the --help")


    def process(self):
        """
        Create one job per data file, queue it, and make sure a thread running
        startProcessing is started whenever the processing semaphore looks free.
        Then wait (polling every 0.1 s) until all the CPU slots are free again,
        synchronize all jobs and display the statistics.
        """
        def freeSlots(semaphore):
            "Current counter of a semaphore: private attribute _Semaphore__value in Python 2, _value in Python 3"
            iValue = getattr(semaphore, "_Semaphore__value", None)
            if iValue is None:
                iValue = semaphore._value
            return iValue

        for fn in self.dataFiles:
            EDVerbose.screen("Processing file %s" % fn)
            edj = EDJob(self.EDNAPluginName)
            edj.dataInput = self.fileName2xml(fn)
            edj.connectSUCCESS(self.XMLsuccess)
            edj.connectFAILURE(self.XMLerr)
            self.queue.put(edj)
            # No thread is holding process_sem: nobody is emptying the queue, start a worker
            if freeSlots(self.process_sem) > 0:
                t = threading.Thread(target=self.startProcessing)
                t.start()
        EDVerbose.screen("Back in main")
        # Every slot given back means that no job is running any more
        while freeSlots(self.cpu_sem) < self.nbcpu:
            time.sleep(0.1)
        EDJob.synchronizeAll()
        EDJob.stats()

    def startProcessing(self):
        """
        Worker loop: execute the jobs waiting in self.queue until the queue is empty.

        self.process_sem is held during the whole loop. For every job, one slot of
        self.cpu_sem is acquired before the job is taken out of the queue and executed.
        NOTE(review): the slot is presumably given back by XMLsuccess/XMLerr (both release
        self.cpu_sem) once the job has ended -- confirm against EDJob.
        """
        with self.process_sem:
            while not self.queue.empty():
                # Wait for a free CPU slot before taking the next job
                self.cpu_sem.acquire()
                edj = self.queue.get()
                edj.execute()