예제 #1
0
    def _setup(self,
               dataSourceID,
               metadata,
               resultsFilename,
               startAt=0,
               dataSourceModule=None,
               serverfilter=clusterIO.local_serverfilter):
        """
        Validate the supplied names and record the URIs / settings derived from them on the instance.

        Parameters
        ----------
        dataSourceID : str
            Identifier of the data source; stored as ``self.dataSourceID``.
        metadata : object
            Metadata for the series; stored unchanged as ``self.mdh``.
        resultsFilename : str
            Name of the results file, used to build the results and results-metadata URIs.
        startAt : int
            Stored as ``self.start_at``.
        dataSourceModule : str [optional]
            Accepted for signature compatibility; not used within this method.
        serverfilter : str
            Cluster filter, embedded in the PYME-CLUSTER URIs and passed to the results-server picker.

        Raises
        ------
        RuntimeError
            If ``dataSourceID`` or ``resultsFilename`` contains a '~' character.
        """
        self.dataSourceID = dataSourceID

        # neither name may contain '~'
        for candidate in (self.dataSourceID, resultsFilename):
            if '~' in candidate:
                raise RuntimeError(
                    'File, queue or results name must NOT contain ~')

        cluster_uri_template = 'PYME-CLUSTER://%s/%s'

        # cluster-relative location used when we want to read the results back
        self.resultsURI = cluster_uri_template % (serverfilter, resultsFilename)

        # Workers get a direct HTTP endpoint on a server picked in advance - this is faster (and safer with respect to
        # race conditions). The endpoint is an aggregate one because multiple writes are needed.
        aggregate_path = '__aggregate_h5r/%s' % resultsFilename
        self.worker_resultsURI = clusterResults.pickResultsServer(aggregate_path, serverfilter)

        # metadata lives alongside the results, in a file with '.json' appended to the results filename
        self.resultsMDFilename = resultsFilename + '.json'
        self.results_md_uri = cluster_uri_template % (serverfilter, self.resultsMDFilename)

        # remaining settings are stored as given
        self.mdh = metadata
        self.start_at = startAt
        self.serverfilter = serverfilter
    def __init__(self, dataSourceID, metadata, resultsFilename, queueName = None, startAt = 10, dataSourceModule=None, serverfilter=clusterIO.local_serverfilter):
        """
        Create a pusher and push tasks for each frame in a series. For use with the new cluster distribution architecture

        Construction has side effects: the metadata and the data source's events are filed to the results URI, a
        JSON copy of the metadata is put on the cluster, the rule is posted, and a polling thread is started.

        Parameters
        ----------
        dataSourceID : str
            The URI of the data source - e.g. PYME-CLUSTER://serverfilter/path/to/data
        metadata : PYME.IO.MetaDataHandler object
            The acquisition and analysis metadata
        resultsFilename : str
            The cluster relative path to the results file. e.g. "<username>/analysis/<date>/seriesname.h5r"
        queueName : str
            a name to give the queue. The results filename is used if no name is given.
        startAt : int
            which frame to start at (defaults to 10). TODO - read from metadata instead of taking as a parameter.
        dataSourceModule : str [optional]
            The name of the module to use for reading the raw data. If not given, it will be inferred from the dataSourceID
        serverfilter : str
            A cluster filter, for use when multiple PYME clusters are visible on the same network segment.

        Raises
        ------
        RuntimeError
            If the data source ID, queue name or results filename contains a '~' character.
        """
        if queueName is None:
            queueName = resultsFilename

        self.queueID = queueName
        self.dataSourceID = dataSourceID
        if '~' in self.dataSourceID or '~' in self.queueID or '~' in resultsFilename:
            raise RuntimeError('File, queue or results name must NOT contain ~')

        # The aggregate results endpoint is resolved through pickResultsServer, rather than being written as a
        # generic 'PYME-CLUSTER://<serverfilter>/__aggregate_h5r/...' URI.
        self.resultsURI = clusterResults.pickResultsServer('__aggregate_h5r/%s' % resultsFilename, serverfilter)

        # The metadata file is addressed with a cluster-relative URI built from the results filename (it is not
        # derived from the picked results URI).
        resultsMDFilename = resultsFilename + '.json'
        self.results_md_uri = 'PYME-CLUSTER://%s/%s' % (serverfilter, resultsMDFilename)

        self.taskQueueURI = _getTaskQueueURI()

        self.mdh = metadata

        #load data source
        if dataSourceModule is None:
            DataSource = DataSources.getDataSourceForFilename(dataSourceID)
        else:
            # import_module returns the named submodule itself, which is what we need to reach its DataSource class
            import importlib
            DataSource = importlib.import_module('PYME.IO.DataSources.' + dataSourceModule).DataSource
        self.ds = DataSource(self.dataSourceID)

        #set up results file:
        logging.debug('resultsURI: %s', self.resultsURI)
        clusterResults.fileResults(self.resultsURI + '/MetaData', metadata)
        clusterResults.fileResults(self.resultsURI + '/Events', self.ds.getEvents())

        # set up metadata file which is used for deciding how to launch the analysis
        clusterIO.put_file(resultsMDFilename, self.mdh.to_JSON().encode(), serverfilter=serverfilter)

        # NOTE: the wait for clusterIO caches to clear (to avoid replicating the results file) is not done here - it
        # was moved inside the polling thread so that launches run quicker.

        self.currentFrameNum = startAt

        self._task_template = None

        self._ruleID = None

        # flag set before the polling thread starts (presumably read by _updatePoll - confirm there)
        self.doPoll = True

        #post our rule
        self.post_rule()

        # start polling only once the rule has been posted
        self.pollT = threading.Thread(target=self._updatePoll)
        self.pollT.start()