def _setup(self, dataSourceID, metadata, resultsFilename, startAt=0, dataSourceModule=None, serverfilter=clusterIO.local_serverfilter):
    """Record the data source, results destination and metadata for this push.

    Mostly bookkeeping: stores URIs and settings as attributes. The only
    external interaction is choosing which cluster server will receive the
    results (``pickResultsServer``).
    """
    self.dataSourceID = dataSourceID
    # '~' is reserved / unsafe in cluster paths, so reject it up front
    if ('~' in self.dataSourceID) or ('~' in resultsFilename):
        raise RuntimeError('File, queue or results name must NOT contain ~')

    # URI from which the completed results can be read back
    self.resultsURI = 'PYME-CLUSTER://%s/%s' % (serverfilter, resultsFilename)

    # Pre-selecting a single server and handing workers its direct HTTP
    # endpoint is faster and avoids race conditions; the aggregate endpoint
    # is needed because multiple writes will be made to the same file.
    self.worker_resultsURI = clusterResults.pickResultsServer('__aggregate_h5r/%s' % resultsFilename, serverfilter)

    md_name = resultsFilename + '.json'
    self.resultsMDFilename = md_name
    self.results_md_uri = 'PYME-CLUSTER://%s/%s' % (serverfilter, md_name)

    self.mdh = metadata
    self.start_at = startAt
    self.serverfilter = serverfilter
def __init__(self, dataSourceID, metadata, resultsFilename, queueName=None, startAt=0, dataSourceModule=None, serverfilter=clusterIO.local_serverfilter):
    """
    Create a pusher and push tasks for each frame in a series. For use with the
    new cluster distribution architecture.

    Parameters
    ----------
    dataSourceID : str
        The URI of the data source - e.g. PYME-CLUSTER://serverfilter/path/to/data
    metadata : PYME.IO.MetaDataHandler object
        The acquisition and analysis metadata
    resultsFilename : str
        The cluster relative path to the results file. e.g.
        "<username>/analysis/<date>/seriesname.h5r"
    queueName : str
        A name to give the queue. The results filename is used if no name is given.
    startAt : int
        Which frame to start at (default 0, i.e. the whole series).
        TODO - read from metadata instead of taking as a parameter.
    dataSourceModule : str [optional]
        The name of the module to use for reading the raw data. If not given,
        it will be inferred from the dataSourceID.
    serverfilter : str
        A cluster filter, for use when multiple PYME clusters are visible on
        the same network segment.
    """
    if queueName is None:
        queueName = resultsFilename

    self.queueID = queueName
    self.dataSourceID = dataSourceID
    # '~' is reserved / unsafe in cluster paths, so reject it up front
    if '~' in self.dataSourceID or '~' in self.queueID or '~' in resultsFilename:
        raise RuntimeError('File, queue or results name must NOT contain ~')

    # Pick one server up front for all result writes (faster, and avoids race
    # conditions); the aggregate endpoint supports the multiple appends needed.
    self.resultsURI = clusterResults.pickResultsServer('__aggregate_h5r/%s' % resultsFilename, serverfilter)

    resultsMDFilename = resultsFilename + '.json'
    self.results_md_uri = 'PYME-CLUSTER://%s/%s' % (serverfilter, resultsMDFilename)

    self.taskQueueURI = _getTaskQueueURI()

    self.mdh = metadata

    # load data source - either explicitly named, or inferred from the URI
    if dataSourceModule is None:
        DataSource = DataSources.getDataSourceForFilename(dataSourceID)
    else:
        DataSource = __import__('PYME.IO.DataSources.' + dataSourceModule, fromlist=['PYME', 'io', 'DataSources']).DataSource

    self.ds = DataSource(self.dataSourceID)

    # set up results file:
    logging.debug('resultsURI: ' + self.resultsURI)
    clusterResults.fileResults(self.resultsURI + '/MetaData', metadata)
    clusterResults.fileResults(self.resultsURI + '/Events', self.ds.getEvents())

    # set up metadata file which is used for deciding how to launch the analysis
    clusterIO.put_file(resultsMDFilename, self.mdh.to_JSON().encode(), serverfilter=serverfilter)

    self.currentFrameNum = startAt

    self._task_template = None
    self._ruleID = None
    self.doPoll = True

    # post our rule before starting to poll for new frames
    self.post_rule()

    self.pollT = threading.Thread(target=self._updatePoll)
    self.pollT.start()