def save(self, namespace, context={}): """ Save recipes output(s) to HDF5 Parameters ---------- namespace : dict The recipe namespace context : dict Information about the source file to allow pattern substitution to generate the output name. At least 'basedir' (which is the fully resolved directory name in which the input file resides) and 'filestub' (which is the filename without any extension) should be resolved. Returns ------- """ out_filename = self.filePattern.format(**context) if self.scheme == 'pyme-cluster:// - aggregate': from PYME.IO import clusterResults for name, h5_name in self.inputVariables.items(): v = namespace[name] URI = '/'.join(['pyme-cluster:///_aggregate_h5r', out_filename.lstrip('/'), h5_name]) clusterResults.fileResults(URI, v.to_recarray()) #NOTE - aggregation does not support metadata else: out_filename = self._schemafy_filename(out_filename) for name, h5_name in self.inputVariables.items(): v = namespace[name] v.to_hdf(out_filename, tablename=h5_name, metadata=getattr(v, 'mdh', None))
def save(self, namespace, context={}): """ Save recipes output(s) to CSV Parameters ---------- namespace : dict The recipe namespace context : dict Information about the source file to allow pattern substitution to generate the output name. At least 'basedir' (which is the fully resolved directory name in which the input file resides) and 'file_stub' (which is the filename without any extension) should be resolved. Returns ------- """ import pandas as pd out_filename = self.filePattern.format(**context) v = namespace[self.inputName] if self.scheme == 'pyme-cluster:// - aggregate': from PYME.IO import clusterResults clusterResults.fileResults( 'pyme-cluster://_aggregate_csv/' + out_filename.lstrip('/'), v.toDataFrame()) else: out_filename = self._schemafy_filename(out_filename) _ensure_output_directory(out_filename) if not isinstance(v, pd.DataFrame): v = v.toDataFrame() v.to_csv(out_filename)
def prepare(self):
    """
    Do any setup work - e.g. uploading metadata required before the rule is triggered

    Returns
    -------
    post_args : dict
        a dictionary with arguments to pass to RulePusher._post_rule() - specifically timeout, max_tasks,
        release_start, release_end

    """
    # set up results file:
    logging.debug('resultsURI: ' + self.worker_resultsURI)
    clusterResults.fileResults(self.worker_resultsURI + '/MetaData', self.mdh)

    # defer copying events to after series completion
    #clusterResults.fileResults(self.worker_resultsURI + '/Events', self.ds.getEvents())

    # set up metadata file which is used for deciding how to launch the analysis
    clusterIO.put_file(self.resultsMDFilename, self.mdh.to_JSON().encode(), serverfilter=self.serverfilter)

    # wait until clusterIO caches clear to avoid replicating the results file.
    #time.sleep(1.5)  # moved inside polling thread so launches will run quicker

    self._next_release_start = self.start_at
    self.frames_outstanding = self.total_frames - self._next_release_start

    if self.data_complete:
        return dict(max_tasks=self.total_frames)

    return {}
def test_aggregate_h5r():
    import numpy as np
    from PYME.IO import clusterResults

    testdata = np.ones(10, dtype=[('a', '<f4'), ('b', '<f4')])

    clusterResults.fileResults('pyme-cluster://TES1/__aggregate_h5r/_testing/test_results.h5r/foo', testdata)
    clusterResults.fileResults('pyme-cluster://TES1/__aggregate_h5r/_testing/test_results.h5r/foo', testdata)
    clusterResults.fileResults('pyme-cluster://TES1/__aggregate_h5r/_testing/test_results.h5r/foo', testdata)
def __init__(self, dataSourceID, metadata, resultsFilename, queueName=None, startAt=10,
             dataSourceModule=None, serverfilter=''):
    """
    Create a pusher and push tasks for each frame in a series. For use with the new cluster distribution architecture

    Parameters
    ----------
    dataSourceID : str
        The URI of the data source - e.g. PYME-CLUSTER://serverfilter/path/to/data
    metadata : PYME.IO.MetaDataHandler object
        The acquisition and analysis metadata
    resultsFilename : str
        The cluster relative path to the results file. e.g. "<username>/analysis/<date>/seriesname.h5r"
    queueName : str
        a name to give the queue. The results filename is used if no name is given.
    startAt : int
        which frame to start at. TODO - read from metadata instead of taking as a parameter.
    dataSourceModule : str [optional]
        The name of the module to use for reading the raw data. If not given, it will be inferred from the dataSourceID
    serverfilter : str
        A cluster filter, for use when multiple PYME clusters are visible on the same network segment.
    """
    if queueName is None:
        queueName = resultsFilename

    self.queueID = queueName
    self.dataSourceID = dataSourceID

    if '~' in self.dataSourceID or '~' in self.queueID or '~' in resultsFilename:
        raise RuntimeError('File, queue or results name must NOT contain tildes (~)')

    self.resultsURI = 'PYME-CLUSTER://%s/__aggregate_h5r/%s' % (serverfilter, resultsFilename)

    resultsMDFilename = resultsFilename + '.json'
    self.results_md_uri = 'PYME-CLUSTER://%s/%s' % (serverfilter, resultsMDFilename)

    self.taskQueueURI = _getTaskQueueURI()

    self.mdh = metadata

    # load the data source
    if dataSourceModule is None:
        DataSource = DataSources.getDataSourceForFilename(dataSourceID)
    else:
        # import our data source
        DataSource = __import__('PYME.IO.DataSources.' + dataSourceModule,
                                fromlist=['PYME', 'io', 'DataSources']).DataSource

    self.ds = DataSource(self.dataSourceID)

    # set up results file:
    logging.debug('resultsURI: ' + self.resultsURI)
    clusterResults.fileResults(self.resultsURI + '/MetaData', metadata)
    clusterResults.fileResults(self.resultsURI + '/Events', self.ds.getEvents())

    # set up metadata file which is used for deciding how to launch the analysis
    clusterIO.put_file(resultsMDFilename, self.mdh.to_JSON(), serverfilter=serverfilter)

    # wait until clusterIO caches clear to avoid replicating the results file.
    #time.sleep(1.5)  # moved inside polling thread so launches will run quicker

    self.currentFrameNum = startAt

    self._task_template = None

    self.doPoll = True

    self.pollT = threading.Thread(target=self._updatePoll)
    self.pollT.start()
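# Hedged construction sketch for the __init__ above. The enclosing class is passed in as a
# parameter because its actual name/module is not shown here; the data source URI, results
# filename and start frame are illustrative assumptions only.
def _example_push_analysis_tasks(pusher_class):
    from PYME.IO import MetaDataHandler

    mdh = MetaDataHandler.NestedClassMDHandler()  # minimal/empty acquisition metadata, for illustration

    return pusher_class(dataSourceID='PYME-CLUSTER:///data/2024_01_01/series_0000',
                        metadata=mdh,
                        resultsFilename='username/analysis/2024_01_01/series_0000.h5r',
                        startAt=10)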
def main():
    #ns = Pyro.naming.NameServerLocator().getNS()
    ns = hybrid_ns.getNS()

    #tq = Pyro.core.getProxyForURI("PYRONAME://" + taskQueueName)

    procName = compName + ' - PID:%d' % os.getpid()

    import logging
    logging.basicConfig(filename='taskWorkerZC_%d.log' % os.getpid(), level=logging.INFO)
    logger = logging.getLogger(__file__)

    serverFails = {}

    # loop forever asking for tasks
    while 1:
        queueNames = ns.list('TaskQueues')
        #print queueNames

        tasks = []

        # loop over all queues, looking for tasks to process
        while len(tasks) == 0 and len(queueNames) > 0:
            # try the queue on the current machine first
            #print queueNames
            if compName in queueNames:
                qName = compName
                queueNames.remove(qName)
            else:
                # pick a queue at random
                qName = queueNames.pop(random.randint(0, len(queueNames) - 1))

            try:
                #print qName
                tq = Pyro.core.getProxyForURI(ns.resolve(qName))
                tq._setTimeout(10)
                tq._setOneway(['returnCompletedTask'])
                #print qName

                # ask the queue for tasks
                logging.debug('Getting tasks from server')
                tasks = tq.getTasks(procName, PYME.version.version)
                logging.debug('Got %d tasks' % len(tasks))

                # we successfully contacted the server, so reset its fail count
                serverFails[qName] = 0
            except Pyro.core.ProtocolError as e:
                logging.exception('Pyro error: %s' % e.message)

                if e.message == 'connection failed':
                    # remember that the server failed - and put it 'on notice'
                    nFails = 1
                    if qName in serverFails.keys():
                        nFails += serverFails[qName]

                    serverFails[qName] = nFails

                    if False:  # nFails >= 4:
                        # server is dead in the water - put it out of its misery
                        print(('Killing:', qName))
                        try:
                            ns.unregister('TaskQueues.%s' % qName)
                        except Pyro.errors.NamingError:
                            pass
            except Exception:
                import traceback
                logger.exception(traceback.format_exc())
                #pass

        if len(tasks) == 0:  # no queues had tasks
            logger.debug('No tasks available, waiting')
            time.sleep(1)  # put ourselves to sleep to avoid constant polling
        #else:
        #    print qName, len(tasks)

        #results = []

        # loop over tasks - we pop each task and then delete it after processing
        # to keep memory usage down
        while len(tasks) > 0:
            # get the next task (a task is a function, or more generally, a class with
            # a __call__ method)
            task = tasks.pop(0)

            try:
                # execute the task
                t1 = time.time()
                logger.debug('running task')
                res = task(taskQueue=tq)
                t2 = time.time()

                if not task.resultsURI is None:
                    # new style way of returning results to reduce load on server
                    from PYME.IO import clusterResults
                    clusterResults.fileResults(task.resultsURI, res)

                logging.debug('Returning task for frame %d' % res.index)
                tq.returnCompletedTask(res, procName, t2 - t1)
            except:
                import traceback
                logger.exception('Error returning results')
                traceback.print_exc()

            del task
        #tq.returnCompletedTasks(results, name)

        del tasks
def on_data_complete(self):
    events = self.spooler.evtLogger.to_recarray()
    clusterResults.fileResults(self.worker_resultsURI + '/Events', events)
def on_data_complete(self):
    logger.debug('Data complete, copying events to output file')
    clusterResults.fileResults(self.worker_resultsURI + '/Events', self.ds.getEvents())
def _return_task_results(self):
    """
    File all results that this worker has completed

    Returns
    -------

    """
    while True:
        # loop over the results queue until it's empty
        # print 'getting results'
        try:
            queueURL, taskDescr, res = self.resultsQueue.get_nowait()
            outputs = taskDescr.get('outputs', {})
        except Queue.Empty:
            # queue is empty
            return

        if isinstance(res, TaskError):
            # failure
            clusterResults.fileResults(res.log_url, res.to_string())

            s = clusterIO._getSession(queueURL)
            r = s.post(queueURL + 'node/handin?taskID=%s&status=failure' % taskDescr['id'])
            if not r.status_code == 200:
                logger.error('Returning task failed with error: %s' % r.status_code)
        elif res is None:
            # failure
            s = clusterIO._getSession(queueURL)
            r = s.post(queueURL + 'node/handin?taskID=%s&status=failure' % taskDescr['id'])
            if not r.status_code == 200:
                logger.error('Returning task failed with error: %s' % r.status_code)
        elif res == True:  # isinstance(res, ModuleCollection): # recipe output
            # res.save(outputs) # abuse outputs dictionary as context
            s = clusterIO._getSession(queueURL)
            r = s.post(queueURL + 'node/handin?taskID=%s&status=success' % taskDescr['id'])
            if not r.status_code == 200:
                logger.error('Returning task failed with error: %s' % r.status_code)
        else:
            # success
            try:
                if 'results' in outputs.keys():
                    # old style pickled results
                    clusterResults.fileResults(outputs['results'], res)
                else:
                    if len(res.results) > 0:
                        clusterResults.fileResults(outputs['fitResults'], res.results)

                    if len(res.driftResults) > 0:
                        clusterResults.fileResults(outputs['driftResults'], res.driftResults)
            except requests.Timeout:
                logger.exception('Filing results failed on timeout.')

                s = clusterIO._getSession(queueURL)
                r = s.post(queueURL + 'node/handin?taskID=%s&status=failure' % taskDescr['id'])
                if not r.status_code == 200:
                    logger.error('Returning task failed with error: %s' % r.status_code)
            else:
                s = clusterIO._getSession(queueURL)
                r = s.post(queueURL + 'node/handin?taskID=%s&status=success' % taskDescr['id'])
                if not r.status_code == 200:
                    logger.error('Returning task failed with error: %s' % r.status_code)
def on_data_complete(self):
    logger.debug('Data complete, copying events to output file')
    clusterResults.fileResults(self.worker_resultsURI + '/Events', self.spooler.evtLogger.to_JSON())