def _getTaskQueueURI(n_retries=2):
    """Discover the rule servers using zeroconf and choose one.

    Services advertised under ``_pyme-taskdist`` whose name starts with
    ``PYMERuleServer`` are collected. If none are found, the search is
    repeated up to `n_retries` times, sleeping 5 s before each retry.

    Parameters
    ----------
    n_retries : int
        Maximum number of additional searches when no rule server is found.

    Returns
    -------
    str
        ``http://<address>:<port>`` URL of the chosen rule server. A server
        advertised from the local computer is preferred, otherwise one is
        chosen at random.

    Raises
    ------
    IndexError
        If no rule server was found after all retries.
    """
    ns = hybrid_ns.getNS('_pyme-taskdist')

    queueURLs = {}

    def _search():
        for name, info in ns.get_advertised_services():
            if name.startswith('PYMERuleServer'):
                logging.debug('%s %s', info, info.address)
                queueURLs[name] = 'http://%s:%d' % (socket.inet_ntoa(info.address), info.port)

    _search()
    while not queueURLs and (n_retries > 0):
        logging.info('could not find a rule server, waiting 5s and trying again')
        time.sleep(5)
        n_retries -= 1
        _search()

    if not queueURLs:
        # same exception type random.choice() would raise on an empty list,
        # but with a message that says what actually went wrong
        raise IndexError('could not find a rule server')

    # try to grab the rule server on the local computer
    if compName in queueURLs:
        return queueURLs[compName]

    # Advertised names all start with 'PYMERuleServer', so an exact match on
    # the bare computer name practically never succeeds - look for names
    # which contain the computer name instead.
    local_queues = [q for q in queueURLs if compName in q]
    if local_queues:
        return queueURLs[local_queues[0]]

    # if there is no local rule server, choose one at random
    logging.info('no local rule server, choosing one at random')
    return random.choice(list(queueURLs.values()))
def _genURI(taskQueueName): try: from PYME.misc import hybrid_ns ns = hybrid_ns.getNS() return ns.resolve(taskQueueName) except: return 'PYRONAME://' + taskQueueName
def _getTaskQueueURI(n_retries=2):
    """Discover the distributors using zeroconf and choose one.

    Services advertised under ``_pyme-taskdist`` whose name starts with
    ``PYMEDistributor`` are collected. If none are found, the search is
    repeated up to `n_retries` times, sleeping 5 s before each retry.

    Parameters
    ----------
    n_retries : int
        Maximum number of additional searches when no distributor is found.

    Returns
    -------
    str
        ``http://<address>:<port>`` URL of the chosen distributor. A
        distributor whose advertised name contains the local computer name
        is preferred, otherwise one is chosen at random.

    Raises
    ------
    IndexError
        If no distributor was found after all retries.
    """
    ns = hybrid_ns.getNS('_pyme-taskdist')

    queueURLs = {}

    def _search():
        for name, info in ns.get_advertised_services():
            if name.startswith('PYMEDistributor'):
                queueURLs[name] = 'http://%s:%d' % (socket.inet_ntoa(info.address), info.port)

    _search()
    while not queueURLs and (n_retries > 0):
        logging.info('could not find a distributor, waiting 5s and trying again')
        time.sleep(5)
        n_retries -= 1
        _search()

    if not queueURLs:
        # fail with a meaningful message rather than from inside random.choice()
        raise IndexError('could not find a distributor')

    # try to grab the distributor on the local computer
    local_queues = [q for q in queueURLs if compName in q]
    logger.debug('local_queues: %s' % local_queues)
    if local_queues:
        return queueURLs[local_queues[0]]

    # if there is no local distributor, choose one at random
    logger.info('no local distributor, choosing one at random')
    # dict views are not sequences on Python 3, so random.choice() needs a list
    return random.choice(list(queueURLs.values()))
def getDistributorInfo(ns=None):
    """Map the names of advertised distributors / rule servers to their URLs.

    Parameters
    ----------
    ns : name server, optional
        Object providing ``get_advertised_services()``, which yields
        ``(name, info)`` pairs where ``info`` has ``address`` (packed IPv4
        address) and ``port`` attributes. Defaults to the ``_pyme-taskdist``
        hybrid name server.

    Returns
    -------
    dict
        ``{service name: 'http://<address>:<port>/'}`` for every service
        whose name starts with ``PYMEDistributor`` or ``PYMERuleServer``.
    """
    if ns is None:
        ns = hybrid_ns.getNS('_pyme-taskdist')

    wanted_prefixes = ('PYMEDistributor', 'PYMERuleServer')

    return {
        service_name: 'http://%s:%d/' % (socket.inet_ntoa(service.address), service.port)
        for service_name, service in ns.get_advertised_services()
        if service_name.startswith(wanted_prefixes)
    }
def get_ns():
    """Return the module-wide ``_pyme-http`` name server, creating it on first use.

    Creation happens while holding ``_ns_lock``. Before the name server is
    created the call sleeps for a random time of up to 3 s (to stagger
    queries), and afterwards for a further 5 s (to wait for replies).
    """
    global _ns

    with _ns_lock:
        needs_creating = _ns is None
        if needs_creating:
            # stagger query times
            stagger_delay = 3 * np.random.rand()
            time.sleep(stagger_delay)

            _ns = hybrid_ns.getNS('_pyme-http')

            # wait for replies
            time.sleep(5)

    return _ns
def getNodeInfo(ns=None):
    """Map the names of advertised node servers to their URLs.

    Parameters
    ----------
    ns : name server, optional
        Object providing ``get_advertised_services()``, which yields
        ``(name, info)`` pairs. Defaults to the ``_pyme-taskdist`` hybrid
        name server.

    Returns
    -------
    dict
        ``{service name: 'http://<address>:<port>/'}`` for every service
        whose name starts with ``PYMENodeServer``. Services for which
        building the URL raises a ``TypeError`` are logged at debug level
        and left out.
    """
    if ns is None:
        ns = hybrid_ns.getNS('_pyme-taskdist')

    node_urls = {}

    for service_name, service in ns.get_advertised_services():
        if not service_name.startswith('PYMENodeServer'):
            continue

        try:
            ip_string = socket.inet_ntoa(service.address)
            node_urls[service_name] = 'http://%s:%d/' % (ip_string, service.port)
        except TypeError:
            # incomplete service info - report it and carry on
            if service.port is None:
                logger.debug('Service info from %s has no port info' % service_name)
            else:
                logger.debug('ValueError: %s %s, %s' % (service_name, repr(service), service.port))

    return node_urls
def _loadQueue(self, filename):
    """Load data from a remote PYME.ParallelTasks.HDFTaskQueue queue using Pyro.

    If ``self.queueURI`` is None, the task queue for the local computer
    (``TaskQueues.<computer name>``) is looked up using the hybrid name
    server, falling back to a ``PYRONAME://`` URI if the lookup fails.

    Sets ``self.tq``, ``self.seriesName``, ``self.dataSource``, ``self.data``,
    ``self.mdh``, ``self.events`` and ``self.mode``.

    Parameters
    ----------
    filename : string
        the name of the queue. The first ``len('QUEUE://')`` characters are
        stripped to obtain the series name.
    """
    import Pyro.core
    from PYME.IO.DataSources import TQDataSource
    from PYME.misc.computerName import GetComputerName
    compName = GetComputerName()

    if self.queueURI is None:
        #do a lookup
        taskQueueName = 'TaskQueues.%s' % compName

        try:
            from PYME.misc import hybrid_ns
            ns = hybrid_ns.getNS()
            URI = ns.resolve(taskQueueName)
        except Exception:
            # Best-effort fallback. Catch Exception rather than everything
            # so that KeyboardInterrupt / SystemExit are not swallowed.
            URI = 'PYRONAME://' + taskQueueName
    else:
        URI = self.queueURI

    self.tq = Pyro.core.getProxyForURI(URI)

    self.seriesName = filename[len('QUEUE://'):]

    self.dataSource = TQDataSource.DataSource(self.seriesName, self.tq)
    self.data = self.dataSource  #this will get replaced with a wrapped version

    self.mdh = MetaDataHandler.QueueMDHandler(self.tq, self.seriesName)
    MetaData.fillInBlanks(self.mdh, self.dataSource)

    #self.timer.WantNotification.append(self.dsRefresh)

    self.events = self.dataSource.getEvents()
    self.mode = 'LM'
def main():
    """Run a task worker: loop forever fetching tasks from task queues and executing them.

    The queues registered under ``TaskQueues`` on the hybrid name server are
    polled, trying the queue on the current machine first and the others in
    random order, until one of them hands out tasks. Each task is called
    with ``taskQueue=<queue proxy>`` and its result is returned to the queue
    it came from. If no queue has tasks, the worker sleeps for 1 s before
    polling again.

    Logging is configured to write to ``taskWorkerZC_<pid>.log``.
    This function does not return.
    """
    #ns=Pyro.naming.NameServerLocator().getNS()
    ns = hybrid_ns.getNS()

    #tq = Pyro.core.getProxyForURI("PYRONAME://" + taskQueueName)

    procName = compName + ' - PID:%d' % os.getpid()
    import logging
    logging.basicConfig(filename='taskWorkerZC_%d.log' % os.getpid(), level=logging.INFO)
    logger = logging.getLogger(__file__)

    # number of consecutive connection failures for each queue
    serverFails = {}

    #loop forever asking for tasks
    while 1:
        queueNames = ns.list('TaskQueues')

        tasks = []

        #loop over all queues, looking for tasks to process
        while len(tasks) == 0 and len(queueNames) > 0:
            #try queue on current machine first
            if compName in queueNames:
                qName = compName
                queueNames.remove(qName)
            else:
                #pick a queue at random
                qName = queueNames.pop(random.randint(0, len(queueNames) - 1))

            try:
                tq = Pyro.core.getProxyForURI(ns.resolve(qName))
                tq._setTimeout(10)
                tq._setOneway(['returnCompletedTask'])

                #ask the queue for tasks
                logging.debug('Getting tasks from server')
                tasks = tq.getTasks(procName, PYME.version.version)
                logging.debug('Got %d tasks' % len(tasks))

                #we successfully contacted the server, so reset its fail count
                serverFails[qName] = 0
            except Pyro.core.ProtocolError as e:
                # exceptions have no .message attribute on Python 3, so fall
                # back to str(e) rather than failing inside the handler
                message = getattr(e, 'message', str(e))
                logging.exception('Pyro error: %s' % message)
                if message == 'connection failed':
                    #remember that the server failed - and put it 'on notice'
                    nFails = serverFails.get(qName, 0) + 1
                    serverFails[qName] = nFails

                    # Unregistering of dead servers is deliberately disabled
                    # (the condition used to be nFails >= 4)
                    if False:  #nFails >= 4:
                        #server is dead in the water - put it out of its misery
                        print(('Killing:', qName))
                        try:
                            ns.unregister('TaskQueues.%s' % qName)
                        except Pyro.errors.NamingError:
                            pass
            except Exception:
                import traceback
                logger.exception(traceback.format_exc())

        if len(tasks) == 0:
            #no queues had tasks
            logger.debug('No tasks avaialable, waiting')
            time.sleep(1)  #put ourselves to sleep to avoid constant polling

        #loop over tasks - we pop each task and then delete it after processing
        #to keep memory usage down
        while len(tasks) > 0:
            #get the next task (a task is a function, or more generally, a class with
            #a __call__ method)
            task = tasks.pop(0)
            try:
                #execute the task
                t1 = time.time()
                logger.debug('running task')
                res = task(taskQueue=tq)
                t2 = time.time()

                if not task.resultsURI is None:
                    # new style way of returning results to reduce load on server
                    from PYME.IO import clusterResults
                    clusterResults.fileResults(task.resultsURI, res)

                logging.debug('Returning task for frame %d' % res.index)
                tq.returnCompletedTask(res, procName, t2 - t1)
            except Exception:
                # A failing task must not bring the worker down, but
                # KeyboardInterrupt / SystemExit are allowed to propagate so
                # that the worker can still be stopped.
                import traceback
                logger.exception('Error returning results')
                traceback.print_exc()

            del task

        #tq.returnCompletedTasks(results, name)
        del tasks
def __init__(self, scope, defDir=genHDFDataFilepath(), defSeries='%(day)d_%(month)d_series'):
    """Initialise the spooling controller.

    Parameters
    ----------
    scope : microscope instance
        The currently active microscope class (see microscope.py)
    defDir : string pattern
        The default directory to save data to. Any keys of the form `%(<key>)` will be
        substituted using the values defined in `PYME.fileUtils.nameUtils.dateDict`.
        NOTE(review): this argument is not read anywhere in this initialiser - the
        base directory is taken from `nameUtils.get_local_data_directory()`.
    defSeries : string pattern
        This specifies a pattern for file naming. Keys will be substituted as for `defDir`
    """
    self.scope = scope

    # Use sys.version_info rather than parsing the first character of the
    # sys.version string, which only works for single-digit major versions.
    if sys.version_info[0] < 3:
        #default to Queue for Py2
        self.spoolType = 'Queue'
    else:
        #else default to file
        self.spoolType = 'File'

    self._user_dir = None
    self._base_dir = nameUtils.get_local_data_directory()
    self._subdir = nameUtils.get_spool_subdir()
    self.seriesStub = defSeries % nameUtils.dateDict

    self.seriesCounter = 0
    self._series_name = None

    self.protocol = prot.NullProtocol
    self.protocolZ = prot.NullZProtocol

    self.onSpoolProgress = dispatch.Signal()
    self.onSpoolStart = dispatch.Signal()
    self.onSpoolStop = dispatch.Signal()

    self._analysis_launchers = queue.Queue(3)

    self._status_changed_condition = threading.Condition()

    #settings which were managed by GUI
    self.hdf_compression_level = 2  # zlib compression level that pytables should use (spool to file and queue)
    self.z_stepped = False  # z-step during acquisition
    self.z_dwell = 100  # time to spend at each z level (if z_stepped == True)
    self.cluster_h5 = False  # spool to h5 on cluster (cluster of one)
    self.pzf_compression_settings = HTTPSpooler.defaultCompSettings  # only for cluster spooling

    #check to see if we have a cluster
    self._N_data_servers = len(hybrid_ns.getNS('_pyme-http').get_advertised_services())
    if self._N_data_servers > 0:
        # switch to cluster as spool method if available.
        self.SetSpoolMethod('Cluster')
    if self._N_data_servers == 1:
        self.cluster_h5 = True  # we have a cluster of one
# ################## import tables from PYME.IO import MetaDataHandler import os import time import PYME.Acquire.Spooler as sp #from PYME.Acquire import protocol as p from PYME.IO.FileUtils import fileID try: from PYME.misc import hybrid_ns ns = hybrid_ns.getNS() except ImportError: ns = None #rom PYME.Acquire import eventLog class SpoolEvent(tables.IsDescription): EventName = tables.StringCol(32) Time = tables.Time64Col() EventDescr = tables.StringCol(256) class EventLogger: def __init__(self, spool, tq, queueName): self.spooler = spool #self.scope = scope self.tq = tq
def main():
    """Start the PYME task server and serve its task queues over Pyro.

    A ``TaskQueueSet`` is connected to a Pyro daemon bound to this host's IP
    address and registered with the hybrid name server under
    ``taskQueueName``. A ``TaskWatcher`` is started alongside it. The daemon
    then handles requests for as long as ``isAlive`` of the queue set
    permits, or until a keyboard interrupt / system exit is received, after
    which the daemon is shut down and the watcher is told to stop.

    The first command line argument may be ``-p`` (profile using mProfile
    and print a report on exit) or ``-fp`` (profile threads using fProfile,
    writing to ``taskServer_prof.txt``).
    """
    print('Starting PYME taskServer ...')
    import socket
    from PYME import config

    ip_addr = socket.gethostbyname(socket.gethostname())

    # the profiling switch, if any, is the first command line argument
    option = sys.argv[1] if len(sys.argv) > 1 else None

    profile = False
    if option == '-p':
        print('profiling')
        profile = True
        from PYME.util.mProfile import mProfile
        mProfile.profileOn(['taskServerZC.py', 'HDFTaskQueue.py', 'TaskQueue.py'])
    elif option == '-fp':
        print('profiling')
        from PYME.util.fProfile import fProfile
        thread_prof = fProfile.thread_profiler()
        thread_prof.profileOn('.*taskServerZC.*|.*TaskQueue.*|.*h5rFile.*', 'taskServer_prof.txt')

    Pyro.config.PYRO_MOBILE_CODE = 0
    Pyro.core.initServer()

    name_server = hybrid_ns.getNS()
    daemon = Pyro.core.Daemon(host=ip_addr)
    daemon.useNameServer(name_server)

    in_order = config.get('TaskServer.process_queues_in_order', True)
    queue_set = TaskQueueSet(process_queues_in_order=in_order)
    uri = daemon.connect(queue_set, taskQueueName)

    logging.debug('taskserver URI -> %s, %s' % (uri, type(uri)))

    watcher = TaskWatcher(queue_set)
    watcher.start()

    try:
        daemon.requestLoop(queue_set.isAlive)
    except (KeyboardInterrupt, SystemExit):
        logging.debug('Got a keyboard interrupt, attempting to shut down cleanly')
    finally:
        daemon.shutdown(True)
        watcher.alive = False
        logging.info('Task server is shut down')

    if profile:
        mProfile.report()