    def __init__(self):
        ObjBase.__init__(self)
        # {name: workerObj}
        self.workers = dict()
        self.logger = get_logger('WorkerService_%s' % getnodename())
    def create(self, config):
        """Parse the config and create the relevant classes."""
        self.logger.debug('Creating new worker')
        report = Report()
        status = Status()
        # The configuration is stored in the database as JSON: it stores cleanly
        # as text, passes easily between the server and its clients (web, console),
        # and cjson is a convenient decoder.
        config = cjson.decode(config)
        # The name is rewritten as name_timestamp_nodename so the job/worker can be
        # told apart easily when many nodes are running. Domo provides a distributed
        # job system in which a job is a crawling process; the codebase can easily
        # be adapted to distribute other kinds of work, so remote nodes can be
        # controlled from a single client, thanks to the Pyro framework.
        name = config.get('options').get('name')[0]
        version = datetime.today().strftime('%Y%m%d%H%M%S')
        name = '%s_%s_%s' % (name, version, getnodename())
        config.get('options')['name'] = [name]
        crawler = Crawler(config, report=report, status=status)
        if crawler is not None:
            # Spawn the crawler as a separate process; the multiprocessing library
            # lets spawned processes be controlled much like threads.
            worker = Process(target=crawler.run, name=name)
            # attach the shared objects to the worker
            worker.report = report
            worker.status = status
            self.workers.update({name: worker})
            self.logger.info('Created new worker: %s with status %s'
                             % (name, worker.status.get()))
            # get the worker ready for commands
            worker.start()
            return (True, '%s' % name)
        self.logger.error('Could not create new worker')
        return (False, 'Could not create new worker')
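    # A hypothetical usage sketch, not part of the service API: it shows the shape
    # of the JSON document create() expects, assuming only the 'options.name' key
    # read above; real configs carry the remaining crawler options as well.
    #
    #     config = cjson.encode({'options': {'name': ['newscrawl']}})
    #     ok, worker_name = service.create(config)
    #     # worker_name comes back as '<name>_<timestamp>_<nodename>', e.g.
    #     # 'newscrawl_20091231235959_node1'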
    def exit(self):
        self.logger.info('Node shutting down.. Terminating workers..')
        try:
            for name, worker in self.workers.items():
                # hack to cope with daemonized mode: skip workers that are already gone
                if not self.checkworker(name):
                    continue
                # ask the crawler to stop, then wait for its process to finish
                worker.status.set('killed')
                worker.join()
                self.logger.info('Worker terminated: %s' % name)
        except Exception:
            self.logger.error(traceback.format_exc())
            return (False, traceback.format_exc())
        return (True, 'Node %s shutdown' % getnodename())
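    # The shutdown above relies on a cooperative protocol: exit() flips the shared
    # Status to 'killed' and then join()s, so the crawler's run loop is expected to
    # poll that flag and return. A minimal sketch of such a loop (illustrative
    # names, not the actual Crawler implementation):
    #
    #     def run(self):
    #         while self.status.get() != 'killed':
    #             self.crawl_next()       # hypothetical single crawl step
    #         self.report.save()          # hypothetical flush of the shared Report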
def serverprocess():
    Pyro.config.PYRO_DETAILED_TRACEBACK = True
    Pyro.config.PYRO_TRACELEVEL = 0
    Pyro.config.PYRO_USER_TRACELEVEL = 4
    Pyro.config.PYRO_MULTITHREADED = 0  # necessary for pyprocessing
    Pyro.core.initServer()

    hostname = getnodename()
    daemon = Pyro.core.Daemon()
    service = WorkerService()

    # Name-server registration and the start-up announcement are currently
    # disabled; the service is registered directly on the daemon instead.
    #domain = settings.DOMAIN
    #ns = probeNS()
    #daemon.useNameServer(ns)
    #publisher = Clients.Publisher()
    #buf = ''
    #for item in (domain, hostname):
    #    buf += ((buf != '') and '.' or '') + '%s' % item
    #    try:
    #        ns.createGroup('%s' % buf)
    #    except NamingError:
    #        pass
    #try:
    #    ns.unregister('%s.%s.jobservice' % (domain, hostname))
    #except NamingError:
    #    pass
    #daemon.connect(service, '%s.%s.jobservice' % (domain, hostname))
    daemon.connect(service, 'jobservice')

    # Tell everyone that there is a new guy in town
    #publisher.publish("JOBSERVICE", (hostname, 'started'))

    try:
        while 1:
            # check worker status on every pass, then serve incoming requests
            service.checkworker()
            daemon.handleRequests(timeout=60)
    except KeyboardInterrupt:
        # tell the service to shut down all workers
        service.exit()
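# A hypothetical client sketch, not part of Domo itself: since the daemon above
# registers the service directly under the plain name 'jobservice', a console
# client could reach it through a PYROLOC URI. The host and the port (7766 is
# Pyro's default daemon port) are assumptions for illustration only.
def clientprocess(host='localhost', port=7766):
    Pyro.core.initClient()
    service = Pyro.core.getProxyForURI('PYROLOC://%s:%d/jobservice' % (host, port))
    # ask the remote node to spawn a crawler with a minimal config
    ok, worker_name = service.create(cjson.encode({'options': {'name': ['newscrawl']}}))
    return ok, worker_name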