def start(self):
    """
    Launch the pool: spawn the worker processes, then answer control
    commands until "stop" or "kill" is received (or the iteration on
    the control socket ends).
    """
    proc_title = 'oq-zworkerpool %s' % self.ctrl_url[6:]  # strip tcp://
    print('Starting ' + proc_title, file=sys.stderr)
    setproctitle(proc_title)

    # one PULL socket and one process per worker; the process handle is
    # attached to the socket before the process is started
    self.workers = []
    for _ in range(self.num_workers):
        pull = z.Socket(self.task_server_url, z.zmq.PULL, 'connect')
        pull.proc = multiprocessing.Process(
            target=worker, args=(pull, self.executing))
        pull.proc.start()
        self.workers.append(pull)

    # control loop: every recognized command gets exactly one reply
    with z.Socket(self.ctrl_url, z.zmq.REP, 'bind') as ctrlsock:
        for cmd in ctrlsock:
            if cmd == 'getpid':
                ctrlsock.send(self.proc.pid)
            elif cmd == 'get_num_workers':
                ctrlsock.send(self.num_workers)
            elif cmd == 'get_executing':
                names = sorted(os.listdir(self.executing))
                ctrlsock.send(' '.join(names))
            elif cmd in ('stop', 'kill'):
                # call self.stop() / self.kill(), reply and leave the loop
                ctrlsock.send(getattr(self, cmd)())
                break
    # the directory listed by get_executing is removed once the loop is over
    shutil.rmtree(self.executing)
def start(self):
    """
    Spawn the worker processes, then answer control commands until
    "stop" or "kill" is received (or the iteration on the control
    socket ends).
    """
    setproctitle('oq-zworkerpool %s' % self.ctrl_url[6:])  # strip tcp://

    # one PULL socket and one process per worker
    self.workers = []
    for _ in range(self.num_workers):
        pull = z.Socket(self.task_server_url, z.zmq.PULL, 'connect')
        child = multiprocessing.Process(target=self.worker, args=(pull,))
        child.start()
        pull.pid = child.pid  # pid of the process that was given this socket
        self.workers.append(pull)

    # control loop: every recognized command gets exactly one reply
    with z.Socket(self.ctrl_url, z.zmq.REP, 'bind') as ctrlsock:
        for cmd in ctrlsock:
            if cmd == 'getpid':
                ctrlsock.send(self.pid)
            elif cmd == 'get_num_workers':
                ctrlsock.send(self.num_workers)
            elif cmd == 'get_executing':
                ctrlsock.send(self.executing.value)
            elif cmd in ('stop', 'kill'):
                # call self.stop() / self.kill(), reply and leave the loop
                ctrlsock.send(getattr(self, cmd)())
                break
def set_concurrent_tasks_default(calc):
    """
    Set the default for concurrent_tasks based on the available
    worker pools.

    Every host listed in ``config.zworkers.host_cores`` that answers the
    readiness probe is asked for its number of workers. Twice the total
    is stored both in ``parallel.CT`` and in
    ``OqParam.concurrent_tasks.default``. When the total is zero the
    local CPU count is used instead.

    :param calc: not used by this function
    """
    w = config.zworkers
    if w.host_cores:
        host_cores = [hc.split() for hc in w.host_cores.split(',')]
    else:
        host_cores = []
    num_workers = 0
    for host, _cores in host_cores:
        url = 'tcp://%s:%s' % (host, w.ctrl_port)
        # probe first, so that no socket is opened towards a dead host
        if not general.socket_ready(url):
            logging.warning('%s is not running', host)
            continue
        with z.Socket(url, z.zmq.REQ, 'connect') as sock:
            num_workers += sock.send('get_num_workers')
    if num_workers == 0:
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to 1 so that the %d format and the product below work
        num_workers = os.cpu_count() or 1
        logging.warning(
            'Missing host_cores, no idea about how many cores '
            'are available, using %d', num_workers)
    parallel.CT = num_workers * 2
    OqParam.concurrent_tasks.default = num_workers * 2
    logging.warning('Using %d zmq workers', num_workers)
def start(self):
    """
    Start the database worker threads, optionally the task streamer
    thread, and then run the frontend->backend proxy. self.stop() is
    always called when the proxy call returns or raises.
    """
    w.setproctitle('oq-dbserver')  # give a nice name to the process

    # one REP socket and one thread per database worker
    dworkers = []
    for _ in range(self.num_workers):
        rep = z.Socket(self.backend, z.zmq.REP, 'connect')
        worker_thread = threading.Thread(target=self.dworker, args=(rep,))
        worker_thread.start()
        dworkers.append(rep)
    logging.warning('DB server started with %s on %s, pid %d',
                    sys.executable, self.frontend, self.pid)

    if ZMQ:
        # start task_in->task_server streamer thread
        streamer = threading.Thread(target=w._streamer, daemon=True)
        streamer.start()
        streamer_port = int(config.zworkers.ctrl_port) + 1
        logging.warning('Task streamer started on port %d', streamer_port)

    # start frontend->backend proxy for the database workers
    try:
        frontend = z.bind(self.frontend, z.zmq.ROUTER)
        backend = z.bind(self.backend, z.zmq.DEALER)
        z.zmq.proxy(frontend, backend)
    except (KeyboardInterrupt, z.zmq.ContextTerminated):
        # flag each worker socket as not running and close it
        for rep in dworkers:
            rep.running = False
            rep.zsocket.close()
        logging.warning('DB server stopped')
    finally:
        self.stop()
def worker(self, sock):
    """
    Execute the tasks received on the socket and push each result
    back to the url stored on the last argument of the task.

    :param sock: a zeromq.Socket of kind PULL receiving (cmd, args)
    """
    setproctitle('oq-zworker')
    for task in sock:
        func, params = task
        return_url = params[-1].backurl  # attached to the monitor
        with z.Socket(return_url, z.zmq.PUSH, 'connect') as push:
            # the task is executed while the return socket is open
            push.send(safely_call(func, params))
def _starmap(func, iterargs, host, task_in_port, receiver_ports):
    """
    Generator sending one (func, args) message per element of iterargs
    and then receiving the responses.

    It yields first the number of messages sent and then, one at a
    time, as many objects read from the receiver socket.
    """
    # called by parallel.Starmap.submit_all; should not be used directly
    task_in_url = 'tcp://%s:%s' % (host, task_in_port)
    receiver_url = 'tcp://%s:%s' % (host, receiver_ports)
    with z.Socket(receiver_url, z.zmq.PULL, 'bind') as receiver:
        logging.info('Receiver port for %s=%s', func.__name__, receiver.port)
        # the return address is the receiver end point with the port
        # replaced by the port actually in use
        receiver_host = receiver.end_point.rsplit(':', 1)[0]
        backurl = '%s:%s' % (receiver_host, receiver.port)
        with z.Socket(task_in_url, z.zmq.PUSH, 'connect') as sender:
            sent = 0
            for sent, args in enumerate(iterargs, 1):
                args[-1].backurl = backurl  # args[-1] is a Monitor instance
                sender.send((func, args))
        yield sent
        # receive one response for each request sent
        for _ in range(sent):
            yield receiver.zsocket.recv_pyobj()
def inspect(self):
    """
    Send a "get_executing" command to all worker pools not reported
    as 'not-running' by self.status.

    :returns: a list of pairs (host, reply to "get_executing")
    """
    report = []
    for host, _ in self.host_cores:
        state = self.status(host)[0][1]
        if state == 'not-running':
            print('%s not running' % host)
        else:
            ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
            with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                report.append((host, sock.send('get_executing')))
    return report
def kill(self):
    """
    Send a "kill" command to all worker pools not reported as
    'not-running' by self.status.

    :returns: the string 'killed [...]' with the list of contacted hosts
    """
    killed = []
    for host, _ in self.host_cores:
        state = self.status(host)[0][1]
        if state == 'not-running':
            print('%s not running' % host)
        else:
            ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
            with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                sock.send('kill')
                killed.append(host)
    return 'killed %s' % killed
def dbcmd(action, *args):
    """
    A dispatcher to the database server.

    :param action: database action to perform
    :param args: arguments
    :returns: the reply of the server; if the reply is a
        parallel.Result, the value of its .get() method
    """
    url = 'tcp://%s:%s' % (config.dbserver.host, DBSERVER_PORT)
    sock = zeromq.Socket(url, zeromq.zmq.REQ, 'connect')
    with sock:
        reply = sock.send((action,) + args)
        if not isinstance(reply, parallel.Result):
            return reply
        return reply.get()
def status(self):
    """
    Query all worker pools answering the readiness probe; the other
    hosts are silently skipped.

    :returns: a list [(host, running, total), ...]
    """
    report = []
    for host, _cores in self.host_cores:
        if general.socket_ready((host, self.ctrl_port)):
            ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
            with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                # the reply is a whitespace-separated string: count the items
                tasks = sock.send('get_executing')
                running = len(tasks.split())
                total = sock.send('get_num_workers')
                report.append((host, running, total))
    return report
def dbcmd(action, *args):
    """
    A dispatcher to the database server.

    :param action: database action to perform
    :param args: arguments
    :returns: the first element of the reply
    :raises: the exception type in the second element of the reply, if
        it is truthy, instantiated with the first element
    """
    url = 'tcp://%s:%s' % (config.dbserver.host, DBSERVER_PORT)
    sock = zeromq.Socket(url, zeromq.zmq.REQ, 'connect')
    request = (action,) + args
    with sock:
        # the reply is a triple; its third element is not used here
        res, etype, _mon = sock.send(request)
        if not etype:
            return res
        raise etype(res)
def stop(self):
    """
    Send a "stop" command to all worker pools not reported as
    'not-running' by self.status, then terminate self.streamer if
    that attribute exists.

    :returns: the string 'stopped [...]' with the list of contacted hosts
    """
    stopped = []
    for host, _ in self.host_cores:
        state = self.status(host)[0][1]
        if state == 'not-running':
            print('%s not running' % host)
        else:
            ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
            with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                sock.send('stop')
                stopped.append(host)
    if hasattr(self, 'streamer'):
        self.streamer.terminate()
    return 'stopped %s' % stopped
def kill(self):
    """
    Send a "kill" command to all worker pools answering the readiness
    probe, then kill and reap the processes stored in self.popens.

    :returns: the string 'killed [...]' with the list of contacted hosts
    """
    killed = []
    for host, _ in self.host_cores:
        if not general.socket_ready((host, self.ctrl_port)):
            continue
        ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
        with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
            sock.send('kill')
            killed.append(host)
    # signal every spawned process first, so that none is kept alive
    # while another one is being reaped
    for popen in self.popens:
        popen.kill()
    # a killed child stays around as a zombie until it is waited for
    for popen in self.popens:
        popen.wait()
    self.popens = []
    return 'killed %s' % killed
def set_concurrent_tasks_default(job_id):
    """
    Set the default for concurrent_tasks based on the available
    worker pools.

    Every host listed in ``config.zworkers.host_cores`` that answers the
    readiness probe is asked for its number of workers; twice the total
    is stored in ``OqParam.concurrent_tasks.default``.

    :param job_id: not used by this function
    """
    w = config.zworkers
    num_workers = 0
    for host, _cores in [hc.split() for hc in w.host_cores.split(',')]:
        url = 'tcp://%s:%s' % (host, w.ctrl_port)
        # probe first, so that no socket is opened towards a dead host
        if not general.socket_ready(url):
            # NOTE(review): assumes logs.LOG is a standard logging.Logger,
            # for which .warn is a deprecated alias of .warning
            logs.LOG.warning('%s is not running', host)
            continue
        with z.Socket(url, z.zmq.REQ, 'connect') as sock:
            num_workers += sock.send('get_num_workers')
    OqParam.concurrent_tasks.default = num_workers * 2
    logs.LOG.warning('Using %d zmq workers', num_workers)
def dbcmd(action, *args):
    """
    A dispatcher to the database server, reusing a single module-level
    socket which is created on the first call.

    :param action: database action to perform
    :param args: arguments
    :returns: the reply of the server; if the reply is a
        parallel.Result, the value of its .get() method
    """
    global sock
    if sock is None:
        url = 'tcp://%s:%s' % (config.dbserver.host, DBSERVER_PORT)
        # the socket is entered here and not exited by this function:
        # it will be closed when the calculation ends
        sock = zeromq.Socket(url, zeromq.zmq.REQ, 'connect').__enter__()
    reply = sock.send((action,) + args)
    return reply.get() if isinstance(reply, parallel.Result) else reply
def stop(self):
    """
    Send a "stop" command to all worker pools answering the readiness
    probe, then terminate and reap the processes in self.popens.

    :returns: the string 'stopped [...]' with the list of contacted hosts
    """
    stopped = []
    for host, _ in self.host_cores:
        if general.socket_ready((host, self.ctrl_port)):
            ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
            with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                sock.send('stop')
                stopped.append(host)
    for child in self.popens:
        child.terminate()
        # since we are not consuming any output from the spawned process
        # we must call wait() after terminate() to have Popen()
        # fully deallocate the process file descriptors, otherwise
        # zombies will arise
        child.wait()
    self.popens = []
    return 'stopped %s' % stopped
def start(self):
    """
    Start the database worker threads; if ZMQ is set also start the
    task streamer thread and the zworkers. Then run the
    frontend->backend proxy. self.stop() is always called when the
    proxy call returns or raises.
    """
    w.setproctitle('oq-dbserver')  # give a nice name to the process

    # one REP socket and one thread per database worker
    dworkers = []
    for _ in range(self.num_workers):
        rep = z.Socket(self.backend, z.zmq.REP, 'connect')
        worker_thread = threading.Thread(target=self.dworker, args=(rep,))
        worker_thread.start()
        dworkers.append(rep)
    logging.warning('DB server started with %s on %s, pid %d',
                    sys.executable, self.frontend, self.pid)

    if ZMQ:
        # start task_in->task_out streamer thread
        c = config.zworkers
        streamer_args = (self.master_host, c.task_in_port, c.task_out_port)
        streamer = threading.Thread(target=w._streamer, args=streamer_args)
        streamer.start()
        logging.warning('Task streamer started from %s -> %s',
                        c.task_in_port, c.task_out_port)

        # start zworkers and wait a bit for them
        logging.warning(self.master.start())
        time.sleep(1)

    # start frontend->backend proxy for the database workers
    try:
        frontend = z.bind(self.frontend, z.zmq.ROUTER)
        backend = z.bind(self.backend, z.zmq.DEALER)
        z.zmq.proxy(frontend, backend)
    except (KeyboardInterrupt, z.zmq.ZMQError):
        # flag each worker socket as not running and close it
        for rep in dworkers:
            rep.running = False
            rep.zsocket.close()
        logging.warning('DB server stopped')
    finally:
        self.stop()
def run_calc(self):
    """
    Prepare an OpenQuake calculation (reinvented from
    openquake.calculators.base).

    The method runs the pre-execution phase of self.calculator, reads
    or builds the ruptures depending on the job parameters, optionally
    creates the GMF datasets in the datastore and finally builds the
    task arguments. On the complete path the results are not returned
    but stored on the instance as self.args, self.mon and self.dstore.

    :returns: an empty dictionary on the early-exit paths, otherwise
        None (the end of the method is reached without a return)
    """
    with self.calculator._monitor:
        self.calculator._monitor.username = ''
        try:
            # Pre-execute setups
            self.calculator.pre_execute()
            #self.calculator.datastore.swmr_on()
            oq = self.calculator.oqparam
            dstore = self.calculator.datastore
            self.calculator.set_param()
            self.calculator.offset = 0
            # Source model
            # debugging output: dump all the attributes of the calculator
            print('self.__dict__ = ')
            print(self.calculator.__dict__)
            if oq.hazard_calculation_id:  # from ruptures
                dstore.parent = self.calculator.datastore.read(
                    oq.hazard_calculation_id)
            elif hasattr(self.calculator, 'csm'):  # from sources
                # NOTE(review): this is a method of self, not of the
                # calculator (see the commented-out call below); confirm
                # that it is defined elsewhere in the class
                self.calculator_build_events_from_sources()
                #self.calculator.build_events_from_sources()
                # nothing else to do if neither GMFs nor hazard curves
                # from GMFs are requested
                if (oq.ground_motion_fields is False and
                        oq.hazard_curves_from_gmfs is False):
                    return {}
            elif 'rupture_model' not in oq.inputs:
                logging.warning(
                    'There is no rupture_model, the calculator will just '
                    'import data without performing any calculation')
                fake = logictree.FullLogicTree.fake()
                dstore['full_lt'] = fake  # needed to expose the outputs
                dstore['weights'] = [1.]
                return {}
            else:  # scenario
                self.calculator._read_scenario_ruptures()
                # same early exit as in the "from sources" branch
                if (oq.ground_motion_fields is False and
                        oq.hazard_curves_from_gmfs is False):
                    return {}
            # Intensity measure models
            if oq.ground_motion_fields:
                imts = oq.get_primary_imtls()
                nrups = len(dstore['ruptures'])
                base.create_gmf_data(dstore, imts, oq.get_sec_imts())
                dstore.create_dset('gmf_data/sigma_epsilon',
                                   getters.sig_eps_dt(oq.imtls))
                # one entry per rupture
                dstore.create_dset('gmf_data/time_by_rup',
                                   getters.time_dt, (nrups, ),
                                   fillvalue=None)
            # Prepare inputs for GmfGetter
            nr = len(dstore['ruptures'])
            logging.info('Reading {:_d} ruptures'.format(nr))
            rgetters = getters.get_rupture_getters(
                dstore, oq.concurrent_tasks * 1.25,
                srcfilter=self.calculator.srcfilter)
            # one (rupture getter, parameters) pair per rupture getter
            args = [(rgetter, self.calculator.param)
                    for rgetter in rgetters]
            mon = performance.Monitor()
            mon.version = version
            mon.config = config
            rcvr = 'tcp://%s:%s' % (config.dbserver.listen,
                                    config.dbserver.receiver_ports)
            # NOTE(review): the socket is entered here but never exited
            # nor stored in this method; confirm who is expected to
            # close it
            skt = zeromq.Socket(rcvr, zeromq.zmq.PULL, 'bind').__enter__()
            mon.backurl = 'tcp://%s:%s' % (config.dbserver.host, skt.port)
            mon = mon.new(
                operation='total ' +
                self.calculator.core_task.__func__.__name__,
                measuremem=True)
            # NOTE(review): args[0] is a (rgetter, param) tuple, so the
            # default 1. is presumably always used here; args[0] also
            # raises IndexError if there are no rupture getters
            mon.weight = getattr(args[0], 'weight', 1.)  # used in task_info
            mon.task_no = 1  # initialize the task number
            # args is a list: += with a one-element tuple appends the
            # monitor itself as the last element
            args += (mon, )
            self.args = args
            self.mon = mon
            self.dstore = dstore
        finally:
            # printed on every path, including early returns and errors
            print('FetchOpenQuake: OpenQuake Hazard Calculator defined.')