def handle(self):
    print "SERVER REQUEST", self.__class__, "ME: ", self.request.getsockname()
    print "FROM :", self.request.getpeername()
    message = recv_unicode_netstring(self.rfile)
    print message
    pl = PollingObject()
    parsed = self.server.mtool.unpack_msg(message)

    if parsed.type == "find_eeg_experiments" or parsed.type == "find_eeg_amplifiers":
        pull_addr = "tcp://" + socket.gethostname() + ":" + str(self.server.pull_port)
        parsed.client_push_address = pull_addr

    srv_sock = self.make_srv_sock()
    try:
        send_msg(srv_sock, parsed.SerializeToString())
        response, det = pl.poll_recv(srv_sock, timeout=5000)
    finally:
        srv_sock.close()

    print "passed msg and got result: ", response
    if not response:
        self.bad_response(self.wfile, det)
        return

    if parsed.type == "find_eeg_experiments" or parsed.type == "find_eeg_amplifiers":
        response, det = pl.poll_recv(self.server.pull_sock, timeout=20000)
        if not response:
            self.bad_response(self.wfile, det)
            return

    data = make_unicode_netstring(response)
    self.wfile.write(data)
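
# The handlers in this section all lean on PollingObject from
# obci.control.common.message. The class below is only a sketch of the
# contract inferred from the call sites -- poll_recv(socket, timeout_ms)
# returns (message, None) on success and (None, details) on timeout -- and
# is not the project's actual implementation.
import zmq


class PollingObjectSketch(object):

    def poll_recv(self, socket, timeout):
        poller = zmq.Poller()
        poller.register(socket, zmq.POLLIN)
        try:
            ready = dict(poller.poll(timeout=timeout))
        finally:
            poller.unregister(socket)
        if ready.get(socket) == zmq.POLLIN:
            return socket.recv(), None
        return None, "timeout ({0} ms) while waiting for a reply".format(timeout)
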
def _publisher_thread(self, pub_addrs, pull_address, push_addr):
    # FIXME aaaaahhh pub_addresses are set here, not in the main thread
    # (which reads them in _register method)
    pub_sock, self.pub_addresses = self._init_socket(pub_addrs, zmq.PUB)

    pull_sock = self.ctx.socket(zmq.PULL)
    pull_sock.bind(pull_address)

    push_sock = self.ctx.socket(zmq.PUSH)
    push_sock.connect(push_addr)
    send_msg(push_sock, u'1')

    po = PollingObject()
    while not self._stop_publishing:
        try:
            to_publish, det = po.poll_recv(pull_sock, 500)
            if to_publish:
                send_msg(pub_sock, to_publish)
        except:
            # print self.name, '.Publisher -- STOP.'
            break
    # self.logger.info("close sock %s %s", pub_addrs, pub_sock)
    pub_sock.close()
    pull_sock.close()
    push_sock.close()
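
# Hedged sketch (not from the original module): a plausible parent-side
# counterpart of the PUSH/PULL handshake used by _publisher_thread above.
# The parent binds a PULL socket, passes its address as push_addr, and blocks
# until the thread reports that the PUB socket is bound by sending u'1'.
# Method and attribute names other than _publisher_thread are illustrative.
def _start_publisher_sketch(self, pub_addrs, pull_address):
    ready_sock = self.ctx.socket(zmq.PULL)
    ready_port = ready_sock.bind_to_random_port('tcp://127.0.0.1')
    ready_addr = 'tcp://127.0.0.1:' + str(ready_port)

    pub_thread = threading.Thread(target=self._publisher_thread,
                                  args=(pub_addrs, pull_address, ready_addr))
    pub_thread.daemon = True
    pub_thread.start()

    ready_sock.recv()  # blocks until the publisher thread sends its ready marker
    ready_sock.close()
    return pub_thread
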
def handle(self):
    message = recv_unicode_netstring(self.rfile)
    srv_sock = self.make_srv_sock()
    try:
        send_msg(srv_sock, message)
        pl = PollingObject()
        response, det = pl.poll_recv(srv_sock, timeout=5000)
    finally:
        srv_sock.close()
    if not response:
        self.bad_response(self.wfile, det)
        return
    self.wfile.write(make_unicode_netstring(response))
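
# The TCP handlers above frame messages with recv_unicode_netstring /
# make_unicode_netstring. Below is a minimal sketch of the encoder, assuming
# the obci helpers follow the standard "<length>:<payload>," netstring format
# (an assumption inferred from the names, not the project's own code).
def make_unicode_netstring_sketch(text):
    payload = text.encode('utf-8')
    return str(len(payload)).encode('ascii') + b':' + payload + b','
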
def kill(self):
    # send "kill" to the process or kill request to its supervisor?
    self.stop_monitoring()
    if not self._ctx:
        self._ctx = zmq.Context()
    rq_sock = self._ctx.socket(zmq.REQ)
    try:
        rq_sock.connect(self.rq_address)
        mtool = OBCIMessageTool(message_templates)
        poller = PollingObject()
        send_msg(rq_sock, mtool.fill_msg("kill_process", pid=self.pid,
                                         machine=self.machine_ip))
        res, _ = poller.poll_recv(rq_sock, timeout=5000)
    finally:
        rq_sock.close()
    if res:
        res = mtool.unpack_msg(res)
    print "Response to kill request: ", res
    with self._status_lock:
        self._status = TERMINATED
def stringReceived(self, string):
    req_sock = self.factory.ctx.socket(zmq.REQ)
    req_sock.connect(self.factory.zmq_rep_addr)
    try:
        req = unicode(string, encoding='utf-8')
        print "twisted got:", req
        bad = False
        try:
            parsed = self.factory.mtool.unpack_msg(req)
        except ValueError:
            bad = True
        if not bad:
            if parsed.type in self.factory.long_rqs:
                sock, port = self.factory.long_rqs[parsed.type]
                pull_addr = 'tcp://' + socket.gethostname() + ':' + str(port)
                parsed.client_push_address = pull_addr
                send_msg(req_sock, parsed.SerializeToString())
            else:
                send_msg(req_sock, req)
        pl = PollingObject()
        msg, det = pl.poll_recv(req_sock, timeout=5000)
    finally:
        req_sock.close()

    if not msg:
        msg = self.factory.mtool.fill_msg("rq_error", details=det)

    if not bad:
        if parsed.type in self.factory.long_rqs:
            sock, port = self.factory.long_rqs[parsed.type]
            msg, det = pl.poll_recv(sock, timeout=20000)
            if not msg:
                msg = self.factory.mtool.fill_msg("rq_error", details=det)

    encmsg = msg.encode('utf-8')
    reactor.callFromThread(self.sendString, encmsg)
def __init__(self, server_address, handler_class, bind_and_activate=True,
             zmq_ctx=None, zmq_rep_addr=None):
    self.daemon_threads = True
    self.server_timeout = 45
    SocketServer.TCPServer.__init__(self, server_address, handler_class,
                                    bind_and_activate)
    self.mtool = OBCIMessageTool(message_templates)
    self.pl = PollingObject()
    self.ctx = zmq_ctx
    self.rep_addr = zmq_rep_addr
def find_eeg_experiments_and_push_results(ctx, srv_addrs, rq_message, nearby_servers):
    LOGGER = logger.get_logger("eeg_experiment_finder", "info")
    finder = EEGExperimentFinder(srv_addrs, ctx, rq_message.client_push_address,
                                 nearby_servers)
    exps = finder.find_amplified_experiments()
    mpoller = PollingObject()

    checked = rq_message.checked_srvs
    if not isinstance(checked, list):
        checked = []
    nrb = {}
    for uid, srv in nearby_servers.snapshot().iteritems():
        if srv.ip not in checked:
            nrb[uid] = srv

    if not checked and nearby_servers.dict_snapshot():
        my_addr = nearby_servers.ip(hostname=socket.gethostname())
        LOGGER.info("checking other servers")
        print [(srv.hostname, srv.ip) for srv in nrb.values()]
        ip_list = [srv.ip for srv in nrb.values() if srv.ip != my_addr]
        LOGGER.info("number of servers to query: " + str(len(ip_list)))
        exps += _gather_other_server_results(ctx, my_addr, ip_list)
    else:
        LOGGER.info("not checking other servers")

    LOGGER.info("return to: " + rq_message.client_push_address)
    to_client = ctx.socket(zmq.PUSH)
    to_client.connect(rq_message.client_push_address)
    send_msg(to_client,
             finder.mtool.fill_msg('eeg_experiments',
                                   sender_ip=socket.gethostname(),
                                   experiment_list=exps))
    LOGGER.info("sent exp data... " + str(exps)[:500] + ' [...]')
    time.sleep(0.1)
class Process(object):

    def __init__(self, proc_description, reg_timeout_desc=None,
                 monitoring_optflags=PING, logger=None):
        self.desc = proc_description
        self.must_register = reg_timeout_desc is not None

        self._status_lock = threading.RLock()
        self._status = UNKNOWN if self.must_register else RUNNING
        self._status_details = None

        self.ping_it = monitoring_optflags & PING
        self.check_returncode = monitoring_optflags & RETURNCODE if \
            self.desc.pid is not None else False

        self.logger = logger or get_logger(
            'subprocess_monitor' + '-' + self.desc.name + '-' + str(self.desc.pid),
            stream_level='info')

        self.set_registration_timeout_handler(reg_timeout_desc)

        self.registration_data = None

        self._stop_monitoring = False
        self._ping_thread = None
        self._ping_retries = 8
        self._returncode_thread = None

        self._mtool = OBCIMessageTool(message_templates)
        self._ctx = None
        self.rq_sock = None
        self._poller = PollingObject()
        self.delete = False

    @property
    def machine_ip(self):
        return self.desc.machine_ip

    @property
    def pid(self):
        return self.desc.pid

    @property
    def path(self):
        return self.desc.path

    @property
    def proc_type(self):
        return self.desc.proc_type

    @property
    def name(self):
        return self.desc.name

    def status(self):
        with self._status_lock:
            return self._status, self._status_details

    def set_registration_timeout_handler(self, reg_timeout_desc):
        with self._status_lock:
            self._status = UNKNOWN
            self._status_details = None
        self.must_register = reg_timeout_desc is not None
        self.reg_timeout_desc = reg_timeout_desc
        self.reg_timer = None if not self.must_register else \
            self.new_timer(self.reg_timeout_desc, REG_TIMER)
        if self.must_register:
            self.reg_timer.start()

    def is_local(self):
        raise NotImplementedError()

    def timeout_handler(self, custom_method, args, type_):
        self._do_handle_timeout(type_)
        custom_method(*args)

    def _do_handle_timeout(self, type_):
        raise NotImplementedError()

    def new_timer(self, tim_desc, type_):
        return threading.Timer(tim_desc.timeout, self.timeout_handler,
                               [tim_desc.timeout_method, tim_desc.timeout_args, type_])

    def registered(self, reg_data):
        if self.reg_timer is not None:
            self.reg_timer.cancel()
        self.logger.info("{0} [{1}] REGISTERED!!! {2}".format(
            self.name, self.proc_type, reg_data.machine_ip))
        # print "ping:", self.ping_it, "ret:", self.check_returncode
        with self._status_lock:
            self._status = RUNNING
        # TODO validate registration data
        self.registration_data = reg_data
        self.logger.info("reg_data" + str(vars(reg_data)))
        if self.ping_it:
            if not self._ctx:
                self._ctx = zmq.Context()
            self.rq_sock = self._ctx.socket(zmq.REQ)
            for addr in reg_data.rep_addrs:
                if reg_data.machine_ip != socket.gethostname() and\
                        net.addr_is_local(addr):
                    continue
                self.logger.debug(self.name + "connecting to " + addr)
                self.rq_sock.connect(addr)

    def stop_monitoring(self):
        if self.reg_timer:
            self.reg_timer.cancel()
            self.reg_timer = None
        self._stop_monitoring = True
        if self._ping_thread is not None:
            self.logger.info("%s, %s, %s", self.proc_type, self.name,
                             "Joining ping thread")
            self._ping_thread.join()
        if self._returncode_thread is not None:
            self.logger.info("%s %s %s", self.proc_type, self.name,
                             "joining returncode thread")
            self._returncode_thread.join()
        self.logger.info("monitor for: %s, %s, %s", self.proc_type, self.name,
                         " ...monitoring threads stopped.")

    def finished(self):
        finished = True
        if self._ping_thread is not None:
            finished = not self._ping_thread.is_alive()
        if self._returncode_thread is not None:
            finished = finished and not self._returncode_thread.is_alive()
        return finished

    def process_is_running(self):
        running = True
        if self._ping_thread is not None:
            running = self._ping_thread.is_alive()
        if self._returncode_thread is not None:
            running = running and self._returncode_thread.is_alive()
        return running

    def start_monitoring(self):
        if self.ping_it:
            self._ping_thread = threading.Thread(target=self.ping_monitor, args=())
            self._ping_thread.daemon = True
            self._ping_thread.start()
        if self.check_returncode:
            self._returncode_thread = threading.Thread(target=self.returncode_monitor,
                                                       args=())
            self._returncode_thread.daemon = True
            self._returncode_thread.start()

    def ping_monitor(self):
        is_alive = True
        try:
            while not self._stop_monitoring and is_alive:
                time.sleep(2)
                if self.rq_sock is not None:
                    send_msg(self.rq_sock, self._mtool.fill_msg('ping'))
                    result = None
                    while self._ping_retries and not result and not self._stop_monitoring:
                        result, det = self._poller.poll_recv(socket=self.rq_sock,
                                                             timeout=1500)
                    if not result and not self._stop_monitoring:
                        self.logger.info("%s %s %s", self.proc_type, self.name,
                                         "NO RESPONSE TO PING!")
                        with self._status_lock:
                            if self._status not in [FAILED, FINISHED]:
                                self._status = NON_RESPONSIVE
                                self._status_details = 'ping response timeout'
                                print "status:", self._status
                        is_alive = False
        finally:
            if self.rq_sock is not None:
                self.rq_sock.close(linger=0)

    def returncode_monitor(self):
        raise NotImplementedError()

    def kill(self):
        raise NotImplementedError()

    def mark_delete(self):
        with self._status_lock:
            self.delete = True

    def marked_delete(self):
        with self._status_lock:
            return self.delete
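
# Hedged sketch (not part of the original code): ping_monitor above sends a
# 'ping' message over a REQ socket and only checks that *some* reply arrives
# within the timeout, so a monitored peer just needs a REP socket that answers
# every request. Names below are illustrative; the reply content is assumed.
def ping_rep_loop_sketch(ctx, rep_addr, mtool):
    rep_sock = ctx.socket(zmq.REP)
    rep_sock.bind(rep_addr)
    try:
        while True:
            rep_sock.recv()                             # e.g. the serialized 'ping'
            send_msg(rep_sock, mtool.fill_msg('ping'))  # any reply satisfies the monitor
    finally:
        rep_sock.close()
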
#!/usr/bin/python3

import json
import zmq
import sys

from obci.control.common.message import OBCIMessageTool, send_msg, PollingObject
from obci.control.launcher.launcher_messages import message_templates
from obci.control.common.obci_control_settings import PORT_RANGE
import obci.control.common.net_tools as net

if __name__ == '__main__':
    mtool = OBCIMessageTool(message_templates)
    pl = PollingObject()
    # ifname = net.server_ifname()
    my_addr = 'tcp://' + 'localhost'

    ctx = zmq.Context()
    server_req = ctx.socket(zmq.REQ)
    server_req.connect(my_addr + ':' + net.server_rep_port())

    exp_info_pull = ctx.socket(zmq.PULL)
    port = exp_info_pull.bind_to_random_port('tcp://*',
                                             min_port=PORT_RANGE[0],
                                             max_port=PORT_RANGE[1],
                                             max_tries=500)
class SubprocessMonitor(object):

    def __init__(self, zmq_ctx, uuid, logger=None, obci_dns=None):
        self._processes = {}
        self._ctx = zmq_ctx
        self.uuid = uuid
        self.logger = logger or get_logger('subprocess_monitor',
                                           stream_level='warning')
        self.obci_dns = obci_dns
        self._mtool = OBCIMessageTool(message_templates)
        self.poller = PollingObject()
        self._proc_lock = threading.RLock()

    def not_running_processes(self):
        status = {}
        with self._proc_lock:
            for key, proc in self._processes.iteritems():
                st = proc.status()
                if st[0] in [FINISHED, FAILED, TERMINATED] and not proc.marked_delete():
                    status[key] = st
        return status

    def unknown_status_processes(self):
        with self._proc_lock:
            return [proc for proc in self._processes.values()
                    if proc.status()[0] == UNKNOWN]

    def process(self, machine_ip, pid):
        with self._proc_lock:
            return self._processes.get((machine_ip, pid), None)

    def killall(self, force=False):
        with self._proc_lock:
            for proc in self._processes.values():
                kill_method = proc.kill if not force else proc.kill_with_force
                if proc.status()[0] not in [FINISHED, FAILED, TERMINATED]:
                    kill_method()

    def delete(self, machine, pid):
        proc = self._processes.get((machine, pid), None)
        if proc is None:
            raise Exception("Process not found: " + str((machine, pid)))
        if not proc.running():
            del self._processes[(machine, pid)]
            return True
        else:
            self.logger.error("Process is running, will not delete! " +
                              str((machine, pid)))
            return False

    def delete_all(self):
        with self._proc_lock:
            for proc in self._processes.values():
                del proc
            self._processes = {}

    def stop_monitoring(self):
        with self._proc_lock:
            for proc in self._processes.values():
                proc.stop_monitoring()

    def _launch_args(self, path, args):
        # TODO fix interpreter calls // only python is supported
        if path.endswith('.py'):
            launch_args = PYTHON_CALL + [path] + args
        else:
            launch_args = [path] + args
        return launch_args

    def _stdio_actions(self, io_flags):
        out = subprocess.PIPE if io_flags & STDOUT else None
        if io_flags & STDERR:
            err = subprocess.PIPE
        elif out is not None:
            err = subprocess.STDOUT
        else:
            err = None
        stdin = subprocess.PIPE if io_flags & STDIN else None
        return (out, err, stdin)

    def _local_launch(self, launch_args, stdio_actions, env):
        ON_POSIX = 'posix' in sys.builtin_module_names
        out, err, stdin = stdio_actions
        try:
            if sys.platform == "win32":
                crflags = subprocess.CREATE_NEW_PROCESS_GROUP
                popen_obj = subprocess.Popen(launch_args, stdout=out, stderr=err,
                                             stdin=stdin, bufsize=1,
                                             close_fds=ON_POSIX, env=env,
                                             creationflags=crflags)
            else:
                popen_obj = subprocess.Popen(launch_args, stdout=out, stderr=err,
                                             stdin=stdin, bufsize=1,
                                             close_fds=ON_POSIX, env=env)
            details = "Popen constructor finished for " +\
                str(launch_args[:3]) + "(...)"
            self.logger.info(details)
            return popen_obj, details
        except OSError as e:
            details = "Unable to spawn process {0} [{1}]".format(launch_args, e.args)
            self.logger.error(details)
            return None, details
        except ValueError as e:
            details = "Unable to spawn process (bad arguments) {0} [{1}]".format(
                launch_args, e.args)
            self.logger.error(details)
            return None, details
        except Exception as e:
            details = "Process launch Error: " + str(e) + str(e.args) + str(vars(e))
            self.logger.error(details)
            return None, details

    def new_local_process(self, path, args, proc_type='', name='',
                          capture_io=STDOUT | STDIN,
                          stdout_log=None, stderr_log=None,
                          register_timeout_desc=None,
                          monitoring_optflags=RETURNCODE | PING,
                          machine_ip=None, env=None):
        launch_args = self._launch_args(path, args)
        self.logger.debug(proc_type + " local path: " + path)
        machine = machine_ip if machine_ip else socket.gethostname()
        std_actions = self._stdio_actions(capture_io)
        timeout_desc = register_timeout_desc
        self.logger.debug('process launch arg list: %s', launch_args)

        popen_obj, details = self._local_launch(launch_args, std_actions, env)
        if popen_obj is None:
            return None, details
        if popen_obj.returncode is not None:
            det = "opened process already terminated" + popen_obj.communicate()
            self.logger.warning(det)

        if not name:
            name = os.path.basename(path)
        process_desc = ProcessDescription(proc_type=proc_type,
                                          name=name,
                                          path=path,
                                          args=args,
                                          machine_ip=machine,
                                          pid=popen_obj.pid)
        # io_handler will be None if no stdio is captured
        io_handler = start_stdio_handler(popen_obj, std_actions,
                                         ':'.join([machine, path, name]),
                                         stdout_log, stderr_log)

        new_proc = LocalProcess(process_desc, popen_obj,
                                io_handler=io_handler,
                                reg_timeout_desc=timeout_desc,
                                monitoring_optflags=monitoring_optflags,
                                logger=self.logger)
        if monitoring_optflags & PING:
            new_proc._ctx = self._ctx

        with self._proc_lock:
            self._processes[(machine, popen_obj.pid)] = new_proc
        new_proc.start_monitoring()
        return new_proc, None

    def new_remote_process(self, path, args, proc_type, name,
                           machine_ip, conn_addr,
                           capture_io=STDOUT | STDIN,
                           stdout_log=None, stderr_log=None,
                           register_timeout_desc=None,
                           monitoring_optflags=PING):
        """Send a request to conn_addr for a process launch. By default
        the process will be monitored with ping requests and locally by
        the remote peer."""
        timeout_desc = register_timeout_desc
        rq_message = self._mtool.fill_msg('launch_process',
                                          path=path, args=args,
                                          proc_type=proc_type, name=name,
                                          machine_ip=machine_ip,
                                          capture_io=capture_io,
                                          stdout_log=stdout_log,
                                          stderr_log=stderr_log)

        rq_sock = self._ctx.socket(zmq.REQ)
        try:
            rq_sock.connect(conn_addr)
        except zmq.ZMQError as e:
            det = "Could not connect to {0}, err: {1}, {2}".format(
                conn_addr, e, e.args)
            self.logger.error(det)
            return None, det

        self.logger.info("SENDING LAUNCH REQUEST {0} {1} {2} {3}".format(
            machine_ip, _DEFAULT_TIMEOUT_MS, 'ms', conn_addr))
        send_msg(rq_sock, rq_message)
        result, details = self.poller.poll_recv(rq_sock, _DEFAULT_TIMEOUT_MS)
        rq_sock.close()

        if not result:
            details = details + " [address was: {0}]".format(conn_addr)
            self.logger.error(details)
            return None, details
        else:
            result = self._mtool.unpack_msg(result)
            if result.type == 'rq_error':
                det = "REQUEST FAILED" + str(result.err_code) + ':' + str(result.details)
                self.logger.error(det)
                return None, det
            elif result.type == 'launched_process_info':
                self.logger.info("REQUEST SUCCESS %s", result.dict())
                process_desc = ProcessDescription(proc_type=result.proc_type,
                                                  name=result.name,
                                                  path=result.path,
                                                  args=args,
                                                  machine_ip=result.machine,
                                                  pid=result.pid)
                new_proc = RemoteProcess(process_desc, conn_addr,
                                         reg_timeout_desc=timeout_desc,
                                         monitoring_optflags=monitoring_optflags,
                                         logger=self.logger)
                if monitoring_optflags & PING:
                    new_proc._ctx = self._ctx
                with self._proc_lock:
                    self._processes[(result.machine, result.pid)] = new_proc
                new_proc.start_monitoring()
                return new_proc, None
# -*- coding: utf-8 -*-

import json
import zmq
import sys
import socket

from obci.control.common.message import OBCIMessageTool, send_msg, recv_msg, PollingObject
from obci.control.launcher.launcher_messages import message_templates, error_codes
from obci.control.common.obci_control_settings import PORT_RANGE
import obci.control.common.net_tools as net

if __name__ == '__main__':
    mtool = OBCIMessageTool(message_templates)
    pl = PollingObject()
    # ifname = net.server_ifname()
    my_addr = 'tcp://' + 'localhost'

    ctx = zmq.Context()
    server_req = ctx.socket(zmq.REQ)
    server_req.connect(my_addr + ':' + net.server_rep_port())

    exp_info_pull = ctx.socket(zmq.PULL)
    port = exp_info_pull.bind_to_random_port('tcp://*',
                                             min_port=PORT_RANGE[0],
                                             max_port=PORT_RANGE[1],
                                             max_tries=500)
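
    # Hedged continuation (not in the original script): ask obci_server for
    # running EEG experiments and collect the result pushed back to
    # exp_info_pull, mirroring the find_eeg_experiments flow handled by the
    # server-side code above. Assumes the 'find_eeg_experiments' template
    # accepts a client_push_address field, as those handlers suggest.
    my_push_addr = 'tcp://' + socket.gethostname() + ':' + str(port)
    send_msg(server_req, mtool.fill_msg('find_eeg_experiments',
                                        client_push_address=my_push_addr))

    ack, details = pl.poll_recv(server_req, timeout=5000)
    print(ack if ack else details)

    result, details = pl.poll_recv(exp_info_pull, timeout=20000)
    print(result if result else details)
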
class OBCIClient(object):

    default_timeout = 5000

    def __init__(self, server_addresses, zmq_context=None):
        self.ctx = zmq_context if zmq_context else zmq.Context()
        self.server_addresses = server_addresses
        self.server_req_socket = None
        self.init_server_socket(server_addresses)
        # self = zmq.Poller()
        # self.register(self.server_req_socket, zmq.POLLIN)
        self.poller = PollingObject()
        self.mtool = OBCIMessageTool(message_templates)
        self.dns = net.DNS()

    def init_server_socket(self, srv_addrs):
        if self.server_req_socket is not None:
            print("server socket restart")
            self.server_req_socket.close()
        self.server_req_socket = self.ctx.socket(zmq.REQ)
        for addr in srv_addrs:
            print(addr)
            self.server_req_socket.connect(addr)

    def launch(self, launch_file=None, sandbox_dir=None, name=None, overwrites=None):
        result = self.send_create_experiment(launch_file, sandbox_dir, name, overwrites)
        print("create result:", result)
        if not result:
            self.init_server_socket(self.server_addresses)
            return result
        if result.type != "experiment_created":
            return result
        print(result)
        machine = result.origin_machine
        addrs = [addr for addr in result.rep_addrs
                 if self._addr_connectable(addr, machine)]
        return self.send_start_experiment(addrs)

    def morph(self, exp_strname, launch_file, name=None, overwrites=None, leave_on=None):
        response = self.get_experiment_contact(exp_strname)
        exp_sock = self.ctx.socket(zmq.REQ)
        try:
            if response.type == "rq_error" or response.type == "no_data":
                return response
            for addr in response.rep_addrs:
                exp_sock.connect(addr)
            msg = self.mtool.fill_msg('morph_to_new_scenario',
                                      launch_file=launch_file,
                                      name=name,
                                      overwrites=overwrites,
                                      leave_on=leave_on)
            send_msg(exp_sock, msg)
            response, details = self.poll_recv(exp_sock, 6000)
            return response
        finally:
            exp_sock.close()

    def _addr_connectable(self, addr, machine):
        return machine == socket.gethostname() or \
            (net.is_ip(addr) and not net.addr_is_local(addr))

    def start_chosen_experiment(self, exp_strname):
        response = self.get_experiment_contact(exp_strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        return self.send_start_experiment(response.rep_addrs)

    def send_start_experiment(self, exp_addrs):
        exp_sock = self.ctx.socket(zmq.REQ)
        try:
            for addr in exp_addrs:
                exp_sock.connect(addr)
            send_msg(exp_sock, self.mtool.fill_msg("start_experiment"))
            reply, details = self.poll_recv(exp_sock, 20000)
            # print reply
            return reply
        finally:
            exp_sock.close()

    def force_kill_experiment(self, strname):
        pass

    def get_experiment_contact(self, strname):
        send_msg(self.server_req_socket,
                 self.mtool.fill_msg("get_experiment_contact", strname=strname))
        response, details = self.poll_recv(self.server_req_socket, self.default_timeout)
        return response

    def ping_server(self, timeout=50):
        send_msg(self.server_req_socket, self.mtool.fill_msg("ping"))
        response, details = self.poll_recv(self.server_req_socket, timeout)
        return response

    def retry_ping(self, timeout=50):
        response, details = self.poll_recv(self.server_req_socket, timeout)
        return response

    def send_create_experiment(self, launch_file=None, sandbox_dir=None,
                               name=None, overwrites=None):
        send_msg(self.server_req_socket,
                 self.mtool.fill_msg("create_experiment",
                                     launch_file=launch_file,
                                     sandbox_dir=sandbox_dir,
                                     name=name,
                                     overwrites=overwrites))
        response, details = self.poll_recv(self.server_req_socket, 5000)
        return response

    def send_list_experiments(self):
        send_msg(self.server_req_socket, self.mtool.fill_msg("list_experiments"))
        response, details = self.poll_recv(self.server_req_socket, 4000)
        return response

    def send_list_nearby_machines(self):
        send_msg(self.server_req_socket, self.mtool.fill_msg("list_nearby_machines"))
        response, details = self.poll_recv(self.server_req_socket, 4000)
        return response

    def get_experiment_details(self, strname, peer_id=None):
        response = self.get_experiment_contact(strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        sock = self.ctx.socket(zmq.REQ)
        try:
            for addr in response.rep_addrs:
                sock.connect(addr)
            if peer_id:
                send_msg(sock, self.mtool.fill_msg("get_peer_info", peer_id=peer_id))
            else:
                send_msg(sock, self.mtool.fill_msg("get_experiment_info"))
            response, details = self.poll_recv(sock, 2000)
            return response
        finally:
            sock.close()

    def configure_peer(self, exp_strname, peer_id, config_overrides, override_files=None):
        response = self.get_experiment_contact(exp_strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        sock = self.ctx.socket(zmq.REQ)
        try:
            for addr in response.rep_addrs:
                sock.connect(addr)
            if override_files:
                send_msg(sock, self.mtool.fill_msg("get_peer_info", peer_id=peer_id))
                response, details = self.poll_recv(sock, 2000)
                if response.type == 'rq_error':
                    return response
            msg = self.mtool.fill_msg("update_peer_config", peer_id=peer_id,
                                      **config_overrides)
            send_msg(sock, msg)
            # print msg
            response, details = self.poll_recv(sock, 2000)
            return response
        finally:
            sock.close()

    # "update_peer_config" : dict(peer_id='', local_params='',
    #                             external_params='', launch_dependencies='',
    #                             config_sources=''),

    def kill_exp(self, strname, force=False):
        send_msg(self.server_req_socket,
                 self.mtool.fill_msg("kill_experiment", strname=strname))
        return self.poll_recv(self.server_req_socket, 2000)[0]

    def srv_kill(self):
        send_msg(self.server_req_socket, self.mtool.fill_msg("kill"))
        return self.poll_recv(self.server_req_socket, 2000)[0]

    def join_experiment(self, strname, peer_id, path):
        response = self.get_experiment_contact(strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        sock = self.ctx.socket(zmq.REQ)
        try:
            self._connect(sock, response.rep_addrs)
            send_msg(sock, self.mtool.fill_msg("join_experiment",
                                               peer_id=peer_id, path=path,
                                               peer_type='obci_peer'))
            response, details = self.poll_recv(sock, 5000)
            sock.close()
            return response
        finally:
            sock.close()

    def add_peer(self, strname, peer_id, path, machine,
                 param_overwrites=None, custom_config_path=None,
                 config_sources=None, launch_dependencies=None,
                 apply_globals=True):
        response = self.get_experiment_contact(strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        sock = self.ctx.socket(zmq.REQ)
        try:
            self._connect(sock, response.rep_addrs)
            send_msg(sock, self.mtool.fill_msg("add_peer",
                                               peer_id=peer_id,
                                               peer_path=path,
                                               peer_type='obci_peer',
                                               machine=machine,
                                               param_overwrites=param_overwrites,
                                               custom_config_path=custom_config_path,
                                               config_sources=config_sources,
                                               launch_dependencies=launch_dependencies,
                                               apply_globals=apply_globals))
            response, details = self.poll_recv(sock, 5000)
            return response
        finally:
            sock.close()

    def kill_peer(self, exp_strname, peer_id, remove_config=False):
        response = self.get_experiment_contact(exp_strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        try:
            sock = self.ctx.socket(zmq.REQ)
            self._connect(sock, response.rep_addrs)
            send_msg(sock, self.mtool.fill_msg("kill_peer",
                                               peer_id=peer_id,
                                               remove_config=remove_config))
            response, details = self.poll_recv(sock, 5000)
            return response
        finally:
            sock.close()

    def _connect(self, sock, addr_list):
        print("**** ", addr_list)
        this = self._is_this_machine(addr_list)
        connected = False
        for addr in addr_list:
            if not this and ('localhost' in addr or '127.0.0.1' in addr):
                continue
            try:
                sock.connect(addr)
                connected = True
            except zmq.ZMQError as e:
                print(addr, " ::: ", str(e))
        if not connected:
            raise Exception("Could not connect to any of the addresses: " + str(addr_list))

    def _is_this_machine(self, addr_list):
        for addr in addr_list:
            if self.dns.is_this_machine(addr):
                return True
        return False

    def leave_experiment(self, strname, peer_id):
        response = self.get_experiment_contact(strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        try:
            sock = self.ctx.socket(zmq.REQ)
            self._connect(sock, response.rep_addrs)
            send_msg(sock, self.mtool.fill_msg("leave_experiment", peer_id=peer_id))
            response, details = self.poll_recv(sock, 5000)
            return response
        finally:
            sock.close()

    def get_tail(self, strname, peer_id, len_):
        response = self.get_experiment_contact(strname)
        if response.type == "rq_error" or response.type == "no_data":
            return response
        sock = self.ctx.socket(zmq.REQ)
        try:
            for addr in response.rep_addrs:
                sock.connect(addr)
            send_msg(sock, self.mtool.fill_msg("get_tail", peer_id=peer_id, len=len_))
            response, details = self.poll_recv(sock, 4000)
            return response
        finally:
            sock.close()

    def poll_recv(self, socket, timeout):
        result, details = self.poller.poll_recv(socket, timeout)
        if result:
            result = self.mtool.unpack_msg(result)
        else:
            result = EmptyResponse(details)
        return result, details
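
# Hedged usage sketch for OBCIClient above; the server REP address is a
# placeholder, not a value taken from the original code.
if __name__ == '__main__':
    ctx = zmq.Context()
    client = OBCIClient(['tcp://127.0.0.1:54654'], ctx)  # hypothetical address
    print(client.ping_server(timeout=2000))
    print(client.send_list_experiments())
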
class EEGExperimentFinder(object):

    def __init__(self, srv_addrs, ctx, client_push_address, nearby_servers):
        self.ctx = ctx
        self.server_req_socket = self.ctx.socket(zmq.REQ)
        for addr in srv_addrs:
            self.server_req_socket.connect(addr)
        self.poller = PollingObject()
        self.mtool = OBCIMessageTool(message_templates)
        self.nearby_servers = nearby_servers
        self._amplified_cache = {}

    def _running_experiments(self):
        send_msg(self.server_req_socket, self.mtool.fill_msg("list_experiments"))
        exp_list, details = self.poll_recv(self.server_req_socket, 2000)
        if not exp_list:
            LOGGER.error("Connection to obci_server failed. (list_experiments)")
            return None
        exps = exp_list.exp_data
        running = []
        for exp in exps.values():
            if exp['status_name'] == launcher_tools.RUNNING or \
                    exp['status_name'] == launcher_tools.LAUNCHING:
                running.append(exp)
        return running

    def find_amplified_experiments(self):
        running_exps = self._running_experiments()
        amplified = []
        for exp in running_exps:
            LOGGER.info("Found running experiment: " + str(exp['name']))
            infos = self._info_amplified(exp)
            if infos is not None:
                print("Found experiments...", str(infos)[:500])
                amplified += infos
        return amplified

    def _info_amplified(self, exp_desc):
        amp_options = []
        LOGGER.info("Processing experiment " + str(exp_desc['name']) +
                    "w/ addr: " + str(exp_desc['rep_addrs']))

        tcp_addrs = exp_desc['tcp_addrs']
        rep_addrs = net.choose_not_local(exp_desc['rep_addrs'])
        if not rep_addrs:
            rep_addrs = net.choose_local(exp_desc['rep_addrs'], ip=True)
        rep_addr = rep_addrs.pop()

        pub_addrs = net.choose_not_local(exp_desc['pub_addrs'])
        if not pub_addrs:
            pub_addrs = net.choose_local(exp_desc['pub_addrs'], ip=True)
        pub_addr = pub_addrs.pop()
        tcp_addr = tcp_addrs.pop()

        LOGGER.info("Chosen experiment addresses: REP -- " + str(rep_addr) +
                    ", PUB -- " + str(pub_addr))

        req_sock = self.ctx.socket(zmq.REQ)
        try:
            req_sock.connect(rep_addr)
            send_msg(req_sock, self.mtool.fill_msg('get_experiment_info'))
            res, details = self.poll_recv(req_sock, 4000)
        finally:
            req_sock.close()

        if not res:
            LOGGER.error("Connection failed (experiment " + exp_desc['name'] +
                         "), get_experiment_info")
            return None

        exp_info = res.dict()  # json.loads(res)
        for field in ["peers_status", "details"]:
            del exp_info["experiment_status"][field]

        peer_list = exp_info["peers"]
        if not self._has_mx(peer_list):
            LOGGER.info("Experiment " + exp_desc['name'] +
                        " does not have a multiplexer.")
            return None

        maybe_amps = self._amp_like_peers(peer_list)
        if not maybe_amps:
            LOGGER.info("Experiment " + exp_desc['name'] + " -- no amplifier.")
            return None

        req_sock = self.ctx.socket(zmq.REQ)
        try:
            req_sock.connect(rep_addr)
            for peer in maybe_amps:
                info, params = self._get_amp_info(req_sock, peer)
                if not self._is_amplifier(info, params):
                    LOGGER.info("Experiment " + exp_desc['name'] + " -- peer " +
                                str(peer) + "is not an amplifier.")
                    continue
                else:
                    exp_data = self._create_exp_data(exp_info, info,
                                                     params['param_values'],
                                                     rep_addr, pub_addr, tcp_addr)
                    amp_options.append(exp_data)
        finally:
            req_sock.close()

        return amp_options

    def _get_amp_info(self, exp_sock, peer_id):
        send_msg(exp_sock, self.mtool.fill_msg('get_peer_info', peer_id=peer_id))
        info, details = self.poll_recv(exp_sock, 4000)

        send_msg(exp_sock, self.mtool.fill_msg('get_peer_param_values', peer_id=peer_id))
        params, details = self.poll_recv(exp_sock, 4000)

        if not info or not params:
            LOGGER.error("get_peer_info failed " + str(peer_id) + " " + str(details))
            return None, None

        info = info.dict()
        params = params.dict()
        for field in ["sender", "sender_ip", "receiver", "type", "local_params",
                      "external_params", "config_sources", "launch_dependencies"]:
            del info[field]
        return info, params

    def _is_amplifier(self, peer_info, peer_params):
        info = peer_info
        peer_id = info['peer_id']
        if not info['peer_type'] == 'obci_peer':
            LOGGER.info("Peer " + str(peer_id) + " not obci_peer")
            return False
        params = peer_params['param_values']
        if 'channels_info' not in params or\
                'active_channels' not in params:
            LOGGER.info('Peer ' + str(peer_id) + " no channels_info param.")
            return False
        return True

    def _create_exp_data(self, exp_info, peer_info, params, rep_addr, pub_addr, tcp_addr):
        data = {}
        data['amplifier_params'] = params
        data['amplifier_peer_info'] = peer_info
        data['experiment_info'] = exp_info
        data['rep_addrs'] = [rep_addr]
        data['pub_addrs'] = [pub_addr]
        data['tcp_addrs'] = [tcp_addr]
        return data

    def _has_mx(self, peer_list):
        return [peer for peer in peer_list if peer.startswith('mx')] != []

    def _amp_like_peers(self, peer_list):
        return [peer for peer in peer_list if peer.startswith('amplifier')]

    def poll_recv(self, socket, timeout):
        result, details = self.poller.poll_recv(socket, timeout)
        if result:
            result = self.mtool.unpack_msg(result)
        return result, details
class ExperimentEngineInfo(QtCore.QObject):

    exp_saver_msg = QtCore.pyqtSignal(object)

    def __init__(self, preset_data=None, launcher_data=None, ctx=None):
        self.preset_data = preset_data
        self.launch_file = None
        self.launcher_data = launcher_data
        self.name = None
        self.info = ""
        self.public_params = []
        self.origin_machine = ''
        self.unsupervised_peers = {}
        self.old_uid = None
        self.ctx = ctx
        self.exp_req = None
        self.mtool = OBCIMessageTool(message_templates)
        self.poller = PollingObject()
        self.category = DEFAULT_CATEGORY

        if preset_data is not None:
            self.setup_from_preset(preset_data)
        elif launcher_data is not None:
            self.setup_from_launcher(launcher_data)

        super(ExperimentEngineInfo, self).__init__()

    def cleanup(self):
        if self.exp_req:
            self.exp_req.close()  # linger=0)

    def setup_from_preset(self, preset_data, launcher=False):
        self.preset_data = preset_data
        self.overwrites = {}
        self.runtime_changes = {}
        self.status = launcher_tools.ExperimentStatus()
        self.exp_config = system_config.OBCIExperimentConfig()
        self.name = preset_data['name']
        self.launch_file = preset_data['launch_file']
        self.info = preset_data['info']
        self.public_params = [p.strip() for p in preset_data['public_params'].split(',')]
        self.exp_config.uuid = self.name + '--' + self.launch_file
        self.category = preset_data['category']
        result, details = self._make_config()
        self.status.details = details
        self._set_public_params()

    def _addr_connectable(self, addr, machine):
        return machine == socket.gethostname() or \
            (net.is_ip(addr) and not net.addr_is_local(addr))

    def setup_from_launcher(self, launcher_data, preset=False, transform=False):
        self.launcher_data = launcher_data
        self.runtime_changes = {}
        if preset:
            self.old_uid = self.exp_config.uuid
        if not preset or transform:
            self.overwrites = {}
            self.status = launcher_tools.ExperimentStatus()
        self.exp_config = system_config.OBCIExperimentConfig()
        self.name = launcher_data['name']  # if not preset else self.old_uid
        if not preset:
            self.launch_file = launcher_data['launch_file_path']

        connected = False
        if not transform:
            self.ctx = self.ctx if self.ctx is not None else zmq.Context()
            self.exp_req = self.ctx.socket(zmq.REQ)
            machine = launcher_data['origin_machine']
            for addr in launcher_data['rep_addrs']:
                if self._addr_connectable(addr, machine):
                    try:
                        self.exp_req.connect(addr)
                    except Exception as e:
                        print(addr, False)
                    else:
                        connected = True
            if not connected:
                print("Connection to experiment ", self.name, "UNSUCCESFUL!!!!!!")
                return

        self.exp_config.uuid = launcher_data['uuid']
        self.exp_config.origin_machine = launcher_data['origin_machine']
        self.uuid = self.exp_config.uuid
        self.exp_config.launch_file_path = self.launch_file
        result, details = self._get_experiment_scenario()
        self.exp_config.status(self.status)
        self.status.set_status(launcher_data['status_name'],
                               details=launcher_data['details'])
        self._get_experiment_details()
        self._set_public_params()

    def update_scenario(self, launch_file_path, scenario):
        self.exp_config.launch_file_path = launch_file_path
        self._process_experiment_scenario(scenario)
        self.exp_config.status(self.status)

    def _set_public_params(self):
        for par in self.public_params:
            if len(par.split('.')) == 2:
                [peer, param] = par.split('.')
                self.exp_config.peers[peer].public_params.append(param)

    def _make_config(self):
        self.exp_config.launch_file_path = self.launch_file
        self.uuid = self.exp_config.uuid
        result, details = self.make_experiment_config()
        self.exp_config.status(self.status)
        return result, details

    # FIXME !!! copy-paste from obci_experiment
    def make_experiment_config(self):
        launch_parser = launch_file_parser.LaunchFileParser(
            launcher_tools.obci_root(), settings.DEFAULT_SCENARIO_DIR)
        if not self.launch_file:
            return False, "Empty scenario."
        try:
            with open(launcher_tools.expand_path(self.launch_file)) as f:
                launch_parser.parse(f, self.exp_config, apply_globals=True)
        except Exception as e:
            self.status.set_status(launcher_tools.NOT_READY, details=str(e))
            print("config errror ", str(e))
            return False, str(e)
        rd, details = self.exp_config.config_ready()
        if rd:
            self.status.set_status(launcher_tools.READY_TO_LAUNCH)
        else:
            self.status.set_status(launcher_tools.NOT_READY, details=details)
            print(rd, details)
        return True, None

    def _get_experiment_scenario(self):
        if not self.exp_req:
            return False, "No experiment socket"
        response = self.comm_exp(self.mtool.fill_msg("get_experiment_scenario"))
        if not response:
            return False, "No response from experient"
        print("GOT SCENARIO", response.scenario)
        return self._process_experiment_scenario(response.scenario)

    def _process_experiment_scenario(self, json_scenario):
        jsonpar = launch_file_parser.LaunchJSONParser(
            launcher_tools.obci_root(), settings.DEFAULT_SCENARIO_DIR)
        inbuf = io.StringIO(json_scenario)
        jsonpar.parse(inbuf, self.exp_config)
        print("MY PEEEEERS:", self.exp_config.peers.keys())
        rd, details = self.exp_config.config_ready()
        if rd:
            self.status.set_status(launcher_tools.READY_TO_LAUNCH)
        else:
            self.status.set_status(launcher_tools.NOT_READY, details=details)
            print(rd, details)
        return True, None

    def _get_experiment_details(self):
        if not self.exp_req:
            return
        exp_msg = self.comm_exp(self.mtool.fill_msg("get_experiment_info"))
        if not exp_msg:
            return
        self.origin_machine = exp_msg.origin_machine
        for peer, short_info in exp_msg.peers.items():
            # self.exp_config.set_peer_machine(peer, short_info['machine'])
            msg = self.comm_exp(self.mtool.fill_msg("get_peer_info", peer_id=peer))
            if not msg:
                return
            ext_defs = {}
            for name, defi in msg.external_params.items():
                ext_defs[name] = defi[0] + '.' + defi[1]
            self.exp_config.update_peer_config(
                peer, dict(config_sources=msg.config_sources,
                           launch_dependencies=msg.launch_dependencies,
                           local_params=msg.local_params,
                           external_params=ext_defs))
        for peer, status in exp_msg.experiment_status['peers_status'].items():
            self.status.peer_status(peer).set_status(
                status['status_name'], details=status['details'])

    def parameters(self, peer_id, mode):
        params = {}
        peer = self.exp_config.peers[peer_id]
        if mode == MODE_BASIC:
            for par in peer.public_params:
                params[par] = (self.exp_config.param_value(peer_id, par), None)
        else:
            params = peer.config.local_params
            for param in peer.config.local_params:
                params[param] = (self.exp_config.param_value(peer_id, param), None)
            for param, defi in peer.config.ext_param_defs.items():
                source_symbol = defi[0]
                source = peer.config.config_sources[source_symbol]
                params[param] = (self.exp_config.param_value(peer_id, param),
                                 source + '.' + defi[1])
        return params

    def comm_exp(self, msg):
        send_msg(self.exp_req, msg)
        response, _ = self.poller.poll_recv(self.exp_req, timeout=3000)
        if not response:
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!1 no response to ", msg)
            self.exp_req.close()
            self.exp_req = self.ctx.socket(zmq.REQ)
            for addr in self.launcher_data['rep_addrs']:
                if self._addr_connectable(addr, self.launcher_data['origin_machine']):
                    self.exp_req.connect(addr)
            return None
        return self.mtool.unpack_msg(response)

    def updatable(self, peer_id, config_part, **kwargs):
        return False

    def update_peer_param(self, peer_id, param, value, runtime=False):
        changes = self.overwrites if not runtime else self.runtime_changes
        ovr = changes.get(peer_id, None)
        ovr = ovr if ovr is not None else {}
        if param not in ovr:
            old = self.exp_config.param_value(peer_id, param)
            if old != value:
                ovr[param] = old
                changes[peer_id] = ovr
        self.exp_config.update_local_param(peer_id, param, value)

    def get_launch_args(self):
        d = dict(launch_file=self.launch_file, name=self.name)
        args = ['--ovr']
        if self.overwrites:
            for peer_id in self.overwrites:
                args.append('--peer')
                args.append(peer_id)
                for arg in self.overwrites[peer_id]:
                    args += ['-p', arg, self.exp_config.param_value(peer_id, arg)]
            pack = peer_cmd.peer_overwrites_pack(args)
            d['overwrites'] = pack
            print("overwrites pack!!!!!!!!!!!!!!!!!!!!! ", pack)
        return d

    def peer_info(self, peer_id):
        return self.exp_config.peers[peer_id]

    def add_peer(self, peer_id, peer_path, config_sources=None, launch_deps=None,
                 custom_config_path=None, param_overwrites=None, machine=None):
        return launch_file_parser.extend_experiment_config(
            self.exp_config, peer_id, peer_path, config_sources, launch_deps,
            custom_config_path, param_overwrites, machine, apply_globals=True)

    def enable_signal_storing(self, store_options):
        if not store_options:
            return
        if int(store_options['append_timestamp']):
            store_options = dict(store_options)
            store_options['save_file_name'] = \
                store_options['save_file_name'] + "_" + str(time.time())
        for peer, peer_path in SIGNAL_STORAGE_PEERS.items():
            if peer not in self.exp_config.peers:
                self.add_peer(peer, peer_path)
        saver = self.exp_config.peers['signal_saver']
        params = saver.config.param_values
        for opt, val in store_options.items():
            if opt in saver.config.param_values:
                params[opt] = val

    def stop_storing(self, client):
        join_response = client.join_experiment(self.uuid,
                                               "dummy_module_" + str(time.time()), "")
        if join_response is None:
            print("experiment engine info - ERROR - connection timeout"
                  " on stop signal storing!")
            return
        if not join_response.type == "rq_ok":
            print("experiment engine info - ERROR - join error on stop signal storing!")
            return
        mx_addr = join_response.params["mx_addr"].split(':')
        # hang and wait ...
        acquisition_helper.finish_saving([(mx_addr[0], int(mx_addr[1]))])