class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, DictDB] + real_dbs

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help="""Whether to reuse existing json connection files.
        If False, connection files will be removed on a clean exit.
        """
    )
    restore_engines = Bool(False, config=True,
        help="""Reload engine state from JSON file
        """
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for
        disambiguating engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )
    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode('ipcontroller-engine.json', config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode('ipcontroller-client.json', config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else 'Process')

    write_connection_files = Bool(True,
        help="""Whether to write connection files to disk.
        True in all cases other than runs with `reuse_files=True` *after the first*
        """
    )

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['registration']
        location = cdict['location']

        if not location:
            if PUBLIC_IPS:
                location = PUBLIC_IPS[-1]
            else:
                self.log.warn("Could not identify this machine's IP, assuming %s."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback." % LOCALHOST)
                location = LOCALHOST
            cdict['location'] = location

        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")

        # load engine config
        fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            ecfg = json.loads(f.read())

        # json gives unicode, Session.key wants bytes
        c.Session.key = ecfg['exec_key'].encode('ascii')

        xport, ip = ecfg['interface'].split('://')

        c.HubFactory.engine_ip = ip
        c.HubFactory.engine_transport = xport

        self.location = ecfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = ecfg['ssh']

        # load client config
        fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            ccfg = json.loads(f.read())

        for key in ('exec_key', 'registration', 'pack', 'unpack'):
            assert ccfg[key] == ecfg[key], "mismatch between engine and client info: %r" % key

        # engines and clients connect to the same interface host
        xport, addr = ccfg['interface'].split('://')

        c.HubFactory.client_transport = xport
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = ccfg['ssh']

        # load port config:
        c.HubFactory.regport = ecfg['registration']
        c.HubFactory.hb = (ecfg['hb_ping'], ecfg['hb_pong'])
        c.HubFactory.control = (ccfg['control'], ecfg['control'])
        c.HubFactory.mux = (ccfg['mux'], ecfg['mux'])
        c.HubFactory.task = (ccfg['task'], ecfg['task'])
        c.HubFactory.iopub = (ccfg['iopub'], ecfg['iopub'])
        c.HubFactory.notifier_port = ccfg['notification']

    def cleanup_connection_files(self):
        if self.reuse_files:
            self.log.debug("leaving JSON connection files for reuse")
            return
        self.log.debug("cleaning up JSON connection files")
        for f in (self.client_json_file, self.engine_json_file):
            f = os.path.join(self.profile_dir.security_dir, f)
            try:
                os.remove(f)
            except Exception as e:
                self.log.error("Failed to cleanup connection file: %s", e)
            else:
                self.log.debug(u"removed %s", f)

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
            else:
                # successfully loaded config from JSON, and reuse=True
                # no need to write back the same file
                self.write_connection_files = False

        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if self.write_connection_files:
            # save to new json config files
            f = self.factory
            base = {
                'exec_key': f.session.key.decode('ascii'),
                'location': self.location,
                'pack': f.session.packer,
                'unpack': f.session.unpacker,
            }

            cdict = {'ssh': self.ssh_server}
            cdict.update(f.client_info)
            cdict.update(base)
            self.save_connection_dict(self.client_json_file, cdict)

            edict = {'ssh': self.engine_ssh_server}
            edict.update(f.engine_info)
            edict.update(base)
            self.save_connection_dict(self.engine_json_file, edict)

        fname = "engines%s.json" % self.cluster_id
        self.factory.hub.engine_state_file = os.path.join(self.profile_dir.log_dir, fname)
        if self.restore_engines:
            self.factory.hub._load_engine_state()

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        f = self.factory
        ident = f.session.bsession
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(f.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(f.client_url('iopub'))
        q.setsockopt_in(zmq.IDENTITY, ident + b"_iopub")
        q.bind_out(f.engine_url('iopub'))
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(f.client_url('mux'))
        q.setsockopt_in(zmq.IDENTITY, b'mux_in')
        q.bind_out(f.engine_url('mux'))
        q.setsockopt_out(zmq.IDENTITY, b'mux_out')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(f.client_url('control'))
        q.setsockopt_in(zmq.IDENTITY, b'control_in')
        q.bind_out(f.engine_url('control'))
        q.setsockopt_out(zmq.IDENTITY, b'control_out')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()

        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure DEALER Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(f.client_url('task'))
            q.setsockopt_in(zmq.IDENTITY, b'task_in')
            q.bind_out(f.engine_url('task'))
            q.setsockopt_out(zmq.IDENTITY, b'task_out')
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")
        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (
                f.client_url('task'),
                f.engine_url('task'),
                monitor_url,
                disambiguate_url(f.client_url('notification')),
                disambiguate_url(f.client_url('registration')),
            )
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                          log_url=self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

        # set unlimited HWM for all relay devices
        if hasattr(zmq, 'SNDHWM'):
            q = children[0]
            q.setsockopt_in(zmq.RCVHWM, 0)
            q.setsockopt_out(zmq.SNDHWM, 0)

            for q in children[1:]:
                if not hasattr(q, 'setsockopt_in'):
                    continue
                q.setsockopt_in(zmq.SNDHWM, 0)
                q.setsockopt_in(zmq.RCVHWM, 0)
                q.setsockopt_out(zmq.SNDHWM, 0)
                q.setsockopt_out(zmq.RCVHWM, 0)
                q.setsockopt_mon(zmq.SNDHWM, 0)

    def terminate_children(self):
        child_procs = []
        for child in self.children:
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            self.log.critical("terminating children...")
            for child in child_procs:
                try:
                    child.terminate()
                except OSError:
                    # already dead
                    pass

    def handle_signal(self, sig, frame):
        self.log.critical("Received signal %i, shutting down", sig)
        self.terminate_children()
        self.loop.stop()

    def init_signal(self):
        for sig in (SIGINT, SIGABRT, SIGTERM):
            signal(sig, self.handle_signal)

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.info("Executing statement: '%s'" % s)
                exec(s, globals(), locals())
            except:
                self.log.error("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        # children must be started before signals are setup,
        # otherwise signal-handling will fire multiple times
        for child in self.children:
            child.start()
        self.init_signal()

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
        finally:
            self.cleanup_connection_files()
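
# Illustrative shape of the ipcontroller-client.json payload written by
# init_hub() and read back by load_config_from_json() above. The key names
# are taken from those two methods; every value is a made-up placeholder
# (real ports are assigned by the HubFactory at startup).
_example_client_connection_info = {
    'ssh': '',
    'interface': 'tcp://127.0.0.1',
    'registration': 55555,   # regport: clients and engines both register here
    'control': 55556,
    'mux': 55557,
    'task': 55558,
    'iopub': 55559,
    'notification': 55560,
    'exec_key': '0f7e4a2d-0000-0000-0000-000000000000',
    'location': '10.0.0.5',
    'pack': 'json',          # must match the engine file (see the asserts above)
    'unpack': 'json',
}
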

# An earlier revision of the same application: connection info is written as
# a single 'url' string per file rather than the per-channel ports used above.
class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help="""Whether to reuse existing json connection files.
        If False, connection files will be removed on a clean exit.
        """
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for
        disambiguating engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )
    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode('ipcontroller-engine.json', config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode('ipcontroller-client.json', config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else 'Process')

    write_connection_files = Bool(True,
        help="""Whether to write connection files to disk.
        True in all cases other than runs with `reuse_files=True` *after the first*
        """
    )

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']

        if not location:
            try:
                proto, ip, port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                        " You may need to specify '--location=<external_ip_address>' to help"
                        " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location

        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")

        # load from engine config
        fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())

        key = cfg['exec_key']
        # json gives unicode, Session.key wants bytes
        c.Session.key = key.encode('ascii')

        xport, addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']

        # load client config
        fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())

        assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
        xport, addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"

    def cleanup_connection_files(self):
        if self.reuse_files:
            self.log.debug("leaving JSON connection files for reuse")
            return
        self.log.debug("cleaning up JSON connection files")
        for f in (self.client_json_file, self.engine_json_file):
            f = os.path.join(self.profile_dir.security_dir, f)
            try:
                os.remove(f)
            except Exception as e:
                self.log.error("Failed to cleanup connection file: %s", e)
            else:
                self.log.debug(u"removed %s", f)

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
            else:
                # successfully loaded config from JSON, and reuse=True
                # no need to write back the same file
                self.write_connection_files = False

        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if self.write_connection_files:
            # save to new json config files
            f = self.factory
            cdict = {
                'exec_key': f.session.key.decode('ascii'),
                'ssh': self.ssh_server,
                'url': "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport),
                'location': self.location,
            }
            self.save_connection_dict(self.client_json_file, cdict)
            # edict aliases cdict; mutating it is safe because the client
            # file has already been written
            edict = cdict
            edict['url'] = "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict(self.engine_json_file, edict)

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(hub.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()

        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")
        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                     monitor_url, disambiguate_url(hub.client_info['notification']))
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                          log_url=self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    def terminate_children(self):
        child_procs = []
        for child in self.children:
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            self.log.critical("terminating children...")
            for child in child_procs:
                try:
                    child.terminate()
                except OSError:
                    # already dead
                    pass

    def handle_signal(self, sig, frame):
        self.log.critical("Received signal %i, shutting down", sig)
        self.terminate_children()
        self.loop.stop()

    def init_signal(self):
        for sig in (SIGINT, SIGABRT, SIGTERM):
            signal(sig, self.handle_signal)

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.info("Executing statement: '%s'" % s)
                exec(s, globals(), locals())
            except:
                self.log.error("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        # children must be started before signals are setup,
        # otherwise signal-handling will fire multiple times
        for child in self.children:
            child.start()
        self.init_signal()

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
        finally:
            self.cleanup_connection_files()
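
# The revision above stores a flat connection dict: one registration url
# instead of the per-channel ports of the newer format shown earlier. The key
# names come from its init_hub(); all values here are placeholders.
_example_url_connection_info = {
    'exec_key': 'c5466f9e-0000-0000-0000-000000000000',
    'ssh': '',
    'url': 'tcp://127.0.0.1:55555',   # transport://ip:regport
    'location': '10.0.0.5',
}
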

# A still older revision: deprecated XREP/XREQ socket names, an explicit
# 'secure' switch for HMAC message keys, and hardcoded connection-file names.
class IPControllerApp(BaseParallelApplication):

    name = 'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files.'
    )
    secure = Bool(True, config=True,
        help='Whether to use HMAC digests for extra message authentication.'
    )
    ssh_server = Unicode('', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode('', config=True,
        help="""The external IP or domain name of the Controller, used for
        disambiguating engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )
    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else 'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']

        if not location:
            try:
                proto, ip, port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                        " You may need to specify '--location=<external_ip_address>' to help"
                        " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location

        fname = os.path.join(self.profile_dir.security_dir, fname)
        with open(fname, 'wb') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        # load from engine config
        with open(os.path.join(self.profile_dir.security_dir, 'ipcontroller-engine.json')) as f:
            cfg = json.loads(f.read())
        key = c.Session.key = asbytes(cfg['exec_key'])
        xport, addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']

        # load client config
        with open(os.path.join(self.profile_dir.security_dir, 'ipcontroller-client.json')) as f:
            cfg = json.loads(f.read())
        assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
        xport, addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.client_ip = ip
        self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"

    def init_hub(self):
        c = self.config

        self.do_import_statements()
        reusing = self.reuse_files
        if reusing:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError):
                reusing = False
        # check again, because reusing may have failed:
        if reusing:
            pass
        elif self.secure:
            key = str(uuid.uuid4())
            # keyfile = os.path.join(self.profile_dir.security_dir, self.exec_key)
            # with open(keyfile, 'w') as f:
            #     f.write(key)
            # os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
            c.Session.key = asbytes(key)
        else:
            key = c.Session.key = b''

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not reusing:
            # save to new json config files
            f = self.factory
            cdict = {
                'exec_key': key,
                'ssh': self.ssh_server,
                'url': "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport),
                'location': self.location,
            }
            self.save_connection_dict('ipcontroller-client.json', cdict)
            # edict aliases cdict; mutating it is safe because the client
            # file has already been written
            edict = cdict
            edict['url'] = "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport)
            self.save_connection_dict('ipcontroller-engine.json', edict)

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # maybe_inproc = 'inproc://monitor' if self.use_threads else self.monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(hub.monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(hub.monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(hub.monitor_url)
        q.daemon = True
        children.append(q)

        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()

        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(hub.monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")
        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                     hub.monitor_url, hub.client_info['notification'])
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                          log_url=self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    def save_urls(self):
        """save the registration urls to files."""
        c = self.config
        sec_dir = self.profile_dir.security_dir
        cf = self.factory
        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.engine_transport, cf.engine_ip, cf.regport))
        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.client_transport, cf.client_ip, cf.regport))

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.info("Executing statement: '%s'" % s)
                exec(s, globals(), locals())
            except:
                self.log.error("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler

    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
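
# Standalone sketch of the MonitoredQueue pattern used in init_schedulers()
# above: a pyzmq device relays traffic between two sockets while publishing a
# copy of every message (prefixed b'in'/b'out') on a monitor PUB socket that
# the hub subscribes to. The addresses here are hypothetical; the controller
# binds its real client/engine endpoints instead.
import zmq
from zmq.devices import ProcessMonitoredQueue


def _demo_monitored_relay():
    q = ProcessMonitoredQueue(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
    q.bind_in('tcp://127.0.0.1:5555')    # client-facing side
    q.bind_out('tcp://127.0.0.1:5556')   # engine-facing side
    q.bind_mon('tcp://127.0.0.1:5557')   # monitor: the hub subscribes here
    q.daemon = True
    q.start()                            # relay runs in a background process
    return q
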

# Another intermediate revision: ROUTER sockets and configurable
# connection-file names, but still url-style connection info and no
# connection-file cleanup on exit.
class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files.'
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for
        disambiguating engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )
    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode('ipcontroller-engine.json', config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode('ipcontroller-client.json', config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else 'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']

        if not location:
            try:
                proto, ip, port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                        " You may need to specify '--location=<external_ip_address>' to help"
                        " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location

        fname = os.path.join(self.profile_dir.security_dir, fname)
        with open(fname, 'wb') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")
        # load from engine config
        with open(os.path.join(self.profile_dir.security_dir, self.engine_json_file)) as f:
            cfg = json.loads(f.read())
        key = c.Session.key = asbytes(cfg['exec_key'])
        xport, addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']

        # load client config
        with open(os.path.join(self.profile_dir.security_dir, self.client_json_file)) as f:
            cfg = json.loads(f.read())
        assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
        xport, addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
                self.reuse_files = False
        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not self.reuse_files:
            # save to new json config files
            f = self.factory
            cdict = {
                'exec_key': f.session.key,
                'ssh': self.ssh_server,
                'url': "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport),
                'location': self.location,
            }
            self.save_connection_dict(self.client_json_file, cdict)
            # edict aliases cdict; mutating it is safe because the client
            # file has already been written
            edict = cdict
            edict['url'] = "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict(self.engine_json_file, edict)

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(hub.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()

        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")
        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                     monitor_url, disambiguate_url(hub.client_info['notification']))
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                          log_url=self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    def save_urls(self):
        """save the registration urls to files."""
        c = self.config
        sec_dir = self.profile_dir.security_dir
        cf = self.factory
        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.engine_transport, cf.engine_ip, cf.regport))
        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.client_transport, cf.client_ip, cf.regport))

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.info("Executing statement: '%s'" % s)
                exec(s, globals(), locals())
            except:
                self.log.error("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
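
# A hedged configuration sketch: the traits declared with config=True in the
# classes above can be set from a profile's ipcontroller_config.py. Trait
# names come from the class definitions; the values (and the 'lru' scheme)
# are illustrative only.
#
#     c = get_config()
#     c.IPControllerApp.reuse_files = True
#     c.IPControllerApp.location = '10.0.0.5'
#     c.IPControllerApp.ssh_server = 'user@gateway.example.com'
#     c.IPControllerApp.import_statements = ['import numpy']
#     c.TaskScheduler.scheme_name = 'lru'
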
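
# Minimal launch sketch, assuming the standard IPython Application
# entry-point pattern (the actual console-script wiring lives elsewhere in
# the package):
def launch_new_instance():
    """Create and run the IPControllerApp (illustrative sketch)."""
    app = IPControllerApp.instance()
    app.initialize()
    app.start()


if __name__ == '__main__':
    launch_new_instance()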