Example #1
class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [
        ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, DictDB
    ] + real_dbs

    # change default to True
    auto_create = Bool(
        True,
        config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False,
                       config=True,
                       help="""Whether to reuse existing json connection files.
        If False, connection files will be removed on a clean exit.
        """)
    restore_engines = Bool(False,
                           config=True,
                           help="""Reload engine state from JSON file
        """)
    ssh_server = Unicode(
        u'',
        config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(
        u'',
        config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(
        u'',
        config=True,
        help=
        """The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List(
        [],
        config=True,
        help=
        "import statements to be run at startup.  Necessary in some environments"
    )

    use_threads = Bool(
        False,
        config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode(
        'ipcontroller-engine.json',
        config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode(
        'ipcontroller-client.json',
        config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else
                                                          'Process')

    write_connection_files = Bool(
        True,
        help="""Whether to write connection files to disk.
        True in all cases other than runs with `reuse_files=True` *after the first*
        """)

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['registration']
        location = cdict['location']

        if not location:
            if PUBLIC_IPS:
                location = PUBLIC_IPS[-1]
            else:
                self.log.warn(
                    "Could not identify this machine's IP, assuming %s."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback." %
                    LOCALHOST)
                location = LOCALHOST
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")

        # load engine config

        fname = os.path.join(self.profile_dir.security_dir,
                             self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            ecfg = json.loads(f.read())

        # json gives unicode, Session.key wants bytes
        c.Session.key = ecfg['exec_key'].encode('ascii')

        xport, ip = ecfg['interface'].split('://')

        c.HubFactory.engine_ip = ip
        c.HubFactory.engine_transport = xport

        self.location = ecfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = ecfg['ssh']

        # load client config

        fname = os.path.join(self.profile_dir.security_dir,
                             self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            ccfg = json.loads(f.read())

        for key in ('exec_key', 'registration', 'pack', 'unpack'):
            assert ccfg[key] == ecfg[
                key], "mismatch between engine and client info: %r" % key

        xport, ip = ccfg['interface'].split('://')

        c.HubFactory.client_transport = xport
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = ccfg['ssh']

        # load port config:
        c.HubFactory.regport = ecfg['registration']
        c.HubFactory.hb = (ecfg['hb_ping'], ecfg['hb_pong'])
        c.HubFactory.control = (ccfg['control'], ecfg['control'])
        c.HubFactory.mux = (ccfg['mux'], ecfg['mux'])
        c.HubFactory.task = (ccfg['task'], ecfg['task'])
        c.HubFactory.iopub = (ccfg['iopub'], ecfg['iopub'])
        c.HubFactory.notifier_port = ccfg['notification']

    def cleanup_connection_files(self):
        if self.reuse_files:
            self.log.debug("leaving JSON connection files for reuse")
            return
        self.log.debug("cleaning up JSON connection files")
        for f in (self.client_json_file, self.engine_json_file):
            f = os.path.join(self.profile_dir.security_dir, f)
            try:
                os.remove(f)
            except Exception as e:
                self.log.error("Failed to cleanup connection file: %s", e)
            else:
                self.log.debug(u"removed %s", f)

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
            else:
                # successfully loaded config from JSON, and reuse=True
                # no need to write back the same file
                self.write_connection_files = False

        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if self.write_connection_files:
            # save to new json config files
            f = self.factory
            base = {
                'exec_key': f.session.key.decode('ascii'),
                'location': self.location,
                'pack': f.session.packer,
                'unpack': f.session.unpacker,
            }

            cdict = {'ssh': self.ssh_server}
            cdict.update(f.client_info)
            cdict.update(base)
            self.save_connection_dict(self.client_json_file, cdict)

            edict = {'ssh': self.engine_ssh_server}
            edict.update(f.engine_info)
            edict.update(base)
            self.save_connection_dict(self.engine_json_file, edict)

        fname = "engines%s.json" % self.cluster_id
        self.factory.hub.engine_state_file = os.path.join(
            self.profile_dir.log_dir, fname)
        if self.restore_engines:
            self.factory.hub._load_engine_state()

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        f = self.factory
        ident = f.session.bsession
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(f.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(f.client_url('iopub'))
        q.setsockopt_in(zmq.IDENTITY, ident + b"_iopub")
        q.bind_out(f.engine_url('iopub'))
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')

        q.bind_in(f.client_url('mux'))
        q.setsockopt_in(zmq.IDENTITY, b'mux_in')
        q.bind_out(f.engine_url('mux'))
        q.setsockopt_out(zmq.IDENTITY, b'mux_out')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(f.client_url('control'))
        q.setsockopt_in(zmq.IDENTITY, b'control_in')
        q.bind_out(f.engine_url('control'))
        q.setsockopt_out(zmq.IDENTITY, b'control_out')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure DEALER Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(f.client_url('task'))
            q.setsockopt_in(zmq.IDENTITY, b'task_in')
            q.bind_out(f.engine_url('task'))
            q.setsockopt_out(zmq.IDENTITY, b'task_out')
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (
                f.client_url('task'),
                f.engine_url('task'),
                monitor_url,
                disambiguate_url(f.client_url('notification')),
                disambiguate_url(f.client_url('registration')),
            )
            kwargs = dict(logname='scheduler',
                          loglevel=self.log_level,
                          log_url=self.log_url,
                          config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

        # set unlimited HWM for all relay devices
        if hasattr(zmq, 'SNDHWM'):
            q = children[0]
            q.setsockopt_in(zmq.RCVHWM, 0)
            q.setsockopt_out(zmq.SNDHWM, 0)

            for q in children[1:]:
                if not hasattr(q, 'setsockopt_in'):
                    continue
                q.setsockopt_in(zmq.SNDHWM, 0)
                q.setsockopt_in(zmq.RCVHWM, 0)
                q.setsockopt_out(zmq.SNDHWM, 0)
                q.setsockopt_out(zmq.RCVHWM, 0)
                q.setsockopt_mon(zmq.SNDHWM, 0)

    def terminate_children(self):
        child_procs = []
        for child in self.children:
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            self.log.critical("terminating children...")
            for child in child_procs:
                try:
                    child.terminate()
                except OSError:
                    # already dead
                    pass

    def handle_signal(self, sig, frame):
        self.log.critical("Received signal %i, shutting down", sig)
        self.terminate_children()
        self.loop.stop()

    def init_signal(self):
        for sig in (SIGINT, SIGABRT, SIGTERM):
            signal(sig, self.handle_signal)

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.msg("Executing statement: '%s'" % s)
                exec s in globals(), locals()
            except:
                self.log.msg("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        # children must be started before signals are setup,
        # otherwise signal-handling will fire multiple times
        for child in self.children:
            child.start()
        self.init_signal()

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
        finally:
            self.cleanup_connection_files()
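
A minimal usage sketch (not part of the original application) of reading back the client connection file written by save_connection_dict() above. The key names ('interface', 'registration') follow the file format this example reads in load_config_from_json(); the helper name load_client_connection and its arguments are illustrative.

import json
import os

def load_client_connection(security_dir, fname='ipcontroller-client.json'):
    """Load a controller connection dict and rebuild the registration URL."""
    with open(os.path.join(security_dir, fname)) as f:
        cfg = json.load(f)
    # 'interface' is e.g. 'tcp://127.0.0.1'; per-channel ports are stored
    # alongside it, so the registration URL is interface + registration port.
    url = "%s:%s" % (cfg['interface'], cfg['registration'])
    return cfg, url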
Example #2
class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo
    
    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")
    
    reuse_files = Bool(False, config=True,
        help="""Whether to reuse existing json connection files.
        If False, connection files will be removed on a clean exit.
        """
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup.  Necessary in some environments"
    )

    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode('ipcontroller-engine.json', config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode('ipcontroller-client.json', config=True,
        help="JSON filename where client connection info will be stored.")
    
    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name


    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
    
    write_connection_files = Bool(True,
        help="""Whether to write connection files to disk.
        True in all cases other than runs with `reuse_files=True` *after the first*
        """
    )

    aliases = Dict(aliases)
    flags = Dict(flags)
    

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']
        if not location:
            try:
                proto,ip,port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
    
    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")
        # load from engine config
        fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())
        key = cfg['exec_key']
        # json gives unicode, Session.key wants bytes
        c.Session.key = key.encode('ascii')
        xport,addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip,ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']
        # load client config
        fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())
        assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
        xport,addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip,ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"
    
    def cleanup_connection_files(self):
        if self.reuse_files:
            self.log.debug("leaving JSON connection files for reuse")
            return
        self.log.debug("cleaning up JSON connection files")
        for f in (self.client_json_file, self.engine_json_file):
            f = os.path.join(self.profile_dir.security_dir, f)
            try:
                os.remove(f)
            except Exception as e:
                self.log.error("Failed to cleanup connection file: %s", e)
            else:
                self.log.debug(u"removed %s", f)
    
    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError,IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
            else:
                # successfully loaded config from JSON, and reuse=True
                # no need to write back the same file
                self.write_connection_files = False
                
        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))
        
    def init_hub(self):
        c = self.config
        
        self.do_import_statements()
        
        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)
        
        if self.write_connection_files:
            # save to new json config files
            f = self.factory
            cdict = {'exec_key' : f.session.key.decode('ascii'),
                    'ssh' : self.ssh_server,
                    'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
                    'location' : self.location
                    }
            self.save_connection_dict(self.client_json_file, cdict)
            # use a copy so the engine dict doesn't alias the client dict,
            # and point engines at the engine-facing transport/ip
            edict = cdict.copy()
            edict['url'] = "%s://%s:%s" % (f.engine_transport, f.engine_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict(self.engine_json_file, edict)

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))
        
        hub = self.factory
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(hub.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A',b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon=True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(monitor_url)
        q.daemon=True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(monitor_url)
        q.daemon=True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(monitor_url)
            q.daemon=True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler"%scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                                monitor_url, disambiguate_url(hub.client_info['notification']))
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                            log_url = self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon=True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    def terminate_children(self):
        child_procs = []
        for child in self.children:
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            self.log.critical("terminating children...")
            for child in child_procs:
                try:
                    child.terminate()
                except OSError:
                    # already dead
                    pass
    
    def handle_signal(self, sig, frame):
        self.log.critical("Received signal %i, shutting down", sig)
        self.terminate_children()
        self.loop.stop()
    
    def init_signal(self):
        for sig in (SIGINT, SIGABRT, SIGTERM):
            signal(sig, self.handle_signal)

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.msg("Executing statement: '%s'" % s)
                exec s in globals(), locals()
            except:
                self.log.msg("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s"%self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
    
    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()
    
    def start(self):
        # Start the subprocesses:
        self.factory.start()
        # children must be started before signals are setup,
        # otherwise signal-handling will fire multiple times
        for child in self.children:
            child.start()
        self.init_signal()

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
        finally:
            self.cleanup_connection_files()
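
A small sketch (not from the original source) of how the 'url' field in this example's connection files decomposes, mirroring the split('://') / split(':') logic in load_config_from_json() above; parse_connection_url is an invented name.

def parse_connection_url(url):
    """Split e.g. 'tcp://127.0.0.1:55555' into (transport, ip, port)."""
    xport, addr = url.split('://')
    ip, port = addr.split(':')
    return xport, ip, int(port)

# parse_connection_url("tcp://127.0.0.1:55555") -> ('tcp', '127.0.0.1', 55555)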
Example #3
class IPControllerApp(BaseParallelApplication):

    name = 'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo
    
    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")
    
    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files.'
    )
    secure = Bool(True, config=True,
        help='Whether to use HMAC digests for extra message authentication.'
    )
    ssh_server = Unicode('', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode('', config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup.  Necessary in some environments"
    )

    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
        )

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)
    

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']
        if not location:
            try:
                proto,ip,port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
    
    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        # load from engine config
        with open(os.path.join(self.profile_dir.security_dir, 'ipcontroller-engine.json')) as f:
            cfg = json.loads(f.read())
        key = c.Session.key = asbytes(cfg['exec_key'])
        xport,addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip,ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        # load client config
        with open(os.path.join(self.profile_dir.security_dir, 'ipcontroller-client.json')) as f:
            cfg = json.loads(f.read())
        assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
        xport,addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip,ports = addr.split(':')
        c.HubFactory.client_ip = ip
        self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"
    
    def init_hub(self):
        c = self.config
        
        self.do_import_statements()
        reusing = self.reuse_files
        if reusing:
            try:
                self.load_config_from_json()
            except (AssertionError,IOError):
                reusing=False
        # check again, because reusing may have failed:
        if reusing:
            pass
        elif self.secure:
            key = str(uuid.uuid4())
            # keyfile = os.path.join(self.profile_dir.security_dir, self.exec_key)
            # with open(keyfile, 'w') as f:
            #     f.write(key)
            # os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
            c.Session.key = asbytes(key)
        else:
            key = c.Session.key = b''
        
        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)
        
        if not reusing:
            # save to new json config files
            f = self.factory
            cdict = {'exec_key' : key,
                    'ssh' : self.ssh_server,
                    'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
                    'location' : self.location
                    }
            self.save_connection_dict('ipcontroller-client.json', cdict)
            # copy so the engine dict doesn't alias the client dict, and use
            # the engine-facing transport/ip for the engine connection file
            edict = cdict.copy()
            edict['url'] = "%s://%s:%s" % (f.engine_transport, f.engine_ip, f.regport)
            self.save_connection_dict('ipcontroller-engine.json', edict)

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))
        
        hub = self.factory
        # maybe_inproc = 'inproc://monitor' if self.use_threads else self.monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A',b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(hub.monitor_url)
            q.daemon=True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler"%scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                                hub.monitor_url, hub.client_info['notification'])
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                            log_url = self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon=True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    
    def save_urls(self):
        """save the registration urls to files."""
        c = self.config
        
        sec_dir = self.profile_dir.security_dir
        cf = self.factory
        
        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
        
        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
        
    
    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.msg("Executing statement: '%s'" % s)
                exec(s, globals(), locals())
            except:
                self.log.msg("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s"%self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler
    
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.init_hub()
        self.init_schedulers()
    
    def start(self):
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
Example #4
class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [
        ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB
    ] + maybe_mongo

    # change default to True
    auto_create = Bool(
        True,
        config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False,
                       config=True,
                       help='Whether to reuse existing json connection files.')
    ssh_server = Unicode(
        u'',
        config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(
        u'',
        config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(
        u'',
        config=True,
        help=
        """The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List(
        [],
        config=True,
        help=
        "import statements to be run at startup.  Necessary in some environments"
    )

    use_threads = Bool(
        False,
        config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode(
        'ipcontroller-engine.json',
        config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode(
        'ipcontroller-client.json',
        config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else
                                                          'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']
        if not location:
            try:
                proto, ip, port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(
                        socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn(
                        "Could not identify this machine's IP, assuming 127.0.0.1."
                        " You may need to specify '--location=<external_ip_address>' to help"
                        " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")
        # load from engine config
        with open(
                os.path.join(self.profile_dir.security_dir,
                             self.engine_json_file)) as f:
            cfg = json.loads(f.read())
        key = c.Session.key = asbytes(cfg['exec_key'])
        xport, addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']
        # load client config
        with open(
                os.path.join(self.profile_dir.security_dir,
                             self.client_json_file)) as f:
            cfg = json.loads(f.read())
        assert key == cfg[
            'exec_key'], "exec_key mismatch between engine and client keys"
        xport, addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
                self.reuse_files = False
        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not self.reuse_files:
            # save to new json config files
            f = self.factory
            cdict = {
                'exec_key': f.session.key,
                'ssh': self.ssh_server,
                'url':
                "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport),
                'location': self.location
            }
            self.save_connection_dict(self.client_json_file, cdict)
            # copy so the engine dict doesn't alias the client dict, and use
            # the engine-facing transport/ip for the engine connection file
            edict = cdict.copy()
            edict['url'] = "%s://%s:%s" % (
                f.engine_transport, f.engine_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict(self.engine_json_file, edict)

    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(hub.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                     monitor_url,
                     disambiguate_url(hub.client_info['notification']))
            kwargs = dict(logname='scheduler',
                          loglevel=self.log_level,
                          log_url=self.log_url,
                          config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    def save_urls(self):
        """save the registration urls to files."""
        c = self.config

        sec_dir = self.profile_dir.security_dir
        cf = self.factory

        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s" %
                    (cf.engine_transport, cf.engine_ip, cf.regport))

        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s" %
                    (cf.client_transport, cf.client_ip, cf.regport))

    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.msg("Executing statement: '%s'" % s)
                exec s in globals(), locals()
            except:
                self.log.msg("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
Example #5
class IPControllerApp(BaseParallelApplication):

    name = u"ipcontroller"
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, DictDB] + real_dbs

    # change default to True
    auto_create = Bool(True, config=True, help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(
        False,
        config=True,
        help="""Whether to reuse existing json connection files.
        If False, connection files will be removed on a clean exit.
        """,
    )
    restore_engines = Bool(
        False,
        config=True,
        help="""Reload engine state from JSON file
        """,
    )
    ssh_server = Unicode(
        u"",
        config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(
        u"",
        config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(
        u"",
        config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List(
        [], config=True, help="import statements to be run at startup.  Necessary in some environments"
    )

    use_threads = Bool(False, config=True, help="Use threads instead of processes for the schedulers")

    engine_json_file = Unicode(
        "ipcontroller-engine.json", config=True, help="JSON filename where engine connection info will be stored."
    )
    client_json_file = Unicode(
        "ipcontroller-client.json", config=True, help="JSON filename where client connection info will be stored."
    )

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    children = List()
    mq_class = Unicode("zmq.devices.ProcessMonitoredQueue")

    def _use_threads_changed(self, name, old, new):
        self.mq_class = "zmq.devices.%sMonitoredQueue" % ("Thread" if new else "Process")

    write_connection_files = Bool(
        True,
        help="""Whether to write connection files to disk.
        True in all cases other than runs with `reuse_files=True` *after the first*
        """,
    )

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict["registration"]
        location = cdict["location"]

        if not location:
            if PUBLIC_IPS:
                location = PUBLIC_IPS[-1]
            else:
                self.log.warn(
                    "Could not identify this machine's IP, assuming %s."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback." % LOCALHOST
                )
                location = LOCALHOST
            cdict["location"] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, "w") as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")

        # load engine config

        fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            ecfg = json.loads(f.read())

        # json gives unicode, Session.key wants bytes
        c.Session.key = ecfg["exec_key"].encode("ascii")

        xport, ip = ecfg["interface"].split("://")

        c.HubFactory.engine_ip = ip
        c.HubFactory.engine_transport = xport

        self.location = ecfg["location"]
        if not self.engine_ssh_server:
            self.engine_ssh_server = ecfg["ssh"]

        # load client config

        fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            ccfg = json.loads(f.read())

        for key in ("exec_key", "registration", "pack", "unpack"):
            assert ccfg[key] == ecfg[key], "mismatch between engine and client info: %r" % key

        xport, ip = ccfg["interface"].split("://")

        c.HubFactory.client_transport = xport
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = ccfg["ssh"]

        # load port config:
        c.HubFactory.regport = ecfg["registration"]
        c.HubFactory.hb = (ecfg["hb_ping"], ecfg["hb_pong"])
        c.HubFactory.control = (ccfg["control"], ecfg["control"])
        c.HubFactory.mux = (ccfg["mux"], ecfg["mux"])
        c.HubFactory.task = (ccfg["task"], ecfg["task"])
        c.HubFactory.iopub = (ccfg["iopub"], ecfg["iopub"])
        c.HubFactory.notifier_port = ccfg["notification"]

    def cleanup_connection_files(self):
        if self.reuse_files:
            self.log.debug("leaving JSON connection files for reuse")
            return
        self.log.debug("cleaning up JSON connection files")
        for f in (self.client_json_file, self.engine_json_file):
            f = os.path.join(self.profile_dir.security_dir, f)
            try:
                os.remove(f)
            except Exception as e:
                self.log.error("Failed to cleanup connection file: %s", e)
            else:
                self.log.debug(u"removed %s", f)

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
            else:
                # successfully loaded config from JSON, and reuse=True
                # no need to write back the same file
                self.write_connection_files = False

        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
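        """Run import statements, construct the Hub, and write connection files."""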
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
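            # bad config should be reported as such; re-raise so the
            # @catch_config_error wrapper around initialize() can handle it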
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if self.write_connection_files:
            # save to new json config files
            f = self.factory
            base = {
                "exec_key": f.session.key.decode("ascii"),
                "location": self.location,
                "pack": f.session.packer,
                "unpack": f.session.unpacker,
            }
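            # both connection files share these base keys; each then adds its
            # own ssh server and the socket info from HubFactory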

            cdict = {"ssh": self.ssh_server}
            cdict.update(f.client_info)
            cdict.update(base)
            self.save_connection_dict(self.client_json_file, cdict)

            edict = {"ssh": self.engine_ssh_server}
            edict.update(f.engine_info)
            edict.update(base)
            self.save_connection_dict(self.engine_json_file, edict)

        fname = "engines%s.json" % self.cluster_id
        self.factory.hub.engine_state_file = os.path.join(self.profile_dir.log_dir, fname)
        if self.restore_engines:
            self.factory.hub._load_engine_state()

    def init_schedulers(self):
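        """Create the queue relays and the task scheduler."""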
        children = self.children
        mq = import_item(str(self.mq_class))

        f = self.factory
        ident = f.session.bsession
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(f.monitor_url)
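        # each MonitoredQueue below is a zmq device with three sockets: one
        # bound for clients (bind_in), one bound for engines (bind_out), and
        # a monitor socket (connect_mon) that mirrors traffic to the Hub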
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b"N/A", b"iopub")
        q.bind_in(f.client_url("iopub"))
        q.setsockopt_in(zmq.IDENTITY, ident + b"_iopub")
        q.bind_out(f.engine_url("iopub"))
        q.setsockopt_out(zmq.SUBSCRIBE, b"")
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b"in", b"out")
        q.bind_in(f.client_url("mux"))
        q.setsockopt_in(zmq.IDENTITY, b"mux_in")
        q.bind_out(f.engine_url("mux"))
        q.setsockopt_out(zmq.IDENTITY, b"mux_out")
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b"incontrol", b"outcontrol")
        q.bind_in(f.client_url("control"))
        q.setsockopt_in(zmq.IDENTITY, b"control_in")
        q.bind_out(f.engine_url("control"))
        q.setsockopt_out(zmq.IDENTITY, b"control_out")
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
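        # scheme_name is an ordinary config trait, so it can also be set on
        # the command line, e.g. `ipcontroller --TaskScheduler.scheme_name=pure`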
        # Task Queue (in a Process)
        if scheme == "pure":
            self.log.warn("task::using pure DEALER Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b"intask", b"outtask")
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(f.client_url("task"))
            q.setsockopt_in(zmq.IDENTITY, b"task_in")
            q.bind_out(f.engine_url("task"))
            q.setsockopt_out(zmq.IDENTITY, b"task_out")
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == "none":
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (
                f.client_url("task"),
                f.engine_url("task"),
                monitor_url,
                disambiguate_url(f.client_url("notification")),
                disambiguate_url(f.client_url("registration")),
            )
            kwargs = dict(
                logname="scheduler",
                loglevel=self.log_level,
                log_url=self.log_url,
                config=dict(self.config),
            )
            if "Process" in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs["in_thread"] = True
                launch_scheduler(*sargs, **kwargs)

    def terminate_children(self):
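        """Terminate the queue and scheduler subprocesses, if any."""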
        child_procs = []
        for child in self.children:
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            self.log.critical("terminating children...")
            for child in child_procs:
                try:
                    child.terminate()
                except OSError:
                    # already dead
                    pass

    def handle_signal(self, sig, frame):
        self.log.critical("Received signal %i, shutting down", sig)
        self.terminate_children()
        self.loop.stop()

    def init_signal(self):
        for sig in (SIGINT, SIGABRT, SIGTERM):
            signal(sig, self.handle_signal)

    def do_import_statements(self):
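        """Execute the configured import statements in this process.

        These come from the ``import_statements`` config list, e.g. (an
        illustrative value, set in a profile's ipcontroller_config.py)::

            c.IPControllerApp.import_statements = ["import os"]
        """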
        statements = self.import_statements
        for s in statements:
            try:
                self.log.msg("Executing statement: '%s'" % s)
                exec s in globals(), locals()
            except:
                self.log.msg("Error running statement: %s" % s)

    def forward_logging(self):
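        """Forward log records to ``log_url`` over a zmq PUB socket, if set."""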
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            handler.root_topic = "controller"
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        # children must be started before signals are setup,
        # otherwise signal-handling will fire multiple times
        for child in self.children:
            child.start()
        self.init_signal()

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
        finally:
            self.cleanup_connection_files()