class ImageWidget(DOMWidget):
    _view_name = Unicode('ImageView', sync=True)

    # Define the custom state properties to sync with the front-end
    format = Unicode('png', sync=True)
    width = CUnicode(sync=True)
    height = CUnicode(sync=True)
    _b64value = Unicode(sync=True)

    value = Bytes()

    def _value_changed(self, name, old, new):
        self._b64value = base64.b64encode(new)
class PBSEngineSetLauncher(PBSLauncher):
    """Launch Engines using PBS"""
    batch_file_name = CUnicode(u'pbs_engines', config=True)
    default_template = CUnicode(u"""#!/bin/sh
#PBS -V
#PBS -N ipengine
%s --cluster-dir $cluster_dir
""" % (' '.join(ipengine_cmd_argv)))

    def start(self, n, cluster_dir):
        """Start n engines by profile or cluster_dir."""
        self.log.info('Starting %i engines with PBSEngineSetLauncher: %r' %
                      (n, self.args))
        return super(PBSEngineSetLauncher, self).start(n, cluster_dir)
class SGEControllerLauncher(SGELauncher):
    """Launch a controller using SGE."""
    batch_file_name = CUnicode(u'sge_controller', config=True)
    default_template = CUnicode(u"""#$$ -V
#$$ -S /bin/sh
#$$ -N ipcontroller
%s --log-to-file --cluster-dir $cluster_dir
""" % (' '.join(ipcontroller_cmd_argv)))

    def start(self, cluster_dir):
        """Start the controller by profile or cluster_dir."""
        self.log.info("Starting SGEControllerLauncher: %r" % self.args)
        return super(SGEControllerLauncher, self).start(1, cluster_dir)
class SGEEngineSetLauncher(SGELauncher):
    """Launch Engines with SGE"""
    batch_file_name = CUnicode(u'sge_engines', config=True)
    default_template = CUnicode("""#$$ -V
#$$ -S /bin/sh
#$$ -N ipengine
%s --cluster-dir $cluster_dir
""" % (' '.join(ipengine_cmd_argv)))

    def start(self, n, cluster_dir):
        """Start n engines by profile or cluster_dir."""
        self.log.info('Starting %i engines with SGEEngineSetLauncher: %r' %
                      (n, self.args))
        return super(SGEEngineSetLauncher, self).start(n, cluster_dir)
class LoggingFactory(Configurable):
    """A most basic class that has a `log` (type: `Logger`) attribute,
    set via a `logname` trait."""
    log = Instance('logging.Logger', ('ZMQ', logging.WARN))
    logname = CUnicode('ZMQ')

    def _logname_changed(self, name, old, new):
        self.log = logging.getLogger(new)
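# --- Usage sketch (not part of the original source) -------------------------
# Assigning to the `logname` trait fires `_logname_changed`, which rebinds
# `log` to a fresh logger, so subsequent messages are routed under the new name:
#
#     f = LoggingFactory()            # logs under 'ZMQ' by default
#     f.logname = 'ipython.parallel'  # f.log is now getLogger('ipython.parallel')
#     f.log.warn("routed to the new logger")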
class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):

    job_file_name = CUnicode(u'ipengineset_job.xml', config=True)
    extra_args = List([], config=False)

    def write_job_file(self, n):
        job = IPEngineSetJob(config=self.config)

        for i in range(n):
            t = IPEngineTask(config=self.config)
            # The task's work directory is *not* the actual work directory of
            # the engine. It is used as the base path for the stdout/stderr
            # files that the scheduler redirects to.
            t.work_directory = self.cluster_dir
            # Add the --cluster-dir argument passed in from self.start().
            t.engine_args.extend(self.extra_args)
            job.add_task(t)

        self.log.info("Writing job description file: %s" % self.job_file)
        job.write(self.job_file)

    @property
    def job_file(self):
        return os.path.join(self.cluster_dir, self.job_file_name)

    def start(self, n, cluster_dir):
        """Start n engines by cluster_dir."""
        self.extra_args = ['--cluster-dir', cluster_dir]
        self.cluster_dir = unicode(cluster_dir)
        return super(WindowsHPCEngineSetLauncher, self).start(n)
class LaTeXTool(SingletonConfigurable):
    """An object to store configuration of the LaTeX tool."""

    backends = List(
        CUnicode, ["matplotlib", "dvipng"],
        help="Preferred backend to draw LaTeX math equations. "
        "Backends in the list are checked one by one and the first "
        "usable one is used.  Note that `matplotlib` backend "
        "is usable only for inline style equations.  To draw "
        "display style equations, `dvipng` backend must be specified. ",
        # It is a List instead of Enum, to make configuration more
        # flexible.  For example, to use matplotlib mainly but dvipng
        # for display style, the default ["matplotlib", "dvipng"] can
        # be used.  To NOT use dvipng so that other repr such as
        # unicode pretty printing is used, you can use ["matplotlib"].
        config=True)

    use_breqn = CBool(
        True,
        help="Use breqn.sty to automatically break long equations. "
        "This configuration takes effect only for dvipng backend.",
        config=True)

    packages = List(
        ['amsmath', 'amsthm', 'amssymb', 'bm'],
        help="A list of packages to use for dvipng backend. "
        "'breqn' will be automatically appended when use_breqn=True.",
        config=True)

    preamble = CUnicode(
        help="Additional preamble to use when generating LaTeX source "
        "for dvipng backend.",
        config=True)
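# --- Configuration sketch (not part of the original source) -----------------
# LaTeXTool is a SingletonConfigurable, so it can be tuned from an IPython
# config file.  A minimal example, assuming the standard ipython_config.py:
#
#     c = get_config()
#     c.LaTeXTool.backends = ["matplotlib"]   # skip dvipng entirely
#     c.LaTeXTool.use_breqn = False           # don't auto-break long equations
#     c.LaTeXTool.preamble = r"\usepackage{bm}"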
class MongoDB(BaseDB):
    """MongoDB TaskRecord backend."""

    connection_args = List(config=True)
    connection_kwargs = Dict(config=True)
    database = CUnicode(config=True)
    _table = Dict()

    def __init__(self, **kwargs):
        super(MongoDB, self).__init__(**kwargs)
        self._connection = Connection(*self.connection_args,
                                      **self.connection_kwargs)
        if not self.database:
            self.database = self.session
        self._db = self._connection[self.database]
        self._records = self._db['task_records']

    def _binary_buffers(self, rec):
        for key in ('buffers', 'result_buffers'):
            if key in rec:
                rec[key] = map(Binary, rec[key])
        return rec

    def add_record(self, msg_id, rec):
        """Add a new Task Record, by msg_id."""
        rec = self._binary_buffers(rec)
        obj_id = self._records.insert(rec)
        self._table[msg_id] = obj_id

    def get_record(self, msg_id):
        """Get a specific Task Record, by msg_id."""
        return self._records.find_one(self._table[msg_id])

    def update_record(self, msg_id, rec):
        """Update the data in an existing record."""
        rec = self._binary_buffers(rec)
        obj_id = self._table[msg_id]
        self._records.update({'_id': obj_id}, {'$set': rec})

    def drop_matching_records(self, check):
        """Remove records from the DB that match a query dict."""
        self._records.remove(check)

    def drop_record(self, msg_id):
        """Remove a record from the DB."""
        obj_id = self._table.pop(msg_id)
        self._records.remove(obj_id)

    def find_records(self, check, id_only=False):
        """Find records matching a query dict."""
        matches = list(self._records.find(check))
        if id_only:
            return [rec['msg_id'] for rec in matches]
        else:
            data = {}
            for rec in matches:
                data[rec['msg_id']] = rec
            return data
class DOMWidget(Widget):
    visible = Bool(True, allow_none=True,
        help="Whether the widget is visible.  False collapses the empty "
             "space, while None preserves the empty space.", sync=True)
    _css = Tuple(sync=True, help="CSS property list: (selector, key, value)")
    _dom_classes = Tuple(sync=True, help="DOM classes applied to widget.$el.")

    width = CUnicode(sync=True)
    height = CUnicode(sync=True)
    # A default padding of 2.5px makes the widgets look nice when displayed inline.
    padding = CUnicode("2.5px", sync=True)
    margin = CUnicode(sync=True)

    color = Unicode(sync=True)
    background_color = Unicode(sync=True)
    border_color = Unicode(sync=True)

    border_width = CUnicode(sync=True)
    border_radius = CUnicode(sync=True)
    border_style = CaselessStrEnum(values=[  # http://www.w3schools.com/cssref/pr_border-style.asp
        'none', 'hidden', 'dotted', 'dashed', 'solid', 'double', 'groove',
        'ridge', 'inset', 'outset', 'initial', 'inherit', ''],
        default_value='', sync=True)

    font_style = CaselessStrEnum(values=[  # http://www.w3schools.com/cssref/pr_font_font-style.asp
        'normal', 'italic', 'oblique', 'initial', 'inherit', ''],
        default_value='', sync=True)
    font_weight = CaselessStrEnum(values=[  # http://www.w3schools.com/cssref/pr_font_weight.asp
        'normal', 'bold', 'bolder', 'lighter', 'initial', 'inherit',
        ''] + [str(100 * (i + 1)) for i in range(9)],
        default_value='', sync=True)
    font_size = CUnicode(sync=True)
    font_family = Unicode(sync=True)

    def __init__(self, *pargs, **kwargs):
        super(DOMWidget, self).__init__(*pargs, **kwargs)

        def _validate_border(name, old, new):
            if new is not None and new != '':
                if name != 'border_width' and not self.border_width:
                    self.border_width = 1
                if name != 'border_style' and self.border_style == '':
                    self.border_style = 'solid'
        self.on_trait_change(_validate_border,
                             ['border_width', 'border_style', 'border_color'])
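# --- Behavior sketch (not part of the original source) ----------------------
# The `_validate_border` handler backfills sensible defaults: setting any one
# border trait forces a visible border by filling in the other two.
#
#     w = DOMWidget()
#     w.border_color = 'red'
#     # _validate_border fires: w.border_width == 1, w.border_style == 'solid'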
class SSHLauncher(LocalProcessLauncher):
    """A minimal launcher for ssh.

    To be useful this will probably have to be extended to use the ``sshx``
    idea for environment variables.  There could be other things this needs
    as well.
    """

    ssh_cmd = List(['ssh'], config=True)
    ssh_args = List(['-tt'], config=True)
    program = List(['date'], config=True)
    program_args = List([], config=True)
    hostname = CUnicode('', config=True)
    user = CUnicode('', config=True)
    location = CUnicode('')

    def _hostname_changed(self, name, old, new):
        if self.user:
            self.location = u'%s@%s' % (self.user, new)
        else:
            self.location = new

    def _user_changed(self, name, old, new):
        self.location = u'%s@%s' % (new, self.hostname)

    def find_args(self):
        return self.ssh_cmd + self.ssh_args + [self.location] + \
               self.program + self.program_args

    def start(self, cluster_dir, hostname=None, user=None):
        self.cluster_dir = unicode(cluster_dir)
        if hostname is not None:
            self.hostname = hostname
        if user is not None:
            self.user = user

        return super(SSHLauncher, self).start()

    def signal(self, sig):
        if self.state == 'running':
            # send escaped ssh connection-closer
            self.process.stdin.write('~.')
            self.process.stdin.flush()
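# --- Usage sketch (not part of the original source) -------------------------
# `location` is derived from the `user` and `hostname` traits by the change
# handlers above, and ends up in the argv built by find_args().  Hostname
# below is hypothetical:
#
#     launcher = SSHLauncher(program=['uptime'])
#     launcher.user = 'alice'
#     launcher.hostname = 'node1.example.com'
#     # launcher.find_args() -> ['ssh', '-tt', '[email protected]', 'uptime']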
class Image(DOMWidget):
    """Displays an image as a widget.

    The `value` of this widget accepts a byte string.  The byte string is the
    raw image data that you want the browser to display.  You can explicitly
    define the format of the byte string using the `format` trait (which
    defaults to "png")."""
    _view_name = Unicode('ImageView', sync=True)

    # Define the custom state properties to sync with the front-end
    format = Unicode('png', sync=True)
    width = CUnicode(sync=True)
    height = CUnicode(sync=True)
    _b64value = Unicode(sync=True)

    value = Bytes()

    def _value_changed(self, name, old, new):
        self._b64value = base64.b64encode(new)
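# --- Usage sketch (not part of the original source) -------------------------
# Assigning raw bytes to `value` triggers `_value_changed`, which keeps the
# synced `_b64value` trait up to date for the front-end.  'logo.png' is a
# hypothetical file:
#
#     with open('logo.png', 'rb') as f:
#         img = Image(width='200px')
#         img.value = f.read()   # _b64value now holds the base64-encoded text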
class SessionFactory(LoggingFactory):
    """The Base factory from which every factory in IPython.parallel inherits"""

    packer = Str('', config=True)
    unpacker = Str('', config=True)
    ident = CStr('', config=True)

    def _ident_default(self):
        return str(uuid.uuid4())

    username = CUnicode(os.environ.get('USER', 'username'), config=True)
    exec_key = CUnicode('', config=True)
    # not configurable:
    context = Instance('zmq.Context', (), {})
    session = Instance('IPython.parallel.streamsession.StreamSession')
    loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)

    def _loop_default(self):
        return IOLoop.instance()

    def __init__(self, **kwargs):
        super(SessionFactory, self).__init__(**kwargs)
        exec_key = self.exec_key or None
        # set the packers:
        if not self.packer:
            packer_f = unpacker_f = None
        elif self.packer.lower() == 'json':
            packer_f = ss.json_packer
            unpacker_f = ss.json_unpacker
        elif self.packer.lower() == 'pickle':
            packer_f = ss.pickle_packer
            unpacker_f = ss.pickle_unpacker
        else:
            packer_f = import_item(self.packer)
            unpacker_f = import_item(self.unpacker)

        # construct the session
        self.session = ss.StreamSession(self.username, self.ident,
                                        packer=packer_f, unpacker=unpacker_f,
                                        key=exec_key)
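# --- Configuration sketch (not part of the original source) -----------------
# The `packer` trait selects how messages are serialized: 'json' and 'pickle'
# are recognized by name; anything else is treated as a dotted import path
# resolved with import_item, in which case `unpacker` must be set too.
# `mypkg.dumps`/`mypkg.loads` below are hypothetical:
#
#     f = SessionFactory(packer='pickle')        # built-in pair
#     g = SessionFactory(packer='mypkg.dumps',
#                        unpacker='mypkg.loads')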
class ImageButton(DOMWidget):
    disabled = Bool(False, help="Enable or disable user changes.", sync=True)

    _view_name = Unicode('ImageButtonView', sync=True)
    format = Unicode('png', sync=True)
    width = CUnicode(sync=True)
    height = CUnicode(sync=True)
    _b64value = Unicode(sync=True)

    value = Bytes()

    def _value_changed(self, name, old, new):
        self._b64value = base64.b64encode(new)

    def __init__(self, **kwargs):
        super(ImageButton, self).__init__(**kwargs)
        self._click_handlers = CallbackDispatcher()
        self.on_msg(self._handle_button_msg)

    def on_click(self, callback, remove=False):
        self._click_handlers.register_callback(callback, remove=remove)

    def _handle_button_msg(self, _, content):
        if content.get('event', '') == 'click':
            self._click_handlers(self, content)
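# --- Usage sketch (not part of the original source) -------------------------
# Click events arrive as comm messages with event == 'click' and are fanned
# out through the CallbackDispatcher registered in __init__.  The exact keys
# of `content` depend on the JavaScript view and are an assumption here:
#
#     btn = ImageButton(width='64px', height='64px')
#
#     def handle_click(widget, content):
#         print("clicked:", content)
#
#     btn.on_click(handle_click)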
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    submit_command = List(['qsub'], config=True)
    delete_command = List(['qdel'], config=True)
    job_id_regexp = CUnicode(r'\d+', config=True)

    batch_file = CUnicode(u'')
    job_array_regexp = CUnicode(r'#PBS\W+-t\W+[\w\d\-\$]+')
    job_array_template = CUnicode('#PBS -t 1-$n')
    queue_regexp = CUnicode(r'#PBS\W+-q\W+\$?\w+')
    queue_template = CUnicode('#PBS -q $queue')
class Label(InstallerMixin, widgets.DOMWidget):
    """
    Just some text...
    """
    _view_name = Unicode('LabelView', sync=True)
    _view_module = Unicode('nbextensions/ipbs/js/widget_label', sync=True)

    value = CUnicode(sync=True)
    html = Bool(False, sync=True)
    lead = Bool(False, sync=True)

    align = Enum(bs.Alignment, sync=True)  # 3.2?
    transform = Enum(bs.Transformation, sync=True)

    # bootstrap context color
    context = Enum(bs.Context, default_value=bs.Context.default, sync=True)

    def __init__(self, value=None, **kwargs):
        if value is not None:
            kwargs["value"] = value
        super(Label, self).__init__(**kwargs)
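# --- Usage sketch (not part of the original source) -------------------------
# The positional `value` argument is folded into the trait kwargs in
# __init__, so both spellings below are equivalent:
#
#     lbl = Label("Status: ready", context=bs.Context.info)
#     lbl = Label(value="Status: ready", context=bs.Context.info)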
class IPythonConsoleApp(ConnectionFileMixin):
    name = 'ipython-console-mixin'

    description = """
        The IPython Mixin Console.

        This class contains the common portions of console client (QtConsole,
        ZMQ-based terminal console, etc).  It is not a full console, in that
        launched terminal subprocesses will not be able to accept input.

        The Console using this mixin supports various extra features beyond
        the single-process Terminal IPython shell, such as connecting to an
        existing kernel, via:

            ipython <appname> --existing

        as well as tunneling via SSH
    """

    classes = classes
    flags = Dict(flags)
    aliases = Dict(aliases)
    kernel_manager_class = KernelManager
    kernel_client_class = BlockingKernelClient

    kernel_argv = List(Unicode)
    # frontend flags&aliases to be stripped when building kernel_argv
    frontend_flags = Any(app_flags)
    frontend_aliases = Any(app_aliases)

    # create requested profiles by default, if they don't exist:
    auto_create = CBool(True)
    # connection info:

    sshserver = Unicode('', config=True,
        help="""The SSH server to use to connect to the kernel.""")
    sshkey = Unicode('', config=True,
        help="""Path to the ssh key to use for logging in to the ssh server.""")

    hb_port = Int(0, config=True,
        help="set the heartbeat port [default: random]")
    shell_port = Int(0, config=True,
        help="set the shell (ROUTER) port [default: random]")
    iopub_port = Int(0, config=True,
        help="set the iopub (PUB) port [default: random]")
    stdin_port = Int(0, config=True,
        help="set the stdin (DEALER) port [default: random]")

    connection_file = Unicode('', config=True,
        help="""JSON file in which to store connection info [default: kernel-<pid>.json]

        This file will contain the IP, ports, and authentication key needed to
        connect clients to this kernel.  By default, this file will be created
        in the security-dir of the current profile, but can be specified by
        absolute path.
        """)

    def _connection_file_default(self):
        return 'kernel-%i.json' % os.getpid()

    existing = CUnicode('', config=True,
        help="""Connect to an already running kernel""")

    kernel_name = Unicode('python', config=True,
        help="""The name of the default kernel to start.""")

    confirm_exit = CBool(True, config=True,
        help="""
        Set to display confirmation dialog on exit.  You can always use 'exit'
        or 'quit', to force a direct exit without any confirmation.""",
    )

    def build_kernel_argv(self, argv=None):
        """build argv to be passed to kernel subprocess"""
        if argv is None:
            argv = sys.argv[1:]
        self.kernel_argv = swallow_argv(argv, self.frontend_aliases,
                                        self.frontend_flags)
        # kernel should inherit default config file from frontend
        self.kernel_argv.append("--IPKernelApp.parent_appname='%s'" % self.name)

    def init_connection_file(self):
        """find the connection file, and load the info if found.

        The current working directory and the current profile's security
        directory will be searched for the file if it is not given by
        absolute path.

        When attempting to connect to an existing kernel and the `--existing`
        argument does not match an existing file, it will be interpreted as a
        fileglob, and the matching file in the current profile's security dir
        with the latest access time will be used.

        After this method is called, self.connection_file contains the *full
        path* to the connection file, never just its name.
        """
        if self.existing:
            try:
                cf = find_connection_file(self.existing)
            except Exception:
                self.log.critical("Could not find existing kernel connection file %s",
                                  self.existing)
                self.exit(1)
            self.log.debug("Connecting to existing kernel: %s" % cf)
            self.connection_file = cf
        else:
            # not existing, check if we are going to write the file, and
            # ensure that self.connection_file is a full path, not just the shortname
            try:
                cf = find_connection_file(self.connection_file)
            except Exception:
                # file might not exist
                if self.connection_file == os.path.basename(self.connection_file):
                    # just shortname, put it in security dir
                    cf = os.path.join(self.profile_dir.security_dir,
                                      self.connection_file)
                else:
                    cf = self.connection_file
            self.connection_file = cf

        # should load_connection_file only be used for existing?
        # as it is now, this allows reusing ports if an existing
        # file is requested
        try:
            self.load_connection_file()
        except Exception:
            self.log.error("Failed to load connection file: %r",
                           self.connection_file, exc_info=True)
            self.exit(1)

    def load_connection_file(self):
        """load ip/port/hmac config from JSON connection file"""
        # this is identical to IPKernelApp.load_connection_file
        # perhaps it can be centralized somewhere?
        try:
            fname = filefind(self.connection_file,
                             ['.', self.profile_dir.security_dir])
        except IOError:
            self.log.debug("Connection File not found: %s", self.connection_file)
            return
        self.log.debug(u"Loading connection file %s", fname)
        with open(fname) as f:
            cfg = json.load(f)
        self.transport = cfg.get('transport', 'tcp')
        self.ip = cfg.get('ip', localhost())

        for channel in ('hb', 'shell', 'iopub', 'stdin', 'control'):
            name = channel + '_port'
            if getattr(self, name) == 0 and name in cfg:
                # not overridden by config or cl_args
                setattr(self, name, cfg[name])

        if 'key' in cfg:
            self.config.Session.key = str_to_bytes(cfg['key'])
        if 'signature_scheme' in cfg:
            self.config.Session.signature_scheme = cfg['signature_scheme']

    def init_ssh(self):
        """set up ssh tunnels, if needed."""
        if not self.existing or (not self.sshserver and not self.sshkey):
            return
        self.load_connection_file()

        transport = self.transport
        ip = self.ip

        if transport != 'tcp':
            self.log.error("Can only use ssh tunnels with TCP sockets, not %s",
                           transport)
            sys.exit(-1)

        if self.sshkey and not self.sshserver:
            # specifying just the key implies that we are connecting directly
            self.sshserver = ip
            ip = localhost()

        # build connection dict for tunnels:
        info = dict(ip=ip,
                    shell_port=self.shell_port,
                    iopub_port=self.iopub_port,
                    stdin_port=self.stdin_port,
                    hb_port=self.hb_port,
        )

        self.log.info("Forwarding connections to %s via %s" % (ip, self.sshserver))

        # tunnels return a new set of ports, which will be on localhost:
        self.ip = localhost()
        try:
            newports = tunnel_to_kernel(info, self.sshserver, self.sshkey)
        except:
            # even catch KeyboardInterrupt
            self.log.error("Could not setup tunnels", exc_info=True)
            self.exit(1)

        self.shell_port, self.iopub_port, self.stdin_port, self.hb_port = newports

        cf = self.connection_file
        base, ext = os.path.splitext(cf)
        base = os.path.basename(base)
        self.connection_file = base + '-ssh' + ext

        self.log.info("To connect another client via this tunnel, use:")
        self.log.info("--existing %s" % self.connection_file)

    def _new_connection_file(self):
        cf = ''
        while not cf:
            # we don't need a 128b id to distinguish kernels, use more readable
            # 48b node segment (12 hex chars).  Users running more than 32k
            # simultaneous kernels can subclass.
            ident = str(uuid.uuid4()).split('-')[-1]
            cf = os.path.join(self.profile_dir.security_dir,
                              'kernel-%s.json' % ident)
            # only keep if it's actually new.  Protect against unlikely
            # collision in 48b random search space
            cf = cf if not os.path.exists(cf) else ''
        return cf

    def init_kernel_manager(self):
        # Don't let Qt or ZMQ swallow KeyboardInterrupts.
        if self.existing:
            self.kernel_manager = None
            return
        signal.signal(signal.SIGINT, signal.SIG_DFL)

        # Create a KernelManager and start a kernel.
        try:
            self.kernel_manager = self.kernel_manager_class(
                ip=self.ip,
                transport=self.transport,
                shell_port=self.shell_port,
                iopub_port=self.iopub_port,
                stdin_port=self.stdin_port,
                hb_port=self.hb_port,
                connection_file=self.connection_file,
                kernel_name=self.kernel_name,
                parent=self,
                ipython_dir=self.ipython_dir,
            )
        except NoSuchKernel:
            self.log.critical("Could not find kernel %s", self.kernel_name)
            self.exit(1)

        self.kernel_manager.client_factory = self.kernel_client_class
        self.kernel_manager.start_kernel(extra_arguments=self.kernel_argv)
        atexit.register(self.kernel_manager.cleanup_ipc_files)

        if self.sshserver:
            # ssh, write new connection file
            self.kernel_manager.write_connection_file()

        # in case KM defaults / ssh writing changes things:
        km = self.kernel_manager
        self.shell_port = km.shell_port
        self.iopub_port = km.iopub_port
        self.stdin_port = km.stdin_port
        self.hb_port = km.hb_port
        self.connection_file = km.connection_file

        atexit.register(self.kernel_manager.cleanup_connection_file)

    def init_kernel_client(self):
        if self.kernel_manager is not None:
            self.kernel_client = self.kernel_manager.client()
        else:
            self.kernel_client = self.kernel_client_class(
                ip=self.ip,
                transport=self.transport,
                shell_port=self.shell_port,
                iopub_port=self.iopub_port,
                stdin_port=self.stdin_port,
                hb_port=self.hb_port,
                connection_file=self.connection_file,
                parent=self,
            )

        self.kernel_client.start_channels()

    def initialize(self, argv=None):
        """
        Classes which mix this class in should call:
            IPythonConsoleApp.initialize(self, argv)
        """
        self.init_connection_file()
        default_secure(self.config)
        self.init_ssh()
        self.init_kernel_manager()
        self.init_kernel_client()
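# --- Usage sketch (not part of the original source) -------------------------
# The mixin's connection logic, seen from the command line.  `--existing`
# accepts a shortname, absolute path, or fileglob resolved against the
# profile's security dir; adding ssh options routes traffic through tunnels
# and writes a derived kernel-*-ssh.json connection file.  The file name and
# host below are hypothetical:
#
#     ipython qtconsole --existing                    # most recent kernel
#     ipython qtconsole --existing kernel-1234.json
#     ipython qtconsole --existing kernel-1234.json --ssh=login.example.com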
class SingleUserNotebookApp(NotebookApp):
    """A subclass of the regular NotebookApp that is aware of the parent
    multiuser context."""
    user = CUnicode(config=True)

    def _user_changed(self, name, old, new):
        self.log.name = new

    cookie_name = Unicode(config=True)
    hub_prefix = Unicode(config=True)
    hub_api_url = Unicode(config=True)
    aliases = aliases
    open_browser = False
    trust_xheaders = True
    login_handler_class = JupyterHubLoginHandler
    logout_handler_class = JupyterHubLogoutHandler

    cookie_cache_lifetime = Integer(
        config=True,
        default_value=300,
        allow_none=True,
        help="""
        Time, in seconds, that we cache a validated cookie before requiring
        revalidation with the hub.
        """,
    )

    def _log_datefmt_default(self):
        """Exclude date from default date format"""
        return "%Y-%m-%d %H:%M:%S"

    def _log_format_default(self):
        """override default log format to include time"""
        return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s %(module)s:%(lineno)d]%(end_color)s %(message)s"

    def _confirm_exit(self):
        # disable the exit confirmation for background notebook processes
        ioloop.IOLoop.instance().stop()

    def _clear_cookie_cache(self):
        self.log.debug("Clearing cookie cache")
        self.tornado_settings['cookie_cache'].clear()

    def start(self):
        # Start a PeriodicCallback to clear cached cookies.  This forces us to
        # revalidate our user with the Hub at least every
        # `cookie_cache_lifetime` seconds.
        if self.cookie_cache_lifetime:
            ioloop.PeriodicCallback(
                self._clear_cookie_cache,
                self.cookie_cache_lifetime * 1e3,
            ).start()
        super(SingleUserNotebookApp, self).start()

    def init_webapp(self):
        # load the hub related settings into the tornado settings dict
        env = os.environ
        s = self.tornado_settings
        s['cookie_cache'] = {}
        s['user'] = self.user
        s['hub_api_key'] = env.pop('JPY_API_TOKEN')
        s['hub_prefix'] = self.hub_prefix
        s['cookie_name'] = self.cookie_name
        s['login_url'] = self.hub_prefix
        s['hub_api_url'] = self.hub_api_url
        super(SingleUserNotebookApp, self).init_webapp()
        self.patch_templates()

    def patch_templates(self):
        """Patch page templates to add Hub-related buttons"""
        env = self.web_app.settings['jinja2_env']
        env.globals['hub_control_panel_url'] = \
            url_path_join(self.hub_prefix, 'home')

        # patch jinja env loading to modify page template
        def get_page(name):
            if name == 'page.html':
                return page_template

        orig_loader = env.loader
        env.loader = ChoiceLoader([
            FunctionLoader(get_page),
            orig_loader,
        ])
class MongoDB(BaseDB):
    """MongoDB TaskRecord backend."""

    connection_args = List(config=True)    # args passed to pymongo.Connection
    connection_kwargs = Dict(config=True)  # kwargs passed to pymongo.Connection
    database = CUnicode(config=True)       # name of the mongodb database

    _table = Dict()

    def __init__(self, **kwargs):
        super(MongoDB, self).__init__(**kwargs)
        self._connection = Connection(*self.connection_args,
                                      **self.connection_kwargs)
        if not self.database:
            self.database = self.session
        self._db = self._connection[self.database]
        self._records = self._db['task_records']

    def _binary_buffers(self, rec):
        for key in ('buffers', 'result_buffers'):
            if rec.get(key, None):
                rec[key] = map(Binary, rec[key])
        return rec

    def add_record(self, msg_id, rec):
        """Add a new Task Record, by msg_id."""
        rec = self._binary_buffers(rec)
        obj_id = self._records.insert(rec)
        self._table[msg_id] = obj_id

    def get_record(self, msg_id):
        """Get a specific Task Record, by msg_id."""
        return self._records.find_one(self._table[msg_id])

    def update_record(self, msg_id, rec):
        """Update the data in an existing record."""
        rec = self._binary_buffers(rec)
        obj_id = self._table[msg_id]
        self._records.update({'_id': obj_id}, {'$set': rec})

    def drop_matching_records(self, check):
        """Remove records from the DB that match a query dict."""
        self._records.remove(check)

    def drop_record(self, msg_id):
        """Remove a record from the DB."""
        obj_id = self._table.pop(msg_id)
        self._records.remove(obj_id)

    def find_records(self, check, keys=None):
        """Find records matching a query dict, optionally extracting subset of keys.

        Returns list of matching records.

        Parameters
        ----------
        check: dict
            mongodb-style query argument
        keys: list of strs [optional]
            if specified, the subset of keys to extract.  msg_id will *always*
            be included.
        """
        if keys and 'msg_id' not in keys:
            keys.append('msg_id')
        matches = list(self._records.find(check, keys))
        for rec in matches:
            rec.pop('_id')
        return matches

    def get_history(self):
        """get all msg_ids, ordered by time submitted."""
        cursor = self._records.find({}, {'msg_id': 1}).sort('submitted')
        return [rec['msg_id'] for rec in cursor]
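# --- Configuration sketch (not part of the original source) -----------------
# Selecting this backend for the Hub and pointing it at a mongod instance,
# e.g. in ipcontroller_config.py.  The db_class import path is an assumption
# based on where this module appears to live; adjust to your tree:
#
#     c = get_config()
#     c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
#     c.MongoDB.connection_args = ['localhost', 27017]
#     c.MongoDB.database = 'ipython_tasks'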
class SGELauncher(PBSLauncher):
    """Sun GridEngine is a PBS clone with slightly different syntax"""
    job_array_regexp = CUnicode(r'#$$\W+-t\W+[\w\d\-\$]+')
    job_array_template = CUnicode('#$$ -t 1-$n')
    queue_regexp = CUnicode(r'#$$\W+-q\W+\$?\w+')
    queue_template = CUnicode('#$$ -q $queue')
class Client(HasTraits): """A semi-synchronous client to the IPython ZMQ cluster Parameters ---------- url_or_file : bytes; zmq url or path to ipcontroller-client.json Connection information for the Hub's registration. If a json connector file is given, then likely no further configuration is necessary. [Default: use profile] profile : bytes The name of the Cluster profile to be used to find connector information. [Default: 'default'] context : zmq.Context Pass an existing zmq.Context instance, otherwise the client will create its own. username : bytes set username to be passed to the Session object debug : bool flag for lots of message printing for debug purposes #-------------- ssh related args ---------------- # These are args for configuring the ssh tunnel to be used # credentials are used to forward connections over ssh to the Controller # Note that the ip given in `addr` needs to be relative to sshserver # The most basic case is to leave addr as pointing to localhost (127.0.0.1), # and set sshserver as the same machine the Controller is on. However, # the only requirement is that sshserver is able to see the Controller # (i.e. is within the same trusted network). sshserver : str A string of the form passed to ssh, i.e. 'server.tld' or '[email protected]:port' If keyfile or password is specified, and this is not, it will default to the ip given in addr. sshkey : str; path to public ssh key file This specifies a key to be used in ssh login, default None. Regular default ssh keys will be used without specifying this argument. password : str Your ssh password to sshserver. Note that if this is left None, you will be prompted for it if passwordless key based login is unavailable. paramiko : bool flag for whether to use paramiko instead of shell ssh for tunneling. [default: True on win32, False else] ------- exec authentication args ------- If even localhost is untrusted, you can have some protection against unauthorized execution by using a key. Messages are still sent as cleartext, so if someone can snoop your loopback traffic this will not help against malicious attacks. exec_key : str an authentication key or file containing a key default: None Attributes ---------- ids : list of int engine IDs requesting the ids attribute always synchronizes the registration state. To request ids without synchronization, use semi-private _ids attributes. history : list of msg_ids a list of msg_ids, keeping track of all the execution messages you have submitted in order. outstanding : set of msg_ids a set of msg_ids that have been submitted, but whose results have not yet been received. 
results : dict a dict of all our results, keyed by msg_id block : bool determines default behavior when block not specified in execution methods Methods ------- spin flushes incoming results and registration state changes control methods spin, and requesting `ids` also ensures up to date wait wait on one or more msg_ids execution methods apply legacy: execute, run data movement push, pull, scatter, gather query methods queue_status, get_result, purge, result_status control methods abort, shutdown """ block = Bool(False) outstanding = Set() results = Instance('collections.defaultdict', (dict, )) metadata = Instance('collections.defaultdict', (Metadata, )) history = List() debug = Bool(False) profile = CUnicode('default') _outstanding_dict = Instance('collections.defaultdict', (set, )) _ids = List() _connected = Bool(False) _ssh = Bool(False) _context = Instance('zmq.Context') _config = Dict() _engines = Instance(util.ReverseDict, (), {}) # _hub_socket=Instance('zmq.Socket') _query_socket = Instance('zmq.Socket') _control_socket = Instance('zmq.Socket') _iopub_socket = Instance('zmq.Socket') _notification_socket = Instance('zmq.Socket') _mux_socket = Instance('zmq.Socket') _task_socket = Instance('zmq.Socket') _task_scheme = Str() _closed = False _ignored_control_replies = Int(0) _ignored_hub_replies = Int(0) def __init__(self, url_or_file=None, profile='default', cluster_dir=None, ipython_dir=None, context=None, username=None, debug=False, exec_key=None, sshserver=None, sshkey=None, password=None, paramiko=None, timeout=10): super(Client, self).__init__(debug=debug, profile=profile) if context is None: context = zmq.Context.instance() self._context = context self._setup_cluster_dir(profile, cluster_dir, ipython_dir) if self._cd is not None: if url_or_file is None: url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json') assert url_or_file is not None, "I can't find enough information to connect to a hub!"\ " Please specify at least one of url_or_file or profile." 
try: util.validate_url(url_or_file) except AssertionError: if not os.path.exists(url_or_file): if self._cd: url_or_file = os.path.join(self._cd.security_dir, url_or_file) assert os.path.exists( url_or_file ), "Not a valid connection file or url: %r" % url_or_file with open(url_or_file) as f: cfg = json.loads(f.read()) else: cfg = {'url': url_or_file} # sync defaults from args, json: if sshserver: cfg['ssh'] = sshserver if exec_key: cfg['exec_key'] = exec_key exec_key = cfg['exec_key'] sshserver = cfg['ssh'] url = cfg['url'] location = cfg.setdefault('location', None) cfg['url'] = util.disambiguate_url(cfg['url'], location) url = cfg['url'] self._config = cfg self._ssh = bool(sshserver or sshkey or password) if self._ssh and sshserver is None: # default to ssh via localhost sshserver = url.split('://')[1].split(':')[0] if self._ssh and password is None: if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko): password = False else: password = getpass("SSH Password for %s: " % sshserver) ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko) if exec_key is not None and os.path.isfile(exec_key): arg = 'keyfile' else: arg = 'key' key_arg = {arg: exec_key} if username is None: self.session = ss.StreamSession(**key_arg) else: self.session = ss.StreamSession(username, **key_arg) self._query_socket = self._context.socket(zmq.XREQ) self._query_socket.setsockopt(zmq.IDENTITY, self.session.session) if self._ssh: tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs) else: self._query_socket.connect(url) self.session.debug = self.debug self._notification_handlers = { 'registration_notification': self._register_engine, 'unregistration_notification': self._unregister_engine, 'shutdown_notification': lambda msg: self.close(), } self._queue_handlers = { 'execute_reply': self._handle_execute_reply, 'apply_reply': self._handle_apply_reply } self._connect(sshserver, ssh_kwargs, timeout) def __del__(self): """cleanup sockets, but _not_ context.""" self.close() def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir): if ipython_dir is None: ipython_dir = get_ipython_dir() if cluster_dir is not None: try: self._cd = ClusterDir.find_cluster_dir(cluster_dir) return except ClusterDirError: pass elif profile is not None: try: self._cd = ClusterDir.find_cluster_dir_by_profile( ipython_dir, profile) return except ClusterDirError: pass self._cd = None def _update_engines(self, engines): """Update our engines dict and _ids from a dict of the form: {id:uuid}.""" for k, v in engines.iteritems(): eid = int(k) self._engines[eid] = bytes(v) # force not unicode self._ids.append(eid) self._ids = sorted(self._ids) if sorted(self._engines.keys()) != range(len(self._engines)) and \ self._task_scheme == 'pure' and self._task_socket: self._stop_scheduling_tasks() def _stop_scheduling_tasks(self): """Stop scheduling tasks because an engine has been unregistered from a pure ZMQ scheduler. """ self._task_socket.close() self._task_socket = None msg = "An engine has been unregistered, and we are using pure " +\ "ZMQ task scheduling. Task farming will be disabled." if self.outstanding: msg += " If you were running tasks when this happened, " +\ "some `outstanding` msg_ids may never resolve." warnings.warn(msg, RuntimeWarning) def _build_targets(self, targets): """Turn valid target IDs or 'all' into two lists: (int_ids, uuids). 
""" if targets is None: targets = self._ids elif isinstance(targets, str): if targets.lower() == 'all': targets = self._ids else: raise TypeError("%r not valid str target, must be 'all'" % (targets)) elif isinstance(targets, int): if targets < 0: targets = self.ids[targets] if targets not in self.ids: raise IndexError("No such engine: %i" % targets) targets = [targets] if isinstance(targets, slice): indices = range(len(self._ids))[targets] ids = self.ids targets = [ids[i] for i in indices] if not isinstance(targets, (tuple, list, xrange)): raise TypeError( "targets by int/slice/collection of ints only, not %s" % (type(targets))) return [self._engines[t] for t in targets], list(targets) def _connect(self, sshserver, ssh_kwargs, timeout): """setup all our socket connections to the cluster. This is called from __init__.""" # Maybe allow reconnecting? if self._connected: return self._connected = True def connect_socket(s, url): url = util.disambiguate_url(url, self._config['location']) if self._ssh: return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs) else: return s.connect(url) self.session.send(self._query_socket, 'connection_request') r, w, x = zmq.select([self._query_socket], [], [], timeout) if not r: raise error.TimeoutError("Hub connection request timed out") idents, msg = self.session.recv(self._query_socket, mode=0) if self.debug: pprint(msg) msg = ss.Message(msg) content = msg.content self._config['registration'] = dict(content) if content.status == 'ok': if content.mux: self._mux_socket = self._context.socket(zmq.XREQ) self._mux_socket.setsockopt(zmq.IDENTITY, self.session.session) connect_socket(self._mux_socket, content.mux) if content.task: self._task_scheme, task_addr = content.task self._task_socket = self._context.socket(zmq.XREQ) self._task_socket.setsockopt(zmq.IDENTITY, self.session.session) connect_socket(self._task_socket, task_addr) if content.notification: self._notification_socket = self._context.socket(zmq.SUB) connect_socket(self._notification_socket, content.notification) self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'') # if content.query: # self._query_socket = self._context.socket(zmq.XREQ) # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session) # connect_socket(self._query_socket, content.query) if content.control: self._control_socket = self._context.socket(zmq.XREQ) self._control_socket.setsockopt(zmq.IDENTITY, self.session.session) connect_socket(self._control_socket, content.control) if content.iopub: self._iopub_socket = self._context.socket(zmq.SUB) self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'') self._iopub_socket.setsockopt(zmq.IDENTITY, self.session.session) connect_socket(self._iopub_socket, content.iopub) self._update_engines(dict(content.engines)) else: self._connected = False raise Exception("Failed to connect!") #-------------------------------------------------------------------------- # handlers and callbacks for incoming messages #-------------------------------------------------------------------------- def _unwrap_exception(self, content): """unwrap exception, and remap engine_id to int.""" e = error.unwrap_exception(content) # print e.traceback if e.engine_info: e_uuid = e.engine_info['engine_uuid'] eid = self._engines[e_uuid] e.engine_info['engine_id'] = eid return e def _extract_metadata(self, header, parent, content): md = { 'msg_id': parent['msg_id'], 'received': datetime.now(), 'engine_uuid': header.get('engine', None), 'follow': parent.get('follow', []), 'after': parent.get('after', []), 'status': 
content['status'], } if md['engine_uuid'] is not None: md['engine_id'] = self._engines.get(md['engine_uuid'], None) if 'date' in parent: md['submitted'] = datetime.strptime(parent['date'], util.ISO8601) if 'started' in header: md['started'] = datetime.strptime(header['started'], util.ISO8601) if 'date' in header: md['completed'] = datetime.strptime(header['date'], util.ISO8601) return md def _register_engine(self, msg): """Register a new engine, and update our connection info.""" content = msg['content'] eid = content['id'] d = {eid: content['queue']} self._update_engines(d) def _unregister_engine(self, msg): """Unregister an engine that has died.""" content = msg['content'] eid = int(content['id']) if eid in self._ids: self._ids.remove(eid) uuid = self._engines.pop(eid) self._handle_stranded_msgs(eid, uuid) if self._task_socket and self._task_scheme == 'pure': self._stop_scheduling_tasks() def _handle_stranded_msgs(self, eid, uuid): """Handle messages known to be on an engine when the engine unregisters. It is possible that this will fire prematurely - that is, an engine will go down after completing a result, and the client will be notified of the unregistration and later receive the successful result. """ outstanding = self._outstanding_dict[uuid] for msg_id in list(outstanding): if msg_id in self.results: # we already continue try: raise error.EngineError( "Engine %r died while running task %r" % (eid, msg_id)) except: content = error.wrap_exception() # build a fake message: parent = {} header = {} parent['msg_id'] = msg_id header['engine'] = uuid header['date'] = datetime.now().strftime(util.ISO8601) msg = dict(parent_header=parent, header=header, content=content) self._handle_apply_reply(msg) def _handle_execute_reply(self, msg): """Save the reply to an execute_request into our results. execute messages are never actually used. apply is used instead. """ parent = msg['parent_header'] msg_id = parent['msg_id'] if msg_id not in self.outstanding: if msg_id in self.history: print("got stale result: %s" % msg_id) else: print("got unknown result: %s" % msg_id) else: self.outstanding.remove(msg_id) self.results[msg_id] = self._unwrap_exception(msg['content']) def _handle_apply_reply(self, msg): """Save the reply to an apply_request into our results.""" parent = msg['parent_header'] msg_id = parent['msg_id'] if msg_id not in self.outstanding: if msg_id in self.history: print("got stale result: %s" % msg_id) print self.results[msg_id] print msg else: print("got unknown result: %s" % msg_id) else: self.outstanding.remove(msg_id) content = msg['content'] header = msg['header'] # construct metadata: md = self.metadata[msg_id] md.update(self._extract_metadata(header, parent, content)) # is this redundant? 
self.metadata[msg_id] = md e_outstanding = self._outstanding_dict[md['engine_uuid']] if msg_id in e_outstanding: e_outstanding.remove(msg_id) # construct result: if content['status'] == 'ok': self.results[msg_id] = util.unserialize_object(msg['buffers'])[0] elif content['status'] == 'aborted': self.results[msg_id] = error.TaskAborted(msg_id) elif content['status'] == 'resubmitted': # TODO: handle resubmission pass else: self.results[msg_id] = self._unwrap_exception(content) def _flush_notifications(self): """Flush notifications of engine registrations waiting in ZMQ queue.""" msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK) while msg is not None: if self.debug: pprint(msg) msg = msg[-1] msg_type = msg['msg_type'] handler = self._notification_handlers.get(msg_type, None) if handler is None: raise Exception("Unhandled message type: %s" % msg.msg_type) else: handler(msg) msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK) def _flush_results(self, sock): """Flush task or queue results waiting in ZMQ queue.""" msg = self.session.recv(sock, mode=zmq.NOBLOCK) while msg is not None: if self.debug: pprint(msg) msg = msg[-1] msg_type = msg['msg_type'] handler = self._queue_handlers.get(msg_type, None) if handler is None: raise Exception("Unhandled message type: %s" % msg.msg_type) else: handler(msg) msg = self.session.recv(sock, mode=zmq.NOBLOCK) def _flush_control(self, sock): """Flush replies from the control channel waiting in the ZMQ queue. Currently: ignore them.""" if self._ignored_control_replies <= 0: return msg = self.session.recv(sock, mode=zmq.NOBLOCK) while msg is not None: self._ignored_control_replies -= 1 if self.debug: pprint(msg) msg = self.session.recv(sock, mode=zmq.NOBLOCK) def _flush_ignored_control(self): """flush ignored control replies""" while self._ignored_control_replies > 0: self.session.recv(self._control_socket) self._ignored_control_replies -= 1 def _flush_ignored_hub_replies(self): msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK) while msg is not None: msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK) def _flush_iopub(self, sock): """Flush replies from the iopub channel waiting in the ZMQ queue. """ msg = self.session.recv(sock, mode=zmq.NOBLOCK) while msg is not None: if self.debug: pprint(msg) msg = msg[-1] parent = msg['parent_header'] msg_id = parent['msg_id'] content = msg['content'] header = msg['header'] msg_type = msg['msg_type'] # init metadata: md = self.metadata[msg_id] if msg_type == 'stream': name = content['name'] s = md[name] or '' md[name] = s + content['data'] elif msg_type == 'pyerr': md.update({'pyerr': self._unwrap_exception(content)}) elif msg_type == 'pyin': md.update({'pyin': content['code']}) else: md.update({msg_type: content.get('data', '')}) # reduntant? 
self.metadata[msg_id] = md msg = self.session.recv(sock, mode=zmq.NOBLOCK) #-------------------------------------------------------------------------- # len, getitem #-------------------------------------------------------------------------- def __len__(self): """len(client) returns # of engines.""" return len(self.ids) def __getitem__(self, key): """index access returns DirectView multiplexer objects Must be int, slice, or list/tuple/xrange of ints""" if not isinstance(key, (int, slice, tuple, list, xrange)): raise TypeError("key by int/slice/iterable of ints only, not %s" % (type(key))) else: return self.direct_view(key) #-------------------------------------------------------------------------- # Begin public methods #-------------------------------------------------------------------------- @property def ids(self): """Always up-to-date ids property.""" self._flush_notifications() # always copy: return list(self._ids) def close(self): if self._closed: return snames = filter(lambda n: n.endswith('socket'), dir(self)) for socket in map(lambda name: getattr(self, name), snames): if isinstance(socket, zmq.Socket) and not socket.closed: socket.close() self._closed = True def spin(self): """Flush any registration notifications and execution results waiting in the ZMQ queue. """ if self._notification_socket: self._flush_notifications() if self._mux_socket: self._flush_results(self._mux_socket) if self._task_socket: self._flush_results(self._task_socket) if self._control_socket: self._flush_control(self._control_socket) if self._iopub_socket: self._flush_iopub(self._iopub_socket) if self._query_socket: self._flush_ignored_hub_replies() def wait(self, jobs=None, timeout=-1): """waits on one or more `jobs`, for up to `timeout` seconds. Parameters ---------- jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects ints are indices to self.history strs are msg_ids default: wait on all outstanding messages timeout : float a time in seconds, after which to give up. 
default is -1, which means no timeout Returns ------- True : when all msg_ids are done False : timeout reached, some msg_ids still outstanding """ tic = time.time() if jobs is None: theids = self.outstanding else: if isinstance(jobs, (int, str, AsyncResult)): jobs = [jobs] theids = set() for job in jobs: if isinstance(job, int): # index access job = self.history[job] elif isinstance(job, AsyncResult): map(theids.add, job.msg_ids) continue theids.add(job) if not theids.intersection(self.outstanding): return True self.spin() while theids.intersection(self.outstanding): if timeout >= 0 and (time.time() - tic) > timeout: break time.sleep(1e-3) self.spin() return len(theids.intersection(self.outstanding)) == 0 #-------------------------------------------------------------------------- # Control methods #-------------------------------------------------------------------------- @spin_first @default_block def clear(self, targets=None, block=None): """Clear the namespace in target(s).""" targets = self._build_targets(targets)[0] for t in targets: self.session.send(self._control_socket, 'clear_request', content={}, ident=t) error = False if self.block: self._flush_ignored_control() for i in range(len(targets)): idents, msg = self.session.recv(self._control_socket, 0) if self.debug: pprint(msg) if msg['content']['status'] != 'ok': error = self._unwrap_exception(msg['content']) else: self._ignored_control_replies += len(targets) if error: raise error @spin_first @default_block def abort(self, jobs=None, targets=None, block=None): """Abort specific jobs from the execution queues of target(s). This is a mechanism to prevent jobs that have already been submitted from executing. Parameters ---------- jobs : msg_id, list of msg_ids, or AsyncResult The jobs to be aborted """ targets = self._build_targets(targets)[0] msg_ids = [] if isinstance(jobs, (basestring, AsyncResult)): jobs = [jobs] bad_ids = filter( lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs) if bad_ids: raise TypeError( "Invalid msg_id type %r, expected str or AsyncResult" % bad_ids[0]) for j in jobs: if isinstance(j, AsyncResult): msg_ids.extend(j.msg_ids) else: msg_ids.append(j) content = dict(msg_ids=msg_ids) for t in targets: self.session.send(self._control_socket, 'abort_request', content=content, ident=t) error = False if self.block: self._flush_ignored_control() for i in range(len(targets)): idents, msg = self.session.recv(self._control_socket, 0) if self.debug: pprint(msg) if msg['content']['status'] != 'ok': error = self._unwrap_exception(msg['content']) else: self._ignored_control_replies += len(targets) if error: raise error @spin_first @default_block def shutdown(self, targets=None, restart=False, hub=False, block=None): """Terminates one or more engine processes, optionally including the hub.""" if hub: targets = 'all' targets = self._build_targets(targets)[0] for t in targets: self.session.send(self._control_socket, 'shutdown_request', content={'restart': restart}, ident=t) error = False if block or hub: self._flush_ignored_control() for i in range(len(targets)): idents, msg = self.session.recv(self._control_socket, 0) if self.debug: pprint(msg) if msg['content']['status'] != 'ok': error = self._unwrap_exception(msg['content']) else: self._ignored_control_replies += len(targets) if hub: time.sleep(0.25) self.session.send(self._query_socket, 'shutdown_request') idents, msg = self.session.recv(self._query_socket, 0) if self.debug: pprint(msg) if msg['content']['status'] != 'ok': error = 
self._unwrap_exception(msg['content']) if error: raise error #-------------------------------------------------------------------------- # Execution methods #-------------------------------------------------------------------------- @default_block def _execute(self, code, targets='all', block=None): """Executes `code` on `targets` in blocking or nonblocking manner. ``execute`` is always `bound` (affects engine namespace) Parameters ---------- code : str the code string to be executed targets : int/str/list of ints/strs the engines on which to execute default : all block : bool whether or not to wait until done to return default: self.block """ return self[targets].execute(code, block=block) def _maybe_raise(self, result): """wrapper for maybe raising an exception if apply failed.""" if isinstance(result, error.RemoteError): raise result return result def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False, ident=None): """construct and send an apply message via a socket. This is the principal method with which all engine execution is performed by views. """ assert not self._closed, "cannot use me anymore, I'm closed!" # defaults: args = args if args is not None else [] kwargs = kwargs if kwargs is not None else {} subheader = subheader if subheader is not None else {} # validate arguments if not callable(f): raise TypeError("f must be callable, not %s" % type(f)) if not isinstance(args, (tuple, list)): raise TypeError("args must be tuple or list, not %s" % type(args)) if not isinstance(kwargs, dict): raise TypeError("kwargs must be dict, not %s" % type(kwargs)) if not isinstance(subheader, dict): raise TypeError("subheader must be dict, not %s" % type(subheader)) if not self._ids: # flush notification socket if no engines yet any_ids = self.ids if not any_ids: raise error.NoEnginesRegistered( "Can't execute without any connected engines.") # enforce types of f,args,kwargs bufs = util.pack_apply_message(f, args, kwargs) msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident, subheader=subheader, track=track) msg_id = msg['msg_id'] self.outstanding.add(msg_id) if ident: # possibly routed to a specific engine if isinstance(ident, list): ident = ident[-1] if ident in self._engines.values(): # save for later, in case of engine death self._outstanding_dict[ident].add(msg_id) self.history.append(msg_id) self.metadata[msg_id]['submitted'] = datetime.now() return msg #-------------------------------------------------------------------------- # construct a View object #-------------------------------------------------------------------------- def load_balanced_view(self, targets=None): """construct a DirectView object. If no arguments are specified, create a LoadBalancedView using all engines. Parameters ---------- targets: list,slice,int,etc. [default: use all engines] The subset of engines across which to load-balance """ if targets is not None: targets = self._build_targets(targets)[1] return LoadBalancedView(client=self, socket=self._task_socket, targets=targets) def direct_view(self, targets='all'): """construct a DirectView object. If no targets are specified, create a DirectView using all engines. Parameters ---------- targets: list,slice,int,etc. 
[default: use all engines] The engines to use for the View """ single = isinstance(targets, int) targets = self._build_targets(targets)[1] if single: targets = targets[0] return DirectView(client=self, socket=self._mux_socket, targets=targets) #-------------------------------------------------------------------------- # Data movement (TO BE REMOVED) #-------------------------------------------------------------------------- @default_block def _push(self, ns, targets='all', block=None, track=False): """Push the contents of `ns` into the namespace on `target`""" if not isinstance(ns, dict): raise TypeError("Must be a dict, not %s" % type(ns)) result = self.apply(util._push, kwargs=ns, targets=targets, block=block, bound=True, balanced=False, track=track) if not block: return result @default_block def _pull(self, keys, targets='all', block=None): """Pull objects from `target`'s namespace by `keys`""" if isinstance(keys, basestring): pass elif isinstance(keys, (list, tuple, set)): for key in keys: if not isinstance(key, basestring): raise TypeError("keys must be str, not type %r" % type(key)) else: raise TypeError("keys must be strs, not %r" % keys) result = self.apply(util._pull, (keys, ), targets=targets, block=block, bound=True, balanced=False) return result #-------------------------------------------------------------------------- # Query methods #-------------------------------------------------------------------------- @spin_first @default_block def get_result(self, indices_or_msg_ids=None, block=None): """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object. If the client already has the results, no request to the Hub will be made. This is a convenient way to construct AsyncResult objects, which are wrappers that include metadata about execution, and allow for awaiting results that were not submitted by this Client. It can also be a convenient way to retrieve the metadata associated with blocking execution, since it always retrieves Examples -------- :: In [10]: r = client.apply() Parameters ---------- indices_or_msg_ids : integer history index, str msg_id, or list of either The indices or msg_ids of indices to be retrieved block : bool Whether to wait for the result to be done Returns ------- AsyncResult A single AsyncResult object will always be returned. AsyncHubResult A subclass of AsyncResult that retrieves results from the Hub """ if indices_or_msg_ids is None: indices_or_msg_ids = -1 if not isinstance(indices_or_msg_ids, (list, tuple)): indices_or_msg_ids = [indices_or_msg_ids] theids = [] for id in indices_or_msg_ids: if isinstance(id, int): id = self.history[id] if not isinstance(id, str): raise TypeError("indices must be str or int, not %r" % id) theids.append(id) local_ids = filter( lambda msg_id: msg_id in self.history or msg_id in self.results, theids) remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids) if remote_ids: ar = AsyncHubResult(self, msg_ids=theids) else: ar = AsyncResult(self, msg_ids=theids) if block: ar.wait() return ar @spin_first def result_status(self, msg_ids, status_only=True): """Check on the status of the result(s) of the apply request with `msg_ids`. If status_only is False, then the actual results will be retrieved, else only the status of the results will be checked. Parameters ---------- msg_ids : list of msg_ids if int: Passed as index to self.history for convenience. status_only : bool (default: True) if False: Retrieve the actual results of completed tasks. 
Returns ------- results : dict There will always be the keys 'pending' and 'completed', which will be lists of msg_ids that are incomplete or complete. If `status_only` is False, then completed results will be keyed by their `msg_id`. """ if not isinstance(msg_ids, (list, tuple)): msg_ids = [msg_ids] theids = [] for msg_id in msg_ids: if isinstance(msg_id, int): msg_id = self.history[msg_id] if not isinstance(msg_id, basestring): raise TypeError("msg_ids must be str, not %r" % msg_id) theids.append(msg_id) completed = [] local_results = {} # comment this block out to temporarily disable local shortcut: for msg_id in theids: if msg_id in self.results: completed.append(msg_id) local_results[msg_id] = self.results[msg_id] theids.remove(msg_id) if theids: # some not locally cached content = dict(msg_ids=theids, status_only=status_only) msg = self.session.send(self._query_socket, "result_request", content=content) zmq.select([self._query_socket], [], []) idents, msg = self.session.recv(self._query_socket, zmq.NOBLOCK) if self.debug: pprint(msg) content = msg['content'] if content['status'] != 'ok': raise self._unwrap_exception(content) buffers = msg['buffers'] else: content = dict(completed=[], pending=[]) content['completed'].extend(completed) if status_only: return content failures = [] # load cached results into result: content.update(local_results) # update cache with results: for msg_id in sorted(theids): if msg_id in content['completed']: rec = content[msg_id] parent = rec['header'] header = rec['result_header'] rcontent = rec['result_content'] iodict = rec['io'] if isinstance(rcontent, str): rcontent = self.session.unpack(rcontent) md = self.metadata[msg_id] md.update(self._extract_metadata(header, parent, rcontent)) md.update(iodict) if rcontent['status'] == 'ok': res, buffers = util.unserialize_object(buffers) else: print rcontent res = self._unwrap_exception(rcontent) failures.append(res) self.results[msg_id] = res content[msg_id] = res if len(theids) == 1 and failures: raise failures[0] error.collect_exceptions(failures, "result_status") return content @spin_first def queue_status(self, targets='all', verbose=False): """Fetch the status of engine queues. Parameters ---------- targets : int/str/list of ints/strs the engines whose states are to be queried. default : all verbose : bool Whether to return lengths only, or lists of ids for each element """ engine_ids = self._build_targets(targets)[1] content = dict(targets=engine_ids, verbose=verbose) self.session.send(self._query_socket, "queue_request", content=content) idents, msg = self.session.recv(self._query_socket, 0) if self.debug: pprint(msg) content = msg['content'] status = content.pop('status') if status != 'ok': raise self._unwrap_exception(content) content = util.rekey(content) if isinstance(targets, int): return content[targets] else: return content @spin_first def purge_results(self, jobs=[], targets=[]): """Tell the Hub to forget results. Individual results can be purged by msg_id, or the entire history of specific targets can be purged. Parameters ---------- jobs : str or list of str or AsyncResult objects the msg_ids whose results should be forgotten. targets : int/str/list of ints/strs The targets, by uuid or int_id, whose entire history is to be purged. Use `targets='all'` to scrub everything from the Hub's memory. 
default : None """ if not targets and not jobs: raise ValueError( "Must specify at least one of `targets` and `jobs`") if targets: targets = self._build_targets(targets)[1] # construct msg_ids from jobs msg_ids = [] if isinstance(jobs, (basestring, AsyncResult)): jobs = [jobs] bad_ids = filter( lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs) if bad_ids: raise TypeError( "Invalid msg_id type %r, expected str or AsyncResult" % bad_ids[0]) for j in jobs: if isinstance(j, AsyncResult): msg_ids.extend(j.msg_ids) else: msg_ids.append(j) content = dict(targets=targets, msg_ids=msg_ids) self.session.send(self._query_socket, "purge_request", content=content) idents, msg = self.session.recv(self._query_socket, 0) if self.debug: pprint(msg) content = msg['content'] if content['status'] != 'ok': raise self._unwrap_exception(content)
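# A hedged usage sketch for the query methods above (get_result,
# result_status, queue_status, purge_results). It assumes a running
# IPython.parallel cluster; the import path varies between IPython versions,
# and `double` is a made-up function used purely for illustration.
def _client_query_example():
    from IPython.parallel import Client

    def double(x):
        return 2 * x

    client = Client()                    # connect with the default profile
    view = client[:]                     # DirectView on all engines
    ar = view.apply_async(double, 21)
    msg_id = ar.msg_ids[0]

    # look the job up again by msg_id; this also works for jobs submitted
    # by another Client connected to the same Hub
    ar2 = client.get_result(msg_id, block=True)
    print ar2.get()                      # one result per engine, e.g. [42, 42]

    print client.queue_status()                          # per-engine queue depths
    print client.result_status([msg_id], status_only=True)

    client.purge_results(jobs=[msg_id])  # forget this record on the Hub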
class BatchSystemLauncher(BaseLauncher):
    """Launch an external process using a batch system.

    This class is designed to work with UNIX batch systems like PBS, LSF,
    GridEngine, etc. The overall model is that there are different commands
    like qsub, qdel, etc. that handle the starting and stopping of the
    process.

    This class also has the notion of a batch script. The ``batch_template``
    attribute can be set to a string that is a template for the batch script.
    This template is instantiated using Itpl. Thus the template can use
    ${n} for the number of instances. Subclasses can add additional variables
    to the template dict.
    """

    # Subclasses must fill these in. See PBSEngineSet
    # The name of the command line program used to submit jobs.
    submit_command = List([''], config=True)
    # The name of the command line program used to delete jobs.
    delete_command = List([''], config=True)
    # A regular expression used to get the job id from the output of the
    # submit_command.
    job_id_regexp = CUnicode('', config=True)
    # The string that is the batch script template itself.
    batch_template = CUnicode('', config=True)
    # The file that contains the batch template
    batch_template_file = CUnicode(u'', config=True)
    # The filename of the instantiated batch script.
    batch_file_name = CUnicode(u'batch_script', config=True)
    # The PBS Queue
    queue = CUnicode(u'', config=True)

    # not configurable, override in subclasses
    # PBS Job Array regex
    job_array_regexp = CUnicode('')
    job_array_template = CUnicode('')
    # PBS Queue regex
    queue_regexp = CUnicode('')
    queue_template = CUnicode('')
    # The default batch template, override in subclasses
    default_template = CUnicode('')
    # The full path to the instantiated batch script.
    batch_file = CUnicode(u'')
    # the format dict used with batch_template:
    context = Dict()

    def find_args(self):
        return self.submit_command + [self.batch_file]

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(BatchSystemLauncher, self).__init__(work_dir=work_dir,
                                                  config=config, **kwargs)
        self.batch_file = os.path.join(self.work_dir, self.batch_file_name)

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = re.search(self.job_id_regexp, output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job submitted with job id: %r' % job_id)
        return job_id

    def write_batch_script(self, n):
        """Instantiate and write the batch script to the work_dir."""
        self.context['n'] = n
        self.context['queue'] = self.queue
        # print self.context
        # first priority is batch_template if set
        if self.batch_template_file and not self.batch_template:
            # second priority is batch_template_file
            with open(self.batch_template_file) as f:
                self.batch_template = f.read()
        if not self.batch_template:
            # third (last) priority is default_template
            self.batch_template = self.default_template

        regex = re.compile(self.job_array_regexp)
        # print regex.search(self.batch_template)
        if not regex.search(self.batch_template):
            self.log.info("adding job array settings to batch script")
            firstline, rest = self.batch_template.split('\n', 1)
            self.batch_template = u'\n'.join([firstline,
                                              self.job_array_template, rest])

        regex = re.compile(self.queue_regexp)
        # print regex.search(self.batch_template)
        if self.queue and not regex.search(self.batch_template):
            self.log.info("adding PBS queue settings to batch script")
            firstline, rest = self.batch_template.split('\n', 1)
            self.batch_template = u'\n'.join([firstline, self.queue_template,
                                              rest])
        script_as_string = Itpl.itplns(self.batch_template, self.context)
        self.log.info('Writing instantiated batch script: %s' % self.batch_file)

        with open(self.batch_file, 'w') as f:
            f.write(script_as_string)
        os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

    def start(self, n, cluster_dir):
        """Start n copies of the process using a batch system."""
        # Save cluster_dir in the context so it can be used in the batch
        # script template as ${cluster_dir}
        self.context['cluster_dir'] = cluster_dir
        self.cluster_dir = unicode(cluster_dir)
        self.write_batch_script(n)
        output = check_output(self.args, env=os.environ)

        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        output = check_output(self.delete_command + [self.job_id],
                              env=os.environ)
        # Pass the output of the kill cmd
        self.notify_stop(dict(job_id=self.job_id, output=output))
        return output
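# A hedged sketch of how BatchSystemLauncher is meant to be specialized:
# wire up the scheduler's submit/delete commands, a job-id regex, and a
# default template. The LSF command names and #BSUB directives below are
# illustrative assumptions, not a tested launcher; it also assumes
# ipcontroller_cmd_argv is available in this module.
class LSFControllerLauncher(BatchSystemLauncher):
    """Launch a controller using LSF (hypothetical example)."""
    submit_command = List(['bsub'], config=True)
    delete_command = List(['bkill'], config=True)
    # bsub reports e.g. "Job <1234> is submitted ..."; grab the digits
    job_id_regexp = CUnicode(r'\d+', config=True)
    batch_file_name = CUnicode(u'lsf_controller', config=True)
    # $cluster_dir is interpolated by Itpl in write_batch_script()
    default_template = CUnicode(u"""#!/bin/sh
#BSUB -J ipcontroller
%s --log-to-file --cluster-dir $cluster_dir
""" % (' '.join(ipcontroller_cmd_argv)))

    def start(self, cluster_dir):
        """Start the controller by cluster_dir."""
        self.log.info("Starting LSFControllerLauncher: %r" % self.args)
        return super(LSFControllerLauncher, self).start(1, cluster_dir)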
class BaseDB(Configurable): """Empty Parent class so traitlets work on DB.""" # base configurable traits: session = CUnicode("")
class RepresentationViewer(DOMWidget): # Name of the javascript class which this widget syncs against on the # browser side. To work correctly, this javascript class has to be # registered and loaded in the browser before this widget is constructed # (that's what enable_notebook() does) _view_module = Unicode('nbextensions/chemview_widget', sync=True) _view_name = Unicode('MolecularView', sync=True) width = CInt(sync=True) height = CInt(sync=True) background = CInt(sync=True) # Update Camera Hack camera_str = CUnicode(sync=True) static_moving = CBool(sync=True) # Helper loaded = CBool(False, sync=True) def __init__(self, width=500, height=500): '''RepresentationViewer is an IPython notebook widget useful to display 3d scenes through webgl. Example: .. code:: from IPython.display import display rv = RepresentationViewer() rv.add_representation('point', {'coordinates': coordinates, 'colors': colors, 'sizes': sizes}) display(rv) .. py:attribute: width Width in pixels of the IPython widget .. py:attribute: height Height in pixels of the IPython widget .. py:attribute: camera_str A string-representation of camera position and orientation .. py:attribute: static_moving Set to True to make the camera lose the "bouncy" rotation. ''' super(RepresentationViewer, self).__init__() self.displayed = False self.width = width self.height = height # Store the events sent from the javascript side self._event_handlers = defaultdict(list) # What to do when we export def callback(content): display(Image(url=content.get('dataUrl'))) self._connect_event('displayImg', callback) # A record of the new representations self.representations = {} # Things to be called when the js part is done loading self._displayed_callbacks = [] def on_loaded(name, old, new): for cb in self._displayed_callbacks: cb(self) self.on_trait_change(on_loaded, "loaded") def add_representation(self, rep_type, options): '''Add a 3D representation to the viewer. See User Guide for a complete description of the representations available. :return: An unique hexadecimal identifier for the representation. :rtype: str ''' # Add our unique id to be able to refer to the representation rep_id = uuid4().hex self.representations[rep_id] = { 'type': rep_type, 'options': options.copy() } self._remote_call('addRepresentation', type=rep_type, repId=rep_id, options=options) return rep_id def remove_representation(self, rep_id): '''Remove a representation from the viewer :param str rep_id: the unique identifier generated by RepresentationViewer.add_representation ''' self._remote_call('removeRepresentation', repId=rep_id) del self.representations[rep_id] def update_representation(self, rep_id, options): '''Update a representation with new data. :param str rep_id: the unique identifier returned by RepresentationViewer.add_representation :param dict options: dictionary containing the updated data. ''' self.representations[rep_id]['options'].update(options) self._remote_call('updateRepresentation', repId=rep_id, options=options) def _connect_event(self, event_name, callback): '''Respond to an event sent by the Javascript side. 
Events available: - displayImg - serialize ''' self._event_handlers[event_name].append(callback) def _remote_call(self, method_name, **kwargs): '''Call a method remotely on the javascript side''' msg = {} msg['type'] = 'callMethod' msg['methodName'] = method_name msg['args'] = self._recursive_serialize(kwargs) if self.displayed is True: self.send(msg) # This will be received with View.on_msg else: # We should prepare a callback to be # called when widget is displayed def callback(widget, msg=msg): widget.send(msg) self._displayed_callbacks.append(callback) def _recursive_serialize(self, dictionary): '''Serialize a dictionary inplace''' for k, v in dictionary.items(): if isinstance(v, dict): self._recursive_serialize(v) else: # This is when custom serialization happens if isinstance(v, np.ndarray): if v.dtype == 'float64': # We don't support float64 on js side v = v.astype('float32') dictionary[k] = encode_numpy(v) return dictionary def _handle_custom_msg(self, content): # Handle custom messages sent by the javascript counterpart event = content.get('event', '') for cb in self._event_handlers[event]: cb(content) def _ipython_display_(self, **kwargs): super(RepresentationViewer, self)._ipython_display_(**kwargs) self.displayed = True def get_scene(self): '''Return a dictionary that uniquely identifies the scene displayed''' scene = {} # Camera camspec = json.loads(self.camera_str) location = np.array([ camspec['position']['x'], camspec['position']['y'], camspec['position']['z'] ], 'float') quaternion = np.array([ camspec['quaternion']['_x'], camspec['quaternion']['_y'], camspec['quaternion']['_z'], camspec['quaternion']['_w'] ], 'float') target = np.array([ camspec['target']['x'], camspec['target']['y'], camspec['target']['z'] ], 'float') scene['camera'] = dict(location=location, quaternion=quaternion, target=target, vfov=camspec['fov'], aspect=camspec['aspect']) # Lights: TODO scene['lights'] = [{ 'position': np.array([2, 4, -3]) * 1000, 'color': 0xffffff }, { 'position': np.array([-1, 2, 3]) * 1000, 'color': 0xffffff }] # Objects scene['representations'] = self.representations.values() scene['background'] = self.background return scene
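# A short usage sketch for RepresentationViewer, following its own docstring.
# The random coordinate data is made up for illustration, and the snippet
# assumes the chemview notebook extension has been enabled beforehand.
def _representation_viewer_example():
    import numpy as np
    from IPython.display import display

    coordinates = np.random.rand(100, 3).astype('float32')
    colors = [0xffffff] * 100
    sizes = [1.0] * 100

    rv = RepresentationViewer(width=400, height=400)
    rep_id = rv.add_representation('point', {'coordinates': coordinates,
                                             'colors': colors,
                                             'sizes': sizes})
    display(rv)

    # push only the changed option to the browser side
    rv.update_representation(rep_id, {'coordinates': coordinates + 0.1})

    # once the widget has synced a camera from the browser, the scene
    # (camera, lights, representations) can be snapshotted
    return rv.get_scene()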
class Session(Configurable): """Object for handling serialization and sending of messages. The Session object handles building messages and sending them with ZMQ sockets or ZMQStream objects. Objects can communicate with each other over the network via Session objects, and only need to work with the dict-based IPython message spec. The Session will handle serialization/deserialization, security, and metadata. Sessions support configurable serialiization via packer/unpacker traits, and signing with HMAC digests via the key/keyfile traits. Parameters ---------- debug : bool whether to trigger extra debugging statements packer/unpacker : str : 'json', 'pickle' or import_string importstrings for methods to serialize message parts. If just 'json' or 'pickle', predefined JSON and pickle packers will be used. Otherwise, the entire importstring must be used. The functions must accept at least valid JSON input, and output *bytes*. For example, to use msgpack: packer = 'msgpack.packb', unpacker='msgpack.unpackb' pack/unpack : callables You can also set the pack/unpack callables for serialization directly. session : bytes the ID of this Session object. The default is to generate a new UUID. username : unicode username added to message headers. The default is to ask the OS. key : bytes The key used to initialize an HMAC signature. If unset, messages will not be signed or checked. keyfile : filepath The file containing a key. If this is set, `key` will be initialized to the contents of the file. """ debug = Bool(False, config=True, help="""Debug output in the Session""") packer = DottedObjectName( 'json', config=True, help="""The name of the packer for serializing messages. Should be one of 'json', 'pickle', or an import name for a custom callable serializer.""") def _packer_changed(self, name, old, new): if new.lower() == 'json': self.pack = json_packer self.unpack = json_unpacker elif new.lower() == 'pickle': self.pack = pickle_packer self.unpack = pickle_unpacker else: self.pack = import_item(str(new)) unpacker = DottedObjectName( 'json', config=True, help="""The name of the unpacker for unserializing messages. Only used with custom functions for `packer`.""") def _unpacker_changed(self, name, old, new): if new.lower() == 'json': self.pack = json_packer self.unpack = json_unpacker elif new.lower() == 'pickle': self.pack = pickle_packer self.unpack = pickle_unpacker else: self.unpack = import_item(str(new)) session = CUnicode(u'', config=True, help="""The UUID identifying this session.""") def _session_default(self): u = unicode(uuid.uuid4()) self.bsession = u.encode('ascii') return u def _session_changed(self, name, old, new): self.bsession = self.session.encode('ascii') # bsession is the session as bytes bsession = CBytes(b'') username = Unicode( os.environ.get('USER', u'username'), config=True, help="""Username for the Session. 
Default is your system username.""") # message signature related traits: key = CBytes(b'', config=True, help="""execution key, for extra authentication.""") def _key_changed(self, name, old, new): if new: self.auth = hmac.HMAC(new) else: self.auth = None auth = Instance(hmac.HMAC) digest_history = Set() keyfile = Unicode('', config=True, help="""path to file containing execution key.""") def _keyfile_changed(self, name, old, new): with open(new, 'rb') as f: self.key = f.read().strip() # serialization traits: pack = Any(default_packer) # the actual packer function def _pack_changed(self, name, old, new): if not callable(new): raise TypeError("packer must be callable, not %s" % type(new)) unpack = Any(default_unpacker) # the actual packer function def _unpack_changed(self, name, old, new): # unpacker is not checked - it is assumed to be if not callable(new): raise TypeError("unpacker must be callable, not %s" % type(new)) def __init__(self, **kwargs): """create a Session object Parameters ---------- debug : bool whether to trigger extra debugging statements packer/unpacker : str : 'json', 'pickle' or import_string importstrings for methods to serialize message parts. If just 'json' or 'pickle', predefined JSON and pickle packers will be used. Otherwise, the entire importstring must be used. The functions must accept at least valid JSON input, and output *bytes*. For example, to use msgpack: packer = 'msgpack.packb', unpacker='msgpack.unpackb' pack/unpack : callables You can also set the pack/unpack callables for serialization directly. session : unicode (must be ascii) the ID of this Session object. The default is to generate a new UUID. bsession : bytes The session as bytes username : unicode username added to message headers. The default is to ask the OS. key : bytes The key used to initialize an HMAC signature. If unset, messages will not be signed or checked. keyfile : filepath The file containing a key. If this is set, `key` will be initialized to the contents of the file. """ super(Session, self).__init__(**kwargs) self._check_packers() self.none = self.pack({}) # ensure self._session_default() if necessary, so bsession is defined: self.session @property def msg_id(self): """always return new uuid""" return str(uuid.uuid4()) def _check_packers(self): """check packers for binary data and datetime support.""" pack = self.pack unpack = self.unpack # check simple serialization msg = dict(a=[1, 'hi']) try: packed = pack(msg) except Exception: raise ValueError("packer could not serialize a simple message") # ensure packed message is bytes if not isinstance(packed, bytes): raise ValueError("message packed to %r, but bytes are required" % type(packed)) # check that unpack is pack's inverse try: unpacked = unpack(packed) except Exception: raise ValueError("unpacker could not handle the packer's output") # check datetime support msg = dict(t=datetime.now()) try: unpacked = unpack(pack(msg)) except Exception: self.pack = lambda o: pack(squash_dates(o)) self.unpack = lambda s: extract_dates(unpack(s)) def msg_header(self, msg_type): return msg_header(self.msg_id, msg_type, self.username, self.session) def msg(self, msg_type, content=None, parent=None, subheader=None, header=None): """Return the nested message dict. This format is different from what is sent over the wire. The serialize/unserialize methods converts this nested message dict to the wire format, which is a list of message parts. 
""" msg = {} header = self.msg_header(msg_type) if header is None else header msg['header'] = header msg['msg_id'] = header['msg_id'] msg['msg_type'] = header['msg_type'] msg['parent_header'] = {} if parent is None else extract_header(parent) msg['content'] = {} if content is None else content sub = {} if subheader is None else subheader msg['header'].update(sub) return msg def sign(self, msg_list): """Sign a message with HMAC digest. If no auth, return b''. Parameters ---------- msg_list : list The [p_header,p_parent,p_content] part of the message list. """ if self.auth is None: return b'' h = self.auth.copy() for m in msg_list: h.update(m) return str_to_bytes(h.hexdigest()) def serialize(self, msg, ident=None): """Serialize the message components to bytes. This is roughly the inverse of unserialize. The serialize/unserialize methods work with full message lists, whereas pack/unpack work with the individual message parts in the message list. Parameters ---------- msg : dict or Message The nexted message dict as returned by the self.msg method. Returns ------- msg_list : list The list of bytes objects to be sent with the format: [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content, buffer1,buffer2,...]. In this list, the p_* entities are the packed or serialized versions, so if JSON is used, these are utf8 encoded JSON strings. """ content = msg.get('content', {}) if content is None: content = self.none elif isinstance(content, dict): content = self.pack(content) elif isinstance(content, bytes): # content is already packed, as in a relayed message pass elif isinstance(content, unicode): # should be bytes, but JSON often spits out unicode content = content.encode('utf8') else: raise TypeError("Content incorrect type: %s" % type(content)) real_message = [ self.pack(msg['header']), self.pack(msg['parent_header']), content ] to_send = [] if isinstance(ident, list): # accept list of idents to_send.extend(ident) elif ident is not None: to_send.append(ident) to_send.append(DELIM) signature = self.sign(real_message) to_send.append(signature) to_send.extend(real_message) return to_send def send(self, stream, msg_or_type, content=None, parent=None, ident=None, buffers=None, subheader=None, track=False, header=None): """Build and send a message via stream or socket. The message format used by this function internally is as follows: [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content, buffer1,buffer2,...] The serialize/unserialize methods convert the nested message dict into this format. Parameters ---------- stream : zmq.Socket or ZMQStream The socket-like object used to send the data. msg_or_type : str or Message/dict Normally, msg_or_type will be a msg_type unless a message is being sent more than once. If a header is supplied, this can be set to None and the msg_type will be pulled from the header. content : dict or None The content of the message (ignored if msg_or_type is a message). header : dict or None The header dict for the message (ignores if msg_to_type is a message). parent : Message or dict or None The parent or parent header describing the parent of this message (ignored if msg_or_type is a message). ident : bytes or list of bytes The zmq.IDENTITY routing path. subheader : dict or None Extra header keys for this message's header (ignored if msg_or_type is a message). buffers : list or None The already-serialized buffers to be appended to the message. track : bool Whether to track. Only for use with Sockets, because ZMQStream objects cannot track messages. 
        Returns
        -------
        msg : dict
            The constructed message.
        (msg, tracker) : (dict, MessageTracker)
            if track=True, then a 2-tuple will be returned,
            the first element being the constructed
            message, and the second being the MessageTracker
        """
        if not isinstance(stream, (zmq.Socket, ZMQStream)):
            raise TypeError("stream must be Socket or ZMQStream, not %r" %
                            type(stream))
        elif track and isinstance(stream, ZMQStream):
            raise TypeError("ZMQStream cannot track messages")

        if isinstance(msg_or_type, (Message, dict)):
            # We got a Message or message dict, not a msg_type so don't
            # build a new Message.
            msg = msg_or_type
        else:
            msg = self.msg(msg_or_type, content=content, parent=parent,
                           subheader=subheader, header=header)

        buffers = [] if buffers is None else buffers
        to_send = self.serialize(msg, ident)
        flag = 0
        if buffers:
            flag = zmq.SNDMORE
            _track = False
        else:
            _track = track
        if track:
            tracker = stream.send_multipart(to_send, flag, copy=False,
                                            track=_track)
        else:
            tracker = stream.send_multipart(to_send, flag, copy=False)
        for b in buffers[:-1]:
            stream.send(b, flag, copy=False)
        if buffers:
            if track:
                tracker = stream.send(buffers[-1], copy=False, track=track)
            else:
                tracker = stream.send(buffers[-1], copy=False)

        # omsg = Message(msg)
        if self.debug:
            pprint.pprint(msg)
            pprint.pprint(to_send)
            pprint.pprint(buffers)

        msg['tracker'] = tracker

        return msg

    def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None):
        """Send a raw message via ident path.

        This method is used to send an already serialized message.

        Parameters
        ----------
        stream : ZMQStream or Socket
            The ZMQ stream or socket to use for sending the message.
        msg_list : list
            The serialized list of messages to send. This only includes the
            [p_header,p_parent,p_content,buffer1,buffer2,...] portion of
            the message.
        ident : ident or list
            A single ident or a list of idents to use in sending.
        """
        to_send = []
        if isinstance(ident, bytes):
            ident = [ident]
        if ident is not None:
            to_send.extend(ident)

        to_send.append(DELIM)
        to_send.append(self.sign(msg_list))
        to_send.extend(msg_list)
        # send the assembled wire message, not the bare msg_list, so that the
        # ident prefix, DELIM, and signature are actually included
        stream.send_multipart(to_send, flags, copy=copy)

    def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
        """Receive and unpack a message.

        Parameters
        ----------
        socket : ZMQStream or Socket
            The socket or stream to use in receiving.

        Returns
        -------
        [idents], msg
            [idents] is a list of idents and msg is a nested message dict of
            same format as self.msg returns.
        """
        if isinstance(socket, ZMQStream):
            socket = socket.socket
        try:
            msg_list = socket.recv_multipart(mode)
        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                # We can convert EAGAIN to None as we know in this case
                # recv_multipart won't return None.
                return None, None
            else:
                raise
        # split multipart message into identity list and message dict
        # invalid large messages can cause very expensive string comparisons
        idents, msg_list = self.feed_identities(msg_list, copy)
        try:
            return idents, self.unserialize(msg_list, content=content,
                                            copy=copy)
        except Exception as e:
            # TODO: handle it
            raise e

    def feed_identities(self, msg_list, copy=True):
        """Split the identities from the rest of the message.

        Feed until DELIM is reached, then return the prefix as idents and
        remainder as msg_list. This is easily broken by setting an IDENT to
        DELIM, but that would be silly.

        Parameters
        ----------
        msg_list : a list of Message or bytes objects
            The message to be split.
        copy : bool
            flag determining whether the arguments are bytes or Messages

        Returns
        -------
        (idents, msg_list) : two lists
            idents will always be a list of bytes, each of which is a ZMQ
            identity.
            msg_list will be a list of bytes or zmq.Messages of the form
            [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...] and
            should be unpackable/unserializable via self.unserialize at this
            point.
        """
        if copy:
            idx = msg_list.index(DELIM)
            return msg_list[:idx], msg_list[idx + 1:]
        else:
            failed = True
            for idx, m in enumerate(msg_list):
                if m.bytes == DELIM:
                    failed = False
                    break
            if failed:
                raise ValueError("DELIM not in msg_list")
            idents, msg_list = msg_list[:idx], msg_list[idx + 1:]
            return [m.bytes for m in idents], msg_list

    def unserialize(self, msg_list, content=True, copy=True):
        """Unserialize a msg_list to a nested message dict.

        This is roughly the inverse of serialize. The serialize/unserialize
        methods work with full message lists, whereas pack/unpack work with
        the individual message parts in the message list.

        Parameters
        ----------
        msg_list : list of bytes or Message objects
            The list of message parts of the form [HMAC,p_header,p_parent,
            p_content,buffer1,buffer2,...].
        content : bool (True)
            Whether to unpack the content dict (True), or leave it packed
            (False).
        copy : bool (True)
            Whether to return the bytes (True), or the non-copying Message
            object in each place (False).

        Returns
        -------
        msg : dict
            The nested message dict with top-level keys [header, parent_header,
            content, buffers].
        """
        minlen = 4
        # check the length first, so a malformed message raises the intended
        # TypeError rather than an IndexError from the signature check below
        if not len(msg_list) >= minlen:
            raise TypeError(
                "malformed message, must have at least %i elements" % minlen)
        message = {}
        if not copy:
            for i in range(minlen):
                msg_list[i] = msg_list[i].bytes
        if self.auth is not None:
            signature = msg_list[0]
            if not signature:
                raise ValueError("Unsigned Message")
            if signature in self.digest_history:
                raise ValueError("Duplicate Signature: %r" % signature)
            self.digest_history.add(signature)
            check = self.sign(msg_list[1:4])
            if not signature == check:
                raise ValueError("Invalid Signature: %r" % signature)
        header = self.unpack(msg_list[1])
        message['header'] = header
        message['msg_id'] = header['msg_id']
        message['msg_type'] = header['msg_type']
        message['parent_header'] = self.unpack(msg_list[2])
        if content:
            message['content'] = self.unpack(msg_list[3])
        else:
            message['content'] = msg_list[3]

        message['buffers'] = msg_list[4:]
        return message
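# A minimal round-trip sketch for the Session class above, pairing two
# in-process ZMQ sockets. It assumes pyzmq is available; 'ping' is an
# arbitrary msg_type chosen for illustration.
def _session_roundtrip_example():
    import zmq

    ctx = zmq.Context.instance()
    a = ctx.socket(zmq.PAIR)
    b = ctx.socket(zmq.PAIR)
    a.bind('inproc://session-demo')
    b.connect('inproc://session-demo')

    session = Session(key=b'secret')       # sign messages with HMAC
    session.send(a, 'ping', content=dict(n=1))

    idents, msg = session.recv(b, mode=0)  # mode=0 blocks until arrival
    assert msg['msg_type'] == 'ping'
    assert msg['content'] == dict(n=1)
    return msg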
class ClusteringSessionModel(Widget):
    """Model widget used as a base class for communicating between Python
    and the front-end."""
    files = List(sync=True)
    current = CUnicode(sync=True)
    status = CUnicode("close", sync=True)
    status_desc = CUnicode(sync=True)
    debug = CUnicode(sync=True)

    def __init__(self, session, *args, **kwargs):
        super(ClusteringSessionModel, self).__init__(*args, **kwargs)
        self.on_msg(self._handle_button_msg)
        self.session = session
        self.folders = self.session.settings_manager.get_user_settings(
            'phy.data_search_dirs', scope='global')
        self.files = list_kwik(self.folders)

    def _handle_button_msg(self, _, content):
        """Handle a msg from the front-end.

        Parameters
        ----------
        content: dict
            Content of the msg.
        """
        self.debug = "debug: " + str(content)
        try:
            if content.get('event', '') == 'open':
                self.debug = "debug-ope: " + str(content)
                self.session_open(content.get('filename'))
            elif content.get('event', '') == 'close':
                self.debug = "debug-clo: " + str(content)
                self.session_close()
            else:
                raise Exception("command not implemented for %s" % content)
        except Exception as err:
            self.set_status("error", str(err))

    def set_session(self, session):
        self.session = session

    def set_status(self, status, status_desc=""):
        self.status = status
        self.status_desc = status_desc

    def session_open(self, filename):
        try:
            self.set_status("opening")
            self.session.open(str(filename))
            self.current = filename
            self.set_status("open", "experiment: " + filename)
        except Exception:
            #import traceback
            #self.set_status("error", traceback.format_exc())
            self.filename = "None"  # avoid set_status('close')
            self.current = "None"
            raise

    def session_close(self):
        self.set_status("close")
        self.session.close()
class Session(Configurable): """Object for handling serialization and sending of messages. The Session object handles building messages and sending them with ZMQ sockets or ZMQStream objects. Objects can communicate with each other over the network via Session objects, and only need to work with the dict-based IPython message spec. The Session will handle serialization/deserialization, security, and metadata. Sessions support configurable serialization via packer/unpacker traits, and signing with HMAC digests via the key/keyfile traits. Parameters ---------- debug : bool whether to trigger extra debugging statements packer/unpacker : str : 'json', 'pickle' or import_string importstrings for methods to serialize message parts. If just 'json' or 'pickle', predefined JSON and pickle packers will be used. Otherwise, the entire importstring must be used. The functions must accept at least valid JSON input, and output *bytes*. For example, to use msgpack: packer = 'msgpack.packb', unpacker='msgpack.unpackb' pack/unpack : callables You can also set the pack/unpack callables for serialization directly. session : bytes the ID of this Session object. The default is to generate a new UUID. username : unicode username added to message headers. The default is to ask the OS. key : bytes The key used to initialize an HMAC signature. If unset, messages will not be signed or checked. keyfile : filepath The file containing a key. If this is set, `key` will be initialized to the contents of the file. """ debug = Bool(False, config=True, help="""Debug output in the Session""") packer = DottedObjectName( 'json', config=True, help="""The name of the packer for serializing messages. Should be one of 'json', 'pickle', or an import name for a custom callable serializer.""") def _packer_changed(self, name, old, new): if new.lower() == 'json': self.pack = json_packer self.unpack = json_unpacker self.unpacker = new elif new.lower() == 'pickle': self.pack = pickle_packer self.unpack = pickle_unpacker self.unpacker = new else: self.pack = import_item(str(new)) unpacker = DottedObjectName( 'json', config=True, help="""The name of the unpacker for unserializing messages. Only used with custom functions for `packer`.""") def _unpacker_changed(self, name, old, new): if new.lower() == 'json': self.pack = json_packer self.unpack = json_unpacker self.packer = new elif new.lower() == 'pickle': self.pack = pickle_packer self.unpack = pickle_unpacker self.packer = new else: self.unpack = import_item(str(new)) session = CUnicode(u'', config=True, help="""The UUID identifying this session.""") def _session_default(self): u = unicode_type(uuid.uuid4()) self.bsession = u.encode('ascii') return u def _session_changed(self, name, old, new): self.bsession = self.session.encode('ascii') # bsession is the session as bytes bsession = CBytes(b'') username = Unicode( str_to_unicode(os.environ.get('USER', 'username')), help="""Username for the Session. Default is your system username.""", config=True) metadata = Dict( {}, config=True, help= """Metadata dictionary, which serves as the default top-level metadata dict for each message.""" ) # if 0, no adapting to do. adapt_version = Integer(0) # message signature related traits: key = CBytes(b'', config=True, help="""execution key, for extra authentication.""") def _key_changed(self): self._new_auth() signature_scheme = Unicode( 'hmac-sha256', config=True, help="""The digest scheme used to construct the message signatures. 
Must have the form 'hmac-HASH'.""") def _signature_scheme_changed(self, name, old, new): if not new.startswith('hmac-'): raise TraitError( "signature_scheme must start with 'hmac-', got %r" % new) hash_name = new.split('-', 1)[1] try: self.digest_mod = getattr(hashlib, hash_name) except AttributeError: raise TraitError("hashlib has no such attribute: %s" % hash_name) self._new_auth() digest_mod = Any() def _digest_mod_default(self): return hashlib.sha256 auth = Instance(hmac.HMAC) def _new_auth(self): if self.key: self.auth = hmac.HMAC(self.key, digestmod=self.digest_mod) else: self.auth = None digest_history = Set() digest_history_size = Integer( 2**16, config=True, help="""The maximum number of digests to remember. The digest history will be culled when it exceeds this value. """) keyfile = Unicode('', config=True, help="""path to file containing execution key.""") def _keyfile_changed(self, name, old, new): with open(new, 'rb') as f: self.key = f.read().strip() # for protecting against sends from forks pid = Integer() # serialization traits: pack = Any(default_packer) # the actual packer function def _pack_changed(self, name, old, new): if not callable(new): raise TypeError("packer must be callable, not %s" % type(new)) unpack = Any(default_unpacker) # the actual packer function def _unpack_changed(self, name, old, new): # unpacker is not checked - it is assumed to be if not callable(new): raise TypeError("unpacker must be callable, not %s" % type(new)) # thresholds: copy_threshold = Integer( 2**16, config=True, help= "Threshold (in bytes) beyond which a buffer should be sent without copying." ) buffer_threshold = Integer( MAX_BYTES, config=True, help= "Threshold (in bytes) beyond which an object's buffer should be extracted to avoid pickling." ) item_threshold = Integer( MAX_ITEMS, config=True, help= """The maximum number of items for a container to be introspected for custom serialization. Containers larger than this are pickled outright. """) def __init__(self, **kwargs): """create a Session object Parameters ---------- debug : bool whether to trigger extra debugging statements packer/unpacker : str : 'json', 'pickle' or import_string importstrings for methods to serialize message parts. If just 'json' or 'pickle', predefined JSON and pickle packers will be used. Otherwise, the entire importstring must be used. The functions must accept at least valid JSON input, and output *bytes*. For example, to use msgpack: packer = 'msgpack.packb', unpacker='msgpack.unpackb' pack/unpack : callables You can also set the pack/unpack callables for serialization directly. session : unicode (must be ascii) the ID of this Session object. The default is to generate a new UUID. bsession : bytes The session as bytes username : unicode username added to message headers. The default is to ask the OS. key : bytes The key used to initialize an HMAC signature. If unset, messages will not be signed or checked. signature_scheme : str The message digest scheme. Currently must be of the form 'hmac-HASH', where 'HASH' is a hashing function available in Python's hashlib. The default is 'hmac-sha256'. This is ignored if 'key' is empty. keyfile : filepath The file containing a key. If this is set, `key` will be initialized to the contents of the file. 
""" super(Session, self).__init__(**kwargs) self._check_packers() self.none = self.pack({}) # ensure self._session_default() if necessary, so bsession is defined: self.session self.pid = os.getpid() @property def msg_id(self): """always return new uuid""" return str(uuid.uuid4()) def _check_packers(self): """check packers for datetime support.""" pack = self.pack unpack = self.unpack # check simple serialization msg = dict(a=[1, 'hi']) try: packed = pack(msg) except Exception as e: msg = "packer '{packer}' could not serialize a simple message: {e}{jsonmsg}" if self.packer == 'json': jsonmsg = "\nzmq.utils.jsonapi.jsonmod = %s" % jsonapi.jsonmod else: jsonmsg = "" raise ValueError( msg.format(packer=self.packer, e=e, jsonmsg=jsonmsg)) # ensure packed message is bytes if not isinstance(packed, bytes): raise ValueError("message packed to %r, but bytes are required" % type(packed)) # check that unpack is pack's inverse try: unpacked = unpack(packed) assert unpacked == msg except Exception as e: msg = "unpacker '{unpacker}' could not handle output from packer '{packer}': {e}{jsonmsg}" if self.packer == 'json': jsonmsg = "\nzmq.utils.jsonapi.jsonmod = %s" % jsonapi.jsonmod else: jsonmsg = "" raise ValueError( msg.format(packer=self.packer, unpacker=self.unpacker, e=e, jsonmsg=jsonmsg)) # check datetime support msg = dict(t=datetime.now()) try: unpacked = unpack(pack(msg)) if isinstance(unpacked['t'], datetime): raise ValueError("Shouldn't deserialize to datetime") except Exception: self.pack = lambda o: pack(squash_dates(o)) self.unpack = lambda s: unpack(s) def msg_header(self, msg_type): return msg_header(self.msg_id, msg_type, self.username, self.session) def msg(self, msg_type, content=None, parent=None, header=None, metadata=None): """Return the nested message dict. This format is different from what is sent over the wire. The serialize/deserialize methods converts this nested message dict to the wire format, which is a list of message parts. """ msg = {} header = self.msg_header(msg_type) if header is None else header msg['header'] = header msg['msg_id'] = header['msg_id'] msg['msg_type'] = header['msg_type'] msg['parent_header'] = {} if parent is None else extract_header(parent) msg['content'] = {} if content is None else content msg['metadata'] = self.metadata.copy() if metadata is not None: msg['metadata'].update(metadata) return msg def sign(self, msg_list): """Sign a message with HMAC digest. If no auth, return b''. Parameters ---------- msg_list : list The [p_header,p_parent,p_content] part of the message list. """ if self.auth is None: return b'' h = self.auth.copy() for m in msg_list: h.update(m) return str_to_bytes(h.hexdigest()) def serialize(self, msg, ident=None): """Serialize the message components to bytes. This is roughly the inverse of deserialize. The serialize/deserialize methods work with full message lists, whereas pack/unpack work with the individual message parts in the message list. Parameters ---------- msg : dict or Message The next message dict as returned by the self.msg method. Returns ------- msg_list : list The list of bytes objects to be sent with the format:: [ident1, ident2, ..., DELIM, HMAC, p_header, p_parent, p_metadata, p_content, buffer1, buffer2, ...] In this list, the ``p_*`` entities are the packed or serialized versions, so if JSON is used, these are utf8 encoded JSON strings. 
""" content = msg.get('content', {}) if content is None: content = self.none elif isinstance(content, dict): content = self.pack(content) elif isinstance(content, bytes): # content is already packed, as in a relayed message pass elif isinstance(content, unicode_type): # should be bytes, but JSON often spits out unicode content = content.encode('utf8') else: raise TypeError("Content incorrect type: %s" % type(content)) real_message = [ self.pack(msg['header']), self.pack(msg['parent_header']), self.pack(msg['metadata']), content, ] to_send = [] if isinstance(ident, list): # accept list of idents to_send.extend(ident) elif ident is not None: to_send.append(ident) to_send.append(DELIM) signature = self.sign(real_message) to_send.append(signature) to_send.extend(real_message) return to_send def send(self, stream, msg_or_type, content=None, parent=None, ident=None, buffers=None, track=False, header=None, metadata=None): """Build and send a message via stream or socket. The message format used by this function internally is as follows: [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content, buffer1,buffer2,...] The serialize/deserialize methods convert the nested message dict into this format. Parameters ---------- stream : zmq.Socket or ZMQStream The socket-like object used to send the data. msg_or_type : str or Message/dict Normally, msg_or_type will be a msg_type unless a message is being sent more than once. If a header is supplied, this can be set to None and the msg_type will be pulled from the header. content : dict or None The content of the message (ignored if msg_or_type is a message). header : dict or None The header dict for the message (ignored if msg_to_type is a message). parent : Message or dict or None The parent or parent header describing the parent of this message (ignored if msg_or_type is a message). ident : bytes or list of bytes The zmq.IDENTITY routing path. metadata : dict or None The metadata describing the message buffers : list or None The already-serialized buffers to be appended to the message. track : bool Whether to track. Only for use with Sockets, because ZMQStream objects cannot track messages. Returns ------- msg : dict The constructed message. """ if not isinstance(stream, zmq.Socket): # ZMQStreams and dummy sockets do not support tracking. track = False if isinstance(msg_or_type, (Message, dict)): # We got a Message or message dict, not a msg_type so don't # build a new Message. msg = msg_or_type buffers = buffers or msg.get('buffers', []) else: msg = self.msg(msg_or_type, content=content, parent=parent, header=header, metadata=metadata) if not os.getpid() == self.pid: io.rprint("WARNING: attempted to send message from fork") io.rprint(msg) return buffers = [] if buffers is None else buffers if self.adapt_version: msg = adapt(msg, self.adapt_version) to_send = self.serialize(msg, ident) to_send.extend(buffers) longest = max([len(s) for s in to_send]) copy = (longest < self.copy_threshold) if buffers and track and not copy: # only really track when we are doing zero-copy buffers tracker = stream.send_multipart(to_send, copy=False, track=True) else: # use dummy tracker, which will be done immediately tracker = DONE stream.send_multipart(to_send, copy=copy) if self.debug: pprint.pprint(msg) pprint.pprint(to_send) pprint.pprint(buffers) msg['tracker'] = tracker return msg def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None): """Send a raw message via ident path. This method is used to send a already serialized message. 
Parameters ---------- stream : ZMQStream or Socket The ZMQ stream or socket to use for sending the message. msg_list : list The serialized list of messages to send. This only includes the [p_header,p_parent,p_metadata,p_content,buffer1,buffer2,...] portion of the message. ident : ident or list A single ident or a list of idents to use in sending. """ to_send = [] if isinstance(ident, bytes): ident = [ident] if ident is not None: to_send.extend(ident) to_send.append(DELIM) to_send.append(self.sign(msg_list)) to_send.extend(msg_list) stream.send_multipart(to_send, flags, copy=copy) def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True): """Receive and unpack a message. Parameters ---------- socket : ZMQStream or Socket The socket or stream to use in receiving. Returns ------- [idents], msg [idents] is a list of idents and msg is a nested message dict of same format as self.msg returns. """ if isinstance(socket, ZMQStream): socket = socket.socket try: msg_list = socket.recv_multipart(mode, copy=copy) except zmq.ZMQError as e: if e.errno == zmq.EAGAIN: # We can convert EAGAIN to None as we know in this case # recv_multipart won't return None. return None, None else: raise # split multipart message into identity list and message dict # invalid large messages can cause very expensive string comparisons idents, msg_list = self.feed_identities(msg_list, copy) try: return idents, self.deserialize(msg_list, content=content, copy=copy) except Exception as e: # TODO: handle it raise e def feed_identities(self, msg_list, copy=True): """Split the identities from the rest of the message. Feed until DELIM is reached, then return the prefix as idents and remainder as msg_list. This is easily broken by setting an IDENT to DELIM, but that would be silly. Parameters ---------- msg_list : a list of Message or bytes objects The message to be split. copy : bool flag determining whether the arguments are bytes or Messages Returns ------- (idents, msg_list) : two lists idents will always be a list of bytes, each of which is a ZMQ identity. msg_list will be a list of bytes or zmq.Messages of the form [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...] and should be unpackable/unserializable via self.deserialize at this point. """ if copy: idx = msg_list.index(DELIM) return msg_list[:idx], msg_list[idx + 1:] else: failed = True for idx, m in enumerate(msg_list): if m.bytes == DELIM: failed = False break if failed: raise ValueError("DELIM not in msg_list") idents, msg_list = msg_list[:idx], msg_list[idx + 1:] return [m.bytes for m in idents], msg_list def _add_digest(self, signature): """add a digest to history to protect against replay attacks""" if self.digest_history_size == 0: # no history, never add digests return self.digest_history.add(signature) if len(self.digest_history) > self.digest_history_size: # threshold reached, cull 10% self._cull_digest_history() def _cull_digest_history(self): """cull the digest history Removes a randomly selected 10% of the digest history """ current = len(self.digest_history) n_to_cull = max(int(current // 10), current - self.digest_history_size) if n_to_cull >= current: self.digest_history = set() return to_cull = random.sample(self.digest_history, n_to_cull) self.digest_history.difference_update(to_cull) def deserialize(self, msg_list, content=True, copy=True): """Unserialize a msg_list to a nested message dict. This is roughly the inverse of serialize. 
The serialize/deserialize methods work with full message lists, whereas pack/unpack work with the individual message parts in the message list. Parameters ---------- msg_list : list of bytes or Message objects The list of message parts of the form [HMAC,p_header,p_parent, p_metadata,p_content,buffer1,buffer2,...]. content : bool (True) Whether to unpack the content dict (True), or leave it packed (False). copy : bool (True) Whether to return the bytes (True), or the non-copying Message object in each place (False). Returns ------- msg : dict The nested message dict with top-level keys [header, parent_header, content, buffers]. """ minlen = 5 message = {} if not copy: for i in range(minlen): msg_list[i] = msg_list[i].bytes if self.auth is not None: signature = msg_list[0] if not signature: raise ValueError("Unsigned Message") if signature in self.digest_history: raise ValueError("Duplicate Signature: %r" % signature) self._add_digest(signature) check = self.sign(msg_list[1:5]) if not compare_digest(signature, check): raise ValueError("Invalid Signature: %r" % signature) if not len(msg_list) >= minlen: raise TypeError( "malformed message, must have at least %i elements" % minlen) header = self.unpack(msg_list[1]) message['header'] = extract_dates(header) message['msg_id'] = header['msg_id'] message['msg_type'] = header['msg_type'] message['parent_header'] = extract_dates(self.unpack(msg_list[2])) message['metadata'] = self.unpack(msg_list[3]) if content: message['content'] = self.unpack(msg_list[4]) else: message['content'] = msg_list[4] message['buffers'] = msg_list[5:] # adapt to the current version return adapt(message) def unserialize(self, *args, **kwargs): warnings.warn( "Session.unserialize is deprecated. Use Session.deserialize.", DeprecationWarning, ) return self.deserialize(*args, **kwargs)
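# A configuration sketch for this Session variant: its docstring names
# msgpack as the canonical custom packer, so this assumes msgpack is
# installed. The serialize/feed_identities/deserialize round trip mirrors
# what send/recv do over a real socket.
def _session_config_example():
    s = Session(packer='msgpack.packb', unpacker='msgpack.unpackb',
                key=b'supersecret', signature_scheme='hmac-sha512')

    wire = s.serialize(s.msg('demo', content={'a': 1}))
    idents, parts = s.feed_identities(wire)  # strip routing prefix + DELIM
    msg = s.deserialize(parts)
    assert msg['content'] == {'a': 1}
    return msg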
class SQLiteDB(BaseDB): """SQLite3 TaskRecord backend.""" filename = CUnicode('tasks.db', config=True) location = CUnicode('', config=True) table = CUnicode("", config=True) _db = Instance('sqlite3.Connection') _keys = List([ 'msg_id', 'header', 'content', 'buffers', 'submitted', 'client_uuid', 'engine_uuid', 'started', 'completed', 'resubmitted', 'result_header', 'result_content', 'result_buffers', 'queue', 'pyin', 'pyout', 'pyerr', 'stdout', 'stderr', ]) def __init__(self, **kwargs): super(SQLiteDB, self).__init__(**kwargs) if not self.table: # use session, and prefix _, since starting with # is illegal self.table = '_' + self.session.replace('-', '_') if not self.location: if hasattr(self.config.Global, 'cluster_dir'): self.location = self.config.Global.cluster_dir else: self.location = '.' self._init_db() # register db commit as 2s periodic callback # to prevent clogging pipes # assumes we are being run in a zmq ioloop app loop = ioloop.IOLoop.instance() pc = ioloop.PeriodicCallback(self._db.commit, 2000, loop) pc.start() def _defaults(self): """create an empty record""" d = {} for key in self._keys: d[key] = None return d def _init_db(self): """Connect to the database and get new session number.""" # register adapters sqlite3.register_adapter(datetime, _adapt_datetime) sqlite3.register_converter('datetime', _convert_datetime) sqlite3.register_adapter(dict, _adapt_dict) sqlite3.register_converter('dict', _convert_dict) sqlite3.register_adapter(list, _adapt_bufs) sqlite3.register_converter('bufs', _convert_bufs) # connect to the db dbfile = os.path.join(self.location, self.filename) self._db = sqlite3.connect( dbfile, detect_types=sqlite3.PARSE_DECLTYPES, # isolation_level = None)#, cached_statements=64) # print dir(self._db) self._db.execute("""CREATE TABLE IF NOT EXISTS %s (msg_id text PRIMARY KEY, header dict text, content dict text, buffers bufs blob, submitted datetime text, client_uuid text, engine_uuid text, started datetime text, completed datetime text, resubmitted datetime text, result_header dict text, result_content dict text, result_buffers bufs blob, queue text, pyin text, pyout text, pyerr text, stdout text, stderr text) """ % self.table) # self._db.execute("""CREATE TABLE IF NOT EXISTS %s_buffers # (msg_id text, result integer, buffer blob) # """%self.table) self._db.commit() def _dict_to_list(self, d): """turn a mongodb-style record dict into a list.""" return [d[key] for key in self._keys] def _list_to_dict(self, line): """Inverse of dict_to_list""" d = self._defaults() for key, value in zip(self._keys, line): d[key] = value return d def _render_expression(self, check): """Turn a mongodb-style search dict into an SQL query.""" expressions = [] args = [] skeys = set(check.keys()) skeys.difference_update(set(self._keys)) skeys.difference_update(set(['buffers', 'result_buffers'])) if skeys: raise KeyError("Illegal testing key(s): %s" % skeys) for name, sub_check in check.iteritems(): if isinstance(sub_check, dict): for test, value in sub_check.iteritems(): try: op = operators[test] except KeyError: raise KeyError("Unsupported operator: %r" % test) if isinstance(op, tuple): op, join = op expr = "%s %s ?" % (name, op) if isinstance(value, (tuple, list)): expr = '( %s )' % (join.join([expr] * len(value))) args.extend(value) else: args.append(value) expressions.append(expr) else: # it's an equality check expressions.append("%s IS ?" 
                % name)
                args.append(sub_check)

        expr = " AND ".join(expressions)
        return expr, args

    def add_record(self, msg_id, rec):
        """Add a new Task Record, by msg_id."""
        d = self._defaults()
        d.update(rec)
        d['msg_id'] = msg_id
        line = self._dict_to_list(d)
        tups = '(%s)' % (','.join(['?'] * len(line)))
        self._db.execute("INSERT INTO %s VALUES %s" % (self.table, tups), line)
        # self._db.commit()

    def get_record(self, msg_id):
        """Get a specific Task Record, by msg_id."""
        cursor = self._db.execute(
            """SELECT * FROM %s WHERE msg_id==?""" % self.table, (msg_id, ))
        line = cursor.fetchone()
        if line is None:
            raise KeyError("No such msg: %r" % msg_id)
        return self._list_to_dict(line)

    def update_record(self, msg_id, rec):
        """Update the data in an existing record."""
        query = "UPDATE %s SET " % self.table
        sets = []
        keys = sorted(rec.keys())
        values = []
        for key in keys:
            sets.append('%s = ?' % key)
            values.append(rec[key])
        query += ', '.join(sets)
        # bind msg_id as a parameter rather than interpolating it into SQL
        query += ' WHERE msg_id == ?'
        values.append(msg_id)
        self._db.execute(query, values)
        # self._db.commit()

    def drop_record(self, msg_id):
        """Remove a record from the DB."""
        self._db.execute("""DELETE FROM %s WHERE msg_id==?""" % self.table,
                         (msg_id, ))
        # self._db.commit()

    def drop_matching_records(self, check):
        """Remove all records matching a query dict from the DB."""
        expr, args = self._render_expression(check)
        query = "DELETE FROM %s WHERE %s" % (self.table, expr)
        self._db.execute(query, args)
        # self._db.commit()

    def find_records(self, check, id_only=False):
        """Find records matching a query dict."""
        req = 'msg_id' if id_only else '*'
        expr, args = self._render_expression(check)
        query = """SELECT %s FROM %s WHERE %s""" % (req, self.table, expr)
        cursor = self._db.execute(query, args)
        matches = cursor.fetchall()
        if id_only:
            return [m[0] for m in matches]
        else:
            records = {}
            for line in matches:
                rec = self._list_to_dict(line)
                records[rec['msg_id']] = rec
            return records
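# Illustrative use of the mongodb-style query dicts that _render_expression
# understands. The operator spellings ('$gt', '$in') are assumed to match
# the module-level `operators` table this class consults; the session name
# and msg_ids are made up.
def _sqlitedb_query_example():
    from datetime import datetime

    db = SQLiteDB(session='demo-session')  # table becomes '_demo_session'

    # plain value: rendered as "engine_uuid IS ?"
    ids = db.find_records({'engine_uuid': 'abcd'}, id_only=True)

    # nested dict: comparison and membership tests
    recent = db.find_records({
        'submitted': {'$gt': datetime(2011, 1, 1)},
        'msg_id': {'$in': ['id-one', 'id-two']},
    })
    return ids, recent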
class PlainTextFormatter(BaseFormatter):
    """The default pretty-printer.

    This uses :mod:`IPython.lib.pretty` to compute the format data of
    the object. If the object cannot be pretty printed, :func:`repr` is used.
    See the documentation of :mod:`IPython.lib.pretty` for details on
    how to write pretty printers.  Here is a simple example::

        def dtype_pprinter(obj, p, cycle):
            if cycle:
                return p.text('dtype(...)')
            if hasattr(obj, 'fields'):
                if obj.fields is None:
                    p.text(repr(obj))
                else:
                    p.begin_group(7, 'dtype([')
                    for i, field in enumerate(obj.descr):
                        if i > 0:
                            p.text(',')
                            p.breakable()
                        p.pretty(field)
                    p.end_group(7, '])')
    """

    # The format type of data returned.
    format_type = Unicode('text/plain')

    # This subclass ignores this attribute as it always needs to return
    # something.
    enabled = Bool(True, config=False)

    # Look for a _repr_pretty_ method to use for pretty printing.
    print_method = ObjectName('_repr_pretty_')

    # Whether to pretty-print or not.
    pprint = Bool(True, config=True)

    # Whether to be verbose or not.
    verbose = Bool(False, config=True)

    # The maximum width.
    max_width = Integer(79, config=True)

    # The newline character.
    newline = Unicode('\n', config=True)

    # format-string for pprinting floats
    float_format = Unicode('%r')
    # setter for float precision, either int or direct format-string
    float_precision = CUnicode('', config=True)

    def _float_precision_changed(self, name, old, new):
        """float_precision changed, set float_format accordingly.

        float_precision can be set by int or str.
        This will set float_format, after interpreting input.
        If numpy has been imported, numpy print precision will also be set.

        integer `n` sets format to '%.nf', otherwise, format set directly.
        An empty string returns to defaults (repr for float, 8 for numpy).

        This parameter can be set via the '%precision' magic.
        """
        if '%' in new:
            # got explicit format string
            fmt = new
            try:
                fmt % 3.14159
            except Exception:
                raise ValueError(
                    "Precision must be int or format string, not %r" % new)
        elif new:
            # otherwise, should be an int
            try:
                i = int(new)
                assert i >= 0
            except ValueError:
                raise ValueError(
                    "Precision must be int or format string, not %r" % new)
            except AssertionError:
                raise ValueError(
                    "int precision must be non-negative, not %r" % i)
            fmt = '%%.%if' % i
            if 'numpy' in sys.modules:
                # set numpy precision if it has been imported
                import numpy
                numpy.set_printoptions(precision=i)
        else:
            # default back to repr
            fmt = '%r'
            if 'numpy' in sys.modules:
                import numpy
                # numpy default is 8
                numpy.set_printoptions(precision=8)
        self.float_format = fmt

    # Use the default pretty printers from IPython.lib.pretty.
    def _singleton_printers_default(self):
        return pretty._singleton_pprinters.copy()

    def _type_printers_default(self):
        d = pretty._type_pprinters.copy()
        d[float] = lambda obj, p, cycle: p.text(self.float_format % obj)
        return d

    def _deferred_printers_default(self):
        return pretty._deferred_type_pprinters.copy()

    #### FormatterABC interface ####

    def __call__(self, obj):
        """Compute the pretty representation of the object."""
        if not self.pprint:
            try:
                return repr(obj)
            except TypeError:
                return ''
        else:
            # This uses StringIO, as cStringIO doesn't handle unicode.
            stream = StringIO()
            # self.newline.encode() is a quick fix for issue gh-597. We need
            # to ensure that stream does not get a mix of unicode and
            # bytestrings, or it will cause trouble.
printer = pretty.RepresentationPrinter( stream, self.verbose, self.max_width, unicode_to_str(self.newline), singleton_pprinters=self.singleton_printers, type_pprinters=self.type_printers, deferred_pprinters=self.deferred_printers) printer.pretty(obj) printer.flush() return stream.getvalue()
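# A quick sketch of the float_precision behavior documented above. A real
# IPython session would reach this formatter via
# get_ipython().display_formatter.formatters['text/plain'].
def _float_precision_example():
    f = PlainTextFormatter()
    print f(3.14159265)          # default '%r' -> '3.14159265'

    f.float_precision = '2'      # int-like input becomes '%.2f'
    print f(3.14159265)          # -> '3.14'

    f.float_precision = '%.3e'   # explicit format strings are used directly
    print f(3.14159265)          # -> '3.142e+00'

    f.float_precision = ''       # back to repr (and numpy's default of 8)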
class WindowsHPCLauncher(BaseLauncher):

    # A regular expression used to get the job id from the output of the
    # submit_command.
    job_id_regexp = Str(r'\d+', config=True)
    # The filename of the instantiated job script.
    job_file_name = CUnicode(u'ipython_job.xml', config=True)
    # The full path to the instantiated job script is computed dynamically
    # by the job_file property below, combining work_dir with job_file_name.
    # The hostname of the scheduler to submit the job to
    scheduler = CUnicode('', config=True)
    job_cmd = CUnicode(find_job_cmd(), config=True)

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(WindowsHPCLauncher, self).__init__(work_dir=work_dir,
                                                 config=config, **kwargs)

    @property
    def job_file(self):
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        return [u'job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = re.search(self.job_id_regexp, output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job started with job id: %r' % job_id)
        return job_id

    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler."""
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler,
        ]
        self.log.info("Starting Win HPC Job: %s" %
                      (self.job_cmd + ' ' + ' '.join(args),))
        # Twisted will raise DeprecationWarnings if we try to pass unicode
        # to this
        output = check_output([self.job_cmd] + args,
                              env=os.environ,
                              cwd=self.work_dir,
                              stderr=STDOUT)
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        args = ['cancel', self.job_id, '/scheduler:%s' % self.scheduler]
        self.log.info("Stopping Win HPC Job: %s" %
                      (self.job_cmd + ' ' + ' '.join(args),))
        try:
            output = check_output([self.job_cmd] + args,
                                  env=os.environ,
                                  cwd=self.work_dir,
                                  stderr=STDOUT)
        except Exception:
            output = 'The job already appears to be stopped: %r' % self.job_id
        # Pass the output of the kill cmd
        self.notify_stop(dict(job_id=self.job_id, output=output))
        return output