def _update_traits(self):
    '''
    Generate trait objects from column data.

    This function will group columns (if applicable) and form JSON object
    strings from columns which have been declared as traits (using the
    ``_traits`` attribute).

    Note:
        This function decides what `trait type`_ to use. This will almost
        always be a JSON (unicode) string formatted to be parsed into an
        array-like structure in Javascript.

    .. _trait type: http://traitlets.readthedocs.org/en/stable/trait_types.html
    '''
    self._revert_categories()
    traits = self._custom_traits()
    groups = None
    prefix = self.__class__.__name__.lower()
    self._fi = self.index[0]
    if self._groupbys:
        groups = self.groupby(self._groupbys)
    for name in self._traits:
        trait_name = '_'.join((prefix, str(name)))  # Name mangle to ensure uniqueness
        if name in self.columns:
            if np.all(np.isclose(self[name], self.ix[self._fi, name])):
                # If all the entries are the same, only send a single entry to JS.
                value = self.ix[self._fi, name]
                if isinstance(value, Integral):
                    trait = Integer(int(value))
                elif isinstance(value, Real):
                    trait = Float(float(value))
                else:
                    raise TypeError('Unknown type for {0} with type {1}'.format(name, type(value)))
            elif groups:
                # If groups exist, make a list of list(s)
                p = 10
                if name in self._precision:
                    p = self._precision[name]
                trait = Unicode(groups.apply(lambda g: g[name].values).to_json(
                    orient='values', double_precision=p))
            else:
                # Otherwise, just send the flattened values
                p = 10
                if name in self._precision:
                    p = self._precision[name]
                trait = Unicode(self[name].to_json(orient='values', double_precision=p))
            traits[trait_name] = trait.tag(sync=True)
        elif name == self.index.names[0]:
            # Not a column, but the (first) index name: send the index values
            string = pd.Series(self.index.values).to_json(orient='values')
            traits[trait_name] = Unicode(string).tag(sync=True)
    self._set_categories()
    return traits
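
# For context, the traits produced above are ordinary traitlets objects tagged
# with sync=True so that a widget frontend mirrors them. A minimal sketch of the
# same pattern in isolation (the class and trait names here are illustrative,
# not part of the code above):
from traitlets import HasTraits, Unicode

class TraitDemo(HasTraits):
    # JSON string of column values, tagged so ipywidgets syncs it to the frontend,
    # where it would be decoded with JSON.parse(...)
    demo_x = Unicode('[0.0, 1.0, 2.0]').tag(sync=True)

demo = TraitDemo()
assert demo.trait_metadata('demo_x', 'sync') is True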
class Pagination(Configurable):

    # configurable options
    default_per_page = Integer(
        100,
        config=True,
        help="Default number of entries per page for paginated results.",
    )

    max_per_page = Integer(
        250,
        config=True,
        help="Maximum number of entries per page for paginated results.",
    )

    # state variables
    url = Unicode("")
    page = Integer(1)
    per_page = Integer(1, min=1)

    @default("per_page")
    def _default_per_page(self):
        return self.default_per_page

    @validate("per_page")
    def _limit_per_page(self, proposal):
        if self.max_per_page and proposal.value > self.max_per_page:
            return self.max_per_page
        if proposal.value <= 1:
            return 1
        return proposal.value

    @observe("max_per_page")
    def _apply_max(self, change):
        if change.new:
            self.per_page = min(change.new, self.per_page)

    total = Integer(0)

    total_pages = Integer(0)

    @default("total_pages")
    def _calculate_total_pages(self):
        total_pages = self.total // self.per_page
        if self.total % self.per_page:
            # there's a remainder, add 1
            total_pages += 1
        return total_pages

    @observe("per_page", "total")
    def _update_total_pages(self, change):
        """Update total_pages when per_page or total is changed"""
        self.total_pages = self._calculate_total_pages()

    separator = Unicode("...")

    def get_page_args(self, handler):
        """
        This method gets the arguments used in the webpage to configure the
        pagination. In case of no arguments, it uses the default values from
        this class.

        Returns:
          - page: The page requested for paginating or the default value (1)
          - per_page: The number of items to return in this page. No more than max_per_page
          - offset: The offset to consider when managing pagination via the ORM
        """
        page = handler.get_argument("page", 1)
        per_page = handler.get_argument("per_page", self.default_per_page)
        try:
            self.per_page = int(per_page)
        except Exception:
            self.per_page = self.default_per_page

        try:
            self.page = int(page)
            if self.page < 1:
                self.page = 1
        except Exception:
            self.page = 1

        return self.page, self.per_page, self.per_page * (self.page - 1)

    @property
    def info(self):
        """Get the pagination information."""
        start = 1 + (self.page - 1) * self.per_page
        end = start + self.per_page - 1
        if end > self.total:
            end = self.total

        if start > self.total:
            start = self.total

        return {'total': self.total, 'start': start, 'end': end}

    def calculate_pages_window(self):
        """Calculates the set of pages to render later in links() method.

        It returns the list of pages to render via links for the pagination.
        By default, as we've observed in other applications, we're going to
        render only a finite and predefined number of pages, avoiding visual
        fatigue related to a long list of pages. By default, we render 7 pages
        plus some inactive links with the characters '...' to point out that
        there are other pages that aren't explicitly rendered.
        The primary way it works is to provide the current webpage and 5 next
        pages, the last 2 ones (in case the current page + 5 does not overflow
        the total length of pages) and the first one for reference.
        """

        before_page = 2
        after_page = 2
        window_size = before_page + after_page + 1

        # Add 1 to total_pages since our starting page is 1 and not 0
        last_page = self.total_pages

        pages = []

        # will default window + start, end fit without truncation?
        if self.total_pages > window_size + 2:
            if self.page - before_page > 1:
                # before_page will not reach page 1
                pages.append(1)
                if self.page - before_page > 2:
                    # before_page will not reach page 2, need separator
                    pages.append(self.separator)

            pages.extend(range(max(1, self.page - before_page), self.page))
            # we now have up to but not including self.page

            if self.page + after_page + 1 >= last_page:
                # after_page gets us to the end
                pages.extend(range(self.page, last_page + 1))
            else:
                # add full after_page entries
                pages.extend(range(self.page, self.page + after_page + 1))

                # add separator *if* this doesn't get to last page - 1
                if self.page + after_page < last_page - 1:
                    pages.append(self.separator)

                pages.append(last_page)

            return pages

        else:
            # everything will fit, nothing to think about
            # always return at least one page
            return list(range(1, last_page + 1)) or [1]

    @property
    def links(self):
        """Get the links for the pagination.

        Getting the input from calculate_pages_window(), generates the HTML
        code for the pages to render, plus the arrows to go onwards and
        backwards (if needed).
        """
        if self.total_pages == 1:
            return []

        pages_to_render = self.calculate_pages_window()

        links = ['<nav>']
        links.append('<ul class="pagination">')

        if self.page > 1:
            prev_page = self.page - 1
            links.append('<li><a href="?page={prev_page}">«</a></li>'.format(
                prev_page=prev_page))
        else:
            links.append(
                '<li class="disabled"><span><span aria-hidden="true">«</span></span></li>'
            )

        for page in list(pages_to_render):
            if page == self.page:
                links.append(
                    '<li class="active"><span>{page}<span class="sr-only">(current)</span></span></li>'
                    .format(page=page))
            elif page == self.separator:
                links.append(
                    '<li class="disabled"><span> <span aria-hidden="true">{separator}</span></span></li>'
                    .format(separator=self.separator))
            else:
                links.append(
                    '<li><a href="?page={page}">{page}</a></li>'.format(
                        page=page))

        if self.page >= 1 and self.page < self.total_pages:
            next_page = self.page + 1
            links.append('<li><a href="?page={next_page}">»</a></li>'.format(
                next_page=next_page))
        else:
            links.append(
                '<li class="disabled"><span><span aria-hidden="true">»</span></span></li>'
            )

        links.append('</ul>')
        links.append('</nav>')

        return ''.join(links)
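
# A quick illustration of the windowing behavior above, assuming the class is
# instantiated directly (the values are arbitrary). With 250 entries at 10 per
# page (25 pages total), the window around page 10 keeps page 1, the two pages
# on either side of the current page, and the last page, with separators:
pagination = Pagination(page=10, per_page=10, total=250)
pagination.calculate_pages_window()
# -> [1, '...', 8, 9, 10, 11, 12, '...', 25]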
class ExecutePreprocessor(Preprocessor):
    """
    Executes all the cells in a notebook
    """

    timeout = Integer(30, allow_none=True,
        help=dedent("""
            The time to wait (in seconds) for output from executions.
            If a cell execution takes longer, an exception (TimeoutError
            on python 3+, RuntimeError on python 2) is raised.

            `None` or `-1` will disable the timeout. If `timeout_func`
            is set, it overrides `timeout`.
            """)
    ).tag(config=True)

    timeout_func = Any(default_value=None, allow_none=True,
        help=dedent("""
            A callable which, when given the cell source as input,
            returns the time to wait (in seconds) for output from cell
            executions. If a cell execution takes longer, an exception
            (TimeoutError on python 3+, RuntimeError on python 2) is
            raised.

            Returning `None` or `-1` will disable the timeout for the cell.
            Not setting `timeout_func` will cause the preprocessor to
            default to using the `timeout` trait for all cells.
            The `timeout_func` trait overrides `timeout` if it is not `None`.
            """)
    ).tag(config=True)

    interrupt_on_timeout = Bool(False,
        help=dedent("""
            If execution of a cell times out, interrupt the kernel and
            continue executing other cells rather than throwing an error
            and stopping.
            """)
    ).tag(config=True)

    startup_timeout = Integer(60,
        help=dedent("""
            The time to wait (in seconds) for the kernel to start.
            If kernel startup takes longer, a RuntimeError is raised.
            """)
    ).tag(config=True)

    allow_errors = Bool(False,
        help=dedent("""
            If `False` (default), when a cell raises an error the
            execution is stopped and a `CellExecutionError`
            is raised.
            If `True`, execution errors are ignored and the execution
            is continued until the end of the notebook. Output from
            exceptions is included in the cell output in both cases.
            """)
    ).tag(config=True)

    force_raise_errors = Bool(False,
        help=dedent("""
            If False (default), errors from executing the notebook can be
            allowed with a `raises-exception` tag on a single cell, or the
            `allow_errors` configurable option for all cells. An allowed error
            will be recorded in notebook output, and execution will continue.
            If an error occurs when it is not explicitly allowed, a
            `CellExecutionError` will be raised.
            If True, `CellExecutionError` will be raised for any error that
            occurs while executing the notebook. This overrides both the
            `allow_errors` option and the `raises-exception` cell tag.
            """)
    ).tag(config=True)

    extra_arguments = List(Unicode())

    kernel_name = Unicode('',
        help=dedent("""
            Name of kernel to use to execute the cells.
            If not set, use the kernel_spec embedded in the notebook.
            """)
    ).tag(config=True)

    raise_on_iopub_timeout = Bool(False,
        help=dedent("""
            If `False` (default), then the kernel will continue waiting for
            iopub messages until it receives a kernel idle message, or until a
            timeout occurs, at which point the currently executing cell will be
            skipped. If `True`, then an error will be raised after the first
            timeout. This option generally does not need to be used, but may be
            useful in contexts where there is the possibility of executing
            notebooks with memory-consuming infinite loops.
            """)
    ).tag(config=True)

    store_widget_state = Bool(True,
        help=dedent("""
            If `True` (default), then the state of the Jupyter widgets created
            at the kernel will be stored in the metadata of the notebook.
            """)
    ).tag(config=True)

    iopub_timeout = Integer(4, allow_none=False,
        help=dedent("""
            The time to wait (in seconds) for IOPub output. This generally
            doesn't need to be set, but on some slow networks (such as CI
            systems) the default timeout might not be long enough to get all
            messages.
            """)
    ).tag(config=True)

    shutdown_kernel = Enum(['graceful', 'immediate'],
        default_value='graceful',
        help=dedent("""
            If `graceful` (default), then the kernel is given time to clean
            up after executing all cells, e.g., to execute its `atexit` hooks.
            If `immediate`, then the kernel is signaled to immediately
            terminate.
            """)
    ).tag(config=True)

    ipython_hist_file = Unicode(
        default_value=':memory:',
        help="""Path to file to use for SQLite history database for an
        IPython kernel.

        The specific value `:memory:` (including the colons at both ends,
        but not the backticks) avoids creating a history file. Otherwise,
        IPython will create a history file for each kernel.

        When running kernels simultaneously (e.g. via multiprocessing),
        saving history to a single SQLite file can result in database errors,
        so using `:memory:` is recommended in non-interactive contexts.
        """).tag(config=True)

    kernel_manager_class = Type(
        config=True,
        help='The kernel manager class to use.')

    @default('kernel_manager_class')
    def _kernel_manager_class_default(self):
        """Use a dynamic default to avoid importing jupyter_client at startup"""
        try:
            from jupyter_client import KernelManager
        except ImportError:
            raise ImportError(
                "`nbconvert --execute` requires the jupyter_client package: `pip install jupyter_client`"
            )
        return KernelManager

    _display_id_map = Dict(
        help=dedent("""
            mapping of locations of outputs with a given display_id
            tracks cell index and output index within cell.outputs for
            each appearance of the display_id
            {
                'display_id': {
                    cell_idx: [output_idx,]
                }
            }
            """))

    def start_new_kernel(self, **kwargs):
        """Creates a new kernel manager and kernel client.

        Parameters
        ----------
        kwargs :
            Any options for `self.kernel_manager_class.start_kernel()`. Because
            that defaults to KernelManager, this will likely include options
            accepted by `KernelManager.start_kernel()`, which includes `cwd`.

        Returns
        -------
        km : KernelManager
            A kernel manager as created by self.kernel_manager_class.
        kc : KernelClient
            Kernel client as created by the kernel manager `km`.
        """
        if not self.kernel_name:
            self.kernel_name = self.nb.metadata.get('kernelspec', {}).get('name', 'python')
        km = self.kernel_manager_class(kernel_name=self.kernel_name,
                                       config=self.config)
        if km.ipykernel and self.ipython_hist_file:
            self.extra_arguments += [
                '--HistoryManager.hist_file={}'.format(self.ipython_hist_file)
            ]
        km.start_kernel(extra_arguments=self.extra_arguments, **kwargs)

        kc = km.client()
        kc.start_channels()
        try:
            kc.wait_for_ready(timeout=self.startup_timeout)
        except RuntimeError:
            kc.stop_channels()
            km.shutdown_kernel()
            raise
        kc.allow_stdin = False
        return km, kc

    @contextmanager
    def setup_preprocessor(self, nb, resources, km=None, **kwargs):
        """
        Context manager for setting up the class to execute a notebook.

        This assigns `nb` to `self.nb` where it will be modified in-place. It
        also creates and assigns the Kernel Manager (`self.km`) and Kernel
        Client (`self.kc`).

        It is intended to yield to a block that will execute code.

        When control returns from the yield it stops the client's zmq channels,
        shuts down the kernel, and removes the now unused attributes.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being executed.
        resources : dictionary
            Additional resources used in the conversion process. For example,
            passing ``{'metadata': {'path': run_path}}`` sets the execution
            path to ``run_path``.
        km : KernelManager (optional)
            Optional kernel manager. If none is provided, a kernel manager
            will be created.

        Returns
        -------
        nb : NotebookNode
            The executed notebook.
        resources : dictionary
            Additional resources used in the conversion process.
        """
        path = resources.get('metadata', {}).get('path', '') or None
        self.nb = nb
        # clear display_id map
        self._display_id_map = {}
        self.widget_state = {}
        self.widget_buffers = {}

        if km is None:
            kwargs["cwd"] = path
            self.km, self.kc = self.start_new_kernel(**kwargs)
            try:
                # Yield unbound args for easier understanding and downstream consumption
                yield nb, self.km, self.kc
            finally:
                self.kc.stop_channels()
                self.km.shutdown_kernel(now=self.shutdown_kernel == 'immediate')

                for attr in ['nb', 'km', 'kc']:
                    delattr(self, attr)
        else:
            self.km = km
            if not km.has_kernel:
                km.start_kernel(extra_arguments=self.extra_arguments, **kwargs)
            self.kc = km.client()
            self.kc.start_channels()
            try:
                self.kc.wait_for_ready(timeout=self.startup_timeout)
            except RuntimeError:
                self.kc.stop_channels()
                raise
            self.kc.allow_stdin = False
            try:
                yield nb, self.km, self.kc
            finally:
                self.kc.stop_channels()
                for attr in ['nb', 'km', 'kc']:
                    delattr(self, attr)

    def preprocess(self, nb, resources=None, km=None):
        """
        Preprocess notebook executing each code cell.

        The input argument `nb` is modified in-place.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being executed.
        resources : dictionary (optional)
            Additional resources used in the conversion process. For example,
            passing ``{'metadata': {'path': run_path}}`` sets the execution
            path to ``run_path``.
        km : KernelManager (optional)
            Optional kernel manager. If none is provided, a kernel manager
            will be created.

        Returns
        -------
        nb : NotebookNode
            The executed notebook.
        resources : dictionary
            Additional resources used in the conversion process.
        """
        if not resources:
            resources = {}

        with self.setup_preprocessor(nb, resources, km=km):
            self.log.info("Executing notebook with kernel: %s" % self.kernel_name)
            nb, resources = super(ExecutePreprocessor, self).preprocess(nb, resources)
            info_msg = self._wait_for_reply(self.kc.kernel_info())
            nb.metadata['language_info'] = info_msg['content']['language_info']
            self.set_widgets_metadata()

        return nb, resources

    def set_widgets_metadata(self):
        if self.widget_state:
            self.nb.metadata.widgets = {
                'application/vnd.jupyter.widget-state+json': {
                    'state': {
                        model_id: _serialize_widget_state(state)
                        for model_id, state in self.widget_state.items()
                        if '_model_name' in state
                    },
                    'version_major': 2,
                    'version_minor': 0,
                }
            }
            for key, widget in self.nb.metadata.widgets[
                    'application/vnd.jupyter.widget-state+json']['state'].items():
                buffers = self.widget_buffers.get(key)
                if buffers:
                    widget['buffers'] = buffers

    def preprocess_cell(self, cell, resources, cell_index, store_history=True):
        """
        Executes a single code cell. See base.py for details.

        To execute all cells see :meth:`preprocess`.
        """
        if cell.cell_type != 'code' or not cell.source.strip():
            return cell, resources

        reply, outputs = self.run_cell(cell, cell_index, store_history)
        # Backwards compatibility for processes that wrap run_cell
        cell.outputs = outputs

        cell_allows_errors = (self.allow_errors or "raises-exception"
                              in cell.metadata.get("tags", []))

        if self.force_raise_errors or not cell_allows_errors:
            if (reply is not None) and reply['content']['status'] == 'error':
                raise CellExecutionError.from_cell_and_msg(cell, reply['content'])

        return cell, resources

    def _update_display_id(self, display_id, msg):
        """Update outputs with a given display_id"""
        if display_id not in self._display_id_map:
            self.log.debug("display id %r not in %s", display_id, self._display_id_map)
            return

        if msg['header']['msg_type'] == 'update_display_data':
            msg['header']['msg_type'] = 'display_data'

        try:
            out = output_from_msg(msg)
        except ValueError:
            self.log.error("unhandled iopub msg: " + msg['msg_type'])
            return

        for cell_idx, output_indices in self._display_id_map[display_id].items():
            cell = self.nb['cells'][cell_idx]
            outputs = cell['outputs']
            for output_idx in output_indices:
                outputs[output_idx]['data'] = out['data']
                outputs[output_idx]['metadata'] = out['metadata']

    def _poll_for_reply(self, msg_id, cell=None, timeout=None):
        try:
            # check with timeout if kernel is still alive
            msg = self.kc.shell_channel.get_msg(timeout=timeout)
            if msg['parent_header'].get('msg_id') == msg_id:
                return msg
        except Empty:
            # received no message, check if kernel is still alive
            self._check_alive()
            # kernel still alive, wait for a message

    def _get_timeout(self, cell):
        if self.timeout_func is not None and cell is not None:
            timeout = self.timeout_func(cell)
        else:
            timeout = self.timeout

        if not timeout or timeout < 0:
            timeout = None

        return timeout

    def _handle_timeout(self, timeout, cell=None):
        self.log.error("Timeout waiting for execute reply (%is)." % timeout)
        if self.interrupt_on_timeout:
            self.log.error("Interrupting kernel")
            self.km.interrupt_kernel()
        else:
            raise CellTimeoutError.error_from_timeout_and_cell(
                "Cell execution timed out", timeout, cell)

    def _check_alive(self):
        if not self.kc.is_alive():
            self.log.error("Kernel died while waiting for execute reply.")
            raise DeadKernelError("Kernel died")

    def _wait_for_reply(self, msg_id, cell=None):
        # wait for finish, with timeout
        timeout = self._get_timeout(cell)
        cumulative_time = 0
        timeout_interval = 5
        while True:
            try:
                msg = self.kc.shell_channel.get_msg(timeout=timeout_interval)
            except Empty:
                self._check_alive()
                cumulative_time += timeout_interval
                if timeout and cumulative_time > timeout:
                    self._handle_timeout(timeout, cell)
                    break
            else:
                if msg['parent_header'].get('msg_id') == msg_id:
                    return msg

    def _timeout_with_deadline(self, timeout, deadline):
        if deadline is not None and deadline - monotonic() < timeout:
            timeout = deadline - monotonic()

        if timeout < 0:
            timeout = 0

        return timeout

    def _passed_deadline(self, deadline):
        if deadline is not None and deadline - monotonic() <= 0:
            return True
        return False

    def run_cell(self, cell, cell_index=0, store_history=False):
        parent_msg_id = self.kc.execute(cell.source,
                                        store_history=store_history,
                                        stop_on_error=not self.allow_errors)
        self.log.debug("Executing cell:\n%s", cell.source)
        exec_timeout = self._get_timeout(cell)
        deadline = None
        if exec_timeout is not None:
            deadline = monotonic() + exec_timeout

        cell.outputs = []
        self.clear_before_next_output = False

        # This loop resolves #659. By polling iopub_channel's and shell_channel's
        # output we avoid dropping output and important signals (like idle) from
        # iopub_channel. Prior to this change, iopub_channel wasn't polled until
        # after exec_reply was obtained from shell_channel, leading to the
        # aforementioned dropped data.

        # These two variables are used to track what still needs polling:
        # more_output=true => continue to poll the iopub_channel
        more_output = True
        # polling_exec_reply=true => continue to poll the shell_channel
        polling_exec_reply = True

        while more_output or polling_exec_reply:
            if polling_exec_reply:
                if self._passed_deadline(deadline):
                    self._handle_timeout(exec_timeout, cell)
                    polling_exec_reply = False
                    continue

                # Avoid exceeding the execution timeout (deadline), but stop
                # after at most 1s so we can poll output from iopub_channel.
                timeout = self._timeout_with_deadline(1, deadline)
                exec_reply = self._poll_for_reply(parent_msg_id, cell, timeout)
                if exec_reply is not None:
                    polling_exec_reply = False

            if more_output:
                try:
                    timeout = self.iopub_timeout
                    if polling_exec_reply:
                        # Avoid exceeding the execution timeout (deadline) while
                        # polling for output.
                        timeout = self._timeout_with_deadline(timeout, deadline)
                    msg = self.kc.iopub_channel.get_msg(timeout=timeout)
                except Empty:
                    if polling_exec_reply:
                        # Still waiting for execution to finish so we expect that
                        # output may not always be produced yet.
                        continue

                    if self.raise_on_iopub_timeout:
                        raise CellTimeoutError.error_from_timeout_and_cell(
                            "Timeout waiting for IOPub output",
                            self.iopub_timeout, cell)
                    else:
                        self.log.warning("Timeout waiting for IOPub output")
                        more_output = False
                        continue

                if msg['parent_header'].get('msg_id') != parent_msg_id:
                    # not an output from our execution
                    continue

                try:
                    # Will raise CellExecutionComplete when completed
                    self.process_message(msg, cell, cell_index)
                except CellExecutionComplete:
                    more_output = False

        # Return cell.outputs still for backwards compatibility
        return exec_reply, cell.outputs

    def process_message(self, msg, cell, cell_index):
        """
        Processes a kernel message, updates cell state, and returns the
        resulting output object that was appended to cell.outputs.

        The input argument `cell` is modified in-place.

        Parameters
        ----------
        msg : dict
            The kernel message being processed.
        cell : nbformat.NotebookNode
            The cell which is currently being processed.
        cell_index : int
            The position of the cell within the notebook object.

        Returns
        -------
        output : dict
            The execution output payload (or None for no output).

        Raises
        ------
        CellExecutionComplete
            Once a message arrives which indicates computation completeness.
        """
        msg_type = msg['msg_type']
        self.log.debug("msg_type: %s", msg_type)
        content = msg['content']
        self.log.debug("content: %s", content)

        display_id = content.get('transient', {}).get('display_id', None)
        if display_id and msg_type in {
                'execute_result', 'display_data', 'update_display_data'}:
            self._update_display_id(display_id, msg)

        # set the prompt number for the input and the output
        if 'execution_count' in content:
            cell['execution_count'] = content['execution_count']

        if msg_type == 'status':
            if content['execution_state'] == 'idle':
                raise CellExecutionComplete()
        elif msg_type == 'clear_output':
            self.clear_output(cell.outputs, msg, cell_index)
        elif msg_type.startswith('comm'):
            self.handle_comm_msg(cell.outputs, msg, cell_index)
        # Check for remaining messages we don't process
        elif msg_type not in ['execute_input', 'update_display_data']:
            # Assign output as our processed "result"
            return self.output(cell.outputs, msg, display_id, cell_index)

    def output(self, outs, msg, display_id, cell_index):
        msg_type = msg['msg_type']

        try:
            out = output_from_msg(msg)
        except ValueError:
            self.log.error("unhandled iopub msg: " + msg_type)
            return

        if self.clear_before_next_output:
            self.log.debug('Executing delayed clear_output')
            outs[:] = []
            self.clear_display_id_mapping(cell_index)
            self.clear_before_next_output = False

        if display_id:
            # record output index in:
            #   _display_id_map[display_id][cell_idx]
            cell_map = self._display_id_map.setdefault(display_id, {})
            output_idx_list = cell_map.setdefault(cell_index, [])
            output_idx_list.append(len(outs))

        outs.append(out)

        return out

    def clear_output(self, outs, msg, cell_index):
        content = msg['content']
        if content.get('wait'):
            self.log.debug('Wait to clear output')
            self.clear_before_next_output = True
        else:
            self.log.debug('Immediate clear output')
            outs[:] = []
            self.clear_display_id_mapping(cell_index)

    def clear_display_id_mapping(self, cell_index):
        for display_id, cell_map in self._display_id_map.items():
            if cell_index in cell_map:
                cell_map[cell_index] = []

    def handle_comm_msg(self, outs, msg, cell_index):
        content = msg['content']
        data = content['data']
        if self.store_widget_state and 'state' in data:
            # ignore custom comm messages that carry no widget state
            self.widget_state.setdefault(content['comm_id'], {}).update(data['state'])
            if 'buffer_paths' in data and data['buffer_paths']:
                self.widget_buffers[content['comm_id']] = _get_buffer_data(msg)
class OwncloudUserSpawner(DockerSpawner):

    container_image = Unicode("jupyterhub/ownclouduser", config=True)

    def _options_form_default(self):
        return """
        <label for="owncloud_username">Owncloud username:</label>
        <input
          id="username_input"
          type="text"
          autocapitalize="off"
          autocorrect="off"
          class="form-control"
          name="owncloud_username"
          value="{username}"
          tabindex="1"
        />
        <label for='owncloud_password'>Owncloud password:</label>
        <input
          id="password_input"
          type="password"
          class="form-control"
          name="owncloud_password"
          tabindex="2"
          autofocus="autofocus"
        />
        """.format(username=self.user.name)

    def options_from_form(self, formdata):
        options = {}
        options['username'] = ''
        options['password'] = ''
        arg = formdata.get('owncloud_username', [''])[0]
        if arg:
            options['username'] = arg
        arg = formdata.get('owncloud_password', [''])[0]
        if arg:
            options['password'] = arg

        # Write a per-user davfs2 config directory, recreated on each spawn
        self.davfs2_config = os.getenv('PWD') + '/.davfs2.%s' % self.user.name
        shutil.rmtree(self.davfs2_config, ignore_errors=True)
        os.mkdir(self.davfs2_config)
        with open(self.davfs2_config + '/secrets', 'w') as f:
            f.write(
                'https://tangshan.cosx-isinx.org/owncloud/remote.php/webdav %s %s\n'
                % (options['username'], options['password']))
        with open(self.davfs2_config + '/davfs2.conf', 'w') as f:
            f.write('kernel_fs fuse\n')
            f.write('use_locks 0\n')
            f.write('table_size 65536\n')
            f.write('dir_refresh 1\n')
            f.write('delay_upload 0.5\n')
            f.write('gui_optimize 1\n')

        return options

    image_homedir_format_string = Unicode(
        "/home/{username}",
        config=True,
        help=dedent("""
            Format string for the path to the user's home directory inside
            the image. The format string should include a `username`
            variable, which will be formatted with the user's username.
            """))

    user_id = Integer(9999,
        help=dedent("""
            User id is irrelevant if we are mounting from the owncloud
            WebDAV interface
            """))

    @property
    def homedir(self):
        """
        Path to the user's home directory in the docker image.
        """
        return self.image_homedir_format_string.format(username=self.user.name)

    @property
    def volume_binds(self):
        """
        The second half of declaring a volume with docker-py happens when
        you actually call start(). The required format is a dict of dicts
        that looks like:

            {
                host_location: {'bind': container_location, 'ro': True}
            }
        """
        volumes = super(OwncloudUserSpawner, self).volume_binds
        volumes[self.davfs2_config] = {'bind': '/etc/davfs2', 'ro': False}
        return volumes

    def get_env(self):
        env = super(OwncloudUserSpawner, self).get_env()
        env.update(dict(
            USER=self.user.name,
            USER_ID=self.user_id,
            HOME=self.homedir,
        ))
        return env

    def _user_id_default(self):
        return 9999

    def load_state(self, state):
        super().load_state(state)
        if 'user_id' in state:
            self.user_id = state['user_id']

    def get_state(self):
        state = super().get_state()
        if self.user_id >= 0:
            state['user_id'] = self.user_id
        return state

    def start(self, image=None, extra_create_kwargs=None,
              extra_start_kwargs=None, extra_host_config=None):
        """start the single-user server in a docker container"""
        if extra_create_kwargs is None:
            extra_create_kwargs = {}
        extra_create_kwargs['working_dir'] = '/cloud'

        if extra_host_config is None:
            extra_host_config = dict()
        # davfs2 needs FUSE, which requires elevated container privileges
        extra_host_config['privileged'] = True
        extra_host_config['cap_add'] = ['SYS_ADMIN']
        extra_host_config['devices'] = ['/dev/fuse:/dev/fuse:rwm']

        return super(OwncloudUserSpawner, self).start(
            image=image,
            extra_create_kwargs=extra_create_kwargs,
            extra_start_kwargs=extra_start_kwargs,
            extra_host_config=extra_host_config,
        )
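
# A minimal sketch of wiring this spawner into jupyterhub_config.py; the
# module path 'ownclouduserspawner' is hypothetical and depends on where the
# class above is installed:
c.JupyterHub.spawner_class = 'ownclouduserspawner.OwncloudUserSpawner'
c.OwncloudUserSpawner.container_image = 'jupyterhub/ownclouduser'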
class HubAuth(SingletonConfigurable):
    """A class for authenticating with JupyterHub

    This can be used by any application.

    If using tornado, use via :class:`HubAuthenticated` mixin.
    If using manually, use the ``.user_for_cookie(cookie_value)`` method
    to identify the user corresponding to a given cookie value.

    The following config must be set:

    - api_token (token for authenticating with JupyterHub API),
      fetched from the JUPYTERHUB_API_TOKEN env by default.

    The following config MAY be set:

    - api_url: the base URL of the Hub's internal API,
      fetched from JUPYTERHUB_API_URL by default.
    - cookie_cache_max_age: the number of seconds responses from the Hub
      should be cached.
    - login_url (the *public* ``/hub/login`` URL of the Hub).
    - cookie_name: the name of the cookie I should be using,
      if different from the default (unlikely).
    """

    hub_host = Unicode(
        '',
        help="""The public host of JupyterHub

        Only used if JupyterHub is spreading servers across subdomains.
        """,
    ).tag(config=True)

    @default('hub_host')
    def _default_hub_host(self):
        return os.getenv('JUPYTERHUB_HOST', '')

    base_url = Unicode(
        os.getenv('JUPYTERHUB_SERVICE_PREFIX') or '/',
        help="""The base URL prefix of this application

        e.g. /services/service-name/ or /user/name/

        Default: get from JUPYTERHUB_SERVICE_PREFIX
        """,
    ).tag(config=True)

    @validate('base_url')
    def _add_slash(self, proposal):
        """Ensure base_url starts and ends with /"""
        value = proposal['value']
        if not value.startswith('/'):
            value = '/' + value
        if not value.endswith('/'):
            value = value + '/'
        return value

    # where is the hub
    api_url = Unicode(
        os.getenv('JUPYTERHUB_API_URL') or 'http://127.0.0.1:8081/hub/api',
        help="""The base API URL of the Hub.

        Typically `http://hub-ip:hub-port/hub/api`
        """,
    ).tag(config=True)

    @default('api_url')
    def _api_url(self):
        env_url = os.getenv('JUPYTERHUB_API_URL')
        if env_url:
            return env_url
        else:
            return 'http://127.0.0.1:8081' + url_path_join(self.hub_prefix, 'api')

    api_token = Unicode(
        os.getenv('JUPYTERHUB_API_TOKEN', ''),
        help="""API key for accessing Hub API.

        Generate with `jupyterhub token [username]` or add to
        JupyterHub.services config.
        """,
    ).tag(config=True)

    hub_prefix = Unicode(
        '/hub/',
        help="""The URL prefix for the Hub itself.

        Typically /hub/
        """,
    ).tag(config=True)

    @default('hub_prefix')
    def _default_hub_prefix(self):
        return url_path_join(os.getenv('JUPYTERHUB_BASE_URL') or '/', 'hub') + '/'

    login_url = Unicode(
        '/hub/login',
        help="""The login URL to use

        Typically /hub/login
        """,
    ).tag(config=True)

    @default('login_url')
    def _default_login_url(self):
        return self.hub_host + url_path_join(self.hub_prefix, 'login')

    keyfile = Unicode(
        os.getenv('JUPYTERHUB_SSL_KEYFILE', ''),
        help="""The ssl key to use for requests

        Use with certfile
        """,
    ).tag(config=True)

    certfile = Unicode(
        os.getenv('JUPYTERHUB_SSL_CERTFILE', ''),
        help="""The ssl cert to use for requests

        Use with keyfile
        """,
    ).tag(config=True)

    client_ca = Unicode(
        os.getenv('JUPYTERHUB_SSL_CLIENT_CA', ''),
        help="""The ssl certificate authority to use to verify requests

        Use with keyfile and certfile
        """,
    ).tag(config=True)

    cookie_name = Unicode(
        'jupyterhub-services',
        help="""The name of the cookie I should be looking for""",
    ).tag(config=True)

    cookie_options = Dict(
        help="""Additional options to pass when setting cookies.

        Can include things like `expires_days=None` for session-expiry
        or `secure=True` if served on HTTPS and default HTTPS discovery fails
        (e.g. behind some proxies).
        """,
    ).tag(config=True)

    @default('cookie_options')
    def _default_cookie_options(self):
        # load default from env
        options_env = os.environ.get('JUPYTERHUB_COOKIE_OPTIONS')
        if options_env:
            return json.loads(options_env)
        else:
            return {}

    cookie_cache_max_age = Integer(help="DEPRECATED. Use cache_max_age")

    @observe('cookie_cache_max_age')
    def _deprecated_cookie_cache(self, change):
        warnings.warn(
            "cookie_cache_max_age is deprecated in JupyterHub 0.8. Use cache_max_age instead."
        )
        self.cache_max_age = change.new

    cache_max_age = Integer(
        300,
        help="""The maximum time (in seconds) to cache the Hub's responses for authentication.

        A larger value reduces load on the Hub and occasional response lag.
        A smaller value reduces propagation time of changes on the Hub (rare).

        Default: 300 (five minutes)
        """,
    ).tag(config=True)

    cache = Instance(_ExpiringDict, allow_none=False)

    @default('cache')
    def _default_cache(self):
        return _ExpiringDict(self.cache_max_age)

    def _check_hub_authorization(self, url, cache_key=None, use_cache=True):
        """Identify a user with the Hub

        Args:
            url (str): The API URL to check the Hub for authorization
                       (e.g. http://127.0.0.1:8081/hub/api/authorizations/token/abc-def)
            cache_key (str): The key for checking the cache
            use_cache (bool): Specify use_cache=False to skip cached cookie values
                              (default: True)

        Returns:
            user_model (dict): The user model, if a user is identified,
                               None if authentication fails.

        Raises an HTTPError if the request failed for a reason other than
        no such user.
        """
        if use_cache:
            if cache_key is None:
                raise ValueError("cache_key is required when using cache")
            # check for a cached reply, so we don't check with the Hub if we don't have to
            try:
                return self.cache[cache_key]
            except KeyError:
                app_log.debug("HubAuth cache miss: %s", cache_key)

        data = self._api_request('GET', url, allow_404=True)
        if data is None:
            app_log.warning("No Hub user identified for request")
        else:
            app_log.debug("Received request from Hub user %s", data)
        if use_cache:
            # cache result
            self.cache[cache_key] = data
        return data

    def _api_request(self, method, url, **kwargs):
        """Make an API request"""
        allow_404 = kwargs.pop('allow_404', False)
        headers = kwargs.setdefault('headers', {})
        headers.setdefault('Authorization', 'token %s' % self.api_token)
        if "cert" not in kwargs and self.certfile and self.keyfile:
            kwargs["cert"] = (self.certfile, self.keyfile)
            if self.client_ca:
                kwargs["verify"] = self.client_ca
        try:
            r = requests.request(method, url, **kwargs)
        except requests.ConnectionError as e:
            app_log.error("Error connecting to %s: %s", self.api_url, e)
            msg = "Failed to connect to Hub API at %r." % self.api_url
            msg += (
                " Is the Hub accessible at this URL (from host: %s)?"
                % socket.gethostname()
            )
            if '127.0.0.1' in self.api_url:
                msg += (
                    " Make sure to set c.JupyterHub.hub_ip to an IP accessible to"
                    + " single-user servers if the servers are not on the same host as the Hub."
                )
            raise HTTPError(500, msg)

        data = None
        if r.status_code == 404 and allow_404:
            pass
        elif r.status_code == 403:
            app_log.error(
                "I don't have permission to check authorization with JupyterHub,"
                " my auth token may have expired: [%i] %s",
                r.status_code,
                r.reason,
            )
            app_log.error(r.text)
            raise HTTPError(
                500, "Permission failure checking authorization, I may need a new token"
            )
        elif r.status_code >= 500:
            app_log.error(
                "Upstream failure verifying auth token: [%i] %s",
                r.status_code,
                r.reason,
            )
            app_log.error(r.text)
            raise HTTPError(502, "Failed to check authorization (upstream problem)")
        elif r.status_code >= 400:
            app_log.warning(
                "Failed to check authorization: [%i] %s", r.status_code, r.reason
            )
            app_log.warning(r.text)
            msg = "Failed to check authorization"
            # pass on error from oauth failure
            try:
                response = r.json()
                # prefer more specific 'error_description', fallback to 'error'
                description = response.get(
                    "error_description", response.get("error", "Unknown error")
                )
            except Exception:
                pass
            else:
                msg += ": " + description
            raise HTTPError(500, msg)
        else:
            data = r.json()

        return data

    def user_for_cookie(self, encrypted_cookie, use_cache=True, session_id=''):
        """Ask the Hub to identify the user for a given cookie.

        Args:
            encrypted_cookie (str): the cookie value (not decrypted, the Hub
                                    will do that)
            use_cache (bool): Specify use_cache=False to skip cached cookie
                              values (default: True)

        Returns:
            user_model (dict): The user model, if a user is identified,
                               None if authentication fails.

            The 'name' field contains the user's name.
        """
        return self._check_hub_authorization(
            url=url_path_join(
                self.api_url,
                "authorizations/cookie",
                self.cookie_name,
                quote(encrypted_cookie, safe=''),
            ),
            cache_key='cookie:{}:{}'.format(session_id, encrypted_cookie),
            use_cache=use_cache,
        )

    def user_for_token(self, token, use_cache=True, session_id=''):
        """Ask the Hub to identify the user for a given token.

        Args:
            token (str): the token
            use_cache (bool): Specify use_cache=False to skip cached cookie
                              values (default: True)

        Returns:
            user_model (dict): The user model, if a user is identified,
                               None if authentication fails.

            The 'name' field contains the user's name.
        """
        return self._check_hub_authorization(
            url=url_path_join(
                self.api_url, "authorizations/token", quote(token, safe='')
            ),
            cache_key='token:{}:{}'.format(session_id, token),
            use_cache=use_cache,
        )

    auth_header_name = 'Authorization'
    auth_header_pat = re.compile(r'token\s+(.+)', re.IGNORECASE)

    def get_token(self, handler):
        """Get the user token from a request

        - in URL parameters: ?token=<token>
        - in header: Authorization: token <token>
        """
        user_token = handler.get_argument('token', '')
        if not user_token:
            # get it from Authorization header
            m = self.auth_header_pat.match(
                handler.request.headers.get(self.auth_header_name, '')
            )
            if m:
                user_token = m.group(1)
        return user_token

    def _get_user_cookie(self, handler):
        """Get the user model from a cookie"""
        encrypted_cookie = handler.get_cookie(self.cookie_name)
        session_id = self.get_session_id(handler)
        if encrypted_cookie:
            return self.user_for_cookie(encrypted_cookie, session_id=session_id)

    def get_session_id(self, handler):
        """Get the jupyterhub session id from the jupyterhub-session-id cookie."""
        return handler.get_cookie('jupyterhub-session-id', '')

    def get_user(self, handler):
        """Get the Hub user for a given tornado handler.

        Checks cookie with the Hub to identify the current user.

        Args:
            handler (tornado.web.RequestHandler): the current request handler

        Returns:
            user_model (dict): The user model, if a user is identified,
                               None if authentication fails.

            The 'name' field contains the user's name.
        """
        # only allow this to be called once per handler
        # avoids issues if an error is raised,
        # since this may be called again when trying to render the error page
        if hasattr(handler, '_cached_hub_user'):
            return handler._cached_hub_user

        handler._cached_hub_user = user_model = None
        session_id = self.get_session_id(handler)

        # check token first
        token = self.get_token(handler)
        if token:
            user_model = self.user_for_token(token, session_id=session_id)
            if user_model:
                handler._token_authenticated = True

        # no token, check cookie
        if user_model is None:
            user_model = self._get_user_cookie(handler)

        # cache result
        handler._cached_hub_user = user_model
        if not user_model:
            app_log.debug("No user identified")
        return user_model
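
# A minimal sketch of standalone use outside tornado, assuming a running Hub
# reachable at the default api_url; the token values are placeholders:
auth = HubAuth(api_token='service-api-token', cache_max_age=60)
# Contacts the Hub API (cached for cache_max_age seconds per token):
user_model = auth.user_for_token('some-user-token')
if user_model:
    print("Authenticated as", user_model['name'])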
class ConfigurableHTTPProxy(Proxy):
    """Proxy implementation for the default configurable-http-proxy.

    This is the default proxy implementation for running the nodejs proxy
    `configurable-http-proxy`.

    If the proxy should not be run as a subprocess of the Hub,
    (e.g. in a separate container), set::

        c.ConfigurableHTTPProxy.should_start = False
    """

    proxy_process = Any()
    client = Instance(AsyncHTTPClient, ())

    concurrency = Integer(
        10,
        config=True,
        help="""
        The number of requests allowed to be concurrently outstanding to the proxy

        Limiting this number avoids potential timeout errors
        by sending too many requests to update the proxy at once
        """,
    )
    semaphore = Any()

    @default('semaphore')
    def _default_semaphore(self):
        return asyncio.BoundedSemaphore(self.concurrency)

    @observe('concurrency')
    def _concurrency_changed(self, change):
        self.semaphore = asyncio.BoundedSemaphore(change.new)

    debug = Bool(False, help="Add debug-level logging to the Proxy.", config=True)
    auth_token = Unicode(
        help="""The Proxy auth token

        Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default.
        """,
    ).tag(config=True)
    check_running_interval = Integer(5, config=True)

    @default('auth_token')
    def _auth_token_default(self):
        token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', '')
        if self.should_start and not token:
            # generating tokens is fine if the Hub is starting the proxy
            self.log.info("Generating new CONFIGPROXY_AUTH_TOKEN")
            token = utils.new_token()
        return token

    api_url = Unicode(
        'http://127.0.0.1:8001',
        config=True,
        help="""The ip (or hostname) of the proxy's API endpoint""",
    )
    command = Command(
        'configurable-http-proxy',
        config=True,
        help="""The command to start the proxy""",
    )
    pid_file = Unicode(
        "jupyterhub-proxy.pid",
        config=True,
        help="File in which to write the PID of the proxy process.",
    )

    _check_running_callback = Any(
        help="PeriodicCallback to check if the proxy is running")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # check for required token if proxy is external
        if not self.auth_token and not self.should_start:
            raise ValueError(
                "%s.auth_token or CONFIGPROXY_AUTH_TOKEN env is required"
                " if Proxy.should_start is False" % self.__class__.__name__
            )

    def _check_previous_process(self):
        """Check if there's a process leftover and shut it down if so"""
        if not self.pid_file or not os.path.exists(self.pid_file):
            return
        pid_file = os.path.abspath(self.pid_file)
        self.log.warning("Found proxy pid file: %s", pid_file)
        try:
            with open(pid_file, "r") as f:
                pid = int(f.read().strip())
        except ValueError:
            self.log.warning("%s did not appear to contain a pid", pid_file)
            self._remove_pid_file()
            return

        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            self.log.warning("Proxy no longer running at pid=%s", pid)
            self._remove_pid_file()
            return

        # if we got here, CHP is still running
        self.log.warning("Proxy still running at pid=%s", pid)
        # escalate: SIGTERM twice, then SIGKILL
        for i, sig in enumerate([signal.SIGTERM] * 2 + [signal.SIGKILL]):
            try:
                os.kill(pid, sig)
            except ProcessLookupError:
                break
            time.sleep(1)
            try:
                os.kill(pid, 0)
            except ProcessLookupError:
                break

        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            self.log.warning("Stopped proxy at pid=%s", pid)
            self._remove_pid_file()
            return
        else:
            raise RuntimeError("Failed to stop proxy at pid=%s" % pid)

    def _write_pid_file(self):
        """write pid for proxy to a file"""
        self.log.debug("Writing proxy pid file: %s", self.pid_file)
        with open(self.pid_file, "w") as f:
            f.write(str(self.proxy_process.pid))

    def _remove_pid_file(self):
        """Cleanup pid file for proxy after stopping"""
        if not self.pid_file:
            return
        self.log.debug("Removing proxy pid file %s", self.pid_file)
        try:
            os.remove(self.pid_file)
        except FileNotFoundError:
            self.log.debug("PID file %s already removed", self.pid_file)

    async def start(self):
        """Start the proxy process"""
        # check if there is a previous instance still around
        self._check_previous_process()

        # build the command to launch
        public_server = Server.from_url(self.public_url)
        api_server = Server.from_url(self.api_url)
        env = os.environ.copy()
        env['CONFIGPROXY_AUTH_TOKEN'] = self.auth_token
        cmd = self.command + [
            '--ip', public_server.ip,
            '--port', str(public_server.port),
            '--api-ip', api_server.ip,
            '--api-port', str(api_server.port),
            '--error-target', url_path_join(self.hub.url, 'error'),
        ]
        if self.app.subdomain_host:
            cmd.append('--host-routing')
        if self.debug:
            cmd.extend(['--log-level', 'debug'])
        if self.ssl_key:
            cmd.extend(['--ssl-key', self.ssl_key])
        if self.ssl_cert:
            cmd.extend(['--ssl-cert', self.ssl_cert])
        if self.app.statsd_host:
            cmd.extend([
                '--statsd-host', self.app.statsd_host,
                '--statsd-port', str(self.app.statsd_port),
                '--statsd-prefix', self.app.statsd_prefix + '.chp',
            ])
        # Warn if SSL is not used
        if ' --ssl' not in ' '.join(cmd):
            self.log.warning(
                "Running JupyterHub without SSL."
                " I hope there is SSL termination happening somewhere else..."
            )
        self.log.info("Starting proxy @ %s", public_server.bind_url)
        self.log.debug("Proxy cmd: %s", cmd)
        shell = os.name == 'nt'
        try:
            self.proxy_process = Popen(
                cmd, env=env, start_new_session=True, shell=shell
            )
        except FileNotFoundError as e:
            self.log.error(
                "Failed to find proxy %r\n"
                "The proxy can be installed with `npm install -g configurable-http-proxy`. "
                "To install `npm`, install nodejs which includes `npm`. "
                "If you see an `EACCES` error or permissions error, refer to the `npm` "
                "documentation on How To Prevent Permissions Errors." % self.command
            )
            raise

        self._write_pid_file()

        def _check_process():
            status = self.proxy_process.poll()
            if status is not None:
                e = RuntimeError("Proxy failed to start with exit code %i" % status)
                raise e from None

        for server in (public_server, api_server):
            for i in range(10):
                _check_process()
                try:
                    await server.wait_up(1)
                except TimeoutError:
                    continue
                else:
                    break
            await server.wait_up(1)
        _check_process()
        self.log.debug("Proxy started and appears to be up")
        pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval)
        self._check_running_callback = pc
        pc.start()

    def _terminate(self):
        """Terminate our process"""
        if os.name == 'nt':
            # On Windows we spawned a shell on Popen, so we need to
            # terminate all child processes as well
            import psutil

            parent = psutil.Process(self.proxy_process.pid)
            children = parent.children(recursive=True)
            for child in children:
                child.kill()
            psutil.wait_procs(children, timeout=5)
        else:
            self.proxy_process.terminate()

    def stop(self):
        self.log.info("Cleaning up proxy[%i]...", self.proxy_process.pid)
        if self._check_running_callback is not None:
            self._check_running_callback.stop()

        if self.proxy_process.poll() is None:
            try:
                self._terminate()
            except Exception as e:
                self.log.error("Failed to terminate proxy process: %s", e)
        self._remove_pid_file()

    async def check_running(self):
        """Check if the proxy is still running"""
        if self.proxy_process.poll() is None:
            return
        self.log.error(
            "Proxy stopped with exit code %r",
            'unknown' if self.proxy_process is None else self.proxy_process.poll(),
        )
        self._remove_pid_file()
        await self.start()
        await self.restore_routes()

    def _routespec_to_chp_path(self, routespec):
        """Turn a routespec into a CHP API path

        For host-based routing, CHP uses the host as the first path segment.
        """
        path = self.validate_routespec(routespec)
        # CHP always wants to start with /
        if not path.startswith('/'):
            path = '/' + path
        # BUG: CHP doesn't seem to like trailing slashes on some endpoints (DELETE)
        if path != '/' and path.endswith('/'):
            path = path.rstrip('/')
        return path

    def _routespec_from_chp_path(self, chp_path):
        """Turn a CHP route into a route spec

        In the JSON API, CHP route keys are unescaped, so re-escape them
        to raw URLs and ensure slashes are in the right places.
        """
        # chp stores routes in unescaped form.
        # restore escaped-form we created it with.
        routespec = quote(chp_path, safe='@/~')
        if self.host_routing:
            # host routes don't start with /
            routespec = routespec.lstrip('/')
        # all routes should end with /
        if not routespec.endswith('/'):
            routespec = routespec + '/'
        return routespec

    async def api_request(self, path, method='GET', body=None, client=None):
        """Make an authenticated API request of the proxy."""
        client = client or AsyncHTTPClient()
        url = url_path_join(self.api_url, 'api/routes', path)

        if isinstance(body, dict):
            body = json.dumps(body)
        self.log.debug("Proxy: Fetching %s %s", method, url)
        req = HTTPRequest(
            url,
            method=method,
            headers={'Authorization': 'token {}'.format(self.auth_token)},
            body=body,
        )
        async with self.semaphore:
            result = await client.fetch(req)
            return result

    async def add_route(self, routespec, target, data):
        body = data or {}
        body['target'] = target
        body['jupyterhub'] = True
        path = self._routespec_to_chp_path(routespec)
        await self.api_request(path, method='POST', body=body)

    async def delete_route(self, routespec):
        path = self._routespec_to_chp_path(routespec)
        try:
            await self.api_request(path, method='DELETE')
        except HTTPError as e:
            if e.code == 404:
                # Warn about 404s because something might be wrong
                # but don't raise because the route is gone,
                # which is the goal.
                self.log.warning("Route %s already deleted", routespec)
            else:
                raise

    def _reformat_routespec(self, routespec, chp_data):
        """Reformat CHP data format to JupyterHub's proxy API."""
        target = chp_data.pop('target')
        chp_data.pop('jupyterhub')
        return {
            'routespec': routespec,
            'target': target,
            'data': chp_data,
        }

    async def get_all_routes(self, client=None):
        """Fetch the proxy's routes."""
        resp = await self.api_request('', client=client)
        chp_routes = json.loads(resp.body.decode('utf8', 'replace'))
        all_routes = {}
        for chp_path, chp_data in chp_routes.items():
            routespec = self._routespec_from_chp_path(chp_path)
            if 'jupyterhub' not in chp_data:
                # exclude routes not associated with JupyterHub
                self.log.debug("Omitting non-jupyterhub route %r", routespec)
                continue
            all_routes[routespec] = self._reformat_routespec(routespec, chp_data)
        return all_routes
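
# The external-proxy setup mentioned in the class docstring, as a short
# jupyterhub_config.py sketch (the token and host values are placeholders):
c.ConfigurableHTTPProxy.should_start = False
c.ConfigurableHTTPProxy.auth_token = 'token-shared-with-the-chp-process'
c.ConfigurableHTTPProxy.api_url = 'http://proxy-host:8001'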
class LocalProcessSpawner(Spawner):
    """A Spawner that just uses Popen to start local processes as users.

    Requires users to exist on the local system.

    This is the default spawner for JupyterHub.
    """

    INTERRUPT_TIMEOUT = Integer(10,
        help="Seconds to wait for process to halt after SIGINT before proceeding to SIGTERM"
    ).tag(config=True)

    TERM_TIMEOUT = Integer(5,
        help="Seconds to wait for process to halt after SIGTERM before proceeding to SIGKILL"
    ).tag(config=True)

    KILL_TIMEOUT = Integer(5,
        help="Seconds to wait for process to halt after SIGKILL before giving up"
    ).tag(config=True)

    proc = Instance(Popen, allow_none=True)
    pid = Integer(0)

    def make_preexec_fn(self, name):
        return set_user_setuid(name)

    def load_state(self, state):
        """load pid from state"""
        super(LocalProcessSpawner, self).load_state(state)
        if 'pid' in state:
            self.pid = state['pid']

    def get_state(self):
        """add pid to state"""
        state = super(LocalProcessSpawner, self).get_state()
        if self.pid:
            state['pid'] = self.pid
        return state

    def clear_state(self):
        """clear pid state"""
        super(LocalProcessSpawner, self).clear_state()
        self.pid = 0

    def user_env(self, env):
        env['USER'] = self.user.name
        home = pwd.getpwnam(self.user.name).pw_dir
        shell = pwd.getpwnam(self.user.name).pw_shell
        # These will be empty if undefined,
        # in which case don't set the env:
        if home:
            env['HOME'] = home
        if shell:
            env['SHELL'] = shell
        return env

    def get_env(self):
        """Add user environment variables"""
        env = super().get_env()
        env = self.user_env(env)
        return env

    @gen.coroutine
    def start(self):
        """Start the process"""
        self.port = random_port()
        cmd = []
        env = self.get_env()

        cmd.extend(self.cmd)
        cmd.extend(self.get_args())

        self.log.info("Spawning %s", ' '.join(pipes.quote(s) for s in cmd))
        try:
            self.proc = Popen(cmd, env=env,
                preexec_fn=self.make_preexec_fn(self.user.name),
                start_new_session=True,  # don't forward signals
            )
        except PermissionError:
            # use which to get abspath
            script = shutil.which(cmd[0]) or cmd[0]
            self.log.error(
                "Permission denied trying to run %r. Does %s have access to this file?",
                script, self.user.name,
            )
            raise

        self.pid = self.proc.pid

        if self.__class__ is not LocalProcessSpawner:
            # subclasses may not pass through return value of super().start,
            # relying on deprecated 0.6 way of setting ip, port,
            # so keep a redundant copy here for now.
            # A deprecation warning will be shown if the subclass
            # does not return ip, port.
            if self.ip:
                self.user.server.ip = self.ip
            self.user.server.port = self.port

        return (self.ip or '127.0.0.1', self.port)

    @gen.coroutine
    def poll(self):
        """Poll the process"""
        # if we started the process, poll with Popen
        if self.proc is not None:
            status = self.proc.poll()
            if status is not None:
                # clear state if the process is done
                self.clear_state()
            return status

        # if we resumed from stored state,
        # we don't have the Popen handle anymore, so rely on self.pid
        if not self.pid:
            # no pid, not running
            self.clear_state()
            return 0

        # send signal 0 to check if PID exists
        # this doesn't work on Windows, but that's okay because we don't support Windows.
        alive = yield self._signal(0)
        if not alive:
            self.clear_state()
            return 0
        else:
            return None

    @gen.coroutine
    def _signal(self, sig):
        """simple implementation of signal,
        which we can use when we are using setuid (we are root)
        """
        try:
            os.kill(self.pid, sig)
        except OSError as e:
            if e.errno == errno.ESRCH:
                return False  # process is gone
            else:
                raise
        return True  # process exists

    @gen.coroutine
    def stop(self, now=False):
        """stop the subprocess

        if `now`, skip waiting for clean shutdown
        """
        if not now:
            status = yield self.poll()
            if status is not None:
                return
            self.log.debug("Interrupting %i", self.pid)
            yield self._signal(signal.SIGINT)
            yield self.wait_for_death(self.INTERRUPT_TIMEOUT)

        # clean shutdown failed, use TERM
        status = yield self.poll()
        if status is not None:
            return
        self.log.debug("Terminating %i", self.pid)
        yield self._signal(signal.SIGTERM)
        yield self.wait_for_death(self.TERM_TIMEOUT)

        # TERM failed, use KILL
        status = yield self.poll()
        if status is not None:
            return
        self.log.debug("Killing %i", self.pid)
        yield self._signal(signal.SIGKILL)
        yield self.wait_for_death(self.KILL_TIMEOUT)

        status = yield self.poll()
        if status is None:
            # it all failed, zombie process
            self.log.warning("Process %i never died", self.pid)
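
# The stop() escalation above (SIGINT -> SIGTERM -> SIGKILL) is tunable via
# the three timeout traits; a jupyterhub_config.py sketch with the defaults:
c.LocalProcessSpawner.INTERRUPT_TIMEOUT = 10  # wait after SIGINT
c.LocalProcessSpawner.TERM_TIMEOUT = 5        # then wait after SIGTERM
c.LocalProcessSpawner.KILL_TIMEOUT = 5        # then wait after SIGKILL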
class Spawner(LoggingConfigurable): """Base class for spawning single-user notebook servers. Subclass this, and override the following methods: - load_state - get_state - start - stop - poll """ db = Any() user = Any() hub = Any() api_token = Unicode() ip = Unicode( 'localhost', config=True, help= "The IP address (or hostname) the single-user server should listen on") start_timeout = Integer( 60, config=True, help="""Timeout (in seconds) before giving up on the spawner. This is the timeout for start to return, not the timeout for the server to respond. Callers of spawner.start will assume that startup has failed if it takes longer than this. start should return when the server process is started and its location is known. """) http_timeout = Integer( 30, config=True, help="""Timeout (in seconds) before giving up on a spawned HTTP server Once a server has successfully been spawned, this is the amount of time we wait before assuming that the server is unable to accept connections. """) poll_interval = Integer( 30, config=True, help="""Interval (in seconds) on which to poll the spawner.""") _callbacks = List() _poll_callback = Any() debug = Bool(False, config=True, help="Enable debug-logging of the single-user server") env_keep = List( [ 'PATH', 'PYTHONPATH', 'CONDA_ROOT', 'CONDA_DEFAULT_ENV', 'VIRTUAL_ENV', 'LANG', 'LC_ALL', ], config=True, help="Whitelist of environment variables for the subprocess to inherit" ) env = Dict() def _env_default(self): env = {} for key in self.env_keep: if key in os.environ: env[key] = os.environ[key] env['JPY_API_TOKEN'] = self.api_token return env cmd = Command(['jupyterhub-singleuser'], config=True, help="""The command used for starting notebooks.""") args = List( Unicode, config=True, help="""Extra arguments to be passed to the single-user server""") notebook_dir = Unicode( '', config=True, help="""The notebook directory for the single-user server `~` will be expanded to the user's home directory """) def __init__(self, **kwargs): super(Spawner, self).__init__(**kwargs) if self.user.state: self.load_state(self.user.state) def load_state(self, state): """load state from the database This is the extensible part of state Override in a subclass if there is state to load. Should call `super`. See Also -------- get_state, clear_state """ pass def get_state(self): """store the state necessary for load_state A black box of extra state for custom spawners. Should call `super`. Returns ------- state: dict a JSONable dict of state """ state = {} return state def clear_state(self): """clear any state that should be cleared when the process stops State that should be preserved across server instances should not be cleared. Subclasses should call super, to ensure that state is properly cleared. """ self.api_token = '' def get_args(self): """Return the arguments to be passed after self.cmd""" args = [ '--user=%s' % self.user.name, '--port=%i' % self.user.server.port, '--cookie-name=%s' % self.user.server.cookie_name, '--base-url=%s' % self.user.server.base_url, '--hub-prefix=%s' % self.hub.server.base_url, '--hub-api-url=%s' % self.hub.api_url, ] if self.ip: args.append('--ip=%s' % self.ip) if self.notebook_dir: args.append('--notebook-dir=%s' % self.notebook_dir) if self.debug: args.append('--debug') args.extend(self.args) return args @gen.coroutine def start(self): """Start the single-user process""" raise NotImplementedError( "Override in subclass. 
Must be a Tornado gen.coroutine.") @gen.coroutine def stop(self, now=False): """Stop the single-user process""" raise NotImplementedError( "Override in subclass. Must be a Tornado gen.coroutine.") @gen.coroutine def poll(self): """Check if the single-user process is running return None if it is, an exit status (0 if unknown) if it is not. """ raise NotImplementedError( "Override in subclass. Must be a Tornado gen.coroutine.") def add_poll_callback(self, callback, *args, **kwargs): """add a callback to fire when the subprocess stops as noticed by periodic poll_and_notify() """ if args or kwargs: cb = callback callback = lambda: cb(*args, **kwargs) self._callbacks.append(callback) def stop_polling(self): """stop the periodic poll""" if self._poll_callback: self._poll_callback.stop() self._poll_callback = None def start_polling(self): """Start polling periodically callbacks registered via `add_poll_callback` will fire if/when the process stops. Explicit termination via the stop method will not trigger the callbacks. """ if self.poll_interval <= 0: self.log.debug("Not polling subprocess") return else: self.log.debug("Polling subprocess every %is", self.poll_interval) self.stop_polling() self._poll_callback = PeriodicCallback(self.poll_and_notify, 1e3 * self.poll_interval) self._poll_callback.start() @gen.coroutine def poll_and_notify(self): """Used as a callback to periodically poll the process, and notify any watchers """ status = yield self.poll() if status is None: # still running, nothing to do here return self.stop_polling() add_callback = IOLoop.current().add_callback for callback in self._callbacks: add_callback(callback) death_interval = Float(0.1) @gen.coroutine def wait_for_death(self, timeout=10): """wait for the process to die, up to timeout seconds""" loop = IOLoop.current() for i in range(int(timeout / self.death_interval)): status = yield self.poll() if status is not None: break else: yield gen.Task(loop.add_timeout, loop.time() + self.death_interval)
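# ---------------------------------------------------------------------------
# A minimal sketch (not part of the original source) of the Spawner contract
# described above: subclasses override load_state/get_state/start/stop/poll.
# This hypothetical DummySpawner only records a fake PID; a real spawner
# would launch an actual single-user server process in start().
# ---------------------------------------------------------------------------
class DummySpawner(Spawner):

    def load_state(self, state):
        super(DummySpawner, self).load_state(state)
        self.pid = state.get('pid', 0)

    def get_state(self):
        state = super(DummySpawner, self).get_state()
        if getattr(self, 'pid', 0):
            state['pid'] = self.pid
        return state

    def clear_state(self):
        super(DummySpawner, self).clear_state()
        self.pid = 0

    @gen.coroutine
    def start(self):
        # A real implementation would Popen(self.cmd + self.get_args(),
        # env=self.env) and record the process handle here.
        self.pid = 12345  # illustrative placeholder

    @gen.coroutine
    def stop(self, now=False):
        self.pid = 0

    @gen.coroutine
    def poll(self):
        # None means "still running"; an integer is the exit status.
        raise gen.Return(None if self.pid else 0)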
class ConnectionFileMixin(LoggingConfigurable):
    """Mixin for configurable classes that work with connection files"""

    data_dir = Unicode()

    def _data_dir_default(self):
        return jupyter_data_dir()

    # The addresses for the communication channels
    connection_file = Unicode(
        '',
        config=True,
        help="""JSON file in which to store connection info [default: kernel-<pid>.json]

        This file will contain the IP, ports, and authentication key needed to connect
        clients to this kernel. By default, this file will be created in the security dir
        of the current profile, but can be specified by absolute path.
        """)

    _connection_file_written = Bool(False)

    transport = CaselessStrEnum(['tcp', 'ipc'], default_value='tcp', config=True)

    kernel_name = Unicode()

    ip = Unicode(config=True,
                 help="""Set the kernel's IP address [default localhost].

        If the IP address is something other than localhost, then
        Consoles on other machines will be able to connect
        to the Kernel, so be careful!""")

    def _ip_default(self):
        if self.transport == 'ipc':
            if self.connection_file:
                return os.path.splitext(self.connection_file)[0] + '-ipc'
            else:
                return 'kernel-ipc'
        else:
            return localhost()

    def _ip_changed(self, name, old, new):
        if new == '*':
            self.ip = '0.0.0.0'

    # protected traits
    hb_port = Integer(0, config=True,
                      help="set the heartbeat port [default: random]")
    shell_port = Integer(0, config=True,
                         help="set the shell (ROUTER) port [default: random]")
    iopub_port = Integer(0, config=True,
                         help="set the iopub (PUB) port [default: random]")
    stdin_port = Integer(0, config=True,
                         help="set the stdin (ROUTER) port [default: random]")
    control_port = Integer(0, config=True,
                           help="set the control (ROUTER) port [default: random]")

    # names of the ports with random assignment
    _random_port_names = None

    @property
    def ports(self):
        return [getattr(self, name) for name in port_names]

    # The Session to use for communication with the kernel.
    session = Instance('jupyter_client.session.Session')

    def _session_default(self):
        from jupyter_client.session import Session
        return Session(parent=self)

    #--------------------------------------------------------------------------
    # Connection and ipc file management
    #--------------------------------------------------------------------------

    def get_connection_info(self, session=False):
        """Return the connection info as a dict

        Parameters
        ----------
        session : bool [default: False]
            If True, our session object will be included in the connection info.
            If False (default), the configuration parameters of our session
            object will be included, rather than the session object itself.

        Returns
        -------
        connect_info : dict
            dictionary of connection information.
        """
        info = dict(
            transport=self.transport,
            ip=self.ip,
            shell_port=self.shell_port,
            iopub_port=self.iopub_port,
            stdin_port=self.stdin_port,
            hb_port=self.hb_port,
            control_port=self.control_port,
        )
        if session:
            # add *clone* of my session,
            # so that state such as digest_history is not shared.
info['session'] = self.session.clone() else: # add session info info.update( dict( signature_scheme=self.session.signature_scheme, key=self.session.key, )) return info # factory for blocking clients blocking_class = Type(klass=object, default_value='jupyter_client.BlockingKernelClient') def blocking_client(self): """Make a blocking client connected to my kernel""" info = self.get_connection_info() info['parent'] = self bc = self.blocking_class(**info) bc.session.key = self.session.key return bc def cleanup_connection_file(self): """Cleanup connection file *if we wrote it* Will not raise if the connection file was already removed somehow. """ if self._connection_file_written: # cleanup connection files on full shutdown of kernel we started self._connection_file_written = False try: os.remove(self.connection_file) except (IOError, OSError, AttributeError): pass def cleanup_ipc_files(self): """Cleanup ipc files if we wrote them.""" if self.transport != 'ipc': return for port in self.ports: ipcfile = "%s-%i" % (self.ip, port) try: os.remove(ipcfile) except (IOError, OSError): pass def _record_random_port_names(self): """Records which of the ports are randomly assigned. Records on first invocation, if the transport is tcp. Does nothing on later invocations.""" if self.transport != 'tcp': return if self._random_port_names is not None: return self._random_port_names = [] for name in port_names: if getattr(self, name) <= 0: self._random_port_names.append(name) def cleanup_random_ports(self): """Forgets randomly assigned port numbers and cleans up the connection file. Does nothing if no port numbers have been randomly assigned. In particular, does nothing unless the transport is tcp. """ if not self._random_port_names: return for name in self._random_port_names: setattr(self, name, 0) self.cleanup_connection_file() def write_connection_file(self): """Write connection info to JSON dict in self.connection_file.""" if self._connection_file_written and os.path.exists( self.connection_file): return self.connection_file, cfg = write_connection_file( self.connection_file, transport=self.transport, ip=self.ip, key=self.session.key, stdin_port=self.stdin_port, iopub_port=self.iopub_port, shell_port=self.shell_port, hb_port=self.hb_port, control_port=self.control_port, signature_scheme=self.session.signature_scheme, kernel_name=self.kernel_name) # write_connection_file also sets default ports: self._record_random_port_names() for name in port_names: setattr(self, name, cfg[name]) self._connection_file_written = True def load_connection_file(self, connection_file=None): """Load connection info from JSON dict in self.connection_file. Parameters ---------- connection_file: unicode, optional Path to connection file to load. If unspecified, use self.connection_file """ if connection_file is None: connection_file = self.connection_file self.log.debug(u"Loading connection file %s", connection_file) with open(connection_file) as f: info = json.load(f) self.load_connection_info(info) def load_connection_info(self, info): """Load connection info from a dict containing connection info. Typically this data comes from a connection file and is called by load_connection_file. Parameters ---------- info: dict Dictionary containing connection_info. See the connection_file spec for details. 
""" self.transport = info.get('transport', self.transport) self.ip = info.get('ip', self._ip_default()) self._record_random_port_names() for name in port_names: if getattr(self, name) == 0 and name in info: # not overridden by config or cl_args setattr(self, name, info[name]) if 'key' in info: self.session.key = cast_bytes(info['key']) if 'signature_scheme' in info: self.session.signature_scheme = info['signature_scheme'] #-------------------------------------------------------------------------- # Creating connected sockets #-------------------------------------------------------------------------- def _make_url(self, channel): """Make a ZeroMQ URL for a given channel.""" transport = self.transport ip = self.ip port = getattr(self, '%s_port' % channel) if transport == 'tcp': return "tcp://%s:%i" % (ip, port) else: return "%s://%s-%s" % (transport, ip, port) def _create_connected_socket(self, channel, identity=None): """Create a zmq Socket and connect it to the kernel.""" url = self._make_url(channel) socket_type = channel_socket_types[channel] self.log.debug("Connecting to: %s" % url) sock = self.context.socket(socket_type) # set linger to 1s to prevent hangs at exit sock.linger = 1000 if identity: sock.identity = identity sock.connect(url) return sock def connect_iopub(self, identity=None): """return zmq Socket connected to the IOPub channel""" sock = self._create_connected_socket('iopub', identity=identity) sock.setsockopt(zmq.SUBSCRIBE, b'') return sock def connect_shell(self, identity=None): """return zmq Socket connected to the Shell channel""" return self._create_connected_socket('shell', identity=identity) def connect_stdin(self, identity=None): """return zmq Socket connected to the StdIn channel""" return self._create_connected_socket('stdin', identity=identity) def connect_hb(self, identity=None): """return zmq Socket connected to the Heartbeat channel""" return self._create_connected_socket('hb', identity=identity) def connect_control(self, identity=None): """return zmq Socket connected to the Control channel""" return self._create_connected_socket('control', identity=identity)
class TerminalInteractiveShell(InteractiveShell):
    colors_force = True

    space_for_menu = Integer(
        6, help='Number of lines at the bottom of the screen '
        'to reserve for the completion menu').tag(config=True)

    def _space_for_menu_changed(self, old, new):
        self._update_layout()

    pt_cli = None
    debugger_history = None

    simple_prompt = Bool(
        _use_simple_prompt,
        help="""Use `raw_input` for the REPL, without completion, multiline
        input, and prompt colors.

        Useful when controlling IPython as a subprocess, and piping
        STDIN/OUT/ERR. Known use cases are: IPython's own testing machinery,
        and emacs' inferior-shell integration through elpy.

        This mode defaults to `True` if the `IPY_TEST_SIMPLE_PROMPT`
        environment variable is set, or the current terminal is not a tty.
        """).tag(config=True)

    @property
    def debugger_cls(self):
        return Pdb if self.simple_prompt else TerminalPdb

    autoedit_syntax = Bool(
        False,
        help="Auto-edit files with syntax errors.",
    ).tag(config=True)

    confirm_exit = Bool(
        True,
        help="""
        Set to confirm when you try to exit IPython with an EOF (Control-D
        in Unix, Control-Z/Enter in Windows). By typing 'exit' or 'quit',
        you can force a direct exit without any confirmation.""",
    ).tag(config=True)

    editing_mode = Unicode(
        'emacs',
        help="Shortcut style to use at the prompt. 'vi' or 'emacs'.",
    ).tag(config=True)

    mouse_support = Bool(
        False, help="Enable mouse support in the prompt").tag(config=True)

    highlighting_style = Unicode(
        'default',
        help="The name of a Pygments style to use for syntax highlighting: \n %s"
        % ', '.join(get_all_styles())).tag(config=True)

    @observe('highlighting_style')
    def _highlighting_style_changed(self, change):
        self._style = self._make_style_from_name(self.highlighting_style)

    highlighting_style_overrides = Dict(
        help="Override highlighting format for specific tokens").tag(config=True)

    editor = Unicode(
        get_default_editor(),
        help="Set the editor used by IPython (defaults to $EDITOR/vi/notepad)."
    ).tag(config=True)

    prompts_class = Type(
        Prompts,
        help='Class used to generate Prompt token for prompt_toolkit').tag(config=True)

    prompts = Instance(Prompts)

    @default('prompts')
    def _prompts_default(self):
        return self.prompts_class(self)

    @observe('prompts')
    def _(self, change):
        self._update_layout()

    @default('displayhook_class')
    def _displayhook_class_default(self):
        return RichPromptDisplayHook

    term_title = Bool(
        True, help="Automatically set the terminal title").tag(config=True)

    display_completions_in_columns = Bool(
        False,
        help="Display a multi column completion menu.",
    ).tag(config=True)

    highlight_matching_brackets = Bool(
        True,
        help="Highlight matching brackets.",
    ).tag(config=True)

    @observe('term_title')
    def init_term_title(self, change=None):
        # Enable or disable the terminal title.
        if self.term_title:
            toggle_set_term_title(True)
            set_term_title('IPython: ' + abbrev_cwd())
        else:
            toggle_set_term_title(False)

    def init_prompt_toolkit_cli(self):
        self._app = None
        if self.simple_prompt:
            # Fall back to plain non-interactive output for tests.
            # This is very limited, and only accepts a single line.
            def prompt():
                return cast_unicode_py2(
                    input('In [%d]: ' % self.execution_count))

            self.prompt_for_code = prompt
            return

        kbmanager = KeyBindingManager.for_prompt()
        insert_mode = ViInsertMode() | EmacsInsertMode()

        # Ctrl+J == Enter, seemingly
        @kbmanager.registry.add_binding(Keys.ControlJ,
                                        filter=(HasFocus(DEFAULT_BUFFER)
                                                & ~HasSelection()
                                                & insert_mode))
        def _(event):
            b = event.current_buffer
            d = b.document
            if b.complete_state:
                cs = b.complete_state
                b.apply_completion(cs.current_completion)
                return

            if not (d.on_last_line or d.cursor_position_row >=
                    d.line_count - d.empty_line_count_at_the_end()):
                b.newline()
                return

            status, indent = self.input_splitter.check_complete(d.text)

            if (status != 'incomplete') and b.accept_action.is_returnable:
                b.accept_action.validate_and_handle(event.cli, b)
            else:
                b.insert_text('\n' + (' ' * (indent or 0)))

        @kbmanager.registry.add_binding(Keys.ControlC,
                                        filter=HasFocus(DEFAULT_BUFFER))
        def _reset_buffer(event):
            event.current_buffer.reset()

        @kbmanager.registry.add_binding(Keys.ControlC,
                                        filter=HasFocus(SEARCH_BUFFER))
        def _reset_search_buffer(event):
            if event.current_buffer.document.text:
                event.current_buffer.reset()
            else:
                event.cli.push_focus(DEFAULT_BUFFER)

        supports_suspend = Condition(lambda cli: hasattr(signal, 'SIGTSTP'))

        @kbmanager.registry.add_binding(Keys.ControlZ, filter=supports_suspend)
        def _suspend_to_bg(event):
            event.cli.suspend_to_background()

        @Condition
        def cursor_in_leading_ws(cli):
            before = cli.application.buffer.document.current_line_before_cursor
            return (not before) or before.isspace()

        # Ctrl+I == Tab
        @kbmanager.registry.add_binding(Keys.ControlI,
                                        filter=(HasFocus(DEFAULT_BUFFER)
                                                & ~HasSelection()
                                                & insert_mode
                                                & cursor_in_leading_ws))
        def _indent_buffer(event):
            event.current_buffer.insert_text(' ' * 4)

        # Pre-populate history from IPython's history database
        history = InMemoryHistory()
        last_cell = u""
        for __, ___, cell in self.history_manager.get_tail(
                self.history_load_length, include_latest=True):
            # Ignore blank lines and consecutive duplicates
            cell = cell.rstrip()
            if cell and (cell != last_cell):
                history.append(cell)
                last_cell = cell

        self._style = self._make_style_from_name(self.highlighting_style)
        style = DynamicStyle(lambda: self._style)

        editing_mode = getattr(EditingMode, self.editing_mode.upper())

        self._app = create_prompt_application(
            editing_mode=editing_mode,
            key_bindings_registry=kbmanager.registry,
            history=history,
            completer=IPythonPTCompleter(self.Completer),
            enable_history_search=True,
            style=style,
            mouse_support=self.mouse_support,
            **self._layout_options())
        self._eventloop = create_eventloop(self.inputhook)
        self.pt_cli = CommandLineInterface(self._app, eventloop=self._eventloop)

    def _make_style_from_name(self, name):
        """Small wrapper that makes an IPython-compatible style from a style
        name. We need this to add styles for the prompt, etc.
        """
        style_cls = get_style_by_name(name)
        style_overrides = {
            Token.Prompt: '#009900',
            Token.PromptNum: '#00ff00 bold',
            Token.OutPrompt: '#990000',
            Token.OutPromptNum: '#ff0000 bold',
        }
        if name == 'default':
            style_cls = get_style_by_name('default')
            # The default theme needs to be visible on both a dark background
            # and a light background, because we can't tell what the terminal
            # looks like. These tweaks to the default theme help with that.
            style_overrides.update({
                Token.Number: '#007700',
                Token.Operator: 'noinherit',
                Token.String: '#BB6622',
                Token.Name.Function: '#2080D0',
                Token.Name.Class: 'bold #2080D0',
                Token.Name.Namespace: 'bold #2080D0',
            })
        style_overrides.update(self.highlighting_style_overrides)
        style = PygmentsStyle.from_defaults(pygments_style_cls=style_cls,
                                            style_dict=style_overrides)

        return style

    def _layout_options(self):
        """Return the current layout options for the current
        TerminalInteractiveShell.
        """
        return {
            'lexer': IPythonPTLexer(),
            'reserve_space_for_menu': self.space_for_menu,
            'get_prompt_tokens': self.prompts.in_prompt_tokens,
            'get_continuation_tokens': self.prompts.continuation_prompt_tokens,
            'multiline': True,
            'display_completions_in_columns': self.display_completions_in_columns,

            # Highlight matching brackets, but only when this setting is
            # enabled, and only when the DEFAULT_BUFFER has the focus.
            'extra_input_processors': [
                ConditionalProcessor(
                    processor=HighlightMatchingBracketProcessor(chars='[](){}'),
                    filter=HasFocus(DEFAULT_BUFFER) & ~IsDone()
                    & Condition(lambda cli: self.highlight_matching_brackets))
            ],
        }

    def _update_layout(self):
        """Ask for a recomputation of the application layout, if, for example,
        some configuration options have changed.
        """
        if getattr(self, '_app', None):
            self._app.layout = create_prompt_layout(**self._layout_options())

    def prompt_for_code(self):
        document = self.pt_cli.run(pre_run=self.pre_prompt,
                                   reset_current_buffer=True)
        return document.text

    def init_io(self):
        if sys.platform not in {'win32', 'cli'}:
            return

        import colorama
        colorama.init()

        # For some reason we make these wrappers around stdout/stderr.
        # For now, we need to reset them so all output gets coloured.
        # https://github.com/ipython/ipython/issues/8669
        from IPython.utils import io
        io.stdout = io.IOStream(sys.stdout)
        io.stderr = io.IOStream(sys.stderr)

    def init_magics(self):
        super(TerminalInteractiveShell, self).init_magics()
        self.register_magics(TerminalMagics)

    def init_alias(self):
        # The parent class defines aliases that can be safely used with any
        # frontend.
        super(TerminalInteractiveShell, self).init_alias()

        # Now define aliases that only make sense on the terminal, because they
        # need direct access to the console in a way that we can't emulate in
        # GUI or web frontend
        if os.name == 'posix':
            for cmd in ['clear', 'more', 'less', 'man']:
                self.alias_manager.soft_define_alias(cmd, cmd)

    def __init__(self, *args, **kwargs):
        super(TerminalInteractiveShell, self).__init__(*args, **kwargs)
        self.init_prompt_toolkit_cli()
        self.init_term_title()
        self.keep_running = True

        self.debugger_history = InMemoryHistory()

    def ask_exit(self):
        self.keep_running = False

    rl_next_input = None

    def pre_prompt(self):
        if self.rl_next_input:
            self.pt_cli.application.buffer.text = cast_unicode_py2(
                self.rl_next_input)
            self.rl_next_input = None

    def interact(self):
        while self.keep_running:
            print(self.separate_in, end='')

            try:
                code = self.prompt_for_code()
            except EOFError:
                if (not self.confirm_exit) \
                        or self.ask_yes_no('Do you really want to exit ([y]/n)?', 'y', 'n'):
                    self.ask_exit()
            else:
                if code:
                    self.run_cell(code, store_history=True)
                    if self.autoedit_syntax and self.SyntaxTB.last_syntax_error:
                        self.edit_syntax_error()

    def mainloop(self):
        # An extra layer of protection in case someone mashing Ctrl-C breaks
        # out of our internal code.
while True: try: self.interact() break except KeyboardInterrupt: print("\nKeyboardInterrupt escaped interact()\n") if hasattr(self, '_eventloop'): self._eventloop.close() _inputhook = None def inputhook(self, context): if self._inputhook is not None: self._inputhook(context) def enable_gui(self, gui=None): if gui: self._inputhook = get_inputhook_func(gui) else: self._inputhook = None # Methods to support auto-editing of SyntaxErrors: def edit_syntax_error(self): """The bottom half of the syntax error handler called in the main loop. Loop until syntax error is fixed or user cancels. """ while self.SyntaxTB.last_syntax_error: # copy and clear last_syntax_error err = self.SyntaxTB.clear_err_state() if not self._should_recompile(err): return try: # may set last_syntax_error again if a SyntaxError is raised self.safe_execfile(err.filename, self.user_ns) except: self.showtraceback() else: try: with open(err.filename) as f: # This should be inside a display_trap block and I # think it is. sys.displayhook(f.read()) except: self.showtraceback() def _should_recompile(self, e): """Utility routine for edit_syntax_error""" if e.filename in ('<ipython console>', '<input>', '<string>', '<console>', '<BackgroundJob compilation>', None): return False try: if (self.autoedit_syntax and not self.ask_yes_no( 'Return to editor to correct syntax error? ' '[Y/n] ', 'y')): return False except EOFError: return False def int0(x): try: return int(x) except TypeError: return 0 # always pass integer line and offset values to editor hook try: self.hooks.fix_error_editor(e.filename, int0(e.lineno), int0(e.offset), e.msg) except TryNext: warn('Could not open editor') return False return True # Run !system commands directly, not through pipes, so terminal programs # work correctly. system = InteractiveShell.system_raw def auto_rewrite_input(self, cmd): """Overridden from the parent class to use fancy rewriting prompt""" if not self.show_rewritten_input: return tokens = self.prompts.rewrite_prompt_tokens() if self.pt_cli: self.pt_cli.print_tokens(tokens) print(cmd) else: prompt = ''.join(s for t, s in tokens) print(prompt, cmd, sep='') _prompts_before = None def switch_doctest_mode(self, mode): """Switch prompts to classic for %doctest_mode""" if mode: self._prompts_before = self.prompts self.prompts = ClassicPrompts(self) elif self._prompts_before: self.prompts = self._prompts_before self._prompts_before = None
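# ---------------------------------------------------------------------------
# A minimal usage sketch (not part of the original source), assuming the
# standard IPython import layout: TerminalInteractiveShell is a singleton
# Configurable, so it is normally obtained via .instance() and driven by
# mainloop(), which wraps interact() in the KeyboardInterrupt guard above.
# The config values shown are illustrative.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from traitlets.config import Config

    c = Config()
    c.TerminalInteractiveShell.editing_mode = 'vi'       # or 'emacs'
    c.TerminalInteractiveShell.confirm_exit = False
    c.TerminalInteractiveShell.highlighting_style = 'monokai'

    shell = TerminalInteractiveShell.instance(config=c)
    shell.mainloop()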
class OpaEntryKernelManager(MappingKernelManager):
    caller_kid = Integer(config=True, default_value=-1)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _attach_to_latest_kernel(self, kernel_id):
        self.log.info(f'Attaching {kernel_id} to an existing kernel...')
        kernel = self._kernels[kernel_id]
        port_names = [
            'shell_port', 'stdin_port', 'iopub_port', 'hb_port', 'control_port'
        ]
        port_names = kernel._random_port_names if hasattr(
            kernel, '_random_port_names') else port_names
        for port_name in port_names:
            setattr(kernel, port_name, 0)
        # Reload ports and key from the kernel's own connection file
        # (load_connection_file falls back to kernel.connection_file when
        # no path is given).
        kernel.load_connection_file()

    @gen.coroutine
    def start_kernel(self, kernel_name=None, **kwargs):
        self.log.info('Starting kernel: %s', kernel_name)
        if kernel_name.startswith('kernel_'):
            kernel_id = kwargs.pop('kernel_id', unicode_type(uuid.uuid4()))
            constructor_kwargs = {}
            if self.kernel_spec_manager:
                constructor_kwargs['kernel_spec_manager'] = self.kernel_spec_manager
            km = OpaKernelManager(
                parent=self,
                log=self.log,
                kernel_name=kernel_name,
                **constructor_kwargs,
            )
            km.kernel_spec_manager = OpaKernelSpecManager(parent=self.parent)
            km.start_kernel(**kwargs)
            self._kernels[kernel_id] = km
            self.start_watching_activity(kernel_id)
            self._kernel_connections[kernel_id] = 0
            raise gen.Return(kernel_id)
        else:
            return super().start_kernel(kernel_name=kernel_name, **kwargs)

    def restart_kernel(self, kernel_id=None):
        # Restarting attached kernels is not supported.
        raise NotImplementedError(
            'restart_kernel is not supported by OpaEntryKernelManager')

    def list_kernels(self):
        res = super().list_kernels()
        self.log.debug('Kernel list: %s', res)
        return res
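# ---------------------------------------------------------------------------
# A minimal sketch (not part of the original source) of the "attach to an
# existing kernel" idea used by _attach_to_latest_kernel() above: zero the
# randomly-assigned ports so load_connection_file() takes the values from
# the JSON file instead of keeping stale ones. Uses only jupyter_client
# APIs shown elsewhere in this file; the path is an illustrative assumption.
# ---------------------------------------------------------------------------
from jupyter_client.manager import KernelManager

km = KernelManager(connection_file='/tmp/kernel-existing.json')  # hypothetical path
for port_name in ('shell_port', 'stdin_port', 'iopub_port', 'hb_port', 'control_port'):
    setattr(km, port_name, 0)  # 0 means "take the port from the file"
km.load_connection_file()
client = km.client()  # a client wired to the already-running kernel
client.start_channels()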
class Voila(Application):
    name = 'voila'
    version = __version__
    examples = 'voila example.ipynb --port 8888'

    description = Unicode("""voila [OPTIONS] NOTEBOOK_FILENAME

    This launches a stand-alone server for read-only notebooks.
    """)
    option_description = Unicode("""
    notebook_path:
        File name of the Jupyter notebook to display.
    """)

    notebook_filename = Unicode()

    strip_sources = Bool(
        True, help='Strip sources from rendered html').tag(config=True)

    port = Integer(8866, config=True,
                   help='Port of the voila server. Default 8866.')

    autoreload = Bool(
        False,
        config=True,
        help='Will autoreload the server and the page when a template, '
             'js file or Python code changes')

    root_dir = Unicode(config=True, help="The directory to use for notebooks.")

    static_root = Unicode(
        STATIC_ROOT,
        config=True,
        help='Directory holding static assets (HTML, JS and CSS files).')

    aliases = {
        'port': 'Voila.port',
        'static': 'Voila.static_root',
        'strip_sources': 'Voila.strip_sources',
        'autoreload': 'Voila.autoreload',
        'template': 'Voila.template'
    }

    connection_dir_root = Unicode(
        config=True,
        help=('Location of temporary connection files. Defaults '
              'to system `tempfile.gettempdir()` value.'))
    connection_dir = Unicode()

    template = Unicode('default', config=True, allow_none=True,
                       help=('template name to be used by voila.'))

    notebook_path = Unicode(None, config=True, allow_none=True,
                            help=('path to notebook to serve with voila'))

    nbconvert_template_paths = List([], config=True,
                                    help=('path to nbconvert templates'))

    template_paths = List([], allow_none=True, config=True,
                          help=('path to nbconvert templates'))

    static_paths = List([STATIC_ROOT], config=True,
                        help=('paths to static assets'))

    config_file_paths = List(Unicode(), config=True,
                             help='Paths to search for voila.(py|json)')

    tornado_settings = Dict(
        {},
        config=True,
        help=('Extra settings to apply to tornado application, e.g.
headers, ssl, etc' )) @default('config_file_paths') def _config_file_paths_default(self): return [os.getcwd()] + jupyter_config_path() @default('connection_dir_root') def _default_connection_dir(self): connection_dir = tempfile.gettempdir() self.log.info('Using %s to store connection files' % connection_dir) return connection_dir @default('log_level') def _default_log_level(self): return logging.INFO # similar to NotebookApp, except no extra path @property def nbextensions_path(self): """The path to look for Javascript notebook extensions""" path = jupyter_path('nbextensions') # FIXME: remove IPython nbextensions path after a migration period try: from IPython.paths import get_ipython_dir except ImportError: pass else: path.append(os.path.join(get_ipython_dir(), 'nbextensions')) return path @default('root_dir') def _default_root_dir(self): if self.notebook_path: return os.path.dirname(os.path.abspath(self.notebook_path)) else: return getcwd() def initialize(self, argv=None): self.log.debug("Searching path %s for config files", self.config_file_paths) # to make config_file_paths settable via cmd line, we first need to parse it super(Voila, self).initialize(argv) self.notebook_path = self.notebook_path if self.notebook_path else self.extra_args[ 0] if len(self.extra_args) == 1 else None # then we load the config self.load_config_file('voila', path=self.config_file_paths) # but that cli config has preference, so we overwrite with that self.update_config(self.cli_config) self.setup_template_dirs() signal.signal(signal.SIGTERM, self._handle_signal_stop) def setup_template_dirs(self): if self.template: collect_template_paths(self.nbconvert_template_paths, self.static_paths, self.template_paths, self.template) self.log.debug('using template: %s', self.template) self.log.debug('nbconvert template paths: %s', self.nbconvert_template_paths) self.log.debug('template paths: %s', self.template_paths) self.log.debug('static paths: %s', self.static_paths) if self.notebook_path and not os.path.exists(self.notebook_path): raise ValueError('Notebook not found: %s' % self.notebook_path) def _handle_signal_stop(self, sig, frame): self.log.info('Handle signal %s.' % sig) self.ioloop.add_callback_from_signal(self.ioloop.stop) def start(self): self.connection_dir = tempfile.mkdtemp(prefix='voila_', dir=self.connection_dir_root) self.log.info('Storing connection files in %s.' % self.connection_dir) self.log.info('Serving static files from %s.' 
% self.static_root) self.kernel_manager = MappingKernelManager( parent=self, connection_dir=self.connection_dir, allowed_message_types=[ 'comm_msg', 'comm_info_request', 'kernel_info_request', 'shutdown_request' ]) jenv_opt = { "autoescape": True } # we might want extra options via cmd line like notebook server env = jinja2.Environment(loader=jinja2.FileSystemLoader( self.template_paths), extensions=['jinja2.ext.i18n'], **jenv_opt) nbui = gettext.translation('nbui', localedir=os.path.join(ROOT, 'i18n'), fallback=True) env.install_gettext_translations(nbui, newstyle=False) self.contents_manager = LargeFileManager(parent=self) # we create a config manager that load both the serverconfig and nbconfig (classical notebook) read_config_path = [ os.path.join(p, 'serverconfig') for p in jupyter_config_path() ] read_config_path += [ os.path.join(p, 'nbconfig') for p in jupyter_config_path() ] self.config_manager = ConfigManager(parent=self, read_config_path=read_config_path) self.app = tornado.web.Application( kernel_manager=self.kernel_manager, allow_remote_access=True, autoreload=self.autoreload, voila_jinja2_env=env, jinja2_env=env, static_path='/', server_root_dir='/', contents_manager=self.contents_manager, config_manager=self.config_manager) base_url = self.app.settings.get('base_url', '/') self.app.settings.update(self.tornado_settings) handlers = [] handlers.extend([ (url_path_join(base_url, r'/api/kernels/%s' % _kernel_id_regex), KernelHandler), (url_path_join(base_url, r'/api/kernels/%s/channels' % _kernel_id_regex), ZMQChannelsHandler), (url_path_join(base_url, r'/voila/static/(.*)'), MultiStaticFileHandler, { 'paths': self.static_paths, 'default_filename': 'index.html' }) ]) # this handler serves the nbextensions similar to the classical notebook handlers.append(( url_path_join(base_url, r'/voila/nbextensions/(.*)'), FileFindHandler, { 'path': self.nbextensions_path, 'no_cache_paths': ['/'], # don't cache anything in nbextensions }, )) if self.notebook_path: handlers.append((url_path_join(base_url, r'/'), VoilaHandler, { 'notebook_path': os.path.relpath(self.notebook_path, self.root_dir), 'strip_sources': self.strip_sources, 'nbconvert_template_paths': self.nbconvert_template_paths, 'template_name': self.template, 'config': self.config })) else: self.log.debug('serving directory: %r', self.root_dir) handlers.extend([ (base_url, VoilaTreeHandler), (url_path_join(base_url, r'/voila/tree' + path_regex), VoilaTreeHandler), (url_path_join(base_url, r'/voila/render' + path_regex), VoilaHandler, { 'strip_sources': self.strip_sources, 'nbconvert_template_paths': self.nbconvert_template_paths, 'config': self.config }), ]) self.app.add_handlers('.*$', handlers) self.listen() def listen(self): self.app.listen(self.port) self.log.info('Voila listening on port %s.' % self.port) self.ioloop = tornado.ioloop.IOLoop.current() try: self.ioloop.start() except KeyboardInterrupt: self.log.info('Stopping...') finally: shutil.rmtree(self.connection_dir) self.kernel_manager.shutdown_all()
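# ---------------------------------------------------------------------------
# A minimal sketch (not part of the original source) of configuring the
# Voila application above through a `voila.py` config file, which
# initialize() discovers via config_file_paths (the cwd plus
# jupyter_config_path()). All values shown are illustrative assumptions.
# ---------------------------------------------------------------------------
# voila.py -- loaded by `voila example.ipynb` from the working directory
c.Voila.port = 8867
c.Voila.strip_sources = False
c.Voila.autoreload = True
c.Voila.template = 'default'
c.Voila.tornado_settings = {
    'headers': {'Content-Security-Policy': "frame-ancestors 'self'"}
}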
class SentinelTable(DOMWidget):
    _view_name = Unicode('SentinelTable').tag(sync=True)
    _view_module = Unicode('sentinel_table').tag(sync=True)
    _view_module_version = Unicode('0.1.0').tag(sync=True)

    # Attributes
    entry_list = List().tag(sync=True)
    entry_descriptions = List().tag(sync=True)
    entry_descriptions_metadata = List().tag(sync=True)
    class_list = List().tag(sync=True)
    table_header = Unicode("").tag(sync=True)
    selected_value_index = Integer(
        -1, help="The index of the selected value.").tag(sync=True)
    disabled = Bool(False, help="Enable or disable user changes.").tag(sync=True)
    reset_flag = Bool(False).tag(sync=True)
    expandable = Bool(False).tag(sync=True)

    def __init__(self,
                 list_reference: list,
                 class_list: list,
                 table_header: str = "No title",
                 expandable: bool = False,
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self._observers = []
        self.entry_list = list_reference
        self.class_list = class_list
        self.table_header = table_header
        self.expandable = expandable
        self.js_handler = TableJavascriptHandler()
        self.js_handler.activate_javascript()

    def add_observer(self, observ):
        self._observers.append(observ)

    @observe('selected_value_index')
    def on_selection_change(self, change):
        for observ in self._observers:
            observ.update_observers(self.selected_value_index)

    def set_entries(self, list_data, entry_descriptions=None):
        self.entry_list = list_data
        if not entry_descriptions:
            self.entry_descriptions = [""] * len(self.entry_list)
        else:
            self.entry_descriptions = entry_descriptions

    @validate('selected_value_index')
    def _valid_value(self, proposal):
        # Accept -1 (no selection) or any valid index into entry_list.
        if not -1 <= proposal['value'] < len(self.entry_list):
            raise TraitError('Selected index is out of range')
        return proposal['value']

    def get_selected_value(self):
        if self.selected_value_index >= 0:
            return self.entry_list[self.selected_value_index]
        else:
            return None

    def clear_selected(self):
        self.reset_flag = not self.reset_flag

    def append_class(self, class_name):
        new_list = list(self.class_list)
        new_list.append(class_name)
        self.class_list = new_list

    def erase_class(self, class_name):
        new_list = list(self.class_list)
        new_list.remove(class_name)
        self.class_list = new_list

    def is_full(self):
        # Mark the table as non-empty by dropping the 'empty' CSS class.
        if "empty" in self.class_list:
            self.erase_class("empty")

    def is_empty(self):
        # Mark the table as empty by adding the 'empty' CSS class.
        if "empty" not in self.class_list:
            self.append_class("empty")
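# ---------------------------------------------------------------------------
# A minimal usage sketch (not part of the original source) for the widget
# above, intended for a notebook cell with the matching frontend extension
# installed. PrintingObserver is a hypothetical observer implementing the
# update_observers() hook that on_selection_change() calls.
# ---------------------------------------------------------------------------
class PrintingObserver:
    def update_observers(self, selected_index):
        print('selection changed to index', selected_index)


table = SentinelTable(['alpha', 'beta', 'gamma'],
                      class_list=[],
                      table_header='Example entries',
                      expandable=True)
table.set_entries(['alpha', 'beta', 'gamma'],
                  entry_descriptions=['first', 'second', 'third'])
table.add_observer(PrintingObserver())
table  # display the widget in a notebook frontend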
class MappingKernelManager(MultiKernelManager): """A KernelManager that handles notebook mapping and HTTP error handling""" @default('kernel_manager_class') def _default_kernel_manager_class(self): return "jupyter_client.ioloop.IOLoopKernelManager" kernel_argv = List(Unicode()) root_dir = Unicode(config=True) _kernel_connections = Dict() _culler_callback = None _initialized_culler = False @default('root_dir') def _default_root_dir(self): try: return self.parent.notebook_dir except AttributeError: return getcwd() @validate('root_dir') def _update_root_dir(self, proposal): """Do a bit of validation of the root dir.""" value = proposal['value'] if not os.path.isabs(value): # If we receive a non-absolute path, make it absolute. value = os.path.abspath(value) if not exists(value) or not os.path.isdir(value): raise TraitError("kernel root dir %r is not a directory" % value) return value cull_idle_timeout_minimum = 300 # 5 minutes cull_idle_timeout = Integer( 0, config=True, help= """Timeout (in seconds) after which a kernel is considered idle and ready to be culled. Values of 0 or lower disable culling. The minimum timeout is 300 seconds (5 minutes). Positive values less than the minimum value will be set to the minimum.""") cull_interval_default = 300 # 5 minutes cull_interval = Integer( cull_interval_default, config=True, help= """The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value.""" ) cull_connected = Bool( False, config=True, help= """Whether to consider culling kernels which have one or more connections. Only effective if cull_idle_timeout is not 0.""") cull_busy = Bool( False, config=True, help="""Whether to consider culling kernels which are busy. Only effective if cull_idle_timeout is not 0.""") #------------------------------------------------------------------------- # Methods for managing kernels and sessions #------------------------------------------------------------------------- def _handle_kernel_died(self, kernel_id): """notice that a kernel died""" self.log.warning("Kernel %s died, removing from map.", kernel_id) self.remove_kernel(kernel_id) def cwd_for_path(self, path): """Turn API path into absolute OS path.""" os_path = to_os_path(path, self.root_dir) # in the case of notebooks and kernels not being on the same filesystem, # walk up to root_dir if the paths don't exist while not os.path.isdir(os_path) and os_path != self.root_dir: os_path = os.path.dirname(os_path) return os_path @gen.coroutine def start_kernel(self, kernel_id=None, path=None, **kwargs): """Start a kernel for a session and return its kernel_id. Parameters ---------- kernel_id : uuid The uuid to associate the new kernel with. If this is not None, this kernel will be persistent whenever it is requested. path : API path The API path (unicode, '/' delimited) for the cwd. Will be transformed to an OS path relative to root_dir. kernel_name : str The name identifying which kernel spec to launch. This is ignored if an existing kernel is returned, but it may be checked in the future. 
""" if kernel_id is None: if path is not None: kwargs['cwd'] = self.cwd_for_path(path) kernel_id = yield gen.maybe_future( super(MappingKernelManager, self).start_kernel(**kwargs)) self._kernel_connections[kernel_id] = 0 self.start_watching_activity(kernel_id) self.log.info("Kernel started: %s" % kernel_id) self.log.debug("Kernel args: %r" % kwargs) # register callback for failed auto-restart self.add_restart_callback( kernel_id, lambda: self._handle_kernel_died(kernel_id), 'dead', ) else: self._check_kernel_id(kernel_id) self.log.info("Using existing kernel: %s" % kernel_id) # Initialize culling if not already if not self._initialized_culler: self.initialize_culler() # py2-compat raise gen.Return(kernel_id) def shutdown_kernel(self, kernel_id, now=False): """Shutdown a kernel by kernel_id""" self._check_kernel_id(kernel_id) self._kernels[kernel_id]._activity_stream.close() self._kernel_connections.pop(kernel_id, None) return super(MappingKernelManager, self).shutdown_kernel(kernel_id, now=now) def restart_kernel(self, kernel_id): """Restart a kernel by kernel_id""" self._check_kernel_id(kernel_id) super(MappingKernelManager, self).restart_kernel(kernel_id) kernel = self.get_kernel(kernel_id) # return a Future that will resolve when the kernel has successfully restarted channel = kernel.connect_shell() future = Future() def finish(): """Common cleanup when restart finishes/fails for any reason.""" if not channel.closed(): channel.close() loop.remove_timeout(timeout) kernel.remove_restart_callback(on_restart_failed, 'dead') def on_reply(msg): self.log.debug("Kernel info reply received: %s", kernel_id) finish() if not future.done(): future.set_result(msg) def on_timeout(): self.log.warning("Timeout waiting for kernel_info_reply: %s", kernel_id) finish() if not future.done(): future.set_exception( gen.TimeoutError("Timeout waiting for restart")) def on_restart_failed(): self.log.warning("Restarting kernel failed: %s", kernel_id) finish() if not future.done(): future.set_exception(RuntimeError("Restart failed")) kernel.add_restart_callback(on_restart_failed, 'dead') kernel.session.send(channel, "kernel_info_request") channel.on_recv(on_reply) loop = IOLoop.current() timeout = loop.add_timeout(loop.time() + 30, on_timeout) return future def notify_connect(self, kernel_id): """Notice a new connection to a kernel""" if kernel_id in self._kernel_connections: self._kernel_connections[kernel_id] += 1 def notify_disconnect(self, kernel_id): """Notice a disconnection from a kernel""" if kernel_id in self._kernel_connections: self._kernel_connections[kernel_id] -= 1 def kernel_model(self, kernel_id): """Return a JSON-safe dict representing a kernel For use in representing kernels in the JSON APIs. 
""" self._check_kernel_id(kernel_id) kernel = self._kernels[kernel_id] model = { "id": kernel_id, "name": kernel.kernel_name, "last_activity": isoformat(kernel.last_activity), "execution_state": kernel.execution_state, "connections": self._kernel_connections[kernel_id], } return model def list_kernels(self): """Returns a list of kernel_id's of kernels running.""" kernels = [] kernel_ids = super(MappingKernelManager, self).list_kernel_ids() for kernel_id in kernel_ids: model = self.kernel_model(kernel_id) kernels.append(model) return kernels # override _check_kernel_id to raise 404 instead of KeyError def _check_kernel_id(self, kernel_id): """Check a that a kernel_id exists and raise 404 if not.""" if kernel_id not in self: raise web.HTTPError(404, u'Kernel does not exist: %s' % kernel_id) # monitoring activity: def start_watching_activity(self, kernel_id): """Start watching IOPub messages on a kernel for activity. - update last_activity on every message - record execution_state from status messages """ kernel = self._kernels[kernel_id] # add busy/activity markers: kernel.execution_state = 'starting' kernel.last_activity = utcnow() kernel._activity_stream = kernel.connect_iopub() session = Session( config=kernel.session.config, key=kernel.session.key, ) def record_activity(msg_list): """Record an IOPub message arriving from a kernel""" kernel.last_activity = utcnow() idents, fed_msg_list = session.feed_identities(msg_list) msg = session.deserialize(fed_msg_list) msg_type = msg['header']['msg_type'] self.log.debug("activity on %s: %s", kernel_id, msg_type) if msg_type == 'status': kernel.execution_state = msg['content']['execution_state'] kernel._activity_stream.on_recv(record_activity) def initialize_culler(self): """Start idle culler if 'cull_idle_timeout' is greater than zero. Regardless of that value, set flag that we've been here. 
""" if not self._initialized_culler and self.cull_idle_timeout > 0: if self._culler_callback is None: if self.cull_idle_timeout < self.cull_idle_timeout_minimum: self.log.warning( "'cull_idle_timeout' (%s) is less than the minimum value (%s) and has been set to the minimum.", self.cull_idle_timeout, self.cull_idle_timeout_minimum) self.cull_idle_timeout = self.cull_idle_timeout_minimum loop = IOLoop.current() if self.cull_interval <= 0: #handle case where user set invalid value self.log.warning( "Invalid value for 'cull_interval' detected (%s) - using default value (%s).", self.cull_interval, self.cull_interval_default) self.cull_interval = self.cull_interval_default self._culler_callback = PeriodicCallback( self.cull_kernels, 1000 * self.cull_interval, loop) self.log.info( "Culling kernels with idle durations > %s seconds at %s second intervals ...", self.cull_idle_timeout, self.cull_interval) if self.cull_busy: self.log.info("Culling kernels even if busy") if self.cull_connected: self.log.info( "Culling kernels even with connected clients") self._culler_callback.start() self._initialized_culler = True def cull_kernels(self): self.log.debug( "Polling every %s seconds for kernels idle > %s seconds...", self.cull_interval, self.cull_idle_timeout) """Create a separate list of kernels to avoid conflicting updates while iterating""" for kernel_id in list(self._kernels): try: self.cull_kernel_if_idle(kernel_id) except Exception as e: self.log.exception( "The following exception was encountered while checking the idle duration of kernel %s: %s", kernel_id, e) def cull_kernel_if_idle(self, kernel_id): kernel = self._kernels[kernel_id] self.log.debug("kernel_id=%s, kernel_name=%s, last_activity=%s", kernel_id, kernel.kernel_name, kernel.last_activity) if kernel.last_activity is not None: dt_now = utcnow() dt_idle = dt_now - kernel.last_activity # Compute idle properties is_idle_time = dt_idle > timedelta(seconds=self.cull_idle_timeout) is_idle_execute = self.cull_busy or (kernel.execution_state != 'busy') connections = self._kernel_connections.get(kernel_id, 0) is_idle_connected = self.cull_connected or not connections # Cull the kernel if all three criteria are met if (is_idle_time and is_idle_execute and is_idle_connected): idle_duration = int(dt_idle.total_seconds()) self.log.warning( "Culling '%s' kernel '%s' (%s) with %d connections due to %s seconds of inactivity.", kernel.execution_state, kernel.kernel_name, kernel_id, connections, idle_duration) self.shutdown_kernel(kernel_id)
class JupyterHub(Application): """An Application for starting a Multi-User Jupyter Notebook server.""" name = 'jupyterhub' version = jupyterhub.__version__ description = """Start a multi-user Jupyter Notebook server Spawns a configurable-http-proxy and multi-user Hub, which authenticates users and spawns single-user Notebook servers on behalf of users. """ examples = """ generate default config file: jupyterhub --generate-config -f /etc/jupyterhub/jupyterhub.py spawn the server on 10.0.1.2:443 with https: jupyterhub --ip 10.0.1.2 --port 443 --ssl-key my_ssl.key --ssl-cert my_ssl.cert """ aliases = Dict(aliases) flags = Dict(flags) subcommands = {'token': (NewToken, "Generate an API token for a user")} classes = List([ Spawner, LocalProcessSpawner, Authenticator, PAMAuthenticator, ]) config_file = Unicode( 'jupyterhub_config.py', config=True, help="The config file to load", ) generate_config = Bool( False, config=True, help="Generate default config file", ) answer_yes = Bool( False, config=True, help="Answer yes to any questions (e.g. confirm overwrite)") pid_file = Unicode('', config=True, help="""File to write PID Useful for daemonizing jupyterhub. """) cookie_max_age_days = Float( 14, config=True, help="""Number of days for a login cookie to be valid. Default is two weeks. """) last_activity_interval = Integer( 300, config=True, help= "Interval (in seconds) at which to update last-activity timestamps.") proxy_check_interval = Integer( 30, config=True, help="Interval (in seconds) at which to check if the proxy is running." ) data_files_path = Unicode( DATA_FILES_PATH, config=True, help= "The location of jupyterhub data files (e.g. /usr/local/share/jupyter/hub)" ) template_paths = List( config=True, help="Paths to search for jinja templates.", ) def _template_paths_default(self): return [os.path.join(self.data_files_path, 'templates')] ssl_key = Unicode( '', config=True, help="""Path to SSL key file for the public facing interface of the proxy Use with ssl_cert """) ssl_cert = Unicode( '', config=True, help= """Path to SSL certificate file for the public facing interface of the proxy Use with ssl_key """) ip = Unicode('', config=True, help="The public facing ip of the proxy") port = Integer(8000, config=True, help="The public facing port of the proxy") base_url = URLPrefix('/', config=True, help="The base URL of the entire application") jinja_environment_options = Dict( config=True, help="Supply extra arguments that will be passed to Jinja environment." ) proxy_cmd = Command('configurable-http-proxy', config=True, help="""The command to start the http proxy. Only override if configurable-http-proxy is not on your PATH """) debug_proxy = Bool(False, config=True, help="show debug output in configurable-http-proxy") proxy_auth_token = Unicode(config=True, help="""The Proxy Auth token. Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default. """) def _proxy_auth_token_default(self): token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', None) if not token: self.log.warn('\n'.join([ "", "Generating CONFIGPROXY_AUTH_TOKEN. 
Restarting the Hub will require restarting the proxy.", "Set CONFIGPROXY_AUTH_TOKEN env or JupyterHub.proxy_auth_token config to avoid this message.", "", ])) token = orm.new_token() return token proxy_api_ip = Unicode('localhost', config=True, help="The ip for the proxy API handlers") proxy_api_port = Integer(config=True, help="The port for the proxy API handlers") def _proxy_api_port_default(self): return self.port + 1 hub_port = Integer(8081, config=True, help="The port for this process") hub_ip = Unicode('localhost', config=True, help="The ip for this process") hub_prefix = URLPrefix( '/hub/', config=True, help="The prefix for the hub server. Must not be '/'") def _hub_prefix_default(self): return url_path_join(self.base_url, '/hub/') def _hub_prefix_changed(self, name, old, new): if new == '/': raise TraitError("'/' is not a valid hub prefix") if not new.startswith(self.base_url): self.hub_prefix = url_path_join(self.base_url, new) cookie_secret = Bytes(config=True, env='JPY_COOKIE_SECRET', help="""The cookie secret to use to encrypt cookies. Loaded from the JPY_COOKIE_SECRET env variable by default. """) cookie_secret_file = Unicode( 'jupyterhub_cookie_secret', config=True, help="""File in which to store the cookie secret.""") authenticator_class = Type(PAMAuthenticator, Authenticator, config=True, help="""Class for authenticating users. This should be a class with the following form: - constructor takes one kwarg: `config`, the IPython config object. - is a tornado.gen.coroutine - returns username on success, None on failure - takes two arguments: (handler, data), where `handler` is the calling web.RequestHandler, and `data` is the POST form data from the login page. """) authenticator = Instance(Authenticator) def _authenticator_default(self): return self.authenticator_class(parent=self, db=self.db) # class for spawning single-user servers spawner_class = Type( LocalProcessSpawner, Spawner, config=True, help="""The class to use for spawning single-user servers. Should be a subclass of Spawner. """) db_url = Unicode( 'sqlite:///jupyterhub.sqlite', config=True, help="url for the database. e.g. `sqlite:///jupyterhub.sqlite`") def _db_url_changed(self, name, old, new): if '://' not in new: # assume sqlite, if given as a plain filename self.db_url = 'sqlite:///%s' % new db_kwargs = Dict( config=True, help="""Include any kwargs to pass to the database connection. See sqlalchemy.create_engine for details. """) reset_db = Bool(False, config=True, help="Purge and reset the database.") debug_db = Bool( False, config=True, help="log all database transactions. This has A LOT of output") session_factory = Any() admin_access = Bool( False, config=True, help="""Grant admin users permission to access single-user servers. Users should be properly informed if this is enabled. """) admin_users = Set( config=True, help="""DEPRECATED, use Authenticator.admin_users instead.""") tornado_settings = Dict(config=True) cleanup_servers = Bool( True, config=True, help="""Whether to shutdown single-user servers when the Hub shuts down. Disable if you want to be able to teardown the Hub while leaving the single-user servers running. If both this and cleanup_proxy are False, sending SIGINT to the Hub will only shutdown the Hub, leaving everything else running. The Hub should be able to resume from database state. """) cleanup_proxy = Bool( True, config=True, help="""Whether to shutdown the proxy when the Hub shuts down. Disable if you want to be able to teardown the Hub while leaving the proxy running. 
        Only valid if the proxy was started by the Hub process.

        If both this and cleanup_servers are False, sending SIGINT to the Hub will
        only shutdown the Hub, leaving everything else running.

        The Hub should be able to resume from database state.
        """)

    handlers = List()

    _log_formatter_cls = CoroutineLogFormatter
    http_server = None
    proxy_process = None
    io_loop = None

    def _log_level_default(self):
        return logging.INFO

    def _log_datefmt_default(self):
        """Exclude date from default date format"""
        return "%Y-%m-%d %H:%M:%S"

    def _log_format_default(self):
        """override default log format to include time"""
        return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s %(module)s:%(lineno)d]%(end_color)s %(message)s"

    extra_log_file = Unicode("", config=True,
                             help="Set a logging.FileHandler on this file.")
    extra_log_handlers = List(
        Instance(logging.Handler),
        config=True,
        help="Extra log handlers to set on JupyterHub logger",
    )

    def init_logging(self):
        # This prevents double log messages because tornado uses a root logger
        # that self.log is a child of. The logging module dispatches log
        # messages to a logger and all of its ancestors until propagate is
        # set to False.
        self.log.propagate = False

        if self.extra_log_file:
            self.extra_log_handlers.append(
                logging.FileHandler(self.extra_log_file))

        _formatter = self._log_formatter_cls(
            fmt=self.log_format,
            datefmt=self.log_datefmt,
        )
        for handler in self.extra_log_handlers:
            if handler.formatter is None:
                handler.setFormatter(_formatter)
            self.log.addHandler(handler)

        # hook up tornado 3's loggers to our app handlers
        for log in (app_log, access_log, gen_log):
            # ensure all log statements identify the application they come from
            log.name = self.log.name
        logger = logging.getLogger('tornado')
        logger.propagate = True
        logger.parent = self.log
        logger.setLevel(self.log.level)

    def init_ports(self):
        if self.hub_port == self.port:
            raise TraitError(
                "The hub and proxy cannot both listen on port %i" % self.port)
        if self.hub_port == self.proxy_api_port:
            raise TraitError(
                "The hub and proxy API cannot both listen on port %i" %
                self.hub_port)
        if self.proxy_api_port == self.port:
            raise TraitError(
                "The proxy's public and API ports cannot both be %i" % self.port)

    @staticmethod
    def add_url_prefix(prefix, handlers):
        """add a url prefix to handlers"""
        for i, tup in enumerate(handlers):
            lis = list(tup)
            lis[0] = url_path_join(prefix, tup[0])
            handlers[i] = tuple(lis)
        return handlers

    def init_handlers(self):
        h = []
        # load handlers from the authenticator
        h.extend(self.authenticator.get_handlers(self))
        # set default handlers
        h.extend(handlers.default_handlers)
        h.extend(apihandlers.default_handlers)

        self.handlers = self.add_url_prefix(self.hub_prefix, h)

        # some extra handlers, outside hub_prefix
        self.handlers.extend([
            (r"%s" % self.hub_prefix.rstrip('/'), web.RedirectHandler, {
                "url": self.hub_prefix,
                "permanent": False,
            }),
            (r"(?!%s).*" % self.hub_prefix, handlers.PrefixRedirectHandler),
            (r'(.*)', handlers.Template404),
        ])

    def _check_db_path(self, path):
        """More informative log messages for failed filesystem access"""
        path = os.path.abspath(path)
        parent, fname = os.path.split(path)
        user = getuser()
        if not os.path.isdir(parent):
            self.log.error("Directory %s does not exist", parent)
        if os.path.exists(parent) and not os.access(parent, os.W_OK):
            self.log.error("%s cannot create files in %s", user, parent)
        if os.path.exists(path) and not os.access(path, os.W_OK):
            self.log.error("%s cannot edit %s", user, path)

    def init_secrets(self):
        trait_name = 'cookie_secret'
        trait = self.traits()[trait_name]
        env_name
= trait.get_metadata('env') secret_file = os.path.abspath( os.path.expanduser(self.cookie_secret_file)) secret = self.cookie_secret secret_from = 'config' # load priority: 1. config, 2. env, 3. file if not secret and os.environ.get(env_name): secret_from = 'env' self.log.info("Loading %s from env[%s]", trait_name, env_name) secret = binascii.a2b_hex(os.environ[env_name]) if not secret and os.path.exists(secret_file): secret_from = 'file' perm = os.stat(secret_file).st_mode if perm & 0o077: self.log.error("Bad permissions on %s", secret_file) else: self.log.info("Loading %s from %s", trait_name, secret_file) with open(secret_file) as f: b64_secret = f.read() try: secret = binascii.a2b_base64(b64_secret) except Exception as e: self.log.error("%s does not contain b64 key: %s", secret_file, e) if not secret: secret_from = 'new' self.log.debug("Generating new %s", trait_name) secret = os.urandom(SECRET_BYTES) if secret_file and secret_from == 'new': # if we generated a new secret, store it in the secret_file self.log.info("Writing %s to %s", trait_name, secret_file) b64_secret = binascii.b2a_base64(secret).decode('ascii') with open(secret_file, 'w') as f: f.write(b64_secret) try: os.chmod(secret_file, 0o600) except OSError: self.log.warn("Failed to set permissions on %s", secret_file) # store the loaded trait value self.cookie_secret = secret # thread-local storage of db objects _local = Instance(threading.local, ()) @property def db(self): if not hasattr(self._local, 'db'): self._local.db = scoped_session(self.session_factory)() return self._local.db @property def hub(self): if not getattr(self._local, 'hub', None): q = self.db.query(orm.Hub) assert q.count() <= 1 self._local.hub = q.first() return self._local.hub @hub.setter def hub(self, hub): self._local.hub = hub @property def proxy(self): if not getattr(self._local, 'proxy', None): q = self.db.query(orm.Proxy) assert q.count() <= 1 p = self._local.proxy = q.first() if p: p.auth_token = self.proxy_auth_token return self._local.proxy @proxy.setter def proxy(self, proxy): self._local.proxy = proxy def init_db(self): """Create the database connection""" self.log.debug("Connecting to db: %s", self.db_url) try: self.session_factory = orm.new_session_factory(self.db_url, reset=self.reset_db, echo=self.debug_db, **self.db_kwargs) # trigger constructing thread local db property _ = self.db except OperationalError as e: self.log.error("Failed to connect to db: %s", self.db_url) self.log.debug("Database error was:", exc_info=True) if self.db_url.startswith('sqlite:///'): self._check_db_path(self.db_url.split(':///', 1)[1]) self.exit(1) def init_hub(self): """Load the Hub config into the database""" self.hub = self.db.query(orm.Hub).first() if self.hub is None: self.hub = orm.Hub(server=orm.Server( ip=self.hub_ip, port=self.hub_port, base_url=self.hub_prefix, cookie_name='jupyter-hub-token', )) self.db.add(self.hub) else: server = self.hub.server server.ip = self.hub_ip server.port = self.hub_port server.base_url = self.hub_prefix self.db.commit() @gen.coroutine def init_users(self): """Load users into and from the database""" db = self.db if self.admin_users and not self.authenticator.admin_users: self.log.warn("\nJupyterHub.admin_users is deprecated." 
"\nUse Authenticator.admin_users instead.") self.authenticator.admin_users = self.admin_users admin_users = self.authenticator.admin_users if not admin_users: self.log.warning( "No admin users, admin interface will be unavailable.") self.log.warning( "Add any administrative users to `c.Authenticator.admin_users` in config." ) new_users = [] for name in admin_users: # ensure anyone specified as admin in config is admin in db user = orm.User.find(db, name) if user is None: user = orm.User(name=name, admin=True) new_users.append(user) db.add(user) else: user.admin = True # the admin_users config variable will never be used after this point. # only the database values will be referenced. whitelist = self.authenticator.whitelist if not whitelist: self.log.info( "Not using whitelist. Any authenticated user will be allowed.") # add whitelisted users to the db for name in whitelist: user = orm.User.find(db, name) if user is None: user = orm.User(name=name) new_users.append(user) db.add(user) if whitelist: # fill the whitelist with any users loaded from the db, # so we are consistent in both directions. # This lets whitelist be used to set up initial list, # but changes to the whitelist can occur in the database, # and persist across sessions. for user in db.query(orm.User): whitelist.add(user.name) # The whitelist set and the users in the db are now the same. # From this point on, any user changes should be done simultaneously # to the whitelist set and user db, unless the whitelist is empty (all users allowed). db.commit() for user in new_users: yield gen.maybe_future(self.authenticator.add_user(user)) db.commit() @gen.coroutine def init_spawners(self): db = self.db user_summaries = [''] def _user_summary(user): parts = ['{0: >8}'.format(user.name)] if user.admin: parts.append('admin') if user.server: parts.append('running at %s' % user.server) return ' '.join(parts) @gen.coroutine def user_stopped(user): status = yield user.spawner.poll() self.log.warn( "User %s server stopped with exit code: %s", user.name, status, ) yield self.proxy.delete_user(user) yield user.stop() for user in db.query(orm.User): if not user.state: # without spawner state, server isn't valid user.server = None user_summaries.append(_user_summary(user)) continue self.log.debug("Loading state for %s from db", user.name) user.spawner = spawner = self.spawner_class( user=user, hub=self.hub, config=self.config, db=self.db, authenticator=self.authenticator, ) status = yield spawner.poll() if status is None: self.log.info("%s still running", user.name) spawner.add_poll_callback(user_stopped, user) spawner.start_polling() else: # user not running. This is expected if server is None, # but indicates the user's server died while the Hub wasn't running # if user.server is defined. 
log = self.log.warn if user.server else self.log.debug log("%s not running.", user.name) user.server = None user_summaries.append(_user_summary(user)) self.log.debug("Loaded users: %s", '\n'.join(user_summaries)) db.commit() def init_proxy(self): """Load the Proxy config into the database""" self.proxy = self.db.query(orm.Proxy).first() if self.proxy is None: self.proxy = orm.Proxy( public_server=orm.Server(), api_server=orm.Server(), ) self.db.add(self.proxy) self.db.commit() self.proxy.auth_token = self.proxy_auth_token # not persisted self.proxy.log = self.log self.proxy.public_server.ip = self.ip self.proxy.public_server.port = self.port self.proxy.api_server.ip = self.proxy_api_ip self.proxy.api_server.port = self.proxy_api_port self.proxy.api_server.base_url = '/api/routes/' self.db.commit() @gen.coroutine def start_proxy(self): """Actually start the configurable-http-proxy""" # check for proxy if self.proxy.public_server.is_up() or self.proxy.api_server.is_up(): # check for *authenticated* access to the proxy (auth token can change) try: yield self.proxy.get_routes() except (HTTPError, OSError, socket.error) as e: if isinstance(e, HTTPError) and e.code == 403: msg = "Did CONFIGPROXY_AUTH_TOKEN change?" else: msg = "Is something else using %s?" % self.proxy.public_server.bind_url self.log.error( "Proxy appears to be running at %s, but I can't access it (%s)\n%s", self.proxy.public_server.bind_url, e, msg) self.exit(1) return else: self.log.info("Proxy already running at: %s", self.proxy.public_server.bind_url) self.proxy_process = None return env = os.environ.copy() env['CONFIGPROXY_AUTH_TOKEN'] = self.proxy.auth_token cmd = self.proxy_cmd + [ '--ip', self.proxy.public_server.ip, '--port', str(self.proxy.public_server.port), '--api-ip', self.proxy.api_server.ip, '--api-port', str(self.proxy.api_server.port), '--default-target', self.hub.server.host, ] if self.debug_proxy: cmd.extend(['--log-level', 'debug']) if self.ssl_key: cmd.extend(['--ssl-key', self.ssl_key]) if self.ssl_cert: cmd.extend(['--ssl-cert', self.ssl_cert]) self.log.info("Starting proxy @ %s", self.proxy.public_server.bind_url) self.log.debug("Proxy cmd: %s", cmd) try: self.proxy_process = Popen(cmd, env=env) except FileNotFoundError as e: self.log.error( "Failed to find proxy %r\n" "The proxy can be installed with `npm install -g configurable-http-proxy`" % self.proxy_cmd) self.exit(1) def _check(): status = self.proxy_process.poll() if status is not None: e = RuntimeError("Proxy failed to start with exit code %i" % status) # py2-compatible `raise e from None` e.__cause__ = None raise e for server in (self.proxy.public_server, self.proxy.api_server): for i in range(10): _check() try: yield server.wait_up(1) except TimeoutError: continue else: break yield server.wait_up(1) self.log.debug("Proxy started and appears to be up") @gen.coroutine def check_proxy(self): if self.proxy_process.poll() is None: return self.log.error( "Proxy stopped with exit code %r", 'unknown' if self.proxy_process is None else self.proxy_process.poll()) yield self.start_proxy() self.log.info("Setting up routes on new proxy") yield self.proxy.add_all_users() self.log.info("New proxy back up, and good to go") def init_tornado_settings(self): """Set up the tornado settings dict.""" base_url = self.hub.server.base_url jinja_env = Environment(loader=FileSystemLoader(self.template_paths), **self.jinja_environment_options) login_url = self.authenticator.login_url(base_url) logout_url = self.authenticator.logout_url(base_url) # if running from git, 
disable caching of require.js # otherwise cache based on server start time parent = os.path.dirname(os.path.dirname(jupyterhub.__file__)) if os.path.isdir(os.path.join(parent, '.git')): version_hash = '' else: version_hash = datetime.now().strftime("%Y%m%d%H%M%S") settings = dict( log_function=log_request, config=self.config, log=self.log, db=self.db, proxy=self.proxy, hub=self.hub, admin_users=self.authenticator.admin_users, admin_access=self.admin_access, authenticator=self.authenticator, spawner_class=self.spawner_class, base_url=self.base_url, cookie_secret=self.cookie_secret, cookie_max_age_days=self.cookie_max_age_days, login_url=login_url, logout_url=logout_url, static_path=os.path.join(self.data_files_path, 'static'), static_url_prefix=url_path_join(self.hub.server.base_url, 'static/'), static_handler_class=CacheControlStaticFilesHandler, template_path=self.template_paths, jinja2_env=jinja_env, version_hash=version_hash, ) # allow configured settings to have priority settings.update(self.tornado_settings) self.tornado_settings = settings def init_tornado_application(self): """Instantiate the tornado Application object""" self.tornado_application = web.Application(self.handlers, **self.tornado_settings) def write_pid_file(self): pid = os.getpid() if self.pid_file: self.log.debug("Writing PID %i to %s", pid, self.pid_file) with open(self.pid_file, 'w') as f: f.write('%i' % pid) @gen.coroutine @catch_config_error def initialize(self, *args, **kwargs): super().initialize(*args, **kwargs) if self.generate_config or self.subapp: return self.load_config_file(self.config_file) self.init_logging() if 'JupyterHubApp' in self.config: self.log.warn( "Use JupyterHub in config, not JupyterHubApp. Outdated config:\n%s", '\n'.join('JupyterHubApp.{key} = {value!r}'.format(key=key, value=value) for key, value in self.config.JupyterHubApp.items())) cfg = self.config.copy() cfg.JupyterHub.merge(cfg.JupyterHubApp) self.update_config(cfg) self.write_pid_file() self.init_ports() self.init_secrets() self.init_db() self.init_hub() self.init_proxy() yield self.init_users() yield self.init_spawners() self.init_handlers() self.init_tornado_settings() self.init_tornado_application() @gen.coroutine def cleanup(self): """Shut down our various subprocesses and clean up runtime files.""" futures = [] if self.cleanup_servers: self.log.info("Cleaning up single-user servers...") # request (async) process termination for user in self.db.query(orm.User): if user.spawner is not None: futures.append(user.stop()) else: self.log.info("Leaving single-user servers running") # clean up the proxy while single-user servers are shutting down if self.cleanup_proxy: if self.proxy_process: self.log.info("Cleaning up proxy[%i]...", self.proxy_process.pid) if self.proxy_process.poll() is None: try: self.proxy_process.terminate() except Exception as e: self.log.error("Failed to terminate proxy process: %s", e) else: self.log.info("I didn't start the proxy, I can't clean it up") else: self.log.info("Leaving proxy running") # wait for the stop requests to finish: for f in futures: try: yield f except Exception as e: self.log.error("Failed to stop user: %s", e) self.db.commit() if self.pid_file and os.path.exists(self.pid_file): self.log.info("Cleaning up PID file %s", self.pid_file) os.remove(self.pid_file) # finally stop the loop once we are all cleaned up self.log.info("...done") def write_config_file(self): """Write our default config to a .py config file""" if os.path.exists(self.config_file) and not self.answer_yes: answer = '' def ask(): prompt =
"Overwrite %s with default config? [y/N]" % self.config_file try: return input(prompt).lower() or 'n' except KeyboardInterrupt: print('') # empty line return 'n' answer = ask() while not answer.startswith(('y', 'n')): print("Please answer 'yes' or 'no'") answer = ask() if answer.startswith('n'): return config_text = self.generate_config_file() if isinstance(config_text, bytes): config_text = config_text.decode('utf8') print("Writing default config to: %s" % self.config_file) with open(self.config_file, mode='w') as f: f.write(config_text) @gen.coroutine def update_last_activity(self): """Update User.last_activity timestamps from the proxy""" routes = yield self.proxy.get_routes() for prefix, route in routes.items(): if 'user' not in route: # not a user route, ignore it continue user = orm.User.find(self.db, route['user']) if user is None: self.log.warn("Found no user for route: %s", route) continue try: dt = datetime.strptime(route['last_activity'], ISO8601_ms) except Exception: dt = datetime.strptime(route['last_activity'], ISO8601_s) user.last_activity = max(user.last_activity, dt) self.db.commit() yield self.proxy.check_routes(routes) @gen.coroutine def start(self): """Start the whole thing""" self.io_loop = loop = IOLoop.current() if self.subapp: self.subapp.start() loop.stop() return if self.generate_config: self.write_config_file() loop.stop() return # start the webserver self.http_server = tornado.httpserver.HTTPServer( self.tornado_application, xheaders=True) try: self.http_server.listen(self.hub_port, address=self.hub_ip) except Exception: self.log.error("Failed to bind hub to %s", self.hub.server.bind_url) raise else: self.log.info("Hub API listening on %s", self.hub.server.bind_url) # start the proxy try: yield self.start_proxy() except Exception as e: self.log.critical("Failed to start proxy", exc_info=True) self.exit(1) return loop.add_callback(self.proxy.add_all_users) if self.proxy_process: # only check / restart the proxy if we started it in the first place. # this means a restarted Hub cannot restart a Proxy that its # predecessor started. pc = PeriodicCallback(self.check_proxy, 1e3 * self.proxy_check_interval) pc.start() if self.last_activity_interval: pc = PeriodicCallback(self.update_last_activity, 1e3 * self.last_activity_interval) pc.start() self.log.info("JupyterHub is now running at %s", self.proxy.public_server.url) # register cleanup on both TERM and INT atexit.register(self.atexit) self.init_signal() def init_signal(self): signal.signal(signal.SIGTERM, self.sigterm) def sigterm(self, signum, frame): self.log.critical("Received SIGTERM, shutting down") self.io_loop.stop() self.atexit() _atexit_ran = False def atexit(self): """atexit callback""" if self._atexit_ran: return self._atexit_ran = True # run the cleanup step (in a new loop, because the interrupted one is unclean) IOLoop.clear_current() loop = IOLoop() loop.make_current() loop.run_sync(self.cleanup) def stop(self): if not self.io_loop: return if self.http_server: if self.io_loop._running: self.io_loop.add_callback(self.http_server.stop) else: self.http_server.stop() self.io_loop.add_callback(self.io_loop.stop) @gen.coroutine def launch_instance_async(self, argv=None): try: yield self.initialize(argv) yield self.start() except Exception as e: self.log.exception("") self.exit(1) @classmethod def launch_instance(cls, argv=None): self = cls.instance() loop = IOLoop.current() loop.add_callback(self.launch_instance_async, argv) try: loop.start() except KeyboardInterrupt: print("\nInterrupted")
class LocalProcessSpawner(Spawner): """A Spawner that just uses Popen to start local processes.""" INTERRUPT_TIMEOUT = Integer( 10, config=True, help= "Seconds to wait for process to halt after SIGINT before proceeding to SIGTERM" ) TERM_TIMEOUT = Integer( 5, config=True, help= "Seconds to wait for process to halt after SIGTERM before proceeding to SIGKILL" ) KILL_TIMEOUT = Integer( 5, config=True, help= "Seconds to wait for process to halt after SIGKILL before giving up") proc = Instance(Popen, allow_none=True) pid = Integer(0) def make_preexec_fn(self, name): return set_user_setuid(name) def load_state(self, state): """load pid from state""" super(LocalProcessSpawner, self).load_state(state) if 'pid' in state: self.pid = state['pid'] def get_state(self): """add pid to state""" state = super(LocalProcessSpawner, self).get_state() if self.pid: state['pid'] = self.pid return state def clear_state(self): """clear pid state""" super(LocalProcessSpawner, self).clear_state() self.pid = 0 def user_env(self, env): env['USER'] = self.user.name home = pwd.getpwnam(self.user.name).pw_dir shell = pwd.getpwnam(self.user.name).pw_shell # These will be empty if undefined, # in which case don't set the env: if home: env['HOME'] = home if shell: env['SHELL'] = shell return env def _env_default(self): env = super()._env_default() return self.user_env(env) @gen.coroutine def start(self): """Start the process""" if self.ip: self.user.server.ip = self.ip self.user.server.port = random_port() cmd = [] env = self.env.copy() cmd.extend(self.cmd) cmd.extend(self.get_args()) self.log.info("Spawning %s", ' '.join(pipes.quote(s) for s in cmd)) self.proc = Popen( cmd, env=env, preexec_fn=self.make_preexec_fn(self.user.name), ) self.pid = self.proc.pid @gen.coroutine def poll(self): """Poll the process""" # if we started the process, poll with Popen if self.proc is not None: status = self.proc.poll() if status is not None: # clear state if the process is done self.clear_state() return status # if we resumed from stored state, # we don't have the Popen handle anymore, so rely on self.pid if not self.pid: # no pid, not running self.clear_state() return 0 # send signal 0 to check if PID exists # this doesn't work on Windows, but that's okay because we don't support Windows. alive = yield self._signal(0) if not alive: self.clear_state() return 0 else: return None @gen.coroutine def _signal(self, sig): """simple implementation of signal, which we can use when we are using setuid (we are root)""" try: os.kill(self.pid, sig) except OSError as e: if e.errno == errno.ESRCH: return False # process is gone else: raise return True # process exists @gen.coroutine def stop(self, now=False): """stop the subprocess if `now`, skip waiting for clean shutdown """ if not now: status = yield self.poll() if status is not None: return self.log.debug("Interrupting %i", self.pid) yield self._signal(signal.SIGINT) yield self.wait_for_death(self.INTERRUPT_TIMEOUT) # clean shutdown failed, use TERM status = yield self.poll() if status is not None: return self.log.debug("Terminating %i", self.pid) yield self._signal(signal.SIGTERM) yield self.wait_for_death(self.TERM_TIMEOUT) # TERM failed, use KILL status = yield self.poll() if status is not None: return self.log.debug("Killing %i", self.pid) yield self._signal(signal.SIGKILL) yield self.wait_for_death(self.KILL_TIMEOUT) status = yield self.poll() if status is None: # it all failed, zombie process self.log.warn("Process %i never died", self.pid)
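# Illustrative sketch: tuning the escalating shutdown implemented by stop()
# above (SIGINT, wait INTERRUPT_TIMEOUT; SIGTERM, wait TERM_TIMEOUT; SIGKILL,
# wait KILL_TIMEOUT). All three are config=True traits of this class; the
# values below are examples for slow-to-exit servers, not recommendations.
c = get_config()  # noqa: F821
c.LocalProcessSpawner.INTERRUPT_TIMEOUT = 30  # allow time for a clean save
c.LocalProcessSpawner.TERM_TIMEOUT = 10
c.LocalProcessSpawner.KILL_TIMEOUT = 5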
class NotebookApp(JupyterApp): name = 'jupyter-notebook' version = __version__ description = """ The Jupyter HTML Notebook. This launches a Tornado-based HTML Notebook Server that serves up an HTML5/Javascript Notebook client. """ examples = _examples classes = [ KernelManager, Session, MappingKernelManager, ContentsManager, FileContentsManager, NotebookNotary, KernelSpecManager, ] flags = Dict(flags) aliases = Dict(aliases) subcommands = dict(list=(NbserverListApp, NbserverListApp.description.splitlines()[0]), ) _log_formatter_cls = LogFormatter def _log_level_default(self): return logging.INFO def _log_datefmt_default(self): """Exclude date from default date format""" return "%H:%M:%S" def _log_format_default(self): """override default log format to include time""" return u"%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s]%(end_color)s %(message)s" ignore_minified_js = Bool( False, config=True, help= 'Deprecated: Use minified JS file or not, mainly used during dev to avoid JS recompilation', ) # file to be opened in the notebook server file_to_run = Unicode('', config=True) # Network related information allow_origin = Unicode('', config=True, help="""Set the Access-Control-Allow-Origin header Use '*' to allow any origin to access your server. Takes precedence over allow_origin_pat. """) allow_origin_pat = Unicode( '', config=True, help= """Use a regular expression for the Access-Control-Allow-Origin header Requests from an origin matching the expression will get replies with: Access-Control-Allow-Origin: origin where `origin` is the origin of the request. Ignored if allow_origin is set. """) allow_credentials = Bool( False, config=True, help="Set the Access-Control-Allow-Credentials: true header") allow_root = Bool( False, config=True, help="Whether to allow the user to run the notebook as root.") default_url = Unicode('/tree', config=True, help="The default URL to redirect to from `/`") ip = Unicode('localhost', config=True, help="The IP address the notebook server will listen on.") def _ip_default(self): """Return localhost if available, 127.0.0.1 otherwise. On some (horribly broken) systems, localhost cannot be bound. """ s = socket.socket() try: s.bind(('localhost', 0)) except socket.error as e: self.log.warning( "Cannot bind to localhost, using 127.0.0.1 as default ip\n%s", e) return '127.0.0.1' else: s.close() return 'localhost' def _ip_changed(self, name, old, new): if new == u'*': self.ip = u'' port = Integer(8888, config=True, help="The port the notebook server will listen on.") port_retries = Integer( 50, config=True, help= "The number of additional ports to try if the specified port is not available." ) certfile = Unicode( u'', config=True, help="""The full path to an SSL/TLS certificate file.""") keyfile = Unicode( u'', config=True, help="""The full path to a private key file for usage with SSL/TLS.""") client_ca = Unicode( u'', config=True, help= """The full path to a certificate authority certificate for SSL/TLS client authentication.""" ) cookie_secret_file = Unicode( config=True, help="""The file where the cookie secret is stored.""") def _cookie_secret_file_default(self): return os.path.join(self.runtime_dir, 'notebook_cookie_secret') cookie_secret = Bytes(b'', config=True, help="""The random bytes used to secure cookies. By default this is a new random number every time you start the Notebook. Set it to a value in a config file to enable logins to persist across server sessions.
Note: Cookie secrets should be kept private, do not share config files with cookie_secret stored in plaintext (you can read the value from a file). """) def _cookie_secret_default(self): if os.path.exists(self.cookie_secret_file): with io.open(self.cookie_secret_file, 'rb') as f: return f.read() else: secret = encodebytes(os.urandom(1024)) self._write_cookie_secret_file(secret) return secret def _write_cookie_secret_file(self, secret): """write my secret to my secret_file""" self.log.info("Writing notebook server cookie secret to %s", self.cookie_secret_file) with io.open(self.cookie_secret_file, 'wb') as f: f.write(secret) try: os.chmod(self.cookie_secret_file, 0o600) except OSError: self.log.warning("Could not set permissions on %s", self.cookie_secret_file) password = Unicode(u'', config=True, help="""Hashed password to use for web authentication. To generate, type in a python/IPython shell: from notebook.auth import passwd; passwd() The string should be of the form type:salt:hashed-password. """) password_required = Bool( False, config=True, help="""Forces users to use a password for the Notebook server. This is useful in a multi-user environment, for instance when everybody in the LAN can access each other's machine through ssh. In such a case, serving the notebook server on localhost is not secure since any user can connect to the notebook server via ssh. """) open_browser = Bool(True, config=True, help="""Whether to open in a browser after starting. The specific browser used is platform dependent and determined by the python standard library `webbrowser` module, unless it is overridden using the --browser (NotebookApp.browser) configuration option. """) browser = Unicode(u'', config=True, help="""Specify what command to use to invoke a web browser when opening the notebook. If not specified, the default browser will be determined by the `webbrowser` standard library module, which allows setting of the BROWSER environment variable to override it. """) webapp_settings = Dict(config=True, help="DEPRECATED, use tornado_settings") def _webapp_settings_changed(self, name, old, new): self.log.warning( "\n webapp_settings is deprecated, use tornado_settings.\n") self.tornado_settings = new tornado_settings = Dict( config=True, help="Supply overrides for the tornado.web.Application that the " "Jupyter notebook uses.") cookie_options = Dict( config=True, help="Extra keyword arguments to pass to `set_secure_cookie`." " See tornado's set_secure_cookie docs for details.") ssl_options = Dict(config=True, help="""Supply SSL options for the tornado HTTPServer. See the tornado docs for details.""") jinja_environment_options = Dict( config=True, help="Supply extra arguments that will be passed to Jinja environment." ) jinja_template_vars = Dict( config=True, help="Extra variables to supply to jinja templates when rendering.", ) enable_mathjax = Bool( True, config=True, help="""Whether to enable MathJax for typesetting math/TeX MathJax is the javascript library Jupyter uses to render math/LaTeX. It is very large, so you may want to disable it if you have a slow internet connection, or for offline use of the notebook. When disabled, equations etc. will appear as their untransformed TeX source. """) def _enable_mathjax_changed(self, name, old, new): """set mathjax url to empty if mathjax is disabled""" if not new: self.mathjax_url = u'' base_url = Unicode('/', config=True, help='''The base URL for the notebook server. Leading and trailing slashes can be omitted, and will automatically be added.
''') def _base_url_changed(self, name, old, new): if not new.startswith('/'): self.base_url = '/' + new elif not new.endswith('/'): self.base_url = new + '/' base_project_url = Unicode('/', config=True, help="""DEPRECATED use base_url""") def _base_project_url_changed(self, name, old, new): self.log.warning("base_project_url is deprecated, use base_url") self.base_url = new extra_static_paths = List( Unicode(), config=True, help="""Extra paths to search for serving static files. This allows adding javascript/css to be available from the notebook server machine, or overriding individual files in the default static files.""") @property def static_file_path(self): """return extra paths + the default location""" return self.extra_static_paths + [DEFAULT_STATIC_FILES_PATH] static_custom_path = List(Unicode(), help="""Path to search for custom.js, css""") def _static_custom_path_default(self): return [ os.path.join(d, 'custom') for d in (self.config_dir, DEFAULT_STATIC_FILES_PATH) ] extra_template_paths = List( Unicode(), config=True, help="""Extra paths to search for serving jinja templates. Can be used to override templates from notebook.templates.""") @property def template_file_path(self): """return extra paths + the default locations""" return self.extra_template_paths + DEFAULT_TEMPLATE_PATH_LIST extra_nbextensions_path = List( Unicode(), config=True, help="""extra paths to look for Javascript notebook extensions""") @property def nbextensions_path(self): """The path to look for Javascript notebook extensions""" path = self.extra_nbextensions_path + jupyter_path('nbextensions') # FIXME: remove IPython nbextensions path after a migration period try: from IPython.paths import get_ipython_dir except ImportError: pass else: path.append(os.path.join(get_ipython_dir(), 'nbextensions')) return path websocket_url = Unicode("", config=True, help="""The base URL for websockets, if it differs from the HTTP server (hint: it almost certainly doesn't). Should be in the form of an HTTP origin: ws[s]://hostname[:port] """) mathjax_url = Unicode("", config=True, help="""The url for MathJax.js.""") def _mathjax_url_default(self): if not self.enable_mathjax: return u'' static_url_prefix = self.tornado_settings.get("static_url_prefix", "static") return url_path_join(static_url_prefix, 'components', 'MathJax', 'MathJax.js') def _mathjax_url_changed(self, name, old, new): if new and not self.enable_mathjax: # enable_mathjax=False overrides mathjax_url self.mathjax_url = u'' else: self.log.info("Using MathJax: %s", new) contents_manager_class = Type(default_value=FileContentsManager, klass=ContentsManager, config=True, help='The notebook manager class to use.') kernel_manager_class = Type(default_value=MappingKernelManager, config=True, help='The kernel manager class to use.') session_manager_class = Type(default_value=SessionManager, config=True, help='The session manager class to use.') config_manager_class = Type(default_value=ConfigManager, config=True, help='The config manager class to use') kernel_spec_manager = Instance(KernelSpecManager, allow_none=True) kernel_spec_manager_class = Type(default_value=KernelSpecManager, config=True, help=""" The kernel spec manager class to use. Should be a subclass of `jupyter_client.kernelspec.KernelSpecManager`. The API of KernelSpecManager is provisional and might change without warning between this version of Jupyter and the next stable one.
""") login_handler_class = Type( default_value=LoginHandler, klass=web.RequestHandler, config=True, help='The login handler class to use.', ) logout_handler_class = Type( default_value=LogoutHandler, klass=web.RequestHandler, config=True, help='The logout handler class to use.', ) trust_xheaders = Bool( False, config=True, help= ("Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded-For headers" "sent by the upstream reverse proxy. Necessary if the proxy handles SSL" )) info_file = Unicode() def _info_file_default(self): info_file = "nbserver-%s.json" % os.getpid() return os.path.join(self.runtime_dir, info_file) pylab = Unicode('disabled', config=True, help=""" DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. """) def _pylab_changed(self, name, old, new): """when --pylab is specified, display a warning and exit""" if new != 'warn': backend = ' %s' % new else: backend = '' self.log.error( "Support for specifying --pylab on the command line has been removed." ) self.log.error( "Please use `%pylab{0}` or `%matplotlib{0}` in the notebook itself." .format(backend)) self.exit(1) notebook_dir = Unicode( config=True, help="The directory to use for notebooks and kernels.") def _notebook_dir_default(self): if self.file_to_run: return os.path.dirname(os.path.abspath(self.file_to_run)) else: return py3compat.getcwd() def _notebook_dir_validate(self, value, trait): # Strip any trailing slashes # *except* if it's root _, path = os.path.splitdrive(value) if path == os.sep: return value value = value.rstrip(os.sep) if not os.path.isabs(value): # If we receive a non-absolute path, make it absolute. value = os.path.abspath(value) if not os.path.isdir(value): raise TraitError("No such notebook dir: %r" % value) return value def _notebook_dir_changed(self, name, old, new): """Do a bit of validation of the notebook dir.""" # setting App.notebook_dir implies setting notebook and kernel dirs as well self.config.FileContentsManager.root_dir = new self.config.MappingKernelManager.root_dir = new server_extensions = List( Unicode(), config=True, help=( "Python modules to load as notebook server extensions. " "This is an experimental API, and may change in future releases.")) reraise_server_extension_failures = Bool( False, config=True, help="Reraise exceptions encountered loading server extensions?", ) iopub_msg_rate_limit = Float(0, config=True, help="""(msg/sec) Maximum rate at which messages can be sent on iopub before they are limited.""") iopub_data_rate_limit = Float(0, config=True, help="""(bytes/sec) Maximum rate at which messages can be sent on iopub before they are limited.""") rate_limit_window = Float(1.0, config=True, help="""(sec) Time window used to check the message and data rate limits.""") def parse_command_line(self, argv=None): super(NotebookApp, self).parse_command_line(argv) if self.extra_args: arg0 = self.extra_args[0] f = os.path.abspath(arg0) self.argv.remove(arg0) if not os.path.exists(f): self.log.critical("No such file or directory: %s", f) self.exit(1) # Use config here, to ensure that it takes higher priority than # anything that comes from the config dirs. 
c = Config() if os.path.isdir(f): c.NotebookApp.notebook_dir = f elif os.path.isfile(f): c.NotebookApp.file_to_run = f self.update_config(c) def init_configurables(self): self.kernel_spec_manager = self.kernel_spec_manager_class( parent=self, ) self.kernel_manager = self.kernel_manager_class( parent=self, log=self.log, connection_dir=self.runtime_dir, kernel_spec_manager=self.kernel_spec_manager, ) self.contents_manager = self.contents_manager_class( parent=self, log=self.log, ) self.session_manager = self.session_manager_class( parent=self, log=self.log, kernel_manager=self.kernel_manager, contents_manager=self.contents_manager, ) self.config_manager = self.config_manager_class( parent=self, log=self.log, config_dir=os.path.join(self.config_dir, 'nbconfig'), ) def init_logging(self): # This prevents double log messages because tornado uses a root logger that # self.log is a child of. The logging module dispatches log messages to a logger # and all of its ancestors until propagate is set to False. self.log.propagate = False for log in app_log, access_log, gen_log: # consistent log output name (NotebookApp instead of tornado.access, etc.) log.name = self.log.name # hook up tornado 3's loggers to our app handlers logger = logging.getLogger('tornado') logger.propagate = True logger.parent = self.log logger.setLevel(self.log.level) def init_webapp(self): """initialize tornado webapp and httpserver""" self.tornado_settings['allow_origin'] = self.allow_origin if self.allow_origin_pat: self.tornado_settings['allow_origin_pat'] = re.compile( self.allow_origin_pat) self.tornado_settings['allow_credentials'] = self.allow_credentials self.tornado_settings['cookie_options'] = self.cookie_options # ensure default_url starts with base_url if not self.default_url.startswith(self.base_url): self.default_url = url_path_join(self.base_url, self.default_url) if self.password_required and (not self.password): self.log.critical( "Notebook servers are configured to only be run with a password." ) self.log.critical( "Hint: run the following command to set a password") self.log.critical("\t$ python -m notebook.auth password") sys.exit(1) self.web_app = NotebookWebApplication( self, self.kernel_manager, self.contents_manager, self.session_manager, self.kernel_spec_manager, self.config_manager, self.log, self.base_url, self.default_url, self.tornado_settings, self.jinja_environment_options) ssl_options = self.ssl_options if self.certfile: ssl_options['certfile'] = self.certfile if self.keyfile: ssl_options['keyfile'] = self.keyfile if self.client_ca: ssl_options['ca_certs'] = self.client_ca if not ssl_options: # None indicates no SSL config ssl_options = None else: # SSL may be missing, so only import it if it's to be used import ssl # Disable SSLv3, since its use is discouraged. ssl_options['ssl_version'] = ssl.PROTOCOL_TLSv1 if ssl_options.get('ca_certs', False): ssl_options['cert_reqs'] = ssl.CERT_REQUIRED self.login_handler_class.validate_security(self, ssl_options=ssl_options) self.http_server = httpserver.HTTPServer(self.web_app, ssl_options=ssl_options, xheaders=self.trust_xheaders) success = None for port in random_ports(self.port, self.port_retries + 1): try: self.http_server.listen(port, self.ip) except socket.error as e: if e.errno == errno.EADDRINUSE: self.log.info( 'The port %i is already in use, trying another port.'
% port) continue elif e.errno in (errno.EACCES, getattr(errno, 'WSAEACCES', errno.EACCES)): self.log.warning("Permission to listen on port %i denied" % port) continue else: raise else: self.port = port success = True break if not success: self.log.critical( 'ERROR: the notebook server could not be started because ' 'no available port could be found.') self.exit(1) @property def display_url(self): ip = self.ip if self.ip else '[all ip addresses on your system]' return self._url(ip) @property def connection_url(self): ip = self.ip if self.ip else 'localhost' return self._url(ip) def _url(self, ip): proto = 'https' if self.certfile else 'http' return "%s://%s:%i%s" % (proto, ip, self.port, self.base_url) def init_terminals(self): try: from .terminal import initialize initialize(self.web_app, self.notebook_dir, self.connection_url) self.web_app.settings['terminals_available'] = True except ImportError as e: log = self.log.debug if sys.platform == 'win32' else self.log.warning log("Terminals not available (error was %s)", e) def init_signal(self): if not sys.platform.startswith('win') and sys.stdin.isatty(): signal.signal(signal.SIGINT, self._handle_sigint) signal.signal(signal.SIGTERM, self._signal_stop) if hasattr(signal, 'SIGUSR1'): # Windows doesn't support SIGUSR1 signal.signal(signal.SIGUSR1, self._signal_info) if hasattr(signal, 'SIGINFO'): # only on BSD-based systems signal.signal(signal.SIGINFO, self._signal_info) def _handle_sigint(self, sig, frame): """SIGINT handler spawns confirmation dialog""" # register more forceful signal handler for ^C^C case signal.signal(signal.SIGINT, self._signal_stop) # request confirmation dialog in bg thread, to avoid # blocking the App thread = threading.Thread(target=self._confirm_exit) thread.daemon = True thread.start() def _restore_sigint_handler(self): """callback for restoring original SIGINT handler""" signal.signal(signal.SIGINT, self._handle_sigint) def _confirm_exit(self): """confirm shutdown on ^C A second ^C, or answering 'y' within 5s will cause shutdown, otherwise original SIGINT handler will be restored. This doesn't work on Windows. """ info = self.log.info info('interrupted') print(self.notebook_info()) sys.stdout.write("Shutdown this notebook server (y/[n])? ") sys.stdout.flush() r, w, x = select.select([sys.stdin], [], [], 5) if r: line = sys.stdin.readline() if line.lower().startswith('y') and 'n' not in line.lower(): self.log.critical("Shutdown confirmed") ioloop.IOLoop.current().stop() return else: print("No answer for 5s:", end=' ') print("resuming operation...") # no answer, or answer is no: # set it back to original SIGINT handler # use IOLoop.add_callback because signal.signal must be called # from main thread ioloop.IOLoop.current().add_callback(self._restore_sigint_handler) def _signal_stop(self, sig, frame): self.log.critical("received signal %s, stopping", sig) ioloop.IOLoop.current().stop() def _signal_info(self, sig, frame): print(self.notebook_info()) def init_components(self): """Check the components submodule, and warn if it's unclean""" # TODO: this should still check, but now we use bower, not git submodule pass def init_server_extensions(self): """Load any extensions specified by config. Import the module, then call the load_jupyter_server_extension function, if one exists. The extension API is experimental, and may change in future releases. 
""" for modulename in self.server_extensions: try: mod = importlib.import_module(modulename) func = getattr(mod, 'load_jupyter_server_extension', None) if func is not None: func(self) except Exception: if self.reraise_server_extension_failures: raise self.log.warning("Error loading server extension %s", modulename, exc_info=True) @catch_config_error def initialize(self, argv=None): super(NotebookApp, self).initialize(argv) self.init_logging() if self._dispatching: return self.init_configurables() self.init_components() self.init_webapp() self.init_terminals() self.init_signal() self.init_server_extensions() def cleanup_kernels(self): """Shutdown all kernels. The kernels will shutdown themselves when this process no longer exists, but explicit shutdown allows the KernelManagers to cleanup the connection files. """ self.log.info('Shutting down kernels') self.kernel_manager.shutdown_all() def notebook_info(self): "Return the current working directory and the server url information" info = self.contents_manager.info_string() + "\n" info += "%d active kernels \n" % len(self.kernel_manager._kernels) return info + "The Jupyter Notebook is running at: %s" % self.display_url def server_info(self): """Return a JSONable dict of information about this server.""" return { 'url': self.connection_url, 'hostname': self.ip if self.ip else 'localhost', 'port': self.port, 'secure': bool(self.certfile), 'base_url': self.base_url, 'notebook_dir': os.path.abspath(self.notebook_dir), 'pid': os.getpid() } def write_server_info_file(self): """Write the result of server_info() to the JSON file info_file.""" with open(self.info_file, 'w') as f: json.dump(self.server_info(), f, indent=2) def remove_server_info_file(self): """Remove the nbserver-<pid>.json file created for this server. Ignores the error raised when the file has already been removed. """ try: os.unlink(self.info_file) except OSError as e: if e.errno != errno.ENOENT: raise def start(self): """ Start the Notebook server app, after initialization This method takes no arguments so all configuration and initialization must be done prior to calling this method.""" if not self.allow_root: # check if we are running as root, and abort if it's not allowed try: uid = os.geteuid() except AttributeError: uid = -1 # anything nonzero here, since we can't check UID assume non-root if uid == 0: self.log.critical( "Running as root is not recommended. Use --allow-root to bypass." ) self.exit(1) super(NotebookApp, self).start() info = self.log.info for line in self.notebook_info().split("\n"): info(line) info( "Use Control-C to stop this server and shut down all kernels (twice to skip confirmation)." ) self.write_server_info_file() if self.open_browser or self.file_to_run: try: browser = webbrowser.get(self.browser or None) except webbrowser.Error as e: self.log.warning('No web browser found: %s.' 
% e) browser = None if self.file_to_run: if not os.path.exists(self.file_to_run): self.log.critical("%s does not exist" % self.file_to_run) self.exit(1) relpath = os.path.relpath(self.file_to_run, self.notebook_dir) uri = url_escape( url_path_join('notebooks', *relpath.split(os.sep))) else: uri = self.default_url if browser: b = lambda: browser.open( url_path_join(self.connection_url, uri), new=2) threading.Thread(target=b).start() self.io_loop = ioloop.IOLoop.current() if sys.platform.startswith('win'): # add no-op to wake every 5s # to handle signals that may be ignored by the inner loop pc = ioloop.PeriodicCallback(lambda: None, 5000) pc.start() try: self.io_loop.start() except KeyboardInterrupt: info("Interrupted...") finally: self.cleanup_kernels() self.remove_server_info_file() def stop(self): def _stop(): self.http_server.stop() self.io_loop.stop() self.io_loop.add_callback(_stop)
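# Illustrative sketch: a jupyter_notebook_config.py fragment for the
# network/security traits defined above. Certificate paths and the port are
# placeholders; a real `password` hash comes from notebook.auth.passwd() as
# described in that trait's help text.
c = get_config()  # noqa: F821
c.NotebookApp.ip = '0.0.0.0'            # listen on all interfaces
c.NotebookApp.port = 9999
c.NotebookApp.certfile = '/etc/ssl/notebook.pem'
c.NotebookApp.keyfile = '/etc/ssl/notebook.key'
c.NotebookApp.open_browser = False
c.NotebookApp.password_required = True  # refuse to start without a password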
class Spawner(LoggingConfigurable): """Base class for spawning single-user notebook servers. Subclass this, and override the following methods: - load_state - get_state - start - stop - poll """ db = Any() user = Any() hub = Any() authenticator = Any() api_token = Unicode() ip = Unicode('127.0.0.1', help="The IP address (or hostname) the single-user server should listen on" ).tag(config=True) port = Integer(0, help="The port for single-user servers to listen on. New in version 0.7." ) start_timeout = Integer(60, help="""Timeout (in seconds) before giving up on the spawner. This is the timeout for start to return, not the timeout for the server to respond. Callers of spawner.start will assume that startup has failed if it takes longer than this. start should return when the server process is started and its location is known. """ ).tag(config=True) http_timeout = Integer(30, help="""Timeout (in seconds) before giving up on a spawned HTTP server Once a server has successfully been spawned, this is the amount of time we wait before assuming that the server is unable to accept connections. """ ).tag(config=True) poll_interval = Integer(30, help="""Interval (in seconds) on which to poll the spawner.""" ).tag(config=True) _callbacks = List() _poll_callback = Any() debug = Bool(False, help="Enable debug-logging of the single-user server" ).tag(config=True) options_form = Unicode("", help=""" An HTML form for options a user can specify on launching their server. The surrounding `<form>` element and the submit button are already provided. For example: Set your key: <input name="key" value="default_key"> <br> Choose a letter: <select name="letter" multiple="true"> <option value="A">The letter A</option> <option value="B">The letter B</option> </select> """).tag(config=True) def options_from_form(self, form_data): """Interpret HTTP form data Form data will always arrive as a dict of lists of strings. Override this function to understand single-values, numbers, etc. This should coerce form data into the structure expected by self.user_options, which must be a dict. Instances will receive this data on self.user_options, after passing through this function, prior to `Spawner.start`. """ return form_data user_options = Dict(help="This is where form-specified options ultimately end up.") env_keep = List([ 'PATH', 'PYTHONPATH', 'CONDA_ROOT', 'CONDA_DEFAULT_ENV', 'VIRTUAL_ENV', 'LANG', 'LC_ALL', ], help="Whitelist of environment variables for the subprocess to inherit" ).tag(config=True) env = Dict(help="""Deprecated: use Spawner.get_env or Spawner.environment - extend Spawner.get_env for adding required env in Spawner subclasses - Spawner.environment for config-specified env """) environment = Dict( help="""Environment variables to load for the Spawner. Value could be a string or a callable. If it is a callable, it will be called with one parameter, which will be the instance of the spawner in use. It should quickly (without performing blocking operations) return a string that will be used as the value for the environment variable.
""" ).tag(config=True) cmd = Command(['jupyterhub-singleuser'], help="""The command used for starting notebooks.""" ).tag(config=True) args = List(Unicode(), help="""Extra arguments to be passed to the single-user server""" ).tag(config=True) notebook_dir = Unicode('', help="""The notebook directory for the single-user server `~` will be expanded to the user's home directory `{username}` will be expanded to the user's username """ ).tag(config=True) default_url = Unicode('', help="""The default URL for the single-user server. Can be used in conjunction with --notebook-dir=/ to enable full filesystem traversal, while preserving user's homedir as landing page for notebook `{username}` will be expanded to the user's username """ ).tag(config=True) @validate('notebook_dir', 'default_url') def _deprecate_percent_u(self, proposal): print(proposal) v = proposal['value'] if '%U' in v: self.log.warning("%%U for username in %s is deprecated in JupyterHub 0.7, use {username}", proposal['trait'].name, ) v = v.replace('%U', '{username}') self.log.warning("Converting %r to %r", proposal['value'], v) return v disable_user_config = Bool(False, help="""Disable per-user configuration of single-user servers. This prevents any config in users' $HOME directories from having an effect on their server. """ ).tag(config=True) def __init__(self, **kwargs): super(Spawner, self).__init__(**kwargs) if self.user.state: self.load_state(self.user.state) def load_state(self, state): """load state from the database This is the extensible part of state Override in a subclass if there is state to load. Should call `super`. See Also -------- get_state, clear_state """ pass def get_state(self): """store the state necessary for load_state A black box of extra state for custom spawners. Subclasses should call `super`. Returns ------- state: dict a JSONable dict of state """ state = {} return state def clear_state(self): """clear any state that should be cleared when the process stops State that should be preserved across server instances should not be cleared. Subclasses should call super, to ensure that state is properly cleared. """ self.api_token = '' def get_env(self): """Return the environment dict to use for the Spawner. This applies things like `env_keep`, anything defined in `Spawner.environment`, and adds the API token to the env. Use this to access the env in Spawner.start to allow extension in subclasses. """ env = {} if self.env: warnings.warn("Spawner.env is deprecated, found %s" % self.env, DeprecationWarning) env.update(self.env) for key in self.env_keep: if key in os.environ: env[key] = os.environ[key] # config overrides. If the value is a callable, it will be called with # one parameter - the current spawner instance - and the return value # will be assigned to the environment variable. This will be called at # spawn time. for key, value in self.environment.items(): if callable(value): env[key] = value(self) else: env[key] = value env['JPY_API_TOKEN'] = self.api_token return env def template_namespace(self): """Return the template namespace for format-string formatting. Currently used on default_url and notebook_dir. Subclasses may add items to the available namespace. The default implementation includes:: { 'username': user.name, 'base_url': users_base_url, } Returns: ns (dict): namespace for string formatting. 
""" d = {'username': self.user.name} if self.user.server: d['base_url'] = self.user.server.base_url return d def format_string(self, s): """Render a Python format string Uses :meth:`Spawner.template_namespace` to populate format namespace. Args: s (str): Python format-string to be formatted. Returns: str: Formatted string, rendered """ return s.format(**self.template_namespace()) def get_args(self): """Return the arguments to be passed after self.cmd""" args = [ '--user=%s' % self.user.name, '--cookie-name=%s' % self.user.server.cookie_name, '--base-url=%s' % self.user.server.base_url, '--hub-host=%s' % self.hub.host, '--hub-prefix=%s' % self.hub.server.base_url, '--hub-api-url=%s' % self.hub.api_url, ] if self.ip: args.append('--ip=%s' % self.ip) if self.port: args.append('--port=%i' % self.port) elif self.user.server.port: self.log.warning("Setting port from user.server is deprecated as of JupyterHub 0.7.") args.append('--port=%i' % self.user.server.port) if self.notebook_dir: notebook_dir = self.format_string(self.notebook_dir) args.append('--notebook-dir=%s' % notebook_dir) if self.default_url: default_url = self.format_string(self.default_url) args.append('--NotebookApp.default_url=%s' % default_url) if self.debug: args.append('--debug') if self.disable_user_config: args.append('--disable-user-config') args.extend(self.args) return args @gen.coroutine def start(self): """Start the single-user server Returns: (ip, port): the ip, port where the Hub can connect to the server. .. versionchanged:: 0.7 Return ip, port instead of setting on self.user.server directly. """ raise NotImplementedError("Override in subclass. Must be a Tornado gen.coroutine.") @gen.coroutine def stop(self, now=False): """Stop the single-user process""" raise NotImplementedError("Override in subclass. Must be a Tornado gen.coroutine.") @gen.coroutine def poll(self): """Check if the single-user process is running return None if it is, an exit status (0 if unknown) if it is not. """ raise NotImplementedError("Override in subclass. Must be a Tornado gen.coroutine.") def add_poll_callback(self, callback, *args, **kwargs): """add a callback to fire when the subprocess stops as noticed by periodic poll_and_notify() """ if args or kwargs: cb = callback callback = lambda : cb(*args, **kwargs) self._callbacks.append(callback) def stop_polling(self): """stop the periodic poll""" if self._poll_callback: self._poll_callback.stop() self._poll_callback = None def start_polling(self): """Start polling periodically callbacks registered via `add_poll_callback` will fire if/when the process stops. Explicit termination via the stop method will not trigger the callbacks. 
""" if self.poll_interval <= 0: self.log.debug("Not polling subprocess") return else: self.log.debug("Polling subprocess every %is", self.poll_interval) self.stop_polling() self._poll_callback = PeriodicCallback( self.poll_and_notify, 1e3 * self.poll_interval ) self._poll_callback.start() @gen.coroutine def poll_and_notify(self): """Used as a callback to periodically poll the process, and notify any watchers """ status = yield self.poll() if status is None: # still running, nothing to do here return self.stop_polling() for callback in self._callbacks: try: yield gen.maybe_future(callback()) except Exception: self.log.exception("Unhandled error in poll callback for %s", self) return status death_interval = Float(0.1) @gen.coroutine def wait_for_death(self, timeout=10): """wait for the process to die, up to timeout seconds""" for i in range(int(timeout / self.death_interval)): status = yield self.poll() if status is not None: break else: yield gen.sleep(self.death_interval)
class QGridWidget(widgets.DOMWidget): _view_module = Unicode("nbextensions/qgridjs/qgrid.widget", sync=True) _view_name = Unicode('QGridView', sync=True) _df_json = Unicode('', sync=True) _column_types_json = Unicode('', sync=True) _index_name = Unicode('') _initialized = Bool(False) _dirty = Bool(False) _cdn_base_url = Unicode(LOCAL_URL, sync=True) _multi_index = Bool(False) _selected_rows = List() df = Instance(pd.DataFrame) precision = Integer(6) grid_options = Dict(sync=True) remote_js = Bool(False) def __init__(self, *args, **kwargs): """Initialize all variables before building the table.""" self._initialized = False super(QGridWidget, self).__init__(*args, **kwargs) # register a callback for custom messages self.on_msg(self._handle_qgrid_msg) self._initialized = True self._selected_rows = [] if self.df is not None: self._update_table() def _grid_options_default(self): return defaults.grid_options def _remote_js_default(self): return defaults.remote_js def _precision_default(self): return defaults.precision def _df_changed(self): """Build the Data Table for the DataFrame.""" if not self._initialized: return self._update_table() self.send({'type': 'draw_table'}) def _update_table(self): df = self.df.copy() if not df.index.name: df.index.name = 'Index' if type(df.index) == pd.core.index.MultiIndex: df.reset_index(inplace=True) self._multi_index = True else: df.insert(0, df.index.name, df.index) self._multi_index = False self._index_name = df.index.name or 'Index' tc = dict(np.typecodes) for key in np.typecodes.keys(): if "All" in key: del tc[key] column_types = [] for col_name, dtype in df.dtypes.iteritems(): if str(dtype) == 'category': categories = list(df[col_name].cat.categories) column_type = { 'field': col_name, 'categories': ','.join(categories) } # XXXX: work around bug in to_json for categorical types # https://github.com/pydata/pandas/issues/10778 df[col_name] = df[col_name].astype(str) column_types.append(column_type) continue column_type = {'field': col_name} for type_name, type_codes in tc.items(): if dtype.kind in type_codes: column_type['type'] = type_name break column_types.append(column_type) self._column_types_json = json.dumps(column_types) self._df_json = df.to_json( orient='records', date_format='iso', double_precision=self.precision, ) self._cdn_base_url = REMOTE_URL if self.remote_js else LOCAL_URL self._dirty = False def add_row(self, value=None): """Append a row at the end of the dataframe.""" df = self.df if not df.index.is_integer(): msg = "Cannot add a row to a table with a non-integer index" display(Javascript('alert("%s")' % msg)) return last = df.iloc[-1] last.name += 1 df.loc[last.name] = last.values precision = pd.get_option('display.precision') - 1 row_data = last.to_json(date_format='iso', double_precision=precision) msg = json.loads(row_data) msg[self._index_name] = str(last.name) msg['id'] = str(last.name) msg['type'] = 'add_row' self._dirty = True self.send(msg) def remove_row(self, value=None): """Remove the current row from the table""" if self._multi_index: msg = "Cannot remove a row from a table with a multi index" display(Javascript('alert("%s")' % msg)) return self.send({'type': 'remove_row'}) def _handle_qgrid_msg(self, widget, content, buffers=None): """Handle incoming messages from the QGridView""" if 'type' not in content: return if content['type'] == 'remove_row': self.df.drop(content['row'], inplace=True) self._dirty = True elif content['type'] == 'cell_change': try: self.df.set_value(self.df.index[content['row']], content['column'], 
content['value']) self._dirty = True except ValueError: pass elif content['type'] == 'selection_change': self._selected_rows = content['rows'] def get_selected_rows(self): """Get the currently selected rows""" return self._selected_rows def export(self, value=None): if self._dirty: self._update_table() base_url = REMOTE_URL div_id = str(uuid.uuid4()) grid_options = self.grid_options grid_options['editable'] = False raw_html = SLICK_GRID_CSS.format( div_id=div_id, cdn_base_url=base_url, ) raw_js = SLICK_GRID_JS.format( cdn_base_url=base_url, div_id=div_id, data_frame_json=self._df_json, column_types_json=self._column_types_json, options_json=json.dumps(grid_options), ) display_html(raw_html, raw=True) display_javascript(raw_js, raw=True)
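# Illustrative sketch: driving the widget above from a notebook cell. Only
# QGridWidget, pandas, and numpy are assumed; the column names and values
# are arbitrary.
import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.arange(5), 'y': np.linspace(0.0, 1.0, 5)})
grid = QGridWidget(df=df, precision=4)  # __init__ builds the table JSON
grid.add_row()                          # appends a copy of the last row
selected = grid.get_selected_rows()     # rows picked in the JS view, if any
grid                                    # last expression renders the widget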
class SubgradientSchedule(LoggingConfigurable): cpu_quota = Integer(0, help='''shared cpu quota''').tag(config=True) # unit: logical cores gpu_quota = List([], help='''shared gpu quota''').tag(config=True) # list of gpu card indices cpu_mem_quota = Integer(0, help='''shared cpu memory quota''').tag( config=True) # unit: GB gpu_mem_quota = Integer(-1, help='''shared gpu memory quota''').tag( config=True) # unit: GB def __init__(self, db, workspace, **kwargs): super(SubgradientSchedule, self).__init__(**kwargs) self._db = db self._subgradient_server = SubgradientServer(self, **kwargs) self._workspace = workspace # 1. analyze cpu resources self.log.info('analyzing cpu resources') self._cpu = CPU() logical_cores_num = len(self.cpu.cpu_logical_cores()) if self.cpu_quota <= 0 or self.cpu_quota > logical_cores_num: self.cpu_quota = logical_cores_num total_mem = int(self.cpu.cpu_total_mem()) if self.cpu_mem_quota <= 0 or self.cpu_mem_quota > total_mem: self.cpu_mem_quota = total_mem # 2. analyze gpu resources self.log.info('analyzing gpu resources') self._gpu = GPU() if self.gpu.is_gpu_ok: if len(self.gpu_quota) == 0 or len( self.gpu_quota) > self.gpu.gpu_physical_cards(): self.gpu_quota = [ i for i in range(self.gpu.gpu_physical_cards()) ] self.gpu_mem_quota = self.gpu.gpu_total_mem(0) else: self.gpu_quota = [] self.gpu_mem_quota = 0 # 3. measure disk io speed (normalized to MB/s) disk_result = disktest.shell() disk_read_speed, disk_read_speed_unit = disk_result['read'] if disk_read_speed_unit == 'GB/s': disk_read_speed = float(disk_read_speed) * 1024.0 elif disk_read_speed_unit == 'KB/s': disk_read_speed = float(disk_read_speed) / 1024.0 disk_write_speed, disk_write_speed_unit = disk_result['write'] if disk_write_speed_unit == 'GB/s': disk_write_speed = float(disk_write_speed) * 1024.0 elif disk_write_speed_unit == 'KB/s': disk_write_speed = float(disk_write_speed) / 1024.0 self.disk_read = disk_read_speed self.disk_write = disk_write_speed # 4. measure net speed (skipped) # download_speed, upload_speed = speedtest.shell() download_speed, upload_speed = 0.0, 0.0 self.upload_speed = upload_speed self.download_speed = download_speed if not os.path.exists(self.workspace): os.makedirs(self.workspace) @property def db(self): return self._db @property def workspace(self): return self._workspace @property def cpu(self): return self._cpu @property def gpu(self): return self._gpu @property def schedule_cpu_quota(self): return self.cpu_quota @property def schedule_gpu_quota(self): return self.gpu_quota @property def schedule_cpu_mem_quota(self): return self.cpu_mem_quota @property def schedule_gpu_mem_quota(self): return self.gpu_mem_quota @property def net_speed(self): # unit Mbit/s return self.upload_speed, self.download_speed @property def disk_speed(self): return self.disk_read, self.disk_write @property def subgradient_server(self): return self._subgradient_server def _check_free_resource(self, processing_orders, cpu_model, cpu_num, cpu_mem, gpu_model=None, gpu_num=None, gpu_mem=None): # 1. check cpu-related resources # 1.1 check the requested cpu model (temporary note) if cpu_model is not None: share_cpu_model_names = self.cpu.cpu_model_name() if cpu_model not in share_cpu_model_names: self.log.error('requested cpu model is not supported by this node') return False # 1.2 check the requested cpu count cpu_num = 1 if cpu_num is None else cpu_num occupied_cpu = 0 for order in processing_orders: occupied_cpu += order.stock.cpu_num if occupied_cpu > self.cpu_quota: self.log.error('running servers already occupy more cpus than the quota') return False now_free_cpu = self.cpu_quota
- occupied_cpu if now_free_cpu < cpu_num: self.log.error('not enough cpus for this request') return False # 1.3.step check request cpu memory cpu_mem = 1 if cpu_mem is None else cpu_mem occupied_cpu_mem = 0 for order in processing_orders: occupied_cpu_mem += order.stock.cpu_mem if occupied_cpu_mem > self.cpu_mem_quota: self.log.error('running servers already occupy more cpu memory than the quota') return False now_free_cpu_mems = self.cpu_mem_quota - occupied_cpu_mem if now_free_cpu_mems < cpu_mem: self.log.error('not enough cpu memory for this request') return False # 2.step check gpu relevant resource if not self.gpu.is_gpu_ok: if gpu_model is not None and gpu_model != '': self.log.error('gpu is not supported on this node') return False if gpu_num is not None and gpu_num > 0: self.log.error('gpu is not supported on this node') return False if gpu_mem is not None and gpu_mem > 0: self.log.error('gpu is not supported on this node') return False return True # 2.1.step check request gpu model if gpu_model is not None: share_gpu_model_names = self.gpu.gpu_model_name() if gpu_model not in share_gpu_model_names: self.log.error('requested gpu model is not supported by this node') return False # 2.2.step check request gpu number gpu_num = 0 if gpu_num is None else gpu_num occupied_gpu = 0 for order in processing_orders: occupied_gpu += order.stock.gpu_num if occupied_gpu > len(self.gpu_quota): self.log.error('running servers already occupy more gpus than the quota') return False now_free_gpu = len(self.gpu_quota) - occupied_gpu if now_free_gpu < gpu_num: self.log.error('not enough gpus for this request') return False # 2.3.step check request gpu memory gpu_mem = 0 if gpu_mem is None else gpu_mem if gpu_mem > self.gpu.gpu_total_mem(0): self.log.error('not enough gpu memory for this request') return False return True def schedule_step_1_resource_check(self, order_info): # 1.step check free resource processing_orders = \ self.db.query(orm.Order).filter(or_(orm.Order.status == 1, orm.Order.status == 2)).all() processing_orders = [ mm for mm in processing_orders if mm != order_info['order'] ] running_config = order_info['running_config'] platform_config = order_info['platform_config'] order = order_info['order'] cpu_model = order.stock.cpu_model cpu_num = order.stock.cpu_num cpu_mem = order.stock.cpu_mem gpu_model = order.stock.gpu_model gpu_num = order.stock.gpu_num gpu_mem = order.stock.gpu_mem is_resource_ok = self._check_free_resource(processing_orders, cpu_model, cpu_num, cpu_mem, gpu_model, gpu_num, gpu_mem) if is_resource_ok: order.status = 2 self.db.commit() else: order.status = -1 self.db.commit() self.log.error('%s: cpu %d mem %d gpu %d gpu_mem %d' % ('NO_RESOURCE', cpu_num, cpu_mem, gpu_num, gpu_mem)) return { 'result': 'fail', 'reason': 'NO_RESOURCE_ERROR', 'order': order } # 2.step assign custom workspace order_workspace = os.path.join(self.workspace, order.name) if not os.path.exists(order_workspace): os.makedirs(order_workspace) running_config.update({'workspace': order_workspace}) return { 'result': 'NOT_DONE_YET', 'processing_orders': processing_orders, 'running_config': running_config, 'platform_config': platform_config, 'order': order, } def schedule_step_2_launch_container(self, order_info): if order_info['result'] == 'fail': return order_info order = order_info['order'] try: running_config = order_info['running_config'] platform_config = order_info['platform_config'] result = self.subgradient_server.start(order, running_config, platform_config) result.update(order_info) return result except Exception: order.status = -1 self.db.commit() self.log.error('UNKNOWN_ERROR: could not launch
subgradient server') return {'result': 'fail', 'reason': 'UNKNOWN_ERROR', 'order': order} def schedule(self, order_info): # 1.step free resource check step_1_result = self.schedule_step_1_resource_check(order_info) # 2.step launch container step_2_result = self.schedule_step_2_launch_container(step_1_result) return step_2_result def dummy_schedule(self, order_info): # here, we could not continue to schedule the order order_info['order'].status = -1 self.db.commit() @thread_wrap def schedule_download_files(self, order_id, code_address, code_address_code=None): workspace = os.path.join(self.workspace, order_id) if not os.path.exists(workspace): os.makedirs(workspace) if code_address is None or code_address == '': with open(os.path.join(workspace, 'download.success'), 'w'): pass return if code_address.startswith('ipfs:'): # download from ipfs response = ipfs_download(code_address.replace('ipfs:', ''), workspace) if response: self.log.info( 'IPFS_DOWNLOAD_SUCCESS: downloaded dependent files for order %s' % order_id) try: with tarfile.open(os.path.join(workspace, 'code.tar.gz'), 'r:gz') as tar: tar.extractall(workspace) with open(os.path.join(workspace, 'download.success'), 'w'): pass except Exception: # clear incomplete data # if os.path.exists(os.path.join(workspace, 'code')): # shutil.rmtree(os.path.join(workspace, 'code')) if os.path.exists(os.path.join(workspace, 'code.tar.gz')): os.remove(os.path.join(workspace, 'code.tar.gz')) self.log.error('IPFS_DOWNLOAD_ERROR: for order %s' % order_id) with open(os.path.join(workspace, 'download.fail'), 'w'): pass else: # clear incomplete data if os.path.exists(os.path.join(workspace, 'code.tar.gz')): os.remove(os.path.join(workspace, 'code.tar.gz')) self.log.error('IPFS_DOWNLOAD_ERROR: for order %s' % order_id) with open(os.path.join(workspace, 'download.fail'), 'w'): pass elif code_address.startswith('qiniu:'): access_key = 'ZSC-X2p4HG5uvEtfmn5fsTZ5nqB3h54oKjHt0tU6' secret_key = 'Ya8qYwIDXZn6jSJDMz_ottWWOZqlbV8bDTNfCGO0' q = Auth(access_key, secret_key) base_url = code_address.replace('qiniu:', '') private_url = q.private_download_url(base_url, expires=3600) key = base_url.split('/')[-1] # code.tar.gz try: fpath, _ = urllib.request.urlretrieve( private_url, os.path.join(workspace, key)) statinfo = os.stat(fpath) size = statinfo.st_size if size == 0: self.log.error( 'FILE_DOWNLOAD_ERROR: empty file for order %s' % order_id) else: try: if code_address_code is not None: decrypto_shell = 'openssl enc -d -aes256 -in %s -out %s -k %s' % ( key, key.replace('_ssl', ''), code_address_code) subprocess.call(decrypto_shell, shell=True, cwd=workspace) with tarfile.open( os.path.join(workspace, key.replace('_ssl', '')), 'r:gz') as tar: tar.extractall(workspace) if os.path.getsize( os.path.join(workspace, key.replace('_ssl', ''))) == 0: self.log.error( 'FILE_DOWNLOAD_ERROR: for order %s' % order_id) with open(os.path.join(workspace, 'download.fail'), 'w'): pass return # clear tar file os.remove( os.path.join(workspace, key.replace('_ssl', ''))) os.remove(os.path.join(workspace, key)) self.log.info( 'FILE_DOWNLOAD_SUCCESS: downloaded dependent files for order %s' % order_id) with open(os.path.join(workspace, 'download.success'), 'w'): pass except Exception: # clear incomplete data # if os.path.exists(os.path.join(workspace, 'code')): # shutil.rmtree(os.path.join(workspace, 'code')) # if os.path.exists(os.path.join(workspace, 'code.tar.gz')): # os.remove(os.path.join(workspace, 'code.tar.gz')) self.log.error('FILE_DOWNLOAD_ERROR: for order %s' % order_id) with open(os.path.join(workspace,
'download.fail'), 'w'): pass except Exception: self.log.error('FILE_DOWNLOAD_ERROR: for order %s' % order_id) with open(os.path.join(workspace, 'download.fail'), 'w'): pass
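# --- Illustrative sketch: the read/write branches in __init__ above duplicate
# the unit conversion, which is exactly how the KB/s write-path bug (multiplying
# instead of dividing) crept in. A minimal shared helper, assuming the
# (value, unit) tuple format that disktest.shell() appears to return above:
def to_mb_per_s(speed, unit):
    # Normalize a disk benchmark reading to MB/s; `unit` is assumed to be
    # one of 'GB/s', 'MB/s', or 'KB/s'.
    factors = {'GB/s': 1024.0, 'MB/s': 1.0, 'KB/s': 1.0 / 1024.0}
    return float(speed) * factors[unit]

# Usage sketch inside SubgradientSchedule.__init__:
#   disk_result = disktest.shell()
#   self.disk_read = to_mb_per_s(*disk_result['read'])
#   self.disk_write = to_mb_per_s(*disk_result['write'])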
class SSHSpawner(Spawner): # http://traitlets.readthedocs.io/en/stable/migration.html#separation-of-metadata-and-keyword-arguments-in-traittype-contructors # config is an unrecognized keyword remote_hosts = List( trait=Unicode(), help="Possible remote hosts from which to choose remote_host.", config=True) # Removed 'config=True' tag. # Any user configuration of remote_host is redundant. # The spawner now chooses the value of remote_host. remote_host = Unicode("remote_host", help="SSH remote host to spawn sessions on") remote_port = Unicode("22", help="SSH remote port number", config=True) ssh_command = Unicode("/usr/bin/ssh", help="Actual SSH command", config=True) path = Unicode("/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin", help="Default PATH (should include jupyter and python)", config=True) # The get_port.py script is in scripts/get_port.py # FIXME See if we can avoid having to deploy a script on the remote side? # For instance, we could just install sshspawner on the remote side # as a package and have it put get_port.py in the right place. # If we were fancy it could be configurable so it could be restricted # to specific ports. remote_port_command = Unicode( "/usr/bin/python /usr/local/bin/get_port.py", help="Command to return unused port on remote host", config=True) # FIXME Fix help, what happens when not set? hub_api_url = Unicode( "", help=dedent("""If set, Spawner will configure the containers to use the specified URL to connect to the hub api. This is useful when the hub_api is bound to listen on all ports or is running inside of a container."""), config=True) ssh_keyfile = Unicode( "~/.ssh/id_rsa", help=dedent("""Key file used to authenticate hub with remote host. Assumes use_gsi=False. (use_gsi=False is deprecated) `~` will be expanded to the user's home directory and `{username}` will be expanded to the user's username"""), config=True) # DEPRECATED use_gsi = Bool( False, help="""Use GSI authentication instead of SSH keys. Assumes you have a cert/key pair at the right path. Use in conjunction with GSIAuthenticator. (Deprecated)""", config=True) gsi_cert_path = Unicode( "/tmp/x509_{username}", help=dedent("""GSI certificate used to authenticate hub with remote host. Assumes use_gsi=True. (Deprecated) `~` will be expanded to the user's home directory and `{username}` will be expanded to the user's username"""), config=True) gsi_key_path = Unicode( "/tmp/x509_{username}", help=dedent("""GSI key used to authenticate hub with remote host. Assumes use_gsi=True. (Deprecated) `~` will be expanded to the user's home directory and `{username}` will be expanded to the user's username"""), config=True) pid = Integer( 0, help=dedent("""Process ID of single-user server process spawned for current user.""")) # TODO When we add host pool, we need to keep host/ip too, not just PID. def load_state(self, state): """Restore state about ssh-spawned server after a hub restart. The ssh-spawned processes only need the process id.""" super().load_state(state) if "pid" in state: self.pid = state["pid"] # TODO When we add host pool, we need to keep host/ip too, not just PID. def get_state(self): """Save state needed to restore this spawner instance after hub restart. The ssh-spawned processes only need the process id.""" state = super().get_state() if self.pid: state["pid"] = self.pid return state # TODO When we add host pool, we need to clear host/ip too, not just PID.
def clear_state(self): """Clear stored state about this spawner (pid)""" super().clear_state() self.pid = 0 # FIXME this looks like it's done differently now, there is get_env which # actually calls this. def user_env(self): """Augment env of spawned process with user-specific env variables.""" # FIXME I think the JPY_ variables have been deprecated in JupyterHub # since 0.7.2, we should replace them. Can we figure this out? env = super(SSHSpawner, self).get_env() env.update( dict(JPY_USER=self.user.name, JPY_COOKIE_NAME=self.user.server.cookie_name, JPY_BASE_URL=self.user.server.base_url, JPY_HUB_PREFIX=self.hub.server.base_url, JUPYTERHUB_PREFIX=self.hub.server.base_url, PATH=self.path)) if self.notebook_dir: env['NOTEBOOK_DIR'] = self.notebook_dir hub_api_url = self.hub.api_url if self.hub_api_url != '': hub_api_url = self.hub_api_url env['JPY_HUB_API_URL'] = hub_api_url env['JUPYTERHUB_API_URL'] = hub_api_url return env async def start(self): """Start single-user server on remote host.""" self.remote_host = self.choose_remote_host() port = await self.remote_random_port() if port is None or port == 0: return False cmd = [] cmd.extend(self.cmd) cmd.extend(self.get_args()) if self.hub_api_url != "": old = "--hub-api-url={}".format(self.hub.api_url) new = "--hub-api-url={}".format(self.hub_api_url) for index, value in enumerate(cmd): if value == old: cmd[index] = new for index, value in enumerate(cmd): if value[0:6] == '--port': cmd[index] = '--port=%d' % (port) remote_cmd = ' '.join(cmd) # time.sleep(2) # import pdb; pdb.set_trace() self.pid = await self.exec_notebook(remote_cmd) self.log.debug("Starting User: {}, PID: {}".format( self.user.name, self.pid)) if self.pid < 0: return None # DEPRECATION: Spawner.start should return a url or (ip, port) tuple in JupyterHub >= 0.9 return (self.remote_host, port) async def poll(self): """Poll ssh-spawned process to see if it is still running. If it is still running return None. If it is not running return exit code of the process if we have access to it, or 0 otherwise.""" if not self.pid: # no pid, not running self.clear_state() return 0 # send signal 0 to check if PID exists alive = await self.remote_signal(0) self.log.debug("Polling returned {}".format(alive)) if not alive: self.clear_state() return 0 else: return None async def stop(self, now=False): """Stop single-user server process for the current user.""" alive = await self.remote_signal(15) self.clear_state() def get_remote_user(self, username): """Map JupyterHub username to remote username.""" return username def choose_remote_host(self): """ Given the list of possible nodes from which to choose, make the choice of which should be the remote host. """ remote_host = random.choice(self.remote_hosts) return remote_host @observe('remote_host') def _log_remote_host(self, change): self.log.debug("Remote host was set to %s." % self.remote_host) def get_gsi_cert(self): """Get location of x509 user cert. (Deprecated)""" return self.gsi_cert_path.format(username=self.user.name) def get_gsi_key(self): """Get location of x509 user key. (Deprecated)""" return self.gsi_key_path.format(username=self.user.name) async def remote_random_port(self): """Select unoccupied port on the remote host and return it. If this fails for some reason return `None`.""" # FIXME this keeps getting repeated # pass this into bash -c 'command' # command needs to be in "" quotes, with all the redirection outside # eg. 
bash -c '"ls -la" < /dev/null >> out.txt' command = '"{}" < /dev/null'.format(self.remote_port_command) stdout, stderr, retcode = await self.execute(command) if stdout != b"": # ASCII art fix: turn bytes to string, strip whitespace, split along newlines, grab last line of STDOUT. # Assumption: The last line of STDOUT should always be output of get_port.py, ASCII art or not. # Assumption: The last line of the STDOUT created by get_port.py is always the port number. port = int(stdout.decode().strip().split("\n")[-1]) self.log.debug("port={}".format(port)) else: port = None self.log.error("Failed to get a remote port") return port # FIXME add docstring async def exec_notebook(self, command): """TBD""" env = self.user_env() bash_script_str = "#!/bin/bash\n" for item in env.items(): # item is a (key, value) tuple # command = ('export %s=%s;' % item) + command bash_script_str += 'export %s=%s\n' % item bash_script_str += 'unset XDG_RUNTIME_DIR\n' # FIXME this keeps getting repeated # pass this into bash -c 'command' # command needs to be in "" quotes, with all the redirection outside # eg. bash -c '"ls -la" < /dev/null >> out.txt' # We pass in /dev/null to stdin to avoid the hang # Finally Grab the PID # command = '"%s" < /dev/null >> jupyter.log 2>&1 & pid=$!; echo $pid' % command bash_script_str += '%s < /dev/null >> jupyter.log 2>&1 & pid=$!\n' % command bash_script_str += 'echo $pid\n' run_script = "/tmp/{}_run.sh".format(self.user.name) with open(run_script, "w") as f: f.write(bash_script_str) if not os.path.isfile(run_script): raise Exception("The file " + run_script + "was not created.") else: with open(run_script, "r") as f: self.log.debug(run_script + " was written as:\n" + f.read()) stdout, stderr, retcode = await self.execute(command, stdin=run_script) self.log.debug("exec_notebook status={}".format(retcode)) if stdout != b'': pid = int(stdout) else: return -1 return pid async def remote_signal(self, sig): """Signal on the remote host.""" command = 'kill -s %s %d' % (sig, self.pid) # FIXME this keeps getting repeated # pass this into bash -c 'command' # command needs to be in "" quotes, with all the redirection outside # eg. bash -c '"ls -la" < /dev/null >> out.txt' command = '"%s" < /dev/null' % command stdout, stderr, retcode = await self.execute(command) self.log.debug("command: {} returned {} --- {} --- {}".format( command, stdout, stderr, retcode)) return (retcode == 0) # FIXME clean up async def execute(self, command=None, stdin=None): """Execute remote command via ssh. 
command: command to execute (via bash -c command) stdin: script to pass in via stdin (via 'bash -s' < stdin) executes command on remote system "command" and "stdin" are mutually exclusive.""" ssh_env = os.environ.copy() username = self.get_remote_user(self.user.name) ssh_args = "-o StrictHostKeyChecking=no -l {username} -p {port}".format( username=username, port=self.remote_port) if self.use_gsi: warnings.warn("SSHSpawner.use_gsi is deprecated", DeprecationWarning) ssh_env['X509_USER_CERT'] = self.get_gsi_cert() ssh_env['X509_USER_KEY'] = self.get_gsi_key() elif self.ssh_keyfile: ssh_args += " -i {keyfile}".format(keyfile=self.ssh_keyfile.format( username=self.user.name)) ssh_args += " -o preferredauthentications=publickey" # DRY (don't repeat yourself) def split_into_arguments(self, command): self.log.debug("command: {}".format(command)) commands = shlex.split(command) self.log.debug("shlex parsed command as: " + "{{" + "}} {{".join(commands) + "}}") return commands if stdin is not None: command = "{ssh_command} {flags} {hostname} 'bash -s'".format( ssh_command=self.ssh_command, flags=ssh_args, hostname=self.remote_host, stdin=stdin) commands = split_into_arguments(self, command) # the variable stdin above is the path to a shell script, but what the process requires as stdin is the content of the file itself as a buffer/bytes stdin = open(stdin, "rb") # ^ might be better if this were an asyncio.streamwriter or asyncio.subprocess.PIPE. This might be (slightly) blocking. else: command = "{ssh_command} {flags} {hostname} bash -c '{command}'".format( ssh_command=self.ssh_command, flags=ssh_args, hostname=self.remote_host, command=command) commands = split_into_arguments(self, command) proc = await asyncio.create_subprocess_exec( *commands, stdin=stdin, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=ssh_env) # DRY def log_process(self, returncode, stdout, stderr): def bytes_to_string(bytes): return bytes.decode().strip() stdout, stderr = (bytes_to_string(stdout), bytes_to_string(stderr)) self.log.debug("subprocess returned exitcode: %s" % returncode) self.log.debug("subprocess returned standard output: %s" % stdout) self.log.debug("subprocess returned standard error: %s" % stderr) try: stdout, stderr = await proc.communicate() # catch wildcard exception except Exception as e: self.log.debug( "execute raised exception %s when trying to run command: %s" % (e, command)) proc.kill() self.log.debug("execute failed done kill") stdout, stderr = await proc.communicate() self.log.debug("execute failed done communicate") log_process(self, proc.returncode, stdout, stderr) raise e else: returncode = proc.returncode # account for instances where no Python exceptions, but shell process returns with non-zero exit status if returncode != 0: self.log.debug("execute failed for command: %s" % command) log_process(self, returncode, stdout, stderr) return (stdout, stderr, returncode)
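# --- Illustrative sketch: wiring the traits above in jupyterhub_config.py.
# The spawner import path and host names below are assumptions, not taken
# from this module; adjust them for the actual deployment.
c = get_config()
c.JupyterHub.spawner_class = 'sshspawner.sshspawner.SSHSpawner'  # assumed path

# Pool of candidate hosts; choose_remote_host() picks one at random per spawn.
c.SSHSpawner.remote_hosts = ['node1.example.com', 'node2.example.com']
c.SSHSpawner.remote_port = '22'

# Hub-side key used to reach the remote host; '{username}' is expanded.
c.SSHSpawner.ssh_keyfile = '~/.ssh/id_rsa'

# Remote command that prints a free port (get_port.py must be deployed there).
c.SSHSpawner.remote_port_command = '/usr/bin/python /usr/local/bin/get_port.py'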
class HDFSContentsManager(ContentsManager, HDFSManagerMixin): """ ContentsManager that persists to an HDFS filesystem instead of the local filesystem. """ hdfs_namenode_host = Unicode(u'localhost', config=True, help='The HDFS namenode host') hdfs_namenode_port = Integer(9000, config=True, help='The HDFS namenode port') hdfs_user = Unicode(None, allow_none=True, config=True, help='The HDFS user name') root_dir = Unicode(u'/', config=True, help='The HDFS root directory to use') # The pydoop HDFS connection object used to interact with HDFS cluster. hdfs = Instance(HDFS, config=True) @default('hdfs') def _default_hdfs(self): return HDFS(host=self.hdfs_namenode_host, port=self.hdfs_namenode_port, user=self.hdfs_user) # groups=None def _checkpoints_class_default(self): # TODO: a better way to pass hdfs and root_dir? HDFSCheckpoints.hdfs = self.hdfs HDFSCheckpoints.root_dir = self.root_dir return HDFSCheckpoints # ContentsManager API part 1: methods that must be # implemented in subclasses. def dir_exists(self, path): """Does a directory exist at the given path? Like os.path.isdir Parameters ---------- path : string The relative API style path to check Returns ------- exists : bool Whether the path does indeed exist. """ path = path.strip('/') hdfs_path = to_os_path(path, self.root_dir) return self._hdfs_dir_exists(hdfs_path) def is_hidden(self, path): """Is path a hidden directory or file? Parameters ---------- path : string The path to check. This is an API path (`/` separated, relative to root dir). Returns ------- hidden : bool Whether the path is hidden. """ path = path.strip('/') hdfs_path = to_os_path(path, self.root_dir) return self._hdfs_is_hidden(hdfs_path) def file_exists(self, path=''): """Does a file exist at the given path? Like os.path.isfile Override this method in subclasses. Parameters ---------- path : string The API path of a file to check for. Returns ------- exists : bool Whether the file exists. """ path = path.strip('/') hdfs_path = to_os_path(path, self.root_dir) return self._hdfs_file_exists(hdfs_path) def exists(self, path): """Does a file or directory exist at the given path? Like os.path.exists Parameters ---------- path : string The API path of a file or directory to check for. Returns ------- exists : bool Whether the target exists. """ path = path.strip('/') hdfs_path = to_os_path(path, self.root_dir) return self._hdfs_exists(hdfs_path) def _base_model(self, path): """Build the common base of a hdfscontents model""" hdfs_path = to_os_path(path, self.root_dir) info = self.hdfs.get_path_info(hdfs_path) last_modified = tz.utcfromtimestamp(info.get(u'last_mod')) # TODO: HDFS doesn't report a creation time! storing last-accessed time instead created = tz.utcfromtimestamp(info.get(u'last_access')) # Create the base model. model = {} model['name'] = path.rsplit('/', 1)[-1] model['path'] = path model['last_modified'] = last_modified model['created'] = created model['content'] = None model['format'] = None model['mimetype'] = None # TODO: Now just checking whether the user has write permission in HDFS. Need to cover all cases and check the user & group?
try: model['writable'] = (info.get(u'permissions') & 0o0200) > 0 except OSError: self.log.error("Failed to check write permissions on %s", hdfs_path) model['writable'] = False return model def _dir_model(self, path, content=True): """Build a model for a directory if content is requested, will include a listing of the directory """ hdfs_path = to_os_path(path, self.root_dir) four_o_four = u'directory does not exist: %r' % path if not self.dir_exists(path): raise web.HTTPError(404, four_o_four) elif self.is_hidden(path): self.log.info( "Refusing to serve hidden directory %r, via 404 Error", hdfs_path) raise web.HTTPError(404, four_o_four) model = self._base_model(path) model['type'] = 'directory' if content: model['content'] = contents = [] for subpath in self._hdfs_ls(hdfs_path): name = subpath.strip('/').rsplit('/', 1)[-1] if self.should_list( name) and not self._hdfs_is_hidden(subpath): contents.append( self.get(path='%s/%s' % (path, name), content=False)) model['format'] = 'json' return model def _file_model(self, path, content=True, format=None): """Build a model for a file if content is requested, include the file hdfscontents. format: If 'text', the hdfscontents will be decoded as UTF-8. If 'base64', the raw bytes hdfscontents will be encoded as base64. If not specified, try to decode as UTF-8, and fall back to base64 """ model = self._base_model(path) model['type'] = 'file' hdfs_path = to_os_path(path, self.root_dir) model['mimetype'] = mimetypes.guess_type(hdfs_path)[0] if content: content, format = self._read_file(hdfs_path, format) if model['mimetype'] is None: default_mime = { 'text': 'text/plain', 'base64': 'application/octet-stream' }[format] model['mimetype'] = default_mime model.update( content=content, format=format, ) return model def _notebook_model(self, path, content=True): """Build a notebook model if content is requested, the notebook content will be populated as a JSON structure (not double-serialized) """ model = self._base_model(path) model['type'] = 'notebook' if content: hdfs_path = to_os_path(path, self.root_dir) nb = self._read_notebook(hdfs_path, as_version=4) self.mark_trusted_cells(nb, path) model['content'] = nb model['format'] = 'json' self.validate_notebook_model(model) return model def _save_directory(self, hdfs_path, model, path=''): """create a directory""" if self._hdfs_is_hidden(hdfs_path): raise HTTPError(400, u'Cannot create hidden directory %r' % hdfs_path) if not self._hdfs_exists(hdfs_path): try: self.hdfs.create_directory(hdfs_path) except Exception: raise HTTPError(403, u'Permission denied: %s' % path) elif not self._hdfs_dir_exists(hdfs_path): raise HTTPError(400, u'Not a directory: %s' % (hdfs_path)) else: self.log.debug("Directory %r already exists", hdfs_path) def get(self, path, content=True, type=None, format=None): """Get a file or directory model. Takes a path for an entity and returns its model Parameters ---------- path : str the API path that describes the relative path for the target content : bool Whether to include the hdfscontents in the reply type : str, optional The requested type - 'file', 'notebook', or 'directory'. Will raise HTTPError 400 if the content doesn't match. format : str, optional The requested format for file contents. 'text' or 'base64'. Ignored if this returns a notebook or directory model. Returns ------- model : dict the contents model. If content=True, returns the contents of the file or directory as well.
""" path = path.strip('/') if not self.exists(path): raise web.HTTPError(404, u'No such file or directory: %s' % path) if self.dir_exists(path): if type not in (None, 'directory'): raise web.HTTPError(400, u'%s is a directory, not a %s' % (path, type), reason='bad type') model = self._dir_model(path, content=content) elif type == 'notebook' or (type is None and path.endswith('.ipynb')): model = self._notebook_model(path, content=content) else: if type == 'directory': raise web.HTTPError(400, u'%s is not a directory' % path, reason='bad type') model = self._file_model(path, content=content, format=format) return model def save(self, model, path=''): """ Save a file or directory model to path. Should return the saved model with no content. Save implementations should call self.run_pre_save_hook(model=model, path=path) prior to writing any data. """ path = path.strip('/') if 'type' not in model: raise web.HTTPError(400, u'No file type provided') if 'content' not in model and model['type'] != 'directory': raise web.HTTPError(400, u'No file content provided') path = path.strip('/') hdfs_path = to_os_path(path, self.root_dir) self.log.debug("Saving %s", hdfs_path) self.run_pre_save_hook(model=model, path=path) try: if model['type'] == 'notebook': nb = nbformat.from_dict(model['content']) self.check_and_sign(nb, path) self._save_notebook(hdfs_path, nb) # One checkpoint should always exist for notebooks. if not self.checkpoints.list_checkpoints(path): self.create_checkpoint(path) elif model['type'] == 'file': # Missing format will be handled internally by _save_file. self._save_file(hdfs_path, model['content'], model.get('format')) elif model['type'] == 'directory': self._save_directory(hdfs_path, model, path) else: raise web.HTTPError( 400, "Unhandled hdfscontents type: %s" % model['type']) except web.HTTPError: raise except Exception as e: self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True) raise web.HTTPError( 500, u'Unexpected error while saving file: %s %s' % (path, e)) validation_message = None if model['type'] == 'notebook': self.validate_notebook_model(model) validation_message = model.get('message', None) model = self.get(path, content=False) if validation_message: model['message'] = validation_message #self.run_post_save_hook(model=model, os_path=hdfs_path) return model def delete_file(self, path): """Delete file at path.""" path = path.strip('/') hdfs_path = to_os_path(path, self.root_dir) if self._hdfs_dir_exists(hdfs_path): listing = self._hdfs_ls(hdfs_path) # Don't delete non-empty directories. # A directory containing only leftover checkpoints is # considered empty. 
cp_dir = getattr(self.checkpoints, 'checkpoint_dir', None) for longentry in listing: entry = longentry.strip('/').rsplit('/', 1)[-1] if entry != cp_dir: raise web.HTTPError(400, u'Directory %s not empty' % hdfs_path) elif not self._hdfs_file_exists(hdfs_path): raise web.HTTPError(404, u'File does not exist: %s' % hdfs_path) if self._hdfs_dir_exists(hdfs_path): self.log.debug("Removing directory %s", hdfs_path) try: self.hdfs.delete(hdfs_path, recursive=True) except Exception: raise HTTPError(403, u'Permission denied: %s' % path) else: self.log.debug("Removing file %s", hdfs_path) try: self.hdfs.delete(hdfs_path, recursive=False) except Exception: raise HTTPError(403, u'Permission denied: %s' % path) def rename_file(self, old_path, new_path): """Rename a file.""" old_path = old_path.strip('/') new_path = new_path.strip('/') if new_path == old_path: return new_hdfs_path = to_os_path(new_path, self.root_dir) old_hdfs_path = to_os_path(old_path, self.root_dir) # Should we proceed with the move? if self._hdfs_exists(new_hdfs_path): raise web.HTTPError(409, u'File already exists: %s' % new_path) # Move the file try: self._hdfs_move_file(old_hdfs_path, new_hdfs_path) except Exception as e: raise web.HTTPError( 500, u'Unknown error renaming file: %s %s' % (old_path, e)) def info_string(self): return "Serving notebooks from HDFS directory: %s" % self.root_dir
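# --- Illustrative sketch: pointing a notebook server at HDFS through the
# config=True traits declared above. The contents_manager_class import path
# is an assumption; use wherever HDFSContentsManager is actually installed.
c = get_config()
c.NotebookApp.contents_manager_class = 'hdfscontents.hdfsmanager.HDFSContentsManager'  # assumed path
c.HDFSContentsManager.hdfs_namenode_host = 'namenode.example.com'
c.HDFSContentsManager.hdfs_namenode_port = 9000
c.HDFSContentsManager.hdfs_user = 'jupyter'  # handed to the pydoop HDFS client
c.HDFSContentsManager.root_dir = '/user/jupyter/notebooks'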
class BaseConverter(LoggingConfigurable): notebooks = List([]) assignments = Dict({}) writer = Instance(FilesWriter) exporter = Instance(Exporter) exporter_class = Type(NotebookExporter, klass=Exporter) preprocessors = List([]) force = Bool( False, help="Whether to overwrite existing assignments/submissions").tag( config=True) pre_convert_hook = Any(None, config=True, allow_none=True, help=dedent(""" An optional hook function that you can implement to do some bootstrapping work before converting. This function is called before the notebooks are converted and should be used for specific converters such as Autograde, GenerateAssignment or GenerateFeedback. It will be called as (all arguments are passed as keywords):: hook(assignment=assignment, student=student, notebooks=notebooks) """)) post_convert_hook = Any(None, config=True, allow_none=True, help=dedent(""" An optional hook function that you can implement to do some work after converting. This function is called after the notebooks are converted and should be used for specific converters such as Autograde, GenerateAssignment or GenerateFeedback. It will be called as (all arguments are passed as keywords):: hook(assignment=assignment, student=student, notebooks=notebooks) """)) permissions = Integer(help=dedent(""" Permissions to set on files output by nbgrader. The default is generally read-only (444), with the exception of nbgrader generate_assignment and nbgrader generate_feedback, in which case the user also has write permission. """)).tag(config=True) @default("permissions") def _permissions_default(self) -> int: return 664 if self.coursedir.groupshared else 444 coursedir = Instance(CourseDirectory, allow_none=True) def __init__(self, coursedir: CourseDirectory = None, **kwargs: typing.Any) -> None: self.coursedir = coursedir super(BaseConverter, self).__init__(**kwargs) if self.parent and hasattr(self.parent, "logfile"): self.logfile = self.parent.logfile else: self.logfile = None c = Config() c.Exporter.default_preprocessors = [] self.update_config(c) def start(self) -> None: self.init_notebooks() self.writer = FilesWriter(parent=self, config=self.config) self.exporter = self.exporter_class(parent=self, config=self.config) for pp in self.preprocessors: self.exporter.register_preprocessor(pp) currdir = os.getcwd() os.chdir(self.coursedir.root) try: self.convert_notebooks() finally: os.chdir(currdir) @default("classes") def _classes_default(self): classes = super(BaseConverter, self)._classes_default() classes.append(FilesWriter) classes.append(Exporter) for pp in self.preprocessors: if len(pp.class_traits(config=True)) > 0: classes.append(pp) return classes @property def _input_directory(self): raise NotImplementedError @property def _output_directory(self): raise NotImplementedError def _format_source(self, assignment_id: str, student_id: str, escape: bool = False) -> str: return self.coursedir.format_path(self._input_directory, student_id, assignment_id, escape=escape) def _format_dest(self, assignment_id: str, student_id: str, escape: bool = False) -> str: return self.coursedir.format_path(self._output_directory, student_id, assignment_id, escape=escape) def init_notebooks(self) -> None: self.assignments = {} self.notebooks = [] assignment_glob = self._format_source(self.coursedir.assignment_id, self.coursedir.student_id) for assignment in glob.glob(assignment_glob): notebook_glob = os.path.join(assignment, self.coursedir.notebook_id + ".ipynb") found = glob.glob(notebook_glob) if len(found) == 0: self.log.warning("No notebooks 
were matched by '%s'", notebook_glob) continue self.assignments[assignment] = found if len(self.assignments) == 0: msg = "No notebooks were matched by '%s'" % assignment_glob self.log.error(msg) assignment_glob2 = self._format_source("*", self.coursedir.student_id) found = glob.glob(assignment_glob2) if found: scores = sorted([(fuzz.ratio(assignment_glob, x), x) for x in found]) self.log.error("Did you mean: %s", scores[-1][1]) raise NbGraderException(msg) def init_single_notebook_resources( self, notebook_filename: str) -> typing.Dict[str, typing.Any]: regexp = re.escape(os.path.sep).join([ self._format_source("(?P<assignment_id>.*)", "(?P<student_id>.*)", escape=True), "(?P<notebook_id>.*).ipynb" ]) m = re.match(regexp, notebook_filename) if m is None: msg = "Could not match '%s' with regexp '%s'" % (notebook_filename, regexp) self.log.error(msg) raise NbGraderException(msg) gd = m.groupdict() self.log.debug("Student: %s", gd['student_id']) self.log.debug("Assignment: %s", gd['assignment_id']) self.log.debug("Notebook: %s", gd['notebook_id']) resources = {} resources['unique_key'] = gd['notebook_id'] resources['output_files_dir'] = '%s_files' % gd['notebook_id'] resources['nbgrader'] = {} resources['nbgrader']['student'] = gd['student_id'] resources['nbgrader']['assignment'] = gd['assignment_id'] resources['nbgrader']['notebook'] = gd['notebook_id'] resources['nbgrader']['db_url'] = self.coursedir.db_url return resources def write_single_notebook(self, output: str, resources: ResourcesDict) -> None: # configure the writer build directory self.writer.build_directory = self._format_dest( resources['nbgrader']['assignment'], resources['nbgrader']['student']) # write out the results self.writer.write(output, resources, notebook_name=resources['unique_key']) def init_destination(self, assignment_id: str, student_id: str) -> bool: """Initialize the destination for an assignment. Returns whether the assignment should actually be processed or not (i.e. whether the initialization was successful). 
""" if self.coursedir.student_id_exclude: exclude_ids = self.coursedir.student_id_exclude.split(',') if student_id in exclude_ids: return False dest = os.path.normpath(self._format_dest(assignment_id, student_id)) # the destination doesn't exist, so we haven't processed it if self.coursedir.notebook_id == "*": if not os.path.exists(dest): return True else: # if any of the notebooks don't exist, then we want to process them for notebook in self.notebooks: filename = os.path.splitext(os.path.basename( notebook))[0] + self.exporter.file_extension path = os.path.join(dest, filename) if not os.path.exists(path): return True # if we have specified --force, then always remove existing stuff if self.force: if self.coursedir.notebook_id == "*": self.log.warning( "Removing existing assignment: {}".format(dest)) rmtree(dest) else: for notebook in self.notebooks: filename = os.path.splitext(os.path.basename( notebook))[0] + self.exporter.file_extension path = os.path.join(dest, filename) if os.path.exists(path): self.log.warning( "Removing existing notebook: {}".format(path)) remove(path) return True src = self._format_source(assignment_id, student_id) new_timestamp = self.coursedir.get_existing_timestamp(src) old_timestamp = self.coursedir.get_existing_timestamp(dest) # if --force hasn't been specified, but the source assignment is newer, # then we want to overwrite it if new_timestamp is not None and old_timestamp is not None and new_timestamp > old_timestamp: if self.coursedir.notebook_id == "*": self.log.warning( "Updating existing assignment: {}".format(dest)) rmtree(dest) else: for notebook in self.notebooks: filename = os.path.splitext(os.path.basename( notebook))[0] + self.exporter.file_extension path = os.path.join(dest, filename) if os.path.exists(path): self.log.warning( "Updating existing notebook: {}".format(path)) remove(path) return True # otherwise, we should skip the assignment self.log.info("Skipping existing assignment: {}".format(dest)) return False def init_assignment(self, assignment_id: str, student_id: str) -> None: """Initializes resources/dependencies/etc. that are common to all notebooks in an assignment. """ source = self._format_source(assignment_id, student_id) dest = self._format_dest(assignment_id, student_id) # detect other files in the source directory for filename in find_all_files(source, self.coursedir.ignore + ["*.ipynb"]): # Make sure folder exists. path = os.path.join(dest, os.path.relpath(filename, source)) if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) if os.path.exists(path): remove(path) self.log.info("Copying %s -> %s", filename, path) shutil.copy(filename, path) def set_permissions(self, assignment_id: str, student_id: str) -> None: self.log.info("Setting destination file permissions to %s", self.permissions) dest = os.path.normpath(self._format_dest(assignment_id, student_id)) permissions = int(str(self.permissions), 8) for dirname, _, filenames in os.walk(dest): for filename in filenames: os.chmod(os.path.join(dirname, filename), permissions) # If groupshared, set dir permissions - see comment below. st_mode = os.stat(dirname).st_mode if self.coursedir.groupshared and st_mode & 0o2770 != 0o2770: try: os.chmod(dirname, (st_mode | 0o2770) & 0o2777) except PermissionError: self.log.warning( "Could not update permissions of %s to make it groupshared", dirname) # If groupshared, set write permissions on directories. 
Directories # are created within ipython_genutils.path.ensure_dir_exists via # nbconvert.writer, (unless there are supplementary files) with a # default mode of 755 and there is no way to pass the mode arguments # all the way to there! So we have to walk and fix. if self.coursedir.groupshared: # Root may be created in this step, and is not included above. rootdir = self.coursedir.format_path(self._output_directory, '.', '.') # Add 2770 to existing dir permissions (don't unconditionally override) st_mode = os.stat(rootdir).st_mode if st_mode & 0o2770 != 0o2770: try: os.chmod(rootdir, (st_mode | 0o2770) & 0o2777) except PermissionError: self.log.warning( "Could not update permissions of %s to make it groupshared", rootdir) def convert_single_notebook(self, notebook_filename: str) -> None: """ Convert a single notebook. Performs the following steps: 1. Initialize notebook resources 2. Export the notebook to a particular format 3. Write the exported notebook to file """ self.log.info("Converting notebook %s", notebook_filename) resources = self.init_single_notebook_resources(notebook_filename) output, resources = self.exporter.from_filename(notebook_filename, resources=resources) self.write_single_notebook(output, resources) def convert_notebooks(self) -> None: errors = [] def _handle_failure(gd: typing.Dict[str, str]) -> None: dest = os.path.normpath( self._format_dest(gd['assignment_id'], gd['student_id'])) if self.coursedir.notebook_id == "*": if os.path.exists(dest): self.log.warning( "Removing failed assignment: {}".format(dest)) rmtree(dest) else: for notebook in self.notebooks: filename = os.path.splitext(os.path.basename( notebook))[0] + self.exporter.file_extension path = os.path.join(dest, filename) if os.path.exists(path): self.log.warning( "Removing failed notebook: {}".format(path)) remove(path) for assignment in sorted(self.assignments.keys()): # initialize the list of notebooks and the exporter self.notebooks = sorted(self.assignments[assignment]) # parse out the assignment and student ids regexp = self._format_source("(?P<assignment_id>.*)", "(?P<student_id>.*)", escape=True) m = re.match(regexp, assignment) if m is None: msg = "Could not match '%s' with regexp '%s'" % (assignment, regexp) self.log.error(msg) raise NbGraderException(msg) gd = m.groupdict() try: # determine whether we actually even want to process this submission should_process = self.init_destination(gd['assignment_id'], gd['student_id']) if not should_process: continue self.run_pre_convert_hook() # initialize the destination self.init_assignment(gd['assignment_id'], gd['student_id']) # convert all the notebooks for notebook_filename in self.notebooks: self.convert_single_notebook(notebook_filename) # set assignment permissions self.set_permissions(gd['assignment_id'], gd['student_id']) self.run_post_convert_hook() except UnresponsiveKernelError: self.log.error( "While processing assignment %s, the kernel became " "unresponsive and we could not interrupt it. This probably " "means that the students' code has an infinite loop that " "consumes a lot of memory or something similar. nbgrader " "doesn't know how to deal with this problem, so you will " "have to manually edit the students' code (for example, to " "just throw an error rather than enter an infinite loop). ", assignment) errors.append((gd['assignment_id'], gd['student_id'])) _handle_failure(gd) except sqlalchemy.exc.OperationalError: _handle_failure(gd) self.log.error(traceback.format_exc()) msg = ( "There was an error accessing the nbgrader database. 
This " "may occur if you recently upgraded nbgrader. To resolve " "the issue, first BACK UP your database and then run the " "command `nbgrader db upgrade`.") self.log.error(msg) raise NbGraderException(msg) except SchemaTooOldError: _handle_failure(gd) msg = ( "One or more notebooks in the assignment use an old version \n" "of the nbgrader metadata format. Please **back up your class files \n" "directory** and then update the metadata using:\n\nnbgrader update .\n" ) self.log.error(msg) raise NbGraderException(msg) except SchemaTooNewError: _handle_failure(gd) msg = ( "One or more notebooks in the assignment use an newer version \n" "of the nbgrader metadata format. Please update your version of \n" "nbgrader to the latest version to be able to use this notebook.\n" ) self.log.error(msg) raise NbGraderException(msg) except KeyboardInterrupt: _handle_failure(gd) self.log.error("Canceled") raise except Exception: self.log.error("There was an error processing assignment: %s", assignment) self.log.error(traceback.format_exc()) errors.append((gd['assignment_id'], gd['student_id'])) _handle_failure(gd) if len(errors) > 0: for assignment_id, student_id in errors: self.log.error( "There was an error processing assignment '{}' for student '{}'" .format(assignment_id, student_id)) if self.logfile: msg = ( "Please see the error log ({}) for details on the specific " "errors on the above failures.".format(self.logfile)) else: msg = ( "Please see the the above traceback for details on the specific " "errors on the above failures.") self.log.error(msg) raise NbGraderException(msg) def run_pre_convert_hook(self): if self.pre_convert_hook: self.log.info('Running pre-convert hook') try: self.pre_convert_hook(assignment=self.coursedir.assignment_id, student=self.coursedir.student_id, notebooks=self.notebooks) except Exception: self.log.info('Pre-convert hook failed', exc_info=True) def run_post_convert_hook(self): if self.post_convert_hook: self.log.info('Running post-convert hook') try: self.post_convert_hook(assignment=self.coursedir.assignment_id, student=self.coursedir.student_id, notebooks=self.notebooks) except Exception: self.log.info('Post-convert hook failed', exc_info=True)
class Kernel(SingletonConfigurable): #--------------------------------------------------------------------------- # Kernel interface #--------------------------------------------------------------------------- # attribute to override with a GUI eventloop = Any(None) @observe('eventloop') def _update_eventloop(self, change): """schedule call to eventloop from IOLoop""" loop = ioloop.IOLoop.current() if change.new is not None: loop.add_callback(self.enter_eventloop) session = Instance(Session, allow_none=True) profile_dir = Instance('IPython.core.profiledir.ProfileDir', allow_none=True) shell_streams = List() control_stream = Instance(ZMQStream, allow_none=True) iopub_socket = Any() iopub_thread = Any() stdin_socket = Any() log = Instance(logging.Logger, allow_none=True) # identities: int_id = Integer(-1) ident = Unicode() @default('ident') def _default_ident(self): return unicode_type(uuid.uuid4()) # This should be overridden by wrapper kernels that implement any real # language. language_info = {} # any links that should go in the help menu help_links = List() # Private interface _darwin_app_nap = Bool( True, help="""Whether to use appnope for compatibility with OS X App Nap. Only affects OS X >= 10.9. """).tag(config=True) # track associations with current request _allow_stdin = Bool(False) _parent_header = Dict() _parent_ident = Any(b'') # Time to sleep after flushing the stdout/err buffers in each execute # cycle. While this introduces a hard limit on the minimal latency of the # execute cycle, it helps prevent output synchronization problems for # clients. # Units are in seconds. The minimum zmq latency on local host is probably # ~150 microseconds, set this to 500us for now. We may need to increase it # a little if it's not enough after more interactive testing. _execute_sleep = Float(0.0005).tag(config=True) # Frequency of the kernel's event loop. # Units are in seconds, kernel subclasses for GUI toolkits may need to # adapt to milliseconds. _poll_interval = Float(0.01).tag(config=True) stop_on_error_timeout = Float( 0.1, config=True, help="""time (in seconds) to wait for messages to arrive when aborting queued requests after an error. Requests that arrive within this window after an error will be cancelled. Increase in the event of unusually slow network causing significant delays, which can manifest as e.g. "Run all" in a notebook aborting some, but not all, messages after an error. """) # If the shutdown was requested over the network, we leave here the # necessary reply message so it can be sent by our registered atexit # handler. This ensures that the reply is only sent to clients truly at # the end of our shutdown process (which happens after the underlying # IPython shell's own shutdown). _shutdown_message = None # This is a dict of port number that the kernel is listening on. It is set # by record_ports and used by connect_request. _recorded_ports = Dict() # set of aborted msg_ids aborted = Set() # Track execution count here. For IPython, we override this to use the # execution count we store in the shell. 
execution_count = 0 msg_types = [ 'execute_request', 'complete_request', 'inspect_request', 'history_request', 'comm_info_request', 'kernel_info_request', 'connect_request', 'shutdown_request', 'is_complete_request', # deprecated: 'apply_request', ] # add deprecated ipyparallel control messages control_msg_types = msg_types + ['clear_request', 'abort_request'] def __init__(self, **kwargs): super(Kernel, self).__init__(**kwargs) # Build dict of handlers for message types self.shell_handlers = {} for msg_type in self.msg_types: self.shell_handlers[msg_type] = getattr(self, msg_type) self.control_handlers = {} for msg_type in self.control_msg_types: self.control_handlers[msg_type] = getattr(self, msg_type) @gen.coroutine def dispatch_control(self, msg): """dispatch control requests""" idents, msg = self.session.feed_identities(msg, copy=False) try: msg = self.session.deserialize(msg, content=True, copy=False) except: self.log.error("Invalid Control Message", exc_info=True) return self.log.debug("Control received: %s", msg) # Set the parent message for side effects. self.set_parent(idents, msg) self._publish_status(u'busy') if self._aborting: self._send_abort_reply(self.control_stream, msg, idents) self._publish_status(u'idle') return header = msg['header'] msg_type = header['msg_type'] handler = self.control_handlers.get(msg_type, None) if handler is None: self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r", msg_type) else: try: yield gen.maybe_future( handler(self.control_stream, idents, msg)) except Exception: self.log.error("Exception in control handler:", exc_info=True) sys.stdout.flush() sys.stderr.flush() self._publish_status(u'idle') # flush to ensure reply is sent self.control_stream.flush(zmq.POLLOUT) def should_handle(self, stream, msg, idents): """Check whether a shell-channel message should be handled Allows subclasses to prevent handling of certain messages (e.g. aborted requests). """ msg_id = msg['header']['msg_id'] if msg_id in self.aborted: msg_type = msg['header']['msg_type'] # is it safe to assume a msg_id will not be resubmitted? self.aborted.remove(msg_id) self._send_abort_reply(stream, msg, idents) return False return True @gen.coroutine def dispatch_shell(self, stream, msg): """dispatch shell requests""" idents, msg = self.session.feed_identities(msg, copy=False) try: msg = self.session.deserialize(msg, content=True, copy=False) except: self.log.error("Invalid Message", exc_info=True) return # Set the parent message for side effects. self.set_parent(idents, msg) self._publish_status(u'busy') if self._aborting: self._send_abort_reply(stream, msg, idents) self._publish_status(u'idle') # flush to ensure reply is sent before # handling the next request stream.flush(zmq.POLLOUT) return msg_type = msg['header']['msg_type'] # Print some info about this message and leave a '--->' marker, so it's # easier to trace visually the message chain when debugging. Each # handler prints its message at the end. 
self.log.debug('\n*** MESSAGE TYPE:%s***', msg_type) self.log.debug(' Content: %s\n --->\n ', msg['content']) if not self.should_handle(stream, msg, idents): return handler = self.shell_handlers.get(msg_type, None) if handler is None: self.log.warning("Unknown message type: %r", msg_type) else: self.log.debug("%s: %s", msg_type, msg) try: self.pre_handler_hook() except Exception: self.log.debug("Unable to signal in pre_handler_hook:", exc_info=True) try: yield gen.maybe_future(handler(stream, idents, msg)) except Exception: self.log.error("Exception in message handler:", exc_info=True) finally: try: self.post_handler_hook() except Exception: self.log.debug("Unable to signal in post_handler_hook:", exc_info=True) sys.stdout.flush() sys.stderr.flush() self._publish_status(u'idle') # flush to ensure reply is sent before # handling the next request stream.flush(zmq.POLLOUT) def pre_handler_hook(self): """Hook to execute before calling message handler""" # ensure default_int_handler during handler call self.saved_sigint_handler = signal(SIGINT, default_int_handler) def post_handler_hook(self): """Hook to execute after calling message handler""" signal(SIGINT, self.saved_sigint_handler) def enter_eventloop(self): """enter eventloop""" self.log.info("Entering eventloop %s", self.eventloop) # record handle, so we can check when this changes eventloop = self.eventloop def advance_eventloop(): # check if eventloop changed: if self.eventloop is not eventloop: self.log.info("exiting eventloop %s", eventloop) return if self.msg_queue.qsize(): self.log.debug("Delaying eventloop due to waiting messages") # still messages to process, make the eventloop wait schedule_next() return self.log.debug("Advancing eventloop %s", eventloop) try: eventloop(self) except KeyboardInterrupt: # Ctrl-C shouldn't crash the kernel self.log.error("KeyboardInterrupt caught in kernel") pass if self.eventloop is eventloop: # schedule advance again schedule_next() def schedule_next(): """Schedule the next advance of the eventloop""" # flush the eventloop every so often, # giving us a chance to handle messages in the meantime self.log.debug("Scheduling eventloop advance") self.io_loop.call_later(1, advance_eventloop) # begin polling the eventloop schedule_next() @gen.coroutine def do_one_iteration(self): """Process a single shell message Any pending control messages will be flushed as well .. versionchanged:: 5 This is now a coroutine """ # flush messages off of shell streams into the message queue for stream in self.shell_streams: stream.flush() # process all messages higher priority than shell (control), # and at most one shell message per iteration priority = 0 while priority is not None and priority < SHELL_PRIORITY: priority = yield self.process_one(wait=False) @gen.coroutine def process_one(self, wait=True): """Process one request Returns priority of the message handled. Returns None if no message was handled. 
""" if wait: priority, t, dispatch, args = yield self.msg_queue.get() else: try: priority, t, dispatch, args = self.msg_queue.get_nowait() except QueueEmpty: return None yield gen.maybe_future(dispatch(*args)) @gen.coroutine def dispatch_queue(self): """Coroutine to preserve order of message handling Ensures that only one message is processing at a time, even when the handler is async """ while True: # ensure control stream is flushed before processing shell messages if self.control_stream: self.control_stream.flush() # receive the next message and handle it try: yield self.process_one() except Exception: self.log.exception("Error in message handler") _message_counter = Any(help="""Monotonic counter of messages Ensures messages of the same priority are handled in arrival order. """, ) @default('_message_counter') def _message_counter_default(self): return itertools.count() def schedule_dispatch(self, priority, dispatch, *args): """schedule a message for dispatch""" idx = next(self._message_counter) self.msg_queue.put_nowait(( priority, idx, dispatch, args, )) # ensure the eventloop wakes up self.io_loop.add_callback(lambda: None) def start(self): """register dispatchers for streams""" self.io_loop = ioloop.IOLoop.current() self.msg_queue = PriorityQueue() self.io_loop.add_callback(self.dispatch_queue) if self.control_stream: self.control_stream.on_recv( partial( self.schedule_dispatch, CONTROL_PRIORITY, self.dispatch_control, ), copy=False, ) for s in self.shell_streams: if s is self.control_stream: continue s.on_recv( partial( self.schedule_dispatch, SHELL_PRIORITY, self.dispatch_shell, s, ), copy=False, ) # publish idle status self._publish_status('starting') def record_ports(self, ports): """Record the ports that this kernel is using. The creator of the Kernel instance must call this methods if they want the :meth:`connect_request` method to return the port numbers. """ self._recorded_ports = ports #--------------------------------------------------------------------------- # Kernel request handlers #--------------------------------------------------------------------------- def _publish_execute_input(self, code, parent, execution_count): """Publish the code request on the iopub stream.""" self.session.send(self.iopub_socket, u'execute_input', { u'code': code, u'execution_count': execution_count }, parent=parent, ident=self._topic('execute_input')) def _publish_status(self, status, parent=None): """send status (busy/idle) on IOPub""" self.session.send( self.iopub_socket, u'status', {u'execution_state': status}, parent=parent or self._parent_header, ident=self._topic('status'), ) def set_parent(self, ident, parent): """Set the current parent_header Side effects (IOPub messages) and replies are associated with the request that caused them via the parent_header. The parent identity is used to route input_request messages on the stdin channel. """ self._parent_ident = ident self._parent_header = parent def send_response(self, stream, msg_or_type, content=None, ident=None, buffers=None, track=False, header=None, metadata=None): """Send a response to the message we're currently processing. This accepts all the parameters of :meth:`jupyter_client.session.Session.send` except ``parent``. This relies on :meth:`set_parent` having been called for the current message. """ return self.session.send(stream, msg_or_type, content, self._parent_header, ident, buffers, track, header, metadata) def init_metadata(self, parent): """Initialize metadata. Run at the beginning of execution requests. 
""" # FIXME: `started` is part of ipyparallel # Remove for ipykernel 5.0 return { 'started': now(), } def finish_metadata(self, parent, metadata, reply_content): """Finish populating metadata. Run after completing an execution request. """ return metadata @gen.coroutine def execute_request(self, stream, ident, parent): """handle an execute_request""" try: content = parent[u'content'] code = py3compat.cast_unicode_py2(content[u'code']) silent = content[u'silent'] store_history = content.get(u'store_history', not silent) user_expressions = content.get('user_expressions', {}) allow_stdin = content.get('allow_stdin', False) except: self.log.error("Got bad msg: ") self.log.error("%s", parent) return stop_on_error = content.get('stop_on_error', True) metadata = self.init_metadata(parent) # Re-broadcast our input for the benefit of listening clients, and # start computing output if not silent: self.execution_count += 1 self._publish_execute_input(code, parent, self.execution_count) reply_content = yield gen.maybe_future( self.do_execute( code, silent, store_history, user_expressions, allow_stdin, )) # Flush output before sending the reply. sys.stdout.flush() sys.stderr.flush() # FIXME: on rare occasions, the flush doesn't seem to make it to the # clients... This seems to mitigate the problem, but we definitely need # to better understand what's going on. if self._execute_sleep: time.sleep(self._execute_sleep) # Send the reply. reply_content = json_clean(reply_content) metadata = self.finish_metadata(parent, metadata, reply_content) reply_msg = self.session.send(stream, u'execute_reply', reply_content, parent, metadata=metadata, ident=ident) self.log.debug("%s", reply_msg) if not silent and reply_msg['content'][ 'status'] == u'error' and stop_on_error: yield self._abort_queues() def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False): """Execute user code. Must be overridden by subclasses. """ raise NotImplementedError @gen.coroutine def complete_request(self, stream, ident, parent): content = parent['content'] code = content['code'] cursor_pos = content['cursor_pos'] matches = yield gen.maybe_future(self.do_complete(code, cursor_pos)) matches = json_clean(matches) completion_msg = self.session.send(stream, 'complete_reply', matches, parent, ident) def do_complete(self, code, cursor_pos): """Override in subclasses to find completions. """ return { 'matches': [], 'cursor_end': cursor_pos, 'cursor_start': cursor_pos, 'metadata': {}, 'status': 'ok' } @gen.coroutine def inspect_request(self, stream, ident, parent): content = parent['content'] reply_content = yield gen.maybe_future( self.do_inspect( content['code'], content['cursor_pos'], content.get('detail_level', 0), )) # Before we send this object over, we scrub it for JSON usage reply_content = json_clean(reply_content) msg = self.session.send(stream, 'inspect_reply', reply_content, parent, ident) self.log.debug("%s", msg) def do_inspect(self, code, cursor_pos, detail_level=0): """Override in subclasses to allow introspection. 
""" return {'status': 'ok', 'data': {}, 'metadata': {}, 'found': False} @gen.coroutine def history_request(self, stream, ident, parent): content = parent['content'] reply_content = yield gen.maybe_future(self.do_history(**content)) reply_content = json_clean(reply_content) msg = self.session.send(stream, 'history_reply', reply_content, parent, ident) self.log.debug("%s", msg) def do_history(self, hist_access_type, output, raw, session=None, start=None, stop=None, n=None, pattern=None, unique=False): """Override in subclasses to access history. """ return {'status': 'ok', 'history': []} def connect_request(self, stream, ident, parent): if self._recorded_ports is not None: content = self._recorded_ports.copy() else: content = {} content['status'] = 'ok' msg = self.session.send(stream, 'connect_reply', content, parent, ident) self.log.debug("%s", msg) @property def kernel_info(self): return { 'protocol_version': kernel_protocol_version, 'implementation': self.implementation, 'implementation_version': self.implementation_version, 'language_info': self.language_info, 'banner': self.banner, 'help_links': self.help_links, } def kernel_info_request(self, stream, ident, parent): content = {'status': 'ok'} content.update(self.kernel_info) msg = self.session.send(stream, 'kernel_info_reply', content, parent, ident) self.log.debug("%s", msg) def comm_info_request(self, stream, ident, parent): content = parent['content'] target_name = content.get('target_name', None) # Should this be moved to ipkernel? if hasattr(self, 'comm_manager'): comms = { k: dict(target_name=v.target_name) for (k, v) in self.comm_manager.comms.items() if v.target_name == target_name or target_name is None } else: comms = {} reply_content = dict(comms=comms, status='ok') msg = self.session.send(stream, 'comm_info_reply', reply_content, parent, ident) self.log.debug("%s", msg) @gen.coroutine def shutdown_request(self, stream, ident, parent): content = yield gen.maybe_future( self.do_shutdown(parent['content']['restart'])) self.session.send(stream, u'shutdown_reply', content, parent, ident=ident) # same content, but different msg_id for broadcasting on IOPub self._shutdown_message = self.session.msg(u'shutdown_reply', content, parent) self._at_shutdown() # call sys.exit after a short delay loop = ioloop.IOLoop.current() loop.add_timeout(time.time() + 0.1, loop.stop) def do_shutdown(self, restart): """Override in subclasses to do things when the frontend shuts down the kernel. """ return {'status': 'ok', 'restart': restart} @gen.coroutine def is_complete_request(self, stream, ident, parent): content = parent['content'] code = content['code'] reply_content = yield gen.maybe_future(self.do_is_complete(code)) reply_content = json_clean(reply_content) reply_msg = self.session.send(stream, 'is_complete_reply', reply_content, parent, ident) self.log.debug("%s", reply_msg) def do_is_complete(self, code): """Override in subclasses to find completions. """ return { 'status': 'unknown', } #--------------------------------------------------------------------------- # Engine methods (DEPRECATED) #--------------------------------------------------------------------------- def apply_request(self, stream, ident, parent): self.log.warning( "apply_request is deprecated in kernel_base, moving to ipyparallel." 
        )
        try:
            content = parent[u'content']
            bufs = parent[u'buffers']
            msg_id = parent['header']['msg_id']
        except Exception:
            self.log.error("Got bad msg: %s", parent, exc_info=True)
            return

        md = self.init_metadata(parent)

        reply_content, result_buf = self.do_apply(content, bufs, msg_id, md)

        # flush i/o
        sys.stdout.flush()
        sys.stderr.flush()

        md = self.finish_metadata(parent, md, reply_content)

        self.session.send(stream, u'apply_reply', reply_content,
                          parent=parent, ident=ident,
                          buffers=result_buf, metadata=md)

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        """DEPRECATED"""
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Control messages (DEPRECATED)
    #---------------------------------------------------------------------------

    def abort_request(self, stream, ident, parent):
        """abort a specific msg by id"""
        self.log.warning(
            "abort_request is deprecated in kernel_base. It is only part of IPython parallel"
        )
        msg_ids = parent['content'].get('msg_ids', None)
        if isinstance(msg_ids, string_types):
            msg_ids = [msg_ids]
        if not msg_ids:
            self._abort_queues()
        for mid in msg_ids:
            self.aborted.add(str(mid))

        content = dict(status='ok')
        reply_msg = self.session.send(stream, 'abort_reply', content=content,
                                      parent=parent, ident=ident)
        self.log.debug("%s", reply_msg)

    def clear_request(self, stream, idents, parent):
        """Clear our namespace."""
        self.log.warning(
            "clear_request is deprecated in kernel_base. It is only part of IPython parallel"
        )
        content = self.do_clear()
        self.session.send(stream, 'clear_reply', ident=idents, parent=parent,
                          content=content)

    def do_clear(self):
        """DEPRECATED since 4.0.3"""
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Protected interface
    #---------------------------------------------------------------------------

    def _topic(self, topic):
        """prefixed topic for IOPub messages"""
        base = "kernel.%s" % self.ident

        return py3compat.cast_bytes("%s.%s" % (base, topic))

    _aborting = Bool(False)

    @gen.coroutine
    def _abort_queues(self):
        for stream in self.shell_streams:
            stream.flush()
        self._aborting = True
        self.schedule_dispatch(
            ABORT_PRIORITY,
            self._dispatch_abort,
        )

    @gen.coroutine
    def _dispatch_abort(self):
        self.log.info("Finishing abort")
        yield gen.sleep(self.stop_on_error_timeout)
        self._aborting = False

    def _send_abort_reply(self, stream, msg, idents):
        """Send a reply to an aborted request"""
        self.log.info("Aborting:")
        self.log.info("%s", msg)
        reply_type = msg['header']['msg_type'].rsplit('_', 1)[0] + '_reply'
        status = {'status': 'aborted'}
        md = {'engine': self.ident}
        md.update(status)
        self.session.send(
            stream, reply_type, metadata=md,
            content=status, parent=msg, ident=idents,
        )

    def _no_raw_input(self):
        """Raise StdinNotImplementedError if active frontend doesn't support
        stdin."""
        raise StdinNotImplementedError("raw_input was called, but this "
                                       "frontend does not support stdin.")

    def getpass(self, prompt='', stream=None):
        """Forward getpass to frontends

        Raises
        ------
        StdinNotImplementedError if active frontend doesn't support stdin.
        """
        if not self._allow_stdin:
            raise StdinNotImplementedError(
                "getpass was called, but this frontend does not support input requests."
            )
        if stream is not None:
            import warnings
            warnings.warn(
                "The `stream` parameter of `getpass.getpass` will have no effect when using ipykernel",
                UserWarning, stacklevel=2)
        return self._input_request(
            prompt,
            self._parent_ident,
            self._parent_header,
            password=True,
        )

    def raw_input(self, prompt=''):
        """Forward raw_input to frontends

        Raises
        ------
        StdinNotImplementedError if active frontend doesn't support stdin.
        """
        if not self._allow_stdin:
            raise StdinNotImplementedError(
                "raw_input was called, but this frontend does not support input requests."
            )
        return self._input_request(
            str(prompt),
            self._parent_ident,
            self._parent_header,
            password=False,
        )

    def _input_request(self, prompt, ident, parent, password=False):
        # Flush output before making the request.
        sys.stderr.flush()
        sys.stdout.flush()
        # flush the stdin socket, to purge stale replies
        while True:
            try:
                self.stdin_socket.recv_multipart(zmq.NOBLOCK)
            except zmq.ZMQError as e:
                if e.errno == zmq.EAGAIN:
                    break
                else:
                    raise

        # Send the input request.
        content = json_clean(dict(prompt=prompt, password=password))
        self.session.send(self.stdin_socket, u'input_request', content, parent,
                          ident=ident)

        # Await a response.
        while True:
            try:
                ident, reply = self.session.recv(self.stdin_socket, 0)
            except KeyboardInterrupt:
                # re-raise KeyboardInterrupt, to truncate traceback
                raise KeyboardInterrupt
            except Exception:
                self.log.warning("Invalid Message:", exc_info=True)
            else:
                break
        try:
            value = py3compat.unicode_to_str(reply['content']['value'])
        except Exception:
            self.log.error("Bad input_reply: %s", parent)
            value = ''
        if value == '\x04':
            # EOF
            raise EOFError
        return value

    def _at_shutdown(self):
        """Actions taken at shutdown by the kernel, called by python's atexit.
        """
        if self._shutdown_message is not None:
            self.session.send(self.iopub_socket, self._shutdown_message,
                              ident=self._topic('shutdown'))
            self.log.debug("%s", self._shutdown_message)
        for s in self.shell_streams:
            s.flush(zmq.POLLOUT)
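

# Illustrative sketch (not part of the original source): the Kernel class
# above is abstract; a concrete kernel only has to override do_execute().
# `EchoKernel` and its metadata are hypothetical names; the reply dict
# follows the shape of the default do_execute stub shown above.

class EchoKernel(Kernel):
    implementation = 'echo'
    implementation_version = '0.1'
    banner = 'Echo kernel - repeats its input'
    language_info = {
        'name': 'echo',
        'mimetype': 'text/plain',
        'file_extension': '.txt',
    }

    def do_execute(self, code, silent, store_history=True,
                   user_expressions=None, allow_stdin=False):
        if not silent:
            # Echo the code back on IOPub as a stream message, using the
            # send_response() helper defined on Kernel above.
            self.send_response(self.iopub_socket, 'stream',
                               {'name': 'stdout', 'text': code})
        return {
            'status': 'ok',
            'execution_count': self.execution_count,
            'payload': [],
            'user_expressions': {},
        }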
class TerminalInteractiveShell(InteractiveShell):
    space_for_menu = Integer(
        6, help='Number of lines at the bottom of the screen '
        'to reserve for the completion menu').tag(config=True)

    def _space_for_menu_changed(self, old, new):
        self._update_layout()

    pt_cli = None
    debugger_history = None
    _pt_app = None

    simple_prompt = Bool(
        _use_simple_prompt,
        help="""Use `raw_input` for the REPL, without completion, multiline
            input, and prompt colors.

            Useful when controlling IPython as a subprocess, and piping STDIN/OUT/ERR.
            Known use cases are: IPython's own testing machinery, and emacs'
            inferior-shell integration through elpy.

            This mode defaults to `True` if the `IPY_TEST_SIMPLE_PROMPT`
            environment variable is set, or the current terminal is not a tty.
            """).tag(config=True)

    @property
    def debugger_cls(self):
        return Pdb if self.simple_prompt else TerminalPdb

    confirm_exit = Bool(
        True,
        help="""
        Set to confirm when you try to exit IPython with an EOF (Control-D
        in Unix, Control-Z/Enter in Windows). By typing 'exit' or 'quit',
        you can force a direct exit without any confirmation.""",
    ).tag(config=True)

    editing_mode = Unicode(
        'emacs',
        help="Shortcut style to use at the prompt. 'vi' or 'emacs'.",
    ).tag(config=True)

    mouse_support = Bool(
        False, help="Enable mouse support in the prompt").tag(config=True)

    highlighting_style = Union(
        [Unicode('legacy'), Type(klass=Style)],
        help="""The name or class of a Pygments style to use for syntax
        highlighting: \n %s""" % ', '.join(get_all_styles())).tag(config=True)

    @observe('highlighting_style')
    @observe('colors')
    def _highlighting_style_changed(self, change):
        self.refresh_style()

    def refresh_style(self):
        self._style = self._make_style_from_name_or_cls(
            self.highlighting_style)

    highlighting_style_overrides = Dict(
        help="Override highlighting format for specific tokens").tag(
            config=True)

    true_color = Bool(
        False,
        help=("Use 24bit colors instead of 256 colors in prompt highlighting. "
              "If your terminal supports true color, the following command "
              "should print 'TRUECOLOR' in orange: "
              "printf \"\\x1b[38;2;255;100;0mTRUECOLOR\\x1b[0m\\n\"")).tag(
                  config=True)

    editor = Unicode(
        get_default_editor(),
        help="Set the editor used by IPython (defaults to $EDITOR/vi/notepad)."
    ).tag(config=True)

    prompts_class = Type(
        Prompts,
        help='Class used to generate Prompt token for prompt_toolkit').tag(
            config=True)

    prompts = Instance(Prompts)

    @default('prompts')
    def _prompts_default(self):
        return self.prompts_class(self)

    @observe('prompts')
    def _(self, change):
        self._update_layout()

    @default('displayhook_class')
    def _displayhook_class_default(self):
        return RichPromptDisplayHook

    term_title = Bool(
        True, help="Automatically set the terminal title").tag(config=True)

    display_completions = Enum(
        ('column', 'multicolumn', 'readlinelike'),
        help=("Options for displaying tab completions, 'column', 'multicolumn', and "
              "'readlinelike'. These options are for `prompt_toolkit`, see "
              "`prompt_toolkit` documentation for more information."),
        default_value='multicolumn').tag(config=True)

    highlight_matching_brackets = Bool(
        True,
        help="Highlight matching brackets.",
    ).tag(config=True)

    @observe('term_title')
    def init_term_title(self, change=None):
        # Enable or disable the terminal title.
        if self.term_title:
            toggle_set_term_title(True)
            set_term_title('IPython: ' + abbrev_cwd())
        else:
            toggle_set_term_title(False)

    def init_display_formatter(self):
        super(TerminalInteractiveShell, self).init_display_formatter()
        # terminal only supports plain text
        self.display_formatter.active_types = ['text/plain']

    def init_prompt_toolkit_cli(self):
        if self.simple_prompt:
            # Fall back to plain non-interactive output for tests.
            # This is very limited, and only accepts a single line.
            def prompt():
                return cast_unicode_py2(
                    input('In [%d]: ' % self.execution_count))
            self.prompt_for_code = prompt
            return

        # Set up keyboard shortcuts
        kbmanager = KeyBindingManager.for_prompt()
        register_ipython_shortcuts(kbmanager.registry, self)

        # Pre-populate history from IPython's history database
        history = InMemoryHistory()
        last_cell = u""
        for __, ___, cell in self.history_manager.get_tail(
                self.history_load_length, include_latest=True):
            # Ignore blank lines and consecutive duplicates
            cell = cell.rstrip()
            if cell and (cell != last_cell):
                history.append(cell)
                last_cell = cell

        self._style = self._make_style_from_name_or_cls(
            self.highlighting_style)
        style = DynamicStyle(lambda: self._style)

        editing_mode = getattr(EditingMode, self.editing_mode.upper())

        def patch_stdout(**kwargs):
            return self.pt_cli.patch_stdout_context(**kwargs)

        self._pt_app = create_prompt_application(
            editing_mode=editing_mode,
            key_bindings_registry=kbmanager.registry,
            history=history,
            completer=IPythonPTCompleter(shell=self,
                                         patch_stdout=patch_stdout),
            enable_history_search=True,
            style=style,
            mouse_support=self.mouse_support,
            **self._layout_options())
        self._eventloop = create_eventloop(self.inputhook)
        self.pt_cli = CommandLineInterface(
            self._pt_app,
            eventloop=self._eventloop,
            output=create_output(true_color=self.true_color))

    def _make_style_from_name_or_cls(self, name_or_cls):
        """
        Small wrapper that makes an IPython-compatible style from a style
        name or class. We need this to add styling for the prompt, etc.
        """
        style_overrides = {}
        if name_or_cls == 'legacy':
            legacy = self.colors.lower()
            if legacy == 'linux':
                style_cls = get_style_by_name('monokai')
                style_overrides = _style_overrides_linux
            elif legacy == 'lightbg':
                style_overrides = _style_overrides_light_bg
                style_cls = get_style_by_name('pastie')
            elif legacy == 'neutral':
                # The default theme needs to be visible on both a dark background
                # and a light background, because we can't tell what the terminal
                # looks like. These tweaks to the default theme help with that.
                style_cls = get_style_by_name('default')
                style_overrides.update({
                    Token.Number: '#007700',
                    Token.Operator: 'noinherit',
                    Token.String: '#BB6622',
                    Token.Name.Function: '#2080D0',
                    Token.Name.Class: 'bold #2080D0',
                    Token.Name.Namespace: 'bold #2080D0',
                    Token.Prompt: '#009900',
                    Token.PromptNum: '#00ff00 bold',
                    Token.OutPrompt: '#990000',
                    Token.OutPromptNum: '#ff0000 bold',
                })
            elif legacy == 'nocolor':
                style_cls = _NoStyle
                style_overrides = {}
            else:
                raise ValueError('Got unknown colors: %s' % legacy)
        else:
            if isinstance(name_or_cls, str):
                style_cls = get_style_by_name(name_or_cls)
            else:
                style_cls = name_or_cls
            style_overrides = {
                Token.Prompt: '#009900',
                Token.PromptNum: '#00ff00 bold',
                Token.OutPrompt: '#990000',
                Token.OutPromptNum: '#ff0000 bold',
            }
        style_overrides.update(self.highlighting_style_overrides)
        style = PygmentsStyle.from_defaults(pygments_style_cls=style_cls,
                                            style_dict=style_overrides)

        return style

    def _layout_options(self):
        """
        Return the current layout options for the Terminal InteractiveShell.
        """
        return {
            'lexer': IPythonPTLexer(),
            'reserve_space_for_menu': self.space_for_menu,
            'get_prompt_tokens': self.prompts.in_prompt_tokens,
            'get_continuation_tokens': self.prompts.continuation_prompt_tokens,
            'multiline': True,
            'display_completions_in_columns':
                (self.display_completions == 'multicolumn'),

            # Highlight matching brackets, but only when this setting is
            # enabled, and only when the DEFAULT_BUFFER has the focus.
            'extra_input_processors': [
                ConditionalProcessor(
                    processor=HighlightMatchingBracketProcessor(
                        chars='[](){}'),
                    filter=HasFocus(DEFAULT_BUFFER) & ~IsDone() &
                    Condition(lambda cli: self.highlight_matching_brackets))
            ],
        }

    def _update_layout(self):
        """
        Ask for a recomputation of the application layout if, for example,
        some configuration options have changed.
        """
        if self._pt_app:
            self._pt_app.layout = create_prompt_layout(
                **self._layout_options())

    def prompt_for_code(self):
        document = self.pt_cli.run(pre_run=self.pre_prompt,
                                   reset_current_buffer=True)
        return document.text

    def enable_win_unicode_console(self):
        if sys.version_info >= (3, 6):
            # Since PEP 528, Python uses the unicode APIs for the Windows
            # console by default, so WUC shouldn't be needed.
            return

        import win_unicode_console
        win_unicode_console.enable()

    def init_io(self):
        if sys.platform not in {'win32', 'cli'}:
            return

        self.enable_win_unicode_console()

        import colorama
        colorama.init()

        # For some reason we make these wrappers around stdout/stderr.
        # For now, we need to reset them so all output gets coloured.
        # https://github.com/ipython/ipython/issues/8669
        # io.std* are deprecated, but don't show our own deprecation warnings
        # during initialization of the deprecated API.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', DeprecationWarning)
            io.stdout = io.IOStream(sys.stdout)
            io.stderr = io.IOStream(sys.stderr)

    def init_magics(self):
        super(TerminalInteractiveShell, self).init_magics()
        self.register_magics(TerminalMagics)

    def init_alias(self):
        # The parent class defines aliases that can be safely used with any
        # frontend.
        super(TerminalInteractiveShell, self).init_alias()

        # Now define aliases that only make sense on the terminal, because they
        # need direct access to the console in a way that we can't emulate in
        # GUI or web frontend
        if os.name == 'posix':
            for cmd in ['clear', 'more', 'less', 'man']:
                self.alias_manager.soft_define_alias(cmd, cmd)

    def __init__(self, *args, **kwargs):
        super(TerminalInteractiveShell, self).__init__(*args, **kwargs)
        self.init_prompt_toolkit_cli()
        self.init_term_title()
        self.keep_running = True

        self.debugger_history = InMemoryHistory()

    def ask_exit(self):
        self.keep_running = False

    rl_next_input = None

    def pre_prompt(self):
        if self.rl_next_input:
            self.pt_cli.application.buffer.text = cast_unicode_py2(
                self.rl_next_input)
            self.rl_next_input = None

    def interact(self, display_banner=DISPLAY_BANNER_DEPRECATED):
        if display_banner is not DISPLAY_BANNER_DEPRECATED:
            warn(
                'interact `display_banner` argument is deprecated since IPython 5.0. Call `show_banner()` if needed.',
                DeprecationWarning, stacklevel=2)

        self.keep_running = True
        while self.keep_running:
            print(self.separate_in, end='')

            try:
                code = self.prompt_for_code()
            except EOFError:
                if (not self.confirm_exit) \
                        or self.ask_yes_no('Do you really want to exit ([y]/n)?', 'y', 'n'):
                    self.ask_exit()
            else:
                if code:
                    self.run_cell(code, store_history=True)

    def mainloop(self, display_banner=DISPLAY_BANNER_DEPRECATED):
        # An extra layer of protection in case someone mashing Ctrl-C breaks
        # out of our internal code.
        if display_banner is not DISPLAY_BANNER_DEPRECATED:
            warn(
                'mainloop `display_banner` argument is deprecated since IPython 5.0. Call `show_banner()` if needed.',
                DeprecationWarning, stacklevel=2)
        while True:
            try:
                self.interact()
                break
            except KeyboardInterrupt as e:
                print("\n%s escaped interact()\n" % type(e).__name__)
            finally:
                # An interrupt during the eventloop will mess up the
                # internal state of the prompt_toolkit library.
                # Stopping the eventloop fixes this, see
                # https://github.com/ipython/ipython/pull/9867
                if hasattr(self, '_eventloop'):
                    self._eventloop.stop()

    _inputhook = None

    def inputhook(self, context):
        if self._inputhook is not None:
            self._inputhook(context)

    active_eventloop = None

    def enable_gui(self, gui=None):
        if gui:
            self.active_eventloop, self._inputhook = \
                get_inputhook_name_and_func(gui)
        else:
            self.active_eventloop = self._inputhook = None

    # Run !system commands directly, not through pipes, so terminal programs
    # work correctly.
    system = InteractiveShell.system_raw

    def auto_rewrite_input(self, cmd):
        """Overridden from the parent class to use the fancy rewriting prompt"""
        if not self.show_rewritten_input:
            return

        tokens = self.prompts.rewrite_prompt_tokens()
        if self.pt_cli:
            self.pt_cli.print_tokens(tokens)
            print(cmd)
        else:
            prompt = ''.join(s for t, s in tokens)
            print(prompt, cmd, sep='')

    _prompts_before = None

    def switch_doctest_mode(self, mode):
        """Switch prompts to classic for %doctest_mode"""
        if mode:
            self._prompts_before = self.prompts
            self.prompts = ClassicPrompts(self)
        elif self._prompts_before:
            self.prompts = self._prompts_before
            self._prompts_before = None
        self._update_layout()
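

# Illustrative sketch (not part of the original source): all of the traits
# above are configurable, so the usual way to customize the shell is via a
# traitlets Config object (or an ipython_config.py file). The values below
# are examples, not defaults.

from traitlets.config import Config

_example_config = Config()
_example_config.TerminalInteractiveShell.editing_mode = 'vi'
_example_config.TerminalInteractiveShell.highlighting_style = 'monokai'
_example_config.TerminalInteractiveShell.confirm_exit = False
# shell = TerminalInteractiveShell(config=_example_config)  # picks these up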
class BinderHub(Application): """An Application for starting a builder.""" aliases = { 'log-level': 'Application.log_level', 'f': 'BinderHub.config_file', 'config': 'BinderHub.config_file', 'port': 'BinderHub.port', } flags = { 'debug': ({ 'BinderHub': { 'debug': True } }, "Enable debug HTTP serving & debug logging") } config_file = Unicode('binderhub_config.py', help=""" Config file to load. If a relative path is provided, it is taken relative to current directory """, config=True) google_analytics_code = Unicode(None, allow_none=True, help=""" The Google Analytics code to use on the main page. Note that we'll respect Do Not Track settings, despite the fact that GA does not. We will not load the GA scripts on browsers with DNT enabled. """, config=True) google_analytics_domain = Unicode('auto', help=""" The Google Analytics domain to use on the main page. By default this is set to 'auto', which sets it up for current domain and all subdomains. This can be set to a more restrictive domain here for better privacy """, config=True) base_url = Unicode('/', help="The base URL of the entire application", config=True) @validate('base_url') def _valid_base_url(self, proposal): if not proposal.value.startswith('/'): proposal.value = '/' + proposal.value if not proposal.value.endswith('/'): proposal.value = proposal.value + '/' return proposal.value port = Integer(8585, help=""" Port for the builder to listen on. """, config=True) use_registry = Bool(True, help=""" Set to true to push images to a registry & check for images in registry. Set to false to use only local docker images. Useful when running in a single node. """, config=True) per_repo_quota = Integer( 0, help=""" Maximum number of concurrent users running from a given repo. Limits the amount of Binder that can be consumed by a single repo. 0 (default) means no quotas. """, config=True, ) docker_push_secret = Unicode('docker-push-secret', allow_none=True, help=""" A kubernetes secret object that provides credentials for pushing built images. """, config=True) docker_image_prefix = Unicode("", help=""" Prefix for all built docker images. If you are pushing to gcr.io, you would have this be: gcr.io/<your-project-name>/ Set according to whatever registry you are pushing to. Defaults to "", which is probably not what you want :) """, config=True) build_memory_limit = ByteSpecification(0, help=""" Max amount of memory allocated for each image build process. 0 sets no limit. This is used as both the memory limit & request for the pod that is spawned to do the building, even though the pod itself will not be using that much memory since the docker building is happening outside the pod. However, it makes kubernetes aware of the resources being used, and lets it schedule more intelligently. """, config=True) # TODO: Factor this out! github_auth_token = Unicode(None, allow_none=True, help=""" GitHub OAuth token to use for talking to the GitHub API. Might get throttled otherwise! """, config=True) debug = Bool(False, help=""" Turn on debugging. """, config=True) build_docker_host = Unicode("/var/run/docker.sock", config=True, help=""" The docker URL repo2docker should use to build the images. Currently, only paths are supported, and they are expected to be available on all the hosts. 
""") @validate('build_docker_host') def docker_build_host_validate(self, proposal): parts = urlparse(proposal.value) if parts.scheme != 'unix' or parts.netloc != '': raise TraitError( "Only unix domain sockets on same node are supported for build_docker_host" ) return proposal.value hub_api_token = Unicode( help="""API token for talking to the JupyterHub API""", config=True, ) hub_url = Unicode( help=""" The base URL of the JupyterHub instance where users will run. e.g. https://hub.mybinder.org/ """, config=True, ) @validate('hub_url') def _add_slash(self, proposal): """trait validator to ensure hub_url ends with a trailing slash""" if proposal.value is not None and not proposal.value.endswith('/'): return proposal.value + '/' return proposal.value build_namespace = Unicode('default', help=""" Kubernetes namespace to spawn build pods in. Note that the docker_push_secret must refer to a secret in this namespace. """, config=True) builder_image_spec = Unicode('jupyter/repo2docker:687788f', help=""" The builder image to be used for doing builds """, config=True) build_node_selector = Dict({}, config=True, help=""" Select the node where build pod runs on. """) repo_providers = Dict( { 'gh': GitHubRepoProvider, 'gist': GistRepoProvider, 'git': GitRepoProvider, 'gl': GitLabRepoProvider, }, config=True, help=""" List of Repo Providers to register and try """) concurrent_build_limit = Integer( 32, config=True, help="""The number of concurrent builds to allow.""") # FIXME: Come up with a better name for it? builder_required = Bool(True, config=True, help=""" If binderhub should try to continue to run without a working build infrastructure. Build infrastructure is kubernetes cluster + docker. This is useful for pure HTML/CSS/JS local development. """) tornado_settings = Dict(config=True, help=""" additional settings to pass through to tornado. can include things like additional headers, etc. 
""") @staticmethod def add_url_prefix(prefix, handlers): """add a url prefix to handlers""" for i, tup in enumerate(handlers): lis = list(tup) lis[0] = url_path_join(prefix, tup[0]) handlers[i] = tuple(lis) return handlers def init_pycurl(self): try: AsyncHTTPClient.configure( "tornado.curl_httpclient.CurlAsyncHTTPClient") except ImportError as e: self.log.debug( "Could not load pycurl: %s\npycurl is recommended if you have a large number of users.", e) def initialize(self, *args, **kwargs): """Load configuration settings.""" super().initialize(*args, **kwargs) self.load_config_file(self.config_file) # hook up tornado logging if self.debug: self.log_level = logging.DEBUG tornado.options.logging = logging.getLevelName(self.log_level) tornado.log.enable_pretty_logging() self.log = tornado.log.app_log self.init_pycurl() # initialize kubernetes config if self.builder_required: try: kubernetes.config.load_incluster_config() except kubernetes.config.ConfigException: kubernetes.config.load_kube_config() # times 2 for log + build threads self.build_pool = ThreadPoolExecutor(self.concurrent_build_limit * 2) jinja_options = dict(autoescape=True, ) jinja_env = Environment(loader=FileSystemLoader(TEMPLATE_PATH), **jinja_options) if self.use_registry and self.builder_required: registry = DockerRegistry( self.docker_image_prefix.split('/', 1)[0]) else: registry = None self.launcher = Launcher( parent=self, hub_url=self.hub_url, hub_api_token=self.hub_api_token, ) self.tornado_settings.update({ "docker_push_secret": self.docker_push_secret, "docker_image_prefix": self.docker_image_prefix, "static_path": os.path.join(os.path.dirname(__file__), "static"), "github_auth_token": self.github_auth_token, "debug": self.debug, 'hub_url': self.hub_url, 'hub_api_token': self.hub_api_token, 'launcher': self.launcher, "build_namespace": self.build_namespace, "builder_image_spec": self.builder_image_spec, 'build_node_selector': self.build_node_selector, 'build_pool': self.build_pool, 'per_repo_quota': self.per_repo_quota, 'repo_providers': self.repo_providers, 'use_registry': self.use_registry, 'registry': registry, 'traitlets_config': self.config, 'google_analytics_code': self.google_analytics_code, 'google_analytics_domain': self.google_analytics_domain, 'jinja2_env': jinja_env, 'build_memory_limit': self.build_memory_limit, 'build_docker_host': self.build_docker_host, 'base_url': self.base_url, 'static_url_prefix': url_path_join(self.base_url, 'static/'), 'debug': self.debug, }) handlers = [ (r'/metrics', MetricsHandler), (r"/build/([^/]+)/(.+)", BuildHandler), (r"/v2/([^/]+)/(.+)", ParameterizedMainHandler), (r"/repo/([^/]+)/([^/]+)(/.*)?", LegacyRedirectHandler), # for backward-compatible mybinder.org badge URLs # /assets/images/badge.svg (r'/assets/(images/badge\.svg)', tornado.web.StaticFileHandler, { 'path': self.tornado_settings['static_path'] }), # /badge.svg (r'/(badge\.svg)', tornado.web.StaticFileHandler, { 'path': os.path.join(self.tornado_settings['static_path'], 'images') }), # /favicon_XXX.ico (r'/(favicon\_fail\.ico)', tornado.web.StaticFileHandler, { 'path': os.path.join(self.tornado_settings['static_path'], 'images') }), (r'/(favicon\_success\.ico)', tornado.web.StaticFileHandler, { 'path': os.path.join(self.tornado_settings['static_path'], 'images') }), (r'/(favicon\_building\.ico)', tornado.web.StaticFileHandler, { 'path': os.path.join(self.tornado_settings['static_path'], 'images') }), (r'/', MainHandler), (r'.*', Custom404), ] handlers = self.add_url_prefix(self.base_url, handlers) 
self.tornado_app = tornado.web.Application(handlers, **self.tornado_settings) def stop(self): self.http_server.stop() self.build_pool.shutdown() def start(self, run_loop=True): self.log.info("BinderHub starting on port %i", self.port) self.http_server = self.tornado_app.listen(self.port) if run_loop: tornado.ioloop.IOLoop.current().start()
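

# Illustrative sketch (not part of the original source): BinderHub is a
# traitlets Application, so it is normally launched through the standard
# launch_instance() entry point, which picks up the file named by
# `config_file` (binderhub_config.py by default). The config values in the
# comments are examples, not defaults.
#
#     # binderhub_config.py
#     c.BinderHub.use_registry = False    # build against local docker only
#     c.BinderHub.hub_url = 'http://127.0.0.1:8000'

def main():
    BinderHub.launch_instance()

if __name__ == '__main__':
    main()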
class DFBox(Box): cur_frame = Integer(-1).tag(sync=True) columns = List([]).tag(sync=True) indexes = List([]).tag(sync=True) start = Int(0).tag(sync=True) def _update_output(self): self._output.clear_output() with self._output: df = self._df idxs = self.indexes if self.cur_frame >= 0: df = self._df.groupby('frame').get_group(self.cur_frame) #idxs = [max(idxs[0], df.index[0]), min(idxs[1], df.index[-1])] display_html(df[self.columns].loc[range(self.start, self.start + 50)].to_html(), raw=True) def _init_gui(self): close = Button(description=' Close', icon='trash', layout=_wlo) def _close(b): self.close() close.on_click(_close) frame = IntSlider(min=self._df.frame.astype(int).min(), max=self._df.frame.astype(int).max(), value=-1, description='Frame', layout=_wlo) cbut = Button(description=' Columns', icon='barcode') cols = self._df.columns.tolist() cols = SelectMultiple(options=cols, value=cols) def _cols(c): self.columns = c.new self._update_output() cols.observe(_cols, names='value') cfol = Folder(cbut, _ListDict([('cols', cols)])) rbut = Button(description=' Rows', icon='bars') rows = IntRangeSlider(min=self.indexes[0], max=self.indexes[1], value=[0, 50]) def _rows(c): self.indexes = c.new print(self.indexes) self._update_output() rows.observe(_rows, names='value') rfol = Folder(rbut, _ListDict([('rows', rows)])) return _ListDict([('close', close), ('frame', frame), ('cols', cfol), ('rows', rfol)]) def __init__(self, df, *args, **kwargs): self._df = df self.cur_frame = 0 if df.frame.astype(int).max() > 1 else -1 self.indexes = (df.index[0], df.index[-1]) self.start = df.index[0] self.columns = df.columns.tolist() self._controls = self._init_gui() gui = GUIBox(tuple(self._controls.values())) _ = kwargs.pop('layout', None) self._output = Output() children = [gui, self._output] self._update_output() super(DFBox, self).__init__(children, layout=_bboxlo, **kwargs)
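

# Illustrative sketch (not part of the original source): DFBox expects a
# DataFrame with a 'frame' column, which drives the frame slider and the
# cur_frame grouping above. The data below is made up for demonstration.

import pandas as pd

_demo_df = pd.DataFrame({
    'frame': [0, 0, 1, 1],
    'x': [0.0, 1.0, 0.5, 1.5],
    'y': [0.0, 0.5, 1.0, 1.5],
})
# DFBox(_demo_df) renders the control folders next to the HTML table output.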
class IPKernelApp(BaseIPythonApplication, InteractiveShellApp, ConnectionFileMixin): name='ipython-kernel' aliases = Dict(kernel_aliases) flags = Dict(kernel_flags) classes = [IPythonKernel, ZMQInteractiveShell, ProfileDir, Session] # the kernel class, as an importstring kernel_class = Type('ipykernel.ipkernel.IPythonKernel', klass='ipykernel.kernelbase.Kernel', help="""The Kernel subclass to be used. This should allow easy re-use of the IPKernelApp entry point to configure and launch kernels other than IPython's own. """).tag(config=True) kernel = Any() poller = Any() # don't restrict this even though current pollers are all Threads heartbeat = Instance(Heartbeat, allow_none=True) context = Any() shell_socket = Any() control_socket = Any() stdin_socket = Any() iopub_socket = Any() iopub_thread = Any() ports = Dict() subcommands = { 'install': ( 'ipykernel.kernelspec.InstallIPythonKernelSpecApp', 'Install the IPython kernel' ), } # connection info: connection_dir = Unicode() @default('connection_dir') def _default_connection_dir(self): return jupyter_runtime_dir() @property def abs_connection_file(self): if os.path.basename(self.connection_file) == self.connection_file: return os.path.join(self.connection_dir, self.connection_file) else: return self.connection_file # streams, etc. no_stdout = Bool(False, help="redirect stdout to the null device").tag(config=True) no_stderr = Bool(False, help="redirect stderr to the null device").tag(config=True) trio_loop = Bool(False, help="Set main event loop.").tag(config=True) quiet = Bool(True, help="Only send stdout/stderr to output stream").tag(config=True) outstream_class = DottedObjectName('ipykernel.iostream.OutStream', help="The importstring for the OutStream factory").tag(config=True) displayhook_class = DottedObjectName('ipykernel.displayhook.ZMQDisplayHook', help="The importstring for the DisplayHook factory").tag(config=True) # polling parent_handle = Integer(int(os.environ.get('JPY_PARENT_PID') or 0), help="""kill this process if its parent dies. On Windows, the argument specifies the HANDLE of the parent process, otherwise it is simply boolean. """).tag(config=True) interrupt = Integer(int(os.environ.get('JPY_INTERRUPT_EVENT') or 0), help="""ONLY USED ON WINDOWS Interrupt this process when the parent is signaled. """).tag(config=True) def init_crash_handler(self): sys.excepthook = self.excepthook def excepthook(self, etype, evalue, tb): # write uncaught traceback to 'real' stderr, not zmq-forwarder traceback.print_exception(etype, evalue, tb, file=sys.__stderr__) def init_poller(self): if sys.platform == 'win32': if self.interrupt or self.parent_handle: self.poller = ParentPollerWindows(self.interrupt, self.parent_handle) elif self.parent_handle and self.parent_handle != 1: # PID 1 (init) is special and will never go away, # only be reassigned. # Parent polling doesn't work if ppid == 1 to start with. 
self.poller = ParentPollerUnix() def _try_bind_socket(self, s, port): iface = '%s://%s' % (self.transport, self.ip) if self.transport == 'tcp': if port <= 0: port = s.bind_to_random_port(iface) else: s.bind("tcp://%s:%i" % (self.ip, port)) elif self.transport == 'ipc': if port <= 0: port = 1 path = "%s-%i" % (self.ip, port) while os.path.exists(path): port = port + 1 path = "%s-%i" % (self.ip, port) else: path = "%s-%i" % (self.ip, port) s.bind("ipc://%s" % path) return port def _bind_socket(self, s, port): try: win_in_use = errno.WSAEADDRINUSE except AttributeError: win_in_use = None # Try up to 100 times to bind a port when in conflict to avoid # infinite attempts in bad setups max_attempts = 1 if port else 100 for attempt in range(max_attempts): try: return self._try_bind_socket(s, port) except zmq.ZMQError as ze: # Raise if we have any error not related to socket binding if ze.errno != errno.EADDRINUSE and ze.errno != win_in_use: raise if attempt == max_attempts - 1: raise def write_connection_file(self): """write connection info to JSON file""" cf = self.abs_connection_file self.log.debug("Writing connection file: %s", cf) write_connection_file(cf, ip=self.ip, key=self.session.key, transport=self.transport, shell_port=self.shell_port, stdin_port=self.stdin_port, hb_port=self.hb_port, iopub_port=self.iopub_port, control_port=self.control_port) def cleanup_connection_file(self): cf = self.abs_connection_file self.log.debug("Cleaning up connection file: %s", cf) try: os.remove(cf) except (IOError, OSError): pass self.cleanup_ipc_files() def init_connection_file(self): if not self.connection_file: self.connection_file = "kernel-%s.json"%os.getpid() try: self.connection_file = filefind(self.connection_file, ['.', self.connection_dir]) except IOError: self.log.debug("Connection file not found: %s", self.connection_file) # This means I own it, and I'll create it in this directory: ensure_dir_exists(os.path.dirname(self.abs_connection_file), 0o700) # Also, I will clean it up: atexit.register(self.cleanup_connection_file) return try: self.load_connection_file() except Exception: self.log.error("Failed to load connection file: %r", self.connection_file, exc_info=True) self.exit(1) def init_sockets(self): # Create a context, a session, and the kernel sockets. self.log.info("Starting the kernel at pid: %i", os.getpid()) assert self.context is None, "init_sockets cannot be called twice!" 
self.context = context = zmq.Context() atexit.register(self.close) self.shell_socket = context.socket(zmq.ROUTER) self.shell_socket.linger = 1000 self.shell_port = self._bind_socket(self.shell_socket, self.shell_port) self.log.debug("shell ROUTER Channel on port: %i" % self.shell_port) self.stdin_socket = context.socket(zmq.ROUTER) self.stdin_socket.linger = 1000 self.stdin_port = self._bind_socket(self.stdin_socket, self.stdin_port) self.log.debug("stdin ROUTER Channel on port: %i" % self.stdin_port) self.control_socket = context.socket(zmq.ROUTER) self.control_socket.linger = 1000 self.control_port = self._bind_socket(self.control_socket, self.control_port) self.log.debug("control ROUTER Channel on port: %i" % self.control_port) if hasattr(zmq, 'ROUTER_HANDOVER'): # set router-handover to workaround zeromq reconnect problems # in certain rare circumstances # see ipython/ipykernel#270 and zeromq/libzmq#2892 self.shell_socket.router_handover = \ self.control_socket.router_handover = \ self.stdin_socket.router_handover = 1 self.init_iopub(context) def init_iopub(self, context): self.iopub_socket = context.socket(zmq.PUB) self.iopub_socket.linger = 1000 self.iopub_port = self._bind_socket(self.iopub_socket, self.iopub_port) self.log.debug("iopub PUB Channel on port: %i" % self.iopub_port) self.configure_tornado_logger() self.iopub_thread = IOPubThread(self.iopub_socket, pipe=True) self.iopub_thread.start() # backward-compat: wrap iopub socket API in background thread self.iopub_socket = self.iopub_thread.background_socket def init_heartbeat(self): """start the heart beating""" # heartbeat doesn't share context, because it mustn't be blocked # by the GIL, which is accessed by libzmq when freeing zero-copy messages hb_ctx = zmq.Context() self.heartbeat = Heartbeat(hb_ctx, (self.transport, self.ip, self.hb_port)) self.hb_port = self.heartbeat.port self.log.debug("Heartbeat REP Channel on port: %i" % self.hb_port) self.heartbeat.start() def close(self): """Close zmq sockets in an orderly fashion""" # un-capture IO before we start closing channels self.reset_io() self.log.info("Cleaning up sockets") if self.heartbeat: self.log.debug("Closing heartbeat channel") self.heartbeat.context.term() if self.iopub_thread: self.log.debug("Closing iopub channel") self.iopub_thread.stop() self.iopub_thread.close() for channel in ('shell', 'control', 'stdin'): self.log.debug("Closing %s channel", channel) socket = getattr(self, channel + "_socket", None) if socket and not socket.closed: socket.close() self.log.debug("Terminating zmq context") self.context.term() self.log.debug("Terminated zmq context") def log_connection_info(self): """display connection info, and store ports""" basename = os.path.basename(self.connection_file) if basename == self.connection_file or \ os.path.dirname(self.connection_file) == self.connection_dir: # use shortname tail = basename else: tail = self.connection_file lines = [ "To connect another client to this kernel, use:", " --existing %s" % tail, ] # log connection info # info-level, so often not shown. 
# frontends should use the %connect_info magic # to see the connection info for line in lines: self.log.info(line) # also raw print to the terminal if no parent_handle (`ipython kernel`) # unless log-level is CRITICAL (--quiet) if not self.parent_handle and self.log_level < logging.CRITICAL: print(_ctrl_c_message, file=sys.__stdout__) for line in lines: print(line, file=sys.__stdout__) self.ports = dict(shell=self.shell_port, iopub=self.iopub_port, stdin=self.stdin_port, hb=self.hb_port, control=self.control_port) def init_blackhole(self): """redirects stdout/stderr to devnull if necessary""" if self.no_stdout or self.no_stderr: blackhole = open(os.devnull, 'w') if self.no_stdout: sys.stdout = sys.__stdout__ = blackhole if self.no_stderr: sys.stderr = sys.__stderr__ = blackhole def init_io(self): """Redirect input streams and set a display hook.""" if self.outstream_class: outstream_factory = import_item(str(self.outstream_class)) if sys.stdout is not None: sys.stdout.flush() e_stdout = None if self.quiet else sys.__stdout__ e_stderr = None if self.quiet else sys.__stderr__ sys.stdout = outstream_factory(self.session, self.iopub_thread, 'stdout', echo=e_stdout) if sys.stderr is not None: sys.stderr.flush() sys.stderr = outstream_factory(self.session, self.iopub_thread, 'stderr', echo=e_stderr) if self.displayhook_class: displayhook_factory = import_item(str(self.displayhook_class)) self.displayhook = displayhook_factory(self.session, self.iopub_socket) sys.displayhook = self.displayhook self.patch_io() def reset_io(self): """restore original io restores state after init_io """ sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ sys.displayhook = sys.__displayhook__ def patch_io(self): """Patch important libraries that can't handle sys.stdout forwarding""" try: import faulthandler except ImportError: pass else: # Warning: this is a monkeypatch of `faulthandler.enable`, watch for possible # updates to the upstream API and update accordingly (up-to-date as of Python 3.5): # https://docs.python.org/3/library/faulthandler.html#faulthandler.enable # change default file to __stderr__ from forwarded stderr faulthandler_enable = faulthandler.enable def enable(file=sys.__stderr__, all_threads=True, **kwargs): return faulthandler_enable(file=file, all_threads=all_threads, **kwargs) faulthandler.enable = enable if hasattr(faulthandler, 'register'): faulthandler_register = faulthandler.register def register(signum, file=sys.__stderr__, all_threads=True, chain=False, **kwargs): return faulthandler_register(signum, file=file, all_threads=all_threads, chain=chain, **kwargs) faulthandler.register = register def init_signal(self): signal.signal(signal.SIGINT, signal.SIG_IGN) def init_kernel(self): """Create the Kernel object itself""" shell_stream = ZMQStream(self.shell_socket) control_stream = ZMQStream(self.control_socket) kernel_factory = self.kernel_class.instance kernel = kernel_factory(parent=self, session=self.session, control_stream=control_stream, shell_streams=[shell_stream, control_stream], iopub_thread=self.iopub_thread, iopub_socket=self.iopub_socket, stdin_socket=self.stdin_socket, log=self.log, profile_dir=self.profile_dir, user_ns=self.user_ns, ) kernel.record_ports({ name + '_port': port for name, port in self.ports.items() }) self.kernel = kernel # Allow the displayhook to get the execution count self.displayhook.get_execution_count = lambda: kernel.execution_count def init_gui_pylab(self): """Enable GUI event loop integration, taking pylab into account.""" # Register inline backend as 
default # this is higher priority than matplotlibrc, # but lower priority than anything else (mpl.use() for instance). # This only affects matplotlib >= 1.5 if not os.environ.get('MPLBACKEND'): os.environ['MPLBACKEND'] = 'module://ipykernel.pylab.backend_inline' # Provide a wrapper for :meth:`InteractiveShellApp.init_gui_pylab` # to ensure that any exception is printed straight to stderr. # Normally _showtraceback associates the reply with an execution, # which means frontends will never draw it, as this exception # is not associated with any execute request. shell = self.shell _showtraceback = shell._showtraceback try: # replace error-sending traceback with stderr def print_tb(etype, evalue, stb): print ("GUI event loop or pylab initialization failed", file=sys.stderr) print (shell.InteractiveTB.stb2text(stb), file=sys.stderr) shell._showtraceback = print_tb InteractiveShellApp.init_gui_pylab(self) finally: shell._showtraceback = _showtraceback def init_shell(self): self.shell = getattr(self.kernel, 'shell', None) if self.shell: self.shell.configurables.append(self) def configure_tornado_logger(self): """ Configure the tornado logging.Logger. Must set up the tornado logger or else tornado will call basicConfig for the root logger which makes the root logger go to the real sys.stderr instead of the capture streams. This function mimics the setup of logging.basicConfig. """ logger = logging.getLogger('tornado') handler = logging.StreamHandler() formatter = logging.Formatter(logging.BASIC_FORMAT) handler.setFormatter(formatter) logger.addHandler(handler) def _init_asyncio_patch(self): """set default asyncio policy to be compatible with tornado Tornado 6 (at least) is not compatible with the default asyncio implementation on Windows Pick the older SelectorEventLoopPolicy on Windows if the known-incompatible default policy is in use. do this as early as possible to make it a low priority and overrideable ref: https://github.com/tornadoweb/tornado/issues/2608 FIXME: if/when tornado supports the defaults in asyncio, remove and bump tornado requirement for py38 """ if sys.platform.startswith("win") and sys.version_info >= (3, 8): import asyncio try: from asyncio import ( WindowsProactorEventLoopPolicy, WindowsSelectorEventLoopPolicy, ) except ImportError: pass # not affected else: if type(asyncio.get_event_loop_policy()) is WindowsProactorEventLoopPolicy: # WindowsProactorEventLoopPolicy is not compatible with tornado 6 # fallback to the pre-3.8 default of Selector asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) def init_pdb(self): """Replace pdb with IPython's version that is interruptible. With the non-interruptible version, stopping pdb() locks up the kernel in a non-recoverable state. """ import pdb from IPython.core import debugger if hasattr(debugger, "InterruptiblePdb"): # Only available in newer IPython releases: debugger.Pdb = debugger.InterruptiblePdb pdb.Pdb = debugger.Pdb pdb.set_trace = debugger.set_trace @catch_config_error def initialize(self, argv=None): self._init_asyncio_patch() super(IPKernelApp, self).initialize(argv) if self.subapp is not None: return self.init_pdb() self.init_blackhole() self.init_connection_file() self.init_poller() self.init_sockets() self.init_heartbeat() # writing/displaying connection info must be *after* init_sockets/heartbeat self.write_connection_file() # Log connection info after writing connection file, so that the connection # file is definitely available at the time someone reads the log. 
        self.log_connection_info()
        self.init_io()
        try:
            self.init_signal()
        except Exception:
            # Catch exception when initializing signal fails, e.g. when running
            # the kernel on a separate thread
            if self.log_level < logging.CRITICAL:
                self.log.error("Unable to initialize signal:", exc_info=True)
        self.init_kernel()
        # shell init steps
        self.init_path()
        self.init_shell()
        if self.shell:
            self.init_gui_pylab()
            self.init_extensions()
        self.init_code()
        # flush stdout/stderr, so that anything written to these streams during
        # initialization does not get associated with the first execution request
        sys.stdout.flush()
        sys.stderr.flush()

    def start(self):
        if self.subapp is not None:
            return self.subapp.start()
        if self.poller is not None:
            self.poller.start()
        self.kernel.start()
        self.io_loop = ioloop.IOLoop.current()
        if self.trio_loop:
            from ipykernel.trio_runner import TrioRunner
            tr = TrioRunner()
            tr.initialize(self.kernel, self.io_loop)
            try:
                tr.run()
            except KeyboardInterrupt:
                pass
        else:
            try:
                self.io_loop.start()
            except KeyboardInterrupt:
                pass
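

# Illustrative sketch (not part of the original source): IPKernelApp follows
# the standard traitlets Application lifecycle. initialize() wires up the
# sockets, heartbeat, IO redirection, and the kernel; start() enters the IO
# loop. A typical module entry point therefore looks like:

def main():
    app = IPKernelApp.instance()
    app.initialize()
    app.start()

if __name__ == '__main__':
    main()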
class GitPuller(Configurable):
    depth = Integer(config=True, help="""
        Depth (i.e., commit count) of clone operations. Set this to 0 to make a
        full depth clone.

        Defaults to the value of the environment variable NBGITPULLER_DEPTH, or
        1 if the environment variable isn't set.
        """)

    @default('depth')
    def _depth_default(self):
        """This is a workaround for setting the same default directly
        in the definition of the traitlet above. Without it, the test
        fails because a change in the environment variable has no impact.
        I think this is a consequence of the tests not starting with a
        totally clean environment where the GitPuller class hadn't been
        loaded already."""
        return int(os.environ.get('NBGITPULLER_DEPTH', 1))

    def __init__(self, git_url, branch_name, repo_dir, **kwargs):
        assert git_url and branch_name

        self.git_url = git_url
        self.branch_name = branch_name
        self.repo_dir = repo_dir
        newargs = {k: v for k, v in kwargs.items() if v is not None}
        super(GitPuller, self).__init__(**newargs)

    def pull(self):
        """
        Pull selected repo from a remote git repository,
        while preserving user changes
        """
        if not os.path.exists(self.repo_dir):
            yield from self.initialize_repo()
        else:
            yield from self.update()

    def initialize_repo(self):
        """
        Clones repository
        """
        logging.info('Repo {} doesn\'t exist. Cloning...'.format(
            self.repo_dir))
        clone_args = ['git', 'clone']
        if self.depth and self.depth > 0:
            clone_args.extend(['--depth', str(self.depth)])
        clone_args.extend(['--branch', self.branch_name])
        clone_args.extend([self.git_url, self.repo_dir])
        yield from execute_cmd(clone_args)
        logging.info('Repo {} initialized'.format(self.repo_dir))

    def reset_deleted_files(self):
        """
        Runs the equivalent of git checkout -- <file> for each file that was
        deleted. This allows us to delete a file, hit an interact link, then
        get a clean version of the file again.
        """
        yield from self.ensure_lock()
        deleted_files = subprocess.check_output(
            ['git', 'ls-files', '--deleted', '-z'],
            cwd=self.repo_dir).decode().strip().split('\0')

        for filename in deleted_files:
            if filename:  # Filter out empty lines
                yield from execute_cmd([
                    'git', 'checkout', 'origin/{}'.format(self.branch_name),
                    '--', filename
                ], cwd=self.repo_dir)

    def repo_is_dirty(self):
        """
        Return true if repo is dirty
        """
        try:
            subprocess.check_call(['git', 'diff-files', '--quiet'],
                                  cwd=self.repo_dir)
            # Return code is 0
            return False
        except subprocess.CalledProcessError:
            return True

    def update_remotes(self):
        """
        Do a git fetch so our remotes are up to date
        """
        yield from execute_cmd(['git', 'fetch'], cwd=self.repo_dir)

    def find_upstream_changed(self, kind):
        """
        Return list of files that have been changed upstream belonging to a
        particular kind of change
        """
        output = subprocess.check_output([
            'git', 'log', '..origin/{}'.format(self.branch_name),
            '--oneline', '--name-status'
        ], cwd=self.repo_dir).decode()
        files = []
        for line in output.split('\n'):
            if line.startswith(kind):
                files.append(
                    os.path.join(self.repo_dir, line.split('\t', 1)[1]))

        return files

    def ensure_lock(self):
        """
        Make sure we have the .git/lock required to do modifications on the repo

        This must be called before any git commands that modify state. This isn't
        guaranteed to be atomic, due to the nature of using files for locking. But
        it's the best we can do right now.
        """
        try:
            lockpath = os.path.join(self.repo_dir, '.git', 'index.lock')
            mtime = os.path.getmtime(lockpath)
            # A lock file does exist
            # If it's older than 10 minutes, we just assume it is stale and take over
            # If not, we fail with an explicit error.
            if time.time() - mtime > 600:
                yield "Stale .git/index.lock found, attempting to remove"
                os.remove(lockpath)
                yield "Stale .git/index.lock removed"
            else:
                raise Exception(
                    'Recent .git/index.lock found, operation cannot proceed. Try again in a few minutes.'
                )
        except FileNotFoundError:
            # No lock is held by other processes, we are free to go
            return

    def rename_local_untracked(self):
        """
        Rename local untracked files that would require pulls
        """
        # Find what files have been added!
        new_upstream_files = self.find_upstream_changed('A')
        for f in new_upstream_files:
            if os.path.exists(f):
                # If there's a file extension, put the timestamp before that
                ts = datetime.datetime.now().strftime('__%Y%m%d%H%M%S')
                path_head, path_tail = os.path.split(f)
                path_tail = ts.join(os.path.splitext(path_tail))
                new_file_name = os.path.join(path_head, path_tail)
                os.rename(f, new_file_name)
                yield 'Renamed {} to {} to avoid conflict with upstream'.format(
                    f, new_file_name)

    def update(self):
        """
        Do the pulling if necessary
        """
        # Fetch remotes, so we know we're dealing with latest remote
        yield from self.update_remotes()

        # Rename local untracked files that might be overwritten by pull
        yield from self.rename_local_untracked()

        # Reset local files that have been deleted. We don't actually expect users to
        # delete something that's present upstream and expect to keep it. This prevents
        # unnecessary conflicts, and also allows users to click the link again to get
        # a fresh copy of a file they might have screwed up.
        yield from self.reset_deleted_files()

        # If there are local changes, make a commit so we can do merges when pulling
        # We also allow empty commits. On NFS (at least), sometimes repo_is_dirty returns a false
        # positive, returning True even when there are no local changes (git diff-files seems to return
        # bogus output?). While ideally that would not happen, allowing empty commits keeps us
        # resilient to that issue.
        # We explicitly set user info of the commits we are making, to keep that separate from
        # whatever author info is set in system / repo config by the user. We pass '-c' to git
        # itself (rather than to 'git commit') to temporarily set config variables. This is
        # better than passing --author, since git treats author separately from committer.
        if self.repo_is_dirty():
            yield from self.ensure_lock()
            yield from execute_cmd([
                'git',
                '-c', '[email protected]',
                '-c', 'user.name=nbgitpuller',
                'commit',
                '-am', 'Automatic commit by nbgitpuller',
                '--allow-empty'
            ], cwd=self.repo_dir)

        # Merge the upstream branch into local!
        yield from self.ensure_lock()
        yield from execute_cmd([
            'git',
            '-c', '[email protected]',
            '-c', 'user.name=nbgitpuller',
            'merge',
            '-Xours', 'origin/{}'.format(self.branch_name)
        ], cwd=self.repo_dir)
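

# Illustrative sketch (not part of the original source): pull() and the
# helpers above are generators that yield progress lines, so callers are
# expected to iterate them. The URL and paths below are hypothetical.

def _demo_pull():
    puller = GitPuller(
        git_url='https://github.com/example/repo',
        branch_name='master',
        repo_dir='/tmp/example-repo',
    )
    for line in puller.pull():
        print(line)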
class SingleUserNotebookApp(NotebookApp):
    """A subclass of the regular NotebookApp that is aware of the parent multiuser context."""

    description = dedent("""
    Single-user server for JupyterHub. Extends the Jupyter Notebook server.

    Meant to be invoked by JupyterHub Spawners, and not directly.
    """)

    examples = ""
    subcommands = {}
    version = __version__
    classes = NotebookApp.classes + [HubOAuth]

    # disable single-user app's localhost checking
    allow_remote_access = True

    # don't store cookie secrets
    cookie_secret_file = ''
    # always generate a new cookie secret on launch
    # ensures that each spawn clears any cookies from previous session,
    # triggering OAuth again
    cookie_secret = Bytes()

    def _cookie_secret_default(self):
        return os.urandom(32)

    user = CUnicode().tag(config=True)
    group = CUnicode().tag(config=True)

    @default('user')
    def _default_user(self):
        return os.environ.get('JUPYTERHUB_USER') or ''

    @default('group')
    def _default_group(self):
        return os.environ.get('JUPYTERHUB_GROUP') or ''

    @observe('user')
    def _user_changed(self, change):
        self.log.name = change.new

    hub_host = Unicode().tag(config=True)

    hub_prefix = Unicode('/hub/').tag(config=True)

    @default('keyfile')
    def _keyfile_default(self):
        return os.environ.get('JUPYTERHUB_SSL_KEYFILE') or ''

    @default('certfile')
    def _certfile_default(self):
        return os.environ.get('JUPYTERHUB_SSL_CERTFILE') or ''

    @default('client_ca')
    def _client_ca_default(self):
        return os.environ.get('JUPYTERHUB_SSL_CLIENT_CA') or ''

    @default('hub_prefix')
    def _hub_prefix_default(self):
        base_url = os.environ.get('JUPYTERHUB_BASE_URL') or '/'
        return base_url + 'hub/'

    hub_api_url = Unicode().tag(config=True)

    @default('hub_api_url')
    def _hub_api_url_default(self):
        return os.environ.get('JUPYTERHUB_API_URL') or 'http://127.0.0.1:8081/hub/api'

    # defaults for some configurables that may come from service env variables:
    @default('base_url')
    def _base_url_default(self):
        return os.environ.get('JUPYTERHUB_SERVICE_PREFIX') or '/'

    # Note: this may be removed if the notebook module is >= 5.0.0b1
    @validate('base_url')
    def _validate_base_url(self, proposal):
        """ensure base_url starts and ends with /"""
        value = proposal.value
        if not value.startswith('/'):
            value = '/' + value
        if not value.endswith('/'):
            value = value + '/'
        return value

    @default('port')
    def _port_default(self):
        if os.environ.get('JUPYTERHUB_SERVICE_URL'):
            url = urlparse(os.environ['JUPYTERHUB_SERVICE_URL'])
            if url.port:
                return url.port
            elif url.scheme == 'http':
                return 80
            elif url.scheme == 'https':
                return 443
        return 8888

    @default('ip')
    def _ip_default(self):
        if os.environ.get('JUPYTERHUB_SERVICE_URL'):
            url = urlparse(os.environ['JUPYTERHUB_SERVICE_URL'])
            if url.hostname:
                return url.hostname
        return '127.0.0.1'

    aliases = aliases
    flags = flags

    # disable some single-user configurables
    token = ''
    open_browser = False
    quit_button = False
    trust_xheaders = True
    login_handler_class = JupyterHubLoginHandler
    logout_handler_class = JupyterHubLogoutHandler
    port_retries = 0  # disable port-retries, since the Spawner will tell us what port to use

    disable_user_config = Bool(
        False,
        help="""Disable user configuration of single-user server.

        Prevents user-writable files that normally configure the single-user server
        from being loaded, ensuring admins have full control of configuration.
        """
    ).tag(config=True)

    @validate('notebook_dir')
    def _notebook_dir_validate(self, proposal):
        value = os.path.expanduser(proposal['value'])
        # Strip any trailing slashes
        # *except* if it's root
        _, path = os.path.splitdrive(value)
        if path == os.sep:
            return value
        value = value.rstrip(os.sep)
        if not os.path.isabs(value):
            # If we receive a non-absolute path, make it absolute.
            value = os.path.abspath(value)
        if not os.path.isdir(value):
            raise TraitError("No such notebook dir: %r" % value)
        return value

    @default('log_datefmt')
    def _log_datefmt_default(self):
        """Exclude date from default date format"""
        return "%Y-%m-%d %H:%M:%S"

    @default('log_format')
    def _log_format_default(self):
        """override default log format to include time"""
        return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s %(module)s:%(lineno)d]%(end_color)s %(message)s"

    def _confirm_exit(self):
        # disable the exit confirmation for background notebook processes
        self.io_loop.add_callback_from_signal(self.io_loop.stop)

    def migrate_config(self):
        if self.disable_user_config:
            # disable config-migration when user config is disabled
            return
        else:
            super(SingleUserNotebookApp, self).migrate_config()

    @property
    def config_file_paths(self):
        path = super(SingleUserNotebookApp, self).config_file_paths
        if self.disable_user_config:
            # filter out user-writable config dirs if user config is disabled
            path = list(_exclude_home(path))
        return path

    @property
    def nbextensions_path(self):
        path = super(SingleUserNotebookApp, self).nbextensions_path
        if self.disable_user_config:
            path = list(_exclude_home(path))
        return path

    @validate('static_custom_path')
    def _validate_static_custom_path(self, proposal):
        path = proposal['value']
        if self.disable_user_config:
            path = list(_exclude_home(path))
        return path

    # create dynamic default http client,
    # configured with any relevant ssl config
    hub_http_client = Any()

    @default('hub_http_client')
    def _default_client(self):
        ssl_context = make_ssl_context(
            self.keyfile,
            self.certfile,
            cafile=self.client_ca,
        )
        AsyncHTTPClient.configure(
            None,
            defaults={
                "ssl_options": ssl_context,
            },
        )
        return AsyncHTTPClient()

    async def check_hub_version(self):
        """Test a connection to my Hub

        - exit if I can't connect at all
        - check version and warn on sufficient mismatch
        """
        client = self.hub_http_client
        RETRIES = 5
        for i in range(1, RETRIES + 1):
            try:
                resp = await client.fetch(self.hub_api_url)
            except Exception:
                self.log.exception(
                    "Failed to connect to my Hub at %s (attempt %i/%i). Is it running?",
                    self.hub_api_url, i, RETRIES)
                await gen.sleep(min(2 ** i, 16))
            else:
                break
        else:
            self.exit(1)

        hub_version = resp.headers.get('X-JupyterHub-Version')
        _check_version(hub_version, __version__, self.log)

    server_name = Unicode()

    @default('server_name')
    def _server_name_default(self):
        return os.environ.get('JUPYTERHUB_SERVER_NAME', '')

    hub_activity_url = Unicode(
        config=True,
        help="URL for sending JupyterHub activity updates",
    )

    @default('hub_activity_url')
    def _default_activity_url(self):
        return os.environ.get('JUPYTERHUB_ACTIVITY_URL', '')

    hub_activity_interval = Integer(
        300,
        config=True,
        help="""
        Interval (in seconds) on which to update the Hub
        with our latest activity.
        """,
    )

    @default('hub_activity_interval')
    def _default_activity_interval(self):
        env_value = os.environ.get('JUPYTERHUB_ACTIVITY_INTERVAL')
        if env_value:
            return int(env_value)
        else:
            return 300

    _last_activity_sent = Any(allow_none=True)

    async def notify_activity(self):
        """Notify jupyterhub of activity"""
        client = self.hub_http_client
        last_activity = self.web_app.last_activity()
        if not last_activity:
            self.log.debug("No activity to send to the Hub")
            return

        if last_activity:
            # protect against mixed timezone comparisons
            if not last_activity.tzinfo:
                # assume naive timestamps are utc
                self.log.warning("last activity is using naïve timestamps")
                last_activity = last_activity.replace(tzinfo=timezone.utc)

        if self._last_activity_sent and last_activity < self._last_activity_sent:
            self.log.debug("No activity since %s", self._last_activity_sent)
            return

        last_activity_timestamp = isoformat(last_activity)

        async def notify():
            self.log.debug("Notifying Hub of activity %s", last_activity_timestamp)
            req = HTTPRequest(
                url=self.hub_activity_url,
                method='POST',
                headers={
                    "Authorization": "token {}".format(self.hub_auth.api_token),
                    "Content-Type": "application/json",
                },
                body=json.dumps({
                    'servers': {
                        self.server_name: {
                            'last_activity': last_activity_timestamp,
                        },
                    },
                    'last_activity': last_activity_timestamp,
                }),
            )
            try:
                await client.fetch(req)
            except Exception:
                self.log.exception("Error notifying Hub of activity")
                return False
            else:
                return True

        await exponential_backoff(
            notify,
            fail_message="Failed to notify Hub of activity",
            start_wait=1,
            max_wait=15,
            timeout=60,
        )
        self._last_activity_sent = last_activity

    async def keep_activity_updated(self):
        if not self.hub_activity_url or not self.hub_activity_interval:
            self.log.warning("Activity events disabled")
            return
        self.log.info("Updating Hub with activity every %s seconds",
                      self.hub_activity_interval)
        while True:
            try:
                await self.notify_activity()
            except Exception:
                self.log.exception("Error notifying Hub of activity")
            # add 20% jitter to the interval to avoid alignment
            # of lots of requests from user servers
            t = self.hub_activity_interval * (1 + 0.2 * (random.random() - 0.5))
            await asyncio.sleep(t)

    def initialize(self, argv=None):
        # disable trash by default
        # this can be re-enabled by config
        self.config.FileContentsManager.delete_to_trash = False
        return super().initialize(argv)

    def start(self):
        self.log.info("Starting jupyterhub-singleuser server version %s", __version__)
        # start by hitting Hub to check version
        ioloop.IOLoop.current().run_sync(self.check_hub_version)
        ioloop.IOLoop.current().add_callback(self.keep_activity_updated)
        super(SingleUserNotebookApp, self).start()

    def init_hub_auth(self):
        api_token = None
        if os.getenv('JPY_API_TOKEN'):
            # Deprecated env variable (as of 0.7.2)
            api_token = os.environ['JPY_API_TOKEN']
        if os.getenv('JUPYTERHUB_API_TOKEN'):
            api_token = os.environ['JUPYTERHUB_API_TOKEN']

        if not api_token:
            self.exit(
                "JUPYTERHUB_API_TOKEN env is required to run jupyterhub-singleuser. "
                "Did you launch it manually?"
            )
        self.hub_auth = HubOAuth(
            parent=self,
            api_token=api_token,
            api_url=self.hub_api_url,
            hub_prefix=self.hub_prefix,
            base_url=self.base_url,
            keyfile=self.keyfile,
            certfile=self.certfile,
            client_ca=self.client_ca,
        )
        # smoke check
        if not self.hub_auth.oauth_client_id:
            raise ValueError("Missing OAuth client ID")

    def init_webapp(self):
        # load the hub-related settings into the tornado settings dict
        self.init_hub_auth()
        s = self.tornado_settings
        s['log_function'] = log_request
        s['user'] = self.user
        s['group'] = self.group
        s['hub_prefix'] = self.hub_prefix
        s['hub_host'] = self.hub_host
        s['hub_auth'] = self.hub_auth
        csp_report_uri = s['csp_report_uri'] = self.hub_host + url_path_join(
            self.hub_prefix, 'security/csp-report')
        headers = s.setdefault('headers', {})
        headers['X-JupyterHub-Version'] = __version__
        # set CSP header directly to work around bugs in jupyter/notebook 5.0
        headers.setdefault(
            'Content-Security-Policy',
            ';'.join([
                "frame-ancestors 'self'",
                "report-uri " + csp_report_uri,
            ]))
        super(SingleUserNotebookApp, self).init_webapp()

        # add OAuth callback
        self.web_app.add_handlers(r".*$", [(
            urlparse(self.hub_auth.oauth_redirect_uri).path,
            OAuthCallbackHandler,
        )])

        # apply X-JupyterHub-Version to *all* request handlers (even redirects)
        self.patch_default_headers()
        self.patch_templates()

    def patch_default_headers(self):
        if hasattr(RequestHandler, '_orig_set_default_headers'):
            return
        RequestHandler._orig_set_default_headers = RequestHandler.set_default_headers

        def set_jupyterhub_header(self):
            self._orig_set_default_headers()
            self.set_header('X-JupyterHub-Version', __version__)

        RequestHandler.set_default_headers = set_jupyterhub_header

    def patch_templates(self):
        """Patch page templates to add Hub-related buttons"""

        self.jinja_template_vars['logo_url'] = self.hub_host + url_path_join(
            self.hub_prefix, 'logo')
        self.jinja_template_vars['hub_host'] = self.hub_host
        self.jinja_template_vars['hub_prefix'] = self.hub_prefix
        env = self.web_app.settings['jinja2_env']

        env.globals['hub_control_panel_url'] = \
            self.hub_host + url_path_join(self.hub_prefix, 'home')

        # patch jinja env loading to modify page template
        def get_page(name):
            if name == 'page.html':
                return page_template

        orig_loader = env.loader
        env.loader = ChoiceLoader([
            FunctionLoader(get_page),
            orig_loader,
        ])
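# --- Illustrative helper (not part of the class above) ---------------------
# The disable_user_config logic calls `_exclude_home`, which is defined
# elsewhere in the module. A minimal sketch under the assumption that it
# simply drops any search path inside the user's home directory, so
# user-writable locations cannot inject configuration:
import os


def _exclude_home_sketch(path_list):
    """Yield entries of path_list that are not inside the user's home directory."""
    home = os.path.expanduser('~/')
    for p in path_list:
        if not p.startswith(home):
            yield p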
class ReviewManager(Application):
    username = Unicode(
        help="Admin user github username. Must be an organization owner."
    ).tag(config=True)

    @default('username')
    def _default_username(self):
        return getpass.getuser()

    password = Unicode()

    org = Unicode(allow_none=False, help="Classroom organization.").tag(config=True)

    repo_prefix = Unicode(
        "",
        help="Assignment repository prefix, e.g. 'hw-1-'. "
             "The repository title after the prefix is the target username."
    ).tag(config=True)

    skip_users = Set(
        trait=Unicode(allow_none=False),
        help="Skip the given users' repos when establishing reviews; "
             "used to skip instructor/example repos."
    ).tag(config=True)

    num_reviewers = Integer(
        2, help="Number of reviewers added per project").tag(config=True)

    review_title_template = Unicode(
        "Peer Review: %s",
        help="Issue title format, %-formatted with the reviewer username."
    ).tag(config=True)

    review_template_file = Unicode(
        help="File containing a markdown review template for peer review issues."
    ).tag(config=True)

    review_template = Unicode(
        help="Markdown review template for peer review issues."
    ).tag(config=True)

    dry_run = Bool(
        True,
        help="Load classroom data and log events, but do not perform github updates."
    ).tag(config=True)

    def setup_reviews(self):
        gh = github3.login(
            username=self.username,
            password=(self.password if self.password
                      else getpass.getpass("%s github password: " % self.username)))
        self.log.info("Got github: %s", gh)

        org = gh.organization(self.org)
        self.log.info("Got organization: %s", org)

        repos = [r for r in org.repositories() if r.name.startswith(self.repo_prefix)]
        self.log.info("Got repo count: %i", len(repos))

        repos_by_user = {r.name[len(self.repo_prefix):]: r for r in repos}
        self.log.info("Repo list:\n%s", pprint.pformat(repos_by_user))

        for n in self.skip_users:
            if n in repos_by_user:
                self.log.info("Skipping user repo: %s", n)
                del repos_by_user[n]

        user_list = list(repos_by_user)
        random.shuffle(user_list)

        # Assign each user the next num_reviewers users in the shuffled ring
        reviewers = {
            user_list[n]: [
                user_list[(n + 1 + i) % len(user_list)]
                for i in range(self.num_reviewers)
            ]
            for n in range(len(user_list))
        }
        self.log.info("Reviewers:\n%s", pprint.pformat(reviewers))

        # Run a self-test of the reviewer list
        review_counts = collections.Counter()
        for n, r in reviewers.items():
            assert len(r) == self.num_reviewers
            assert len(set(r)) == len(r)
            assert n not in r
            review_counts.update(r)
        assert len(review_counts) == len(user_list)
        assert set(review_counts) == set(user_list)
        for u in user_list:
            assert review_counts[u] == self.num_reviewers

        assert self.review_template_file or self.review_template
        if self.review_template_file:
            review_template = open(self.review_template_file, "r").read()
        elif self.review_template:
            review_template = self.review_template
        self.log.info("Review template:\n%s", review_template)

        for u in user_list:
            urepo = repos_by_user[u]
            for r in reviewers[u]:
                self.log.info("Add collaborator %s: %s", urepo, r)
                if not self.dry_run:
                    urepo.add_collaborator(r)
                self.log.info("Create review issue %s: %r",
                              urepo, self.review_title_template % r)
                if not self.dry_run:
                    urepo.create_issue(
                        title=self.review_title_template % r,
                        body=review_template,
                        labels=["peer_review"],
                        assignee=r,
                    )

    config_file = Unicode(u'', help="Load this config file").tag(config=True)

    aliases = {"config_file": "ReviewManager.config_file"}

    flags = Dict(dict(
        no_dry_run=({'ReviewManager': {'dry_run': False}}, "Enable github api calls.")
    ))

    def initialize(self, argv=None):
        self.parse_command_line(argv)
        if self.config_file:
            self.load_config_file(self.config_file)
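# A minimal, hypothetical entry point for ReviewManager. With the default
# dry_run=True this only logs the planned collaborator additions and review
# issues; passing --no_dry_run on the command line performs the real GitHub
# API calls.
if __name__ == "__main__":
    app = ReviewManager()
    app.initialize()
    app.setup_reviews()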