class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse
    """

    mem_warning_threshold = Float(
        default_value=0.1,
        help="""
        Warn user with flashing lights when memory usage is within this
        fraction of the memory limit.

        For example, if the memory limit is 128MB and `mem_warning_threshold`
        is 0.1, we will start warning the user when they use
        (128 - (128 * 0.1)) MB.

        Set to 0 to disable warning.
        """,
    ).tag(config=True)

    mem_limit = Union(
        trait_types=[Int(), Callable()],
        help="""
        Memory limit to display to the user, in bytes.
        Can also be a function which calculates the memory limit.

        Note that this does not actually limit the user's memory usage!

        Defaults to reading from the `MEM_LIMIT` environment variable. If
        set to 0, the max memory available is displayed.
        """,
    ).tag(config=True)

    @default('mem_limit')
    def _mem_limit_default(self):
        return int(os.environ.get('MEM_LIMIT', 0))

    track_cpu_percent = Bool(
        default_value=False,
        help="""
        Set to True in order to enable reporting of CPU usage statistics.
        """,
    ).tag(config=True)

    cpu_warning_threshold = Float(
        default_value=0.1,
        help="""
        Warn user with flashing lights when CPU usage is within this
        fraction of the CPU usage limit.

        For example, if the CPU limit is 150% and `cpu_warning_threshold`
        is 0.1, we will start warning the user when they use
        (150 - (150 * 0.1)) %.

        Set to 0 to disable warning.
        """,
    ).tag(config=True)

    cpu_limit = Float(
        default_value=0,
        help="""
        CPU usage limit to display to the user.

        Note that this does not actually limit the user's CPU usage!

        Defaults to reading from the `CPU_LIMIT` environment variable. If
        set to 0, the total CPU count available is displayed.
        """,
    ).tag(config=True)

    @default('cpu_limit')
    def _cpu_limit_default(self):
        return float(os.environ.get('CPU_LIMIT', 0))
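# --- Example (not part of nbresuse) ------------------------------------------
# A minimal sketch of how the traits above might be set from a
# jupyter_notebook_config.py. The cgroup path, the 4 GiB fallback and the
# callable are illustrative assumptions; the callable accepts any arguments
# because the exact way the extension invokes a callable limit is not shown here.
def mem_limit_from_cgroup(*args, **kwargs):
    try:
        with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f:
            return int(f.read())
    except OSError:
        return 4 * 1024 * 1024 * 1024  # assumed 4 GiB fallback

c.ResourceUseDisplay.mem_limit = mem_limit_from_cgroup
c.ResourceUseDisplay.mem_warning_threshold = 0.1  # warn in the last 10% of the limit
c.ResourceUseDisplay.track_cpu_percent = True
c.ResourceUseDisplay.cpu_limit = 2.0              # display a 2-CPU limit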
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse
    """

    trash_dir = Union(
        trait_types=[Unicode(), Callable()],
        default_value=os.getcwd(),
        help="""
        The directory that the notebook copies Trash to on deletion.

        Defaults to reading from the `TRASH_DIR` environment variable.
        If not defined, it effectively defaults to $HOME/.local/share/Trash/
        """,
    ).tag(config=True)

    @default("trash_dir")
    def _trash_dir_default(self):
        return str(
            os.environ.get(
                "TRASH_DIR",
                os.path.join(
                    os.environ.get("HOME", os.getcwd()), ".local/share/Trash/"
                ),
            )
        )
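# --- Example (not part of nbresuse) ------------------------------------------
# Sketch: pointing trash_dir at an explicit per-user location from a Jupyter
# config file. The path is an illustrative assumption, not a default.
import os

c.ResourceUseDisplay.trash_dir = os.path.expanduser("~/.local/share/Trash/")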
# Imports assumed by this traitlets port of matplotlib's Artist; `Perishable`
# is a custom trait wrapper defined elsewhere in this project.
from traitlets import (HasTraits, Bool, Callable, Dict, Float, Instance, Int,
                       List, Tuple, Unicode, Union)
from matplotlib import rcParams


class Artist(HasTraits):
    aname = Unicode('Artist')
    zorder = Int(default_value=0)
    # _prop_order = dict(color=-1)
    _prop_order = Dict()  # asked thomas a question about this attribute
    # pchanged = Bool(default_value=False)
    _stale = Bool(default_value=True)
    stale_callback = Callable(allow_none=True, default_value=None)
    _axes = Instance('matplotlib.axes.Axes', allow_none=True, default_value=None)
    figure = Instance('matplotlib.figure.Figure', allow_none=True, default_value=None)
    _transform = Instance('matplotlib.transforms.Transform', allow_none=True, default_value=None)
    _transformSet = Bool(default_value=False)
    _visible = Bool(default_value=True)
    _animated = Bool(default_value=False)
    _alpha = Float(default_value=None, allow_none=True)
    clipbox = Instance('matplotlib.transforms.Bbox', allow_none=True, default_value=None)

    """
    Notes from documentation:
    Union([Float(), Bool(), Int()]) attempts to validate the provided values
    with the validation function of Float, then Bool, and finally Int.
    """
    _clippath = Perishable(Union([Instance('matplotlib.path.Path'),
                                  Instance('matplotlib.transforms.Transform'),
                                  Instance('matplotlib.patches.Patch')],
                                 allow_none=True, default_value=None))
    _clipon = Bool(default_value=True)
    _label = Unicode(allow_none=True, default_value='')
    _picker = Union([Float(), Bool(), Callable()], allow_none=True, default_value=None)
    _contains = List(allow_none=True, default_value=None)
    _rasterized = Perishable(Bool(allow_none=True, default_value=None))
    _agg_filter = Unicode(allow_none=True, default_value=None)  # set agg_filter function
    _mouseover = Bool(default_value=False)
    eventson = Bool(default_value=False)
    _oid = Int(allow_none=True, default_value=0)
    _propobservers = Dict(default_value={})  # this may or may not work, o/w leave alone and see what happens
    # _remove_method =   # have to look into this
    _url = Unicode(allow_none=True, default_value=None)
    _gid = Unicode(allow_none=True, default_value=None)
    _snap = Perishable(Bool(allow_none=True, default_value=None))
    _sketch = Tuple(Float(), Float(), Float(), allow_none=True,
                    default_value=rcParams['path.sketch'])
    _path_effects = List(Instance('matplotlib.patheffects._Base'),
                         default_value=rcParams['path.effects'])
    # _XYPair = namedtuple("_XYPair", "x y")

    # sticky_edges is a tuple with lists of floats:
    # the first element of this tuple represents x
    # and the second element of sticky_edges represents y
    _sticky_edges = Tuple(List(trait=Float()), List(trait=Float()))
class JSONWebTokenAuthenticator(Authenticator):
    """
    Accept the authenticated JSON Web Token from header or query parameter.
    """
    redirect_unauthorized = Unicode(
        default_value='',
        config=True,
        help="""Login URL to redirect to when the user can't log in.""")

    signing_certificate = Unicode(
        config=True,
        help="""
        The public certificate of the private key used to sign the incoming
        JSON Web Tokens.

        Should be a path to an X509 PEM format certificate on the filesystem.
        """)

    rsa_public_key = Unicode(
        config=True,
        help="""String with the RSA public key, base64 encoded.""")

    cookie_name = Unicode(
        config=True,
        default_value=DEFAULT_COOKIE_NAME,
        help="""The name of the cookie in which the JWT token is stored.""")

    username_claim_field = Unicode(
        default_value='upn',
        config=True,
        help="""
        The field in the claims that contains the user name. It can be either
        a straight username or an email/userPrincipalName.
        """)

    expected_audience = Unicode(
        default_value='',
        config=True,
        help="""The audience ('aud' claim) expected in the authenticated JSON Web Token.""")

    secret = Unicode(
        config=True,
        help="""Shared secret key for signing the JWT token. If defined, it
        overrides any setting for signing_certificate.""")

    home_dir = Unicode(config=True, help="""Home directory.""")

    token_file = Unicode(
        default_value='.jwt_sso.json',
        config=True,
        help="""User token file name.""")

    validate_token_hook = Callable(
        default_value=None,
        allow_none=True,
        config=True,
        help="""Function that will be called when validation of the token is required.""")

    redirect_to_sso_hook = Callable(
        default_value=None,
        allow_none=True,
        config=True,
        help="""Function that will be called when the JWT is invalid.
        It should redirect to the SSO login URL.""")

    def get_handlers(_self, _app):
        return [
            (r'/login', JSONWebTokenLoginHandler),
        ]

    async def authenticate(_self, _handler, _data):
        raise NotImplementedError()

    async def pre_spawn_start(self, user, spawner):
        if not self.home_dir:
            raise ExceptionMissingConfigurationParameter("Missing home directory.")
        path = os.path.join(self.home_dir, user.name, self.token_file)
        try:
            with open(path, "r") as f:
                jwt = json.load(f)
        except Exception as ex:
            self.log.error("Can't load token from file! %s", ex)
            spawner.environment['JWT'] = ''
            return
        spawner.environment['JWT'] = jwt['jwt']

    async def refresh_user(self, user, handler, force=False):
        self.log.info(
            f"refresh user {user.name}, force={force}, home dir: {self.home_dir}")
        if force:
            return False
        sso_path = ""
        if self.home_dir:
            sso_path = os.path.join(self.home_dir, user.name, self.token_file)
        valid = self._validate_auth_token(user, sso_path)
        if valid:
            return True
        self.log.info(
            f"kicking off user {user.name} (force={force}, home_dir={self.home_dir})")
        self._kick_off_user(user, handler, sso_path)
        return False

    def _validate_auth_token(self, user, sso_path):
        if not os.path.exists(sso_path):
            return True
        try:
            with open(sso_path, "r") as f:
                jwt = json.load(f)
            token = jwt['jwt']
            if token and self.validate_token_hook:
                if self.validate_token_hook(token):
                    self.log.info(f"user {user.name} has a valid token")
                    user.spawner.environment['JWT'] = token
                    return True
        except Exception as ex:
            self.log.error(f"Can't load token from file for user {user.name}: {ex}")
        self.log.info(f"user {user.name} has an invalid token")
        return False

    def _kick_off_user(self, user, handler, sso_path):
        user.spawner.environment['JWT'] = ''
        if sso_path and os.path.exists(sso_path):
            os.remove(sso_path)
        handler.clear_cookie(self.cookie_name)
        handler.clear_cookie("jupyterhub-hub-login")
        handler.clear_cookie("jupyterhub-session-id")
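# --- Example (not part of the authenticator) ----------------------------------
# Sketch of wiring the two hooks from a jupyterhub_config.py. The use of PyJWT,
# the handler argument to the redirect hook, and the SSO URL are assumptions
# made for illustration only.
import time
import jwt  # PyJWT, assumed to be installed

def validate_token(token):
    # Placeholder check: decode without signature verification and look at the
    # expiry claim. A real deployment would verify the signature as well.
    try:
        claims = jwt.decode(token, options={"verify_signature": False})
    except jwt.PyJWTError:
        return False
    return claims.get("exp", 0) > time.time()

def redirect_to_sso(handler):
    handler.redirect("https://sso.example.com/login")  # hypothetical SSO URL

c.JSONWebTokenAuthenticator.validate_token_hook = validate_token
c.JSONWebTokenAuthenticator.redirect_to_sso_hook = redirect_to_sso
c.JSONWebTokenAuthenticator.home_dir = "/home"
c.JSONWebTokenAuthenticator.token_file = ".jwt_sso.json"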
class ServerProxy(Configurable): servers = Dict({}, help=""" Dictionary of processes to supervise & proxy. Key should be the name of the process. This is also used by default as the URL prefix, and all requests matching this prefix are routed to this process. Value should be a dictionary with the following keys: command A list of strings that should be the full command to be executed. The optional template arguments {{port}} and {{base_url}} will be substituted with the port the process should listen on and the base-url of the notebook. Could also be a callable. It should return a list. environment A dictionary of environment variable mappings. {{port}} and {{base_url}} will be substituted as for command. Could also be a callable. It should return a dictionary. timeout Timeout in seconds for the process to become ready, default 5s. absolute_url Proxy requests default to being rewritten to '/'. If this is True, the absolute URL will be sent to the backend instead. port Set the port that the service will listen on. The default is to automatically select an unused port. mappath Map request paths to proxied paths. Either a dictionary of request paths to proxied paths, or a callable that takes parameter ``path`` and returns the proxied path. launcher_entry A dictionary of various options for entries in classic notebook / jupyterlab launchers. Keys recognized are: enabled Set to True (default) to make an entry in the launchers. Set to False to have no explicit entry. icon_path Full path to an svg icon that could be used with a launcher. Currently only used by the JupyterLab launcher title Title to be used for the launcher entry. Defaults to the name of the server if missing. new_browser_tab Set to True (default) to make the proxied server interface opened as a new browser tab. Set to False to have it open a new JupyterLab tab. This has no effect in classic notebook. """, config=True) host_allowlist = Union(trait_types=[List(), Callable()], help=""" List of allowed hosts. Can also be a function that decides whether a host can be proxied. If implemented as a function, this should return True if a host should be proxied and False if it should not. Such a function could verify that the host matches a particular regular expression pattern or falls into a specific subnet. It should probably not be a slow check against some external service. Here is an example that could be placed in a site-wide Jupyter notebook config: def host_allowlist(handler, host): handler.log.info("Request to proxy to host " + host) return host.startswith("10.") c.ServerProxy.host_allowlist = host_allowlist Defaults to a list of ["localhost", "127.0.0.1"]. """, config=True) @default("host_allowlist") def _host_allowlist_default(self): return ["localhost", "127.0.0.1"] host_whitelist = Union(trait_types=[List(), Callable()], help="Deprecated, use host_allowlist", config=True) @observe("host_whitelist") def _host_whitelist_deprecated(self, change): old_attr = change.name if self.host_allowlist != change.new: # only warn if different # protects backward-compatible config from warnings # if they set the same value under both names # Configurable doesn't have a log # https://github.com/ipython/traitlets/blob/5.0.5/traitlets/config/configurable.py#L181 warn( "{cls}.{old} is deprecated in jupyter-server-proxy {version}, use {cls}.{new} instead" .format( cls=self.__class__.__name__, old=old_attr, new="host_allowlist", version="3.0.0", )) self.host_allowlist = change.new
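# --- Example (not part of jupyter-server-proxy) --------------------------------
# Sketch of a single supervised process entry in a Jupyter server config file.
# The command, environment variable and launcher title are illustrative
# assumptions; {port} and {base_url} are the documented template arguments.
c.ServerProxy.servers = {
    "myapp": {
        "command": ["myapp", "--port={port}", "--prefix={base_url}myapp"],
        "environment": {"MYAPP_PORT": "{port}"},
        "timeout": 10,
        "absolute_url": False,
        "launcher_entry": {
            "enabled": True,
            "title": "My App",
        },
    }
}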
class ServerProxy(Configurable): servers = Dict( {}, help=""" Dictionary of processes to supervise & proxy. Key should be the name of the process. This is also used by default as the URL prefix, and all requests matching this prefix are routed to this process. Value should be a dictionary with the following keys: command A list of strings that should be the full command to be executed. The optional template arguments {{port}} and {{base_url}} will be substituted with the port the process should listen on and the base-url of the notebook. Could also be a callable. It should return a dictionary. environment A dictionary of environment variable mappings. {{port}} and {{base_url}} will be substituted as for command. Could also be a callable. It should return a dictionary. timeout Timeout in seconds for the process to become ready, default 5s. absolute_url Proxy requests default to being rewritten to '/'. If this is True, the absolute URL will be sent to the backend instead. port Set the port that the service will listen on. The default is to automatically select an unused port. mappath Map request paths to proxied paths. Either a dictionary of request paths to proxied paths, or a callable that takes parameter ``path`` and returns the proxied path. launcher_entry A dictionary of various options for entries in classic notebook / jupyterlab launchers. Keys recognized are: enabled Set to True (default) to make an entry in the launchers. Set to False to have no explicit entry. icon_path Full path to an svg icon that could be used with a launcher. Currently only used by the JupyterLab launcher title Title to be used for the launcher entry. Defaults to the name of the server if missing. """, config=True ) host_whitelist = Union( trait_types=[List(), Callable()], help=""" List of allowed hosts. Can also be a function that decides whether a host can be proxied. If implemented as a function, this should return True if a host should be proxied and False if it should not. Such a function could verify that the host matches a particular regular expression pattern or falls into a specific subnet. It should probably not be a slow check against some external service. Here is an example that could be placed in a site-wide Jupyter notebook config: def host_whitelist(handler, host): handler.log.info("Request to proxy to host " + host) return host.startswith("10.") c.ServerProxy.host_whitelist = host_whitelist Defaults to a list of ["localhost", "127.0.0.1"]. """, config=True ) @default("host_whitelist") def _host_whitelist_default(self): return ["localhost", "127.0.0.1"]
class ServerProxy(Configurable): servers = Dict({}, help=""" Dictionary of processes to supervise & proxy. Key should be the name of the process. This is also used by default as the URL prefix, and all requests matching this prefix are routed to this process. Value should be a dictionary with the following keys: command A list of strings that should be the full command to be executed. The optional template arguments {{port}} and {{base_url}} will be substituted with the port the process should listen on and the base-url of the notebook. Could also be a callable. It should return a list. environment A dictionary of environment variable mappings. As with the command traitlet, {{port}} and {{base_url}} will be substituted. Could also be a callable. It should return a dictionary. timeout Timeout in seconds for the process to become ready, default 5s. absolute_url Proxy requests default to being rewritten to '/'. If this is True, the absolute URL will be sent to the backend instead. port Set the port that the service will listen on. The default is to automatically select an unused port. mappath Map request paths to proxied paths. Either a dictionary of request paths to proxied paths, or a callable that takes parameter ``path`` and returns the proxied path. launcher_entry A dictionary of various options for entries in classic notebook / jupyterlab launchers. Keys recognized are: enabled Set to True (default) to make an entry in the launchers. Set to False to have no explicit entry. icon_path Full path to an svg icon that could be used with a launcher. Currently only used by the JupyterLab launcher title Title to be used for the launcher entry. Defaults to the name of the server if missing. new_browser_tab Set to True (default) to make the proxied server interface opened as a new browser tab. Set to False to have it open a new JupyterLab tab. This has no effect in classic notebook. request_headers_override A dictionary of additional HTTP headers for the proxy request. As with the command traitlet, {{port}} and {{base_url}} will be substituted. path_info The trailing path that is appended to the user's server URL to access the proxied server. By default it is the name of the server followed by a trailing slash. rewrite_response An optional function to rewrite the response for the given service. Input is a RewritableResponse object which is an argument that MUST be named ``response``. The function should modify one or more of the attributes ``.body``, ``.headers``, ``.code``, or ``.reason`` of the ``response`` argument. For example: def cat_to_dog(response): response.headers["I-Like"] = "tacos" response.body = response.body.replace(b'cat', b'dog') c.ServerProxy.servers['my_server']['rewrite_response'] = cat_to_dog The ``rewrite_response`` function can also accept several optional positional arguments. Arguments named ``host``, ``port``, and ``path`` will receive values corresponding to the URL ``/proxy/<host>:<port><path>``. In addition, the original Tornado ``HTTPRequest`` and ``HTTPResponse`` objects are available as arguments named ``request`` and ``orig_response``. (These objects should not be modified.) A list or tuple of functions can also be specified for chaining multiple rewrites. Defaults to the empty tuple ``tuple()``. 
""", config=True) non_service_rewrite_response = Union( default_value=tuple(), trait_types=[List(), Tuple(), Callable()], help=""" A function (or list or tuple of functions) to rewrite the response for a non-service request, for example a request to ``/proxy/<host>:<port><path>``. See the description for ``rewrite_response`` for more information. Defaults to the empty tuple ``tuple()``. """, config=True) host_allowlist = Union(trait_types=[List(), Callable()], help=""" List of allowed hosts. Can also be a function that decides whether a host can be proxied. If implemented as a function, this should return True if a host should be proxied and False if it should not. Such a function could verify that the host matches a particular regular expression pattern or falls into a specific subnet. It should probably not be a slow check against some external service. Here is an example that could be placed in a site-wide Jupyter notebook config: def host_allowlist(handler, host): handler.log.info("Request to proxy to host " + host) return host.startswith("10.") c.ServerProxy.host_allowlist = host_allowlist Defaults to a list of ["localhost", "127.0.0.1"]. """, config=True) @default("host_allowlist") def _host_allowlist_default(self): return ["localhost", "127.0.0.1"] host_whitelist = Union(trait_types=[List(), Callable()], help="Deprecated, use host_allowlist", config=True) @observe("host_whitelist") def _host_whitelist_deprecated(self, change): old_attr = change.name if self.host_allowlist != change.new: # only warn if different # protects backward-compatible config from warnings # if they set the same value under both names # Configurable doesn't have a log # https://github.com/ipython/traitlets/blob/5.0.5/traitlets/config/configurable.py#L181 warn( "{cls}.{old} is deprecated in jupyter-server-proxy {version}, use {cls}.{new} instead" .format( cls=self.__class__.__name__, old=old_attr, new="host_allowlist", version="3.0.0", )) self.host_allowlist = change.new
class ResourceUseDisplay(Configurable): """ Holds server-side configuration for jupyter-resource-usage """ process_memory_metrics = List( trait=PSUtilMetric(), default_value=[{"name": "memory_info", "attribute": "rss"}], ) system_memory_metrics = List( trait=PSUtilMetric(), default_value=[{"name": "virtual_memory", "attribute": "total"}], ) process_cpu_metrics = List( trait=PSUtilMetric(), default_value=[{"name": "cpu_percent", "kwargs": {"interval": 0.05}}], ) system_cpu_metrics = List( trait=PSUtilMetric(), default_value=[{"name": "cpu_count"}] ) mem_warning_threshold = Float( default_value=0.1, help=""" Warn user with flashing lights when memory usage is within this fraction memory limit. For example, if memory limit is 128MB, `mem_warning_threshold` is 0.1, we will start warning the user when they use (128 - (128 * 0.1)) MB. Set to 0 to disable warning. """, ).tag(config=True) mem_limit = Union( trait_types=[Int(), Callable()], help=""" Memory limit to display to the user, in bytes. Can also be a function which calculates the memory limit. Note that this does not actually limit the user's memory usage! Defaults to reading from the `MEM_LIMIT` environment variable. If set to 0, the max memory available is displayed. """, ).tag(config=True) @default("mem_limit") def _mem_limit_default(self): return int(os.environ.get("MEM_LIMIT", 0)) track_cpu_percent = Bool( default_value=False, help=""" Set to True in order to enable reporting of CPU usage statistics. """, ).tag(config=True) cpu_warning_threshold = Float( default_value=0.1, help=""" Warn user with flashing lights when CPU usage is within this fraction CPU usage limit. For example, if CPU limit is 150%, `cpu_warning_threshold` is 0.1, we will start warning the user when they use (150 - (150 * 0.1)) %. Set to 0 to disable warning. """, ).tag(config=True) cpu_limit = Union( trait_types=[Float(), Callable()], default_value=0, help=""" CPU usage limit to display to the user. Note that this does not actually limit the user's CPU usage! Defaults to reading from the `CPU_LIMIT` environment variable. If set to 0, the total CPU count available is displayed. """, ).tag(config=True) @default("cpu_limit") def _cpu_limit_default(self): return float(os.environ.get("CPU_LIMIT", 0))
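# --- Example (not part of jupyter-resource-usage) ------------------------------
# Sketch of overriding the psutil-backed metric lists and supplying a callable
# CPU limit from a Jupyter config file. The metric names map to psutil
# attributes; the extra "vms" entry and the 4-CPU quota are illustrative, and
# the callable accepts any arguments because the exact call signature used by
# the handler is not shown here.
c.ResourceUseDisplay.process_memory_metrics = [
    {"name": "memory_info", "attribute": "rss"},
    {"name": "memory_info", "attribute": "vms"},
]
c.ResourceUseDisplay.track_cpu_percent = True
c.ResourceUseDisplay.cpu_limit = lambda *args, **kwargs: 4.0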
class NotebookClient(LoggingConfigurable): """ Encompasses a Client for executing cells in a notebook """ timeout: int = Integer( None, allow_none=True, help=dedent(""" The time to wait (in seconds) for output from executions. If a cell execution takes longer, a TimeoutError is raised. ``None`` or ``-1`` will disable the timeout. If ``timeout_func`` is set, it overrides ``timeout``. """), ).tag(config=True) timeout_func: t.Any = Any( default_value=None, allow_none=True, help=dedent(""" A callable which, when given the cell source as input, returns the time to wait (in seconds) for output from cell executions. If a cell execution takes longer, a TimeoutError is raised. Returning ``None`` or ``-1`` will disable the timeout for the cell. Not setting ``timeout_func`` will cause the client to default to using the ``timeout`` trait for all cells. The ``timeout_func`` trait overrides ``timeout`` if it is not ``None``. """), ).tag(config=True) interrupt_on_timeout: bool = Bool( False, help=dedent(""" If execution of a cell times out, interrupt the kernel and continue executing other cells rather than throwing an error and stopping. """), ).tag(config=True) startup_timeout: int = Integer( 60, help=dedent(""" The time to wait (in seconds) for the kernel to start. If kernel startup takes longer, a RuntimeError is raised. """), ).tag(config=True) allow_errors: bool = Bool( False, help=dedent(""" If ``False`` (default), when a cell raises an error the execution is stopped and a `CellExecutionError` is raised, except if the error name is in ``allow_error_names``. If ``True``, execution errors are ignored and the execution is continued until the end of the notebook. Output from exceptions is included in the cell output in both cases. """), ).tag(config=True) allow_error_names: t.List[str] = List( Unicode(), help=dedent(""" List of error names which won't stop the execution. Use this if the ``allow_errors`` option it too general and you want to allow only specific kinds of errors. """), ).tag(config=True) force_raise_errors: bool = Bool( False, help=dedent(""" If False (default), errors from executing the notebook can be allowed with a ``raises-exception`` tag on a single cell, or the ``allow_errors`` or ``allow_error_names`` configurable options for all cells. An allowed error will be recorded in notebook output, and execution will continue. If an error occurs when it is not explicitly allowed, a `CellExecutionError` will be raised. If True, `CellExecutionError` will be raised for any error that occurs while executing the notebook. This overrides the ``allow_errors`` and ``allow_error_names`` options and the ``raises-exception`` cell tag. """), ).tag(config=True) skip_cells_with_tag: str = Unicode( 'skip-execution', help=dedent(""" Name of the cell tag to use to denote a cell that should be skipped. """), ).tag(config=True) extra_arguments: t.List = List(Unicode()).tag(config=True) kernel_name: str = Unicode( '', help=dedent(""" Name of kernel to use to execute the cells. If not set, use the kernel_spec embedded in the notebook. """), ).tag(config=True) raise_on_iopub_timeout: bool = Bool( False, help=dedent(""" If ``False`` (default), then the kernel will continue waiting for iopub messages until it receives a kernel idle message, or until a timeout occurs, at which point the currently executing cell will be skipped. If ``True``, then an error will be raised after the first timeout. 
This option generally does not need to be used, but may be useful in contexts where there is the possibility of executing notebooks with memory-consuming infinite loops. """), ).tag(config=True) store_widget_state: bool = Bool( True, help=dedent(""" If ``True`` (default), then the state of the Jupyter widgets created at the kernel will be stored in the metadata of the notebook. """), ).tag(config=True) record_timing: bool = Bool( True, help=dedent(""" If ``True`` (default), then the execution timings of each cell will be stored in the metadata of the notebook. """), ).tag(config=True) iopub_timeout: int = Integer( 4, allow_none=False, help=dedent(""" The time to wait (in seconds) for IOPub output. This generally doesn't need to be set, but on some slow networks (such as CI systems) the default timeout might not be long enough to get all messages. """), ).tag(config=True) shell_timeout_interval: int = Integer( 5, allow_none=False, help=dedent(""" The time to wait (in seconds) for Shell output before retrying. This generally doesn't need to be set, but if one needs to check for dead kernels at a faster rate this can help. """), ).tag(config=True) shutdown_kernel = Enum( ['graceful', 'immediate'], default_value='graceful', help=dedent(""" If ``graceful`` (default), then the kernel is given time to clean up after executing all cells, e.g., to execute its ``atexit`` hooks. If ``immediate``, then the kernel is signaled to immediately terminate. """), ).tag(config=True) ipython_hist_file: str = Unicode( default_value=':memory:', help= """Path to file to use for SQLite history database for an IPython kernel. The specific value ``:memory:`` (including the colon at both end but not the back ticks), avoids creating a history file. Otherwise, IPython will create a history file for each kernel. When running kernels simultaneously (e.g. via multiprocessing) saving history a single SQLite file can result in database errors, so using ``:memory:`` is recommended in non-interactive contexts. """, ).tag(config=True) kernel_manager_class: KernelManager = Type( config=True, help='The kernel manager class to use.') on_notebook_start: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes after the kernel manager and kernel client are setup, and cells are about to execute. Called with kwargs `notebook`. """), ).tag(config=True) on_notebook_complete: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes after the kernel is cleaned up. Called with kwargs `notebook`. """), ).tag(config=True) on_notebook_error: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes when the notebook encounters an error. Called with kwargs `notebook`. """), ).tag(config=True) on_cell_start: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes before a cell is executed and before non-executing cells are skipped. Called with kwargs `cell` and `cell_index`. """), ).tag(config=True) on_cell_execute: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes just before a code cell is executed. Called with kwargs `cell` and `cell_index`. """), ).tag(config=True) on_cell_complete: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes after a cell execution is complete. 
It is called even when a cell results in a failure. Called with kwargs `cell` and `cell_index`. """), ).tag(config=True) on_cell_error: t.Optional[t.Callable] = Callable( default_value=None, allow_none=True, help=dedent(""" A callable which executes when a cell execution results in an error. This is executed even if errors are suppressed with `cell_allows_errors`. Called with kwargs `cell` and `cell_index`. """), ).tag(config=True) @default('kernel_manager_class') def _kernel_manager_class_default(self) -> KernelManager: """Use a dynamic default to avoid importing jupyter_client at startup""" from jupyter_client import AsyncKernelManager return AsyncKernelManager _display_id_map: t.Dict[str, t.Dict] = Dict(help=dedent(""" mapping of locations of outputs with a given display_id tracks cell index and output index within cell.outputs for each appearance of the display_id { 'display_id': { cell_idx: [output_idx,] } } """)) display_data_priority: t.List = List( [ 'text/html', 'application/pdf', 'text/latex', 'image/svg+xml', 'image/png', 'image/jpeg', 'text/markdown', 'text/plain', ], help=""" An ordered list of preferred output type, the first encountered will usually be used when converting discarding the others. """, ).tag(config=True) resources: t.Dict = Dict(help=dedent(""" Additional resources used in the conversion process. For example, passing ``{'metadata': {'path': run_path}}`` sets the execution path to ``run_path``. """)) def __init__(self, nb: NotebookNode, km: t.Optional[KernelManager] = None, **kw) -> None: """Initializes the execution manager. Parameters ---------- nb : NotebookNode Notebook being executed. km : KernelManager (optional) Optional kernel manager. If none is provided, a kernel manager will be created. """ super().__init__(**kw) self.nb: NotebookNode = nb self.km: t.Optional[KernelManager] = km self.owns_km: bool = km is None # whether the NotebookClient owns the kernel manager self.kc: t.Optional[KernelClient] = None self.reset_execution_trackers() self.widget_registry: t.Dict[str, t.Dict] = { '@jupyter-widgets/output': { 'OutputModel': OutputWidget } } # comm_open_handlers should return an object with a .handle_msg(msg) method or None self.comm_open_handlers: t.Dict[str, t.Any] = { 'jupyter.widget': self.on_comm_open_jupyter_widget } def reset_execution_trackers(self) -> None: """Resets any per-execution trackers.""" self.task_poll_for_reply: t.Optional[asyncio.Future] = None self.code_cells_executed = 0 self._display_id_map = {} self.widget_state: t.Dict[str, t.Dict] = {} self.widget_buffers: t.Dict[str, t.Dict[t.Tuple[str, ...], t.Dict[str, str]]] = {} # maps to list of hooks, where the last is used, this is used # to support nested use of output widgets. self.output_hook_stack: t.Any = collections.defaultdict(list) # our front-end mimicking Output widgets self.comm_objects: t.Dict[str, t.Any] = {} def create_kernel_manager(self) -> KernelManager: """Creates a new kernel manager. Returns ------- km : KernelManager Kernel manager whose client class is asynchronous. """ if not self.kernel_name: kn = self.nb.metadata.get('kernelspec', {}).get('name') if kn is not None: self.kernel_name = kn if not self.kernel_name: self.km = self.kernel_manager_class(config=self.config) else: self.km = self.kernel_manager_class(kernel_name=self.kernel_name, config=self.config) # If the current kernel manager is still using the default (synchronous) KernelClient class, # switch to the async version since that's what NBClient prefers. 
if self.km.client_class == 'jupyter_client.client.KernelClient': self.km.client_class = 'jupyter_client.asynchronous.AsyncKernelClient' return self.km async def _async_cleanup_kernel(self) -> None: assert self.km is not None now = self.shutdown_kernel == "immediate" try: # Queue the manager to kill the process, and recover gracefully if it's already dead. if await ensure_async(self.km.is_alive()): await ensure_async(self.km.shutdown_kernel(now=now)) except RuntimeError as e: # The error isn't specialized, so we have to check the message if 'No kernel is running!' not in str(e): raise finally: # Remove any state left over even if we failed to stop the kernel await ensure_async(self.km.cleanup_resources()) if getattr(self, "kc") and self.kc is not None: await ensure_async(self.kc.stop_channels()) self.kc = None self.km = None _cleanup_kernel = run_sync(_async_cleanup_kernel) async def async_start_new_kernel(self, **kwargs) -> None: """Creates a new kernel. Parameters ---------- kwargs : Any options for ``self.kernel_manager_class.start_kernel()``. Because that defaults to AsyncKernelManager, this will likely include options accepted by ``AsyncKernelManager.start_kernel()``, which includes ``cwd``. """ assert self.km is not None resource_path = self.resources.get('metadata', {}).get('path') or None if resource_path and 'cwd' not in kwargs: kwargs["cwd"] = resource_path has_history_manager_arg = any( arg.startswith('--HistoryManager.hist_file') for arg in self.extra_arguments) if (hasattr(self.km, 'ipykernel') and self.km.ipykernel and self.ipython_hist_file and not has_history_manager_arg): self.extra_arguments += [ f'--HistoryManager.hist_file={self.ipython_hist_file}' ] await ensure_async( self.km.start_kernel(extra_arguments=self.extra_arguments, **kwargs)) start_new_kernel = run_sync(async_start_new_kernel) async def async_start_new_kernel_client(self) -> KernelClient: """Creates a new kernel client. Returns ------- kc : KernelClient Kernel client as created by the kernel manager ``km``. """ assert self.km is not None self.kc = self.km.client() await ensure_async(self.kc.start_channels()) try: await ensure_async( self.kc.wait_for_ready(timeout=self.startup_timeout)) except RuntimeError: await self._async_cleanup_kernel() raise self.kc.allow_stdin = False await run_hook(self.on_notebook_start, notebook=self.nb) return self.kc start_new_kernel_client = run_sync(async_start_new_kernel_client) @contextmanager def setup_kernel(self, **kwargs) -> t.Generator: """ Context manager for setting up the kernel to execute a notebook. The assigns the Kernel Manager (``self.km``) if missing and Kernel Client(``self.kc``). When control returns from the yield it stops the client's zmq channels, and shuts down the kernel. """ # by default, cleanup the kernel client if we own the kernel manager # and keep it alive if we don't cleanup_kc = kwargs.pop('cleanup_kc', self.owns_km) # Can't use run_until_complete on an asynccontextmanager function :( if self.km is None: self.km = self.create_kernel_manager() if not self.km.has_kernel: self.start_new_kernel(**kwargs) self.start_new_kernel_client() try: yield finally: if cleanup_kc: self._cleanup_kernel() @asynccontextmanager async def async_setup_kernel(self, **kwargs) -> t.AsyncGenerator: """ Context manager for setting up the kernel to execute a notebook. This assigns the Kernel Manager (``self.km``) if missing and Kernel Client(``self.kc``). When control returns from the yield it stops the client's zmq channels, and shuts down the kernel. 
Handlers for SIGINT and SIGTERM are also added to cleanup in case of unexpected shutdown. """ # by default, cleanup the kernel client if we own the kernel manager # and keep it alive if we don't cleanup_kc = kwargs.pop('cleanup_kc', self.owns_km) if self.km is None: self.km = self.create_kernel_manager() # self._cleanup_kernel uses run_async, which ensures the ioloop is running again. # This is necessary as the ioloop has stopped once atexit fires. atexit.register(self._cleanup_kernel) def on_signal(): asyncio.ensure_future(self._async_cleanup_kernel()) atexit.unregister(self._cleanup_kernel) loop = asyncio.get_event_loop() try: loop.add_signal_handler(signal.SIGINT, on_signal) loop.add_signal_handler(signal.SIGTERM, on_signal) except (NotImplementedError, RuntimeError): # NotImplementedError: Windows does not support signals. # RuntimeError: Raised when add_signal_handler is called outside the main thread pass if not self.km.has_kernel: await self.async_start_new_kernel(**kwargs) await self.async_start_new_kernel_client() try: yield except RuntimeError as e: await run_hook(self.on_notebook_error, notebook=self.nb) raise e finally: if cleanup_kc: await self._async_cleanup_kernel() await run_hook(self.on_notebook_complete, notebook=self.nb) atexit.unregister(self._cleanup_kernel) try: loop.remove_signal_handler(signal.SIGINT) loop.remove_signal_handler(signal.SIGTERM) except (NotImplementedError, RuntimeError): pass async def async_execute(self, reset_kc: bool = False, **kwargs) -> NotebookNode: """ Executes each code cell. Parameters ---------- kwargs : Any option for ``self.kernel_manager_class.start_kernel()``. Because that defaults to AsyncKernelManager, this will likely include options accepted by ``jupyter_client.AsyncKernelManager.start_kernel()``, which includes ``cwd``. ``reset_kc`` if True, the kernel client will be reset and a new one will be created (default: False). Returns ------- nb : NotebookNode The executed notebook. """ if reset_kc and self.owns_km: await self._async_cleanup_kernel() self.reset_execution_trackers() async with self.async_setup_kernel(**kwargs): assert self.kc is not None self.log.info("Executing notebook with kernel: %s" % self.kernel_name) msg_id = await ensure_async(self.kc.kernel_info()) info_msg = await self.async_wait_for_reply(msg_id) if info_msg is not None: if 'language_info' in info_msg['content']: self.nb.metadata['language_info'] = info_msg['content'][ 'language_info'] else: raise RuntimeError( 'Kernel info received message content has no "language_info" key. 
' 'Content is:\n' + str(info_msg['content'])) for index, cell in enumerate(self.nb.cells): # Ignore `'execution_count' in content` as it's always 1 # when store_history is False await self.async_execute_cell( cell, index, execution_count=self.code_cells_executed + 1) self.set_widgets_metadata() return self.nb execute = run_sync(async_execute) def set_widgets_metadata(self) -> None: if self.widget_state: self.nb.metadata.widgets = { 'application/vnd.jupyter.widget-state+json': { 'state': { model_id: self._serialize_widget_state(state) for model_id, state in self.widget_state.items() if '_model_name' in state }, 'version_major': 2, 'version_minor': 0, } } for key, widget in self.nb.metadata.widgets[ 'application/vnd.jupyter.widget-state+json'][ 'state'].items(): buffers = self.widget_buffers.get(key) if buffers: widget['buffers'] = list(buffers.values()) def _update_display_id(self, display_id: str, msg: t.Dict) -> None: """Update outputs with a given display_id""" if display_id not in self._display_id_map: self.log.debug("display id %r not in %s", display_id, self._display_id_map) return if msg['header']['msg_type'] == 'update_display_data': msg['header']['msg_type'] = 'display_data' try: out = output_from_msg(msg) except ValueError: self.log.error("unhandled iopub msg: " + msg['msg_type']) return for cell_idx, output_indices in self._display_id_map[display_id].items( ): cell = self.nb['cells'][cell_idx] outputs = cell['outputs'] for output_idx in output_indices: outputs[output_idx]['data'] = out['data'] outputs[output_idx]['metadata'] = out['metadata'] async def _async_poll_for_reply( self, msg_id: str, cell: NotebookNode, timeout: t.Optional[int], task_poll_output_msg: asyncio.Future, task_poll_kernel_alive: asyncio.Future, ) -> t.Dict: assert self.kc is not None new_timeout: t.Optional[float] = None if timeout is not None: deadline = monotonic() + timeout new_timeout = float(timeout) while True: try: msg = await ensure_async( self.kc.shell_channel.get_msg(timeout=new_timeout)) if msg['parent_header'].get('msg_id') == msg_id: if self.record_timing: cell['metadata']['execution'][ 'shell.execute_reply'] = timestamp(msg) try: await asyncio.wait_for(task_poll_output_msg, self.iopub_timeout) except (asyncio.TimeoutError, Empty): if self.raise_on_iopub_timeout: task_poll_kernel_alive.cancel() raise CellTimeoutError.error_from_timeout_and_cell( "Timeout waiting for IOPub output", self.iopub_timeout, cell) else: self.log.warning( "Timeout waiting for IOPub output") task_poll_kernel_alive.cancel() return msg else: if new_timeout is not None: new_timeout = max(0, deadline - monotonic()) except Empty: # received no message, check if kernel is still alive assert timeout is not None task_poll_kernel_alive.cancel() await self._async_check_alive() await self._async_handle_timeout(timeout, cell) async def _async_poll_output_msg(self, parent_msg_id: str, cell: NotebookNode, cell_index: int) -> None: assert self.kc is not None while True: msg = await ensure_async( self.kc.iopub_channel.get_msg(timeout=None)) if msg['parent_header'].get('msg_id') == parent_msg_id: try: # Will raise CellExecutionComplete when completed self.process_message(msg, cell, cell_index) except CellExecutionComplete: return async def _async_poll_kernel_alive(self) -> None: while True: await asyncio.sleep(1) try: await self._async_check_alive() except DeadKernelError: assert self.task_poll_for_reply is not None self.task_poll_for_reply.cancel() return def _get_timeout(self, cell: t.Optional[NotebookNode]) -> int: if self.timeout_func 
is not None and cell is not None: timeout = self.timeout_func(cell) else: timeout = self.timeout if not timeout or timeout < 0: timeout = None return timeout async def _async_handle_timeout(self, timeout: int, cell: t.Optional[NotebookNode] = None ) -> None: self.log.error("Timeout waiting for execute reply (%is)." % timeout) if self.interrupt_on_timeout: self.log.error("Interrupting kernel") assert self.km is not None await ensure_async(self.km.interrupt_kernel()) else: raise CellTimeoutError.error_from_timeout_and_cell( "Cell execution timed out", timeout, cell) async def _async_check_alive(self) -> None: assert self.kc is not None if not await ensure_async(self.kc.is_alive()): self.log.error("Kernel died while waiting for execute reply.") raise DeadKernelError("Kernel died") async def async_wait_for_reply( self, msg_id: str, cell: t.Optional[NotebookNode] = None) -> t.Optional[t.Dict]: assert self.kc is not None # wait for finish, with timeout timeout = self._get_timeout(cell) cummulative_time = 0 while True: try: msg = await ensure_async( self.kc.shell_channel.get_msg( timeout=self.shell_timeout_interval)) except Empty: await self._async_check_alive() cummulative_time += self.shell_timeout_interval if timeout and cummulative_time > timeout: await self._async_async_handle_timeout(timeout, cell) break else: if msg['parent_header'].get('msg_id') == msg_id: return msg return None wait_for_reply = run_sync(async_wait_for_reply) # Backwards compatibility naming for papermill _wait_for_reply = wait_for_reply def _passed_deadline(self, deadline: int) -> bool: if deadline is not None and deadline - monotonic() <= 0: return True return False async def _check_raise_for_error(self, cell: NotebookNode, cell_index: int, exec_reply: t.Optional[t.Dict]) -> None: if exec_reply is None: return None exec_reply_content = exec_reply['content'] if exec_reply_content['status'] != 'error': return None cell_allows_errors = (not self.force_raise_errors) and ( self.allow_errors or exec_reply_content.get('ename') in self.allow_error_names or "raises-exception" in cell.metadata.get("tags", [])) await run_hook(self.on_cell_error, cell=cell, cell_index=cell_index) if not cell_allows_errors: raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content) async def async_execute_cell( self, cell: NotebookNode, cell_index: int, execution_count: t.Optional[int] = None, store_history: bool = True, ) -> NotebookNode: """ Executes a single code cell. To execute all cells see :meth:`execute`. Parameters ---------- cell : nbformat.NotebookNode The cell which is currently being processed. cell_index : int The position of the cell within the notebook object. execution_count : int The execution count to be assigned to the cell (default: Use kernel response) store_history : bool Determines if history should be stored in the kernel (default: False). Specific to ipython kernels, which can store command histories. Returns ------- output : dict The execution output payload (or None for no output). Raises ------ CellExecutionError If execution failed and should raise an exception, this will be raised with defaults about the failure. Returns ------- cell : NotebookNode The cell which was just processed. 
""" assert self.kc is not None await run_hook(self.on_cell_start, cell=cell, cell_index=cell_index) if cell.cell_type != 'code' or not cell.source.strip(): self.log.debug("Skipping non-executing cell %s", cell_index) return cell if self.skip_cells_with_tag in cell.metadata.get("tags", []): self.log.debug("Skipping tagged cell %s", cell_index) return cell if self.record_timing: # clear execution metadata prior to execution cell['metadata']['execution'] = {} self.log.debug("Executing cell:\n%s", cell.source) cell_allows_errors = (not self.force_raise_errors) and ( self.allow_errors or "raises-exception" in cell.metadata.get("tags", [])) await run_hook(self.on_cell_execute, cell=cell, cell_index=cell_index) parent_msg_id = await ensure_async( self.kc.execute(cell.source, store_history=store_history, stop_on_error=not cell_allows_errors)) await run_hook(self.on_cell_complete, cell=cell, cell_index=cell_index) # We launched a code cell to execute self.code_cells_executed += 1 exec_timeout = self._get_timeout(cell) cell.outputs = [] self.clear_before_next_output = False task_poll_kernel_alive = asyncio.ensure_future( self._async_poll_kernel_alive()) task_poll_output_msg = asyncio.ensure_future( self._async_poll_output_msg(parent_msg_id, cell, cell_index)) self.task_poll_for_reply = asyncio.ensure_future( self._async_poll_for_reply(parent_msg_id, cell, exec_timeout, task_poll_output_msg, task_poll_kernel_alive)) try: exec_reply = await self.task_poll_for_reply except asyncio.CancelledError: # can only be cancelled by task_poll_kernel_alive when the kernel is dead task_poll_output_msg.cancel() raise DeadKernelError("Kernel died") except Exception as e: # Best effort to cancel request if it hasn't been resolved try: # Check if the task_poll_output is doing the raising for us if not isinstance(e, CellControlSignal): task_poll_output_msg.cancel() finally: raise if execution_count: cell['execution_count'] = execution_count await self._check_raise_for_error(cell, cell_index, exec_reply) self.nb['cells'][cell_index] = cell return cell execute_cell = run_sync(async_execute_cell) def process_message(self, msg: t.Dict, cell: NotebookNode, cell_index: int) -> t.Optional[t.List]: """ Processes a kernel message, updates cell state, and returns the resulting output object that was appended to cell.outputs. The input argument *cell* is modified in-place. Parameters ---------- msg : dict The kernel message being processed. cell : nbformat.NotebookNode The cell which is currently being processed. cell_index : int The position of the cell within the notebook object. Returns ------- output : dict The execution output payload (or None for no output). Raises ------ CellExecutionComplete Once a message arrives which indicates computation completeness. 
""" msg_type = msg['msg_type'] self.log.debug("msg_type: %s", msg_type) content = msg['content'] self.log.debug("content: %s", content) display_id = content.get('transient', {}).get('display_id', None) if display_id and msg_type in { 'execute_result', 'display_data', 'update_display_data' }: self._update_display_id(display_id, msg) # set the prompt number for the input and the output if 'execution_count' in content: cell['execution_count'] = content['execution_count'] if self.record_timing: if msg_type == 'status': if content['execution_state'] == 'idle': cell['metadata']['execution'][ 'iopub.status.idle'] = timestamp(msg) elif content['execution_state'] == 'busy': cell['metadata']['execution'][ 'iopub.status.busy'] = timestamp(msg) elif msg_type == 'execute_input': cell['metadata']['execution'][ 'iopub.execute_input'] = timestamp(msg) if msg_type == 'status': if content['execution_state'] == 'idle': raise CellExecutionComplete() elif msg_type == 'clear_output': self.clear_output(cell.outputs, msg, cell_index) elif msg_type.startswith('comm'): self.handle_comm_msg(cell.outputs, msg, cell_index) # Check for remaining messages we don't process elif msg_type not in ['execute_input', 'update_display_data']: # Assign output as our processed "result" return self.output(cell.outputs, msg, display_id, cell_index) return None def output(self, outs: t.List, msg: t.Dict, display_id: str, cell_index: int) -> t.Optional[t.List]: msg_type = msg['msg_type'] parent_msg_id = msg['parent_header'].get('msg_id') if self.output_hook_stack[parent_msg_id]: # if we have a hook registered, it will override our # default output behaviour (e.g. OutputWidget) hook = self.output_hook_stack[parent_msg_id][-1] hook.output(outs, msg, display_id, cell_index) return None try: out = output_from_msg(msg) except ValueError: self.log.error("unhandled iopub msg: " + msg_type) return None if self.clear_before_next_output: self.log.debug('Executing delayed clear_output') outs[:] = [] self.clear_display_id_mapping(cell_index) self.clear_before_next_output = False if display_id: # record output index in: # _display_id_map[display_id][cell_idx] cell_map = self._display_id_map.setdefault(display_id, {}) output_idx_list = cell_map.setdefault(cell_index, []) output_idx_list.append(len(outs)) outs.append(out) return out def clear_output(self, outs: t.List, msg: t.Dict, cell_index: int) -> None: content = msg['content'] parent_msg_id = msg['parent_header'].get('msg_id') if self.output_hook_stack[parent_msg_id]: # if we have a hook registered, it will override our # default clear_output behaviour (e.g. 
OutputWidget) hook = self.output_hook_stack[parent_msg_id][-1] hook.clear_output(outs, msg, cell_index) return if content.get('wait'): self.log.debug('Wait to clear output') self.clear_before_next_output = True else: self.log.debug('Immediate clear output') outs[:] = [] self.clear_display_id_mapping(cell_index) def clear_display_id_mapping(self, cell_index: int) -> None: for display_id, cell_map in self._display_id_map.items(): if cell_index in cell_map: cell_map[cell_index] = [] def handle_comm_msg(self, outs: t.List, msg: t.Dict, cell_index: int) -> None: content = msg['content'] data = content['data'] if self.store_widget_state and 'state' in data: # ignore custom msg'es self.widget_state.setdefault(content['comm_id'], {}).update(data['state']) if 'buffer_paths' in data and data['buffer_paths']: comm_id = content['comm_id'] if comm_id not in self.widget_buffers: self.widget_buffers[comm_id] = {} # for each comm, the path uniquely identifies a buffer new_buffers: t.Dict[t.Tuple[str, ...], t.Dict[str, str]] = { tuple(k["path"]): k for k in self._get_buffer_data(msg) } self.widget_buffers[comm_id].update(new_buffers) # There are cases where we need to mimic a frontend, to get similar behaviour as # when using the Output widget from Jupyter lab/notebook if msg['msg_type'] == 'comm_open': target = msg['content'].get('target_name') handler = self.comm_open_handlers.get(target) if handler: comm_id = msg['content']['comm_id'] comm_object = handler(msg) if comm_object: self.comm_objects[comm_id] = comm_object else: self.log.warning( f'No handler found for comm target {target!r}') elif msg['msg_type'] == 'comm_msg': content = msg['content'] comm_id = msg['content']['comm_id'] if comm_id in self.comm_objects: self.comm_objects[comm_id].handle_msg(msg) def _serialize_widget_state(self, state: t.Dict) -> t.Dict[str, t.Any]: """Serialize a widget state, following format in @jupyter-widgets/schema.""" return { 'model_name': state.get('_model_name'), 'model_module': state.get('_model_module'), 'model_module_version': state.get('_model_module_version'), 'state': state, } def _get_buffer_data(self, msg: t.Dict) -> t.List[t.Dict[str, str]]: encoded_buffers = [] paths = msg['content']['data']['buffer_paths'] buffers = msg['buffers'] for path, buffer in zip(paths, buffers): encoded_buffers.append({ 'data': base64.b64encode(buffer).decode('utf-8'), 'encoding': 'base64', 'path': path, }) return encoded_buffers def register_output_hook(self, msg_id: str, hook: OutputWidget) -> None: """Registers an override object that handles output/clear_output instead. Multiple hooks can be registered, where the last one will be used (stack based) """ # mimics # https://jupyterlab.github.io/jupyterlab/services/interfaces/kernel.ikernelconnection.html#registermessagehook self.output_hook_stack[msg_id].append(hook) def remove_output_hook(self, msg_id: str, hook: OutputWidget) -> None: """Unregisters an override object that handles output/clear_output instead""" # mimics # https://jupyterlab.github.io/jupyterlab/services/interfaces/kernel.ikernelconnection.html#removemessagehook removed_hook = self.output_hook_stack[msg_id].pop() assert removed_hook == hook def on_comm_open_jupyter_widget(self, msg: t.Dict): content = msg['content'] data = content['data'] state = data['state'] comm_id = msg['content']['comm_id'] module = self.widget_registry.get(state['_model_module']) if module: widget_class = module.get(state['_model_name']) if widget_class: return widget_class(comm_id, state, self.kc, self)
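# --- Example (not part of the class above) -------------------------------------
# Minimal usage sketch: read a notebook, execute every code cell with a fresh
# kernel, and write the result back. It assumes this class is the NotebookClient
# shipped by the nbclient package; the paths and kernel name are illustrative.
import nbformat
from nbclient import NotebookClient

nb = nbformat.read("input.ipynb", as_version=4)
client = NotebookClient(
    nb,
    timeout=600,
    kernel_name="python3",
    allow_errors=False,
    resources={"metadata": {"path": "."}},  # execute relative to this directory
)
client.execute()
nbformat.write(nb, "output.ipynb")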
class NotebookNotary(LoggingConfigurable): """A class for computing and verifying notebook signatures.""" data_dir = Unicode(help="""The storage directory for notary secret and database.""").tag( config=True ) @default("data_dir") def _data_dir_default(self): app = None try: if JupyterApp.initialized(): app = JupyterApp.instance() except MultipleInstanceError: pass if app is None: # create an app, without the global instance app = JupyterApp() app.initialize(argv=[]) return app.data_dir store_factory = Callable( help="""A callable returning the storage backend for notebook signatures. The default uses an SQLite database.""" ).tag(config=True) @default("store_factory") def _store_factory_default(self): def factory(): if sqlite3 is None: self.log.warning("Missing SQLite3, all notebooks will be untrusted!") return MemorySignatureStore() return SQLiteSignatureStore(self.db_file) return factory db_file = Unicode( help="""The sqlite file in which to store notebook signatures. By default, this will be in your Jupyter data directory. You can set it to ':memory:' to disable sqlite writing to the filesystem. """ ).tag(config=True) @default("db_file") def _db_file_default(self): if not self.data_dir: return ":memory:" return os.path.join(self.data_dir, "nbsignatures.db") algorithm = Enum( algorithms, default_value="sha256", help="""The hashing algorithm used to sign notebooks.""" ).tag(config=True) @observe("algorithm") def _algorithm_changed(self, change): self.digestmod = getattr(hashlib, change["new"]) digestmod = Any() @default("digestmod") def _digestmod_default(self): return getattr(hashlib, self.algorithm) secret_file = Unicode(help="""The file where the secret key is stored.""").tag(config=True) @default("secret_file") def _secret_file_default(self): if not self.data_dir: return "" return os.path.join(self.data_dir, "notebook_secret") secret = Bytes(help="""The secret key with which notebooks are signed.""").tag(config=True) @default("secret") def _secret_default(self): # note : this assumes an Application is running if os.path.exists(self.secret_file): with open(self.secret_file, "rb") as f: return f.read() else: secret = encodebytes(os.urandom(1024)) self._write_secret_file(secret) return secret def __init__(self, **kwargs): super().__init__(**kwargs) self.store = self.store_factory() def _write_secret_file(self, secret): """write my secret to my secret_file""" self.log.info("Writing notebook-signing key to %s", self.secret_file) with open(self.secret_file, "wb") as f: f.write(secret) try: os.chmod(self.secret_file, 0o600) except OSError: self.log.warning("Could not set permissions on %s", self.secret_file) return secret def compute_signature(self, nb): """Compute a notebook's signature by hashing the entire contents of the notebook via HMAC digest. """ hmac = HMAC(self.secret, digestmod=self.digestmod) # don't include the previous hash in the content to hash with signature_removed(nb): # sign the whole thing for b in yield_everything(nb): hmac.update(b) return hmac.hexdigest() def check_signature(self, nb): """Check a notebook's stored signature If a signature is stored in the notebook's metadata, a new signature is computed and compared with the stored value. Returns True if the signature is found and matches, False otherwise. 
The following conditions must all be met for a notebook to be trusted: - a signature is stored in the form 'scheme:hexdigest' - the stored scheme matches the requested scheme - the requested scheme is available from hashlib - the computed hash from notebook_signature matches the stored hash """ if nb.nbformat < 3: return False signature = self.compute_signature(nb) return self.store.check_signature(signature, self.algorithm) def sign(self, nb): """Sign a notebook, indicating that its output is trusted on this machine Stores hash algorithm and hmac digest in a local database of trusted notebooks. """ if nb.nbformat < 3: return signature = self.compute_signature(nb) self.store.store_signature(signature, self.algorithm) def unsign(self, nb): """Ensure that a notebook is untrusted by removing its signature from the trusted database, if present. """ signature = self.compute_signature(nb) self.store.remove_signature(signature, self.algorithm) def mark_cells(self, nb, trusted): """Mark cells as trusted if the notebook's signature can be verified Sets ``cell.metadata.trusted = True | False`` on all code cells, depending on the *trusted* parameter. This will typically be the return value from ``self.check_signature(nb)``. This function is the inverse of check_cells """ if nb.nbformat < 3: return for cell in yield_code_cells(nb): cell["metadata"]["trusted"] = trusted def _check_cell(self, cell, nbformat_version): """Do we trust an individual cell? Return True if: - cell is explicitly trusted - cell has no potentially unsafe rich output If a cell has no output, or only simple print statements, it will always be trusted. """ # explicitly trusted if cell["metadata"].pop("trusted", False): return True # explicitly safe output if nbformat_version >= 4: unsafe_output_types = ["execute_result", "display_data"] safe_keys = {"output_type", "execution_count", "metadata"} else: # v3 unsafe_output_types = ["pyout", "display_data"] safe_keys = {"output_type", "prompt_number", "metadata"} for output in cell["outputs"]: output_type = output["output_type"] if output_type in unsafe_output_types: # if there are any data keys not in the safe whitelist output_keys = set(output) if output_keys.difference(safe_keys): return False return True def check_cells(self, nb): """Return whether all code cells are trusted. A cell is trusted if the 'trusted' field in its metadata is truthy, or if it has no potentially unsafe outputs. If there are no code cells, return True. This function is the inverse of mark_cells. """ if nb.nbformat < 3: return False trusted = True for cell in yield_code_cells(nb): # only distrust a cell if it actually has some output to distrust if not self._check_cell(cell, nb.nbformat): trusted = False return trusted
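# --- Illustrative sketch (not part of the class above) ----------------------
# A minimal end-to-end use of the notary, assuming nbformat's v4 builders are
# importable alongside this class. The in-memory signature database and the
# fixed `secret` are hypothetical values chosen so nothing is written to the
# Jupyter data directory.
from nbformat.v4 import new_code_cell, new_notebook

notary = NotebookNotary(db_file=":memory:", secret=b"not-a-real-secret")

nb = new_notebook(cells=[new_code_cell("print('hello')")])

assert not notary.check_signature(nb)  # unsigned notebooks are untrusted
notary.sign(nb)                        # store the HMAC digest in the database
assert notary.check_signature(nb)      # now trusted on this machine

# Propagate trust down to the individual code cells, then verify it.
notary.mark_cells(nb, notary.check_signature(nb))
assert notary.check_cells(nb)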
class ResourceUseDisplay(Configurable): """ Holds server-side configuration for nbresuse """ process_cpu_metrics = List( trait=PSUtilMetric(), default_value=[{ "name": "cpu_percent", "kwargs": { "interval": 0.05 } }], ) system_cpu_metrics = List(trait=PSUtilMetric(), default_value=[{ "name": "cpu_count" }]) mem_limit = Union( trait_types=[Int(), Callable()], help=""" Memory limit to display to the user, in bytes. Can also be a function which calculates the memory limit. Note that this does not actually limit the user's memory usage! Defaults to reading from the `MEM_LIMIT` environment variable. If set to 0, the max memory available is displayed. """, ).tag(config=True) @default("mem_limit") def _mem_limit_default(self): return int(os.environ.get("MEM_LIMIT", 0)) track_cpu_percent = Bool( default_value=True, help=""" Set to True in order to enable reporting of CPU usage statistics. """, ).tag(config=True) cpu_limit = Union( trait_types=[Float(), Callable()], default_value=0, help=""" CPU usage limit to display to the user. Note that this does not actually limit the user's CPU usage! Defaults to reading from the `CPU_LIMIT` environment variable. If set to 0, the total CPU count available is displayed. """, ).tag(config=True) @default("cpu_limit") def _cpu_limit_default(self): return float(os.environ.get("CPU_LIMIT", 0)) track_disk_usage = Bool( default_value=True, help=""" Set to True in order to enable reporting of Disk usage statistics. """, ).tag(config=True) disk_limit = Union( trait_types=[Int(), Callable()], default_value=0, help=""" Disk usage limit to display to the user. Note that this does not actually limit the user's Disk space! Defaults to reading from the `DISK_LIMIT` environment variable. If set to 0, the total partition space available is displayed. """, ).tag(config=True) @default("disk_limit") def _disk_limit_default(self): return int(os.environ.get("DISK_LIMIT", 0)) disk_dir = Union( trait_types=[Unicode(), Callable()], default_value=os.getcwd(), help=""" The directory that is on the partition to get the size of. Note that this does not actually limit the user's Disk space! Defaults to reading from the `DISK_DIR` environment variable. If not defined, it effectively defaults to /home/jovyan. """, ).tag(config=True) @default("disk_dir") def _disk_dir_default(self): return str(os.environ.get("DISK_DIR", os.getcwd()))
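# --- Illustrative sketch (not part of the class above) ----------------------
# How a deployment might set these traits from a Jupyter config file. The
# values, the cgroup path, and the callable form of `mem_limit` are
# assumptions for illustration; the exact call signature the extension uses
# for callable limits is not shown here, so the helper accepts **kwargs.
c = get_config()  # noqa -- provided by Jupyter when the config file is loaded

c.ResourceUseDisplay.track_cpu_percent = True
c.ResourceUseDisplay.cpu_limit = 2.0


def _mem_limit_from_cgroup(**kwargs):
    # Hypothetical helper: read the container memory limit from cgroup v1.
    with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f:
        return int(f.read())


c.ResourceUseDisplay.mem_limit = _mem_limit_from_cgroup

c.ResourceUseDisplay.track_disk_usage = True
c.ResourceUseDisplay.disk_limit = 10 * 1024**3  # 10 GiB
c.ResourceUseDisplay.disk_dir = "/home/jovyan"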
class GenericOAuthAuthentication(Authentication): """ A provider-agnostic OAuth authentication provider. Configure endpoints, secrets and other parameters to enable any OAuth-compatible platform. """ access_token_url = Unicode( config=True, help="URL used to request an access token once app has been authorized", ) authorize_url = Unicode( config=True, help="URL used to request authorization to OAuth provider", ) client_id = Unicode( config=True, help="Unique string that identifies the app against the OAuth provider", ) client_secret = Unicode( config=True, help= "Secret string used to authenticate the app against the OAuth provider", ) access_scope = Unicode( config=True, help="Permissions that will be requested to OAuth provider.", ) user_data_url = Unicode( config=True, help= "API endpoint for OAuth provider that returns a JSON dict with user data", ) user_data_key = Unicode( config=True, help= "Key in the payload returned by `user_data_url` endpoint that provides the username", ) tls_verify = Bool( True, config=True, help="Disable TLS verification on http request.", ) oauth_callback_url = Union( [Unicode(), Callable()], config=True, help= "Callback URL to use. Typically `{protocol}://{host}/{prefix}/oauth_callback`", ) @default("oauth_callback_url") def _default_oauth_callback_url(self): def _oauth_callback_url(request: Request): return request.url_for("post_login_method") return _oauth_callback_url def get_oauth_callback_url(self, request: Request): if callable(self.oauth_callback_url): return self.oauth_callback_url(request) else: return self.oauth_callback_url login_html = Unicode( """ <div id="login" class="text-center"> <h1 class="h3 mb-3 fw-normal">Please sign in via OAuth</h1> <a class="w-100 btn btn-lg btn-primary" href="{authorization_url}">Sign in with OAuth</a> </div> """, help="html form to use for login", config=True, ) def get_login_html(self, request: Request, templates): state = secrets.token_urlsafe() request.session["oauth_state"] = state authorization_url = self.oauth_route( auth_url=self.authorize_url, client_id=self.client_id, redirect_uri=self.get_oauth_callback_url(request), scope=self.access_scope, state=state, ) return self.login_html.format(authorization_url=authorization_url) @staticmethod def oauth_route(auth_url, client_id, redirect_uri, scope=None, state=None): r = f"{auth_url}?client_id={client_id}&redirect_uri={redirect_uri}&response_type=code" if scope is not None: r += f"&scope={scope}" if state is not None: r += f"&state={state}" return r @property def routes(self): return [ ("/login/", "get", self.get_login_method), ("/logout/", "post", self.post_logout_method), ("/oauth_callback/", "get", self.post_login_method), ] async def authenticate(self, request: Request): # 1. using the callback_url code and state in request oauth_access_token = self._get_oauth_token(request) if oauth_access_token is None: return None # authentication failed # 2. Who is the username? We need one more request username = self._get_username(oauth_access_token) # 3. create our own internal token return schema.AuthenticationToken( primary_namespace=username, role_bindings={ "*/*": ["admin"], }, ) def _get_oauth_token(self, request: Request): # 1. Get callback URI params, which include `code` and `state` # `code` will be used to request the token; `state` must match our session's! 
code = request.query_params.get("code") state = request.query_params.get("state") if request.session["oauth_state"] != state: raise HTTPException(status_code=401, detail="OAuth states do not match") del request.session["oauth_state"] # 2. Request actual access token with code and secret r_response = requests.post( self.access_token_url, data={ "code": code, "grant_type": "authorization_code", "client_id": self.client_id, "client_secret": self.client_secret, "redirect_uri": self.get_oauth_callback_url(request), }, headers={"Accept": "application/json"}, verify=self.tls_verify, ) if r_response.status_code != 200: return None data = r_response.json() return data["access_token"] def _get_username(self, authentication_token): response = requests.get( self.user_data_url, headers={"Authorization": f"Bearer {authentication_token}"}, verify=self.tls_verify, ) response.raise_for_status() return response.json()[self.user_data_key]
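# --- Illustrative sketch (not part of the class above) ----------------------
# Wiring this provider against a GitHub-style OAuth app via traitlets config.
# All URLs, scopes, and keys below are example values, not defaults of the
# class; the real client id/secret come from the OAuth app registration.
c = get_config()  # noqa -- provided when the config file is loaded

c.GenericOAuthAuthentication.authorize_url = "https://github.com/login/oauth/authorize"
c.GenericOAuthAuthentication.access_token_url = "https://github.com/login/oauth/access_token"
c.GenericOAuthAuthentication.user_data_url = "https://api.github.com/user"
c.GenericOAuthAuthentication.user_data_key = "login"   # field holding the username
c.GenericOAuthAuthentication.access_scope = "read:user"
c.GenericOAuthAuthentication.client_id = "..."          # from the provider's app settings
c.GenericOAuthAuthentication.client_secret = "..."      # keep out of version control
c.GenericOAuthAuthentication.tls_verify = True          # verify certificates on token/user requests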
class CondaStore(LoggingConfigurable):
    storage_class = Type(
        default_value=storage.S3Storage,
        klass=storage.Storage,
        allow_none=False,
        config=True,
    )

    store_directory = Unicode(
        "conda-store-state",
        help="directory for conda-store to build environments and store state",
        config=True,
    )

    build_directory = Unicode(
        "{store_directory}/{namespace}",
        help="Template used to form the directory for storing conda environment builds. Available keys: store_directory, namespace, name. The default will put all built environments in the same namespace within the same directory.",
        config=True,
    )

    environment_directory = Unicode(
        "{store_directory}/{namespace}/envs",
        help="Template used to form the directory for symlinking conda environment builds. Available keys: store_directory, namespace, name. The default will put all environments in the same namespace within the same directory.",
        config=True,
    )

    conda_command = Unicode(
        "mamba",
        help="conda executable to use for solves",
        config=True,
    )

    conda_channel_alias = Unicode(
        "https://conda.anaconda.org",
        help="The prepended url location to associate with channel names",
        config=True,
    )

    conda_platforms = List(
        [conda.conda_platform(), "noarch"],
        help="Conda platforms to download package repodata.json from. By default includes current architecture and noarch",
        config=True,
    )

    conda_default_channels = List(
        ["conda-forge"],
        help="Conda channels that by default are included if channels are empty",
        config=True,
    )

    conda_allowed_channels = List(
        [
            "main",
            "conda-forge",
        ],
        help="Allowed conda channels to be used in conda environments",
        config=True,
    )

    conda_default_packages = List(
        [],
        help="Conda packages that are included by default if none are specified",
        config=True,
    )

    conda_required_packages = List(
        [],
        help="Conda packages that are required to be within the environment specification. Will raise a validation error if a package is not in the specification",
        config=True,
    )

    conda_included_packages = List(
        [],
        help="Conda packages that are automatically included within the environment specification. Will not raise a validation error if a package is not in the specification; it will be added automatically",
        config=True,
    )

    pypi_default_packages = List(
        [],
        help="PyPI packages that are included by default if none are specified",
        config=True,
    )

    pypi_required_packages = List(
        [],
        help="PyPI packages that are required to be within the environment specification. Will raise a validation error if a package is not in the specification",
        config=True,
    )

    pypi_included_packages = List(
        [],
        help="PyPI packages that are automatically included within the environment specification. Will not raise a validation error if a package is not in the specification; it will be added automatically",
        config=True,
    )

    conda_max_solve_time = Integer(
        5 * 60,  # 5 minutes
        help="Maximum time in seconds to allow for solving a given conda environment",
        config=True,
    )

    database_url = Unicode(
        "sqlite:///conda-store.sqlite",
        help="URL for the database, e.g. 'sqlite:///conda-store.sqlite'. Tables will be automatically created if they do not exist",
        config=True,
    )

    redis_url = Unicode(
        help="Redis connection url in form 'redis://:<password>@<hostname>:<port>/0'. Connection is used by Celery along with Conda-Store internally",
        config=True,
    )

    @default("redis_url")
    def _default_redis(self):
        raise TraitError(
            "c.CondaStore.redis_url Redis connection url is required")

    @validate("redis_url")
    def _check_redis(self, proposal):
        try:
            self.redis.ping()
        except Exception:
            raise TraitError(
                f'c.CondaStore.redis_url unable to connect with Redis database at "{self.redis_url}"'
            )
        return proposal.value

    celery_broker_url = Unicode(
        help="broker url to use for celery tasks",
        config=True,
    )

    build_artifacts = List(
        [
            schema.BuildArtifactType.LOCKFILE,
            schema.BuildArtifactType.YAML,
            schema.BuildArtifactType.CONDA_PACK,
            schema.BuildArtifactType.DOCKER_MANIFEST,
        ],
        help="artifacts to build in conda-store. By default all of the artifacts",
        config=True,
    )

    build_artifacts_kept_on_deletion = List(
        [
            schema.BuildArtifactType.LOGS,
            schema.BuildArtifactType.LOCKFILE,
            schema.BuildArtifactType.YAML,
        ],
        help="artifacts to keep on build deletion",
        config=True,
    )

    serialize_builds = Bool(
        True,
        help="No longer build conda environments in parallel. This is due to an issue in conda/mamba where, when downloading files in two concurrent builds, the downloads/extraction can overlap. This is a bug in conda/mamba that needs to be fixed.",
        config=True,
    )

    @default("celery_broker_url")
    def _default_celery_broker_url(self):
        return self.redis_url

    celery_results_backend = Unicode(
        help="backend to use for celery task results",
        config=True,
    )

    @default("celery_results_backend")
    def _default_celery_results_backend(self):
        return self.redis_url

    default_namespace = Unicode(
        "default", help="default namespace for conda-store", config=True)

    filesystem_namespace = Unicode(
        "filesystem",
        help="namespace to use for environments picked up via `CondaStoreWorker.watch_paths` on the filesystem",
        config=True,
    )

    default_uid = Integer(
        os.getuid(),
        help="default uid to assign to built environments",
        config=True,
    )

    default_gid = Integer(
        os.getgid(),
        help="default gid to assign to built environments",
        config=True,
    )

    default_permissions = Unicode(
        "775",
        help="default file permissions to assign to built environments",
        config=True,
    )

    default_docker_base_image = Unicode(
        "frolvlad/alpine-glibc:latest",
        help="default base image used for the Dockerized environments",
        config=True,
    )

    validate_specification = Callable(
        conda_store_validate_specification,
        help="callable function taking conda_store and specification as input arguments to apply for validating and modifying a given specification. If there are validation issues with the environment, a ValueError with a message should be raised. If changed you may need to call the default function to preserve many of the trait effects e.g.
`c.CondaStore.default_channels` etc", config=True, ) @property def session_factory(self): if hasattr(self, "_session_factory"): return self._session_factory # https://docs.sqlalchemy.org/en/14/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork # This is the most simplistic, one shot system that prevents # the Engine from using any connection more than once self._session_factory = orm.new_session_factory(url=self.database_url, poolclass=NullPool) return self._session_factory @property def db(self): # we are using a scoped_session which always returns the same # session if within the same thread # https://docs.sqlalchemy.org/en/14/orm/contextual.html return self.session_factory() @property def redis(self): if hasattr(self, "_redis"): return self._redis self._redis = redis.Redis.from_url(self.redis_url) return self._redis @property def configuration(self): return orm.CondaStoreConfiguration.configuration(self.db) @property def storage(self): if hasattr(self, "_storage"): return self._storage self._storage = self.storage_class(parent=self, log=self.log) return self._storage @property def celery_app(self): if hasattr(self, "_celery_app"): return self._celery_app self._celery_app = Celery( "tasks", backend=self.celery_results_backend, broker=self.celery_broker_url, include=[ "conda_store_server.worker.tasks", ], ) self._celery_app.conf.beat_schedule = { "watch-paths": { "task": "task_watch_paths", "schedule": 60.0, # 1 minute "args": [], "kwargs": {}, }, "update-conda-channels": { "task": "task_update_conda_channels", "schedule": 15.0 * 60.0, # 15 minutes "args": [], "kwargs": {}, }, } if self.celery_results_backend.startswith("sqla"): # https://github.com/celery/celery/issues/4653#issuecomment-400029147 # race condition in table construction in celery # despite issue being closed still causes first task to fail # in celery if tables not created from celery.backends.database import SessionManager session = SessionManager() engine = session.get_engine(self._celery_app.backend.url) session.prepare_models(engine) return self._celery_app def ensure_namespace(self): """Ensure that conda-store default namespaces exists""" namespace = api.get_namespace(self.db, name=self.default_namespace) if namespace is None: api.create_namespace(self.db, name=self.default_namespace) def ensure_directories(self): """Ensure that conda-store filesystem directories exist""" os.makedirs(self.store_directory, exist_ok=True) def ensure_conda_channels(self): """Ensure that conda-store allowed channels and packages are in database""" self.log.info("updating conda store channels") for channel in self.conda_allowed_channels: normalized_channel = conda.normalize_channel_name( self.conda_channel_alias, channel) conda_channel = api.get_conda_channel(self.db, normalized_channel) if conda_channel is None: conda_channel = orm.CondaChannel(name=normalized_channel, last_update=None) self.db.add(conda_channel) self.db.commit() def register_solve(self, specification: schema.CondaSpecification): specification_model = self.validate_specification( conda_store=self, namespace="solve", specification=specification, ) specification_sha256 = utils.datastructure_hash( specification_model.dict()) specification = api.get_specification(self.db, sha256=specification_sha256) if specification is None: self.log.info( f"specification name={specification_model.name} sha256={specification_sha256} registered" ) specification = orm.Specification(specification_model.dict()) self.db.add(specification) self.db.commit() else: 
self.log.debug( f"specification name={specification_model.name} sha256={specification_sha256} already registered" ) solve_model = orm.Solve(specification_id=specification.id) self.db.add(solve_model) self.db.commit() # must import tasks after a celery app has been initialized from conda_store_server.worker import tasks task = tasks.task_solve_conda_environment.apply_async( args=[solve_model.id], time_limit=self.conda_max_solve_time) return task, solve_model.id def register_environment(self, specification: dict, namespace: str = None, force_build=False): """Register a given specification to conda store with given namespace/name. If force_build is True a build will be triggered even if specification already exists. """ namespace = namespace or self.default_namespace # Create Namespace if namespace if it does not exist namespace_model = api.get_namespace(self.db, name=namespace) if namespace_model is None: namespace = api.create_namespace(self.db, name=namespace) self.db.commit() else: namespace = namespace_model specification_model = self.validate_specification( conda_store=self, namespace=namespace.name, specification=schema.CondaSpecification.parse_obj(specification), ) specification_sha256 = utils.datastructure_hash( specification_model.dict()) specification = api.get_specification(self.db, sha256=specification_sha256) if specification is None: self.log.info( f"specification name={specification_model.name} sha256={specification_sha256} registered" ) specification = orm.Specification(specification_model.dict()) self.db.add(specification) self.db.commit() else: self.log.debug( f"specification name={specification_model.name} sha256={specification_sha256} already registered" ) if not force_build: return # Create Environment if specification of given namespace/name # does not exist yet environment = api.get_environment(self.db, namespace_id=namespace.id, name=specification.name) environment_was_empty = environment is None if environment_was_empty: environment = orm.Environment( name=specification.name, namespace_id=namespace.id, ) self.db.add(environment) self.db.commit() build = self.create_build(environment.id, specification.sha256) if environment_was_empty: environment.current_build = build self.db.commit() return build.id def create_build(self, environment_id: int, specification_sha256: str): specification = api.get_specification(self.db, specification_sha256) build = orm.Build(environment_id=environment_id, specification_id=specification.id) self.db.add(build) self.db.commit() self.celery_app # must import tasks after a celery app has been initialized from conda_store_server.worker import tasks artifact_tasks = [] if schema.BuildArtifactType.YAML in self.build_artifacts: artifact_tasks.append( tasks.task_build_conda_env_export.si(build.id)) if schema.BuildArtifactType.CONDA_PACK in self.build_artifacts: artifact_tasks.append(tasks.task_build_conda_pack.si(build.id)) if schema.BuildArtifactType.DOCKER_MANIFEST in self.build_artifacts: artifact_tasks.append(tasks.task_build_conda_docker.si(build.id)) (tasks.task_update_storage_metrics.si() | tasks.task_build_conda_environment.si(build.id) | group(*artifact_tasks) | tasks.task_update_storage_metrics.si()).apply_async() return build def update_environment_build(self, namespace, name, build_id): build = api.get_build(self.db, build_id) if build is None: raise utils.CondaStoreError(f"build id={build_id} does not exist") environment = api.get_environment(self.db, namespace=namespace, name=name) if environment is None: raise 
utils.CondaStoreError( f"environment namespace={namespace} name={name} does not exist" ) if build.status != schema.BuildStatus.COMPLETED: raise utils.CondaStoreError( "cannot update environment to build id since not completed") if build.specification.name != name: raise utils.CondaStoreError( "cannot update environment to build id since specification does not match environment name" ) environment.current_build_id = build.id self.db.commit() self.celery_app # must import tasks after a celery app has been initialized from conda_store_server.worker import tasks tasks.task_update_environment_build.si(environment.id).apply_async() def delete_namespace(self, namespace): namespace = api.get_namespace(self.db, name=namespace) if namespace is None: raise utils.CondaStoreError( f"namespace={namespace} does not exist") utcnow = datetime.datetime.utcnow() namespace.deleted_on = utcnow for environment_orm in namespace.environments: environment_orm.deleted_on = utcnow for build in environment_orm.builds: build.deleted_on = utcnow self.db.commit() self.celery_app # must import tasks after a celery app has been initialized from conda_store_server.worker import tasks tasks.task_delete_namespace.si(namespace.id).apply_async() def delete_environment(self, namespace, name): environment = api.get_environment(self.db, namespace=namespace, name=name) if environment is None: raise utils.CondaStoreError( f"environment namespace={namespace} name={name} does not exist" ) utcnow = datetime.datetime.utcnow() environment.deleted_on = utcnow for build in environment.builds: build.deleted_on = utcnow self.db.commit() self.celery_app # must import tasks after a celery app has been initialized from conda_store_server.worker import tasks tasks.task_delete_environment.si(environment.id).apply_async() def delete_build(self, build_id): build = api.get_build(self.db, build_id) if build.status not in [ schema.BuildStatus.FAILED, schema.BuildStatus.COMPLETED, ]: raise utils.CondaStoreError( "cannot delete build since not finished building") build.deleted_on = datetime.datetime.utcnow() self.db.commit() self.celery_app # must import tasks after a celery app has been initialized from conda_store_server.worker import tasks tasks.task_delete_build.si(build.id).apply_async()
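# --- Illustrative sketch (not part of the class above) ----------------------
# Minimal programmatic use of CondaStore: configure it via traitlets, then
# register an environment specification. Values, the specification fields, and
# the surrounding infrastructure are assumptions: a reachable Redis instance
# (for the broker and the redis_url validator) and a running Celery worker to
# actually execute the queued build are required.
from traitlets.config import Config

cfg = Config()
cfg.CondaStore.database_url = "sqlite:///conda-store.sqlite"
cfg.CondaStore.redis_url = "redis://:password@localhost:6379/0"
cfg.CondaStore.store_directory = "/opt/conda-store-state"
cfg.CondaStore.conda_allowed_channels = ["main", "conda-forge"]

store = CondaStore(config=cfg)
store.ensure_directories()
store.ensure_namespace()
store.ensure_conda_channels()

# Registering a specification creates the namespace/environment rows if needed
# and queues a build via Celery (unless an identical specification is already
# registered and force_build is False).
build_id = store.register_environment(
    specification={
        "name": "analysis",
        "channels": ["conda-forge"],
        "dependencies": ["python=3.10", "numpy"],
    },
    namespace="default",
)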