Example #1
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse
    """

    mem_warning_threshold = Float(default_value=0.1,
                                  help="""
        Warn user with flashing lights when memory usage is within this fraction
        of the memory limit.

        For example, if memory limit is 128MB, `mem_warning_threshold` is 0.1,
        we will start warning the user when they use (128 - (128 * 0.1)) MB.

        Set to 0 to disable warning.
        """).tag(config=True)

    mem_limit = Union(trait_types=[Int(), Callable()],
                      help="""
        Memory limit to display to the user, in bytes.
        Can also be a function which calculates the memory limit.

        Note that this does not actually limit the user's memory usage!

        Defaults to reading from the `MEM_LIMIT` environment variable. If
        set to 0, the max memory available is displayed.
        """).tag(config=True)

    @default('mem_limit')
    def _mem_limit_default(self):
        return int(os.environ.get('MEM_LIMIT', 0))

    track_cpu_percent = Bool(default_value=False,
                             help="""
        Set to True in order to enable reporting of CPU usage statistics.
        """).tag(config=True)

    cpu_warning_threshold = Float(default_value=0.1,
                                  help="""
        Warn user with flashing lights when CPU usage is within this fraction
        of the CPU usage limit.

        For example, if CPU limit is 150%, `cpu_warning_threshold` is 0.1,
        we will start warning the user when they use (150 - (150 * 0.1)) %.

        Set to 0 to disable warning.
        """).tag(config=True)

    cpu_limit = Float(default_value=0,
                      help="""
        CPU usage limit to display to the user.

        Note that this does not actually limit the user's CPU usage!

        Defaults to reading from the `CPU_LIMIT` environment variable. If
        set to 0, the total CPU count available is displayed.
        """).tag(config=True)

    @default('cpu_limit')
    def _cpu_limit_default(self):
        return float(os.environ.get('CPU_LIMIT', 0))
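# A minimal configuration sketch for the traits above (assumption: this would live
# in jupyter_notebook_config.py with nbresuse installed; the values are purely
# illustrative, not recommended defaults).
c = get_config()  # provided by Jupyter when the config file is loaded

# Display a 512 MiB memory limit and start warning 10% below it.
c.ResourceUseDisplay.mem_limit = 512 * 1024 * 1024
c.ResourceUseDisplay.mem_warning_threshold = 0.1

# Also report CPU usage against a 2-CPU limit.
c.ResourceUseDisplay.track_cpu_percent = True
c.ResourceUseDisplay.cpu_limit = 2.0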
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse
    """

    trash_dir = Union(
        trait_types=[Unicode(), Callable()],
        default_value=os.getcwd(),
        help="""
        The directory that the notebook moves deleted files (Trash) to

        Defaults to reading from the `TRASH_DIR` environment variable. If
        not defined, it effectively defaults to $HOME/.local/share/Trash/
        """,
    ).tag(config=True)

    @default("trash_dir")
    def _trash_dir_default(self):
        return str(
            os.environ.get(
                "TRASH_DIR",
                os.path.join(
                    os.environ.get("HOME", os.getcwd()), ".local/share/Trash/"
                ),
            )
        )
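# A small configuration sketch for the trait above (assumption: placed in a Jupyter
# server config file where ``c`` is provided). Because trash_dir is
# Union([Unicode(), Callable()]), either a path string or a callable returning one
# should validate; the string form is shown here.
c.ResourceUseDisplay.trash_dir = "/srv/shared/Trash/"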
class Artist(HasTraits):

    aname = Unicode('Artist')
    zorder = Int(default_value = 0)
    #_prop_order = dict(color=-1)
    _prop_order = Dict() #asked Thomas a question about this attribute
    # pchanged = Bool(default_value = False)
    _stale = Bool(default_value = True)
    stale_callback = Callable(allow_none = True, default_value = None)
    _axes = Instance('matplotlib.axes.Axes', allow_none = True, default_value = None)
    figure = Instance('matplotlib.figure.Figure', allow_none = True, default_value = None)
    _transform = Instance('matplotlib.transforms.Transform', allow_none = True, default_value = None)
    _transformSet = Bool(default_value = False )
    _visible = Bool(default_value = True)
    _animated = Bool(default_value = False)
    _alpha = Float(default_value = None ,allow_none = True)
    clipbox = Instance('matplotlib.transforms.Bbox', allow_none = True, default_value = None)
    """
    Notes from Documentation:
    Union([Float(), Bool(), Int()]) attempts to
    validate the provided values with the validation function of Float, then Bool, and finally Int.
    """
    _clippath = Perishable(Union([Instance('matplotlib.path.Path'), Instance('matplotlib.transforms.Transform'), Instance('matplotlib.patches.Patch')], allow_none = True, default_value = None))
    _clipon = Bool(default_value = True)
    _label = Unicode(allow_none = True, default_value = '')
    _picker = Union([Float(), Bool(), Callable()], allow_none = True, default_value = None)
    _contains = List(allow_none = True, default_value = None)
    _rasterized = Perishable(Bool(allow_none = True, default_value = None))
    _agg_filter = Unicode(allow_none = True, default_value = None) #set agg_filter function
    _mouseover = Bool(default_value = False)
    eventson = Bool(default_value = False)
    _oid = Int(allow_none = True, default_value = 0)
    _propobservers = Dict(default_value = {}) #this may or may not work o/w leave alone and see what happens
    # _remove_method =  #have to look into this
    _url = Unicode(allow_none = True,default_value = None)
    _gid = Unicode(allow_none = True, default_value = None)
    _snap = Perishable(Bool(allow_none = True, default_value = None))
    _sketch = Tuple(Float(), Float(), Float(), default_value = rcParams['path.sketch'])
    _path_effects = List(Instance('matplotlib.patheffects._Base'), default_value = rcParams['path.effects'])
    #_XYPair = namedtuple("_XYPair", "x y")
    #sticky_edges is a tuple with lists of floats
    #the first element of this tuple represents x
    #and the second element of sticky_edges represents y
    _sticky_edges = Tuple(List(trait=Float()), List(trait=Float()))
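# A standalone sketch of the Union validation-order note quoted above: traitlets
# tries each inner trait in order, and the first one that validates the value wins.
from traitlets import HasTraits, Union, Float, Bool, Int, TraitError

class UnionDemo(HasTraits):
    value = Union([Float(), Bool(), Int()])

demo = UnionDemo()
demo.value = 3            # Float() validates first and coerces the int to 3.0
print(demo.value)         # -> 3.0
try:
    demo.value = "nope"   # no member of the Union accepts a str
except TraitError as err:
    print("rejected:", err)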
class JSONWebTokenAuthenticator(Authenticator):
    """
    Accept the authenticated JSON Web Token from header or query parameter.
    """
    redirect_unauthorized = Unicode(
        default_value='',
        config=True,
        help="""Login url to redirect if can't login.""")

    signing_certificate = Unicode(config=True,
                                  help="""
        The public certificate of the private key used to sign the incoming JSON Web Tokens.

        Should be a path to an X509 PEM format certificate on the filesystem.
        """)

    rsa_public_key = Unicode(config=True,
                             help="""
        String with rsa public key encoded with base64.
        """)

    cookie_name = Unicode(
        config=True,
        default_value=DEFAULT_COOKIE_NAME,
        help="""The name of the cookie where is stored the JWT token""")

    username_claim_field = Unicode(default_value='upn',
                                   config=True,
                                   help="""
        The field in the claims that contains the user name. It can be either a straight username,
        or an email/userPrincipalName.
        """)

    expected_audience = Unicode(
        default_value='',
        config=True,
        help="""HTTP header to inspect for the authenticated JSON Web Token."""
    )

    secret = Unicode(
        config=True,
        help=
        """Shared secret key for signing JWT token.  If defined, it overrides any setting for signing_certificate"""
    )

    home_dir = Unicode(config=True, help="""Home directory.""")

    token_file = Unicode(default_value='.jwt_sso.json',
                         config=True,
                         help="""User token file name.""")

    validate_token_hook = Callable(
        default_value=None,
        allow_none=True,
        config=True,
        help=
        """Function that will be called when the validation of the token are required."""
    )

    redirect_to_sso_hook = Callable(
        default_value=None,
        allow_none=True,
        config=True,
        help=
        """Function that will be called when the jwt is invalid. This redirect to sso login url. """
    )

    def get_handlers(_self, _app):
        return [
            (r'/login', JSONWebTokenLoginHandler),
        ]

    async def authenticate(_self, _handler, _data):
        raise NotImplementedError()

    async def pre_spawn_start(self, user, spawner):
        if not self.home_dir:
            raise ExceptionMissingConfigurationParameter(
                "Missing home directory.")
        path = os.path.join(self.home_dir, user.name, self.token_file)
        try:
            with open(path, "r") as f:
                jwt = json.load(f)
        except Exception as ex:
            self.log.error("Can't load token from file!", ex)
            spawner.environment['JWT'] = ''
            return
        spawner.environment['JWT'] = jwt['jwt']

    async def refresh_user(self, user, handler, force=False):
        self.log.info(
            f"refresh user {user.name}, force={force}, home dir: {self.home_dir}"
        )
        if force:
            return False
        sso_path = ""
        if self.home_dir:
            sso_path = os.path.join(self.home_dir, user.name, self.token_file)
            valid = self._validate_auth_token(user, sso_path)
            if valid:
                return True
        self.log.info(
            f"kicking off user {user.name} (force={force}, home_dir={self.home_dir})"
        )
        self._kick_off_user(user, handler, sso_path)
        return False

    def _validate_auth_token(self, user, sso_path):
        if not os.path.exists(sso_path):
            return True
        try:
            with open(sso_path, "r") as f:
                jwt = json.load(f)
            token = jwt['jwt']
            if token and self.validate_token_hook:
                if self.validate_token_hook(token):
                    self.log.info(f"user {user.name} have a valid token")
                    user.spawner.environment['JWT'] = token
                    return True
        except Exception as ex:
            self.log.error(
                f"Can't load token from file for user {user.name}: {ex}")
        self.log.info(f"user {user.name} have a invalid token")
        return False

    def _kick_off_user(self, user, handler, sso_path):
        user.spawner.environment['JWT'] = ''
        if sso_path and os.path.exists(sso_path):
            os.remove(sso_path)
        handler.clear_cookie(self.cookie_name)
        handler.clear_cookie("jupyterhub-hub-login")
        handler.clear_cookie("jupyterhub-session-id")
Example #5
class ServerProxy(Configurable):
    servers = Dict({},
                   help="""
        Dictionary of processes to supervise & proxy.

        Key should be the name of the process. This is also used by default as
        the URL prefix, and all requests matching this prefix are routed to this process.

        Value should be a dictionary with the following keys:
          command
            A list of strings that should be the full command to be executed.
            The optional template arguments {{port}} and {{base_url}} will be substituted with the
            port the process should listen on and the base-url of the notebook.

            Could also be a callable. It should return a list.

          environment
            A dictionary of environment variable mappings. {{port}} and {{base_url}} will be
            substituted as for command.

            Could also be a callable. It should return a dictionary.

          timeout
            Timeout in seconds for the process to become ready, default 5s.

          absolute_url
            Proxy requests default to being rewritten to '/'. If this is True,
            the absolute URL will be sent to the backend instead.

          port
            Set the port that the service will listen on. The default is to automatically select an unused port.

          mappath
            Map request paths to proxied paths.
            Either a dictionary of request paths to proxied paths,
            or a callable that takes parameter ``path`` and returns the proxied path.

          launcher_entry
            A dictionary of various options for entries in classic notebook / jupyterlab launchers.

            Keys recognized are:

            enabled
              Set to True (default) to make an entry in the launchers. Set to False to have no
              explicit entry.

            icon_path
              Full path to an svg icon that could be used with a launcher. Currently only used by the
              JupyterLab launcher

            title
              Title to be used for the launcher entry. Defaults to the name of the server if missing.

          new_browser_tab
            Set to True (default) to open the proxied server interface in a new browser tab. Set to False
            to have it open in a new JupyterLab tab. This has no effect in the classic notebook.
        """,
                   config=True)

    host_allowlist = Union(trait_types=[List(), Callable()],
                           help="""
        List of allowed hosts.
        Can also be a function that decides whether a host can be proxied.

        If implemented as a function, this should return True if a host should
        be proxied and False if it should not.  Such a function could verify
        that the host matches a particular regular expression pattern or falls
        into a specific subnet.  It should probably not be a slow check against
        some external service.  Here is an example that could be placed in a
        site-wide Jupyter notebook config:

            def host_allowlist(handler, host):
                handler.log.info("Request to proxy to host " + host)
                return host.startswith("10.")
            c.ServerProxy.host_allowlist = host_allowlist

        Defaults to a list of ["localhost", "127.0.0.1"].
        """,
                           config=True)

    @default("host_allowlist")
    def _host_allowlist_default(self):
        return ["localhost", "127.0.0.1"]

    host_whitelist = Union(trait_types=[List(), Callable()],
                           help="Deprecated, use host_allowlist",
                           config=True)

    @observe("host_whitelist")
    def _host_whitelist_deprecated(self, change):
        old_attr = change.name
        if self.host_allowlist != change.new:
            # only warn if different
            # protects backward-compatible config from warnings
            # if they set the same value under both names
            # Configurable doesn't have a log
            # https://github.com/ipython/traitlets/blob/5.0.5/traitlets/config/configurable.py#L181
            warn(
                "{cls}.{old} is deprecated in jupyter-server-proxy {version}, use {cls}.{new} instead"
                .format(
                    cls=self.__class__.__name__,
                    old=old_attr,
                    new="host_allowlist",
                    version="3.0.0",
                ))
            self.host_allowlist = change.new
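# A configuration sketch for the ``servers`` trait above (assumption: placed in a
# Jupyter server config file where ``c`` is provided; the "code-server" entry and
# the {port}/{base_url} placeholder spelling are illustrative only, so check the
# extension's docs for the exact template syntax it substitutes).
c.ServerProxy.servers = {
    "code-server": {
        "command": ["code-server", "--auth=none", "--port={port}"],
        "environment": {"BASE_URL": "{base_url}"},
        "timeout": 30,
        "absolute_url": False,
        "launcher_entry": {
            "enabled": True,
            "title": "VS Code",
        },
    }
}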
Example #6
class ServerProxy(Configurable):
    servers = Dict(
        {},
        help="""
        Dictionary of processes to supervise & proxy.

        Key should be the name of the process. This is also used by default as
        the URL prefix, and all requests matching this prefix are routed to this process.

        Value should be a dictionary with the following keys:
          command
            A list of strings that should be the full command to be executed.
            The optional template arguments {{port}} and {{base_url}} will be substituted with the
            port the process should listen on and the base-url of the notebook.

            Could also be a callable. It should return a list.

          environment
            A dictionary of environment variable mappings. {{port}} and {{base_url}} will be
            substituted as for command.

            Could also be a callable. It should return a dictionary.

          timeout
            Timeout in seconds for the process to become ready, default 5s.

          absolute_url
            Proxy requests default to being rewritten to '/'. If this is True,
            the absolute URL will be sent to the backend instead.

          port
            Set the port that the service will listen on. The default is to automatically select an unused port.

          mappath
            Map request paths to proxied paths.
            Either a dictionary of request paths to proxied paths,
            or a callable that takes parameter ``path`` and returns the proxied path.

          launcher_entry
            A dictionary of various options for entries in classic notebook / jupyterlab launchers.

            Keys recognized are:

            enabled
              Set to True (default) to make an entry in the launchers. Set to False to have no
              explicit entry.

            icon_path
              Full path to an svg icon that could be used with a launcher. Currently only used by the
              JupyterLab launcher

            title
              Title to be used for the launcher entry. Defaults to the name of the server if missing.
        """,
        config=True
    )

    host_whitelist = Union(
        trait_types=[List(), Callable()],
        help="""
        List of allowed hosts.
        Can also be a function that decides whether a host can be proxied.

        If implemented as a function, this should return True if a host should
        be proxied and False if it should not.  Such a function could verify
        that the host matches a particular regular expression pattern or falls
        into a specific subnet.  It should probably not be a slow check against
        some external service.  Here is an example that could be placed in a 
        site-wide Jupyter notebook config:

            def host_whitelist(handler, host):
                handler.log.info("Request to proxy to host " + host)
                return host.startswith("10.")
            c.ServerProxy.host_whitelist = host_whitelist

        Defaults to a list of ["localhost", "127.0.0.1"].
        """,
        config=True
    )

    @default("host_whitelist")
    def _host_whitelist_default(self):
        return ["localhost", "127.0.0.1"]
class ServerProxy(Configurable):
    servers = Dict({},
                   help="""
        Dictionary of processes to supervise & proxy.

        Key should be the name of the process. This is also used by default as
        the URL prefix, and all requests matching this prefix are routed to this process.

        Value should be a dictionary with the following keys:
          command
            A list of strings that should be the full command to be executed.
            The optional template arguments {{port}} and {{base_url}} will be substituted with the
            port the process should listen on and the base-url of the notebook.

            Could also be a callable. It should return a list.

          environment
            A dictionary of environment variable mappings. As with the command
            traitlet, {{port}} and {{base_url}} will be substituted.

            Could also be a callable. It should return a dictionary.

          timeout
            Timeout in seconds for the process to become ready, default 5s.

          absolute_url
            Proxy requests default to being rewritten to '/'. If this is True,
            the absolute URL will be sent to the backend instead.

          port
            Set the port that the service will listen on. The default is to automatically select an unused port.

          mappath
            Map request paths to proxied paths.
            Either a dictionary of request paths to proxied paths,
            or a callable that takes parameter ``path`` and returns the proxied path.

          launcher_entry
            A dictionary of various options for entries in classic notebook / jupyterlab launchers.

            Keys recognized are:

            enabled
              Set to True (default) to make an entry in the launchers. Set to False to have no
              explicit entry.

            icon_path
              Full path to an svg icon that could be used with a launcher. Currently only used by the
              JupyterLab launcher

            title
              Title to be used for the launcher entry. Defaults to the name of the server if missing.

          new_browser_tab
            Set to True (default) to open the proxied server interface in a new browser tab. Set to False
            to have it open in a new JupyterLab tab. This has no effect in the classic notebook.

          request_headers_override
            A dictionary of additional HTTP headers for the proxy request. As with
            the command traitlet, {{port}} and {{base_url}} will be substituted.

          path_info
            The trailing path that is appended to the user's server URL to access the proxied server.
            By default it is the name of the server followed by a trailing slash.

          rewrite_response
            An optional function to rewrite the response for the given service.
            Input is a RewritableResponse object which is an argument that MUST be named
            ``response``. The function should modify one or more of the attributes
            ``.body``, ``.headers``, ``.code``, or ``.reason`` of the ``response``
            argument. For example:

                def cat_to_dog(response):
                    response.headers["I-Like"] = "tacos"
                    response.body = response.body.replace(b'cat', b'dog')

                c.ServerProxy.servers['my_server']['rewrite_response'] = cat_to_dog

            The ``rewrite_response`` function can also accept several optional
            positional arguments. Arguments named ``host``, ``port``, and ``path`` will
            receive values corresponding to the URL ``/proxy/<host>:<port><path>``. In
            addition, the original Tornado ``HTTPRequest`` and ``HTTPResponse`` objects
            are available as arguments named ``request`` and ``orig_response``. (These
            objects should not be modified.)

            A list or tuple of functions can also be specified for chaining multiple
            rewrites.

            Defaults to the empty tuple ``tuple()``.
        """,
                   config=True)

    non_service_rewrite_response = Union(
        default_value=tuple(),
        trait_types=[List(), Tuple(), Callable()],
        help="""
        A function (or list or tuple of functions) to rewrite the response for a
        non-service request, for example a request to ``/proxy/<host>:<port><path>``.

        See the description for ``rewrite_response`` for more information.
        Defaults to the empty tuple ``tuple()``.
        """,
        config=True)

    host_allowlist = Union(trait_types=[List(), Callable()],
                           help="""
        List of allowed hosts.
        Can also be a function that decides whether a host can be proxied.

        If implemented as a function, this should return True if a host should
        be proxied and False if it should not.  Such a function could verify
        that the host matches a particular regular expression pattern or falls
        into a specific subnet.  It should probably not be a slow check against
        some external service.  Here is an example that could be placed in a
        site-wide Jupyter notebook config:

            def host_allowlist(handler, host):
                handler.log.info("Request to proxy to host " + host)
                return host.startswith("10.")
            c.ServerProxy.host_allowlist = host_allowlist

        Defaults to a list of ["localhost", "127.0.0.1"].
        """,
                           config=True)

    @default("host_allowlist")
    def _host_allowlist_default(self):
        return ["localhost", "127.0.0.1"]

    host_whitelist = Union(trait_types=[List(), Callable()],
                           help="Deprecated, use host_allowlist",
                           config=True)

    @observe("host_whitelist")
    def _host_whitelist_deprecated(self, change):
        old_attr = change.name
        if self.host_allowlist != change.new:
            # only warn if different
            # protects backward-compatible config from warnings
            # if they set the same value under both names
            # Configurable doesn't have a log
            # https://github.com/ipython/traitlets/blob/5.0.5/traitlets/config/configurable.py#L181
            warn(
                "{cls}.{old} is deprecated in jupyter-server-proxy {version}, use {cls}.{new} instead"
                .format(
                    cls=self.__class__.__name__,
                    old=old_attr,
                    new="host_allowlist",
                    version="3.0.0",
                ))
            self.host_allowlist = change.new
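# A sketch of chaining rewrite functions via ``non_service_rewrite_response``
# (assumption: a Jupyter server config file where ``c`` is provided; the functions
# follow the ``rewrite_response`` contract described in the help text above).
def add_header(response):
    response.headers["X-Proxied-By"] = "jupyter-server-proxy"

def cat_to_dog(response):
    response.body = response.body.replace(b"cat", b"dog")

# Applied, in order, to plain /proxy/<host>:<port><path> requests.
c.ServerProxy.non_service_rewrite_response = (add_header, cat_to_dog)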
Example #8
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for jupyter-resource-usage
    """

    process_memory_metrics = List(
        trait=PSUtilMetric(),
        default_value=[{"name": "memory_info", "attribute": "rss"}],
    )

    system_memory_metrics = List(
        trait=PSUtilMetric(),
        default_value=[{"name": "virtual_memory", "attribute": "total"}],
    )

    process_cpu_metrics = List(
        trait=PSUtilMetric(),
        default_value=[{"name": "cpu_percent", "kwargs": {"interval": 0.05}}],
    )

    system_cpu_metrics = List(
        trait=PSUtilMetric(), default_value=[{"name": "cpu_count"}]
    )

    mem_warning_threshold = Float(
        default_value=0.1,
        help="""
        Warn user with flashing lights when memory usage is within this fraction
        of the memory limit.

        For example, if memory limit is 128MB, `mem_warning_threshold` is 0.1,
        we will start warning the user when they use (128 - (128 * 0.1)) MB.

        Set to 0 to disable warning.
        """,
    ).tag(config=True)

    mem_limit = Union(
        trait_types=[Int(), Callable()],
        help="""
        Memory limit to display to the user, in bytes.
        Can also be a function which calculates the memory limit.

        Note that this does not actually limit the user's memory usage!

        Defaults to reading from the `MEM_LIMIT` environment variable. If
        set to 0, the max memory available is displayed.
        """,
    ).tag(config=True)

    @default("mem_limit")
    def _mem_limit_default(self):
        return int(os.environ.get("MEM_LIMIT", 0))

    track_cpu_percent = Bool(
        default_value=False,
        help="""
        Set to True in order to enable reporting of CPU usage statistics.
        """,
    ).tag(config=True)

    cpu_warning_threshold = Float(
        default_value=0.1,
        help="""
        Warn user with flashing lights when CPU usage is within this fraction
        of the CPU usage limit.

        For example, if CPU limit is 150%, `cpu_warning_threshold` is 0.1,
        we will start warning the user when they use (150 - (150 * 0.1)) %.

        Set to 0 to disable warning.
        """,
    ).tag(config=True)

    cpu_limit = Union(
        trait_types=[Float(), Callable()],
        default_value=0,
        help="""
        CPU usage limit to display to the user.

        Note that this does not actually limit the user's CPU usage!

        Defaults to reading from the `CPU_LIMIT` environment variable. If
        set to 0, the total CPU count available is displayed.
        """,
    ).tag(config=True)

    @default("cpu_limit")
    def _cpu_limit_default(self):
        return float(os.environ.get("CPU_LIMIT", 0))
Example #9
class NotebookClient(LoggingConfigurable):
    """
    Encompasses a Client for executing cells in a notebook
    """

    timeout: int = Integer(
        None,
        allow_none=True,
        help=dedent("""
            The time to wait (in seconds) for output from executions.
            If a cell execution takes longer, a TimeoutError is raised.

            ``None`` or ``-1`` will disable the timeout. If ``timeout_func`` is set,
            it overrides ``timeout``.
            """),
    ).tag(config=True)

    timeout_func: t.Any = Any(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which, when given the cell source as input,
            returns the time to wait (in seconds) for output from cell
            executions. If a cell execution takes longer, a TimeoutError
            is raised.

            Returning ``None`` or ``-1`` will disable the timeout for the cell.
            Not setting ``timeout_func`` will cause the client to
            default to using the ``timeout`` trait for all cells. The
            ``timeout_func`` trait overrides ``timeout`` if it is not ``None``.
            """),
    ).tag(config=True)

    interrupt_on_timeout: bool = Bool(
        False,
        help=dedent("""
            If execution of a cell times out, interrupt the kernel and
            continue executing other cells rather than throwing an error and
            stopping.
            """),
    ).tag(config=True)

    startup_timeout: int = Integer(
        60,
        help=dedent("""
            The time to wait (in seconds) for the kernel to start.
            If kernel startup takes longer, a RuntimeError is
            raised.
            """),
    ).tag(config=True)

    allow_errors: bool = Bool(
        False,
        help=dedent("""
            If ``False`` (default), when a cell raises an error the
            execution is stopped and a `CellExecutionError`
            is raised, except if the error name is in
            ``allow_error_names``.
            If ``True``, execution errors are ignored and the execution
            is continued until the end of the notebook. Output from
            exceptions is included in the cell output in both cases.
            """),
    ).tag(config=True)

    allow_error_names: t.List[str] = List(
        Unicode(),
        help=dedent("""
            List of error names which won't stop the execution. Use this if the
            ``allow_errors`` option is too general and you want to allow only
            specific kinds of errors.
            """),
    ).tag(config=True)

    force_raise_errors: bool = Bool(
        False,
        help=dedent("""
            If False (default), errors from executing the notebook can be
            allowed with a ``raises-exception`` tag on a single cell, or the
            ``allow_errors`` or ``allow_error_names`` configurable options for
            all cells. An allowed error will be recorded in notebook output, and
            execution will continue. If an error occurs when it is not
            explicitly allowed, a `CellExecutionError` will be raised.
            If True, `CellExecutionError` will be raised for any error that occurs
            while executing the notebook. This overrides the ``allow_errors``
            and ``allow_error_names`` options and the ``raises-exception`` cell
            tag.
            """),
    ).tag(config=True)

    skip_cells_with_tag: str = Unicode(
        'skip-execution',
        help=dedent("""
            Name of the cell tag to use to denote a cell that should be skipped.
            """),
    ).tag(config=True)

    extra_arguments: t.List = List(Unicode()).tag(config=True)

    kernel_name: str = Unicode(
        '',
        help=dedent("""
            Name of kernel to use to execute the cells.
            If not set, use the kernel_spec embedded in the notebook.
            """),
    ).tag(config=True)

    raise_on_iopub_timeout: bool = Bool(
        False,
        help=dedent("""
            If ``False`` (default), then the kernel will continue waiting for
            iopub messages until it receives a kernel idle message, or until a
            timeout occurs, at which point the currently executing cell will be
            skipped. If ``True``, then an error will be raised after the first
            timeout. This option generally does not need to be used, but may be
            useful in contexts where there is the possibility of executing
            notebooks with memory-consuming infinite loops.
            """),
    ).tag(config=True)

    store_widget_state: bool = Bool(
        True,
        help=dedent("""
            If ``True`` (default), then the state of the Jupyter widgets created
            at the kernel will be stored in the metadata of the notebook.
            """),
    ).tag(config=True)

    record_timing: bool = Bool(
        True,
        help=dedent("""
            If ``True`` (default), then the execution timings of each cell will
            be stored in the metadata of the notebook.
            """),
    ).tag(config=True)

    iopub_timeout: int = Integer(
        4,
        allow_none=False,
        help=dedent("""
            The time to wait (in seconds) for IOPub output. This generally
            doesn't need to be set, but on some slow networks (such as CI
            systems) the default timeout might not be long enough to get all
            messages.
            """),
    ).tag(config=True)

    shell_timeout_interval: int = Integer(
        5,
        allow_none=False,
        help=dedent("""
            The time to wait (in seconds) for Shell output before retrying.
            This generally doesn't need to be set, but if one needs to check
            for dead kernels at a faster rate this can help.
            """),
    ).tag(config=True)

    shutdown_kernel = Enum(
        ['graceful', 'immediate'],
        default_value='graceful',
        help=dedent("""
            If ``graceful`` (default), then the kernel is given time to clean
            up after executing all cells, e.g., to execute its ``atexit`` hooks.
            If ``immediate``, then the kernel is signaled to immediately
            terminate.
            """),
    ).tag(config=True)

    ipython_hist_file: str = Unicode(
        default_value=':memory:',
        help=
        """Path to file to use for SQLite history database for an IPython kernel.

        The specific value ``:memory:`` (including the colons
        at both ends but not the backticks) avoids creating a history file. Otherwise, IPython
        will create a history file for each kernel.

        When running kernels simultaneously (e.g. via multiprocessing), saving history to a single
        SQLite file can result in database errors, so using ``:memory:`` is recommended in
        non-interactive contexts.
        """,
    ).tag(config=True)

    kernel_manager_class: KernelManager = Type(
        config=True, help='The kernel manager class to use.')

    on_notebook_start: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes after the kernel manager and kernel client are set up, and
            cells are about to execute.
            Called with kwargs `notebook`.
            """),
    ).tag(config=True)

    on_notebook_complete: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes after the kernel is cleaned up.
            Called with kwargs `notebook`.
            """),
    ).tag(config=True)

    on_notebook_error: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes when the notebook encounters an error.
            Called with kwargs `notebook`.
            """),
    ).tag(config=True)

    on_cell_start: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes before a cell is executed and before non-executing cells
            are skipped.
            Called with kwargs `cell` and `cell_index`.
            """),
    ).tag(config=True)

    on_cell_execute: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes just before a code cell is executed.
            Called with kwargs `cell` and `cell_index`.
            """),
    ).tag(config=True)

    on_cell_complete: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes after a cell execution is complete. It is
            called even when a cell results in a failure.
            Called with kwargs `cell` and `cell_index`.
            """),
    ).tag(config=True)

    on_cell_error: t.Optional[t.Callable] = Callable(
        default_value=None,
        allow_none=True,
        help=dedent("""
            A callable which executes when a cell execution results in an error.
            This is executed even if errors are suppressed with `cell_allows_errors`.
            Called with kwargs `cell` and `cell_index`.
            """),
    ).tag(config=True)

    @default('kernel_manager_class')
    def _kernel_manager_class_default(self) -> KernelManager:
        """Use a dynamic default to avoid importing jupyter_client at startup"""
        from jupyter_client import AsyncKernelManager

        return AsyncKernelManager

    _display_id_map: t.Dict[str, t.Dict] = Dict(help=dedent("""
              mapping of locations of outputs with a given display_id
              tracks cell index and output index within cell.outputs for
              each appearance of the display_id
              {
                  'display_id': {
                      cell_idx: [output_idx,]
                  }
              }
              """))

    display_data_priority: t.List = List(
        [
            'text/html',
            'application/pdf',
            'text/latex',
            'image/svg+xml',
            'image/png',
            'image/jpeg',
            'text/markdown',
            'text/plain',
        ],
        help="""
            An ordered list of preferred output types; the first one
            encountered will usually be used when converting, discarding
            the others.
            """,
    ).tag(config=True)

    resources: t.Dict = Dict(help=dedent("""
            Additional resources used in the conversion process. For example,
            passing ``{'metadata': {'path': run_path}}`` sets the
            execution path to ``run_path``.
            """))

    def __init__(self,
                 nb: NotebookNode,
                 km: t.Optional[KernelManager] = None,
                 **kw) -> None:
        """Initializes the execution manager.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being executed.
        km : KernelManager (optional)
            Optional kernel manager. If none is provided, a kernel manager will
            be created.
        """
        super().__init__(**kw)
        self.nb: NotebookNode = nb
        self.km: t.Optional[KernelManager] = km
        self.owns_km: bool = km is None  # whether the NotebookClient owns the kernel manager
        self.kc: t.Optional[KernelClient] = None
        self.reset_execution_trackers()
        self.widget_registry: t.Dict[str, t.Dict] = {
            '@jupyter-widgets/output': {
                'OutputModel': OutputWidget
            }
        }
        # comm_open_handlers should return an object with a .handle_msg(msg) method or None
        self.comm_open_handlers: t.Dict[str, t.Any] = {
            'jupyter.widget': self.on_comm_open_jupyter_widget
        }

    def reset_execution_trackers(self) -> None:
        """Resets any per-execution trackers."""
        self.task_poll_for_reply: t.Optional[asyncio.Future] = None
        self.code_cells_executed = 0
        self._display_id_map = {}
        self.widget_state: t.Dict[str, t.Dict] = {}
        self.widget_buffers: t.Dict[str, t.Dict[t.Tuple[str, ...],
                                                t.Dict[str, str]]] = {}
        # maps each key to a list of hooks, where the last one is used; this
        # supports nested use of output widgets.
        self.output_hook_stack: t.Any = collections.defaultdict(list)
        # our front-end mimicking Output widgets
        self.comm_objects: t.Dict[str, t.Any] = {}

    def create_kernel_manager(self) -> KernelManager:
        """Creates a new kernel manager.

        Returns
        -------
        km : KernelManager
            Kernel manager whose client class is asynchronous.
        """
        if not self.kernel_name:
            kn = self.nb.metadata.get('kernelspec', {}).get('name')
            if kn is not None:
                self.kernel_name = kn

        if not self.kernel_name:
            self.km = self.kernel_manager_class(config=self.config)
        else:
            self.km = self.kernel_manager_class(kernel_name=self.kernel_name,
                                                config=self.config)

        # If the current kernel manager is still using the default (synchronous) KernelClient class,
        # switch to the async version since that's what NBClient prefers.
        if self.km.client_class == 'jupyter_client.client.KernelClient':
            self.km.client_class = 'jupyter_client.asynchronous.AsyncKernelClient'

        return self.km

    async def _async_cleanup_kernel(self) -> None:
        assert self.km is not None
        now = self.shutdown_kernel == "immediate"
        try:
            # Queue the manager to kill the process, and recover gracefully if it's already dead.
            if await ensure_async(self.km.is_alive()):
                await ensure_async(self.km.shutdown_kernel(now=now))
        except RuntimeError as e:
            # The error isn't specialized, so we have to check the message
            if 'No kernel is running!' not in str(e):
                raise
        finally:
            # Remove any state left over even if we failed to stop the kernel
            await ensure_async(self.km.cleanup_resources())
            if getattr(self, "kc") and self.kc is not None:
                await ensure_async(self.kc.stop_channels())
                self.kc = None
                self.km = None

    _cleanup_kernel = run_sync(_async_cleanup_kernel)

    async def async_start_new_kernel(self, **kwargs) -> None:
        """Creates a new kernel.

        Parameters
        ----------
        kwargs :
            Any options for ``self.kernel_manager_class.start_kernel()``. Because
            that defaults to AsyncKernelManager, this will likely include options
            accepted by ``AsyncKernelManager.start_kernel()``, which includes ``cwd``.
        """
        assert self.km is not None
        resource_path = self.resources.get('metadata', {}).get('path') or None
        if resource_path and 'cwd' not in kwargs:
            kwargs["cwd"] = resource_path

        has_history_manager_arg = any(
            arg.startswith('--HistoryManager.hist_file')
            for arg in self.extra_arguments)
        if (hasattr(self.km, 'ipykernel') and self.km.ipykernel
                and self.ipython_hist_file and not has_history_manager_arg):
            self.extra_arguments += [
                f'--HistoryManager.hist_file={self.ipython_hist_file}'
            ]

        await ensure_async(
            self.km.start_kernel(extra_arguments=self.extra_arguments,
                                 **kwargs))

    start_new_kernel = run_sync(async_start_new_kernel)

    async def async_start_new_kernel_client(self) -> KernelClient:
        """Creates a new kernel client.

        Returns
        -------
        kc : KernelClient
            Kernel client as created by the kernel manager ``km``.
        """
        assert self.km is not None
        self.kc = self.km.client()
        await ensure_async(self.kc.start_channels())
        try:
            await ensure_async(
                self.kc.wait_for_ready(timeout=self.startup_timeout))
        except RuntimeError:
            await self._async_cleanup_kernel()
            raise
        self.kc.allow_stdin = False
        await run_hook(self.on_notebook_start, notebook=self.nb)
        return self.kc

    start_new_kernel_client = run_sync(async_start_new_kernel_client)

    @contextmanager
    def setup_kernel(self, **kwargs) -> t.Generator:
        """
        Context manager for setting up the kernel to execute a notebook.

        This assigns the Kernel Manager (``self.km``) if missing and the Kernel Client (``self.kc``).

        When control returns from the yield it stops the client's zmq channels, and shuts
        down the kernel.
        """
        # by default, cleanup the kernel client if we own the kernel manager
        # and keep it alive if we don't
        cleanup_kc = kwargs.pop('cleanup_kc', self.owns_km)

        # Can't use run_until_complete on an asynccontextmanager function :(
        if self.km is None:
            self.km = self.create_kernel_manager()

        if not self.km.has_kernel:
            self.start_new_kernel(**kwargs)
            self.start_new_kernel_client()
        try:
            yield
        finally:
            if cleanup_kc:
                self._cleanup_kernel()

    @asynccontextmanager
    async def async_setup_kernel(self, **kwargs) -> t.AsyncGenerator:
        """
        Context manager for setting up the kernel to execute a notebook.

        This assigns the Kernel Manager (``self.km``) if missing and Kernel Client(``self.kc``).

        When control returns from the yield it stops the client's zmq channels, and shuts
        down the kernel.

        Handlers for SIGINT and SIGTERM are also added to cleanup in case of unexpected shutdown.
        """
        # by default, cleanup the kernel client if we own the kernel manager
        # and keep it alive if we don't
        cleanup_kc = kwargs.pop('cleanup_kc', self.owns_km)
        if self.km is None:
            self.km = self.create_kernel_manager()

        # self._cleanup_kernel uses run_async, which ensures the ioloop is running again.
        # This is necessary as the ioloop has stopped once atexit fires.
        atexit.register(self._cleanup_kernel)

        def on_signal():
            asyncio.ensure_future(self._async_cleanup_kernel())
            atexit.unregister(self._cleanup_kernel)

        loop = asyncio.get_event_loop()
        try:
            loop.add_signal_handler(signal.SIGINT, on_signal)
            loop.add_signal_handler(signal.SIGTERM, on_signal)
        except (NotImplementedError, RuntimeError):
            # NotImplementedError: Windows does not support signals.
            # RuntimeError: Raised when add_signal_handler is called outside the main thread
            pass

        if not self.km.has_kernel:
            await self.async_start_new_kernel(**kwargs)
            await self.async_start_new_kernel_client()
        try:
            yield
        except RuntimeError as e:
            await run_hook(self.on_notebook_error, notebook=self.nb)
            raise e
        finally:
            if cleanup_kc:
                await self._async_cleanup_kernel()
            await run_hook(self.on_notebook_complete, notebook=self.nb)
            atexit.unregister(self._cleanup_kernel)
            try:
                loop.remove_signal_handler(signal.SIGINT)
                loop.remove_signal_handler(signal.SIGTERM)
            except (NotImplementedError, RuntimeError):
                pass

    async def async_execute(self,
                            reset_kc: bool = False,
                            **kwargs) -> NotebookNode:
        """
        Executes each code cell.

        Parameters
        ----------
        kwargs :
            Any option for ``self.kernel_manager_class.start_kernel()``. Because
            that defaults to AsyncKernelManager, this will likely include options
            accepted by ``jupyter_client.AsyncKernelManager.start_kernel()``,
            which includes ``cwd``.

            ``reset_kc`` if True, the kernel client will be reset and a new one
            will be created (default: False).

        Returns
        -------
        nb : NotebookNode
            The executed notebook.
        """
        if reset_kc and self.owns_km:
            await self._async_cleanup_kernel()
        self.reset_execution_trackers()

        async with self.async_setup_kernel(**kwargs):
            assert self.kc is not None
            self.log.info("Executing notebook with kernel: %s" %
                          self.kernel_name)
            msg_id = await ensure_async(self.kc.kernel_info())
            info_msg = await self.async_wait_for_reply(msg_id)
            if info_msg is not None:
                if 'language_info' in info_msg['content']:
                    self.nb.metadata['language_info'] = info_msg['content'][
                        'language_info']
                else:
                    raise RuntimeError(
                        'Kernel info received message content has no "language_info" key. '
                        'Content is:\n' + str(info_msg['content']))
            for index, cell in enumerate(self.nb.cells):
                # Ignore `'execution_count' in content` as it's always 1
                # when store_history is False
                await self.async_execute_cell(
                    cell, index, execution_count=self.code_cells_executed + 1)
            self.set_widgets_metadata()

        return self.nb

    execute = run_sync(async_execute)
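    # Usage sketch (not part of the class; assumes nbformat is installed and an
    # "example.ipynb" file exists):
    #
    #     import nbformat
    #     nb = nbformat.read("example.ipynb", as_version=4)
    #     client = NotebookClient(nb, timeout=600, kernel_name="python3")
    #     executed = client.execute()
    #     nbformat.write(executed, "executed.ipynb")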

    def set_widgets_metadata(self) -> None:
        if self.widget_state:
            self.nb.metadata.widgets = {
                'application/vnd.jupyter.widget-state+json': {
                    'state': {
                        model_id: self._serialize_widget_state(state)
                        for model_id, state in self.widget_state.items()
                        if '_model_name' in state
                    },
                    'version_major': 2,
                    'version_minor': 0,
                }
            }
            for key, widget in self.nb.metadata.widgets[
                    'application/vnd.jupyter.widget-state+json'][
                        'state'].items():
                buffers = self.widget_buffers.get(key)
                if buffers:
                    widget['buffers'] = list(buffers.values())

    def _update_display_id(self, display_id: str, msg: t.Dict) -> None:
        """Update outputs with a given display_id"""
        if display_id not in self._display_id_map:
            self.log.debug("display id %r not in %s", display_id,
                           self._display_id_map)
            return

        if msg['header']['msg_type'] == 'update_display_data':
            msg['header']['msg_type'] = 'display_data'

        try:
            out = output_from_msg(msg)
        except ValueError:
            self.log.error("unhandled iopub msg: " + msg['msg_type'])
            return

        for cell_idx, output_indices in self._display_id_map[display_id].items(
        ):
            cell = self.nb['cells'][cell_idx]
            outputs = cell['outputs']
            for output_idx in output_indices:
                outputs[output_idx]['data'] = out['data']
                outputs[output_idx]['metadata'] = out['metadata']

    async def _async_poll_for_reply(
        self,
        msg_id: str,
        cell: NotebookNode,
        timeout: t.Optional[int],
        task_poll_output_msg: asyncio.Future,
        task_poll_kernel_alive: asyncio.Future,
    ) -> t.Dict:

        assert self.kc is not None
        new_timeout: t.Optional[float] = None
        if timeout is not None:
            deadline = monotonic() + timeout
            new_timeout = float(timeout)
        while True:
            try:
                msg = await ensure_async(
                    self.kc.shell_channel.get_msg(timeout=new_timeout))
                if msg['parent_header'].get('msg_id') == msg_id:
                    if self.record_timing:
                        cell['metadata']['execution'][
                            'shell.execute_reply'] = timestamp(msg)
                    try:
                        await asyncio.wait_for(task_poll_output_msg,
                                               self.iopub_timeout)
                    except (asyncio.TimeoutError, Empty):
                        if self.raise_on_iopub_timeout:
                            task_poll_kernel_alive.cancel()
                            raise CellTimeoutError.error_from_timeout_and_cell(
                                "Timeout waiting for IOPub output",
                                self.iopub_timeout, cell)
                        else:
                            self.log.warning(
                                "Timeout waiting for IOPub output")
                    task_poll_kernel_alive.cancel()
                    return msg
                else:
                    if new_timeout is not None:
                        new_timeout = max(0, deadline - monotonic())
            except Empty:
                # received no message, check if kernel is still alive
                assert timeout is not None
                task_poll_kernel_alive.cancel()
                await self._async_check_alive()
                await self._async_handle_timeout(timeout, cell)

    async def _async_poll_output_msg(self, parent_msg_id: str,
                                     cell: NotebookNode,
                                     cell_index: int) -> None:

        assert self.kc is not None
        while True:
            msg = await ensure_async(
                self.kc.iopub_channel.get_msg(timeout=None))
            if msg['parent_header'].get('msg_id') == parent_msg_id:
                try:
                    # Will raise CellExecutionComplete when completed
                    self.process_message(msg, cell, cell_index)
                except CellExecutionComplete:
                    return

    async def _async_poll_kernel_alive(self) -> None:
        while True:
            await asyncio.sleep(1)
            try:
                await self._async_check_alive()
            except DeadKernelError:
                assert self.task_poll_for_reply is not None
                self.task_poll_for_reply.cancel()
                return

    def _get_timeout(self, cell: t.Optional[NotebookNode]) -> int:
        if self.timeout_func is not None and cell is not None:
            timeout = self.timeout_func(cell)
        else:
            timeout = self.timeout

        if not timeout or timeout < 0:
            timeout = None

        return timeout

    async def _async_handle_timeout(self,
                                    timeout: int,
                                    cell: t.Optional[NotebookNode] = None
                                    ) -> None:

        self.log.error("Timeout waiting for execute reply (%is)." % timeout)
        if self.interrupt_on_timeout:
            self.log.error("Interrupting kernel")
            assert self.km is not None
            await ensure_async(self.km.interrupt_kernel())
        else:
            raise CellTimeoutError.error_from_timeout_and_cell(
                "Cell execution timed out", timeout, cell)

    async def _async_check_alive(self) -> None:
        assert self.kc is not None
        if not await ensure_async(self.kc.is_alive()):
            self.log.error("Kernel died while waiting for execute reply.")
            raise DeadKernelError("Kernel died")

    async def async_wait_for_reply(
            self,
            msg_id: str,
            cell: t.Optional[NotebookNode] = None) -> t.Optional[t.Dict]:

        assert self.kc is not None
        # wait for finish, with timeout
        timeout = self._get_timeout(cell)
        cumulative_time = 0
        while True:
            try:
                msg = await ensure_async(
                    self.kc.shell_channel.get_msg(
                        timeout=self.shell_timeout_interval))
            except Empty:
                await self._async_check_alive()
                cumulative_time += self.shell_timeout_interval
                if timeout and cumulative_time > timeout:
                    await self._async_handle_timeout(timeout, cell)
                    break
            else:
                if msg['parent_header'].get('msg_id') == msg_id:
                    return msg
        return None

    wait_for_reply = run_sync(async_wait_for_reply)
    # Backwards compatibility naming for papermill
    _wait_for_reply = wait_for_reply

    def _passed_deadline(self, deadline: int) -> bool:
        if deadline is not None and deadline - monotonic() <= 0:
            return True
        return False

    async def _check_raise_for_error(self, cell: NotebookNode, cell_index: int,
                                     exec_reply: t.Optional[t.Dict]) -> None:

        if exec_reply is None:
            return None

        exec_reply_content = exec_reply['content']
        if exec_reply_content['status'] != 'error':
            return None

        cell_allows_errors = (not self.force_raise_errors) and (
            self.allow_errors
            or exec_reply_content.get('ename') in self.allow_error_names
            or "raises-exception" in cell.metadata.get("tags", []))
        await run_hook(self.on_cell_error, cell=cell, cell_index=cell_index)
        if not cell_allows_errors:
            raise CellExecutionError.from_cell_and_msg(cell,
                                                       exec_reply_content)

    async def async_execute_cell(
        self,
        cell: NotebookNode,
        cell_index: int,
        execution_count: t.Optional[int] = None,
        store_history: bool = True,
    ) -> NotebookNode:
        """
        Executes a single code cell.

        To execute all cells see :meth:`execute`.

        Parameters
        ----------
        cell : nbformat.NotebookNode
            The cell which is currently being processed.
        cell_index : int
            The position of the cell within the notebook object.
        execution_count : int
            The execution count to be assigned to the cell (default: Use kernel response)
        store_history : bool
            Determines if history should be stored in the kernel (default: True).
            Specific to ipython kernels, which can store command histories.

        Returns
        -------
        cell : NotebookNode
            The cell which was just processed.

        Raises
        ------
        CellExecutionError
            If execution failed and should raise an exception, this will be raised
            with defaults about the failure.
        """
        assert self.kc is not None

        await run_hook(self.on_cell_start, cell=cell, cell_index=cell_index)

        if cell.cell_type != 'code' or not cell.source.strip():
            self.log.debug("Skipping non-executing cell %s", cell_index)
            return cell

        if self.skip_cells_with_tag in cell.metadata.get("tags", []):
            self.log.debug("Skipping tagged cell %s", cell_index)
            return cell

        if self.record_timing:  # clear execution metadata prior to execution
            cell['metadata']['execution'] = {}

        self.log.debug("Executing cell:\n%s", cell.source)

        cell_allows_errors = (not self.force_raise_errors) and (
            self.allow_errors
            or "raises-exception" in cell.metadata.get("tags", []))

        await run_hook(self.on_cell_execute, cell=cell, cell_index=cell_index)
        parent_msg_id = await ensure_async(
            self.kc.execute(cell.source,
                            store_history=store_history,
                            stop_on_error=not cell_allows_errors))
        await run_hook(self.on_cell_complete, cell=cell, cell_index=cell_index)
        # We launched a code cell to execute
        self.code_cells_executed += 1
        exec_timeout = self._get_timeout(cell)

        cell.outputs = []
        self.clear_before_next_output = False

        task_poll_kernel_alive = asyncio.ensure_future(
            self._async_poll_kernel_alive())
        task_poll_output_msg = asyncio.ensure_future(
            self._async_poll_output_msg(parent_msg_id, cell, cell_index))
        self.task_poll_for_reply = asyncio.ensure_future(
            self._async_poll_for_reply(parent_msg_id, cell, exec_timeout,
                                       task_poll_output_msg,
                                       task_poll_kernel_alive))
        try:
            exec_reply = await self.task_poll_for_reply
        except asyncio.CancelledError:
            # can only be cancelled by task_poll_kernel_alive when the kernel is dead
            task_poll_output_msg.cancel()
            raise DeadKernelError("Kernel died")
        except Exception as e:
            # Best effort to cancel request if it hasn't been resolved
            try:
                # Check if the task_poll_output is doing the raising for us
                if not isinstance(e, CellControlSignal):
                    task_poll_output_msg.cancel()
            finally:
                raise

        if execution_count:
            cell['execution_count'] = execution_count
        await self._check_raise_for_error(cell, cell_index, exec_reply)
        self.nb['cells'][cell_index] = cell
        return cell

    execute_cell = run_sync(async_execute_cell)

    def process_message(self, msg: t.Dict, cell: NotebookNode,
                        cell_index: int) -> t.Optional[t.Dict]:
        """
        Processes a kernel message, updates cell state, and returns the
        resulting output object that was appended to cell.outputs.

        The input argument *cell* is modified in-place.

        Parameters
        ----------
        msg : dict
            The kernel message being processed.
        cell : nbformat.NotebookNode
            The cell which is currently being processed.
        cell_index : int
            The position of the cell within the notebook object.

        Returns
        -------
        output : dict
            The execution output payload (or None for no output).

        Raises
        ------
        CellExecutionComplete
          Once a message arrives which indicates computation completeness.

        """
        msg_type = msg['msg_type']
        self.log.debug("msg_type: %s", msg_type)
        content = msg['content']
        self.log.debug("content: %s", content)

        display_id = content.get('transient', {}).get('display_id', None)
        if display_id and msg_type in {
                'execute_result', 'display_data', 'update_display_data'
        }:
            self._update_display_id(display_id, msg)

        # set the prompt number for the input and the output
        if 'execution_count' in content:
            cell['execution_count'] = content['execution_count']

        if self.record_timing:
            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    cell['metadata']['execution'][
                        'iopub.status.idle'] = timestamp(msg)
                elif content['execution_state'] == 'busy':
                    cell['metadata']['execution'][
                        'iopub.status.busy'] = timestamp(msg)
            elif msg_type == 'execute_input':
                cell['metadata']['execution'][
                    'iopub.execute_input'] = timestamp(msg)

        if msg_type == 'status':
            if content['execution_state'] == 'idle':
                raise CellExecutionComplete()
        elif msg_type == 'clear_output':
            self.clear_output(cell.outputs, msg, cell_index)
        elif msg_type.startswith('comm'):
            self.handle_comm_msg(cell.outputs, msg, cell_index)
        # Check for remaining messages we don't process
        elif msg_type not in ['execute_input', 'update_display_data']:
            # Assign output as our processed "result"
            return self.output(cell.outputs, msg, display_id, cell_index)
        return None

    def output(self, outs: t.List, msg: t.Dict, display_id: t.Optional[str],
               cell_index: int) -> t.Optional[t.Dict]:

        msg_type = msg['msg_type']

        parent_msg_id = msg['parent_header'].get('msg_id')
        if self.output_hook_stack[parent_msg_id]:
            # if we have a hook registered, it will override our
            # default output behaviour (e.g. OutputWidget)
            hook = self.output_hook_stack[parent_msg_id][-1]
            hook.output(outs, msg, display_id, cell_index)
            return None

        try:
            out = output_from_msg(msg)
        except ValueError:
            self.log.error("unhandled iopub msg: " + msg_type)
            return None

        if self.clear_before_next_output:
            self.log.debug('Executing delayed clear_output')
            outs[:] = []
            self.clear_display_id_mapping(cell_index)
            self.clear_before_next_output = False

        if display_id:
            # record output index in:
            #   _display_id_map[display_id][cell_idx]
            cell_map = self._display_id_map.setdefault(display_id, {})
            output_idx_list = cell_map.setdefault(cell_index, [])
            output_idx_list.append(len(outs))

        outs.append(out)

        return out

    def clear_output(self, outs: t.List, msg: t.Dict, cell_index: int) -> None:

        content = msg['content']

        parent_msg_id = msg['parent_header'].get('msg_id')
        if self.output_hook_stack[parent_msg_id]:
            # if we have a hook registered, it will override our
            # default clear_output behaviour (e.g. OutputWidget)
            hook = self.output_hook_stack[parent_msg_id][-1]
            hook.clear_output(outs, msg, cell_index)
            return

        if content.get('wait'):
            self.log.debug('Wait to clear output')
            self.clear_before_next_output = True
        else:
            self.log.debug('Immediate clear output')
            outs[:] = []
            self.clear_display_id_mapping(cell_index)

    def clear_display_id_mapping(self, cell_index: int) -> None:

        for display_id, cell_map in self._display_id_map.items():
            if cell_index in cell_map:
                cell_map[cell_index] = []

    def handle_comm_msg(self, outs: t.List, msg: t.Dict,
                        cell_index: int) -> None:

        content = msg['content']
        data = content['data']
        if self.store_widget_state and 'state' in data:  # ignore custom messages
            self.widget_state.setdefault(content['comm_id'],
                                         {}).update(data['state'])
            if 'buffer_paths' in data and data['buffer_paths']:
                comm_id = content['comm_id']
                if comm_id not in self.widget_buffers:
                    self.widget_buffers[comm_id] = {}
                # for each comm, the path uniquely identifies a buffer
                new_buffers: t.Dict[t.Tuple[str, ...], t.Dict[str, str]] = {
                    tuple(k["path"]): k
                    for k in self._get_buffer_data(msg)
                }
                self.widget_buffers[comm_id].update(new_buffers)
        # There are cases where we need to mimic a frontend, to get similar behaviour as
        # when using the Output widget from Jupyter lab/notebook
        if msg['msg_type'] == 'comm_open':
            target = msg['content'].get('target_name')
            handler = self.comm_open_handlers.get(target)
            if handler:
                comm_id = msg['content']['comm_id']
                comm_object = handler(msg)
                if comm_object:
                    self.comm_objects[comm_id] = comm_object
            else:
                self.log.warning(
                    f'No handler found for comm target {target!r}')
        elif msg['msg_type'] == 'comm_msg':
            content = msg['content']
            comm_id = msg['content']['comm_id']
            if comm_id in self.comm_objects:
                self.comm_objects[comm_id].handle_msg(msg)

    def _serialize_widget_state(self, state: t.Dict) -> t.Dict[str, t.Any]:
        """Serialize a widget state, following format in @jupyter-widgets/schema."""
        return {
            'model_name': state.get('_model_name'),
            'model_module': state.get('_model_module'),
            'model_module_version': state.get('_model_module_version'),
            'state': state,
        }

    def _get_buffer_data(self, msg: t.Dict) -> t.List[t.Dict[str, str]]:
        encoded_buffers = []
        paths = msg['content']['data']['buffer_paths']
        buffers = msg['buffers']
        for path, buffer in zip(paths, buffers):
            encoded_buffers.append({
                'data':
                base64.b64encode(buffer).decode('utf-8'),
                'encoding':
                'base64',
                'path':
                path,
            })
        return encoded_buffers

    def register_output_hook(self, msg_id: str, hook: OutputWidget) -> None:
        """Registers an override object that handles output/clear_output instead.

        Multiple hooks can be registered, where the last one will be used (stack based)
        """
        # mimics
        # https://jupyterlab.github.io/jupyterlab/services/interfaces/kernel.ikernelconnection.html#registermessagehook
        self.output_hook_stack[msg_id].append(hook)

    def remove_output_hook(self, msg_id: str, hook: OutputWidget) -> None:
        """Unregisters an override object that handles output/clear_output instead"""
        # mimics
        # https://jupyterlab.github.io/jupyterlab/services/interfaces/kernel.ikernelconnection.html#removemessagehook
        removed_hook = self.output_hook_stack[msg_id].pop()
        assert removed_hook == hook

    def on_comm_open_jupyter_widget(self, msg: t.Dict):
        content = msg['content']
        data = content['data']
        state = data['state']
        comm_id = msg['content']['comm_id']
        module = self.widget_registry.get(state['_model_module'])
        if module:
            widget_class = module.get(state['_model_name'])
            if widget_class:
                return widget_class(comm_id, state, self.kc, self)
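A minimal usage sketch for the executor methods above, assuming they belong to nbclient's NotebookClient; the import path, file names, and kernel name are assumptions, not part of the excerpt.

import nbformat
from nbclient import NotebookClient  # assumed location of the class shown above

nb = nbformat.read("example.ipynb", as_version=4)
client = NotebookClient(nb, timeout=600, kernel_name="python3")
client.execute()                      # drives execute_cell() over every code cell
nbformat.write(nb, "example.executed.ipynb")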
Example #10
class NotebookNotary(LoggingConfigurable):
    """A class for computing and verifying notebook signatures."""

    data_dir = Unicode(help="""The storage directory for notary secret and database.""").tag(
        config=True
    )

    @default("data_dir")
    def _data_dir_default(self):
        app = None
        try:
            if JupyterApp.initialized():
                app = JupyterApp.instance()
        except MultipleInstanceError:
            pass
        if app is None:
            # create an app, without the global instance
            app = JupyterApp()
            app.initialize(argv=[])
        return app.data_dir

    store_factory = Callable(
        help="""A callable returning the storage backend for notebook signatures.
         The default uses an SQLite database."""
    ).tag(config=True)

    @default("store_factory")
    def _store_factory_default(self):
        def factory():
            if sqlite3 is None:
                self.log.warning("Missing SQLite3, all notebooks will be untrusted!")
                return MemorySignatureStore()
            return SQLiteSignatureStore(self.db_file)

        return factory

    db_file = Unicode(
        help="""The sqlite file in which to store notebook signatures.
        By default, this will be in your Jupyter data directory.
        You can set it to ':memory:' to disable sqlite writing to the filesystem.
        """
    ).tag(config=True)

    @default("db_file")
    def _db_file_default(self):
        if not self.data_dir:
            return ":memory:"
        return os.path.join(self.data_dir, "nbsignatures.db")

    algorithm = Enum(
        algorithms, default_value="sha256", help="""The hashing algorithm used to sign notebooks."""
    ).tag(config=True)

    @observe("algorithm")
    def _algorithm_changed(self, change):
        self.digestmod = getattr(hashlib, change["new"])

    digestmod = Any()

    @default("digestmod")
    def _digestmod_default(self):
        return getattr(hashlib, self.algorithm)

    secret_file = Unicode(help="""The file where the secret key is stored.""").tag(config=True)

    @default("secret_file")
    def _secret_file_default(self):
        if not self.data_dir:
            return ""
        return os.path.join(self.data_dir, "notebook_secret")

    secret = Bytes(help="""The secret key with which notebooks are signed.""").tag(config=True)

    @default("secret")
    def _secret_default(self):
        # note : this assumes an Application is running
        if os.path.exists(self.secret_file):
            with open(self.secret_file, "rb") as f:
                return f.read()
        else:
            secret = encodebytes(os.urandom(1024))
            self._write_secret_file(secret)
            return secret

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.store = self.store_factory()

    def _write_secret_file(self, secret):
        """write my secret to my secret_file"""
        self.log.info("Writing notebook-signing key to %s", self.secret_file)
        with open(self.secret_file, "wb") as f:
            f.write(secret)
        try:
            os.chmod(self.secret_file, 0o600)
        except OSError:
            self.log.warning("Could not set permissions on %s", self.secret_file)
        return secret

    def compute_signature(self, nb):
        """Compute a notebook's signature

        by hashing the entire contents of the notebook via HMAC digest.
        """
        hmac = HMAC(self.secret, digestmod=self.digestmod)
        # don't include the previous hash in the content to hash
        with signature_removed(nb):
            # sign the whole thing
            for b in yield_everything(nb):
                hmac.update(b)

        return hmac.hexdigest()

    def check_signature(self, nb):
        """Check a notebook's stored signature

        If a signature is stored in the notebook's metadata,
        a new signature is computed and compared with the stored value.

        Returns True if the signature is found and matches, False otherwise.

        The following conditions must all be met for a notebook to be trusted:
        - a signature is stored in the form 'scheme:hexdigest'
        - the stored scheme matches the requested scheme
        - the requested scheme is available from hashlib
        - the computed hash from notebook_signature matches the stored hash
        """
        if nb.nbformat < 3:
            return False
        signature = self.compute_signature(nb)
        return self.store.check_signature(signature, self.algorithm)

    def sign(self, nb):
        """Sign a notebook, indicating that its output is trusted on this machine

        Stores hash algorithm and hmac digest in a local database of trusted notebooks.
        """
        if nb.nbformat < 3:
            return
        signature = self.compute_signature(nb)
        self.store.store_signature(signature, self.algorithm)

    def unsign(self, nb):
        """Ensure that a notebook is untrusted

        by removing its signature from the trusted database, if present.
        """
        signature = self.compute_signature(nb)
        self.store.remove_signature(signature, self.algorithm)

    def mark_cells(self, nb, trusted):
        """Mark cells as trusted if the notebook's signature can be verified

        Sets ``cell.metadata.trusted = True | False`` on all code cells,
        depending on the *trusted* parameter. This will typically be the return
        value from ``self.check_signature(nb)``.

        This function is the inverse of check_cells
        """
        if nb.nbformat < 3:
            return

        for cell in yield_code_cells(nb):
            cell["metadata"]["trusted"] = trusted

    def _check_cell(self, cell, nbformat_version):
        """Do we trust an individual cell?

        Return True if:

        - cell is explicitly trusted
        - cell has no potentially unsafe rich output

        If a cell has no output, or only simple print statements,
        it will always be trusted.
        """
        # explicitly trusted
        if cell["metadata"].pop("trusted", False):
            return True

        # explicitly safe output
        if nbformat_version >= 4:
            unsafe_output_types = ["execute_result", "display_data"]
            safe_keys = {"output_type", "execution_count", "metadata"}
        else:  # v3
            unsafe_output_types = ["pyout", "display_data"]
            safe_keys = {"output_type", "prompt_number", "metadata"}

        for output in cell["outputs"]:
            output_type = output["output_type"]
            if output_type in unsafe_output_types:
                # if there are any data keys not in the safe whitelist
                output_keys = set(output)
                if output_keys.difference(safe_keys):
                    return False

        return True

    def check_cells(self, nb):
        """Return whether all code cells are trusted.

        A cell is trusted if the 'trusted' field in its metadata is truthy, or
        if it has no potentially unsafe outputs.
        If there are no code cells, return True.

        This function is the inverse of mark_cells.
        """
        if nb.nbformat < 3:
            return False
        trusted = True
        for cell in yield_code_cells(nb):
            # only distrust a cell if it actually has some output to distrust
            if not self._check_cell(cell, nb.nbformat):
                trusted = False

        return trusted
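A minimal usage sketch for NotebookNotary, assuming it is the implementation shipped in nbformat.sign; the import path and file name are assumptions.

import nbformat
from nbformat.sign import NotebookNotary  # assumed import path

nb = nbformat.read("analysis.ipynb", as_version=4)
notary = NotebookNotary()                 # default data_dir, secret_file, algorithm
if notary.check_signature(nb):
    notary.mark_cells(nb, True)           # propagate trust to all code cells
else:
    notary.sign(nb)                       # trust this notebook on this machine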
Example #11
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse
    """

    process_cpu_metrics = List(
        trait=PSUtilMetric(),
        default_value=[{
            "name": "cpu_percent",
            "kwargs": {
                "interval": 0.05
            }
        }],
    )

    system_cpu_metrics = List(trait=PSUtilMetric(),
                              default_value=[{
                                  "name": "cpu_count"
                              }])

    mem_limit = Union(
        trait_types=[Int(), Callable()],
        help="""
        Memory limit to display to the user, in bytes.
        Can also be a function which calculates the memory limit.

        Note that this does not actually limit the user's memory usage!

        Defaults to reading from the `MEM_LIMIT` environment variable. If
        set to 0, the max memory available is displayed.
        """,
    ).tag(config=True)

    @default("mem_limit")
    def _mem_limit_default(self):
        return int(os.environ.get("MEM_LIMIT", 0))

    track_cpu_percent = Bool(
        default_value=True,
        help="""
        Set to True in order to enable reporting of CPU usage statistics.
        """,
    ).tag(config=True)

    cpu_limit = Union(
        trait_types=[Float(), Callable()],
        default_value=0,
        help="""
        CPU usage limit to display to the user.

        Note that this does not actually limit the user's CPU usage!

        Defaults to reading from the `CPU_LIMIT` environment variable. If
        set to 0, the total CPU count available is displayed.
        """,
    ).tag(config=True)

    @default("cpu_limit")
    def _cpu_limit_default(self):
        return float(os.environ.get("CPU_LIMIT", 0))

    track_disk_usage = Bool(
        default_value=True,
        help="""
        Set to True in order to enable reporting of Disk usage statistics.
        """,
    ).tag(config=True)

    disk_limit = Union(
        trait_types=[Int(), Callable()],
        default_value=0,
        help="""
        Disk usage limit to display to the user.

        Note that this does not actually limit the user's Disk space!

        Defaults to reading from the `DISK_LIMIT` environment variable. If
        set to 0, the total partition space available is displayed.
        """,
    ).tag(config=True)

    @default("disk_limit")
    def _disk_limit_default(self):
        return int(os.environ.get("DISK_LIMIT", 0))

    disk_dir = Union(
        trait_types=[Unicode(), Callable()],
        default_value=os.getcwd(),
        help="""
        The directory that is on the partition to get the size of.

        Note that this does not actually limit the user's Disk space!

        Defaults to reading from the `DISK_DIR` environment variable. If
        not defined, it defaults to the current working directory
        (typically /home/jovyan in a Jupyter deployment).
        """,
    ).tag(config=True)

    @default("disk_dir")
    def _disk_dir_default(self):
        return str(os.environ.get("DISK_DIR", os.getcwd()))
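A hypothetical jupyter_notebook_config.py sketch showing how the traits above could be set; the numeric limits and paths are illustrative assumptions.

c = get_config()  # provided by Jupyter when the config file is loaded

c.ResourceUseDisplay.mem_limit = 2 * 1024 ** 3     # report a 2 GiB memory limit
c.ResourceUseDisplay.track_cpu_percent = True
c.ResourceUseDisplay.cpu_limit = 2.0               # displayed as a 200% CPU limit
c.ResourceUseDisplay.track_disk_usage = True
c.ResourceUseDisplay.disk_limit = 10 * 1024 ** 3   # report a 10 GiB disk limit
c.ResourceUseDisplay.disk_dir = "/home/jovyan"     # partition whose usage is reported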
Example #12
class GenericOAuthAuthentication(Authentication):
    """
    A provider-agnostic OAuth authentication provider. Configure endpoints, secrets and other
    parameters to enable any OAuth-compatible platform.
    """

    access_token_url = Unicode(
        config=True,
        help="URL used to request an access token once app has been authorized",
    )
    authorize_url = Unicode(
        config=True,
        help="URL used to request authorization to OAuth provider",
    )
    client_id = Unicode(
        config=True,
        help="Unique string that identifies the app against the OAuth provider",
    )
    client_secret = Unicode(
        config=True,
        help=
        "Secret string used to authenticate the app against the OAuth provider",
    )
    access_scope = Unicode(
        config=True,
        help="Permissions that will be requested to OAuth provider.",
    )
    user_data_url = Unicode(
        config=True,
        help=
        "API endpoint for OAuth provider that returns a JSON dict with user data",
    )
    user_data_key = Unicode(
        config=True,
        help=
        "Key in the payload returned by `user_data_url` endpoint that provides the username",
    )

    tls_verify = Bool(
        True,
        config=True,
        help="Whether to verify TLS certificates on HTTP requests. Set to False to disable verification.",
    )

    oauth_callback_url = Union(
        [Unicode(), Callable()],
        config=True,
        help=
        "Callback URL to use. Typically `{protocol}://{host}/{prefix}/oauth_callback`",
    )

    @default("oauth_callback_url")
    def _default_oauth_callback_url(self):
        def _oauth_callback_url(request: Request):
            return request.url_for("post_login_method")

        return _oauth_callback_url

    def get_oauth_callback_url(self, request: Request):
        if callable(self.oauth_callback_url):
            return self.oauth_callback_url(request)
        else:
            return self.oauth_callback_url

    login_html = Unicode(
        """
<div id="login" class="text-center">
    <h1 class="h3 mb-3 fw-normal">Please sign in via OAuth</h1>
    <a class="w-100 btn btn-lg btn-primary" href="{authorization_url}">Sign in with OAuth</a>
</div>
        """,
        help="html form to use for login",
        config=True,
    )

    def get_login_html(self, request: Request, templates):
        state = secrets.token_urlsafe()
        request.session["oauth_state"] = state
        authorization_url = self.oauth_route(
            auth_url=self.authorize_url,
            client_id=self.client_id,
            redirect_uri=self.get_oauth_callback_url(request),
            scope=self.access_scope,
            state=state,
        )
        return self.login_html.format(authorization_url=authorization_url)

    @staticmethod
    def oauth_route(auth_url, client_id, redirect_uri, scope=None, state=None):
        r = f"{auth_url}?client_id={client_id}&redirect_uri={redirect_uri}&response_type=code"
        if scope is not None:
            r += f"&scope={scope}"
        if state is not None:
            r += f"&state={state}"
        return r

    @property
    def routes(self):
        return [
            ("/login/", "get", self.get_login_method),
            ("/logout/", "post", self.post_logout_method),
            ("/oauth_callback/", "get", self.post_login_method),
        ]

    async def authenticate(self, request: Request):
        # 1. using the callback_url code and state in request
        oauth_access_token = self._get_oauth_token(request)
        if oauth_access_token is None:
            return None  # authentication failed

        # 2. Who is the username? We need one more request
        username = self._get_username(oauth_access_token)

        # 3. create our own internal token
        return schema.AuthenticationToken(
            primary_namespace=username,
            role_bindings={
                "*/*": ["admin"],
            },
        )

    def _get_oauth_token(self, request: Request):
        # 1. Get callback URI params, which include `code` and `state`
        #    `code` will be used to request the token; `state` must match our session's!
        code = request.query_params.get("code")
        state = request.query_params.get("state")
        if request.session["oauth_state"] != state:
            raise HTTPException(status_code=401,
                                detail="OAuth states do not match")
        del request.session["oauth_state"]

        # 2. Request actual access token with code and secret
        r_response = requests.post(
            self.access_token_url,
            data={
                "code": code,
                "grant_type": "authorization_code",
                "client_id": self.client_id,
                "client_secret": self.client_secret,
                "redirect_uri": self.get_oauth_callback_url(request),
            },
            headers={"Accept": "application/json"},
            verify=self.tls_verify,
        )
        if r_response.status_code != 200:
            return None
        data = r_response.json()
        return data["access_token"]

    def _get_username(self, authentication_token):
        response = requests.get(
            self.user_data_url,
            headers={"Authorization": f"Bearer {authentication_token}"},
            verify=self.tls_verify,
        )
        response.raise_for_status()
        return response.json()[self.user_data_key]
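A hypothetical configuration sketch wiring the OAuth traits above to a generic provider; every URL and credential is a placeholder, and the parent application name is an assumption.

c.CondaStoreServer.authentication_class = GenericOAuthAuthentication  # assumed parent app

c.GenericOAuthAuthentication.authorize_url = "https://provider.example/oauth/authorize"
c.GenericOAuthAuthentication.access_token_url = "https://provider.example/oauth/token"
c.GenericOAuthAuthentication.user_data_url = "https://provider.example/api/user"
c.GenericOAuthAuthentication.user_data_key = "username"
c.GenericOAuthAuthentication.client_id = "my-client-id"
c.GenericOAuthAuthentication.client_secret = "my-client-secret"
c.GenericOAuthAuthentication.access_scope = "read:user"
c.GenericOAuthAuthentication.tls_verify = True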
Example #13
class CondaStore(LoggingConfigurable):
    storage_class = Type(
        default_value=storage.S3Storage,
        klass=storage.Storage,
        allow_none=False,
        config=True,
    )

    store_directory = Unicode(
        "conda-store-state",
        help="directory for conda-store to build environments and store state",
        config=True,
    )

    build_directory = Unicode(
        "{store_directory}/{namespace}",
        help=
        "Template used to form the directory for storing conda environment builds. Available keys: store_directory, namespace, name. The default will put all built environments in the same namespace within the same directory.",
        config=True,
    )

    environment_directory = Unicode(
        "{store_directory}/{namespace}/envs",
        help=
        "Template used to form the directory for symlinking conda environment builds. Available keys: store_directory, namespace, name. The default will put all environments in the same namespace within the same directory.",
        config=True,
    )

    conda_command = Unicode(
        "mamba",
        help="conda executable to use for solves",
        config=True,
    )

    conda_channel_alias = Unicode(
        "https://conda.anaconda.org",
        help="The prepended url location to associate with channel names",
        config=True,
    )

    conda_platforms = List(
        [conda.conda_platform(), "noarch"],
        help=
        "Conda platforms to download package repodata.json from. By default includes current architecture and noarch",
        config=True,
    )

    conda_default_channels = List(
        ["conda-forge"],
        help=
        "Conda channels that by default are included if channels are empty",
        config=True,
    )

    conda_allowed_channels = List(
        [
            "main",
            "conda-forge",
        ],
        help="Allowed conda channels to be used in conda environments",
        config=True,
    )

    conda_default_packages = List(
        [],
        help="Conda packages that included by default if none are included",
        config=True,
    )

    conda_required_packages = List(
        [],
        help=
        "Conda packages that are required to be within environment specification. Will raise a validation error is package not in specification",
        config=True,
    )

    conda_included_packages = List(
        [],
        help=
        "Conda packages that auto included within environment specification. Will not raise a validation error if package not in specification and will be auto added",
        config=True,
    )

    pypi_default_packages = List(
        [],
        help="PyPi packages that included by default if none are included",
        config=True,
    )

    pypi_required_packages = List(
        [],
        help=
        "PyPi packages that are required to be within environment specification. Will raise a validation error is package not in specification",
        config=True,
    )

    pypi_included_packages = List(
        [],
        help=
        "PyPi packages that auto included within environment specification. Will not raise a validation error if package not in specification and will be auto added",
        config=True,
    )

    conda_max_solve_time = Integer(
        5 * 60,  # 5 minutes
        help=
        "Maximum time in seconds to allow for solving a given conda environment",
        config=True,
    )

    database_url = Unicode(
        "sqlite:///conda-store.sqlite",
        help=
        "url for the database. e.g. 'sqlite:///conda-store.sqlite' tables will be automatically created if they do not exist",
        config=True,
    )

    redis_url = Unicode(
        help=
        "Redis connection url in form 'redis://:<password>@<hostname>:<port>/0'. Connection is used by Celery along with Conda-Store internally",
        config=True,
    )

    @default("redis_url")
    def _default_redis(self):
        raise TraitError(
            "c.CondaStore.redis_url Redis connection url is required")

    @validate("redis_url")
    def _check_redis(self, proposal):
        try:
            self.redis.ping()
        except Exception:
            raise TraitError(
                f'c.CondaStore.redis_url unable to connect with Redis database at "{self.redis_url}"'
            )
        return proposal.value

    celery_broker_url = Unicode(
        help="broker url to use for celery tasks",
        config=True,
    )

    build_artifacts = List(
        [
            schema.BuildArtifactType.LOCKFILE,
            schema.BuildArtifactType.YAML,
            schema.BuildArtifactType.CONDA_PACK,
            schema.BuildArtifactType.DOCKER_MANIFEST,
        ],
        help=
        "artifacts to build in conda-store. By default all of the artifacts",
        config=True,
    )

    build_artifacts_kept_on_deletion = List(
        [
            schema.BuildArtifactType.LOGS,
            schema.BuildArtifactType.LOCKFILE,
            schema.BuildArtifactType.YAML,
        ],
        help="artifacts to keep on build deletion",
        config=True,
    )

    serialize_builds = Bool(
        True,
        help=
        "No longer build conda environment in parallel. This is due to an issue in conda/mamba that when downloading files in two concurent builds the downloads/extraction can overlap. This is a bug in conda/mamba that needs to be fixed.",
        config=True,
    )

    @default("celery_broker_url")
    def _default_celery_broker_url(self):
        return self.redis_url

    celery_results_backend = Unicode(
        help="backend to use for celery task results",
        config=True,
    )

    @default("celery_results_backend")
    def _default_celery_results_backend(self):
        return self.redis_url

    default_namespace = Unicode("default",
                                help="default namespace for conda-store",
                                config=True)

    filesystem_namespace = Unicode(
        "filesystem",
        help=
        "namespace to use for environments picked up via `CondaStoreWorker.watch_paths` on the filesystem",
        config=True,
    )

    default_uid = Integer(
        os.getuid(),
        help="default uid to assign to built environments",
        config=True,
    )

    default_gid = Integer(
        os.getgid(),
        help="default gid to assign to built environments",
        config=True,
    )

    default_permissions = Unicode(
        "775",
        help="default file permissions to assign to built environments",
        config=True,
    )

    default_docker_base_image = Unicode(
        "frolvlad/alpine-glibc:latest",
        help="default base image used for the Dockerized environments",
        config=True,
    )

    validate_specification = Callable(
        conda_store_validate_specification,
        help=
        "callable function taking conda_store and specification as input arguments to apply for validating and modifying a given specification. If there are validation issues with the environment ValueError with message should be raised. If changed you may need to call the default function to preseve many of the trait effects e.g. `c.CondaStore.default_channels` etc",
        config=True,
    )

    @property
    def session_factory(self):
        if hasattr(self, "_session_factory"):
            return self._session_factory

        # https://docs.sqlalchemy.org/en/14/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork
        # This is the most simplistic, one shot system that prevents
        # the Engine from using any connection more than once
        self._session_factory = orm.new_session_factory(url=self.database_url,
                                                        poolclass=NullPool)
        return self._session_factory

    @property
    def db(self):
        # we are using a scoped_session which always returns the same
        # session if within the same thread
        # https://docs.sqlalchemy.org/en/14/orm/contextual.html
        return self.session_factory()

    @property
    def redis(self):
        if hasattr(self, "_redis"):
            return self._redis
        self._redis = redis.Redis.from_url(self.redis_url)
        return self._redis

    @property
    def configuration(self):
        return orm.CondaStoreConfiguration.configuration(self.db)

    @property
    def storage(self):
        if hasattr(self, "_storage"):
            return self._storage
        self._storage = self.storage_class(parent=self, log=self.log)
        return self._storage

    @property
    def celery_app(self):
        if hasattr(self, "_celery_app"):
            return self._celery_app

        self._celery_app = Celery(
            "tasks",
            backend=self.celery_results_backend,
            broker=self.celery_broker_url,
            include=[
                "conda_store_server.worker.tasks",
            ],
        )
        self._celery_app.conf.beat_schedule = {
            "watch-paths": {
                "task": "task_watch_paths",
                "schedule": 60.0,  # 1 minute
                "args": [],
                "kwargs": {},
            },
            "update-conda-channels": {
                "task": "task_update_conda_channels",
                "schedule": 15.0 * 60.0,  # 15 minutes
                "args": [],
                "kwargs": {},
            },
        }

        if self.celery_results_backend.startswith("sqla"):
            # https://github.com/celery/celery/issues/4653#issuecomment-400029147
            # race condition in table construction in celery
            # despite issue being closed still causes first task to fail
            # in celery if tables not created
            from celery.backends.database import SessionManager

            session = SessionManager()
            engine = session.get_engine(self._celery_app.backend.url)
            session.prepare_models(engine)

        return self._celery_app

    def ensure_namespace(self):
        """Ensure that conda-store default namespaces exists"""
        namespace = api.get_namespace(self.db, name=self.default_namespace)
        if namespace is None:
            api.create_namespace(self.db, name=self.default_namespace)

    def ensure_directories(self):
        """Ensure that conda-store filesystem directories exist"""
        os.makedirs(self.store_directory, exist_ok=True)

    def ensure_conda_channels(self):
        """Ensure that conda-store allowed channels and packages are in database"""
        self.log.info("updating conda store channels")

        for channel in self.conda_allowed_channels:
            normalized_channel = conda.normalize_channel_name(
                self.conda_channel_alias, channel)

            conda_channel = api.get_conda_channel(self.db, normalized_channel)
            if conda_channel is None:
                conda_channel = orm.CondaChannel(name=normalized_channel,
                                                 last_update=None)
                self.db.add(conda_channel)
                self.db.commit()

    def register_solve(self, specification: schema.CondaSpecification):
        specification_model = self.validate_specification(
            conda_store=self,
            namespace="solve",
            specification=specification,
        )
        specification_sha256 = utils.datastructure_hash(
            specification_model.dict())
        specification = api.get_specification(self.db,
                                              sha256=specification_sha256)
        if specification is None:
            self.log.info(
                f"specification name={specification_model.name} sha256={specification_sha256} registered"
            )
            specification = orm.Specification(specification_model.dict())
            self.db.add(specification)
            self.db.commit()
        else:
            self.log.debug(
                f"specification name={specification_model.name} sha256={specification_sha256} already registered"
            )

        solve_model = orm.Solve(specification_id=specification.id)
        self.db.add(solve_model)
        self.db.commit()

        # must import tasks after a celery app has been initialized
        from conda_store_server.worker import tasks

        task = tasks.task_solve_conda_environment.apply_async(
            args=[solve_model.id], time_limit=self.conda_max_solve_time)

        return task, solve_model.id

    def register_environment(self,
                             specification: dict,
                             namespace: str = None,
                             force_build=False):
        """Register a given specification to conda store with given namespace/name.

        If force_build is True a build will be triggered even if
        specification already exists.

        """
        namespace = namespace or self.default_namespace

        # Create the namespace if it does not exist
        namespace_model = api.get_namespace(self.db, name=namespace)
        if namespace_model is None:
            namespace = api.create_namespace(self.db, name=namespace)
            self.db.commit()
        else:
            namespace = namespace_model

        specification_model = self.validate_specification(
            conda_store=self,
            namespace=namespace.name,
            specification=schema.CondaSpecification.parse_obj(specification),
        )
        specification_sha256 = utils.datastructure_hash(
            specification_model.dict())

        specification = api.get_specification(self.db,
                                              sha256=specification_sha256)
        if specification is None:
            self.log.info(
                f"specification name={specification_model.name} sha256={specification_sha256} registered"
            )
            specification = orm.Specification(specification_model.dict())
            self.db.add(specification)
            self.db.commit()
        else:
            self.log.debug(
                f"specification name={specification_model.name} sha256={specification_sha256} already registered"
            )
            if not force_build:
                return

        # Create Environment if specification of given namespace/name
        # does not exist yet
        environment = api.get_environment(self.db,
                                          namespace_id=namespace.id,
                                          name=specification.name)
        environment_was_empty = environment is None
        if environment_was_empty:
            environment = orm.Environment(
                name=specification.name,
                namespace_id=namespace.id,
            )
            self.db.add(environment)
            self.db.commit()

        build = self.create_build(environment.id, specification.sha256)

        if environment_was_empty:
            environment.current_build = build
            self.db.commit()

        return build.id

    def create_build(self, environment_id: int, specification_sha256: str):
        specification = api.get_specification(self.db, specification_sha256)
        build = orm.Build(environment_id=environment_id,
                          specification_id=specification.id)
        self.db.add(build)
        self.db.commit()

        self.celery_app  # accessing the property ensures the celery app is initialized

        # must import tasks after a celery app has been initialized
        from conda_store_server.worker import tasks

        artifact_tasks = []
        if schema.BuildArtifactType.YAML in self.build_artifacts:
            artifact_tasks.append(
                tasks.task_build_conda_env_export.si(build.id))
        if schema.BuildArtifactType.CONDA_PACK in self.build_artifacts:
            artifact_tasks.append(tasks.task_build_conda_pack.si(build.id))
        if schema.BuildArtifactType.DOCKER_MANIFEST in self.build_artifacts:
            artifact_tasks.append(tasks.task_build_conda_docker.si(build.id))

        (tasks.task_update_storage_metrics.si()
         | tasks.task_build_conda_environment.si(build.id)
         | group(*artifact_tasks)
         | tasks.task_update_storage_metrics.si()).apply_async()

        return build

    def update_environment_build(self, namespace, name, build_id):
        build = api.get_build(self.db, build_id)
        if build is None:
            raise utils.CondaStoreError(f"build id={build_id} does not exist")

        environment = api.get_environment(self.db,
                                          namespace=namespace,
                                          name=name)
        if environment is None:
            raise utils.CondaStoreError(
                f"environment namespace={namespace} name={name} does not exist"
            )

        if build.status != schema.BuildStatus.COMPLETED:
            raise utils.CondaStoreError(
                "cannot update environment to build id since not completed")

        if build.specification.name != name:
            raise utils.CondaStoreError(
                "cannot update environment to build id since specification does not match environment name"
            )

        environment.current_build_id = build.id
        self.db.commit()

        self.celery_app
        # must import tasks after a celery app has been initialized
        from conda_store_server.worker import tasks

        tasks.task_update_environment_build.si(environment.id).apply_async()

    def delete_namespace(self, namespace):
        namespace = api.get_namespace(self.db, name=namespace)
        if namespace is None:
            raise utils.CondaStoreError(
                f"namespace={namespace} does not exist")

        utcnow = datetime.datetime.utcnow()
        namespace.deleted_on = utcnow
        for environment_orm in namespace.environments:
            environment_orm.deleted_on = utcnow
            for build in environment_orm.builds:
                build.deleted_on = utcnow
        self.db.commit()

        self.celery_app

        # must import tasks after a celery app has been initialized
        from conda_store_server.worker import tasks

        tasks.task_delete_namespace.si(namespace.id).apply_async()

    def delete_environment(self, namespace, name):
        environment = api.get_environment(self.db,
                                          namespace=namespace,
                                          name=name)
        if environment is None:
            raise utils.CondaStoreError(
                f"environment namespace={namespace} name={name} does not exist"
            )

        utcnow = datetime.datetime.utcnow()
        environment.deleted_on = utcnow
        for build in environment.builds:
            build.deleted_on = utcnow
        self.db.commit()

        self.celery_app

        # must import tasks after a celery app has been initialized
        from conda_store_server.worker import tasks

        tasks.task_delete_environment.si(environment.id).apply_async()

    def delete_build(self, build_id):
        build = api.get_build(self.db, build_id)
        if build.status not in [
                schema.BuildStatus.FAILED,
                schema.BuildStatus.COMPLETED,
        ]:
            raise utils.CondaStoreError(
                "cannot delete build since not finished building")

        build.deleted_on = datetime.datetime.utcnow()
        self.db.commit()

        self.celery_app

        # must import tasks after a celery app has been initialized
        from conda_store_server.worker import tasks

        tasks.task_delete_build.si(build.id).apply_async()
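A hypothetical configuration sketch for the CondaStore traits above; the connection URLs and directories are placeholders, not values taken from the example.

c.CondaStore.store_directory = "/opt/conda-store-state"
c.CondaStore.database_url = "postgresql+psycopg2://user:pass@db:5432/conda_store"
c.CondaStore.redis_url = "redis://:password@redis:6379/0"
c.CondaStore.conda_command = "mamba"
c.CondaStore.conda_allowed_channels = ["main", "conda-forge"]
c.CondaStore.default_namespace = "default"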