Example #1
class PipelineProcessor(LoggingConfigurable):  # ABC

    _type = None

    root_dir = Unicode(allow_none=True)

    enable_pipeline_info = Bool(config=True,
                                default_value=(os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true'),
                                help="""Produces formatted logging of informational messages with durations
                                (default=True). (ELYRA_ENABLE_PIPELINE_INFO env var)""")

    def __init__(self, root_dir, **kwargs):
        super(PipelineProcessor, self).__init__(**kwargs)
        self.root_dir = root_dir

    @property
    @abstractmethod
    def type(self):
        raise NotImplementedError()

    @abstractmethod
    def process(self, pipeline) -> PipelineProcessorResponse:
        raise NotImplementedError()

    @abstractmethod
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        raise NotImplementedError()

    def log_pipeline_info(self, pipeline_name: str, action_clause: str, **kwargs):
        """Produces a formatted log INFO message used entirely for support purposes.

        This method is intended to be called for any entries that should be captured across aggregated
        log files to identify steps within a given pipeline and each of its operations.  As a result,
        calls to this method should produce single-line entries in the log (no embedded newlines).
        Each entry is prefixed with the pipeline name.  This functionality can be disabled by setting
        PipelineProcessor.enable_pipeline_info = False (or via env ELYRA_ENABLE_PIPELINE_INFO).

        General logging should NOT use this method but use logger.<level>() statements directly.

        :param pipeline_name: str representing the name of the pipeline that is being executed
        :param action_clause: str representing the action that is being logged
        :param **kwargs: dict representing the keyword arguments.  Recognized keywords include:
               operation_name: str representing the name of the operation applicable for this entry
               duration: float value representing the duration of the action being logged
        """
        if self.enable_pipeline_info:
            duration = kwargs.get('duration')
            duration_clause = f"({duration:.3f} secs)" if duration else ""

            operation_name = kwargs.get('operation_name')
            op_clause = f":'{operation_name}'" if operation_name else ""

            self.log.info(f"{self._type} '{pipeline_name}'{op_clause} - {action_clause} {duration_clause}")
Example #2
class SSLContext(Configurable):
    """Class used to create an SSL cert to authenticate the service with Jupyter"""

    keyfile = Unicode(os.getenv("JUPYTERHUB_SSL_KEYFILE", ""),
                      help="SSL key, use with certfile").tag(config=True)

    certfile = Unicode(os.getenv("JUPYTERHUB_SSL_CERTFILE", ""),
                       help="SSL cert, use with keyfile").tag(config=True)

    cafile = Unicode(
        os.getenv("JUPYTERHUB_SSL_CLIENT_CA", ""),
        help="SSL CA, use with keyfile and certfile").tag(config=True)

    def ssl_context(self):
        if self.keyfile and self.certfile and self.cafile:
            return make_ssl_context(self.keyfile,
                                    self.certfile,
                                    cafile=self.cafile,
                                    check_hostname=False)
        else:
            return None
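# --- Hedged usage sketch, not part of the scraped example: ssl_context() only builds a
# context when all three paths are configured; the file paths below are placeholders.
ssl_config = SSLContext(keyfile="/srv/ssl/client.key",
                        certfile="/srv/ssl/client.crt",
                        cafile="/srv/ssl/ca.crt")
context = ssl_config.ssl_context()  # None unless keyfile, certfile and cafile are all non-empty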
Example #3
class MetatabExporter(Exporter):
    template_path = List(['.']).tag(config=True, affects_environment=True)

    output_dir = Unicode(help='Output directory').tag(config=True)
    notebook_dir = Unicode(help='CWD in which notebook will be executed').tag(config=True)
    package_dir = Unicode(help='Directory in which to store generated package').tag(config=True)
    package_name = Unicode(help='Name of package to generate. Defaults to the Metatab Root.Name').tag(config=True)

    def __init__(self, config=None, **kw):
        # import pdb; pdb.set_trace();
        super().__init__(config, **kw)

        self.log = kw.get('log', logging.getLogger(self.__class__.__name__))

    def from_file(self, file_stream, resources=None, **kw):
        return super().from_file(file_stream, resources, **kw)

    def from_filename(self, filename, resources=None, **kw):
        if not self.notebook_dir:
            self.notebook_dir = dirname(abspath(filename))

        return super().from_filename(filename, resources, **kw)
Example #4
class PipelineProcessor(LoggingConfigurable):  # ABC

    _type = None

    root_dir = Unicode(allow_none=True)

    component_registry: ComponentRegistry = ComponentRegistry()

    enable_pipeline_info = Bool(
        config=True,
        default_value=(os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true'),
        help="""Produces formatted logging of informational messages with durations
        (default=True). (ELYRA_ENABLE_PIPELINE_INFO env var)"""
    )

    def __init__(self, root_dir, **kwargs):
        super(PipelineProcessor, self).__init__(**kwargs)
        self.root_dir = root_dir

    @property
    @abstractmethod
    def type(self):
        raise NotImplementedError()

    def get_components(self):
        components = self.component_registry.get_all_components(
            processor_type=self.type)
        return components

    @abstractmethod
    def process(self, pipeline) -> PipelineProcessorResponse:
        raise NotImplementedError()

    @abstractmethod
    def export(self, pipeline, pipeline_export_format, pipeline_export_path,
               overwrite):
        raise NotImplementedError()

    def log_pipeline_info(self, pipeline_name: str, action_clause: str,
                          **kwargs):
        """Produces a formatted log INFO message used entirely for support purposes.

        This method is intended to be called for any entries that should be captured across aggregated
        log files to identify steps within a given pipeline and each of its operations.  As a result,
        calls to this method should produce single-line entries in the log (no embedded newlines).
        Each entry is prefixed with the pipeline name.  This functionality can be disabled by setting
        PipelineProcessor.enable_pipeline_info = False (or via env ELYRA_ENABLE_PIPELINE_INFO).

        General logging should NOT use this method but use logger.<level>() statements directly.

        :param pipeline_name: str representing the name of the pipeline that is being executed
        :param action_clause: str representing the action that is being logged
        :param **kwargs: dict representing the keyword arguments.  Recognized keywords include:
               operation_name: str representing the name of the operation applicable for this entry
               duration: float value representing the duration of the action being logged
        """
        if self.enable_pipeline_info:
            duration = kwargs.get('duration')
            duration_clause = f"({duration:.3f} secs)" if duration else ""

            operation_name = kwargs.get('operation_name')
            op_clause = f":'{operation_name}'" if operation_name else ""

            self.log.info(
                f"{self._type} '{pipeline_name}'{op_clause} - {action_clause} {duration_clause}"
            )

    @staticmethod
    def _propagate_operation_inputs_outputs(
            pipeline: Pipeline, sorted_operations: List[Operation]) -> None:
        """
        All previous operation outputs should be propagated throughout the pipeline.
        In order to process this recursively, the current operation's inputs should be combined
        from its parent's inputs (which, themselves are derived from the outputs of their parent)
        and its parent's outputs.
        """
        for operation in sorted_operations:
            parent_io = set()  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.update(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.update(parent_operation.outputs)

            if parent_io:
                parent_io.update(operation.inputs)
                operation.inputs = list(parent_io)

    @staticmethod
    def _sort_operations(operations_by_id: dict) -> List[Operation]:
        """
        Sort the list of operations based on its dependency graph
        """
        ordered_operations = []

        for operation in operations_by_id.values():
            PipelineProcessor._sort_operation_dependencies(
                operations_by_id, ordered_operations, operation)

        return ordered_operations

    @staticmethod
    def _sort_operation_dependencies(operations_by_id: dict,
                                     ordered_operations: list,
                                     operation: Operation) -> None:
        """
        Helper method to the main sort operation function
        """
        # Optimization: check if already processed
        if operation not in ordered_operations:
            # process each of the dependencies that needs to be executed first
            for parent_operation_id in operation.parent_operations:
                parent_operation = operations_by_id[parent_operation_id]
                if parent_operation not in ordered_operations:
                    PipelineProcessor._sort_operation_dependencies(
                        operations_by_id, ordered_operations, parent_operation)
            ordered_operations.append(operation)
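# --- Hedged illustration, not from the source: how _sort_operations orders a small
# dependency graph. 'Op' is a hypothetical stand-in exposing only the 'parent_operations'
# attribute that the sort helpers rely on.
class Op:
    def __init__(self, op_id, parent_operations=None):
        self.id = op_id
        self.parent_operations = parent_operations or []

ops_by_id = {
    'load': Op('load'),
    'train': Op('train', parent_operations=['load']),
    'report': Op('report', parent_operations=['train', 'load']),
}
ordered = PipelineProcessor._sort_operations(ops_by_id)
print([op.id for op in ordered])  # parents precede children: ['load', 'train', 'report']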
Example #5
class LTI11Authenticator(Authenticator):
    """
    JupyterHub LTI 1.1 Authenticator which extends the ltiauthenticator.LTIAuthenticator class.
    Messages sent to this authenticator are sent from a tool consumer (TC), such as
    an LMS. JupyterHub, as the authenticator, works as the tool provider (TP), also
    known as the external tool.

    The LTIAuthenticator base class defines the consumers, defined as 1 or (n) consumer key
    and shared secret k/v's to verify requests from their tool consumer.
    """

    auto_login = True
    login_service = "LTI 1.1"

    consumers = Dict(
        {},
        config=True,
        help="""
        A dict of consumer keys mapped to consumer secrets for those keys.
        Allows multiple consumers to securely send users to this JupyterHub
        instance.
        """,
    )

    username_key = Unicode(
        "custom_canvas_user_id",
        allow_none=True,
        config=True,
        help="""
        Key present in LTI 1.1 launch request used to set the user's JupyterHub's username.
        Some common examples include:
          - User's email address: lis_person_contact_email_primary
          - Canvas LMS custom user id: custom_canvas_user_id
        Your LMS (Canvas / Open EdX / Moodle / others) may provide additional keys in the
        LTI 1.1 launch request that you can use to set the username. In most cases these
        are prefixed with `custom_`. You may also have the option of using variable substitutions
        to fetch values that aren't provided with your vendor's standard LTI 1.1 launch request.
        Reference the IMS LTI specification on variable substitutions:
        https://www.imsglobal.org/specs/ltiv1p1p1/implementation-guide#toc-9.
        
        Current default behavior:
        
        To preserve legacy behavior, if custom_canvas_user_id is present in the LTI
        request, it is used as the username. If not, user_id is used. In the future,
        the default will be just user_id - if you want to use custom_canvas_user_id,
        you must explicitly set username_key to custom_canvas_user_id.
        """,
    )

    def get_handlers(self, app: JupyterHub) -> BaseHandler:
        return [("/lti/launch", LTI11AuthenticateHandler)]

    def login_url(self, base_url):
        return url_path_join(base_url, "/lti/launch")

    async def authenticate(  # noqa: C901
            self,
            handler: BaseHandler,
            data: dict = None) -> dict:  # noqa: C901
        """
        LTI 1.1 Authenticator. One or more consumer keys/values must be set in the jupyterhub config with the
        LTI11Authenticator.consumers dict.

        Args:
            handler: JupyterHub's Authenticator handler object. For LTI 1.1 requests, the handler is
              an instance of LTIAuthenticateHandler.
            data: optional data object

        Returns:
            Authentication dictionary

        Raises:
            HTTPError if the required values are not in the request
        """
        # log deprecation warning when using the default custom_canvas_user_id setting
        if self.username_key == "custom_canvas_user_id":
            self.log.warning(
                dedent(
                    """The default username_key 'custom_canvas_user_id' will be replaced by 'user_id' in a future release.
                Set c.LTIAuthenticator.username_key to `custom_canvas_user_id` to preserve current behavior.
                """))
        validator = LTI11LaunchValidator(self.consumers)

        self.log.debug("Original arguments received in request: %s" %
                       handler.request.arguments)

        # extract the request arguments to a dict
        args = convert_request_to_dict(handler.request.arguments)
        self.log.debug("Decoded args from request: %s" % args)

        # get the origin protocol
        protocol = get_client_protocol(handler)
        self.log.debug("Origin protocol is: %s" % protocol)

        # build the full launch url value required for oauth1 signatures
        launch_url = f"{protocol}://{handler.request.host}{handler.request.uri}"
        self.log.debug("Launch url is: %s" % launch_url)

        if validator.validate_launch_request(launch_url,
                                             handler.request.headers, args):

            # warn if the specified username_key is not in the request's arguments.
            if self.username_key not in args.keys():
                self.log.warning(
                    "The specified username_key '%s' did not match any of the launch request arguments.",
                    self.username_key,
                )

            # get the username_key. if empty, fetch the username from the request's user_id value.
            username = args.get(self.username_key)
            if not username:
                username = args.get("user_id")

            # if username is still empty or none, raise an http error.
            if not username:
                raise HTTPError(
                    400,
                    "The %s value in the launch request is empty or None." %
                    self.username_key,
                )

            # return standard authentication where all launch request arguments are added to the auth_state key
            # except for the oauth_* arguments.
            return {
                "name": username,
                "auth_state":
                {k: v
                 for k, v in args.items() if not k.startswith("oauth_")},
            }
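# --- Hypothetical jupyterhub_config.py fragment, not from the source, wiring up the
# configurables documented above; the key/secret and claim values are placeholders, and
# it assumes LTI11Authenticator is importable in the config file.
c.JupyterHub.authenticator_class = LTI11Authenticator
c.LTI11Authenticator.consumers = {"consumer-key": "consumer-shared-secret"}
c.LTI11Authenticator.username_key = "lis_person_contact_email_primary"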
Example #6
class KernelProvisionerFactory(SingletonConfigurable):
    """
    :class:`KernelProvisionerFactory` is responsible for creating provisioner instances.

    A singleton instance, `KernelProvisionerFactory` is also used by the :class:`KernelSpecManager`
    to validate `kernel_provisioner` references found in kernel specifications to confirm their
    availability (in cases where the kernel specification references a kernel provisioner that has
    not been installed into the current Python environment).

    Its `default_provisioner_name` attribute can be used to specify the default provisioner
    to use when a kernel_spec is found to not reference a provisioner.  Its value defaults to
    `"local-provisioner"` which identifies the local provisioner implemented by
    :class:`LocalProvisioner`.
    """

    GROUP_NAME = 'jupyter_client.kernel_provisioners'
    provisioners: Dict[str, EntryPoint] = {}

    default_provisioner_name_env = "JUPYTER_DEFAULT_PROVISIONER_NAME"
    default_provisioner_name = Unicode(
        config=True,
        help="""Indicates the name of the provisioner to use when no kernel_provisioner
        entry is present in the kernelspec.""",
    )

    @default('default_provisioner_name')
    def default_provisioner_name_default(self):
        return getenv(self.default_provisioner_name_env, "local-provisioner")

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

        for ep in KernelProvisionerFactory._get_all_provisioners():
            self.provisioners[ep.name] = ep

    def is_provisioner_available(self, kernel_spec: Any) -> bool:
        """
        Reads the associated ``kernel_spec`` to determine the provisioner and returns whether it
        exists as an entry_point (True) or not (False).  If the referenced provisioner is not
        in the current cache or cannot be loaded via entry_points, a warning message is issued
        indicating it is not available.
        """
        is_available: bool = True
        provisioner_cfg = self._get_provisioner_config(kernel_spec)
        provisioner_name = str(provisioner_cfg.get('provisioner_name'))
        if not self._check_availability(provisioner_name):
            is_available = False
            self.log.warning(
                f"Kernel '{kernel_spec.display_name}' is referencing a kernel "
                f"provisioner ('{provisioner_name}') that is not available.  "
                f"Ensure the appropriate package has been installed and retry."
            )
        return is_available

    def create_provisioner_instance(self, kernel_id: str, kernel_spec: Any,
                                    parent: Any) -> KernelProvisionerBase:
        """
        Reads the associated ``kernel_spec`` to see if it has a `kernel_provisioner` stanza.
        If one exists, it instantiates an instance.  If a kernel provisioner is not
        specified in the kernel specification, a default provisioner stanza is fabricated
        and instantiated corresponding to the current value of `default_provisioner_name` trait.
        The instantiated instance is returned.

        If the provisioner is found to not exist (not registered via entry_points),
        `ModuleNotFoundError` is raised.
        """
        provisioner_cfg = self._get_provisioner_config(kernel_spec)
        provisioner_name = str(provisioner_cfg.get('provisioner_name'))
        if not self._check_availability(provisioner_name):
            raise ModuleNotFoundError(
                f"Kernel provisioner '{provisioner_name}' has not been registered."
            )

        self.log.debug(
            f"Instantiating kernel '{kernel_spec.display_name}' with "
            f"kernel provisioner: {provisioner_name}")
        provisioner_class = self.provisioners[provisioner_name].load()
        provisioner_config = provisioner_cfg.get('config')
        provisioner: KernelProvisionerBase = provisioner_class(
            kernel_id=kernel_id,
            kernel_spec=kernel_spec,
            parent=parent,
            **provisioner_config)
        return provisioner

    def _check_availability(self, provisioner_name: str) -> bool:
        """
        Checks that the given provisioner is available.

        If the given provisioner is not in the current set of loaded provisioners an attempt
        is made to fetch the named entry point and, if successful, loads it into the cache.

        :param provisioner_name:
        :return:
        """
        is_available = True
        if provisioner_name not in self.provisioners:
            try:
                ep = self._get_provisioner(provisioner_name)
                self.provisioners[provisioner_name] = ep  # Update cache
            except NoSuchEntryPoint:
                is_available = False
        return is_available

    def _get_provisioner_config(self, kernel_spec: Any) -> Dict[str, Any]:
        """
        Return the kernel_provisioner stanza from the kernel_spec.

        Checks the kernel_spec's metadata dictionary for a kernel_provisioner entry.
        If found, it is returned, else one is created relative to the DEFAULT_PROVISIONER
        and returned.

        Parameters
        ----------
        kernel_spec : Any - this is a KernelSpec type but listed as Any to avoid circular import
            The kernel specification object from which the provisioner dictionary is derived.

        Returns
        -------
        dict
            The provisioner portion of the kernel_spec.  If one does not exist, it will contain
            the default information.  If no `config` sub-dictionary exists, an empty `config`
            dictionary will be added.
        """
        env_provisioner = kernel_spec.metadata.get('kernel_provisioner', {})
        if 'provisioner_name' in env_provisioner:
            # provisioner_name is present; add an empty config stanza if one is missing
            if 'config' not in env_provisioner:
                env_provisioner.update({"config": {}})
            return env_provisioner  # return what we found (plus config stanza if necessary)
        # no provisioner_name entry: fabricate a stanza for the default provisioner
        return {
            "provisioner_name": self.default_provisioner_name,
            "config": {}
        }

    def get_provisioner_entries(self) -> Dict[str, str]:
        """
        Returns a dictionary of provisioner entries.

        The key is the provisioner name for its entry point.  The value is the colon-separated
        string of the entry point's module name and object name.
        """
        entries = {}
        for name, ep in self.provisioners.items():
            entries[name] = f"{ep.module_name}:{ep.object_name}"
        return entries

    @staticmethod
    def _get_all_provisioners() -> List[EntryPoint]:
        """Wrapper around entrypoints.get_group_all() - primarily to facilitate testing."""
        return get_group_all(KernelProvisionerFactory.GROUP_NAME)

    def _get_provisioner(self, name: str) -> EntryPoint:
        """Wrapper around entrypoints.get_single() - primarily to facilitate testing."""
        try:
            ep = get_single(KernelProvisionerFactory.GROUP_NAME, name)
        except NoSuchEntryPoint:
            # Check if the entrypoint name is 'local-provisioner'.  Although this should never
            # happen, we have seen cases where the previous distribution of jupyter_client has
            # remained which doesn't include kernel-provisioner entrypoints (so 'local-provisioner'
            # is deemed not found even though its definition is in THIS package).  In such cases,
            # the entrypoints package uses what it first finds - which is the older distribution
            # resulting in a violation of a supposed invariant condition.  To address this scenario,
            # we will log a warning message indicating this situation, then build the entrypoint
            # instance ourselves - since we have that information.
            if name == 'local-provisioner':
                distros = glob.glob(
                    f"{path.dirname(path.dirname(__file__))}-*")
                self.log.warning(
                    f"Kernel Provisioning: The 'local-provisioner' is not found.  This is likely "
                    f"due to the presence of multiple jupyter_client distributions and a previous "
                    f"distribution is being used as the source for entrypoints - which does not "
                    f"include 'local-provisioner'.  That distribution should be removed such that "
                    f"only the version-appropriate distribution remains (version >= 7).  Until "
                    f"then, a 'local-provisioner' entrypoint will be automatically constructed "
                    f"and used.\nThe candidate distribution locations are: {distros}"
                )
                ep = EntryPoint('local-provisioner',
                                'jupyter_client.provisioning',
                                'LocalProvisioner')
            else:
                raise
        return ep
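# --- Hedged sketch, not from the source: how _get_provisioner_config falls back to the
# default stanza when a kernelspec carries no 'kernel_provisioner' metadata. 'fake_spec'
# is a hypothetical stand-in exposing only the attributes used above.
from types import SimpleNamespace

fake_spec = SimpleNamespace(display_name="Python 3", metadata={})
factory = KernelProvisionerFactory.instance()
print(factory._get_provisioner_config(fake_spec))
# e.g. {'provisioner_name': 'local-provisioner', 'config': {}} given the default trait value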
Example #7
class KernelProvisionerBase(ABC,
                            LoggingConfigurable,
                            metaclass=KernelProvisionerMeta):
    """
    Abstract base class defining methods for KernelProvisioner classes.

    A majority of methods are abstract (requiring implementations via a subclass) while
    some are optional and others provide implementations common to all instances.
    Subclasses should be aware of which methods require a call to the superclass.

    Many of these methods model those of :class:`subprocess.Popen` for parity with
    previous versions where the kernel process was managed directly.
    """

    # The kernel specification associated with this provisioner
    kernel_spec: Any = Instance('jupyter_client.kernelspec.KernelSpec',
                                allow_none=True)
    kernel_id: str = Unicode(None, allow_none=True)
    connection_info: KernelConnectionInfo = {}

    @property
    @abstractmethod
    def has_process(self) -> bool:
        """
        Returns true if this provisioner is currently managing a process.

        This property is asserted to be True immediately following a call to
        the provisioner's :meth:`launch_kernel` method.
        """
        pass

    @abstractmethod
    async def poll(self) -> Optional[int]:
        """
        Checks if kernel process is still running.

        If running, None is returned, otherwise the process's integer-valued exit code is returned.
        This method is called from :meth:`KernelManager.is_alive`.
        """
        pass

    @abstractmethod
    async def wait(self) -> Optional[int]:
        """
        Waits for kernel process to terminate.

        This method is called from `KernelManager.finish_shutdown()` and
        `KernelManager.kill_kernel()` when terminating a kernel gracefully or
        immediately, respectively.
        """
        pass

    @abstractmethod
    async def send_signal(self, signum: int) -> None:
        """
        Sends signal identified by signum to the kernel process.

        This method is called from `KernelManager.signal_kernel()` to send the
        kernel process a signal.
        """
        pass

    @abstractmethod
    async def kill(self, restart: bool = False) -> None:
        """
        Kill the kernel process.

        This is typically accomplished via a SIGKILL signal, which cannot be caught.
        This method is called from `KernelManager.kill_kernel()` when terminating
        a kernel immediately.

        restart is True if this operation will precede a subsequent launch_kernel request.
        """
        pass

    @abstractmethod
    async def terminate(self, restart: bool = False) -> None:
        """
        Terminates the kernel process.

        This is typically accomplished via a SIGTERM signal, which can be caught, allowing
        the kernel provisioner to perform possible cleanup of resources.  This method is
        called indirectly from `KernelManager.finish_shutdown()` during a kernel's
        graceful termination.

        restart is True if this operation precedes a start launch_kernel request.
        """
        pass

    @abstractmethod
    async def launch_kernel(self, cmd: List[str],
                            **kwargs: Any) -> KernelConnectionInfo:
        """
        Launch the kernel process and return its connection information.

        This method is called from `KernelManager.launch_kernel()` during the
        kernel manager's start kernel sequence.
        """
        pass

    @abstractmethod
    async def cleanup(self, restart: bool = False) -> None:
        """
        Cleanup any resources allocated on behalf of the kernel provisioner.

        This method is called from `KernelManager.cleanup_resources()` as part of
        its shutdown kernel sequence.

        restart is True if this operation precedes a start launch_kernel request.
        """
        pass

    async def shutdown_requested(self, restart: bool = False) -> None:
        """
        Allows the provisioner to determine if the kernel's shutdown has been requested.

        This method is called from `KernelManager.request_shutdown()` as part of
        its shutdown sequence.

        This method is optional and is primarily used in scenarios where the provisioner
        may need to perform other operations in preparation for a kernel's shutdown.
        """
        pass

    async def pre_launch(self, **kwargs: Any) -> Dict[str, Any]:
        """
        Perform any steps in preparation for kernel process launch.

        This includes applying additional substitutions to the kernel launch command
        and environment. It also includes preparation of launch parameters.

        NOTE: Subclass implementations are advised to call this method as it applies
        environment variable substitutions from the local environment and calls the
        provisioner's :meth:`_finalize_env()` method to allow each provisioner the
        ability to cleanup the environment variables that will be used by the kernel.

        This method is called from `KernelManager.pre_start_kernel()` as part of its
        start kernel sequence.

        Returns the (potentially updated) keyword arguments that are passed to
        :meth:`launch_kernel()`.
        """
        env = kwargs.pop('env', os.environ).copy()
        env.update(self.__apply_env_substitutions(env))
        self._finalize_env(env)
        kwargs['env'] = env

        return kwargs

    async def post_launch(self, **kwargs: Any) -> None:
        """
        Perform any steps following the kernel process launch.

        This method is called from `KernelManager.post_start_kernel()` as part of its
        start kernel sequence.
        """
        pass

    async def get_provisioner_info(self) -> Dict[str, Any]:
        """
        Captures the base information necessary for persistence relative to this instance.

        This enables applications that subclass `KernelManager` to persist a kernel provisioner's
        relevant information to accomplish functionality like disaster recovery or high availability
        by calling this method via the kernel manager's `provisioner` attribute.

        NOTE: The superclass method must always be called first to ensure proper serialization.
        """
        provisioner_info: Dict[str, Any] = {}
        provisioner_info['kernel_id'] = self.kernel_id
        provisioner_info['connection_info'] = self.connection_info
        return provisioner_info

    async def load_provisioner_info(self, provisioner_info: Dict) -> None:
        """
        Loads the base information necessary for persistence relative to this instance.

        The inverse of `get_provisioner_info()`, this enables applications that subclass
        `KernelManager` to re-establish communication with a provisioner that is managing
        a (presumably) remote kernel from an entirely different process that the original
        provisioner.

        NOTE: The superclass method must always be called first to ensure proper deserialization.
        """
        self.kernel_id = provisioner_info['kernel_id']
        self.connection_info = provisioner_info['connection_info']

    def get_shutdown_wait_time(self, recommended: float = 5.0) -> float:
        """
        Returns the time allowed for a complete shutdown. This may vary by provisioner.

        This method is called from `KernelManager.finish_shutdown()` during the graceful
        phase of its kernel shutdown sequence.

        The recommended value will typically be what is configured in the kernel manager.
        """
        return recommended

    def get_stable_start_time(self, recommended: float = 10.0) -> float:
        """
        Returns the expected upper bound for a kernel (re-)start to complete.
        This may vary by provisioner.

        The recommended value will typically be what is configured in the kernel restarter.
        """
        return recommended

    def _finalize_env(self, env: Dict[str, str]) -> None:
        """
        Ensures env is appropriate prior to launch.

        This method is called from `KernelProvisionerBase.pre_launch()` during the kernel's
        start sequence.

        NOTE: Subclasses should be sure to call super()._finalize_env(env)
        """
        if self.kernel_spec.language and self.kernel_spec.language.lower().startswith("python"):
            # Don't allow PYTHONEXECUTABLE to be passed to kernel process.
            # If set, it can bork all the things.
            env.pop('PYTHONEXECUTABLE', None)

    def __apply_env_substitutions(self, substitution_values: Dict[str, str]):
        """
        Walks entries in the kernelspec's env stanza and applies substitutions from current env.

        This method is called from `KernelProvisionerBase.pre_launch()` during the kernel's
        start sequence.

        Returns the substituted list of env entries.

        NOTE: This method is private and is not intended to be overridden by provisioners.
        """
        substituted_env = {}
        if self.kernel_spec:
            from string import Template

            # For each templated env entry, fill any templated references
            # matching names of env variables with those values and build
            # new dict with substitutions.
            templated_env = self.kernel_spec.env
            for k, v in templated_env.items():
                substituted_env.update(
                    {k: Template(v).safe_substitute(substitution_values)})
        return substituted_env
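# --- Self-contained illustration, not from the source, of the substitution technique used
# in __apply_env_substitutions above: ${...} references in kernelspec env values are filled
# from the launch environment; unknown names are left untouched by safe_substitute.
from string import Template

templated_env = {"PREFIX": "${HOME}/kernels", "UNTOUCHED": "${NOT_SET}"}
launch_env = {"HOME": "/home/jovyan"}
print({k: Template(v).safe_substitute(launch_env) for k, v in templated_env.items()})
# -> {'PREFIX': '/home/jovyan/kernels', 'UNTOUCHED': '${NOT_SET}'}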
Example #8
class LTI13Authenticator(OAuthenticator):
    """
    JupyterHub LTI 1.3 Authenticator which extends the `OAuthenticator` class. (LTI 1.3
    is basically an extension of OIDC/OAuth2). Messages sent to this authenticator are sent
    from a LTI 1.3 Platform, such as an LMS. JupyterHub, as the authenticator, works as the
    LTI 1.3 External Tool. The basic login flow uses the authorization code grant type. As such,
    the client id is only required if the JupyterHub is configured to send information back to the
    LTI 1.3 Platform, in which case it would require the client credentials grant type.

    This class utilizes the following configurables defined in the `OAuthenticator` base class
    (all are required unless stated otherwise):

        - authorize_url
        - oauth_callback_url
        - token_url
        - (Optional) client_id

    Ref:
      - https://github.com/jupyterhub/oauthenticator/blob/master/oauthenticator/oauth2.py
      - http://www.imsglobal.org/spec/lti/v1p3/
    """

    login_service = "LTI 1.3"

    # handlers used for login, callback, and jwks endpoints
    login_handler = LTI13LoginHandler
    callback_handler = LTI13CallbackHandler

    endpoint = Unicode(
        os.getenv("LTI13_ENDPOINT", ""),
        allow_none=False,
        config=True,
        help="""
        The platform's base endpoint used when redirecting requests to the platform
        after receiving the initial login request.
        """,
    )

    username_key = Unicode(
        "email",
        allow_none=False,
        config=True,
        help="""
        JWT claim present in LTI 1.3 login initiation flow used to set the user's JupyterHub's username.
        Some common examples include:

          - User's email address: email
          - Given name: given_name
        
        Your LMS (Canvas / Open EdX / Moodle / others) may provide additional keys in the
        LTI 1.3 login initiation flow that you can use to set the username. In most cases these
        are located in the `https://purl.imsglobal.org/spec/lti/claim/custom` claim. You may also
        have the option of using variable substitutions to fetch values that aren't provided with
        your vendor's standard LTI 1.3 login initiation flow request. If your platform's LTI 1.3
        settings are defined with privacy enabled, then by default the `sub` claim is used to set the
        username.

        Reference the IMS LTI specification on variable substitutions:
        http://www.imsglobal.org/spec/lti/v1p3/#customproperty.
        """,
    )

    tool_name = Unicode(
        "JupyterHub",
        config=True,
        help="""
        Name of tool provided to the LMS when installed via the config URL.

        This is primarily used for display purposes.
        """,
    )

    tool_description = Unicode(
        "Launch interactive Jupyter Notebooks with JupyterHub",
        config=True,
        help="""
        Description of tool provided to the LMS when installed via the config URL.

        This is primarily used for display purposes.
        """,
    )

    def get_handlers(self, app: JupyterHub) -> List[BaseHandler]:
        return [
            ("/lti13/config", LTI13ConfigHandler),
        ]

    async def authenticate(  # noqa: C901
            self,
            handler: LTI13LoginHandler,
            data: Dict[str, str] = None) -> Dict[str, str]:
        """
        Overrides authenticate from base class to handle LTI 1.3 authentication requests.

        Args:
          handler: handler object
          data: authentication dictionary

        Returns:
          Authentication dictionary
        """
        validator = LTI13LaunchValidator()

        # get jwks endpoint and token to use as args to decode jwt.
        self.log.debug(f"JWKS platform endpoint is {self.endpoint}")
        id_token = handler.get_argument("id_token")

        # extract claims from jwt (id_token) sent by the platform. as tool use the jwks (public key)
        # to verify the jwt's signature.
        jwt_decoded = await validator.jwt_verify_and_decode(
            id_token, self.endpoint, False, audience=self.client_id)
        self.log.debug(f"Decoded JWT: {jwt_decoded}")

        if validator.validate_launch_request(jwt_decoded):
            username = jwt_decoded.get(self.username_key)
            self.log.debug(
                f"Username_key is {self.username_key} and value fetched from JWT is {username}"
            )
            if not username:
                if "sub" in jwt_decoded and jwt_decoded["sub"]:
                    username = jwt_decoded["sub"]
                else:
                    raise HTTPError(400, "Unable to set the username")

            self.log.debug(f"username is {username}")

            return {
                "name": username,
                "auth_state": {k: v
                               for k, v in jwt_decoded.items()},  # noqa: E231
            }
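# --- Hypothetical jupyterhub_config.py fragment, not from the source, showing the traits
# defined above together with the OAuthenticator values the class docstring says are
# required; all URLs and the client id are placeholders, and it assumes LTI13Authenticator
# is importable in the config file.
c.JupyterHub.authenticator_class = LTI13Authenticator
c.LTI13Authenticator.endpoint = "https://lms.example.com/api/lti/security/jwks"
c.LTI13Authenticator.authorize_url = "https://lms.example.com/api/lti/authorize_redirect"
c.LTI13Authenticator.token_url = "https://lms.example.com/login/oauth2/token"
c.LTI13Authenticator.client_id = "10000000000001"
c.LTI13Authenticator.username_key = "given_name"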
Example #9
class MyClass(Configurable):
    name = Unicode(u'defaultname', help="the name of the object").tag(config=True)
    raise_config_file_errors = True
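# --- Minimal sketch, not from the source, of how the 'name' trait above is set through
# traitlets configuration rather than its default value.
from traitlets.config import Config

cfg = Config()
cfg.MyClass.name = "configuredname"
obj = MyClass(config=cfg)
print(obj.name)  # -> 'configuredname'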
Example #10
class HugoExporter(MarkdownExporter):
    """ Export a python notebook to markdown, with frontmatter for Hugo. Not much of this is particular to
    Metapack.

    The Frontmatter is contained in a cell of type RawNBConvert, tagged with the tag 'frontmatter', and
    formatted in YAML. For instance

        https://github.com/sandiegodata/notebooks/blob/master/crime/Crime%20Monthly%20Rhythm%20Maps.ipynb

    Has this frontmatter:

        draft: false
        weight: 3
        description: Rhythm maps for San Diego Crime incidents, from 2007 to 2014
        toc: false
        show_input: hide
        section: notebooks
        authors:
        - name: Eric Busboom
        github: https://github.com/sandiegodata/notebooks/blob/master/crime/Crime%20Monthly%20Rhythm%20Maps.ipynb

    """

    hugo_dir = Unicode(help="Root of the Hugo directory").tag(config=True)

    section = Unicode(
        help="Hugo section in which to write the converted notebook").tag(
            config=True)

    @default('section')
    def _section_file_default(self):
        return 'notebooks'

    @property
    def default_config(self):
        import metapack.jupyter.templates

        c = Config({})

        c.TemplateExporter.template_path = [
            dirname(metapack.jupyter.templates.__file__)
        ]
        c.TemplateExporter.template_file = 'markdown_hugo.tpl'

        c.MarkdownExporter.preprocessors = [
            'metapack.jupyter.preprocessors.OrganizeMetadata',
            HugoOutputExtractor
        ]

        c.merge(super(HugoExporter, self).default_config)

        c.ExtractOutputPreprocessor.enabled = False

        return c

    def get_creators(self, meta):

        for typ in ('wrangler', 'creator'):
            try:
                # Multiple authors
                for e in meta[typ]:
                    d = dict(e.items())
                    d['type'] = typ

                    yield d
            except AttributeError:
                # only one
                d = meta[typ]
                d['type'] = typ
                yield d
            except KeyError:
                pass

    def from_notebook_node(self, nb, resources=None, **kw):

        nb_copy = copy.deepcopy(nb)

        resources = self._init_resources(resources)

        if 'language' in nb['metadata']:
            resources['language'] = nb['metadata']['language'].lower()

        # Preprocess
        nb_copy, resources = self._preprocess(nb_copy, resources)

        # move over some more metadata
        if 'authors' not in nb_copy.metadata.frontmatter:
            nb_copy.metadata.frontmatter['authors'] = list(
                self.get_creators(nb_copy.metadata.metatab))

        # Other useful metadata
        if 'date' not in nb_copy.metadata.frontmatter:
            nb_copy.metadata.frontmatter['date'] = datetime.now().isoformat()

        resources.setdefault('raw_mimetypes', self.raw_mimetypes)
        resources['global_content_filter'] = {
            'include_code': not self.exclude_code_cell,
            'include_markdown': not self.exclude_markdown,
            'include_raw': not self.exclude_raw,
            'include_unknown': not self.exclude_unknown,
            'include_input': not self.exclude_input,
            'include_output': not self.exclude_output,
            'include_input_prompt': not self.exclude_input_prompt,
            'include_output_prompt': not self.exclude_output_prompt,
            'no_prompt': self.exclude_input_prompt
            and self.exclude_output_prompt,
        }

        slug = nb_copy.metadata.frontmatter.slug

        # Rebuild all of the image names
        for cell_index, cell in enumerate(nb_copy.cells):
            for output_index, out in enumerate(cell.get('outputs', [])):

                if 'metadata' in out:
                    for type_name, fn in list(
                            out.metadata.get('filenames', {}).items()):
                        if fn in resources['outputs']:
                            html_path = join('img', slug, basename(fn))
                            file_path = join(self.hugo_dir, 'static',
                                             html_path)

                            resources['outputs'][file_path] = resources[
                                'outputs'][fn]
                            del resources['outputs'][fn]

                            # Can't put the '/' in the join() or it will be absolute

                            out.metadata.filenames[type_name] = '/' + html_path

        output = self.template.render(nb=nb_copy, resources=resources)

        section = nb_copy.metadata.frontmatter.get('section') or self.section

        # Don't know why this isn't being set from the config
        # resources['output_file_dir'] = self.config.NbConvertApp.output_base

        # Setting full path to subvert the join() in the file writer. I can't
        # figure out how to set the output directories from this function
        resources['unique_key'] = join(self.hugo_dir, 'content', section, slug)

        # Probably should be done with a postprocessor.
        output = re.sub(r'__IMGDIR__', join('/img', slug), output)

        return output, resources
Example #11
class DocumentationExporter(MetatabExporter):
    """Exports multiple forms of documentation"""

    metadata = Dict(help='Extra metadata, added to the \'metatab\' key', default_value={}).tag(config=True)

    base_name = Unicode(u'documentation',
                        help="Base name for the documentation file").tag(config=True)

    @property
    def default_config(self):
        import metapack.jupyter.templates

        c = Config()

        c.TemplateExporter.template_path = [dirname(metapack.jupyter.templates.__file__)]

        c.HTMLExporter.preprocessors = [
            'metapack.jupyter.preprocessors.NoShowInput',
            'metapack.jupyter.preprocessors.RemoveMetatab',
            'metapack.jupyter.preprocessors.HtmlBib'
        ]

        c.HTMLExporter.exclude_input_prompt = True
        c.HTMLExporter.exclude_output_prompt = True

        c.MarkdownExporter.preprocessors = ['metapack.jupyter.preprocessors.RemoveMagics']

        c.PDFExporter.preprocessors = [
            # 'metapack.jupyter.preprocessors.NoShowInput',
            'metapack.jupyter.preprocessors.RemoveMetatab',
            'metapack.jupyter.preprocessors.LatexBib',
            'metapack.jupyter.preprocessors.MoveTitleDescription'
        ]

        c.PDFExporter.exclude_input_prompt = True
        # Excluding the output prompt also excludes the output tables.
        # .PDFExporter.exclude_output_prompt = True

        c.merge(super(DocumentationExporter, self).default_config)
        return c

    def from_notebook_node(self, nb, resources=None, **kw):

        nb_copy = copy.deepcopy(nb)

        nb_copy['metadata']['metatab'] = self.metadata

        # get the Normal HTML output:
        output, resources = HTMLExporter(config=self.config).from_notebook_node(nb_copy)

        resources['unique_key'] = 'notebook'

        # Get all of the image resources
        nb_copy, resources = self.extract_resources(nb_copy, resources)

        # Add resources for the html and markdown version of the notebook

        self.add_pdf(nb_copy, resources)
        self.add_markdown_doc(nb_copy, resources)
        self.add_html_doc(nb_copy, resources)
        self.add_basic_html_doc(nb_copy, resources)

        return output, resources

    def extract_resources(self, nb, resources):

        output_filename_template = "image_{cell_index}_{index}{extension}"

        return ExtractOutputPreprocessor(output_filename_template=output_filename_template) \
            .preprocess(nb, resources)

    def add_pdf(self, nb, resources):
        from ipython_genutils.py3compat import which

        template_file = 'notebook.tplx'

        exp = PDFExporter(config=self.config, template_file=template_file)

        if not which(exp.latex_command[0]):
            return

        (body, _) = exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.pdf'] = body

        exp = LatexExporter(config=self.config, template_file=template_file)

        (body, _) = exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.latex'] = body.encode('utf-8')

    def add_basic_html_doc(self, nb, resources):
        html_exp = HTMLExporter(config=self.config, template_file='hide_input_html_basic.tpl')

        (html_basic_body, _) = html_exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'_html_body.html'] = html_basic_body.encode('utf-8')

    def add_html_doc(self, nb, resources):
        html_exp = HTMLExporter(config=self.config, template_file='hide_input_html.tpl')

        (html_full_body, _) = html_exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.html'] = html_full_body.encode('utf-8')

    def add_markdown_doc(self, nb, resources):

        exp = MarkdownExporter(config=self.config)
        (md_body, _) = exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.md'] = md_body.encode('utf-8')

    def update_metatab(self, doc, resources):
        """Add documentation entries for resources"""

        if 'Documentation' not in doc:
            doc.new_section("Documentation")

        ds = doc['Documentation']

        if 'Name' not in ds.args:
            ds.add_arg('Name', prepend=True)

        # This is the main output from the HTML exporter, not a resource.
        ds.new_term('Root.Documentation', 'docs/notebook.html', name="notebook.html", title='Jupyter Notebook (HTML)')

        for name, data in resources.get('outputs', {}).items():

            if name == 'documentation.html':
                ds.new_term('Root.Documentation', 'docs/' + name, name=name, title='Primary Documentation (HTML)')

            elif name == 'html_basic_body.html':
                pass
            elif name.endswith('.html'):
                ds.new_term('Root.Documentation', 'docs/' + name, name=name, title='Documentation (HTML)')
            elif name.endswith('.md'):
                ds.new_term('Root.Documentation', 'docs/' + name, name=name, title='Documentation (Markdown)')
            elif name.endswith('.pdf'):
                ds.new_term('Root.Documentation', 'docs/' + name, name=name, title='Documentation (PDF)')
            elif name.endswith('.png'):
                ds.new_term('Root.Image', 'docs/' + name, name=name, title='Image for HTML Documentation')
            else:
                pass
Example #12
class DependencyExtractor(Application):

    name = Unicode(u'nbflow')
    description = Unicode(
        u'Extract the hierarchy of dependencies from notebooks in the specified folder.'
    )
    version = __version__

    def extract_parameters(self, nb):
        # find the first code cell
        defs_cell = None
        for cell in nb.cells:
            if cell.cell_type == 'code':
                defs_cell = cell
                break

        if defs_cell is None:
            return {}

        defs_code = defs_cell.source
        globals_dict = {}
        locals_dict = {}
        exec(defs_code, globals_dict, locals_dict)
        return locals_dict

    def resolve_path(self, source, path):
        dirname = os.path.dirname(source)
        return os.path.abspath(os.path.join(dirname, path))

    def get_dependencies(self, dirnames):
        dependencies = {}

        for dirname in dirnames:
            files = glob.glob("{}/*.ipynb".format(dirname))

            for filename in files:
                modname = os.path.splitext(os.path.basename(filename))[0]
                with open(filename, "r") as fh:
                    nb = reads(fh.read())

                params = self.extract_parameters(nb)
                if '__depends__' not in params:
                    continue
                if '__dest__' not in params:
                    raise ValueError(
                        "__dest__ is not defined in {}".format(filename))

                # get sources that are specified in the file
                sources = [
                    self.resolve_path(filename, x)
                    for x in params['__depends__']
                ]

                targets = params['__dest__']
                if not hasattr(targets, '__iter__'):
                    if targets is None:
                        targets = []
                    else:
                        targets = [targets]
                targets = [self.resolve_path(filename, x) for x in targets]

                dependencies[os.path.join(dirname,
                                          '{}.ipynb'.format(modname))] = {
                                              'targets': targets,
                                              'sources': sources
                                          }

        return json.dumps(dependencies, indent=2)

    def start(self):
        if len(self.extra_args) == 0:
            self.log.error("No directory names specified.")
            sys.exit(1)

        print(self.get_dependencies(self.extra_args))
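# --- Illustration, not from the source, of the convention extract_parameters relies on:
# the first code cell of each notebook defines __depends__ and __dest__, which are exec'd
# and read back from the cell's local namespace.  The cell source below is hypothetical.
first_cell_source = '__depends__ = ["data/raw.csv"]\n__dest__ = "results/summary.json"\n'
cell_locals = {}
exec(first_cell_source, {}, cell_locals)
print(cell_locals["__depends__"], cell_locals["__dest__"])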
Example #13
class Builder(Application):
    config_file = Unicode('builder_config.py', config=True)

    build_name = Unicode(None, allow_none=True, config=True)

    source_url = Unicode(None, allow_none=True, config=True)

    source_ref = Unicode('master', allow_none=True, config=True)

    output_image_spec = Unicode(None, allow_none=True, config=True)

    git_workdir = Unicode("/tmp/git", config=True)

    buildpacks = List(None, [DockerBuildPack, PythonBuildPack], config=True)

    aliases = Dict({
        'source': 'Builder.source_url',
        'ref': 'Builder.source_ref',
        'output': 'Builder.output_image_spec',
        'f': 'Builder.config_file',
        'n': 'Builder.build_name'
    })

    def fetch(self, url, ref, output_path):
        try:
            for line in execute_cmd(['git', 'clone', url, output_path]):
                self.log.info(line, extra=dict(phase='fetching'))
        except subprocess.CalledProcessError:
            self.log.error('Failed to clone repository!',
                           extra=dict(phase='failed'))
            sys.exit(1)

        try:
            for line in execute_cmd([
                    'git', '--git-dir',
                    os.path.join(output_path, '.git'), 'reset', '--hard', ref
            ]):
                self.log.info(line, extra=dict(phase='fetching'))
        except subprocess.CalledProcessError:
            self.log.error('Failed to check out ref %s',
                           ref,
                           extra=dict(phase='failed'))
            sys.exit(1)

    def initialize(self, *args, **kwargs):
        super().initialize(*args, **kwargs)
        logHandler = logging.StreamHandler()
        formatter = jsonlogger.JsonFormatter()
        logHandler.setFormatter(formatter)
        # Need to reset existing handlers, or we repeat messages
        self.log.handlers = []
        self.log.addHandler(logHandler)
        self.log.setLevel(logging.INFO)
        self.load_config_file(self.config_file)

    def run(self):
        # HACK: Try to just pull this and see if that works.
        # if it does, then just bail.
        # WHAT WE REALLY WANT IS TO NOT DO ANY WORK IF THE IMAGE EXISTS
        client = docker.APIClient(base_url='unix://var/run/docker.sock',
                                  version='auto')

        repo, tag = self.output_image_spec.split(':')
        for line in client.pull(
                repository=repo,
                tag=tag,
                stream=True,
        ):
            progress = json.loads(line.decode('utf-8'))
            if 'error' in progress:
                break
        else:
            return

        output_path = os.path.join(self.git_workdir, self.build_name)
        self.fetch(self.source_url, self.source_ref, output_path)
        for bp_class in self.buildpacks:
            bp = bp_class()
            if bp.detect(output_path):
                self.log.info('Using %s builder',
                              bp.name,
                              extra=dict(phase='building'))
                bp.build(output_path, self.source_ref, self.output_image_spec)
                break
        else:
            self.log.error(
                'Could not figure out how to build this repository! Tell us?',
                extra=dict(phase='failed'))
            sys.exit(1)

        # Build a progress setup for each layer, and only emit per-layer info every 1.5s
        layers = {}
        last_emit_time = time.time()
        for line in client.push(self.output_image_spec, stream=True):
            progress = json.loads(line.decode('utf-8'))
            if 'id' not in progress:
                continue
            if 'progressDetail' in progress and progress['progressDetail']:
                layers[progress['id']] = progress['progressDetail']
            else:
                layers[progress['id']] = progress['status']
            if time.time() - last_emit_time > 1.5:
                self.log.info('Pushing image',
                              extra=dict(progress=layers, phase='pushing'))
                last_emit_time = time.time()