Exemple #1
0
def register_worker_magic(connection_info, magic_name="worker"):
    """Install a line magic and a cell magic that execute code remotely.

    A blocking kernel client is connected with ``connection_info`` and one
    handler is registered under ``magic_name`` for both magic kinds. The
    handler forwards the code to ``run_cell_remote``.
    """
    shell = get_ipython()
    client = BlockingKernelClient()
    client.load_connection_info(connection_info)
    client.start_channels()

    def remote(line, cell=None):
        """Run the current cell on a remote IPython kernel"""
        # Invoked as a line magic there is no cell: the line is the code.
        code = line if cell is None else cell
        run_cell_remote(shell, client, code)

    # Keep the client reachable from the handler, largely for mocking.
    remote.client = client
    for kind in ("line", "cell"):
        shell.register_magic_function(remote,
                                      magic_kind=kind,
                                      magic_name=magic_name)
Exemple #2
0
 def create_kernel_client(self, ci):
     """Return a blocking kernel client connected with connection info ``ci``.

     ``start_channels`` is asked for the shell channel only; iopub, stdin
     and heartbeat are explicitly disabled.
     """
     channel_flags = {"shell": True, "iopub": False, "stdin": False, "hb": False}
     client = BlockingKernelClient()
     client.load_connection_info(ci)
     client.start_channels(**channel_flags)
     return client
Exemple #3
0
def test_start_ipython_scheduler(loop, zmq_ctx):
    """Execute code on the kernel started by ``start_ipython_scheduler``.

    Connects a blocking kernel client with the returned connection info,
    executes the expression ``scheduler`` and checks that the shell reply
    belongs to that request and reports status ``ok``.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(s["address"], loop=loop) as e:
            info = e.start_ipython_scheduler()
            kc = BlockingKernelClient()
            kc.load_connection_info(info)
            kc.start_channels()
            try:
                # Same sequence as test_start_ipython_workers; presumably
                # waiting for readiness keeps startup replies from being
                # mistaken for the execute reply -- TODO confirm.
                kc.wait_for_ready(timeout=10)
                msg_id = kc.execute("scheduler")
                reply = kc.get_shell_msg(timeout=10)
                assert reply["parent_header"]["msg_id"] == msg_id
                assert reply["content"]["status"] == "ok"
            finally:
                # Always release the channels, even when a check fails.
                kc.stop_channels()
Exemple #4
0
def test_start_ipython_workers(loop, zmq_ctx):
    """Execute code on a kernel started by ``start_ipython_workers``.

    Takes the first connection info returned, connects a blocking kernel
    client, executes the expression ``worker`` and checks that the shell
    reply belongs to that request and reports status ``ok``.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(s["address"], loop=loop) as e:
            info_dict = e.start_ipython_workers()
            info = first(info_dict.values())
            kc = BlockingKernelClient()
            kc.load_connection_info(info)
            kc.start_channels()
            try:
                kc.wait_for_ready(timeout=10)
                msg_id = kc.execute("worker")
                reply = kc.get_shell_msg(timeout=10)
                assert reply["parent_header"]["msg_id"] == msg_id
                assert reply["content"]["status"] == "ok"
            finally:
                # Always release the channels, even when a check fails.
                kc.stop_channels()
Exemple #5
0
def remote_magic(line, cell=None):
    """A magic for running code on a specified remote worker

    The connection_info dict of the worker will be looked up
    as the first positional arg to the magic.
    The rest of the line (or the entire cell for a %%cell magic)
    will be passed to the remote kernel.

    Usage:

        info = e.start_ipython(worker)[worker]
        %remote info print(worker.data)

    Raises:
        ValueError: if the line is empty, or if it is used as a line magic
            without any code after the connection info name.
        NameError: if the connection info name is not in the user namespace.
    """
    # get connection info from IPython's user namespace
    ip = get_ipython()
    split_line = line.split(None, 1)
    if not split_line:
        # An empty line would otherwise surface as a bare IndexError.
        raise ValueError("I need the name of a connection info variable!")
    info_name = split_line[0]
    if info_name not in ip.user_ns:
        raise NameError(info_name)
    connection_info = dict(ip.user_ns[info_name])

    if not cell:  # line magic, use the rest of the line
        if len(split_line) == 1:
            raise ValueError("I need some code to run!")
        cell = split_line[1]

    # turn info dict to hashable str for use as lookup key in _clients cache
    key = ",".join(map(str, sorted(connection_info.items())))

    if key in remote_magic._clients:
        kc = remote_magic._clients[key]
    else:
        kc = BlockingKernelClient()
        kc.load_connection_info(connection_info)
        kc.start_channels()
        try:
            kc.wait_for_ready(timeout=10)
        except Exception:
            # The client is not cached on failure, so nothing else would
            # ever stop the channels that were just started.
            kc.stop_channels()
            raise
        remote_magic._clients[key] = kc

    # actually run the code
    run_cell_remote(ip, kc, cell)
Exemple #6
0
class JupyterRAMUsage(Stat):
    """
    tag: ``jupyter.ram_usage``

    settings:

    .. code-block:: javascript

        {
            "connection info": "",
            "query interval [s]": 1
        }

    Tracks the RAM usage of all variables in
    a user-specified jupyter notebook. If no connection info is given in the
    settings, take the kernel with the latest start date.

    ``connection info`` must be a string containing the info displayed when
    running ``%connect_info`` in a jupyter notebook
    (you can directly copy-paste it).

    ``query interval [s]`` specifies how often the thread running in the
    jupyter notebook should read the variables. The lower this is, the higher
    the resolution of the stat but it might start affecting the speed of
    your notebook when too low.

    Note that RAM tracked in this way is not equal to the actual RAM
    the OS needs because some further optimization is done by e. g. numpy
    to reduce the OS memory usage.
    """
    name = 'RAM Usage of objects in a Python Jupyter Notebook [MB]'
    base_tag = 'ram_usage'
    default_settings = {
        'connection info': '',
        # how often the memory usage is read in the jupyter notebook
        'query interval [s]': 1.
    }

    @classmethod
    def _read_latest_connection_file(cls):
        """
        Reads the latest jupyter kernel connection file.
        https://jupyter.readthedocs.io/en/latest/projects/jupyter-directories.html.

        Returns the parsed JSON content of the ``kernel-*.json`` file with
        the largest ``os.path.getctime`` in the jupyter runtime directory,
        or `None` if there is no such file.
        """
        runtime_dir = jupyter_runtime_dir()
        files = glob.glob(os.path.join(runtime_dir, 'kernel-*.json'))
        if not files:
            return None

        # use the latest connection file
        connection_file = max(files, key=os.path.getctime)
        with open(connection_file, 'r') as f:
            return json.load(f)

    @classmethod
    def get_connection_info(cls):
        """
        Get the target kernel connection info.
        Returns a dictionary of the connection info supplied
        in the settings, or the latest started kernel if none is given.
        Returns `None` if no kernel has been found.
        """
        if not cls.settings['connection info']:
            return cls._read_latest_connection_file()
        return json.loads(cls.settings['connection info'])

    @classmethod
    def check_availability(cls):
        """
        Raise `StatNotAvailableError` if no connection info can be found.
        """
        # the stat is not available if no suitable connection info
        # can be found
        if cls.get_connection_info() is None:
            raise exceptions.StatNotAvailableError(
                'Could not find any running kernel.')

    def __init__(self, fps):
        """
        Connect to the target kernel and start the measuring thread in it.

        Creates (or truncates) the csv file used to exchange measurements,
        builds the setup and teardown code, connects a blocking kernel
        client and executes the setup code in the kernel.
        """
        self.config = self.get_connection_info()
        data_dir = appdirs.user_data_dir('permon', 'bminixhofer')
        os.makedirs(data_dir, exist_ok=True)

        self.usage_file = os.path.join(data_dir, 'jupyter_ram_usage.csv')
        # create the file (or empty a stale one) so get_stat can always open it
        with open(self.usage_file, 'w'):
            pass

        # self.setup_code is the code that is run in the notebook when the
        # stat is instantiated. It starts a thread which reads the memory
        # usage of all public variables in a set interval and saves it to a
        # csv file in the user data directory.
        # The path is interpolated with !r so that backslashes (Windows
        # paths) and quotes end up as a valid Python string literal, and the
        # file is opened with newline='' as the csv module requires.
        self.setup_code = f"""
if '_permon_running' not in globals() or not _permon_running:
    import threading
    import csv
    import sys
    import time
    from pympler import asizeof
    from types import ModuleType

    def _permon_get_ram_usage_per_object():
        while _permon_running:
            ram_usage = []
            global_vars = [key for key in globals() if not key.startswith('_')]
            for name in global_vars:
                value = globals()[name] if name in globals() else None
                if isinstance(value, ModuleType):
                    continue

                try:
                    ram_usage.append((name, asizeof.asizeof(value)))
                except TypeError:
                    continue

            with open({self.usage_file!r}, 'w', newline='') as f:
                writer = csv.writer(f, delimiter=',')
                for name, ram in ram_usage:
                    writer.writerow([name, ram])
            time.sleep({self.settings['query interval [s]']})

    _permon_thread = threading.Thread(target=_permon_get_ram_usage_per_object)
    _permon_running = True
    _permon_thread.start()
"""
        self.teardown_code = """
_permon_running = False
"""
        self.client = BlockingKernelClient()
        self.client.load_connection_info(self.config)
        self.client.start_channels()
        self.client.execute(self.setup_code)
        super(JupyterRAMUsage, self).__init__(fps=fps)

    def __del__(self):
        # stop the thread running in the jupyter notebook
        # and stop the connection to the kernel upon deletion
        client = getattr(self, 'client', None)
        if client is None:
            # __init__ failed before the client was created: nothing to undo
            return
        client.execute(self.teardown_code)
        client.stop_channels()

    def get_stat(self):
        """
        Read the measurements written by the thread in the notebook.

        Returns a tuple ``(total, top)`` where ``total`` is the sum of all
        values in MB and ``top`` is a list of up to five ``(name, MB)``
        tuples with the largest values first.
        """
        # reads the csv file the setup code has written to
        ram_usage = []
        with open(self.usage_file, 'r', newline='') as f:
            for row in csv.reader(f):
                try:
                    ram_usage.append((row[0], float(row[1]) / 1000**2))
                except (IndexError, ValueError):
                    # the notebook thread rewrites the file concurrently, so
                    # a blank or half-written row can be seen; skip it
                    continue
        # sort the ram_usage list so that the largest variables come first
        ram_usage = sorted(ram_usage, key=lambda x: x[1], reverse=True)

        # return the sum of RAM usage and the variables taking up the most RAM
        return sum(x[1] for x in ram_usage), ram_usage[:5]

    @property
    def minimum(self):
        return 0

    @property
    def maximum(self):
        return None
 def create_kernel_client(self, ci):
     """Return a blocking kernel client connected with connection info ``ci``.

     The client's channels are started with ``start_channels`` defaults.
     """
     client = BlockingKernelClient()
     client.load_connection_info(ci)
     client.start_channels()
     return client
Exemple #8
0
class SshKernel:
    """Remote ipykernel via SSH

    Raises:
        SshKernelException: "Could not initialize kernel"
        SshKernelException: "Could not create kernel_info file"

        Arguments:
            host {str} -- host where the remote ipykernel should be started
            connection_info {dict} -- Local ipykernel connection info as provided by Jupyter lab
            python_path {str} -- Remote python path to be used to start ipykernel

        Keyword Arguments:
            sudo {bool} -- Start ipykernel as root if necessary (default: {False})
            timeout {int} -- SSH connection timeout (default: {5})
            env {str or list of str} -- Environment variables passed to the ipykernel, either as one
                string "VAR1=VAL1 VAR2=VAL2" or as a list ["VAR1=VAL1", "VAR2=VAL2"] (default: {""})
            ssh_config {str} -- Path to the local SSH config file (default: {Path.home() / ".ssh" / "config"})
            quiet {bool} -- Pass "-q" to ssh (default: {True})
            verbose {bool} -- Pass "-v" to ssh when quiet is False (default: {False})
            msg_interval {int} -- Seconds between "alive" log messages; converted to a number of
                check_alive calls by dividing by timeout (default: {30})
            logger -- Logger to use; one is created with setup_logging if None (default: {None})
    """
    def __init__(
        self,
        host,
        connection_info,
        python_path,
        sudo=False,
        timeout=5,
        env="",
        ssh_config=None,
        quiet=True,
        verbose=False,
        msg_interval=30,
        logger=None,
    ):
        self.host = host
        self.connection_info = connection_info
        self.python_full_path = PurePosixPath(python_path) / "bin/python"
        self.sudo = sudo
        self.timeout = timeout
        self.env = env
        self.ssh_config = (Path.home() / ".ssh" /
                           "config" if ssh_config is None else ssh_config
                           )  # OS specific path

        self.quiet = quiet
        self.verbose = verbose

        self._connection = None

        self.remote_ports = {}
        self.uuid = str(uuid.uuid4())
        self.fname = "/tmp/.ssh_ipykernel_%s.json" % self.uuid  # POSIX path

        if logger is None:
            self._logger = setup_logging("SshKernel")
        else:
            self._logger = logger

        self._logger.debug("Remote kernel info file: {0}".format(self.fname))
        self._logger.debug(
            "Local connection info: {0}".format(connection_info))

        self.kernel_pid = 0
        self.status = Status(connection_info, self._logger)
        # check_alive computes `msg_counter % msg_interval`, so the interval
        # must never be 0 (which happens when msg_interval < timeout).
        self.msg_interval = max(1, int(msg_interval / timeout))
        self.msg_counter = 0

    def _execute(self, cmd):
        """Run cmd and return (0, output), or (returncode, exception args) on failure."""
        try:
            result = subprocess.check_output(cmd)
            return 0, result
        except subprocess.CalledProcessError as e:
            return e.returncode, e.args

    def _ssh(self, cmd):
        """Run cmd on self.host through the SSH executable, see _execute."""
        return self._execute([SSH, self.host, cmd])

    def _remote_command(self):
        """Build the shell command that starts ipykernel on the remote host."""
        sudo = "sudo " if self.sudo else ""

        if self.env is None:
            env = ""
        elif isinstance(self.env, str):
            # joining a str would put a space between every character
            env = self.env
        else:
            env = " ".join(self.env)

        return "{sudo} {env} {python} -m ipykernel_launcher -f {fname}".format(
            sudo=sudo, env=env, python=self.python_full_path, fname=self.fname)

    def close(self):
        """Log out of the SSH connection and stop the kernel client channels

        Does nothing when no connection has been started.
        """
        if self._connection is not None:
            if self._connection.isalive():
                self._connection.logout()
                self._logger.debug("Ssh connection closed")
            if self.kc.is_alive():
                self.kc.stop_channels()
                self._logger.debug("Kernel client channels stopped")

    def create_remote_connection_info(self):
        """Create a remote ipykernel connection info file
        Formats KERNEL_SCRIPT with the file name and the local connection info and runs it
        remotely via ssh. The output is parsed as json and stored in self.remote_ports to
        build the SSH tunnels later.

        Raises:
            SshKernelException: "Could not create kernel_info file"
        """
        self._logger.info("Creating remote connection info")
        script = KERNEL_SCRIPT.format(fname=self.fname, **self.connection_info)

        # collapse the script into a single line usable with `python -c`
        cmd = "{python} -c '{command}'".format(python=self.python_full_path,
                                               command="; ".join(
                                                   script.strip().split("\n")))

        result = self._ssh(cmd)
        self._logger.debug(result)
        if result[0] == 0:
            self.remote_ports = json.loads(result[1].decode("utf-8"))
            self._logger.debug("Local ports  = %s" % {
                k: v
                for k, v in self.connection_info.items() if "_port" in k
            })
            self._logger.debug("Remote ports = %s" % self.remote_ports)
        else:
            self.status.set_unreachable(self.kernel_pid, self.sudo)
            raise SshKernelException("Could not create kernel_info file")

    def kernel_client(self):
        """Create self.kc, a blocking kernel client for the local connection info"""
        self.kc = BlockingKernelClient()
        self.kc.load_connection_info(self.connection_info)
        self.kc.start_channels()

    def kernel_init(self):
        """Retrieve the pid of the remote kernel and store it in self.kernel_pid

        Returns:
            bool -- True when the pid was retrieved, False when the kernel is not alive

        Raises:
            SshKernelException: "Could not initialize kernel" after repeated timeouts
        """
        done = False
        if self.check_alive(show_pid=False):
            i = 0
            while not done:
                try:
                    i += 1
                    self._logger.debug("Retrieving kernel pid, attempt %d" % i)
                    result = self.kc.execute_interactive(
                        "import os",
                        user_expressions={"pid": "os.getpid()"},
                        store_history=False,
                        silent=True,
                        timeout=2,
                    )
                    self._logger.debug("result = %s" % str(result["content"]))
                    self.kernel_pid = int(result["content"]["user_expressions"]
                                          ["pid"]["data"]["text/plain"])
                    self._logger.debug("Remote kernel pid %d" %
                                       self.kernel_pid)
                    done = True
                except Exception as ex:
                    msg = str(ex)
                    if msg == "Timeout waiting for output":
                        self._logger.warning("Warning: {}".format(msg))
                        if i > 5:
                            self._logger.error(
                                "Max attempts (5) reached, stopping")
                            raise SshKernelException(
                                "Could not initialize kernel")
                    else:
                        # NOTE(review): other errors are retried without a
                        # limit -- confirm this is intended.
                        self._logger.error("Warning: {}".format(str(ex)))
        return done

    def kernel_customize(self):
        """Hook for sub classes, called after the kernel was initialized"""
        pass

    def check_alive(self, show_pid=True):
        """Return whether both the SSH connection and the kernel client are alive

        The state is logged when it is "not alive" and otherwise on every
        self.msg_interval-th call.
        """
        alive = self._connection.isalive() and self.kc.is_alive()
        if show_pid:
            msg = "Remote kernel ({}, pid = {}) is {}alive".format(
                self.host, self.kernel_pid, "" if alive else "not ")
        else:
            msg = "Remote kernel is {}alive".format("" if alive else "not ")

        if not alive or self.msg_counter % self.msg_interval == 0:
            self.msg_counter = 0
            self._logger.info(msg)

        self.msg_counter += 1
        return alive

    def interrupt_kernel(self):
        """Send an interrupt through the SSH connection (not on Windows)"""
        if self._connection.isalive():
            if is_windows:
                self._logger.warning(
                    'On Windows use "Interrupt remote kernel" button')
            else:
                self._logger.warning("Sending interrupt to remote kernel")
                self._connection.sendintr()  # send SIGINT

    def start_kernel_and_tunnels(self):
        """Start Kernels and SSH tunnels
        A new ssh child process will be spawned that will
        - set up the necessary ssh tunnels between remote kernel ports and local kernel ports
        - start the ipykernel on the remote host
        Afterwards the output of the child process is logged line by line until it exits.
        If spawning or initializing fails, the error is logged and the process exits with code 1.
        """
        self._logger.info("Setting up ssh tunnels")

        ssh_tunnels = []
        for port_name, remote_port in self.remote_ports.items():
            ssh_tunnels += [
                "-L",
                "{local_port}:127.0.0.1:{remote_port}".format(
                    local_port=self.connection_info[port_name],
                    remote_port=remote_port,
                ),
            ]

        self._logger.info("Starting remote kernel")

        # Build remote command
        cmd = self._remote_command()

        # Build ssh command with all flags and tunnels
        if self.quiet:
            args = ["-q"]
        elif self.verbose:
            args = ["-v"]
        else:
            args = []
        args += ["-t", "-F", str(self.ssh_config)
                 ] + ssh_tunnels + [self.host, cmd]

        self._logger.debug("%s %s" % (SSH, " ".join(args)))

        try:
            # Start the child process
            self._connection = expect.spawn(SSH,
                                            args=args,
                                            timeout=self.timeout,
                                            **ENCODING)
            # get blocking kernel client
            self.kernel_client()
            # initialize it
            if self.kernel_init():
                self.status.set_running(self.kernel_pid, self.sudo)
                # run custom code if part of sub class
                self.kernel_customize()
            else:
                self.status.set_connect_failed(sudo=self.sudo)
        except Exception as e:
            tb = sys.exc_info()[2]
            self._logger.error(str(e.with_traceback(tb)))
            self._logger.error("Cannot contiune, exiting")
            sys.exit(1)

        prompt = re.compile(r"\n")

        while True:
            try:
                # Wait for the next line of output
                self._connection.expect(prompt)
                # print the outputs
                self._logger.info(self._connection.before.strip("\r\n"))

            except KeyboardInterrupt:
                self.interrupt_kernel()
                self.check_alive()

            except expect.TIMEOUT:
                # no output within self.timeout: use the pause to check health
                self.check_alive()

            except expect.EOF:
                # The program has exited
                self._logger.info("The program has exited.")
                self.status.set_down(self.kernel_pid, self.sudo)
                break

        self.close()
        self.status.close()