def register_worker_magic(connection_info, magic_name="worker"):
    """Install a line magic and a cell magic that run code on a remote kernel.

    A blocking kernel client is connected using ``connection_info`` and the
    same handler is registered under ``magic_name`` for both magic kinds on
    the current IPython shell.
    """
    shell = get_ipython()

    client = BlockingKernelClient()
    client.load_connection_info(connection_info)
    client.start_channels()

    def remote(line, cell=None):
        """Run the current cell on a remote IPython kernel"""
        # Invoked as a line magic there is no cell, so the line is the code.
        code = line if cell is None else cell
        run_cell_remote(shell, client, code)

    # keep a reference to the client on the handler, largely for mocking
    remote.client = client

    for kind in ("line", "cell"):
        shell.register_magic_function(remote, magic_kind=kind, magic_name=magic_name)
def create_kernel_client(self, ci):
    """Return a started ``BlockingKernelClient`` configured from ``ci``.

    Only the shell channel is started; the iopub, stdin and heartbeat
    channels are explicitly disabled.
    """
    client = BlockingKernelClient()
    client.load_connection_info(ci)
    channel_flags = {"shell": True, "iopub": False, "stdin": False, "hb": False}
    client.start_channels(**channel_flags)
    return client
def test_start_ipython_scheduler(loop, zmq_ctx):
    """Start an IPython kernel in the scheduler and execute code in it.

    The execute reply is checked to belong to the request that was sent and
    to report success. The client channels are stopped even when a check or
    the reply timeout fails, so a failing run does not leak them.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(s["address"], loop=loop) as e:
            info = e.start_ipython_scheduler()
            kc = BlockingKernelClient()
            kc.load_connection_info(info)
            kc.start_channels()
            try:
                msg_id = kc.execute("scheduler")
                reply = kc.get_shell_msg(timeout=10)
                assert reply["parent_header"]["msg_id"] == msg_id
                assert reply["content"]["status"] == "ok"
            finally:
                kc.stop_channels()
def test_start_ipython_workers(loop, zmq_ctx):
    """Start IPython kernels in the workers and execute code in the first one.

    The execute reply must belong to the request that was sent and report
    success. The client channels are stopped in a ``finally`` so a failing
    assertion or a timeout does not leak them.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(s["address"], loop=loop) as e:
            info_dict = e.start_ipython_workers()
            info = first(info_dict.values())
            kc = BlockingKernelClient()
            kc.load_connection_info(info)
            kc.start_channels()
            try:
                kc.wait_for_ready(timeout=10)
                msg_id = kc.execute("worker")
                reply = kc.get_shell_msg(timeout=10)
                assert reply["parent_header"]["msg_id"] == msg_id
                assert reply["content"]["status"] == "ok"
            finally:
                kc.stop_channels()
def remote_magic(line, cell=None):
    """A magic for running code on a specified remote worker

    The connection_info dict of the worker will be looked up
    as the first positional arg to the magic.
    The rest of the line (or the entire cell for a %%cell magic)
    will be passed to the remote kernel.

    Kernel clients are cached in ``remote_magic._clients`` keyed by the
    connection info, so repeated calls for the same worker reuse one client.

    Usage:

        info = e.start_ipython(worker)[worker]
        %remote info print(worker.data)

    Raises:
        ValueError: if the line is empty (no connection_info name given),
            or if it is used as a line magic without any code to run.
        NameError: if the given name is not in IPython's user namespace.
    """
    # Validate the line first: an empty line has no first token, and indexing
    # it would otherwise fail with an unhelpful IndexError.
    split_line = line.split(None, 1)
    if not split_line:
        raise ValueError("I need the name of a connection_info dict!")
    info_name = split_line[0]

    # get connection info from IPython's user namespace
    ip = get_ipython()
    if info_name not in ip.user_ns:
        raise NameError(info_name)
    connection_info = dict(ip.user_ns[info_name])

    if not cell:  # line magic, use the rest of the line
        if len(split_line) == 1:
            raise ValueError("I need some code to run!")
        cell = split_line[1]

    # turn info dict to hashable str for use as lookup key in _clients cache
    key = ",".join(map(str, sorted(connection_info.items())))

    clients = remote_magic._clients
    kc = clients.get(key)
    if kc is None:
        kc = BlockingKernelClient()
        kc.load_connection_info(connection_info)
        kc.start_channels()
        kc.wait_for_ready(timeout=10)
        # only cache the client once it is known to be ready
        clients[key] = kc

    # actually run the code
    run_cell_remote(ip, kc, cell)
class JupyterRAMUsage(Stat):
    """
    tag: ``jupyter.ram_usage``

    settings:

    .. code-block:: javascript

        {
            "connection info": "",
            "query interval [s]": 1
        }

    Tracks the RAM usage of all variables in a user-specified jupyter
    notebook. If no connection info is given in the settings, take the
    kernel with the latest start date.

    ``connection info`` must be a string containing the info displayed when
    running ``%connect_info`` in a jupyter notebook
    (you can directly copy-paste it).

    ``query interval [s]`` specifies how often the thread running in the
    jupyter notebook should read the variables. The lower this is, the
    higher the resolution of the stat but it might start affecting the
    speed of your notebook when too low.

    Note that RAM tracked in this way is not equal to the actual RAM the
    OS needs because some further optimization is done by e. g. numpy to
    reduce the OS memory usage.
    """
    name = 'RAM Usage of objects in a Python Jupyter Notebook [MB]'
    base_tag = 'ram_usage'
    default_settings = {
        'connection info': '',
        # how often the memory usage is read in the jupyter notebook
        'query interval [s]': 1.
    }

    @classmethod
    def _read_latest_connection_file(cls):
        """
        Reads the latest jupyter kernel connection file.
        https://jupyter.readthedocs.io/en/latest/projects/jupyter-directories.html.

        Returns the parsed JSON content of the most recently created
        ``kernel-*.json`` file in the jupyter runtime directory, or `None`
        if there is no such file.
        """
        runtime_dir = jupyter_runtime_dir()
        files = glob.glob(os.path.join(runtime_dir, 'kernel-*.json'))
        if not files:
            return None

        # use the latest connection file
        connection_file = max(files, key=os.path.getctime)
        with open(connection_file, 'r') as f:
            return json.load(f)

    @classmethod
    def get_connection_info(cls):
        """
        Get the target kernel connection info.
        Returns a dictionary of the connection info supplied in the settings,
        or the latest started kernel if none is given.
        Returns `None` if no kernel has been found.
        """
        connection_info = cls.settings['connection info']
        if not connection_info:
            return cls._read_latest_connection_file()
        return json.loads(connection_info)

    @classmethod
    def check_availability(cls):
        """
        Raise ``StatNotAvailableError`` if no kernel connection info is found.
        """
        # the stat is not available if no suitable connection info
        # can be found
        if cls.get_connection_info() is None:
            raise exceptions.StatNotAvailableError(
                'Could not find any running kernel.')

    def __init__(self, fps):
        """
        Connect to the target kernel and start the measuring thread in it.

        Creates (or truncates) the csv file used to exchange measurements,
        builds the setup and teardown code, connects a kernel client and
        executes the setup code in the notebook kernel.
        """
        self.config = self.get_connection_info()

        data_dir = appdirs.user_data_dir('permon', 'bminixhofer')
        os.makedirs(data_dir, exist_ok=True)

        self.usage_file = os.path.join(data_dir, 'jupyter_ram_usage.csv')
        # create / truncate the file so get_stat can read it right away
        with open(self.usage_file, 'w'):
            pass

        # The path is embedded in generated source code, so it is inserted
        # as a Python literal via repr(). Pasting it between quotes would
        # break on backslashes (Windows paths) or quotes in the path.
        usage_file_literal = repr(self.usage_file)
        query_interval = self.settings['query interval [s]']

        # self.setup_code is the code that is run in the notebook when the
        # stat is instantiated. It starts a thread which reads the memory
        # usage of all public variables in a set interval and saves it to a
        # csv file in the user data directory.
        # The file is opened with newline='' as the csv module requires;
        # otherwise extra blank rows are written on Windows.
        self.setup_code = f"""
if '_permon_running' not in globals() or not _permon_running:
    import threading
    import csv
    import sys
    import time
    from pympler import asizeof
    from types import ModuleType

    def _permon_get_ram_usage_per_object():
        while _permon_running:
            ram_usage = []
            global_vars = [key for key in globals() if not key.startswith('_')]
            for name in global_vars:
                value = globals()[name] if name in globals() else None
                if isinstance(value, ModuleType):
                    continue

                try:
                    ram_usage.append((name, asizeof.asizeof(value)))
                except TypeError:
                    continue

            with open({usage_file_literal}, 'w', newline='') as f:
                writer = csv.writer(f, delimiter=',')
                for name, ram in ram_usage:
                    writer.writerow([name, ram])
            time.sleep({query_interval})

    _permon_thread = threading.Thread(target=_permon_get_ram_usage_per_object)
    _permon_running = True
    _permon_thread.start()
"""
        self.teardown_code = """
_permon_running = False
"""
        self.client = BlockingKernelClient()
        self.client.load_connection_info(self.config)
        self.client.start_channels()
        self.client.execute(self.setup_code)
        super().__init__(fps=fps)

    def __del__(self):
        # __init__ may have failed before the client was created; in that
        # case there is nothing to tear down.
        client = getattr(self, 'client', None)
        if client is None:
            return

        # stop the thread running in the jupyter notebook
        # and stop the connection to the kernel upon deletion
        try:
            client.execute(self.teardown_code)
        finally:
            # close the channels even if the teardown request failed
            client.stop_channels()

    def get_stat(self):
        """
        Read the measurements written by the notebook thread.

        Returns a tuple ``(total, top)`` where ``total`` is the sum over all
        tracked variables and ``top`` is a list of up to five
        ``(name, usage)`` tuples with the largest usage first. Values are the
        numbers from the csv file divided by ``1000**2``.
        """
        # reads the csv file the setup code has written to
        with open(self.usage_file, 'r', newline='') as f:
            ram_usage = [
                (row[0], float(row[1]) / 1000**2)
                for row in csv.reader(f)
                if row  # skip blank lines instead of failing on row[0]
            ]

        # sort the ram_usage list so that the largest variables come first
        ram_usage.sort(key=lambda x: x[1], reverse=True)

        # return the sum of RAM usage and the variables taking up the most RAM
        return sum(x[1] for x in ram_usage), ram_usage[:5]

    @property
    def minimum(self):
        return 0

    @property
    def maximum(self):
        return None
def create_kernel_client(self, ci):
    """Return a ``BlockingKernelClient`` loaded with ``ci``, channels started."""
    client = BlockingKernelClient()
    client.load_connection_info(ci)
    client.start_channels()
    return client
class SshKernel:
    """Remote ipykernel via SSH

    Raises:
        SshKernelException: "Could not execute remote command, connection died"
        SshKernelException: "Connection failed"
        SshKernelException: "Could not create kernel_info file"

    Arguments:
        host {str} -- host where the remote ipykernel should be started
        connection_info {dict} -- Local ipykernel connection info as provided by Jupyter lab
        python_path {str} -- Remote python path to be used to start ipykernel

    Keyword Arguments:
        sudo {bool} -- Start ipykernel as root if necessary (default: {False})
        timeout {int} -- SSH connection timeout (default: {5})
        env {str or list} -- Environment variables passed to the ipykernel, either as one
                             string "VAR1=VAL1 VAR2=VAL2" or as a list of "VAR=VAL" strings
                             (default: {""})
        ssh_config {str} -- Path to the local SSH config file (default: {Path.home() / ".ssh" / "config"})
        quiet {bool} -- Pass "-q" to ssh (default: {True})
        verbose {bool} -- Pass "-v" to ssh; only used when quiet is False (default: {False})
        msg_interval {int} -- Divided by timeout to get the number of alive checks between two
                              "alive" log messages (default: {30})
        logger -- Logger to use; when None, one is created with setup_logging (default: {None})
    """

    # A failed pid retrieval is retried until the attempt count exceeds this.
    _MAX_INIT_ATTEMPTS = 5

    def __init__(
        self,
        host,
        connection_info,
        python_path,
        sudo=False,
        timeout=5,
        env="",
        ssh_config=None,
        quiet=True,
        verbose=False,
        msg_interval=30,
        logger=None,
    ):
        self.host = host
        self.connection_info = connection_info
        self.python_full_path = PurePosixPath(python_path) / "bin/python"
        self.sudo = sudo
        self.timeout = timeout
        self.env = env
        # OS specific path
        self.ssh_config = Path.home() / ".ssh" / "config" if ssh_config is None else ssh_config
        self.quiet = quiet
        self.verbose = verbose

        self._connection = None
        # The kernel client is created later by kernel_client()
        self.kc = None

        self.remote_ports = {}
        self.uuid = str(uuid.uuid4())
        self.fname = "/tmp/.ssh_ipykernel_%s.json" % self.uuid  # POSIX path

        if logger is None:
            self._logger = setup_logging("SshKernel")
        else:
            self._logger = logger

        self._logger.debug("Remote kernel info file: {0}".format(self.fname))
        self._logger.debug("Local connection info: {0}".format(connection_info))

        self.kernel_pid = 0
        self.status = Status(connection_info, self._logger)
        # Used as a modulus in check_alive, so it must never be 0
        # (which int() would yield whenever timeout > msg_interval).
        self.msg_interval = max(1, int(msg_interval / timeout))
        self.msg_counter = 0

    def _execute(self, cmd):
        """Run cmd locally and return (returncode, output).

        On a non-zero exit status the second element is the args tuple of
        the raised CalledProcessError instead of the output.
        """
        try:
            result = subprocess.check_output(cmd)
            return 0, result
        except subprocess.CalledProcessError as e:
            return e.returncode, e.args

    def _ssh(self, cmd):
        """Run cmd on the remote host through the ssh executable."""
        return self._execute([SSH, self.host, cmd])

    def _env_string(self):
        """Return the configured environment assignments as one string.

        Accepts None, a ready-made string, or an iterable of "VAR=VAL"
        strings. A string is returned unchanged; joining it would put a
        space between every character.
        """
        if not self.env:
            return ""
        if isinstance(self.env, str):
            return self.env
        return " ".join(self.env)

    def close(self):
        """Close the ssh connection and stop the kernel client channels

        Does nothing when no ssh connection has been started.
        """
        if self._connection is None:
            return

        if self._connection.isalive():
            self._connection.logout()
            self._logger.debug("Ssh connection closed")

        # The kernel client only exists once kernel_client() has been called
        kc = getattr(self, "kc", None)
        if kc is not None and kc.is_alive():
            kc.stop_channels()
            self._logger.debug("Kernel client channels stopped")

    def create_remote_connection_info(self):
        """Create a remote ipykernel connection info file

        Uses KERNEL_SCRIPT to execute jupyter_client.write_connection_file remotely
        to request remote ports.
        The remote ports will be returned as json and stored to build the SSH tunnels later.

        Raises:
            SshKernelException: "Could not create kernel_info file"
        """
        self._logger.info("Creating remote connection info")
        script = KERNEL_SCRIPT.format(fname=self.fname, **self.connection_info)

        # Collapse the script to a single line so it can be passed to "python -c"
        cmd = "{python} -c '{command}'".format(
            python=self.python_full_path,
            command="; ".join(script.strip().split("\n")),
        )

        result = self._ssh(cmd)
        self._logger.debug(result)
        if result[0] != 0:
            self.status.set_unreachable(self.kernel_pid, self.sudo)
            raise SshKernelException("Could not create kernel_info file")

        self.remote_ports = json.loads(result[1].decode("utf-8"))
        local_ports = {k: v for k, v in self.connection_info.items() if "_port" in k}
        self._logger.debug("Local ports = %s", local_ports)
        self._logger.debug("Remote ports = %s", self.remote_ports)

    def kernel_client(self):
        """Create a blocking kernel client for the local connection info and start its channels."""
        self.kc = BlockingKernelClient()
        self.kc.load_connection_info(self.connection_info)
        self.kc.start_channels()

    def kernel_init(self):
        """Retrieve the pid of the remote kernel and store it in kernel_pid.

        Returns:
            bool -- True when the pid was retrieved, False when the kernel is not alive

        Raises:
            SshKernelException: "Could not initialize kernel" when the pid still
                cannot be retrieved after the maximum number of attempts
        """
        if not self.check_alive(show_pid=False):
            return False

        attempt = 0
        while True:
            attempt += 1
            self._logger.debug("Retrieving kernel pid, attempt %d" % attempt)
            try:
                result = self.kc.execute_interactive(
                    "import os",
                    user_expressions={"pid": "os.getpid()"},
                    store_history=False,
                    silent=True,
                    timeout=2,
                )
                self._logger.debug("result = %s" % str(result["content"]))
                self.kernel_pid = int(
                    result["content"]["user_expressions"]["pid"]["data"]["text/plain"])
            except Exception as ex:
                msg = str(ex)
                if msg == "Timeout waiting for output":
                    self._logger.warning("Warning: {}".format(msg))
                else:
                    self._logger.error("Warning: {}".format(msg))
                # The limit applies to every kind of failure, so a persistent
                # non-timeout error cannot keep this loop spinning forever.
                if attempt > self._MAX_INIT_ATTEMPTS:
                    self._logger.error(
                        "Max attempts ({}) reached, stopping".format(self._MAX_INIT_ATTEMPTS))
                    raise SshKernelException("Could not initialize kernel") from ex
            else:
                self._logger.debug("Remote kernel pid %d" % self.kernel_pid)
                return True

    def kernel_customize(self):
        """Hook for subclasses, called after the kernel has been initialized."""
        pass

    def check_alive(self, show_pid=True):
        """Return whether both the ssh connection and the kernel client are alive.

        The state is logged when it is "not alive" and otherwise only on
        every msg_interval-th call.
        """
        alive = self._connection.isalive() and self.kc.is_alive()
        if show_pid:
            msg = "Remote kernel ({}, pid = {}) is {}alive".format(
                self.host, self.kernel_pid, "" if alive else "not ")
        else:
            msg = "Remote kernel is {}alive".format("" if alive else "not ")

        if not alive or self.msg_counter % self.msg_interval == 0:
            self.msg_counter = 0
            self._logger.info(msg)

        self.msg_counter += 1
        return alive

    def interrupt_kernel(self):
        """Send an interrupt to the remote kernel through the ssh connection (not on Windows)."""
        if self._connection.isalive():
            if is_windows:
                self._logger.warning('On Windows use "Interrupt remote kernel" button')
            else:
                self._logger.warning("Sending interrupt to remote kernel")
                self._connection.sendintr()  # send SIGINT

    def start_kernel_and_tunnels(self):
        """Start Kernels and SSH tunnels

        A new ssh child process will be spawned that will
        - set up the necessary ssh tunnels between remote kernel ports and local kernel ports
        - start the ipykernel on the remote host

        The method then relays the output of the child process to the logger
        until the process ends, and finally closes the connection and the
        status. If the kernel cannot be started, the error is logged and the
        process exits with status 1.
        """
        self._logger.info("Setting up ssh tunnels")
        ssh_tunnels = []
        for port_name, remote_port in self.remote_ports.items():
            ssh_tunnels += [
                "-L",
                "{local_port}:127.0.0.1:{remote_port}".format(
                    local_port=self.connection_info[port_name],
                    remote_port=remote_port,
                ),
            ]

        self._logger.info("Starting remote kernel")

        # Build remote command
        sudo = "sudo " if self.sudo else ""
        env = self._env_string()
        cmd = "{sudo} {env} {python} -m ipykernel_launcher -f {fname}".format(
            sudo=sudo, env=env, python=self.python_full_path, fname=self.fname)

        # Build ssh command with all flags and tunnels
        if self.quiet:
            args = ["-q"]
        elif self.verbose:
            args = ["-v"]
        else:
            args = []
        args += ["-t", "-F", str(self.ssh_config)] + ssh_tunnels + [self.host, cmd]

        self._logger.debug("%s %s" % (SSH, " ".join(args)))

        try:
            # Start the child process
            self._connection = expect.spawn(SSH, args=args, timeout=self.timeout, **ENCODING)
            # get blocking kernel client
            self.kernel_client()
            # initialize it
            if self.kernel_init():
                self.status.set_running(self.kernel_pid, self.sudo)
                # run custom code if part of sub class
                self.kernel_customize()
            else:
                self.status.set_connect_failed(sudo=self.sudo)
        except Exception as e:
            tb = sys.exc_info()[2]
            self._logger.error(str(e.with_traceback(tb)))
            self._logger.error("Cannot contiune, exiting")
            sys.exit(1)

        prompt = re.compile(r"\n")

        while True:
            try:
                # Wait for the next line of output
                self._connection.expect(prompt)
                # print the outputs
                self._logger.info(self._connection.before.strip("\r\n"))
            except KeyboardInterrupt:
                self.interrupt_kernel()
                self.check_alive()
            except expect.TIMEOUT:
                # no output within the timeout: use the pause for an alive check
                self.check_alive()
            except expect.EOF:
                # The program has exited
                self._logger.info("The program has exited.")
                self.status.set_down(self.kernel_pid, self.sudo)
                break

        self.close()
        self.status.close()