Beispiel #1
0
def exec_code(kid, code):
    """
    Executes arbitrary `code` in the kernel with id `kid`.

    The whole exchange runs while holding the module-level `jupyter_lock`.
    Only the first ``stdout`` message seen on the iopub channel is captured.
    Exceptions raised while talking to the kernel (for example by the timed
    `get_shell_msg` / `get_iopub_msg` calls) propagate to the caller, but the
    client channels are always stopped first.

    Returns:
        - tuple: the output of the code and the error, if any.  When the
          shell reply status is not ``"ok"`` the captured text is returned
          as the error and the output is ``None``.
    """
    # Load connection info and init communications.
    cf = find_connection_file(kid)

    with jupyter_lock:
        km = BlockingKernelClient(connection_file=cf)
        km.load_connection_file()
        km.start_channels()
        output, error = None, None

        try:
            msg_id = km.execute(code, store_history=False)
            reply = km.get_shell_msg(msg_id, timeout=60)

            while km.is_alive():
                msg = km.get_iopub_msg(timeout=10)
                if ("content" in msg and "name" in msg["content"]
                        and msg["content"]["name"] == "stdout"):
                    output = msg["content"]["text"]
                    break
        finally:
            # Release the channels even when one of the calls above raises,
            # otherwise every failed request leaks a set of open channels.
            km.stop_channels()

        if reply["content"]["status"] != "ok":
            logging.error(f"Status is {reply['content']['status']}")
            logging.error(output)
            # On failure the captured text is reported as the error.
            error = output
            output = None

    return output, error
Beispiel #2
0
def register_worker_magic(connection_info, magic_name="worker"):
    """Install a line and a cell magic that run code in a remote kernel.

    A blocking kernel client is created from `connection_info` and its
    channels are started.  The registered magic (named `magic_name`)
    forwards the line, or the cell body when there is one, to
    ``run_cell_remote`` together with that client.
    """
    shell = get_ipython()
    client = BlockingKernelClient()
    client.load_connection_info(connection_info)
    client.start_channels()

    def remote(line, cell=None):
        """Run the current cell on a remote IPython kernel"""
        # Invoked as a line magic there is no cell body: the line is the code.
        source = line if cell is None else cell
        run_cell_remote(shell, client, source)

    # Keep the client reachable from the magic function, largely for mocking.
    remote.client = client
    for kind in ("line", "cell"):
        shell.register_magic_function(remote,
                                      magic_kind=kind,
                                      magic_name=magic_name)
def register_worker_magic(connection_info, magic_name='worker'):
    """Register a %worker magic, given connection_info.

    Both a line and cell magic are registered,
    which run the given cell in a remote kernel.

    `connection_info` is copied before use, so the caller's mapping is not
    modified.  Its ``'key'`` entry is removed from the copy and assigned to
    the client's session instead of being passed to the client constructor.
    """
    ip = get_ipython()
    info = dict(connection_info)  # copy
    key = info.pop('key')
    # Build the client from the key-less copy; the original code created
    # `info` but then passed `connection_info` (still containing 'key').
    kc = BlockingKernelClient(**info)
    kc.session.key = key
    kc.start_channels()

    def remote(line, cell=None):
        """Run the current cell on a remote IPython kernel"""
        if cell is None:
            # both line and cell magic
            cell = line
        run_cell_remote(ip, kc, cell)

    remote.client = kc  # preserve reference on kc, largely for mocking
    ip.register_magic_function(remote,
                               magic_kind='line',
                               magic_name=magic_name)
    ip.register_magic_function(remote,
                               magic_kind='cell',
                               magic_name=magic_name)
Beispiel #4
0
def setup_kernel(cmd):
    """Start an embedded kernel in a subprocess and wait for it to be ready.

    `cmd` is Python source run with ``sys.executable -c``.  This is a
    generator that yields exactly once; presumably it is wrapped with
    ``contextlib.contextmanager`` where it is defined (the decorator is not
    visible here -- confirm).

    Yields
    ------
    client: BlockingKernelClient with its channels started, after
        ``wait_for_ready()`` has returned

    Raises
    ------
    IOError: if the subprocess exits before it can be connected to, or if
        its connection file has not appeared when the wait ends
    """
    kernel = Popen([sys.executable, "-c", cmd], stdout=PIPE, stderr=PIPE, env=env)
    try:
        connection_file = os.path.join(IPYTHONDIR, "profile_default", "security", "kernel-%i.json" % kernel.pid)
        # wait for connection file to exist, bounded by SETUP_TIMEOUT seconds
        tic = time.time()
        while not os.path.exists(connection_file) and kernel.poll() is None and time.time() < tic + SETUP_TIMEOUT:
            time.sleep(0.1)

        if kernel.poll() is not None:
            # The subprocess already exited: report its stderr.
            o, e = kernel.communicate()
            e = py3compat.cast_unicode(e)
            raise IOError("Kernel failed to start:\n%s" % e)

        if not os.path.exists(connection_file):
            # The outer ``finally`` terminates the still-running subprocess.
            raise IOError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        try:
            client.wait_for_ready()
            yield client
        finally:
            client.stop_channels()
    finally:
        # Always stop the subprocess, including when connecting to it fails.
        kernel.terminate()
Beispiel #5
0
def main(kid):
    """Define a ``juneau_connect()`` helper inside the kernel with id `kid`.

    The helper source is built from the module-level `config` and sent to
    the kernel with ``execute_interactive``.  The client channels are stopped
    afterwards, also when the execution raises.
    """
    # Load connection info and init communications.
    cf = find_connection_file(kid)
    km = BlockingKernelClient(connection_file=cf)
    km.load_connection_file()
    km.start_channels()

    # Define a function that is useful from within the user's notebook: juneau_connect() can be
    # used to directly connect the notebook to the source database.  Note that this includes the
    # full "root" credentials.

    # FIXME: allow for user-specific credentials on SQL tables.  The DBMS may also not be at localhost.
    code = f"""
        from sqlalchemy import create_engine
        
        def juneau_connect():
            engine = create_engine(
                "postgresql://{config.sql.name}:{config.sql.password}@{config.sql.host}/{config.sql.dbname}",
                connect_args={{ 
                    "options": "-csearch_path='{config.sql.dbs}'" 
                }}
            )
            return engine.connect()
        """
    try:
        km.execute_interactive(code, timeout=TIMEOUT)
    finally:
        # Do not leave the channels open if the execution fails or times out.
        km.stop_channels()
Beispiel #6
0
def exec_code(kid, var, code):
    """Run `code` in the kernel with id `kid` and capture its first stdout text.

    The exchange runs while holding the module-level `jupyter_lock`.  iopub
    messages are read until a ``stdout`` message arrives, the kernel reports
    ``idle`` for this request, or the client is no longer alive.  `var` is
    not used by this function.

    Returns:
        tuple: ``(output, error)``.  `error` is ``''`` when the shell reply
        status is ``'ok'``; otherwise the captured text is returned as
        `error` and `output` is ``None``.
    """
    # load connection info and init communication
    cf = find_connection_file(kid)

    with jupyter_lock:
        km = BlockingKernelClient(connection_file=cf)
        km.load_connection_file()
        km.start_channels()

        output = None
        try:
            msg_id = km.execute(code, store_history=False)
            reply = km.get_shell_msg(msg_id, timeout=10)

            try:
                while km.is_alive():
                    try:
                        msg = km.get_iopub_msg(timeout=10)
                    except Empty:
                        # Nothing published within the timeout; keep polling.
                        continue
                    if 'content' not in msg:
                        continue
                    content = msg['content']
                    if 'name' in content and content['name'] == 'stdout':
                        output = content['text']
                        break
                    parent = msg.get('parent_header') or {}
                    if (content.get('execution_state') == 'idle'
                            and parent.get('msg_id') == msg_id):
                        # The kernel finished this request without printing;
                        # no stdout for it can follow, so stop waiting.
                        break
            except KeyboardInterrupt:
                logging.error('Keyboard interrupt')
        finally:
            # Covers failures in execute()/get_shell_msg() as well.
            km.stop_channels()

        error = ''
        if reply['content']['status'] != 'ok':
            logging.error('Status is ' + reply['content']['status'])
            logging.error(str(output))
            error = output
            output = None

    return output, error
Beispiel #7
0
 def create_kernel_client(self, ci):
     """Return a blocking kernel client built from connection info `ci`.

     Only the shell channel is started; iopub, stdin and heartbeat are
     explicitly left off.
     """
     channel_flags = dict(shell=True, iopub=False, stdin=False, hb=False)
     client = BlockingKernelClient()
     client.load_connection_info(ci)
     client.start_channels(**channel_flags)
     return client
Beispiel #8
0
def setup_kernel(cmd):
    """Launch `cmd` as an embedded kernel subprocess and connect a client.

    `cmd` is run with ``sys.executable -c``.  The function is a generator
    that yields once; the subprocess is terminated when the generator
    finishes or fails.

    Yields
    ------
    client: BlockingKernelClient with channels started and ready

    Raises
    ------
    IOError: if the subprocess exits early or its connection file is missing
    """

    def connection_file_ready(path):
        """Tell whether `path` exists and parses as JSON."""
        if os.path.exists(path):
            try:
                with open(path) as handle:
                    json.load(handle)
            except ValueError:
                # Present but not (yet) valid JSON.
                return False
            return True
        return False

    kernel = Popen([sys.executable, '-c', cmd], stdout=PIPE, stderr=PIPE)
    try:
        file_name = 'kernel-%i.json' % kernel.pid
        connection_file = os.path.join(paths.jupyter_runtime_dir(), file_name)

        # Poll until the file is readable, the process dies, or time runs out.
        started = time.time()
        while True:
            if connection_file_ready(connection_file):
                break
            if kernel.poll() is not None:
                break
            if time.time() >= started + SETUP_TIMEOUT:
                break
            time.sleep(0.1)

        # Wait 100ms for the writing to finish
        time.sleep(0.1)

        if kernel.poll() is not None:
            _, stderr_data = kernel.communicate()
            stderr_text = py3compat.cast_unicode(stderr_data)
            raise IOError("Kernel failed to start:\n%s" % stderr_text)

        if not os.path.exists(connection_file):
            if kernel.poll() is None:
                kernel.terminate()
            raise IOError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        client.wait_for_ready()
        try:
            yield client
        finally:
            client.stop_channels()
    finally:
        kernel.terminate()
Beispiel #9
0
 def run(self):
     """Forward every iopub message of the located kernel to `self.received`.

     Only the iopub channel is started.  The loop has no exit condition, and
     each read blocks until a message is available.
     """
     channel_flags = dict(shell=False,
                          iopub=True,
                          stdin=False,
                          control=False,
                          hb=False)
     client = BlockingKernelClient(connection_file=find_connection_file())
     client.load_connection_file()
     client.start_channels(**channel_flags)
     while True:
         message = client.get_iopub_msg()
         self.received.emit(message)
Beispiel #10
0
def test_start_ipython_scheduler(loop, zmq_ctx):
    """Smoke test: a kernel started on the scheduler answers an execute request.

    The shell reply is fetched but not inspected; the test fails only if one
    of the calls raises.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]), Client(s["address"], loop=loop) as client:
        connection_info = client.start_ipython_scheduler()
        kernel_client = BlockingKernelClient()
        kernel_client.load_connection_info(connection_info)
        kernel_client.start_channels()
        request_id = kernel_client.execute("scheduler")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        kernel_client.stop_channels()
    def send_to(self, args):
        """Resolve a connection file from `args` and make sure a client exists.

        A trailing ``(newest)`` marker on the first argument is stripped in
        place.  A new client is created, started and cached in `self.clients`
        only when none is cached for the resolved file yet.

        Returns the resolved connection file.
        """
        marker = '(newest)'
        if args and args[0].endswith(marker):
            args[0] = args[0][:-len(marker)]
        cf = find_connection_file(*args)

        if cf in self.clients:
            return cf

        new_client = BlockingKernelClient()
        new_client.load_connection_file(cf)
        new_client.start_channels()
        self.clients[cf] = new_client
        return cf
Beispiel #12
0
def test_start_ipython_scheduler(loop, zmq_ctx):
    """Smoke test: a kernel started on the scheduler answers an execute request.

    The session key is taken out of the connection info and set on the
    client's session.  The reply is fetched but not inspected.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1, should_check_state=False) as (s, [a]), \
            Client(s['address'], loop=loop) as client:
        connection_info = client.start_ipython_scheduler()
        session_key = connection_info.pop('key')
        kernel_client = BlockingKernelClient(**connection_info)
        kernel_client.session.key = session_key
        kernel_client.start_channels()
        request_id = kernel_client.execute("scheduler")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        kernel_client.stop_channels()
Beispiel #13
0
def test_start_ipython_scheduler(loop, zmq_ctx):
    """Smoke test: a kernel started on the scheduler answers an execute request.

    Connects through an ``Executor`` addressed by host and port.  The session
    key is set on the client's session; the reply is not inspected.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]), \
            Executor(('127.0.0.1', s['port']), loop=loop) as executor:
        connection_info = executor.start_ipython_scheduler()
        session_key = connection_info.pop('key')
        kernel_client = BlockingKernelClient(**connection_info)
        kernel_client.session.key = session_key
        kernel_client.start_channels()
        request_id = kernel_client.execute("scheduler")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        kernel_client.stop_channels()
Beispiel #14
0
def test_start_ipython_scheduler(loop, zmq_ctx):
    """Smoke test: a kernel started on the scheduler answers an execute request.

    Connects through a ``Client`` addressed by host and port.  The session
    key is set on the client's session; the reply is not inspected.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]), \
            Client(('127.0.0.1', s['port']), loop=loop) as client:
        connection_info = client.start_ipython_scheduler()
        session_key = connection_info.pop('key')
        kernel_client = BlockingKernelClient(**connection_info)
        kernel_client.session.key = session_key
        kernel_client.start_channels()
        request_id = kernel_client.execute("scheduler")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        kernel_client.stop_channels()
Beispiel #15
0
def connect(connection_file):
    """Return the kernel client for `connection_file`, creating it on first use.

    Clients are cached in the module-level `clients` mapping, keyed by the
    connection file.  On a cache miss a new BlockingKernelClient is loaded
    from the file, its channels are started, and it is stored in the cache.
    """
    if connection_file not in clients:
        # A single-argument print(...) call prints the same text on Python 2
        # and Python 3; the former print statement is a SyntaxError on 3.
        print("[nyroglancer] connecting to: " + connection_file)

        kernel_client = BlockingKernelClient(connection_file=connection_file)
        kernel_client.load_connection_file()
        kernel_client.start_channels()
        clients[connection_file] = kernel_client

    return clients[connection_file]
Beispiel #16
0
def connect(connection_file):
    """Return the kernel client for `connection_file`, creating it on first use.

    Clients are cached in the module-level `clients` mapping, keyed by the
    connection file.  On a cache miss a new BlockingKernelClient is loaded
    from the file, its channels are started, and it is stored in the cache.
    """
    if connection_file not in clients:
        # A single-argument print(...) call prints the same text on Python 2
        # and Python 3; the former print statement is a SyntaxError on 3.
        print("[nyroglancer] connecting to: " + connection_file)

        kernel_client = BlockingKernelClient(connection_file=connection_file)
        kernel_client.load_connection_file()
        kernel_client.start_channels()
        clients[connection_file] = kernel_client

    return clients[connection_file]
Beispiel #17
0
def main(kid, var):
    """Ask the kernel with id `kid` to print variable `var` as JSON.

    `var` is interpolated verbatim into the source that is sent to the
    kernel, so it must be a trusted expression.  The client channels are
    stopped afterwards, also when the execution raises.
    """
    # Load connection info and init communications.
    cf = find_connection_file(kid)
    km = BlockingKernelClient(connection_file=cf)
    km.load_connection_file()
    km.start_channels()

    # NOTE(review): the snippet calls .to_json() for ndarray and list values
    # as well, which looks unintended -- confirm before relying on it for
    # anything but DataFrames.
    code = f"""
        import pandas as pd
        import numpy as np
        if type({var}) in [pd.DataFrame, np.ndarray, list]:
            print({var}.to_json(orient='split', index=False))
        """
    try:
        km.execute_interactive(code, timeout=TIMEOUT)
    finally:
        # Do not leave the channels open if the execution fails or times out.
        km.stop_channels()
Beispiel #18
0
def test_start_ipython_workers(loop, zmq_ctx):
    """A kernel started on a worker replies ``ok`` to an execute request.

    Uses the connection info of the first started worker kernel and checks
    that the shell reply belongs to the request that was sent.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]), Client(s["address"], loop=loop) as client:
        connection_info = first(client.start_ipython_workers().values())
        kernel_client = BlockingKernelClient()
        kernel_client.load_connection_info(connection_info)
        kernel_client.start_channels()
        kernel_client.wait_for_ready(timeout=10)
        request_id = kernel_client.execute("worker")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        assert shell_reply["parent_header"]["msg_id"] == request_id
        assert shell_reply["content"]["status"] == "ok"
        kernel_client.stop_channels()
Beispiel #19
0
def test_start_ipython_workers(loop, zmq_ctx):
    """A kernel started on a worker replies ``ok`` to an execute request.

    Connects through a ``Client`` addressed by host and port, sets the
    session key on the client's session, and checks that the shell reply
    belongs to the request that was sent.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]), \
            Client(('127.0.0.1', s['port']), loop=loop) as client:
        connection_info = first(client.start_ipython_workers().values())
        session_key = connection_info.pop('key')
        kernel_client = BlockingKernelClient(**connection_info)
        kernel_client.session.key = session_key
        kernel_client.start_channels()
        kernel_client.wait_for_ready(timeout=10)
        request_id = kernel_client.execute("worker")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        assert shell_reply['parent_header']['msg_id'] == request_id
        assert shell_reply['content']['status'] == 'ok'
        kernel_client.stop_channels()
Beispiel #20
0
def test_start_ipython_workers(loop, zmq_ctx):
    """A kernel started on a worker replies ``ok`` to an execute request.

    Connects through an ``Executor`` addressed by host and port, sets the
    session key on the client's session, and checks that the shell reply
    belongs to the request that was sent.
    """
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]), \
            Executor(('127.0.0.1', s['port']), loop=loop) as executor:
        connection_info = first(executor.start_ipython_workers().values())
        session_key = connection_info.pop('key')
        kernel_client = BlockingKernelClient(**connection_info)
        kernel_client.session.key = session_key
        kernel_client.start_channels()
        kernel_client.wait_for_ready(timeout=10)
        request_id = kernel_client.execute("worker")
        shell_reply = kernel_client.get_shell_msg(timeout=10)
        assert shell_reply['parent_header']['msg_id'] == request_id
        assert shell_reply['content']['status'] == 'ok'
        kernel_client.stop_channels()
def setup_kernel(cmd):
    """start an embedded kernel in a subprocess, and wait for it to be ready

    This function was taken from the ipykernel project.
    We plan to remove it when dropping support for python 2.

    Yields
    -------
    client: jupyter_client.BlockingKernelClient connected to the kernel

    Raises
    ------
    IOError: if the subprocess exits before it can be connected to, or if
        its connection file has not appeared when the wait ends
    """
    kernel = Popen([sys.executable, '-c', cmd], stdout=PIPE, stderr=PIPE)
    try:
        connection_file = os.path.join(
            paths.jupyter_runtime_dir(),
            'kernel-%i.json' % kernel.pid,
        )
        # wait for connection file to exist, bounded by SETUP_TIMEOUT seconds
        tic = time.time()
        while not os.path.exists(connection_file) \
            and kernel.poll() is None \
            and time.time() < tic + SETUP_TIMEOUT:
            time.sleep(0.1)

        if kernel.poll() is not None:
            o, e = kernel.communicate()
            # stderr arrives as bytes; decode it so the message shows the
            # text rather than a b'...' repr.  Undecodable bytes are
            # replaced instead of raising a second error here.
            if isinstance(e, bytes):
                e = e.decode('utf-8', 'replace')
            raise IOError("Kernel failed to start:\n%s" % e)

        if not os.path.exists(connection_file):
            if kernel.poll() is None:
                kernel.terminate()
            raise IOError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        try:
            # Inside the try so the channels are stopped if readiness fails.
            client.wait_for_ready()
            yield client
        finally:
            client.stop_channels()
    finally:
        # NOTE(review): termination is skipped when PY2 is set; the reason is
        # not visible in this block.
        if not PY2:
            kernel.terminate()
def setup():
  """Start an ipykernel subprocess and connect the module-level `client` to it.

  After the client reports ready, `load_splonky` is executed in the kernel.

  Returns the digits of the ``kernel-<pid>.json`` connection file name,
  as a string.

  Raises Exception if executing `load_splonky` reports an error status.
  """
  global client

  kernel = Popen([sys.executable, '-m', 'ipykernel'], stdout=PIPE, stderr=PIPE)
  connection_file = os.path.join(
    paths.jupyter_runtime_dir(),
    'kernel-%i.json' % kernel.pid,
  )
  # NOTE(review): a fixed one-second pause before connecting; presumably it
  # gives the kernel time to write its connection file -- confirm.
  sleep(1)
  client = BlockingKernelClient(connection_file=connection_file)
  client.load_connection_file()
  client.start_channels()
  client.wait_for_ready()
  loaded = client.execute_interactive(load_splonky)
  if loaded['content']['status'] == 'error':
    raise Exception("Could not load core Splonky libraries")
  # Raw string: the same pattern without invalid-escape warnings.
  os_process_id = re.findall(r'.*\/kernel-(\d+)\.json$', connection_file)[0]
  return os_process_id
Beispiel #23
0
def register_worker_magic(connection_info, magic_name='worker'):
    """Install a line and a cell magic that run code in a remote kernel.

    A blocking kernel client is constructed from `connection_info`, its
    session key is set from the ``'key'`` entry, and its channels are
    started.  The registered magic (named `magic_name`) forwards the line,
    or the cell body when there is one, to ``run_cell_remote``.
    """
    shell = get_ipython()
    client = BlockingKernelClient(**connection_info)
    client.session.key = connection_info['key']
    client.start_channels()

    def remote(line, cell=None):
        """Run the current cell on a remote IPython kernel"""
        # Invoked as a line magic there is no cell body: the line is the code.
        source = line if cell is None else cell
        run_cell_remote(shell, client, source)

    for kind in ('line', 'cell'):
        shell.register_magic_function(remote, magic_kind=kind, magic_name=magic_name)
Beispiel #24
0
def remote_magic(line, cell=None):
    """Run code on the remote worker named by the magic's first argument.

    The first word of `line` names a variable in IPython's user namespace
    that holds the worker's connection_info dict.  The rest of the line
    (or the whole cell for a %%cell magic) is sent to the remote kernel.
    Clients are cached on ``remote_magic._clients``, one per distinct
    connection info.

    Usage:

        info = e.start_ipython(worker)[worker]
        %remote info print(worker.data)
    """
    shell = get_ipython()
    parts = line.split(None, 1)
    info_name = parts[0]
    if info_name not in shell.user_ns:
        raise NameError(info_name)
    # Copy: the session key is popped from this dict further down.
    connection_info = dict(shell.user_ns[info_name])

    if not cell:
        # Line magic: the code is whatever follows the info name.
        if len(parts) == 1:
            raise ValueError("I need some code to run!")
        cell = parts[1]

    # A dict is not hashable, so the cache is keyed by a string built from
    # the sorted items (taken before the session key is removed).
    cache = remote_magic._clients
    cache_key = ",".join(str(item) for item in sorted(connection_info.items()))
    session_key = connection_info.pop("key")

    if cache_key not in cache:
        new_client = BlockingKernelClient(**connection_info)
        new_client.session.key = session_key
        new_client.start_channels()
        new_client.wait_for_ready(timeout=10)
        cache[cache_key] = new_client
    kc = cache[cache_key]

    # actually run the code
    run_cell_remote(shell, kc, cell)
Beispiel #25
0
def remote_magic(line, cell=None):
    """Run code on the remote worker named by the magic's first argument.

    The first word of `line` names a variable in IPython's user namespace
    that holds the worker's connection_info dict.  The rest of the line
    (or the whole cell for a %%cell magic) is sent to the remote kernel.
    Clients are cached on ``remote_magic._clients``, one per distinct
    connection info.

    Usage:

        info = e.start_ipython(worker)[worker]
        %remote info print(worker.data)
    """
    shell = get_ipython()
    parts = line.split(None, 1)
    info_name = parts[0]
    if info_name not in shell.user_ns:
        raise NameError(info_name)
    # Copy: the session key is popped from this dict further down.
    connection_info = dict(shell.user_ns[info_name])

    if not cell:
        # Line magic: the code is whatever follows the info name.
        if len(parts) == 1:
            raise ValueError("I need some code to run!")
        cell = parts[1]

    # A dict is not hashable, so the cache is keyed by a string built from
    # the sorted items (taken before the session key is removed).
    cache = remote_magic._clients
    cache_key = ','.join(str(item) for item in sorted(connection_info.items()))
    session_key = connection_info.pop('key')

    if cache_key not in cache:
        new_client = BlockingKernelClient(**connection_info)
        new_client.session.key = session_key
        new_client.start_channels()
        new_client.wait_for_ready(timeout=10)
        cache[cache_key] = new_client
    kc = cache[cache_key]

    # actually run the code
    run_cell_remote(shell, kc, cell)
Beispiel #26
0
def register_worker_magic(connection_info, magic_name='worker'):
    """Register a %worker magic, given connection_info.

    Both a line and cell magic are registered,
    which run the given cell in a remote kernel.

    `connection_info` is copied before use, so the caller's mapping is not
    modified.  Its ``'key'`` entry is removed from the copy and assigned to
    the client's session instead of being passed to the client constructor.
    """
    ip = get_ipython()
    info = dict(connection_info) # copy
    key = info.pop('key')
    # Build the client from the key-less copy; the original code created
    # `info` but then passed `connection_info` (still containing 'key').
    kc = BlockingKernelClient(**info)
    kc.session.key = key
    kc.start_channels()
    def remote(line, cell=None):
        """Run the current cell on a remote IPython kernel"""
        if cell is None:
            # both line and cell magic
            cell = line
        run_cell_remote(ip, kc, cell)
    remote.client = kc # preserve reference on kc, largely for mocking
    ip.register_magic_function(remote, magic_kind='line', magic_name=magic_name)
    ip.register_magic_function(remote, magic_kind='cell', magic_name=magic_name)
Beispiel #27
0
def setup_kernel(cmd):
    """Start an embedded kernel in a subprocess and wait for it to be ready.

    `cmd` is Python source run with ``sys.executable -c``.  This is a
    generator that yields exactly once; presumably it is wrapped with
    ``contextlib.contextmanager`` where it is defined (the decorator is not
    visible here -- confirm).

    Yields
    ------
    client: BlockingKernelClient with its channels started, after
        ``wait_for_ready()`` has returned

    Raises
    ------
    IOError: if the subprocess exits before it can be connected to, or if
        its connection file has not appeared when the wait ends
    """
    kernel = Popen([sys.executable, '-c', cmd],
                   stdout=PIPE,
                   stderr=PIPE,
                   env=env)
    try:
        connection_file = os.path.join(IPYTHONDIR, 'profile_default', 'security',
                                       'kernel-%i.json' % kernel.pid)
        # wait for connection file to exist, bounded by SETUP_TIMEOUT seconds
        tic = time.time()
        while not os.path.exists(connection_file) \
            and kernel.poll() is None \
            and time.time() < tic + SETUP_TIMEOUT:
            time.sleep(0.1)

        if kernel.poll() is not None:
            # The subprocess already exited: report its stderr.
            o, e = kernel.communicate()
            e = py3compat.cast_unicode(e)
            raise IOError("Kernel failed to start:\n%s" % e)

        if not os.path.exists(connection_file):
            # The outer ``finally`` terminates the still-running subprocess.
            raise IOError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        try:
            client.wait_for_ready()
            yield client
        finally:
            client.stop_channels()
    finally:
        # Always stop the subprocess, including when connecting to it fails.
        kernel.terminate()
 def run(self):
     """Relay kernel traffic between the located kernel and this object's queues.

     Each pass waits up to TIMEOUT for an iopub message and puts it on
     `self.pub_q`, then checks `self.cmd_q`: a queued command is executed
     and its shell reply is put on `self.ctrl_q`; a queued ``None`` ends
     the loop.  The client channels are stopped when the loop exits.
     """
     cf = find_connection_file()
     client = BlockingKernelClient(connection_file=cf)
     client.load_connection_file()
     client.start_channels(shell=False,
                           iopub=True,
                           stdin=False,
                           control=True,
                           hb=False)
     try:
         while True:
             try:
                 # Pass the timeout by keyword.  Positionally it can bind to
                 # a different parameter (``block`` in older jupyter_client
                 # releases), which makes this call wait forever and
                 # starves the command queue below.
                 msg = client.get_iopub_msg(timeout=TIMEOUT)
                 self.pub_q.put(msg)
             except Empty:
                 pass
             if self.cmd_q.qsize():
                 cmd = self.cmd_q.get()
                 if cmd is None:
                     print('Client thread closing')
                     break
                 client.execute(cmd)
                 self.ctrl_q.put(client.get_shell_msg())
     finally:
         # Release the channels when the thread is told to close or fails.
         client.stop_channels()
class SendToIPython(object):
    """Neovim plugin object that sends editor lines to a Jupyter kernel.

    It lists the connection files found in the Jupyter runtime directory,
    connects a blocking client to a chosen one, forwards lines for execution
    and answers completion requests through that client.
    """

    def __init__(self, nvim):
        self.nvim = nvim
        self.client = None
        self.kerneldir = Path(jupyter_runtime_dir())

    @neovim.function('RunningKernels', sync=True)
    def running_kernels(self, args):
        """Return the names of the kernel connection files, newest first."""
        connection_files = sorted(
            self.kerneldir.glob('kernel-*.json'),
            key=lambda path: path.stat().st_ctime,
            reverse=True,
        )
        return [path.name for path in connection_files]

    @neovim.command('SendTo', complete='customlist,RunningKernels', nargs='?')
    def send_to(self, args):
        """Connect to the kernel given in `args`, or to the newest one found."""
        candidates = args or self.running_kernels(None)
        if not candidates:
            self.nvim.command('echom "No kernel found"')
            return

        # Drop the connection to a previously selected kernel first.
        if self.client is not None:
            self.client.stop_channels()

        cf = candidates[0]
        self.client = BlockingKernelClient()
        self.client.load_connection_file(self.kerneldir / cf)
        self.client.start_channels()

        # run function once to register it for the `funcref` function
        for vim_command in (
                'call SendLinesToJupyter()',
                'let g:send_target = {"send": funcref("SendLinesToJupyter")}',
                'echom "Sending to %s"' % cf,
        ):
            self.nvim.command(vim_command)

    @neovim.function('SendLinesToJupyter')
    def send_lines(self, args):
        """Execute the lines in ``args[0]`` as one block; no-op without args."""
        if not args:
            return
        source = '\n'.join(args[0])
        self.client.execute(source)

    @neovim.function('SendComplete', sync=True)
    def complete(self, args):
        """Completion callback, called with ``(findstart, base)``.

        With a true `findstart` the kernel is asked for completions of the
        current line and the start column is returned; otherwise the stored
        completions are returned.
        """
        findstart, base = args
        if self.client is None:
            return -3  # no client setup yet: cancel silently and leave completion mode

        if not findstart:
            # TODO: use vim's complete_add/complete_check for async operation
            get_info(self.client, self.completions)
            return {'words': self.completions, 'refresh': 'always'}

        line = self.nvim.current.line
        if not line:
            return -2  # empty line: cancel silently but stay in completion mode
        pos = self.nvim.current.window.cursor[1]
        try:
            response = self.client.complete(line,
                                            pos,
                                            reply=True,
                                            timeout=timeout)
            reply = response['content']
        except TimeoutError:
            return -2
        self.completions = [{'word': match, 'info': ' '}
                            for match in reply['matches']]
        return reply['cursor_start']

    @neovim.function('SendCanComplete', sync=True)
    def can_complete(self, args):
        """Tell whether completion is possible for the text in ``args[0]``."""
        if args[0] == '':
            return False
        if self.client is None:
            return False
        return self.client.is_alive()
Beispiel #30
0
class Kernel(object):
    """Runs an IPython kernel in a child process and executes code in it.

    The kernel's connection file is a dot file inside `active_dir`.  Both the
    child process and that file are cleaned up through ``atexit`` handlers.
    """

    def __init__(self, active_dir):
        # kernel config is stored in a dot file with the active directory
        config = os.path.join(active_dir,
                              ".kernel-%s.json" % str(uuid.uuid4()))
        # right now we're spawning a child process for IPython. we can
        # probably work directly with the IPython kernel API, but the docs
        # don't really explain how to do it.
        args = [sys.executable, '-m', 'IPython', 'kernel', '-f', config]
        p = subprocess.Popen(args,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # when __this__ process exits, we're going to remove the ipython config
        # file and kill the ipython subprocess
        atexit.register(p.terminate)

        def remove_config():
            os.remove(config)

        atexit.register(remove_config)

        # i found that if i tried to connect to the kernel immediately, it wasn't
        # up and running. 1.5 seconds was arbitrarily chosen (but seems to work)
        time.sleep(1.5)
        # fire up the kernel with the appropriate config
        self.client = BlockingKernelClient(connection_file=config)
        self.client.load_connection_file()
        self.client.start_channels()
        # load our monkeypatches...
        self.client.execute("%matplotlib inline")
        self.client.execute(autocomplete_patch)
        self.client.execute(vars_patch)

    def _run_code(self, code, timeout=0.1):
        """Execute `code` and block until the kernel reports it has finished.

        iopub messages are polled every `timeout` seconds.  While waiting,
        the latest text result (``execute_result``, ``stream`` or ``error``
        traceback) and the latest ``image/png`` payload of a
        ``display_data`` message are remembered; a later message replaces an
        earlier one.

        Returns a dict with keys ``msg_id``, ``output`` and ``image`` once an
        ``idle`` status arrives whose parent is this execute request.
        """
        msg_id = self.client.execute(code)
        data = None
        image = None
        while True:
            try:
                reply = self.client.get_iopub_msg(timeout=timeout)
            except Empty:
                continue

            content = reply['content']
            if "execution_state" in content:
                # A status message may carry an empty parent header, so read
                # it defensively instead of indexing.
                parent = reply.get('parent_header') or {}
                if (content['execution_state'] == "idle"
                        and parent.get('msg_id') == msg_id
                        and parent.get('msg_type') == "execute_request"):
                    return {
                        "msg_id": msg_id,
                        "output": data,
                        "image": image
                    }
            else:
                msg_type = reply['header']['msg_type']
                if msg_type == "execute_result":
                    data = content['data'].get('text/plain', '')
                elif msg_type == "display_data":
                    image = content['data'].get('image/png', '')
                elif msg_type == "stream":
                    data = content.get('text', '')
                elif msg_type == "error":
                    data = "\n".join(content['traceback'])

    def execute(self, code):
        """Run `code` in the kernel and return the result dict of `_run_code`."""
        return self._run_code(code)

    def complete(self, code):
        """Return the result of calling ``__autocomplete`` on `code` in the kernel."""
        # i couldn't figure out how to get the autocomplete working with the
        # ipython kernel (i couldn't get a completion_reply from the iopub), so
        # we're using jedi to do the autocompletion. the __autocomplete is
        # defined in `autocomplete_patch` above.
        #
        # %r emits a properly escaped string literal, so text containing
        # quotes, backslashes or newlines still forms valid source.
        return self.execute("__autocomplete(%r)" % code)

    def get_dataframes(self):
        """Return the result of calling ``__get_variables()`` in the kernel."""
        return self.execute("__get_variables()")
def setup_kernel(cmd):
    """Run `cmd` in a child Python process and yield a client connected to it.

    This is a generator that yields exactly once. The yielded object is a
    started ``BlockingKernelClient`` (not a KernelManager); all cleanup runs
    when the generator is resumed or closed.
    NOTE(review): presumably meant to be wrapped with
    ``contextlib.contextmanager`` -- no decorator is visible here, confirm.

    Parameters
    ----------
    cmd : str
        Python source handed to ``sys.executable -c``. It is expected to start
        an embedded kernel that writes ``kernel-<pid>.json`` into the Jupyter
        runtime directory.

    Yields
    ------
    BlockingKernelClient
        Client with channels started and ``wait_for_ready()`` completed.

    Raises
    ------
    OSError
        If the child exits during startup, or if the connection file does not
        exist once the wait (bounded by ``SETUP_TIMEOUT``) is over.
    """

    def connection_file_ready(connection_file):
        """Return True once `connection_file` exists and parses as JSON."""
        if os.path.exists(connection_file):
            try:
                with open(connection_file) as handle:
                    json.load(handle)
            except ValueError:
                # Present but not (yet) valid JSON.
                return False
            return True
        return False

    kernel = Popen(
        [sys.executable, "-c", cmd],
        stdout=PIPE,
        stderr=PIPE,
        encoding="utf-8",
    )
    try:
        runtime_dir = paths.jupyter_runtime_dir()
        connection_file = os.path.join(runtime_dir, "kernel-%i.json" % kernel.pid)

        # Poll until the file is usable, the child has died, or we time out.
        started = time.time()
        while True:
            if connection_file_ready(connection_file):
                break
            if kernel.poll() is not None:
                break
            if time.time() >= started + SETUP_TIMEOUT:
                break
            time.sleep(0.1)

        # Give the writer another 100ms to finish.
        time.sleep(0.1)

        if kernel.poll() is not None:
            _, err_text = kernel.communicate()
            raise OSError("Kernel failed to start:\n%s" % err_text)

        if not os.path.exists(connection_file):
            if kernel.poll() is None:
                kernel.terminate()
            raise OSError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        client.wait_for_ready()
        try:
            yield client
        finally:
            client.stop_channels()
    finally:
        kernel.terminate()
        kernel.wait()
        # Close every pipe that was actually opened for the child.
        for stream in (kernel.stdout, kernel.stderr, kernel.stdin):
            if stream:
                stream.close()
Beispiel #32
0
class Kernel(object):
    """Handle on an IPython kernel that is spawned as a child process.

    The constructor launches the kernel (directly, or through
    ``$SPARK_HOME/bin/pyspark`` when `pyspark` is truthy), connects a
    ``BlockingKernelClient`` to it and sends the module-level patches
    ``autocomplete_patch`` and ``vars_patch``.
    """

    def __init__(self, active_dir, pyspark):
        """Spawn the kernel process and connect ``self.client`` to it.

        Args:
            active_dir: directory in which the kernel connection file
                (``.kernel-<uuid>.json``) is placed.
            pyspark: when truthy, launch through ``$SPARK_HOME/bin/pyspark``
                (driven by the ``IPYTHON_OPTS``, ``SPARK_OPTS`` and
                ``SPARK_LOG`` environment variables) instead of
                ``python -m IPython kernel``.
        """
        # kernel config is stored in a dot file with the active directory
        config = os.path.join(active_dir, ".kernel-%s.json" % str(uuid.uuid4()))
        # right now we're spawning a child process for IPython. we can
        # probably work directly with the IPython kernel API, but the docs
        # don't really explain how to do it.
        log_file = None
        if pyspark:
            os.environ["IPYTHON_OPTS"] = "kernel -f %s" % config
            pyspark = os.path.join(os.environ.get("SPARK_HOME"), "bin/pyspark")
            spark_log = os.environ.get("SPARK_LOG", None)
            if spark_log:
                log_file = open(spark_log, "w")
            spark_opts = os.environ.get("SPARK_OPTS", "")
            args = [pyspark] + spark_opts.split()  # $SPARK_HOME/bin/pyspark <SPARK_OPTS>
            p = subprocess.Popen(args, stdout=log_file, stderr=log_file)
        else:
            args = [sys.executable, '-m', 'IPython', 'kernel', '-f', config]
            p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # when __this__ process exits, we're going to remove the ipython config
        # file and kill the ipython subprocess
        atexit.register(p.terminate)

        def remove_config():
            # The kernel may never have written the file (or may already have
            # removed it); an exit hook must not raise in that case.
            if os.path.isfile(config):
                os.remove(config)
        atexit.register(remove_config)

        def close_file():
            if log_file:
                log_file.close()
        atexit.register(close_file)

        # i found that if i tried to connect to the kernel immediately, it wasn't
        # up and running. 1.5 seconds was arbitrarily chosen (but seems to work)
        time.sleep(1.5)
        # fire up the kernel with the appropriate config
        self.client = BlockingKernelClient(connection_file=config)
        self.client.load_connection_file()
        self.client.start_channels()
        # load our monkeypatches...
        self.client.execute("%matplotlib inline")
        self.client.execute(autocomplete_patch)
        self.client.execute(vars_patch)

    def _run_code(self, code, timeout=0.1):
        """Execute `code` and block until the kernel reports it is idle again.

        Steps:
          1) execute the code and remember the id of that request
          2) read messages from the "iopub" channel (the output stream)
          3) store the payload of these message types:
               - execute_result -> ``output`` (text/plain repr)
               - stream         -> ``output`` (stream text)
               - display_data   -> ``image``  (image/png)
               - error          -> ``error``  (traceback lines joined by newlines)
          4) return once a status message says the kernel is idle and its
             parent is our original execute_request.

        Each new message of a kind replaces the previously stored value, so
        only the last stream chunk / result is kept.

        Args:
            code: source to execute in the kernel.
            timeout: seconds to wait for each single iopub message before
                polling again.

        Returns:
            dict with the keys ``msg_id``, ``output``, ``image`` and ``error``.
        """
        msg_id = self.client.execute(code)
        output = {"msg_id": msg_id, "output": None, "image": None, "error": None}
        while True:
            try:
                reply = self.client.get_iopub_msg(timeout=timeout)
            except Empty:
                continue

            content = reply['content']
            if "execution_state" in content:
                if content['execution_state'] == "idle":
                    # .get(): a status message can carry an empty parent
                    # header, which must be ignored rather than raise KeyError.
                    parent = reply['parent_header']
                    if (parent.get('msg_id') == msg_id
                            and parent.get('msg_type') == "execute_request"):
                        return output
                continue

            msg_type = reply['header']['msg_type']
            if msg_type == "execute_result":
                output['output'] = content['data'].get('text/plain', '')
            elif msg_type == "display_data":
                output['image'] = content['data'].get('image/png', '')
            elif msg_type == "stream":
                output['output'] = content.get('text', '')
            elif msg_type == "error":
                output['error'] = "\n".join(content['traceback'])

    def execute(self, code):
        """Run `code` in the kernel; see `_run_code` for the returned dict."""
        return self._run_code(code)

    def complete(self, code):
        """Request completions for `code` via the kernel-side `__autocomplete`."""
        # i couldn't figure out how to get the autocomplete working with the
        # ipython kernel (i couldn't get a completion_reply from the iopub), so
        # we're using jedi to do the autocompletion. the __autocomplete is
        # defined in `autocomplete_patch` above.
        #
        # %r embeds `code` as an escaped string literal so quotes, backslashes
        # and newlines cannot break (or inject into) the generated call.
        return self.execute("__autocomplete(%r)" % (code,))

    def get_dataframes(self):
        """Evaluate the kernel-side `__get_variables()` and return the result."""
        return self.execute("__get_variables()")
Beispiel #33
0
def main(kid, var, pid):
    """Send code to kernel `kid` that copies the variable `var` into PostgreSQL.

    The code executed inside the kernel is assembled from four snippets:

      0. input bindings (process id, the variable itself and its name, SQL
         settings taken from ``cfg``, path of the JSON status file),
      1. conversion of the variable to a list of rows via ``to_json``,
      2. helpers that use the JSON status file as a lock / status store,
      3. table creation, row insertion and a status update to ``"done"``.

    Args:
        kid: kernel id passed to ``find_connection_file``.
        var: name of the variable in the kernel namespace. It is interpolated
            both as a bare expression (``var={var}``) and as a string.
        pid: process id recorded in the JSON status file.

    The client channels are stopped even when execution fails or times out.
    """
    # Step 0: get all the inputs

    load_input_code = f"""
proc_id="{pid}"
var={var}
var_name="{var}"
sql_name = "{cfg.sql_name}"
sql_password = "******"
sql_dbname = "{cfg.sql_dbname}"
sql_schema_name = "{cfg.sql_schema_name}"
sql_table_name = "{cfg.sql_table_name}"
json_file_name = "/Users/peterchan/Desktop/GitHub/jupyter-extension/juneau_extension/data_file.json"
    """

    # Step 1: access the table and convert it to JSON
    # (plain string: there is nothing to interpolate here)

    request_var_code = """
import numpy as np
import pandas as pd
import json
import copy

if type(var) is pd.DataFrame or type(var) is np.ndarray or type(var) is list:
    df_json_string = var.to_json(orient='split', index=False)
    df_ls = json.loads(df_json_string)['data']
    df_ls_copy = copy.deepcopy(df_ls)
    """

    # Step 2: define the functions used to write to the JSON file

    json_lock_code = """
def initialize():
    data = {
        "ownerID": "",
        "id123": "operating",
        "id124": "finish"
    }
    with open("./juneau_extension/data_file.json", "w") as file:
        json.dump(data, file, indent=4)


def acquire_lock(pid):
    with open(json_file_name, "r+") as file:
        try:
            data = json.load(file)
            if data["ownerID"]:
                return False
            else:
                file.seek(0)
                file.truncate()
                data['ownerID'] = pid
                json.dump(data, file, indent=4)
                return True
        except Exception:
            return False


def release_lock(pid):
    with open(json_file_name, "r+") as file:
        data = json.load(file)
        if data['ownerID'] == pid:
            file.seek(0)
            file.truncate()
            data['ownerID'] = ""
            json.dump(data, file, indent=4)


# input: id of the process
# remove from the file if the process is completed/ terminated/ timed out
def update_exec_status(status, pid):
    done = False
    while not done:
        success = acquire_lock(pid)
        if success:
            try:
                with open(json_file_name, "r+") as file:
                    data = json.load(file)
                    if not data['ownerID'] == pid:
                        continue
                    file.seek(0)
                    file.truncate()
                    data[pid] = status
                    json.dump(data, file, indent=4)
                release_lock(pid)
                done = True
            except Exception:
                continue
    return True
    """

    # Step 3: connect to SQL and insert the table
    # (deliberately NOT an f-string: the f-strings below are evaluated in the
    # kernel, against the names bound by the step 0 snippet)

    insert_code = """
from sqlalchemy import create_engine

conn_string = f"postgresql://{sql_name}:{sql_password}@localhost/{sql_dbname}"
table_string = f"{sql_schema_name}.{sql_table_name}"

engine = create_engine(conn_string)
with engine.connect() as connection:
    insertion_string = f'CREATE TABLE {sql_schema_name}.{var_name} ("A" int, "B" int, "C" int, "D" int);'
    for ls in df_ls_copy:
        insertion_string += f"INSERT INTO {sql_schema_name}.{var_name} VALUES ({ls[0]}, {ls[1]}, {ls[2]}, {ls[3]});"

    connection.execute(insertion_string)
    print(proc_id)
    update_exec_status("done", proc_id)
    
    rows = connection.execute(f"select * from {sql_schema_name}.{var_name} limit 5;")
    for row in rows:
        print(row)
    """

    code = load_input_code + request_var_code + json_lock_code + insert_code

    # load connection info and init communication
    cf = find_connection_file(kid)
    km = BlockingKernelClient(connection_file=cf)
    km.load_connection_file()
    km.start_channels()
    try:
        km.execute_interactive(code, timeout=TIMEOUT)
    finally:
        # Always release the channels, including on timeout/errors.
        km.stop_channels()
Beispiel #34
0
class SshKernel:
    """Remote ipykernel via SSH

    Raises:
        SshKernelException: "Could not create kernel_info file"
        SshKernelException: "Could not initialize kernel"

    Arguments:
        host {str} -- host where the remote ipykernel should be started
        connection_info {dict} -- Local ipykernel connection info as provided by Juypter lab
        python_path {str} -- Remote python prefix; "bin/python" is appended to it

    Keyword Arguments:
        sudo {bool} -- Start ipykernel as root if necessary (default: {False})
        timeout {int} -- Timeout handed to the spawned ssh process (default: {5})
        env {str or iterable of str} -- Environment variables passed to the
            ipykernel, either "VAR1=VAL1 VAR2=VAL2" or ["VAR1=VAL1", "VAR2=VAL2"]
            (default: {""})
        ssh_config {str} -- Path to the local SSH config file (default: {Path.home() / ".ssh" / "config"})
        quiet {bool} -- Pass "-q" to ssh (default: {True})
        verbose {bool} -- Pass "-v" to ssh when not quiet (default: {False})
        msg_interval {int} -- Divided by `timeout` to get the number of
            `check_alive` calls between two periodic "alive" log lines (default: {30})
        logger -- Logger to use; a "SshKernel" logger is created when None
    """
    def __init__(
        self,
        host,
        connection_info,
        python_path,
        sudo=False,
        timeout=5,
        env="",
        ssh_config=None,
        quiet=True,
        verbose=False,
        msg_interval=30,
        logger=None,
    ):
        self.host = host
        self.connection_info = connection_info
        self.python_full_path = PurePosixPath(python_path) / "bin/python"
        self.sudo = sudo
        self.timeout = timeout
        self.env = env
        self.ssh_config = (Path.home() / ".ssh" /
                           "config" if ssh_config is None else ssh_config
                           )  # OS specific path

        self.quiet = quiet
        self.verbose = verbose

        self._connection = None
        # Created later by kernel_client(); defined here so close() is safe
        # to call at any time.
        self.kc = None

        self.remote_ports = {}
        self.uuid = str(uuid.uuid4())
        self.fname = "/tmp/.ssh_ipykernel_%s.json" % self.uuid  # POSIX path

        if logger is None:
            self._logger = setup_logging("SshKernel")
        else:
            self._logger = logger

        self._logger.debug("Remote kernel info file: {0}".format(self.fname))
        self._logger.debug(
            "Local connection info: {0}".format(connection_info))

        self.kernel_pid = 0
        self.status = Status(connection_info, self._logger)
        # At least 1: check_alive() uses this as a modulus, and a timeout
        # larger than msg_interval would otherwise truncate it to 0.
        self.msg_interval = max(1, int(msg_interval / timeout))
        self.msg_counter = 0

    def _execute(self, cmd):
        """Run `cmd` (argument list) and return ``(returncode, payload)``.

        On success the payload is the captured stdout bytes; on a non-zero
        exit it is the ``args`` of the ``CalledProcessError``.
        """
        try:
            result = subprocess.check_output(cmd)
            return 0, result
        except subprocess.CalledProcessError as e:
            return e.returncode, e.args

    def _ssh(self, cmd):
        """Run the shell command string `cmd` on ``self.host`` via ssh."""
        return self._execute([SSH, self.host, cmd])

    def _build_remote_command(self):
        """Return the command line that starts ipykernel on the remote host."""
        sudo = "sudo " if self.sudo else ""

        # `env` may be None, a ready-made "A=1 B=2" string, or an iterable of
        # "A=1" items. Joining a plain string would put a space between every
        # character, and None previously left `env` unbound.
        if self.env is None:
            env = ""
        elif isinstance(self.env, str):
            env = self.env
        else:
            env = " ".join(self.env)

        return "{sudo} {env} {python} -m ipykernel_launcher -f {fname}".format(
            sudo=sudo, env=env, python=self.python_full_path, fname=self.fname)

    def close(self):
        """Close the ssh connection and stop the kernel client channels.

        Does nothing when no ssh connection was ever created; tolerates a
        kernel client that has not been created yet.
        """
        if self._connection is not None:
            if self._connection.isalive():
                self._connection.logout()
                self._logger.debug("Ssh connection closed")
            kc = getattr(self, "kc", None)
            if kc is not None and kc.is_alive():
                kc.stop_channels()
                self._logger.debug("Kernel client channels stopped")

    def create_remote_connection_info(self):
        """Create a remote ipykernel connection info file
        Uses KERNEL_SCRIPT to execute jupyter_client.write_connection_file remotely to request remote ports.
        The remote ports will be returned as json and stored to built the SSH tunnels later.

        Raises:
            SshKernelException: "Could not create kernel_info file"
        """
        self._logger.info("Creating remote connection info")
        script = KERNEL_SCRIPT.format(fname=self.fname, **self.connection_info)

        # Collapse the script into a single ';'-separated line for `python -c`.
        cmd = "{python} -c '{command}'".format(python=self.python_full_path,
                                               command="; ".join(
                                                   script.strip().split("\n")))

        result = self._ssh(cmd)
        self._logger.debug(result)
        if result[0] == 0:
            self.remote_ports = json.loads(result[1].decode("utf-8"))
            self._logger.debug("Local ports  = %s" % {
                k: v
                for k, v in self.connection_info.items() if "_port" in k
            })
            self._logger.debug("Remote ports = %s" % self.remote_ports)
        else:
            self.status.set_unreachable(self.kernel_pid, self.sudo)
            raise SshKernelException("Could not create kernel_info file")

    def kernel_client(self):
        """Create ``self.kc`` from the local connection info and start its channels."""
        self.kc = BlockingKernelClient()
        self.kc.load_connection_info(self.connection_info)
        self.kc.start_channels()

    def kernel_init(self):
        """Query the remote kernel for its pid and store it in ``self.kernel_pid``.

        Returns:
            True once the pid was retrieved, False when the kernel is not alive.

        Raises:
            SshKernelException: after more than 5 attempts ended with
                "Timeout waiting for output".
        """
        done = False
        if self.check_alive(show_pid=False):
            i = 0
            while not done:
                try:
                    i += 1
                    self._logger.debug("Retrieving kernel pid, attempt %d" % i)
                    result = self.kc.execute_interactive(
                        "import os",
                        user_expressions={"pid": "os.getpid()"},
                        store_history=False,
                        silent=True,
                        timeout=2,
                    )
                    self._logger.debug("result = %s" % str(result["content"]))
                    self.kernel_pid = int(result["content"]["user_expressions"]
                                          ["pid"]["data"]["text/plain"])
                    self._logger.debug("Remote kernel pid %d" %
                                       self.kernel_pid)
                    done = True
                except Exception as ex:
                    msg = str(ex)
                    if msg == "Timeout waiting for output":
                        self._logger.warning("Warning: {}".format(msg))
                        if i > 5:
                            self._logger.error(
                                "Max attempts (5) reached, stopping")
                            raise SshKernelException(
                                "Could not initialize kernel") from ex
                    else:
                        # NOTE(review): any other error is only logged and the
                        # request is retried without an attempt limit.
                        self._logger.error("Warning: {}".format(str(ex)))
        return done

    def kernel_customize(self):
        """Hook for subclasses; called after the kernel was initialised."""
        pass

    def check_alive(self, show_pid=True):
        """Return whether both the ssh connection and the kernel client are alive.

        The state is logged when it is "not alive", and otherwise only on
        every ``self.msg_interval``-th call.
        """
        alive = self._connection.isalive() and self.kc.is_alive()
        if show_pid:
            msg = "Remote kernel ({}, pid = {}) is {}alive".format(
                self.host, self.kernel_pid, "" if alive else "not ")
        else:
            msg = "Remote kernel is {}alive".format("" if alive else "not ")

        if not alive or self.msg_counter % self.msg_interval == 0:
            self.msg_counter = 0
            self._logger.info(msg)

        self.msg_counter += 1
        return alive

    def interrupt_kernel(self):
        """Send an interrupt over the ssh connection (not supported on Windows)."""
        if self._connection.isalive():
            if is_windows:
                self._logger.warning(
                    'On Windows use "Interrupt remote kernel" button')
            else:
                self._logger.warning("Sending interrupt to remote kernel")
                self._connection.sendintr()  # send SIGINT

    def start_kernel_and_tunnels(self):
        """Start Kernels and SSH tunnels
        A new ssh child process will be spawned that will
        - set up the necessary ssh tunnels between remote kernel ports and local kernel ports
        - start the ipykernel on the remote host

        Afterwards the method relays the remote output line by line to the
        logger until the ssh process ends. Exits the interpreter with status 1
        when the connection or the kernel client cannot be set up.
        """
        self._logger.info("Setting up ssh tunnels")

        ssh_tunnels = []
        for port_name, remote_port in self.remote_ports.items():
            ssh_tunnels += [
                "-L",
                "{local_port}:127.0.0.1:{remote_port}".format(
                    local_port=self.connection_info[port_name],
                    remote_port=remote_port,
                ),
            ]

        self._logger.info("Starting remote kernel")

        # Build remote command
        cmd = self._build_remote_command()

        # Build ssh command with all flags and tunnels
        if self.quiet:
            args = ["-q"]
        elif self.verbose:
            args = ["-v"]
        else:
            args = []
        args += ["-t", "-F", str(self.ssh_config)
                 ] + ssh_tunnels + [self.host, cmd]

        self._logger.debug("%s %s" % (SSH, " ".join(args)))

        try:
            # Start the child process
            self._connection = expect.spawn(SSH,
                                            args=args,
                                            timeout=self.timeout,
                                            **ENCODING)
            # get blocking kernel client
            self.kernel_client()
            # initialize it
            if self.kernel_init():
                self.status.set_running(self.kernel_pid, self.sudo)
                # run custom code if part of sub class
                self.kernel_customize()
            else:
                self.status.set_connect_failed(sudo=self.sudo)
        except Exception as e:
            tb = sys.exc_info()[2]
            self._logger.error(str(e.with_traceback(tb)))
            self._logger.error("Cannot contiune, exiting")
            sys.exit(1)

        prompt = re.compile(r"\n")

        while True:
            try:
                # Wait for the next line of remote output
                self._connection.expect(prompt)
                # print the outputs
                self._logger.info(self._connection.before.strip("\r\n"))

            except KeyboardInterrupt:
                self.interrupt_kernel()
                self.check_alive()

            except expect.TIMEOUT:
                # No output within `timeout`: use the pause as a heartbeat.
                self.check_alive()

            except expect.EOF:
                # The program has exited
                self._logger.info("The program has exited.")
                self.status.set_down(self.kernel_pid, self.sudo)
                break

        self.close()
        self.status.close()
 def create_kernel_client(self, ci):
     """Return a BlockingKernelClient loaded with connection info `ci`, channels started."""
     client = BlockingKernelClient()
     client.load_connection_info(ci)
     # Channels are started before the client is handed to the caller.
     client.start_channels()
     return client
Beispiel #36
0
class Kernel(object):
    """Handle on an IPython kernel that is spawned as a child process.

    The constructor launches ``python -m IPython kernel``, connects a
    ``BlockingKernelClient`` to it and sends the module-level patches
    ``autocomplete_patch`` and ``vars_patch``.
    """

    def __init__(self, active_dir):
        """Spawn the kernel process and connect ``self.client`` to it.

        Args:
            active_dir: directory in which the kernel connection file
                (``.kernel-<uuid>.json``) is placed.
        """
        # kernel config is stored in a dot file inside `active_dir`
        config = os.path.join(active_dir, ".kernel-%s.json" % str(uuid.uuid4()))
        args = [sys.executable, '-m', 'IPython', 'kernel', '-f', config]
        p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # when __this__ process exits, we're going to remove the ipython config
        # file and kill the ipython subprocess
        atexit.register(p.terminate)

        def remove_config():
            # The kernel may never have written the file (or may already have
            # removed it); an exit hook must not raise in that case.
            if os.path.isfile(config):
                os.remove(config)
        atexit.register(remove_config)

        # i found that if i tried to connect to the kernel immediately, it wasn't up
        # and running. 1.5 seconds was arbitrarily chosen (but seems to work)
        time.sleep(1.5)
        # fire up the kernel with the appropriate config
        self.client = BlockingKernelClient(connection_file=config)
        self.client.load_connection_file()
        self.client.start_channels()
        # load our monkeypatches...
        self.client.execute("%matplotlib inline")
        self.client.execute(autocomplete_patch)
        self.client.execute(vars_patch)

    def _run_code(self, code, timeout=0.1):
        """Execute `code` and block until the kernel reports it is idle again.

        Steps:
          1) execute the code and remember the id of that request
          2) read messages from the "iopub" channel (the output stream)
          3) store the payload of these message types:
               - execute_result -> ``output`` (text/plain repr)
               - stream         -> ``output`` (stream text)
               - error          -> ``output`` (traceback lines joined by newlines)
               - display_data   -> ``image``  (image/png)
          4) return once a status message says the kernel is idle and its
             parent is our original execute_request.

        Each new message replaces the previously stored value of its slot, so
        only the last one is kept.

        Args:
            code: source to execute in the kernel.
            timeout: seconds to wait for each single iopub message before
                polling again.

        Returns:
            dict with the keys ``msg_id``, ``output`` and ``image``.
        """
        msg_id = self.client.execute(code)
        data = None
        image = None
        while True:
            try:
                reply = self.client.get_iopub_msg(timeout=timeout)
            except Empty:
                continue

            content = reply['content']
            if "execution_state" in content:
                if content['execution_state'] == "idle":
                    # .get(): a status message can carry an empty parent
                    # header, which must be ignored rather than raise KeyError.
                    parent = reply['parent_header']
                    if (parent.get('msg_id') == msg_id
                            and parent.get('msg_type') == "execute_request"):
                        return {"msg_id": msg_id, "output": data, "image": image}
                continue

            msg_type = reply['header']['msg_type']
            if msg_type == "execute_result":
                data = content['data'].get('text/plain', '')
            elif msg_type == "display_data":
                image = content['data'].get('image/png', '')
            elif msg_type == "stream":
                data = content.get('text', '')
            elif msg_type == "error":
                data = "\n".join(content['traceback'])

    def execute(self, code):
        """Run `code` in the kernel; see `_run_code` for the returned dict."""
        return self._run_code(code)

    def complete(self, code):
        """Request completions for `code` via the kernel-side `__autocomplete`."""
        # i couldn't figure out how to get the autocomplete working with the ipython
        # kernel (i couldn't get a completion_reply from the iopub), so we're using
        # jedi to do the autocompletion. the __autocomplete is defined in `autocomplete_patch`
        # above.
        #
        # %r embeds `code` as an escaped string literal so quotes, backslashes
        # and newlines cannot break (or inject into) the generated call.
        return self.execute("__autocomplete(%r)" % (code,))
Beispiel #37
0
class JupyterRAMUsage(Stat):
    """
    tag: ``jupyter.ram_usage``

    settings:

    .. code-block:: javascript

        {
            "connection info": "",
            "query interval [s]": 1
        }

    Tracks the RAM usage of all variables in
    a user-specified jupyter notebook. If no connection info is given in the
    settings, take the kernel with the latest start date.

    ``connection info`` must be a string containing the info displayed when
    running ``%connect_info`` in a jupyter notebook
    (you can directly copy-paste it).

    ``query interval [s]`` specifies how often the thread running in the
    jupyter notebook should read the variables. The lower this is, the higher
    the resolution of the stat but it might start affecting the speed of
    your notebook when too low.

    Note that RAM tracked in this way is not equal to the actual RAM
    the OS needs because some further optimization is done by e. g. numpy
    to reduce the OS memory usage.
    """
    name = 'RAM Usage of objects in a Python Jupyter Notebook [MB]'
    base_tag = 'ram_usage'
    default_settings = {
        'connection info': '',
        # how often the memory usage is read in the jupyter notebook
        'query interval [s]': 1.
    }

    @classmethod
    def _read_latest_connection_file(cls):
        """
        Reads the latest jupyter kernel connection file.
        https://jupyter.readthedocs.io/en/latest/projects/jupyter-directories.html.

        Returns the parsed JSON content, or `None` if the runtime directory
        contains no ``kernel-*.json`` file.
        """
        runtime_dir = jupyter_runtime_dir()
        files = glob.glob(os.path.join(runtime_dir, 'kernel-*.json'))
        if not files:
            return None

        # use the latest connection file
        connection_file = max(files, key=os.path.getctime)
        with open(connection_file, 'r') as f:
            return json.load(f)

    @classmethod
    def get_connection_info(cls):
        """
        Get the target kernel connection info.
        Returns a dictionary of the connection info supplied
        in the settings, or the latest started kernel if none is given.
        Returns `None` if no kernel has been found.
        """
        if not cls.settings['connection info']:
            return cls._read_latest_connection_file()
        return json.loads(cls.settings['connection info'])

    @classmethod
    def check_availability(cls):
        """Raise `StatNotAvailableError` if no connection info can be found."""
        if cls.get_connection_info() is None:
            raise exceptions.StatNotAvailableError(
                'Could not find any running kernel.')

    def __init__(self, fps):
        """Connect to the target kernel and start the measuring thread in it."""
        self.config = self.get_connection_info()
        data_dir = appdirs.user_data_dir('permon', 'bminixhofer')
        os.makedirs(data_dir, exist_ok=True)

        self.usage_file = os.path.join(data_dir, 'jupyter_ram_usage.csv')
        # create / truncate the file so get_stat() can read it right away
        with open(self.usage_file, 'w'):
            pass

        # self.setup_code is the code that is run in the notebook when the
        # stat is instantiated. It starts a thread which reads the memory
        # usage of all public variables in a set interval and saves it to a
        # csv file in the user data directory.
        #
        # The file path is embedded with !r so that it becomes a correctly
        # escaped string literal (paths containing backslashes or quotes would
        # otherwise break the generated code), and the file is opened with
        # newline='' as the csv module requires for writers.
        self.setup_code = f"""
if '_permon_running' not in globals() or not _permon_running:
    import threading
    import csv
    import sys
    import time
    from pympler import asizeof
    from types import ModuleType

    def _permon_get_ram_usage_per_object():
        while _permon_running:
            ram_usage = []
            global_vars = [key for key in globals() if not key.startswith('_')]
            for name in global_vars:
                value = globals()[name] if name in globals() else None
                if isinstance(value, ModuleType):
                    continue

                try:
                    ram_usage.append((name, asizeof.asizeof(value)))
                except TypeError:
                    continue

            with open({self.usage_file!r}, 'w', newline='') as f:
                writer = csv.writer(f, delimiter=',')
                for name, ram in ram_usage:
                    writer.writerow([name, ram])
            time.sleep({self.settings['query interval [s]']})

    _permon_thread = threading.Thread(target=_permon_get_ram_usage_per_object)
    _permon_running = True
    _permon_thread.start()
"""
        self.teardown_code = """
_permon_running = False
"""
        self.client = BlockingKernelClient()
        self.client.load_connection_info(self.config)
        self.client.start_channels()
        self.client.execute(self.setup_code)
        super(JupyterRAMUsage, self).__init__(fps=fps)

    def __del__(self):
        # stop the thread running in the jupyter notebook
        # and stop the connection to the kernel upon deletion.
        # __init__ may have failed before the client existed, in which case
        # there is nothing to tear down.
        client = getattr(self, 'client', None)
        if client is None:
            return
        try:
            client.execute(self.teardown_code)
        finally:
            client.stop_channels()

    def get_stat(self):
        """Return ``(total_mb, top5)`` read from the csv file written by the kernel.

        ``top5`` is a list of up to five ``(name, megabytes)`` tuples, largest
        first; ``total_mb`` is the sum over all variables.
        """
        ram_usage = []
        with open(self.usage_file, 'r') as f:
            reader = csv.reader(f)
            for row in reader:
                # skip blank or truncated lines (the kernel thread rewrites
                # this file while we may be reading it)
                if len(row) < 2:
                    continue
                ram_usage.append((row[0], float(row[1]) / 1000**2))
        # sort the ram_usage list so that the largest variables come first
        ram_usage = sorted(ram_usage, key=lambda x: x[1], reverse=True)

        # return the sum of RAM usage and the variables taking up the most RAM
        return sum(x[1] for x in ram_usage), ram_usage[:5]

    @property
    def minimum(self):
        return 0

    @property
    def maximum(self):
        return None
Beispiel #38
0
class Kernel(object):
    """Spawn an IPython kernel as a child process and drive it via a client.

    Progress and results of executions and completions are reported as
    newline-delimited JSON objects written to ``sys.stdout``.
    """

    # kernel config is stored in a dot file with the active directory
    def __init__(self, config, active_dir, pyspark):
        """Start the kernel process and connect a blocking client to it.

        Args:
            config: path of the connection file handed to the kernel via
                ``-f``. The file is removed again when this process exits.
            active_dir: not used by this constructor.
            pyspark: when truthy, launch ``$SPARK_HOME/bin/pyspark`` (with
                ``SPARK_OPTS`` as extra arguments and output redirected to
                ``SPARK_LOG`` if set) instead of a plain IPython kernel.
        """
        # right now we're spawning a child process for IPython. we can
        # probably work directly with the IPython kernel API, but the docs
        # don't really explain how to do it.
        log_file = None
        if pyspark:
            os.environ["IPYTHON_OPTS"] = "kernel -f %s" % config
            pyspark_bin = os.path.join(os.environ.get("SPARK_HOME"), "bin/pyspark")
            spark_log = os.environ.get("SPARK_LOG", None)
            if spark_log:
                log_file = open(spark_log, "w")
            spark_opts = os.environ.get("SPARK_OPTS", "")
            args = [pyspark_bin] + spark_opts.split()  # $SPARK_HOME/bin/pyspark <SPARK_OPTS>
            p = subprocess.Popen(args, stdout=log_file, stderr=log_file)
        else:
            args = [sys.executable, '-m', 'IPython', 'kernel', '-f', config]
            p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # when __this__ process exits, we're going to remove the ipython config
        # file and kill the ipython subprocess
        atexit.register(p.terminate)

        def remove_config():
            if os.path.isfile(config):
                os.remove(config)
        atexit.register(remove_config)

        # connecting to the kernel immediately does not work, so we wait
        # until the config file exists before moving on.
        # NOTE(review): this loop never ends if the child process dies before
        # writing the file.
        while not os.path.isfile(config):
            time.sleep(0.1)

        def close_file():
            if log_file:
                log_file.close()
        atexit.register(close_file)

        # fire up the kernel with the appropriate config
        self.client = BlockingKernelClient(connection_file=config)
        self.client.load_connection_file()
        self.client.start_channels()
        # remembered so the patches can be re-applied after a %reset
        self._python_patch_file = os.path.join(dirname, "langs", "python-patch.py")
        self._load_patches()

    @staticmethod
    def _emit(payload):
        """Write `payload` to stdout as a single JSON line and flush."""
        sys.stdout.write(json.dumps(payload) + '\n')
        sys.stdout.flush()

    def _load_patches(self):
        """Send the monkeypatches (inline matplotlib + patch file) to the kernel."""
        self.client.execute("%matplotlib inline")
        self.client.execute("%run " + self._python_patch_file)

    def _run_code(self, execution_id, code, timeout=0.1):
        """Execute `code` in the kernel and stream its output to stdout.

        Emits a ``"started"`` request record, then one output record for each
        iopub message received, and finally the request record again with
        status ``"complete"``.

        Args:
            execution_id: caller-chosen identifier echoed back as ``"id"``.
            code: source code to execute.
            timeout: seconds to wait for each iopub message before polling
                again.

        Returns:
            None. All results are written to stdout.
        """
        # this function executes some code and waits for it to completely finish
        # before returning. i don't think that this is necessarily the best
        # way to do this, but the IPython documentation isn't very helpful for
        # this particular topic.
        #
        # 1) execute code and grab the ID for that execution thread
        # 2) look for messages coming from the "iopub" channel (this is just a
        #    stream of output)
        # 3) when we get a message that is one of the following, save relevant
        # data to `output`:
        #       - execute_result - content from repr
        #       - stream - content from stdout
        #       - error - ansi encoded stacktrace
        # the final piece is that we check for when the message indicates that
        # the kernel is idle and the message's parent is the original execution
        # ID (msg_id) that's associated with our executing code. if this is the
        # case, we report completion and exit
        msg_id = self.client.execute(code, allow_stdin=False)
        request = { "id": execution_id, "msg_id": msg_id, "code": code, "status": "started" }
        self._emit(request)
        output = {
            "id": execution_id,
            "msg_id": msg_id,
            "output": "",
            "stream": None,
            "image": None,
            "error": None
        }
        while True:
            try:
                reply = self.client.get_iopub_msg(timeout=timeout)
            except Empty:
                continue

            content = reply['content']
            if "execution_state" in content:
                # .get(): a status message may carry an empty parent header
                parent = reply['parent_header']
                if (content['execution_state'] == "idle"
                        and parent.get('msg_id') == msg_id
                        and parent.get('msg_type') == "execute_request"):
                    request["status"] = "complete"
                    self._emit(request)
                    return
            else:
                msg_type = reply['header']['msg_type']
                if msg_type == "execute_result":
                    text = content['data'].get('text/plain', '')
                    output['output'] = text
                    output['stream'] = text
                elif msg_type == "display_data":
                    data = content['data']
                    if 'image/png' in data:
                        output['image'] = data['image/png']
                    elif 'text/html' in data:
                        output['html'] = data['text/html']
                elif msg_type == "stream":
                    text = content.get('text', '')
                    output['output'] += text
                    output['stream'] = text
                elif msg_type == "error":
                    output['error'] = "\n".join(content['traceback'])

            # TODO: if we have something non-trivial to send back...
            self._emit(output)
            # TODO: should probably get rid of all this
            # per-message fields are cleared; "output" keeps accumulating
            output['stream'] = None
            output['image'] = None
            output['html'] = None

    def _complete(self, execution_id, code, timeout=0.5):
        """Request completions for `code` and write them to stdout.

        Emits a ``"started"`` request record, waits for the matching
        ``complete_reply`` on the shell channel, then emits one record whose
        ``"output"`` is a list of ``{"value", "dtype", "text"}`` dicts and
        whose ``"status"`` is ``"complete"``.

        Args:
            execution_id: caller-chosen identifier echoed back as ``"id"``.
            code: the text to complete.
            timeout: seconds to wait for each shell message before polling
                again.

        Returns:
            None. All results are written to stdout.
        """
        # Call ipython kernel complete, wait for response with the correct msg_id,
        # and construct appropriate UI payload.
        # See below for an example response from ipython kernel completion for 'el'
        #
        # {
        # 'parent_header':
        #     {u'username': u'ubuntu', u'version': u'5.0', u'msg_type': u'complete_request',
        #     u'msg_id': u'5222d158-ada8-474e-88d8-8907eb7cc74c', u'session': u'cda4a03d-a8a1-4e6c-acd0-de62d169772e',
        #     u'date': datetime.datetime(2015, 5, 7, 15, 25, 8, 796886)},
        # 'msg_type': u'complete_reply',
        # 'msg_id': u'a3a957d6-5865-4c6f-a0b2-9aa8da718b0d',
        # 'content':
        #     {u'matches': [u'elif', u'else'], u'status': u'ok', u'cursor_start': 0, u'cursor_end': 2, u'metadata': {}},
        # 'header':
        #     {u'username': u'ubuntu', u'version': u'5.0', u'msg_type': u'complete_reply',
        #     u'msg_id': u'a3a957d6-5865-4c6f-a0b2-9aa8da718b0d', u'session': u'f1491112-7234-4782-8601-b4fb2697a2f6',
        #     u'date': datetime.datetime(2015, 5, 7, 15, 25, 8, 803470)},
        # 'buffers': [],
        # 'metadata': {}
        # }
        #
        msg_id = self.client.complete(code)
        request = { "id": execution_id, "msg_id": msg_id, "code": code, "status": "started" }
        self._emit(request)
        output = { "id": execution_id, "msg_id": msg_id, "output": None, "image": None, "error": None }
        # attribute completions ("a.b") are labelled as functions, everything
        # else gets an empty dtype
        dtype = "function" if "." in code else ""
        while True:
            try:
                reply = self.client.get_shell_msg(timeout=timeout)
            except Empty:
                continue

            if ("matches" in reply['content']
                    and reply['msg_type'] == "complete_reply"
                    and reply['parent_header'].get('msg_id') == msg_id):
                output['output'] = [
                    {"value": completion, "dtype": dtype, "text": completion}
                    for completion in reply['content']['matches']
                ]
                output['status'] = "complete"
                self._emit(output)
                return

    def execute(self, execution_id, code, complete=False):
        """Run `code`, or request completions for it when `complete` is true.

        Args:
            execution_id: caller-chosen identifier echoed back in the output.
            code: source code to execute (or text to complete).
            complete: when true, perform completion instead of execution.

        Returns:
            Whatever ``_complete`` / ``_run_code`` return (currently None).
        """
        if complete:
            return self._complete(execution_id, code)

        result = self._run_code(execution_id, code)
        if re.match("%?reset", code):
            # the reset wiped the kernel namespace: load our monkeypatches
            # again, on this instance's own client
            self._load_patches()
        return result

    def get_packages(self, execution_id=None):
        """Execute ``__get_packages()`` in the kernel and stream the output.

        Args:
            execution_id: optional identifier echoed back in the emitted
                records; defaults to None.
        """
        return self.execute(execution_id, "__get_packages()")
Beispiel #39
0
        'WARNING': '\033[93m',
        'FAIL': '\033[91m',
        'ENDC': '\033[0m',
        'BOLD': '\033[1m',
        'UNDERLINE': '\033[4m',
    }
    print(lookup[color], end='')
    print(*args, end='')
    print(lookup['ENDC'])


# setup by automatically finding a running kernel:
# locate a connection file, load it into a blocking client and open its
# channels
cf = find_connection_file()
client = BlockingKernelClient(connection_file=cf)
client.load_connection_file()
client.start_channels()

# simplest usage - execute statements and check if OK
# execute() returns the id of the request; the reply is read from the shell
# channel and its content carries a 'status' field
msgid = client.execute('a = 2')
ret = client.get_shell_msg()
status = ret['content']['status']
if status == 'ok':
    print('statement executed ok')
elif status == 'error':
    # error replies additionally carry the exception name in 'ename'
    ename = ret['content']['ename']
    print('there was a %s exception, which will also appear on the '
          'iopub channel' % ename)

# listen to what's going on in the kernel with blocking calls,
# and take different actions depending on what's arriving
while True: