def exec_code(kid, code):
    """
    Executes arbitrary `code` in the kernel with id `kid`.

    Returns:
        - tuple: the output of the code and the error, if any.
    """
    # Load connection info and init communications.
    cf = find_connection_file(kid)

    with jupyter_lock:
        km = BlockingKernelClient(connection_file=cf)
        km.load_connection_file()
        km.start_channels()
        msg_id = km.execute(code, store_history=False)
        reply = km.get_shell_msg(msg_id, timeout=60)
        output, error = None, None

        while km.is_alive():
            msg = km.get_iopub_msg(timeout=10)
            if (
                "content" in msg
                and "name" in msg["content"]
                and msg["content"]["name"] == "stdout"
            ):
                output = msg["content"]["text"]
                break

        km.stop_channels()
        if reply["content"]["status"] != "ok":
            logging.error(f"Status is {reply['content']['status']}")
            logging.error(output)
            error = output
            output = None

    return output, error
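# A minimal usage sketch for exec_code above. The kernel id is hypothetical
# (normally taken from the running notebook's kernel-<id>.json file name), and
# jupyter_lock is assumed to be a module-level threading.Lock shared by all
# callers:
import threading

jupyter_lock = threading.Lock()

output, error = exec_code("8f3b1c2d", "print(1 + 1)")
if error is None:
    print(output)  # expected: "2\n"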
def main(kid):
    # Load connection info and init communications.
    cf = find_connection_file(kid)
    km = BlockingKernelClient(connection_file=cf)
    km.load_connection_file()
    km.start_channels()

    # Define a function that is useful from within the user's notebook: juneau_connect() can be
    # used to directly connect the notebook to the source database. Note that this includes the
    # full "root" credentials.
    # FIXME: allow for user-specific credentials on SQL tables. The DBMS may also not be at localhost.
    code = f"""
from sqlalchemy import create_engine

def juneau_connect():
    engine = create_engine(
        "postgresql://{config.sql.name}:{config.sql.password}@{config.sql.host}/{config.sql.dbname}",
        connect_args={{"options": "-csearch_path='{config.sql.dbs}'"}},
    )
    return engine.connect()
"""

    km.execute_interactive(code, timeout=TIMEOUT)
    km.stop_channels()
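# Once main(kid) has run, the helper exists in the notebook's namespace and the
# user can query the source database directly. A hypothetical notebook-side
# sketch; the table name 'demo_table' is a placeholder, not part of Juneau:
import pandas as pd

conn = juneau_connect()  # SQLAlchemy Connection from the injected helper
df = pd.read_sql("SELECT * FROM demo_table LIMIT 10", conn)
conn.close()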
def setup_kernel(cmd):
    """start an embedded kernel in a subprocess, and wait for it to be ready

    Yields
    ------
    client: connected BlockingKernelClient instance
    """
    kernel = Popen([sys.executable, "-c", cmd], stdout=PIPE, stderr=PIPE, env=env)
    connection_file = os.path.join(
        IPYTHONDIR,
        "profile_default",
        "security",
        "kernel-%i.json" % kernel.pid,
    )
    # wait for connection file to exist, timeout after 5s
    tic = time.time()
    while not os.path.exists(connection_file) \
            and kernel.poll() is None \
            and time.time() < tic + SETUP_TIMEOUT:
        time.sleep(0.1)

    if kernel.poll() is not None:
        o, e = kernel.communicate()
        e = py3compat.cast_unicode(e)
        raise IOError("Kernel failed to start:\n%s" % e)

    if not os.path.exists(connection_file):
        if kernel.poll() is None:
            kernel.terminate()
        raise IOError("Connection file %r never arrived" % connection_file)

    client = BlockingKernelClient(connection_file=connection_file)
    client.load_connection_file()
    client.start_channels()
    client.wait_for_ready()

    try:
        yield client
    finally:
        client.stop_channels()
        kernel.terminate()
def exec_code(kid, var, code):
    # load connection info and init communication
    cf = find_connection_file(kid)
    global jupyter_lock
    jupyter_lock.acquire()
    try:
        km = BlockingKernelClient(connection_file=cf)
        km.load_connection_file()
        km.start_channels()
        # logging.debug('Executing:\n' + str(code))
        msg_id = km.execute(code, store_history=False)
        reply = km.get_shell_msg(msg_id, timeout=10)
        # logging.info('Execution reply:\n' + str(reply))
        state = 'busy'
        output = None
        idle_count = 0
        try:
            while km.is_alive():
                try:
                    msg = km.get_iopub_msg(timeout=10)
                    # logging.debug('Read ' + str(msg))
                    if 'content' not in msg:
                        continue
                    if 'name' in msg['content'] and msg['content']['name'] == 'stdout':
                        # logging.debug('Got data ' + msg['content']['text'])
                        output = msg['content']['text']
                        break
                    if 'execution_state' in msg['content']:
                        # logging.debug('Got state')
                        state = msg['content']['execution_state']
                        if state == 'idle':
                            idle_count = idle_count + 1
                except Empty:
                    pass
        except KeyboardInterrupt:
            logging.error('Keyboard interrupt')
        finally:
            # logging.info('Kernel IO finished')
            km.stop_channels()

        # logging.info(str(output))
        error = ''
        if reply['content']['status'] != 'ok':
            logging.error('Status is ' + reply['content']['status'])
            logging.error(str(output))
            error = output
            output = None
    finally:
        jupyter_lock.release()
    return output, error
def setup_kernel(cmd):
    """start an embedded kernel in a subprocess, and wait for it to be ready

    Yields
    ------
    client: connected BlockingKernelClient instance
    """
    def connection_file_ready(connection_file):
        """Check if connection_file is a readable json file."""
        if not os.path.exists(connection_file):
            return False
        try:
            with open(connection_file) as f:
                json.load(f)
            return True
        except ValueError:
            return False

    kernel = Popen([sys.executable, '-c', cmd], stdout=PIPE, stderr=PIPE)
    try:
        connection_file = os.path.join(
            paths.jupyter_runtime_dir(),
            'kernel-%i.json' % kernel.pid,
        )
        # wait for connection file to exist, timeout after 5s
        tic = time.time()
        while not connection_file_ready(connection_file) \
                and kernel.poll() is None \
                and time.time() < tic + SETUP_TIMEOUT:
            time.sleep(0.1)

        # Wait 100ms for the writing to finish
        time.sleep(0.1)

        if kernel.poll() is not None:
            o, e = kernel.communicate()
            e = py3compat.cast_unicode(e)
            raise IOError("Kernel failed to start:\n%s" % e)

        if not os.path.exists(connection_file):
            if kernel.poll() is None:
                kernel.terminate()
            raise IOError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        client.wait_for_ready()
        try:
            yield client
        finally:
            client.stop_channels()
    finally:
        kernel.terminate()
def test_start_ipython_scheduler(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(s["address"], loop=loop) as e:
            info = e.start_ipython_scheduler()
            kc = BlockingKernelClient()
            kc.load_connection_info(info)
            kc.start_channels()

            msg_id = kc.execute("scheduler")
            reply = kc.get_shell_msg(timeout=10)
            kc.stop_channels()
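# The info dict handed to load_connection_info above follows the standard
# Jupyter connection-file schema. A sketch of its shape; every port number and
# the key below are made-up values:
info = {
    "transport": "tcp",
    "ip": "127.0.0.1",
    "shell_port": 54321,
    "iopub_port": 54322,
    "stdin_port": 54323,
    "control_port": 54324,
    "hb_port": 54325,
    "key": "a0436f6c-1916-498b-8eb9-e81ab9368e84",
    "signature_scheme": "hmac-sha256",
}
kc = BlockingKernelClient()
kc.load_connection_info(info)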
def test_start_ipython_scheduler(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(('127.0.0.1', s['port']), loop=loop) as e:
            info = e.start_ipython_scheduler()
            key = info.pop('key')
            kc = BlockingKernelClient(**info)
            kc.session.key = key
            kc.start_channels()

            msg_id = kc.execute("scheduler")
            reply = kc.get_shell_msg(timeout=10)
            kc.stop_channels()
def test_start_ipython_scheduler(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1, should_check_state=False) as (s, [a]):
        with Client(s['address'], loop=loop) as e:
            info = e.start_ipython_scheduler()
            key = info.pop('key')
            kc = BlockingKernelClient(**info)
            kc.session.key = key
            kc.start_channels()

            msg_id = kc.execute("scheduler")
            reply = kc.get_shell_msg(timeout=10)
            kc.stop_channels()
def test_start_ipython_scheduler(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            info = e.start_ipython_scheduler()
            key = info.pop('key')
            kc = BlockingKernelClient(**info)
            kc.session.key = key
            kc.start_channels()

            msg_id = kc.execute("scheduler")
            reply = kc.get_shell_msg(timeout=10)
            kc.stop_channels()
def main(kid, var):
    # Load connection info and init communications.
    cf = find_connection_file(kid)
    km = BlockingKernelClient(connection_file=cf)
    km.load_connection_file()
    km.start_channels()

    code = f"""
import pandas as pd
import numpy as np

if type({var}) in [pd.DataFrame, np.ndarray, list]:
    print({var}.to_json(orient='split', index=False))
"""

    km.execute_interactive(code, timeout=TIMEOUT)
    km.stop_channels()
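# execute_interactive only echoes the kernel's output by default. To capture
# the printed JSON on the client side, it accepts an output_hook that is
# called for every IOPub message. A sketch of a drop-in variant of the call
# inside main() above; the helper name fetch_var_json is hypothetical:
def fetch_var_json(km, code, timeout):
    collected = []

    def capture(msg):
        content = msg.get("content", {})
        if msg.get("msg_type") == "stream" and content.get("name") == "stdout":
            collected.append(content["text"])

    km.execute_interactive(code, timeout=timeout, output_hook=capture)
    return "".join(collected)  # the DataFrame as JSON in 'split' orientation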
def test_start_ipython_workers(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(s["address"], loop=loop) as e:
            info_dict = e.start_ipython_workers()
            info = first(info_dict.values())
            kc = BlockingKernelClient()
            kc.load_connection_info(info)
            kc.start_channels()
            kc.wait_for_ready(timeout=10)

            msg_id = kc.execute("worker")
            reply = kc.get_shell_msg(timeout=10)
            assert reply["parent_header"]["msg_id"] == msg_id
            assert reply["content"]["status"] == "ok"
            kc.stop_channels()
def test_start_ipython_workers(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            info_dict = e.start_ipython_workers()
            info = first(info_dict.values())
            key = info.pop('key')
            kc = BlockingKernelClient(**info)
            kc.session.key = key
            kc.start_channels()
            kc.wait_for_ready(timeout=10)

            msg_id = kc.execute("worker")
            reply = kc.get_shell_msg(timeout=10)
            assert reply['parent_header']['msg_id'] == msg_id
            assert reply['content']['status'] == 'ok'
            kc.stop_channels()
def test_start_ipython_workers(loop, zmq_ctx):
    from jupyter_client import BlockingKernelClient

    with cluster(1) as (s, [a]):
        with Client(('127.0.0.1', s['port']), loop=loop) as e:
            info_dict = e.start_ipython_workers()
            info = first(info_dict.values())
            key = info.pop('key')
            kc = BlockingKernelClient(**info)
            kc.session.key = key
            kc.start_channels()
            kc.wait_for_ready(timeout=10)

            msg_id = kc.execute("worker")
            reply = kc.get_shell_msg(timeout=10)
            assert reply['parent_header']['msg_id'] == msg_id
            assert reply['content']['status'] == 'ok'
            kc.stop_channels()
def setup_kernel(cmd):
    """start an embedded kernel in a subprocess, and wait for it to be ready

    This function was taken from the ipykernel project.
    We plan to remove it when dropping support for python 2.

    Yields
    ------
    client: jupyter_client.BlockingKernelClient connected to the kernel
    """
    kernel = Popen([sys.executable, '-c', cmd], stdout=PIPE, stderr=PIPE)
    try:
        connection_file = os.path.join(
            paths.jupyter_runtime_dir(),
            'kernel-%i.json' % kernel.pid,
        )
        # wait for connection file to exist, timeout after 5s
        tic = time.time()
        while not os.path.exists(connection_file) \
                and kernel.poll() is None \
                and time.time() < tic + SETUP_TIMEOUT:
            time.sleep(0.1)

        if kernel.poll() is not None:
            o, e = kernel.communicate()
            if not PY3 and isinstance(e, bytes):
                e = e.decode()
            raise IOError("Kernel failed to start:\n%s" % e)

        if not os.path.exists(connection_file):
            if kernel.poll() is None:
                kernel.terminate()
            raise IOError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        client.wait_for_ready()
        try:
            yield client
        finally:
            client.stop_channels()
    finally:
        if not PY2:
            kernel.terminate()
def setup_kernel(cmd):
    """start an embedded kernel in a subprocess, and wait for it to be ready

    Yields
    ------
    client: connected BlockingKernelClient instance
    """
    kernel = Popen([sys.executable, '-c', cmd], stdout=PIPE, stderr=PIPE, env=env)
    connection_file = os.path.join(
        IPYTHONDIR,
        'profile_default',
        'security',
        'kernel-%i.json' % kernel.pid,
    )
    # wait for connection file to exist, timeout after 5s
    tic = time.time()
    while not os.path.exists(connection_file) \
            and kernel.poll() is None \
            and time.time() < tic + SETUP_TIMEOUT:
        time.sleep(0.1)

    if kernel.poll() is not None:
        o, e = kernel.communicate()
        e = py3compat.cast_unicode(e)
        raise IOError("Kernel failed to start:\n%s" % e)

    if not os.path.exists(connection_file):
        if kernel.poll() is None:
            kernel.terminate()
        raise IOError("Connection file %r never arrived" % connection_file)

    client = BlockingKernelClient(connection_file=connection_file)
    client.load_connection_file()
    client.start_channels()
    client.wait_for_ready()

    try:
        yield client
    finally:
        client.stop_channels()
        kernel.terminate()
def setup_kernel(cmd):
    """start an embedded kernel in a subprocess, and wait for it to be ready

    Yields
    ------
    client: connected BlockingKernelClient instance
    """
    def connection_file_ready(connection_file):
        """Check if connection_file is a readable json file."""
        if not os.path.exists(connection_file):
            return False
        try:
            with open(connection_file) as f:
                json.load(f)
            return True
        except ValueError:
            return False

    kernel = Popen([sys.executable, "-c", cmd], stdout=PIPE, stderr=PIPE, encoding="utf-8")
    try:
        connection_file = os.path.join(
            paths.jupyter_runtime_dir(),
            "kernel-%i.json" % kernel.pid,
        )
        # wait for connection file to exist, timeout after 5s
        tic = time.time()
        while (
            not connection_file_ready(connection_file)
            and kernel.poll() is None
            and time.time() < tic + SETUP_TIMEOUT
        ):
            time.sleep(0.1)

        # Wait 100ms for the writing to finish
        time.sleep(0.1)

        if kernel.poll() is not None:
            o, e = kernel.communicate()
            raise OSError("Kernel failed to start:\n%s" % e)

        if not os.path.exists(connection_file):
            if kernel.poll() is None:
                kernel.terminate()
            raise OSError("Connection file %r never arrived" % connection_file)

        client = BlockingKernelClient(connection_file=connection_file)
        client.load_connection_file()
        client.start_channels()
        client.wait_for_ready()
        try:
            yield client
        finally:
            client.stop_channels()
    finally:
        kernel.terminate()
        kernel.wait()
        # Make sure all the fds get closed.
        for attr in ["stdout", "stderr", "stdin"]:
            fid = getattr(kernel, attr)
            if fid:
                fid.close()
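# The setup_kernel variants above are generator functions; in the test suites
# they come from, they are used as context managers. A usage sketch, assuming
# a contextlib.contextmanager wrapper; the embedded-kernel command is the
# usual one from those suites:
from contextlib import contextmanager

cmd = "from IPython import embed_kernel; embed_kernel()"
with contextmanager(setup_kernel)(cmd) as client:
    reply = client.execute_interactive("print('hello')", timeout=5)
    assert reply["content"]["status"] == "ok"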
class JupyterRAMUsage(Stat):
    """
    tag: ``jupyter.ram_usage``

    settings:

    .. code-block:: javascript

        {
            "connection info": "",
            "query interval [s]": 1
        }

    Tracks the RAM usage of all variables in a user-specified jupyter
    notebook. If no connection info is given in the settings, takes the
    kernel with the latest start date.

    ``connection info`` must be a string containing the info displayed when
    running ``%connect_info`` in a jupyter notebook (you can directly
    copy-paste it).

    ``query interval [s]`` specifies how often the thread running in the
    jupyter notebook should read the variables. The lower this is, the
    higher the resolution of the stat, but it might start affecting the
    speed of your notebook when too low.

    Note that RAM tracked in this way is not equal to the actual RAM the OS
    needs, because some further optimization is done by e.g. numpy to reduce
    the OS memory usage.
    """
    name = 'RAM Usage of objects in a Python Jupyter Notebook [MB]'
    base_tag = 'ram_usage'
    default_settings = {
        'connection info': '',
        # how often the memory usage is read in the jupyter notebook
        'query interval [s]': 1.
    }

    @classmethod
    def _read_latest_connection_file(cls):
        """
        Reads the latest jupyter kernel connection file.
        https://jupyter.readthedocs.io/en/latest/projects/jupyter-directories.html
        """
        runtime_dir = jupyter_runtime_dir()
        files = glob.glob(os.path.join(runtime_dir, 'kernel-*.json'))
        if len(files) == 0:
            return None

        # use the latest connection file
        connection_file = max(files, key=os.path.getctime)
        with open(connection_file, 'r') as f:
            return json.load(f)

    @classmethod
    def get_connection_info(cls):
        """
        Get the target kernel connection info.
        Returns a dictionary of the connection info supplied in the settings,
        or that of the latest started kernel if none is given.
        Returns `None` if no kernel has been found.
        """
        if len(cls.settings['connection info']) == 0:
            return cls._read_latest_connection_file()
        return json.loads(cls.settings['connection info'])

    @classmethod
    def check_availability(cls):
        # the stat is not available if no suitable connection info
        # can be found
        if cls.get_connection_info() is None:
            raise exceptions.StatNotAvailableError(
                'Could not find any running kernel.')

    def __init__(self, fps):
        self.config = self.get_connection_info()

        data_dir = appdirs.user_data_dir('permon', 'bminixhofer')
        os.makedirs(data_dir, exist_ok=True)
        self.usage_file = os.path.join(data_dir, 'jupyter_ram_usage.csv')
        open(self.usage_file, 'w').close()

        # self.setup_code is the code that is run in the notebook when the
        # stat is instantiated. It starts a thread which reads the memory
        # usage of all public variables in a set interval and saves it to a
        # csv file in the user data directory
        self.setup_code = f"""
if '_permon_running' not in globals() or not _permon_running:
    import threading
    import csv
    import sys
    import time
    from pympler import asizeof
    from types import ModuleType

    def _permon_get_ram_usage_per_object():
        while _permon_running:
            ram_usage = []
            global_vars = [key for key in globals() if not key.startswith('_')]
            for name in global_vars:
                value = globals()[name] if name in globals() else None
                if isinstance(value, ModuleType):
                    continue
                try:
                    ram_usage.append((name, asizeof.asizeof(value)))
                except TypeError:
                    continue

            with open('{self.usage_file}', 'w') as f:
                writer = csv.writer(f, delimiter=',')
                for name, ram in ram_usage:
                    writer.writerow([name, ram])
            time.sleep({self.settings['query interval [s]']})

    _permon_thread = threading.Thread(target=_permon_get_ram_usage_per_object)
    _permon_running = True
    _permon_thread.start()
"""
        self.teardown_code = """
_permon_running = False
"""
        self.client = BlockingKernelClient()
        self.client.load_connection_info(self.config)
        self.client.start_channels()
        self.client.execute(self.setup_code)

        super(JupyterRAMUsage, self).__init__(fps=fps)

    def __del__(self):
        # stop the thread running in the jupyter notebook
        # and stop the connection to the kernel upon deletion
        self.client.execute(self.teardown_code)
        self.client.stop_channels()

    def get_stat(self):
        # reads the csv file the setup code has written to
        ram_usage = []
        with open(self.usage_file, 'r') as f:
            reader = csv.reader(f)
            for row in reader:
                ram_usage.append((row[0], float(row[1]) / 1000**2))

        # sort the ram_usage list so that the largest variables come first
        ram_usage = sorted(ram_usage, key=lambda x: x[1], reverse=True)

        # return the sum of RAM usage and the variables taking up the most RAM
        return sum(x[1] for x in ram_usage), ram_usage[:5]

    @property
    def minimum(self):
        return 0

    @property
    def maximum(self):
        return None
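# Hypothetical instantiation of the stat above, assuming a notebook kernel is
# running and the permon Stat base class takes the fps argument as used in
# __init__; get_stat() returns the total usage and the five largest variables:
stat = JupyterRAMUsage(fps=1)
total_mb, largest = stat.get_stat()
print(total_mb, largest)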
def main(kid, var, pid):
    # load connection info and init communication
    cf = find_connection_file(kid)
    km = BlockingKernelClient(connection_file=cf)
    km.load_connection_file()
    km.start_channels()

    # Step 0: get all the inputs
    load_input_code = f"""
proc_id="{pid}"
var={var}
var_name="{var}"
sql_name = "{cfg.sql_name}"
sql_password = "******"
sql_dbname = "{cfg.sql_dbname}"
sql_schema_name = "{cfg.sql_schema_name}"
sql_table_name = "{cfg.sql_table_name}"
json_file_name = "/Users/peterchan/Desktop/GitHub/jupyter-extension/juneau_extension/data_file.json"
"""

    # Step 1: access the table and convert it to JSON
    request_var_code = f"""
import numpy as np
import pandas as pd
import json
import copy

if type(var) is pd.DataFrame or type(var) is np.ndarray or type(var) is list:
    df_json_string = var.to_json(orient='split', index=False)
    df_ls = json.loads(df_json_string)['data']
    df_ls_copy = copy.deepcopy(df_ls)
"""

    # Step 2: define the functions used to write to the JSON file
    json_lock_code = """
def initialize():
    data = {
        "ownerID": "",
        "id123": "operating",
        "id124": "finish"
    }
    with open("./juneau_extension/data_file.json", "w") as file:
        json.dump(data, file, indent=4)


def acquire_lock(pid):
    with open(json_file_name, "r+") as file:
        try:
            data = json.load(file)
            if data["ownerID"]:
                return False
            else:
                file.seek(0)
                file.truncate()
                data['ownerID'] = pid
                json.dump(data, file, indent=4)
                return True
        except Exception:
            return False


def release_lock(pid):
    with open(json_file_name, "r+") as file:
        data = json.load(file)
        if data['ownerID'] == pid:
            file.seek(0)
            file.truncate()
            data['ownerID'] = ""
            json.dump(data, file, indent=4)


# input: id of the process
# remove from the file if the process is completed/terminated/timed out
def update_exec_status(status, pid):
    done = False
    while not done:
        success = acquire_lock(pid)
        if success:
            try:
                with open(json_file_name, "r+") as file:
                    data = json.load(file)
                    if not data['ownerID'] == pid:
                        continue
                    file.seek(0)
                    file.truncate()
                    data[pid] = status
                    json.dump(data, file, indent=4)
                release_lock(pid)
                done = True
            except Exception:
                continue
    return True
"""

    # Step 3: connect to SQL and insert the table
    insert_code = """
from sqlalchemy import create_engine

conn_string = f"postgresql://{sql_name}:{sql_password}@localhost/{sql_dbname}"
table_string = f"{sql_schema_name}.{sql_table_name}"

engine = create_engine(conn_string)
with engine.connect() as connection:
    insertion_string = f'CREATE TABLE {sql_schema_name}.{var_name} ("A" int, "B" int, "C" int, "D" int);'
    for ls in df_ls_copy:
        insertion_string += f"INSERT INTO {sql_schema_name}.{var_name} VALUES ({ls[0]}, {ls[1]}, {ls[2]}, {ls[3]});"

    connection.execute(insertion_string)
    print(proc_id)
    update_exec_status("done", proc_id)

    rows = connection.execute(f"select * from {sql_schema_name}.{var_name} limit 5;")
    for row in rows:
        print(row)
"""

    code = load_input_code + request_var_code + json_lock_code + insert_code
    km.execute_interactive(code, timeout=TIMEOUT)
    km.stop_channels()
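# The JSON lock-file protocol defined in json_lock_code above can be exercised
# on its own. A self-contained sketch using a temporary file in place of the
# hard-coded data_file.json path; the process ids are made up:
import json
import tempfile

tmp = tempfile.NamedTemporaryFile("w", suffix=".json", delete=False)
json.dump({"ownerID": ""}, tmp, indent=4)  # initialize(): nobody holds the lock
tmp.close()
json_file_name = tmp.name

def acquire_lock(pid):
    with open(json_file_name, "r+") as file:
        data = json.load(file)
        if data["ownerID"]:
            return False  # another process holds the lock
        file.seek(0)
        file.truncate()
        data["ownerID"] = pid
        json.dump(data, file, indent=4)
        return True

assert acquire_lock("proc-1")      # first caller gets the lock
assert not acquire_lock("proc-2")  # second caller is refused until release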