def run_notebook(nb, cell_filter=lambda cell: cell,
                 extra_arguments=('--pylab=inline', '--profile=stats'),
                 modify_outputs=True, run_cells=True, timeout=10):
    """Send all code cells of a notebook to a freshly started kernel.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets``, each with a ``cells`` list.
    cell_filter : callable applied to every code cell after it has been
        (optionally) run; a ``None`` result drops the cell from its worksheet.
    extra_arguments : command-line arguments handed to the kernel.
    modify_outputs : when true, the outputs returned by ``run_cell`` are
        stored on the cell before ``cell_filter`` sees it.
    run_cells : when false, cells are only renumbered and filtered.
    timeout : forwarded unchanged to ``run_cell``.

    Returns
    -------
    The same ``nb`` object, modified in place.

    NOTE(review): a code cell whose execution raises is reported on stdout
    and then omitted from the rewritten worksheet -- confirm that dropping
    the cell is intended.
    """
    # Keep the devnull handle open for the kernel's lifetime and close it
    # afterwards instead of leaking it.
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=list(extra_arguments), stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13: the manager itself owns the channels.
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # Simple ping so the first real cell does not race kernel startup.
            shell.execute("pass")
            shell.get_msg()

            prompt_number = 1
            for ws in nb.worksheets:
                new_cells = []
                for cell in ws.cells:
                    cell.prompt_number = prompt_number
                    if cell['cell_type'] != 'code':
                        new_cells.append(cell)
                        continue
                    if run_cells:
                        try:
                            outs = run_cell(kc, cell,
                                            collect_outputs=modify_outputs,
                                            timeout=timeout)
                        except Exception as e:
                            sys.stdout.write("failed to run cell:" + repr(e))
                            continue
                        sys.stdout.write('.')
                        # Only touch the outputs when the cell was really run,
                        # so ``outs`` can never be unbound here.
                        if modify_outputs:
                            cell.outputs = outs
                    new_cell = cell_filter(cell)
                    if new_cell is not None:
                        new_cells.append(new_cell)
                    prompt_number += 1
                sys.stdout.write('\n')
                ws.cells = new_cells
        finally:
            # Release channels and kernel even when a cell or filter raises.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
    return nb
def test_notebook(nb):
    """Re-run a notebook and compare the fresh outputs with the stored ones.

    Every code cell is executed through ``run_cell``; each resulting output is
    compared pairwise (via ``compare_outputs``) with the output stored in the
    notebook.  A summary of replicated, mismatched and failed cells is printed.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` and ``metadata.name``.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=['--pylab=inline'], stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
                kc.start_channels()
                iopub = kc.iopub_channel
            except AttributeError:
                # IPython 0.13
                kc = km
                kc.start_channels()
                iopub = kc.sub_channel
            shell = kc.shell_channel

            # The kernel is started with --pylab=inline because some notebooks
            # assume this even though they shouldn't.  Ping it, then drain any
            # startup chatter from the IOPub channel.
            shell.execute("pass")
            shell.get_msg()
            while True:
                try:
                    iopub.get_msg(timeout=1)
                except Empty:
                    break

            successes = 0
            failures = 0
            errors = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    try:
                        outs = run_cell(shell, iopub, cell)
                    except Exception as e:
                        # Single-argument print calls behave the same on
                        # Python 2 and Python 3.
                        print("failed to run cell: %s" % repr(e))
                        print(cell.input)
                        errors += 1
                        continue

                    failed = False
                    # Every pair is compared (no short-circuit), as before.
                    # NOTE(review): zip() ignores surplus outputs on either
                    # side -- confirm a length mismatch should not count.
                    for out, ref in zip(outs, cell.outputs):
                        if not compare_outputs(out, ref):
                            failed = True
                    if failed:
                        failures += 1
                    else:
                        successes += 1
                    sys.stdout.write('.')

            print("")
            print("tested notebook %s" % nb.metadata.name)
            print(" %3i cells successfully replicated" % successes)
            if failures:
                print(" %3i cells mismatched output" % failures)
            if errors:
                print(" %3i cells failed to complete" % errors)
        finally:
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def run_notebook(nb):
    """Run every code cell of an IPython notebook.

    Parameters
    ----------
    nb : parsed notebook object (exposes ``worksheets`` and ``metadata.name``).

    Returns
    -------
    ret : int
        Return value; 0 in case of no failure, 1 otherwise.

    Raises
    ------
    Whatever ``shell.get_msg`` raises when a cell does not reply within
    20 seconds; the kernel is shut down before the exception propagates.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # simple ping:
            shell.execute("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    shell.execute(cell.input)
                    # wait for finish, maximum 20s
                    reply = shell.get_msg(timeout=20)['content']
                    if reply['status'] == 'error':
                        failures += 1
                        # Single-argument print calls work on Python 2 and 3.
                        print("\nFAILURE:")
                        print(cell.input)
                        print('-----')
                        print("raised:")
                        print('\n'.join(reply['traceback']))
                    cells += 1
                    sys.stdout.write('.')

            print("")
            print("ran notebook %s" % nb.metadata.name)
            print(" ran %3i cells" % cells)
            if failures:
                print(" %3i cells raised exceptions" % failures)
        finally:
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
    return 1 if failures else 0
def run_notebook(nb, cell_filter=lambda cell: cell,
                 extra_arguments=('--pylab=inline', '--profile=stats'),
                 modify_outputs=True, run_cells=True):
    """Send all code cells of a notebook to a freshly started kernel.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets``, each with a ``cells`` list.
    cell_filter : callable applied to every code cell after it has been
        (optionally) run; a ``None`` result drops the cell from its worksheet.
    extra_arguments : command-line arguments handed to the kernel.
    modify_outputs : when true, the outputs returned by ``run_cell`` are
        stored on the cell before ``cell_filter`` sees it.
    run_cells : when false, cells are only renumbered and filtered.

    Returns
    -------
    The same ``nb`` object, modified in place.

    NOTE(review): a code cell whose execution raises is reported on stdout
    and then omitted from the rewritten worksheet -- confirm that dropping
    the cell is intended.
    """
    # Keep the devnull handle open for the kernel's lifetime and close it
    # afterwards instead of leaking it.
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=list(extra_arguments), stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13: the manager itself owns the channels.
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # Simple ping so the first real cell does not race kernel startup.
            shell.execute("pass")
            shell.get_msg()

            prompt_number = 1
            for ws in nb.worksheets:
                new_cells = []
                for cell in ws.cells:
                    cell.prompt_number = prompt_number
                    if cell['cell_type'] != 'code':
                        new_cells.append(cell)
                        continue
                    if run_cells:
                        try:
                            outs = run_cell(kc, cell,
                                            collect_outputs=modify_outputs)
                        except Exception as e:
                            sys.stdout.write("failed to run cell:" + repr(e))
                            continue
                        sys.stdout.write('.')
                        # Only touch the outputs when the cell was really run,
                        # so ``outs`` can never be unbound here.
                        if modify_outputs:
                            cell.outputs = outs
                    new_cell = cell_filter(cell)
                    if new_cell is not None:
                        new_cells.append(new_cell)
                    prompt_number += 1
                sys.stdout.write('\n')
                ws.cells = new_cells
        finally:
            # Release channels and kernel even when a cell or filter raises.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
    return nb
def run_notebook(nb):
    """Execute the code cells of a notebook and store their outputs in place.

    Works with both the worksheet-based layout (only the first worksheet is
    processed) and the flat layout where cells live directly on ``nb``.
    Each code cell gets the outputs returned by ``run_cell`` and a
    ``prompt_number`` equal to its zero-based position among code cells.
    A short summary is printed at the end.

    Parameters
    ----------
    nb : notebook object with either ``worksheets`` or ``cells``.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            kc = km.client()
            kc.start_channels()
            try:
                kc.wait_for_ready()
            except AttributeError:
                # IPython < 3: wait for the kernel_info reply by hand ...
                kc.kernel_info()
                while True:
                    msg = kc.get_shell_msg(block=True, timeout=30)
                    if msg['msg_type'] == 'kernel_info_reply':
                        break
                # ... and flush the IOPub channel.
                while True:
                    try:
                        kc.get_iopub_msg(block=True, timeout=0.2)
                    except Empty:
                        break

            # simple ping:
            kc.execute("pass")
            kc.get_shell_msg()

            cells = 0
            failures = 0
            if hasattr(nb, 'worksheets'):
                # nobody uses more than 1 worksheet
                ws = nb.worksheets[0]
            else:
                # no more worksheet level in new format
                ws = nb

            for cell in ws.cells:
                if cell.cell_type != 'code':
                    continue
                outputs, failed = run_cell(kc, cell)
                cell.outputs = outputs
                cell['prompt_number'] = cells
                failures += failed
                cells += 1
                sys.stdout.write('.')
                sys.stdout.flush()

            print()
            print("ran %3i cells" % cells)
            if failures:
                print(" %3i cells raised exceptions" % failures)
        finally:
            # Stop channels and kernel even if a cell or the handshake raises.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def run_notebook(nb, save_output=False):
    """Execute a notebook, following up on ``set_next_input`` payloads.

    Each code cell is run through ``run_cell``, which returns
    ``(outputs, failed, payload)``.  When the first payload entry has source
    ``'set_next_input'``, its text is executed as an additional cell right
    after the one that produced it.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` and ``metadata.name``.
    save_output : when true, each worksheet's ``cells`` is replaced by the
        rendered list, which also contains the generated follow-up cells.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            if hasattr(km, 'client'):
                kc = km.client()
                kc.start_channels()
                iopub = kc.iopub_channel
            else:
                # IPython 0.13 compat
                kc = km
                kc.start_channels()
                iopub = kc.sub_channel
            shell = kc.shell_channel

            # simple ping:
            shell.execute("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                rendered_cells = list()
                for cell in ws.cells:
                    rendered_cells.append(cell)
                    if cell.cell_type != 'code':
                        continue

                    outputs, failed, payload = run_cell(shell, iopub, cell)
                    cell.outputs = outputs
                    cell['prompt_number'] = cells
                    failures += failed
                    cells += 1
                    sys.stdout.write('.')

                    # Very hacky code to execute the loaded Python code
                    if payload and payload[0]['source'] == 'set_next_input':
                        new_cell = cell.copy()
                        new_cell["input"] = payload[0]["text"]
                        outputs, failed, _ = run_cell(shell, iopub, new_cell)
                        new_cell.outputs = outputs
                        new_cell['prompt_number'] = cells
                        failures += failed
                        cells += 1
                        sys.stdout.write('.')
                        rendered_cells.append(new_cell)
                if save_output:
                    ws.cells = rendered_cells

            print()
            print("ran notebook %s" % nb.metadata.name)
            print(" ran %3i cells" % cells)
            if failures:
                print(" %3i cells raised exceptions" % failures)
        finally:
            # Stop channels and kernel even if a cell raises.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def run_notebook(nb, output=False):
    """Run every code cell of a notebook and log the cells that fail.

    A cell counts as failed when at least one of the outputs returned by
    ``run_cell`` has ``output_type == "pyerr"``; every such output is logged
    with its traceback.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets``.
    output : forwarded to ``run_cell`` as its ``output`` keyword.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=['--pylab=inline'], stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
                kc.start_channels()
                iopub = kc.iopub_channel
            except AttributeError:
                # IPython 0.13
                kc = km
                kc.start_channels()
                iopub = kc.sub_channel
            shell = kc.shell_channel

            # The kernel is started with --pylab=inline because some notebooks
            # assume this even though they shouldn't.  Ping it, then drain the
            # IOPub channel.
            shell.execute("pass")
            shell.get_msg()
            while True:
                try:
                    iopub.get_msg(timeout=1)
                except Empty:
                    break

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    # Use one number for both the "run" and the "fail" log
                    # lines so they refer to the same cell.
                    cell_number = cells
                    log.info('Run cell #%i' % cell_number)
                    cells += 1
                    outs = run_cell(shell, iopub, cell, output=output)
                    cell_failed = False
                    for out in outs or ():
                        if out['output_type'] == "pyerr":
                            log.error('Fail to execute cell #%i\n' % cell_number
                                      + '\n'.join(out['traceback']))
                            cell_failed = True
                    if cell_failed:
                        # Count the cell once, however many errors it emitted.
                        failures += 1
                    log.info('Done')
            log.info("%i cells runned with %i cells failed" % (cells, failures))
        finally:
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def run_nb_offline(nb_path):
    """Read notebook from filepath and execute it; report errors in code cells.

    Parameters
    ----------
    nb_path : path of a notebook file readable by ``reads(..., 'json')``.

    Raises
    ------
    Exception
        If ``nb_path`` is not an existing file.
    Whatever ``shell.get_msg`` raises when a cell does not reply within
    ``MAX_TIMEOUT`` seconds; the kernel is shut down first.
    """
    if not os.path.isfile(nb_path):
        raise Exception('Invalid path: %s' % nb_path)
    with open(nb_path) as f:
        nb = reads(f.read(), 'json')

    logging.info("Running notebook %s", nb.metadata.name)
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # simple ping:
            shell.execute("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        # ONLY RUN CODE CELLS
                        continue
                    shell.execute(cell.input)
                    # wait for finish, maximum TIMEOUT
                    reply = shell.get_msg(timeout=MAX_TIMEOUT)['content']
                    if reply['status'] == 'error':
                        failures += 1
                        logging.info("\nNotebook FAILURE:")
                        logging.info(cell.input)
                        logging.info('-----')
                        logging.info('raised:')
                        logging.info('\n'.join(reply['traceback']))
                    cells += 1

            logging.info("Finished running notebook")
            logging.info(" ran %3i cells", cells)
            if failures:
                logging.warning(" %3i cells raised exceptions", failures)
        finally:
            # Stop channels and kernel even when a cell times out.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def run_notebook(nb, pylab_inline=True, timeout=20):
    """Run the notebook, populating the output cells with appropriate content.

    Params
    ------
    nb : parsed notebook object (its ``worksheets``/``cells`` are walked and
        each cell's ``prompt_number`` is updated in place)
    pylab_inline : when true, start the kernel with ``--pylab=inline``
    timeout : forwarded to ``run_cell`` for every code cell; set this to a
        big value for data heavy scripts

    Returns
    -------
    The number of failed cells.
    NOTE(review): nothing in this function ever increments that counter, so
    the result is always 0; wire it to ``run_cell``'s result once that
    helper's contract is confirmed.
    """
    # Start the kernel.
    km = KernelManager()
    args = {}
    if pylab_inline:
        args['extra_arguments'] = ['--pylab=inline']
    km.start_kernel(**args)

    kc = None
    cells_executed, cells_failed = 0, 0
    try:
        # Get our client.
        try:
            kc = km.client()
        except AttributeError:
            # Older IPython: the manager doubles as the client.
            kc = km
        kc.start_channels()
        shell = kc.shell_channel

        # Ping the kernel.
        shell.execute('pass')
        shell.get_msg()

        # Run all the cells.
        for ws in nb.worksheets:
            for cell in ws.cells:
                cell.prompt_number = cells_executed + 1
                if cell.cell_type != 'code':
                    continue
                cells_executed += 1
                run_cell(kc, cell, timeout)
    finally:
        # Clean up resources even when a cell raises or times out.
        try:
            if kc is not None:
                kc.stop_channels()
        finally:
            km.shutdown_kernel()
    return cells_failed
def run_nb_offline(nb_path):
    """Read notebook from filepath and execute it; report errors in code cells.

    Parameters
    ----------
    nb_path : path of a notebook file readable by ``reads(..., "json")``.

    Raises
    ------
    Exception
        If ``nb_path`` is not an existing file.
    Whatever ``shell.get_msg`` raises when a cell does not reply within
    ``MAX_TIMEOUT`` seconds; the kernel is shut down first.
    """
    if not os.path.isfile(nb_path):
        raise Exception("Invalid path: %s" % nb_path)
    with open(nb_path) as f:
        nb = reads(f.read(), "json")

    logging.info("Running notebook %s", nb.metadata.name)
    with open(os.devnull, "w") as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # simple ping:
            shell.execute("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != "code":
                        # ONLY RUN CODE CELLS
                        continue
                    shell.execute(cell.input)
                    # wait for finish, maximum TIMEOUT
                    reply = shell.get_msg(timeout=MAX_TIMEOUT)["content"]
                    if reply["status"] == "error":
                        failures += 1
                        logging.info("\nNotebook FAILURE:")
                        logging.info(cell.input)
                        logging.info("-----")
                        logging.info("raised:")
                        logging.info("\n".join(reply["traceback"]))
                    cells += 1

            logging.info("Finished running notebook")
            logging.info(" ran %3i cells", cells)
            if failures:
                logging.warning(" %3i cells raised exceptions", failures)
        finally:
            # Stop channels and kernel even when a cell times out.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
class RunningKernel(object):
    """A started kernel plus a connected client, pinged once at construction.

    Attributes set by ``__init__``: ``km`` (kernel manager), ``kc`` (client)
    and ``shell`` (the client's shell channel).
    """

    def __init__(self):
        # The handle is kept on the instance and closed in stop().
        # NOTE(review): presumably restart_kernel() re-launches with the same
        # stderr handle, so it must stay open until then -- confirm.
        self._devnull = open(os.devnull, 'w')
        self.km = KernelManager()
        try:
            self.km.start_kernel(stderr=self._devnull)
        except Exception:
            self._devnull.close()
            raise
        self.kc = self.km.client()
        self.kc.start_channels()
        self.shell = self.kc.shell_channel
        # Simple ping: block until the kernel answers its first request.
        self.shell.execute("pass")
        self.shell.get_msg()

    def restart(self):
        """Restart the kernel immediately (``now=True``)."""
        self.km.restart_kernel(now=True)

    def stop(self):
        """Stop the channels, shut the kernel down and drop the manager."""
        try:
            self.kc.stop_channels()
            self.km.shutdown_kernel()
        finally:
            self._devnull.close()
        del self.km
def run_notebook(nb):
    """Run every code cell of a notebook in a kernel using the 'stats' profile.

    Before the cells run, the kernel variable ``datadir`` is set to the
    absolute path of the ``data`` directory next to this file's parent
    directory.  Failing cells are printed with their traceback, followed by a
    short summary.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` and ``metadata.name``.
    """
    km = KernelManager()
    # The kernel's stderr is left attached here (no devnull redirect).
    km.start_kernel(extra_arguments=['--profile', 'stats'])
    kc = None
    try:
        try:
            kc = km.client()
        except AttributeError:
            # IPython 0.13
            kc = km
        kc.start_channels()
        shell = kc.shell_channel

        # simple ping:
        shell.execute("pass")
        shell.get_msg()

        data_dir = os.path.abspath(os.path.join(
            os.path.abspath(os.path.dirname(__file__)), '..', 'data'))
        # %r yields a valid string literal even when the path contains
        # backslashes or quotes.
        shell.execute("datadir = %r" % data_dir)
        # Consume this reply as well; otherwise every cell below would read
        # the reply of the *previous* request and report the wrong status.
        shell.get_msg()

        cells = 0
        failures = 0
        for ws in nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type != 'code':
                    continue
                shell.execute(cell.input)
                # wait for finish, maximum 20s
                msg = shell.get_msg(timeout=20)
                reply = msg['content']
                if reply['status'] == 'error':
                    failures += 1
                    # Single-argument print calls work on Python 2 and 3.
                    print("\nFAILURE:")
                    print(cell.input)
                    print('-----')
                    print("raised:")
                    print('\n'.join(reply['traceback']))
                cells += 1
                sys.stdout.write('.')

        print("ran notebook %s" % nb.metadata.name)
        print(" ran %3i cells" % cells)
        if failures:
            print(" %3i cells raised exceptions" % failures)
    finally:
        # Stop channels and kernel even when a cell times out.
        try:
            if kc is not None:
                kc.stop_channels()
        finally:
            km.shutdown_kernel()
def run_notebook(nb):
    """Run every code cell of a notebook and print the cells that raise.

    For a failing cell the input, the traceback and the cell object itself
    are printed; a short summary follows once all cells have run.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` and ``metadata.name``.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # simple ping:
            shell.execute("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    shell.execute(cell.input)
                    # wait for finish, maximum 20s
                    reply = shell.get_msg(timeout=20)['content']
                    if reply['status'] == 'error':
                        failures += 1
                        print("\nFAILURE:")
                        print(cell.input)
                        print('-----')
                        print("raised:")
                        print('\n'.join(reply['traceback']))
                        print(cell)
                    cells += 1
                    sys.stdout.write('.')

            print()
            print("ran notebook %s" % nb.metadata.name)
            print(" ran %3i cells" % cells)
            if failures:
                print(" %3i cells raised exceptions" % failures)
        finally:
            # Stop channels and kernel even when a cell times out.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def run_notebook(nb):
    """Execute the code cells of a notebook and store their outputs in place.

    Each code cell receives the outputs returned by ``run_cell`` and a
    ``prompt_number`` equal to its zero-based position among the code cells.
    A short summary is printed at the end.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` and ``metadata.name``.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            if hasattr(km, 'client'):
                kc = km.client()
                kc.start_channels()
                iopub = kc.iopub_channel
            else:
                # IPython 0.13 compat
                kc = km
                kc.start_channels()
                iopub = kc.sub_channel
            shell = kc.shell_channel

            # simple ping:
            shell.execute("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    outputs, failed = run_cell(shell, iopub, cell)
                    cell.outputs = outputs
                    cell['prompt_number'] = cells
                    failures += failed
                    cells += 1
                    sys.stdout.write('.')
                    sys.stdout.flush()

            print()
            print("ran notebook %s" % nb.metadata.name)
            print(" ran %3i cells" % cells)
            if failures:
                print(" %3i cells raised exceptions" % failures)
        finally:
            # Stop channels and kernel even if a cell raises.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def test_notebook(nb):
    """Execute every code cell of a notebook and store the fresh outputs.

    Cells for which ``run_cell`` raises are reported on stdout and left
    untouched; a summary with the number of failed and processed code cells
    is printed at the end.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` and ``metadata.name``.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=[], stderr=devnull)
        kc = None
        try:
            kc = km.client()
            kc.start_channels()
            iopub = kc.iopub_channel
            shell = kc.shell_channel

            # NOTE(review): the kernel_info reply is never read from the shell
            # channel here -- confirm run_cell copes with that stale reply.
            shell.kernel_info()
            # Drain startup messages from the IOPub channel.
            while True:
                try:
                    iopub.get_msg(timeout=1)
                except Empty:
                    break

            errors = 0
            cells = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    cells += 1
                    try:
                        outs = run_cell(shell, iopub, cell)
                    except Exception as e:
                        print("failed to run cell:", repr(e))
                        print(cell.input)
                        errors += 1
                        continue
                    cell.outputs = outs

            if errors:
                print(" %3i cells failed to complete" % errors)
            if cells:
                print("%i code cells from notebook %s" % (cells, nb.metadata.name))
        finally:
            # Stop channels and kernel even on unexpected errors.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def new_kernel():
    """Start a kernel in a subprocess, wait for it to be ready, and yield it.

    This is a generator that yields exactly once and tears the kernel down
    when resumed or closed; presumably it is wrapped with
    ``contextlib.contextmanager`` where it is defined or used (the decorator
    is not visible here).

    Yields
    ------
    kernel_client : the started client (``KM.client()``) connected to the
        new kernel.
    """
    KM = KernelManager()
    KM.start_kernel(stdout=PIPE, stderr=PIPE)
    try:
        KC = KM.client()
        KC.start_channels()
        try:
            # Wait for the kernel to be ready.  This handshake runs inside the
            # try block so that a startup timeout still shuts the kernel down.
            KC.shell_channel.execute("import sys")
            KC.shell_channel.get_msg(block=True, timeout=STARTUP_TIMEOUT)
            flush_channels(KC)
            yield KC
        finally:
            KC.stop_channels()
    finally:
        KM.shutdown_kernel()
def execute_notebook(nb):
    """Execute every code cell of a notebook and store the outputs in place.

    Each cell's ``prompt_number`` is set; the counter only advances after a
    code cell ran successfully.  Cells for which ``run_cell`` raises are
    reported on stdout and keep their previous outputs.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets``.

    Returns
    -------
    The same ``nb`` object, modified in place.
    """
    with open(os.devnull, "w") as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=["--pylab=inline", "--profile=stats"],
                        stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # Simple ping before the first real cell.
            shell.execute("pass")
            shell.get_msg()

            prompt_number = 1
            for ws in nb.worksheets:
                for cell in ws.cells:
                    cell.prompt_number = prompt_number
                    if cell.cell_type != "code":
                        continue
                    # Run each cell exactly once, inside the guard: a second,
                    # unguarded call would repeat the cell's side effects and
                    # let its exceptions escape.
                    try:
                        outs = run_cell(kc, cell)
                    except Exception as e:
                        # Single-argument print calls work on Python 2 and 3.
                        print("failed to run cell: %s" % repr(e))
                        print(cell.input)
                        continue
                    sys.stdout.write(".")
                    cell.outputs = outs
                    prompt_number += 1
        finally:
            # Stop channels and kernel even on unexpected errors.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
    return nb
def run_notebook(nb):
    """Run each code cell in a given notebook and update with the new output.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets``.

    Returns
    -------
    ``nb`` (modified in place) on success, or ``-1`` as soon as ``run_cell``
    raises for any cell.  In both cases the kernel is shut down first.
    """
    km = KernelManager()
    km.start_kernel(extra_arguments=['--pylab=inline'])
    kc = None
    try:
        try:
            kc = km.client()
            kc.start_channels()
            iopub = kc.iopub_channel
        except AttributeError:
            # IPython 0.13
            kc = km
            kc.start_channels()
            iopub = kc.sub_channel
        shell = kc.shell_channel

        # Ping the kernel, then drain startup messages from IOPub.
        shell.execute("pass")
        shell.get_msg()
        while True:
            try:
                iopub.get_msg(timeout=1)
            except Empty:
                break

        for ws in nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type != 'code':
                    continue
                try:
                    cell.outputs = run_cell(shell, iopub, cell)
                except Exception:
                    # Error sentinel kept for existing callers; the finally
                    # block below still releases the kernel.
                    return -1
    finally:
        try:
            if kc is not None:
                kc.stop_channels()
        finally:
            km.shutdown_kernel()
    return nb
def test_noexceptions(nb_path):
    """Ensure that no cells raise an exception.

    Parameters
    ----------
    nb_path : path of a notebook file readable by ``current.reads(..., 'json')``.

    Raises
    ------
    AssertionError
        For the first code cell whose execute reply has status ``'error'``;
        the message contains the cell input and the traceback.
    """
    with open(nb_path) as f:
        nb = current.reads(f.read(), 'json')

    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # Simple ping
            shell.execute("pass")
            shell.get_msg()

            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    shell.execute(cell.input)
                    # wait for finish, maximum 2 minutes
                    reply = shell.get_msg(timeout=120)['content']
                    if reply['status'] == 'error':
                        err_msg = ("\nFAILURE:" + cell.input + "\n"
                                   "-----\nraised:\n"
                                   + "\n".join(reply['traceback']))
                        # Raised explicitly so the check survives ``python -O``.
                        raise AssertionError(err_msg)
        finally:
            # One cleanup path for success, failure and timeout.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
def execute_notebook(nb):
    """Execute every code cell of a notebook and store the outputs in place.

    Each cell's ``prompt_number`` is set; the counter only advances after a
    code cell ran successfully.  Cells for which ``run_cell`` raises are
    reported on stdout and keep their previous outputs.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets``.

    Returns
    -------
    The same ``nb`` object, modified in place.
    """
    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=['--pylab=inline', '--profile=stats'],
                        stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
            except AttributeError:
                # IPython 0.13
                kc = km
            kc.start_channels()
            shell = kc.shell_channel

            # Simple ping before the first real cell.
            shell.execute("pass")
            shell.get_msg()

            prompt_number = 1
            for ws in nb.worksheets:
                for cell in ws.cells:
                    cell.prompt_number = prompt_number
                    if cell.cell_type != 'code':
                        continue
                    # Run each cell exactly once, inside the guard: a second,
                    # unguarded call would repeat the cell's side effects and
                    # let its exceptions escape.
                    try:
                        outs = run_cell(kc, cell)
                    except Exception as e:
                        # Single-argument print calls work on Python 2 and 3.
                        print("failed to run cell: %s" % repr(e))
                        print(cell.input)
                        continue
                    sys.stdout.write('.')
                    cell.outputs = outs
                    prompt_number += 1
        finally:
            # Stop channels and kernel even on unexpected errors.
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()
    return nb
class IPyNbFile(pytest.File):
    """pytest collector that turns each code cell of a notebook into a test item."""

    def collect(self):
        """Read the notebook and yield one ``IPyNbCell`` per code cell.

        The parsed notebook is kept on ``self.nb``.  Items are numbered
        consecutively from 0 in the order the code cells appear.
        """
        with self.fspath.open() as f:
            self.nb = reads(f.read(), 'json')

        cell_num = 0
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == "code":
                    yield IPyNbCell(self.name, self, cell_num, cell)
                    cell_num += 1

    def setup(self):
        """Start a kernel and expose its client shell channel as ``self.shell``."""
        # Kept on the instance so teardown() can close it.
        self._devnull = open(os.devnull, 'w')
        self.km = KernelManager()
        self.km.start_kernel(stderr=self._devnull)
        self.kc = self.km.client()
        self.kc.start_channels()
        self.shell = self.kc.shell_channel

    def teardown(self):
        """Stop the channels, shut the kernel down and drop the references."""
        try:
            try:
                self.kc.stop_channels()
            finally:
                self.km.shutdown_kernel()
        finally:
            self._devnull.close()
        del self.shell
        del self.km
class NotebookRunner(object):
    """Run the code cells of a (worksheet-based) notebook in its own kernel.

    The kernel is started in ``__init__`` and torn down in ``__del__``.
    Outputs are written back into the cells of ``self.nb`` in place.
    """

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    # IPython 3.0.0-dev writes pyerr/pyout in the notebook format but uses
    # error/execute_result in the message spec; this maps the latter to the
    # former.
    _MSG_TYPE_TO_V3 = {
        'error': 'pyerr',
        'execute_result': 'pyout',
    }

    def __init__(self, nb, pylab=False, mpl_inline=False, working_dir=None):
        """Start a kernel for ``nb``.

        ``pylab`` / ``mpl_inline`` add the matching (deprecated) kernel flag;
        ``working_dir``, when given, is the directory the kernel is launched
        from.
        """
        self.km = KernelManager()

        args = []
        if pylab:
            args.append('--pylab=inline')
            logging.warning('--pylab is deprecated and will be removed in a future version')
        elif mpl_inline:
            args.append('--matplotlib=inline')
            logging.warning('--matplotlib is deprecated and will be removed in a future version')

        cwd = os.getcwd()
        if working_dir:
            os.chdir(working_dir)
        try:
            self.km.start_kernel(extra_arguments=args)
        finally:
            # Restore the caller's directory even if the kernel fails to start.
            os.chdir(cwd)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()

        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel

        self.nb = nb

    def __del__(self):
        # __init__ may have failed part-way, so only release what exists.
        kc = getattr(self, 'kc', None)
        if kc is not None:
            kc.stop_channels()
        km = getattr(self, 'km', None)
        if km is not None:
            km.shutdown_kernel(now=True)

    def run_cell(self, cell):
        '''
        Run a notebook cell and update the output of that cell in-place.

        Raises ``NotebookError`` (after the outputs have been stored) when the
        execute reply reports an error, and lets the queue's ``Empty``
        exception propagate when the kernel does not return to idle.
        '''
        logging.info('Running cell:\n%s\n', cell.input)
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            traceback_text = 'Cell raised uncaught exception: \n' + \
                '\n'.join(reply['content']['traceback'])
            logging.info(traceback_text)
        else:
            logging.info('Cell returned')

        outs = list()
        while True:
            # execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # exception is allowed to propagate.
            msg = self.iopub.get_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    break

            content = msg['content']
            msg_type = self._MSG_TYPE_TO_V3.get(msg['msg_type'], msg['msg_type'])

            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            if msg_type == 'clear_output':
                outs = list()
                continue

            out = NotebookNode(output_type=msg_type)
            if 'execution_count' in content:
                out.prompt_number = content['execution_count']

            if msg_type == 'stream':
                out.stream = content['name']
                # msgspec 5 carries the payload as 'text', msgspec 4 as 'data'
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError('unhandled mime type: %s' % mime)
                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                raise NotImplementedError('unhandled iopub message: %s' % msg_type)
            outs.append(out)
        cell['outputs'] = outs

        if status == 'error':
            raise NotebookError(traceback_text)

    def iter_code_cells(self):
        '''
        Iterate over the notebook cells containing code.
        '''
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, skip_exceptions=False, progress_callback=None):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).
        ``progress_callback``, when given, is called with the index of each
        code cell after it has run.
        '''
        for i, cell in enumerate(self.iter_code_cells()):
            try:
                self.run_cell(cell)
            except NotebookError:
                if not skip_exceptions:
                    raise
            if progress_callback:
                progress_callback(i)

    def count_code_cells(self):
        '''
        Return the number of code cells in the notebook
        '''
        return sum(1 for _ in self.iter_code_cells())
class NotebookRunner(object):
    """Run commands and notebooks in a kernel started from ``rcloud_kernel``.

    Outputs are converted to notebook nodes; text payloads are passed through
    ``RClansiconv`` before being stored.
    """

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
    }

    def __init__(self, **kw):
        """Initializes the notebook runner.

        Requires config rcloud_python_lib_path -- or raises exception
        (``KeyError``).  The remaining keywords are forwarded to
        ``start_kernel``.
        """
        # Last notebook loaded by iter_code_cells(); used by save_notebook().
        self.nb = None
        self.km = KernelManager()
        # Written as one logical line without leading whitespace so the
        # snippet stays valid when handed to the interpreter as a command.
        bootstrap = ('import sys; sys.path.extend("{RCPATH}".split(":")); '
                     'from rcloud_kernel import main; main()')
        self.km.kernel_cmd = make_ipkernel_cmd(
            bootstrap.format(RCPATH=kw["rcloud_python_lib_path"]), **kw)
        del kw["rcloud_python_lib_path"]
        self.km.client_factory = MyClient
        self.km.start_kernel(**kw)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()

        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel

    def __del__(self):
        # __init__ may have failed part-way, so only release what exists.
        kc = getattr(self, 'kc', None)
        if kc is not None:
            kc.stop_channels()
        km = getattr(self, 'km', None)
        if km is not None:
            km.shutdown_kernel(now=True)

    def shutdown(self):
        """Stop the client channels and kill the kernel immediately."""
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def run_cmd(self, cmd):
        """Execute ``cmd`` as if it were a cell and return its list of outputs."""
        d = {}

        class Cell(object):
            # Minimal stand-in for a notebook cell: run_cell() reads ``input``
            # and stores its results through item assignment.
            def __init__(self):
                self.input = cmd

            def __setitem__(self, k, v):
                d[k] = v

        self.run_cell(Cell())
        return d['outputs']

    def run_magic(self, magic_line):
        """Send ``magic_line`` to the kernel through the shell channel's ``magic``."""
        self.shell.magic(magic_line)

    def wait_for_msg(self):
        """Collect the raw IOPub messages of the pending request until idle.

        Lets the queue's ``Empty`` exception propagate when the kernel does
        not return to idle before the queue runs dry.
        """
        outs = list()
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            logging.info('Cell raised uncaught exception: \n%s',
                         '\n'.join(reply['content']['traceback']))
        else:
            if _debugging:
                logging.debug('Cell returned')
        while True:
            # execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened.
            msg = self.iopub.get_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    break
            outs.append(msg)
        return outs

    def run_cell(self, cell):
        '''
        Run a notebook cell and update the output of that cell in-place.
        '''
        if _debugging:
            logging.debug('Running cell:\n%s\n', cell.input)
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            logging.info('Cell raised uncaught exception: \n%s',
                         '\n'.join(reply['content']['traceback']))
        else:
            if _debugging:
                logging.debug('Cell returned')

        outs = list()
        while True:
            try:
                msg = self.iopub.get_msg(timeout=1)
            except Empty:
                # Keep waiting for the idle status.  Falling through here
                # would re-process the previous message (or fail because no
                # message has been received yet).
                continue
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    break

            content = msg['content']
            msg_type = msg['msg_type']

            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']

            if msg_type in ('status', 'pyin'):
                continue

            out = NotebookNode(output_type=msg_type)
            if 'execution_count' in content:
                out.prompt_number = content['execution_count']

            if msg_type == 'stream':
                out.stream = content['name']
                out.html = RClansiconv(content['data'])
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        logging.warning('unhandled mime type: %s' % mime)
                        raise NotImplementedError('unhandled mime type: %s' % mime)
                    if attr == 'text':
                        if _debugging:
                            logging.info(data)
                        setattr(out, attr, RClansiconv(data))
                    else:
                        setattr(out, attr, data)
            elif msg_type == 'pyerr':
                logging.info('Received an exception: ' + content['ename'])
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
                out.html = RClansiconv('\n'.join(out.traceback))
            else:
                raise NotImplementedError('unhandled iopub message: %s' % msg_type)
            outs.append(out)
        cell['outputs'] = outs

    def iter_code_cells(self, nb):
        '''
        Iterate over the notebook cells containing code.

        ``nb`` is the path of a notebook file; the loaded notebook is kept on
        ``self.nb`` so that ``save_notebook`` can write it back.
        '''
        with open(nb) as handle:
            notebook = read(handle, 'json')
        self.nb = notebook
        for ws in notebook.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, nb, skip_exceptions=False):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).
        '''
        for cell in self.iter_code_cells(nb):
            try:
                self.run_cell(cell)
                # Single-argument print call works on Python 2 and 3.
                print(cell.outputs)
            except NotebookError:
                if not skip_exceptions:
                    raise

    def save_notebook(self, nb_out):
        """Write the most recently loaded notebook to ``nb_out`` as JSON.

        Raises ``ValueError`` when no notebook has been loaded yet.
        """
        notebook = getattr(self, 'nb', None)
        if notebook is None:
            raise ValueError('no notebook has been loaded; run a notebook first')
        if _debugging:
            logging.info('Saving to %s', nb_out)
        with open(nb_out, 'w') as handle:
            write(notebook, handle, 'json')
class NotebookRunner(object):
    """Run a (worksheet-based) notebook in its own kernel.

    Besides code cells, markdown cells containing ``{{expression}}``
    placeholders are evaluated and their values stored in the cell metadata.
    """

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    # IPython 3.0.0-dev writes pyerr/pyout in the notebook format
    # but uses error/execute_result in the message spec.
    _MSG_TYPE_TO_V3 = {
        'error': 'pyerr',
        'execute_result': 'pyout',
    }

    # Placeholder syntax recognised inside markdown cells.
    _EXPR_PATTERN = r'{{(.*?)}}'

    def __init__(
            self,
            nb,
            pylab=False,
            mpl_inline=False,
            profile_dir=None,
            working_dir=None):
        """Start a kernel for ``nb`` and wait until it is ready.

        ``pylab`` / ``mpl_inline`` add the matching (deprecated) kernel flag,
        ``profile_dir`` is passed as ``--profile-dir`` and ``working_dir``,
        when given, is the directory the kernel is launched from.
        """
        self.km = KernelManager()

        args = []
        if pylab:
            args.append('--pylab=inline')
            logging.warning(
                '--pylab is deprecated and will be removed in a future version'
            )
        elif mpl_inline:
            args.append('--matplotlib=inline')
            logging.warning(
                '--matplotlib is deprecated and' +
                ' will be removed in a future version'
            )

        if profile_dir:
            args.append('--profile-dir=%s' % os.path.abspath(profile_dir))

        cwd = os.getcwd()
        if working_dir:
            os.chdir(working_dir)
        try:
            self.km.start_kernel(extra_arguments=args)
        finally:
            # Restore the caller's directory even if the kernel fails to start.
            os.chdir(cwd)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()
        try:
            self.kc.wait_for_ready()
        except AttributeError:
            # IPython < 3
            self._wait_for_ready_backport()

        self.nb = nb

    def shutdown_kernel(self):
        """Stop the client channels and kill the kernel immediately."""
        logging.info('Shutdown kernel')
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def _wait_for_ready_backport(self):
        # Backport BlockingKernelClient.wait_for_ready from IPython 3.
        # Wait for kernel info reply on shell channel.
        self.kc.kernel_info()
        while True:
            msg = self.kc.get_shell_msg(block=True, timeout=30)
            if msg['msg_type'] == 'kernel_info_reply':
                break

        # Flush IOPub channel
        while True:
            try:
                self.kc.get_iopub_msg(block=True, timeout=0.2)
            except Empty:
                break

    @staticmethod
    def _markdown_text(cell):
        # Text of a markdown cell: ``source`` (what pymarkdown_cell reads),
        # falling back to ``input`` for cells that only carry that field.
        text = getattr(cell, 'source', None)
        if text is None:
            text = getattr(cell, 'input', '')
        return text

    def iter_code_cells(self):
        """Iterate over the code cells and the markdown cells with placeholders."""
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell
                elif cell.cell_type == 'markdown' and \
                        re.search(self._EXPR_PATTERN, self._markdown_text(cell)):
                    yield cell

    def run_code(self, code):
        """Execute python code in the notebook kernel and return the outputs.

        Returns ``(prompt_number, outs)``; ``prompt_number`` is -1 when no
        message carried an execution count.  Raises ``NotebookError`` when
        the execute reply reports an error, and lets the queue's ``Empty``
        exception propagate when the kernel does not return to idle.
        """
        self.kc.execute(code)
        reply = self.kc.get_shell_msg()
        status = reply['content']['status']
        traceback_text = ''
        if status == 'error':
            traceback_text = 'Code raised uncaught exception: \n' + \
                '\n'.join(reply['content']['traceback'])
            logging.info(traceback_text)
        else:
            logging.info('Code returned')

        outs = list()
        prompt_number = -1
        while True:
            # execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # exception is allowed to propagate.
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    break

            content = msg['content']
            # Translate message-spec names to notebook-format names.
            msg_type = self._MSG_TYPE_TO_V3.get(msg['msg_type'], msg['msg_type'])

            if 'execution_count' in content:
                prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            if msg_type == 'clear_output':
                outs = list()
                continue

            out = NotebookNode(output_type=msg_type)
            if 'execution_count' in content:
                out.prompt_number = content['execution_count']

            if msg_type == 'stream':
                out.stream = content['name']
                # in msgspec 5, this is name, text
                # in msgspec 4, this is name, data
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime
                        )
                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type
                )
            outs.append(out)

        if status == 'error':
            raise NotebookError(traceback_text)

        return (prompt_number, outs)

    def run_cell(self, cell, skip_exceptions):
        """
        Run a notebook cell and update the output of that cell in-place.

        A ``NotebookError`` is swallowed when ``skip_exceptions`` is true; the
        cell then keeps its previous prompt number and outputs.
        """
        logging.info('Running cell:\n%s\n', cell.input)
        try:
            (prompt_number, outs) = self.run_code(cell.input)
            cell['prompt_number'] = prompt_number
            cell['outputs'] = outs
        except NotebookError:
            if not skip_exceptions:
                raise

    def pymarkdown_cell(self, cell):
        """
        Go looking for {{this}} inside a markdown cell and add this:value to
        metadata['variables']. This is the same thing that the python-markdown
        extension does.

        Expressions that raise are kept with an empty value.
        """
        variables = dict()
        for expr in re.finditer(self._EXPR_PATTERN, cell.source):
            code = expr.group(1)
            variables[code] = ""
            logging.info('Running markdown expression:\n%s\n', code)
            try:
                (prompt_number, outs) = self.run_code(code)
            except NotebookError:
                continue  # leave the variable empty
            # A list (not a lazy filter object) so it can be tested and
            # indexed on Python 3 as well.
            results = [out for out in outs if out['output_type'] == 'pyout']
            if results:
                variables[code] = results[0]['text']
        cell.metadata['variables'] = variables

    def run_notebook(self, skip_exceptions=False, progress_callback=None):
        """
        Run all the notebook cells in order and update the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).
        ``progress_callback``, when given, is called with the cell index
        after each code cell.
        """
        for i, cell in enumerate(self.iter_code_cells()):
            logging.info('Cell type: %s\n', cell.cell_type)
            try:
                if cell.cell_type == 'code':
                    self.run_cell(cell, skip_exceptions)
                    if progress_callback:
                        progress_callback(i)
                elif cell.cell_type == 'markdown':
                    self.pymarkdown_cell(cell)
            except NotebookError:
                if not skip_exceptions:
                    raise
class ExecutePreprocessor(Preprocessor):
    """
    Executes all the cells in a notebook.

    A kernel is started for the duration of `preprocess`; every code cell
    is sent to it and the cell's outputs are replaced by what the kernel
    published on IOPub while running that cell.
    """

    timeout = Integer(
        30, config=True,
        help="The time to wait (in seconds) for output from executions.")

    # FIXME: to be removed with nbformat v4
    # map msg_type to v3 output_type
    msg_type_map = {
        "error": "pyerr",
        "execute_result": "pyout",
    }

    # FIXME: to be removed with nbformat v4
    # map mime-type to v3 mime-type keys
    mime_map = {
        "text/plain": "text",
        "text/html": "html",
        "image/svg+xml": "svg",
        "image/png": "png",
        "image/jpeg": "jpeg",
        "text/latex": "latex",
        "application/json": "json",
        "application/javascript": "javascript",
    }

    extra_arguments = List(Unicode)

    def _create_client(self):
        """
        Start a kernel and its channels and wait for the kernel_info reply.

        Sets ``self.km``, ``self.kc``, ``self.iopub`` and ``self.shell``.
        Re-raises ``Empty`` (after shutting the kernel down again) when the
        kernel_info reply does not arrive within ``self.timeout`` seconds.
        """
        from IPython.kernel import KernelManager
        self.km = KernelManager()
        self.km.write_connection_file()
        self.kc = self.km.client()
        self.kc.start_channels()
        # Keep the handle so that it can be closed in _shutdown_client
        # instead of being leaked.
        self._devnull = open(os.devnull, 'w')
        self.km.start_kernel(extra_arguments=self.extra_arguments,
                             stderr=self._devnull)
        self.iopub = self.kc.iopub_channel
        self.shell = self.kc.shell_channel

        self.shell.kernel_info()
        try:
            self.shell.get_msg(timeout=self.timeout)
        except Empty:
            self.log.error("Timeout waiting for kernel_info reply")
            # do not leave a half-initialised kernel running
            self._shutdown_client()
            raise

        # flush IOPub
        while True:
            try:
                self.iopub.get_msg(block=True, timeout=0.25)
            except Empty:
                break

    def _shutdown_client(self):
        """Stop the channels, shut the kernel down and release the handles."""
        try:
            self.kc.stop_channels()
            self.km.shutdown_kernel()
        finally:
            devnull = getattr(self, '_devnull', None)
            if devnull is not None:
                devnull.close()
                self._devnull = None
            del self.km

    def preprocess(self, nb, resources):
        """
        Run every cell of ``nb`` in a fresh kernel.

        The kernel is shut down even when a cell fails, so that an exception
        raised by `preprocess_cell` does not leave a kernel process behind.

        Returns the ``(nb, resources)`` pair produced by the base class.
        """
        self._create_client()
        try:
            nb, resources = super(ExecutePreprocessor, self).preprocess(
                nb, resources)
        finally:
            self._shutdown_client()
        return nb, resources

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Apply a transformation on each code cell. See base.py for details.

        Non-code cells are returned untouched. For a code cell the outputs
        are replaced by the result of `run_cell`; any exception is logged
        together with the cell input and re-raised.
        """
        if cell.cell_type != 'code':
            return cell, resources
        try:
            outputs = self.run_cell(self.shell, self.iopub, cell)
        except Exception as e:
            self.log.error("failed to run cell: " + repr(e))
            self.log.error(str(cell.input))
            raise
        cell.outputs = outputs
        return cell, resources

    def run_cell(self, shell, iopub, cell):
        """
        Execute ``cell.input`` and collect the outputs published for it.

        ``shell`` and ``iopub`` are the kernel client's shell and IOPub
        channels. Returns the list of output nodes (nbformat v3 naming).
        Raises ``Empty`` when the execute reply does not arrive within
        ``self.timeout`` seconds; a timeout on IOPub only logs a warning
        and returns what was collected so far.
        """
        msg_id = shell.execute(cell.input)
        self.log.debug("Executing cell:\n%s", cell.input)
        # wait for finish, with timeout
        while True:
            try:
                msg = shell.get_msg(timeout=self.timeout)
            except Empty:
                self.log.error("Timeout waiting for execute reply")
                raise
            if msg['parent_header'].get('msg_id') == msg_id:
                break
            # otherwise: not our reply, keep waiting

        outs = []

        while True:
            try:
                msg = iopub.get_msg(timeout=self.timeout)
            except Empty:
                self.log.warning("Timeout waiting for IOPub output")
                break
            if msg['parent_header'].get('msg_id') != msg_id:
                # not an output from our execution
                continue

            msg_type = msg['msg_type']
            self.log.debug("output: %s", msg_type)
            content = msg['content']
            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    break
                else:
                    continue
            elif msg_type in {'execute_input', 'pyin'}:
                continue
            elif msg_type == 'clear_output':
                outs = []
                continue

            out = NotebookNode(
                output_type=self.msg_type_map.get(msg_type, msg_type))

            # set the prompt number for the input and the output
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type == 'stream':
                out.stream = content['name']
                # msgspec 5 carries the payload under 'text',
                # msgspec 4 under 'data'
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']
            elif msg_type in ('display_data', 'execute_result'):
                out['metadata'] = content['metadata']
                for mime, data in content['data'].items():
                    # map mime-type keys to nbformat v3 keys
                    # this will be unnecessary in nbformat v4
                    key = self.mime_map.get(mime, mime)
                    out[key] = data
            elif msg_type == 'error':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                # the (empty) node is still recorded below
                self.log.error("unhandled iopub msg: " + msg_type)

            outs.append(out)
        return outs
def test_notebook(nb): km = KernelManager() km.start_kernel(stderr=open(os.devnull, 'w')) try: kc = km.client() kc.start_channels() iopub = kc.iopub_channel except AttributeError: # IPython 0.13 kc = km kc.start_channels() iopub = kc.sub_channel shell = kc.shell_channel shell.execute("pass") shell.get_msg() while True: try: iopub.get_msg(timeout=1) except Empty: break successes = 0 failures = 0 errors = 0 prompt_number = 1 for ws in nb.worksheets: for cell in ws.cells: if cell.cell_type != 'code': continue try: outs = run_cell(shell, iopub, cell) except Exception as e: print "failed to run cell:", repr(e) print cell.input errors += 1 cell.outputs = [e] continue failed = False for out, ref in zip(outs, cell.outputs): if not compare_outputs(out, ref): failed = True if failed: failures += 1 else: successes += 1 sys.stdout.write('.') cell.outputs = outs cell.prompt_number = prompt_number if cell.outputs: cell.outputs[0]['metadata'] = {} prompt_number += 1 print print "[{NAME}] {s} cells successfully replicated".format(NAME=NAME, s=successes) if failures: print "[{NAME}] {f} cells mismatched output".format(NAME=NAME, f=failures) if errors: print "[{NAME}] {e} cells failed to complete".format(NAME=NAME, e=errors) kc.stop_channels() km.shutdown_kernel() del km
class IPyKernel(object):
    """
    A simple wrapper class to run cells in an IPython Notebook.

    Notes
    -----
    Use `with` construct to properly instantiate
    """

    def __init__(self, console=None):
        """``console`` is accepted for compatibility and is not used here."""
        # default timeout time is 60 seconds
        self.default_timeout = 60
        self.extra_arguments = ['--pylab=inline']

    def __enter__(self):
        """Start the kernel and its channels, then drain pending messages."""
        self.km = KernelManager()
        # kept so that __exit__ can close it
        self._devnull = open(os.devnull, 'w')
        self.km.start_kernel(extra_arguments=self.extra_arguments,
                             stderr=self._devnull)

        try:
            self.kc = self.km.client()
            self.kc.start_channels()
            self.iopub = self.kc.iopub_channel
        except AttributeError:
            # IPython 0.13
            self.kc = self.km
            self.kc.start_channels()
            self.iopub = self.kc.sub_channel

        self.shell = self.kc.shell_channel

        # run %pylab inline, because some notebooks assume this
        # even though they shouldn't
        self.shell.execute("pass")
        self.shell.get_msg()
        while True:
            try:
                self.iopub.get_msg(timeout=1)
            except Empty:
                break

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the channels and shut the kernel down, whatever happened."""
        try:
            self.kc.stop_channels()
        finally:
            # still shut the kernel down if stopping the channels failed
            try:
                self.km.shutdown_kernel()
            finally:
                self._devnull.close()
                del self.km

    def run(self, cell, timeout=None):
        """
        Execute ``cell.input`` and return the list of collected outputs.

        ``timeout`` (seconds) bounds the wait for the execute reply and
        defaults to ``self.default_timeout``. IOPub messages are then read
        until none arrives for half a second.
        """
        use_timeout = self.default_timeout if timeout is None else timeout
        self.shell.execute(cell.input)
        self.shell.get_msg(timeout=use_timeout)
        outs = []

        while True:
            try:
                msg = self.iopub.get_msg(timeout=0.5)
            except Empty:
                break
            msg_type = msg['msg_type']
            if msg_type in ('status', 'pyin'):
                continue
            elif msg_type == 'clear_output':
                outs = []
                continue

            content = msg['content']
            out = NotebookNode(output_type=msg_type)

            if msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                out['metadata'] = content['metadata']
                for mime, data in content['data'].items():
                    attr = mime.split('/')[-1].lower()
                    # this gets most right, but fix svg+html, plain
                    attr = attr.replace('+xml', '').replace('plain', 'text')
                    setattr(out, attr, data)
                if msg_type == 'pyout':
                    out.prompt_number = content['execution_count']
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                # the (empty) node is still recorded below
                print("unhandled iopub msg: %s" % msg_type)

            outs.append(out)

        return outs

    def sanitize(self, s):
        """sanitize a string for comparison.

        fix universal newlines, strip trailing newlines, and normalize likely
        random values (memory addresses and UUIDs)

        Non-string values are returned unchanged.
        """
        try:
            text_types = basestring  # Python 2
        except NameError:
            text_types = str  # Python 3
        if not isinstance(s, text_types):
            return s
        # normalize newline:
        s = s.replace('\r\n', '\n')

        # ignore trailing newlines (but not space)
        s = s.rstrip('\n')

        # normalize hex addresses:
        s = re.sub(r'0x[a-f0-9]+', '0xFFFFFFFF', s)

        # normalize UUIDs:
        s = re.sub(r'[a-f0-9]{8}(\-[a-f0-9]{4}){3}\-[a-f0-9]{12}', 'U-U-I-D', s)

        return s

    def compare_outputs(self, test, ref, skip_compare=('png', 'traceback', 'latex', 'prompt_number', 'svg', 'html')):
        """
        Return True when every key of ``ref`` is present in ``test`` and,
        unless the key is listed in ``skip_compare``, both sanitized values
        are equal.

        Values that are not strings (e.g. a metadata dict) are compared
        as they are.
        """
        for key in ref:
            if key not in test:
                return False
            if key in skip_compare:
                continue
            if self.sanitize(test[key]) != self.sanitize(ref[key]):
                return False
        return True

    def get_commands(self, cell):
        """
        Parse the directives of a cell whose first input line starts with
        ``#!``.

        The rest of that line is split on commas; ``name`` yields
        ``{name: True}`` and ``name: value`` yields ``{name: value}`` (names
        are stripped and lower-cased, values are kept verbatim). Parts with
        more than one colon are ignored. Returns an empty dict when the cell
        has no ``input`` attribute or no such line.
        """
        commands = {}
        if hasattr(cell, 'input'):
            lines = cell.input.splitlines()
            if len(lines) > 0:
                first_line = lines[0]
                if first_line.startswith('#!'):
                    txt = first_line[2:].strip()

                    parts = txt.split(',')
                    for part in parts:
                        subparts = part.split(':')
                        if len(subparts) == 1:
                            commands[subparts[0].strip().lower()] = True
                        elif len(subparts) == 2:
                            commands[subparts[0].strip().lower()] = subparts[1]

        return commands
def executeNotebook(ipynbfilepath): print "Executing IPython Notebook: ", ipynbfilepath with open(ipynbfilepath) as f: nb = reads(f.read(), 'json') km = KernelManager() km.start_kernel(extra_arguments=['--pylab=inline'], stderr=open(os.devnull, 'w')) try: kc = km.client() kc.start_channels() iopub = kc.iopub_channel except AttributeError: # IPython 0.13 kc = km kc.start_channels() iopub = kc.sub_channel shell = kc.shell_channel # run %pylab inline, because some notebooks assume this # even though they shouldn't shell.execute("pass") shell.get_msg() while True: try: iopub.get_msg(timeout=1) except Empty: break nSuccess = 0 nError = 0 prompt_number = 1 for ws in nb.worksheets: for cell in ws.cells: if cell.cell_type != 'code': continue try: outs = run_cell(shell, iopub, cell) except Exception as e: print '>>>>>>>>>>>>>> FAILED TO RUN CELL' print "Error Msg:", str(e) print cell.input raise ValueError('Cell Execution Failed') nError += 1 cell.outputs = [e] continue nSuccess += 1 cell.outputs = outs cell.prompt_number = prompt_number if cell.outputs: cell.outputs[0]['prompt_number'] = prompt_number prompt_number += 1 print 'DONE.' print "%3i/%3i cells executed correctly." % (nSuccess, nSuccess+nError) if nError: print "%3i cells raised errors." % nError kc.stop_channels() km.shutdown_kernel() del km with io.open(ipynbfilepath, 'w', encoding='utf8') as f: write(nb, f, 'json') print "Wrote output to file: %s" % ipynbfilepath
class NotebookRunner(object):
    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.

    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
    }

    def __init__(self, **kw):
        """Initializes the notebook runner.
           Requires config rcloud_python_lib_path -- or raises exception"""
        # poll_for_msgs reads this attribute, so it must exist even when
        # polling happens before the first submit_cell.
        self._previous_status = 'EMPTY'
        self.km = KernelManager()
        self.km.kernel_cmd = make_ipkernel_cmd("""
import sys;
import time; # to test for slow staring kernels, add a delay here
sys.path.extend("{RCPATH}".split(":"));
from rcloud_kernel import main; main()""".format(RCPATH=kw["rcloud_python_lib_path"]), **kw)
        del kw["rcloud_python_lib_path"]
        self.km.client_factory = MyClient
        self.km.start_kernel(**kw)  # This is a non-blocking call to launch the process
        self.completer = completer.Completer()
        self.completer.limit_to__all__ = True
        # There is a possible race condition if the system is slow to load
        # the modules for the kernel and we issue commands to the kernel rightaway.
        # We saw these on CentOS and OSX. So, issue an empty command to the kernel and wait for return
        self.kc = self.km.client()
        self.kc.start_channels()

        self.shell = self.kc.shell_channel
        self.shell.execute("")
        self.shell.get_msgs()
        self.iopub = self.kc.iopub_channel

    def __del__(self):
        # __init__ may have failed before the client was created
        if hasattr(self, 'kc'):
            self.shutdown()

    def shutdown(self):
        """Stops the channels and shuts the kernel down immediately"""
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def complete(self, text, pos_fromR):
        """Completions for text at pos"""
        txtArr = text[:int(pos_fromR)].split('\n')
        # We are ignoring surrounding lines and full context; a poor-man's tab-complete for now
        self.shell.get_msgs()  # flush before execution
        self.shell.complete(txtArr[-1], line="", cursor_pos=len(txtArr[-1])-1)
        while True:
            resp = self.shell.get_msg()
            if _debugging:
                logging.debug(resp)
            if resp['msg_type'] == 'complete_reply':
                self.shell.get_msgs()  # flush
                return resp['content']['matches']

    def run_cmd(self, cmd):
        """Same as submit_cell"""
        self.submit_cell(cmd)

    def run_magic(self, magic_line):
        """Forwards magic_line to the shell channel's magic method"""
        self.shell.magic(magic_line)

    def submit_cell(self, cmd):
        """Submits the cell code to the kernel.
           Now that we have a continuous IO on rcloud, and the clients can send oob messages to
           RCloud [but only through R], we separate the pieces to submit and have a function
           for looping to get messages"""
        if _debugging:
            logging.debug('Running cell:\n%s\n', cmd)
        self.iopub.flush()
        self.shell.execute(cmd)
        self._previous_status = 'START'

    def poll_for_msgs(self):
        """Polls for messages from the kernel.
           Used after submitting code for execution.

           Returns a NotebookNode whose output_type is 'IDLE' (kernel went idle),
           'END_CELL' (queue empty right after idle), 'EMPTY' (queue empty),
           'NoOp' (status/pyin message) or the kernel message type, in which
           case the payload is stored under 'text' / 'html' / a MIME_MAP label.
           Raises NotImplementedError on an unknown mime type or message type."""
        try:
            msg = self.iopub.get_msg(timeout=1)
            if msg['msg_type'] == 'status' and msg['content']['execution_state'] == 'idle':
                if _debugging:
                    logging.info('Message -- {}:{}'.format(msg['msg_type'], msg['content']))
                self._previous_status = 'IDLE'
                return NotebookNode(output_type='IDLE')
        except Empty:  # state should return to idle before queue becomes empty, but we ignore it now
            prevstat, self._previous_status = self._previous_status, 'EMPTY'
            retstat = 'END_CELL' if prevstat == 'IDLE' else 'EMPTY'
            # Assuming IDLE followed by EMPTY is the end-of-cell
            return NotebookNode(output_type=retstat)

        self._previous_status = ''  # Not idle, that's all we are concerned about for now
        content, msg_type = msg['content'], msg['msg_type']

        if msg_type in ['status', 'pyin']:
            return NotebookNode(output_type='NoOp')

        out = NotebookNode(output_type=msg_type)
        if msg_type in ('display_data', 'pyout'):
            for mime, data in content['data'].items():
                # only the lookup is guarded, so that a KeyError raised by
                # the conversion is not reported as an unknown mime type
                try:
                    attr = self.MIME_MAP[mime]
                except KeyError:
                    raise NotImplementedError('unhandled mime type: %s' % mime)
                tmpval = RClansiconv(data) if attr == 'text' else data
                setattr(out, attr, tmpval)
        elif msg_type == 'stream':
            setattr(out, 'text', RClansiconv(content['data']))
        elif msg_type == 'pyerr':
            setattr(out, 'html', RClansiconv('\n'.join(content['traceback']) + '\n'))
        else:
            if _debugging:
                logging.info('Unsupported: ' + msg_type)
            raise NotImplementedError('unhandled iopub message: %s' % msg_type)
        if _debugging:
            logging.info('Sending: msg_type: [{}]; HTML: [{}]; TEXT: [{}]'.format(msg_type, out.get('html', ''), out.get('text', '')))
        return out  # upstream process will handle it [e.g. send as an oob message]
class NotebookRunner(object):

    """
    The kernel communicates with mime-types while the notebook
    uses short labels for different cell types. We'll use this to
    map from kernel types to notebook format types.

    This classes executes a notebook end to end.
    """

    #. available output types
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    def __init__(self, nb, profile_dir=None, working_dir=None, comment="", fLOG=noLOG, theNotebook=None, code_init=None):
        """
        constructor

        @param      nb              notebook as JSON
        @param      profile_dir     profile directory
        @param      working_dir     working directory
        @param      comment         additional information added to error message
        @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
        @param      code_init       to initialize the notebook with a python code as if it was a cell
        @param      fLOG            logging function

        .. versionchanged:: 1.1
            Parameters *theNotebook*, *code_init* were added.
        """
        self.km = KernelManager()
        self.fLOG = fLOG
        self.theNotebook = theNotebook
        self.code_init = code_init
        args = []

        if profile_dir:
            args.append('--profile-dir=%s' % os.path.abspath(profile_dir))

        cwd = os.getcwd()

        if working_dir:
            os.chdir(working_dir)

        # the current directory is restored even if the kernel fails to start
        try:
            self.km.start_kernel(extra_arguments=args)
        finally:
            os.chdir(cwd)

        self.kc = self.km.client()
        self.kc.start_channels()
        # if it does not work, it probably means IPython < 3
        self.kc.wait_for_ready()

        self.nb = nb
        self.comment = comment

    def to_json(self, filename, encoding="utf8"):
        """
        convert the notebook into json

        @param      filename        filename or stream
        @param      encoding        encoding used when *filename* is a file name
        """
        if isinstance(filename, str  # unicode#
                      ):
            with open(filename, "w", encoding=encoding) as payload:
                self.to_json(payload)
        else:
            filename.write(writes(self.nb))

    @staticmethod
    def read_json(js, profile_dir=None, encoding="utf8"):
        """
        read a notebook from a JSON stream or string

        @param      js              string or stream
        @param      profile_dir     profile directory
        @param      encoding        encoding for the notebooks
        @return                     instance of @see cl NotebookRunner
        """
        if isinstance(js, str  # unicode#
                      ):
            st = io.StringIO(js)
        else:
            st = js
        from .notebook_helper import read_nb
        return read_nb(st, profile_dir=profile_dir, encoding=encoding)

    def copy(self):
        """
        copy the notebook (just the content)

        @return         instance of @see cl NotebookRunner

        .. versionadded:: 1.1
        """
        st = io.StringIO()
        self.to_json(st)
        return NotebookRunner.read_json(st.getvalue())

    def __add__(self, nb):
        """
        merges two notebooks together, returns a new one

        @param      nb      notebook
        @return             new notebook
        """
        c = self.copy()
        c.merge_notebook(nb)
        return c

    def shutdown_kernel(self):
        """
        shut down kernel
        """
        self.fLOG('-- shutdown kernel')
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def clean_code(self, code):
        """
        clean the code before running it, the function comment out
        instruction such as ``show()``

        @param      code        code (string), None is returned unchanged
        @return                 cleaned code
        """
        # the None check must come first: ``in`` fails on None
        if code is None:
            return code

        has_bokeh = "bokeh." in code or "from bokeh" in code or "import bokeh" in code
        lines = [_.strip("\n\r").rstrip(" \t") for _ in code.split("\n")]
        res = []
        show_is_last = False
        for line in lines:
            if line.replace(" ", "") == "show()":
                line = line.replace("show", "#show")
                show_is_last = True
            elif has_bokeh and line.replace(" ", "") == "output_notebook()":
                line = line.replace("output_notebook", "#output_notebook")
            else:
                show_is_last = False
            res.append(line)
            if show_is_last:
                res.append('"nothing to show"')
        return "\n".join(res)

    @staticmethod
    def get_cell_code(cell):
        """
        return the code of a cell

        @param      cell        a cell or a string
        @return                 boolean (=iscell), string
        """
        if isinstance(cell, str  # unicode#
                      ):
            iscell = False
            return iscell, cell
        else:
            iscell = True
            try:
                return iscell, cell.source
            except AttributeError:
                return iscell, cell.input

    def run_cell(self, index_cell, cell, clean_function=None):
        '''
        Run a notebook cell and update the output of that cell in-place.

        @param      index_cell          index of the cell
        @param      cell                cell to execute
        @param      clean_function      cleaning function to apply to the code before running it
        @return                         output of the cell (an empty string if there
                                        is no code to run or no message was received)

        The function raises @see cl NotebookError if the kernel reports an error.
        '''
        iscell, codei = NotebookRunner.get_cell_code(cell)

        self.fLOG('-- running cell:\n%s\n' % codei)

        code = self.clean_code(codei)
        if clean_function is not None:
            code = clean_function(code)
        if len(code) == 0:
            return ""
        self.kc.execute(code)

        reply = self.kc.get_shell_msg()
        reason = None
        try:
            status = reply['content']['status']
        except KeyError:
            status = 'error'
            reason = "no status key in reply['content']"

        if status == 'error':
            ansi_escape = re.compile(r'\x1b[^m]*m')
            try:
                tr = [ansi_escape.sub('', _)
                      for _ in reply['content']['traceback']]
            except KeyError:
                tr = ["No traceback, available keys in reply['content']"] + \
                    [_ for _ in reply['content']]
            traceback_text = '\n'.join(tr)
            self.fLOG("ERR:\n", traceback_text)
        else:
            traceback_text = ''
            self.fLOG('-- cell returned')

        outs = list()
        nbissue = 0
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=1)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:
                # execution state should return to idle before the queue becomes empty,
                # if it doesn't, something bad has happened
                status = "error"
                reason = "exception Empty was raised"
                nbissue += 1
                if nbissue > 10:
                    # the notebook is empty
                    return ""
                else:
                    continue

            content = msg['content']
            msg_type = msg['msg_type']

            # IPython 3.0.0-dev writes pyerr/pyout in the notebook format but uses
            # error/execute_result in the message spec. This does the translation
            # needed for tests to pass with IPython 3.0.0-dev
            notebook3_format_conversions = {
                'error': 'pyerr',
                'execute_result': 'pyout'
            }
            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            if 'execution_count' in content:
                if iscell:
                    cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue

            elif msg_type == 'stream':
                out.stream = content['name']
                # in msgspec 5, this is name, text
                # in msgspec 4, this is name, data
                # in both cases the payload is a string and belongs to *text*,
                # *data* is reserved for mime bundles (dictionaries)
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']

            elif msg_type in ('display_data', 'pyout'):
                out.data = content['data']

            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']

            elif msg_type == 'clear_output':
                outs = list()
                continue

            elif msg_type == 'comm_open' or msg_type == 'comm_msg':
                # widgets in a notebook
                out.data = content["data"]
                out.comm_id = content["comm_id"]

            else:
                dcontent = "\n".join("{0}={1}".format(k, v)
                                     for k, v in sorted(content.items()))
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type + "\nCONTENT:\n" + dcontent)

            outs.append(out)

        if iscell:
            cell['outputs'] = outs

        raw = []
        for _ in outs:
            try:
                t = _.data
            except AttributeError:
                continue

            # see MIMEMAP to see the available output type
            for k, v in t.items():
                if k.startswith("text"):
                    raw.append(v)

        sraw = "\n".join(raw)
        self.fLOG(sraw)

        def reply2string(reply):
            sreply = []
            for k, v in sorted(reply.items()):
                if isinstance(v, dict):
                    temp = []
                    for _, __ in sorted(v.items()):
                        temp.append("    [{0}]={1}".format(_, str(__)))
                    v = "\n".join(temp)
                    sreply.append("reply['{0}']=dict\n{1}".format(k, v))
                else:
                    sreply.append("reply['{0}']={1}".format(k, str(v)))
            sreply = "\n".join(sreply)
            return sreply

        if status == 'error':
            sreply = reply2string(reply)
            if len(code) < 5:
                scode = [code]
            else:
                scode = ""
            raise NotebookError("{7}\nCELL status={8}, reason={9} -- {4} length={5} -- {6}:\n-----------------\n{0}\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}".format(
                code, traceback_text, sraw, sreply, index_cell, len(code), scode, self.comment, status, reason))
        return outs

    def iter_code_cells(self):
        '''
        Iterate over the notebook cells containing code.
        '''
        for cell in self.iter_cells():
            if cell.cell_type == 'code':
                yield cell

    def iter_cells(self):
        '''
        Iterate over the notebook cells.
        '''
        if hasattr(self.nb, "worksheets"):
            for ws in self.nb.worksheets:
                for cell in ws.cells:
                    yield cell
        else:
            for cell in self.nb.cells:
                yield cell

    def _cell_container(self):
        """
        returns a cells container, it may change according to the format

        @return     cell container (the cells of the last worksheet
                    if the notebook has worksheets)
        """
        if hasattr(self.nb, "worksheets"):
            last = None
            for ws in self.nb.worksheets:
                last = ws
            if last is None:
                raise NotebookError("no cell container")
            return last.cells
        else:
            return self.nb.cells

    def __len__(self):
        """
        return the number of cells, it iterates on cells
        to get this information and does not cache the information

        @return         int

        .. versionadded:: 1.1
        """
        return sum(1 for _ in self.iter_cells())

    def cell_type(self, cell):
        """
        returns the cell type

        @param      cell        from @see me iter_cells
        @return                 type
        """
        return cell.cell_type

    def cell_metadata(self, cell):
        """
        returns the cell metadata

        @param      cell        cell
        @return                 metadata
        """
        return cell.metadata

    def cell_height(self, cell):
        """
        approximate the height of a cell by its number of lines it contains

        @param      cell        cell
        @return                 number of cell
        """
        kind = self.cell_type(cell)
        if kind == "markdown":
            content = cell.source
            lines = content.split("\n")
            nbs = sum(1 + len(line) // 80 for line in lines)
            return nbs
        elif kind == "code":
            content = cell.source
            lines = content.split("\n")
            nbl = len(lines)

            for output in cell.outputs:
                if output["output_type"] == "execute_result" or \
                        output["output_type"] == "display_data":
                    data = output["data"]
                    for k, v in data.items():
                        if k == "text/plain":
                            nbl += len(v.split("\n"))
                        elif k == "application/javascript":
                            # rough estimation
                            nbl += len(v.split("\n")) // 2
                        elif k == "image/svg+xml":
                            nbl += len(v) // 5
                        elif k == "text/html":
                            nbl += len(v.split("\n"))
                        elif k == "image/png" or k == "image/jpg" or k == "image/jpeg":
                            nbl += len(v) // 50
                        else:
                            raise NotImplementedError("cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(kind, k, v, cell))
                elif output["output_type"] == "stream":
                    v = output["text"]
                    nbl += len(v.split("\n"))
                elif output["output_type"] == "error":
                    v = output["traceback"]
                    nbl += len(v)
                else:
                    raise NotImplementedError("cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                                              .format(kind, output["output_type"], output, cell))

            return nbl

        else:
            raise NotImplementedError(
                "cell type: {0}\nCELL:\n{1}".format(kind, cell))

    def add_tag_slide(self, max_nb_cell=8, max_nb_line=25):
        """
        tries to add tags for a slide show when they are too few

        @param      max_nb_cell     maximum number of cells within a slide
        @param      max_nb_line     maximum number of lines within a slide
        @return                     list of modified cells { #slide: (kind, reason, cell) }
        """
        res = {}
        nbline = 0
        nbcell = 0
        for i, cell in enumerate(self.iter_cells()):
            meta = cell.metadata
            if "slideshow" in meta:
                st = meta["slideshow"]["slide_type"]
                if st in ["slide", "subslide"]:
                    nbline = 0
                    nbcell = 0
            else:
                if cell.cell_type == "markdown":
                    content = cell.source
                    # a markdown title starts a new slide
                    if content.startswith(("# ", "## ", "### ")):
                        meta["slideshow"] = {'slide_type': 'slide'}
                        nbline = 0
                        nbcell = 0
                        res[i] = ("slide", "section", cell)

            dh = self.cell_height(cell)
            dc = 1
            new_nbline = nbline + dh
            new_cell = dc + nbcell
            if "slideshow" not in meta:
                if new_cell > max_nb_cell or \
                   new_nbline > max_nb_line:
                    res[i] = (
                        "subslide", "{0}-{1} <-> {2}-{3}".format(nbcell, nbline, dc, dh), cell)
                    nbline = 0
                    nbcell = 0
                    meta["slideshow"] = {'slide_type': 'subslide'}

            nbline += dh
            nbcell += dc

        return res

    def run_notebook(self, skip_exceptions=False, progress_callback=None, additional_path=None, valid=None, clean_function=None):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).

        @param      skip_exceptions     skip exception
        @param      progress_callback   call back function
        @param      additional_path     additional paths (as a list or None if none)
        @param      valid               if not None, valid is a function which returns whether
                                        or not the cell should be executed or not
        @param      clean_function      function which cleans a cell's code before executing it (None for None)
        @return                         dictionary with statistics
                                        (*nbcell*, *nbrun*, *nbvalid*, *time*)

        .. versionchanged:: 1.1
            The function adds the local variable ``theNotebook`` with
            the absolute file name of the notebook.
        '''
        # additional path
        if additional_path is not None:
            if not isinstance(additional_path, list):
                raise TypeError(
                    "additional_path should be a list not: " + str(additional_path))
            code = ["import sys"]
            for p in additional_path:
                code.append("sys.path.append(r'{0}')".format(p))
            cell = "\n".join(code)
            self.run_cell(-1, cell)

        # we add local variable theNotebook
        if self.theNotebook is not None:
            cell = "theNotebook = r'''{0}'''".format(self.theNotebook)
            self.run_cell(-1, cell)

        # initialisation with a code not inside the notebook
        if self.code_init is not None:
            self.run_cell(-1, self.code_init)

        # execution of the notebook
        nbcell = 0
        nbrun = 0
        nbnerr = 0
        # time.clock was removed in Python 3.8
        cl = time.perf_counter()
        for i, cell in enumerate(self.iter_code_cells()):
            nbcell += 1
            iscell, codei = NotebookRunner.get_cell_code(cell)
            if valid is not None and not valid(codei):
                continue
            try:
                nbrun += 1
                self.run_cell(i, cell, clean_function=clean_function)
                nbnerr += 1
            except Empty as er:
                raise Exception(
                    "{0}\nissue when executing:\n{1}".format(self.comment, codei)) from er
            except NotebookError as e:
                if not skip_exceptions:
                    raise
                # the failure is logged and the next cells are run,
                # the cell is not counted in nbvalid
                self.fLOG("-- cell {0} failed, skip_exceptions is set:\n{1}".format(i, e))
            if progress_callback:
                progress_callback(i)
        etime = time.perf_counter() - cl
        return dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)

    def count_code_cells(self):
        '''
        @return the number of code cells in the notebook

        .. versionadded:: 1.1
        '''
        return sum(1 for _ in self.iter_code_cells())

    def merge_notebook(self, nb):
        """
        append notebook *nb* to this one

        @param      nb      notebook or list of notebook (@see cl NotebookRunner)
        @return             number of added cells

        @example(How to merge notebook?)
        The following code merges two notebooks into the first one
        and stores the result unto a file.

        @code
        from pyquickhelper.ipythonhelper import read_nb
        nb1 = read_nb("<file1>")
        nb2 = read_nb("<file2>")
        nb1.merge_notebook(nb2)
        nb1.to_json(outfile)
        @endcode
        @endexample

        .. versionadded:: 1.1
        """
        if isinstance(nb, list):
            s = 0
            for n in nb:
                s += self.merge_notebook(n)
            return s
        else:
            last = self._cell_container()
            s = 0
            for cell in nb.iter_cells():
                last.append(cell)
                s += 1
            return s
def test_notebook(nb): km = KernelManager() km.start_kernel(extra_arguments=['--pylab=inline'], stderr=open(os.devnull, 'w')) try: kc = km.client() kc.start_channels() iopub = kc.iopub_channel except AttributeError: # IPython 0.13 kc = km kc.start_channels() iopub = kc.sub_channel shell = kc.shell_channel # run %pylab inline, because some notebooks assume this # even though they shouldn't shell.execute("pass") shell.get_msg() while True: try: iopub.get_msg(timeout=1) except Empty: break successes = 0 failures = 0 errors = 0 """ IPython notebooks currently only support a single worksheet Future versions may use many, so we leave the outer loop A worksheet is simply a group of cells e.g. could use for cell in nb.worksheets[0].cells: """ for ws in nb.worksheets: for cell in ws.cells: # If the cell is code, move to next cell if cell.cell_type != 'code': continue # Otherwise the cell is an output cell, run it! try: outs = run_cell(shell, iopub, cell) except Exception as e: print "failed to run cell:", repr(e) print cell.input errors += 1 continue failed = False for out, ref in zip(outs, cell.outputs): # print '\n This is the output: \n' # print out # print '\n' # print 'This is the reference:\n' # print ref # print '\n' if not compare_outputs(out, ref): failed = True if failed: failures += 1 else: successes += 1 sys.stdout.write('.') print print "tested notebook %s" % nb.metadata.name print bcolors.OKGREEN + " %3i cells successfully replicated" % + successes + bcolors.ENDC if failures: print bcolors.FAIL + " %3i cells mismatched output" % failures + bcolors.ENDC if errors: print bcolors.WARNING + " %3i cells failed to complete" % errors + bcolors.ENDC kc.stop_channels() km.shutdown_kernel() del km
def _import_debugger():
    """Return ``ipdb`` when it is installed, otherwise the stdlib ``pdb``."""
    try:
        import ipdb
        return ipdb
    except ImportError:
        try:
            import pdb
            return pdb
        except ImportError:
            raise RuntimeError(
                "Cannot start debugger. Please install pdb or ipdb."
            )


def test_notebook(nb, debug_failures=False):
    """Re-run the code cells of ``nb`` and verify the stored outputs.

    Special tags on the first line of a cell are honoured:

    * ``# IPYTHON_TEST_SKIP_REMAINDER`` -- this cell and all following cells
      are skipped;
    * ``# IPYTHON_TEST_IGNORE_OUTPUT`` -- the cell is executed but its output
      is not compared.

    When at least one cell mismatches, the collected HTML diffs are written to
    ``ipynbtest_failed_test_differences.html`` in the current directory.

    :param nb: notebook object exposing ``worksheets`` and ``metadata``
    :param debug_failures: if true, drop into ``ipdb``/``pdb`` at each
        mismatching output
    :raises IPythonNotebookDoctestError: if any cell mismatched or failed to
        run
    """
    successes = 0
    failures = 0
    errors = 0
    html_diffs_all = ""
    skip_remainder = False
    first_failed_input = None
    first_failed_output = None
    first_failed_output_expected = None

    with open(os.devnull, 'w') as devnull:
        km = KernelManager()
        km.start_kernel(extra_arguments=['--pylab=inline'], stderr=devnull)
        kc = None
        try:
            try:
                kc = km.client()
                kc.start_channels()
                iopub = kc.iopub_channel
            except AttributeError:
                # IPython 0.13
                kc = km
                kc.start_channels()
                iopub = kc.sub_channel
            shell = kc.shell_channel

            # Execute a no-op, wait for its reply and drain pending IOPub
            # messages before the first real cell is run.
            shell.execute("pass")
            shell.get_msg()
            while True:
                try:
                    iopub.get_msg(timeout=1)
                except Empty:
                    break

            for ws in nb.worksheets:
                for cell in ws.cells:
                    if skip_remainder:
                        continue
                    if cell.cell_type != 'code':
                        continue

                    # Extract the first line so that we can check for special
                    # tags such as IPYTHON_TEST_IGNORE_OUTPUT.
                    if cell['input'] != '':
                        first_line = cell['input'].splitlines()[0]
                    else:
                        first_line = ''

                    if re.search(r'^#\s*IPYTHON_TEST_SKIP_REMAINDER',
                                 first_line):
                        skip_remainder = True
                        continue

                    try:
                        outs = run_cell(shell, iopub, cell)
                        # Ignore output from cells tagged with
                        # IPYTHON_TEST_IGNORE_OUTPUT.
                        if re.search(r'^#\s*IPYTHON_TEST_IGNORE_OUTPUT',
                                     first_line):
                            outs = []
                    except Exception as e:
                        print("failed to run cell: %s" % repr(e))
                        print(cell.input)
                        errors += 1
                        continue

                    failed = False
                    outs_merged = merge_streams(outs)
                    cell_outputs_merged = merge_streams(cell.outputs)
                    for out, ref in zip(outs_merged, cell_outputs_merged):
                        cmp_result, html_diff = compare_outputs(out, ref, cell)
                        html_diffs_all += html_diff
                        if not cmp_result:
                            if debug_failures:
                                print(textwrap.dedent("""
                                    ==========================================================================
                                    Entering debugger.

                                    You can print the reference output and computed output with
                                    "print ref['text']" and "print out['text']", respectively.
                                    ==========================================================================
                                    """))
                                # set_trace() is called here (not in a helper)
                                # so that ``ref`` and ``out`` are in the frame
                                # the debugger stops in.
                                _import_debugger().set_trace()
                            failed = True

                    if failed:
                        print("Failed to replicate cell with the following input: ")
                        print("=== BEGIN INPUT ===================================")
                        print(cell.input)
                        print("=== END INPUT =====================================")
                        if failures == 0:
                            # This is the first cell that failed to replicate.
                            # Let's store its output for debugging.
                            first_failed_input = cell.input
                            first_failed_output = outs_merged
                            first_failed_output_expected = cell_outputs_merged
                            # For easier debugging, replace the (usually huge)
                            # binary data of any pngs appearing in the expected
                            # or computed output with a short placeholder.
                            # Only nodes that actually carry a png are touched.
                            for node in (first_failed_output_expected
                                         + first_failed_output):
                                if 'png' in node:
                                    node['png'] = '<PNG IMAGE>'
                        failures += 1
                    else:
                        successes += 1
                    sys.stdout.write('.')
                    sys.stdout.flush()

            if failures >= 1:
                outfilename = 'ipynbtest_failed_test_differences.html'
                with open(outfilename, 'w') as f:
                    f.write(html_diffs_all)
                print("Diagnostic HTML output of the failed test has been "
                      "written to '{}'".format(outfilename))

            print("")
            print("tested notebook %s" % nb.metadata.name)
            print(" %3i cells successfully replicated" % successes)
            if failures:
                print(" %3i cells mismatched output" % failures)
            if errors:
                print(" %3i cells failed to complete" % errors)
        finally:
            if kc is not None:
                kc.stop_channels()
            km.shutdown_kernel()

    if failures or errors:
        errmsg = ("The notebook {} failed to replicate successfully.\n"
                  "".format(nb.metadata['name']))
        if failures:
            errmsg += (
                "Input and output from first failed cell:\n"
                "=== BEGIN INPUT ==================================\n"
                "{}\n"
                "=== BEGIN EXPECTED OUTPUT ========================\n"
                "{}\n"
                "=== BEGIN COMPUTED OUTPUT ========================\n"
                "{}\n"
                "==================================================\n"
                "".format(first_failed_input,
                          first_failed_output_expected,
                          first_failed_output))
        raise IPythonNotebookDoctestError(errmsg)
class NotebookRunner(object):
    """Run the code cells of a notebook in a fresh kernel, updating outputs.

    The notebook is either passed directly (``nb``) or read from the JSON file
    ``nb_in``.
    """

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
    }

    def __init__(self, nb_in=None, pylab=False, mpl_inline=False, nb=None):
        """Start a kernel and load the notebook.

        :param nb_in: path of a JSON notebook, read when ``nb`` is falsy
        :param pylab: start the kernel with ``--pylab=inline``
        :param mpl_inline: start the kernel with ``--matplotlib=inline``
            (only considered when ``pylab`` is false)
        :param nb: an already loaded notebook object
        """
        self.km = KernelManager()
        if pylab:
            self.km.start_kernel(extra_arguments=['--pylab=inline'])
        elif mpl_inline:
            self.km.start_kernel(extra_arguments=['--matplotlib=inline'])
        else:
            self.km.start_kernel()

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()
        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel

        logging.info('Reading notebook %s', nb_in)
        self.nb = nb
        if not self.nb:
            with open(nb_in) as handle:
                self.nb = read(handle, 'json')

    def __del__(self):
        # __init__ may have failed half-way, so the attributes may be missing.
        kc = getattr(self, 'kc', None)
        if kc is not None:
            kc.stop_channels()
        km = getattr(self, 'km', None)
        if km is not None:
            km.shutdown_kernel(now=True)

    def run_cell(self, cell):
        '''
        Run a notebook cell and update the output of that cell in-place.

        :raises NotebookError: if the execute reply reports an error (the
            cell outputs are stored before raising)
        :raises Empty: if the IOPub queue runs dry before the kernel reports
            that it is idle again
        '''
        logging.info('Running cell:\n%s\n', cell.input)
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            logging.info('Cell raised uncaught exception: \n%s',
                         '\n'.join(reply['content']['traceback']))
        else:
            logging.info('Cell returned')

        outs = list()
        while True:
            # Execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # exception is deliberately left to propagate.
            msg = self.iopub.get_msg(timeout=1)
            content = msg['content']
            msg_type = msg['msg_type']
            if msg_type == 'status' and content['execution_state'] == 'idle':
                break

            out = NotebookNode(output_type=msg_type)

            # The prompt number is recorded before skipping 'pyin' messages so
            # that the cell gets its number even when it produces no output.
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ['status', 'pyin']:
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime)
                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type)
            outs.append(out)

        cell['outputs'] = outs

        if status == 'error':
            raise NotebookError()

    def iter_code_cells(self):
        '''
        Iterate over the notebook cells containing code.
        '''
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, skip_exceptions=False):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).
        '''
        for cell in self.iter_code_cells():
            try:
                self.run_cell(cell)
            except NotebookError:
                if not skip_exceptions:
                    raise

    def save_notebook(self, nb_out):
        """Write the notebook (with its current outputs) as JSON to ``nb_out``."""
        logging.info('Saving to %s', nb_out)
        with open(nb_out, 'w') as handle:
            write(self.nb, handle, 'json')
class NotebookRunner(object):
    """Run the code cells of a notebook in a fresh kernel, updating outputs."""

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    def __init__(self, nb, working_dir=None):
        """Start a kernel for ``nb``.

        :param nb: notebook object exposing ``worksheets``
        :param working_dir: directory the kernel is started from; the current
            directory of this process is restored afterwards
        """
        from IPython.kernel import KernelManager
        self.km = KernelManager()

        cwd = os.getcwd()
        if working_dir is not None:
            os.chdir(working_dir)
        try:
            self.km.start_kernel()
        finally:
            # Restore the caller's directory even if the kernel fails to start.
            os.chdir(cwd)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            time.sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()
        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel
        self.nb = nb

    def __del__(self):
        # __init__ may have failed half-way, so the attributes may be missing.
        kc = getattr(self, 'kc', None)
        if kc is not None:
            kc.stop_channels()
        km = getattr(self, 'km', None)
        if km is not None:
            km.shutdown_kernel(now=True)

    def run_cell(self, cell):  # noqa: C901
        """Run a notebook cell and update the output of that cell in-place.

        :raises Exception: carrying the kernel traceback when the execute
            reply reports an error (outputs are stored before raising)
        """
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        traceback_text = ''
        if status == 'error':
            # Explicit "+": with implicit literal concatenation the header
            # would become the separator passed to join().
            traceback_text = ("Cell raised uncaught exception: \n"
                              + "\n".join(reply['content']['traceback']))

        # IPython 3.0.0-dev writes pyerr/pyout in the notebook format
        # but uses error/execute_result in the message spec. This does
        # the translation needed for tests to pass with IPython 3.0.0-dev
        notebook3_format_conversions = {
            'error': 'pyerr',
            'execute_result': 'pyout',
        }

        outs = []
        while True:
            msg = self.iopub.get_msg(timeout=1)
            msg_type = msg['msg_type']
            content = msg['content']

            if msg_type == 'status' and content['execution_state'] == 'idle':
                break

            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            # Recorded before the input-echo messages are skipped so that the
            # cell receives its prompt number even without any output.
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime)
                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            elif msg_type == 'clear_output':
                outs = []
                continue
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type)
            outs.append(out)

        cell['outputs'] = outs

        if status == 'error':
            raise Exception(traceback_text)

    def iter_code_cells(self):
        """Iterate over the notebook cells containing code."""
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, skip_exceptions=False, progress_callback=None):
        """Runs all notebook cells in order and updates outputs in-place.

        If ``skip_exceptions`` is True, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).

        ``progress_callback``, when given, is called with the index of each
        code cell after it has been processed.
        """
        for i, cell in enumerate(self.iter_code_cells()):
            try:
                self.run_cell(cell)
            except Exception:
                # KeyboardInterrupt / SystemExit are intentionally not caught.
                if not skip_exceptions:
                    raise
            if progress_callback is not None:
                progress_callback(i)

    def count_code_cells(self):
        """Return the number of code cells in the notebook."""
        return sum(1 for _ in self.iter_code_cells())
class ExecutePreprocessor(Preprocessor):
    """
    Executes all the cells in a notebook
    """

    timeout = Integer(
        30, config=True,
        help="The time to wait (in seconds) for output from executions."
    )

    # FIXME: to be removed with nbformat v4
    # map msg_type to v3 output_type
    msg_type_map = {
        "error": "pyerr",
        "execute_result": "pyout",
    }

    # FIXME: to be removed with nbformat v4
    # map mime-type to v3 mime-type keys
    mime_map = {
        "text/plain": "text",
        "text/html": "html",
        "image/svg+xml": "svg",
        "image/png": "png",
        "image/jpeg": "jpeg",
        "text/latex": "latex",
        "application/json": "json",
        "application/javascript": "javascript",
    }

    extra_arguments = List(Unicode)

    def _create_client(self):
        """Start a kernel and a client, then wait until the kernel answers."""
        from IPython.kernel import KernelManager
        self.km = KernelManager()
        self.km.write_connection_file()
        self.kc = self.km.client()
        self.kc.start_channels()
        self.km.start_kernel(extra_arguments=self.extra_arguments,
                             stderr=open(os.devnull, 'w'))
        self.iopub = self.kc.iopub_channel
        self.shell = self.kc.shell_channel

        self.shell.kernel_info()
        try:
            self.shell.get_msg(timeout=self.timeout)
        except Empty:
            self.log.error("Timeout waiting for kernel_info reply")
            raise

        # flush IOPub
        while True:
            try:
                self.iopub.get_msg(block=True, timeout=0.25)
            except Empty:
                break

    def _shutdown_client(self):
        """Stop the channels and shut the kernel down."""
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        del self.km

    def preprocess(self, nb, resources):
        """Execute ``nb`` in a fresh kernel and return ``(nb, resources)``.

        The kernel is shut down even when a cell fails.
        """
        self._create_client()
        try:
            nb, resources = super(ExecutePreprocessor, self).preprocess(
                nb, resources)
        finally:
            self._shutdown_client()
        return nb, resources

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Apply a transformation on each code cell. See base.py for details.

        Non-code cells are returned untouched; for code cells the outputs are
        replaced by those produced by ``run_cell``.
        """
        if cell.cell_type != 'code':
            return cell, resources
        try:
            outputs = self.run_cell(self.shell, self.iopub, cell)
        except Exception as e:
            self.log.error("failed to run cell: " + repr(e))
            self.log.error(str(cell.input))
            raise
        cell.outputs = outputs
        return cell, resources

    def run_cell(self, shell, iopub, cell):
        """Execute ``cell.input`` and return the list of output nodes.

        :raises Empty: when no execute reply arrives within ``timeout``
            seconds.  A timeout on the IOPub channel only logs a warning and
            returns the outputs collected so far.
        """
        msg_id = shell.execute(cell.input)
        self.log.debug("Executing cell:\n%s", cell.input)

        # wait for finish, with timeout
        while True:
            try:
                msg = shell.get_msg(timeout=self.timeout)
            except Empty:
                self.log.error("Timeout waiting for execute reply")
                raise
            if msg['parent_header'].get('msg_id') == msg_id:
                break
            # otherwise: not our reply, keep waiting

        outs = []
        while True:
            try:
                msg = iopub.get_msg(timeout=self.timeout)
            except Empty:
                self.log.warning("Timeout waiting for IOPub output")
                break
            if msg['parent_header'].get('msg_id') != msg_id:
                # not an output from our execution
                continue

            msg_type = msg['msg_type']
            self.log.debug("output: %s", msg_type)
            content = msg['content']

            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    break
                else:
                    continue
            elif msg_type in {'execute_input', 'pyin'}:
                continue
            elif msg_type == 'clear_output':
                outs = []
                continue

            out = NotebookNode(
                output_type=self.msg_type_map.get(msg_type, msg_type))

            # set the prompt number for the input and the output
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'execute_result'):
                out['metadata'] = content['metadata']
                for mime, data in content['data'].items():
                    # map mime-type keys to nbformat v3 keys
                    # this will be unnecessary in nbformat v4
                    key = self.mime_map.get(mime, mime)
                    out[key] = data
            elif msg_type == 'error':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                # The node is still appended below, it just carries no payload.
                self.log.error("unhandled iopub msg: " + msg_type)

            outs.append(out)
        return outs
class NotebookRunner(object):
    """Run the code cells of a notebook in a fresh kernel, updating outputs."""

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    def __init__(self, nb, pylab=False, mpl_inline=False, working_dir=None):
        """Start a kernel for ``nb``.

        :param nb: notebook object exposing ``worksheets``
        :param pylab: deprecated, start the kernel with ``--pylab=inline``
        :param mpl_inline: deprecated, start the kernel with
            ``--matplotlib=inline`` (only considered when ``pylab`` is false)
        :param working_dir: directory the kernel is started from; the current
            directory of this process is restored afterwards
        """
        self.km = KernelManager()

        args = []
        if pylab:
            args.append('--pylab=inline')
            logging.warning(
                '--pylab is deprecated and will be removed in a future version'
            )
        elif mpl_inline:
            args.append('--matplotlib=inline')
            logging.warning(
                '--matplotlib is deprecated and will be removed in a future version'
            )

        cwd = os.getcwd()
        if working_dir:
            os.chdir(working_dir)
        try:
            self.km.start_kernel(extra_arguments=args)
        finally:
            # Restore the caller's directory even if the kernel fails to start.
            os.chdir(cwd)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()
        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel
        self.nb = nb

    def __del__(self):
        # __init__ may have failed half-way, so the attributes may be missing.
        kc = getattr(self, 'kc', None)
        if kc is not None:
            kc.stop_channels()
        km = getattr(self, 'km', None)
        if km is not None:
            km.shutdown_kernel(now=True)

    def run_cell(self, cell):
        '''
        Run a notebook cell and update the output of that cell in-place.

        :raises NotebookError: with the kernel traceback when the execute
            reply reports an error (outputs are stored before raising)
        :raises Empty: if the IOPub queue runs dry before the kernel reports
            that it is idle again
        '''
        logging.info('Running cell:\n%s\n', cell.input)
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        traceback_text = ''
        if status == 'error':
            traceback_text = 'Cell raised uncaught exception: \n' + \
                '\n'.join(reply['content']['traceback'])
            logging.info(traceback_text)
        else:
            logging.info('Cell returned')

        # IPython 3.0.0-dev writes pyerr/pyout in the notebook format but uses
        # error/execute_result in the message spec. This does the translation
        # needed for tests to pass with IPython 3.0.0-dev
        notebook3_format_conversions = {
            'error': 'pyerr',
            'execute_result': 'pyout'
        }

        outs = list()
        while True:
            # Execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # exception is deliberately left to propagate.
            msg = self.iopub.get_msg(timeout=1)
            content = msg['content']
            msg_type = msg['msg_type']
            if msg_type == 'status' and content['execution_state'] == 'idle':
                break

            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            # Recorded before the input-echo messages are skipped so that the
            # cell receives its prompt number even without any output.
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime)
                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            elif msg_type == 'clear_output':
                outs = list()
                continue
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type)
            outs.append(out)

        cell['outputs'] = outs

        if status == 'error':
            raise NotebookError(traceback_text)

    def iter_code_cells(self):
        '''
        Iterate over the notebook cells containing code.
        '''
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, skip_exceptions=False, progress_callback=None):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).

        ``progress_callback``, when given, is called with the index of each
        code cell after it has been processed.
        '''
        for i, cell in enumerate(self.iter_code_cells()):
            try:
                self.run_cell(cell)
            except NotebookError:
                if not skip_exceptions:
                    raise
            if progress_callback:
                progress_callback(i)

    def count_code_cells(self):
        '''
        Return the number of code cells in the notebook
        '''
        return sum(1 for _ in self.iter_code_cells())
class NotebookRunner(object):
    """Run the code cells of a notebook in a fresh kernel, updating outputs.

    Call :meth:`shutdown_kernel` when done; the kernel is not stopped
    automatically.
    """

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'application/json': 'json',
        'image/svg+xml': 'svg',
    }

    def __init__(
            self,
            nb,
            pylab=False,
            mpl_inline=False,
            profile_dir=None,
            working_dir=None):
        """Start a kernel for ``nb`` and wait until it is ready.

        :param nb: notebook object exposing ``worksheets``
        :param pylab: deprecated, start the kernel with ``--pylab=inline``
        :param mpl_inline: deprecated, start the kernel with
            ``--matplotlib=inline`` (only considered when ``pylab`` is false)
        :param profile_dir: IPython profile directory passed to the kernel as
            an absolute path
        :param working_dir: directory the kernel is started from; the current
            directory of this process is restored afterwards
        """
        self.km = KernelManager()

        args = []
        if pylab:
            args.append('--pylab=inline')
            logging.warning(
                '--pylab is deprecated and will be removed in a future version'
            )
        elif mpl_inline:
            args.append('--matplotlib=inline')
            logging.warning(
                '--matplotlib is deprecated and'
                ' will be removed in a future version'
            )

        if profile_dir:
            # Resolved before any chdir so that a relative path refers to the
            # caller's directory.
            args.append('--profile-dir=%s' % os.path.abspath(profile_dir))

        cwd = os.getcwd()
        if working_dir:
            os.chdir(working_dir)
        try:
            self.km.start_kernel(extra_arguments=args)
        finally:
            # Restore the caller's directory even if the kernel fails to start.
            os.chdir(cwd)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()
        try:
            self.kc.wait_for_ready()
        except AttributeError:
            # IPython < 3
            self._wait_for_ready_backport()

        self.nb = nb

    def shutdown_kernel(self):
        """Stop the client channels and shut the kernel down immediately."""
        logging.info('Shutdown kernel')
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def _wait_for_ready_backport(self):
        """Block until the kernel answers ``kernel_info``, then flush IOPub."""
        # Backport BlockingKernelClient.wait_for_ready from IPython 3.
        # Wait for kernel info reply on shell channel.
        self.kc.kernel_info()
        while True:
            msg = self.kc.get_shell_msg(block=True, timeout=30)
            if msg['msg_type'] == 'kernel_info_reply':
                break

        # Flush IOPub channel
        while True:
            try:
                self.kc.get_iopub_msg(block=True, timeout=0.2)
            except Empty:
                break

    def run_cell(self, cell):
        """Run a notebook cell and update the output of that cell in-place.

        :raises NotebookError: with the kernel traceback when the execute
            reply reports an error (outputs are stored before raising)
        :raises Empty: if the IOPub queue runs dry before the kernel reports
            that it is idle again
        """
        logging.info('Running cell:\n%s\n', cell.input)
        self.kc.execute(cell.input)
        reply = self.kc.get_shell_msg()
        status = reply['content']['status']
        traceback_text = ''
        if status == 'error':
            traceback_text = 'Cell raised uncaught exception: \n' + \
                '\n'.join(reply['content']['traceback'])
            logging.info(traceback_text)
        else:
            logging.info('Cell returned')

        # IPython 3.0.0-dev writes pyerr/pyout in the notebook format
        # but uses error/execute_result in the message spec. This does the
        # translation needed for tests to pass with IPython 3.0.0-dev
        notebook3_format_conversions = {
            'error': 'pyerr',
            'execute_result': 'pyout'
        }

        outs = list()
        while True:
            # Execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # exception is deliberately left to propagate.
            msg = self.kc.get_iopub_msg(timeout=1)
            content = msg['content']
            msg_type = msg['msg_type']
            if msg_type == 'status' and content['execution_state'] == 'idle':
                break

            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            # Recorded before the input-echo messages are skipped so that the
            # cell receives its prompt number even without any output.
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                # in msgspec 5, this is name, text
                # in msgspec 4, this is name, data
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime
                        )

                    # In notebook version <= 3 JSON data is stored as a string
                    # Evaluation of IPython2's JSON gives strings directly
                    # Therefore do not encode for IPython versions prior to 3
                    json_encode = (
                        IPython.version_info[0] >= 3 and
                        mime == "application/json")

                    data_out = json.dumps(data) if json_encode else data
                    setattr(out, attr, data_out)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            elif msg_type == 'clear_output':
                outs = list()
                continue
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type
                )
            outs.append(out)

        cell['outputs'] = outs
        logging.info('Cell output:\n%s\n', outs)
        if status == 'error':
            raise NotebookError(traceback_text)

    def iter_code_cells(self):
        """Iterate over the notebook cells containing code."""
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, skip_exceptions=False, progress_callback=None,
                     cells=None):
        """
        Run all the notebook cells in order and update the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).

        If ``cells`` is given, only the code cells whose index (counted over
        code cells only) is contained in it are run.  ``progress_callback``,
        when given, is called with the index of each cell that was run.
        """
        for i, cell in enumerate(self.iter_code_cells()):
            if cells is not None and i not in cells:
                continue
            try:
                self.run_cell(cell)
            except NotebookError:
                if not skip_exceptions:
                    raise
            if progress_callback:
                progress_callback(i)

    def count_code_cells(self):
        """Return the number of code cells in the notebook."""
        return sum(1 for _ in self.iter_code_cells())
class IPyKernel(object):
    """
    A simple wrapper class to run cells in an IPython Notebook.

    Notes
    -----
    Use `with` construct to properly instantiate
    """

    def __init__(self, console=None):
        # ``console`` is accepted for interface compatibility but not used.
        # default timeout time is 60 seconds
        self.default_timeout = 60
        self.extra_arguments = ['--pylab=inline']

    def __enter__(self):
        """Start the kernel, open the channels and drain start-up messages."""
        self.km = KernelManager()
        # Kept on the instance so that __exit__ can close it again.
        self._devnull = open(os.devnull, 'w')
        self.km.start_kernel(extra_arguments=self.extra_arguments,
                             stderr=self._devnull)
        try:
            self.kc = self.km.client()
            self.kc.start_channels()
            self.iopub = self.kc.iopub_channel
        except AttributeError:
            # IPython 0.13
            self.kc = self.km
            self.kc.start_channels()
            self.iopub = self.kc.sub_channel

        self.shell = self.kc.shell_channel

        # Execute a no-op, wait for its reply and drain pending IOPub
        # messages before the first real cell is run.
        self.shell.execute("pass")
        self.shell.get_msg()
        while True:
            try:
                self.iopub.get_msg(timeout=1)
            except Empty:
                break
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the channels and shut the kernel down."""
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        del self.km
        devnull = getattr(self, '_devnull', None)
        if devnull is not None:
            devnull.close()
            self._devnull = None

    def run(self, cell, timeout=None):
        """Execute ``cell.input`` and return the list of output nodes.

        :param cell: object with an ``input`` attribute holding the code
        :param timeout: seconds to wait for the execute reply; defaults to
            ``self.default_timeout``
        """
        use_timeout = self.default_timeout
        if timeout is not None:
            use_timeout = timeout
        self.shell.execute(cell.input)
        self.shell.get_msg(timeout=use_timeout)

        outs = []
        while True:
            try:
                msg = self.iopub.get_msg(timeout=0.5)
            except Empty:
                # No more output within half a second: consider the cell done.
                break
            msg_type = msg['msg_type']
            if msg_type in ('status', 'pyin'):
                continue
            elif msg_type == 'clear_output':
                outs = []
                continue

            content = msg['content']
            out = NotebookNode(output_type=msg_type)

            if msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                out['metadata'] = content['metadata']
                # .items() works on both Python 2 and Python 3
                for mime, data in content['data'].items():
                    attr = mime.split('/')[-1].lower()
                    # this gets most right, but fix svg+html, plain
                    attr = attr.replace('+xml', '').replace('plain', 'text')
                    setattr(out, attr, data)
                if msg_type == 'pyout':
                    out.prompt_number = content['execution_count']
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                # The node is still appended below, it just carries no payload.
                print("unhandled iopub msg: %s" % msg_type)

            outs.append(out)
        return outs

    def sanitize(self, s):
        """sanitize a string for comparison.

        fix universal newlines, strip trailing newlines, and normalize likely
        random values (memory addresses and UUIDs)

        Non-string values are returned unchanged.
        """
        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3
        if not isinstance(s, string_types):
            return s
        # normalize newline:
        s = s.replace('\r\n', '\n')

        # ignore trailing newlines (but not space)
        s = s.rstrip('\n')

        # normalize hex addresses:
        s = re.sub(r'0x[a-f0-9]+', '0xFFFFFFFF', s)

        # normalize UUIDs:
        s = re.sub(r'[a-f0-9]{8}(\-[a-f0-9]{4}){3}\-[a-f0-9]{12}',
                   'U-U-I-D', s)

        return s

    def compare_outputs(self, test, ref,
                        skip_compare=('png', 'traceback', 'latex',
                                      'prompt_number', 'svg', 'html')):
        """Return True when ``test`` matches ``ref`` on every key of ``ref``.

        Keys listed in ``skip_compare`` only have to be present in ``test``;
        all other values are compared after :meth:`sanitize`.  Values may be
        of any type (sanitize leaves non-strings untouched), so a plain
        inequality test is used.
        """
        for key in ref:
            if key not in test:
                return False
            if key in skip_compare:
                continue
            if self.sanitize(test[key]) != self.sanitize(ref[key]):
                return False
        return True

    def get_commands(self, cell):
        """Parse the ``#!`` directive on the first line of ``cell.input``.

        The directive is a comma-separated list of ``name`` or ``name:value``
        items.  Names are stripped and lower-cased; a bare name maps to True,
        a ``name:value`` item maps to the value exactly as written (including
        surrounding whitespace).  Items containing more than one ``:`` are
        ignored.
        """
        commands = {}
        if hasattr(cell, 'input'):
            lines = cell.input.splitlines()
            if len(lines) > 0:
                first_line = lines[0]
                if first_line.startswith('#!'):
                    txt = first_line[2:].strip()

                    for part in txt.split(','):
                        subparts = part.split(':')
                        if len(subparts) == 1:
                            commands[subparts[0].strip().lower()] = True
                        elif len(subparts) == 2:
                            commands[subparts[0].strip().lower()] = subparts[1]

        return commands
class NotebookRunner(object):
    """Run code in an RCloud-flavoured IPython kernel."""

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
    }

    def __init__(self, **kw):
        """Initializes the notebook runner.
        Requires config rcloud_python_lib_path -- or raises exception"""
        self.km = KernelManager()
        self.km.kernel_cmd = make_ipkernel_cmd("""
import sys; sys.path.extend("{RCPATH}".split(":")); from rcloud_kernel import main; main()""".format(RCPATH=kw["rcloud_python_lib_path"]), **kw)
        # The remaining keyword arguments are forwarded to start_kernel().
        del kw["rcloud_python_lib_path"]
        self.km.client_factory = MyClient
        self.km.start_kernel(**kw)

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()

        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel

    def __del__(self):
        self.shutdown()

    def shutdown(self):
        """Stop the client channels and shut the kernel down immediately."""
        # __init__ may have failed half-way, so the attributes may be missing.
        kc = getattr(self, 'kc', None)
        if kc is not None:
            kc.stop_channels()
        km = getattr(self, 'km', None)
        if km is not None:
            km.shutdown_kernel(now=True)

    def run_cmd(self, cmd):
        """Execute the source string ``cmd`` and return its list of outputs."""
        d = {}

        # Minimal stand-in for a notebook cell: run_cell() reads ``input``
        # and stores its results through item assignment.
        class Cell(object):
            def __init__(self):
                self.input = cmd

            def __setitem__(self, k, v):
                d[k] = v

        self.run_cell(Cell())
        return d['outputs']

    def run_magic(self, magic_line):
        """Send ``magic_line`` through the shell channel's ``magic`` method."""
        self.shell.magic(magic_line)

    def wait_for_msg(self):
        """Collect raw IOPub messages until the kernel reports it is idle.

        :raises Empty: if the IOPub queue runs dry before the idle status
        """
        outs = list()
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            logging.info('Cell raised uncaught exception: \n%s',
                         '\n'.join(reply['content']['traceback']))
        else:
            if _debugging:
                logging.debug('Cell returned')
        while True:
            # Execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # exception is deliberately left to propagate.
            msg = self.iopub.get_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    break
            outs.append(msg)
        return outs

    def run_cell(self, cell):
        '''
        Run a notebook cell and update the output of that cell in-place.

        Stream text, plain-text results and tracebacks are passed through
        ``RClansiconv`` before being stored.  An error status in the execute
        reply is only logged; no exception is raised for it.
        '''
        if _debugging:
            logging.debug('Running cell:\n%s\n', cell.input)
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            logging.info('Cell raised uncaught exception: \n%s',
                         '\n'.join(reply['content']['traceback']))
        else:
            if _debugging:
                logging.debug('Cell returned')

        outs = list()
        while True:
            try:
                msg = self.iopub.get_msg(timeout=1)
            except Empty:
                # execution state should return to idle before the queue
                # becomes empty, if it doesn't, something bad has happened
                logging.warning('Execution state did not return to idle')
                raise
            content = msg['content']
            msg_type = msg['msg_type']
            if msg_type == 'status' and content['execution_state'] == 'idle':
                break

            out = NotebookNode(output_type=msg_type)

            # Recorded before 'pyin' messages are skipped so that the cell
            # receives its prompt number even without any output.
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ['status', 'pyin']:
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                out.html = RClansiconv(content['data'])
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        logging.warning('unhandled mime type: %s' % mime)
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime)
                    if attr == 'text':
                        if _debugging:
                            logging.info(data)
                        setattr(out, attr, RClansiconv(data))
                    else:
                        setattr(out, attr, data)
            elif msg_type == 'pyerr':
                logging.info('Received an exception: ' + content['ename'])
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
                out.html = RClansiconv('\n'.join(out.traceback))
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type)
            outs.append(out)
        cell['outputs'] = outs

    def iter_code_cells(self, nb):
        '''
        Iterate over the notebook cells containing code.

        ``nb`` is the path of a JSON notebook file.  The loaded notebook is
        remembered on the instance so that save_notebook() has something to
        write.
        '''
        with open(nb) as handle:
            self.nb = read(handle, 'json')
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, nb, skip_exceptions=False):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).
        '''
        for cell in self.iter_code_cells(nb):
            try:
                self.run_cell(cell)
                print(cell.outputs)
            except NotebookError:
                if not skip_exceptions:
                    raise

    def save_notebook(self, nb_out):
        """Write the most recently loaded notebook as JSON to ``nb_out``.

        Requires that a notebook has been loaded through iter_code_cells() /
        run_notebook() first.
        """
        if _debugging:
            logging.info('Saving to %s', nb_out)
        with open(nb_out, 'w') as handle:
            write(self.nb, handle, 'json')
def _compare_cell_outputs(outs, refs, prompt_num, fail_messages, err_messages):
    """Compare freshly produced cell outputs against the stored ones.

    Outputs are paired positionally with ``zip``, so surplus outputs on
    either side are not examined.  Failure and error descriptions are
    appended to ``fail_messages`` / ``err_messages`` in place.

    Returns a ``(failed, errored)`` pair of booleans for the cell.
    """
    failed = False
    errored = False
    for out, ref in zip(outs, refs):
        # An exception that the reference run did not have is an error in
        # the cell execution itself, not an output mismatch.
        if out.output_type == "pyerr" and ref.output_type != "pyerr":
            message = "\nError in code cell"
            if prompt_num is not None:
                message = " %s (#%d)" % (message, prompt_num)
            # Traceback entries are separate lines; join them with newlines
            # like the other runners in this file do.
            message = "%s:\n%s" % (message, "\n".join(out.traceback))
            err_messages.append(message)
            errored = True
            continue

        # Otherwise check whether the stored and achieved outputs match
        try:
            match, message = compare_outputs(out, ref, prompt_num)
        except Exception as e:
            # Mark the whole cell as errored so that it is counted exactly
            # once (and never additionally reported as a success).
            err_messages.append(
                "Error while comparing output:\n%s" % repr(e))
            errored = True
            continue
        if not match:
            failed = True
            fail_messages.append(message)
    return failed, errored


def test_notebook(nb):
    """Main function to run tests at the level of one notebook.

    Starts a kernel, runs every code cell of ``nb`` through ``run_cell``
    and compares the produced outputs with the outputs stored in the cell.
    One progress character is written per code cell (``.`` replicated,
    ``F`` mismatched output, ``E`` error), followed by a summary.

    The kernel, its channels and the ``os.devnull`` handle used for the
    kernel's stderr are released even if testing raises.

    Returns 1 if any cell failed or errored, otherwise 0.
    """
    # Initialize the result tracking
    successes = 0
    failures = 0
    errors = 0
    fail_messages = []
    err_messages = []

    with open(os.devnull, 'w') as devnull:
        # Boot up the kernel, assume inline plotting
        km = KernelManager()
        km.start_kernel(
            extra_arguments=["--matplotlib=inline", "--colors=NoColor"],
            stderr=devnull)
        kc = None
        try:
            # Connect, allowing for older IPythons
            try:
                kc = km.client()
                kc.start_channels()
                iopub = kc.iopub_channel
            except AttributeError:
                # IPython 0.13
                kc = km
                kc.start_channels()
                iopub = kc.sub_channel
            shell = kc.shell_channel

            # Iterate the notebook, testing only code cells
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue

                    # Try and get the prompt number for easier reference
                    prompt_num = getattr(cell, 'prompt_number', None)

                    # Try to execute the cell, catch errors from test
                    # execution
                    try:
                        outs = run_cell(shell, iopub, cell)
                    except Exception as e:
                        err_messages.append(
                            "Error while running cell:\n%s" % repr(e))
                        errors += 1
                        sys.stdout.write("E")
                        continue

                    failed, errored = _compare_cell_outputs(
                        outs, cell.outputs, prompt_num,
                        fail_messages, err_messages)

                    # A mismatch takes precedence over an error when both
                    # occurred in the same cell.
                    if failed:
                        failures += 1
                        dot = "F"
                    elif errored:
                        errors += 1
                        dot = "E"
                    else:
                        successes += 1
                        dot = "."
                    sys.stdout.write(dot)
        finally:
            try:
                if kc is not None:
                    kc.stop_channels()
            finally:
                km.shutdown_kernel()

    print()
    print(" %3i cells successfully replicated" % successes)
    if failures:
        print(" %3i cells mismatched output" % failures)
    if errors:
        print(" %3i cells failed to complete" % errors)
    if failures:
        print("Failures:")
        print("-" * 20)
        print("\n" + "\n".join(fail_messages) + "\n")
    if errors:
        print("Errors:")
        print("-" * 20)
        print("\n" + "\n".join(err_messages) + "\n")

    return int(bool(failures + errors))
status=msg['content']['status'] if args.verbose: print( status ) if status=='ok': nsucc+=1 continue nerrors+=1 if args.verbose: print( "="*80 ) print( msg['content']['ename'], ":", msg['content']['evalue'] ) print( "{0:-^80}".format("<CODE>") ) print( cell.input ) print( "{0:-^80}".format("</CODE>") ) for m in msg['content']['traceback']: print( m ) print( "="*80 ) if args.break_at_error: break if args.summary: print( "{0:#^80}".format(" Summary: %s "%args.notebook) ) print( "Num Errors : ", nerrors ) print( "Num Successes: ", nsucc ) print( "Num Cells : ", ncells ) # kernel cleanup kc.stop_channels() km.shutdown_kernel(now=True) sys.exit(-1 if nerrors>0 else 0)
class NotebookRunner(object):
    '''
    Run the code cells of a notebook file in a freshly started kernel and
    store the produced outputs back into the in-memory notebook.
    '''

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
    }

    def __init__(self, nb_in, pylab=False):
        '''
        Start a kernel, open its channels and read the notebook ``nb_in``.

        If ``pylab`` is true the kernel is started with ``--pylab=inline``.
        '''
        self.km = KernelManager()
        if pylab:
            self.km.start_kernel(extra_arguments=['--pylab=inline'])
        else:
            self.km.start_kernel()

        if platform.system() == 'Darwin':
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)

        self.kc = self.km.client()
        self.kc.start_channels()

        self.shell = self.kc.shell_channel
        self.iopub = self.kc.iopub_channel

        logging.info('Reading notebook %s', nb_in)
        # Close the input file as soon as the notebook has been parsed.
        with open(nb_in) as nb_file:
            self.nb = read(nb_file, 'json')

    def __del__(self):
        # __init__ may have failed before these attributes were assigned,
        # so look them up defensively; always attempt the kernel shutdown
        # even if stopping the channels raises.
        kc = getattr(self, 'kc', None)
        km = getattr(self, 'km', None)
        try:
            if kc is not None:
                kc.stop_channels()
        finally:
            if km is not None:
                km.shutdown_kernel(now=True)

    def run_cell(self, cell):
        '''
        Run a notebook cell and update the output of that cell in-place.

        Raises ``NotebookError`` (after the outputs have been stored) if the
        kernel reports an error status for the cell.  The queue ``Empty``
        exception propagates if no iopub message arrives within one second
        before the kernel reports being idle again.
        '''
        logging.info('Running cell:\n%s\n', cell.input)
        self.shell.execute(cell.input)
        reply = self.shell.get_msg()
        status = reply['content']['status']
        if status == 'error':
            logging.info('Cell raised uncaught exception: %s',
                         reply['content']['ename'])
        else:
            logging.info('Cell returned')

        outs = list()
        while True:
            # execution state should return to idle before the queue becomes
            # empty; if it doesn't, something bad has happened and the Empty
            # raised by get_msg is deliberately left to propagate.
            msg = self.iopub.get_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    break

            content = msg['content']
            msg_type = msg['msg_type']

            out = NotebookNode(output_type=msg_type)

            # Record the execution count before filtering, so that messages
            # which are skipped below still update the cell's prompt number.
            if 'execution_count' in content:
                cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ['status', 'pyin']:
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(
                            'unhandled mime type: %s' % mime)
                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type)
            outs.append(out)
        cell['outputs'] = outs

        if status == 'error':
            raise NotebookError()

    def iter_code_cells(self):
        '''
        Iterate over the notebook cells containing code.
        '''
        for ws in self.nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type == 'code':
                    yield cell

    def run_notebook(self, skip_exceptions=False):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).
        '''
        for cell in self.iter_code_cells():
            try:
                self.run_cell(cell)
            except NotebookError:
                if not skip_exceptions:
                    raise

    def save_notebook(self, nb_out):
        '''
        Write the in-memory notebook to the path ``nb_out`` as JSON.
        '''
        logging.info('Saving to %s', nb_out)
        # The context manager guarantees the file is flushed and closed.
        with open(nb_out, 'w') as out_file:
            write(self.nb, out_file, 'json')
status=msg['content']['status'] if args.verbose: print status if status=='ok': nsucc+=1 continue nerrors+=1 if args.verbose: print "="*80 print msg['content']['ename'], ":", msg['content']['evalue'] print "{0:-^80}".format("<CODE>") print cell.input print "{0:-^80}".format("</CODE>") for m in msg['content']['traceback']: print m print "="*80 if args.break_at_error: break if args.summary: print "{0:#^80}".format(" Summary: %s "%args.notebook) print "Num Errors : ", nerrors print "Num Successes: ", nsucc print "Num Cells : ", ncells # kernel cleanup kc.stop_channels() km.shutdown_kernel(now=True) sys.exit(-1 if nerrors>0 else 0)
class NotebookRunner(object):
    """Drive a kernel for RCloud: submit code, poll for output, complete text.

    Code is submitted with ``submit_cell`` and the resulting messages are
    fetched one at a time with ``poll_for_msgs``.
    """

    # The kernel communicates with mime-types while the notebook
    # uses short labels for different cell types. We'll use this to
    # map from kernel types to notebook format types.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
    }

    def __init__(self, **kw):
        """Initializes the notebook runner.

        Requires config rcloud_python_lib_path -- or raises exception
        (``KeyError``).  The remaining keyword arguments are forwarded to
        ``make_ipkernel_cmd`` and ``KernelManager.start_kernel``.
        """
        # Set up front so that poll_for_msgs works even if it is called
        # before any cell has been submitted.
        self._previous_status = ''

        lib_path = kw["rcloud_python_lib_path"]
        self.km = KernelManager()
        # The bootstrap code must keep one statement per line: the inline
        # comment would otherwise swallow the statements that follow it.
        self.km.kernel_cmd = make_ipkernel_cmd(
            """
import sys;
import time; # to test for slow staring kernels, add a delay here
sys.path.extend("{RCPATH}".split(":"));
from rcloud_kernel import main;
main()""".format(RCPATH=lib_path), **kw)
        del kw["rcloud_python_lib_path"]
        self.km.client_factory = MyClient
        # This is a non-blocking call to launch the process
        self.km.start_kernel(**kw)
        self.completer = completer.Completer()
        self.completer.limit_to__all__ = True
        # There is a possible race condition if the system is slow to load
        # the modules for the kernel and we issue commands to the kernel
        # rightaway. We saw these on CentOS and OSX. So, issue an empty
        # command to the kernel and wait for return
        self.kc = self.km.client()
        self.kc.start_channels()
        self.shell = self.kc.shell_channel
        self.shell.execute("")
        self.shell.get_msgs()
        self.iopub = self.kc.iopub_channel

    def __del__(self):
        # Same cleanup as an explicit shutdown(); a no-op if that already ran.
        self.shutdown()

    def shutdown(self):
        """Stop the client channels and shut the kernel down immediately.

        Safe to call more than once and on an instance whose ``__init__``
        did not complete; the kernel shutdown is attempted even if stopping
        the channels raises.
        """
        if getattr(self, '_shutdown_done', False):
            return
        self._shutdown_done = True
        kc = getattr(self, 'kc', None)
        km = getattr(self, 'km', None)
        try:
            if kc is not None:
                kc.stop_channels()
        finally:
            if km is not None:
                km.shutdown_kernel(now=True)

    def complete(self, text, pos_fromR):
        """Completions for text at pos

        Only the last line of ``text[:pos_fromR]`` is sent to the kernel.
        Blocks until a ``complete_reply`` message arrives on the shell
        channel and returns its ``matches``.
        """
        lines = text[:int(pos_fromR)].split('\n')
        # We are ignoring surrounding lines and full context; a poor-man's
        # tab-complete for now
        last_line = lines[-1]
        self.shell.get_msgs()  # flush before execution
        self.shell.complete(last_line, line="", cursor_pos=len(last_line) - 1)
        while True:
            resp = self.shell.get_msg()
            if _debugging:
                logging.debug(resp)
            if resp['msg_type'] == 'complete_reply':
                self.shell.get_msgs()  # flush
                return resp['content']['matches']

    def run_cmd(self, cmd):
        """Submit ``cmd`` for execution; alias for ``submit_cell``."""
        self.submit_cell(cmd)

    def run_magic(self, magic_line):
        """Forward ``magic_line`` to the shell channel's ``magic`` method."""
        self.shell.magic(magic_line)

    def submit_cell(self, cmd):
        """Submits the cell code to the kernel.

        Now that we have a continuous IO on rcloud, and the clients can send
        oob messages to RCloud [but only through R], we separate the pieces
        to submit and have a function for looping to get messages
        """
        if _debugging:
            logging.debug('Running cell:\n%s\n', cmd)
        self.iopub.flush()
        self.shell.execute(cmd)
        self._previous_status = 'START'

    def poll_for_msgs(self):
        """Polls for messages from the kernel.

        Used after submitting code for execution.  Waits up to one second
        for one iopub message and returns a ``NotebookNode`` whose
        ``output_type`` is:

        * ``'IDLE'`` when the kernel reported the idle execution state,
        * ``'END_CELL'`` when the queue is empty right after an ``'IDLE'``,
        * ``'EMPTY'`` when the queue is empty otherwise,
        * ``'NoOp'`` for other ``status`` messages and ``pyin`` messages,
        * the message type itself for ``display_data``, ``pyout``,
          ``stream`` and ``pyerr`` messages, with the payload attached.

        Raises ``NotImplementedError`` for any other message type or for a
        mime type missing from ``MIME_MAP``.
        """
        try:
            msg = self.iopub.get_msg(timeout=1)
        except Empty:
            # state should return to idle before queue becomes empty, but we
            # ignore it now
            prevstat, self._previous_status = self._previous_status, 'EMPTY'
            # Assuming IDLE followed by EMPTY is the end-of-cell
            retstat = 'END_CELL' if prevstat == 'IDLE' else 'EMPTY'
            return NotebookNode(output_type=retstat)

        content, msg_type = msg['content'], msg['msg_type']
        if msg_type == 'status' and content['execution_state'] == 'idle':
            if _debugging:
                logging.info('Message -- {}:{}'.format(msg_type, content))
            self._previous_status = 'IDLE'
            return NotebookNode(output_type='IDLE')

        # Not idle, that's all we are concerned about for now
        self._previous_status = ''
        if msg_type in ['status', 'pyin']:
            return NotebookNode(output_type='NoOp')

        out = NotebookNode(output_type=msg_type)
        if msg_type in ('display_data', 'pyout'):
            for mime, data in content['data'].items():
                # Only the lookup is guarded, so a KeyError raised while
                # converting the payload is not misreported as a mime error.
                try:
                    attr = self.MIME_MAP[mime]
                except KeyError:
                    raise NotImplementedError(
                        'unhandled mime type: %s' % mime)
                # Text payloads go through RClansiconv (presumably ANSI
                # escape conversion -- confirm against its definition).
                setattr(out, attr,
                        RClansiconv(data) if attr == 'text' else data)
        elif msg_type == 'stream':
            setattr(out, 'text', RClansiconv(content['data']))
        elif msg_type == 'pyerr':
            setattr(out, 'html',
                    RClansiconv('\n'.join(content['traceback']) + '\n'))
        else:
            if _debugging:
                logging.info('Unsupported: ' + msg_type)
            raise NotImplementedError(
                'unhandled iopub message: %s' % msg_type)
        if _debugging:
            logging.info(
                'Sending: msg_type: [{}]; HTML: [{}]; TEXT: [{}]'.format(
                    msg_type, out.get('html', ''), out.get('text', '')))
        # upstream process will handle it [e.g. send as an oob message]
        return out