コード例 #1
0
ファイル: test_examples.py プロジェクト: cing/mdtraj
def run_notebook(nb):
    """Execute every code cell of ``nb`` in a freshly started kernel.

    Each code cell's ``source`` is sent to the kernel and its shell reply is
    awaited with a timeout of 60.  Tracebacks of failing cells are printed to
    stdout.  The client channels and the kernel are always torn down, even
    when waiting for a reply raises.

    Parameters
    ----------
    nb
        Notebook object exposing ``cells``; every cell has ``cell_type`` and
        code cells have ``source``.

    Raises
    ------
    Exception
        If the reply of at least one cell has status ``'error'``.
    """
    km = KernelManager()
    failures = 0
    # The kernel's stderr is discarded; the context manager closes the handle.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        kc = km.client()
        kc.start_channels()
        try:
            shell = kc.shell_channel
            # simple ping:
            kc.execute("pass")
            shell.get_msg()

            for cell in nb.cells:
                if cell.cell_type != 'code':
                    continue
                kc.execute(cell.source)
                # wait for finish, maximum 60s
                reply = shell.get_msg(timeout=60)['content']
                if reply['status'] == 'error':
                    failures += 1
                    print("\nFAILURE:")
                    print('\n'.join(reply['traceback']))
                    print()
        finally:
            # Runs on success and on error so no kernel process is left behind.
            kc.stop_channels()
            km.shutdown_kernel()

    if failures > 0:
        raise Exception("%d notebook cell(s) raised an error" % failures)
コード例 #2
0
ファイル: ipynbhelper.py プロジェクト: chengsoonong/didbits
def run_notebook(nb):
    """Run all code cells of ``nb`` in a fresh kernel and store their outputs.

    For every code cell, ``run_cell`` is called; its outputs replace
    ``cell.outputs`` and the cell's ``execution_count`` is set to its
    1-based position among the executed cells.  A dot is written to stdout
    per cell, followed by a short summary.  The channels and the kernel are
    shut down even if running a cell raises.

    Parameters
    ----------
    nb
        Notebook object exposing ``cells``; mutated in place.
    """
    km = KernelManager()
    # The kernel's stderr is discarded; the context manager closes the handle.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        kc = km.client()
        kc.start_channels()
        try:
            # simple ping:
            kc.execute("pass")
            kc.get_shell_msg()

            cells = 0
            failures = 0
            for cell in nb.cells:
                if cell.cell_type != 'code':
                    continue

                outputs, failed = run_cell(kc, cell)
                cell.outputs = outputs
                cells += 1
                cell['execution_count'] = cells
                failures += failed
                sys.stdout.write('.')
                sys.stdout.flush()

            print()
            print("    ran %3i cells" % cells)
            if failures:
                print("    %3i cells raised exceptions" % failures)
        finally:
            # Always release the kernel, including when a cell run raised.
            kc.stop_channels()
            km.shutdown_kernel()
コード例 #3
0
ファイル: ipynbhelper.py プロジェクト: vishalbelsare/didbits
def run_notebook(nb):
    """Execute the code cells of ``nb`` and record their outputs in place.

    ``run_cell`` is invoked for each code cell.  The returned outputs are
    assigned to ``cell.outputs`` and ``execution_count`` is numbered from 1
    in execution order.  Progress dots and a summary go to stdout.  Kernel
    and channels are released in a ``finally`` clause so an exception while
    running a cell does not leave a kernel process behind.

    Parameters
    ----------
    nb
        Notebook object exposing ``cells``; mutated in place.
    """
    km = KernelManager()
    # Discard the kernel's stderr and make sure the devnull handle is closed.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        kc = km.client()
        kc.start_channels()
        try:
            # simple ping:
            kc.execute("pass")
            kc.get_shell_msg()

            cells = 0
            failures = 0
            code_cells = (c for c in nb.cells if c.cell_type == 'code')
            for cell in code_cells:
                outputs, failed = run_cell(kc, cell)
                cell.outputs = outputs
                cells += 1
                cell['execution_count'] = cells
                failures += failed
                sys.stdout.write('.')
                sys.stdout.flush()

            print()
            print("    ran %3i cells" % cells)
            if failures:
                print("    %3i cells raised exceptions" % failures)
        finally:
            kc.stop_channels()
            km.shutdown_kernel()
コード例 #4
0
def _check_ipynb():
    """Start a kernel, execute one ``print`` statement in it, and shut it down.

    The client itself is used as shell/iopub endpoint when it provides
    ``wait_for_ready``; otherwise the separate channel objects are used and
    given ``get_shell_msg`` / ``get_iopub_msg`` aliases.  The channels and
    the kernel are released even when start-up or execution raises.
    """
    kernel_manager = KernelManager()
    kernel_manager.start_kernel()
    kernel_client = kernel_manager.client()
    kernel_client.start_channels()

    try:
        try:
            # IPython 3.x
            kernel_client.wait_for_ready()
            iopub = kernel_client
            shell = kernel_client
        except AttributeError:
            # IPython 2.x
            # Based on https://github.com/paulgb/runipy/pull/49/files
            iopub = kernel_client.iopub_channel
            shell = kernel_client.shell_channel
            shell.get_shell_msg = shell.get_msg
            iopub.get_iopub_msg = iopub.get_msg

        report = ''
        _execute_code("print('Hello World')", shell, iopub, timeout=1)
    finally:
        # Guarantee teardown so a failed check does not leak a kernel process.
        kernel_client.stop_channels()
        kernel_manager.shutdown_kernel()

    print(report)
0
    def _run_signaltest_lifecycle(self, config=None):
        """Drive a ``signaltest`` kernel through start, restart and shutdown.

        Commands are sent through the client returned by
        ``self._prepare_kernel``; every command must be answered with status
        ``'ok'``.  The manager has to report alive before and after the
        restart, and its context must be closed after shutdown.
        """
        manager = KernelManager(config=config, kernel_name='signaltest')
        client = self._prepare_kernel(manager, stdout=PIPE, stderr=PIPE)

        def run_ok(command):
            # Skip shell replies until the one answering this request arrives.
            sent_id = client.execute(command)
            answer = client.get_shell_msg(TIMEOUT)
            while answer['parent_header']['msg_id'] != sent_id:
                answer = client.get_shell_msg(TIMEOUT)
            payload = answer['content']
            assert payload['status'] == 'ok'
            return payload

        for command in ("start", 'check'):
            run_ok(command)
            assert manager.is_alive()

        manager.restart_kernel(now=True)
        assert manager.is_alive()
        run_ok('check')

        manager.shutdown_kernel()
        assert manager.context.closed
コード例 #6
0
def run_notebook(nb):
    """Execute every code cell of ``nb`` in a fresh kernel.

    Each cell's shell reply is awaited with a timeout of ``TIMEOUT``.
    Tracebacks of failing cells are written to stderr.  The channels and the
    kernel are shut down in all cases, including when a cell times out.

    Parameters
    ----------
    nb
        Notebook object exposing ``cells`` with ``cell_type`` and ``source``.

    Raises
    ------
    Exception
        If a cell produces no reply within ``TIMEOUT`` (the message names
        the cell), or if at least one cell reply has status ``'error'``.
    """
    km = KernelManager()
    failures = 0
    # The kernel's stderr is discarded; the context manager closes the handle.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        kc = km.client()
        kc.start_channels()
        try:
            shell = kc.shell_channel
            # simple ping:
            kc.execute("pass")
            shell.get_msg()

            for cell in nb.cells:
                if cell.cell_type != 'code':
                    continue
                kc.execute(cell.source)
                try:
                    # wait for finish, w/ timeout
                    reply = shell.get_msg(timeout=TIMEOUT)['content']
                except Empty:
                    raise Exception(
                        'Timeout (%.1f) when executing the following %s cell: "%s"' %
                        (TIMEOUT, cell.cell_type, cell.source.strip()))
                if reply['status'] == 'error':
                    failures += 1
                    print("\nFAILURE:", file=sys.stderr)
                    print('\n'.join(reply['traceback']), file=sys.stderr)
                    print(file=sys.stderr)
        finally:
            # Without this, the timeout exception above left the kernel running.
            kc.stop_channels()
            km.shutdown_kernel()

    if failures > 0:
        raise Exception("%d notebook cell(s) raised an error" % failures)
コード例 #7
0
def run_notebook(nb):
    """Execute every code cell of ``nb`` in a freshly started kernel.

    Each code cell's ``source`` is sent to the kernel and its shell reply is
    awaited with a timeout of 20.  Tracebacks of failing cells are printed
    to stdout.  The client channels and the kernel are always torn down,
    even when waiting for a reply raises.

    Parameters
    ----------
    nb
        Notebook object exposing ``cells`` with ``cell_type`` and ``source``.

    Raises
    ------
    Exception
        If the reply of at least one cell has status ``'error'``.
    """
    km = KernelManager()
    failures = 0
    # The kernel's stderr is discarded; the context manager closes the handle.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        kc = km.client()
        kc.start_channels()
        try:
            shell = kc.shell_channel
            # simple ping:
            kc.execute("pass")
            shell.get_msg()

            for cell in nb.cells:
                if cell.cell_type != 'code':
                    continue
                kc.execute(cell.source)
                # wait for finish, maximum 20s
                reply = shell.get_msg(timeout=20)['content']
                if reply['status'] == 'error':
                    failures += 1
                    print("\nFAILURE:")
                    print('\n'.join(reply['traceback']))
                    print()
        finally:
            # Runs on success and on error so no kernel process is left behind.
            kc.stop_channels()
            km.shutdown_kernel()

    if failures > 0:
        raise Exception("%d notebook cell(s) raised an error" % failures)
コード例 #8
0
    def _run_signaltest_lifecycle(self, config=None):
        """Exercise start, restart and shutdown of a ``signaltest`` kernel.

        A client obtained from ``self._prepare_kernel`` sends the commands;
        each one must come back with status ``"ok"``.  Liveness of the
        manager is asserted around the restart, and the manager's context
        must be closed once the kernel has been shut down.
        """
        kernel_manager = KernelManager(config=config, kernel_name="signaltest")
        kernel_client = self._prepare_kernel(
            kernel_manager, stdout=PIPE, stderr=PIPE
        )

        def send_and_expect_ok(cmd):
            # Replies belonging to other requests are dropped.
            wanted = kernel_client.execute(cmd)
            msg = kernel_client.get_shell_msg(TIMEOUT)
            while msg["parent_header"]["msg_id"] != wanted:
                msg = kernel_client.get_shell_msg(TIMEOUT)
            body = msg["content"]
            assert body["status"] == "ok"
            return body

        send_and_expect_ok("start")
        assert kernel_manager.is_alive()
        send_and_expect_ok("check")
        assert kernel_manager.is_alive()

        kernel_manager.restart_kernel(now=True)
        assert kernel_manager.is_alive()
        send_and_expect_ok("check")

        kernel_manager.shutdown_kernel()
        assert kernel_manager.context.closed
コード例 #9
0
ファイル: ipnbdoctest.py プロジェクト: hekmatnejad/mvspot
def test_notebook(nb):
    """Re-run the code cells of ``nb`` and compare them with stored outputs.

    Cells are taken from every worksheet of ``nb``.  Non-code cells and
    cells whose input starts with ``%timeit`` are skipped.  A cell counts as
    an *error* when ``run_cell`` raises, as a *failure* when the number of
    outputs differs or ``compare_outputs`` rejects a pair, and as a
    *success* otherwise.  A summary is printed, and the process exits with
    status 1 if there was any failure or error.  The kernel is shut down in
    all cases.

    Parameters
    ----------
    nb
        Notebook object exposing ``worksheets`` and ``metadata.name``.
    """
    km = KernelManager()
    # Do not save the history to disk, as it can yield spurious lock errors.
    # See https://github.com/ipython/ipython/issues/2845
    km.start_kernel(extra_arguments=['--HistoryManager.hist_file=:memory:'])

    kc = km.client()
    kc.start_channels()

    successes = 0
    failures = 0
    errors = 0
    try:
        try:
            kc.wait_for_ready()
        except AttributeError:
            _wait_for_ready_backport(kc)

        for ws in nb.worksheets:
            for i, cell in enumerate(ws.cells):
                if cell.cell_type != 'code' or cell.input.startswith('%timeit'):
                    continue
                try:
                    outs = run_cell(kc, cell)
                except Exception as e:
                    print("failed to run cell:", repr(e))
                    print(cell.input)
                    errors += 1
                    continue

                failed = False
                if len(outs) != len(cell.outputs):
                    print("output length mismatch (expected {}, got {})".format(
                        len(cell.outputs), len(outs)))
                    failed = True
                for out, ref in zip(outs, cell.outputs):
                    if not compare_outputs(out, ref):
                        failed = True
                print("cell %d: " % i, end="")
                if failed:
                    print("FAIL")
                    failures += 1
                else:
                    print("OK")
                    successes += 1

        print()
        print("tested notebook %s" % nb.metadata.name)
        print("    %3i cells successfully replicated" % successes)
        if failures:
            print("    %3i cells mismatched output" % failures)
        if errors:
            print("    %3i cells failed to complete" % errors)
    finally:
        # Release the kernel even if readiness or a comparison raised.
        kc.stop_channels()
        km.shutdown_kernel()

    if failures or errors:
        sys.exit(1)
コード例 #10
0
ファイル: ipnbdoctest.py プロジェクト: mcc-petrinets/formulas
def test_notebook(ipynb):
    """Re-run the notebook file ``ipynb`` and compare with its stored outputs.

    Non-code cells and cells whose source starts with ``%timeit`` are
    skipped.  A cell counts as an *error* when ``run_cell`` raises, as a
    *failure* when the number of outputs differs or ``compare_outputs``
    rejects them, and as a *success* otherwise.  A summary is printed, and
    the process exits with status 1 if there was any failure or error.  The
    kernel is shut down in all cases.

    Parameters
    ----------
    ipynb
        Path of the notebook file; it is opened as UTF-8 text.
    """
    with open(ipynb, encoding='utf-8') as f:
        nb = nbformat.reads_json(f.read())
    km = KernelManager()
    # Do not save the history to disk, as it can yield spurious lock errors.
    # See https://github.com/ipython/ipython/issues/2845
    km.start_kernel(extra_arguments=['--HistoryManager.hist_file=:memory:'])

    kc = km.client()
    kc.start_channels()

    successes = 0
    failures = 0
    errors = 0
    try:
        try:
            kc.wait_for_ready()
        except AttributeError:
            _wait_for_ready_backport(kc)

        for i, cell in enumerate(nb.cells):
            if cell.cell_type != 'code' or cell.source.startswith('%timeit'):
                continue
            try:
                outs = run_cell(kc, cell)
            except Exception as e:
                print("failed to run cell:", repr(e))
                # Cells are read via ``source`` above; ``cell.input`` would
                # raise AttributeError inside this handler.
                print(cell.source)
                errors += 1
                continue

            failed = False
            if len(outs) != len(cell.outputs):
                print("output length mismatch (expected {}, got {})".format(
                      len(cell.outputs), len(outs)))
                failed = True
            if not compare_outputs(outs, cell.outputs):
                failed = True
            print("cell %d: " % i, end="")
            if failed:
                print("FAIL")
                failures += 1
            else:
                print("OK")
                successes += 1

        print("tested notebook %s" % ipynb)
        print("    %3i cells successfully replicated" % successes)
        if failures:
            print("    %3i cells mismatched output" % failures)
        if errors:
            print("    %3i cells failed to complete" % errors)
    finally:
        # Release the kernel even if readiness or a comparison raised.
        kc.stop_channels()
        km.shutdown_kernel()

    if failures or errors:
        sys.exit(1)
コード例 #11
0
ファイル: _main.py プロジェクト: breathe/NotebookScripter
    def _block_and_receive_results(*return_values, save_output_notebook=None):

        # add an extra cell to beginning of notebook to populate parameters
        notebook_parameters = __notebookscripter_injected__ + [[hooks, {"return_values": return_values}]]
        base64_parameters = obj_to_string_literal(notebook_parameters)

        initialization_source = """from NotebookScripter import (rehydrate as __rehydrate__, dehydrate_return_values as __dehydrate_return_values__)
__rehydrate__({})""".format(base64_parameters)

        initialization_cell = notebook_node_from_dict({
            "cell_type": "code",
            "execution_count": 0,
            "metadata": {},
            "outputs": [],
            "source": initialization_source
        })

        finalization_source = """__dehydrate_return_values__(locals())"""

        finalization_cell = notebook_node_from_dict({
            "cell_type": "code",
            "execution_count": 0,
            "metadata": {},
            "outputs": [],
            "source": finalization_source})

        notebook['cells'].insert(0, initialization_cell)
        notebook['cells'].append(finalization_cell)

        km = KernelManager()
        # hack -- needed because the code within ExecutePreprocessor.start_kernel to start
        # the kernel when km hasn't started a kernel already can't possibly work
        km.start_kernel()
        executed_notebook = executenb(notebook, timeout=None, km=km)
        km.shutdown_kernel()

        if save_output_notebook:
            if isinstance(save_output_notebook, str):
                with open(save_output_notebook, 'w') as f:
                    write_notebook(executed_notebook, f)
            else:
                write_notebook(executed_notebook, save_output_notebook)

        encoded_return_values = eval(executed_notebook["cells"][-1]["outputs"][0]["data"]["text/plain"])
        final_namespace = str_to_obj(encoded_return_values)

        module_identity = "loaded_notebook_from_subprocess"
        dynamic_module = types.ModuleType(module_identity)
        dynamic_module.__file__ = path_to_notebook

        # inject retrieved return values into the returned module namespace
        dynamic_module.__dict__.update(final_namespace)
        return dynamic_module
コード例 #12
0
ファイル: ipnbdoctest.py プロジェクト: yanntm/spot
def test_notebook(ipynb):
    """Re-run the notebook file ``ipynb`` and compare with its stored outputs.

    Non-code cells and cells whose source starts with ``%timeit`` are
    skipped.  A cell counts as an *error* when ``run_cell`` raises, as a
    *failure* when ``compare_outputs`` rejects the new outputs, and as a
    *success* otherwise.  A summary is printed, and the process exits with
    status 1 if there was any failure or error.  The kernel is shut down in
    all cases.

    Parameters
    ----------
    ipynb
        Path of the notebook file; it is opened as UTF-8 text.
    """
    with open(ipynb, encoding='utf-8') as f:
        nb = nbformat.reads_json(f.read())
    km = KernelManager()
    # Do not save the history to disk, as it can yield spurious lock errors.
    # See https://github.com/ipython/ipython/issues/2845
    km.start_kernel(extra_arguments=['--HistoryManager.hist_file=:memory:'])

    kc = km.client()
    kc.start_channels()

    successes = 0
    failures = 0
    errors = 0
    try:
        try:
            kc.wait_for_ready(timeout=30)
        except AttributeError:
            _wait_for_ready_backport(kc)

        for i, cell in enumerate(nb.cells):
            if cell.cell_type != 'code' or cell.source.startswith('%timeit'):
                continue
            try:
                outs = run_cell(kc, cell)
            except Exception as e:
                print("failed to run cell:", repr(e))
                # Cells are read via ``source`` above; ``cell.input`` would
                # raise AttributeError inside this handler.
                print(cell.source)
                errors += 1
                continue

            failed = not compare_outputs(cell.outputs, outs)
            print("cell %d: " % i, end="")
            if failed:
                print("FAIL")
                failures += 1
            else:
                print("OK")
                successes += 1

        print("tested notebook %s" % ipynb)
        print("    %3i cells successfully replicated" % successes)
        if failures:
            print("    %3i cells mismatched output" % failures)
        if errors:
            print("    %3i cells failed to complete" % errors)
    finally:
        # Release the kernel even if readiness or a comparison raised.
        kc.stop_channels()
        km.shutdown_kernel()

    if failures or errors:
        sys.exit(1)
コード例 #13
0
    def _start_kernel_with_cmd(self, kernel_cmd, extra_env, **kwargs):
        """Launch a ``signaltest`` kernel and hand back ``(manager, client)``.

        ``kernel_cmd`` and ``extra_env`` are assigned onto the manager before
        it is started; ``kwargs`` are passed to ``start_kernel``.  If the
        client does not become ready (``RuntimeError``), the channels and the
        kernel are shut down and the error is re-raised.
        """
        manager = KernelManager(kernel_name='signaltest')
        manager.kernel_cmd = kernel_cmd
        manager.extra_env = extra_env
        manager.start_kernel(**kwargs)

        client = manager.client()
        client.start_channels()
        try:
            client.wait_for_ready(timeout=60)
        except RuntimeError:
            # Do not leave a half-started kernel behind.
            client.stop_channels()
            manager.shutdown_kernel()
            raise
        else:
            return manager, client
コード例 #14
0
ファイル: socket.py プロジェクト: imasdekar/kuma
async def websocket_endpoint(websocket: WebSocket):
    """Serve one websocket connection backed by its own kernel.

    After accepting the connection a kernel is started and pandas is
    imported in it.  Each JSON message received is turned into code by
    ``PandasCodeGenerator``, executed through ``execute_code``, and the
    result is sent back as text.  The kernel is shut down when the client
    disconnects or when any step fails, including the initial import.
    """
    await websocket.accept()
    km = KernelManager()
    km.start_kernel()
    try:
        # Inside the ``try`` so a failing set-up still reaches ``finally``.
        c = km.client()
        execute_code("import pandas as pd", c)
        while True:
            data = await websocket.receive_json()
            code = PandasCodeGenerator(data, save=True,
                                       display_rows=15).process()
            df = execute_code(code, c)
            await websocket.send_text(f"{df}")
    except WebSocketDisconnect:
        logger.info("Client disconnected")
    finally:
        logger.info("Shuting down Kernel")
        km.shutdown_kernel()
コード例 #15
0
def start_kernel_w_env():
    """Yield ``(manager, client)`` for a signal-test kernel with extra env.

    The kernel is launched with a custom command line and with
    ``TEST_VARS`` set through ``extra_env``.  Once the client is ready the
    pair is yielded.  Channels and kernel are released when the generator
    is resumed or closed, and also when ``wait_for_ready`` fails.
    """
    kernel_cmd = [sys.executable,
                  '-m', 'jupyter_client.tests.signalkernel',
                  '-f', '{connection_file}']
    extra_env = {'TEST_VARS': '${TEST_VARS}:test_var_2'}

    km = KernelManager(kernel_name='signaltest')
    km.kernel_cmd = kernel_cmd
    km.extra_env = extra_env
    km.start_kernel()
    kc = km.client()
    kc.start_channels()
    try:
        kc.wait_for_ready(timeout=60)
        yield km, kc
    finally:
        # Covers readiness failures and exceptions thrown into the generator.
        kc.stop_channels()
        km.shutdown_kernel()
コード例 #16
0
def main(args):
    """Run a kernel for a given connection file, or serve the tornado app.

    Options are parsed with ``parse_args()`` called without arguments, so
    the incoming ``args`` value is replaced by the parsed namespace.

    With ``--conn-file``, a ``KernelManager`` is created for that connection
    file (optionally with ``--kernel`` as kernel name), started, and kept
    running until one of the registered signals or a ``KeyboardInterrupt``
    arrives; then the kernel is shut down.  Without it, the app from
    ``make_app`` listens on ``--port`` and the IOLoop is started.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int)
    parser.add_argument('--kernel')
    parser.add_argument('--conn-file')
    args = parser.parse_args()

    if not args.conn_file:
        # No connection file requested: serve the web application instead.
        app = make_app()
        app.listen(args.port)
        tornado.ioloop.IOLoop.current().start()
        return

    if runtime_dir:
        if os.path.isabs(args.conn_file):
            conn_file = args.conn_file
        else:
            conn_file = os.path.join(runtime_dir(), args.conn_file)
    else:  # IPython 3
        profile = ProfileDir.find_profile_dir_by_name(get_ipython_dir(),
                                                      'default')
        conn_file = os.path.join(profile.security_dir, args.conn_file)

    manager_options = {'connection_file': conn_file}
    if args.kernel:
        manager_options['kernel_name'] = args.kernel
    manager = KernelManager(**manager_options)

    # Taken once here, so the second acquire below blocks until a signal
    # handler releases the semaphore.
    stop_requested = multiprocessing.Semaphore()
    stop_requested.acquire()

    def onsignal(*args):
        stop_requested.release()

    import platform
    if platform.system() == 'Windows':
        platform_signals = [signal.SIGBREAK]
    else:
        # Emacs sends SIGHUP upon exit
        platform_signals = [signal.SIGQUIT, signal.SIGHUP]
    for signum in [signal.SIGTERM] + platform_signals:
        signal.signal(signum, onsignal)

    manager.start_kernel()
    try:
        stop_requested.acquire()
    except KeyboardInterrupt:
        pass
    manager.shutdown_kernel()
コード例 #17
0
ファイル: kernel.py プロジェクト: spacetelescope/pyllisto
class Kernel:
    """Thin wrapper around a ``KernelManager`` and its client.

    Constructing an instance starts a kernel, opens the client channels and
    waits until the client reports ready.
    """

    def __init__(self, **kwargs):
        """Start a kernel; ``kwargs`` are forwarded to ``KernelManager``."""
        self._manager = KernelManager(**kwargs)
        self._manager.start_kernel()
        self._client = self._manager.client()
        self._client.start_channels()
        self._client.wait_for_ready()

    @property
    def manager(self):
        """The underlying kernel manager."""
        return self._manager

    @property
    def client(self):
        """The client connected to the kernel."""
        return self._client

    def execute(self, msg, timeout=1):
        """Execute ``msg`` and return the ``text/plain`` result, if any.

        IOPub messages are read until one reports an ``execution_state``
        other than ``'busy'``.  Every message read is printed.

        Parameters
        ----------
        msg
            Code passed to the client's ``execute``.
        timeout
            Timeout handed to each ``get_iopub_msg`` call (default ``1``,
            the previously hard-coded value).

        Returns
        -------
        The ``text/plain`` entry of the last ``execute_result`` message
        seen, or ``None`` when there was none.
        """
        self._client.execute(msg)

        status = 'busy'
        result = None

        while status == 'busy':
            # A separate name keeps the ``msg`` argument intact.
            reply = self._client.get_iopub_msg(timeout=timeout)
            # Tolerate messages without a content dict.
            content = reply.get('content') or {}
            msg_type = reply.get('msg_type')
            print(reply)

            # 'text/plain' in ['content']['data'] contains results from cells;
            #  'text' in ['content'] contains results from print statements
            if msg_type == 'execute_result':
                data = content.get('data')

                if data is not None:
                    result = data.get('text/plain')

            if 'execution_state' in content:
                status = content['execution_state']

        return result

    def shutdown(self):
        """Shut the kernel down through the manager."""
        self._manager.shutdown_kernel()
コード例 #18
0
ファイル: python.py プロジェクト: studioschade/notebook_graph
 def execute_command(self, command):
     """Run ``command`` in a new ``python`` kernel and collect IOPub content.

     Every key/value pair found in the ``content`` of each IOPub message is
     copied into ``self.response2`` (later messages overwrite earlier keys).
     Polling stops once a message reports ``execution_state == 'idle'`` or
     the client is no longer alive.  The kernel is always shut down, and
     ``self.response2`` is pretty-printed before it is returned.

     Parameters:
         command: code passed to the client's ``execute``.

     Returns:
         ``self.response2``.
     """
     try:
         from queue import Empty  # Py 3
     except ImportError:
         from Queue import Empty  # Py 2
     # Built before the ``try`` so the ``finally`` clause can always use it,
     # even when creating the client fails.
     content_printer = pprint.PrettyPrinter(indent=4)
     km = KernelManager(kernel_name='python')
     km.start_kernel()
     print("Kernel Running: " + str(km.is_alive()))
     try:
         c = km.client()
         c.execute(command)
         state = 'busy'
         while state != 'idle' and c.is_alive():
             try:
                 msg = c.get_iopub_msg(timeout=1)
             except Empty:
                 # Nothing arrived within the timeout; poll again.
                 continue
             if 'content' not in msg:
                 print("message has no content, moving on...")
                 continue
             content = msg['content']
             for info in content:
                 self.response2[info] = content[info]
             if 'execution_state' in content:
                 state = content['execution_state']
     except KeyboardInterrupt:
         pass
     finally:
         km.shutdown_kernel()
         content_printer.pprint(self.response2)
         # NOTE(review): returning from ``finally`` discards any exception
         # still propagating from the ``try`` body. Kept so existing callers
         # keep receiving a result; confirm this is intended.
         return self.response2
コード例 #19
0
ファイル: driver.py プロジェクト: EmperorDali/ob-ipython
def main(args):
    """Entry point: manage a kernel for a connection file or serve the app.

    The ``args`` parameter is overwritten by ``parser.parse_args()``, which
    is called without arguments.

    When ``--conn-file`` is given, a ``KernelManager`` bound to that file is
    started (using ``--kernel`` as kernel name when provided) and the call
    blocks until a registered signal or ``KeyboardInterrupt`` occurs, after
    which the kernel is shut down.  Otherwise the application returned by
    ``make_app`` listens on ``--port`` and the tornado IOLoop is started.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int)
    parser.add_argument('--kernel')
    parser.add_argument('--conn-file')
    args = parser.parse_args()

    if args.conn_file:
        requested = args.conn_file
        if runtime_dir:
            if os.path.isabs(requested):
                conn_file = requested
            else:
                conn_file = os.path.join(runtime_dir(), requested)
        else:  # IPython 3
            profile_dir = ProfileDir.find_profile_dir_by_name(
                get_ipython_dir(), 'default')
            conn_file = os.path.join(profile_dir.security_dir, requested)

        km_kwargs = {'connection_file': conn_file}
        if args.kernel:
            km_kwargs['kernel_name'] = args.kernel
        manager = KernelManager(**km_kwargs)

        # Acquired once up front; the acquire further down therefore waits
        # until a signal handler releases it.
        gate = multiprocessing.Semaphore()
        gate.acquire()

        def onsignal(*args):
            gate.release()

        signal.signal(signal.SIGTERM, onsignal)
        import platform
        on_windows = platform.system() == 'Windows'
        if not on_windows:
            signal.signal(signal.SIGQUIT, onsignal)
            # Emacs sends SIGHUP upon exit
            signal.signal(signal.SIGHUP, onsignal)
        else:
            signal.signal(signal.SIGBREAK, onsignal)

        manager.start_kernel()
        try:
            gate.acquire()
        except KeyboardInterrupt:
            pass
        manager.shutdown_kernel()
    else:
        web_app = make_app()
        web_app.listen(args.port)
        tornado.ioloop.IOLoop.current().start()
コード例 #20
0
ファイル: ipynbhelper.py プロジェクト: Busyclover/icl
def run_notebook(nb):
    """Run the code cells of every worksheet in ``nb`` and store the outputs.

    ``run_cell`` is called for each code cell; its outputs replace
    ``cell.outputs`` and ``prompt_number`` is set to the 0-based count of
    cells executed so far.  Progress dots and a summary are written to
    stdout.  The channels and the kernel are released even when running a
    cell raises.

    Parameters
    ----------
    nb
        Notebook object exposing ``worksheets`` and ``metadata.name``;
        mutated in place.
    """
    km = KernelManager()
    # The kernel's stderr is discarded; the context manager closes the handle.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        if hasattr(km, 'client'):
            kc = km.client()
            kc.start_channels()
            iopub = kc.iopub_channel
        else:
            # IPython 0.13 compat
            kc = km
            kc.start_channels()
            iopub = kc.sub_channel
        shell = kc.shell_channel

        try:
            # simple ping:
            shell.send("pass")
            shell.get_msg()

            cells = 0
            failures = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue

                    outputs, failed = run_cell(shell, iopub, cell)
                    cell.outputs = outputs
                    cell['prompt_number'] = cells
                    failures += failed
                    cells += 1
                    sys.stdout.write('.')

            print()
            print("ran notebook %s" % nb.metadata.name)
            print("    ran %3i cells" % cells)
            if failures:
                print("    %3i cells raised exceptions" % failures)
        finally:
            # Always release the kernel, including when a cell run raised.
            kc.stop_channels()
            km.shutdown_kernel()
コード例 #21
0
    def start_kernel(self):
        """Start a new kernel and yield its Manager and Client.

        The kernel named by ``TEST_KERNEL_NAME`` is started and its client
        must become ready; on ``RuntimeError`` everything is torn down and
        the error re-raised.  After the ``yield`` the channels are stopped,
        the kernel is shut down, and the manager's context must be closed.

        Adapted from:
        - https://github.com/jupyter/jupyter_client/blob/284914b/jupyter_client/manager.py#780
        - https://github.com/jupyter/jupyter_client/blob/284914b/jupyter_client/tests/test_kernelmanager.py#L47
        """
        manager = KernelManager(kernel_name=TEST_KERNEL_NAME)
        manager.start_kernel()

        client = manager.client()
        client.start_channels()
        try:
            client.wait_for_ready(timeout=60)
        except RuntimeError:
            # Readiness failed: release what was started before re-raising.
            client.stop_channels()
            manager.shutdown_kernel()
            raise

        yield manager, client

        # Teardown once the consumer resumes the generator.
        client.stop_channels()
        manager.shutdown_kernel()
        assert manager.context.closed
コード例 #22
0
def run_notebook(nb):
    """
    Run each code cell in a given notebook and update with the new output.

    The kernel is started with ``--pylab=inline``.  After a ping, pending
    IOPub messages are drained before the cells are executed.  The channels
    and the kernel are shut down on every exit path.

    Returns ``nb`` (mutated in place) on success, or ``-1`` as soon as
    ``run_cell`` raises for a cell.
    """
    km = KernelManager()
    km.start_kernel(extra_arguments=['--pylab=inline'])
    try:
        kc = km.client()
        kc.start_channels()
        iopub = kc.iopub_channel
    except AttributeError:
        # IPython 0.13
        kc = km
        kc.start_channels()
        iopub = kc.sub_channel
    shell = kc.shell_channel

    try:
        shell.execute("pass")
        shell.get_msg()
        # Drain whatever the ping left on the IOPub channel.
        while True:
            try:
                iopub.get_msg(timeout=1)
            except Empty:
                break

        for ws in nb.worksheets:
            for cell in ws.cells:
                if cell.cell_type != 'code':
                    continue
                try:
                    cell.outputs = run_cell(shell, iopub, cell)
                except Exception:
                    # Error code kept for existing callers; the ``finally``
                    # below now shuts the kernel down on this path as well.
                    return -1
        return nb
    finally:
        kc.stop_channels()
        km.shutdown_kernel()
コード例 #23
0
def run_notebook(nb):
    """Run IPython Notebook.

    Every code cell of every worksheet is sent to a fresh kernel and its
    shell reply is awaited with a timeout of 20.  Failing cells are reported
    on stdout together with their input and traceback, followed by a
    summary.  The channels and the kernel are shut down even when waiting
    for a reply raises.

    All ``print`` calls take a single argument, so they produce the same
    output whether ``print`` is a statement (Python 2) or a function.

    Parameters:
    -----------
    nb : IPython Notebook in JSON format.

    Returns:
    --------
    ret : int
        Return value; 0 in case of no failure, 1 otherwise
    """

    km = KernelManager()
    failures = 0
    # The kernel's stderr is discarded; the context manager closes the handle.
    with open(os.devnull, 'w') as devnull:
        km.start_kernel(stderr=devnull)
        try:
            kc = km.client()
        except AttributeError:
            # 0.13
            kc = km
        kc.start_channels()
        try:
            shell = kc.shell_channel
            # simple ping:
            try:
                send = kc.execute
            except AttributeError:
                send = kc.shell_channel.execute
            send("pass")
            shell.get_msg()

            cells = 0
            for ws in nb.worksheets:
                for cell in ws.cells:
                    if cell.cell_type != 'code':
                        continue
                    send(cell.input)
                    # wait for finish, maximum 20s
                    reply = shell.get_msg(timeout=20)['content']
                    if reply['status'] == 'error':
                        failures += 1
                        print("\nFAILURE:")
                        print(cell.input)
                        print('-----')
                        print("raised:")
                        print('\n'.join(reply['traceback']))
                    cells += 1
                    sys.stdout.write('.')

            # Empty-string argument: prints a blank line on Python 2 and 3.
            print("")
            print("ran notebook %s" % nb.metadata.name)
            print("    ran %3i cells" % cells)
            if failures:
                print("    %3i cells raised exceptions" % failures)
        finally:
            # Always release the kernel, including on a reply timeout.
            kc.stop_channels()
            km.shutdown_kernel()

    if failures:
        return 1
    return 0
コード例 #24
0
class NotebookRunner(object):
    """
    This class executes a notebook end to end.

    The kernel communicates with mime-types while the notebook
    uses short labels for different cell types. The class attribute
    *MIME_MAP* maps the kernel mime-types to these notebook labels.

    .. index:: kernel, notebook

    The class can use different kernels. The following links give more
    information on how to create or test a kernel:

    * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_
    * `simple_kernel <https://github.com/dsblank/simple_kernel>`_

    .. faqref::
        :title: Do I need to shutdown the kernel after running a notebook?

        .. index:: travis

        If the class is instantiated with *kernel=True*, a kernel will
        be started. It must be shut down, otherwise the program might
        wait for it forever. That is one of the reasons why the
        travis build does not complete. The build finishes but cannot terminate
        until all kernels are shut down.
    """

    # available output types: kernel mime-type -> notebook short label
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'image/gif': 'gif',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    def __init__(self,
                 nb,
                 profile_dir=None,
                 working_dir=None,
                 comment="",
                 fLOG=noLOG,
                 theNotebook=None,
                 code_init=None,
                 kernel_name="python",
                 log_level="30",
                 extended_args=None,
                 kernel=False,
                 filename=None,
                 replacements=None,
                 detailed_log=None,
                 startup_timeout=300):
        """
        @param      nb              notebook as :epkg:`JSON`
        @param      profile_dir     profile directory
        @param      working_dir     working directory, the kernel is started from this directory
        @param      comment         additional information added to error message
        @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
        @param      code_init       to initialize the notebook with a python code as if it was a cell
        @param      fLOG            logging function
        @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
        @param      kernel_name     kernel name, it can be None
        @param      extended_args   others arguments to pass to the command line
                                    (`--KernelManager.autorestart=True` for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
        @param      kernel          *kernel* is False by default, if True, a kernel is started
                                    and the notebook can be run, if False,
                                    the notebook can be read but not run
        @param      filename        to add the notebook file if there is one in error messages
        @param      replacements    replacements to make in every cell before running it,
                                    dictionary ``{ string: string }``
        @param      detailed_log    to log detailed information when executing the notebook, this should be a function
                                    with the same signature as ``print`` or None
        @param      startup_timeout wait for this long for the kernel to be ready,
                                    see `wait_for_ready
                                    <https://github.com/jupyter/jupyter_client/blob/master/
                                    jupyter_client/blocking/client.py#L84>`_

        The constructor raises ``SyntaxError`` if an option in *extended_args*
        does not start with ``--`` or has no value, and ``NotebookKernelError``
        if the kernel is not ready after *startup_timeout* seconds.

        .. versionchanged:: 1.8
            Parameter *startup_timeout* was added.
        """
        # Every attribute gets a value before anything can fail so that
        # a partially built instance remains usable (by __del__ for example).
        self.nb = nb
        self.comment = comment
        self.km = None
        self.kc = None
        self.detailed_log = detailed_log
        self.fLOG = fLOG
        self.theNotebook = theNotebook
        self.code_init = code_init
        self._filename = filename if filename is not None else "memory"
        self.replacements = replacements
        # Arguments replayed by method copy: they must be the values
        # received by the constructor, not the default values.
        self.init_args = dict(profile_dir=profile_dir,
                              working_dir=working_dir,
                              comment=comment,
                              fLOG=fLOG,
                              theNotebook=theNotebook,
                              code_init=code_init,
                              kernel_name=kernel_name,
                              log_level=log_level,
                              extended_args=extended_args,
                              kernel=kernel,
                              filename=filename,
                              replacements=replacements,
                              detailed_log=detailed_log,
                              startup_timeout=startup_timeout)

        if kernel:
            try:
                from jupyter_client import KernelManager
            except ImportError:  # pragma: no cover
                from ipykernel import KernelManager

            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                if kernel_name is not None:
                    self.km = KernelManager(kernel_name=kernel_name)
                else:
                    self.km = KernelManager()

        # Command line given to the kernel.
        args = []
        if profile_dir:
            args.append('--profile-dir=%s' % os.path.abspath(profile_dir))
        if log_level:
            args.append('--log-level=%s' % log_level)

        if extended_args is not None and len(extended_args) > 0:
            for opt in extended_args:
                if not opt.startswith("--"):
                    raise SyntaxError("every option should start with '--': " +
                                      opt)
                if "=" not in opt:
                    raise SyntaxError(  # pragma: no cover
                        "every option should be assigned a value: " + opt)
                args.append(opt)

        if not kernel:
            return

        cwd = os.getcwd()
        if working_dir:
            os.chdir(working_dir)
        try:
            try:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore",
                                            category=ResourceWarning)
                    self.km.start_kernel(extra_arguments=args)
            except Exception as e:  # pragma: no cover
                raise Exception(
                    "Failure with args: {0}\nand error:\n{1}".format(
                        args, str(e))) from e

            if platform.system() == 'Darwin':
                # see http://www.pypedia.com/index.php/notebook_runner
                # There is sometimes a race condition where the first
                # execute command hits the kernel before it's ready.
                # It appears to happen only on Darwin (Mac OS) and an
                # easy (but clumsy) way to mitigate it is to sleep
                # for a second.
                sleep(1)
        finally:
            # The current directory is restored even if the kernel
            # failed to start.
            if working_dir:
                os.chdir(cwd)

        self.kc = self.km.client()
        self.kc.start_channels(stdin=False)
        try:
            self.kc.wait_for_ready(timeout=startup_timeout)
        except RuntimeError as e:  # pragma: no cover
            # Waits for another *startup_timeout* seconds before releasing
            # the kernel.
            # NOTE(review): the previous comment mentioned one second,
            # confirm which delay is intended.
            sleep(startup_timeout)
            self.kc.stop_channels()
            self.km.shutdown_kernel()
            self.km = None
            self.kc = None
            raise NotebookKernelError(
                "Wait_for_ready fails (timeout={0}).".format(
                    startup_timeout)) from e

    def __del__(self):
        """
        We close the kernel.
        """
        if self.km is not None:
            del self.km
        if self.kc is not None:
            del self.kc

    def to_json(self, filename=None, encoding="utf8"):
        """
        Serializes the notebook into :epkg:`JSON`.

        @param      filename        filename (str), a stream with a method *write*,
                                    or None to get the result as a string
        @param      encoding        encoding used when *filename* is a file name
        @return                     Json string if filename is None, None otherwise
        """
        if filename is None:
            buffer = StringIO()
            buffer.write(writes(self.nb))
            return buffer.getvalue()

        if isinstance(filename, str):
            # A file name: the method calls itself with the opened stream.
            with open(filename, "w", encoding=encoding) as stream:
                self.to_json(stream)
            return None

        # Anything else is considered as a stream.
        filename.write(writes(self.nb))
        return None

    def copy(self):
        """
        Duplicates the notebook (only its content), the copy is
        built from the JSON dump of this notebook and the arguments
        stored in *init_args* except *theNotebook* and *filename*.

        @return         instance of @see cl NotebookRunner
        """
        buffer = StringIO()
        self.to_json(buffer)
        kwargs = {
            key: value
            for key, value in self.init_args.items()
            if key not in ("theNotebook", "filename")
        }
        return NotebookRunner(reads(buffer.getvalue()), **kwargs)

    def __add__(self, nb):
        """
        Merges two notebooks together, returns a new one,
        the current notebook is copied first and left unchanged.

        @param      nb      notebook
        @return             new notebook
        """
        merged = self.copy()
        merged.merge_notebook(nb)
        return merged

    def shutdown_kernel(self):
        """
        Shuts down kernel.
        """
        self.fLOG('-- shutdown kernel')
        if self.kc is None:
            raise ValueError(  # pragma: no cover
                "No kernel was started, specify kernel=True when initializing the instance."
            )
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def clean_code(self, code):
        """
        Cleans the code before running it, the function comment out
        instruction such as ``show()``.

        @param      code        code (string)
        @return                 cleaned code
        """
        has_bokeh = "bokeh." in code or "from bokeh" in code or "import bokeh" in code
        if code is None:
            return code

        lines = [_.strip("\n\r").rstrip(" \t") for _ in code.split("\n")]
        res = []
        show_is_last = False
        for line in lines:
            if line.replace(" ", "") == "show()":
                line = line.replace("show", "#show")
                show_is_last = True
            elif has_bokeh and line.replace(" ", "") == "output_notebook()":
                line = line.replace("output_notebook", "#output_notebook")
            else:
                show_is_last = False
            if self.replacements is not None:
                for k, v in self.replacements.items():
                    line = line.replace(k, v)
            res.append(line)
            if show_is_last:
                res.append('"nothing to show"')
        return "\n".join(res)

    @staticmethod
    def get_cell_code(cell):
        """
        Returns the code of a cell.

        @param      cell        a cell or a string
        @return                 boolean (=iscell), string
        """
        if isinstance(cell, str):
            iscell = False
            return iscell, cell

        iscell = True
        try:
            return iscell, cell.source
        except AttributeError:  # pragma: no cover
            return iscell, cell.input

    def run_cell(self, index_cell, cell, clean_function=None):
        '''
        Runs a notebook cell and updates the output of that cell inplace.

        @param      index_cell          index of the cell, only used in logs and error messages
        @param      cell                cell to execute, a cell or a string (see @see me get_cell_code)
        @param      clean_function      cleaning function to apply to the code before running it,
                                        it is called after @see me clean_code
        @return                         output of the cell, a list of *NotebookNode*,
                                        or an empty string if the cleaned code is empty
                                        or if the iopub channel stayed empty more than ten times

        The method raises *ValueError* if no kernel client is available,
        *NotImplementedError* if an iopub message has an unexpected type,
        *NotebookError* if the execution ended with status ``'error'``.
        '''
        if self.detailed_log:
            self.detailed_log(
                "[run_cell] index_cell={0} clean_function={1}".format(
                    index_cell, clean_function))
        iscell, codei = NotebookRunner.get_cell_code(cell)

        self.fLOG('-- running cell:\n%s\n' % codei)
        if self.detailed_log:
            self.detailed_log(
                '[run_cell] code=\n                        {0}'.format(
                    "\n                        ".join(codei.split("\n"))))

        # Cleans the code: builtin cleaning first, then the user function.
        code = self.clean_code(codei)
        if clean_function is not None:
            code = clean_function(code)
        if self.detailed_log:
            self.detailed_log(
                '    cleaned code=\n                        {0}'.format(
                    "\n                        ".join(code.split("\n"))))
        # Nothing to run.
        if len(code) == 0:
            return ""
        if self.kc is None:
            raise ValueError(  # pragma: no cover
                "No kernel was started, specify kernel=True when initializing the instance."
            )
        # Sends the code to the kernel.
        self.kc.execute(code)

        # The reply on the shell channel gives the execution status.
        reply = self.kc.get_shell_msg()
        reason = None
        try:
            status = reply['content']['status']
        except KeyError:  # pragma: no cover
            status = 'error'
            reason = "no status key in reply['content']"

        if status == 'error':
            # Removes ANSI escape sequences (colors) from the traceback.
            ansi_escape = re.compile(r'\x1b[^m]*m')
            try:
                tr = [
                    ansi_escape.sub('', _)
                    for _ in reply['content']['traceback']
                ]
            except KeyError:  # pragma: no cover
                tr = ["No traceback, available keys in reply['content']"] + \
                    list(reply['content'])
            traceback_text = '\n'.join(tr)
            self.fLOG("[nberror]\n", traceback_text)
            if self.detailed_log:
                self.detailed_log('[run_cell] ERROR=\n    {0}'.format(
                    "\n    ".join(traceback_text.split("\n"))))
        else:
            traceback_text = ''
            self.fLOG('-- cell returned')

        # Reads the iopub channel until the kernel is idle again
        # and converts every message into an output.
        outs = list()
        nbissue = 0  # number of times the iopub channel was found empty
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=1)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:  # pragma: no cover
                # execution state should return to idle before the queue becomes empty,
                # if it doesn't, something bad has happened
                status = "error"
                reason = "exception Empty was raised"
                nbissue += 1
                if nbissue > 10:
                    # the channel stayed empty more than ten times, gives up
                    return ""
                else:
                    continue

            content = msg['content']
            msg_type = msg['msg_type']
            if self.detailed_log:
                self.detailed_log('    msg_type={0}'.format(msg_type))

            out = NotebookNode(output_type=msg_type, metadata=dict())

            if 'execution_count' in content:
                if iscell:
                    cell['execution_count'] = content['execution_count']
                out.execution_count = content['execution_count']

            # These messages do not produce any output.
            if msg_type in ('status', 'pyin', 'execute_input'):
                continue

            if msg_type == 'stream':
                out.name = content['name']
                # in msgspec 5, this is name, text
                # in msgspec 4, this is name, data
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.data = content['data']

            elif msg_type in ('display_data', 'pyout', 'execute_result'):
                out.data = content['data']

            elif msg_type in ('pyerr', 'error'):
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
                out.name = 'stderr'

            elif msg_type == 'clear_output':
                # Drops every output collected so far.
                outs = list()
                continue

            elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
                # widgets in a notebook
                out.data = content["data"]
                out.comm_id = content["comm_id"]

            else:
                dcontent = "\n".join("{0}={1}".format(k, v)
                                     for k, v in sorted(content.items()))
                raise NotImplementedError(  # pragma: no cover
                    "Unhandled iopub message: '{0}'\n--CONTENT--\n{1}".format(
                        msg_type, dcontent))

            outs.append(out)
            if self.detailed_log:
                self.detailed_log('    out={0}'.format(type(out)))
                if hasattr(out, "data"):
                    self.detailed_log('    out={0}'.format(out.data))

        # Updates the cell inplace.
        if iscell:
            cell['outputs'] = outs

        # Gathers the outputs whose key starts with "text" to log them
        # and to add them to the error message if needed.
        raw = []
        for _ in outs:
            try:
                t = _.data
            except AttributeError:
                continue

            # see MIME_MAP to see the available output types
            for k, v in t.items():
                if k.startswith("text"):
                    raw.append(v)

        sraw = "\n".join(raw)
        self.fLOG(sraw)
        if self.detailed_log:
            self.detailed_log('    sraw=\n                        {0}'.format(
                "\n                        ".join(sraw.split("\n"))))

        def reply2string(reply):
            # Converts the reply into a readable string, one line per key,
            # dictionaries are expanded one level.
            sreply = []
            for k, v in sorted(reply.items()):
                if isinstance(v, dict):
                    temp = []
                    for _, __ in sorted(v.items()):
                        temp.append("    [{0}]={1}".format(_, str(__)))
                    v_ = "\n".join(temp)
                    sreply.append("reply['{0}']=dict\n{1}".format(k, v_))
                else:
                    sreply.append("reply['{0}']={1}".format(k, str(v)))
            sreply = "\n".join(sreply)
            return sreply

        # The status is 'error' if the shell reply says so or
        # if the iopub channel was found empty at least once.
        if status == 'error':
            sreply = reply2string(reply)
            if len(code) < 5:
                scode = [code]
            else:
                scode = ""
            mes = "FILENAME\n{10}:1:1\n{7}\nCELL status={8}, reason={9} -- {4} length={5} -- {6}:\n-----------------\n{0}" + \
                  "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}"
            raise NotebookError(
                mes.format(code, traceback_text, sraw, sreply, index_cell,
                           len(code), scode, self.comment, status, reason,
                           self._filename))
        if self.detailed_log:
            self.detailed_log('[run_cell] status={0}'.format(status))
        return outs

    def to_python(self):
        """
        Converts the notebook into python.

        @return         string
        """
        rows = []
        for cell in self.iter_cells():
            if cell.cell_type == "code":
                codei = NotebookRunner.get_cell_code(cell)[1]
                rows.append(codei)
            elif cell.cell_type in ("markdown", "raw"):
                content = cell.source
                lines = content.split("\n")
                for line in lines:
                    if line.startswith("#"):
                        rows.append("###")
                        rows.append(line)
                    else:
                        rows.append("# " + line)
            else:
                # No text, no code.
                rows.append("# cell.type = {0}".format(cell.cell_type))
            rows.append("")
        return "\n".join(rows)

    def iter_code_cells(self):
        '''
        Iterates over the notebook cells containing code.
        '''
        for cell in self.iter_cells():
            if cell.cell_type == 'code':
                yield cell

    def iter_cells(self):
        '''
        Iterates over the notebook cells.
        '''
        if hasattr(self.nb, "worksheets"):
            for ws in self.nb.worksheets:
                for cell in ws.cells:
                    yield cell
        else:
            for cell in self.nb.cells:
                yield cell

    def first_cell(self):
        """
        Returns the first cell.
        """
        for cell in self.iter_cells():
            return cell

    def _cell_container(self):
        """
        Returns a cells container, it may change according to the format.

        @return     cell container
        """
        if hasattr(self.nb, "worksheets"):
            last = None
            for ws in self.nb.worksheets:
                last = ws
            if last is None:
                raise NotebookError("no cell container")  # pragma: no cover
            return last.cells
        return self.nb.cells

    def __len__(self):
        """
        Returns the number of cells, it iterates on cells
        to get this information and does cache the information.

        @return         int
        """
        return sum(1 for _ in self.iter_cells())

    def cell_type(self, cell):
        """
        Returns the cell type.

        @param      cell        from @see me iter_cells
        @return                 type
        """
        return cell.cell_type

    def cell_metadata(self, cell):
        """
        Returns the cell metadata.

        @param      cell        cell
        @return                 metadata
        """
        return cell.metadata

    def _check_thumbnail_tuple(self, b):
        """
        Checks types for a thumbnail.

        @param      b       tuple   image, format
        @return             b

        The function raises an exception if the type is incorrect.
        """
        if not isinstance(b, tuple):
            raise TypeError(  # pragma: no cover
                "tuple expected, not {0}".format(type(b)))
        if len(b) != 2:
            raise TypeError(  # pragma: no cover
                "tuple expected of lengh 2, not {0}".format(len(b)))
        if b[1] == "svg":
            if not isinstance(b[0], str):
                raise TypeError("str expected for svg, not {0}".format(
                    type(b[0])))
        elif b[1] in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                      "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json'):
            # Don't know how to extract a snippet out of this.
            pass
        else:
            if not isinstance(b[0], bytes):
                raise TypeError(
                    "bytes expected for images, not {0}-'{1}'\n{2}".format(
                        type(b[0]), b[1], b))
        return b

    def create_picture_from(self, text, format, asbytes=True, context=None):
        """
        Creates a picture from text.

        @param      text        the text, an object which is neither *str* nor *bytes*
                                is converted with ``str`` and wrapped into rows of
                                20 characters if the result is a single line
        @param      format      text, json, ... (not used by the current implementation)
        @param      context     (str) indication on the content of text (error, ...),
                                (not used by the current implementation)
        @param      asbytes     results as bytes or as an image
        @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

        The picture will be bytes, the format png.
        The text is truncated to 200 characters.
        The size of the picture will depend on the text.
        The longer, the bigger. The method relies on matplotlib
        and then convert the image into a PIL image.

        HTML could be rendered with QWebPage from PyQt (not implemented).
        """
        if not isinstance(text, (str, bytes)):
            text = str(text)
            if "\n" not in text:
                # Wraps a long single line into rows of 20 characters.
                rows = [text[i:i + 20] for i in range(0, len(text), 20)]
                text = "\n".join(rows)
        if len(text) > 200:
            text = text[:200]
        size = len(text) // 10
        figsize = (3 + size, 3 + size)
        lines = text.replace("\t", " ").replace("\r", "").split("\n")

        import matplotlib.pyplot as plt
        from matplotlib.textpath import TextPath
        from matplotlib.font_manager import FontProperties
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # Draws every non empty line below the previous one and keeps
        # track of the total height (dy) and the maximum width (dx).
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        # The ratio width / height is kept between 1 and 3.
        ratio = abs(dx) / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        ax.set_xlim([0, dx])
        ax.set_ylim([-dy, 0])
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
        # Closes the figure created above, not whatever figure is current.
        plt.close(fig)

        if asbytes:
            b = sio.getvalue(), "png"
            self._check_thumbnail_tuple(b)
            return b
        try:
            from PIL import Image
        except ImportError:  # pragma: no cover
            import Image
        img = Image.open(sio)
        return img

    def cell_image(self, cell, image_from_text=False):
        """
        Returns the cell image or None if not found.

        @param      cell            cell to examine
        @param      image_from_text produce an image even if it is not one
        @return                     None for no image or a list of tuple (image as bytes, extension)
                                    for each output of the cell
        """
        kind = self.cell_type(cell)
        if kind != "code":
            return None
        results = []
        for output in cell.outputs:
            if output["output_type"] in {"execute_result", "display_data"}:
                data = output["data"]
                for k, v in data.items():
                    if k == "text/plain":
                        if image_from_text:
                            b = self.create_picture_from(
                                v, "text", context=output["output_type"])
                            results.append(b)
                    elif k == "application/javascript":
                        if image_from_text:
                            b = self.create_picture_from(v, "js")
                            results.append(b)
                    elif k == "application/json":
                        if image_from_text:
                            b = self.create_picture_from(v, "json")
                            results.append(b)
                    elif k == "image/svg+xml":
                        if not isinstance(v, str):
                            raise TypeError(
                                "This should be str not '{0}' (=SVG).".format(
                                    type(v)))
                        results.append((v, "svg"))
                    elif k == "text/html":
                        if image_from_text:
                            b = self.create_picture_from(v, "html")
                            results.append(b)
                    elif k == "text/latex":
                        if image_from_text:
                            b = self.create_picture_from(v, "latex")
                            results.append(b)
                    elif k == "application/vnd.jupyter.widget-view+json":
                        # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
                        if "model_id" not in v:
                            raise KeyError(  # pragma: no cover
                                "model_id is missing from {0}".format(v))
                        model_id = v["model_id"]
                        self.fLOG(
                            "[application/vnd.jupyter.widget-view+json] not rendered",
                            model_id)
                    elif k in {
                            "image/png", "image/jpg", "image/jpeg", "image/gif"
                    }:
                        if not isinstance(v, bytes):
                            v = base64.b64decode(v)
                        if not isinstance(v, bytes):
                            raise TypeError(  # pragma: no cover
                                "This should be bytes not '{0}' (=IMG:{1}).".
                                format(type(v), k))
                        results.append((v, k.split("/")[-1]))
                    elif k in ("text/vnd.plotly.v1+html",
                               "application/vnd.plotly.v1+json",
                               "application/vnd.bokehjs_exec.v0+json",
                               "application/vnd.bokehjs_load.v0+json"):
                        results.append((v, k.split("/")[-1]))
                    else:
                        raise NotImplementedError(  # pragma: no cover
                            "cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(
                                kind, k, v, cell))
            elif output["output_type"] == "error":
                vl = output["traceback"]
                if image_from_text:
                    for v in vl:
                        b = self.create_picture_from(v,
                                                     "text",
                                                     context="error")
                        results.append(b)
            elif output["output_type"] == "stream":
                v = output["text"]
                if image_from_text:
                    b = self.create_picture_from(v, "text")
                    results.append(b)
            else:
                raise NotImplementedError(  # pragma: no cover
                    "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                    "".format(kind, output["output_type"], output, cell))
        if len(results) > 0:
            res = self._merge_images(results)
            if res[0] is None:
                return None
            self._check_thumbnail_tuple(res)
            return res
        return None

    def cell_height(self, cell):
        """
        Estimates the height of a cell as a number of lines: the lines of
        its source and, for code cells, a rough guess for every output.

        @param      cell        cell
        @return                 estimated number of lines
        """
        def wrapped_rows(text):
            # each physical line counts once plus once more per 80 characters
            return sum(1 + len(row) // 80 for row in text.split("\n"))

        def rows(text):
            return len(text.split("\n"))

        kind = self.cell_type(cell)
        if kind in ("markdown", "raw"):
            return wrapped_rows(cell.source)
        if kind != "code":
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\nCELL:\n{1}".format(kind, cell))

        pictures = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
        interactive = ("text/vnd.plotly.v1+html",
                       "application/vnd.plotly.v1+json",
                       "application/vnd.bokehjs_load.v0+json",
                       "application/vnd.bokehjs_exec.v0+json")

        total = rows(cell.source)
        for output in cell.outputs:
            out_type = output["output_type"]
            if out_type in ("execute_result", "display_data"):
                for mime, value in output["data"].items():
                    if mime in ("text/plain", "text/html"):
                        total += rows(value)
                    elif mime == "application/javascript":
                        # rough estimation
                        total += rows(value) // 2
                    elif mime == "application/json":
                        # rough estimation, the payload is either a string
                        # or an already decoded container
                        try:
                            total += len(value.split("{"))
                        except AttributeError:
                            total += len(value) // 5 + 1
                    elif mime == "image/svg+xml":
                        total += len(value) // 5
                    elif mime == "text/latex":
                        total += len(value.split("\\\\")) * 2
                    elif mime in pictures:
                        total += len(value) // 50
                    elif mime == "application/vnd.jupyter.widget-view+json":
                        total += 5
                    elif mime in interactive:
                        total += 10
                    else:
                        fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                        raise NotImplementedError(
                            fmt.format(kind, mime, value, cell))
            elif out_type == "stream":
                total += rows(output["text"])
            elif out_type == "error":
                # one line per traceback entry
                total += len(output["traceback"])
            else:
                raise NotImplementedError(  # pragma: no cover
                    "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                    .format(kind, out_type, output, cell))

        return total

    def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
        """
        Adds slideshow tags to untagged cells so that a slide never holds
        too many cells or too many lines. Untagged markdown cells starting
        with a level 1 to 3 title open a new slide, other untagged cells
        open a sub-slide as soon as a limit is exceeded.

        @param      max_nb_cell     maximum number of cells within a slide
        @param      max_nb_line     maximum number of lines within a slide
        @return                     list of modified cells { #slide: (kind, reason, cell) }
        """
        modified = {}
        lines_in_slide = 0
        cells_in_slide = 0
        for index, cell in enumerate(self.iter_cells()):
            meta = cell.metadata
            if "slideshow" in meta:
                # an existing slide or sub-slide tag restarts the counters
                if meta["slideshow"]["slide_type"] in ("slide", "subslide"):
                    lines_in_slide = cells_in_slide = 0
            elif cell.cell_type == "markdown" and \
                    cell.source.startswith(("# ", "## ", "### ")):
                meta["slideshow"] = {'slide_type': 'slide'}
                lines_in_slide = cells_in_slide = 0
                modified[index] = ("slide", "section", cell)

            height = self.cell_height(cell)
            too_big = (cells_in_slide + 1 > max_nb_cell or
                       lines_in_slide + height > max_nb_line)
            if "slideshow" not in meta and too_big:
                reason = "{0}-{1} <-> {2}-{3}".format(
                    cells_in_slide, lines_in_slide, 1, height)
                modified[index] = ("subslide", reason, cell)
                lines_in_slide = cells_in_slide = 0
                meta["slideshow"] = {'slide_type': 'subslide'}

            # the current cell always belongs to the slide being filled
            lines_in_slide += height
            cells_in_slide += 1

        return modified

    def run_notebook(self,
                     skip_exceptions=False,
                     progress_callback=None,
                     additional_path=None,
                     valid=None,
                     clean_function=None,
                     context=None):
        '''
        Runs all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).

        @param      skip_exceptions     if True, a cell raising @see cl NotebookError does not
                                        stop the execution, the following cells are still run
        @param      progress_callback   call back function, called with the index of every
                                        code cell which was run
        @param      additional_path     additional paths (as a list or None if none)
        @param      valid               if not None, valid is a function which receives the code
                                        of a cell and tells whether the cell should be executed;
                                        a false value skips the cell, *None* stops the execution
                                        of the notebook before this cell
        @param      clean_function      function which cleans a cell's code before executing it (None for None)
        @param      context             not used by this method
        @return                         dictionary with statistics (*nbcell*, *nbrun*,
                                        *nbvalid*, *time*)

        The function adds the local variable ``theNotebook`` with
        the absolute file name of the notebook.
        Function *valid* can return *None* to stop the execution of the notebook
        before this cell.
        '''
        if self.detailed_log:
            self.detailed_log(
                "[run_notebook] Starting execution of '{0}'".format(
                    self._filename))
        # additional path
        if additional_path is not None:
            if not isinstance(additional_path, list):
                raise TypeError(  # pragma: no cover
                    "Additional_path should be a list not: " +
                    str(additional_path))
            code = ["import sys"]
            code.extend("sys.path.append(r'{0}')".format(p)
                        for p in additional_path)
            self.run_cell(-1, "\n".join(code))

        # we add local variable theNotebook
        if self.theNotebook is not None:
            cell = "theNotebook = r'''{0}'''".format(self.theNotebook)
            self.run_cell(-1, cell)

        # initialisation with a code not inside the notebook
        if self.code_init is not None:
            self.run_cell(-1, self.code_init)

        # execution of the notebook
        nbcell = 0   # code cells seen
        nbrun = 0    # code cells sent to the kernel
        nbnerr = 0   # code cells executed without any error
        cl = time.perf_counter()
        for i, cell in enumerate(self.iter_code_cells()):
            nbcell += 1
            codei = NotebookRunner.get_cell_code(cell)[1]
            if valid is not None:
                r = valid(codei)
                if r is None:
                    break
                if not r:
                    continue
            try:
                nbrun += 1
                self.run_cell(i, cell, clean_function=clean_function)
                nbnerr += 1
            except Empty as er:
                raise RuntimeError(  # pragma: no cover
                    "{0}\nissue when executing:\n{1}".format(
                        self.comment, codei)) from er
            except NotebookError:
                if not skip_exceptions:
                    raise
                # skip_exceptions: the failure is only logged and the
                # execution moves on to the next cell, as documented
                if self.detailed_log:
                    self.detailed_log(
                        "[run_notebook] exception skipped in cell {0}".format(i))
            if progress_callback:
                progress_callback(i)
        etime = time.perf_counter() - cl
        res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)
        if self.detailed_log:
            self.detailed_log("[run_notebook] end execution of '{0}'".format(
                self._filename))
            self.detailed_log(
                "[run_notebook] execution time: {0}".format(etime))
            self.detailed_log("[run_notebook] statistics : {0}".format(res))
        return res

    def count_code_cells(self):
        '''
        Counts the code cells of the notebook.

        @return     number of code cells
        '''
        total = 0
        for _ in self.iter_code_cells():
            total += 1
        return total

    def merge_notebook(self, nb):
        """
        Adds the cells of notebook *nb* at the end of this notebook.
        A list of notebooks is merged one notebook after the other.

        @param      nb      notebook or list of notebook (@see cl NotebookRunner)
        @return             number of added cells

        .. faqref::
            :title: How to merge notebook?

            The following code merges two notebooks into the first one
            and stores the result unto a file.

            ::

                from pyquickhelper.ipythonhelper import read_nb
                nb1 = read_nb("<file1>", kernel=False)
                nb2 = read_nb("<file2>", kernel=False)
                nb1.merge_notebook(nb2)
                nb1.to_json(outfile)
        """
        if isinstance(nb, list):
            # every notebook of the list is merged, the counts are added up
            return sum(self.merge_notebook(one) for one in nb)

        container = self._cell_container()
        added = 0
        for added, cell in enumerate(nb.iter_cells(), 1):
            container.append(cell)
        return added

    def get_description(self):
        """
        Gets summary and description of this notebook.
        We expect the first cell to contain a title and a description
        of its content. If the first cell holds no description, the
        first paragraph of the second cell is used when that cell is
        a markdown cell. Markdown links are replaced by their text and
        double quotes are removed from the description.

        @return             header, description
        """
        def split_header(s, get_header=True):
            # Splits a markdown text into (header, first paragraph).
            # With get_header=False no title is extracted and the header
            # is always empty.
            parts = s.strip().splitlines()
            if not parts:
                # empty or whitespace-only cell: nothing to extract
                return '', ''
            header = ''
            if parts[0].startswith('#'):
                if get_header:
                    header = re.sub('#+\\s*', '', parts.pop(0))
            elif get_header:
                # title underlined (and maybe overlined) with = or -
                if parts[0].startswith(('=', '-')):
                    parts = parts[1:]
                    if not parts:
                        # the cell only contained a decoration line
                        return '', ''
                header = parts.pop(0)
                if parts and parts[0].startswith(('=', '-')):
                    parts.pop(0)
            if not parts:
                return header, ''
            rest = '\n'.join(parts).lstrip().split('\n\n')
            return header, rest[0].replace('\n', ' ')

        first_cell = self.first_cell()

        if first_cell['cell_type'] != 'markdown':
            raise ValueError(  # pragma: no cover
                "The first cell is not in markdown but '{0}' filename='{1}'.".
                format(first_cell['cell_type'], self._filename))

        header, desc = split_header(first_cell['source'])
        if not desc and len(self.nb['cells']) > 1:
            second_cell = self.nb['cells'][1]
            if second_cell['cell_type'] == 'markdown':
                _, desc = split_header(second_cell['source'], False)

        # [text](url) becomes text
        reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
        reg = re.compile(reg_link)
        new_desc = reg.sub("\\2", desc)
        if "http://" in new_desc or "https://" in new_desc:
            # a remaining url means the link was not recognised
            raise ValueError(  # pragma: no cover
                "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".
                format(desc, new_desc, self._filename))
        return header, new_desc.replace('"', "")

    def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
        """
        Builds one picture which illustrates the notebook. Cells are
        scanned from the last one to the first one and the first image
        found that way is scaled down to fit in the requested box. When no
        output provides an image, a default picture stored next to this
        module is used.

        @param      max_width       maximum size of the thumbnail
        @param      max_height      maximum size of the thumbnail
        @param      use_default     force using the default image even if an image is present
        @return                     string (:epkg:`SVG`) or Image (:epkg:`PIL`)
        """
        # formats produced by interactive libraries, not convertible
        interactive = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                       "vnd.bokehjs_load.v0+json")

        def usable(found):
            return found is not None and len(found) > 0 and len(found[0]) > 0

        ordered = list(self.iter_cells())[::-1]
        images = []
        for cell in ordered:
            found = self.cell_image(cell, False)
            if usable(found) and found[1] not in interactive:
                self._check_thumbnail_tuple(found)
                images.append(found)
        if not use_default and not images:
            # second chance, cell_image is called with its flag enabled
            for cell in ordered:
                found = self.cell_image(cell, True)
                if not usable(found):
                    continue
                self._check_thumbnail_tuple(found)
                images.append(found)
                if len(found[0]) >= 1000:
                    break
        if use_default:
            images = []
        if not images:
            # no image, we need to consider the default one
            no_image = os.path.join(os.path.dirname(__file__),
                                    'no_image_nb.png')
            with open(no_image, "rb") as f:
                found = (f.read(), "png")
                self._check_thumbnail_tuple(found)
                images.append(found)

        # select the image
        if not images:
            raise ValueError(  # pragma: no cover
                "There should be at least one image.")
        # cells were scanned backwards: the first candidate comes from the
        # last cell of the notebook which holds an image
        chosen = images[0]
        content, kind = chosen[0], chosen[1]

        # zoom
        if kind in interactive:
            return None
        if kind == 'svg':
            try:
                content = svg2img(content)
            except PYQImageException:  # pragma: no cover
                # Unable to convert SVG.
                return None
        return self._scale_image(content,
                                 kind,
                                 max_width=max_width,
                                 max_height=max_height)

    def _scale_image(self,
                     in_bytes,
                     format=None,
                     max_width=200,
                     max_height=200):
        """
        Scales an image with the same aspect ratio centered in an
        image with a given max_width and max_height.
        An image which already fits in the box is returned unchanged
        (it is never enlarged nor padded).

        @param      in_bytes        image as bytes, a :epkg:`PIL` image or a tuple
                                    whose first element is one of them
        @param      format          indication of the format (can be empty),
                                    only used in error messages
        @param      max_width       maximum size of the thumbnail
        @param      max_height      maximum size of the thumbnail
        @return                     Image (PIL)
        """
        # local import to avoid testing dependency on PIL:
        try:
            from PIL import Image
        except ImportError:  # pragma: no cover
            import Image

        if isinstance(in_bytes, tuple):
            in_bytes = in_bytes[0]
        if isinstance(in_bytes, bytes):
            img = Image.open(BytesIO(in_bytes))
        elif isinstance(in_bytes, Image.Image):
            img = in_bytes
        else:
            raise TypeError(  # pragma: no cover
                "bytes expected, not {0} - format={1}".format(
                    type(in_bytes), format))
        width_in, height_in = img.size
        scale_w = max_width / float(width_in)
        scale_h = max_height / float(height_in)

        # the most constraining dimension drives the zoom
        if height_in * scale_w <= max_height:
            scale = scale_w
        else:
            scale = scale_h

        if scale >= 1.0:
            return img

        width_sc = int(round(scale * width_in))
        height_sc = int(round(scale * height_in))

        # Image.ANTIALIAS was removed in Pillow 10, LANCZOS is the same
        # filter under its current name; the old name is only a fallback
        # for very old versions of PIL.
        resample = getattr(Image, "LANCZOS", None)
        if resample is None:  # pragma: no cover
            resample = Image.ANTIALIAS

        # resize the image (in place) and center it on a white background
        img.thumbnail((width_sc, height_sc), resample)
        thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
        pos_insert = ((max_width - width_sc) // 2,
                      (max_height - height_sc) // 2)
        thumb.paste(img, pos_insert)
        return thumb

    def _merge_images(self, results):
        """
        Merges images defined by (buffer, format).
        The method uses PIL to merge images when possible.
        When the images do not share the same format, only the images
        of the most frequent format are merged.

        @param      results         list of tuples ``(buffer, format)``
        @return                     ``(image, format)``, the only image if there is
                                    one, None if *results* is empty
        """
        if len(results) == 1:
            results = results[0]
            self._check_thumbnail_tuple(results)
            return results
        if len(results) == 0:
            return None

        # The dominant format is kept. The format name is needed here,
        # not the (count, format) pair, otherwise the filter below
        # discards every image.
        formats_counts = Counter(item[1] for item in results)
        fmt = formats_counts.most_common(1)[0][0]

        results = [item for item in results if item[1] == fmt]
        if fmt == "svg":
            return ("\n".join(item[0] for item in results), fmt)

        # local import to avoid testing dependency on PIL:
        try:
            from PIL import Image
        except ImportError:  # pragma: no cover
            import Image

        dx = 0.
        dy = 0.
        over = 0.7  # every image contributes 70% of its height to the canvas
        imgs = []
        for in_bytes, _ in results:
            img = Image.open(BytesIO(in_bytes))
            imgs.append(img)
            dx = max(dx, img.size[0])
            dy += img.size[1] * over

        new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
        # images are pasted from the bottom to the top, the last one
        # ends up at the top and partly covers the previous ones
        for img in imgs:
            dy -= img.size[1] * over
            new_im.paste(img, (0, max(int(dy), 0)))

        if max(dx, dy) > 0:
            image_buffer = BytesIO()
            new_im.save(image_buffer, "PNG")
            b = image_buffer.getvalue(), "png"
            return b
        b = None, "png"
        return b
コード例 #25
0
def test_notebook(ipynb):
    """
    Run every code cell of notebook `ipynb` in a fresh kernel and compare
    the outputs with the ones stored in the notebook.

    The kernel and its channels are always released, including when the
    function leaves through `exit(0)` (kernel not available, skipped
    notebook) or through an unexpected exception.

    Returns True when no cell mismatched (`num_fail()`) and no cell failed
    to run, False otherwise.
    """
    print('\nTesting notebook {}'.format(ipynb))
    global_ignores = []
    with open(ipynb) as f:
        nb = formatter.reads_json(f.read())
    log('test_notebook: create KernelManager', level=2)
    km = KernelManager()
    # Do not save the history to disk, as it can yield spurious lock errors.
    # See https://github.com/ipython/ipython/issues/2845
    log('test_notebook: start_kernel', level=2)
    km.start_kernel(extra_arguments=['--HistoryManager.hist_file=:memory:'],
                    stderr=subprocess.DEVNULL)
    kc = km.client()
    log('test_notebook: start_channels', level=2)
    kc.start_channels()
    try:
        try:
            log('test_notebook: wait_for_ready', level=2)
            kc.wait_for_ready()
            log('test_notebook: wait_for_ready: done', level=2)
        except Exception as e:
            SKIP('cannot start Jupyter kernel:', repr(e))
            exit(0)
        nerror = 0
        for i, cell in enumerate(nb['cells']):
            if cell['cell_type'] != 'code':
                continue
            # i counts all the cells (included those without code), n
            # counts only the executable cells.
            n = cell['execution_count']
            print('cell [{}] ({}): '.format(n, i))
            source = cell['source']
            # `%timeit`s shall count in execution count but its result
            # cannot be compared.
            if source.startswith('%timeit'):
                run_cell(kc, 'pass')
                continue
            if 'VCSN_SEED' in source and not is_libcpp():
                SKIP('random number generation not on libc++')
                # Of course, we can't run the remainder as we certainly
                # have skipped definitions used later in the notebook.
                exit(0)
            # Adjust the paths to files used in the notebooks.
            source = source.replace(
                '../../tests/demo',
                os.environ['abs_top_srcdir'] + '/tests/demo')
            # Check if there are neutralization patterns to apply.
            global_ignores += re.findall('^# global ignore: (.*)$',
                                         source, re.M)
            ignores = global_ignores + re.findall('^# ignore: (.*)$',
                                                  source, re.M)
            log('Ignores: ', ignores)
            try:
                outs = run_cell(kc, source)
            except Empty:
                print(
                    'Failed to run cell [{}] ({}):'.format(n, i),
                    '    Kernel Client is Empty; this is most likely due to a',
                    '    timeout issue. Check with `vcsn ps` or run the notebook',
                    '    manually, then retry.',
                    sep='\n')
                print('Source was:\n', source)
                FAIL('failed to run cell [{}]'.format(n))
                nerror += 1
                continue
            except Exception as e:
                print('Failed to run cell [{}] ({}):'.format(n, i), repr(e))
                print('Source was:', source, sep='\n')
                FAIL('failed to run cell [{}]'.format(n))
                nerror += 1
                continue
            if re.search('^# ignore cell$', source, re.M):
                SKIP('ignore cell request')
                continue
            check_outputs(cell.outputs, outs, ignores)
        print("Tested notebook {}".format(ipynb))
        print("    {:3} cells successfully replicated".format(num_pass()))
        if num_fail():
            print("    {:3} cells mismatched output".format(num_fail()))
        if nerror:
            print("    {:3} cells failed to complete".format(nerror))
        if num_test() == 0:
            # The TAP protocol does not like empty test suite.
            PASS('no test')
        # The number of mismatches comes from num_fail(), there is no
        # local counter for it.
        return not (num_fail() or nerror)
    finally:
        log('test_notebook: stop_channels', level=2)
        kc.stop_channels()
        log('test_notebook: shutdown_kernel', level=2)
        km.shutdown_kernel()
        del km
        log('test_notebook: return', level=2)
コード例 #26
0
ファイル: poc.py プロジェクト: sackh/kuma
                state = content["execution_state"]
        except Empty:
            pass
    if "text/plain" in data:
        data = data["text/plain"]
    return data


if __name__ == "__main__":
    # Manual smoke test: starts a kernel, imports pandas in it, then runs
    # the code generated for a ``pd.read_csv`` call and prints the result.
    km = KernelManager()
    km.start_kernel()
    # expected to print True once the kernel process is running
    print(km.is_alive())
    try:
        c = km.client()

        execute_code("import pandas as pd", c)
        # description of the call to generate: module alias, function name
        # and positional arguments
        # NOTE(review): the exact schema is defined by PandasCodeGenerator,
        # which is not visible here - confirm against its definition
        form = {
            "func": "read_csv",
            "args": ["tests/testdata/titanic.csv"],
            "mod": "pd"
        }
        code = PandasCodeGenerator(form, save=True).process()
        print(code)
        df = execute_code(code, c)
        print(df)
    except KeyboardInterrupt:
        # Ctrl+C only interrupts the demo, the kernel is still shut down below
        pass
    finally:
        km.shutdown_kernel()
        # expected to print False once the kernel is stopped
        print(km.is_alive())
コード例 #27
0
ファイル: ipynb_examples.py プロジェクト: Python3pkg/Magni
def _check_ipynb(notebook):
    """
    Check an IPython Notebook for matching input and output.

    Each cell input in the `notebook` is executed and the result is compared
    to the cell output saved in the `notebook`. The kernel started for the
    check is always shut down, even if the check is interrupted by an
    exception.

    Parameters
    ----------
    notebook : IPython.nbformat.current.NotebookNode
        The notebook to check for matching input and output.

    Returns
    -------
    passed : Bool
        The indicator of a successful check (or not).
    successes : int
        The number of cell outputs that matched.
    failures : int
        The number of cell outputs that failed to match.
    errors : int
        The number of cell executions that resulted in errors.
    report : str
        The report detailing possible failures and errors.

    Notes
    -----
    A `RuntimeError` raised while executing a cell counts as one error and
    stops the check of the current worksheet. Outputs are compared pairwise
    with `zip`, so outputs without a counterpart are not compared.

    """

    kernel_manager = KernelManager()
    kernel_manager.start_kernel()
    kernel_client = kernel_manager.client()
    kernel_client.start_channels()

    successes = 0
    failures = 0
    errors = 0
    report = ''

    try:
        try:
            # IPython 3.x
            kernel_client.wait_for_ready()
            iopub = kernel_client
            shell = kernel_client
        except AttributeError:
            # Ipython 2.x
            # Based on https://github.com/paulgb/runipy/pull/49/files
            iopub = kernel_client.iopub_channel
            shell = kernel_client.shell_channel
            shell.get_shell_msg = shell.get_msg
            iopub.get_iopub_msg = iopub.get_msg

        for worksheet in notebook.worksheets:
            for cell in worksheet.cells:
                if cell.cell_type != 'code':
                    continue

                try:
                    test_results = _execute_cell(cell, shell, iopub)
                except RuntimeError as e:
                    report += ('{!s} in cell number: {}'
                               .format(e, cell.prompt_number))
                    errors += 1
                    break

                identical_output = all(
                    [_compare_cell_output(test_result, reference)
                     for test_result, reference in
                     zip(test_results, cell.outputs)])

                if identical_output:
                    successes += 1
                    continue

                failures += 1

                try:
                    # textual dump of both sides, without the bulky entries
                    str_test_results = [
                        '(for out {})\n'.format(k) + '\n'.join(
                            [' : '.join([str(key), str(val)])
                             for key, val in t.items()
                             if key not in ('metadata', 'png')]
                        ) for k, t in enumerate(test_results)]
                    str_cell_outputs = [
                        '(for out {})\n'.format(k) + '\n'.join(
                            [' : '.join([str(key), str(val)])
                             for key, val in t.items()
                             if key not in ('metadata', 'png')]
                        ) for k, t in enumerate(cell.outputs)]
                except TypeError:
                    report += 'TypeError in ipynb_examples test\n\n'
                    for entry in cell.outputs:
                        if 'traceback' in entry:
                            for item in entry['traceback']:
                                report += str(item) + '\n'
                else:
                    report += '\n' * 2 + '~' * 40
                    report += (
                        '\nFailure in {}:{}\nGot: {}\n\n\nExpected: {}'
                    ).format(notebook.metadata.name,
                             cell.prompt_number,
                             '\n'.join(str_test_results),
                             '\n'.join(str_cell_outputs))
    finally:
        # never leave a kernel process behind
        kernel_client.stop_channels()
        kernel_manager.shutdown_kernel()

    passed = not (failures or errors)

    return passed, successes, failures, errors, report
コード例 #28
0
class SimpleKernel(object):
    """
    ## Description
    **SimpleKernel**:
     A simplistic Jupyter kernel client wrapper.

    Additional information is available in a related GitHub issue
    (the link was not preserved in this copy).
    """
    def __init__(self, use_exist=False):
        """
        ## Description
        Initializes the `kernel_manager` and `client` objects
        and starts the kernel. Also initializes the pretty printer
        for displaying object properties and execution result
        payloads.

        ## Parameters
        use_exist : bool (default=False)
            When False, a new kernel is started. When True, the client
            connects to the kernel described by the connection file
            returned by `find_connection_file()`.
        """
        if not use_exist:
            # Initialize kernel and client
            self.kernel_manager, self.client = start_new_kernel()
        else:
            # the connection file is resolved once and used for both calls
            connection_file = find_connection_file()
            self.kernel_manager = KernelManager(
                connection_file=connection_file)
            self.kernel_manager.load_connection_file(connection_file)
            self.client = self.kernel_manager.client()
            self.client.start_channels()
        self.send = self.client.execute

        # Initialize pretty printer
        self.pp = PrettyPrinter(indent=2)

    # end __init__ ##

    def execute(self, code):
        """
        ## Description
        **execute**:
        Executes a code string in the kernel and returns a textual
        result extracted from the messages published by the kernel.

        Messages are read until none arrives within 0.2 second. The
        message before the last one is then interpreted: an execution
        result gives its `text/plain` representation, a `stdout` stream
        gives the printed text, an error gives its traceback joined by
        line breaks. Anything else, or fewer than three messages, gives
        an empty string.

        ## Parameters
        code : string
            The code string to execute in the kernel.

        ## Returns
        The text described above (a string).
        """

        # Execute the code
        self.client.execute(code)

        # Continue polling for execution to complete
        list_io_msg = []
        while True:
            # Poll the message
            try:
                io_msg_content = self.client.get_iopub_msg(
                    timeout=0.2)['content']
                list_io_msg.append(io_msg_content)
            except queue.Empty:
                break

        # NOTE(review): presumably the last message is the final status
        # update and the one before carries the output - confirm against
        # the Jupyter messaging specification
        temp = list_io_msg[-2] if len(list_io_msg) >= 3 else {}

        # Check the message for various possibilities
        if 'data' in temp:  # Indicates completed operation
            out = temp['data']['text/plain']
        elif 'name' in temp and temp['name'] == "stdout":  # indicates output
            out = temp['text']
        elif 'traceback' in temp:  # Indicates error
            print("ERROR")
            out = '\n'.join(temp['traceback'])  # Put error into nice format
        else:
            out = ''

        return out

    def __del__(self):
        """
        ## Description
        Destructor. Shuts down kernel safely.
        Does nothing when the constructor failed before the kernel
        manager was created.
        """
        manager = getattr(self, "kernel_manager", None)
        if manager is not None:
            manager.shutdown_kernel()
コード例 #29
0
ファイル: ipynb_runner.py プロジェクト: gem/oq-ipynb-runner
def run_notebook(notebook):
    """
    Run every code cell of the notebook stored in file `notebook` in a
    fresh kernel and print a summary.

    The kernel's stderr is written to ``/tmp/km.stderr``. The kernel, its
    channels and the opened files are released even if an exception
    interrupts the run. An exception is raised, as before, when the file
    cannot be opened.

    Returns True if every code cell ran with status "ok", False if at
    least one cell failed or could not be run.
    """
    with open(notebook) as f:
        nb = reads(f.read(), 3)

    successes = 0
    failures = 0
    errors = 0

    stderr_log = open('/tmp/km.stderr', 'w')
    try:
        km = KernelManager()
        km.start_kernel(extra_arguments=['--pylab=inline'],
                        stderr=stderr_log)
        try:
            kc = km.client()
            kc.start_channels()
            try:
                shell = kc.shell_channel

                shell.get_msg()

                for ws in nb.worksheets:
                    for cell in ws.cells:
                        if cell.cell_type != 'code':
                            continue
                        try:
                            status, outs = run_cell(kc, cell, 30)
                        except Exception:
                            # details are deliberately not printed
                            # (turned off to avoid jenkins hang)
                            errors += 1
                            continue

                        # The comparison of `outs` with the stored outputs
                        # is turned off (to avoid jenkins hang), only the
                        # execution status decides.
                        if status != "ok":
                            failures += 1
                        else:
                            successes += 1

                # print(...) with a single argument behaves the same with
                # Python 2 and Python 3
                print("")
                print("tested notebook %s" % nb.metadata.name)
                print("    %3i cells successfully replicated" % successes)
                if failures:
                    print("    %3i cells mismatched output" % failures)
                if errors:
                    print("    %3i cells failed to complete" % errors)
            finally:
                kc.stop_channels()
        finally:
            km.shutdown_kernel()
            del km
    finally:
        stderr_log.close()

    return not (failures > 0 or errors > 0)
コード例 #30
0
ファイル: __init__.py プロジェクト: FabriceSalvaire/Pyterate
class JupyterClient:
    """Run source snippets in a Jupyter kernel and collect their outputs.

    The constructor starts a kernel (in-process when ``embed_kernel`` is true)
    with ``working_directory`` as its cwd and opens client channels to it.
    """

    _logger = _module_logger.getChild('JupyterClient')

    # Timeout handed to get_shell_msg() while waiting for the execute reply.
    TIMEOUT = None  # -1 ???

    ##############################################

    def __init__(self,
                 working_directory,
                 kernel='python3',
                 embed_kernel=False):
        """Start the kernel and connect a client to it.

        Args:
            working_directory: cwd passed to ``start_kernel``.
            kernel: kernel name given to the kernel manager.
            embed_kernel: use ``InProcessKernelManager`` instead of
                ``KernelManager`` when true.

        Raises:
            TimeoutError: if the kernel does not become ready.
        """

        if embed_kernel:
            self._kernel_manager = InProcessKernelManager(kernel_name=kernel)
        else:
            self._kernel_manager = KernelManager(kernel_name=kernel)

        # Kept on the instance so close() can release the handle; it must stay
        # open until then because the manager was launched with it.
        self._stderr = open(os.devnull, 'w')
        self._kernel_manager.start_kernel(cwd=working_directory,
                                          stderr=self._stderr)

        self._init_client()

    ##############################################

    def __del__(self):
        self.close()

    ##############################################

    def _init_client(self):
        """Create a client for the current kernel and wait until it is ready.

        Raises:
            TimeoutError: if ``wait_for_ready`` raises RuntimeError; the
                channels are stopped and the kernel shut down first.
        """

        self._kernel_client = self._kernel_manager.client()
        self._kernel_client.start_channels()
        try:
            self._kernel_client.wait_for_ready()
        except RuntimeError:
            message = 'Timeout from starting kernel'
            self._logger.error(message)
            # \nTry restarting python session and running again
            self._kernel_client.stop_channels()
            self._kernel_manager.shutdown_kernel()
            raise TimeoutError(message)

        self._kernel_client.allow_stdin = False

    ##############################################

    def close(self):
        """Shut the kernel down immediately and release the stderr sink.

        Tolerates a partially constructed instance: ``__del__`` calls this
        even when ``__init__`` raised before every attribute was assigned.
        """
        kernel_manager = getattr(self, '_kernel_manager', None)
        stderr = getattr(self, '_stderr', None)
        try:
            if kernel_manager is not None:
                self._logger.info('Stop kernel')
                # Pweave has this line
                #   Fixme: block ??? not documented ???
                #   self._kernel_client.stop_channels()
                kernel_manager.shutdown_kernel(now=True)
        finally:
            if stderr is not None:
                stderr.close()

    ##############################################

    def restart(self):
        """Restart the kernel and connect a fresh client to it."""

        self._logger.info('Restart kernel')

        # self._kernel_client.shutdown(restart=True) # ???

        # self._kernel_client.stop_channels()
        self._kernel_manager.restart_kernel(now=False)  # 1s to cleanup
        # do we have to sleep ???
        self._init_client()

    ##############################################

    @staticmethod
    def message_id_match(message, message_id):
        """Return True when *message* is a reply to request *message_id*."""
        return message['parent_header'].get('msg_id') == message_id

    ##############################################

    def _wait_for_finish(self, message_id):
        """Block until the shell reply for *message_id* arrives.

        Raises:
            TimeoutError: if no shell message arrives within ``TIMEOUT``.
        """

        while True:
            try:
                # Get a message from the shell channel
                message = self._kernel_client.get_shell_msg(
                    timeout=self.TIMEOUT)
            except Empty:
                message = 'Cell execution timed out'  # , see log for details.
                self._logger.error(message)
                raise TimeoutError(message)

            if self.message_id_match(message, message_id):
                break
            # otherwise: not our reply, keep waiting

    ##############################################

    def run_cell(self, source):
        """Execute the source.

        Return a :class:`JupyterOutputs` collection holding one
        :class:`JupyterOutput` per output message produced by the execution.

        Raises:
            TimeoutError: if the execute reply or the IOPub output does not
                arrive in time.
        """

        source = source.lstrip()

        message_id = self._kernel_client.execute(source, store_history=False)

        self._wait_for_finish(message_id)

        outputs = JupyterOutputs()
        while True:
            try:
                # We've already waited for execute_reply, so all output should already be
                # waiting. However, on slow networks, like in certain CI systems, waiting < 1 second
                # might miss messages.  So long as the kernel sends a status:idle message when it
                # finishes, we won't actually have to wait this long, anyway.
                message = self._kernel_client.iopub_channel.get_msg(block=True,
                                                                    timeout=4)
            except Empty:
                message = 'Timeout waiting for IOPub output'
                self._logger.error(message)
                # \nTry restarting python session and running again
                raise TimeoutError(message)

            # stdout from InProcessKernelManager has no parent_header
            if not self.message_id_match(
                    message, message_id) and message['msg_type'] != 'stream':
                continue

            message_type = message['msg_type']
            content = message['content']

            if message_type == 'status':
                if content['execution_state'] == 'idle':
                    break  # the kernel is done with this request
                continue
            elif message_type == 'execute_input':
                continue
            elif message_type == 'clear_output':
                # drop everything collected so far
                outputs = JupyterOutputs()
                continue
            elif message_type.startswith('comm'):
                continue

            try:
                output = nbformat_v4.output_from_msg(message)
            except ValueError:
                self._logger.error(
                    'unhandled iopub message: {}'.format(message_type))
            else:
                outputs.append(JupyterOutput(output))

        return outputs
コード例 #31
0
ファイル: base.py プロジェクト: gacluccati/backend.ai-agent
class BaseRunner(metaclass=ABCMeta):
    """Abstract base class for kernel runners.

    Subclasses provide the abstract hooks declared below (for example
    ``init_with_loop``, ``build_heuristic`` and ``execute_heuristic``).
    """

    # Prefix handed to setup_logger() in _init().
    log_prefix: ClassVar[str] = 'generic-kernel'
    # Both queues are created in _init().
    log_queue: janus.Queue[logging.LogRecord]
    task_queue: asyncio.Queue[Awaitable[None]]
    default_runtime_path: ClassVar[Optional[str]] = None
    # Defaults layered on top of os.environ to build ``child_env``.
    default_child_env: ClassVar[MutableMapping[str, str]] = {
        'LANG': 'C.UTF-8',
        'SHELL': '/bin/sh',
        'HOME': '/home/work',
        'LD_LIBRARY_PATH': os.environ.get('LD_LIBRARY_PATH', ''),
        'LD_PRELOAD': os.environ.get('LD_PRELOAD', ''),
    }
    # Substring matched against kernelspec names in _init_jupyter_kernel().
    jupyter_kspec_name: ClassVar[str] = ''
    # Jupyter kernel manager/client; remain None when no kernel was started.
    kernel_mgr = None
    kernel_client = None

    # Environment passed to spawned subprocesses and services.
    child_env: MutableMapping[str, str]
    # The subprocess currently run by run_subproc(), if any.
    subproc: Optional[asyncio.subprocess.Process]
    service_parser: Optional[ServiceParser]
    runtime_path: Path

    # Service name -> process started by _start_service().
    services_running: Dict[str, asyncio.subprocess.Process]

    # Result of the last build step; None when there is nothing to report.
    _build_success: Optional[bool]

    # Set by subclasses.
    user_input_queue: Optional[asyncio.Queue[str]]

    def __init__(self, runtime_path: Path) -> None:
        self.subproc = None
        self.runtime_path = runtime_path

        default_child_env_path = self.default_child_env.pop("PATH", None)
        self.child_env = {**os.environ, **self.default_child_env}
        if default_child_env_path is not None and "PATH" not in self.child_env:
            # set the default PATH env-var only when it's missing from the image
            self.child_env["PATH"] = default_child_env_path
        config_dir = Path('/home/config')
        try:
            evdata = (config_dir / 'environ.txt').read_text()
            for line in evdata.splitlines():
                k, v = line.split('=', 1)
                self.child_env[k] = v
                os.environ[k] = v
        except FileNotFoundError:
            pass
        except Exception:
            log.exception('Reading /home/config/environ.txt failed!')

        self.started_at: float = time.monotonic()
        self.services_running = {}

        # If the subclass implements interatcive user inputs, it should set a
        # asyncio.Queue-like object to self.user_input_queue in the
        # init_with_loop() method.
        self.user_input_queue = None

        # build status tracker to skip the execute step
        self._build_success = None

    async def _init(self, cmdargs) -> None:
        """Create sockets, queues, logging and the background tasks.

        Binds a PULL socket on port 2000 and a PUSH socket on port 2001,
        starts the log-forwarding task, loads service definitions when
        ``/etc/backend.ai/service-defs`` exists, and finally schedules
        ``main_loop`` and ``run_tasks``.
        """
        self.cmdargs = cmdargs
        loop = current_loop()
        self._service_lock = asyncio.Lock()

        # Blocking work dispatched to the default executor gets two workers.
        loop.set_default_executor(
            concurrent.futures.ThreadPoolExecutor(max_workers=2))

        self.zctx = zmq.asyncio.Context()
        pull_sock = self.zctx.socket(zmq.PULL)
        self.insock = pull_sock
        pull_sock.bind('tcp://*:2000')
        push_sock = self.zctx.socket(zmq.PUSH)
        self.outsock = push_sock
        push_sock.bind('tcp://*:2001')

        self.log_queue = janus.Queue()
        self.task_queue = asyncio.Queue()
        self.init_done = asyncio.Event()

        setup_logger(self.log_queue.sync_q, self.log_prefix, cmdargs.debug)
        self._log_task = loop.create_task(self._handle_logs())
        # Yield once so the log task gets a chance to start.
        await asyncio.sleep(0)

        defs_dir = Path('/etc/backend.ai/service-defs')
        if not defs_dir.is_dir():
            self.service_parser = None
        else:
            parser = ServiceParser({
                'runtime_path': str(self.runtime_path),
            })
            self.service_parser = parser
            await parser.parse(defs_dir)
            log.debug('Loaded new-style service definitions.')

        self._main_task = loop.create_task(self.main_loop(cmdargs))
        self._run_task = loop.create_task(self.run_tasks())

    async def _shutdown(self) -> None:
        """Stop background tasks, service processes, sockets and the kernel.

        The statement order matters: the input socket is closed first, the
        worker tasks are cancelled and awaited, service processes are
        terminated, and only then are the output socket, the Jupyter kernel
        and finally the log task released.
        """
        try:
            self.insock.close()
            log.debug('shutting down...')
            self._run_task.cancel()
            self._main_task.cancel()
            # Wait for both tasks to finish handling their cancellation.
            await self._run_task
            await self._main_task
            log.debug('terminating service processes...')
            # Snapshot the processes; the list is taken before the lock is held.
            running_procs = [*self.services_running.values()]
            async with self._service_lock:
                # return_exceptions=True: one failing termination must not
                # prevent the others from being awaited.
                await asyncio.gather(
                    *(terminate_and_wait(proc) for proc in running_procs),
                    return_exceptions=True,
                )
                await asyncio.sleep(0.01)
            log.debug('terminated.')
        finally:
            # allow remaining logs to be flushed.
            await asyncio.sleep(0.1)
            try:
                if self.outsock:
                    self.outsock.close()
                await self._shutdown_jupyter_kernel()
            finally:
                # The log task goes last so that earlier steps can still log.
                self._log_task.cancel()
                await self._log_task

    async def _init_jupyter_kernel(self) -> None:
        """Detect ipython kernel spec for backend.ai and start it if found.

        Called after `init_with_loop`. `jupyter_kspec_name` should be defined to
        initialize jupyter kernel.

        The first kernelspec whose name contains ``jupyter_kspec_name`` is
        started.  On any failure ``kernel_mgr`` and ``kernel_client`` are left
        as None (which is what ``query()`` checks) and a kernel that was
        already launched is shut down here, because
        ``_shutdown_jupyter_kernel`` skips cleanup once ``kernel_mgr`` is None.

        NOTE(review): an empty ``jupyter_kspec_name`` is a substring of every
        name, so the first available kernelspec is started -- confirm that
        this is intended.
        """
        # Make inline backend defaults in Matplotlib.
        kconfigdir = Path('/home/work/.ipython/profile_default/')
        kconfigdir.mkdir(parents=True, exist_ok=True)
        kconfig_file = kconfigdir / 'ipython_kernel_config.py'
        kconfig_file.write_text("c.InteractiveShellApp.matplotlib = 'inline'")

        kernelspec_mgr = KernelSpecManager()
        kernelspec_mgr.ensure_native_kernel = False
        kspecs = kernelspec_mgr.get_all_specs()
        for kname in kspecs:
            if self.jupyter_kspec_name not in kname:
                continue
            log.debug('starting ' + kname + ' kernel...')
            kernel_mgr = KernelManager(kernel_name=kname)
            kernel_mgr.start_kernel()
            if not kernel_mgr.is_alive():
                log.error('jupyter query mode is disabled: '
                          'failed to start jupyter kernel')
                # Do not publish a dead manager: query() treats a non-None
                # kernel_mgr as "query mode available".
                self.kernel_mgr = None
                break
            self.kernel_mgr = kernel_mgr
            self.kernel_client = kernel_mgr.client()
            self.kernel_client.start_channels(shell=True,
                                              iopub=True,
                                              stdin=True,
                                              hb=True)
            try:
                self.kernel_client.wait_for_ready(timeout=10)
            except RuntimeError:
                log.error('jupyter channel is not active!')
                # Release the half-started kernel now; nothing else will once
                # the manager reference is dropped.
                try:
                    self.kernel_client.stop_channels()
                    kernel_mgr.shutdown_kernel(now=True)
                except Exception:
                    log.exception('failed to clean up the jupyter kernel')
                self.kernel_client = None
                self.kernel_mgr = None
            break
        else:
            log.debug('jupyter query mode is not available: '
                      'no jupyter kernelspec found')
            self.kernel_mgr = None

    async def _shutdown_jupyter_kernel(self):
        if self.kernel_mgr and self.kernel_mgr.is_alive():
            log.info('shutting down ' + self.jupyter_kspec_name + ' kernel...')
            self.kernel_client.stop_channels()
            self.kernel_mgr.shutdown_kernel()
            assert not self.kernel_mgr.is_alive(
            ), 'ipykernel failed to shutdown'

    async def _init_with_loop(self) -> None:
        """Run the subclass initialization hook followed by the sshd setup.

        ``init_done`` is cleared before the work starts and set again when it
        ends, whether or not it succeeded.  Exceptions are logged and
        suppressed so that a failing initialization does not stop the runner.
        """
        if self.init_done is not None:
            self.init_done.clear()
        try:
            await self.init_with_loop()
            await init_sshd_service(self.child_env)
        except Exception:
            log.exception('Unexpected error!')
            log.warning(
                'We are skipping the error but the container may not work as expected.'
            )
        finally:
            # Always signal completion so that waiters are released.
            if self.init_done is not None:
                self.init_done.set()

    @abstractmethod
    async def init_with_loop(self) -> None:
        """Initialize after the event loop is created.

        Awaited by ``_init_with_loop()``; exceptions raised here are logged
        and suppressed by that wrapper.
        """

    async def _clean(self, clean_cmd: Optional[str]) -> None:
        ret = 0
        try:
            if clean_cmd is None or clean_cmd == '':
                # skipped
                return
            elif clean_cmd == '*':
                ret = await self.clean_heuristic()
            else:
                ret = await self.run_subproc(clean_cmd)
        except Exception:
            log.exception('unexpected error')
            ret = -1
        finally:
            await asyncio.sleep(0.01)  # extra delay to flush logs
            payload = json.dumps({
                'exitCode': ret,
            }).encode('utf8')
            await self.outsock.send_multipart([b'clean-finished', payload])

    async def clean_heuristic(self) -> int:
        """Default clean step used when the clean command is ``'*'``.

        Returns:
            Always 0; ``_clean()`` reports the value as the exit code.
        """
        # it should not do anything by default.
        return 0

    async def _bootstrap(self, script_path: Path) -> None:
        """Run the user bootstrap script through ``/bin/sh`` and log the result.

        Failures are logged and recorded as exit code -1; nothing is raised
        for ordinary exceptions.
        """
        log.info('Running the user bootstrap script...')
        exit_code = 0
        try:
            shell_cmd = ['/bin/sh', str(script_path)]
            exit_code = await self.run_subproc(shell_cmd)
        except Exception:
            log.exception(
                'unexpected error while executing the user bootstrap script')
            exit_code = -1
        finally:
            await asyncio.sleep(0.01)  # extra delay to flush logs
            log.info('The user bootstrap script has exited with code {}',
                     exit_code)

    async def _build(self, build_cmd: Optional[str]) -> None:
        ret = 0
        try:
            if build_cmd is None or build_cmd == '':
                # skipped
                return
            elif build_cmd == '*':
                if Path('Makefile').is_file():
                    ret = await self.run_subproc('make')
                else:
                    ret = await self.build_heuristic()
            else:
                ret = await self.run_subproc(build_cmd)
        except Exception:
            log.exception('unexpected error')
            ret = -1
        finally:
            await asyncio.sleep(0.01)  # extra delay to flush logs
            self._build_success = (ret == 0)
            payload = json.dumps({
                'exitCode': ret,
            }).encode('utf8')
            await self.outsock.send_multipart([b'build-finished', payload])

    @abstractmethod
    async def build_heuristic(self) -> int:
        """Process build step.

        Called by ``_build()`` when the build command is ``'*'`` and no
        ``Makefile`` is present.  The returned int is reported as the exit
        code; 0 counts as success.
        """

    async def _execute(self, exec_cmd: str) -> None:
        ret = 0
        try:
            if exec_cmd is None or exec_cmd == '':
                # skipped
                return
            elif exec_cmd == '*':
                ret = await self.execute_heuristic()
            else:
                ret = await self.run_subproc(exec_cmd, batch=True)
        except Exception:
            log.exception('unexpected error')
            ret = -1
        finally:
            await asyncio.sleep(0.01)  # extra delay to flush logs
            payload = json.dumps({
                'exitCode': ret,
            }).encode('utf8')
            await self.outsock.send_multipart([b'finished', payload])

    @abstractmethod
    async def execute_heuristic(self) -> int:
        """Process execute step.

        Called by ``_execute()`` when the execute command is ``'*'``.  The
        returned int is reported as the exit code.
        """

    async def _query(self, code_text: str) -> None:
        ret = 0
        try:
            ret = await self.query(code_text)
        except Exception:
            log.exception('unexpected error')
            ret = -1
        finally:
            payload = json.dumps({
                'exitCode': ret,
            }).encode('utf8')
            await self.outsock.send_multipart([b'finished', payload])

    async def query(self, code_text) -> int:
        """Run user's code in query mode.

        The default interface is jupyter kernel. To use different interface,
        `Runner` subclass should override this method.

        Kernel output is forwarded over ``outsock`` as ``stdout``, ``stderr``
        or ``media`` messages.  Returns 0 on success and 127 when no kernel is
        available or the execution raised.
        """
        if getattr(self, 'kernel_mgr', None) is None:
            log.error('query mode is disabled: '
                      'failed to start jupyter kernel')
            return 127

        log.debug('executing in query mode...')

        async def emit(channel, body):
            # One two-frame message per piece of output.
            await self.outsock.send_multipart([channel, body])

        async def output_hook(msg):
            content = msg.get('content', '')
            msg_type = msg['msg_type']
            if msg_type == 'stream':
                # content['name'] will be 'stdout' or 'stderr'.
                await emit(content['name'].encode('ascii'),
                           content['text'].encode('utf-8'))
                return
            if msg_type == 'error':
                traceback_text = '\n'.join(content['traceback'])
                await emit(b'stderr', traceback_text.encode('utf-8'))
                return
            if msg_type not in ('execute_result', 'display_data'):
                return

            data = content['data']
            if len(data) < 1:
                return
            if len(data) > 1:
                # A richer representation exists; drop the plain-text one.
                data.pop('text/plain', None)
            dtype, dval = next(iter(data.items()))

            if dtype == 'text/plain':
                await emit(b'stdout', dval.encode('utf-8'))
            elif dtype == 'text/html':
                await emit(b'media', dval.encode('utf-8'))
            # text/markdown, text/latex, application/json and
            # application/javascript are not handled.
            elif dtype in ('image/png', 'image/jpeg'):
                media = json.dumps({
                    'type': dtype,
                    'data': f'data:{dtype};base64,{dval}',
                })
                await emit(b'media', media.encode('utf-8'))
            elif dtype == 'image/svg+xml':
                media = json.dumps({
                    'type': dtype,
                    'data': dval
                })
                await emit(b'media', media.encode('utf8'))

        async def forward_input_request(msg):
            if msg['msg_type'] != 'input_request':
                return
            prompt = msg['content']['prompt']
            password = msg['content']['password']
            if prompt:
                await emit(b'stdout', prompt.encode('utf-8'))
            request = json.dumps({
                'is_password': password
            })
            await emit(b'waiting-input', request.encode('utf-8'))
            user_input = await self.user_input_queue.async_q.get()
            self.kernel_client.input(user_input)

        # Run jupyter kernel's blocking execution method in an executor pool.
        # stdin is forwarded only when the subclass provided an input queue.
        if self.user_input_queue is None:
            allow_stdin, stdin_hook = False, None
        else:
            allow_stdin, stdin_hook = True, forward_input_request
        try:
            await aexecute_interactive(self.kernel_client,
                                       code_text,
                                       timeout=None,
                                       output_hook=output_hook,
                                       allow_stdin=allow_stdin,
                                       stdin_hook=stdin_hook)
        except Exception as e:
            log.error(str(e))
            return 127
        return 0

    async def _complete(self, completion_data) -> Sequence[str]:
        result: Sequence[str] = []
        try:
            result = await self.complete(completion_data)
        except Exception:
            log.exception('unexpected error')
        finally:
            return result

    async def complete(self, completion_data) -> Sequence[str]:
        """Return the list of strings to be shown in the auto-complete list.

        The default interface is jupyter kernel. To use different interface,
        `Runner` subclass should override this method.
        """
        # TODO: implement with jupyter_client
        '''
        matches = []
        self.outsock.send_multipart([
            b'completion',
            json.dumps(matches).encode('utf8'),
        ])
        '''
        # if hasattr(self, 'kernel_mgr') and self.kernel_mgr is not None:
        #     self.kernel_mgr.complete(data, len(data))
        # else:
        #     return []
        return []

    async def _interrupt(self):
        try:
            if self.subproc:
                self.subproc.send_signal(signal.SIGINT)
                return
            return await self.interrupt()
        except Exception:
            log.exception('unexpected error')
        finally:
            # this is a unidirectional command -- no explicit finish!
            pass

    async def interrupt(self):
        """Interrupt the running user code (only called for query-mode).

        The default interface is jupyter kernel. To use different interface,
        `Runner` subclass should implement its own `complete` method.
        """
        if hasattr(self, 'kernel_mgr') and self.kernel_mgr is not None:
            self.kernel_mgr.interrupt_kernel()

    async def _send_status(self):
        """Send a msgpack-encoded ``status`` message carrying ``started_at``."""
        packed = msgpack.packb({'started_at': self.started_at},
                               use_bin_type=True)
        await self.outsock.send_multipart([b'status', packed])

    @abstractmethod
    async def start_service(self, service_info):
        """Start an application service daemon.

        Used by ``_start_service()`` as the legacy fallback.  That caller
        accepts ``None``, a ``(cmdargs, env)`` pair or a
        ``(cmdargs, env, cwd)`` triple; a ``cmdargs`` of None marks the
        service as unsupported, which is what this default returns.
        """
        return None, {}

    async def _start_service(self, service_info, user_requested: bool = True):
        """Start the service described by ``service_info`` and report the result.

        The command line is resolved in this order: the built-in ``ttyd``,
        ``sshd`` and ``vscode`` services, then the service-definition parser
        (when one was loaded), then the legacy ``start_service()`` hook.  The
        whole operation runs under ``_service_lock``.

        Args:
            service_info: mapping read for ``protocol``, ``name``, ``port``,
                and on the parser path ``ports`` and ``options``.
            user_requested: when true, a ``service-result`` message carrying
                the status dict is sent on ``outsock``.
        """
        async with self._service_lock:
            try:
                # Every early ``return`` below still passes through the
                # ``finally`` clause, which reports ``result``.
                if service_info['protocol'] == 'preopen':
                    # skip subprocess spawning as we assume the user runs it manually.
                    result = {'status': 'started'}
                    return
                if service_info['name'] in self.services_running:
                    result = {'status': 'running'}
                    return
                if service_info['protocol'] == 'pty':
                    result = {
                        'status': 'failed',
                        'error': 'not implemented yet'
                    }
                    return
                cwd = Path.cwd()
                cmdargs: Optional[Sequence[Union[str, os.PathLike]]]
                env: Mapping[str, str]
                cmdargs, env = None, {}
                if service_info['name'] == 'ttyd':
                    cmdargs, env = await prepare_ttyd_service(service_info)
                elif service_info['name'] == 'sshd':
                    cmdargs, env = await prepare_sshd_service(service_info)
                elif service_info['name'] == 'vscode':
                    cmdargs, env = await prepare_vscode_service(service_info)
                elif self.service_parser is not None:
                    self.service_parser.variables['ports'] = service_info[
                        'ports']
                    cmdargs, env = await self.service_parser.start_service(
                        service_info['name'],
                        self.child_env.keys(),
                        service_info['options'],
                    )
                if cmdargs is None:
                    # fall-back to legacy service routine
                    start_info = await self.start_service(service_info)
                    if start_info is None:
                        cmdargs, env = None, {}
                    elif len(start_info) == 3:
                        cmdargs, env, cwd = start_info
                    elif len(start_info) == 2:
                        cmdargs, env = start_info
                if cmdargs is None:
                    # still not found?
                    log.warning('The service {0} is not supported.',
                                service_info['name'])
                    result = {
                        'status': 'failed',
                        'error': 'unsupported service',
                    }
                    return
                log.debug('cmdargs: {0}', cmdargs)
                log.debug('env: {0}', env)
                # Service-specific variables override the common child env.
                service_env = {**self.child_env, **env}
                # avoid conflicts with Python binary used by service apps.
                if 'LD_LIBRARY_PATH' in service_env:
                    service_env['LD_LIBRARY_PATH'] = \
                        service_env['LD_LIBRARY_PATH'].replace('/opt/backend.ai/lib:', '')
                try:
                    proc = await asyncio.create_subprocess_exec(
                        *map(str, cmdargs),
                        env=service_env,
                        cwd=cwd,
                    )
                    self.services_running[service_info['name']] = proc
                    # NOTE(review): the watcher task is not referenced
                    # anywhere; asyncio recommends keeping a reference to
                    # tasks created with create_task() -- confirm.
                    asyncio.create_task(
                        self._wait_service_proc(service_info['name'], proc))
                    # NOTE(review): the parser path reads 'ports' while this
                    # waits on 'port' -- presumably both keys exist; verify
                    # against the caller.
                    await wait_local_port_open(service_info['port'])
                    log.info("Service {} has started (pid: {}, port: {})",
                             service_info['name'], proc.pid,
                             service_info['port'])
                    result = {'status': 'started'}
                except asyncio.CancelledError:
                    # This may happen if the service process gets started but it fails to
                    # open the port and then terminates (with an error).
                    result = {
                        'status':
                        'failed',
                        'error':
                        f"the process did not start properly: {cmdargs[0]}"
                    }
                except PermissionError:
                    result = {
                        'status': 'failed',
                        'error':
                        f"the target file is not executable: {cmdargs[0]}"
                    }
                except FileNotFoundError:
                    result = {
                        'status': 'failed',
                        'error':
                        f"the executable file is not found: {cmdargs[0]}"
                    }
            except Exception as e:
                log.exception('start_service: unexpected error')
                result = {
                    'status': 'failed',
                    'error': repr(e),
                }
            finally:
                # Internal (not user-requested) starts are not reported.
                if user_requested:
                    await self.outsock.send_multipart([
                        b'service-result',
                        json.dumps(result).encode('utf8'),
                    ])

    async def _wait_service_proc(
        self,
        service_name: str,
        proc: asyncio.subprocess.Process,
    ) -> None:
        """Wait for a service process to exit, then forget it.

        Logs the exit code and removes ``service_name`` from
        ``services_running`` (a missing entry is ignored).
        """
        exitcode = await proc.wait()
        log.info(
            f"Service {service_name} (pid: {proc.pid}) has terminated with exit code: {exitcode}"
        )
        self.services_running.pop(service_name, None)

    async def run_subproc(self,
                          cmd: Union[str, List[str]],
                          batch: bool = False):
        """Run an external command, forwarding its stdout/stderr to ``outsock``.

        A list or tuple is executed directly; any other value is passed to the
        shell as a string.  Output is additionally appended to a per-kernel
        log file when ``/home/work/.logs`` exists (``os.devnull`` otherwise).

        Args:
            cmd: the command as an argument list or a shell string.
            batch: when true, ``_BACKEND_BATCH_MODE=1`` is added to the
                child environment.

        Returns:
            The process return code, or -1 when spawning or piping failed.
        """
        loop = current_loop()
        if not Path('/home/work/.logs').is_dir():
            log_path = Path(os.path.devnull)
        else:
            kernel_id_hex = uuid.UUID(os.environ['BACKENDAI_KERNEL_ID']).hex
            log_path = Path(
                '/home/work/.logs/task/'
                f'{kernel_id_hex[:2]}/{kernel_id_hex[2:4]}/{kernel_id_hex[4:]}.log'
            )
            log_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            # errors like "command not found" is handled by the spawned shell.
            # (the subproc will terminate immediately with return code 127)
            if isinstance(cmd, (list, tuple)):
                spawn = partial(asyncio.create_subprocess_exec,
                                *(str(arg) for arg in cmd))
            else:
                spawn = partial(asyncio.create_subprocess_shell, str(cmd))
            with open(log_path, 'ab') as log_out:
                env = dict(self.child_env)
                if batch:
                    env['_BACKEND_BATCH_MODE'] = '1'
                proc = await spawn(
                    env=env,
                    stdin=None,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                # Published so that _interrupt() can signal the process.
                self.subproc = proc
                log_fd = log_out.fileno()
                streams = ((proc.stdout, 'stdout'), (proc.stderr, 'stderr'))
                pipe_tasks = [
                    loop.create_task(
                        pipe_output(stream, self.outsock, name, log_fd))
                    for stream, name in streams
                ]
                retcode = await proc.wait()
                # Drain both pipes before the log file is closed.
                await asyncio.gather(*pipe_tasks)
            return retcode
        except Exception:
            log.exception('unexpected error')
            return -1
        finally:
            self.subproc = None

    async def shutdown(self):
        """Shutdown hook; the base implementation does nothing."""
        pass

    async def _shutdown_service(self, service_name: str):
        try:
            async with self._service_lock:
                if service_name in self.services_running:
                    await terminate_and_wait(
                        self.services_running[service_name])
                    self.services_running.pop(service_name, None)
        except Exception:
            log.exception('unexpected error (shutdown_service)')

    async def handle_user_input(self, reader, writer):
        """Serve one connection on the user-input socket.

        When no input queue is configured, a fixed "unsupported" marker is
        written back.  Otherwise a ``waiting-input`` message is sent on the
        output socket, and the next text taken from ``user_input_queue`` is
        written to the connection as UTF-8.

        :param reader: stream reader of the connection (unused).
        :param writer: stream writer the reply is written to; it is always
            closed before this coroutine finishes.
        """
        try:
            if self.user_input_queue is None:
                writer.write(b'<user-input is unsupported>')
            else:
                await self.outsock.send_multipart([b'waiting-input', b''])
                text = await self.user_input_queue.get()
                writer.write(text.encode('utf8'))
            await writer.drain()
        except Exception:
            log.exception('unexpected error (handle_user_input)')
        finally:
            # Close on every exit path (errors and cancellation included) so
            # a failed exchange does not leave the connection open.
            writer.close()

    async def run_tasks(self):
        """Consume queued task callables one at a time until cancelled.

        An exec task that follows a failed build is not run: the build flag
        is reset and a ``finished`` message with exit code 127 is sent
        instead.
        """
        while True:
            try:
                task = await self.task_queue.get()
                skip_exec = (
                    self._build_success is not None
                    and task.func == self._execute
                    and not self._build_success
                )
                if skip_exec:
                    self._build_success = None
                    # skip exec step with "command not found" exit code
                    body = json.dumps({'exitCode': 127}).encode('utf8')
                    await self.outsock.send_multipart([b'finished', body])
                else:
                    await task()
                self.task_queue.task_done()
            except asyncio.CancelledError:
                break

    async def _handle_logs(self):
        log_queue = self.log_queue.async_q
        try:
            while True:
                rec = await log_queue.get()
                await self.outsock.send_multipart(rec)
                log_queue.task_done()
        except asyncio.CancelledError:
            self.log_queue.close()
            await self.log_queue.wait_closed()

    async def _get_apps(self, service_name):
        result = {'status': 'done', 'data': []}
        if self.service_parser is not None:
            if service_name:
                apps = await self.service_parser.get_apps(
                    selected_service=service_name)
            else:
                apps = await self.service_parser.get_apps()
            result['data'] = apps
        await self.outsock.send_multipart([
            b'apps-result',
            json.dumps(result).encode('utf8'),
        ])

    async def main_loop(self, cmdargs):
        """Initialise the runner and dispatch incoming requests until cancelled.

        Steps, in order:

        1. Start a TCP server on 127.0.0.1:65000 served by
           ``handle_user_input``.
        2. Run ``_init_with_loop`` and ``_init_jupyter_kernel``.
        3. Run ``/home/work/bootstrap.sh`` through ``_bootstrap`` if that
           file exists.
        4. Start the ``sshd`` and ``ttyd`` services concurrently; failures
           are logged and do not stop the loop.
        5. Receive two-part messages from ``insock`` and dispatch on the
           first part (the operation type); the second part is the UTF-8
           payload.

        On cancellation the user-input server is closed and ``shutdown`` is
        awaited.

        :param cmdargs: not used in this method.
        """
        user_input_server = \
            await asyncio.start_server(self.handle_user_input,
                                       '127.0.0.1', 65000)
        await self._init_with_loop()
        await self._init_jupyter_kernel()

        user_bootstrap_path = Path('/home/work/bootstrap.sh')
        if user_bootstrap_path.is_file():
            await self._bootstrap(user_bootstrap_path)

        log.debug('starting intrinsic services: sshd, ttyd ...')
        intrinsic_spawn_coros = []
        intrinsic_spawn_coros.append(
            self._start_service(
                {
                    'name': 'sshd',
                    'port': 2200,
                    'protocol': 'tcp',
                },
                user_requested=False))
        intrinsic_spawn_coros.append(
            self._start_service(
                {
                    'name': 'ttyd',
                    'port': 7681,
                    'protocol': 'http',
                },
                user_requested=False))
        # return_exceptions=True: one service failing to start must not
        # prevent the other from starting or abort the main loop.
        results = await asyncio.gather(*intrinsic_spawn_coros,
                                       return_exceptions=True)
        for result in results:
            if isinstance(result, Exception):
                log.exception(
                    'error during starting intrinsic services',
                    exc_info=result,
                )

        log.debug('start serving...')
        while True:
            try:
                data = await self.insock.recv_multipart()
                if len(data) != 2:
                    # maybe some garbage data
                    continue
                op_type = data[0].decode('ascii')
                text = data[1].decode('utf8')
                # clean/build/exec/code are queued on task_queue rather than
                # awaited here; the remaining operations are handled inline.
                if op_type == 'clean':
                    await self.task_queue.put(partial(self._clean, text))
                # NOTE(review): plain `if` rather than `elif`; harmless since
                # the op types are mutually exclusive, but inconsistent with
                # the rest of the chain.
                if op_type == 'build':  # batch-mode step 1
                    await self.task_queue.put(partial(self._build, text))
                elif op_type == 'exec':  # batch-mode step 2
                    await self.task_queue.put(partial(self._execute, text))
                elif op_type == 'code':  # query-mode
                    await self.task_queue.put(partial(self._query, text))
                elif op_type == 'input':  # interactive input
                    if self.user_input_queue is not None:
                        await self.user_input_queue.put(text)
                elif op_type == 'complete':  # auto-completion
                    data = json.loads(text)
                    await self._complete(data)
                elif op_type == 'interrupt':
                    await self._interrupt()
                elif op_type == 'status':
                    await self._send_status()
                elif op_type == 'start-service':  # activate a service port
                    data = json.loads(text)
                    # NOTE(review): the task is not stored anywhere; asyncio
                    # only keeps weak references to tasks, so consider
                    # keeping a reference until it completes.
                    asyncio.create_task(self._start_service(data))
                elif op_type == 'shutdown-service':  # shutdown the service by its name
                    data = json.loads(text)
                    await self._shutdown_service(data)
                elif op_type == 'get-apps':
                    await self._get_apps(text)
            except asyncio.CancelledError:
                break
            except NotImplementedError:
                log.error('Unsupported operation for this kernel: {0}',
                          op_type)
                # yield control to the event loop before the next receive
                await asyncio.sleep(0)
            except Exception:
                log.exception('main_loop: unexpected error')
                # we need to continue anyway unless we are shutting down
                continue
        user_input_server.close()
        await user_input_server.wait_closed()
        await self.shutdown()
コード例 #32
0
class JupyterKernel(object):
    """Run source code in a Jupyter kernel and collect the resulting outputs.

    Typical call order: ``start_kernel()``, ``start_client()``, any number
    of ``run_cell()`` / ``run_code()`` calls, then ``stop_client()`` and
    ``stop_kernel()``.
    """

    kernel = None  # KernelManager, set in __init__
    curdir = None  # working directory the kernel is started in
    client = None  # kernel client, set by start_client()
    output = None  # outputs collected by the most recent run

    def __init__(self, kernel: str, curdir: str):
        """Create the kernel manager without starting the kernel.

        :param kernel: kernel name passed to ``KernelManager``.
        :param curdir: working directory used when the kernel is started.
        """
        self.kernel = KernelManager(kernel_name=kernel)
        self.curdir = curdir

    def start_kernel(self):
        """Start the kernel process in ``curdir`` with stderr discarded."""
        self.kernel.start_kernel(cwd=self.curdir, stderr=open(devnull, 'w'))

    def start_client(self):
        """Connect a client to the kernel and wait until it is ready.

        :raises RuntimeError: if the kernel does not become ready; the
            channels are stopped and the kernel is shut down first.
        """
        self.client = self.kernel.client()
        self.client.start_channels()
        try:
            self.client.wait_for_ready()
        except RuntimeError:
            print(
                "Timeout from starting kernel\nTry restarting python session and running weave again"
            )
            self.client.stop_channels()
            # Shutting the kernel down is the manager's job; the client has
            # no ``shutdown_kernel`` method, so calling it there would raise
            # AttributeError and hide the original timeout.
            self.kernel.shutdown_kernel()
            raise
        self.output = []

    def run_code(self, src: str):
        """Execute ``src`` and append every produced output to ``self.output``.

        :param src: source code; leading whitespace is stripped first.
        :raises RuntimeError: if no IOPub message arrives within 4 seconds.
        """
        msg_id = self.client.execute(src.lstrip(), store_history=False)

        # Wait for the execute reply that belongs to our request.
        while True:
            try:
                msg = self.client.get_shell_msg(timeout=None)
            except Empty:
                raise TimeoutError(
                    "Cell execution timed out, see log for details.")
            if msg['parent_header'].get('msg_id') == msg_id:
                break
            # not our reply, keep waiting

        while True:
            try:
                # We've already waited for execute_reply, so all output
                # should already be waiting. However, on slow networks, like
                # in certain CI systems, waiting < 1 second might miss messages.
                # So long as the kernel sends a status:idle message when it
                # finishes, we won't actually have to wait this long, anyway.
                msg = self.client.iopub_channel.get_msg(block=True, timeout=4)
            except Empty:
                print(
                    "Timeout waiting for IOPub output\nTry restarting python session and running weave again"
                )
                raise RuntimeError("Timeout waiting for IOPub output")

            msg_type = msg['msg_type']
            content = msg['content']

            # stdout from InProcessKernelManager has no parent_header, so
            # stream messages are accepted regardless of their parent.
            if (msg['parent_header'].get('msg_id') != msg_id
                    and msg_type != "stream"):
                continue

            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    break
                continue
            if msg_type == 'execute_input':
                continue
            if msg_type == 'clear_output':
                self.output = []
                continue
            if msg_type.startswith('comm'):
                continue

            try:
                out = output_from_msg(msg)
            except ValueError:
                print("unhandled iopub msg: " + msg_type)
            else:
                self.output.append(out)

    def run_cell(self, cell: dict):
        """Run the code stored in ``cell['content']`` and return its outputs.

        Lines that are empty or whitespace-only are dropped before
        execution.

        :param cell: mapping with the source under the ``'content'`` key.
        :return: list of outputs collected during this run.
        """
        self.output = []
        code = ''.join(
            line + '\n'
            for line in cell['content'].split('\n')
            if line.strip() != ''
        )
        self.run_code(code)
        return self.output

    def stop_client(self):
        """Stop the client's channels."""
        self.client.stop_channels()

    def stop_kernel(self):
        """Shut the kernel process down."""
        self.kernel.shutdown_kernel()
コード例 #33
0
ファイル: ipynbtest.py プロジェクト: thempel/PyEMMA_IPython
class IPyKernel(object):
    """
    A simple wrapper class to run cells in an IPython Notebook.

    Notes
    -----
    - Use `with` construct to properly instantiate
    - IPython 3.0.0+ is assumed for this version

    """
    def __init__(self, console=None, nb_version=4):
        """
        Parameters
        ----------
        console : object, optional
            accepted but not used by this class
        nb_version : int
            notebook format version; output comparison is only implemented
            for version 4
        """
        # default timeout time is 60 seconds
        self.default_timeout = 60
        self.extra_arguments = ['--pylab=inline']
        self.nb_version = nb_version

    def __enter__(self):
        """Start a kernel and its channels, then drain the startup messages."""
        self.km = KernelManager()
        self.km.start_kernel(extra_arguments=self.extra_arguments,
                             stderr=open(os.devnull, 'w'))

        self.kc = self.km.client()
        self.kc.start_channels()

        self.iopub = self.kc.iopub_channel
        self.shell = self.kc.shell_channel

        # run %pylab inline, because some notebooks assume this
        # even though they shouldn't

        self.shell.send("pass")
        self.shell.get_msg()
        # discard everything already queued on iopub so that the first
        # real cell only sees its own messages
        while True:
            try:
                self.iopub.get_msg(timeout=1)
            except Empty:
                break

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the channels and shut the kernel down."""
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        del self.km

    def run(self, cell, timeout=None):
        """
        Run a notebook cell in the IPythonKernel

        Parameters
        ----------
        cell : IPython.notebook.Cell
            the cell to be run
        timeout : int or None (default)
            the time in seconds after which a cell is stopped and assumed to
            have timed out. If set to None the value in `default_timeout`
            is used

        Returns
        -------
        list of outs
            a list of NotebookNodes of the returned types. This is
            similar to the list of outputs generated when a cell is run

        Raises
        ------
        AttributeError
            if the cell has neither an `input` nor a `source` attribute
        """
        use_timeout = self.default_timeout if timeout is None else timeout

        if hasattr(cell, 'input'):
            self.kc.execute(cell.input)
        elif hasattr(cell, 'source'):
            self.kc.execute(cell.source)
        else:
            raise AttributeError('No source/input key')

        self.shell.get_msg(timeout=use_timeout)
        outs = []

        while True:
            try:
                msg = self.iopub.get_msg(timeout=0.5)
            except Empty:
                break
            msg_type = msg['msg_type']
            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type == 'clear_output':
                outs = []
                continue
            elif msg_type.startswith('comm_'):
                # widget updates and communication are ignored; they must
                # not produce an (empty) output node
                continue

            content = msg['content']
            out = NotebookNode(output_type=msg_type)

            if msg_type == 'stream':
                out.name = content['name']
                out.text = content['text']
            elif msg_type in ('display_data', 'pyout', 'execute_result'):
                # `content` is a mapping, so the count has to be looked up
                # as a key; it is absent for display_data
                out['execution_count'] = content.get('execution_count')
                out['data'] = content['data']
                out['metadata'] = content['metadata']
            elif msg_type == 'error':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            else:
                print("unhandled iopub msg: %s %s" % (msg_type, content))

            outs.append(out)

        return outs

    def sanitize(self, s):
        """sanitize a string for comparison.

        fix universal newlines, strip trailing newlines, and normalize likely
        random values (memory addresses and UUIDs). Non-string values are
        returned unchanged.
        """
        try:
            text_types = basestring  # Python 2: str and unicode
        except NameError:
            text_types = str  # Python 3
        if not isinstance(s, text_types):
            return s
        # normalize newline:
        s = s.replace('\r\n', '\n')

        # ignore trailing newlines (but not space)
        s = s.rstrip('\n')

        # normalize hex addresses:
        s = re.sub(r'0x[a-f0-9]+', '0xFFFFFFFF', s)

        # normalize UUIDs:
        s = re.sub(r'[a-f0-9]{8}(\-[a-f0-9]{4}){3}\-[a-f0-9]{12}', 'U-U-I-D',
                   s)

        return s

    def compare_outputs(self,
                        test,
                        ref,
                        skip_compare=('traceback', 'latex',
                                      'execution_count')):
        """
        Compare two outputs (`NotebookNode` or dict) key by key

        Parameters
        ----------
        test : `NotebookNode`
            the output generated by the kernel
        ref : `NotebookNode`
            the reference output read from the notebook
        skip_compare : list of str
            a list of strings that name keys that are not to be tested

        Returns
        -------
        bool
            True if the outputs differ; always False when `nb_version`
            is not 4
        list of diff
            a list of diff (str) that represent the differences
        """
        diff = False
        diff_list = []

        if self.nb_version == 4:
            for key in ref:
                if key not in test:
                    return True, [
                        "missing key: %s != %s" % (test.keys(), ref.keys())
                    ]

                elif key not in skip_compare:
                    if key == 'data':
                        # compare every mime type of the produced output
                        for data_key in test[key]:
                            my_diff = self.do_diff(data_key, test[key],
                                                   ref[key])

                            if my_diff is not None:
                                diff_list += my_diff
                                diff = True

                    else:
                        my_diff = self.do_diff(key, test, ref)
                        if my_diff is not None:
                            diff_list += my_diff
                            diff = True

        return diff, diff_list

    def do_diff(self, key, test_cell, ref_cell):
        """
        Compare the key of two dicts

        Parameters
        ----------
        key : string
            the key to be compared
        test_cell : dict
            a dict with `key` as a key of string value
        ref_cell : dict
            a dict with `key` as a key of string value

        Returns
        -------
        list of diff (str) or None
            a list of diff representing the differences, None if the
            sanitized values are equal. A missing key counts as ''.
        """
        # Key membership, not hasattr(): hasattr() is False for every key
        # of a plain dict, which made all dict values compare as equal.
        s1 = self.sanitize(ref_cell[key]) if key in ref_cell else ''
        s2 = self.sanitize(test_cell[key]) if key in test_cell else ''

        if s1 == s2:
            return None

        if key in ['image/png', 'image/svg', 'image/svg+xml']:
            # images are only compared by size; s2 is the new (test)
            # value, s1 the old (reference) one
            return [
                '>>> diff in %s (size new : %d vs size old : %d )' %
                (key, len(s2), len(s1))
            ]

        expected = s1.splitlines(True)
        actual = s2.splitlines(True)
        return ['>>> diff in ' + key] + list(difflib.ndiff(expected, actual))

    def get_commands(self, cell):
        """
        Extract potential commands from the first line of a cell

        if a code cell starts with the hashbang `#!` it can be followed by
        a comma separated list of commands. Each command can be
        a single key `skip`
        or
        a key/value pair separated by a colon `timeout:[int]`

        Parameters
        ----------
        cell : a NotebookCell
            the cell to be examined

        Returns
        -------
        dict
            a dict of key/value pairs. For a single command the value is
            `True`; values of key/value pairs are kept as strings.
        """
        commands = {}
        source = self.get_source(cell)
        if not source:
            return commands

        lines = source.splitlines()
        if not lines or not lines[0].startswith('#!'):
            return commands

        for part in lines[0][2:].strip().split(','):
            subparts = part.split(':')
            if len(subparts) == 1:
                commands[subparts[0].strip().lower()] = True
            elif len(subparts) == 2:
                commands[subparts[0].strip().lower()] = subparts[1]

        return commands

    def get_source(self, cell):
        """
        get the source code of a cell

        Notes
        -----
        This is legacy of IPython 2/3 conversion.

        Parameters
        ----------
        cell : a NotebookCell
            the cell to be examined

        Returns
        -------
        string or None
            the source code; None for non-code cells and for code cells
            without an `input` or `source` attribute

        """
        if cell.cell_type != 'code':
            return None
        if hasattr(cell, 'input'):
            return cell.input
        if hasattr(cell, 'source'):
            return cell.source
        return None

    def is_empty_cell(self, cell):
        """
        Check if a cell has no code

        Parameters
        ----------
        cell : a NotebookCell
            the cell to be examined

        Returns
        -------
        bool
            True if the cell has no code, False otherwise
        """
        source = self.get_source(cell)
        return source is None or source == ''
コード例 #34
0
def test_notebook(nb):
    """Re-run every code cell of ``nb`` and compare against stored outputs.

    A fresh kernel is started with ``--pylab=inline``.  Each code cell is
    executed through ``run_cell`` and its outputs are compared pairwise
    with the outputs stored in the notebook using ``compare_outputs``.
    A summary of replicated, mismatched and failed cells is printed.

    The kernel and its channels are shut down even when an unexpected
    error interrupts the run.

    :param nb: notebook object exposing ``cells`` and
        ``metadata.kernelspec.name``.
    """
    km = KernelManager()
    km.start_kernel(extra_arguments=['--pylab=inline'],
                    stderr=open(os.devnull, 'w'))
    kc = None
    try:
        try:
            kc = km.client()
            kc.start_channels()
            iopub = kc.iopub_channel
        except AttributeError:
            print("AttributeError")
            # IPython 0.13
            kc = km
            kc.start_channels()
            iopub = kc.sub_channel
        shell = kc.shell_channel

        # drain whatever the kernel published while starting up
        while True:
            try:
                iopub.get_msg(timeout=1)
            except Empty:
                break

        successes = 0
        failures = 0
        errors = 0
        for cell in nb.cells:
            if cell.cell_type != 'code':
                continue
            try:
                outs = run_cell(shell, iopub, cell, kc)
            except Exception as e:
                # No interactive debugger here: dropping into pdb would
                # block unattended runs forever.
                print("failed to run cell:", repr(e))
                print(cell.source)
                errors += 1
                continue

            failed = False
            for out, ref in zip(outs, cell.outputs):
                if not compare_outputs(out, ref):
                    failed = True
            if failed:
                failures += 1
            else:
                successes += 1
            sys.stdout.write('.')

        print("tested notebook %s" % nb.metadata.kernelspec.name)
        print("    %3i cells successfully replicated" % successes)
        if failures:
            print("    %3i cells mismatched output" % failures)
        if errors:
            print("    %3i cells failed to complete" % errors)
    finally:
        if kc is not None:
            kc.stop_channels()
        km.shutdown_kernel()
コード例 #35
0
class NotebookRunner(object):

    """
    The kernel communicates with mime-types while the notebook
    uses short labels for different cell types. We'll use this to
    map from kernel types to notebook format types.

    This classes executes a notebook end to end.

    .. index:: kernel, notebook

    The class can use different kernels. The next links gives more
    information on how to create or test a kernel:

    * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_
    * `simple_kernel <https://github.com/dsblank/simple_kernel>`_

    .. faqref::
        :title: Do I need to shutdown the kernel after running a notebook?

        .. index:: travis

        If the class is instantiated with *kernel=True*, a kernel will
        be started. It must be shutdown otherwise the program might
        be waiting for it for ever. That is one of the reasons why the
        travis build does not complete. The build finished but cannot temrinate
        until all kernels are shutdown.
    """

    #. available output types
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'image/gif': 'gif',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }

    def __init__(self, nb, profile_dir=None, working_dir=None,
                 comment="", fLOG=noLOG, theNotebook=None, code_init=None,
                 kernel_name="python", log_level="30", extended_args=None,
                 kernel=False, filename=None, replacements=None):
        """
        constructor

        @param      nb              notebook as JSON
        @param      profile_dir     profile directory
        @param      working_dir     working directory, the kernel is started from it
        @param      comment         additional information added to error message
        @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
        @param      code_init       to initialize the notebook with a python code as if it was a cell
        @param      fLOG            logging function
        @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
        @param      kernel_name     kernel name, it can be None
        @param      extended_args   others arguments to pass to the command line ('--KernelManager.autorestar=True' for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
        @param      kernel          *kernel* is False by default, the notebook can be read but not run,
                                    if True, a kernel is started and the notebook can be run
        @param      filename        to add the notebook file if there is one in error messages
        @param      replacements    replacements to make in every cell before running it,
                                    dictionary ``{ string: string }``

        Raises *SyntaxError* if an option in *extended_args* does not start
        with ``--`` or has no value, *Exception* if the kernel cannot be started.

        .. versionchanged:: 1.4
            Parameter *replacements* was added.
        """
        if kernel:
            try:
                from jupyter_client import KernelManager
            except ImportError:
                from ipykernel import KernelManager
            self.km = KernelManager(
                kernel_name=kernel_name) if kernel_name is not None else KernelManager()
        else:
            self.km = None
        self.fLOG = fLOG
        self.theNotebook = theNotebook
        self.code_init = code_init
        self._filename = filename if filename is not None else "memory"
        self.replacements = replacements
        # Keep the values actually received so that copy() rebuilds the
        # runner with the same kernel name, log level and extra options.
        self.init_args = dict(profile_dir=profile_dir, working_dir=working_dir,
                              comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
                              kernel_name=kernel_name, log_level=log_level,
                              extended_args=extended_args,
                              kernel=kernel, filename=filename, replacements=replacements)
        args = []

        if profile_dir:
            args.append('--profile-dir=%s' % os.path.abspath(profile_dir))
        if log_level:
            args.append('--log-level=%s' % log_level)

        if extended_args is not None and len(extended_args) > 0:
            for opt in extended_args:
                if not opt.startswith("--"):
                    raise SyntaxError(
                        "every option should start with '--': " + opt)
                if "=" not in opt:
                    raise SyntaxError(
                        "every option should be assigned a value: " + opt)
                args.append(opt)

        if kernel:
            cwd = os.getcwd()

            if working_dir:
                os.chdir(working_dir)

            # The kernel inherits the current directory; always go back to
            # the previous one, even when the kernel fails to start.
            try:
                if sys.version_info[0] == 2 and args is not None:
                    # I did not find a way to make it work
                    # warn before dropping the options so the message
                    # shows what is being ignored
                    warnings.warn(
                        "args is not None: {0}, unable to use it in Python 2.7".format(args))
                    args = None
                    self.km.start_kernel()
                else:
                    try:
                        self.km.start_kernel(extra_arguments=args)
                    except Exception as e:
                        raise Exception(
                            "Failure with args: {0}\nand error:\n{1}".format(args, str(e))) from e

                if platform.system() == 'Darwin':
                    # see http://www.pypedia.com/index.php/notebook_runner
                    # There is sometimes a race condition where the first
                    # execute command hits the kernel before it's ready.
                    # It appears to happen only on Darwin (Mac OS) and an
                    # easy (but clumsy) way to mitigate it is to sleep
                    # for a second.
                    sleep(1)
            finally:
                os.chdir(cwd)

            self.kc = self.km.client()
            self.kc.start_channels(stdin=False)
            # if it does not work, it probably means IPython < 3
            self.kc.wait_for_ready()
        else:
            self.km = None
            self.kc = None
        self.nb = nb
        self.comment = comment

    def to_json(self, filename=None, encoding="utf8"):
        """
        serialize the notebook as json

        @param      filename        file name, stream or None
        @param      encoding        encoding used when *filename* is a file name
        @return                     Json string if filename is None, None otherwise

        .. versionchanged:: 1.4
            The function now returns the json string if filename is None.
        """
        if filename is None:
            return writes(self.nb)
        if isinstance(filename, str):
            # a path: open it and write through the stream branch
            with open(filename, "w", encoding=encoding) as stream:
                self.to_json(stream)
            return None
        # anything else is treated as a writable stream
        filename.write(writes(self.nb))
        return None

    @staticmethod
    def read_json(js, profile_dir=None, encoding="utf8",
                  working_dir=None, comment="", fLOG=noLOG, code_init=None,
                  kernel_name="python", log_level="30", extended_args=None,
                  kernel=False, replacements=None):
        """
        read a notebook from a JSON stream or string

        @param      js              string or stream
        @param      profile_dir     profile directory
        @param      encoding        encoding for the notebooks
        @param      working_dir     working directory
        @param      comment         additional information added to error message
        @param      code_init       to initialize the notebook with a python code as if it was a cell
        @param      fLOG            logging function
        @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
        @param      kernel_name     kernel name, it can be None
        @param      extended_args   others arguments to pass to the command line ('--KernelManager.autorestar=True' for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
        @param      kernel          *kernel* is False by default, the notebook can be read but not run,
                                    if True, a kernel is started to execute it
        @param      replacements    replacements to make in every cell before running it,
                                    dictionary ``{ string: string }``
        @return                     instance of @see cl NotebookRunner

        .. versionchanged:: 1.5
            Add constructor parameters.
        """
        if isinstance(js, str):
            st = StringIO(js)
        else:
            st = js
        from .notebook_helper import read_nb
        # forward the caller's kernel options instead of fixed defaults
        return read_nb(st, encoding=encoding, kernel=kernel,
                       profile_dir=profile_dir, working_dir=working_dir,
                       comment=comment, fLOG=fLOG, code_init=code_init,
                       kernel_name=kernel_name, log_level=log_level,
                       extended_args=extended_args,
                       replacements=replacements)

    def copy(self):
        """
        duplicate the notebook (only its content) by serializing it to
        json and reading it back with the stored constructor arguments,
        *theNotebook* and *filename* excepted

        @return         instance of @see cl NotebookRunner

        .. versionadded:: 1.1

        .. versionchanged:: 1.5
            Add constructor parameters.
        """
        buffer = StringIO()
        self.to_json(buffer)
        excluded = ("theNotebook", "filename")
        kwargs = {name: value for name, value in self.init_args.items()
                  if name not in excluded}
        return NotebookRunner.read_json(buffer.getvalue(), **kwargs)

    def __add__(self, nb):
        """
        merges two notebooks together, returns a new none

        @param      nb      notebook
        @return             new notebook
        """
        c = self.copy()
        c.merge_notebook(nb)
        return c

    def shutdown_kernel(self):
        """
        shut down kernel
        """
        self.fLOG('-- shutdown kernel')
        if self.kc is None:
            raise ValueError(
                "No kernel was started, specify kernel=True when initializing the instance.")
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def clean_code(self, code):
        """
        clean the code before running it, the function comment out
        instruction such as ``show()``

        @param      code        code (string)
        @return                 cleaned code

        .. versionchanged:: 1.4
            Do replacements.
        """
        has_bokeh = "bokeh." in code or "from bokeh" in code or "import bokeh" in code
        if code is None:
            return code
        else:
            lines = [_.strip("\n\r").rstrip(" \t") for _ in code.split("\n")]
            res = []
            show_is_last = False
            for line in lines:
                if line.replace(" ", "") == "show()":
                    line = line.replace("show", "#show")
                    show_is_last = True
                elif has_bokeh and line.replace(" ", "") == "output_notebook()":
                    line = line.replace("output_notebook", "#output_notebook")
                else:
                    show_is_last = False
                if self.replacements is not None:
                    for k, v in self.replacements.items():
                        line = line.replace(k, v)
                res.append(line)
                if show_is_last:
                    res.append('"nothing to show"')
            return "\n".join(res)

    @staticmethod
    def get_cell_code(cell):
        """
        return the code of a cell

        @param      cell        a cell or a string
        @return                 boolean (=iscell), string
        """
        if isinstance(cell, str  # unicode#
                      ):
            iscell = False
            return iscell, cell
        else:
            iscell = True
            try:
                return iscell, cell.source
            except AttributeError:
                return iscell, cell.input

    def run_cell(self, index_cell, cell, clean_function=None):
        '''
        Runs a notebook cell (or a piece of code given as a string)
        and updates the outputs of that cell in-place.

        The code is cleaned with @see me clean_code (and *clean_function*
        if specified), sent to the kernel client ``self.kc``, then the method
        reads the shell reply and gathers the messages published on the
        iopub channel until the kernel reports it is idle.

        @param      index_cell          index of the cell (only used in the error message)
        @param      cell                cell to execute or a string
        @param      clean_function      cleaning function to apply to the code before running it
        @return                         list of outputs (``NotebookNode``), or an empty string
                                        if the cleaned code is empty or if the iopub queue
                                        was found empty more than 10 times

        The method raises ``ValueError`` if no kernel client is available,
        ``NotImplementedError`` for an unexpected iopub message type and
        ``NotebookError`` if the final status is ``'error'``.
        '''
        iscell, codei = NotebookRunner.get_cell_code(cell)

        self.fLOG('-- running cell:\n%s\n' % codei)

        code = self.clean_code(codei)
        if clean_function is not None:
            code = clean_function(code)
        # nothing to run
        if len(code) == 0:
            return ""
        if self.kc is None:
            raise ValueError(
                "No kernel was started, specify kernel=True when initializing the instance.")
        self.kc.execute(code)

        # the shell reply tells whether the execution succeeded,
        # no timeout is given to this call
        reply = self.kc.get_shell_msg()
        reason = None
        try:
            status = reply['content']['status']
        except KeyError:
            # a reply without status is considered as an error
            status = 'error'
            reason = "no status key in reply['content']"

        if status == 'error':
            # removes ANSI escape sequences (colors) from the traceback
            ansi_escape = re.compile(r'\x1b[^m]*m')
            try:
                tr = [ansi_escape.sub('', _)
                      for _ in reply['content']['traceback']]
            except KeyError:
                tr = ["No traceback, available keys in reply['content']"] + \
                    [_ for _ in reply['content']]
            traceback_text = '\n'.join(tr)
            self.fLOG("ERR:\n", traceback_text)
        else:
            traceback_text = ''
            self.fLOG('-- cell returned')

        # reads the messages published on the iopub channel
        # until the kernel goes back to the idle state
        outs = list()
        nbissue = 0
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=1)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:
                # execution state should return to idle before the queue becomes empty,
                # if it doesn't, something bad has happened
                # the status stays 'error' even if messages are received afterwards
                status = "error"
                reason = "exception Empty was raised"
                nbissue += 1
                if nbissue > 10:
                    # the notebook is empty
                    # the outputs of the cell are not updated in that case
                    return ""
                else:
                    continue

            content = msg['content']
            msg_type = msg['msg_type']

            # IPython 3.0.0-dev writes pyerr/pyout in the notebook format but uses
            # error/execute_result in the message spec. This does the translation
            # needed for tests to pass with IPython 3.0.0-dev
            notebook3_format_conversions = {
                'error': 'pyerr',
                'execute_result': 'pyout'
            }
            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            # the execution count is stored in the cell and in the output
            if 'execution_count' in content:
                if iscell:
                    cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            # these messages do not produce any output
            if msg_type in ('status', 'pyin', 'execute_input'):
                continue

            elif msg_type == 'stream':
                out.stream = content['name']
                # in msgspec 5, this is name, text
                # in msgspec 4, this is name, data
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.data = content['data']

            elif msg_type in ('display_data', 'pyout'):
                out.data = content['data']

            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']

            elif msg_type == 'clear_output':
                # drops every output collected so far
                outs = list()
                continue

            elif msg_type == 'comm_open' or msg_type == 'comm_msg':
                # widgets in a notebook
                out.data = content["data"]
                out.comm_id = content["comm_id"]

            else:
                dcontent = "\n".join("{0}={1}".format(k, v)
                                     for k, v in sorted(content.items()))
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type + "\nCONTENT:\n" + dcontent)

            outs.append(out)

        if iscell:
            cell['outputs'] = outs

        # gathers the text representations of the outputs, they are logged
        # and added to the error message
        raw = []
        for _ in outs:
            try:
                t = _.data
            except AttributeError:
                # this output has no attribute data
                continue

            # see MIMEMAP to see the available output type
            for k, v in t.items():
                if k.startswith("text"):
                    raw.append(v)

        sraw = "\n".join(raw)
        self.fLOG(sraw)

        def reply2string(reply):
            # converts the reply into text, one line per key,
            # dictionaries are expanded on one level
            sreply = []
            for k, v in sorted(reply.items()):
                if isinstance(v, dict):
                    temp = []
                    for _, __ in sorted(v.items()):
                        temp.append("    [{0}]={1}".format(_, str(__)))
                    v = "\n".join(temp)
                    sreply.append("reply['{0}']=dict\n{1}".format(k, v))
                else:
                    sreply.append("reply['{0}']={1}".format(k, str(v)))
            sreply = "\n".join(sreply)
            return sreply

        if status == 'error':
            sreply = reply2string(reply)
            # a very short code is displayed as a list in the message
            if len(code) < 5:
                scode = [code]
            else:
                scode = ""
            mes = "FILENAME\n{10}:1:1\n{7}\nCELL status={8}, reason={9} -- {4} length={5} -- {6}:\n-----------------\n{0}" + \
                  "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}"
            raise NotebookError(mes.format(
                code, traceback_text, sraw, sreply, index_cell, len(
                    code), scode, self.comment, status, reason,
                self._filename))
        return outs

    def iter_code_cells(self):
        '''
        Iterate over the notebook cells containing code.
        '''
        for cell in self.iter_cells():
            if cell.cell_type == 'code':
                yield cell

    def iter_cells(self):
        '''
        Iterate over the notebook cells.
        '''
        if hasattr(self.nb, "worksheets"):
            for ws in self.nb.worksheets:
                for cell in ws.cells:
                    yield cell
        else:
            for cell in self.nb.cells:
                yield cell

    def first_cell(self):
        """
        Returns the first cell.
        """
        for cell in self.iter_cells():
            return cell

    def _cell_container(self):
        """
        returns a cells container, it may change according to the format

        @return     cell container
        """
        if hasattr(self.nb, "worksheets"):
            last = None
            for ws in self.nb.worksheets:
                last = ws
            if last is None:
                raise NotebookError("no cell container")
            return last.cells
        else:
            return self.nb.cells

    def __len__(self):
        """
        return the number of cells, it iterates on cells
        to get this information and does cache the information

        @return         int

        .. versionadded:: 1.1
        """
        return sum(1 for _ in self.iter_cells())

    def cell_type(self, cell):
        """
        returns the cell type

        @param      cell        from @see me iter_cells
        @return                 type
        """
        return cell.cell_type

    def cell_metadata(self, cell):
        """
        returns the cell metadata

        @param      cell        cell
        @return                 metadata
        """
        return cell.metadata

    def _check_thumbnail_tuple(self, b):
        """
        checks types for a thumbnail

        @param      b       tuple   image, format
        @return             b

        The function raises an exception if the type is incorrect.
        """
        if not isinstance(b, tuple):
            raise TypeError("tuple expected, not {0}".format(type(b)))
        if len(b) != 2:
            raise TypeError(
                "tuple expected of lengh 2, not {0}".format(len(b)))
        if b[1] == "svg":
            if not isinstance(b[0], str):
                raise TypeError(
                    "str expected for svg, not {0}".format(type(b[0])))
        else:
            if not isinstance(b[0], bytes):
                raise TypeError(
                    "bytes expected for images, not {0}".format(type(b[0])))
        return b

    def create_picture_from(self, text, format, asbytes=True, context=None):
        """
        Creates a picture from text.

        @param      text        the text, bytes are decoded as utf-8,
                                any other object is converted with ``str``
        @param      format      text, json, ... (not used by the current implementation)
        @param      context     (str) indication on the content of text (error, ...),
                                not used by the current implementation
        @param      asbytes     results as bytes or as an image
        @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

        The picture will be bytes, the format png.
        The size of the picture will depend on the text.
        The longer, the bigger. The method relies on matplotlib
        and then convert the image into a PIL image.
        Only the first 200 characters of the text are drawn.

        HTML could be rendered with QWebPage from PyQt (not implemented).
        """
        if isinstance(text, bytes):
            # str methods used below do not accept bytes
            text = text.decode("utf-8", errors="replace")
        elif not isinstance(text, str):
            text = str(text)
            if "\n" not in text:
                # a long single line is split into rows of 20 characters
                text = "\n".join(text[i:i + 20]
                                 for i in range(0, len(text), 20))
        if len(text) > 200:
            text = text[:200]
        size = len(text) // 10
        figsize = (3 + size, 3 + size)
        lines = text.replace("\t", " ").replace("\r", "").split("\n")

        import matplotlib.pyplot as plt
        from matplotlib.textpath import TextPath
        from matplotlib.font_manager import FontProperties
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # draws one line below the other and keeps track
        # of the total height (dy) and of the maximum width (dx)
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        # the aspect ratio of the figure is kept between 1 and 3
        ratio = abs(dx) / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        ax.set_xlim([0, dx])
        ax.set_ylim([-dy, 0])
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
        # closes the figure created by this method and not the current one
        plt.close(fig)

        if asbytes:
            b = sio.getvalue(), "png"
            self._check_thumbnail_tuple(b)
            return b
        else:
            try:
                from PIL import Image
            except ImportError:
                import Image
            sio.seek(0)
            img = Image.open(sio)
            return img

    def cell_image(self, cell, image_from_text=False):
        """
        returns the cell image or None if not found

        @param      cell            cell to examine
        @param      image_from_text produce an image even if it is not one
        @return                     None for no image or a list of tuple (image as bytes, extension)
                                    for each output of the cell
        """
        kind = self.cell_type(cell)
        if kind != "code":
            return None
        results = []
        for output in cell.outputs:
            if output["output_type"] in {"execute_result", "display_data"}:
                data = output["data"]
                for k, v in data.items():
                    if k == "text/plain":
                        if image_from_text:
                            b = self.create_picture_from(
                                v, "text", context=output["output_type"])
                            results.append(b)
                    elif k == "application/javascript":
                        if image_from_text:
                            b = self.create_picture_from(v, "js")
                            results.append(b)
                    elif k == "application/json":
                        if image_from_text:
                            b = self.create_picture_from(v, "json")
                            results.append(b)
                    elif k == "image/svg+xml":
                        if not isinstance(v, str):
                            raise TypeError(
                                "This should be str not '{0}' (=SVG).".format(type(v)))
                        results.append((v, "svg"))
                    elif k == "text/html":
                        if image_from_text:
                            b = self.create_picture_from(v, "html")
                            results.append(b)
                    elif k == "text/latex":
                        if image_from_text:
                            b = self.create_picture_from(v, "latex")
                            results.append(b)
                    elif k in {"image/png", "image/jpg", "image/jpeg", "image/gif"}:
                        if not isinstance(v, bytes):
                            v = base64.b64decode(v)
                        if not isinstance(v, bytes):
                            raise TypeError(
                                "This should be bytes not '{0}' (=IMG:{1}).".format(type(v), k))
                        results.append((v, k.split("/")[-1]))
                    else:
                        raise NotImplementedError("cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(kind,
                                                                                                    k, v, cell))
            elif output["output_type"] == "error":
                vl = output["traceback"]
                if image_from_text:
                    for v in vl:
                        b = self.create_picture_from(
                            v, "text", context="error")
                        results.append(b)
            elif output["output_type"] == "stream":
                v = output["text"]
                if image_from_text:
                    b = self.create_picture_from(v, "text")
                    results.append(b)
            else:
                raise NotImplementedError("cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                                          .format(kind, output["output_type"], output, cell))
        if len(results) > 0:
            res = self._merge_images(results)
            self._check_thumbnail_tuple(res)
            return res
        else:
            return None

    def cell_height(self, cell):
        """
        approximate the height of a cell by its number of lines it contains

        @param      cell        cell
        @return                 number of cell
        """
        kind = self.cell_type(cell)
        if kind == "markdown":
            content = cell.source
            lines = content.split("\n")
            nbs = sum(1 + len(line) // 80 for line in lines)
            return nbs
        elif kind == "raw":
            content = cell.source
            lines = content.split("\n")
            nbs = sum(1 + len(line) // 80 for line in lines)
            return nbs
        elif kind == "code":
            content = cell.source
            lines = content.split("\n")
            nbl = len(lines)

            for output in cell.outputs:
                if output["output_type"] == "execute_result" or \
                        output["output_type"] == "display_data":
                    data = output["data"]
                    for k, v in data.items():
                        if k == "text/plain":
                            nbl += len(v.split("\n"))
                        elif k == "application/javascript":
                            # rough estimation
                            nbl += len(v.split("\n")) // 2
                        elif k == "application/json":
                            # rough estimation
                            try:
                                nbl += len(v.split("{"))
                            except AttributeError:
                                nbl += len(v) // 5 + 1
                        elif k == "image/svg+xml":
                            nbl += len(v) // 5
                        elif k == "text/html":
                            nbl += len(v.split("\n"))
                        elif k == "text/latex":
                            nbl += len(v.split("\\\\")) * 2
                        elif k in {"image/png", "image/jpg", "image/jpeg", "image/gif"}:
                            nbl += len(v) // 50
                        else:
                            raise NotImplementedError("cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(kind,
                                                                                                        k, v, cell))
                elif output["output_type"] == "stream":
                    v = output["text"]
                    nbl += len(v.split("\n"))
                elif output["output_type"] == "error":
                    v = output["traceback"]
                    nbl += len(v)
                else:
                    raise NotImplementedError("cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                                              .format(kind, output["output_type"], output, cell))

            return nbl

        else:
            raise NotImplementedError(
                "cell type: {0}\nCELL:\n{1}".format(kind, cell))

    def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
        """
        tries to add tags for a slide show when they are too few

        @param      max_nb_cell     maximum number of cells within a slide
        @param      max_nb_line     maximum number of lines within a slide
        @return                     list of modified cells { #slide: (kind, reason, cell) }
        """
        res = {}
        nbline = 0
        nbcell = 0
        for i, cell in enumerate(self.iter_cells()):
            meta = cell.metadata
            if "slideshow" in meta:
                st = meta["slideshow"]["slide_type"]
                if st in ["slide", "subslide"]:
                    nbline = 0
                    nbcell = 0
            else:
                if cell.cell_type == "markdown":
                    content = cell.source
                    if content.startswith("# ") or \
                       content.startswith("## ") or \
                       content.startswith("### "):
                        meta["slideshow"] = {'slide_type': 'slide'}
                        nbline = 0
                        nbcell = 0
                        res[i] = ("slide", "section", cell)

            dh = self.cell_height(cell)
            dc = 1
            new_nbline = nbline + dh
            new_cell = dc + nbcell
            if "slideshow" not in meta:
                if new_cell > max_nb_cell or \
                   new_nbline > max_nb_line:
                    res[i] = (
                        "subslide", "{0}-{1} <-> {2}-{3}".format(nbcell, nbline, dc, dh), cell)
                    nbline = 0
                    nbcell = 0
                    meta["slideshow"] = {'slide_type': 'subslide'}

            nbline += dh
            nbcell += dc

        return res

    def run_notebook(self,
                     skip_exceptions=False,
                     progress_callback=None,
                     additional_path=None,
                     valid=None,
                     clean_function=None):
        '''
        Run all the cells of a notebook in order and update
        the outputs in-place.

        If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
        subsequent cells are run (by default, the notebook execution stops).

        @param      skip_exceptions     skip exception
        @param      progress_callback   call back function
        @param      additional_path     additional paths (as a list or None if none)
        @param      valid               if not None, valid is a function which returns whether
                                        or not the cell should be executed or not, if the function
                                        returns None, the execution of the notebooks and skip the execution
                                        of the other cells
        @param      clean_function      function which cleans a cell's code before executing it (None for None)
        @return                         dictionary with statistics

        .. versionchanged:: 1.1
            The function adds the local variable ``theNotebook`` with
            the absolute file name of the notebook.

        .. versionchanged:: 1.4
            Function *valid* can now return None to stop the execution of the notebook
            before this cell.
        '''
        # additional path
        if additional_path is not None:
            if not isinstance(additional_path, list):
                raise TypeError(
                    "additional_path should be a list not: " + str(additional_path))
            code = ["import sys"]
            for p in additional_path:
                code.append("sys.path.append(r'{0}')".format(p))
            cell = "\n".join(code)
            self.run_cell(-1, cell)

        # we add local variable theNotebook
        if self.theNotebook is not None:
            cell = "theNotebook = r'''{0}'''".format(self.theNotebook)
            self.run_cell(-1, cell)

        # initialisation with a code not inside the notebook
        if self.code_init is not None:
            self.run_cell(-1, self.code_init)

        # execution of the notebook
        nbcell = 0
        nbrun = 0
        nbnerr = 0
        cl = time.clock()
        for i, cell in enumerate(self.iter_code_cells()):
            nbcell += 1
            iscell, codei = NotebookRunner.get_cell_code(cell)
            if valid is not None:
                r = valid(codei)
                if r is None:
                    break
                elif not r:
                    continue
            try:
                nbrun += 1
                self.run_cell(i, cell, clean_function=clean_function)
                nbnerr += 1
            except Empty as er:
                raise Exception(
                    "{0}\nissue when executing:\n{1}".format(self.comment, codei)) from er
            except NotebookError as e:
                if not skip_exceptions:
                    raise
                else:
                    raise Exception(
                        "issue when executing:\n{0}".format(codei)) from e
            if progress_callback:
                progress_callback(i)
        etime = time.clock() - cl
        return dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)

    def count_code_cells(self):
        '''
        @return the number of code cells in the notebook

        .. versionadded:: 1.1
        '''
        return sum(1 for _ in self.iter_code_cells())

    def merge_notebook(self, nb):
        """
        append notebook *nb* to this one

        @param      nb      notebook or list of notebook (@see cl NotebookRunner)
        @return             number of added cells

        .. faqref::
            :title: How to merge notebook?

            The following code merges two notebooks into the first one
            and stores the result unto a file.

            @code
            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
            @endcode

        .. versionadded:: 1.1
        """
        if isinstance(nb, list):
            s = 0
            for n in nb:
                s += self.merge_notebook(n)
            return s
        else:
            last = self._cell_container()
            s = 0
            for cell in nb.iter_cells():
                last.append(cell)
                s += 1
            return s

    def get_description(self):
        """
        Get summary and description of this notebook.
        We expect the first cell to contain a title and a description
        of its content.

        @return             header, description

        .. versionadded:: 1.5
        """
        def split_header(s, get_header=True):
            s = s.lstrip().rstrip()
            parts = s.splitlines()
            if parts[0].startswith('#'):
                if get_header:
                    header = re.sub('#+\s*', '', parts.pop(0))
                    if not parts:
                        return header, ''
                else:
                    header = ''
                rest = '\n'.join(parts).lstrip().split('\n\n')
                desc = rest[0].replace('\n', ' ')
                return header, desc
            else:
                if get_header:
                    if parts[0].startswith(('=', '-')):
                        parts = parts[1:]
                    header = parts.pop(0)
                    if parts and parts[0].startswith(('=', '-')):
                        parts.pop(0)
                    if not parts:
                        return header, ''
                else:
                    header = ''
                rest = '\n'.join(parts).lstrip().split('\n\n')
                desc = rest[0].replace('\n', ' ')
                return header, desc

        first_cell = self.first_cell()

        if not first_cell['cell_type'] == 'markdown':
            raise ValueError("The first cell is not in markdown but '{0}'.".format(
                first_cell['cell_type']))

        header, desc = split_header(first_cell['source'])
        if not desc and len(self.nb['cells']) > 1:
            second_cell = self.nb['cells'][1]
            if second_cell['cell_type'] == 'markdown':
                _, desc = split_header(second_cell['source'], False)

        reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
        reg = re.compile(reg_link)
        new_desc = reg.sub("\\2", desc)
        if "http://" in new_desc or "https://" in new_desc:
            raise ValueError(
                "Wrong regular expression:\n{0}\nMODIFIED:\n{1}".format(desc, new_desc))
        return header, new_desc.replace('"', "")

    def get_thumbnail(self, max_width=200, max_height=200):
        """
        Process the notebook and create one picture based on the outputs
        to illustrate a notebook.

        @param      max_width       maximum size of the thumbnail
        @param      max_height      maximum size of the thumbnail
        @return                     string (SVG) or Image (PIL)

        This functionality might not works with Python 2.7.

        .. versionadded:: 1.5
        """
        images = []
        cells = list(self.iter_cells())
        cells.reverse()
        for cell in cells:
            c = self.cell_image(cell, False)
            if c is not None and len(c) > 0 and len(c[0]) > 0:
                self._check_thumbnail_tuple(c)
                images.append(c)
        if len(images) == 0:
            for cell in cells:
                c = self.cell_image(cell, True)
                if c is not None and len(c) > 0 and len(c[0]) > 0:
                    self._check_thumbnail_tuple(c)
                    images.append(c)
                    if len(c[0]) >= 1000:
                        break
        if len(images) == 0:
            # no image, we need to consider the default one
            no_image = os.path.join(
                os.path.dirname(__file__), 'no_image_nb.png')
            with open(no_image, "rb") as f:
                c = (f.read(), "png")
                self._check_thumbnail_tuple(c)
                images.append(c)

        # select the image
        if len(images) == 0:
            raise ValueError("There should be at least one image.")
        elif len(images) == 1:
            image = images[0]
        else:
            # maybe later we'll implement a different logic
            # we pick the last one
            image = images[0]

        # zoom
        if image[1] != "svg":
            img = self._scale_image(
                image[0], image[1], max_width=max_width, max_height=max_height)
            return img
        else:
            return image[0]

    def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
        """
        Scales an image with the same aspect ratio centered in an
        image with a given max_width and max_height.

        @param      in_bytes        image as bytes (or a tuple whose first element is the image)
        @param      format          indication of the format (can be empty),
                                    not used by the current implementation
        @param      max_width       maximum size of the thumbnail
        @param      max_height      maximum size of the thumbnail
        @return                     Image (PIL)

        An image which already fits in the requested size is returned
        unchanged, otherwise the image is reduced and pasted in the middle
        of a white image of size *(max_width, max_height)*.
        The method raises ``TypeError`` if the image is not bytes.

        .. versionadded:: 1.5
        """
        # local import to avoid testing dependency on PIL:
        try:
            from PIL import Image
        except ImportError:
            import Image

        if isinstance(in_bytes, tuple):
            in_bytes = in_bytes[0]
        if not isinstance(in_bytes, bytes):
            raise TypeError("bytes expected, not {0}".format(type(in_bytes)))
        img = Image.open(BytesIO(in_bytes))
        width_in, height_in = img.size
        scale_w = max_width / float(width_in)
        scale_h = max_height / float(height_in)

        # picks the scale which makes the image fit in both dimensions
        if height_in * scale_w <= max_height:
            scale = scale_w
        else:
            scale = scale_h

        if scale >= 1.0:
            return img

        width_sc = int(round(scale * width_in))
        height_sc = int(round(scale * height_in))

        # Image.ANTIALIAS was removed in Pillow 10, LANCZOS is the same filter
        resample = getattr(Image, "LANCZOS", None)
        if resample is None:
            resample = Image.ANTIALIAS

        # resize the image and center
        img.thumbnail((width_sc, height_sc), resample)
        thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
        pos_insert = ((max_width - width_sc) // 2,
                      (max_height - height_sc) // 2)
        thumb.paste(img, pos_insert)
        return thumb

    def _merge_images(self, results):
        """
        Merges images defined by (buffer, format).
        The method uses PIL to merge images when possible.

        @return                     [ (image, format) ]

        .. versionadded:: 1.5
        """
        if len(results) == 1:
            results = results[0]
            self._check_thumbnail_tuple(results)
            return results
        elif len(results) == 0:
            return None
        formats_counts = Counter(_[1] for _ in results)
        if len(formats_counts) == 1:
            format = results[0][1]
        else:
            items = sorted(((v, k) for k, v in formats_counts.items()), False)
            for it in items:
                format = it
                break

        results = [_ for _ in results if _[1] == format]
        if format == "svg":
            return ("\n".join(_[0] for _ in results), format)
        else:
            # local import to avoid testing dependency on PIL:
            try:
                from PIL import Image
            except ImportError:
                import Image

            dx = 0.
            dy = 0.
            over = 0.7
            imgs = []
            for in_bytes, f in results:
                img = Image.open(BytesIO(in_bytes))
                imgs.append(img)
                dx = max(dx, img.size[0])
                dy += img.size[1] * over

            new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
            for img in imgs:
                dy -= img.size[1] * over
                new_im.paste(img, (0, max(int(dy), 0)))

            image_buffer = BytesIO()
            new_im.save(image_buffer, "PNG")
            b = image_buffer.getvalue(), "png"
            return b