Beispiel #1
0
 def test__output_conv_ipynb(self):
     """Check that ``_output_conv`` turns a minimal notebook into an ``HTML`` object."""
     markdown_cell = nbformat.NotebookNode(
         {'cell_type': 'markdown', 'metadata': {}, 'source': "## Test"})
     # Smallest notebook structure: one markdown cell, format version 4.4.
     notebook = nbformat.NotebookNode({
         'cells': [markdown_cell],
         'metadata': {},
         'nbformat': 4,
         'nbformat_minor': 4,
     })
     converted = self.output._output_conv(notebook)
     # Only the type name is compared, not the class object itself.
     self.assertEqual(type(converted).__name__, 'HTML')
Beispiel #2
0
def test_add_parameters_tag(source):
    """Verify that ``add_parameters_tag`` tags the first cell with "parameters".

    The first cell of the fixture notebook is overwritten with ``source``; the
    notebook is passed to ``add_parameters_tag`` as a JSON string and the JSON
    it returns is parsed back into a notebook node for the check.
    """
    with open("jovian_papermill/tests/resources/notebook1.ipynb") as fixture:
        notebook = nbformat.read(fixture, as_version=4)
        notebook.cells[0]["source"] = source
        tagged_json = add_parameters_tag(json.dumps(notebook))
        tagged = nbformat.NotebookNode(json.loads(tagged_json))
        first_cell_tags = tagged.cells[0]["metadata"]["tags"]
        assert "parameters" in first_cell_tags
Beispiel #3
0
    async def async_exec_code(self, source, write_cell=settings.dev_mode):
        """
        Run a piece of code in the kernel through a freshly appended code cell

        Parameters
        ----------
            source (str): Code to execute
            write_cell (bool, default=settings.dev_mode): Keep the new cell in
                the notebook. When the value compares equal to False the cell
                is removed again once execution has finished

        Returns
        -------
            NotebookNode: the cell as returned by ``async_execute_cell``
        """
        # Fields are set through attribute assignment (not constructor
        # keywords) on purpose, so the node stores them exactly as before.
        new_cell = nbformat.NotebookNode()
        new_cell.cell_type = "code"
        new_cell.execution_count = self.code_cells_executed + 1
        new_cell.metadata = {}
        new_cell.outputs = []
        new_cell.source = source

        self.nb["cells"].append(new_cell)
        position = len(self.nb["cells"]) - 1
        executed = await self.async_execute_cell(
            new_cell, position, execution_count=new_cell.execution_count
        )

        # Equality rather than truthiness: a value such as None keeps the cell.
        if write_cell == False:  # noqa: E712
            del self.nb["cells"][position]

        return executed
Beispiel #4
0
    async def fix_notebook(self, notebook):
        """Returns a notebook object with a valid kernelspec.

        In case the kernel is not found, we search for a matching kernel based on the language.
        The language is read from the notebook's ``kernelspec`` metadata; when
        that entry has no ``language`` key (for instance because the notebook
        carried no kernelspec at all and an empty one was just created), the
        ``language_info.name`` metadata is used instead, and an empty string
        if neither is present.

        The notebook is modified in place: its ``kernelspec`` name,
        display name and language are overwritten with the values of the
        kernel spec that was finally selected. The same object is returned.
        """

        # Fetch kernel name from the notebook metadata
        if 'kernelspec' not in notebook.metadata:
            notebook.metadata.kernelspec = nbformat.NotebookNode()
        kernelspec = notebook.metadata.kernelspec
        kernel_name = kernelspec.get('name',
                                     self.kernel_manager.default_kernel_name)
        # We use `maybe_future` to support RemoteKernelSpecManager
        all_kernel_specs = await tornado.gen.maybe_future(
            self.kernel_spec_manager.get_all_specs())
        # Find a spec matching the language if the kernel name does not exist in the kernelspecs
        if kernel_name not in all_kernel_specs:
            missing_kernel_name = kernel_name
            # A kernelspec without a 'language' key used to raise
            # AttributeError here; fall back to language_info instead.
            language = kernelspec.get('language')
            if language is None:
                language_info = notebook.metadata.get('language_info', {})
                language = language_info.get('name', '')
            kernel_name = await self.find_kernel_name_for_language(
                language.lower(), kernel_specs=all_kernel_specs)
            self.log.warning('Could not find a kernel named %r, will use  %r',
                             missing_kernel_name, kernel_name)
        # We make sure the notebook's kernelspec is correct
        selected_spec = all_kernel_specs[kernel_name]['spec']
        notebook.metadata.kernelspec.name = kernel_name
        notebook.metadata.kernelspec.display_name = selected_spec['display_name']
        notebook.metadata.kernelspec.language = selected_spec['language']
        return notebook
Beispiel #5
0
def test_add_parameters_tag_raises_exception(source):
    """Verify that tagging a notebook whose first cell is ``source`` raises.

    Any exception raised while serialising the notebook, calling
    ``add_parameters_tag`` or parsing its result satisfies the test.
    """
    with open("jovian_papermill/tests/resources/notebook1.ipynb") as fixture:
        notebook = nbformat.read(fixture, as_version=4)
        notebook.cells[0]["source"] = source

        with pytest.raises(Exception):
            serialized = json.dumps(notebook)
            nbformat.NotebookNode(
                json.loads(add_parameters_tag(serialized)))
Beispiel #6
0
def recombine(directory):
    """Rebuild a notebook from a directory of per-cell files.

    The directory holds the notebook metadata in ``metadata.json`` and the
    order of the cells in ``cells_sequence`` (one cell id per line). Every
    cell id names a sub-directory containing a ``source.*`` file, optionally
    a ``metadata.json`` and optionally an ``outputs_sequence`` file listing
    the cell outputs, which are rebuilt with ``recombine_output``.

    The suffix of the source file selects the cell type: ``.md`` gives a
    markdown cell, ``.txt`` a raw cell, anything else a code cell.

    Returns the assembled notebook.
    """
    root = pathlib.Path(directory)

    with (root / 'metadata.json').open() as fh:
        notebook_metadata = json.load(fh)
    notebook = nbf.v4.new_notebook(metadata=notebook_metadata)

    with (root / 'cells_sequence').open() as fh:
        cell_ids = fh.read().splitlines()

    for cell_id in cell_ids:
        cell_path = root / cell_id

        # The first match wins; a cell directory without a source file
        # fails with IndexError.
        source_path = list(cell_path.glob('source.*'))[0]
        with source_path.open() as fh:
            text = fh.read()

        suffix = source_path.suffix
        if suffix == '.md':
            cell = nbf.v4.new_markdown_cell(text)
        elif suffix == '.txt':
            cell = nbf.NotebookNode(cell_type='raw',
                                    source=text,
                                    metadata=nbf.NotebookNode())
        else:
            cell = nbf.v4.new_code_cell(text)
        notebook.cells.append(cell)

        metadata_path = cell_path / 'metadata.json'
        if metadata_path.exists():
            with metadata_path.open() as fh:
                cell.metadata = nbf.from_dict(json.load(fh))

        cell.metadata['nbexplode_cell_id'] = cell_id

        outputs_index = cell_path / 'outputs_sequence'
        if outputs_index.exists():
            with outputs_index.open() as fh:
                output_infos = fh.read().splitlines()

            # Outputs are numbered from 1 in the order listed in the file.
            cell.outputs = [
                recombine_output(cell_path, number, info)
                for number, info in enumerate(output_infos, start=1)
            ]

    return notebook
Beispiel #7
0
    def preprocess_cell(self, cell, resources, cell_index):
        """Run one code cell, serving its outputs from the cache when possible.

        Non-code cells are returned unchanged. For a code cell a cache key is
        built from the cell source and index. On a cache hit the cached
        outputs are copied onto the cell; otherwise the cell is executed with
        ``self.run_cell`` and, unless an error stops processing, the outputs
        are stored on the cell and in the cache.

        When ``self.allow_errors`` is false, the first error output decides
        what happens: in a setup cell (``cell_index < self.setup_cells``) it
        raises, in any other cell the error is printed to stderr and the cell
        is returned without its outputs being stored or cached.

        Parameters
        ----------
        cell : notebook cell to process
        resources : passed through unchanged
        cell_index : position of the cell, used for the cache key and to
            tell setup cells from regular cells

        Returns
        -------
        tuple of ``(cell, resources)``

        Raises
        ------
        CellExecutionError
            If a setup cell produced an error output and errors are not
            allowed.
        """
        if cell.cell_type != 'code':
            return cell, resources

        key = self.cache_key(cell.source, cell_index)
        if key in self.cache:
            self.log.debug("Cache hit[%i]: %s", cell_index, key)
            cell.outputs = [
                nbformat.NotebookNode(output) for output in self.cache[key]
            ]
            return cell, resources

        # Apply the warnings filter. Fix for the edx converter.
        if self.warnings != "default":
            self.kc.execute("import warnings; "
                            "warnings.simplefilter('{}')".format(
                                self.warnings))
        outputs = self.run_cell(cell, cell_index)
        # allow_errors inherited from ExecutePreprocessor: by default, is False.
        if not self.allow_errors:
            for out in outputs:
                if out.output_type == 'error':
                    # If the current cell is not a setup cell, inform of error
                    # and continue to the next cell without storing output
                    if cell_index >= self.setup_cells:
                        pattern = """\
                                An error occurred while executing the following cell:
                                ------------------
                                {cell.source}
                                ------------------
                                {out.ename}: {out.evalue}
                                """
                        print(dedent(pattern).format(out=out, cell=cell),
                              file=sys.stderr)
                        return cell, resources
                    else:  # If current cell is a setup cell, do not run more cells
                        pattern = """\
                                An error occurred while executing setup cell number {cell_index}.
                                No further cells will be run.
                                ------------------
                                {out.ename}: {out.evalue}
                                """
                        msg = dedent(pattern).format(out=out,
                                                     cell_index=cell_index)
                        raise CellExecutionError(msg)
        # Either errors are allowed or none occurred: store the outputs on
        # the cell (previously the allow_errors path wrote them to
        # ``self.outputs`` by mistake) and remember them in the cache.
        cell.outputs = outputs
        self.cache[key] = cell.outputs
        return cell, resources
Beispiel #8
0
def run_cell(shell, iopub, cell, kc):
    """Execute ``cell.source`` through ``kc`` and collect the iopub outputs.

    After sending the code, one message is read from ``shell`` (1 second
    timeout) and ``iopub`` is then drained until it stays empty for 0.2
    seconds. ``status`` and ``execute_input`` messages are ignored and a
    ``clear_output`` message discards everything collected so far. Every
    other message becomes a ``NotebookNode`` whose ``output_type`` is the
    message type; unknown types are reported on stdout but still returned.

    Returns the list of output nodes.
    """
    # print cell.source
    kc.execute(cell.source)
    # wait for finish, maximum 20s
    shell.get_msg(timeout=1)  # was 20

    ignored_types = ('status', 'execute_input')
    result_types = ('execute_result', 'pyout')
    collected = []

    while True:
        try:
            message = iopub.get_msg(timeout=0.2)
        except Empty:
            break

        kind = message['msg_type']
        if kind in ignored_types:
            continue
        if kind == 'clear_output':
            collected = []
            continue

        payload = message['content']
        node = nbformat.NotebookNode(output_type=kind)

        if kind == 'stream':
            # The stream name and text are each stored under two keys.
            node.stream = payload['name']
            node.text = payload['text']
            node.data = payload['text']
            node.name = payload['name']
        elif kind == 'display_data' or kind in result_types:
            node['metadata'] = payload['metadata']
            for mime_type, value in payload['data'].items():
                short_name = mime_type.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                short_name = short_name.replace('+xml', '')
                short_name = short_name.replace('plain', 'text')
                setattr(node, short_name, value)
            node.data = payload['data']

            if kind in result_types:
                node.execution_count = payload['execution_count']
        elif kind in ('pyerr', 'error'):
            node.ename = payload['ename']
            node.evalue = payload['evalue']
            node.traceback = payload['traceback']
        else:
            print("unhandled iopub msg:", kind)

        collected.append(node)
    return collected
Beispiel #9
0
 def insertRef(self):
     """Append a pluto attribution cell to the notebook unless one is present.

     The notebook at ``self.nbFileName`` is read and its markdown cells are
     scanned (and printed) until one mentions 'pluto.studio'. If none does,
     a markdown cell with the attribution text is appended and the notebook
     is written back to the same file.
     """
     notebook = nbformat.read(self.nbFileName, as_version=4)
     cells = notebook['cells']
     markdown_cells = [c for c in cells if c['cell_type'] == 'markdown']

     for md_cell in markdown_cells:
         print (md_cell['source'])
         if 'pluto.studio' in md_cell['source']:
             # Attribution already there: leave the file untouched.
             return

     attribution = nbformat.NotebookNode(cell_type='markdown', metadata={}, source=["This notebook was created using [pluto](http://pluto.studio). Learn more [here](https://github.com/shyams80/pluto)"])
     cells.append(attribution)
     nbformat.write(notebook, self.nbFileName, version=4)
Beispiel #10
0
 def split_cells():
     """Yield the notebook's cells with markdown cells split at '# ' headings.

     Cells before the first markdown cell of ``nb`` are dropped. Non-markdown
     cells are yielded as they are. The source of each markdown cell is
     split on lines starting with '# ' (the heading lines are kept as
     separate pieces) and every piece is yielded as a new markdown cell with
     empty metadata.
     """
     def before_first_markdown(candidate):
         return candidate.cell_type != 'markdown'

     for cell in dropwhile(before_first_markdown, nb.cells):
         if cell.cell_type == 'markdown':
             pieces = re.split('(^# .*$)', cell.source, flags=re.MULTILINE)
             yield from (
                 nbformat.NotebookNode(
                     source=piece,
                     cell_type='markdown',
                     metadata={},
                 )
                 for piece in pieces
             )
         else:
             yield cell
def post_process(language, cell, nodes, cell_config) -> nbformat.NotebookNode:
    """Build the raw cell that replaces ``cell`` and its outputs.

    The cell source and every output node are rendered with the templates in
    ``config`` and joined with newlines into the source of one new raw cell.
    Putting everything into a raw cell is the easiest approach for the
    author's use case, not necessarily the best one.

    Error outputs raise ``ErrorsNotAllowed`` unless
    ``cell_config["metadata.allow_errors"]`` is set; when allowed they are
    rendered either as the full traceback
    (``cell_config["metadata.full_traceback"]``) or through the exception
    template. An output type other than ``execute_result``, ``stream`` or
    ``error`` raises ``UnhandledOutputType``.

    TODO figure out a better way that also accommodates potential
     HTML/interactive output better (#4).
    """
    rendered = [config["cell.source"].format(language=language, cell=cell)]
    for node in nodes:
        assert isinstance(node, nbformat.NotebookNode)
        # https://nbformat.readthedocs.io/en/latest/format_description.html
        if node.output_type == "execute_result":
            rendered.append(config["node.execute_result"].format(node=node))
            continue
        if node.output_type == "stream":
            # TODO use tags/raises-exception like in pytest (not raising raises error)
            #  if <wherever that tags thing is> is set:
            #      raise DidNotRaise(f"should have raised but didn't:\n{node}")
            rendered.append(config["node.stream"].format(node=node))
            continue
        if node.output_type != "error":
            raise UnhandledOutputType(
                f"{node.output_type=} unknown - {cell.source=}")

        # From here on the node is an error output.
        if not cell_config["metadata.allow_errors"]:
            raise ErrorsNotAllowed(
                f"raised but errors not allowed:\n{node}")
        if cell_config["metadata.full_traceback"]:
            # TODO handle ANSI terminal colors stuff
            #  see if how jupyter does it is reusable
            #  to keep colours this would need to be HTML though
            error_text = "".join(node.traceback)
        else:
            error_text = config["node.exception"].format(node=node)
        rendered.append(error_text)

    raw_cell = {
        "cell_type": "raw",
        "metadata": {},
        "source": "\n".join(rendered)
    }
    return nbformat.NotebookNode(raw_cell)
Beispiel #12
0
This script can strip solution cells or insert grading cells in Jupyter
notebooks.
"""

import argparse
import nbformat as nb
import os.path

# Configuration Variables
# Filename suffix a solutions notebook must have; nb_strip swaps it for
# '.ipynb' to build the output path.
SOLN_SUFFIX = '-solutions.ipynb'
SOLN_HEADER = '#### SOLUTION'  # header for solution cells

EXER_HEADER = '__Exercise'  # header for exercise cells
# Markdown/HTML text used as the source of GRADE_CELL.
GRADE_HEADER = '<strong style="color:#F00">\nGrade: \n</strong>'
# Markdown cell node built from GRADE_HEADER; presumably the template for the
# grading cells mentioned in the module docstring (TODO confirm where it is
# inserted).
GRADE_CELL = nb.NotebookNode(cell_type='markdown',
                             metadata={},
                             source=GRADE_HEADER)


def nb_strip(path):
    """Strip solution cells from the solutions notebook at ``path``.

    ``path`` must end with ``SOLN_SUFFIX``; the output path is ``path`` with
    that suffix replaced by '.ipynb'. If the suffix is missing, or the output
    path already exists, an error message is printed and the function
    returns without doing anything else.
    """
    if not path.endswith(SOLN_SUFFIX):
        msg = "Error: input path '{}' doesn't end with '{}'."
        print(msg.format(path, SOLN_SUFFIX))
        return

    # Drop the solutions suffix to get the name of the stripped notebook.
    out_path = path[:-len(SOLN_SUFFIX)] + '.ipynb'
    # Never overwrite an existing notebook.
    if os.path.exists(out_path):
        print("Error: output path '{}' already exists.".format(out_path))
        return

    # Strip solution cells.
 def insert_cell(self, index: int, cell: dict):
     """Wrap ``cell`` in a ``NotebookNode`` and insert it at ``index``."""
     node = nbformat.NotebookNode(cell)
     cells = self.nb_node.cells
     cells.insert(index, node)
Beispiel #14
0
def run_cell(kernel_client, cell, timeout=300):
    """Execute a cell on ``kernel_client`` and collect its outputs.

    A cell without a ``source`` attribute is not run. Otherwise the source
    is executed, the shell reply is awaited for up to ``timeout`` seconds
    and, if its status is 'error', the cell source and traceback are
    printed. The iopub channel is then drained until it stays empty for 0.2
    seconds: ``status`` and ``execute_input`` messages are ignored,
    ``clear_output`` discards what was collected so far and every other
    message becomes a ``NotebookNode``.

    Cells containing '!ipcluster stop' or '!ipcluster start' additionally
    poll ``Client()`` (up to 10 times, 5 seconds apart) until the cluster
    has no engines, respectively at least one engine.

    Returns a tuple ``(outputs, failed)``.
    """
    if not hasattr(cell, 'source'):
        return [], False
    kernel_client.execute(cell.source)
    # wait for finish, maximum 5min by default
    reply = kernel_client.get_shell_msg(timeout=timeout)['content']
    failed = reply['status'] == 'error'
    if failed:
        print("\nFAILURE:")
        print(cell.source)
        print('-----')
        print("raised:")
        print('\n'.join(reply['traceback']))

    # Collect the outputs of the cell execution
    collected = []
    while True:
        try:
            message = kernel_client.get_iopub_msg(timeout=0.2)
        except Empty:
            break
        kind = message['msg_type']
        if kind in ('status', 'execute_input'):
            continue
        if kind == 'clear_output':
            collected = []
            continue

        payload = message['content']
        node = nbformat.NotebookNode(output_type=kind)
        if kind == 'stream':
            node.name = payload['name']
            node.text = payload['text']
        elif kind in ('display_data', 'execute_result'):
            for mime_type, value in payload['data'].items():
                short_name = mime_type.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                short_name = short_name.replace('+xml', '')
                short_name = short_name.replace('plain', 'text')
                setattr(node, short_name, value)
            if kind == 'execute_result':
                node.execution_count = payload['execution_count']
        elif kind == 'error':
            node.ename = payload['ename']
            node.evalue = payload['evalue']
            node.traceback = payload['traceback']
        else:
            print("unhandled iopub msg: %s" % kind)
        collected.append(node)

    # Special handling of ipcluster restarts: (command, "done" test on the
    # number of engines, progress character), checked in this order.
    cluster_waits = (
        ('!ipcluster stop', lambda engines: engines == 0, "@"),
        ('!ipcluster start', lambda engines: engines > 0, "#"),
    )
    for command, settled, tick in cluster_waits:
        if command not in cell.source:
            continue
        # wait some time for cluster commands to complete
        for _ in range(10):
            try:
                if settled(len(Client())):
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write(tick)
            sys.stdout.flush()
            time.sleep(5)
    return collected, failed
Beispiel #15
0
    # Read the notebook without converting it to another nbformat version.
    original_nb = nbformat.read(file_path, nbformat.NO_CONVERT)

    # notebook_without_cells presumably returns the notebook minus its
    # cells (TODO confirm); the cell list is reset explicitly either way.
    nb_sans_cells = notebook_without_cells(original_nb)
    nb_sans_cells['cells'] = []

    for cell in original_nb.cells:
        # A cell is kept only when remove_cell_with(...) is truthy for the
        # marker text below.
        # NOTE(review): the helper's name suggests the opposite polarity -
        # confirm it returns True for cells that should be KEPT.
        if remove_cell_with(cell['source'], '# To avoid duplication'):
            # the processed lines of this cell, in order
            new_lines = []
            # go through the cell source and remove all unneeded lines and text
            for line in iterlines(cell['source']):
                new_line = remove_ex_comment(line)
                new_line = remove_ex(new_line)
                new_line = remove_line_with(new_line, '#remove_next')
                new_lines.append(new_line)
            # combine preserved lines into single string
            new_source = combine_lines(new_lines)
            # copy every field of the cell except its source
            # NOTE(review): iteritems() is the Python 2 dict API - confirm
            # this code is only run under Python 2.
            new_cell = {k: v for k, v in cell.iteritems() if k != u'source'}
            # add the cell source
            new_cell[u'source'] = new_source
            # convert cell to NotebookNode
            new_cell = nbformat.NotebookNode(new_cell)
            # add cell to the new notebook
            nb_sans_cells['cells'].append(new_cell)
    new_nb = nbformat.NotebookNode(nb_sans_cells)
    # Write next to the input file, with '_clean' inserted before the
    # extension.
    new_path = path.splitext(file_path)
    new_path = new_path[0] + '_clean' + new_path[1]
    nbformat.write(new_nb, new_path)
Beispiel #16
0
def md2nbcell(md: str) -> NotebookNode:
    """Wrap a markdown string in a Jupyter notebook markdown cell.

    The returned node has ``cell_type`` "markdown", empty metadata and
    ``md`` as its source.
    """
    return nbformat.NotebookNode(cell_type="markdown", metadata={}, source=md)
Beispiel #17
0
 def run_code(self, code):
     "Debug helper: wrap `code` in a bare notebook node and run it with `run_cell`"
     return self.run_cell(nbformat.NotebookNode(source=code))
Beispiel #18
0
def run_cell(kc, cell, timeout=300):
    """Execute a cell on ``kc`` and collect outputs in notebook v3 naming.

    A cell without an ``input`` attribute is not run. Otherwise the input is
    executed, the shell reply is awaited for up to ``timeout`` seconds and,
    if its status is 'error', the cell input and traceback are printed. The
    iopub channel is then drained until it stays empty for 0.2 seconds:
    ``status``, ``pyin`` and ``execute_input`` messages are ignored,
    ``clear_output`` discards what was collected so far and every other
    message becomes a ``NotebookNode``. Message types 'error' and
    'execute_result' are stored as 'pyerr' and 'pyout'. A message carrying
    an execution count also updates ``cell['prompt_number']``.

    Cells containing '!ipcluster stop' or '!ipcluster start' additionally
    poll ``Client()`` (up to 10 times, 5 seconds apart) until the cluster
    has no engines, respectively at least one engine.

    Returns a tuple ``(outputs, failed)``.
    """
    if not hasattr(cell, 'input'):
        return [], False
    kc.execute(cell.input)
    # wait for finish, maximum 5min by default
    reply = kc.get_shell_msg(timeout=timeout)['content']
    failed = reply['status'] == 'error'
    if failed:
        print("\nFAILURE:")
        print(cell.input)
        print('-----')
        print("raised:")
        print('\n'.join(reply['traceback']))

    # IPython 3 writes pyerr/pyout in the notebook format but uses
    # error/execute_result in the message spec. This does the translation
    # needed for tests to pass with IPython 3
    notebook3_format_conversions = {
        'error': 'pyerr',
        'execute_result': 'pyout'
    }

    # Collect the outputs of the cell execution
    collected = []
    while True:
        try:
            message = kc.get_iopub_msg(timeout=0.2)
        except Empty:
            break
        wire_type = message['msg_type']
        if wire_type in ('status', 'pyin', 'execute_input'):
            continue
        if wire_type == 'clear_output':
            collected = []
            continue

        payload = message['content']
        kind = notebook3_format_conversions.get(wire_type, wire_type)
        node = nbformat.NotebookNode(output_type=kind)

        if 'execution_count' in payload:
            cell['prompt_number'] = payload['execution_count']
            node.prompt_number = payload['execution_count']

        if kind == 'stream':
            node.stream = payload['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            text_key = 'text' if 'text' in payload else 'data'
            node.text = payload[text_key]
        elif kind in ('display_data', 'pyout'):
            for mime_type, value in payload['data'].items():
                short_name = mime_type.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                short_name = short_name.replace('+xml', '')
                short_name = short_name.replace('plain', 'text')
                setattr(node, short_name, value)
        elif kind == 'pyerr':
            node.ename = payload['ename']
            node.evalue = payload['evalue']
            node.traceback = payload['traceback']
        else:
            print("unhandled iopub msg: %s" % kind)

        collected.append(node)

    # Special handling of ipcluster restarts: (command, "done" test on the
    # number of engines, progress character), checked in this order.
    cluster_waits = (
        ('!ipcluster stop', lambda engines: engines == 0, "@"),
        ('!ipcluster start', lambda engines: engines > 0, "#"),
    )
    for command, settled, tick in cluster_waits:
        if command not in cell.input:
            continue
        # wait some time for cluster commands to complete
        for _ in range(10):
            try:
                if settled(len(Client())):
                    break
            except OSError:
                pass
            sys.stdout.write(tick)
            sys.stdout.flush()
            time.sleep(5)
    return collected, failed
Beispiel #19
0
        "Nbconvert is not installed. To install it use: \n pip install nbconvert \n or visit: http://nbconvert.readthedocs.io/en/latest/install.html. \n OS error: {0}"
        .format(err))
    raise
from StringIO import StringIO
import codecs
from nbformat.v4.nbbase import (new_code_cell, new_markdown_cell, new_notebook,
                                new_output, new_raw_cell)
infilename = sys.argv[1]
outfilename = sys.argv[2]
f = open(infilename)
# lines = f.readlines()
# f.close()
# text = "".join(lines)

nb = nbformat.read(infilename, 4)
nb_new = nbformat.NotebookNode()

cells_new = []
offset = 0
reright = 0
for cell in nb.cells:
    if cell["cell_type"] == "code":
        if cell["source"] == '%jsroot on':
            offset = -1
        elif cell["source"] == '%jsroot off':
            offset = -1
        else:
            # print cell.execution_count
            # print type(cell.execution_count)
            cells_new.append(
                new_code_cell(source=cell.source,
 def overwrite_cell(self, index: int, cell: dict):
     """Replace the cell at ``index`` with ``cell`` wrapped in a ``NotebookNode``."""
     replacement = nbformat.NotebookNode(cell)
     cells = self.nb_node.cells
     cells[index] = replacement
Beispiel #21
0
def loop():
    """Process one pending notebook request from the ``db.q`` collection.

    Does nothing when no request with ``isProcessed == False`` exists.
    Otherwise the request's gzipped notebook is written to a temporary file
    under ``plutoPath``, a reference cell is added with ``insertRef``, the
    notebook is executed in place with ``jupyter nbconvert`` as user
    ``pluto`` and the result is pushed to the ``plutons`` repository of the
    GitHub account identified by the request's token. If GitHub rejects the
    file as too large, a markdown export and its ``_files`` directory are
    uploaded as well. Finally the temporary file is removed and the
    request is marked as processed, storing the gzipped output notebook.

    Outbound firewall rules are switched with ``ufw`` around each step
    (ports 443 and 27017 - presumably HTTPS/GitHub and MongoDB; verify).
    The order of these calls matters and must be preserved.

    NOTE(review): every ``subprocess.run`` call passes ``errors=True``.
    ``errors`` is the text-decoding error-handler argument of
    ``subprocess.run``; ``check=True`` may have been intended - confirm.
    """
    request = db.q.find_one({'isProcessed': False})
    # NOTE(review): `is None` is the idiomatic comparison here.
    if request == None:
        return

    # Open 443 and block 27017 while the notebook is being handled.
    subprocess.run(shlex.split("ufw allow out to any port 443"),
                   env=os.environ,
                   errors=True)
    subprocess.run(shlex.split("ufw deny out to any port 27017"),
                   env=os.environ,
                   errors=True)
    print(request['_id'])
    githubAcc = Github(request['githubTok'])
    user = githubAcc.get_user()
    repo = user.get_repo("plutons")

    githubUserName = request['githubUser']
    print(f"processing for: {githubUserName}")

    qId = ObjectId(request['_id'])
    fullPath = request['file']
    notebook = gzip.decompress(request['notebook'])

    # Temporary file: plutoPath + the part of the path after the last '/'.
    tempFileName = plutoPath + fullPath[fullPath.rfind('/') + 1:]
    print(tempFileName)
    with open(tempFileName, mode='wb') as file:
        file.write(notebook)

    # NOTE(review): the file name is interpolated into a command string and
    # then split with shlex; a name containing spaces or quotes would be
    # split into several arguments - confirm names are restricted upstream.
    subprocess.run(shlex.split(f"chmod 666 {tempFileName}"),
                   env=os.environ,
                   errors=True)

    insertRef(tempFileName)

    # Block 443 again before the notebook code is executed.
    subprocess.run(shlex.split("ufw deny out to any port 443"),
                   env=os.environ,
                   errors=True)
    cmdLine = f"sudo -E -H -u pluto jupyter nbconvert --to notebook --execute {tempFileName} --inplace --allow-errors"
    cpi = subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)

    textLength = getOutputLength(tempFileName)
    print(f"total output length: {textLength}")

    # Over the 10000 character limit (user 'shyams80' is exempt): clear all
    # outputs and leave a single stderr message in their place.
    if githubUserName != 'shyams80' and textLength > 10000:
        cmdLine = f"jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace {tempFileName}"
        cpi = subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)
        nbDoc = nbformat.read(tempFileName, as_version=4)

        for nbCell in nbDoc['cells']:
            # NOTE(review): this skips only non-code cells whose source is
            # not None. A non-code cell with a None source would fall
            # through to nbCell['outputs'] below; presumably the intent is
            # "first code cell" - confirm.
            if nbCell['cell_type'] != 'code' and nbCell['source'] != None:
                continue

            nbCell['execution_count'] = 1
            outObj = nbformat.NotebookNode(
                output_type='stream',
                name='stderr',
                text=[
                    'total output string length exceeded 10000 characters. please stay within the limit.'
                ])
            nbCell['outputs'].append(outObj)
            # Only the first cell that gets here receives the message.
            break

        nbformat.write(nbDoc, tempFileName, version=4)

    with open(tempFileName, mode='rb') as file:
        outFileContent = file.read()

    tooBig = False
    # Open 443 for the upload to GitHub.
    subprocess.run(shlex.split("ufw allow out to any port 443"),
                   env=os.environ,
                   errors=True)
    try:
        fileContent = repo.get_contents(fullPath)
        repo.update_file(fullPath, "response", outFileContent, fileContent.sha)
    except Exception as exp:
        print(exp)
        # NOTE(review): assumes the exception carries a `data` mapping with
        # an "errors" list. Any other exception would raise again here,
        # skipping the `ufw deny` below and leaving port 443 open - confirm
        # and consider guarding this lookup.
        if exp.data["errors"][0]['code'] == 'too_large':
            tooBig = True
    subprocess.run(shlex.split("ufw deny out to any port 443"),
                   env=os.environ,
                   errors=True)

    if tooBig:
        # Fall back to a markdown export, again produced with 443 blocked.
        cmdLine = f"sudo -E -H -u pluto jupyter nbconvert --to markdown --execute {tempFileName} --allow-errors"
        cpi = subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)
        filePattern = tempFileName.replace(".ipynb", "")

        subprocess.run(shlex.split("ufw allow out to any port 443"),
                       env=os.environ,
                       errors=True)
        upsertGithub(filePattern + ".md", repo)
        # Upload every file of the export's "<name>_files" directory.
        for fname in os.listdir(filePattern + "_files"):
            upsertGithub(filePattern + "_files/" + fname, repo)

        subprocess.run(shlex.split("ufw deny out to any port 443"),
                       env=os.environ,
                       errors=True)

    os.remove(tempFileName)

    # Re-open 27017 before writing the result back to the database.
    subprocess.run(shlex.split("ufw allow out to any port 27017"),
                   env=os.environ,
                   errors=True)
    db.q.update_one({'_id': qId}, {
        '$set': {
            'isProcessed': True,
            'processedOn': datetime.now(),
            'notebook': gzip.compress(outFileContent)
        }
    })
Beispiel #22
0
    def preprocess(self, nb, resources, path):
        # Record initial loaded modules, including the proveit defaults and
        # proveit.magics.  All other modules will be deleted when
        # we are done so we can "recycle" our Kernel to be used cleanly
        # for the next notebook.
        init_modules_source = """
import sys
from proveit import *
import proveit.magics
__init_modules = list(sys.modules.keys())
__init_modules # avoid Prove-It magic assignment
"""
        init_modules_cell = nbformat.NotebookNode(cell_type='code', source=init_modules_source, metadata=dict())
        cell, _ = self.preprocess_cell(init_modules_cell, resources, 0)
        
        # change the working directory
        cd_source = 'import os\nos.chdir(r\"' + path + '")'        
        cd_cell = nbformat.NotebookNode(cell_type='code', source=cd_source, metadata=dict())
        self.preprocess_cell(cd_cell, resources, 0)
        
        # Execute each cell.  We must correct the execution count so we treat this
        # like it was the only notebook executed in this session (even though we
        # are actually recycling the Kernel).
        exec_count = 0
        for index, cell in enumerate(nb.cells): 
            if hasattr(cell, 'source') and cell['source'].strip() != '':
                cell, resources = self.preprocess_cell(cell, resources, index)
                if 'execution_count' in cell:
                    # make proper execution counts
                    exec_count += 1
                    cell['execution_count'] = exec_count
                    if 'outputs' in cell:
                        for output in cell['outputs']:
                            if 'execution_count' in output:
                                output['execution_count'] = exec_count
            nb.cells[index]
        
        # "reset" the stored Prove-It data.  Also,
        # Delete all modules except those that were initially loaded.
        # Also, %reset local variables and history.
        # We are preparing the Kernel to be recycled.
        reset_source = """
import sys
import proveit
proveit.reset()
# delete all modules but initial modules and proveit._core_ modules
for m in list(sys.modules.keys()):
    if m not in __init_modules:
        if '.' in m:
            parent, child = m.rsplit('.', 1)
            if parent in __init_modules:
                # remove the module being deleted from its parent package
                sys.modules[parent].__dict__.pop(child)
        del(sys.modules[m])
%reset
%reset in
%reset out
"""
        reset_cell = nbformat.NotebookNode(cell_type='code', source=reset_source, metadata=dict())
        cell, _ = self.preprocess_cell(reset_cell, resources, 0)
        
        # Garbage collect.
        garbage_collect_source = """import sys
import gc
gc.collect()
len(gc.get_objects()) # used to check for memory leaks
"""
        garbage_collect_cell = nbformat.NotebookNode(cell_type='code', source=garbage_collect_source, metadata=dict())
        cell, _ = self.preprocess_cell(garbage_collect_cell, resources, 0)
        # Useful debugging to check for memory leaks:
        #print('num gc objects', cell['outputs'][0]['data']['text/plain'])    
        return nb, resources
Beispiel #23
0
    def run(self, cell, use_timeout=None):
        """
        Run a notebook cell in the IPythonKernel

        The cell source is submitted with ``self.execute`` and messages are
        then read with ``self.listen`` until a ``status`` message reporting
        the ``idle`` execution state arrives.

        Parameters
        ----------
        cell : IPython.notebook.Cell
            the cell to be run; it must expose a ``source`` attribute
        use_timeout : int or None (default)
            the time in seconds after which a cell is stopped and assumed to
            have timed out. If set to None the value in `default_timeout`
            is used

        Returns
        -------
        list of ex_cell_outputs
            a list of NotebookNodes of the returned types. This is
            similar to the list of outputs generated when a cell is run

        Raises
        ------
        AttributeError
            if `cell` has no ``source`` attribute
        """

        # Fall back to the instance-wide default only when the caller did
        # not supply a timeout of their own.
        if use_timeout is None:
            use_timeout = self.default_timeout

        if not hasattr(cell, 'source'):
            raise AttributeError('No source/input key')
        uid = self.execute(cell.source)

        outs = []
        # One accumulated output per stream name (e.g. stdout / stderr).
        stdout_cells = {}

        while True:
            msg = self.listen(uid, use_timeout)

            msg_type = msg['msg_type']

            if msg_type == 'execute_input':
                continue
            elif msg_type == 'clear_output':
                # Drop everything collected so far.  The per-stream cells
                # must be forgotten too, otherwise later stream text would
                # be appended to a cell that is no longer part of `outs`.
                outs = []
                stdout_cells = {}
                continue
            elif msg_type == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # we are done with the cell, let's compare
                    break

                continue

            out_cell = nbformat.NotebookNode(output_type=msg_type)

            content = msg['content']

            if msg_type == 'stream':
                name = content['name']
                if name not in stdout_cells:
                    out_cell.name = name
                    out_cell.text = content['text']
                    stdout_cells[name] = out_cell
                    outs.append(out_cell)
                else:
                    # we already have a stdout cell, so append to it
                    stdout_cells[name].text += content['text']

            elif msg_type in ('display_data', 'execute_result'):
                # `content` is a mapping, so the key has to be looked up as
                # a key (hasattr() never sees dict keys).  Messages without
                # an execution count get None.
                out_cell['execution_count'] = content.get('execution_count')

                out_cell['data'] = content['data']
                out_cell['metadata'] = content['metadata']

                outs.append(out_cell)

            elif msg_type == 'error':
                out_cell.ename = content['ename']
                out_cell.evalue = content['evalue']
                out_cell.traceback = content['traceback']

                outs.append(out_cell)

            elif msg_type.startswith('comm_'):
                # messages used to initialize, close and update widgets
                # we will ignore these and hope for the best
                pass

            else:
                tv.warning("Unhandled iopub msg of type `%s`" % msg_type)

        return outs
Beispiel #24
0
 def Execute(self, meta):
     """
     Process one queued notebook request from start to finish.

     The request document is loaded from ``self.db.q``, its gzip-compressed
     notebook is written below ``/home/pluto/notebook-temp/<id>/``, outputs
     are cleared with ``jupyter nbconvert``, the directory is pushed to the
     processor returned by ``self.cm.GetProcessor`` and executed there, and
     the executed notebook is fetched back and committed to the user's
     ``plutons`` GitHub repository.  When GitHub reports the file as
     ``too_large`` a markdown rendering (plus its ``_files`` images) is
     uploaded as well.  Finally the remote file and the local working
     directory are removed and the request is marked as processed.

     Progress is reported through ``self.status.Update`` along the way.

     Parameters
     ----------
     meta : mapping
         read for the keys ``'id'`` (string form of the request's object id)
         and ``'githubUser'``

     Notes
     -----
     Sets ``self.repo``, ``self.plutoPath`` and ``self.nbFileName``.
     """
     print(meta)

     qId = ObjectId(meta['id'])
     print('acquiring egg')
     self.status.Update(qId, 'acquiring egg')

     egg = self.cm.GetProcessor(qId, meta['githubUser'])

     request = self.db.q.find_one({'_id': qId})

     githubUserName = request['githubUser']
     print(f"processing for: {githubUserName}")
     self.status.Update(qId, 'processing')

     githubAcc = Github(request['githubTok'])
     user = githubAcc.get_user()
     self.repo = user.get_repo("plutons")

     self.plutoPath = "/home/pluto/notebook-temp/" + meta['id'] + "/"

     try:
         os.makedirs(self.plutoPath)
     except FileExistsError:
         pass

     fullPath = request['file']
     notebook = gzip.decompress(request['notebook'])

     # rpartition keeps the whole name when the path has no '/', whereas
     # slicing with rfind() == -1 would chop the last character off.
     githubPath, pathSep, githubFileName = fullPath.rpartition('/')
     # Directory prefix for files uploaded next to the notebook; empty for
     # a notebook that sits at the repository root.
     remoteDir = githubPath + pathSep

     self.nbFileName = self.plutoPath + githubFileName
     print(f"processing notebook: {self.nbFileName}")
     with open(self.nbFileName, mode='wb') as file:
         file.write(notebook)

     # Commands are passed as argument lists so that file names containing
     # spaces or quotes stay a single argument.
     # NOTE(review): `errors=True` is subprocess's text-decoding option and
     # has no visible effect here; `check=True` may have been intended -
     # confirm before changing.
     clearOutputCmd = ["jupyter", "nbconvert", "--ClearOutputPreprocessor.enabled=True", "--inplace", self.nbFileName]
     subprocess.run(clearOutputCmd, env=os.environ, errors=True)

     self.insertRef()

     egg.files.recursive_put(self.plutoPath, "/home/pluto/")

     print("executing in egg")
     self.status.Update(qId, 'executing in egg')
     egg.execute(["jupyter", "nbconvert", "--to", "notebook", "--execute", f"/home/pluto/{githubFileName}", "--inplace", "--allow-errors", "--ExecutePreprocessor.timeout=1200"])

     resp = egg.files.get(f"/home/pluto/{githubFileName}")
     with open(self.nbFileName, mode='wb') as file:
         file.write(resp)

     textLength = self.getOutputLength()
     print(f"total output length: {textLength}")

     if githubUserName != 'shyams80' and textLength > 10000:
         # Output is over the limit: strip it again and leave a single
         # stderr notice on the first cell that passes the filter below.
         subprocess.run(clearOutputCmd, env=os.environ, errors=True)
         nbDoc = nbformat.read(self.nbFileName, as_version=4)

         for nbCell in nbDoc['cells']:
             if nbCell['cell_type'] != 'code' and nbCell['source'] is not None:
                 continue

             nbCell['execution_count'] = 1
             outObj = nbformat.NotebookNode(output_type='stream', name='stderr', text=['total output string length exceeded 10000 characters. please stay within the limit.'])
             nbCell['outputs'].append(outObj)
             break

         nbformat.write(nbDoc, self.nbFileName, version=4)

     with open(self.nbFileName, mode='rb') as file:
         outFileContent = file.read()

     tooBig = False
     try:
         fileContent = self.repo.get_contents(fullPath)
         self.repo.update_file(fullPath, "response", outFileContent, fileContent.sha)
     except Exception as exp:
         print(exp)
         # Not every failure carries a `data["errors"]` payload; inspect it
         # defensively so the handler itself cannot raise.
         errData = getattr(exp, 'data', None)
         errList = errData.get('errors') if isinstance(errData, dict) else None
         if isinstance(errList, list) and errList:
             firstErr = errList[0]
             if isinstance(firstErr, dict) and firstErr.get('code') == 'too_large':
                 tooBig = True

     if tooBig:
         print("file is too big!")
         self.status.Update(qId, 'file is too big!')

         egg.execute(["jupyter", "nbconvert", "--to", "markdown", "--execute", f"/home/pluto/{githubFileName}", "--allow-errors", "--ExecutePreprocessor.timeout=1200"])

         filePattern = githubFileName.replace(".ipynb", "")

         self.status.Update(qId, 'creating markdown...')
         egg.execute(["./tard.sh", filePattern])

         resp = egg.files.get(f"/home/pluto/{filePattern}.tar.gz")
         with open(f"{self.plutoPath}{filePattern}.tar.gz", mode='wb') as file:
             file.write(resp)

         subprocess.run(["tar", "xvf", f"{self.plutoPath}{filePattern}.tar.gz", "-C", self.plutoPath], env=os.environ, errors=True)

         self.status.Update(qId, 'uploading markdown...')
         self.upsertGithub(f"{self.plutoPath}{filePattern}.md", f"{remoteDir}{filePattern}.md")

         if os.path.isdir(f"{self.plutoPath}{filePattern}_files"):
             self.status.Update(qId, 'uploading images...')
             for fname in os.listdir(f"{self.plutoPath}{filePattern}_files"):
                 self.upsertGithub(f"{self.plutoPath}{filePattern}_files/{fname}", f"{remoteDir}{filePattern}_files/" + fname)

     egg.files.delete(f"/home/pluto/{githubFileName}")
     shutil.rmtree(self.plutoPath)

     self.db.q.update_one({'_id': qId}, {'$set': {'isProcessed': True, 'processedOn': datetime.now(), 'notebook': gzip.compress(outFileContent)}})
     self.status.Update(qId, 'finished')