Example 1
def jupytext(args=None):
    """Internal implementation of Jupytext command line"""
    args = parse_jupytext_args(args)

    def log(text):
        if not args.quiet:
            sys.stdout.write(text + '\n')

    if args.version:
        log(__version__)
        return 0

    if args.pre_commit:
        if args.notebooks:
            raise ValueError(
                '--pre-commit takes notebooks from the git index. Do not pass any notebook here.'
            )
        args.notebooks = notebooks_in_git_index(args.input_format)
        log('[jupytext] Notebooks in git index are:')
        for nb_file in args.notebooks:
            log(nb_file)

    def writef_git_add(notebook_, nb_file_, fmt_):
        write(notebook_, nb_file_, fmt=fmt_)
        if args.pre_commit:
            system('git', 'add', nb_file_)

    # Read notebook from stdin
    if not args.notebooks:
        if not args.pre_commit:
            args.notebooks = ['-']

    if args.set_formats is not None:
        # Replace empty string with None
        args.update_metadata = recursive_update(
            args.update_metadata,
            {'jupytext': {
                'formats': args.set_formats or None
            }})
        args.sync = True

    if args.paired_paths:
        if len(args.notebooks) != 1:
            raise ValueError('--paired-paths applies to a single notebook')
        print_paired_paths(args.notebooks[0], args.input_format)
        return 1

    if not args.to and not args.output and not args.sync \
            and not args.pipe and not args.check \
            and not args.test and not args.test_strict \
            and not args.update_metadata and not args.set_kernel:
        raise ValueError('Please select an action')

    if args.output and len(args.notebooks) != 1:
        raise ValueError('Please input a single notebook when using --output')

    if args.input_format:
        args.input_format = long_form_one_format(args.input_format)

    if args.to:
        args.to = long_form_one_format(args.to)
        set_format_options(args.to, args.format_options)

    # Main loop
    round_trip_conversion_errors = 0

    # Wildcard extension on Windows #202
    notebooks = []
    for pattern in args.notebooks:
        if '*' in pattern or '?' in pattern:
            notebooks.extend(glob.glob(pattern))
        else:
            notebooks.append(pattern)

    for nb_file in notebooks:
        try:
            if nb_file == '-' and args.sync:
                raise ValueError('Cannot sync a notebook on stdin')

            nb_dest = args.output or (None if not args.to else (
                '-' if nb_file == '-' else full_path(
                    base_path(nb_file, args.input_format), args.to)))

            # Just acting on metadata / pipe => save in place
            if not nb_dest and not args.sync:
                nb_dest = nb_file

            if nb_dest == '-':
                args.quiet = True

            # I. ### Read the notebook ###
            fmt = copy(args.input_format) or {}
            set_format_options(fmt, args.format_options)
            log('[jupytext] Reading {}{}'.format(
                nb_file if nb_file != '-' else 'stdin', ' in format {}'.format(
                    short_form_one_format(fmt)) if 'extension' in fmt else ''))

            notebook = read(nb_file, fmt=fmt)
            if not fmt:
                text_representation = notebook.metadata.get(
                    'jupytext', {}).get('text_representation', {})
                ext = os.path.splitext(nb_file)[1]
                if text_representation.get('extension') == ext:
                    fmt = {
                        key: text_representation[key]
                        for key in text_representation
                        if key in ['extension', 'format_name']
                    }
                elif ext:
                    fmt = {'extension': ext}

            # Compute actual extension when using script/auto, and update nb_dest if necessary
            dest_fmt = args.to
            if dest_fmt and dest_fmt['extension'] == '.auto':
                auto_ext = auto_ext_from_metadata(notebook.metadata)
                if not auto_ext:
                    raise ValueError(
                        'The notebook has no language information. '
                        'Please provide an explicit script extension.')
                dest_fmt['extension'] = auto_ext
                if not args.output and nb_file != '-':
                    nb_dest = full_path(base_path(nb_file, args.input_format),
                                        dest_fmt)

            # Set the kernel
            set_kernel = args.set_kernel
            if args.execute and notebook.metadata.get('kernelspec',
                                                      {}).get('name') is None:
                log("[jupytext] Setting default kernel with --set-kernel -")
                set_kernel = '-'

            if set_kernel:
                if set_kernel == '-':
                    language = notebook.metadata.get('jupytext', {}).get('main_language') \
                               or notebook.metadata['kernelspec']['language']
                    if not language:
                        raise ValueError(
                            'Cannot infer a kernel as notebook language is not defined'
                        )

                    kernelspec = kernelspec_from_language(language)
                    if not kernelspec:
                        raise ValueError(
                            'Found no kernel for {}'.format(language))
                else:
                    try:
                        kernelspec = get_kernel_spec(set_kernel)
                    except KeyError:
                        raise KeyError(
                            'Please choose a kernel name among {}'.format(
                                [name for name in find_kernel_specs()]))
                    kernelspec = {
                        'name': args.set_kernel,
                        'language': kernelspec.language,
                        'display_name': kernelspec.display_name
                    }

                args.update_metadata['kernelspec'] = kernelspec

            # Update the metadata
            if args.update_metadata:
                log("[jupytext] Updating notebook metadata with '{}'".format(
                    json.dumps(args.update_metadata)))
                # Are we updating a text file that has a metadata filter? #212
                if fmt['extension'] != '.ipynb' and \
                        notebook.metadata.get('jupytext', {}).get('notebook_metadata_filter') == '-all':
                    notebook.metadata.get('jupytext',
                                          {}).pop('notebook_metadata_filter')
                recursive_update(notebook.metadata, args.update_metadata)

                if 'kernelspec' in args.update_metadata and 'main_language' in notebook.metadata.get(
                        'jupytext', {}):
                    notebook.metadata['jupytext'].pop('main_language')

            # Read paired notebooks
            if args.sync:
                set_prefix_and_suffix(fmt, notebook, nb_file)
                try:
                    notebook, inputs_nb_file, outputs_nb_file = load_paired_notebook(
                        notebook, fmt, nb_file, log)
                except NotAPairedNotebook as err:
                    sys.stderr.write('[jupytext] Warning: ' + str(err) + '\n')
                    continue

            # II. ### Apply commands onto the notebook ###
            # Pipe the notebook into the desired commands
            for cmd in args.pipe or []:
                notebook = pipe_notebook(notebook, cmd, args.pipe_fmt)

            # and/or test the desired commands onto the notebook
            for cmd in args.check or []:
                pipe_notebook(notebook, cmd, args.pipe_fmt, update=False)

            # Execute the notebook
            if args.execute:
                log("[jupytext] Executing notebook")
                kernel_name = notebook.metadata.get('kernelspec',
                                                    {}).get('name')
                exec_proc = ExecutePreprocessor(timeout=None,
                                                kernel_name=kernel_name)
                exec_proc.preprocess(notebook, resources={})

            # III. ### Possible actions ###
            modified = args.update_metadata or args.pipe or args.execute
            # a. Test round trip conversion
            if args.test or args.test_strict:
                try:
                    test_round_trip_conversion(
                        notebook,
                        dest_fmt,
                        update=args.update,
                        allow_expected_differences=not args.test_strict,
                        stop_on_first_error=args.stop_on_first_error)
                except NotebookDifference as err:
                    round_trip_conversion_errors += 1
                    sys.stdout.write('{}: {}'.format(nb_file, str(err)))
                continue

            # b. Output to the desired file or format
            if nb_dest:
                if nb_dest == nb_file and not dest_fmt:
                    dest_fmt = fmt

                # Test consistency between dest name and output format
                if dest_fmt and nb_dest != '-':
                    base_path(nb_dest, dest_fmt)

                # Describe what jupytext is doing
                if os.path.isfile(nb_dest) and args.update:
                    if not nb_dest.endswith('.ipynb'):
                        raise ValueError('--update is only for ipynb files')
                    action = ' (destination file updated)'
                    check_file_version(notebook, nb_file, nb_dest)
                    combine_inputs_with_outputs(notebook,
                                                read(nb_dest),
                                                fmt=fmt)
                elif os.path.isfile(nb_dest):
                    action = ' (destination file replaced)'
                else:
                    action = ''

                log('[jupytext] Writing {nb_dest}{format}{action}'.format(
                    nb_dest=nb_dest,
                    format=' in format ' + short_form_one_format(dest_fmt)
                    if dest_fmt and 'format_name' in dest_fmt else '',
                    action=action))
                writef_git_add(notebook, nb_dest, dest_fmt)

            # c. Synchronize paired notebooks
            if args.sync:
                # Also update the original notebook if the notebook was modified
                if modified:
                    inputs_nb_file = outputs_nb_file = None
                formats = notebook.metadata['jupytext']['formats']

                for ipynb in [True, False]:
                    # Write first format last so that it is the most recent file
                    for alt_path, alt_fmt in paired_paths(
                            nb_file, fmt, formats)[::-1]:
                        # Write ipynb first for compatibility with our contents manager
                        if alt_path.endswith('.ipynb') != ipynb:
                            continue
                        # Do not write the ipynb file if it was not modified
                        # But, always write text representations to make sure they are the most recent
                        if alt_path == inputs_nb_file and alt_path == outputs_nb_file:
                            continue
                        log("[jupytext] Updating '{}'".format(alt_path))
                        writef_git_add(notebook, alt_path, alt_fmt)
        except (ValueError, TypeError, IOError) as err:
            if args.warn_only:
                sys.stderr.write('[jupytext] Error: {}\n'.format(str(err)))
            else:
                raise err

    return round_trip_conversion_errors
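This CLI entry point can also be invoked programmatically with an explicit argument list; a minimal usage sketch (assuming the function is importable as jupytext.cli.jupytext; the file name is illustrative):

from jupytext.cli import jupytext

# Convert a script to an .ipynb notebook; returns 0 on success.
exit_code = jupytext(['--to', 'notebook', 'script.py'])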
Example 2
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError
import traceback
import os

if __name__ == '__main__':

    notebook_filename = "C:/Users/1/Desktop/abc.ipynb"
    # notebook_filename = "D:/jupyter/宏观团队/HG/1.高频跟踪/3.周度策略观点@/3.周度策略观点.ipynb"

    with open(notebook_filename, encoding='utf8') as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(timeout=1000, kernel_name='python3')

    try:
        # 'path' in the resources metadata sets the working directory
        # for kernel execution
        out = ep.preprocess(
            nb, {'metadata': {'path': 'C:/Users/1/Desktop/'}})
        print("成功")
    except CellExecutionError as e:
        # out = None
        msg = 'Error executing the report "%s".\n' % notebook_filename
        msg += 'See "%s"' % 'C:/Users/1/Desktop/abc_123.ipynb'
        print(msg)
        print('-------------begin-------')
        exc = traceback.format_exc()
        print(exc.replace("\n", "<br/>"))
        print('-------------end-------')
        with open('C:/Users/1/Desktop/abc_123.ipynb',
                  mode='w', encoding='utf8') as f:
            # Save the partially executed notebook so the traceback can be
            # inspected (assumed completion of the truncated snippet)
            nbformat.write(nb, f)
Example 3
def execute_nb(nb, path, nbargs=None):
    nbargs = {} if nbargs is None else nbargs
    ep = ExecutePreprocessor(**nbargs)
    ep.preprocess(nb, {'metadata': {'path': path}})
    return nb
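A possible usage sketch for this helper (file names, timeout, and kernel name are illustrative):

import nbformat

# Read, execute with the current directory as working directory, then save.
with open('analysis.ipynb') as f:
    nb = nbformat.read(f, as_version=4)

nb = execute_nb(nb, path='.', nbargs={'timeout': 600, 'kernel_name': 'python3'})

with open('analysis_out.ipynb', 'w', encoding='utf-8') as f:
    nbformat.write(nb, f)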
Example 4
import string
import os
import sys
import json
import urllib.parse
import nbformat as nbf
import pandas as pd
sys.path.append('app/static/py')
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors.execute import executenb

#############################################
########## 2. Variables
#############################################
##### 1. Notebook Execution #####
ep = ExecutePreprocessor(timeout=600, kernel_name='venv')

###
from nbconvert import HTMLExporter
from traitlets.config import Config
c = Config()
c.HTMLExporter.preprocessors = [
    'nbconvert.preprocessors.ExtractOutputPreprocessor'
]
html_exporter_with_figs = HTMLExporter(config=c)

#################################################################
#################################################################
############### 1. Functions ####################################
#################################################################
#################################################################
Example 5
def test_cli_datasource_new(mock_subprocess, caplog, monkeypatch,
                            empty_data_context, filesystem_csv_2):
    context = empty_data_context
    root_dir = context.root_directory
    assert context.list_datasources() == []

    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(root_dir))
    result = runner.invoke(
        cli,
        "--v3-api datasource new",
        input=f"1\n1\n{filesystem_csv_2}\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert context.list_datasources() == []

    assert "What data would you like Great Expectations to connect to?" in stdout
    assert "What are you processing your files with?" in stdout

    assert result.exit_code == 0

    uncommitted_dir = os.path.join(root_dir, context.GE_UNCOMMITTED_DIR)
    expected_notebook = os.path.join(uncommitted_dir, "datasource_new.ipynb")
    assert os.path.isfile(expected_notebook)
    mock_subprocess.assert_called_once_with(
        ["jupyter", "notebook", expected_notebook])

    # Run notebook
    with open(expected_notebook) as f:
        nb = nbformat.read(f, as_version=4)
    ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": uncommitted_dir}})

    del context
    context = DataContext(root_dir)

    assert len(context.list_datasources()) == 1

    assert context.list_datasources() == [{
        "name": "my_datasource",
        "class_name": "Datasource",
        "module_name": "great_expectations.datasource",
        "execution_engine": {
            "module_name": "great_expectations.execution_engine",
            "class_name": "PandasExecutionEngine",
        },
        "data_connectors": {
            "my_datasource_example_data_connector": {
                "default_regex": {
                    "group_names": "data_asset_name",
                    "pattern": "(.*)",
                },
                "module_name": "great_expectations.datasource.data_connector",
                "base_directory": "../../filesystem_csv_2",
                "class_name": "InferredAssetFilesystemDataConnector",
            }
        },
    }]
    assert_no_logging_messages_or_tracebacks(caplog, result)
Example 6
def test_cli_datasource_new_connection_string(
    mock_subprocess, mock_emit, empty_data_context, empty_sqlite_db, caplog, monkeypatch
):
    monkeypatch.delenv(
        "GE_USAGE_STATS", raising=False
    )  # Undo the project-wide test default
    root_dir = empty_data_context.root_directory
    context: DataContext = empty_data_context
    assert context.list_datasources() == []

    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        "--v3-api datasource new",
        input="2\n6\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "What data would you like Great Expectations to connect to?" in stdout

    assert result.exit_code == 0

    uncommitted_dir = os.path.join(root_dir, context.GE_UNCOMMITTED_DIR)
    expected_notebook = os.path.join(uncommitted_dir, "datasource_new.ipynb")

    assert os.path.isfile(expected_notebook)
    mock_subprocess.assert_called_once_with(["jupyter", "notebook", expected_notebook])

    expected_call_args_list = [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.datasource.new.begin",
                "event_payload": {"api_version": "v3"},
                "success": True,
            }
        ),
        mock.call(
            {
                "event": "cli.new_ds_choice",
                "event_payload": {
                    "type": "sqlalchemy",
                    "db": "other",
                    "api_version": "v3",
                },
                "success": True,
            }
        ),
        mock.call(
            {
                "event": "cli.datasource.new.end",
                "event_payload": {"api_version": "v3"},
                "success": True,
            }
        ),
    ]

    assert mock_emit.call_args_list == expected_call_args_list
    assert mock_emit.call_count == len(expected_call_args_list)

    # Run notebook
    with open(expected_notebook) as f:
        nb = nbformat.read(f, as_version=4)

    # mock the user adding a connection string into the notebook by overwriting the right cell

    assert "connection_string" in nb["cells"][5]["source"]
    nb["cells"][5]["source"] = '  connection_string = "sqlite://"'
    ep = ExecutePreprocessor(timeout=60, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": uncommitted_dir}})

    del context
    context = DataContext(root_dir)

    assert context.list_datasources() == [
        {
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "module_name": "great_expectations.execution_engine",
                "connection_string": "sqlite://",
                "class_name": "SqlAlchemyExecutionEngine",
            },
            "class_name": "Datasource",
            "data_connectors": {
                "default_runtime_data_connector_name": {
                    "batch_identifiers": ["default_identifier_name"],
                    "class_name": "RuntimeDataConnector",
                    "module_name": "great_expectations.datasource.data_connector",
                },
                "default_inferred_data_connector_name": {
                    "class_name": "InferredAssetSqlDataConnector",
                    "module_name": "great_expectations.datasource.data_connector",
                    "include_schema_name": True,
                },
            },
            "name": "my_datasource",
        }
    ]

    assert_no_logging_messages_or_tracebacks(caplog, result)
Example 7
# Find notebooks but not notebooks previously output from this script
# (the loop header was truncated; scanning the current directory is an
# assumption)
notebooks = []
for f in os.listdir('.'):
    if f.endswith('.ipynb') and not f.endswith('_out.ipynb'):
        print(" - ", f)
        notebooks.append(f[:-6])  # Want the filename without '.ipynb'

# Execute notebooks and output
num_notebooks = len(notebooks)
print('*****')
for i, n in enumerate(notebooks):

    n_out = n + '_out' if not args.inplace else n

    with open(n + '.ipynb') as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(timeout=int(args.timeout),
                             kernel_name=args.kernel)
    try:
        print('Running', n, ':', i, '/', num_notebooks)
        out = ep.preprocess(nb, {'metadata': {'path': args.run_path}})
    except CellExecutionError as e:
        out = None
        msg = 'Error executing the notebook "%s".\n' % n
        msg += "{traceback}".format(traceback=e.traceback)
        # msg += 'See notebook "%s" for the traceback.' % n_out
        print(msg)
    except TimeoutError:
        msg = 'Timeout executing the notebook "%s".\n' % n
        print(msg)
    finally:
        # Write output file
        with open(n_out + '.ipynb', mode='wt') as f:
            nbformat.write(nb, f)
Example 8
line_chart.add(
    'IE', [85.8, 84.6, 84.7, 74.5, 66, 58.6, 54.7, 44.8, 36.2, 26.6, 20.1])
line_chart.add('Others',
               [14.2, 15.4, 15.3, 8.9, 9, 10.4, 8.9, 5.8, 6.7, 6.8, 7.5])

longtext = html_pygal.format(pygal_render=line_chart.render())
line_chart.render_to_png('./test.png')

#send_html_email(subject, longtext)
#send_html_email(subject, html_png)
#send_html_email(subject, line_chart.render_response())
#line_chart.render_to_file('test.svg')

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

with open('/home/pi/practise/python_learning/test1.ipynb') as f:
    nb = nbformat.read(f, as_version=4)

ep = ExecutePreprocessor(timeout=600)

ep.preprocess(nb, {'metadata': {'path': '/home/pi/practise/python_learning'}})

from traitlets.config import Config
from nbconvert import HTMLExporter
html_exporter = HTMLExporter()
#html_exporter.template_file = 'basic'

(body, resources) = html_exporter.from_notebook_node(nb)

send_html_email(subject, body)
Example 9
def test_example_notebooks(tmp_path, ipynb):
    with open(ipynb) as f:
        nb = nbformat.read(f, as_version=4)
    ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": tmp_path}})
Example 10
def execute_jupyter_notebook(notebook):
    """Execute a jupyter notebook and return the execution result."""
    # Import Jupyter tools. Done in Jupyter scope so that they do not need to
    # be installed while validating plain Python snippets.
    import nbformat
    from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor

    # Execute
    try:

        # Open notebook file and read into a notebook object
        # The notebook object is automatically converted to version 4 because
        # nbconvert will only handle the most recent notebook format
        # https://github.com/ipython/ipython/issues/6992#issuecomment-63746907
        with open(notebook, 'r') as fd:
            original_nb = nbformat.read(fd, as_version=NBFORMAT_V4)

        # Make sure that cells exists
        if CELLS not in original_nb:
            original_nb[CELLS] = []

        # Create a copy of the original notebook and clean any previous
        # execution outputs and metadata from cells. We do this so that after
        # execution, we're guaranteed that the outputs present are from us
        # running the cells in order, not old execution results.
        nb = deepcopy(original_nb)
        for cell in nb[CELLS]:
            if cell[CELL_TYPE] == CODE:
                cell[EXECUTION_COUNT] = None
                cell[METADATA] = {}
                cell[OUTPUTS] = []

        # Create execution preprocessor.
        # timeout=None disables cell execution timeout. We disable cell timeout
        # in Jupyter because we want to measure timeout of the entire notebook.
        preprocessor = ExecutePreprocessor(
            kernel_name=KERNEL,
            timeout=None,
            extra_arguments=[
                '--InteractiveShellApp.extra_extension=exception_handler',
                '--colors=NoColor',
            ]
        )

        # Run notebook
        try:

            # Set allowed timeout seconds to be JUPYTER_BASE_TIMEOUT_SECONDS
            # plus an additional JUPYTER_CELL_TIMEOUT_SECONDS for each cell.
            # Jupyter notebooks often take longer to run than snippets.
            seconds = (
                JUPYTER_BASE_TIMEOUT_SECONDS
                + JUPYTER_CELL_TIMEOUT_SECONDS * len(nb[CELLS])
            )

            # Execute with timeout.
            logger.info(
                'Running ExecutePreprocessor on notebook with {} '
                'second timeout.'.format(seconds)
            )
            with Timeout(seconds=seconds):
                preprocessor.preprocess(nb, {})

            # Return success
            logger.info('Execution succeeded')
            return {STATUS_CODE: SUCCESS}

        except TimeoutError:

            logger.info('Execution timed out')
            return {STATUS_CODE: TIMEOUT}

        except CellExecutionError:

            # CellExecutionError indicates that one of the cells from
            # the notebook has an error output. Look for the first error
            # output from an executed code cell. We do this to get the
            # error name, message, and traceback in a structured format.
            logger.info('CellExecutionError, parsing root error')

            # Get all notebook code cells
            code_cells = list(
                cell
                for cell in nb[CELLS]
                if cell[CELL_TYPE] == CODE
            )

            # Find the first error output
            lines = 0
            error = None
            for cell in code_cells:

                # Look for an error output from the cell
                error_output = next(
                    (o for o in cell[OUTPUTS] if o[OUTPUT_TYPE] == ERROR),
                    None
                )

                # If there was an error output, save it. Otherwise, increment
                # the total number of source lines seen.
                if error_output:
                    error = error_output
                    break
                else:
                    lines += len(cell[SOURCE].split('\n'))

            # Raise exception if unable to find the error output.
            if not error:
                raise Exception('Unable to find notebook error output')

            # And parse the stack
            stack = list(map(ast.literal_eval, error[TRACEBACK]))

            # Override the file name for the input script
            stack[0][0] = notebook

            # Increment the line number to include all lines in earlier cells
            stack[0][1] += lines

            # Get the summary for the line that raised the exception
            e_filename, e_lineno, _, e_line = stack[-1]

            # Return status
            return {
                STATUS_CODE: EXCEPTION,
                EXCEPTION_NAME: error[ENAME],
                EXCEPTION_MESSAGE: error[EVALUE],
                EXCEPTION_FILE_NAME: e_filename,
                EXCEPTION_LINE_NUMBER: e_lineno,
                EXCEPTION_LINE: e_line,
                EXCEPTION_STACK: stack
            }

    except BaseException as e:

        logger.info('Execution produced an exception.')
        logger.error(e)
        return _get_exception_information(code=UNKNOWN_EXCEPTION)
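`Timeout` above is not part of nbconvert; it is presumably a project-level helper. A minimal sketch of what a signal-based implementation could look like, assuming a Unix main thread (SIGALRM is unavailable on Windows):

import signal

class Timeout:
    """Raise TimeoutError if the with-block runs longer than `seconds`."""

    def __init__(self, seconds):
        self.seconds = seconds

    def _handle_alarm(self, signum, frame):
        raise TimeoutError('execution timed out')

    def __enter__(self):
        signal.signal(signal.SIGALRM, self._handle_alarm)
        signal.alarm(int(self.seconds))
        return self

    def __exit__(self, exc_type, exc_value, tb):
        signal.alarm(0)  # cancel any pending alarm
        return False  # let exceptions propagate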
Example 11
    def execute(self, force=False):
        """
        Execute the specified notebook file and write the executed notebook
        out to a new file.

        Parameters
        ----------
        force : bool, optional
            Re-execute the notebook even if its metadata marks it as
            already executed.

        Returns
        -------
        executed_nb_path : str
            The path to the executed notebook.
        """

        with open(self.nb_path) as f:
            nb = nbformat.read(f, as_version=IPYTHON_VERSION)

        is_executed = nb['metadata'].get('docs_executed')

        if is_executed == 'executed' and not force:
            _logger.info(
                f"Notebook {self.nb} in {self.nb_dir} already executed, skipping"
            )
        else:

            # Execute the notebook
            _logger.info(f"Executing notebook {self.nb} in {self.nb_dir}")
            t0 = time.time()

            clear_executor = ClearOutputPreprocessor()
            executor = ExecutePreprocessor(**self.execute_kwargs)

            # First clean up the notebook and remove any cells that have been run
            clear_executor.preprocess(nb, {})

            try:
                executor.preprocess(nb, {'metadata': {'path': self.nb_dir}})
                execute_dict = {'docs_executed': 'executed'}
                nb['metadata'].update(execute_dict)
            except CellExecutionError as err:
                execute_dict = {'docs_executed': 'errored'}
                nb['metadata'].update(execute_dict)
                _logger.error(f"Error executing notebook {self.nb}")
                _logger.error(err)

            _logger.info(f"Finished running notebook ({time.time() - t0})")

            _logger.info(
                f"Writing executed notebook to {self.executed_nb_path}")
            # Makes sure original notebook isn't left blank in case of error during writing
            if self.overwrite:
                with open(self.temp_nb_path, 'w', encoding='utf-8') as f:
                    #with open(self.temp_nb_path, 'w') as f:
                    nbformat.write(nb, f)
                shutil.copyfile(self.temp_nb_path, self.executed_nb_path)
                os.remove(self.temp_nb_path)
            else:
                with open(self.executed_nb_path, 'w', encoding='utf-8') as f:
                    #with open(self.temp_nb_path, 'w') as f:
                    nbformat.write(nb, f)

        return self.executed_nb_path
Example 12
    def execute(self, path):
        with open(path) as f:
            nb = nbformat.read(f, as_version=4)
        ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
        ep.preprocess(nb)
Example 13
def notebook_tester(fname, kernelspec='python'):
    raw_nb = Exporter().from_filename(fname)
    raw_nb[0].metadata.setdefault('kernelspec', {})['name'] = kernelspec
    preproc = ExecutePreprocessor(timeout=-1)
    preproc.preprocess(*raw_nb)
Example 14
def execute_notebook(nb_path,
                     serial_number=None,
                     baud=None,
                     allow_errors=True,
                     SCOPETYPE='OPENADC',
                     PLATFORM='CWLITEARM',
                     **kwargs):
    """Execute a notebook via nbconvert and collect output.
       :returns (parsed nb object, execution errors)
    """
    notebook_dir, file_name = os.path.split(nb_path)
    real_path = Path(nb_path).absolute()

    with open(real_path, encoding='utf-8') as nbfile:
        nb = nbformat.read(nbfile, as_version=4)

        orig_parameters = extract_parameters(nb)
        params = parameter_values(orig_parameters,
                                  SCOPETYPE=SCOPETYPE,
                                  PLATFORM=PLATFORM,
                                  **kwargs)
        kwargs['SCOPETYPE'] = SCOPETYPE
        kwargs['PLATFORM'] = PLATFORM
        put_all_kwargs_in_notebook(params, **kwargs)
        nb = replace_definitions(nb, params, execute=False)

        ep = ExecutePreprocessor(timeout=None,
                                 kernel_name='python3',
                                 allow_errors=allow_errors)

        if serial_number or baud:
            ip = InLineCodePreprocessor(notebook_dir)
            # inline all code before doing any replacements
            nb, resources = ip.preprocess(nb, {})

        replacements = {}

        if serial_number:
            replacements.update({
                r'cw.scope(\(\))':
                'cw.scope(sn=\'{}\')'.format(serial_number),
                r'chipwhisperer.scope()':
                'chipwhisperer.scope(sn=\'{}\')'.format(serial_number)
            })

        if baud:
            replacements.update({
                r'program_target\(((?:[\w=\+/*\s]+\s*,\s*)*[\w=+/*]+)':
                r"program_target(\g<1>, baud=38400"
            })

        # %matplotlib notebook renders as blank plots in the executed output,
        # so replace it with %matplotlib inline for now
        replacements.update({'%matplotlib notebook': '%matplotlib inline'})

        # apply all regex substitutions
        if replacements:
            rp = RegexReplacePreprocessor(replacements)
            nb, resources = rp.preprocess(nb, {})

        if notebook_dir:
            with cd(notebook_dir):
                nb, resources = ep.preprocess(nb, {'metadata': {'path': './'}})
        else:
            nb, resources = ep.preprocess(nb, {'metadata': {'path': './'}})

        errors = [[i + 1, output] for i, cell in enumerate(nb.cells) if "outputs" in cell
                  for output in cell["outputs"] \
                  if output.output_type == "error"]

        export_kwargs = {'SCOPETYPE': SCOPETYPE, 'PLATFORM': PLATFORM}

        return nb, errors, export_kwargs
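`cd` above is assumed to be a small change-directory context manager (it is not defined in the snippet); a minimal sketch:

import os
from contextlib import contextmanager

@contextmanager
def cd(path):
    """Temporarily change the working directory, restoring it on exit."""
    old_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_cwd)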
Example 15
tutorials = []
for fname in sorted(glob.glob1(src_dir, '*.ipynb')):
    basename = fname[:-6]
    output_ipynb_fname = os.path.join(target_dir, fname)
    output_rst_fname = os.path.join(target_dir, basename + '.rst')

    print('Running', fname)
    with open(os.path.join(src_dir, fname), 'r') as f:
        notebook = reads(f.read())

    # The first line of the tutorial file should give the title
    title = notebook.cells[0]['source'].split('\n')[0].strip('# ')
    tutorials.append((basename, title))

    # Execute the notebook
    preprocessor = ExecutePreprocessor()
    preprocessor.allow_errors = True
    notebook, _ = preprocessor.preprocess(notebook,
                                          {'metadata': {
                                              'path': src_dir
                                          }})

    print('Saving notebook and converting to RST')
    exporter = NotebookExporter()
    output, _ = exporter.from_notebook_node(notebook)
    with codecs.open(output_ipynb_fname, 'w', encoding='utf-8') as f:
        f.write(output)

    # Insert a note about ipython notebooks with a download link
    note = deindent(u'''
    .. only:: html
Example 16
def execute_notebook(filename):
    with open(filename) as f:
        nb = nbformat.read(f, as_version=4)
        ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
        ep.preprocess(nb, {'metadata': {'path': os.path.dirname(filename)}})
Example 17
def run_notebook(notebook_path, out_path=None,
                 nb_suffix='-out', nb_kwargs=None, hide_input=True,
                 insert_pos=1, timeout=3600, execute_kwargs=None):
    """Runs a notebook and saves the output in a new notebook.

    Executes a notebook, optionally passing "arguments" in a way roughly
    similar to passing arguments to a function.
    Notebook arguments are passed in a dictionary (`nb_kwargs`) which is
    converted to a string containing python code, then inserted in the notebook
    as a code cell. The code contains only assignments of variables which
    can be used to control the execution of a suitably written notebook. When
    calling a notebook, you need to know which arguments (variables) to pass.
    Unlike functions, no check on the input arguments is performed.
    The "notebook signature" is only informally declared in a conventional
    markdown cell at the beginning of the notebook.

    Arguments:
        notebook_path (path-like object): path of the notebook to be
            executed. Valid values are strings or pathlib.Path objects.
        nb_suffix (string): suffix to append to the file name of the executed
            notebook.
        nb_kwargs (dict or None): If not None, this dict is converted to a
            string of python assignments with keys representing variables
            names and values variables content. This string is inserted as
            code-cell in the notebook to be executed.
        insert_pos (int): position of insertion of the code-cell containing
            the input arguments. Default is 1 (i.e. second cell). With this
            default, the input notebook can define, in the first cell, default
            values of input arguments (used when the notebook is executed
            with no arguments or through the Notebook GUI).
        timeout (int): timeout in seconds after which the execution is aborted.
        execute_kwargs (dict): additional arguments passed to
            `ExecutePreprocessor`.
        out_path (path-like or None): folder where to save the output
            notebook. If None, saves the notebook in the same folder as
            the template. Valid values are strings or pathlib.Path objects.
        hide_input (bool): whether to create a notebook with input cells
            hidden (useful to remind the user that the auto-generated output
            is not meant to have its code edited).
    """
    timestamp_cell = ("**Executed:** %s\n\n**Duration:** %d seconds.\n\n"
                      "**Autogenerated from:** [%s](%s)")
    if nb_kwargs is not None:
        header = '# Cell inserted during automated execution.'
        code = dict_to_code(nb_kwargs)
        code_cell = '\n'.join((header, code))

    notebook_path = Path(notebook_path)
    if not notebook_path.is_file():
        raise FileNotFoundError("Path '%s' not found." % notebook_path)

    if out_path is None:
        out_path = notebook_path.parent
    out_path = Path(out_path)
    if not out_path.exists():
        raise FileNotFoundError("Output path '%s' not found." % out_path)
    out_notebook_path = (out_path /
                         ('%s%s.ipynb' % (notebook_path.stem, nb_suffix)))
    display(FileLink(str(notebook_path)))

    if execute_kwargs is None:
        execute_kwargs = {}
    ep = ExecutePreprocessor(timeout=timeout, **execute_kwargs)
    nb = nbformat.read(str(notebook_path), as_version=4)

    if hide_input:
        nb["metadata"].update({"hide_input": True})

    if nb_kwargs:
        nb['cells'].insert(insert_pos, nbformat.v4.new_code_cell(code_cell))

    start_time = time.time()
    try:
        # Execute the notebook
        ep.preprocess(nb, {'metadata': {'path': './'}})
    except:
        # Execution failed, print a message then raise.
        msg = 'Error executing the notebook "%s".\n\n' % notebook_path
        msg += 'See notebook "%s" for the traceback.' % out_notebook_path
        print(msg)
        raise
    else:
        # On successful execution, add timestamping cell
        duration = time.time() - start_time
        timestamp_cell = timestamp_cell % (time.ctime(start_time), duration,
                                           notebook_path, out_notebook_path)
        nb['cells'].insert(0, nbformat.v4.new_markdown_cell(timestamp_cell))
    finally:
        # Save the notebook even when it raises an error
        nbformat.write(nb, str(out_notebook_path))
        display(FileLink(str(out_notebook_path)))
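`dict_to_code` is referenced but not shown here; a plausible minimal implementation (an assumption) renders the dict as one assignment per line, using repr() so simple values round-trip:

def dict_to_code(mapping):
    """Render {'a': 1, 'b': 'x'} as the code string "a = 1\nb = 'x'"."""
    return '\n'.join('{} = {!r}'.format(key, value)
                     for key, value in mapping.items())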
Example 18
def gen_tutorials(repo_dir: str,
                  exec_tutorials: bool,
                  kernel_name: Optional[str] = None) -> None:
    """Generate HTML tutorials for Docusaurus Ax site from Jupyter notebooks.

    Also create ipynb and py versions of tutorial in Docusaurus site for
    download.
    """
    has_errors = False
    with open(os.path.join(repo_dir, "website", "tutorials.json"),
              "r") as infile:
        tutorial_config = json.loads(infile.read())

    # flatten config dict
    tutorial_configs = [
        config for category in tutorial_config.values() for config in category
    ]

    # prepare paths for converted tutorials & files
    os.makedirs(os.path.join(repo_dir, "website", "_tutorials"), exist_ok=True)
    os.makedirs(os.path.join(repo_dir, "website", "static", "files"),
                exist_ok=True)

    for config in tutorial_configs:
        tid = config["id"]
        t_dir = config.get("dir")
        exec_on_build = config.get("exec_on_build", True)

        print("Generating {} tutorial".format(tid))

        if t_dir is not None:
            tutorial_dir = os.path.join(repo_dir, "tutorials", t_dir)
            html_dir = os.path.join(repo_dir, "website", "_tutorials", t_dir)
            js_dir = os.path.join(repo_dir, "website", "pages", "tutorials",
                                  t_dir)
            py_dir = os.path.join(repo_dir, "website", "static", "files",
                                  t_dir)

            for d in [tutorial_dir, html_dir, js_dir, py_dir]:
                os.makedirs(d, exist_ok=True)

            tutorial_path = os.path.join(tutorial_dir, "{}.ipynb".format(tid))
            html_path = os.path.join(html_dir, "{}.html".format(tid))
            js_path = os.path.join(js_dir, "{}.js".format(tid))
            ipynb_path = os.path.join(py_dir, "{}.ipynb".format(tid))
            py_path = os.path.join(py_dir, "{}.py".format(tid))
            tar_path = os.path.join(py_dir, "{}.tar.gz".format(tid))
        else:
            tutorial_dir = os.path.join(repo_dir, "tutorials")
            tutorial_path = os.path.join(repo_dir, "tutorials",
                                         "{}.ipynb".format(tid))
            html_path = os.path.join(repo_dir, "website", "_tutorials",
                                     "{}.html".format(tid))
            js_path = os.path.join(repo_dir, "website", "pages", "tutorials",
                                   "{}.js".format(tid))
            ipynb_path = os.path.join(repo_dir, "website", "static", "files",
                                      "{}.ipynb".format(tid))
            py_path = os.path.join(repo_dir, "website", "static", "files",
                                   "{}.py".format(tid))

        # load notebook
        with open(tutorial_path, "r") as infile:
            nb_str = infile.read()
            nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)

        # track total exec time (non-None if exec_on_build=True)
        total_time = None

        if exec_tutorials and exec_on_build:
            print("Executing tutorial {}".format(tid))
            kwargs = {
                "kernel_name": kernel_name
            } if kernel_name is not None else {}
            # 2.5 hours, in seconds
            timeout = int(60 * 60 * 2.5)
            ep = ExecutePreprocessor(timeout=timeout, **kwargs)
            start_time = time.time()

            # try / catch failures for now
            # will re-raise at the end
            try:
                # execute notebook, using `tutorial_dir` as working directory
                ep.preprocess(nb, {"metadata": {"path": tutorial_dir}})
                total_time = time.time() - start_time
                print(
                    "Done executing tutorial {}. Took {:.2f} seconds.".format(
                        tid, total_time))
            except Exception as exc:
                has_errors = True
                print("Couldn't execute tutorial {}!".format(tid))
                print(exc)
                total_time = None

        # convert notebook to HTML
        exporter = HTMLExporter()
        html, meta = exporter.from_notebook_node(nb)

        # pull out html div for notebook
        soup = BeautifulSoup(html, "html.parser")
        nb_meat = soup.find("div", {"id": "notebook-container"})
        del nb_meat.attrs["id"]
        nb_meat.attrs["class"] = ["notebook"]

        # when a cell outputs HTML, wrap it in an iframe (useful for Ax reports)
        for html_div in nb_meat.findAll("div", {"class": "output_html"}):
            if html_div.html is not None:
                iframe = soup.new_tag("iframe")
                iframe.attrs["src"] = "data:text/html;charset=utf-8," + str(
                    html_div.html)
                # replace `#` in CSS
                iframe.attrs["src"] = iframe.attrs["src"].replace("#", "%23")
                html_div.contents = [iframe]

        html_out = MOCK_JS_REQUIRES + str(nb_meat)

        # generate HTML file
        with open(html_path, "w") as html_outfile:
            html_outfile.write(html_out)

        # generate JS file
        t_dir_js = t_dir if t_dir else ""
        script = TEMPLATE.format(
            t_dir=t_dir_js,
            tid=tid,
            total_time=total_time if total_time is not None else "null",
        )
        with open(js_path, "w") as js_outfile:
            js_outfile.write(script)

        # output tutorial in both ipynb & py form
        nbformat.write(nb, ipynb_path)
        exporter = ScriptExporter()
        script, meta = exporter.from_notebook_node(nb)
        with open(py_path, "w") as py_outfile:
            py_outfile.write(script)

        # create .tar archive (if necessary)
        if t_dir is not None:
            with tarfile.open(tar_path, "w:gz") as tar:
                tar.add(tutorial_dir, arcname=os.path.basename(tutorial_dir))

    if has_errors:
        raise Exception(
            "There are errors in tutorials, will not continue to publish")
Example 19
def run_nb(fn):
    nb = nbformat.read(open(fn), as_version=nbformat.NO_CONVERT)
    # TODO: filter out export cells
    print(f"Doing {fn}")
    ExecutePreprocessor(timeout=600).preprocess(nb, {})
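One way to address the TODO is to drop code cells carrying an export marker before execution; a hedged sketch (the '#export' marker is an assumption, in the style of nbdev):

def strip_export_cells(nb, marker='#export'):
    """Remove code cells whose source starts with the export marker."""
    nb.cells = [
        cell for cell in nb.cells
        if not (cell.cell_type == 'code'
                and cell.source.lstrip().startswith(marker))
    ]
    return nb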
Example 20
    # Get the desired ipynb file path and parse into components
    _, fpath, outdir = sys.argv
    basedir, fname = os.path.split(fpath)
    fstem = fname[:-6]

    # Read the notebook
    with open(fpath) as f:
        nb = nbformat.read(f, as_version=4)

    # Run the notebook
    kernel = os.environ.get("NB_KERNEL", None)
    if kernel is None:
        kernel = nb["metadata"]["kernelspec"]["name"]
    ep = ExecutePreprocessor(timeout=600,
                             kernel_name=kernel,
                             extra_arguments=["--rc figure.dpi=88"])
    ep.preprocess(nb, {"metadata": {"path": basedir}})

    # Remove plain text execution result outputs
    for cell in nb.get("cells", {}):
        if "show-output" in cell["metadata"].get("tags", []):
            continue
        fields = cell.get("outputs", [])
        for field in fields:
            if field["output_type"] == "execute_result":
                data_keys = field["data"].keys()
                for key in list(data_keys):
                    if key == "text/plain":
                        field["data"].pop(key)
                if not field["data"]:
Example 21
                    default="png")
parser.add_argument('--execute',
                    type=bool,
                    help='execute notebook?',
                    default=True)
try:
    args = parser.parse_args()
except SystemExit:
    sys.exit(0)
inputFile = args.input
outputFile = args.output

with open(inputFile) as fpin:
    text = fpin.read()

text += """
# <markdowncell>

# If you can read this, reads_py() is no longer broken!
"""

nbook = v3.reads_py(text)
nb = v4.upgrade(nbook)  # Upgrade v3 to v4
if args.execute:
    ep = ExecutePreprocessor(timeout=-1,\
             extra_arguments=["--InlineBackend.figure_format="+args.image])
    ep.preprocess(nb, {})
with open(outputFile, 'w', encoding='utf-8') as f:
    nbformat.write(nb, f)
Example 22
def _preproc():
    pythonkernel = 'python' + str(sys.version_info[0])
    return ExecutePreprocessor(timeout=120,
                               kernel_name=pythonkernel,
                               interrupt_on_timeout=True)
Example 23
    def setUp(self):
        pythonkernel = 'python' + str(sys.version_info[0])
        # see http://nbconvert.readthedocs.io/en/stable/execute_api.html
        self.ep = ExecutePreprocessor(timeout=600,
                                      kernel_name=pythonkernel,
                                      interrupt_on_timeout=True)
Example 24
def main(arglist):
    """Process IPython notebooks from a list of files."""
    args = parse_args(arglist)

    # Filter to only IPython notebook files
    nb_paths = [
        arg for arg in args.files
        if arg.endswith(".ipynb") and "student/" not in arg
    ]
    if not nb_paths:
        print("No notebook files found")
        sys.exit(0)

    # Allow environment to override stored kernel name
    exec_kws = {"timeout": 600}
    if "NB_KERNEL" in os.environ:
        exec_kws["kernel_name"] = os.environ["NB_KERNEL"]

    # Defer failures until after processing all notebooks
    errors = {}
    notebooks = {}

    for nb_path in nb_paths:

        # Load the notebook structure
        with open(nb_path) as f:
            nb = nbformat.read(f, nbformat.NO_CONVERT)

        if not sequentially_executed(nb):
            if args.require_sequntial:
                err = (
                    "Notebook is not sequentially executed on a fresh kernel."
                    "\n"
                    "Please do 'Restart and run all' before pushing to Github."
                )
                errors[nb_path] = err
                continue

        # Run the notebook from top to bottom, catching errors
        print(f"Executing {nb_path}")
        executor = ExecutePreprocessor(**exec_kws)
        try:
            executor.preprocess(nb)
        except Exception as err:
            # Log the error, but then continue
            errors[nb_path] = err
        else:
            notebooks[nb_path] = nb

    if errors or args.check_only:
        exit(errors)

    # TODO Check compliancy with PEP8, generate a report, but don't fail

    # TODO Check notebook name format?
    # (If implemented, update the CI workflow to only run on tutorials)

    # Post-process notebooks to remove solution code and write both versions
    for nb_path, nb in notebooks.items():

        # Extract components of the notebook path
        nb_dir, nb_fname = os.path.split(nb_path)
        nb_name, _ = os.path.splitext(nb_fname)

        # Loop through the cells and fix any Colab badges we encounter
        for cell in nb.get("cells", []):
            if has_colab_badge(cell):
                redirect_colab_badge_to_master_branch(cell)

        # Set the colab metadata to have the notebook name match the filepath
        if "colab" in nb["metadata"]:
            nb["metadata"]["colab"]["name"] = f"NeuromatchAcademy_{nb_name}"

        # Write out the executed version of the original notebooks
        print(f"Writing complete notebook to {nb_path}")
        with open(nb_path, "w") as f:
            nbformat.write(nb, f)

        # Create subdirectories, if they don't exist
        student_dir = make_sub_dir(nb_dir, "student")
        static_dir = make_sub_dir(nb_dir, "static")
        solutions_dir = make_sub_dir(nb_dir, "solutions")

        # Generate the student version and save it to a subdirectory
        print(f"Extracting solutions from {nb_path}")
        processed = extract_solutions(nb, nb_dir, nb_name)
        student_nb, static_images, solution_snippets = processed

        # Loop through cells and point the colab badge at the student version
        for cell in student_nb.get("cells", []):
            if has_colab_badge(cell):
                redirect_colab_badge_to_student_version(cell)

        # Write the student version of the notebook
        student_nb_path = os.path.join(student_dir, nb_fname)
        print(f"Writing student notebook to {student_nb_path}")
        with open(student_nb_path, "w") as f:
            nbformat.write(student_nb, f)

        # Write the images extracted from the solution cells
        print(f"Writing solution images to {static_dir}")
        for fname, image in static_images.items():
            fname = fname.replace("static", static_dir)
            image.save(fname)

        # Write the solution snippets
        print(f"Writing solution snippets to {solutions_dir}")
        for fname, snippet in solution_snippets.items():
            fname = fname.replace("solutions", solutions_dir)
            with open(fname, "w") as f:
                f.write(snippet)

    exit(errors)
Example 25
def jupytext_single_file(nb_file, args, log):
    """Apply the jupytext commmand, with given arguments, to a single file"""
    if nb_file == '-' and args.sync:
        raise ValueError('Cannot sync a notebook on stdin')

    nb_dest = args.output or (None if not args.to else (
        '-' if nb_file == '-' else full_path(
            base_path(nb_file, args.input_format), args.to)))

    # Just acting on metadata / pipe => save in place
    if not nb_dest and not args.sync:
        nb_dest = nb_file

    if nb_dest == '-':
        args.quiet = True

    # I. ### Read the notebook ###
    fmt = copy(args.input_format) or {}
    set_format_options(fmt, args.format_options)
    log('[jupytext] Reading {}{}'.format(
        nb_file if nb_file != '-' else 'stdin', ' in format {}'.format(
            short_form_one_format(fmt)) if 'extension' in fmt else ''))

    notebook = read(nb_file, fmt=fmt)
    if not fmt:
        text_representation = notebook.metadata.get('jupytext', {}).get(
            'text_representation', {})
        ext = os.path.splitext(nb_file)[1]
        if text_representation.get('extension') == ext:
            fmt = {
                key: text_representation[key]
                for key in text_representation
                if key in ['extension', 'format_name']
            }
        elif ext:
            fmt = {'extension': ext}

    # Compute actual extension when using script/auto, and update nb_dest if necessary
    dest_fmt = args.to
    if dest_fmt and dest_fmt['extension'] == '.auto':
        dest_fmt = check_auto_ext(dest_fmt, notebook.metadata, '--to')
        if not args.output and nb_file != '-':
            nb_dest = full_path(base_path(nb_file, args.input_format),
                                dest_fmt)

    # Set the kernel
    set_kernel = args.set_kernel
    if (not set_kernel) and args.execute and notebook.metadata.get(
            'kernelspec', {}).get('name') is None:
        set_kernel = '-'

    if set_kernel:
        if set_kernel == '-':
            language = notebook.metadata.get('jupytext', {}).get('main_language') \
                       or notebook.metadata['kernelspec']['language']

            if not language:
                raise ValueError(
                    'Cannot infer a kernel as notebook language is not defined'
                )

            kernelspec = kernelspec_from_language(language)

            if not kernelspec:
                raise ValueError('Found no kernel for {}'.format(language))
        else:
            try:
                kernelspec = get_kernel_spec(set_kernel)
            except KeyError:
                raise KeyError('Please choose a kernel name among {}'.format(
                    find_kernel_specs().keys()))

            kernelspec = {
                'name': args.set_kernel,
                'language': kernelspec.language,
                'display_name': kernelspec.display_name
            }

        log("[jupytext] Setting kernel {}".format(kernelspec.get('name')))
        args.update_metadata['kernelspec'] = kernelspec

    # Update the metadata
    if args.update_metadata:
        log("[jupytext] Updating notebook metadata with '{}'".format(
            json.dumps(args.update_metadata)))
        # Are we updating a text file that has a metadata filter? #212
        if notebook.metadata.get('jupytext',
                                 {}).get('notebook_metadata_filter') == '-all':
            notebook.metadata.get('jupytext',
                                  {}).pop('notebook_metadata_filter')
        recursive_update(notebook.metadata, args.update_metadata)

        if 'kernelspec' in args.update_metadata and 'main_language' in notebook.metadata.get(
                'jupytext', {}):
            notebook.metadata['jupytext'].pop('main_language')

    # Read paired notebooks, except if the pair is being created
    if args.sync:
        set_prefix_and_suffix(fmt, notebook, nb_file)
        if args.set_formats is None:
            try:
                notebook, inputs_nb_file, outputs_nb_file = load_paired_notebook(
                    notebook, fmt, nb_file, log)
            except NotAPairedNotebook as err:
                sys.stderr.write('[jupytext] Warning: ' + str(err) + '\n')
                return 0

    # II. ### Apply commands onto the notebook ###
    # Pipe the notebook into the desired commands
    prefix = None if nb_file == '-' else os.path.splitext(
        os.path.basename(nb_file))[0]
    for cmd in args.pipe or []:
        notebook = pipe_notebook(notebook, cmd, args.pipe_fmt, prefix=prefix)

    # and/or test the desired commands onto the notebook
    for cmd in args.check or []:
        pipe_notebook(notebook,
                      cmd,
                      args.pipe_fmt,
                      update=False,
                      prefix=prefix)

    # Execute the notebook
    if args.execute:
        kernel_name = notebook.metadata.get('kernelspec', {}).get('name')
        log("[jupytext] Executing notebook with kernel {}".format(kernel_name))
        exec_proc = ExecutePreprocessor(timeout=None, kernel_name=kernel_name)
        if nb_dest is not None and nb_dest != '-':
            resources = {'metadata': {'path': str(os.path.dirname(nb_dest))}}
        elif nb_file != '-':
            resources = {'metadata': {'path': str(os.path.dirname(nb_file))}}
        else:
            resources = {}
        exec_proc.preprocess(notebook, resources=resources)

    # III. ### Possible actions ###
    modified = args.update_metadata or args.pipe or args.execute
    # a. Test round trip conversion
    if args.test or args.test_strict:
        try:
            # Round trip from an ipynb document
            if fmt['extension'] == '.ipynb':
                test_round_trip_conversion(
                    notebook,
                    dest_fmt,
                    update=args.update,
                    allow_expected_differences=not args.test_strict,
                    stop_on_first_error=args.stop_on_first_error)

            # Round trip from a text file
            else:
                with open(nb_file) as fp:
                    org_text = fp.read()

                # If the destination is not ipynb, we convert to/back that format
                if dest_fmt['extension'] != '.ipynb':
                    dest_text = writes(notebook, fmt=dest_fmt)
                    notebook = reads(dest_text, fmt=dest_fmt)

                text = writes(notebook, fmt=fmt)

                if args.test_strict:
                    compare(text, org_text)
                else:
                    # we ignore the YAML header in the comparison #414
                    comment = _SCRIPT_EXTENSIONS.get(fmt['extension'],
                                                     {}).get('comment', '')
                    # white spaces between the comment char and the YAML delimiters are allowed
                    if comment:
                        comment = comment + r'\s*'
                    yaml_header = re.compile(
                        r'^{comment}---\s*\n.*\n{comment}---\s*\n'.format(
                            comment=comment), re.MULTILINE | re.DOTALL)
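                    # e.g. removes a leading "# ---\n# jupytext: ...\n# ---" header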
                    compare(re.sub(yaml_header, '', text),
                            re.sub(yaml_header, '', org_text))

        except (NotebookDifference, AssertionError) as err:
            sys.stdout.write('{}: {}\n'.format(nb_file, str(err)))
            return 1
        return 0

    # b. Output to the desired file or format
    if nb_dest:
        if nb_dest == nb_file and not dest_fmt:
            dest_fmt = fmt

        # Test consistency between dest name and output format
        if dest_fmt and nb_dest != '-':
            base_path(nb_dest, dest_fmt)

        # Describe what jupytext is doing
        if os.path.isfile(nb_dest) and args.update:
            if not nb_dest.endswith('.ipynb'):
                raise ValueError('--update is only for ipynb files')
            action = ' (destination file updated)'
            check_file_version(notebook, nb_file, nb_dest)
            combine_inputs_with_outputs(notebook, read(nb_dest), fmt=fmt)
        elif os.path.isfile(nb_dest):
            action = ' (destination file replaced)'
        else:
            action = ''

        log('[jupytext] Writing {nb_dest}{format}{action}'.format(
            nb_dest=nb_dest,
            format=' in format ' + short_form_one_format(dest_fmt)
            if dest_fmt and 'format_name' in dest_fmt else '',
            action=action))
        write(notebook, nb_dest, fmt=dest_fmt)
        if args.pre_commit:
            system('git', 'add', nb_dest)

    # c. Synchronize paired notebooks
    if args.sync:
        # Also update the original notebook if the notebook was modified
        if modified:
            inputs_nb_file = outputs_nb_file = None
        formats = notebook.metadata['jupytext']['formats']

        for ipynb in [True, False]:
            # Write first format last so that it is the most recent file
            for alt_path, alt_fmt in paired_paths(nb_file, fmt, formats)[::-1]:
                # Write ipynb first for compatibility with our contents manager
                if alt_path.endswith('.ipynb') != ipynb:
                    continue
                # Do not write the ipynb file if it was not modified
                # But, always write text representations to make sure they are the most recent
                if alt_path == inputs_nb_file and alt_path == outputs_nb_file:
                    continue
                log("[jupytext] Updating '{}'".format(alt_path))
                write(notebook, alt_path, fmt=alt_fmt)
                if args.pre_commit:
                    system('git', 'add', alt_path)
    elif os.path.isfile(nb_file) and nb_dest.endswith('.ipynb') and not nb_file.endswith('.ipynb') and \
            notebook.metadata.get('jupytext', {}).get('formats') is not None:
        # Update the original text file timestamp, as required by our Content Manager
        # Otherwise Jupyter will refuse to open the paired notebook #335
        log("[jupytext] Sync timestamp of '{}'".format(nb_file))
        os.utime(nb_file, None)

    return 0
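A minimal usage sketch for the entry point above (the script name is hypothetical); each call mirrors the equivalent shell invocation, assuming the function is exposed as jupytext.cli.jupytext as in the jupytext package:

from jupytext.cli import jupytext as jupytext_cli

# jupytext --to notebook script.py  -> writes script.ipynb next to the script
jupytext_cli(['--to', 'notebook', 'script.py'])

# jupytext --sync script.py  -> refreshes every paired representation
jupytext_cli(['--sync', 'script.py'])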
Example no. 26
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

def run_notebook(path):
    with open(path) as f:  # close the file handle after reading
        nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
    ExecutePreprocessor().preprocess(nb, {})
    print('done')
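run_notebook executes the notebook in memory and discards the result; a minimal variant (hypothetical name run_and_save) that also persists the executed outputs:

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

def run_and_save(path):
    with open(path) as f:
        nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
    ExecutePreprocessor(timeout=600).preprocess(nb, {})
    with open(path, 'w') as f:
        nbformat.write(nb, f)  # write the executed cells back to disk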
Example no. 27
import os
import shutil
import time

import nbformat
from nbconvert.exporters import RSTExporter
from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor
from nbconvert.writers import FilesWriter
from PIL import Image, ImageChops

def compile_tutorial(tutorial_name, force_recompile=False):
    print('- Compiling tutorial ' + tutorial_name + '...')

    notebook_path = 'tutorial_notebooks/' + tutorial_name + '/' + tutorial_name + '.ipynb'
    export_path = 'tutorials/' + tutorial_name + '/' + tutorial_name
    thumb_dest = os.path.dirname(export_path) + '/thumb.png'

    if not os.path.exists(os.path.dirname(export_path)):
        os.makedirs(os.path.dirname(export_path))

    # Read in notebook
    notebook = nbformat.read(notebook_path, as_version=4)

    # Scrape title, description and thumbnail
    first_cell = notebook.cells[0]

    title = first_cell.source.splitlines()[0]
    if '#' in title:
        title = title.replace('#', '').strip()

    description = ''
    for line in first_cell.source.splitlines()[1:]:
        if line.strip():
            description = line.strip()
            break

    if not description:
        print('  Description could not be found in the notebook.')

    if 'thumbnail_figure_index' in notebook.metadata:
        thumbnail_figure_index = notebook.metadata['thumbnail_figure_index']
    else:
        thumbnail_figure_index = -1

    if 'level' in notebook.metadata:
        level = notebook.metadata['level'].capitalize()
    elif 'difficulty' in notebook.metadata:
        level = notebook.metadata['difficulty'].capitalize()
    else:
        level = 'Unknown'

    # Check if the tutorial was already compiled.
    if os.path.exists(export_path + '.rst'):
        if os.path.getmtime(export_path +
                            '.rst') > os.path.getmtime(notebook_path):
            if force_recompile:
                print('  Already compiled. Recompiling anyway...')
            else:
                print('  Already compiled. Skipping...')
                return title, level, description, thumb_dest.split('/', 1)[-1]

    # Execute notebook if not already executed
    already_executed = any(
        c.get('outputs') or c.get('execution_count') for c in notebook.cells
        if c.cell_type == 'code')

    resources = {}

    if not already_executed:
        ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
        try:
            start = time.time()

            additional_cell_1 = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": r"%matplotlib inline" + '\n' +
                          r"%config InlineBackend.print_figure_kwargs = {'bbox_inches': None, 'figsize': (8, 6)}"
            }

            additional_cell_2 = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": "import matplotlib as mpl\n"
                          "mpl.rcParams['figure.figsize'] = (8, 6)\n"
                          "mpl.rcParams['figure.dpi'] = 150\n"
                          "mpl.rcParams['savefig.dpi'] = 150"
            }

            notebook.cells.insert(1, nbformat.from_dict(additional_cell_1))
            notebook.cells.insert(2, nbformat.from_dict(additional_cell_2))

            km, kc = ep.start_new_kernel(
                cwd=os.path.abspath(os.path.dirname(notebook_path)))
            kc.allow_stdin = False

            notebook, resources = ep.preprocess(notebook, km=km)

            notebook.cells.pop(2)
            notebook.cells.pop(1)

            km.shutdown_kernel()

            end = time.time()
            print('  Compilation took %d seconds.' % (end - start))
        except CellExecutionError as err:
            print('  Error while processing notebook:')
            print('  ', err)
    else:
        print('  Notebook was already executed.')

    exporter = RSTExporter()
    output, resources = exporter.from_notebook_node(notebook, resources)

    writer = FilesWriter(build_directory=os.path.dirname(export_path))
    writer.write(output,
                 resources,
                 notebook_name=os.path.basename(export_path))

    pictures = sorted(resources['outputs'], key=output.find)

    try:
        thumbnail_source = pictures[thumbnail_figure_index]

        # Read in thumbnail source image
        img = Image.open(os.path.dirname(export_path) + '/' + thumbnail_source)

        # Trim whitespace
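        # Difference the image against a flat canvas filled with the top-left
        # pixel; non-zero pixels mark content and getbbox() returns their box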
        bg = Image.new(img.mode, img.size, img.getpixel((0, 0)))
        diff = ImageChops.difference(img, bg)
        diff = ImageChops.add(diff, diff)
        bbox = diff.getbbox()
        if bbox:
            img = img.crop(bbox)

        # Resize image to have a width of 400px
        img.thumbnail([400, 1000])

        # Save thumbnail
        img.save(thumb_dest)
    except Exception:
        # fall back to a placeholder if no usable figure was produced
        shutil.copyfile('_static/no_thumb.png', thumb_dest)

    print('  Done!')

    return title, level, description, thumb_dest.split('/', 1)[-1]
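A sketch of how this compiler might be driven to build a gallery index (the tutorial names are hypothetical):

tutorials = ['getting_started', 'wavefront_sensing']  # hypothetical names
entries = [compile_tutorial(name) for name in tutorials]
for title, level, description, thumb in entries:
    print('{} [{}]: {}'.format(title, level, description))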
Example no. 28
def test_notebook_execution_with_pandas_backend(
        titanic_data_context_no_data_docs_no_checkpoint_store):
    """
    This tests that the notebook is written to disk and executes without error.

    To set this test up we:
    - create a scaffold notebook
    - verify that no validations have happened

    We then:
    - execute that notebook (note that this will raise errors such as
      CellExecutionError if any cell in the notebook fails)
    - create a new context from disk
    - verify that a validation has been run with our expectation suite
    """
    # Since we'll run the notebook, we use a context with no data docs to avoid
    # the renderer's default behavior of building and opening docs, which is not
    # part of this test.
    context = titanic_data_context_no_data_docs_no_checkpoint_store
    root_dir = context.root_directory
    uncommitted_dir = os.path.join(root_dir, "uncommitted")
    suite_name = "my_suite"
    suite = context.create_expectation_suite(suite_name)

    csv_path = os.path.join(root_dir, "..", "data", "Titanic.csv")
    batch_kwargs = {"datasource": "mydatasource", "path": csv_path}

    # Sanity check test setup
    assert context.list_expectation_suite_names() == [suite_name]
    assert context.list_datasources() == [{
        "module_name": "great_expectations.datasource",
        "class_name": "PandasDatasource",
        "data_asset_type": {
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        "batch_kwargs_generators": {
            "mygenerator": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data",
            }
        },
        "name": "mydatasource",
    }]
    assert context.get_validation_result(suite_name) == {}
    notebook_path = os.path.join(uncommitted_dir, f"{suite_name}.ipynb")
    assert not os.path.isfile(notebook_path)

    # Create notebook
    renderer = SuiteScaffoldNotebookRenderer(
        titanic_data_context_no_data_docs_no_checkpoint_store, suite,
        batch_kwargs)
    renderer.render_to_disk(notebook_path)
    assert os.path.isfile(notebook_path)

    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)

    # Run notebook
    ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": uncommitted_dir}})

    # Useful to inspect executed notebook
    output_notebook = os.path.join(uncommitted_dir, "output.ipynb")
    with open(output_notebook, "w") as f:
        nbformat.write(nb, f)

    # Assertions about output
    context = DataContext(root_dir)
    obs_validation_result = context.get_validation_result(suite_name)
    assert obs_validation_result.statistics == {
        "evaluated_expectations": 3,
        "successful_expectations": 3,
        "unsuccessful_expectations": 0,
        "success_percent": 100,
    }
    suite = context.get_expectation_suite(suite_name)
    assert suite.expectations
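The resources dict passed to preprocess() sets the kernel's working directory, so relative paths inside the notebook resolve against it; a standalone sketch with hypothetical paths:

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

with open('notebooks/report.ipynb') as f:  # hypothetical path
    nb = nbformat.read(f, as_version=4)
ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
ep.preprocess(nb, {'metadata': {'path': 'notebooks'}})  # cells run inside notebooks/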
Example no. 29
    def setUp(self):
        from nbconvert.preprocessors import ExecutePreprocessor
        self.preprocessor = ExecutePreprocessor(timeout=600, enabled=True, allow_errors=False)
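A hedged sketch of how a test might use the preprocessor configured in setUp (the fixture path is hypothetical):

    def test_runs_clean(self):
        import nbformat
        with open('tests/fixtures/demo.ipynb') as f:  # hypothetical fixture
            nb = nbformat.read(f, as_version=4)
        # allow_errors=False makes preprocess() raise CellExecutionError
        # as soon as a cell fails
        self.preprocessor.preprocess(nb, {})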
Example no. 30
    def run_pipeline(self, arg, line='', cell='', local_ns=None):
        """Run notebooks sequentially in a pipeline.
           A dictionary called _pipeline_workspace is created by the magic that will be shared by all the notebooks in
           the pipeline. The state can contain DataFrames, Lists, Dictionaries and objects. Notebook parameterization
           can be used to load and read from the shared state.

           The pipeline supports execution of parameterized notebooks. If parameters are used, the first code cell will
           be treated to contain only parameter assignments. Parameters can be a string, number, list or dictionary.

           To save a notebook's execution in the pipeline, the save name should be specified along with the
           execution notebook separated with a colon.

           Run parameters will only change their equivalent parameters from the first code cell. Unknown parameters will
           be ignored. Adding parameters on an execution is optional.

                # simple pipeline
                Example1:
                    %%run_pipeline
                    first notebook in pipeline;
                    second notebook in pipeline;
                    third notebook in pipeline

                # pipleine with parameterized notebooks
                Example2:
                    %%run_pipeline
                    first notebook in pipeline  key01=int key01=string key02={'key01': param01};
                    second notebook in pipeline;
                    third notebook in pipeline:your save name key01=int key02=string key03=[param01, param02]

        """
        # cells injected between notebook runs: one clears the user namespace
        # (keeping _pipeline_workspace), the other initializes the shared state

        clear_namespace_cell = nbformat.v4.new_code_cell(
            source="from IPython import get_ipython\n" +
            "_ip = get_ipython()\n" +
            "_user_vars = %who_ls\n" +
            "for _var in _user_vars:\n" +
            "    if _var != '_pipeline_workspace':\n" +
            "        del _ip.user_ns[_var]\n" +
            "import gc\n" +
            "gc.collect()"
        )
        pipeline_state_cell = nbformat.v4.new_code_cell(source="_pipeline_workspace = {'frames': list()}")

        if not (line or cell):
            if not arg.startswith("-"):
                line = arg
                arg = ''

        args = ParameterArgs(parse_argstring(self.run, arg))

        user_ns = self.shell.user_ns.copy()
        if local_ns:
            user_ns.update(local_ns)

        if not cell:
            cell = line

        notebook_run_cmds = cell.split(';')
        notebook_run_cmds = [notebook_run_cmd.strip() for notebook_run_cmd in notebook_run_cmds]

        execute_preprocessor = ExecutePreprocessor(kernel_name='python3', timeout=args.get('cell_timeout'))

        kernel_manager, kernel_comm = start_new_kernel(kernel_name='python3')

        execute_preprocessor.km = kernel_manager
        execute_preprocessor.kc = kernel_comm

        def execute_cell(nb4_cell):
            try:
                execute_preprocessor.run_cell(nb4_cell)
            except BaseException:
                # shut the kernel down cleanly before propagating the error
                if kernel_manager or kernel_comm:
                    kernel_comm.stop_channels()
                    kernel_manager.shutdown_kernel()
                raise

        def execute_notebook(notebook_filename, notebook_save_filename, params):

            with open(notebook_filename) as file_handler:
                notebook = nbformat.read(file_handler, as_version=4)
                b_errors = False

                if params:
                    for nb_cell in notebook.cells:
                        if nb_cell.cell_type == 'code':
                            new_cell_source = utils.substitute_params(nb_cell.source, params)
                            nb_cell.source = new_cell_source
                            break

                try:

                    execute_preprocessor.nb = notebook

                    progress_bar = widgets.IntProgress(
                        value=0,
                        min=0,
                        max=len(notebook.cells),
                        step=1,
                        bar_style='info',  # 'success', 'info', 'warning', 'danger' or ''
                        orientation='horizontal'
                    )

                    display_label = notebook_filename
                    if notebook_save_filename:
                        display_label = display_label + ' : ' + notebook_save_filename
                    display(widgets.HBox([widgets.Label(display_label), progress_bar]))

                    for idx, nb_cell in enumerate(notebook.cells):
                        execute_preprocessor.preprocess_cell(nb_cell, resources={'metadata': {}}, cell_index=idx)
                        progress_bar.value = idx + 1

                except CellExecutionError:
                    b_errors = True

                    progress_bar.bar_style = 'danger'

                    if kernel_manager or kernel_comm:
                        kernel_comm.stop_channels()
                        kernel_manager.shutdown_kernel()

                    raise
                finally:
                    if notebook_save_filename:
                        with open(notebook_save_filename, mode='wt') as file_handler:
                            nbformat.write(notebook, file_handler)

                    if not b_errors:
                        progress_bar.bar_style = 'success'

        execute_cell(pipeline_state_cell)
        for notebook_run_cmd in notebook_run_cmds:

            run_notebook_name, notebook_save_name, nb_params = utils.parse_run_str(notebook_run_cmd)

            execute_notebook(run_notebook_name, notebook_save_name, nb_params)
            execute_cell(clear_namespace_cell)

        if kernel_manager or kernel_comm:
            kernel_comm.stop_channels()
            kernel_manager.shutdown_kernel()
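A concrete (hypothetical) invocation of the cell magic above, saving the second notebook's executed copy under a new name and overriding one parameter:

%%run_pipeline
load_data.ipynb;
train_model.ipynb:runs/train_executed.ipynb n_estimators=200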