Example #1
0
def startPreprocessor(kernel):
  """
  Creates an ExecutePreprocessor for the given kernel and starts a new
  kernel for it. The kernel manager and kernel client returned by
  start_new_kernel() are stored on the preprocessor as `km` and `kc`.

   - Parameters: kernel (passed to ExecutePreprocessor as kernel_name)
   - Returns: preprocessor, boolean
       (preprocessor, True) when the kernel was started,
       (None, False) when it could not be started

  Start-up failures are printed to stderr rather than raised, so callers
  only need to check the boolean.
  """
  try:
    ex = ExecutePreprocessor(kernel_name=kernel)
    ex.km, ex.kc = ex.start_new_kernel()
  except Exception as err:
    # Deliberately an `except` clause and not a `return` inside `finally`:
    # returning from `finally` discards whatever exception is in flight,
    # including KeyboardInterrupt and SystemExit, and would hide the reason
    # for the failure. An unknown kernel name (NoSuchKernel, a KeyError
    # subclass in jupyter_client) is reported here like any other start-up
    # error.
    print(err, file=sys.stderr)
    return None, False
  else:
    return ex, True
    def run_pipeline(self, arg, line='', cell='', local_ns=None):
        """Run notebooks sequentially in a pipeline.
           A dictionary called _pipeline_workspace is created by the magic that will be shared by all the notebooks in
           the pipeline. The state can contain DataFrames, Lists, Dictionaries and objects. Notebook parameterization
           can be used to load and read from the shared state.

           The pipeline supports execution of parameterized notebooks. If parameters are used, the first code cell will
           be treated to contain only parameter assignments. Parameters can be a string, number, list or dictionary.

           To save a notebook's execution in the pipeline, the save name should be specified along with the
           execution notebook separated with a colon.

           Run parameters will only change their equivalent parameters from the first code cell. Unknown parameters will
           be ignored. Adding parameters on an execution is optional.

                # simple pipeline
                Example1:
                    %%run_pipeline
                    first notebook in pipeline;
                    second notebook in pipeline;
                    third notebook in pipeline

                # pipeline with parameterized notebooks
                Example2:
                    %%run_pipeline
                    first notebook in pipeline  key01=int key02=string key03={'key01': param01};
                    second notebook in pipeline;
                    third notebook in pipeline:your save name key01=int key02=string key03=[param01, param02]

           All notebooks run in one shared 'python3' kernel. Between notebooks every user variable except
           _pipeline_workspace is deleted from the kernel namespace. The kernel is always shut down when the
           pipeline finishes, whether it succeeded or raised; errors are propagated to the caller.

           Parameters:
               arg: magic option string; when neither `line` nor `cell` is given and it does not start with "-",
                    it is treated as the pipeline text instead.
               line: pipeline text used when `cell` is empty.
               cell: pipeline text, notebook run commands separated by ';'.
               local_ns: currently unused.
        """
        # Cell run after every notebook: wipes the kernel namespace but keeps
        # the shared _pipeline_workspace dictionary alive.
        clear_namespace_cell = nbformat.v4.new_code_cell(
            source="from IPython import get_ipython\n" +
            "_ip = get_ipython()\n" + "_user_vars = %who_ls\n" +
            "for _var in _user_vars:\n" +
            "    if _var != '_pipeline_workspace':\n" +
            "        del _ip.user_ns[_var]\n" + "import gc\n" + "gc.collect()")
        # Cell run once up front: creates the shared state dictionary.
        pipeline_state_cell = nbformat.v4.new_code_cell(
            source="_pipeline_workspace = {'frames': list()}")

        # Line-magic style call without options: the whole argument string is
        # the pipeline text.
        if not (line or cell):
            if not arg.startswith("-"):
                line = arg
                arg = ''

        args = ParameterArgs(parse_argstring(self.run, arg))

        if not cell:
            cell = line

        # Drop empty commands so a trailing ';' does not turn into an attempt
        # to run a notebook with an empty name.
        notebook_run_cmds = [
            run_cmd
            for run_cmd in (part.strip() for part in cell.split(';'))
            if run_cmd
        ]

        execute_preprocessor = ExecutePreprocessor(
            kernel_name='python3', timeout=args.get('cell_timeout'))

        kernel_manager, kernel_comm = start_new_kernel(kernel_name='python3')

        execute_preprocessor.km = kernel_manager
        execute_preprocessor.kc = kernel_comm

        def shutdown_kernel():
            # Each handle is checked on its own so a missing one cannot cause
            # an AttributeError on the other.
            if kernel_comm is not None:
                kernel_comm.stop_channels()
            if kernel_manager is not None:
                kernel_manager.shutdown_kernel()

        def execute_cell(nb4_cell):
            # Errors propagate: continuing the pipeline after a failed
            # bookkeeping cell would run the next notebook in a broken state.
            execute_preprocessor.run_cell(nb4_cell)

        def execute_notebook(notebook_filename, notebook_save_filename,
                             params):
            # The file handle is only needed for reading, not for the whole
            # execution.
            with open(notebook_filename) as file_handler:
                notebook = nbformat.read(file_handler, as_version=4)

            if params:
                # Only the first code cell holds parameter assignments.
                for nb_cell in notebook.cells:
                    if nb_cell.cell_type == 'code':
                        nb_cell.source = utils.substitute_params(
                            nb_cell.source, params)
                        break

            execute_preprocessor.nb = notebook

            progress_bar = widgets.IntProgress(
                value=0,
                min=0,
                max=len(notebook.cells),
                step=1,
                bar_style=
                'info',  # 'success', 'info', 'warning', 'danger' or ''
                orientation='horizontal')

            display_label = notebook_filename
            if notebook_save_filename:
                display_label = display_label + ' : ' + notebook_save_filename
            display(
                widgets.HBox([widgets.Label(display_label), progress_bar]))

            try:
                for idx, nb_cell in enumerate(notebook.cells):
                    execute_preprocessor.preprocess_cell(
                        nb_cell,
                        resources={'metadata': {}},
                        cell_index=idx)
                    progress_bar.value = idx + 1
            except BaseException:
                # Any failure (not just CellExecutionError) must turn the bar
                # red; the exception is re-raised untouched.
                progress_bar.bar_style = 'danger'
                raise
            else:
                progress_bar.bar_style = 'success'
            finally:
                # Save whatever was executed, also after a failure, so the
                # partial outputs can be inspected.
                if notebook_save_filename:
                    with open(notebook_save_filename,
                              mode='wt') as file_handler:
                        nbformat.write(notebook, file_handler)

        try:
            execute_cell(pipeline_state_cell)
            for notebook_run_cmd in notebook_run_cmds:
                run_notebook_name, notebook_save_name, nb_params = utils.parse_run_str(
                    notebook_run_cmd)

                execute_notebook(run_notebook_name, notebook_save_name,
                                 nb_params)
                execute_cell(clear_namespace_cell)
        finally:
            # Single place that releases the kernel, reached on success and on
            # every kind of failure.
            shutdown_kernel()
        def execute_notebook(notebook_filename, notebook_save_filename,
                             params):
            """Execute one notebook and optionally save the executed copy.

            With the 'enable_progress_bar' option set, a kernel named by the
            notebook's kernelspec metadata is started here, the cells are run
            one by one and a progress bar widget is updated after each cell.
            Otherwise the whole notebook is handed to
            ExecutePreprocessor.preprocess and progress is reported through
            UserMessages.

            Parameters:
                notebook_filename: path of the notebook to run.
                notebook_save_filename: when truthy, path the notebook is
                    written to afterwards (also after a failure).
                params: when truthy, substituted into the first code cell
                    via utils.substitute_params.

            Any exception raised during execution is re-raised after the
            failure has been reported and the kernel started here has been
            shut down.
            """
            log = UserMessages()

            # The file handle is only needed for reading, not for the whole
            # execution.
            with open(notebook_filename) as file_handler:
                notebook = nbformat.read(file_handler, as_version=4)

            execute_preprocessor = ExecutePreprocessor(
                timeout=args.get('cell_timeout'),
                allow_errors=args.get('allow_errors'))
            kernel_manager = None
            kernel_comm = None
            # Holds the widget once created; stays None when the progress bar
            # is disabled or could not be built, so it is never mistaken for
            # the boolean option.
            progress_bar = None
            failed = False

            if params:
                # Only the first code cell holds parameter assignments.
                for nb_cell in notebook.cells:
                    if nb_cell.cell_type == 'code':
                        nb_cell.source = utils.substitute_params(
                            nb_cell.source, params)
                        break

            try:
                if args.get('enable_progress_bar'):
                    progress_bar = widgets.IntProgress(
                        value=0,
                        min=0,
                        max=len(notebook.cells),
                        step=1,
                        bar_style=
                        'info',  # 'success', 'info', 'warning', 'danger' or ''
                        orientation='horizontal')

                    kernel_manager, kernel_comm = start_new_kernel(
                        kernel_name=notebook['metadata']['kernelspec']
                        ['name'])
                    execute_preprocessor.km = kernel_manager
                    execute_preprocessor.kc = kernel_comm
                    execute_preprocessor.nb = notebook

                    display_label = notebook_filename
                    if notebook_save_filename:
                        display_label = display_label + ' : ' + notebook_save_filename
                    display(
                        widgets.HBox(
                            [widgets.Label(display_label), progress_bar]))

                    for idx, nb_cell in enumerate(notebook.cells):
                        execute_preprocessor.preprocess_cell(
                            nb_cell,
                            resources={'metadata': {}},
                            cell_index=idx)
                        progress_bar.value = idx + 1
                else:
                    log.info("Running Notebook: " + notebook_filename)
                    execute_preprocessor.preprocess(
                        notebook, {'metadata': {}})
            except BaseException:
                # Every failure counts (missing kernelspec metadata, timeouts,
                # interrupts, ...), not only CellExecutionError and
                # AttributeError; otherwise the run is reported as successful.
                failed = True
                if progress_bar is not None:
                    progress_bar.bar_style = 'danger'
                raise
            finally:
                try:
                    if notebook_save_filename:
                        with open(notebook_save_filename,
                                  mode='wt') as file_handler:
                            nbformat.write(notebook, file_handler)
                finally:
                    # Runs even when saving fails, so the kernel is not
                    # leaked. Each handle is checked on its own.
                    if kernel_comm is not None:
                        kernel_comm.stop_channels()
                    if kernel_manager is not None:
                        kernel_manager.shutdown_kernel()

                if not failed:
                    if progress_bar is not None:
                        progress_bar.bar_style = 'success'
                    else:
                        log.info(notebook_filename +
                                 " was executed successfully.")
                elif progress_bar is None:
                    log.error(notebook_filename + " execution failed.")