예제 #1
0
def convert_notebook_to_py(nb_fn: Path, py_fn: Path) -> None:
    """Export the notebook at ``nb_fn`` as a Python script written to ``py_fn``.

    nbconvert renders IPython magics as ``get_ipython()`` calls; every
    occurrence of ``get_ipython`` in the exported source is prefixed with
    ``# `` before the script is written.

    Adapted from https://stackoverflow.com/questions/17077494/how-do-i-convert-a-ipython-notebook-into-a-python-file-via-commandline

    Args:
        nb_fn: Path of the notebook file to read.
        py_fn: Path of the Python file to create or overwrite.
    """
    import nbformat
    from nbconvert import PythonExporter

    # Notebook files are JSON stored as UTF-8; do not depend on the locale.
    with open(nb_fn, encoding="utf-8") as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    source, _ = PythonExporter().from_notebook_node(nb)

    # str.replace returns a new string, so the result has to be rebound;
    # calling it for its side effect leaves ``source`` untouched.
    source = source.replace("get_ipython", "# get_ipython")

    with open(py_fn, "w", encoding="utf-8") as fh:
        fh.write(source)
예제 #2
0
    def collect(self):
        """Yield one ``IPyNbTest`` item holding the notebook's exported code.

        The notebook at ``self.fspath`` is read as nbformat version 4 and
        exported to Python with markdown cells and input prompts excluded.
        The item is built with ``IPyNbTest.from_parent`` when
        ``pytest_version`` is at least 50403, and with the positional
        constructor otherwise.
        """
        exporter = PythonExporter()
        exporter.exclude_markdown = True
        exporter.exclude_input_prompt = True

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "IPython.core.inputsplitter is deprecated")
            # Read through a context manager so the file handle is closed
            # as soon as the notebook has been parsed.
            with self.fspath.open(encoding='utf-8') as notebook_contents:
                notebook = nbformat.read(notebook_contents, 4)
            code, _ = exporter.from_notebook_node(notebook)
            if pytest_version >= 50403:
                yield IPyNbTest.from_parent(name=self.name,
                                            parent=self,
                                            code=code)
            else:
                yield IPyNbTest(self.name, self, code)
예제 #3
0
 def load_source_code(notebook_path, python_path, python_code):
     """
     Return the pipeline source code taken from the single source provided.

     Exactly one of the three arguments is expected to be non-None (checked
     with an assertion):

     - ``python_path``: the file is read and its text returned.
     - ``notebook_path``: the notebook is exported to Python source.
     - ``python_code``: the string is returned unchanged.
     """
     candidates = (notebook_path, python_path, python_code)
     assert sum(candidate is not None for candidate in candidates) == 1

     if python_path is not None:
         with open(python_path) as handle:
             return handle.read()

     if notebook_path is not None:
         with open(notebook_path) as handle:
             node = nbformat.reads(handle.read(), nbformat.NO_CONVERT)
             exported, _ = PythonExporter().from_notebook_node(node)
         return exported

     if python_code is not None:
         return python_code

     return ""
예제 #4
0
파일: helper.py 프로젝트: jakesen/opsdroid
def convert_ipynb_to_script(notebook_path, output_path):
    """Export an IPython notebook as a Python script.

    The notebook is parsed as nbformat version 4 and the exported source is
    written to ``output_path``, replacing any existing file.

    Args:
        notebook_path (str): path to the notebook file,
            e.g. '/path/src/my_file.ipynb'.
        output_path (str): path of the script to write,
            e.g. '/path/src/my_file.py'.

    """
    with open(notebook_path, 'r') as source_file:
        raw_text = source_file.read()

    parsed = nbformat.reads(raw_text, as_version=4)
    exported, _ = PythonExporter().from_notebook_node(parsed)

    with open(output_path, 'w') as target_file:
        target_file.write(exported)
예제 #5
0
def _nb_sample_to_py(notebook_path: str, output_path: str):
    """Convert a notebook kfp sample into a Python file.

    Every cell whose metadata tags contain "skip-in-test" is dropped before
    the export. The tag is meant for code such as
    kfp.Client().create_run_from_pipeline_func(), so that pipelines are not
    submitted while they are only being compiled.
    """

    def _is_kept(cell):
        tags = cell.get('metadata', {}).get('tags', [])
        return 'skip-in-test' not in tags

    with open(notebook_path, 'r') as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    notebook.cells = list(filter(_is_kept, notebook.cells))
    script, _ = PythonExporter().from_notebook_node(notebook)

    with open(output_path, 'w') as script_file:
        script_file.write(script)
예제 #6
0
def convert_notebook(notebook_path):
  """Export a notebook to a Python file next to it, unless already done.

  The target is ``<parent folder>/<file name>.<props.TYPE_PYTHON>``. Nothing
  is written when that file already exists or when nbformat reports a
  validation error during the export; both cases are logged.

  Args:
    notebook_path: Path of the notebook to convert.
  """
  write_folder = cache.get_parent_folder(notebook_path)
  file_name = cache.get_file_name(notebook_path)
  write_file = os.path.join(write_folder, "%s.%s" % (file_name, props.TYPE_PYTHON))
  if cache.file_exists(write_file):
    LOGGER.info("'%s' already converted. Moving on ... " % file_name)
    return
  LOGGER.info("Converting filename '%s' ... " % file_name)
  # Notebook files are UTF-8 JSON; do not depend on the locale encoding.
  with open(notebook_path, encoding='utf-8') as fh:
    nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)
  exporter = PythonExporter()
  try:
    source, _ = exporter.from_notebook_node(nb)
  except nbformat.validator.NotebookValidationError:
    LOGGER.error("Validation error while converting '%s'." % notebook_path)
    return
  # Let the file object do the encoding: handing bytes to a text-mode file
  # (as ``writelines(source.encode(...))`` did) raises TypeError on Python 3.
  with open(write_file, 'w', encoding='utf-8') as fh:
    fh.write(source)
예제 #7
0
def nbconverter(notebookPath, directionPath=None):
    """Convert a notebook to a .py script, printing bare expressions.

    The exported source is post-processed line by line: the first two lines,
    empty lines and ``# In[..]:`` prompts are removed, comment lines get a
    blank line in front of them, and lines consisting only of a variable
    (optionally indexed), a bracketed expression or a string literal are
    wrapped in ``print(...)``.

    :param notebookPath: path of the .ipynb file to convert. If the file is
        not found and the path does not end in '.ipynb', the suffix is
        appended and the read is retried.
    :param directionPath: path of the .py file to write. Defaults to
        ``notebookPath`` with its extension replaced by '.py'.
    :raises FileNotFoundError: if the notebook cannot be found.
    """
    import os

    try:
        with open(notebookPath, 'r', encoding='utf-8') as fh:
            nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)
    except FileNotFoundError:
        # Test the suffix, not a substring: a directory such as
        # '.ipynb_checkpoints' must not suppress the retry.
        if notebookPath.endswith('.ipynb'):
            raise
        notebookPath += '.ipynb'
        with open(notebookPath, 'r', encoding='utf-8') as fh:
            nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    if directionPath is None:
        # Swap only the extension. str.replace would also rewrite directory
        # names, and would leave an extension-less path unchanged so that
        # the output overwrote the notebook itself.
        directionPath = os.path.splitext(notebookPath)[0] + '.py'

    pattern_input = r'# In\[[\d\s]*\]:'
    pattern_comment = '^#.+'
    pattern_variables = r'^[a-zA-Z0-9_]+\[?[ |0-9]*\]? *$'
    pattern_square_bracket = r'^\[.*\]$'
    pattern_string = r"^[\'|\"].*[\'|\"] *$"

    def print_match(matched):
        return 'print({})'.format(matched.group().strip())

    exporter = PythonExporter()
    source, _ = exporter.from_notebook_node(nb)

    lines = source.split('\n')[2:]
    lines = [t for t in lines if t and not re.match(pattern_input, t)]
    lines = ['\n' + t if re.match(pattern_comment, t) else t for t in lines]
    # Apply the print-wrapping patterns one after another, in this order.
    for pattern in (pattern_variables, pattern_square_bracket, pattern_string):
        lines = [re.sub(pattern=pattern, repl=print_match, string=t)
                 for t in lines]

    source = '\n'.join(lines) + '\n\n'
    with open(directionPath, 'w+', encoding='utf-8') as fh:
        fh.write(source)
        print('{} has been saved'.format(directionPath))
예제 #8
0
def get_converted_script(input_notebook_path: str, conf: MlVToolConf) -> str:
    """
        Return the Python content of a notebook, rendered by nbconvert.

        The exporter is configured from TEMPLATE_PATH, given four Jinja
        filters, and passed ``conf.ignore_keys`` through the resources.
        Any exception raised by the export is re-raised as
        MlVToolException.
    """
    exporter = PythonExporter(get_config(TEMPLATE_PATH))
    jinja_filters = (
        ('filter_trailing_cells', filter_trailing_cells),
        ('get_formatted_cells', get_formatted_cells),
        ('get_data_from_docstring', get_data_from_docstring),
        ('sanitize_method_name', to_method_name),
    )
    for filter_name, filter_func in jinja_filters:
        exporter.register_filter(name=filter_name, jinja_filter=filter_func)

    resources = {'ignore_keys': conf.ignore_keys}
    logging.debug(f'Template info {resources}')
    try:
        script_content, _ = exporter.from_filename(input_notebook_path,
                                                   resources=resources)
    except Exception as e:
        raise MlVToolException(e) from e
    else:
        return script_content
예제 #9
0
    def __init__(self, nb, jsonable_parameter=True, end_cell_index=None):
        """Store the notebook, its kernel details and its parameter cell.

        ``nb`` is either a ``NotebookNode`` or the path of an '.ipynb' file,
        which is loaded with ``_read_nb``. A path with another extension
        raises ValueError and any other type raises TypeError.

        The parameter cell is looked up only for Python 3 notebooks; for
        every other language or version ``param_cell_index`` is
        ``sort_dict({})`` and ``param_value`` is an empty dict.
        """
        if isinstance(nb, str):
            extension = os.path.splitext(nb)[1]
            if extension != '.ipynb':
                raise ValueError(
                    "The extension of the jupyter notebook = '{}' is not '.ipynb'"
                    .format(nb))
            nb = _read_nb(nb)
        elif not isinstance(nb, nbformat.notebooknode.NotebookNode):
            raise TypeError()

        metadata = nb['metadata']
        kernelspec = metadata['kernelspec']
        self.language = kernelspec['language']
        self.language_version = metadata['language_info']['version']
        self.kernel = kernelspec['name']
        self.nb = nb
        self.exporter = PythonExporter()

        is_python3 = (self.language == 'python'
                      and self.language_version[0] == '3')
        if not is_python3:
            self.param_cell_index, self.param_value = sort_dict({}), {}
            return

        self.param_cell_index, self.param_value = self._cell_index_of_possible_param(
            jsonable_parameter, end_cell_index)
예제 #10
0
def scan_jupyter_imports(filename):
    '''
       Auxiliary function to get Python imports from Jupyter notebooks

       The notebook is exported to Python source, parsed with ``ast`` and
       walked for import nodes. Standard-library modules are skipped; the
       remaining names are cleaned up and translated, and a translation is
       kept unless it is "ignore" or listed in PY_LOCAL.

       Returns the set of translated dependency names. The set is empty, or
       partially filled, when the file cannot be read as a notebook or
       parsed as Python; a warning is logged in that case.

       Raises IOError if ``filename`` is not readable.
    '''
    # check input is correct
    if not os.access(filename, os.R_OK):
        raise IOError("File {} can't be read\n".format(filename))

    logging.debug('Python scan for file: {}'.format(filename))

    deps = set()

    try:
        # https://nbformat.readthedocs.io/en/latest/api.html
        ipynb = nbformat.read(filename, as_version=nbformat.NO_CONVERT)
        # https://nbconvert.readthedocs.io/en/latest/nbconvert_library.html
        python_exporter = PythonExporter()
        body, _ = python_exporter.from_notebook_node(ipynb)

        tree = ast.parse(body)

        for node in ast.walk(tree):
            modules = is_import(node)
            if modules is None:
                continue
            for m in modules:
                if is_python_std(m):
                    continue
                orig = cleanup_import(m)
                tran = translate_python_import(orig)
                if tran != "ignore" and tran not in PY_LOCAL:
                    deps.add(tran)
                    logging.debug('Translating Python dependency {} into {}'.format(orig, tran))
                else:
                    logging.debug('Ignoring Python dependency: {}'.format(orig))

    # Best effort: an unparsable notebook is reported, not fatal. Catch
    # Exception rather than BaseException so that KeyboardInterrupt and
    # SystemExit still propagate.
    except Exception:
        logging.warning("Could not parse file: {}".format(filename))

    return deps
예제 #11
0
def test_notebooks():
    """Run all notebooks in /docs/tutorials/ as tests.

    Each notebook is exported to a Python script, stripped of the IPython
    magics and ``.show(`` calls that cannot run outside a notebook, and
    executed with ``exec`` in this module's globals. An AssertionError
    raised by a script is reported with the notebook name and re-raised.
    """
    # Get the notebook names
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(root, 'docs', 'tutorials')
    notebooks = glob.glob(os.path.join(path, '*.ipynb'))

    # Convert them to python scripts
    exporter = PythonExporter()
    for notebook in notebooks:
        # Get the script as a string
        script, _ = exporter.from_filename(notebook)

        # Get rid of %matplotlib inline commands
        script = script.replace("get_ipython().magic('matplotlib inline')", "")
        script = script.replace(
            "get_ipython().run_line_magic('matplotlib', 'inline')", "")

        # The patterns below are raw strings with every literal '.', '(' and
        # ')' escaped; in a plain string '\(' is an invalid escape sequence.

        # Get rid of %run commands
        script = re.sub(r"get_ipython\(\)\.magic\('run (.*)'\)", "#", script)
        script = re.sub(r"get_ipython\(\)\.run_line_magic\('run', '(.*)'\)",
                        "#", script)

        # Remove the %time wrappers, keeping the wrapped statement
        script = re.sub(r"get_ipython\(\)\.magic\('time (.*)'\)", r"\1",
                        script)
        script = re.sub(r"get_ipython\(\)\.run_line_magic\('time', '(.*)'\)",
                        r"\1", script)

        # Replace every line that calls .show(...) with a bare comment
        # marker. The '(' is matched literally so that only calls are hit.
        script = re.sub(r"(.*)\.show\((.*)", "#", script)

        # Run it
        print("Running %s..." % os.path.basename(notebook))
        try:
            exec(script, globals(), globals())
        except AssertionError:
            print("Error in %s." % notebook)
            raise
        pl.close('all')
예제 #12
0
def convert_notebook() -> str:
    ''' Export the current notebook to an executable .py file

    The notebook reported by ``get_notebook_name`` is read, its final two
    cells are dropped, and the remainder is written with nbconvert's
    ``FilesWriter`` under the notebook's base name.

    Returns
    -------
    str : the base name of the notebook followed by ".py", or None when
          ``ipykernel`` or ``notebook.notebookapp`` cannot be imported

    '''

    # Both packages are imported only as a check for a Jupyter environment.
    try:
        import ipykernel
        import notebook.notebookapp
    except ImportError:
        return None

    from nbconvert import PythonExporter
    from nbconvert.writers import FilesWriter
    import nbformat

    nbfile = get_notebook_name()
    nb = nbformat.read(nbfile, nbformat.NO_CONVERT)

    # Keep everything except the final two cells
    nb.cells = nb.cells[:-2]

    output, resources = PythonExporter().from_notebook_node(nb)

    # Last path component, cut at its first dot
    outfile = nbfile.rsplit('/', 1)[-1].partition('.')[0]

    # Save to file
    FilesWriter().write(output, resources, outfile)

    return outfile + ".py"
예제 #13
0
def test_run_notebooks(path, microbatch, device):
    """ There are a lot of examples in different notebooks, and all of them should be working.

    The notebook is exported to Python, comment lines and lines containing
    any entry of ``BAD_PREFIXES`` are removed, and the rest is executed with
    ``MICROBATCH`` and ``DEVICE`` as its globals.

    Parameters
    ----------
    path : str
        Location of notebook to run.

    microbatch : int or None
        If None, then no microbatch is applied.
        If int, then size of microbatch used.

    device : str or None
        If None, then default device behaviour is used.
        If str, then any option of device configuration from :class:`.tf.TFModel` is supported.

    Notes
    -----
    `device` is moved to separate parameter in order to work properly with `parametrize`.
    """
    # pylint: disable=exec-used
    # ``device`` may be None (default behaviour). Only an explicit non-CPU
    # device skips a tutorial; a membership test on None raises TypeError.
    if (path.startswith(TUTORIALS_DIR) and device is not None
            and 'CPU' not in device):
        pytest.skip("Tutorials don't utilize device config.")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from nbconvert import PythonExporter
        code, _ = PythonExporter().from_filename(path)

    kept_lines = [
        line for line in code.split('\n')
        if not line.startswith('#')
        and not any(name in line for name in BAD_PREFIXES)
    ]

    code = '\n'.join(kept_lines)
    exec(code, {'MICROBATCH': microbatch, 'DEVICE': device})
예제 #14
0
def parse_ipynb_file(filename):
    """Extract the names of the modules / packages imported by an IPython
    (Jupyter) notebook.

    The notebook is read as nbformat version 4, exported to Python source
    and handed to ``parse_py_module``.

    Parameters
    ----------
    filename : str
        Path to an IPython notebook file.

    Returns
    -------
    packages : set
        A unique list of all (root) packages imported by the specified
        notebook file.
    """
    import nbformat
    from nbconvert import PythonExporter

    with open(filename) as notebook_file:
        raw_text = notebook_file.read()

    notebook = nbformat.reads(raw_text, as_version=4)
    script, _resources = PythonExporter().from_notebook_node(notebook)
    return parse_py_module(script)
def save_run_py(nb):
    """Write the cells meant for `run-readout-scan.py` to that file.

    Only cells whose source starts with the marker comment are kept. In the
    first kept cell the marker line is removed and one extra import line is
    inserted in front of the first line that starts with ``import``. The
    result is exported with TEMPLATE as the exporter's raw template.
    """
    marker = '# Put in a file named run-readout-scan.py'
    run_py = nb.copy()
    run_py.cells = list(
        filter(lambda cell: cell.source.startswith(marker), run_py.cells))

    # Rebuild the source of the first selected cell
    head_cell = run_py.cells[0]
    rebuilt = []
    imports_injected = False
    for text in head_cell.source.splitlines():
        if text.startswith(marker):
            # The marker comment is out of place in the exported py file
            continue

        if not imports_injected and text.startswith('import'):
            # Imports that aren't necessary when it's all in one file.
            rebuilt.append(
                'from recirq.readout_scan.tasks import ReadoutScanTask, run_readout_scan'
            )
            imports_injected = True

        # Every other line is carried over unchanged
        rebuilt.append(text)

    # The cell object is modified in place
    head_cell.source = '\n'.join(rebuilt)

    exporter = PythonExporter()
    exporter.raw_template = TEMPLATE
    output, _ = exporter.from_notebook_node(run_py)
    with open(f'{REPO_DIR}/recirq/readout_scan/run-readout-scan.py', 'w') as f:
        f.write(output)
def export_to_script(input_notebook_path: str, output_path: str,
                     conf: MlVToolConf):
    """
        Render a notebook as a parameterized Python 3 script through the
        Jinja templates and write it to ``output_path``.

        Nothing is written when the rendered script is empty; a warning is
        logged instead. Any exception raised by the export is re-raised as
        MlVToolException.
    """
    logging.info(
        f'Generate Python script {output_path} from Jupyter Notebook {input_notebook_path}'
    )
    logging.debug(f'Global Configuration: {conf}')
    logging.debug(f'Template path {TEMPLATE_PATH}')

    exporter = PythonExporter(get_config(TEMPLATE_PATH))
    jinja_filters = (
        ('filter_trailing_cells', filter_trailing_cells),
        ('get_formatted_cells', get_formatted_cells),
        ('get_data_from_docstring', get_data_from_docstring),
        ('sanitize_method_name', to_method_name),
    )
    for filter_name, filter_func in jinja_filters:
        exporter.register_filter(name=filter_name, jinja_filter=filter_func)

    resources = {'ignore_keys': conf.ignore_keys}
    logging.debug(f'Template info {resources}')
    try:
        script_content, _ = exporter.from_filename(input_notebook_path,
                                                   resources=resources)
    except Exception as e:
        raise MlVToolException(e) from e

    if script_content:
        write_python_script(script_content, output_path)
        # One level above WARNING, as in the success message's original call
        logging.log(
            logging.WARNING + 1,
            f'Python script successfully generated in {abspath(output_path)}')
        return

    logging.warning('Empty notebook provided. Nothing to do.')
예제 #17
0
from lib2to3 import refactor
from nbconvert import PythonExporter
import nbformat
import ast, gast

# Every fixer found in the lib2to3.fixes package, collected once at import
# time and passed to the RefactoringTool built in fix_python2.
fixers = set(refactor.get_fixers_from_package('lib2to3.fixes'))
# Module-level exporter instance used by ipynb_to_code.
exporter = PythonExporter()


def fix_python2(code):
    """Run the lib2to3 fixers over ``code`` and return the result as a str.

    A newline is appended to ``code`` before it is refactored.
    """
    refactoring_tool = refactor.RefactoringTool(fixers, {}, explicit=True)
    refactored_tree = refactoring_tool.refactor_string(code + '\n', 'test')
    return str(refactored_tree)


def ipynb_to_code(ipynb):
    """Return the Python source exported from a notebook given as a string.

    ``ipynb`` is parsed as nbformat version 4 and exported with the
    module-level ``exporter``.
    """
    node = nbformat.reads(ipynb, as_version=4)
    exported = exporter.from_notebook_node(node)
    return exported[0]


class FuncCallVisitor(ast.NodeVisitor):
    def __init__(self):
        self._name = deque()

    @property
    def name(self):
        return '.'.join(self._name)

    @name.deleter
예제 #18
0
                              "\033[0m")
                        break
                else:
                    print_error(
                        f"{cell_num}: no docs are generated for {out_var.name}"
                    )
    return "\n".join(comments), json_map


if __name__ == "__main__":
    with open(nb_path, encoding="UTF-8") as f:
        file_content = f.read()
    notebook = nbformat.reads(file_content, as_version=4)

    # establish a map from each line in the .py export to its line in the .ipynb
    lines = PythonExporter().from_notebook_node(notebook)[0].split("\n")
    code_cells = list(
        filter(lambda cell: cell["cell_type"] == "code", notebook.cells))
    code_indices = list(
        filter(lambda i: notebook.cells[i] in code_cells,
               range(len(notebook.cells))))
    # begin_indices = [
    #     i + 3 for i in range(len(lines)) if lines[i].startswith("# In[")
    # ]
    # line_to_idx = {}
    # for i, idx in enumerate(begin_indices):
    #     l = len(notebook.cells[code_indices[i]].source.split("\n"))
    #     for j in range(l):
    #         line_to_idx[idx + j] = (code_indices[i], j)

    # load static comments
예제 #19
0
 def __init__(self) -> None:
     """Start empty: no instructions, source or meta, and no imports yet."""
     # Nothing has been loaded at construction time.
     self.instructions = self.source = self.meta = None
     self.python_exporter = PythonExporter()
     # Missing keys start out as empty lists.
     self.imports = defaultdict(list)
예제 #20
0
# In[{{ cell.execution_count if cell.execution_count else ' ' }}]:
{% endif %}
{% endblock in_prompt %}

{% block input %}
with jubo.cell("cell_{{ range(1000) | random }}{{ range(1000) | random }}{{ range(1000) | random }}"):
    with jubo.display_patched():
{{ cell.source | ipython2python | indent | indent}}
{% endblock input %}

{% block markdowncell scoped %}
{{ cell.source | comment_lines }}
{% endblock markdowncell %}"""

# Expose the template text held in ``tmplt`` to Jinja under the name
# 'python.tpl'.
dl = DictLoader({'python.tpl': tmplt})
# Exporter that receives the in-memory loader as an extra Jinja loader;
# used by convert() below.
pyex = PythonExporter(extra_loaders=[dl])


def convert(infile, outfile):
    """Export the notebook file ``infile`` with ``pyex`` and write the
    resulting code to ``outfile``, replacing any existing file."""
    exported_code = pyex.from_filename(infile)[0]

    with open(outfile, 'w') as target:
        target.write(exported_code)


if __name__ == "__main__":
    # Script entry point: the first argument is the notebook to read and the
    # second is the Python file to write. Both are made absolute, relative
    # to the current directory.
    innb = os.path.abspath(os.path.join('.', argv[1]))
    outpy = os.path.abspath(os.path.join('.', argv[2]))

    convert(innb, outpy)
예제 #21
0
파일: export.py 프로젝트: lcary/nbd
 def __init__(self):
     """Create the ``PythonExporter`` instance stored on ``self.exporter``."""
     self.exporter = PythonExporter()
예제 #22
0
def generate_duet_notebooks() -> None:
    """Export the Duet notebooks in TARGETS as scripts and write their tests.

    First pass, for every path in TARGETS whose stem ends in
    "_Data_Scientist" or "_Data_Owner": a code cell holding
    ``asyncio_event_loop`` is inserted at the front, ``loopback=True`` cells
    get a ``network_url`` argument, markdown cells mentioning "Checkpoint"
    are replaced by generated checkpoint code cells, and the notebook is
    exported to Python under NOTEBOOK_TESTS_PATH. Optional regex rules from
    ``<path>.json`` are applied to the exported source.

    Second pass, for every test case collected above: the
    ``duet_test.py.template`` file is filled in and written as
    ``duet_<case>_test.py``.

    Failures while preparing directories or exporting are printed, not
    raised.
    """
    # test case name -> names of the scripts generated for it
    tests = defaultdict(list)

    # Passed as the second argument to both checkpoint templates.
    SLEEP_TIME = 500

    try:
        os.makedirs(NOTEBOOK_TESTS_PATH, exist_ok=True)
    except BaseException as e:
        print("os.makedirs failed ", e)

    # Remove the checkpoint directory and recreate it empty.
    try:
        shutil.rmtree(CHECKPOINT_PATH)
    except BaseException as e:
        print("rmtree failed ", e)

    try:
        os.makedirs(CHECKPOINT_PATH, exist_ok=True)
    except BaseException as e:
        print("os.makedirs failed ", e)

    # test case name -> library name found in a load_lib(...) call
    testcase_lib = {}

    for path in TARGETS:
        # Script name: the path with each run of non-alphanumerics as "_".
        testname = re.sub("[^0-9a-zA-Z]+", "_", str(path))
        output = NOTEBOOK_TESTS_PATH / testname

        file_name = str(path.stem)
        is_do = False
        is_ds = False

        # Only Data Scientist / Data Owner notebooks are processed; exactly
        # one of is_ds / is_do is True below this point.
        if file_name.endswith("_Data_Scientist"):
            testcase = file_name.replace("_Data_Scientist", "")
            tests[testcase].append(testname)
            is_ds = True
        elif file_name.endswith("_Data_Owner"):
            testcase = file_name.replace("_Data_Owner", "")
            tests[testcase].append(testname)
            is_do = True
        else:
            continue

        load_lib_search = r"load_lib\(\W+([a-z_-]+)\W+\)"

        # Search the raw notebook text for the first load_lib(...) call.
        with open(path, "r") as f:
            load_lib_results = re.search(load_lib_search, str(f.read()),
                                         re.IGNORECASE)
            if load_lib_results:
                lib_name = load_lib_results.group(1)
                testcase_lib[testcase] = lib_name

        notebook_nodes = nbformat.read(path, as_version=4)

        # Prepend a code cell holding the asyncio_event_loop source.
        custom_cell = nbformat.v4.new_code_cell(source=asyncio_event_loop)
        notebook_nodes["cells"].insert(0, custom_cell)

        for idx, cell in enumerate(notebook_nodes["cells"]):
            # Add a network_url argument wherever loopback=True is used.
            if cell["cell_type"] == "code" and "loopback=True" in cell[
                    "source"]:
                network_url = "f'http://0.0.0.0:{get_global_var()}'"
                notebook_nodes["cells"][idx]["source"] = cell[
                    "source"].replace(
                        "loopback=True",
                        f"loopback=True, network_url={network_url}")
            # Replace a "Checkpoint" markdown cell with a generated code cell.
            if cell["cell_type"] == "markdown" and "Checkpoint" in cell[
                    "source"]:
                # Text between the first "checkpoint" (case-insensitive) and
                # the next ":", stripped.
                checkpoint = (cell["source"].lower().split("checkpoint")
                              [1].strip().split(":")[0].strip())

                testcase_checkpoint_dir = f"checkpoints/{testcase}/"

                # For DO, we wait until DS gets to the same checkpoint
                if is_do:
                    ck_file = testcase_checkpoint_dir + (
                        testcase + "_DO_checkpoint_" + str(checkpoint))
                    wait_file = testcase_checkpoint_dir + (
                        testcase + "_DS_checkpoint_" + str(checkpoint))
                    checkpoint_cell = nbformat.v4.new_code_cell(
                        source=checkpoint_template.format(
                            ck_file, SLEEP_TIME, wait_file))

                # For DS, we wait until DO gets to the next checkpoint
                elif is_ds:
                    ck_file = testcase_checkpoint_dir + (
                        testcase + "_DS_checkpoint_" + str(checkpoint))
                    wait_file = testcase_checkpoint_dir + (
                        testcase + "_DO_checkpoint_" +
                        str(int(checkpoint) + 1))
                    checkpoint_cell = nbformat.v4.new_code_cell(
                        source=checkpoint_ack_template.format(
                            ck_file, SLEEP_TIME, wait_file, wait_file))
                notebook_nodes["cells"][idx] = checkpoint_cell

        try:
            exporter = PythonExporter()

            (body, resources) = exporter.from_notebook_node(notebook_nodes)
            write_file = FilesWriter()

            # replace empty cells with print statements for easy debugging
            empty_cell = "# In[ ]:"
            counter = 1
            cell_type = "DO" if is_do else "DS"
            while empty_cell in body:
                body = body.replace(empty_cell,
                                    f"print('{cell_type} Cell: {counter}')", 1)
                counter += 1

            # replace any test variables / lines to make things faster in test mode
            json_file = f"{path}.json"
            if os.path.exists(json_file):
                with open(json_file, "r") as f:
                    json_rules = json.loads(f.read())

                    # A rule that fails to apply is reported and skipped.
                    for rules in json_rules["replace_lines"]:
                        try:
                            body = re.sub(
                                rules["match"],
                                rules["replace"],
                                body,
                                flags=re.MULTILINE,
                            )
                        except Exception as e:
                            print(
                                f"Failed to replace rule {rules} for test: {path}. {e}"
                            )

            write_file.write(output=body,
                             resources=resources,
                             notebook_name=str(output))
        except Exception as e:
            print(f"There was a problem exporting the file(s): {e}")

    for case in tests:
        test = tests[case]
        # A case without exactly two scripts is reported but still written.
        if len(test) != 2:
            print("invalid testcase ", test)

        print(case, test)

        # NOTE(review): this file object is never closed explicitly, and the
        # same template is re-read for every case - consider a `with` block
        # hoisted above the loop.
        template = open(NOTEBOOK_TESTS_PATH / "duet_test.py.template").read()

        output_py = template.replace("{{TESTCASE}}", str(case))
        output_py = output_py.replace("checkpoints", f"checkpoints/{case}")

        for script in test:
            if "Data_Owner" in script:
                output_py = output_py.replace("{{DO_SCRIPT}}", script)
            elif "Data_Scientist" in script:
                output_py = output_py.replace("{{DS_SCRIPT}}", script)

        decorator = ""
        if case in testcase_lib:
            lib_name = testcase_lib[case]
            decorator = f"@pytest.mark.vendor(lib='{lib_name}')"
        # NOTE(review): the literal "[email protected]" below looks like
        # an email-obfuscation artifact, presumably of a newline followed by
        # a second decorator - confirm the intended text upstream.
        output_py = output_py.replace("{{LIB_DECORATOR}}",
                                      decorator + "\[email protected]")

        with open(NOTEBOOK_TESTS_PATH / f"duet_{case}_test.py", "w") as out_py:
            out_py.write(output_py)
예제 #23
0
def export_python(nb, destfn):
    """Write the Python export of the notebook node ``nb`` to the file
    ``destfn``, replacing any existing file."""
    script = PythonExporter().from_notebook_node(nb)[0]
    with open(destfn, 'w') as out:
        out.write(script)
예제 #24
0
def gen_tutorials(input_dir: str, output_dir: str) -> None:
    """Generate HTML tutorials for botorch Docusaurus site from Jupyter notebooks.

    Also create ipynb and py versions of each tutorial in the Docusaurus
    site for download.

    For every tutorial id listed in ``mapping.json`` the following files are
    written below ``output_dir``:

    - ``_tutorials/<id>.html``: the notebook container div, prefixed with
      JS_SCRIPTS
    - ``pages/tutorials/<id>.js``: TEMPLATE formatted with the id
    - ``static/files/<id>.ipynb``: the notebook text as read
    - ``static/files/<id>.py``: the Python export with a python3 shebang

    Args:
        input_dir: Directory holding ``mapping.json`` and the
            ``<id>.ipynb`` notebooks.
        output_dir: Directory below which the files above are written.
    """
    import re

    with open(os.path.join(input_dir, "mapping.json")) as infile:
        tutorial_config = json.load(infile)

    # create output directories if necessary
    html_out_dir = Path(output_dir) / "_tutorials"
    files_out_dir = Path(output_dir) / "static" / "files"
    html_out_dir.mkdir(parents=True, exist_ok=True)
    files_out_dir.mkdir(parents=True, exist_ok=True)

    tutorial_ids = {x["id"] for v in tutorial_config.values() for x in v}

    for tid in tutorial_ids:
        print(f"Generating {tid} tutorial")

        # convert notebook to HTML
        ipynb_in_path = os.path.join(input_dir, f"{tid}.ipynb")
        with open(ipynb_in_path, encoding="utf8") as infile:
            nb_str = infile.read()
            nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)

        # displayname is absent from notebook metadata
        nb["metadata"]["kernelspec"]["display_name"] = "python3"

        html, _ = HTMLExporter().from_notebook_node(nb)

        # pull out html div for notebook
        soup = BeautifulSoup(html, "html.parser")
        nb_meat = soup.find("div", {"id": "notebook-container"})
        del nb_meat.attrs["id"]
        nb_meat.attrs["class"] = ["notebook"]
        html_out = JS_SCRIPTS + str(nb_meat)

        # generate html file
        html_out_path = os.path.join(html_out_dir, f"{tid}.html")
        with open(html_out_path, "w", encoding="utf8") as html_outfile:
            html_outfile.write(html_out)

        # generate JS file
        script = TEMPLATE.format(tid)
        js_out_path = os.path.join(output_dir, "pages", "tutorials",
                                   f"{tid}.js")
        Path(js_out_path).parent.mkdir(exist_ok=True, parents=True)
        with open(js_out_path, "w", encoding="utf8") as js_outfile:
            js_outfile.write(script)

        # output tutorial in both ipynb & py form
        ipynb_out_path = os.path.join(files_out_dir, f"{tid}.ipynb")
        with open(ipynb_out_path, "w", encoding="utf8") as ipynb_outfile:
            ipynb_outfile.write(nb_str)

        script, _ = PythonExporter().from_notebook_node(nb)
        # make sure to use python3 shebang. The word boundary keeps an
        # existing "python3" intact; a plain str.replace would turn it into
        # "python33".
        script = re.sub(
            r"#!/usr/bin/env python\b",
            "#!/usr/bin/env python3",
            script,
        )
        py_out_path = os.path.join(files_out_dir, f"{tid}.py")
        with open(py_out_path, "w", encoding="utf8") as py_outfile:
            py_outfile.write(script)
예제 #25
0
def convert_to_python(notebook, template_file=None):
    """Export ``notebook`` to Python source with a ``PythonExporter``.

    ``template_file`` is assigned to the exporter's ``template_file``
    attribute before the export, including when it is None.

    Returns the exported code together with the second value produced by
    the exporter.
    """
    python_exporter = PythonExporter()
    python_exporter.template_file = template_file
    code, resources = python_exporter.from_notebook_node(notebook)
    return (code, resources)
예제 #26
0
파일: pipeline.py 프로젝트: ETCBC/pipeline
import os
from subprocess import Popen, PIPE
from shutil import copy, copytree, rmtree
import nbformat
from nbconvert import PythonExporter
from tf.fabric import Fabric

from utils import bzip, caption

# Shared exporter used by runNb to turn notebooks into Python source.
py = PythonExporter()

# Directory under which runNb looks up "<repo>/<dirName>/<nb>.ipynb".
githubBase = os.path.expanduser("~/github/etcbc")
pipelineRepo = "pipeline"
utilsScript = "programs/utils.py"

programDir = "programs"
# Evaluates to ["CORE_NAME", "VERSION"].
standardParams = "CORE_NAME VERSION".strip().split()


def runNb(repo, dirName, nb, force=False, **parameters):
    """Export notebook ``nb`` of ``repo``/``dirName`` to a ``.py`` file beside it.

    The notebook and the script both live in ``githubBase/repo/dirName``.
    The keyword ``parameters`` are listed through ``caption`` in sorted
    order before the export.

    NOTE(review): in the lines visible here ``force`` is unused and
    ``parameters`` are only printed - presumably the function continues
    beyond this excerpt; confirm against the full source.
    """
    caption(3, "Run notebook [{}/{}] with parameters:".format(repo, nb))
    for (param, value) in sorted(parameters.items()):
        caption(0, "\t{:<20} = {}".format(param, value))

    location = "{}/{}/{}".format(githubBase, repo, dirName)
    nbFile = "{}/{}.ipynb".format(location, nb)
    pyFile = "{}/{}.py".format(location, nb)
    # Read as nbformat version 4; keep only the exported source text.
    nbObj = nbformat.read(nbFile, 4)
    pyScript = py.from_notebook_node(nbObj)[0]
    with open(pyFile, "w") as s:
        s.write(pyScript)
예제 #27
0
def export_python(wd, name):
    """Export the notebook loaded by ``_read(wd, name)`` to Python source
    and write it to ``<wd>/<name>.py``."""
    notebook = _read(wd, name)
    script, _ = PythonExporter().from_notebook_node(notebook)
    target = "{}/{}.py".format(wd, name)
    with open(target, 'w') as out:
        out.write(script)
예제 #28
0
def get_preprocessed_entry_point(
    entry_point,
    chief_config,
    worker_config,
    worker_count,
    distribution_strategy,
    called_from_notebook=False,
):
    """Creates python script for distribution based on the given `entry_point`.

    This utility creates a new python script called `preprocessed_entry_point`
    based on the given `entry_point` and `distribution_strategy` inputs. This
    script will become the new Docker entry point python program.

    1. If `entry_point` is a python file name and `distribution_strategy` is
    auto, then `preprocessed_entry_point` will have the user given
    `entry_point` code wrapped in a Tensorflow distribution strategy.

    2. If `entry_point` is None and `run` is invoked inside of a python script,
    then `preprocessed_entry_point` will be this python script (sys.argv[0]).

    3. If `entry_point` is an `ipynb` file, then `preprocessed_entry_point`
    will be the code from the notebook. This utility uses `nbconvert`
    to get the code from notebook.

    4. If `entry_point` is None and `run` is invoked inside of an `ipynb`
    notebook, then `preprocessed_entry_point` will be the code from the
    notebook. This utility fetches the code through the Kaggle helper when
    the `KAGGLE_CONTAINER_NAME` environment variable is set and through
    the Colab helper otherwise.

    For cases 2, 3 & 4, if `distribution_strategy` is auto, then this script
    will be wrapped in a Tensorflow distribution strategy.

    The distribution strategy instance created is based on the machine
    configurations provided using the `chief_config`, `worker_config` and
    `worker_count` params.
    - If the number of workers > 0,
        - If the worker config is a TPU config, we will create an instance of
        `tf.distribute.experimental.TPUStrategy`.
        - Otherwise, we will create a default instance of
        `tf.distribute.experimental.MultiWorkerMirroredStrategy`.
    - If the chief has more than one accelerator, we will create a default
        instance of `tf.distribute.MirroredStrategy`
    - Otherwise, we will use `tf.distribute.OneDeviceStrategy` on `/gpu:0`.

    Args:
        entry_point: Optional string. File path to the python file or iPython
            notebook that contains the TensorFlow code.
            Note) This path must be in the current working directory tree.
            Example) 'train.py', 'training/mnist.py', 'mnist.ipynb'
            If `entry_point` is not provided, then
            - If you are in an iPython notebook environment, then the
                current notebook is taken as the `entry_point`.
            - Otherwise, the current python script is taken as the
                `entry_point`.
        chief_config: `MachineConfig` that represents the configuration
            for the chief worker in a distribution cluster.
        worker_config: `MachineConfig` that represents the configuration
            for the workers in a distribution cluster.
        worker_count: Integer that represents the number of general workers
            in a distribution cluster. This count does not include the chief
            worker.
        distribution_strategy: 'auto' or None.
            'auto' means we will take care of creating a Tensorflow
            distribution strategy instance based on the machine configurations
            provided using the `chief_config`, `worker_config` and
            `worker_count` params.
        called_from_notebook: Boolean. True if the API is run in a
            notebook environment.

    Returns:
        The `preprocessed_entry_point` file path. The file is a temporary
        `.py` file; it is not removed by this function.

    Raises:
        RuntimeError: If a notebook has to be converted but `PythonExporter`
                      is unavailable. Typically, this is due to missing the
                      `nbconvert` package.
    """

    # Set `TF_KERAS_RUNNING_REMOTELY` env variable. This is required in order
    # to prevent running `tfc.run` if we are already in a cloud environment.
    # This is applicable only when `entry_point` is None.
    script_lines = [
        "import os\n",
        "import tensorflow as tf\n",
        'os.environ["TF_KERAS_RUNNING_REMOTELY"]="1"\n',
    ]

    # Auto wrap in distribution strategy.
    if distribution_strategy == "auto":
        if worker_count > 0:
            if machine_config.is_tpu_config(worker_config):
                strategy = get_tpu_cluster_resolver_fn()
                strategy.extend(
                    [
                        "resolver = wait_for_tpu_cluster_resolver_ready()\n",
                        "tf.config.experimental_connect_to_cluster(resolver)\n",
                        "tf.tpu.experimental.initialize_tpu_system(resolver)\n",
                        "strategy = tf.distribute.experimental.TPUStrategy("
                        "resolver)\n",
                    ]
                )
            else:
                strategy = [
                    "strategy = tf.distribute.experimental."
                    "MultiWorkerMirroredStrategy()\n"
                ]
        elif chief_config.accelerator_count > 1:
            strategy = ["strategy = tf.distribute.MirroredStrategy()\n"]
        else:
            strategy = [
                "strategy = tf.distribute.OneDeviceStrategy(device='/gpu:0')\n"]
        script_lines.extend(strategy)
        script_lines.append(
            "tf.distribute.experimental_set_strategy(strategy)\n")

    # If `entry_point` is not provided, detect if we are in a notebook
    # or a python script. Fetch the `entry_point`.
    if entry_point is None and not called_from_notebook:
        # Current python script is assumed to be the entry_point.
        entry_point = sys.argv[0]

    # Add user's code.
    if entry_point is not None and entry_point.endswith("py"):
        # We are using exec here to execute the user code object.
        # This will support use case where the user's program has a
        # main method. Only the file name is referenced, not its directory.
        entry_point_file_name = os.path.basename(entry_point)
        script_lines.append(
            'exec(open("{}").read())\n'.format(entry_point_file_name))
    else:
        if called_from_notebook:
            # Kaggle integration
            if os.getenv("KAGGLE_CONTAINER_NAME"):
                logger.info("Preprocessing Kaggle notebook...")
                py_content = _get_kaggle_notebook_content()
            else:
                # Colab integration
                py_content = _get_colab_notebook_content()
        else:
            if PythonExporter is None:
                raise RuntimeError(
                    "Unable to access iPython notebook. "
                    "Please make sure you have installed `nbconvert` package."
                )

            # Get the python code from the iPython notebook.
            (py_content, _) = PythonExporter().from_filename(entry_point)
            py_content = py_content.splitlines(keepends=True)

        # Remove any iPython special commands (and comment lines) and add
        # the remaining python code to script_lines.
        script_lines.extend(
            line for line in py_content
            if not line.startswith(("!", "%", "#"))
        )

    # Create a tmp wrapped entry point script file. `mkstemp` hands back an
    # already open OS-level descriptor; write through it so that it gets
    # closed instead of leaking one descriptor per call. The script is
    # written as UTF-8, the encoding Python assumes for source files.
    fd, output_file = tempfile.mkstemp(suffix=".py")
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        f.writelines(script_lines)
    return output_file
예제 #29
0
def check_notebook(filename, all_sol_vars):
    """
    Compares the variables of a Jupyter notebook to a reference solution.

    The notebook's code cells are exported to a Python script, lines using
    ``get_ipython`` are commented out, and the script is executed with its
    standard output suppressed. The resulting variables are then checked
    against every solution whose key starts with ``filename`` until one
    matches.

    :param filename: of the notebook to check.
    :param all_sol_vars: dict which contains the reference solutions.
    :return: True when all checks were successful or no checks could be performed,
        False when none of the matching solutions was satisfied.
    """
    print('Testing notebook ' + filename)

    # Find all solutions for the current notebook (usually more than one for TensorFlow code)
    solutions = sorted(
        solution for solution in all_sol_vars
        if solution.startswith(filename)
    )

    if not solutions:
        print(
            'No matching solution for the file %s found. The notebook is skipped.\n'
            % filename)
        return True

    # Load the notebook file. Notebooks are stored as UTF-8 encoded JSON, so
    # the locale's default encoding must not be used for decoding.
    with open(filename, encoding='utf-8') as file:
        nb = nbformat.read(file, as_version=4)

    # Keep only the code cells (and especially remove the raw cells)
    nb.cells[:] = [cell for cell in nb.cells if cell.cell_type == 'code']

    # Convert the notebook to a Python script
    exporter = PythonExporter()
    source, meta = exporter.from_notebook_node(nb)
    source = re.sub(
        '(.*?)get_ipython', r'#\1get_ipython',
        source)  # Comment out code lines which are only available in ipython

    # Run the student's solution
    # NOTE(review): this executes the notebook's code in the current process;
    # only use it on notebooks from a trusted source.
    stud_vars = {}
    with open(os.devnull, "w") as stream, contextlib.redirect_stdout(stream):
        # print commands are prevented (warnings will still be shown)
        exec(source, stud_vars)

    # Test each solution; an empty result from check_vars is treated as
    # "no errors found".
    messages = {}
    correct_solution = ''
    for solution in solutions:
        messages[solution] = check_vars(stud_vars, all_sol_vars[solution])

        if not messages[solution]:
            correct_solution = solution

            # The first check was already successful; no need to check other possible solutions
            break

    if correct_solution:
        print(
            f'{colorama.Fore.GREEN}The test for the notebook %s was successful (checked against %s). No errors found.{colorama.Style.RESET_ALL}\n'
            % (filename, correct_solution))
        return True
    else:
        # All solutions are incorrect. Show the errors for the first solution (arbitrary)
        print(
            'The test for the notebook %s was not successful. At least one variable does not contain the expected result (errors compared to the solution %s are shown).'
            % (filename, solutions[0]))

        for message in messages[solutions[0]]:
            print(message)

        return False
예제 #30
0
def nb_to_python(nb_path):
    """Return the Python source exported from the notebook at ``nb_path``.

    The resources that the exporter returns alongside the source are dropped.
    """
    script, _ = PythonExporter().from_filename(nb_path)
    return script