def convert_notebook_to_py(nb_fn: Path, py_fn: Path) -> None:
    """
    From https://stackoverflow.com/questions/17077494/how-do-i-convert-a-ipython-notebook-into-a-python-file-via-commandline
    """
    import nbformat
    from nbconvert import PythonExporter

    with open(nb_fn) as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    exporter = PythonExporter()
    source, meta = exporter.from_notebook_node(nb)

    # Comment out the magics, which get converted to `get_ipython()` calls
    # (str.replace returns a new string, so the result must be reassigned).
    source = source.replace("get_ipython", "# get_ipython")

    with open(py_fn, "w+") as fh:
        fh.writelines(source)
def collect(self):
    exporter = PythonExporter()
    exporter.exclude_markdown = True
    exporter.exclude_input_prompt = True

    notebook_contents = self.fspath.open(encoding='utf-8')

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", "IPython.core.inputsplitter is deprecated")
        notebook = nbformat.read(notebook_contents, 4)
    code, _ = exporter.from_notebook_node(notebook)

    if pytest_version >= 50403:
        yield IPyNbTest.from_parent(name=self.name, parent=self, code=code)
    else:
        yield IPyNbTest(self.name, self, code)
def load_source_code(notebook_path, python_path, python_code):
    """ Load the pipeline source code from the specified source """
    source_code = ""
    sources = [notebook_path, python_path, python_code]
    assert sum(source is not None for source in sources) == 1
    if python_path is not None:
        with open(python_path) as file:
            source_code = file.read()
    elif notebook_path is not None:
        with open(notebook_path) as file:
            notebook = nbformat.reads(file.read(), nbformat.NO_CONVERT)
        exporter = PythonExporter()
        source_code, _ = exporter.from_notebook_node(notebook)
    elif python_code is not None:
        source_code = python_code
    return source_code
def convert_ipynb_to_script(notebook_path, output_path):
    """Convert an IPython notebook to a Python script.

    Args:
        notebook_path (str): path to the notebook file.
        output_path (str): path to the script file destination.

    Examples:
        notebook_path : source path with .ipynb file '/path/src/my_file.ipynb'.
        output_path : destination path with .py file '/path/src/my_file.py'.
    """
    with open(notebook_path, 'r') as notebook_path_handle:
        raw_notebook = notebook_path_handle.read()
    notebook = nbformat.reads(raw_notebook, as_version=4)
    script, _ = PythonExporter().from_notebook_node(notebook)
    with open(output_path, 'w') as output_path_handle:
        output_path_handle.write(script)
def _nb_sample_to_py(notebook_path: str, output_path: str):
    """nb_sample_to_py converts notebook kfp sample to a python file.

    Cells with tag "skip-in-test" will be omitted.
    """
    with open(notebook_path, 'r') as f:
        nb = nbformat.read(f, as_version=4)
    # Cells with skip-in-test tag will be omitted.
    # Example code that needs the tag:
    #   kfp.Client().create_run_from_pipeline_func()
    # so that we won't submit pipelines when compiling them.
    nb.cells = [
        cell for cell in nb.cells
        if 'skip-in-test' not in cell.get('metadata', {}).get('tags', [])
    ]
    py_exporter = PythonExporter()
    (py_code, res) = py_exporter.from_notebook_node(nb)
    with open(output_path, 'w') as out:
        out.write(py_code)
def convert_notebook(notebook_path):
    write_folder = cache.get_parent_folder(notebook_path)
    file_name = cache.get_file_name(notebook_path)
    write_file = os.path.join(write_folder, "%s.%s" % (file_name, props.TYPE_PYTHON))
    if cache.file_exists(write_file):
        LOGGER.info("'%s' already converted. Moving on ... " % file_name)
        return
    else:
        LOGGER.info("Converting filename '%s' ... " % file_name)
    with open(notebook_path) as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)
    exporter = PythonExporter()
    try:
        source, meta = exporter.from_notebook_node(nb)
    except nbformat.validator.NotebookValidationError:
        LOGGER.error("Validation error while converting '%s'." % notebook_path)
        return
    # Write the exported source as text; passing bytes to a text-mode file
    # handle would raise a TypeError.
    with open(write_file, 'w+', encoding='utf-8') as fh:
        fh.write(source)
def nbconverter(notebookPath, directionPath=None):
    """Method name is the same as the Jupyter default converter name: Nbconvert(notebookPath, directionPath)

    :param notebookPath: source path of the .ipynb file you want to convert
    :param directionPath: destination path of the formatted .py file
    """
    try:
        with open(notebookPath, 'r', encoding='utf-8') as fh:
            nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)
    except FileNotFoundError:
        if '.ipynb' not in notebookPath:
            notebookPath += '.ipynb'
        with open(notebookPath, 'r', encoding='utf-8') as fh:
            nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    if directionPath is None:
        directionPath = notebookPath.replace('.ipynb', '.py')

    pattern_input = r'# In\[[\d\s]*\]:'
    pattern_comment = r'^#.+'
    pattern_variables = r'^[a-zA-Z0-9_]+\[?[ |0-9]*\]? *$'
    pattern_square_bracket = r'^\[.*\]$'
    pattern_string = r"^[\'|\"].*[\'|\"] *$"
    pattern_number = r'^\d+ *[\+|\-|\*|\/]? *\d* *$'

    def print_match(matched):
        return 'print({})'.format(matched.group().strip())

    exporter = PythonExporter()
    source, meta = exporter.from_notebook_node(nb)
    source = source.split('\n')[2:]
    source = [t for t in source if t and not re.match(pattern_input, t)]
    source = ['\n' + t if re.match(pattern_comment, t) else t for t in source]
    source = [re.sub(pattern=pattern_variables, repl=print_match, string=t) for t in source]
    source = [re.sub(pattern=pattern_square_bracket, repl=print_match, string=t) for t in source]
    source = [re.sub(pattern=pattern_string, repl=print_match, string=t) for t in source]
    source = '\n'.join(source) + '\n\n'

    with open(directionPath, 'w+', encoding='utf-8') as fh:
        fh.write(source)
    print('{} has been saved'.format(directionPath))
def get_converted_script(input_notebook_path: str, conf: MlVToolConf) -> str:
    """
    Extract notebook python content using nbconvert
    """
    exporter = PythonExporter(get_config(TEMPLATE_PATH))
    exporter.register_filter(name='filter_trailing_cells',
                             jinja_filter=filter_trailing_cells)
    exporter.register_filter(name='get_formatted_cells',
                             jinja_filter=get_formatted_cells)
    exporter.register_filter(name='get_data_from_docstring',
                             jinja_filter=get_data_from_docstring)
    exporter.register_filter(name='sanitize_method_name',
                             jinja_filter=to_method_name)
    resources = {'ignore_keys': conf.ignore_keys}
    logging.debug(f'Template info {resources}')
    try:
        script_content, _ = exporter.from_filename(input_notebook_path,
                                                   resources=resources)
    except Exception as e:
        raise MlVToolException(e) from e
    return script_content
def __init__(self, nb, jsonable_parameter=True, end_cell_index=None):
    if isinstance(nb, nbformat.notebooknode.NotebookNode):
        pass
    elif isinstance(nb, str):
        if os.path.splitext(nb)[1] != '.ipynb':
            raise ValueError(
                "The extension of the jupyter notebook = '{}' is not '.ipynb'"
                .format(nb))
        nb = _read_nb(nb)
    else:
        raise TypeError()

    self.language = nb['metadata']['kernelspec']['language']
    self.language_version = nb['metadata']['language_info']['version']
    self.kernel = nb['metadata']['kernelspec']['name']
    self.nb = nb
    self.exporter = PythonExporter()

    if self.language == 'python' and self.language_version[0] == '3':
        self.param_cell_index, self.param_value = self._cell_index_of_possible_param(
            jsonable_parameter, end_cell_index)
    else:
        self.param_cell_index, self.param_value = sort_dict({}), {}
def scan_jupyter_imports(filename):
    ''' Auxiliary function to get Python imports from Jupyter notebooks '''
    # check input is correct
    if not os.access(filename, os.R_OK):
        raise IOError("File {} can't be read\n".format(filename))
    logging.debug('Python scan for file: {}'.format(filename))
    deps = set()
    try:
        # https://nbformat.readthedocs.io/en/latest/api.html
        ipynb = nbformat.read(filename, as_version=nbformat.NO_CONVERT)
        # https://nbconvert.readthedocs.io/en/latest/nbconvert_library.html
        python_exporter = PythonExporter()
        (body, resources) = python_exporter.from_notebook_node(ipynb)
        tree = ast.parse(body)
        for node in ast.walk(tree):
            modules = is_import(node)
            if modules is not None:
                for m in modules:
                    if not is_python_std(m):
                        orig = cleanup_import(m)
                        tran = translate_python_import(orig)
                        if tran != "ignore" and tran not in PY_LOCAL:
                            deps.add(tran)
                            logging.debug('Translating Python dependency {} into {}'.format(orig, tran))
                        else:
                            logging.debug('Ignoring Python dependency: {}'.format(orig))
    except BaseException:
        logging.warning("Could not parse file: {}".format(filename))
    return deps
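# The helpers used above (is_import, is_python_std, cleanup_import,
# translate_python_import, PY_LOCAL) are defined elsewhere in that project.
# As a hypothetical illustration only, a standard-library check in the spirit
# of is_python_std could be written against sys.stdlib_module_names
# (available since Python 3.10); the name and behaviour here are assumptions:

import sys


def is_python_std_sketch(module_name):
    """Hypothetical sketch: True if the root package is in the standard library."""
    root = module_name.split('.')[0]
    return root in sys.stdlib_module_names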
def test_notebooks():
    """Run all notebooks in /docs/tutorials/ as tests."""
    # Get the notebook names
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(root, 'docs', 'tutorials')
    notebooks = glob.glob(os.path.join(path, '*.ipynb'))

    # Convert them to python scripts
    exporter = PythonExporter()
    for notebook in notebooks:
        # Get the script as a string
        script, _ = exporter.from_filename(notebook)

        # Get rid of %matplotlib inline commands
        script = script.replace("get_ipython().magic('matplotlib inline')", "")
        script = script.replace(
            "get_ipython().run_line_magic('matplotlib', 'inline')", "")

        # Get rid of %run commands (raw strings avoid invalid-escape warnings)
        script = re.sub(r"get_ipython\(\).magic\('run (.*)'\)", r"#", script)
        script = re.sub(r"get_ipython\(\).run_line_magic\('run', '(.*)'\)", r"#", script)

        # Remove the %time wrappers
        script = re.sub(r"get_ipython\(\).magic\('time (.*)'\)", r"\1", script)
        script = re.sub(r"get_ipython\(\).run_line_magic\('time', '(.*)'\)", r"\1", script)

        # Remove calls to map.show()
        script = re.sub(r"(.*)\.show\(\)(.*)", r"#", script)

        # Run it
        print("Running %s..." % os.path.basename(notebook))
        try:
            exec(script, globals(), globals())
        except AssertionError:
            print("Error in %s." % notebook)
            raise

        pl.close('all')
def convert_notebook() -> str:
    '''
    Converts the current notebook to an executable .py file

    Returns
    -------
    str : the full path and filename of the converted file
    '''
    try:
        import ipykernel
        import notebook.notebookapp
    except ImportError as e:
        # log.exception('ImportError : This only runs in a Jupyter Notebook environment ' + str(e))
        return

    from nbconvert import PythonExporter
    from nbconvert.writers import FilesWriter
    import nbformat

    exporter = PythonExporter()
    nbfile = get_notebook_name()
    nb = nbformat.read(nbfile, nbformat.NO_CONVERT)
    start = 0
    # Don't use the last cell
    nb.cells = nb.cells[start:-2]
    (output, resources) = exporter.from_notebook_node(nb)
    filename = nbfile.split('/')[-1].split('.')[0]
    outfile = filename

    # Save to file
    writer = FilesWriter()
    writer.write(output, resources, outfile)
    return outfile + ".py"
def test_run_notebooks(path, microbatch, device):
    """ There are a lot of examples in different notebooks, and all of them should be working.

    Parameters
    ----------
    path : str
        Location of notebook to run.
    microbatch : int or None
        If None, then no microbatch is applied. If int, then size of microbatch used.
    device : str or None
        If None, then default device behaviour is used.
        If str, then any option of device configuration from :class:`.tf.TFModel` is supported.

    Notes
    -----
    `device` is moved to a separate parameter in order to work properly with `parametrize`.
    """
    # pylint: disable=exec-used
    if path.startswith(TUTORIALS_DIR) and 'CPU' not in device:
        pytest.skip("Tutorials don't utilize device config.")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from nbconvert import PythonExporter
        code, _ = PythonExporter().from_filename(path)

    code_ = []
    for line in code.split('\n'):
        if not line.startswith('#'):
            flag = sum([name in line for name in BAD_PREFIXES])
            if flag == 0:
                code_.append(line)
    code = '\n'.join(code_)

    exec(code, {'MICROBATCH': microbatch, 'DEVICE': device})
def parse_ipynb_file(filename):
    """A simple parser for extracting the names of imported modules / packages
    from an IPython (Jupyter) notebook.

    Parameters
    ----------
    filename : str
        Path to an IPython notebook file.

    Returns
    -------
    packages : set
        A unique list of all (root) packages imported by the specified
        notebook file.
    """
    import nbformat
    from nbconvert import PythonExporter

    with open(filename) as _file:
        nb_stuff = nbformat.reads(_file.read(), as_version=4)

    exporter = PythonExporter()
    (body, _) = exporter.from_notebook_node(nb_stuff)

    return parse_py_module(body)
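# parse_py_module is not shown in this snippet. A minimal sketch of what such
# a helper might look like, assuming it only needs to return the set of root
# package names imported by the exported source (the name and behaviour here
# are assumptions, not the original implementation):

import ast


def parse_py_module_sketch(body):
    """Hypothetical sketch: collect root package names from Python source."""
    packages = set()
    for node in ast.walk(ast.parse(body)):
        if isinstance(node, ast.Import):
            packages.update(alias.name.split('.')[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            packages.add(node.module.split('.')[0])
    return packages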
def save_run_py(nb):
    """Select cells appropriate for `run-readout-scan.py`."""
    should_start_with = '# Put in a file named run-readout-scan.py'
    run_py = nb.copy()
    run_py.cells = [
        cell for cell in run_py.cells
        if cell.source.startswith(should_start_with)
    ]

    # Fix-up the content
    first_cell_lines = run_py.cells[0].source.splitlines()
    new_first_cell_lines = []
    added_imports = False
    for line in first_cell_lines:
        if line.startswith(should_start_with):
            # Ignore this comment, which is out of place in the exported py file
            continue
        if line.startswith('import') and not added_imports:
            # Add imports that aren't necessary when it's in one file.
            new_first_cell_lines += [
                'from recirq.readout_scan.tasks import ReadoutScanTask, run_readout_scan'
            ]
            added_imports = True
        # Rest of the lines are unmodified
        new_first_cell_lines += [line]

    # Mutate the original cell
    run_py.cells[0].source = '\n'.join(new_first_cell_lines)

    exporter = PythonExporter()
    exporter.raw_template = TEMPLATE
    output, resources = exporter.from_notebook_node(run_py)
    with open(f'{REPO_DIR}/recirq/readout_scan/run-readout-scan.py', 'w') as f:
        f.write(output)
def export_to_script(input_notebook_path: str, output_path: str, conf: MlVToolConf):
    """
    Export a notebook to a parameterized Python 3 script using Jinja templates
    """
    logging.info(
        f'Generate Python script {output_path} from Jupyter Notebook {input_notebook_path}'
    )
    logging.debug(f'Global Configuration: {conf}')
    logging.debug(f'Template path {TEMPLATE_PATH}')

    exporter = PythonExporter(get_config(TEMPLATE_PATH))
    exporter.register_filter(name='filter_trailing_cells',
                             jinja_filter=filter_trailing_cells)
    exporter.register_filter(name='get_formatted_cells',
                             jinja_filter=get_formatted_cells)
    exporter.register_filter(name='get_data_from_docstring',
                             jinja_filter=get_data_from_docstring)
    exporter.register_filter(name='sanitize_method_name',
                             jinja_filter=to_method_name)
    resources = {'ignore_keys': conf.ignore_keys}
    logging.debug(f'Template info {resources}')
    try:
        script_content, _ = exporter.from_filename(input_notebook_path,
                                                   resources=resources)
    except Exception as e:
        raise MlVToolException(e) from e

    if not script_content:
        logging.warning('Empty notebook provided. Nothing to do.')
        return
    write_python_script(script_content, output_path)
    logging.log(
        logging.WARNING + 1,
        f'Python script successfully generated in {abspath(output_path)}')
from lib2to3 import refactor from nbconvert import PythonExporter import nbformat import ast, gast fixers = set(refactor.get_fixers_from_package('lib2to3.fixes')) exporter = PythonExporter() def fix_python2(code): tool = refactor.RefactoringTool(fixers, {}, explicit=True) return str(tool.refactor_string(code + '\n', 'test')) def ipynb_to_code(ipynb): notebook = nbformat.reads(ipynb, as_version=4) code, meta = exporter.from_notebook_node(notebook) return code class FuncCallVisitor(ast.NodeVisitor): def __init__(self): self._name = deque() @property def name(self): return '.'.join(self._name) @name.deleter
"\033[0m") break else: print_error( f"{cell_num}: no docs are generated for {out_var.name}" ) return "\n".join(comments), json_map if __name__ == "__main__": with open(nb_path, encoding="UTF-8") as f: file_content = f.read() notebook = nbformat.reads(file_content, as_version=4) # estabish map from line in .py to line in .ipynb lines = PythonExporter().from_notebook_node(notebook)[0].split("\n") code_cells = list( filter(lambda cell: cell["cell_type"] == "code", notebook.cells)) code_indices = list( filter(lambda i: notebook.cells[i] in code_cells, range(len(notebook.cells)))) # begin_indices = [ # i + 3 for i in range(len(lines)) if lines[i].startswith("# In[") # ] # line_to_idx = {} # for i, idx in enumerate(begin_indices): # l = len(notebook.cells[code_indices[i]].source.split("\n")) # for j in range(l): # line_to_idx[idx + j] = (code_indices[i], j) # load static comments
def __init__(self) -> None:
    self.instructions = None
    self.source = None
    self.meta = None
    self.python_exporter = PythonExporter()
    self.imports = defaultdict(list)
# In[{{ cell.execution_count if cell.execution_count else ' ' }}]:
{% endif %}
{% endblock in_prompt %}

{% block input %}
with jubo.cell("cell_{{ range(1000) | random }}{{ range(1000) | random }}{{ range(1000) | random }}"):
    with jubo.display_patched():
{{ cell.source | ipython2python | indent | indent}}
{% endblock input %}

{% block markdowncell scoped %}
{{ cell.source | comment_lines }}
{% endblock markdowncell %}"""

dl = DictLoader({'python.tpl': tmplt})
pyex = PythonExporter(extra_loaders=[dl])


def convert(infile, outfile):
    (code, _) = pyex.from_filename(infile)
    with open(outfile, 'w') as ofp:
        ofp.write(code)


if __name__ == "__main__":
    innb = os.path.abspath(os.path.join('.', argv[1]))
    outpy = os.path.abspath(os.path.join('.', argv[2]))
    convert(innb, outpy)
def __init__(self):
    self.exporter = PythonExporter()
def generate_duet_notebooks() -> None:
    tests = defaultdict(list)
    SLEEP_TIME = 500

    try:
        os.makedirs(NOTEBOOK_TESTS_PATH, exist_ok=True)
    except BaseException as e:
        print("os.makedirs failed ", e)

    try:
        shutil.rmtree(CHECKPOINT_PATH)
    except BaseException as e:
        print("rmtree failed ", e)

    try:
        os.makedirs(CHECKPOINT_PATH, exist_ok=True)
    except BaseException as e:
        print("os.makedirs failed ", e)

    testcase_lib = {}
    for path in TARGETS:
        testname = re.sub("[^0-9a-zA-Z]+", "_", str(path))
        output = NOTEBOOK_TESTS_PATH / testname
        file_name = str(path.stem)
        is_do = False
        is_ds = False
        if file_name.endswith("_Data_Scientist"):
            testcase = file_name.replace("_Data_Scientist", "")
            tests[testcase].append(testname)
            is_ds = True
        elif file_name.endswith("_Data_Owner"):
            testcase = file_name.replace("_Data_Owner", "")
            tests[testcase].append(testname)
            is_do = True
        else:
            continue

        load_lib_search = r"load_lib\(\W+([a-z_-]+)\W+\)"
        with open(path, "r") as f:
            load_lib_results = re.search(load_lib_search, str(f.read()), re.IGNORECASE)
        if load_lib_results:
            lib_name = load_lib_results.group(1)
            testcase_lib[testcase] = lib_name

        notebook_nodes = nbformat.read(path, as_version=4)
        custom_cell = nbformat.v4.new_code_cell(source=asyncio_event_loop)
        notebook_nodes["cells"].insert(0, custom_cell)

        for idx, cell in enumerate(notebook_nodes["cells"]):
            if cell["cell_type"] == "code" and "loopback=True" in cell["source"]:
                network_url = "f'http://0.0.0.0:{get_global_var()}'"
                notebook_nodes["cells"][idx]["source"] = cell["source"].replace(
                    "loopback=True",
                    f"loopback=True, network_url={network_url}")
            if cell["cell_type"] == "markdown" and "Checkpoint" in cell["source"]:
                checkpoint = (cell["source"].lower().split("checkpoint")[1]
                              .strip().split(":")[0].strip())
                testcase_checkpoint_dir = f"checkpoints/{testcase}/"
                # For DO, we wait until DS gets to the same checkpoint
                if is_do:
                    ck_file = testcase_checkpoint_dir + (
                        testcase + "_DO_checkpoint_" + str(checkpoint))
                    wait_file = testcase_checkpoint_dir + (
                        testcase + "_DS_checkpoint_" + str(checkpoint))
                    checkpoint_cell = nbformat.v4.new_code_cell(
                        source=checkpoint_template.format(
                            ck_file, SLEEP_TIME, wait_file))
                # For DS, we wait until DO gets to the next checkpoint
                elif is_ds:
                    ck_file = testcase_checkpoint_dir + (
                        testcase + "_DS_checkpoint_" + str(checkpoint))
                    wait_file = testcase_checkpoint_dir + (
                        testcase + "_DO_checkpoint_" + str(int(checkpoint) + 1))
                    checkpoint_cell = nbformat.v4.new_code_cell(
                        source=checkpoint_ack_template.format(
                            ck_file, SLEEP_TIME, wait_file, wait_file))
                notebook_nodes["cells"][idx] = checkpoint_cell

        try:
            exporter = PythonExporter()
            (body, resources) = exporter.from_notebook_node(notebook_nodes)
            write_file = FilesWriter()

            # replace empty cells with print statements for easy debugging
            empty_cell = "# In[ ]:"
            counter = 1
            cell_type = "DO" if is_do else "DS"
            while empty_cell in body:
                body = body.replace(empty_cell,
                                    f"print('{cell_type} Cell: {counter}')", 1)
                counter += 1

            # replace any test variables / lines to make things faster in test mode
            json_file = f"{path}.json"
            if os.path.exists(json_file):
                with open(json_file, "r") as f:
                    json_rules = json.loads(f.read())
                for rules in json_rules["replace_lines"]:
                    try:
                        body = re.sub(
                            rules["match"],
                            rules["replace"],
                            body,
                            flags=re.MULTILINE,
                        )
                    except Exception as e:
                        print(f"Failed to replace rule {rules} for test: {path}. {e}")

            write_file.write(output=body, resources=resources,
                             notebook_name=str(output))
        except Exception as e:
            print(f"There was a problem exporting the file(s): {e}")

    for case in tests:
        test = tests[case]
        if len(test) != 2:
            print("invalid testcase ", test)
        print(case, test)

        template = open(NOTEBOOK_TESTS_PATH / "duet_test.py.template").read()
        output_py = template.replace("{{TESTCASE}}", str(case))
        output_py = output_py.replace("checkpoints", f"checkpoints/{case}")

        for script in test:
            if "Data_Owner" in script:
                output_py = output_py.replace("{{DO_SCRIPT}}", script)
            elif "Data_Scientist" in script:
                output_py = output_py.replace("{{DS_SCRIPT}}", script)

        decorator = ""
        if case in testcase_lib:
            lib_name = testcase_lib[case]
            decorator = f"@pytest.mark.vendor(lib='{lib_name}')"
        output_py = output_py.replace("{{LIB_DECORATOR}}",
                                      decorator + "\[email protected]")

        with open(NOTEBOOK_TESTS_PATH / f"duet_{case}_test.py", "w") as out_py:
            out_py.write(output_py)
def export_python(nb, destfn):
    exporter = PythonExporter()
    body, resources = exporter.from_notebook_node(nb)
    with open(destfn, 'w') as f:
        f.write(body)
def gen_tutorials(input_dir: str, output_dir: str) -> None:
    """Generate HTML tutorials for botorch Docusaurus site from Jupyter notebooks.

    Also create ipynb and py versions of tutorial in Docusaurus site for download.
    """
    with open(os.path.join(input_dir, "mapping.json")) as infile:
        tutorial_config = json.load(infile)

    # create output directories if necessary
    html_out_dir = Path(output_dir) / "_tutorials"
    files_out_dir = Path(output_dir) / "static" / "files"
    html_out_dir.mkdir(parents=True, exist_ok=True)
    files_out_dir.mkdir(parents=True, exist_ok=True)

    tutorial_ids = {x["id"] for v in tutorial_config.values() for x in v}

    for tid in tutorial_ids:
        print(f"Generating {tid} tutorial")

        # convert notebook to HTML
        ipynb_in_path = os.path.join(input_dir, f"{tid}.ipynb")
        with open(ipynb_in_path, encoding="utf8") as infile:
            nb_str = infile.read()
            nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)

        # displayname is absent from notebook metadata
        nb["metadata"]["kernelspec"]["display_name"] = "python3"

        exporter = HTMLExporter()
        html, meta = exporter.from_notebook_node(nb)

        # pull out html div for notebook
        soup = BeautifulSoup(html, "html.parser")
        nb_meat = soup.find("div", {"id": "notebook-container"})
        del nb_meat.attrs["id"]
        nb_meat.attrs["class"] = ["notebook"]
        html_out = JS_SCRIPTS + str(nb_meat)

        # generate html file
        html_out_path = os.path.join(
            html_out_dir,
            f"{tid}.html",
        )
        with open(html_out_path, "w", encoding="utf8") as html_outfile:
            html_outfile.write(html_out)

        # generate JS file
        script = TEMPLATE.format(tid)
        js_out_path = os.path.join(output_dir, "pages", "tutorials", f"{tid}.js")
        Path(js_out_path).parent.mkdir(exist_ok=True, parents=True)
        with open(js_out_path, "w", encoding="utf8") as js_outfile:
            js_outfile.write(script)

        # output tutorial in both ipynb & py form
        ipynb_out_path = os.path.join(files_out_dir, f"{tid}.ipynb")
        with open(ipynb_out_path, "w", encoding="utf8") as ipynb_outfile:
            ipynb_outfile.write(nb_str)

        exporter = PythonExporter()
        script, meta = exporter.from_notebook_node(nb)
        # make sure to use python3 shebang
        script = script.replace(
            "#!/usr/bin/env python",
            "#!/usr/bin/env python3",
        )
        py_out_path = os.path.join(output_dir, "static", "files", f"{tid}.py")
        with open(py_out_path, "w", encoding="utf8") as py_outfile:
            py_outfile.write(script)
def convert_to_python(notebook, template_file=None):
    exporter = PythonExporter()
    # Only override the template when one is actually provided.
    if template_file is not None:
        exporter.template_file = template_file
    notebook_code, meta = exporter.from_notebook_node(notebook)
    return notebook_code, meta
import os
from subprocess import Popen, PIPE
from shutil import copy, copytree, rmtree

import nbformat
from nbconvert import PythonExporter

from tf.fabric import Fabric
from utils import bzip, caption

py = PythonExporter()

githubBase = os.path.expanduser("~/github/etcbc")
pipelineRepo = "pipeline"
utilsScript = "programs/utils.py"
programDir = "programs"
standardParams = "CORE_NAME VERSION".strip().split()


def runNb(repo, dirName, nb, force=False, **parameters):
    caption(3, "Run notebook [{}/{}] with parameters:".format(repo, nb))
    for (param, value) in sorted(parameters.items()):
        caption(0, "\t{:<20} = {}".format(param, value))

    location = "{}/{}/{}".format(githubBase, repo, dirName)
    nbFile = "{}/{}.ipynb".format(location, nb)
    pyFile = "{}/{}.py".format(location, nb)

    nbObj = nbformat.read(nbFile, 4)
    pyScript = py.from_notebook_node(nbObj)[0]
    with open(pyFile, "w") as s:
        s.write(pyScript)
def export_python(wd, name):
    nb = _read(wd, name)
    exporter = PythonExporter()
    body, resources = exporter.from_notebook_node(nb)
    with open("{}/{}.py".format(wd, name), 'w') as f:
        f.write(body)
def get_preprocessed_entry_point(
    entry_point,
    chief_config,
    worker_config,
    worker_count,
    distribution_strategy,
    called_from_notebook=False,
):
    """Creates python script for distribution based on the given `entry_point`.

    This utility creates a new python script called `preprocessed_entry_point`
    based on the given `entry_point` and `distribution_strategy` inputs. This
    script will become the new Docker entry point python program.

    1. If `entry_point` is a python file name and `distribution_strategy` is
       auto, then `preprocessed_entry_point` will have the user given
       `entry_point` code wrapped in a Tensorflow distribution strategy.
    2. If `entry_point` is None and `run` is invoked inside of a python
       script, then `preprocessed_entry_point` will be this python script
       (sys.args[0]).
    3. If `entry_point` is an `ipynb` file, then `preprocessed_entry_point`
       will be the code from the notebook. This utility uses `nbconvert` to
       get the code from notebook.
    4. If `entry_point` is None and `run` is invoked inside of an `ipynb`
       notebook, then `preprocessed_entry_point` will be the code from the
       notebook. This utility uses the `google.colab` client API to fetch the
       code.

    For cases 2, 3 & 4, if `distribution_strategy` is auto, then this script
    will be wrapped in a Tensorflow distribution strategy.

    The distribution strategy instance created is based on the machine
    configurations provided using the `chief_config`, `worker_count` params.
    - If the number of workers > 0,
        - If accelerator type is TPU, we will create an instance of
          `tf.distribute.experimental.TPUStrategy`.
        - Otherwise, we will create a default instance of
          `tf.distribute.experimental.MultiWorkerMirroredStrategy`.
    - If number of GPUs > 0, we will create a default instance of
      `tf.distribute.MirroredStrategy`
    - Otherwise, we will use `tf.distribute.OneDeviceStrategy`

    Args:
        entry_point: Optional string. File path to the python file or iPython
            notebook that contains the TensorFlow code.
            Note) This path must be in the current working directory tree.
            Example) 'train.py', 'training/mnist.py', 'mnist.ipynb'
            If `entry_point` is not provided, then
            - If you are in an iPython notebook environment, then the current
              notebook is taken as the `entry_point`.
            - Otherwise, the current python script is taken as the
              `entry_point`.
        chief_config: `MachineConfig` that represents the configuration for
            the chief worker in a distribution cluster.
        worker_config: `MachineConfig` that represents the configuration for
            the workers in a distribution cluster.
        worker_count: Integer that represents the number of general workers
            in a distribution cluster. This count does not include the chief
            worker.
        distribution_strategy: 'auto' or None. Defaults to 'auto'. 'auto'
            means we will take care of creating a Tensorflow distribution
            strategy instance based on the machine configurations provided
            using the `chief_config`, `worker_config` and `worker_count`
            params.
        called_from_notebook: Boolean. True if the API is run in a notebook
            environment.

    Returns:
        The `preprocessed_entry_point` file path.

    Raises:
        RuntimeError: If invoked from Notebook but unable to access it.
            Typically, this is due to missing the `nbconvert` package.
    """
    # Set `TF_KERAS_RUNNING_REMOTELY` env variable. This is required in order
    # to prevent running `tfc.run` if we are already in a cloud environment.
    # This is applicable only when `entry_point` is None.
    script_lines = [
        "import os\n",
        "import tensorflow as tf\n",
        'os.environ["TF_KERAS_RUNNING_REMOTELY"]="1"\n',
    ]

    # Auto wrap in distribution strategy.
    if distribution_strategy == "auto":
        if worker_count > 0:
            if machine_config.is_tpu_config(worker_config):
                strategy = get_tpu_cluster_resolver_fn()
                strategy.extend(
                    [
                        "resolver = wait_for_tpu_cluster_resolver_ready()\n",
                        "tf.config.experimental_connect_to_cluster(resolver)\n",
                        "tf.tpu.experimental.initialize_tpu_system(resolver)\n",
                        "strategy = tf.distribute.experimental.TPUStrategy("
                        "resolver)\n",
                    ]
                )
            else:
                strategy = [
                    "strategy = tf.distribute.experimental."
                    "MultiWorkerMirroredStrategy()\n"
                ]
        elif chief_config.accelerator_count > 1:
            strategy = ["strategy = tf.distribute.MirroredStrategy()\n"]
        else:
            strategy = [
                "strategy = tf.distribute.OneDeviceStrategy(device='/gpu:0')\n"]
        script_lines.extend(strategy)
        script_lines.append(
            "tf.distribute.experimental_set_strategy(strategy)\n")

    # If `entry_point` is not provided, detect if we are in a notebook
    # or a python script. Fetch the `entry_point`.
    if entry_point is None and not called_from_notebook:
        # Current python script is assumed to be the entry_point.
        entry_point = sys.argv[0]

    # Add user's code.
    if entry_point is not None and entry_point.endswith("py"):
        # We are using exec here to execute the user code object.
        # This will support use case where the user's program has a
        # main method.
        _, entry_point_file_name = os.path.split(entry_point)
        script_lines.append(
            'exec(open("{}").read())\n'.format(entry_point_file_name))
    else:
        if called_from_notebook:
            # Kaggle integration
            if os.getenv("KAGGLE_CONTAINER_NAME"):
                logger.info("Preprocessing Kaggle notebook...")
                py_content = _get_kaggle_notebook_content()
            else:
                # Colab integration
                py_content = _get_colab_notebook_content()
        else:
            if PythonExporter is None:
                raise RuntimeError(
                    "Unable to access iPython notebook. "
                    "Please make sure you have installed `nbconvert` package."
                )
            # Get the python code from the iPython notebook.
            (py_content, _) = PythonExporter().from_filename(entry_point)
            py_content = py_content.splitlines(keepends=True)

        # Remove any iPython special commands and add the python code
        # to script_lines.
        for line in py_content:
            if not (
                line.startswith("!")
                or line.startswith("%")
                or line.startswith("#")
            ):
                script_lines.append(line)

    # Create a tmp wrapped entry point script file.
    _, output_file = tempfile.mkstemp(suffix=".py")
    with open(output_file, "w") as f:
        f.writelines(script_lines)
    return output_file
def check_notebook(filename, all_sol_vars):
    """
    Compares the variables of a Jupyter notebook to a reference solution.
    :param filename: of the notebook to check.
    :param all_sol_vars: dict which contains the reference solutions.
    :return: True when all checks were successful or no checks could be performed.
    """
    print('Testing notebook ' + filename)

    # Find all solutions for the current notebook (usually more than one for TensorFlow code)
    solutions = sorted([
        solution for solution in all_sol_vars.keys()
        if solution.startswith(filename)
    ])
    if not solutions:
        print(
            'No matching solution for the file %s found. The notebook is skipped.\n'
            % filename)
        return True

    # Load the notebook file
    with open(filename) as file:
        nb = nbformat.read(file, as_version=4)

    # Keep only the code cells (and especially remove the raw cells)
    nb.cells[:] = [cell for cell in nb.cells if cell.cell_type == 'code']

    # Convert the notebook to a Python script
    exporter = PythonExporter()
    source, meta = exporter.from_notebook_node(nb)
    # Comment out code lines which are only available in ipython
    source = re.sub('(.*?)get_ipython', r'#\1get_ipython', source)

    # Run the student's solution
    stud_vars = {}
    with open(os.devnull, "w") as stream, contextlib.redirect_stdout(stream):
        # print commands are prevented (warnings will still be shown)
        exec(source, stud_vars)

    # Test each solution
    messages = {}
    correct_solution = ''
    for solution in solutions:
        messages[solution] = check_vars(stud_vars, all_sol_vars[solution])
        if not messages[solution]:
            correct_solution = solution
            # The first check was already successful; no need to check other possible solutions
            break

    if correct_solution:
        print(
            f'{colorama.Fore.GREEN}The test for the notebook %s was successful (checked against %s). No errors found.{colorama.Style.RESET_ALL}\n'
            % (filename, correct_solution))
        return True
    else:
        # All solutions are incorrect. Show the errors for the first solution (arbitrary)
        print(
            'The test for the notebook %s was not successful. At least one variable does not contain the expected result (errors compared to the solution %s are shown).'
            % (filename, solutions[0]))
        for message in messages[solutions[0]]:
            print(message)
        return False
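# check_vars is defined elsewhere in that project. A minimal sketch of what a
# comparable helper might look like, assuming it returns a list of error
# messages (empty when every reference variable matches); the name, signature,
# and plain == comparison here are assumptions, not the original code:


def check_vars_sketch(stud_vars, sol_vars):
    """Hypothetical sketch: compare student variables against a reference dict."""
    errors = []
    for name, expected in sol_vars.items():
        if name not in stud_vars:
            errors.append('Variable %s is missing.' % name)
        # Plain equality; array-valued results would need numpy.allclose instead.
        elif stud_vars[name] != expected:
            errors.append('Variable %s does not match the reference value.' % name)
    return errors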
def nb_to_python(nb_path):
    """convert notebook to python script"""
    exporter = PythonExporter()
    output, resources = exporter.from_filename(nb_path)
    return output
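# A short usage sketch for the wrapper above (the notebook and output paths
# are placeholders, not from the original source):

if __name__ == "__main__":
    py_source = nb_to_python("example_notebook.ipynb")
    with open("example_notebook.py", "w", encoding="utf-8") as out:
        out.write(py_source)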