def lines_to_notebook(lines, name=None):
    """
    Convert the lines of an m file into an IPython notebook

    Parameters
    ----------
    lines : list
        A list of strings. Each element is a line in the m file
    name : str, optional
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    notebook : an IPython NotebookNode class instance, containing the
        information required to create a file
    """
    source = []
    md = np.empty(len(lines), dtype=object)
    new_cell = np.empty(len(lines), dtype=object)
    for idx, l in enumerate(lines):
        new_cell[idx], md[idx], this_source = format_line(l)
        # Transitions between markdown and code and vice-versa merit a new
        # cell, even if no newline, or "%%" is found. Make sure not to do this
        # check for the very first line!
        # BUG FIX: the original condition was `idx > 1`, which also skipped
        # the transition check for the second line (idx == 1), contradicting
        # the comment above. Only idx == 0 has no predecessor.
        if idx > 0 and not new_cell[idx]:
            if md[idx] != md[idx - 1]:
                new_cell[idx] = True
        source.append(this_source)

    # This defines the breaking points between cells:
    # NOTE(review): the -1 sentinel means the final slice below excludes the
    # very last source line (source[k:-1]); if the last line should be kept,
    # the sentinel ought to be len(source) — confirm against expected output.
    new_cell_idx = np.hstack([np.where(new_cell)[0], -1])

    # Listify the sources:
    cell_source = [source[new_cell_idx[i]:new_cell_idx[i + 1]]
                   for i in range(len(new_cell_idx) - 1)]
    cell_md = [md[new_cell_idx[i]] for i in range(len(new_cell_idx) - 1)]

    cells = []
    # Append the notebook with loading matlab magic extension
    notebook_head = "import pymatbridge as pymat\n" + "ip = get_ipython()\n" \
        + "pymat.load_ipython_extension(ip)"
    cells.append(nbformat.new_code_cell(notebook_head, language='python'))
    for cell_idx, cell_s in enumerate(cell_source):
        if cell_md[cell_idx]:
            cells.append(nbformat.new_text_cell('markdown', cell_s))
        else:
            cell_s.insert(0, '%%matlab\n')
            cells.append(nbformat.new_code_cell(cell_s, language='matlab'))

    ws = nbformat.new_worksheet(cells=cells)
    notebook = nbformat.new_notebook(metadata=nbformat.new_metadata(),
                                     worksheets=[ws])
    return notebook
def mdstrip(paths):
    """Write stripped '<slug>.prod.ipynb' copies of the notebooks at *paths*.

    Each path may be a directory (all *.ipynb inside it are processed) or a
    single file. For every input notebook, a title cell (rendered from the
    module-level ``title_data`` template) is prepended, code-cell outputs are
    cleared, and only code and heading cells are kept.

    Parameters
    ----------
    paths : iterable of str
        Files or directories containing notebooks.
    """
    for path in paths:
        if os.path.isdir(path):
            files = glob(os.path.join(path, "*.ipynb"))
        else:
            files = [path]
        for in_file in files:
            input_nb_name = basename(in_file)
            slug = input_nb_name[:-6]  # strip the ".ipynb" extension
            title = slug.replace("_", " ")
            actual_title_nb = io.StringIO(
                title_data.replace("{{ title }}", title))
            title_nb = nbf.read(actual_title_nb, "ipynb")
            title_cell = title_nb.worksheets[0].cells[0]
            # BUG FIX: the original used nbf.read(open(in_file), ...), which
            # never closed the file handle; use a context manager instead.
            with open(in_file) as nb_fh:
                input_nb = nbf.read(nb_fh, "ipynb")
            worksheet = input_nb.worksheets[0]
            # add graphic here & append to cell_list
            cell_list = [title_cell]
            for cell in worksheet.cells:
                if cell.cell_type == "code":
                    cell.outputs = []  # strip stale outputs
                    cell_list.append(cell)
                elif cell.cell_type == "heading":
                    cell_list.append(cell)
            output_nb = nbf.new_notebook()
            output_nb_name = slug + ".prod.ipynb"
            output_nb.worksheets.append(nbf.new_worksheet(cells=cell_list))
            with open(output_nb_name, 'w') as f:
                nbf.write(output_nb, f, "ipynb")
def test_very_long_cells(self):
    """
    Torture test that long cells do not cause issues
    """
    lorem_ipsum_text = textwrap.dedent("""\
    Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec
    dignissim, ipsum non facilisis tempus, dui felis tincidunt metus,
    nec pulvinar neque odio eget risus. Nulla nisi lectus, cursus
    suscipit interdum at, ultrices sit amet orci. Mauris facilisis
    imperdiet elit, vitae scelerisque ipsum dignissim non. Integer
    consequat malesuada neque sit amet pulvinar. Curabitur pretium
    ut turpis eget aliquet. Maecenas sagittis lacus sed lectus
    volutpat, eu adipiscing purus pulvinar. Maecenas consequat
    luctus urna, eget cursus quam mollis a. Aliquam vitae ornare
    erat, non hendrerit urna. Sed eu diam nec massa egestas pharetra
    at nec tellus. Fusce feugiat lacus quis urna sollicitudin volutpat.
    Quisque at sapien non nibh feugiat tempus ac ultricies purus.
    """)
    # Flatten to one long paragraph, then repeat it many times to make a
    # single very large markdown cell.
    lorem_ipsum_text = lorem_ipsum_text.replace("\n", " ") + "\n\n"
    large_lorem_ipsum_text = "".join([lorem_ipsum_text] * 3000)

    notebook_name = "lorem_ipsum_long.ipynb"
    tex_name = "lorem_ipsum_long.tex"
    with self.create_temp_cwd([]):
        md_cell = current.new_text_cell('markdown',
                                        source=large_lorem_ipsum_text)
        nb = current.new_notebook(
            worksheets=[current.new_worksheet(cells=[md_cell])])
        with open(notebook_name, 'w') as f:
            current.write(nb, f, 'ipynb')
        self.call('nbconvert --to latex --log-level 0 ' +
                  os.path.join(notebook_name))
        assert os.path.isfile(tex_name)
def split_into_units(nb_name):
    """Split notebook into units."""
    try:
        with io.open(nb_name, 'r', encoding='utf-8') as f:
            nb = current.read(f, 'json')
    except IOError as e:
        if e.errno != 2:
            raise e
        # errno 2 == ENOENT: missing notebook is reported, not fatal.
        print('File not found: {0}'.format(nb_name))
        return []

    cells = nb.worksheets[0].cells
    # Top-level (level-1) headings mark the start of each unit.
    indexes = [i for i, cell in enumerate(cells)
               if cell.cell_type == 'heading' and cell.level == 1]
    separated_cells = [cells[start:stop]
                       for start, stop in zip(indexes, indexes[1:] + [None])]

    worksheets = map(lambda unit_cells: current.new_worksheet(
        name=unit_cells[0].source, cells=unit_cells), separated_cells)
    units = map(lambda worksheet: current.new_notebook(
        name=worksheet.name, worksheets=[worksheet]), worksheets)
    return units
def test_coalesce_sequenced_streams(self):
    """Can the coalesce streams preprocessor merge a sequence of streams?"""
    # Eight consecutive stdout stream outputs, "0" through "7".
    outputs = [nbformat.new_output(output_type="stream", stream="stdout",
                                   output_text=str(i))
               for i in range(8)]
    cells = [nbformat.new_code_cell(input="# None", prompt_number=1,
                                    outputs=outputs)]
    worksheets = [nbformat.new_worksheet(name="worksheet1", cells=cells)]

    nb = nbformat.new_notebook(name="notebook1", worksheets=worksheets)
    res = self.build_resources()
    nb, res = coalesce_streams(nb, res)
    outputs = nb.worksheets[0].cells[0].outputs
    # All eight stream chunks must have been merged into one output.
    self.assertEqual(outputs[0].text, u'01234567')
def test_save(self):
    """Round-trip a notebook through the contents API: modify, save, rename.

    Exercises three things in sequence:
      1. saving edited content back under the same name,
      2. re-reading it both from disk and through the API,
      3. saving under a new name/path (rename + move) and checking the old
         file is gone.
    """
    # Fetch the existing notebook and parse its content.
    resp = self.nb_api.read('a.ipynb', 'foo')
    nbcontent = json.loads(resp.text)['content']
    nb = to_notebook_json(nbcontent)
    # Replace its worksheets with a single heading cell (non-ASCII text
    # deliberately included to check encoding handling).
    ws = new_worksheet()
    nb.worksheets = [ws]
    ws.cells.append(new_heading_cell(u'Created by test ³'))

    nbmodel = {'name': 'a.ipynb', 'path': 'foo', 'content': nb}
    resp = self.nb_api.save(
        'a.ipynb', path='foo', body=json.dumps(nbmodel))

    # The saved file on disk must contain the new cell.
    nbfile = pjoin(self.notebook_dir.name, 'foo', 'a.ipynb')
    with io.open(nbfile, 'r', encoding='utf-8') as f:
        newnb = read(f, format='ipynb')
    self.assertEqual(newnb.worksheets[0].cells[0].source,
                     u'Created by test ³')
    # And so must a fresh read through the API.
    nbcontent = self.nb_api.read('a.ipynb', 'foo').json()['content']
    newnb = to_notebook_json(nbcontent)
    self.assertEqual(newnb.worksheets[0].cells[0].source,
                     u'Created by test ³')

    # Save and rename
    nbmodel = {'name': 'a2.ipynb', 'path': 'foo/bar', 'content': nb}
    resp = self.nb_api.save(
        'a.ipynb', path='foo', body=json.dumps(nbmodel))
    saved = resp.json()
    self.assertEqual(saved['name'], 'a2.ipynb')
    self.assertEqual(saved['path'], 'foo/bar')
    # The file must exist at its new location and not at the old one.
    assert os.path.isfile(
        pjoin(self.notebook_dir.name, 'foo', 'bar', 'a2.ipynb'))
    assert not os.path.isfile(
        pjoin(self.notebook_dir.name, 'foo', 'a.ipynb'))
    with assert_http_error(404):
        self.nb_api.read('a.ipynb', 'foo')
def convert(py_file, ipynb_file):
    """Convert the parsed blocks of *py_file* into the notebook *ipynb_file*.

    Cells tagged with the module-level ``file_type`` marker are written out to
    the filename named on the marker line and replaced by a ``%run`` cell;
    every other block becomes an ordinary code cell.

    Raises
    ------
    Exception
        If a file-output marker line does not carry a filename.
    """
    cells = []
    imports = ""  # needed for each file
    for cell, line_type, line in parse_blocks(py_file):
        if line_type == file_type:
            # write cell to file
            try:
                fname = line.split()[1]
            except IndexError:
                # BUG FIX: the original used a bare `except:` (which would
                # also mask KeyboardInterrupt etc.) and had an unreachable
                # sys.exit(0) after the raise. Only a missing filename can
                # fail here, so catch IndexError specifically.
                raise Exception(
                    "Markdown for file output must be the " +
                    "following format\n#to_file filename.py: " +
                    "{}\n".format(line)
                )
            with open(fname, "w") as f:
                f.write(cell)
            new_cell = "%run {}".format(fname)
            cells.append(nbf.new_code_cell(new_cell))
        else:
            # convert cell to ipynb cell
            cells.append(nbf.new_code_cell(cell))

    # create new notebook
    nb = nbf.new_notebook()
    nb["worksheets"].append(nbf.new_worksheet(cells=cells))
    with open(ipynb_file, "w") as f:
        nbf.write(nb, f, "ipynb")
def mini_markdown_nb(markdown):
    "create a single text cell notebook with markdown in it"
    # Build the single markdown cell, attach it to a worksheet, and wrap
    # that worksheet in a fresh notebook.
    cell = new_text_cell('markdown', source=markdown)
    wks = new_worksheet()
    wks['cells'].append(cell)
    nb = new_notebook()
    nb['worksheets'].append(wks)
    return nb
def convert_normal_cells(normal_cells):
    """ Convert normal_cells into html. """
    # Rewrite LaTeX equation environments into display-math delimiters
    # before rendering.
    substitutions = (
        (r'\\begin\{ *equation *\}', '\['),
        (r'\\end\{ *equation *\}', '\]'),
    )
    for cell in normal_cells:
        if cell.cell_type == 'markdown':
            for pattern, replacement in substitutions:
                cell.source = re.sub(pattern, replacement, cell.source)
    worksheet = current.new_worksheet(cells=normal_cells)
    tmp = current.new_notebook(worksheets=[worksheet])
    return export_unit_to_html(tmp)
def __init__(self, document):
    """Set up a docutils NodeVisitor that accumulates an IPython notebook.

    Initializes the docutils visitor machinery, resolves the language from
    the document settings, and creates an empty notebook (one worksheet)
    that the traversal methods will fill in.
    """
    nodes.NodeVisitor.__init__(self, document)
    self.settings = settings = document.settings
    lcode = settings.language_code
    self.language = languages.get_language(lcode, document.reporter)
    # A heterogenous stack used in conjunction with the tree traversal.
    # Make sure that the pops correspond to the pushes:
    self.context = []
    self.body = []
    # The notebook under construction: a single empty worksheet to start.
    ws = nbformat.new_worksheet()
    self.nb = nbformat.new_notebook(worksheets=[ws])
def build_notebook(self):
    """Build a reveal slides notebook in memory for use with tests.
    Overrides base in PreprocessorTestsBase

    Returns a one-cell notebook whose code cell carries a single SVG
    output (``self.simple_svg``).
    """
    outputs = [nbformat.new_output(output_type="svg",
                                   output_svg=self.simple_svg)]
    # NOTE: the original also built slide/subslide metadata dicts here but
    # never attached them to any cell; those unused locals are removed.
    cells = [nbformat.new_code_cell(input="", prompt_number=1,
                                    outputs=outputs)]
    worksheets = [nbformat.new_worksheet(name="worksheet1", cells=cells)]

    return nbformat.new_notebook(name="notebook1", worksheets=worksheets)
def build_notebook(self):
    """Build a notebook in memory for use with preprocessor tests"""

    def stream(channel, text):
        # Shorthand for a stream output on the given channel.
        return nbformat.new_output(output_type="stream", stream=channel,
                                   output_text=text)

    outputs = [stream("stdout", "a"),
               nbformat.new_output(output_type="text", output_text="b"),
               stream("stdout", "c"),
               stream("stdout", "d"),
               stream("stderr", "e"),
               stream("stderr", "f"),
               nbformat.new_output(output_type="png", output_png='Zw==')]  # g

    cells = [nbformat.new_code_cell(input="$ e $", prompt_number=1,
                                    outputs=outputs),
             nbformat.new_text_cell('markdown', source="$ e $")]
    worksheets = [nbformat.new_worksheet(name="worksheet1", cells=cells)]

    return nbformat.new_notebook(name="notebook1", worksheets=worksheets)
def test_run_nb(self):
    """Test %run notebook.ipynb"""
    from IPython.nbformat import current
    # One code cell that binds `answer` in the user namespace when run.
    code_cell = current.new_code_cell("answer=42")
    worksheet = current.new_worksheet(cells=[code_cell])
    nb = current.new_notebook(worksheets=[worksheet])
    src = current.writes(nb, 'json')
    self.mktmp(src, ext='.ipynb')

    _ip.magic("run %s" % self.fname)

    nt.assert_equal(_ip.user_ns['answer'], 42)
def test_coalesce_replace_streams(self):
    """Are \\r characters handled?"""
    # Each chunk overwrites part of the previous via carriage returns;
    # after coalescing only a\nb\nc\nd should survive.
    chunks = ["z", "\ra", "\nz\rb", "\nz", "\rc\n", "z\rz\rd"]
    outputs = [nbformat.new_output(output_type="stream", stream="stdout",
                                   output_text=chunk)
               for chunk in chunks]
    cells = [nbformat.new_code_cell(input="# None", prompt_number=1,
                                    outputs=outputs)]
    worksheets = [nbformat.new_worksheet(name="worksheet1", cells=cells)]

    nb = nbformat.new_notebook(name="notebook1", worksheets=worksheets)
    res = self.build_resources()
    nb, res = coalesce_streams(nb, res)
    outputs = nb.worksheets[0].cells[0].outputs
    self.assertEqual(outputs[0].text, u'a\nb\nc\nd')
def build_notebook(self):
    """Build a reveal slides notebook in memory for use with tests.
    Overrides base in TransformerTestsBase"""
    outputs = [nbformat.new_output(output_type="stream", stream="stdout",
                                   output_text="a")]
    slide_metadata = {'slideshow': {'slide_type': 'slide'}}
    subslide_metadata = {'slideshow': {'slide_type': 'subslide'}}

    def md_cell(metadata):
        # Empty markdown cell carrying slideshow metadata.
        return nbformat.new_text_cell('markdown', source="",
                                      metadata=metadata)

    cells = [
        nbformat.new_code_cell(input="", prompt_number=1, outputs=outputs),
        md_cell(slide_metadata),
        nbformat.new_code_cell(input="", prompt_number=2, outputs=outputs),
        md_cell(slide_metadata),
        md_cell(subslide_metadata),
    ]
    worksheets = [nbformat.new_worksheet(name="worksheet1", cells=cells)]

    return nbformat.new_notebook(name="notebook1", worksheets=worksheets)
def to_notebook(infile, hr_separated=False):
    """Given markdown, returns an ipynb compliant JSON string

    Parameters
    ----------
    infile : file-like
        Open markdown file; ``infile.name`` is used for the notebook name.
    hr_separated : bool
        When True, consecutive markdown blocks are merged into one cell and
        horizontal rules start a fresh (empty) markdown cell.
    """
    parser = markdown.DocParser()
    ast = json.loads(markdown.ASTtoJSON(
        parser.parse(infile.read())))

    cells = [current.new_text_cell('markdown', '')]
    for block in ast.get('children', []):
        if block['t'] in ["IndentedCode", "FencedCode"]:
            cells.append(current.new_code_cell(
                block['string_content'].rstrip()
            ))
        elif block['t'] in ['SetextHeader', 'ATXHeader']:
            src = '{} {}'.format(
                '#' * block.get('level', 1),
                ''.join(block['strings'])
            ).rstrip()
            # BUG FIX: the original compared with `is 'markdown'`, an
            # identity test that only works by CPython string-interning
            # accident; use equality instead.
            if hr_separated and cells[-1]['cell_type'] == 'markdown':
                cells[-1]['source'] += '\n\n{}'.format(src)
            else:
                cells.append(current.new_text_cell('markdown', src))
        elif block['t'] in ['HorizontalRule']:
            # We don't render horizontal rules
            if hr_separated:
                cells.append(current.new_text_cell('markdown', ''))
        else:
            src = '\n'.join(block['strings']).rstrip()
            # BUG FIX: same `is` -> `==` correction as above.
            if hr_separated and cells[-1]['cell_type'] == 'markdown':
                cells[-1]['source'] += '\n\n{}'.format(src)
            else:
                cells.append(current.new_text_cell('markdown', src))

    cells = tidy_notebook(cells[:])

    worksheet = current.new_worksheet(cells=cells)
    nb = current.new_notebook(
        basename(infile.name).split('.')[:-1],
        worksheets=[worksheet]
    )
    # using the indent option leaves a bunch of trailing whitespace. No thanks!
    return json.dumps(nb, indent=2).replace('  \n', '\n')
def normalize(in_file, out_file):
    """Write a normalized, key-sorted JSON copy of notebook *in_file*.

    Code cells have their outputs and prompt numbers cleared so that two
    notebooks differing only in execution state serialize identically; the
    result is round-tripped through JSON and dumped with sorted keys for
    stable diffs.

    Parameters
    ----------
    in_file : notebook node
        Already-parsed notebook (not a path).
    out_file : file-like
        Destination for the sorted JSON text.
    """
    worksheet = in_file.worksheets[0]
    cell_list = []
    # add graphic here & append to cell_list
    for cell in worksheet.cells:
        if cell.cell_type == ("code"):
            cell.outputs = []
            cell.prompt_number = ""
        # NOTE(review): indentation was lost in the source of record; this
        # reading keeps *all* cells (append outside the `if`), which matches
        # the "reorganized JSON" intent — confirm against original history.
        cell_list.append(cell)
    output_nb = nbf.new_notebook()  # XXX should set name ...
    output_nb.worksheets.append(nbf.new_worksheet(cells=cell_list))
    tmp_file = io.StringIO()
    nbf.write(output_nb, tmp_file, "ipynb")
    # Then write reorganized (i.e. key-sorted) JSON file to out_file
    tmp_file.seek(0)
    j_nb = json.load(tmp_file)
    json.dump(j_nb, out_file, sort_keys=True, indent=2)
def write_notebook(flow, options):
    """Generate a temporary IPython notebook for inspecting *flow* and open it.

    Builds a fixed sequence of code cells (matplotlib/mpld3/seaborn setup,
    then flow status/inspection calls), writes them to a temp .ipynb file,
    and launches ``ipython notebook`` on it (blocking via os.system).

    See http://nbviewer.ipython.org/gist/fperez/9716279

    NOTE(review): *options* is accepted but never used here — confirm
    whether callers rely on the two-argument signature.
    """
    from IPython.nbformat import current as nbf

    nb = nbf.new_notebook()

    cells = [
        #nbf.new_text_cell('heading', "This is an auto-generated notebook for %s" % os.path.basename(pseudopath)),
        nbf.new_code_cell("""\
##%%javascript
##IPython.OutputArea.auto_scroll_threshold = 9999;

from __future__ import print_function
from abipy import abilab
%matplotlib inline
mpld3 = abilab.mpld3_enable_notebook()
import pylab
pylab.rcParams['figure.figsize'] = (25.0, 10.0)
import seaborn as sns
#sns.set(style="dark", palette="Set2")
sns.set(style='ticks', palette='Set2')"""),

        nbf.new_code_cell("flow = abilab.Flow.pickle_load('%s')" % flow.workdir),
        nbf.new_code_cell("flow.show_dependencies()"),
        nbf.new_code_cell("flow.check_status(show=True, verbose=0)"),
        nbf.new_code_cell("flow.show_inputs(nids=None, wslice=None)"),
        nbf.new_code_cell("flow.inspect(nids=None, wslice=None)"),
        nbf.new_code_cell("flow.show_abierrors()"),
        nbf.new_code_cell("flow.show_qouts()"),
    ]

    # Now that we have the cells, we can make a worksheet with them and add it to the notebook:
    nb['worksheets'].append(nbf.new_worksheet(cells=cells))

    # Next, we write it to a file on disk that we can then open as a new notebook.
    # Note: This should be as easy as: nbf.write(nb, fname), but the current api is a little more verbose and needs a real file-like object.
    import tempfile
    _, tmpfname = tempfile.mkstemp(suffix='.ipynb', text=True)

    with open(tmpfname, 'w') as fh:
        nbf.write(nb, fh, 'ipynb')

    os.system("ipython notebook %s" % tmpfname)
def test_contents_manager(self):
    "make sure ContentsManager returns right files (ipynb, bin, txt)."
    # Fixture setup: one notebook, one binary file, one text file in the
    # notebook directory.
    nbdir = self.notebook_dir.name
    base = self.base_url()

    nb = new_notebook(name='testnb')
    ws = new_worksheet()
    nb.worksheets = [ws]
    ws.cells.append(new_heading_cell(u'Created by test ³'))
    cc1 = new_code_cell(input=u'print(2*6)')
    cc1.outputs.append(new_output(output_text=u'12', output_type='stream'))
    ws.cells.append(cc1)
    with io.open(pjoin(nbdir, 'testnb.ipynb'), 'w',
                 encoding='utf-8') as f:
        write(nb, f, format='ipynb')

    with io.open(pjoin(nbdir, 'test.bin'), 'wb') as f:
        # One known byte followed by random data — only the first byte is
        # asserted on below.
        f.write(b'\xff' + os.urandom(5))
        f.close()

    with io.open(pjoin(nbdir, 'test.txt'), 'w') as f:
        f.write(u'foobar')
        f.close()

    # The notebook must be served as (valid) JSON containing its source.
    r = requests.get(url_path_join(base, 'files', 'testnb.ipynb'))
    self.assertEqual(r.status_code, 200)
    self.assertIn('print(2*6)', r.text)
    json.loads(r.text)

    # The binary file must come back verbatim as an octet stream.
    r = requests.get(url_path_join(base, 'files', 'test.bin'))
    self.assertEqual(r.status_code, 200)
    self.assertEqual(r.headers['content-type'], 'application/octet-stream')
    self.assertEqual(r.content[:1], b'\xff')
    self.assertEqual(len(r.content), 6)

    # The text file must be served as plain text.
    r = requests.get(url_path_join(base, 'files', 'test.txt'))
    self.assertEqual(r.status_code, 200)
    self.assertEqual(r.headers['content-type'], 'text/plain')
    self.assertEqual(r.text, 'foobar')
def notebook(self, s):
    """Export and convert IPython notebooks.

    This function can export the current IPython history to a notebook file
    or can convert an existing notebook file into a different format. For
    example, to export the history to "foo.ipynb" do "%notebook -e foo.ipynb".
    To export the history to "foo.py" do "%notebook -e foo.py". To convert
    "foo.ipynb" to "foo.json" do "%notebook -f json foo.ipynb". Possible
    formats include (json/ipynb, py).
    """
    args = magic_arguments.parse_argstring(self.notebook, s)

    from IPython.nbformat import current
    args.filename = unquote_filename(args.filename)
    if args.export:
        # Export mode: turn the session history into code cells.
        fname, name, format = current.parse_filename(args.filename)
        cells = []
        hist = list(self.shell.history_manager.get_range())
        # The last history entry is this %notebook call itself; skip it.
        for session, prompt_number, input in hist[:-1]:
            cells.append(current.new_code_cell(prompt_number=prompt_number,
                                               input=input))
        worksheet = current.new_worksheet(cells=cells)
        nb = current.new_notebook(name=name, worksheets=[worksheet])
        with io.open(fname, 'w', encoding='utf-8') as f:
            current.write(nb, f, format)
    elif args.format is not None:
        # Conversion mode: read in one format, write out in another.
        old_fname, old_name, old_format = current.parse_filename(
            args.filename)
        new_format = args.format
        if new_format == u'xml':
            raise ValueError('Notebooks cannot be written as xml.')
        elif new_format == u'ipynb' or new_format == u'json':
            new_fname = old_name + u'.ipynb'
            # ipynb is stored as json on disk.
            new_format = u'json'
        elif new_format == u'py':
            new_fname = old_name + u'.py'
        else:
            raise ValueError('Invalid notebook format: %s' % new_format)
        with io.open(old_fname, 'r', encoding='utf-8') as f:
            nb = current.read(f, old_format)
        with io.open(new_fname, 'w', encoding='utf-8') as f:
            current.write(nb, f, new_format)
def setUp(self):
    """Create foo/testnb.ipynb fixture and an API client for the tests."""
    nbdir = self.notebook_dir.name
    foo_dir = pjoin(nbdir, 'foo')
    if not os.path.isdir(foo_dir):
        os.mkdir(foo_dir)

    # A worksheet with one heading cell and one code cell carrying both a
    # text output and a png (pyout) output.
    ws = new_worksheet()
    ws.cells.append(new_heading_cell(u'Created by test ³'))
    code_cell = new_code_cell(input=u'print(2*6)')
    code_cell.outputs.append(new_output(output_text=u'12'))
    code_cell.outputs.append(new_output(output_png=png_green_pixel,
                                        output_type='pyout'))
    ws.cells.append(code_cell)

    nb = new_notebook(name='testnb')
    nb.worksheets = [ws]

    with io.open(pjoin(foo_dir, 'testnb.ipynb'), 'w',
                 encoding='utf-8') as f:
        write(nb, f, format='ipynb')

    self.nbconvert_api = NbconvertAPI(self.base_url())
def test_checkpoints(self):
    """Exercise the checkpoint API: create, list, restore, delete.

    Creates a checkpoint of the pristine notebook, modifies and saves the
    notebook, then restores the checkpoint and verifies the modification is
    rolled back, and finally deletes the checkpoint.
    """
    resp = self.nb_api.read('a.ipynb', 'foo')
    r = self.nb_api.new_checkpoint('a.ipynb', 'foo')
    self.assertEqual(r.status_code, 201)
    cp1 = r.json()
    self.assertEqual(set(cp1), {'id', 'last_modified'})
    # The Location header must point at the new checkpoint resource.
    self.assertEqual(r.headers['Location'].split('/')[-1], cp1['id'])

    # Modify it
    nbcontent = json.loads(resp.text)['content']
    nb = to_notebook_json(nbcontent)
    ws = new_worksheet()
    nb.worksheets = [ws]
    hcell = new_heading_cell('Created by test')
    ws.cells.append(hcell)
    # Save
    nbmodel = {'name': 'a.ipynb', 'path': 'foo', 'content': nb}
    resp = self.nb_api.save(
        'a.ipynb', path='foo', body=json.dumps(nbmodel))

    # List checkpoints
    cps = self.nb_api.get_checkpoints('a.ipynb', 'foo').json()
    self.assertEqual(cps, [cp1])

    # The saved modification is visible before restoring.
    nbcontent = self.nb_api.read('a.ipynb', 'foo').json()['content']
    nb = to_notebook_json(nbcontent)
    self.assertEqual(nb.worksheets[0].cells[0].source, 'Created by test')

    # Restore cp1
    r = self.nb_api.restore_checkpoint('a.ipynb', 'foo', cp1['id'])
    self.assertEqual(r.status_code, 204)
    # After restore the modification is gone (original had no worksheets).
    nbcontent = self.nb_api.read('a.ipynb', 'foo').json()['content']
    nb = to_notebook_json(nbcontent)
    self.assertEqual(nb.worksheets, [])

    # Delete cp1
    r = self.nb_api.delete_checkpoint('a.ipynb', 'foo', cp1['id'])
    self.assertEqual(r.status_code, 204)
    cps = self.nb_api.get_checkpoints('a.ipynb', 'foo').json()
    self.assertEqual(cps, [])
def new_notebook_from_string(notebookname, filename, sourcestring):
    """Build a notebook whose code cells are the top-level blocks of
    *sourcestring*.

    The source is parsed with ``ast``, block boundaries are determined by the
    ``DetermineBlocks`` visitor, and each non-empty block becomes one code
    cell.

    Parameters
    ----------
    notebookname : str
        Stored in the notebook metadata.
    filename : str
        Used by ``ast.parse`` for error messages only.
    sourcestring : str
        Python source to split into cells.
    """
    root = ast.parse(sourcestring, filename=filename, mode='exec')
    x = DetermineBlocks()
    for child in ast.iter_child_nodes(root):
        # BUG FIX: the original used Python 2 `print` statements, which are
        # syntax errors on Python 3; converted to the function form.
        # (Debug output — consider removing or routing through logging.)
        print(child.lineno, child)
        x.visit(child)
    x.end()
    sourcelines = sourcestring.splitlines()
    cells = []
    for block in x.blocks:
        print(block)
        # block is (kind, first_lineno, last_lineno); linenos are 1-based.
        blocklines = sourcelines[block[1] - 1:block[2]]
        blocksrc = '\n'.join(blocklines)
        if len(blocksrc) > 0:
            cell = notebook_format.new_code_cell(input=blocksrc)
            cells.append(cell)
    ws = notebook_format.new_worksheet(cells=cells)
    result = notebook_format.new_notebook(worksheets=[ws])
    result.metadata.name = notebookname
    return result
def parse(self, pdj, name=''):
    """Convert a pandoc-style JSON document into a notebook node.

    Parameters
    ----------
    pdj : (meta, body) pair
        Parsed pandoc JSON; only the body is consumed here.
    name : str
        Name for the resulting notebook.

    Returns
    -------
    A notebook node containing the cells accumulated by the process_*
    handlers plus the flushed markdown buffer.

    Raises
    ------
    ValueError
        On any pandoc block type without a handler.
    """
    meta, body = pdj
    self._markdown_buffer = ''
    self._init_cells()
    for cell in body:
        t, c = cell['t'], cell['c']
        # Header and CodeBlock manage the cell list themselves (they flush
        # and append directly), so they skip the buffering step below.
        if t == 'Header':
            self.process_header(c)
            continue
        elif t == 'CodeBlock':
            self.process_codeblock(c)
            continue
        elif t in ('Para', 'Plain'):
            res = self.process_inline(c)
        elif t == 'RawBlock':
            # c is (format, raw_text); pass the raw text through verbatim.
            res = c[1]
        elif t == 'BlockQuote':
            res = self.process_blockquote(c)
        elif t == 'BulletList':
            res = self.process_bulletlist(c)
        elif t == 'OrderedList':
            res = self.process_orderedlist(c)
        elif t == 'Table':
            res = self.process_table(c)
        elif t == 'DefinitionList':
            res = self.process_definitionlist(c)
        else:
            raise ValueError('Not yet for ' + t)
        # All other block types render to markdown text, joined into a
        # running buffer that is flushed into cells at the end.
        if res != '':
            self._markdown_buffer = '\n'.join((self._markdown_buffer, res))
    self.flush_markdown()
    self._post_cells()
    nb = nbf.new_notebook(name=name)
    ws = nbf.new_worksheet()
    ws['cells'] += self._cells[:]
    nb['worksheets'].append(ws)
    return nb
def generate_report(reviews, dataset_name, file_name, load_reviews_code):
    """Write an IPython notebook report analyzing a reviews dataset.

    Interleaves markdown cells (fact sheet, per-user/per-item summaries,
    common-items section) with code cells that re-run the analysis inside
    the notebook, then writes the result to *file_name*.

    Parameters
    ----------
    reviews : the reviews collection understood by ReviewsDatasetAnalyzer
    dataset_name : str  — used in titles and headings
    file_name : str     — output .ipynb path
    load_reviews_code : str — code snippet pasted into the notebook so the
        notebook can reload the same reviews itself
    """
    nb = nbf.new_notebook()
    title = '# ' + dataset_name + ' Dataset Analysis'
    title_cell = nbf.new_text_cell(u'markdown', title)

    # Compute the headline numbers once here; the notebook's own code cells
    # recompute them when executed.
    rda = ReviewsDatasetAnalyzer(reviews)
    num_reviews = len(rda.reviews)
    num_users = len(rda.user_ids)
    num_items = len(rda.item_ids)
    user_avg_reviews = float(num_reviews) / num_users
    item_avg_reviews = float(num_reviews) / num_items
    sparsity = rda.calculate_sparsity_approx()

    fact_sheet_text =\
        '## Fact Sheet\n' +\
        'The ' + dataset_name + ' contains:\n' +\
        '* ' + str(num_reviews) + ' reviews\n' +\
        '* Made by ' + str(num_users) + ' users\n' +\
        '* About ' + str(num_items) + ' items\n' +\
        '* It has an approximated sparsity of ' + str(sparsity) + '\n' +\
        '\nNow we are going to analyze the number of reviews per user and ' \
        'per item'
    fact_sheet_cell = nbf.new_text_cell(u'markdown', fact_sheet_text)

    # NOTE(review): hard-coded sys.path below ties the generated notebook to
    # one machine — confirm whether this should be parameterized.
    reviews_analysis_code =\
        'import sys\n' +\
        'sys.path.append(\'/Users/fpena/UCC/Thesis/projects/yelp/source/python\')\n' +\
        'from etl import ETLUtils\n\n' +\
        'from etl.reviews_dataset_analyzer import ReviewsDatasetAnalyzer\n' +\
        '\n# Load reviews\n' + load_reviews_code + '\n' +\
        'rda = ReviewsDatasetAnalyzer(reviews)\n'
    reviews_analysis_cell = nbf.new_code_cell(reviews_analysis_code)

    user_analysis_text =\
        '## Users Reviews Analysis\n' +\
        '* The average number of reviews per user is ' + str(user_avg_reviews) + '\n' +\
        '* The minimum number of reviews a user has is ' + str(min(rda.users_count)) + '\n' +\
        '* The maximum number of reviews a user has is ' + str(max(rda.users_count))
    user_analysis_cell = nbf.new_text_cell(u'markdown', user_analysis_text)

    counts_per_user_code =\
        '# Number of reviews per user\n' +\
        'users_summary = rda.summarize_reviews_by_field(\'user_id\')\n' +\
        'print(\'Average number of reviews per user\', float(rda.num_reviews)/rda.num_users)\n' +\
        'users_summary.plot(kind=\'line\', rot=0)'
    counts_per_user_cell = nbf.new_code_cell(counts_per_user_code)

    item_analysis_text =\
        '## Items Reviews Analysis\n' +\
        '* The average number of reviews per item is ' + str(item_avg_reviews) + '\n' +\
        '* The minimum number of reviews an item has is ' + str(min(rda.items_count)) + '\n' +\
        '* The maximum number of reviews an item has is ' + str(max(rda.items_count))
    item_analysis_cell = nbf.new_text_cell(u'markdown', item_analysis_text)

    counts_per_item_code =\
        '# Number of reviews per item\n' +\
        'items_summary = rda.summarize_reviews_by_field(\'offering_id\')\n' +\
        'print(\'Average number of reviews per item\', float(rda.num_reviews)/rda.num_items)\n' +\
        'items_summary.plot(kind=\'line\', rot=0)'
    counts_per_item_cell = nbf.new_code_cell(counts_per_item_code)

    common_items_text =\
        '## Number of items 2 users have in common\n' +\
        'In this section we are going to count the number of items two ' \
        'users have in common'
    common_items_text_cell = nbf.new_text_cell(u'markdown', common_items_text)

    common_items_code =\
        '# Number of items 2 users have in common\n' +\
        'common_item_counts = rda.count_items_in_common()\n' +\
        'plt.plot(common_item_counts.keys(), common_item_counts.values())\n'
    common_items_code_cell = nbf.new_code_cell(common_items_code)

    common_items_box_code =\
        'from pylab import boxplot\n' +\
        'my_data = [key for key, value in common_item_counts.iteritems() for i in xrange(value)]\n' +\
        'mean_common_items = float(sum(my_data))/len(my_data)\n' +\
        'print(\'Average number of common items between two users:\', mean_common_items)\n' +\
        'boxplot(my_data)'
    common_items_box_cell = nbf.new_code_cell(common_items_box_code)

    # Assemble the cells in presentation order.
    cells = []
    cells.append(title_cell)
    cells.append(fact_sheet_cell)
    cells.append(reviews_analysis_cell)
    cells.append(user_analysis_cell)
    cells.append(counts_per_user_cell)
    cells.append(item_analysis_cell)
    cells.append(counts_per_item_cell)
    cells.append(common_items_text_cell)
    cells.append(common_items_code_cell)
    cells.append(common_items_box_cell)

    nb['worksheets'].append(nbf.new_worksheet(cells=cells))

    with open(file_name, 'w') as f:
        nbf.write(nb, f, 'ipynb')
def write_notebook(pseudopath, with_eos=False, tmpfile=None):
    """
    Write an ipython notebook to pseudopath.
    By default, the notebook is created in the same directory
    as pseudopath but with the extension `ipynb` unless `tmpfile` is set to True.
    In the later case, a temporay file is created.

    Args:
        pseudo: Path to the pseudopotential file.
        with_eos: True if EOS plots are wanted.

    Returns:
        The path to the ipython notebook.

    See http://nbviewer.ipython.org/gist/fperez/9716279
    """
    from IPython.nbformat import current as nbf
    # from IPython.nbformat import v3 as nbf
    # import IPython.nbformat as nbf

    nb = nbf.new_notebook()

    cells = [
        nbf.new_heading_cell("This is an auto-generated notebook for %s" % os.path.basename(pseudopath)),
        nbf.new_code_cell("""\
from __future__ import print_function
%matplotlib inline
import mpld3
from mpld3 import plugins as plugs
plugs.DEFAULT_PLUGINS = [plugs.Reset(), plugs.Zoom(), plugs.BoxZoom(), plugs.MousePosition()]
mpld3.enable_notebook()
import seaborn as sns
#sns.set(style="dark", palette="Set2")
sns.set(style='ticks', palette='Set2')"""),

        nbf.new_code_cell("""\
# Construct the pseudo object and get the DojoReport
from pymatgen.io.abinitio.pseudos import Pseudo
pseudo = Pseudo.from_file('%s')
report = pseudo.dojo_report""" % os.path.abspath(pseudopath)),

        nbf.new_heading_cell("ONCVPSP Input File:"),
        nbf.new_code_cell("""\
input_file = pseudo.filepath.replace(".psp8", ".in")
%cat $input_file"""),

        nbf.new_code_cell("""\
# Get data from the output file
from pseudo_dojo.ppcodes.oncvpsp import OncvOutputParser, PseudoGenDataPlotter
onc_parser = OncvOutputParser(pseudo.filepath.replace(".psp8", ".out"))
# Parse the file and build the plotter
onc_parser.scan()
plotter = onc_parser.make_plotter()"""),

        nbf.new_heading_cell("AE and PS radial wavefunctions $\phi(r)$:"),
        nbf.new_code_cell("fig = plotter.plot_radial_wfs(show=False)"),

        nbf.new_heading_cell("Arctan of the logarithmic derivatives:"),
        nbf.new_code_cell("fig = plotter.plot_atan_logders(show=False)"),

        nbf.new_heading_cell("Convergence in $G$-space estimated by ONCVPSP:"),
        nbf.new_code_cell("fig = plotter.plot_ene_vs_ecut(show=False)"),

        nbf.new_heading_cell("Projectors:"),
        nbf.new_code_cell("fig = plotter.plot_projectors(show=False)"),

        nbf.new_heading_cell("Core-Valence-Model charge densities:"),
        nbf.new_code_cell("fig = plotter.plot_densities(show=False)"),

        nbf.new_heading_cell("Local potential and $l$-dependent potentials:"),
        nbf.new_code_cell("fig = plotter.plot_potentials(show=False)"),

        # nbf.new_heading_cell("1-st order derivative of $v_l$ and $v_{loc}$ computed via finite differences:"),
        # nbf.new_code_cell("""fig = plotter.plot_der_potentials(order=1, show=False)"""),
        # nbf.new_heading_cell("2-nd order derivative of $v_l$ and $v_{loc}$ computed via finite differences:"),
        # nbf.new_code_cell("""fig = plotter.plot_der_potentials(order=2, show=False)"""),

        nbf.new_heading_cell("Model core charge and form factors computed by ABINIT"),
        nbf.new_code_cell("""\
with pseudo.open_pspsfile() as psps:
    psps.plot()"""),

        nbf.new_heading_cell("Convergence of the total energy:"),
        nbf.new_code_cell("""\
# Convergence of the total energy (computed from the deltafactor runs with Wien2K equilibrium volume)
fig = report.plot_etotal_vs_ecut(show=False)"""),

        nbf.new_heading_cell("Convergence of the deltafactor results:"),
        nbf.new_code_cell("""fig = report.plot_deltafactor_convergence(what=("dfact_meV", "dfactprime_meV"), show=False)"""),

        nbf.new_heading_cell("Convergence of $\Delta v_0$, $\Delta b_0$, and $\Delta b_1$ (deltafactor tests)"),
        nbf.new_code_cell("""\
# Here we plot the difference wrt Wien2k results.
fig = report.plot_deltafactor_convergence(what=("-dfact_meV", "-dfactprime_meV"), show=False)"""),

        nbf.new_heading_cell("deltafactor EOS for the different cutoff energies:"),
        nbf.new_code_cell("fig = report.plot_deltafactor_eos(show=False)"),

        nbf.new_heading_cell("Convergence of the GBRV lattice parameters:"),
        nbf.new_code_cell("fig = report.plot_gbrv_convergence(show=False)"),

        nbf.new_heading_cell("Convergence of phonon frequencies at $\Gamma$:"),
        nbf.new_code_cell("fig = report.plot_phonon_convergence(show=False)"),

        # nbf.new_heading_cell("Comparison with the other pseudos in this table"),
        # nbf.new_code_cell("""\
        # from pseudo_dojo import get_pseudos
        # pseudos = get_pseudos(".")
        # if len(pseudos) > 1:
        #     pseudos.dojo_compare()"""),
    ]

    if with_eos:
        # Add EOS plots
        # BUG FIX: `cells` is a list and lists have no .update() method — the
        # original raised AttributeError whenever with_eos was True; use
        # extend() to append the extra cells in order.
        cells.extend(
            [
                nbf.new_heading_cell("GBRV EOS for the FCC structure:"),
                nbf.new_code_cell("""fig = report.plot_gbrv_eos(struct_type="fcc", show=False)"""),

                nbf.new_heading_cell("GBRV EOS for the BCC structure:"),
                nbf.new_code_cell("""fig = report.plot_gbrv_eos(struct_type="bcc", show=False)"""),
            ]
        )

    # Now that we have the cells, we can make a worksheet with them and add it to the notebook:
    nb["worksheets"].append(nbf.new_worksheet(cells=cells))

    # Next, we write it to a file on disk that we can then open as a new notebook.
    # Note: This should be as easy as: nbf.write(nb, fname), but the current api is
    # a little more verbose and needs a real file-like object.
    if tmpfile is None:
        root, ext = os.path.splitext(pseudopath)
        nbpath = root + ".ipynb"
    else:
        import tempfile
        _, nbpath = tempfile.mkstemp(suffix=".ipynb", text=True)

    with open(nbpath, "wt") as f:
        nbf.write(nb, f, "ipynb")

    return nbpath
# generate two notebook files that are large enough for benchmarking. import IPython.nbformat.current as nbformat import random def new_code_cell(): nlines = random.randint(0, 30) input = [str(random.random()) for i in range(nlines)] code_cell = nbformat.new_code_cell(input=input) return code_cell cells = [new_code_cell() for i in range(100)] worksheet = nbformat.new_worksheet(cells=cells) nb = nbformat.new_notebook(name='Test Notebook') nb['worksheets'].append(worksheet) with open('nb1.ipynb', 'w') as out: nbformat.write(nb, out, 'ipynb') cells = nb['worksheets'][0]['cells'] # Take original notebook and make changes to it ncells = len(cells) to_change = [random.choice(list(range(ncells))) for i in range(10)] for tc in to_change: input = cells[tc]['input']
def add_code_cell(self, nb):
    """Append a canned code cell to *nb*'s first worksheet.

    The cell runs ``print('hi')`` and carries one ``display_data``
    output with a javascript payload. If the notebook has no
    worksheets yet, an empty one is created first.
    """
    js_output = current.new_output("display_data",
                                   output_javascript="alert('hi');")
    new_cell = current.new_code_cell("print('hi')", outputs=[js_output])
    sheets = nb.worksheets
    if not sheets:
        sheets.append(current.new_worksheet())
    sheets[0].cells.append(new_cell)
def run(self):
    """Convert a YAML mission file into an IPython notebook.

    Reads the mission at ``self.args.path`` (must end in .yaml/.yml),
    writes ``Mission<name>.ipynb`` into ``self.args.final_dir``, and
    copies any sibling data files alongside it.

    Raises
    ------
    ValueError
        If the input path does not end in ".yaml" or ".yml".
    """
    path = os.path.abspath(os.path.expanduser(self.args.path))
    final_dir = os.path.abspath(os.path.expanduser(self.args.final_dir))
    if not path.endswith(".yaml") and not path.endswith(".yml"):
        raise ValueError
    filename = os.path.basename(path)
    # Output name: "Mission" prefix + original basename with the YAML
    # extension swapped for .ipynb.
    new_filename = "Mission" + filename.replace(".yml", ".ipynb").replace(".yaml", ".ipynb")
    final_dest = os.path.join(final_dir, new_filename)
    mission, screens = mission_loader(path)
    nb = nbf.new_notebook()
    # The first cell holds the mission-level description.
    mission_cell = nbf.new_text_cell('markdown', self.assemble_mission_cell(mission).strip())
    cells = [mission_cell]
    for screen in screens:
        # Each screen becomes one markdown cell (metadata + prose), and
        # code screens additionally get one code cell (see below).
        text = self.assemble_screen_meta(screen)
        text += "\n\n"
        if screen["type"] == "code":
            text += "# " + screen["name"]
            text += "\n\n"
            text += screen["left_text"]
            if "instructions" in screen:
                text += "\n\n"
                text += "## Instructions\n\n"
                text += screen["instructions"]
            if "hint" in screen:
                text += "\n\n"
                text += "## Hint\n\n"
                text += screen["hint"]
        elif screen["type"] == "video":
            text += "# " + screen["name"]
            text += "\n\n"
            text += screen["video"]
        elif screen["type"] == "text":
            text += "# " + screen["name"]
            text += "\n\n"
            text += screen["text"]
        cell = nbf.new_text_cell('markdown', text.strip())
        cells.append(cell)
        if screen["type"] == "code":
            text = ""
            if "initial" not in screen and "answer" not in screen:
                # Plain display-only screen: emit just the initial display code.
                text += screen["initial_display"]
            else:
                # Graded screen: emit each present section under a "##" header.
                items = [
                    {"key": "initial", "name": "## Initial"},
                    {"key": "initial_display", "name": "## Display"},
                    {"key": "answer", "name": "## Answer"},
                    {"key": "check_val", "name": "## Check val"},
                    {"key": "check_vars", "name": "## Check vars"},
                    {"key": "check_code_run", "name": "## Check code run"}
                ]
                for item in items:
                    if item["key"] in screen and len(str(screen[item["key"]]).strip()) > 0:
                        # Skip an empty check_vars collection even though the key exists.
                        if item["key"] == "check_vars" and len(screen[item["key"]]) == 0:
                            continue
                        text += item["name"] + "\n\n"
                        if item["key"] == "check_val":
                            # check_val is quoted and newline-escaped so it
                            # survives as a single-line string literal.
                            text += '"' + str(screen[item["key"]]).strip().replace("\n", "\\n") + '"'
                        else:
                            text += str(screen[item["key"]]).strip()
                        text += "\n\n"
            cell = nbf.new_code_cell(input=text.strip())
            cells.append(cell)
    nb['worksheets'].append(nbf.new_worksheet(cells=cells))
    with open(final_dest, 'w+') as f:
        nbf.write(nb, f, 'ipynb')

    # Copy any associated files over
    original_dir = os.path.dirname(path)
    for f in os.listdir(original_dir):
        full_path = os.path.join(original_dir, f)
        if os.path.isfile(full_path):
            # Data files only: YAML sources and notebooks stay behind.
            if not f.endswith(".yaml") and not f.endswith(".yml") and not f.endswith(".ipynb"):
                shutil.copy2(full_path, os.path.join(final_dir, f))
def write_notebook(pseudopath):
    """Generate a companion ``.ipynb`` notebook for a pseudopotential file.

    Builds a v3-format notebook of heading/code cells that plot the
    ONCVPSP generation data and the DojoReport validation results for
    the pseudo at *pseudopath*, then writes it next to the pseudo with
    an ``.ipynb`` extension.

    See http://nbviewer.ipython.org/gist/fperez/9716279

    Parameters
    ----------
    pseudopath : str
        Path to the pseudopotential file; only its basename is embedded
        in the notebook, and its root is reused for the output filename.
    """
    nb = nbf.new_notebook()

    # Heading strings containing TeX such as $\phi$ / $\Delta$ are raw
    # strings: "\p" and "\D" are invalid escape sequences in ordinary
    # string literals (SyntaxWarning now, an error in future CPython).
    cells = [
        nbf.new_text_cell('heading', "This is an auto-generated notebook for %s" % os.path.basename(pseudopath)),
        nbf.new_code_cell("""\
from __future__ import print_function
%matplotlib inline
import mpld3
from mpld3 import plugins as plugs
plugs.DEFAULT_PLUGINS = [plugs.Reset(), plugs.Zoom(), plugs.BoxZoom(), plugs.MousePosition()]
mpld3.enable_notebook()
import seaborn as sns
#sns.set(style="dark", palette="Set2")
sns.set(style='ticks', palette='Set2')"""),

        nbf.new_code_cell("""\
# Construct the pseudo object and get the DojoReport
from pymatgen.io.abinitio.pseudos import Pseudo
pseudo = Pseudo.from_file('%s')
report = pseudo.dojo_report""" % os.path.basename(pseudopath)),

        nbf.new_text_cell('heading', "ONCVPSP Input File:"),
        nbf.new_code_cell("""\
input_file = pseudo.filepath.replace(".psp8", ".in")
%cat $input_file"""),

        nbf.new_code_cell("""\
# Get data from the output file
from pseudo_dojo.ppcodes.oncvpsp import OncvOutputParser, PseudoGenDataPlotter
onc_parser = OncvOutputParser(pseudo.filepath.replace(".psp8", ".out"))
# Parse the file and build the plotter
onc_parser.scan()
plotter = onc_parser.make_plotter()"""),

        nbf.new_text_cell('heading', r"AE/PS radial wavefunctions $\phi(r)$:"),
        nbf.new_code_cell("""fig = plotter.plot_radial_wfs(show=False)"""),

        nbf.new_text_cell('heading', "Arctan of the logarithmic derivatives:"),
        nbf.new_code_cell("""fig = plotter.plot_atan_logders(show=False)"""),

        nbf.new_text_cell('heading', "Convergence in $G$-space estimated by ONCVPSP:"),
        nbf.new_code_cell("""fig = plotter.plot_ene_vs_ecut(show=False)"""),

        nbf.new_text_cell('heading', "Projectors:"),
        nbf.new_code_cell("""fig = plotter.plot_projectors(show=False)"""),

        nbf.new_text_cell('heading', "Core/Valence/Model charge densities:"),
        nbf.new_code_cell("""fig = plotter.plot_densities(show=False)"""),

        nbf.new_text_cell('heading', "Local potential and $l$-dependent potentials:"),
        nbf.new_code_cell("""fig = plotter.plot_potentials(show=False)"""),

        #nbf.new_text_cell('heading', "1-st order derivative of $v_l$ and $v_{loc}$ computed via finite differences:"),
        #nbf.new_code_cell("""fig = plotter.plot_der_potentials(order=1, show=False)"""),
        #nbf.new_text_cell('heading', "2-nd order derivative of $v_l$ and $v_{loc}$ computed via finite differences:"),
        #nbf.new_code_cell("""fig = plotter.plot_der_potentials(order=2, show=False)"""),

        nbf.new_text_cell('heading', "Convergence of the total energy:"),
        nbf.new_code_cell("""\
# Convergence of the total energy (computed from the deltafactor runs with Wien2K equilibrium volume)
fig = report.plot_etotal_vs_ecut(show=False)"""),

        nbf.new_text_cell('heading', "Convergence of the deltafactor results:"),
        nbf.new_code_cell("""fig = report.plot_deltafactor_convergence(what=("dfact_meV", "dfactprime_meV"), show=False)"""),

        nbf.new_text_cell('heading', r"Convergence of $\Delta v_0$, $\Delta b_0$, and $\Delta b_1$ (deltafactor tests)"),
        nbf.new_code_cell("""\
# Here we plot the difference wrt Wien2k results.
fig = report.plot_deltafactor_convergence(what=("-dfact_meV", "-dfactprime_meV"), show=False)"""),

        nbf.new_text_cell('heading', "deltafactor EOS for the different cutoff energies:"),
        nbf.new_code_cell("""fig = report.plot_deltafactor_eos(show=False)"""),

        nbf.new_text_cell('heading', "Convergence of the GBRV lattice parameters:"),
        nbf.new_code_cell("""fig = report.plot_gbrv_convergence(show=False)"""),

        nbf.new_text_cell('heading', "GBRV EOS for the FCC structure:"),
        nbf.new_code_cell("""fig = report.plot_gbrv_eos(struct_type="fcc", show=False)"""),

        nbf.new_text_cell('heading', "GBRV EOS for the BCC structure:"),
        nbf.new_code_cell("""fig = report.plot_gbrv_eos(struct_type="bcc", show=False)"""),

        # nbf.new_text_cell('heading', "Comparison with the other pseudos in this table"),
        # nbf.new_code_cell("""\
        #from pseudo_dojo import get_pseudos
        #pseudos = get_pseudos(".")
        #if len(pseudos) > 1:
        #    pseudos.dojo_compare()"""),
    ]

    # Now that we have the cells, we can make a worksheet with them and add it to the notebook:
    nb['worksheets'].append(nbf.new_worksheet(cells=cells))

    # Next, we write it to a file on disk that we can then open as a new notebook.
    # Note: This should be as easy as: nbf.write(nb, fname), but the current api
    # is a little more verbose and needs a real file-like object.
    root, ext = os.path.splitext(pseudopath)
    with open(root + '.ipynb', 'w') as f:
        nbf.write(nb, f, 'ipynb')
def create_empty_notebook(self, path):
    """Write a fresh notebook with one empty worksheet to *path* as JSON."""
    notebook = current.new_notebook()
    worksheet = current.new_worksheet()
    notebook.worksheets.append(worksheet)
    with io.open(path, 'w', encoding='utf-8') as handle:
        current.write(notebook, handle, 'json')
def index():
    """Assign the session a new worksheet slot and render the app page."""
    sid = len(worksheets)
    session["sid"] = sid
    worksheets[sid] = current.new_worksheet()
    return render_template("app.html")
def notebook_merge(local, base, remote, check_modified=False):
    """Unify three notebooks into a single notebook with merge metadata.

    The result of this function is a valid notebook that can be loaded
    by the IPython Notebook front-end. This function adds additional
    cell metadata that the front-end Javascript uses to render the merge.

    Parameters
    ----------
    local : dict
        The local branch's version of the notebook.
    base : dict
        The last common ancestor of local and remote.
    remote : dict
        The remote branch's version of the notebook.
    check_modified : bool
        NOTE(review): accepted but never read in this body — presumably
        forwarded in an earlier revision; confirm against callers.

    Returns
    -------
    nb : A valid notebook containing merge metadata.
    """
    local_cells = get_cells(local)
    base_cells = get_cells(base)
    remote_cells = get_cells(remote)
    rows = []
    current_row = []

    # Factory for a placeholder cell shown in columns that have no
    # corresponding real cell; 'state': 'empty' marks it for the UI.
    empty_cell = lambda: {
        'cell_type': 'code',
        'language': 'python',
        'outputs': [],
        'prompt_number': 1,
        'text': ['Placeholder'],
        'metadata': {'state': 'empty'}
    }

    # Higher-order diff: a diff of (base vs local) against (base vs remote).
    diff_of_diffs = merge(local_cells, base_cells, remote_cells)

    # For each item in the higher-order diff, create a "row" that
    # corresponds to a row in the NBDiff interface. A row contains:
    # | LOCAL | BASE | REMOTE |
    for item in diff_of_diffs:
        state = item['state']
        # Deep-copy so per-column metadata tweaks below never alias.
        cell = copy.deepcopy(diff_result_to_cell(item['value']))
        if state == 'deleted':
            # This change is between base and local branches.
            # It can be an addition or a deletion.
            if cell['metadata']['state'] == 'unchanged':
                # This side doesn't have the change; wait
                # until we encounter the change to create the row.
                continue
            cell['metadata']['side'] = 'local'
            remote_cell = empty_cell()
            remote_cell['metadata']['side'] = 'remote'
            if cell['metadata']['state'] == 'deleted' \
                    or cell['metadata']['state'] == 'unchanged':
                # Deletions keep a copy in the base column so the user
                # can choose to delete it in the merged result.
                base_cell = copy.deepcopy(cell)
            else:
                base_cell = empty_cell()
            base_cell['metadata']['side'] = 'base'
            # This change is on the right.
            current_row = [
                cell,
                base_cell,
                remote_cell,
            ]
        elif state == 'added':
            # This change is between base and remote branches.
            # It can be an addition or a deletion.
            cell['metadata']['side'] = 'remote'
            if cell['metadata']['state'] == 'unchanged':
                # This side doesn't have the change; wait
                # until we encounter the change to create the row.
                continue
            if cell['metadata']['state'] == 'deleted':
                # A remote deletion: base and local still show the cell,
                # marked 'unchanged' so only the remote column differs.
                base_cell = copy.deepcopy(cell)
                base_cell['metadata']['state'] = 'unchanged'
                local_cell = copy.deepcopy(cell)
                local_cell['metadata']['state'] = 'unchanged'
            else:
                base_cell = empty_cell()
                local_cell = empty_cell()
            base_cell['metadata']['side'] = 'base'
            local_cell['metadata']['side'] = 'local'
            current_row = [
                local_cell,
                base_cell,
                cell,
            ]
        elif state == 'unchanged':
            # The same item occurs between base-local and base-remote.
            # This happens if both branches made the same change, whether
            # that is an addition or deletion. If neither branches
            # changed a given cell, that cell shows up here too.
            cell1 = copy.deepcopy(cell)
            cell3 = copy.deepcopy(cell)
            if cell['metadata']['state'] == 'deleted' \
                    or cell['metadata']['state'] == 'unchanged':
                # If the change is a deletion, the cell-to-be-deleted
                # should in the base as 'unchanged'. The user will
                # choose to make it deleted.
                cell2 = copy.deepcopy(cell)
                cell2['metadata']['state'] = 'unchanged'
            else:
                # If the change is an addition, it should not
                # show in the base; the user must add it to the merged version.
                cell2 = empty_cell()
            cell1['metadata']['side'] = 'local'
            cell2['metadata']['side'] = 'base'
            cell3['metadata']['side'] = 'remote'
            current_row = [
                cell1,
                cell2,
                cell3,
            ]
        rows.append(current_row)

    # Chain all rows together; create a flat array from the nested array.
    # Use the base notebook's notebook-level metadata (title, version, etc.)
    result_notebook = local
    if len(result_notebook['worksheets']) == 0:
        result_notebook['worksheets'] = [nbformat.new_worksheet()]
    new_cell_array = list(it.chain.from_iterable(rows))
    result_notebook['worksheets'][0]['cells'] = new_cell_array
    result_notebook['metadata']['nbdiff-type'] = 'merge'

    return result_notebook
def create_empty_notebook(self, path):
    """Create a notebook file at *path* containing a single empty worksheet."""
    nb = current.new_notebook()
    ws = current.new_worksheet()
    nb.worksheets.append(ws)
    out = io.open(path, "w", encoding="utf-8")
    try:
        current.write(nb, out, "json")
    finally:
        out.close()