def find_headers(notebook_name, highest_level=2, lowest_level=3):
    """
    Find all headers in the specified range in the notebook.

    Parameters
    ----------
    notebook_name : str
        Name of a Jupyter notebook.

    highest_level : int, optional
        The highest level header to be identified (1 is highest, 6 is
        lowest).

    lowest_level : int, optional
        The lowest level header to be identified (1 is highest, 6 is
        lowest). Numerically this must be greater than or equal to
        ``highest_level``.

    Returns
    -------
    dict
        Keys of the dictionary are the headings (including the leading
        hashtags and the trailing newline), values are the 1-based line
        number on which the heading appears in the json source of the
        notebook. A heading whose text is not found in the json source
        keeps the placeholder value ``-1``.

    Raises
    ------
    ValueError
        If the level range is empty or starts below 1.
    RuntimeError
        If the text of a heading is found on more than one line of the
        json source of the notebook.
    """
    if highest_level < 1 or highest_level > lowest_level:
        # An empty or zero-length alternation would yield a pattern that
        # matches ordinary text rather than headings.
        raise ValueError(
            'Need 1 <= highest_level <= lowest_level, got '
            f'highest_level={highest_level}, lowest_level={lowest_level}'
        )

    headings = {}

    # enumerate() below counts from zero, while line numbers in a file
    # count from one.
    line_number_offset = 1

    # Generate the part of the regex pattern that represents the hashtags
    # that are the beginning of a heading.
    hashtags = '|'.join(
        '#' * level for level in range(highest_level, lowest_level + 1)
    )

    # The lookbehind keeps the tail of a deeper heading (e.g. the last
    # three hashtags of "#### Title") from being taken as a heading in
    # the requested range.
    header = re.compile(r'(?<!#)((' + hashtags + r') +[a-zA-Z].+?\n)')

    notebook = nbf.read(notebook_name, as_version=4)

    for cell in markdown_cells(notebook):
        for match in header.finditer(cell['source']):
            # We have a header, will get line numbers shortly
            headings[match.group(0)] = -1

    # Notebook files are JSON encoded as UTF-8.
    with open(notebook_name, 'r', encoding='utf-8') as f:
        nb_lines = f.readlines()

    for head in headings:
        # Drop the trailing newline: in the json source it is stored as
        # an escape sequence, not as a real line break.
        needle = head[:-1]
        for line_num, line in enumerate(nb_lines):
            if needle in line:
                if headings[head] > 0:
                    print(f'Oh no! Bad {notebook_name}')
                    print(f'...duplicate heading: {head}')
                    raise RuntimeError('oh no')
                headings[head] = line_num + line_number_offset

    return headings
def wrap_notebook_markdown(nb_name, wrap_at=80):
    """
    Re-wrap the text of every markdown cell of a notebook.

    Each line of a markdown cell is wrapped on its own, so existing line
    breaks are kept and only over-long lines are split. Links and LaTeX
    found by ``find_links`` and ``find_latex`` are passed through
    ``protect_from_wrap`` before wrapping and put back afterwards with
    ``restore_protected_content``.

    Parameters
    ----------
    nb_name : str
        Name of a Jupyter notebook.

    wrap_at : int, optional
        Maximum width, in characters, of a wrapped line. Words longer
        than this are not broken.

    Returns
    -------
    notebook
        The notebook, as read by ``nbformat`` (version 4), with the
        source of its markdown cells re-wrapped. Nothing is written to
        disk.
    """
    # Notebook files are JSON encoded as UTF-8.
    with open(nb_name, encoding='utf-8') as f:
        nb = nbf.read(f, as_version=4)

    wrapper = TextWrapper(
        width=wrap_at,
        break_long_words=False,
        break_on_hyphens=False,
        replace_whitespace=False,
        drop_whitespace=True,
    )

    for cell in markdown_cells(nb):
        source = cell['source']

        # Protect links first, then LaTeX in the already-protected text,
        # accumulating what is needed to undo both substitutions.
        protected, restore = protect_from_wrap(source, find_links(source))
        protected, restore = protect_from_wrap(protected,
                                               find_latex(protected),
                                               restore_info=restore)

        new_lines = []
        for line in protected.split('\n'):
            # wrap() returns an empty list for an empty or
            # whitespace-only line; emit a blank line instead so that
            # paragraph breaks are not lost.
            new_lines.extend(wrapper.wrap(line) or [''])

        cell['source'] = restore_protected_content('\n'.join(new_lines),
                                                   restore)

    return nb
def github_magic(nb_file_for_book, original_notebook,
                 comment_group=DEFAULT_COMMENT_GROUP):
    """
    Add, after each level 2 or 3 heading of a notebook, a link to the
    matching line of a pull request opened for commenting on the
    original notebook.

    Parameters
    ----------
    nb_file_for_book : str
        Name of the notebook to which links are added. The file is
        rewritten in place.

    original_notebook : str
        Name of the notebook in which the headings are looked up to get
        their line numbers, and which is handed to
        ``create_pr_for_commenting``.

    comment_group : optional
        Passed through to ``create_pr_for_commenting``.
    """
    # 5. Scan the notebook for section headers (level 2 or 3).    <---- BOTH
    # 6. Get line numbers IN THE ORIGINAL notebook of these headers. <--- ORIG
    # 7. Add a link after the header with the text below.         <---- BOTH
    #    Link is to the magical github place for making comments.
    # Done!
    repo = get_github_repo('astropy', 'ccd-reduction-and-photometry-guide')

    # NOTE(review): base_url is presumably a URL prefix that becomes a
    # link to a specific line once the line number is appended -- confirm
    # against create_pr_for_commenting.
    base_url = create_pr_for_commenting(original_notebook, repo,
                                        comment_group=comment_group)

    heading_in_original = find_headers(original_notebook,
                                       highest_level=2,
                                       lowest_level=3)

    comment_link_text = ('*Click here to comment on this section on '
                         'GitHub (opens in new tab).*')

    # Map each heading to the markdown that goes right after it.
    cell_content_to_insert = {
        heading: (f'\n[{comment_link_text}]({base_url + str(line_number)})'
                  + '{:target="_blank"}\n')
        for heading, line_number in heading_in_original.items()
    }

    book_nb = nbf.read(nb_file_for_book, as_version=4)

    for cell in markdown_cells(book_nb):
        for heading, link in cell_content_to_insert.items():
            # partition() splits at the first occurrence only, so a cell
            # in which the heading text occurs more than once gets one
            # link instead of failing to unpack.
            pre, found, post = cell['source'].partition(heading)
            if found:
                cell['source'] = pre + heading + link + post

    # Notebook files are JSON encoded as UTF-8.
    with open(nb_file_for_book, 'w', encoding='utf-8') as fp:
        nbf.write(book_nb, fp)
def replace_links_in_notebook(nb_file):
    """
    Apply ``replace_link_urls`` to the source of every markdown cell of
    a notebook and write the result back to the same file.

    Parameters
    ----------
    nb_file : str
        Name of a Jupyter notebook. The file is rewritten in place.
    """
    notebook = nbf.read(nb_file, as_version=4)

    for cell in markdown_cells(notebook):
        cell['source'] = replace_link_urls(cell['source'])

    # Notebook files are JSON encoded as UTF-8; do not rely on the
    # platform default encoding when writing.
    with open(nb_file, 'w', encoding='utf-8') as f:
        nbf.write(notebook, f)