def split_markdown_cell(
        nb: notebooknode.NotebookNode) -> notebooknode.NotebookNode:
    """Split markdown cells at tab-block boundaries.

    Runs of consecutive markdown cells are joined (separated by a blank
    line) and re-split with ``markdown.split_markdown``. The resulting pieces
    are grouped by their ``class`` value and each group becomes one new
    markdown cell. For a group that carries a ``class`` value, the tab names
    parsed from it are stored in the new cell's ``metadata['tab']``.
    Non-markdown cells are passed through unchanged, and cells whose source
    is empty are dropped from the result.
    """
    no_tab = 'not_tab_cell'

    def _is_markdown(cell, _):
        return cell.cell_type == 'markdown'

    def _tab_label(piece, _):
        # Only markdown pieces that carry a `class` entry belong to a tab.
        if piece['type'] == 'markdown' and 'class' in piece:
            return piece['class']
        return no_tab

    result = []
    for is_markdown, run in common.group_list(nb.cells, _is_markdown):
        if not is_markdown:
            result.extend(run)
            continue
        merged_source = '\n\n'.join(cell.source for cell in run)
        pieces = markdown.split_markdown(merged_source)
        for label, piece_group in common.group_list(pieces, _tab_label):
            md_cell = nbformat.v4.new_markdown_cell(
                markdown.join_markdown_cells(piece_group))
            if label != no_tab:
                # A tab label must be wrapped in backticks; the text in
                # between is a comma-separated list of tab names.
                assert label.startswith('`') and label.endswith('`'), label
                md_cell.metadata['tab'] = [
                    name.strip() for name in label[1:-1].split(',')
                ]
            result.append(md_cell)
    non_empty = [cell for cell in result if cell.source]
    return create_new_notebook(nb, non_empty)
def _merge_tabs(nb: notebooknode.NotebookNode):
    """Merge side-by-side tab groups into single entries.

    The notebook cells are grouped by tab status. The result is a list of
    ``(in_tab, payload)`` pairs: ``(False, cells)`` for a group outside any
    tab, or ``(True, rows)`` where ``rows`` is the ``zip`` of the cell lists
    of consecutive groups whose per-cell tab lists are equal (so ``rows``
    stops at the shortest of those groups).
    """
    def _status(cell, _prev):
        # 0: no tab, or a non-markdown cell whose tab is 'all'
        # 1: markdown cell with a tab
        # 2: any other cell with a tab
        tab = _get_cell_tab(cell)
        if not tab:
            return 0
        if cell.cell_type == 'markdown':
            return 1
        return 0 if tab == 'all' else 2

    grouped = common.group_list(nb.cells, _status)
    # Per group: the list of each cell's tab metadata, or None outside tabs.
    tab_lists = [
        [cell.metadata['tab'] for cell in cells] if status else None
        for status, cells in grouped
    ]
    merged = []
    total = len(tab_lists)
    start = 0
    while start < total:
        if not grouped[start][0]:
            merged.append((False, grouped[start][1]))
            start += 1
            continue
        current = tab_lists[start]
        # Extend the window over every following group with identical tabs.
        stop = start + 1
        while stop < total and tab_lists[stop] == current:
            stop += 1
        columns = [cells for _, cells in grouped[start:stop]]
        merged.append((True, list(zip(*columns))))
        start = stop
    return merged
def _merge_tabs(nb: notebooknode.NotebookNode, tabs: List[str]):
    """Group notebook cells by tab, merging side-by-side tabs into one.

    Returns a list of items. Each item is either
    ``(False, [cell, ...])`` for cells that are emitted outside any tab, or
    ``(True, [(tab_name, [cell, ...]), ...])`` for a tabbed group, with tab
    names ordered as in ``tabs`` and tabs without cells left out.
    """
    def _in_tab(cell, _):
        return 1 if get_cell_tab(cell) else 0

    all_tabs = set(tabs)
    merged = []
    for in_tab, cells in common.group_list(nb.cells, _in_tab):
        # Special case: when every cell of the run is tagged with exactly
        # the full set of tabs, the run is emitted as a non-tab group.
        if not in_tab or all(
                set(cell.metadata['tab']) == all_tabs for cell in cells):
            merged.append((False, cells))
            continue
        # General case: file each cell under every tab it is tagged with.
        per_tab = {tab: [] for tab in tabs}  # type: ignore
        for cell in cells:
            for tab in cell.metadata['tab']:
                per_tab[tab].append(cell)
        merged.append(
            (True, [(tab, per_tab[tab]) for tab in tabs if per_tab[tab]]))

    # Within each tab, fold a code cell into the code cell right before it,
    # provided that previous cell has no output.
    for is_tab, group in merged:
        if not is_tab:
            continue
        for idx, (tab, tab_cells) in enumerate(group):
            folded = []
            for cell in tab_cells:
                can_fold = (len(folded) > 0
                            and folded[-1].cell_type == 'code'
                            and cell.cell_type == 'code'
                            and not _has_output(folded[-1]))
                if can_fold:
                    # Work on a copy so the notebook's own cell is untouched.
                    combined = copy.deepcopy(cell)
                    combined.source = folded[-1].source + '\n\n' + cell.source
                    folded[-1] = combined
                else:
                    folded.append(cell)
            group[idx] = (tab, folded)
    return merged
def _strip_slide_code_comments(source: str) -> str:
    """Remove ``# ...`` comments from code, dropping lines left empty by it."""
    kept = []
    for line in source.splitlines():
        stripped = re.sub(r'\#\ .*', '', line)
        if stripped != line and not stripped.rstrip():
            # The line held nothing but a comment: drop it entirely.
            continue
        kept.append(stripped.rstrip())
    return '\n'.join(kept)


def _polish_slide_text(src: str) -> str:
    """Capitalize each sentence of slide text and trim trailing punctuation."""
    # Cannot simply split on '.' as it could be in code such as `a.text()`.
    for sep in ('.\n', '. '):
        sentences = [s.strip() for s in src.split(sep)]
        # Use s[:1] rather than s[0]: a piece is empty when the text starts
        # or ends with the separator (or has two in a row), and s[0] would
        # raise IndexError there.
        src = sep.join(s[:1].upper() + s[1:] for s in sentences)
    src = src.replace('.$$', '$$').replace(',$$', '$$')
    return src.rstrip(',. \n:,。:')


def _generate_slides(
        nb: notebooknode.NotebookNode) -> Optional[notebooknode.NotebookNode]:
    """Build a slide notebook from the slide marks found in ``nb``.

    For every markdown cell the texts returned by ``_match_slide_marks`` are
    joined, polished and, when the cell contains a level-1 heading line,
    prefixed with that line; the result becomes a new markdown cell with
    ``slideshow`` metadata. The slide type is ``'slide'`` when any match's
    ``pair[0]`` starts with ``'['`` and ``'-'`` otherwise. Non-markdown cells
    are kept with their ``# `` comments removed, consecutive code cells are
    merged up to (and including) the next one that has outputs, and
    ``#@save`` marks are removed from code cells.

    Note that the non-markdown cells of ``nb`` are modified in place.

    Returns:
        The new notebook, or ``None`` when no markdown cell produced any
        slide-mark match.
    """
    new_cells = []
    has_slides = False
    for cell in nb.cells:
        if cell.cell_type != 'markdown':
            cell.source = _strip_slide_code_comments(cell.source)
            new_cells.append(cell)
            continue
        slide_type = '-'
        texts = []
        matches = _match_slide_marks(cell.source)
        if matches:
            has_slides = True
        for pair, text in matches:
            if pair[0].startswith('['):
                slide_type = 'slide'
            texts.append(text)
        src = '\n'.join(texts)
        if src:
            src = _polish_slide_text(src)
        # Find the level-1 heading, if any, and put it in front.
        for line in cell.source.splitlines():
            if line.strip().startswith('# '):
                src = line + '\n\n' + src
                break
        if not src:
            continue
        new_cells.append(
            nbformat.v4.new_markdown_cell(
                src, metadata={"slideshow": {
                    "slide_type": slide_type
                }}))
    if not has_slides:
        return None

    # Merge code cells in the same slide if they don't have output.
    merged_code_cell = []
    md_code_group = common.group_list(new_cells,
                                      lambda cell, _: cell.cell_type == 'code')
    for is_code, group in md_code_group:
        if not is_code:
            merged_code_cell.extend(group)
            continue
        pending = []
        last = len(group) - 1
        for i, cell in enumerate(group):
            pending.append(cell.source)
            # Flush at the end of the run or at a cell that has outputs, so
            # the outputs stay attached to the code that produced them.
            if i == last or ('outputs' in cell and len(cell['outputs'])):
                cell.source = '\n\n'.join(pending)
                pending = []
                merged_code_cell.append(cell)
    # Clean #@save marks.
    for cell in merged_code_cell:
        if cell.cell_type == 'code':
            cell.source = cell.source.replace('\n#@save\n', '\n').replace(
                '#@save', '').strip()
    return notebook.create_new_notebook(nb, merged_code_cell)
def split_text(text: str) -> List[Dict[str, str]]:
    """Split text into a list of paragraph cells.

    Paragraphs are separated by blank lines. Every returned dict has a
    ``type`` key (``text``, ``list``, ``image``, ``title``, ``equation`` or
    ``table``) and a ``source`` key. ``title`` cells additionally carry
    ``prefix`` (the leading ``#`` run and following spaces) and ``mark``
    (the lines after the title line); ``list`` cells carry ``prefix``.
    A paragraph that starts like a title, equation, image or table but fails
    the format check is logged as a warning and returned as a ``text`` cell.

    Args:
        text: The text to split.

    Returns:
        The cells in document order; an empty list when ``text`` has no
        lines.
    """
    lines = text.splitlines()
    if not lines:
        # Nothing to group; return before calling the grouping helper.
        return []
    # Split into paragraphs.
    groups = common.group_list(lines, lambda a, _: a.strip() == '')
    paras = ['\n'.join(item) for empty_line, item in groups if not empty_line]

    cells: List[Dict[str, str]] = []

    def _fallback(p, kind):
        logging.warning(f'Wrong {kind} format:\n' + p)
        cells.append({'type': 'text', 'source': p})

    for p in paras:
        # The trailing '' guarantees lines[1:] exists and that the table
        # scan below always finds a line not starting with '|'.
        lines = p.splitlines() + ['']
        p += '\n'
        if p.startswith('#'):
            # Parse title.
            if not _is_mark(lines[1:]):
                _fallback(p, 'title')
            else:
                m = re.match(r'#+ *', lines[0])
                cells.append({
                    'type': 'title',
                    'prefix': m[0],
                    'source': lines[0][m.end():],
                    'mark': '\n'.join(lines[1:])
                })
        elif p.startswith('$$'):
            # Parse equations: exactly one opening and one closing `$$`.
            if len(re.findall(r'\$\$', p)) != 2:
                _fallback(p, 'equation')
            else:
                cells.append({'type': 'equation', 'source': p})
        elif p.startswith('!['):
            # Parse images.
            if not lines[0].strip().endswith(')') or not _is_mark(lines[1:]):
                _fallback(p, 'image')
            else:
                cells.append({'type': 'image', 'source': p})
        elif p.startswith('|'):
            # Parse table: everything after the last leading '|' row must
            # be a mark.
            first_non_row = next(
                i for i, line in enumerate(lines) if not line.startswith('|'))
            if not _is_mark(lines[first_non_row:]):
                _fallback(p, 'table')
            else:
                cells.append({'type': 'table', 'source': p})
        else:
            for prefix, item in common.group_list(lines, _list):
                # NOTE(review): `_list` presumably returns the list prefix,
                # optionally suffixed with `__<something>`; only the part
                # before `__` is kept -- confirm against `_list`.
                parts = prefix.split('__')
                if len(parts) == 2:
                    prefix = parts[0]
                source = '\n'.join(item)[len(prefix):]
                if prefix == '':
                    cells.append({'type': 'text', 'source': source})
                else:
                    cells.append({
                        'type': 'list',
                        'prefix': prefix,
                        'source': source
                    })
    return cells