Python group_listの例、d2lbook.common.group_list Pythonの例

コード例 #1

0

ファイルを表示

def split_markdown_cell(
        nb: notebooknode.NotebookNode) -> notebooknode.NotebookNode:
    """Re-split the markdown cells of `nb` so that tab blocks get their own cell.

    Runs of markdown cells (as grouped by `common.group_list`) are joined
    with blank lines, split again with `markdown.split_markdown`, and the
    resulting blocks are regrouped by their `class` value. A cell built from
    blocks carrying a `class` gets `metadata['tab']` set to the list of
    comma-separated names found between the surrounding backticks.
    Non-markdown cells are passed through untouched and cells with an empty
    source are dropped from the result.
    """
    def _is_markdown(cell, _):
        return cell.cell_type == 'markdown'

    def _tab_key(cell, _):
        # blocks without a class all share one sentinel key
        if cell['type'] == 'markdown' and 'class' in cell:
            return cell['class']
        return 'not_tab_cell'

    result = []
    # merge continuous markdown cells
    for markdown_run, run in common.group_list(nb.cells, _is_markdown):
        if not markdown_run:
            result.extend(run)
            continue
        merged_source = '\n\n'.join(c.source for c in run)
        blocks = markdown.split_markdown(merged_source)
        for key, members in common.group_list(blocks, _tab_key):
            piece = nbformat.v4.new_markdown_cell(
                markdown.join_markdown_cells(members))
            if key != 'not_tab_cell':
                # the class is expected to look like `tab1, tab2`
                assert key.startswith('`') and key.endswith('`'), key
                names = key[1:-1].split(',')
                piece.metadata['tab'] = [name.strip() for name in names]
            result.append(piece)
    non_empty = [c for c in result if c.source]
    return create_new_notebook(nb, non_empty)

コード例 #2

0

ファイルを表示

ファイル: notebook.py プロジェクト: cuongvng/d2l-book

def _merge_tabs(nb: notebooknode.NotebookNode):
    """Merge side-by-side tabs into a single one.

    Cells are grouped with `common.group_list` using a status of 0 (no tab,
    or a non-markdown cell whose tab is 'all'), 1 (markdown cell with a tab)
    or 2 (any other cell with a tab).

    Returns a list of pairs: `(False, group)` for a group with status 0, or
    `(True, zipped)` where `zipped` is the element-wise zip of a run of
    neighbouring groups whose per-cell `metadata['tab']` lists are equal.
    """
    def _tab_status(cell, status):
        tab = _get_cell_tab(cell)
        if not tab:
            return 0
        if cell.cell_type == 'markdown':
            return 1
        return 0 if tab == 'all' else 2

    cell_groups = common.group_list(nb.cells, _tab_status)

    # for every group: its status and, when in a tab, the tab of each cell
    meta = []
    for in_tab, group in cell_groups:
        if in_tab:
            tab_names = [cell.metadata['tab'] for cell in group]
        else:
            tab_names = None
        meta.append((in_tab, tab_names))

    merged = []
    total = len(meta)
    start = 0
    while start < total:
        in_tab, tabs = meta[start]
        if not in_tab:
            merged.append((False, cell_groups[start][1]))
            start += 1
            continue
        # extend the run over every following group with the same tabs
        stop = start + 1
        while stop < total and meta[stop][1] == tabs:
            stop += 1
        groups = [group for _, group in cell_groups[start:stop]]
        merged.append((True, list(zip(*groups))))
        start = stop

    return merged

コード例 #3

0

ファイルを表示

def _merge_tabs(nb: notebooknode.NotebookNode, tabs: List[str]):
    """Merge side-by-side tabs into a single one.

    Args:
        nb: the notebook whose cells are regrouped.
        tabs: the tab names, in the order they should appear in each group.

    Returns:
        A list of items. An item is either `(False, cells)` for cells that
        are not in a tab (or whose tabs cover exactly `tabs`), or
        `(True, [(tab_name, cells_in_this_tab), ...])` listing only the tabs
        that received at least one cell.
    """
    def tab_status(cell, _):
        return 1 if get_cell_tab(cell) else 0

    new_groups = []
    for in_tab, cells in common.group_list(nb.cells, tab_status):
        # a run in which every cell belongs to all tabs is a special case
        # that can be merged into the non-tab cells
        if not in_tab or all(
                set(cell.metadata['tab']) == set(tabs) for cell in cells):
            new_groups.append((False, cells))
            continue
        # the general case: distribute each cell to every tab it names
        per_tab = {tab: [] for tab in tabs}  # type: ignore
        for cell in cells:
            for tab in cell.metadata['tab']:
                per_tab[tab].append(cell)
        new_groups.append(
            (True, [(tab, per_tab[tab]) for tab in tabs if per_tab[tab]]))

    # merge two consecutive code blocks. The first
    # code should not contain output
    for is_tab, group in new_groups:
        if not is_tab:
            continue
        for idx, (tab, tab_cells) in enumerate(group):
            merged = []
            for cell in tab_cells:
                can_merge = (bool(merged)
                             and merged[-1].cell_type == 'code'
                             and cell.cell_type == 'code'
                             and not _has_output(merged[-1]))
                if not can_merge:
                    merged.append(cell)
                    continue
                # work on a copy so the notebook's own cell is not modified
                combined = copy.deepcopy(cell)
                combined.source = merged[-1].source + '\n\n' + combined.source
                merged[-1] = combined
            group[idx] = (tab, merged)
    return new_groups

コード例 #4

0

ファイルを表示

def _generate_slides(
        nb: notebooknode.NotebookNode) -> Optional[notebooknode.NotebookNode]:
    """Build a slide notebook from `nb`.

    Non-markdown cells are kept, with `# ...` comments stripped from their
    source (note: `cell.source` of the input cell is overwritten in place).
    Markdown cells are replaced by a new markdown cell holding only the text
    returned by `_match_slide_marks`, with the first letter of each sentence
    upper-cased, trailing punctuation removed and the cell's first level-1
    heading (a line starting with `# `) prepended. Markdown cells that end up
    empty are dropped.

    Afterwards consecutive code cells are merged until one that has outputs
    (or the last of the run) is reached, and `#@save` marks are removed.

    Returns:
        The new notebook created by `notebook.create_new_notebook`, or None
        if no markdown cell contained a slide mark.
    """
    new_cells = []
    has_slides = False
    for cell in nb.cells:
        if cell.cell_type != 'markdown':
            # remove comments
            new_lines = []
            for line in cell.source.splitlines():
                new_line = re.sub(r'\#\ .*', '', line)
                # drop lines that consisted of nothing but a comment
                if new_line != line and not new_line.rstrip():
                    continue
                new_lines.append(new_line.rstrip())
            cell.source = '\n'.join(new_lines)
            new_cells.append(cell)
        else:
            slide_type = '-'
            src = []
            matches = _match_slide_marks(cell.source)
            if matches:
                has_slides = True
            for pair, text in matches:
                if pair[0].startswith('['):
                    slide_type = 'slide'
                src.append(text)
            src = '\n'.join(src)
            if src:
                # cannot simply use . as it could be in code such as `a.text()`
                for m in ('.\n', '. '):
                    sentences = [s.strip() for s in src.split(m)]
                    # s[:1] rather than s[0]: splitting can yield empty
                    # sentences (e.g. text ending in ". "), which must not
                    # raise IndexError
                    src = m.join([s[:1].upper() + s[1:] for s in sentences])
                src = src.replace('.$$', '$$').replace(',$$', '$$')
                src = src.rstrip(',. \n:，。：')
            # find level-1 head
            for line in cell.source.splitlines():
                if line.strip().startswith('# '):
                    src = line + '\n\n' + src
                    break
            if not src:
                continue
            new_cells.append(
                nbformat.v4.new_markdown_cell(
                    src, metadata={"slideshow": {
                        "slide_type": slide_type
                    }}))
    if not has_slides:
        return None

    # merge code cell in the same slide if they don't have output
    md_code_group = common.group_list(new_cells,
                                      lambda cell, _: cell.cell_type == 'code')
    merged_code_cell = []
    for is_code, group in md_code_group:
        if not is_code:
            merged_code_cell.extend(group)
        else:
            src = []
            for i, cell in enumerate(group):
                src.append(cell.source)
                # flush the accumulated sources into the last cell of the
                # run, or into any cell that carries outputs
                if i == len(group) - 1 or ('outputs' in cell
                                           and len(cell['outputs'])):
                    cell.source = '\n\n'.join(src)
                    src = []
                    merged_code_cell.append(cell)
    # clean #@save
    for cell in merged_code_cell:
        if cell.cell_type == 'code':
            cell.source = cell.source.replace(
                '\n#@save\n', '\n').replace('#@save', '').strip()
    return notebook.create_new_notebook(nb, merged_code_cell)

コード例 #5

0

ファイルを表示

def split_text(text: str) -> List[Dict[str, str]]:
    """Split text into a list of paragraphs.

    The text is cut into paragraphs at blank lines and every paragraph is
    turned into one or more dicts with the following keys:

    - type: one of text, list, image, title, equation, table
    - source: the paragraph text (for titles and list items, without prefix)
    - prefix: only for title and list, the leading marker that was removed
    - mark: only for title, the lines following the title line

    A paragraph that starts like a title, equation, image or table but is
    malformed is logged with a warning and emitted as a `text` entry.
    """
    # split into paragraphs
    lines = text.splitlines()
    groups = common.group_list(lines, lambda a, _: a.strip() == '')
    paras = ['\n'.join(item) for empty_line, item in groups if not empty_line]

    cells = []

    def _fallback(p, kind):
        # keep the paragraph as plain text instead of failing
        logging.warning(f'Wrong {kind} format:\n' + p)
        cells.append({'type': 'text', 'source': p})

    for p in paras:
        # the trailing '' guarantees lines[1:] exists and that the table
        # scan below always finds a non-table line
        lines = p.splitlines() + ['']
        p += '\n'
        if p.startswith('#'):
            # parse title
            if not _is_mark(lines[1:]):
                _fallback(p, 'title')
            else:
                m = re.match(r'#+ *', lines[0])
                cells.append({
                    'type': 'title',
                    'prefix': m[0],
                    'source': lines[0][m.span()[1]:],
                    'mark': '\n'.join(lines[1:])
                })
        elif p.startswith('$$'):
            # parse equations: exactly one opening and one closing $$
            m = re.findall(r'\$\$', p)
            if len(m) != 2:
                _fallback(p, 'equation')
            else:
                cells.append({'type': 'equation', 'source': p})
        elif p.startswith('!['):
            # parse images
            if not lines[0].strip().endswith(')') or not _is_mark(lines[1:]):
                _fallback(p, 'image')
            else:
                cells.append({'type': 'image', 'source': p})
        elif p.startswith('|'):
            # parse table: find the first line that is not a table row
            for i, l in enumerate(lines):
                if not l.startswith('|'):
                    break
            if not _is_mark(lines[i:]):
                _fallback(p, 'table')
            else:
                cells.append({'type': 'table', 'source': p})
        else:
            groups = common.group_list(lines, _list)
            for prefix, item in groups:
                # NOTE(review): `_list` apparently may return keys of the
                # form '<prefix>__<suffix>'; only the prefix part is kept --
                # confirm against `_list`
                if len(prefix.split('__')) == 2:
                    prefix = prefix.split('__')[0]
                source = '\n'.join(item)[len(prefix):]
                if prefix == '':
                    cells.append({'type': 'text', 'source': source})
                else:
                    cells.append({
                        'type': 'list',
                        'prefix': prefix,
                        'source': source
                    })
    return cells