コード例 #1
0
ファイル: ipynb.py プロジェクト: KGHustad/doconce
def ipynb_code(filestr, code_blocks, code_block_types,
               tex_blocks, format):
    """
    # We expand all newcommands now
    from html import embed_newcommands
    newcommands = embed_newcommands(filestr)
    if newcommands:
        filestr = newcommands + filestr
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here. All envirs are typeset as pandoc_quote.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')
    # Remove all !bpop-!epop environments (they cause only problens and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL|re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)
    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    for envir in envirs:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)
        if envir_format in ('quote', 'paragraph', 'hrule'):
            def subst(m):
                title = m.group(1).strip()
                # Text size specified in parenthesis?
                m2 = re.search('^\s*\((.+?)\)', title)

                if title == '' and envir not in ('block', 'quote'):
                    title = envir.capitalize() + '.'
                elif title.lower() == 'none':
                    title == ''
                elif m2:
                    text_size = m2.group(1).lower()
                    title = title.replace('(%s)' % text_size, '').strip()
                elif title and title[-1] not in ('.', ':', '!', '?'):
                    # Make sure the title ends with puncuation
                    title += '.'
                # Recall that this formatting is called very late
                # so native format must be used!
                if title:
                    title = '**' + title + '**\n'
                    # Could also consider subsubsection formatting
                block = m.group(2)

                # Always use quote typesetting for quotes
                if envir_format == 'quote' or envir == 'quote':
                    # Make Markdown quote of the block: lines start with >
                    lines = []
                    for line in block.splitlines():
                        # Just quote plain text
                        if not (_MATH_BLOCK in line or
                                _CODE_BLOCK in line or
                                line.startswith('FIGURE:') or
                                line.startswith('MOVIE:') or
                                line.startswith('|')):
                            lines.append('> ' + line)
                        else:
                            lines.append('\n' + line + '\n')
                    block = '\n'.join(lines) + '\n\n'

                    # Add quote and a blank line after title
                    if title:
                        title = '> ' + title + '>\n'
                else:
                    # Add a blank line after title
                    if title:
                        title += '\n'

                if envir_format == 'hrule':
                    # Native ------ does not work, use <hr/>
                    #text = '\n\n----------\n' + title + '----------\n' + \
                    #       block + '\n----------\n\n'
                    text = '\n\n<hr/>\n' + title + \
                           block + '\n<hr/>\n\n'
                else:
                    text = title + block + '\n\n'
                return text
        else:
            errwarn('*** error: --ipynb_admon=%s is not supported'  % envir_format)
        filestr = re.sub(pattern, subst, filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of a cell
    # Fix sys environments and use run prog.py so programs can be run in cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environmens
    # become plain indented Markdown.
    from doconce import dofile_basename
    from sets import Set
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = Set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    if split_pyshell is None:
        split_pyshell = False
    elif split_pyshell in ('no', 'False', 'off'):
        split_pyshell = False
    else:
        split_pyshell = True

    ipynb_code_tp = [None]*len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and (
            re.search(r'import +matplotlib', code_blocks[i]) or \
            re.search(r'from +matplotlib', code_blocks[i]) or \
            re.search(r'import +scitools', code_blocks[i]) or \
            re.search(r'from +scitools', code_blocks[i])):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            #code_blocks[i] = '\n' + indent_lines(code_blocks[i], format) + '\n'
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            last_cell_end = -1
            if split_pyshell:
                new_code_blocks = []
                # Split for each output an put in separate cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>>') or lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):  # IPython
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith('   ...: '): # IPython
                        lines[j] = lines[j][8:]
                    else:
                        # output (no prefix or Out)
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end+1:j+1]))
                        last_cell_end = j
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable in cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>> ') or lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    else:
                        # output
                        lines[j] = ''

                for j in range(lines.count('')):
                    lines.remove('')
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'

        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, copy the file
            # to separate subdir and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        src_paths.add(os.path.dirname(name))
                        lines[j] = '%%run "%s"' % fullpath
                else:
                    found_unix_lines = True
            src_paths = list(src_paths)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = '\n'.join(lines)
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = list(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        errwarn('collected all required additional files in ' + ipynb_tarfile + ' which must be distributed with the notebook')
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)


    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]
    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):  # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or not code_block_tp.endswith('hid'):
                notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    if isinstance(notebook_blocks[-1], list):
        notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()


    # Add block type info
    pattern = r'(\d+) +%s'
    for i in range(len(notebook_blocks)):
        if re.match(pattern % _CODE_BLOCK, notebook_blocks[i]):
            m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i])
            idx = int(m.group(1))
            if ipynb_code_tp[idx] == 'cell':
                notebook_blocks[i] = ['cell', notebook_blocks[i]]
            elif ipynb_code_tp[idx] == 'cell_hidden':
                notebook_blocks[i] = ['cell_hidden', notebook_blocks[i]]
            else:
                notebook_blocks[i] = ['text', notebook_blocks[i]]
        elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]):
            notebook_blocks[i] = ['math', notebook_blocks[i]]
        else:
            notebook_blocks[i] = ['text', notebook_blocks[i]]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label, 'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        tex_blocks[i] = tex_blocks[i].replace(r'\[', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\]', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\begin{equation*}', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir)
        eq_type = 'heading'  # or '$$'
        eq_type = '$$'
        # Markdown: add $$ on each side of the equation
        if eq_type == '$$':
            # Make sure there are no newline after equation
            tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        # Here: use heading (###) and simple formula (remove newline
        # in math expressions to keep everything within a heading) as
        # the equation then looks bigger
        elif eq_type == 'heading':
            tex_blocks[i] = '### $ ' + '  '.join(tex_blocks[i].splitlines()) + ' $'

        # Add labels for the eqs above the block (for reference)
        if labels:
            #label_tp = '<a name="%s"></a>'
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label
                                      for label in labels]) + '\n\n' + \
                                      tex_blocks[i]

    # blocks is now a list of text chunks in markdown and math/code line
    # instructions. Insert code and tex blocks
    for i in range(len(notebook_blocks)):
        if _CODE_BLOCK in notebook_blocks[i][1] or _MATH_BLOCK in notebook_blocks[i][1]:
            words = notebook_blocks[i][1].split()
            # start of notebook_blocks[i]: number block-indicator code-type
            n = int(words[0])
            if _CODE_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = code_blocks[n]  # can be list!
            if _MATH_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = tex_blocks[n]

    # Make IPython structures

    nb_version = int(option('ipynb_version=', '4'))
    if nb_version == 3:
        try:
            from IPython.nbformat.v3 import (
                new_code_cell, new_text_cell, new_worksheet,
                new_notebook, new_metadata, new_author)
            nb = new_worksheet()
        except ImportError:
            errwarn('*** error: could not import IPython.nbformat.v3!')
            errwarn('    set --ipynb_version=4 or leave out --ipynb_version=3')
            _abort()
    elif nb_version == 4:
        try:
            from nbformat.v4 import (
                new_code_cell, new_markdown_cell, new_notebook)
        except ImportError:
            # Try old style
            try:
                from IPython.nbformat.v4 import (
                    new_code_cell, new_markdown_cell, new_notebook)
            except ImportError:
                errwarn('*** error: cannot do import nbformat.v4 or IPython.nbformat.v4')
                errwarn('    make sure IPython notebook or Jupyter is installed correctly')
                _abort()
        cells = []

    mdstr = []  # plain md format of the notebook
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if (block_tp == 'text' or block_tp == 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            elif nb_version == 4:
                cells.append(new_markdown_cell(source=block))
            mdstr.append(('markdown', block))
        elif block_tp == 'cell' and block != '' and block != []:
            if isinstance(block, list):
                for block_ in block:
                    block_ = block_.rstrip()
                    if block_ != '':
                        if nb_version == 3:
                            nb.cells.append(new_code_cell(
                                input=block_,
                                prompt_number=prompt_number,
                                collapsed=False))
                        elif nb_version == 4:
                            cells.append(new_code_cell(
                                source=block_,
                                execution_count=prompt_number,
                                metadata=dict(collapsed=False)))
                        prompt_number += 1
                        mdstr.append(('codecell', block_))
            else:
                block = block.rstrip()
                if block != '':
                    if nb_version == 3:
                        nb.cells.append(new_code_cell(
                            input=block,
                            prompt_number=prompt_number,
                            collapsed=False))
                    elif nb_version == 4:
                        cells.append(new_code_cell(
                            source=block,
                            execution_count=prompt_number,
                            metadata=dict(collapsed=False)))
                    prompt_number += 1
                    mdstr.append(('codecell', block))
        elif block_tp == 'cell_hidden' and block != '':
            block = block.rstrip()
            if nb_version == 3:
                nb.cells.append(new_code_cell(
                    input=block, prompt_number=prompt_number, collapsed=True))
            elif nb_version == 4:
                cells.append(new_code_cell(
                    source=block,
                    execution_count=prompt_number,
                    metadata=dict(collapsed=True)))
            prompt_number += 1
            mdstr.append(('codecell', block))

    """
    # Dump the notebook cells in a simple ASCII format
    # (doc/src/ipynb/ipynb_generator.py can translate it back to .ipynb file)
    f = open(dofile_basename + '.md-ipynb', 'w')
    for cell_tp, block in mdstr:
        if cell_tp == 'markdown':
            f.write('\n-----\n\n')
        elif cell_tp == 'codecell':
            f.write('\n-----py\n\n')
        f.write(block)
    f.close()
    """

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson

        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        errwarn('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()
    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)
    # \\eqref{} just gives (???) link at this stage - future versions
    # will probably support labels
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    # Now we use explicit references to tags
    def subst(m):
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError as e:
            errwarn('*** error: label "%s" is not defined' % str(e))

    filestr = re.sub(r'\(ref\{(.+?)\}\)', subst, filestr)
    """
    # MathJax reference to tag (recall that the equations have both label
    # and tag (know that tag only works well in HTML, but this mjx-eqn-no
    # label does not work in ipynb)
    filestr = re.sub(r'\(ref\{(.+?)\}\)',
                     lambda m: r'[(%s)](#mjx-eqn-%s)' % (label2tag[m.group(1)], label2tag[m.group(1)]), filestr)
    """
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'Eq (\g<1>)', filestr)

    '''
    # Final fixes: replace all text between cells by markdown code cells
    # Note: the patterns are overlapping so a plain re.sub will not work,
    # here we run through all blocks found and subsitute the first remaining
    # one, one by one.
    pattern = r'   \},\n(.+?)\{\n    "cell_type":'
    begin_pattern = r'^(.+?)\{\n    "cell_type":'
    remaining_block_begin = re.findall(begin_pattern, filestr, flags=re.DOTALL)
    remaining_blocks = re.findall(pattern, filestr, flags=re.DOTALL)
    import string
    for block in remaining_block_begin + remaining_blocks:
        filestr = string.replace(filestr, block, json_markdown(block) + '   ',
                                 maxreplace=1)
    filestr_end = re.sub(r'   \{\n    "cell_type": .+?\n   \},\n', '', filestr,
                         flags=re.DOTALL)
    filestr = filestr.replace(filestr_end, json_markdown(filestr_end))
    filestr = """{
 "metadata": {
  "name": "SOME NAME"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
""" + filestr.rstrip() + '\n'+ \
    json_pycode('', final_prompt_no+1, 'python').rstrip()[:-1] + """
   ],
   "metadata": {}
  }
 ]
}"""
    '''
    return filestr
コード例 #2
0
def xml_ref_and_label(section_label2title, format, filestr):
    # This is the first format-specific function to be called.
    # Go through the file and mark end of paragraphs.
    filestr = xml_find_paragraphs(filestr)

    pattern = r'([Ss]ections?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="section">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Cc]apters?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="chapter">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Aa]ppendix)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="appendix">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Aa]ppendices)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="appendix">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Ff]igures?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="chapter">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'(the\s*)?([Ee]xercises?|[Pp]rojects?|[Pp]roblems?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<2>" type="exercise">\g<3></ref>'
    filestr = re.sub(pattern, replacement, filestr)

    # Need special adjustment to handle start of sentence (capital) or not.
    pattern = r'([.?!]\s+|^)<ref prefix="'
    replacement = r'\g<1><ref sentencestart="True" prefix="'
    filestr = re.sub(pattern, replacement, filestr, flags=re.MULTILINE)

    # Did xml to this point. References of this type are hard...
    # 2DO: the rest FIXME: the rest [[[[[[


    # extract the labels in the text (filestr is now without
    # mathematics and those labels)
    running_text_labels = re.findall(r'label\{(.+?)\}', filestr)

    # XML: can have label as attribute for section/subsection/etc?
    # or always <label>..</label>? Think what is best, probably
    # the first one.

    # make special anchors for all the section titles with labels:
    def subst_heading(m):
        heading_tp = (11 - len(m.group(1)))/2
        s = '<section type="%s" label="%s">%s</section>' % (heading_tp, label, title.replace('\\', '\\\\'))
        return s

    for label in section_label2title:
        # make new anchor for this label (put in title):
        title = section_label2title[label]
        title_pattern = r'(={3,9})\s*%s\s*(={3,9})\s*label\{%s\}' % (re.escape(title), label)
        filestr = re.sub(title_pattern, subst_heading, filestr)

    # Deal with all sections without proceding labels
    def subst_heading2(m):
        heading_tp = (11 - len(m.group(1)))/2
        s = '<section type="%s" label="None">%s</section>' % \
            (heading_tp, m.group(2).strip())
        return s
    filestr = re.sub(r'^(={3,9})(.+?)(={3,9})', subst_heading2,
                     filestr, flags=re.MULTILINE)

    filestr = re.sub(r'label\{(.+?)\}', r'<label>\g<1></label>', filestr)

    # Number all figures, find all figure labels and replace their
    # references by the figure numbers
    # (note: figures are already handled!)
    # Can process this by findall and sub, since we assume all
    # captions with label are unique because the label should be unique.
    pattern = '<caption>(.+?)</caption>'
    captions = re.findall(pattern, filestr, flags=re.DOTALL)
    fig_no = 0
    for caption in captions:
        m = re.search(r'<label>(.+?)</label>', filestr)
        if m:
            label = m.group(1)
            filestr = re.sub(pattern,
                             r'<caption no="%s">\g<1></caption>' % fig_no,
                             filestr,
                             flags=re.DOTALL)
            fig_no += 1

    # Replace all admon and many other envirs here (then xml_quote etc
    # will not be called later by doconce.py)
    from doconce import doconce_envirs
    for envir in doconce_envirs()[5:-2]:
        filestr = re.sub(r'^!b%s +(.+)' % envir, r'<%s heading="\g<1>">' % envir,
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!b%s *' % envir, r'<%s>' % envir,
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!e%s *' % envir, r'</%s>' % envir,
                         filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!split', '\n<split />', filestr, flags=re.MULTILINE)

    # [[[ Could replace label=None in <section> tags by a unique section id
    return filestr
コード例 #3
0
ファイル: xml.py プロジェクト: apetcho/doconce
def xml_ref_and_label(section_label2title, format, filestr):
    # This is the first format-specific function to be called.
    # Go through the file and mark end of paragraphs.
    filestr = xml_find_paragraphs(filestr)

    pattern = r'([Ss]ections?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="section">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Cc]apters?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="chapter">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Aa]ppendix)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="appendix">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Aa]ppendices)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="appendix">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'([Ff]igures?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<1>" type="chapter">\g<2></ref>'
    filestr = re.sub(pattern, replacement, filestr)
    pattern = r'(the\s*)?([Ee]xercises?|[Pp]rojects?|[Pp]roblems?)\s+ref\{(.+?)\}'
    replacement = r'<ref prefix="\g<2>" type="exercise">\g<3></ref>'
    filestr = re.sub(pattern, replacement, filestr)

    # Need special adjustment to handle start of sentence (capital) or not.
    pattern = r'([.?!]\s+|^)<ref prefix="'
    replacement = r'\g<1><ref sentencestart="True" prefix="'
    filestr = re.sub(pattern, replacement, filestr, flags=re.MULTILINE)

    # Did xml to this point. References of this type are hard...
    # 2DO: the rest FIXME: the rest [[[[[[


    # extract the labels in the text (filestr is now without
    # mathematics and those labels)
    running_text_labels = re.findall(r'label\{(.+?)\}', filestr)

    # XML: can have label as attribute for section/subsection/etc?
    # or always <label>..</label>? Think what is best, probably
    # the first one.

    # make special anchors for all the section titles with labels:
    def subst_heading(m):
        heading_tp = (11 - len(m.group(1)))/2
        s = '<section type="%s" label="%s">%s</section>' % (heading_tp, label, title.replace('\\', '\\\\'))
        return s

    for label in section_label2title:
        # make new anchor for this label (put in title):
        title = section_label2title[label]
        title_pattern = r'(={3,9})\s*%s\s*(={3,9})\s*label\{%s\}' % (re.escape(title), label)
        filestr = re.sub(title_pattern, subst_heading, filestr)

    # Deal with all sections without proceding labels
    def subst_heading2(m):
        heading_tp = (11 - len(m.group(1)))/2
        s = '<section type="%s" label="None">%s</section>' % \
            (heading_tp, m.group(2).strip())
        return s
    filestr = re.sub(r'^(={3,9})(.+?)(={3,9})', subst_heading2,
                     filestr, flags=re.MULTILINE)

    filestr = re.sub(r'label\{(.+?)\}', r'<label>\g<1></label>', filestr)

    # Number all figures, find all figure labels and replace their
    # references by the figure numbers
    # (note: figures are already handled!)
    # Can process this by findall and sub, since we assume all
    # captions with label are unique because the label should be unique.
    pattern = '<caption>(.+?)</caption>'
    captions = re.findall(pattern, filestr, flags=re.DOTALL)
    fig_no = 0
    for caption in captions:
        m = re.search(r'<label>(.+?)</label>', filestr)
        if m:
            label = m.group(1)
            filestr = re.sub(pattern,
                             r'<caption no="%s">\g<1></caption>' % fig_no,
                             filestr,
                             flags=re.DOTALL)
            fig_no += 1

    # Replace all admon and many other envirs here (then xml_quote etc
    # will not be called later by doconce.py)
    from doconce import doconce_envirs
    for envir in doconce_envirs()[5:-2]:
        filestr = re.sub(r'^!b%s +(.+)' % envir, r'<%s heading="\g<1>">' % envir,
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!b%s *' % envir, r'<%s>' % envir,
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!e%s *' % envir, r'</%s>' % envir,
                         filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!split', '\n<split />', filestr, flags=re.MULTILINE)

    # [[[ Could replace label=None in <section> tags by a unique section id
    return filestr
コード例 #4
0
ファイル: ipynb.py プロジェクト: sjsrey/doconce
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    # We expand all newcommands now
    from html import embed_newcommands
    newcommands = embed_newcommands(filestr)
    if newcommands:
        filestr = newcommands + filestr
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here. All envirs are typeset as pandoc_quote.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')
    # Remove all !bpop-!epop environments (they cause only problens and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir,
                         '',
                         filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir,
                         '',
                         filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n',
                     '',
                     filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)
    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    for envir in envirs:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)
        if envir_format in ('quote', 'paragraph', 'hrule'):

            def subst(m):
                title = m.group(1).strip()
                # Text size specified in parenthesis?
                m2 = re.search('^\s*\((.+?)\)', title)

                if title == '' and envir not in ('block', 'quote'):
                    title = envir.capitalize() + '.'
                elif title.lower() == 'none':
                    title == ''
                elif m2:
                    text_size = m2.group(1).lower()
                    title = title.replace('(%s)' % text_size, '').strip()
                elif title and title[-1] not in ('.', ':', '!', '?'):
                    # Make sure the title ends with puncuation
                    title += '.'
                # Recall that this formatting is called very late
                # so native format must be used!
                if title:
                    title = '**' + title + '**\n'
                    # Could also consider subsubsection formatting
                block = m.group(2)

                # Always use quote typesetting for quotes
                if envir_format == 'quote' or envir == 'quote':
                    # Make Markdown quote of the block: lines start with >
                    lines = []
                    for line in block.splitlines():
                        # Just quote plain text
                        if not (_MATH_BLOCK in line or _CODE_BLOCK in line
                                or line.startswith('FIGURE:')
                                or line.startswith('MOVIE:')
                                or line.startswith('|')):
                            lines.append('> ' + line)
                        else:
                            lines.append('\n' + line + '\n')
                    block = '\n'.join(lines) + '\n\n'

                    # Add quote and a blank line after title
                    if title:
                        title = '> ' + title + '>\n'
                else:
                    # Add a blank line after title
                    if title:
                        title += '\n'

                if envir_format == 'hrule':
                    # Native ------ does not work, use <hr/>
                    #text = '\n\n----------\n' + title + '----------\n' + \
                    #       block + '\n----------\n\n'
                    text = '\n\n<hr/>\n' + title + \
                           block + '\n<hr/>\n\n'
                else:
                    text = title + block + '\n\n'
                return text
        else:
            print '*** error: --ipynb_admon=%s is not supported' % envir_format
        filestr = re.sub(pattern,
                         subst,
                         filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of a cell
    # Fix sys environments and use run prog.py so programs can be run in cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environmens
    # become plain indented Markdown.
    from doconce import dofile_basename
    from sets import Set
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = Set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    if split_pyshell is None:
        split_pyshell = False
    elif split_pyshell in ('no', 'False', 'off'):
        split_pyshell = False
    else:
        split_pyshell = True

    ipynb_code_tp = [None] * len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and (
            re.search(r'import +matplotlib', code_blocks[i]) or \
            re.search(r'from +matplotlib', code_blocks[i]) or \
            re.search(r'import +scitools', code_blocks[i]) or \
            re.search(r'from +scitools', code_blocks[i])):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            #code_blocks[i] = '\n' + indent_lines(code_blocks[i], format) + '\n'
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            last_cell_end = -1
            if split_pyshell:
                new_code_blocks = []
                # Split for each output an put in separate cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>>') or lines[j].startswith(
                            '... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):  # IPython
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith('   ...: '):  # IPython
                        lines[j] = lines[j][8:]
                    else:
                        # output (no prefix or Out)
                        lines[j] = ''
                        new_code_blocks.append('\n'.join(lines[last_cell_end +
                                                               1:j + 1]))
                        last_cell_end = j
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable in cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>> ') or lines[j].startswith(
                            '... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    else:
                        # output
                        lines[j] = ''

                for j in range(lines.count('')):
                    lines.remove('')
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'

        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, copy the file
            # to separate subdir and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        src_paths.add(os.path.dirname(name))
                        lines[j] = '%%run "%s"' % fullpath
                else:
                    found_unix_lines = True
            src_paths = list(src_paths)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = '\n'.join(lines)
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = list(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        print 'collected all required additional files in', ipynb_tarfile, 'which must be distributed with the notebook'
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]
    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):  # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in (
                    'pyhid', ) or not code_block_tp.endswith('hid'):
                notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    if isinstance(notebook_blocks[-1], list):
        notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    # Add block type info
    pattern = r'(\d+) +%s'
    for i in range(len(notebook_blocks)):
        if re.match(pattern % _CODE_BLOCK, notebook_blocks[i]):
            m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i])
            idx = int(m.group(1))
            if ipynb_code_tp[idx] == 'cell':
                notebook_blocks[i] = ['cell', notebook_blocks[i]]
            elif ipynb_code_tp[idx] == 'cell_hidden':
                notebook_blocks[i] = ['cell_hidden', notebook_blocks[i]]
            else:
                notebook_blocks[i] = ['text', notebook_blocks[i]]
        elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]):
            notebook_blocks[i] = ['math', notebook_blocks[i]]
        else:
            notebook_blocks[i] = ['text', notebook_blocks[i]]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        tex_blocks[i] = tex_blocks[i].replace(r'\[', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\]', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\begin{equation*}', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                print """\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir
        eq_type = 'heading'  # or '$$'
        eq_type = '$$'
        # Markdown: add $$ on each side of the equation
        if eq_type == '$$':
            # Make sure there are no newline after equation
            tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        # Here: use heading (###) and simple formula (remove newline
        # in math expressions to keep everything within a heading) as
        # the equation then looks bigger
        elif eq_type == 'heading':
            tex_blocks[i] = '### $ ' + '  '.join(
                tex_blocks[i].splitlines()) + ' $'

        # Add labels for the eqs above the block (for reference)
        if labels:
            #label_tp = '<a name="%s"></a>'
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label
                                      for label in labels]) + '\n\n' + \
                                      tex_blocks[i]

    # blocks is now a list of text chunks in markdown and math/code line
    # instructions. Insert code and tex blocks
    for i in range(len(notebook_blocks)):
        if _CODE_BLOCK in notebook_blocks[i][
                1] or _MATH_BLOCK in notebook_blocks[i][1]:
            words = notebook_blocks[i][1].split()
            # start of notebook_blocks[i]: number block-indicator code-type
            n = int(words[0])
            if _CODE_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = code_blocks[n]  # can be list!
            if _MATH_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = tex_blocks[n]

    # Make IPython structures

    nb_version = int(option('ipynb_version=', '3'))
    if nb_version == 3:
        from IPython.nbformat.v3 import (new_code_cell, new_text_cell,
                                         new_worksheet, new_notebook,
                                         new_metadata, new_author)
        nb = new_worksheet()
    elif nb_version == 4:
        from IPython.nbformat.v4 import (new_code_cell, new_markdown_cell,
                                         new_notebook)
        cells = []

    mdstr = []  # plain md format of the notebook
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if (block_tp == 'text' or block_tp == 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            elif nb_version == 4:
                cells.append(new_markdown_cell(source=block))
            mdstr.append(('markdown', block))
        elif block_tp == 'cell' and block != '' and block != []:
            if isinstance(block, list):
                for block_ in block:
                    block_ = block_.rstrip()
                    if block_ != '':
                        if nb_version == 3:
                            nb.cells.append(
                                new_code_cell(input=block_,
                                              prompt_number=prompt_number,
                                              collapsed=False))
                        elif nb_version == 4:
                            cells.append(
                                new_code_cell(source=block_,
                                              execution_count=prompt_number))
                        prompt_number += 1
                        mdstr.append(('codecell', block_))
            else:
                block = block.rstrip()
                if block != '':
                    if nb_version == 3:
                        nb.cells.append(
                            new_code_cell(input=block,
                                          prompt_number=prompt_number,
                                          collapsed=False))
                    elif nb_version == 4:
                        cells.append(
                            new_code_cell(source=block,
                                          execution_count=prompt_number))
                    prompt_number += 1
                    mdstr.append(('codecell', block))
        elif block_tp == 'cell_hidden' and block != '':
            block = block.rstrip()
            if nb_version == 3:
                nb.cells.append(
                    new_code_cell(input=block,
                                  prompt_number=prompt_number,
                                  collapsed=True))
            elif nb_version == 4:
                cells.append(
                    new_code_cell(source=block, execution_count=prompt_number))
            prompt_number += 1
            mdstr.append(('codecell', block))
    """
    # Dump the notebook cells in a simple ASCII format
    # (doc/src/ipynb/ipynb_generator.py can translate it back to .ipynb file)
    f = open(dofile_basename + '.md-ipynb', 'w')
    for cell_tp, block in mdstr:
        if cell_tp == 'markdown':
            f.write('\n-----\n\n')
        elif cell_tp == 'codecell':
            f.write('\n-----py\n\n')
        f.write(block)
    f.close()
    """

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson

        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        print '*** error: empty cells in notebook - report bug in DocOnce'
        _abort()
    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{',
                     r'\g<1>\\\\label{',
                     filestr,
                     flags=re.MULTILINE)

    # \\eqref{} just gives (???) link at this stage - future versions
    # will probably support labels
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    # Now we use explicit references to tags
    def subst(m):
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError as e:
            print '*** error: label "%s" is not defined' % str(e)

    filestr = re.sub(r'\(ref\{(.+?)\}\)', subst, filestr)
    """
    # MathJax reference to tag (recall that the equations have both label
    # and tag (know that tag only works well in HTML, but this mjx-eqn-no
    # label does not work in ipynb)
    filestr = re.sub(r'\(ref\{(.+?)\}\)',
                     lambda m: r'[(%s)](#mjx-eqn-%s)' % (label2tag[m.group(1)], label2tag[m.group(1)]), filestr)
    """
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'Eq (\g<1>)', filestr)
    '''
    # Final fixes: replace all text between cells by markdown code cells
    # Note: the patterns are overlapping so a plain re.sub will not work,
    # here we run through all blocks found and subsitute the first remaining
    # one, one by one.
    pattern = r'   \},\n(.+?)\{\n    "cell_type":'
    begin_pattern = r'^(.+?)\{\n    "cell_type":'
    remaining_block_begin = re.findall(begin_pattern, filestr, flags=re.DOTALL)
    remaining_blocks = re.findall(pattern, filestr, flags=re.DOTALL)
    import string
    for block in remaining_block_begin + remaining_blocks:
        filestr = string.replace(filestr, block, json_markdown(block) + '   ',
                                 maxreplace=1)
    filestr_end = re.sub(r'   \{\n    "cell_type": .+?\n   \},\n', '', filestr,
                         flags=re.DOTALL)
    filestr = filestr.replace(filestr_end, json_markdown(filestr_end))
    filestr = """{
 "metadata": {
  "name": "SOME NAME"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
""" + filestr.rstrip() + '\n'+ \
    json_pycode('', final_prompt_no+1, 'python').rstrip()[:-1] + """
   ],
   "metadata": {}
  }
 ]
}"""
    '''
    return filestr