Ejemplo n.º 1
0
def old_epytext_code(filestr):
    """Typeset code and TeX blocks as rst-style verbatim blocks.

    In rst syntax, code blocks are typeset with ``::`` (verbatim) followed
    by indented blocks. The logic is similar to rst.rst_code, but a special
    epytext version was necessary since epytext is fooled by newlines in
    code/tex blocks.

    Parameters:
        filestr: the document text containing the code/tex blocks.

    Returns:
        The text with every code/tex block indented and the !bc/!ec and
        !bt/!et markers substituted.
    """
    # Pull the blocks out, indent each of them, and put them back.
    filestr, code_blocks, tex_blocks = remove_code_and_tex(filestr)
    for blocks in (code_blocks, tex_blocks):
        for idx, block in enumerate(blocks):
            blocks[idx] = indent_lines(block, True)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # Substitute the begin/end markers, code blocks first and then tex
    # blocks (see rst.rst_code for comments if problems).
    from rst import bc_regex_pattern, bt_regex_pattern
    for begin_pattern, end_pattern in ((bc_regex_pattern, r'!ec\n'),
                                       (bt_regex_pattern, r'!et\n')):
        filestr = re.sub(begin_pattern, r'\g<1>::\n\n', filestr,
                         flags=re.DOTALL)
        filestr = re.sub(end_pattern, '\n\n', filestr)
    return filestr
Ejemplo n.º 2
0
def old_epytext_code(filestr):
    """Indent code/TeX blocks and mark them up with the rst ``::`` syntax.

    rst typesets verbatim text as ``::`` followed by an indented block.
    This epytext variant of rst.rst_code exists because epytext is fooled
    by newlines inside code/tex blocks.

    Parameters:
        filestr: the document text containing the code/tex blocks.

    Returns:
        The text after indentation of the blocks and substitution of the
        !bc/!ec and !bt/!et markers.
    """
    # Step 1: indent all code/tex blocks.
    filestr, code_blocks, tex_blocks = remove_code_and_tex(filestr)
    for pos, code in enumerate(code_blocks):
        code_blocks[pos] = indent_lines(code, True)
    for pos, tex in enumerate(tex_blocks):
        tex_blocks[pos] = indent_lines(tex, True)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # Step 2: substitute the markers, in this fixed order
    # (see rst.rst_code for comments if problems).
    from rst import bc_regex_pattern, bt_regex_pattern
    begin_replacement = r'\g<1>::\n\n'
    end_replacement = '\n\n'
    substitutions = [
        (re.compile(bc_regex_pattern, re.DOTALL), begin_replacement),
        (re.compile(r'!ec\n'), end_replacement),
        (re.compile(bt_regex_pattern, re.DOTALL), begin_replacement),
        (re.compile(r'!et\n'), end_replacement),
    ]
    for compiled, replacement in substitutions:
        filestr = compiled.sub(replacement, filestr)
    return filestr
Ejemplo n.º 3
0
def mwiki_code(filestr, code_blocks, code_block_types,
               tex_blocks, format):
    """Insert code and TeX blocks into `filestr` using MediaWiki markup.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks, passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; modified in place (equation
            environments and labels are stripped).
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The text with <syntaxhighlight> and <math> tags substituted for the
        !bc/!ec and !bt/!et markers.
    """
    # http://en.wikipedia.org/wiki/Help:Displaying_a_formula
    # MediaWiki math does not support labels in equations.
    # The equation and \[ \] environments must be removed (not supported).

    for i, tex_block in enumerate(tex_blocks):
        # Standard align works in Wikipedia and Wikibooks.
        # Standard align gives somewhat ugly output on wiiki.com services,
        # but a set of separate equations is not much better.
        # We therefore stick to align (no align2equations call here).
        tex_blocks[i] = equation2nothing(tex_block)
        tex_blocks[i], labels = remove_labels(tex_blocks[i])
        for label in labels:
            if label in filestr:
                # Parenthesized single-argument print works identically as
                # a Python 2 statement and as a Python 3 function call.
                print('*** warning: reference to label "%s" in an equation does not work in MediaWiki' % label)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # Supported programming languages:
    # http://www.mediawiki.org/wiki/Extension:SyntaxHighlight_GeSHi#Supported_languages
    envir2lang = dict(cod='python', pycod='python', cycod='python',
                      fcod='fortran', ccod='c', cppcod='cpp',
                      mcod='matlab', plcod='perl', shcod='bash',
                      pro='python', pypro='python', cypro='python',
                      fpro='fortran', cpro='c', cpppro='cpp',
                      mpro='matlab', plpro='perl', shpro='bash',
                      rbpro='ruby', rbcod='ruby',
                      javacod='java', javapro='java',
                      htmlcod='html5', xmlcod='xml',
                      htmlpro='html5', xmlpro='xml',
                      html='html5', xml='xml',
                      sys='bash', dat='text', csv='text', txt='text',
                      pyoptpro='python', pyscpro='python',
                      ipy='python', pyshell='python',
                      dipy='python', dpyshell='python',
                      )

    # Each pattern matches one exact environment name, so the iteration
    # order of the dict does not matter.
    for key, language in envir2lang.items():
        filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                         '<syntaxhighlight lang="%s">\n' % language,
                         filestr, flags=re.MULTILINE)
    # Any remaining !bc (unknown or missing environment) becomes plain text.
    filestr = re.sub(r'^!bc.*$\n', '<syntaxhighlight lang="text">\n',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'!ec\n', '</syntaxhighlight>\n', filestr)
    filestr = re.sub(r'^!bt\n', ':<math>\n', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'!et\n', '</math>\n', filestr)

    # Final fix of MediaWiki file

    # __TOC__ syntax is misinterpreted as paragraph heading, so we
    # use <<<TOC>>> instead and replace to right syntax here at the end.
    filestr = filestr.replace('<<<TOC>>>', '__TOC__')

    return filestr
Ejemplo n.º 4
0
def gwiki_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Insert code and TeX blocks and wrap them in ``{{{`` ... ``}}}``.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks, tex_blocks: blocks passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The text with !bc/!bt lines replaced by ``{{{`` and !ec/!et
        replaced by ``}}}``.
    """
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # (pattern, replacement, flags), applied in this order. The begin
    # markers are anchored at line start; the end markers are not.
    substitutions = (
        (r'^!bc(.*?)\n', r'{{{\n', re.MULTILINE),
        (r'!ec\n', r'}}}\n', 0),
        (r'^!bt\n', r'{{{\n', re.MULTILINE),
        (r'!et\n', r'}}}\n', 0),
    )
    for pattern, replacement, flags in substitutions:
        filestr = re.sub(pattern, replacement, filestr, flags=flags)
    return filestr
Ejemplo n.º 5
0
def gwiki_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Put code/TeX blocks back into the text inside ``{{{`` ... ``}}}``.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks, tex_blocks: blocks passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The text where every !bc/!bt line is ``{{{`` and every !ec/!et
        marker is ``}}}``.
    """
    text = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # Code blocks: the begin marker (with any argument) opens the block.
    text = re.sub(r"^!bc(.*?)\n", r"{{{\n", text, flags=re.MULTILINE)
    text = re.sub(r"!ec\n", r"}}}\n", text)

    # TeX blocks get the same verbatim delimiters.
    text = re.sub(r"^!bt\n", r"{{{\n", text, flags=re.MULTILINE)
    text = re.sub(r"!et\n", r"}}}\n", text)
    return text
Ejemplo n.º 6
0
def mwiki_code(filestr, code_blocks, code_block_types,
               tex_blocks, format):
    """Insert code and TeX blocks into `filestr` using MediaWiki markup.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks, passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; modified in place (equation
            environments and labels are stripped).
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The text with <syntaxhighlight> and <math> tags substituted for the
        !bc/!ec and !bt/!et markers.
    """
    # http://en.wikipedia.org/wiki/Help:Displaying_a_formula
    # MediaWiki math does not support labels in equations.
    # The equation and \[ \] environments must be removed (not supported).

    for i, tex_block in enumerate(tex_blocks):
        # Standard align works in Wikipedia and Wikibooks.
        # Standard align gives somewhat ugly output on wiiki.com services,
        # but a set of separate equations is not much better.
        # We therefore stick to align (no align2equations call here).
        tex_blocks[i] = equation2nothing(tex_block)
        tex_blocks[i], labels = remove_labels(tex_blocks[i])
        for label in labels:
            if label in filestr:
                # Parenthesized single-argument print works identically as
                # a Python 2 statement and as a Python 3 function call.
                print('*** warning: reference to label "%s" in an equation does not work in MediaWiki' % label)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # Supported programming languages:
    # http://www.mediawiki.org/wiki/Extension:SyntaxHighlight_GeSHi#Supported_languages
    envir2lang = dict(cod='python', pycod='python', cycod='python',
                      fcod='fortran', ccod='c', cppcod='cpp',
                      mcod='matlab', plcod='perl', shcod='bash',
                      pro='python', pypro='python', cypro='python',
                      fpro='fortran', cpro='c', cpppro='cpp',
                      mpro='matlab', plpro='perl', shpro='bash',
                      rbpro='ruby', rbcod='ruby',
                      javacod='java', javapro='java',
                      htmlcod='html5', xmlcod='xml',
                      htmlpro='html5', xmlpro='xml',
                      html='html5', xml='xml',
                      sys='bash', dat='text', csv='text', txt='text',
                      pyoptpro='python', pyscpro='python',
                      ipy='python', pyshell='python',
                      )

    # Each pattern matches one exact environment name, so the iteration
    # order of the dict does not matter.
    for key, language in envir2lang.items():
        filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                         '<syntaxhighlight lang="%s">\n' % language,
                         filestr, flags=re.MULTILINE)
    # Any remaining !bc (unknown or missing environment) becomes plain text.
    filestr = re.sub(r'^!bc.*$\n', '<syntaxhighlight lang="text">\n',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'!ec\n', '</syntaxhighlight>\n', filestr)
    filestr = re.sub(r'^!bt\n', ':<math>\n', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'!et\n', '</math>\n', filestr)

    # Final fix of MediaWiki file

    # __TOC__ syntax is misinterpreted as paragraph heading, so we
    # use <<<TOC>>> instead and replace to right syntax here at the end.
    filestr = filestr.replace('<<<TOC>>>', '__TOC__')

    return filestr
Ejemplo n.º 7
0
def rst_code(filestr, code_blocks, code_block_types,
             tex_blocks, format):
    """Insert code and TeX blocks into `filestr` as rst verbatim blocks.

    In rst syntax, code blocks are typeset with ``::`` (verbatim) followed
    by indented blocks, so everything inside code (or TeX) blocks is
    indented before insertion.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks; indented in place.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; indented in place.
        format: passed as second argument to indent_lines.

    Returns:
        The rst text after marker substitution and the final fixes.
        _abort() is called if a line-initial !bt or !et is left over.
    """
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    # just use the same substitution as for code blocks:
    filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just strip the :: from these.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check that no tex markers survived the substitutions
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            # Parenthesized single-argument print works identically as a
            # Python 2 statement and as a Python 3 function call.
            print("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)
    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i, line in enumerate(lines):
        if line.startswith('<linebreakpipe>'):
            if inside_block:
                # Continuation line: swap the marker for a pipe.
                lines[i] = '|' + line.replace('<linebreakpipe>', '')
            else:
                # First marked line: drop the marker (with its trailing
                # blank) and append a newline.
                inside_block = True
                lines[i] = line.replace('<linebreakpipe> ', '') + '\n'
        elif inside_block:
            # First unmarked line after a marked run closes the block.
            inside_block = False
            lines[i] = '| ' + line + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
Ejemplo n.º 8
0
def xml_code(filestr, code_blocks, code_block_types,
              tex_blocks, format):
    """Replace code and LaTeX blocks by XML elements.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks, passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; ``label{`` is rewritten in place to
            a backslash-prefixed label command.
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The text with <code>, <latex>, <eqref> and </li> tags inserted,
        prefixed by a <mathematics type="..."> line, and with redundant
        <newline/> tags removed.
    """
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # NOTE(review): this runs after the blocks were inserted above, so it
    # only changes the caller's tex_blocks list, not filestr - confirm
    # that this ordering is intended.
    for i, tex_block in enumerate(tex_blocks):
        if 'label' in tex_block:
            # Fix label -> \label in tex_blocks
            tex_block = tex_block.replace(' label{', ' \\label{')
            # The backslash must be doubled in the replacement template:
            # a lone \l is a bad escape for re.sub on Python 3.7+.
            tex_blocks[i] = re.sub(r'^label\{', r'\\label{',
                                   tex_block, flags=re.MULTILINE)

    def subst(m):
        """Return the opening <code> tag for a !bc line match."""
        tp = m.group(1).strip()
        if tp:
            return '<code type="%s">' % tp
        return '<code>'

    filestr = re.sub(r'^!bc(.*)', subst, filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!ec', r'</code>', filestr, flags=re.MULTILINE)

    # Record which kinds of mathematics the document contains.
    math_tp = []
    if tex_blocks:
        math_tp.append('tex-blocks')
    if re.search(r'<inlinemath>.+?</inlinemath>', filestr):
        math_tp.append('inline-math')
    math_types = ','.join(math_tp) if math_tp else 'None'
    filestr = '<mathematics type="%s">' % math_types + '\n' + filestr

    filestr = re.sub(r'^!bt', '<latex>', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et', '</latex>', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'<eqref>\g<1></eqref>', filestr)

    # Add </li> in lists
    cpattern = re.compile(r'<li>(.+?)(\s+)<li>', re.DOTALL)

    def find_list_items(match):
        """Return replacement from match of <li> tags."""
        # Does the match run out of the list?
        if re.search(r'</?(ul|ol)>', match.group(1)):
            return '<li>' + match.group(1) + match.group(2)
        return '<li>' + match.group(1) + '</li>' + match.group(2)

    # cpattern can only detect every second list item because it cannot
    # work with overlapping patterns. Remedy: have two <li> to avoid
    # overlap, fix that after all replacements are done.
    filestr = filestr.replace('<li>', '<li><li>')
    filestr = cpattern.sub(find_list_items, filestr)
    # Fix things that go wrong with cpattern: list items that go
    # through end of lists over to next list item.
    cpattern = re.compile(r'<li>(.+?)(\s+)(</?ol>|</?ul>)', re.DOTALL)
    filestr = cpattern.sub(r'<li>\g<1></li>\g<2>\g<3>', filestr)
    filestr = filestr.replace('<li><li>', '<li>')  # fix

    # Reduce redundant newlines (easy with lookahead pattern):
    # eliminate any <newline/> that goes with blanks up to another
    # <newline/> or a section heading
    pattern = r'<newline/>\s+(?=<newline/>|<[hH]\d>)'
    filestr = re.sub(pattern, '', filestr)
    # Extra blank before section heading
    pattern = r'\s+(?=^<[hH]\d>)'
    filestr = re.sub(pattern, '\n\n', filestr, flags=re.MULTILINE)
    # Eliminate <newline/> before equations and before lists
    filestr = re.sub(r'<newline/>\s+(<math|<ul>|<ol>)', r'\g<1>', filestr)
    filestr = re.sub(r'<newline/>\s+<title>', '<title>', filestr)
    # Eliminate <newline/> after </h1>, </h2>, etc.
    filestr = re.sub(r'(</h\d>)\s+<newline/>', r'\g<1>\n', filestr)

    return filestr
Ejemplo n.º 9
0
def matlabnb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Insert code and TeX blocks into `filestr` for a Matlab notebook.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks; blocks whose type is not
            'mcod' or 'mpro' are commented out in place.
        code_block_types: block type names, indexed like code_blocks.
        tex_blocks: list of TeX blocks; each is reduced in place to a
            single line embedded in $$.
        format: not used; 'matlabnb' is passed to insert_code_and_tex.

    Returns:
        The text with % prefixed to ordinary lines, %% headings, the
        blocks inserted, and blank lines removed.
    """
    # Remove all begin-end and \[ \] in tex blocks, join to one line,
    # embed in $$. Write error message if anything else than a single equation.
    pattern = r'begin\{(.+?)\}'
    for i in range(len(tex_blocks)):
        m = re.search(pattern, tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*'):
                errwarn(
                    '*** warning: \\begin{%s}-\\end{%s} does not work in Matlab notebooks'
                    % (envir, envir))
            # Escape the name: unescaped, the * of a starred environment
            # such as equation* is read as a regex quantifier and the
            # literal \begin{equation*} is never matched.
            envir_regex = re.escape(envir)
            tex_blocks[i] = re.sub(r'\\begin\{%s\}\s+' % envir_regex, '',
                                   tex_blocks[i])
            tex_blocks[i] = re.sub(r'\\end\{%s\}\s+' % envir_regex, '',
                                   tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\[', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\]', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'label\{(.+?)\}', '', tex_blocks[i])
        tex_blocks[i] = '$$' + ' '.join(
            tex_blocks[i].strip().splitlines()).strip() + '$$'
        # Note: now the tex block ends with $$!et

    # Insert % in code if not Matlab code
    for i in range(len(code_blocks)):
        executable_matlab = code_block_types[i] in ('mcod', 'mpro')
        if not executable_matlab:
            # Note that monospace font requires two blanks after %
            code_blocks[i] = '\n'.join([
                '%  ' + line for line in code_blocks[i].splitlines()
                if not line.startswith(('!bc', '!ec'))
            ]) + '\n'

    # Insert % at the beginning of each line
    from common import _CODE_BLOCK, _MATH_BLOCK
    code_line = r'^\d+ ' + _CODE_BLOCK
    code_line_problem = r' (\d+ ' + _CODE_BLOCK + ')'
    math_line = r'^\d+ ' + _MATH_BLOCK
    math_line_problem = r' (\d+ ' + _MATH_BLOCK + ')'
    heading_no = 0
    lines = filestr.splitlines()
    for i, line in enumerate(lines):
        if re.search(code_line, line, flags=re.MULTILINE):
            if heading_no < 2:
                # Add %% (empty heading) before code block because
                # code cannot come after the first heading, only
                # after the second and onwards
                lines[i] = '%%\n' + line
        elif re.search(math_line, line, flags=re.MULTILINE):
            # Math block indicator lines are left untouched
            continue
        elif re.search(code_line_problem, line, flags=re.MULTILINE):
            # Paragraphs can move a block indicator after its heading, insert \n
            lines[i] = re.sub(code_line_problem, r'\n\g<1>', line)
        elif re.search(math_line_problem, line, flags=re.MULTILINE):
            # Paragraphs can move a block indicator after its heading, insert \n
            lines[i] = re.sub(math_line_problem, r'\n\g<1>', line)
        elif line.startswith('>>>H'):
            # Heading
            lines[i] = '%%' + line.replace('>>>H', '')
            heading_no += 1
        else:
            lines[i] = '% ' + line

    filestr = '\n'.join(lines)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'matlabnb')
    filestr = re.sub(r'\$\$!et', '$$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bt\s+\$\$', '% $$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bc.+', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!ec', '', filestr, flags=re.MULTILINE)
    # Remove all blank lines
    filestr = re.sub(r'^\s+', '', filestr, flags=re.MULTILINE)
    # Fix emphasize markup (conflicts with boldface so we do a hack)
    filestr = re.sub(r'\^\^\^X(.+?)X\^\^\^',
                     r'_\g<1>_',
                     filestr,
                     flags=re.DOTALL)  # emph
    filestr = re.sub(r'\{\{\{X(.+?)X\}\}\}',
                     r'*\g<1>*',
                     filestr,
                     flags=re.DOTALL)  # bold
    filestr = re.sub(r'<<<X(.+?)X>>>', r'|\g<1>|', filestr,
                     flags=re.DOTALL)  # verb

    return filestr
Ejemplo n.º 10
0
def pandoc_code(filestr, code_blocks, code_block_types,
                tex_blocks, format):
    """Insert code and TeX blocks into `filestr` as pandoc/GitHub Markdown.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks, passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; each is stripped of single-equation
            environments and wrapped in $$ in place.
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The Markdown text with fenced code blocks, with the leading
        whitespace of the document stripped.
    """
    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, tex_block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        for envir_marker in (r'\[', r'\]',
                             r'\begin{equation*}', r'\end{equation*}'):
            tex_block = tex_block.replace(envir_marker, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align'):
                # Parenthesized single-argument print works identically as
                # a Python 2 statement and as a Python 3 function call.
                print("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + tex_block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # (the "python" typesetting is neutral if the text
    # does not parse as python)
    defs = dict(cod='Python', pycod='Python', cppcod='Cpp',
                fcod='Fortran', ccod='C',
                pro='Python', pypro='Python', cpppro='Cpp',
                fpro='Fortran', cpro='C',
                rbcod='Ruby', rbpro='Ruby',
                plcod='Perl', plpro='Perl',
                sys='Bash', dat='Python',
                pyoptpro='Python')

    github_md = option('github_md')

    # Code blocks apply the ~~~~~ delimiter, with blank lines before
    # and after (alternative: indent code 4 spaces - not preferred)
    for key, language in defs.items():
        if github_md:
            replacement = '\n```%s\n' % language
        else:
            # pandoc-extended Markdown
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language
            #replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s ,numberLines}\n' % language  # enable line numbering
        filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                         replacement, filestr, flags=re.MULTILINE)

    # any !bc with/without argument becomes an unspecified block
    if github_md:
        replacement = '\n```'
    else:
        replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
    filestr = re.sub(r'^!bc.*$', replacement, filestr, flags=re.MULTILINE)

    if github_md:
        replacement = '\n```\n'
    else:
        replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
    filestr = re.sub(r'^!ec\s*$', replacement, filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # The backslash is doubled in the replacement templates below: a lone
    # \l or \e is a bad escape for re.sub on Python 3.7+.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
Ejemplo n.º 11
0
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Insert code and TeX blocks into `filestr` as Markdown.

    Three flavors are produced depending on the options read through
    option(): strict Markdown ('strict_markdown_output', indented code
    blocks), GitHub Markdown ('github_md', backtick fences) or
    pandoc-extended Markdown (tilde fences).

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks; indented in place when strict
            Markdown output is requested.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; each is stripped of single-equation
            environments and wrapped in $$ in place.
        format: output format name, passed on to indent_lines and
            insert_code_and_tex.

    Returns:
        The Markdown text, with the leading whitespace of the document
        stripped.
    """
    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, tex_block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        for envir_marker in (r'\[', r'\]',
                             r'\begin{equation*}', r'\end{equation*}'):
            tex_block = tex_block.replace(envir_marker, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + tex_block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    if option('strict_markdown_output'):
        # Code blocks are just indented
        for i, code_block in enumerate(code_blocks):
            code_blocks[i] = indent_lines(code_block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option('github_md')

    if not option('strict_markdown_output'):
        if github_md:
            # Lowercase the language names; this rewrites the
            # language2pandoc dict itself, not a copy.
            for key in language2pandoc:
                language2pandoc[key] = language2pandoc[key].lower()

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in language2pandoc:
            language = language2pandoc[key]
            if github_md:
                replacement = '\n```%s\n' % language
            else:
                # pandoc-extended Markdown
                replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language
                #replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s ,numberLines}\n' % language  # enable line numbering
            filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                             replacement,
                             filestr,
                             flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = '\n```'
        else:
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        filestr = re.sub(r'^!bc.*$', replacement, filestr, flags=re.MULTILINE)

        if github_md:
            replacement = '```\n'
        else:
            replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
        filestr = re.sub(r'^!ec\s*$', replacement, filestr, flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r'^!bc.*$', '', filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!ec\s*$', '', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # The backslash is doubled in the replacement templates below: a lone
    # \l or \e is a bad escape for re.sub on Python 3.7+.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
Ejemplo n.º 12
0
def xml_code(filestr, code_blocks, code_block_types,
              tex_blocks, format):
    """Replace code and LaTeX blocks by XML elements.

    Parameters:
        filestr: the document text with the blocks still removed.
        code_blocks: list of code blocks, passed on to insert_code_and_tex.
        code_block_types: not used by this function.
        tex_blocks: list of TeX blocks; ``label{`` is rewritten in place to
            a backslash-prefixed label command.
        format: output format name, passed on to insert_code_and_tex.

    Returns:
        The text with <code>, <latex>, <eqref> and </li> tags inserted,
        prefixed by a <mathematics type="..."> line, and with redundant
        <newline/> tags removed.
    """
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # NOTE(review): this runs after the blocks were inserted above, so it
    # only changes the caller's tex_blocks list, not filestr - confirm
    # that this ordering is intended.
    for i, tex_block in enumerate(tex_blocks):
        if 'label' in tex_block:
            # Fix label -> \label in tex_blocks
            tex_block = tex_block.replace(' label{', ' \\label{')
            # The backslash must be doubled in the replacement template:
            # a lone \l is a bad escape for re.sub on Python 3.7+.
            tex_blocks[i] = re.sub(r'^label\{', r'\\label{',
                                   tex_block, flags=re.MULTILINE)

    def subst(m):
        """Return the opening <code> tag for a !bc line match."""
        tp = m.group(1).strip()
        if tp:
            return '<code type="%s">' % tp
        return '<code>'

    filestr = re.sub(r'^!bc(.*)', subst, filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!ec', r'</code>', filestr, flags=re.MULTILINE)

    # Record which kinds of mathematics the document contains.
    math_tp = []
    if tex_blocks:
        math_tp.append('tex-blocks')
    if re.search(r'<inlinemath>.+?</inlinemath>', filestr):
        math_tp.append('inline-math')
    math_types = ','.join(math_tp) if math_tp else 'None'
    filestr = '<mathematics type="%s">' % math_types + '\n' + filestr

    filestr = re.sub(r'^!bt', '<latex>', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et', '</latex>', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'<eqref>\g<1></eqref>', filestr)

    # Add </li> in lists
    cpattern = re.compile(r'<li>(.+?)(\s+)<li>', re.DOTALL)

    def find_list_items(match):
        """Return replacement from match of <li> tags."""
        # Does the match run out of the list?
        if re.search(r'</?(ul|ol)>', match.group(1)):
            return '<li>' + match.group(1) + match.group(2)
        return '<li>' + match.group(1) + '</li>' + match.group(2)

    # cpattern can only detect every second list item because it cannot
    # work with overlapping patterns. Remedy: have two <li> to avoid
    # overlap, fix that after all replacements are done.
    filestr = filestr.replace('<li>', '<li><li>')
    filestr = cpattern.sub(find_list_items, filestr)
    # Fix things that go wrong with cpattern: list items that go
    # through end of lists over to next list item.
    cpattern = re.compile(r'<li>(.+?)(\s+)(</?ol>|</?ul>)', re.DOTALL)
    filestr = cpattern.sub(r'<li>\g<1></li>\g<2>\g<3>', filestr)
    filestr = filestr.replace('<li><li>', '<li>')  # fix

    # Reduce redundant newlines (easy with lookahead pattern):
    # eliminate any <newline/> that goes with blanks up to another
    # <newline/> or a section heading
    pattern = r'<newline/>\s+(?=<newline/>|<[hH]\d>)'
    filestr = re.sub(pattern, '', filestr)
    # Extra blank before section heading
    pattern = r'\s+(?=^<[hH]\d>)'
    filestr = re.sub(pattern, '\n\n', filestr, flags=re.MULTILINE)
    # Eliminate <newline/> before equations and before lists
    filestr = re.sub(r'<newline/>\s+(<math|<ul>|<ol>)', r'\g<1>', filestr)
    filestr = re.sub(r'<newline/>\s+<title>', '<title>', filestr)
    # Eliminate <newline/> after </h1>, </h2>, etc.
    filestr = re.sub(r'(</h\d>)\s+<newline/>', r'\g<1>\n', filestr)

    return filestr
Ejemplo n.º 13
0
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset the code and TeX blocks of ``filestr`` in reStructuredText.

    Every code and TeX block is indented with ``indent_lines`` (the lists
    are updated in place), put back into the text with
    ``insert_code_and_tex``, and the ``!bc``/``!ec``/``!bt``/``!et``
    markers are replaced by rst constructs.  With the ``rst_mathjax``
    option (and math present in the text), the indented output of
    ``mathjax_header`` is prepended in a ``.. raw:: html`` directive and
    each TeX block becomes a raw html block delimited by ``$$``;
    otherwise TeX blocks get the same ``::`` treatment as code blocks.

    Arguments:
      filestr: the document text.
      code_blocks: list of code block strings (modified in place).
      code_block_types: not used by this function.
      tex_blocks: list of TeX block strings (modified in place).
      format: passed on to ``indent_lines``.

    Returns the processed text.  Prints a message and calls ``_abort()``
    if a ``!bt`` or ``!et`` marker is still found at the start of a line
    after the substitutions.
    """
    # In rst syntax, code blocks are typeset with :: (verbatim)
    # followed by indented blocks, so everything inside code (or TeX)
    # blocks is indented first.
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    # Fix labels
    if option('rst_mathjax'):
        for i, block in enumerate(tex_blocks):
            tex_blocks[i] = block.replace(' label{', ' \\label{')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern,
                     r'\g<1>::\n\n',
                     filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if option('rst_mathjax') and (
            re.search(r'^!bt', filestr, flags=re.MULTILINE)
            or re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented, we used 8 chars above)
        filestr = re.sub(bt_regex_pattern,
                         r'\g<1>\n\n.. raw:: html\n\n        $$',
                         filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n',
                         '        $$\n\n',
                         filestr,
                         flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim blocks...)
        # The backslash is doubled in the template: a lone \e is an
        # unknown escape, which newer versions of re reject.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code blocks:
        filestr = re.sub(bt_regex_pattern,
                         r'\g<1>::\n',
                         filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # The trailing :: is stripped from such lines.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::',
                     r'\g<1>',
                     filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            # Parenthesized single-argument print works as both the
            # Python 2 statement and the Python 3 function.
            print("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ',
                     r'\g<1>\n\n.. ',
                     filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)',
                     r'\g<1>\n\n\g<2>',
                     filestr,
                     flags=re.MULTILINE)
    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i, line in enumerate(lines):
        marked = line.startswith('<linebreakpipe>')
        if marked and not inside_block:
            # First marked line: opens the block
            inside_block = True
            lines[i] = line.replace('<linebreakpipe> ', '') + '\n'
        elif marked:
            lines[i] = '|' + line.replace('<linebreakpipe>', '')
        elif inside_block:
            # First unmarked line after the block: closes it
            inside_block = False
            lines[i] = '| ' + line + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
Ejemplo n.º 14
0
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset code and TeX blocks of ``filestr`` as Markdown.

    TeX blocks are stripped of display-math brackets and
    ``equation*`` begin/end lines and wrapped in ``$$`` (the list is
    modified in place).  After the blocks are put back with
    ``insert_code_and_tex``, ``!bc``/``!ec`` markers become fenced code
    blocks: backtick fences when the ``github_md`` option is set,
    tilde fences (pandoc-extended Markdown) otherwise.  ``!bt``/``!et``
    lines are removed.

    Arguments:
      filestr: the document text.
      code_blocks: list of code block strings.
      code_block_types: not used by this function.
      tex_blocks: list of TeX block strings (modified in place).
      format: passed on to ``insert_code_and_tex``.

    Returns the processed text, stripped of leading whitespace.
    """
    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        block = block.replace(r'\[', '')
        block = block.replace(r'\]', '')
        block = block.replace(r'\begin{equation*}', '')
        block = block.replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align'):
                # Parenthesized single-argument print works as both the
                # Python 2 statement and the Python 3 function.
                print("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # Map from !bc environment name to the language used for typesetting
    defs = dict(cod='Python',
                pycod='Python',
                cppcod='Cpp',
                fcod='Fortran',
                ccod='C',
                pro='Python',
                pypro='Python',
                cpppro='Cpp',
                fpro='Fortran',
                cpro='C',
                rbcod='Ruby',
                rbpro='Ruby',
                plcod='Perl',
                plpro='Perl',
                sys='Bash',
                dat='Python',
                pyoptpro='Python')
    # (the "python" typesetting is neutral if the text
    # does not parse as python)

    github_md = option('github_md')

    # Code blocks apply the ~~~~~ delimiter, with blank lines before
    # and after (alternative: indent code 4 spaces - not preferred)
    for key in defs:
        language = defs[key]
        if github_md:
            replacement = '\n```%s\n' % language
        else:
            # pandoc-extended Markdown
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language
        filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                         replacement,
                         filestr,
                         flags=re.MULTILINE)

    # any !bc with/without argument becomes an unspecified block
    if github_md:
        replacement = '\n```'
    else:
        replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
    filestr = re.sub(r'^!bc.*$', replacement, filestr, flags=re.MULTILINE)

    if github_md:
        replacement = '\n```\n'
    else:
        replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
    filestr = re.sub(r'^!ec\s*$', replacement, filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # In the re.sub templates the backslash is doubled: a lone \l or \e
    # is an unknown escape, which newer versions of re reject.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
Ejemplo n.º 15
0
def rst_code(filestr, code_blocks, code_block_types,
             tex_blocks, format):
    """Typeset the code and TeX blocks of ``filestr`` in reStructuredText.

    Every code and TeX block is indented with ``indent_lines`` (the lists
    are updated in place) and put back into the text with
    ``insert_code_and_tex``.  The ``!bc`` and ``!bt`` markers are then
    turned into a ``::`` appended to the preceding text, and the
    ``!ec``/``!et`` lines into blank lines.

    Arguments:
      filestr: the document text.
      code_blocks: list of code block strings (modified in place).
      code_block_types: not used by this function.
      tex_blocks: list of TeX block strings (modified in place).
      format: passed on to ``indent_lines``.

    Returns the processed text.  Prints a message and calls ``_abort()``
    if a ``!bt`` or ``!et`` marker is still found at the start of a line
    after the substitutions.
    """
    # In rst syntax, code blocks are typeset with :: (verbatim)
    # followed by indented blocks, so everything inside code (or TeX)
    # blocks is indented first.
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    # just use the same substitution for tex blocks as for code blocks:
    filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or !bt-!et environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec:: and !et:: from the above substitutions. We just replace
    # these by empty text.
    filestr = filestr.replace('!ec::', '')
    filestr = filestr.replace('!et::', '')

    # Check
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            # Parenthesized single-argument print works as both the
            # Python 2 statement and the Python 3 function.
            print("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)

    return filestr
Ejemplo n.º 16
0
def rst_code(filestr, code_blocks, code_block_types,
             tex_blocks, format):
    """Typeset the code and TeX blocks of ``filestr`` in reStructuredText.

    Every code and TeX block is indented with ``indent_lines`` (the lists
    are updated in place), put back into the text with
    ``insert_code_and_tex``, and the ``!bc``/``!ec``/``!bt``/``!et``
    markers are replaced by rst constructs.  With the ``rst_mathjax``
    option (and math present in the text), the indented output of
    ``mathjax_header`` is prepended in a ``.. raw:: html`` directive and
    each TeX block becomes a raw html block delimited by ``$$``;
    otherwise TeX blocks get the same ``::`` treatment as code blocks.

    Arguments:
      filestr: the document text.
      code_blocks: list of code block strings (modified in place).
      code_block_types: not used by this function.
      tex_blocks: list of TeX block strings (modified in place).
      format: passed on to ``indent_lines``.

    Returns the processed text.  Reports through ``errwarn`` and calls
    ``_abort()`` if a ``!bt`` or ``!et`` marker is still found at the
    start of a line after the substitutions.
    """
    # In rst syntax, code blocks are typeset with :: (verbatim)
    # followed by indented blocks, so everything inside code (or TeX)
    # blocks is indented first.
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    # Fix labels
    if option('rst_mathjax'):
        for i, block in enumerate(tex_blocks):
            tex_blocks[i] = block.replace(' label{', ' \\label{')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if option('rst_mathjax') and (
            re.search(r'^!bt', filestr, flags=re.MULTILINE)
            or re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented, we used 8 chars above)
        filestr = re.sub(bt_regex_pattern,
                         r'\g<1>\n\n.. raw:: html\n\n        $$',
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '        $$\n\n', filestr,
                         flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim blocks...)
        # The backslash is doubled in the template: a lone \e is an
        # unknown escape, which newer versions of re reject.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code blocks:
        filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # The trailing :: is stripped from such lines.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            errwarn("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)
    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i, line in enumerate(lines):
        marked = line.startswith('<linebreakpipe>')
        if marked and not inside_block:
            # First marked line: opens the block
            inside_block = True
            lines[i] = line.replace('<linebreakpipe> ', '') + '\n'
        elif marked:
            lines[i] = '|' + line.replace('<linebreakpipe>', '')
        elif inside_block:
            # First unmarked line after the block: closes it
            inside_block = False
            lines[i] = '| ' + line + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
Ejemplo n.º 17
0
def pandoc_code(filestr, code_blocks, code_block_types,
                tex_blocks, format):
    """Typeset code and TeX blocks of ``filestr`` as Markdown.

    TeX blocks are stripped of display-math brackets and
    ``equation*`` begin/end lines and wrapped in ``$$`` (the list is
    modified in place).  With the ``strict_markdown_output`` option,
    code blocks are indented with ``indent_lines`` and the ``!bc``/``!ec``
    lines are emptied; otherwise ``!bc``/``!ec`` become fenced code
    blocks, with backtick fences when the ``github_md`` option is set
    and tilde fences (pandoc-extended Markdown) if not.  ``!bt``/``!et``
    lines are removed.

    Arguments:
      filestr: the document text.
      code_blocks: list of code block strings (modified in place with
        ``strict_markdown_output``).
      code_block_types: not used by this function.
      tex_blocks: list of TeX block strings (modified in place).
      format: passed on to ``indent_lines`` and ``insert_code_and_tex``.

    Returns the processed text, stripped of leading whitespace.

    Note: with ``github_md`` (and without ``strict_markdown_output``)
    the values of the module-level ``language2pandoc`` are lowercased
    in place.
    """
    # Disabled: expand all newcommands now
    # from html import embed_newcommands
    # newcommands = embed_newcommands(filestr)
    # if newcommands:
    #     filestr = newcommands + filestr

    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        block = block.replace(r'\[', '')
        block = block.replace(r'\]', '')
        block = block.replace(r'\begin{equation*}', '')
        block = block.replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                # Parenthesized single-argument print works as both the
                # Python 2 statement and the Python 3 function.
                print("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    if option('strict_markdown_output'):
        # Code blocks are just indented
        for i, block in enumerate(code_blocks):
            code_blocks[i] = indent_lines(block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option('github_md')

    if not option('strict_markdown_output'):
        if github_md:
            for key in language2pandoc:
                language2pandoc[key] = language2pandoc[key].lower()

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in language2pandoc:
            language = language2pandoc[key]
            if github_md:
                replacement = '\n```%s\n' % language
            else:
                # pandoc-extended Markdown
                replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language
            filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                             replacement, filestr, flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = '\n```'
        else:
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        filestr = re.sub(r'^!bc.*$', replacement, filestr, flags=re.MULTILINE)

        if github_md:
            replacement = '```\n'
        else:
            replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
        filestr = re.sub(r'^!ec\s*$', replacement, filestr, flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r'^!bc.*$', '', filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!ec\s*$', '', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # In the re.sub templates the backslash is doubled: a lone \l or \e
    # is an unknown escape, which newer versions of re reject.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
Ejemplo n.º 18
0
def matlabnb_code(filestr, code_blocks, code_block_types,
                  tex_blocks, format):
    """Typeset code and TeX blocks of ``filestr`` for Matlab notebooks.

    Each TeX block is reduced to a single line wrapped in ``$$``: the
    begin/end lines of its environment, display-math brackets and
    ``label{...}`` are removed (a warning is issued through ``errwarn``
    for environments other than ``equation``/``equation*``).  Code
    blocks whose type is not ``mcod`` or ``mpro`` are commented out
    line by line.  Ordinary text lines get a leading ``% ``, lines
    starting with ``>>>H`` become ``%%`` headings, and the emphasis,
    bold and verbatim placeholders are replaced in the end.

    Arguments:
      filestr: the document text.
      code_blocks: list of code block strings (modified in place).
      code_block_types: list with the type of each code block, in the
        same order as ``code_blocks``.
      tex_blocks: list of TeX block strings (modified in place).
      format: not used by this function.

    Returns the processed text.
    """
    # Remove all begin-end and \[ \] in tex blocks, join to one line,
    # embed in $$. Write error message if anything else than a single equation.
    envir_pattern = r'begin\{(.+?)\}'
    for i in range(len(tex_blocks)):
        m = re.search(envir_pattern, tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*'):
                errwarn('*** warning: \\begin{%s}-\\end{%s} does not work in Matlab notebooks' % (envir, envir))
            # The name must be escaped: otherwise the * of equation*,
            # align* etc. acts as a regex quantifier and the begin/end
            # lines of starred environments are never matched.
            envir_re = re.escape(envir)
            tex_blocks[i] = re.sub(r'\\begin\{%s\}\s+' % envir_re, '', tex_blocks[i])
            tex_blocks[i] = re.sub(r'\\end\{%s\}\s+' % envir_re, '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\[', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\]', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'label\{(.+?)\}', '', tex_blocks[i])
        tex_blocks[i] = '$$' + ' '.join(tex_blocks[i].strip().splitlines()).strip() + '$$'
        # Note: now the tex block ends with $$!et

    # Insert % in code if envir with -t name or if not Matlab code
    for i in range(len(code_blocks)):
        executable_matlab = code_block_types[i] in ('mcod', 'mpro')
        if not executable_matlab:
            # Note that monospace font requires two blanks after %
            code_blocks[i] = '\n'.join([
                '%  ' + line for line in code_blocks[i].splitlines()
                if not (line.startswith('!bc') or line.startswith('!ec'))]) + '\n'

    # Insert % at the beginning of each line
    from common import _CODE_BLOCK, _MATH_BLOCK
    code_line = r'^\d+ ' + _CODE_BLOCK
    code_line_problem = r' (\d+ ' + _CODE_BLOCK + ')'
    math_line = r'^\d+ ' + _MATH_BLOCK
    math_line_problem = r' (\d+ ' + _MATH_BLOCK + ')'
    heading_no = 0
    lines = filestr.splitlines()
    for i, line in enumerate(lines):
        if re.search(code_line, line, flags=re.MULTILINE):
            if heading_no < 2:
                # Add %% (empty heading) before code block because
                # code cannot come after the first heading, only
                # after the second and onwards
                lines[i] = '%%\n' + line
            # (otherwise the block indicator line is left untouched)
        elif re.search(math_line, line, flags=re.MULTILINE):
            continue
        elif re.search(code_line_problem, line, flags=re.MULTILINE):
            # Paragraphs can move a block indicator after its heading, insert \n
            lines[i] = re.sub(code_line_problem, '\n\\g<1>', line)
        elif re.search(math_line_problem, line, flags=re.MULTILINE):
            # Paragraphs can move a block indicator after its heading, insert \n
            lines[i] = re.sub(math_line_problem, '\n\\g<1>', line)
        elif line.startswith('>>>H'):
            # Heading
            lines[i] = '%%' + line.replace('>>>H', '')
            heading_no += 1
        else:
            lines[i] = '% ' + line

    filestr = '\n'.join(lines)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'matlabnb')
    filestr = re.sub(r'\$\$!et', '$$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bt\s+\$\$', '% $$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bc.+', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!ec', '', filestr, flags=re.MULTILINE)
    # Remove all blank lines
    filestr = re.sub(r'^\s+', '', filestr, flags=re.MULTILINE)
    # Fix emphasize markup (conflicts with boldface so we do a hack)
    filestr = re.sub(r'\^\^\^X(.+?)X\^\^\^', r'_\g<1>_', filestr,
                     flags=re.DOTALL)  # emph
    filestr = re.sub(r'\{\{\{X(.+?)X\}\}\}', r'*\g<1>*', filestr,
                     flags=re.DOTALL)  # bold
    filestr = re.sub(r'<<<X(.+?)X>>>',       r'|\g<1>|', filestr,
                     flags=re.DOTALL)  # verb

    return filestr