def old_epytext_code(filestr):
    """Typeset code and TeX blocks as ``::`` verbatim blocks (epytext variant).

    Every code/TeX block is indented, each ``!bc``/``!bt`` opener is replaced
    by ``::`` appended to the text captured in front of it, and each
    ``!ec``/``!et`` closer is replaced by a blank line.  The logic is similar
    to ``rst.rst_code``; a special epytext version was necessary since
    epytext is fooled by newlines in code/tex blocks.

    Returns the transformed ``filestr``.
    """
    # First indent all code/tex blocks, then put them back into the text.
    filestr, code_blocks, tex_blocks = remove_code_and_tex(filestr)
    for blocks in (code_blocks, tex_blocks):
        for idx, block in enumerate(blocks):
            blocks[idx] = indent_lines(block, True)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # Substitute the begin/end markers, code blocks first, then TeX blocks
    # (see rst.rst_code for comments if problems).
    from rst import bc_regex_pattern, bt_regex_pattern
    marker_pairs = (
        (bc_regex_pattern, r'!ec\n'),
        (bt_regex_pattern, r'!et\n'),
    )
    for begin_pattern, end_pattern in marker_pairs:
        begin_regex = re.compile(begin_pattern, re.DOTALL)
        filestr = begin_regex.sub(r'\g<1>::\n\n', filestr)
        filestr = re.sub(end_pattern, '\n\n', filestr)
    return filestr
def rst_abstract(m):
    """Format an abstract/preface regex match as reST text.

    ``m`` must provide the named groups ``type``, ``text`` and ``rest``;
    each is stripped of surrounding whitespace before use.  With the
    ``rst_uio`` option a ``uio-introduction`` directive is produced;
    otherwise the abstract is a paragraph headed by the emphasized type
    name, except for type ``preface`` where the heading is dropped.
    """
    # r'\n*\g<type>.* \g<text>\n\g<rest>'
    name, text, rest = [m.group(key).strip()
                        for key in ('type', 'text', 'rest')]

    if option('rst_uio'):
        # NOTE(review): the line breaks inside this template are assumed;
        # confirm against the upstream file.
        return """

.. uio-introduction::
%s

.. contents::

.. section-numbering::


%s
""" % (indent_lines(text, 'rst'), rest)

    if name.lower() == 'preface':
        # Drop heading (short abstract for books)
        return '\n%s\n\n%s' % (text, rest)
    return '\n*%s.* %s\n\n%s' % (name, text, rest)
def rst_abstract(m):
    """Format an abstract regex match as reST text.

    ``m`` must provide the named groups ``type``, ``text`` and ``rest``;
    each is stripped of surrounding whitespace before use.  With the
    ``rst_uio`` option a ``uio-introduction`` directive is produced;
    otherwise the abstract is a paragraph headed by the emphasized type name.
    """
    # r'\n*\g<type>.* \g<text>\n\g<rest>'
    name, text, rest = [m.group(key).strip()
                        for key in ("type", "text", "rest")]

    if option("rst_uio"):
        # NOTE(review): the line breaks inside this template are assumed;
        # confirm against the upstream file.
        return """

.. uio-introduction::
%s

.. contents::

.. section-numbering::


%s
""" % (indent_lines(text, "rst"), rest)

    return "\n*%s.* %s\n\n%s" % (name, text, rest)
def rst_quote(block, format, text_size='normal'):
    """Return ``block`` indented by four spaces after an empty reST comment.

    ``text_size`` is accepted but not used.
    """
    quoted = indent_lines(block, format, ' ' * 4)
    # Insert empty comment to distinguish from possibly
    # previous list, code, etc.
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n..\n\n%s\n' % quoted
def rst_quote(block, format, text_size='normal'):
    """Return ``block`` indented by four spaces after an empty reST comment.

    ``text_size`` is accepted but not used.
    """
    quoted = indent_lines(block, format, ' ' * 4)
    # Insert empty comment to distinguish from possibly
    # previous list, code, etc.
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n..\n\n%s\n' % quoted
def rst_notice(block, format, title='Notice', text_size='normal'):
    """Typeset ``block`` as a reST ``note`` directive.

    Titles that do not start with ``Notice`` are delegated to ``rst_admon``
    so that the custom title is kept.
    """
    if not title.startswith('Notice'):
        return rst_admon(block, format, title, text_size)

    body = indent_lines(block, format, ' ' * 3)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n.. note::\n%s\n' % body
def rst_notice(block, format, title='Notice', text_size='normal'):
    """Typeset ``block`` as a reST ``note`` directive.

    Titles that do not start with ``Notice`` are delegated to ``rst_admon``
    so that the custom title is kept.
    """
    if not title.startswith('Notice'):
        return rst_admon(block, format, title, text_size)

    body = indent_lines(block, format, ' ' * 3)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n.. note::\n%s\n' % body
def rst_warning(block, format, title='Warning', text_size='normal'):
    """Typeset ``block`` as a reST ``warning`` directive.

    Titles that do not start with ``Warning`` are delegated to ``rst_admon``
    so that the custom title is kept.
    """
    if not title.startswith('Warning'):
        return rst_admon(block, format, title, text_size)

    # Use pre-defined admonition that coincides with our needs
    body = indent_lines(block, format, ' ' * 4)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n.. warning::\n%s\n' % body
def rst_warning(block, format, title='Warning', text_size='normal'):
    """Typeset ``block`` as a reST ``warning`` directive.

    Titles that do not start with ``Warning`` are delegated to ``rst_admon``
    so that the custom title is kept.
    """
    if not title.startswith('Warning'):
        return rst_admon(block, format, title, text_size)

    # Use pre-defined admonition that coincides with our needs
    body = indent_lines(block, format, ' ' * 4)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n.. warning::\n%s\n' % body
def rst_movie(m):
    """Return a reST ``raw:: html`` directive wrapping ``html_movie(m)``.

    A warning is issued through ``errwarn`` when the ``filename`` group of
    ``m`` starts with neither ``http`` nor ``mov``.
    """
    raw_html = indent_lines(html_movie(m), 'sphinx')
    rst_text = '.. raw:: html\n' + raw_html + '\n'

    filename = m.group('filename')
    if not filename.startswith(('http', 'mov')):
        errwarn('*** warning: movie file %s' % filename)
        errwarn(' is not in mov* subdirectory - this will give problems with sphinx')
    return rst_text
def rst_movie(m):
    """Return a reST ``raw:: html`` directive wrapping ``html_movie(m)``.

    A warning is printed when the ``filename`` group of ``m`` starts with
    neither ``http`` nor ``mov``.
    """
    html_text = indent_lines(html_movie(m), 'sphinx')
    rst_text = '.. raw:: html\n' + html_text + '\n'

    filename = m.group('filename')
    if not filename.startswith(('http', 'mov')):
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3, whereas the print statement is Python 2 only.
        print('*** warning: movie file %s' % filename)
        print(' is not in mov* subdirectory - this will give problems with sphinx')
    return rst_text
def rst_movie(m):
    """Return a reST ``raw:: html`` directive wrapping ``html_movie(m)``.

    A warning is printed when the ``filename`` group of ``m`` starts with
    neither ``http`` nor ``mov``.
    """
    html_text = indent_lines(html_movie(m), "sphinx")
    rst_text = ".. raw:: html\n" + html_text + "\n"

    filename = m.group("filename")
    if not filename.startswith(("http", "mov")):
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3, whereas the print statement is Python 2 only.
        print("*** warning: movie file %s" % filename)
        print(" is not in mov* subdirectory - this will give problems with sphinx")
    return rst_text
def rst_notice(block, format, title="Notice", text_size="normal"):
    """Typeset ``block`` as a reST ``note`` directive.

    Titles that do not start with ``Notice`` are delegated to ``rst_admon``
    so that the custom title is kept.
    """
    if not title.startswith("Notice"):
        return rst_admon(block, format, title, text_size)

    body = indent_lines(block, format, " " * 3)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return "\n.. note::\n%s\n" % body
def rst_admon(block, format, title='Admonition', text_size='normal'):
    """Typeset ``block`` as a generic reST ``admonition`` directive.

    An empty title, or the title ``none`` in any letter case, is replaced
    by ``Notice``.  One trailing ``!``, ``:``, ``?``, ``;`` or ``.`` is
    removed from the title.  ``text_size`` is accepted but not used.
    """
    if title == '' or title.lower() == 'none':
        # dummy title: with title as '', nothing comes out
        title = 'Notice'
    if title.endswith(('!', ':', '?', ';', '.')):
        # : is always added to the title - remove other punctuation
        title = title[:-1]

    body = indent_lines(block, format, ' ' * 3)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n.. admonition:: %s\n\n%s\n' % (title, body)
def rst_admon(block, format, title='Admonition', text_size='normal'):
    """Typeset ``block`` as a generic reST ``admonition`` directive.

    An empty title, or the title ``none`` in any letter case, is replaced
    by ``Notice``.  One trailing ``!``, ``:``, ``?``, ``;`` or ``.`` is
    removed from the title.  ``text_size`` is accepted but not used.
    """
    if title == '' or title.lower() == 'none':
        # dummy title: with title as '', nothing comes out
        title = 'Notice'
    if title.endswith(('!', ':', '?', ';', '.')):
        # : is always added to the title - remove other punctuation
        title = title[:-1]

    body = indent_lines(block, format, ' ' * 3)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return '\n.. admonition:: %s\n\n%s\n' % (title, body)
def rst_admon(block, format, title="Admonition", text_size="normal"):
    """Typeset ``block`` as a generic reST ``admonition`` directive.

    An empty title, or the title ``none`` in any letter case, is replaced
    by ``Notice``.  One trailing ``!``, ``:``, ``?``, ``;`` or ``.`` is
    removed from the title.  ``text_size`` is accepted but not used.
    """
    if title == "" or title.lower() == "none":
        # dummy title: with title as '', nothing comes out
        title = "Notice"
    if title.endswith(("!", ":", "?", ";", ".")):
        # : is always added to the title - remove other punctuation
        title = title[:-1]

    body = indent_lines(block, format, " " * 3)
    # NOTE(review): the line breaks in this template are assumed; confirm.
    return "\n.. admonition:: %s\n\n%s\n" % (title, body)
def define(FILENAME_EXTENSION,
           BLANKLINE,
           INLINE_TAGS_SUBST,
           CODE,
           LIST,
           ARGLIST,
           TABLE,
           EXERCISE,
           FIGURE_EXT,
           CROSS_REFS,
           INDEX_BIB,
           TOC,
           ENVIRS,
           QUIZ,
           INTRO,
           OUTRO,
           filestr):
    """Register the ``plain`` output format in the translator tables.

    A ``'plain'`` entry is stored in place in ``FILENAME_EXTENSION``,
    ``BLANKLINE``, ``INLINE_TAGS_SUBST``, ``CODE``, ``ARGLIST``, ``LIST``,
    ``CROSS_REFS``, ``TABLE``, ``EXERCISE``, ``INDEX_BIB``, ``TOC``,
    ``ENVIRS`` and ``QUIZ``.  ``FIGURE_EXT``, ``INTRO``, ``OUTRO`` and
    ``filestr`` are accepted but not touched.  Returns ``None``.
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION['plain'] = '.txt'
    BLANKLINE['plain'] = '\n'
    # replacement patterns for substitutions of inline tags
    INLINE_TAGS_SUBST['plain'] = {
        'math':      r'\g<begin>\g<subst>\g<end>',  # drop $ signs
        'math2':     r'\g<begin>\g<puretext>\g<end>',
        'emphasize': None,
        'bold':      None,
        'figure':    None,
        'movie':     default_movie,
        'verbatim':  r'\g<begin>\g<subst>\g<end>',  # no ` chars
        #'linkURL':   r'\g<begin>\g<link> (\g<url>)\g<end>',
        'linkURL2':  r'\g<link> (\g<url>)',
        'linkURL3':  r'\g<link> (\g<url>)',
        'linkURL2v': r'\g<link> (\g<url>)',
        'linkURL3v': r'\g<link> (\g<url>)',
        'plainURL':  r'\g<url>',
        # Raw string: '\g' is not a valid escape in a normal string literal
        # (the value is unchanged).
        'colortext': r'\g<text>',
        # doconce top section, to be substituted later
        'title':     r'======= \g<subst> =======\n',
        'author':    plain_author,
        'date':      r'\nDate: \g<subst>\n',
        # Headings are underlined with a character repeated to title length
        'chapter':       lambda m: '%s\n%s' % (m.group('subst'), '%'*len(m.group('subst'))),
        'section':       lambda m: '%s\n%s' % (m.group('subst'), '='*len(m.group('subst'))),
        'subsection':    lambda m: '%s\n%s' % (m.group('subst'), '-'*len(m.group('subst'))),
        'subsubsection': lambda m: '%s\n%s\n' % (m.group('subst'), '~'*len(m.group('subst'))),
        'paragraph':     r'*\g<subst>*\g<space>',  # extra blank
        'abstract':      r'\n*\g<type>.* \g<text>\g<rest>',
        'linebreak':     r'\g<text>',
        'footnote':      None,
        'non-breaking-space': ' ',
        'ampersand2':    r' \g<1>&\g<2>',
    }

    from rst import rst_code
    CODE['plain'] = rst_code
    from common import DEFAULT_ARGLIST
    ARGLIST['plain'] = DEFAULT_ARGLIST
    LIST['plain'] = {
        'itemize':     {'begin': '', 'item': '*', 'end': '\n'},
        'enumerate':   {'begin': '', 'item': '%d.', 'end': '\n'},
        'description': {'begin': '', 'item': '%s', 'end': '\n'},
        'separator': '\n',
    }
    CROSS_REFS['plain'] = plain_ref_and_label
    from rst import rst_table
    TABLE['plain'] = rst_table
    #TABLE['plain'] = plain_table
    EXERCISE['plain'] = plain_exercise
    INDEX_BIB['plain'] = plain_index_bib
    TOC['plain'] = plain_toc

    from common import indent_lines

    def _boxed(default_title):
        # Environment formatter that forwards block and title to plain_box;
        # the format and text_size arguments are ignored.
        return (lambda block, format, title=default_title,
                text_size='normal': plain_box(block, title))

    ENVIRS['plain'] = {
        'warning':  _boxed('Warning'),
        'notice':   _boxed('Notice'),
        'question': _boxed('Question'),
        'hint':     _boxed('Hint'),
        'summary':  _boxed('Summary'),
        'block':    _boxed('Block'),
        'box':      _boxed('none'),
        'quote':    lambda block, format, title='none', text_size='normal':
                    indent_lines(block, 'plain'),
    }
    QUIZ['plain'] = plain_quiz
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset the code and TeX blocks of ``filestr`` for reST output.

    Steps, in order: every entry of ``code_blocks`` and ``tex_blocks`` is
    indented (both lists are modified in place) and re-inserted into
    ``filestr``; ``!bc``/``!ec`` markers become ``::`` verbatim blocks;
    ``!bt``/``!et`` markers become either ``raw:: html`` blocks delimited by
    ``$$`` (option ``rst_mathjax``) or ``::`` blocks.  If a ``!bt`` or
    ``!et`` line is still present, a message is printed and ``_abort()`` is
    called.  Finally ``fix_underlines_in_headings`` is applied, blank lines
    are ensured around comments, ``<linebreakpipe>`` lines are rewritten
    and runs of three or more newlines are reduced to two.

    ``code_block_types`` is accepted but not used.  Returns the new
    ``filestr``.
    """
    # In rst syntax, code blocks are typeset with :: (verbatim)
    # followed by indented blocks. This function indents everything
    # inside code (or TeX) blocks.
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    # Fix labels
    if option('rst_mathjax'):
        for i, block in enumerate(tex_blocks):
            tex_blocks[i] = block.replace(' label{', ' \\label{')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable above
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if option('rst_mathjax') and (
            re.search(r'^!bt', filestr, flags=re.MULTILINE) or
            re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented, we used 8 chars above)
        # NOTE(review): the amount of whitespace in front of $$ in the two
        # literals below should be confirmed against the upstream file.
        filestr = re.sub(bt_regex_pattern, r'\g<1>\n\n.. raw:: html\n\n $$',
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', ' $$\n\n', filestr, flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim blocks...)
        # The backslash is doubled in the replacement template: a lone \e is
        # rejected as a bad escape by re.sub on Python 3.7+, while \\ yields
        # the same single backslash on every Python version.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code blocks:
        filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just replace these by empty text.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        c = re.compile(pattern, re.MULTILINE)
        m = c.search(filestr)
        if m:
            # print(...) with one argument works on both Python 2 and 3.
            # NOTE(review): line breaks inside this message are assumed.
            print("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)
    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i in range(len(lines)):
        if lines[i].startswith('<linebreakpipe>') and not inside_block:
            # First line of a line-break block
            inside_block = True
            lines[i] = lines[i].replace('<linebreakpipe> ', '') + '\n'
            continue
        if lines[i].startswith('<linebreakpipe>') and inside_block:
            lines[i] = '|' + lines[i].replace('<linebreakpipe>', '')
            continue
        if inside_block and not lines[i].startswith('<linebreakpipe>'):
            # First line after the block closes it
            inside_block = False
            lines[i] = '| ' + lines[i] + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
def subst_def(m):
    """Return a reST footnote definition built from the match ``m``.

    The ``text`` group is indented by three spaces, stripped of leading
    whitespace and appended to a ``.. [#name]`` marker built from the
    ``name`` group.
    """
    # NOTE(review): ``format`` is not defined in this function; presumably
    # it comes from an enclosing scope -- confirm.
    body = indent_lines(m.group('text'), format, ' ' * 3)
    marker = '.. [#%s] ' % m.group('name')
    return marker + body.lstrip()
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset the code and TeX blocks of ``filestr`` for reST output.

    Steps, in order: every entry of ``code_blocks`` and ``tex_blocks`` is
    indented (both lists are modified in place) and re-inserted into
    ``filestr``; ``!bc``/``!ec`` markers become ``::`` verbatim blocks;
    ``!bt``/``!et`` markers become either ``raw:: html`` blocks delimited by
    ``$$`` (option ``rst_mathjax``) or ``::`` blocks.  If a ``!bt`` or
    ``!et`` line is still present, a message is sent to ``errwarn`` and
    ``_abort()`` is called.  Finally ``fix_underlines_in_headings`` is
    applied, blank lines are ensured around comments, ``<linebreakpipe>``
    lines are rewritten and runs of three or more newlines are reduced to
    two.

    ``code_block_types`` is accepted but not used.  Returns the new
    ``filestr``.
    """
    # In rst syntax, code blocks are typeset with :: (verbatim)
    # followed by indented blocks. This function indents everything
    # inside code (or TeX) blocks.
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    # Fix labels
    if option('rst_mathjax'):
        for i, block in enumerate(tex_blocks):
            tex_blocks[i] = block.replace(' label{', ' \\label{')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable above
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if option('rst_mathjax') and (
            re.search(r'^!bt', filestr, flags=re.MULTILINE) or
            re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented, we used 8 chars above)
        # NOTE(review): the amount of whitespace in front of $$ in the two
        # literals below should be confirmed against the upstream file.
        filestr = re.sub(bt_regex_pattern, r'\g<1>\n\n.. raw:: html\n\n $$',
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', ' $$\n\n', filestr, flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim blocks...)
        # The backslash is doubled in the replacement template: a lone \e is
        # rejected as a bad escape by re.sub on Python 3.7+, while \\ yields
        # the same single backslash on every Python version.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code blocks:
        filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just replace these by empty text.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        c = re.compile(pattern, re.MULTILINE)
        m = c.search(filestr)
        if m:
            # NOTE(review): line breaks inside this message are assumed.
            errwarn("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)
    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i in range(len(lines)):
        if lines[i].startswith('<linebreakpipe>') and not inside_block:
            # First line of a line-break block
            inside_block = True
            lines[i] = lines[i].replace('<linebreakpipe> ', '') + '\n'
            continue
        if lines[i].startswith('<linebreakpipe>') and inside_block:
            lines[i] = '|' + lines[i].replace('<linebreakpipe>', '')
            continue
        if inside_block and not lines[i].startswith('<linebreakpipe>'):
            # First line after the block closes it
            inside_block = False
            lines[i] = '| ' + lines[i] + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset the code and TeX blocks of ``filestr`` for reST output.

    Steps, in order: every entry of ``code_blocks`` and ``tex_blocks`` is
    indented (both lists are modified in place) and re-inserted into
    ``filestr``; ``!bc``/``!ec`` and ``!bt``/``!et`` markers become ``::``
    verbatim blocks.  If a ``!bt`` or ``!et`` line is still present, a
    message is printed and ``_abort()`` is called.  Finally
    ``fix_underlines_in_headings`` is applied, blank lines are ensured
    around comments, ``<linebreakpipe>`` lines are rewritten and runs of
    three or more newlines are reduced to two.

    ``code_block_types`` is accepted but not used.  Returns the new
    ``filestr``.
    """
    # In rst syntax, code blocks are typeset with :: (verbatim)
    # followed by indented blocks. This function indents everything
    # inside code (or TeX) blocks.
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable above
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    # just use the same substitution as for code blocks:
    filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just replace these by empty text.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        c = re.compile(pattern, re.MULTILINE)
        m = c.search(filestr)
        if m:
            # print(...) with one argument works on both Python 2 and 3.
            # NOTE(review): line breaks inside this message are assumed.
            print("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)
    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i in range(len(lines)):
        if lines[i].startswith('<linebreakpipe>') and not inside_block:
            # First line of a line-break block
            inside_block = True
            lines[i] = lines[i].replace('<linebreakpipe> ', '') + '\n'
            continue
        if lines[i].startswith('<linebreakpipe>') and inside_block:
            lines[i] = '|' + lines[i].replace('<linebreakpipe>', '')
            continue
        if inside_block and not lines[i].startswith('<linebreakpipe>'):
            # First line after the block closes it
            inside_block = False
            lines[i] = '| ' + lines[i] + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
def rst_quiz(quiz):
    """Return ``html_quiz(quiz)`` wrapped in a reST ``raw:: html`` directive.

    The HTML text is indented by four spaces so that it forms the content of
    the directive.
    """
    html_text = html_quiz(quiz)
    # The original passed the bare name ``format`` here, which is neither a
    # parameter nor a local of this function.  Name the output format
    # explicitly instead.
    # NOTE(review): assumes no module-level ``format`` was intended and that
    # 'rst' is the right format name for indent_lines -- confirm.
    return '.. raw:: html\n' + indent_lines(html_text, 'rst', ' ' * 4) + '\n'
def define(FILENAME_EXTENSION,
           BLANKLINE,
           INLINE_TAGS_SUBST,
           CODE,
           LIST,
           ARGLIST,
           TABLE,
           EXERCISE,
           FIGURE_EXT,
           CROSS_REFS,
           INDEX_BIB,
           TOC,
           ENVIRS,
           QUIZ,
           INTRO,
           OUTRO,
           filestr):
    """Register the ``matlabnb`` output format in the translator tables.

    A ``'matlabnb'`` entry is stored in place in ``FILENAME_EXTENSION``,
    ``BLANKLINE``, ``INLINE_TAGS_SUBST``, ``CODE``, ``ARGLIST``,
    ``FIGURE_EXT``, ``LIST``, ``CROSS_REFS``, ``TABLE``, ``EXERCISE``,
    ``INDEX_BIB``, ``TOC``, ``ENVIRS`` and ``QUIZ``.  ``INTRO``, ``OUTRO``
    and ``filestr`` are accepted but not touched.  Returns ``None``.
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION['matlabnb'] = '.m'
    BLANKLINE['matlabnb'] = '\n'
    # replacement patterns for substitutions of inline tags
    INLINE_TAGS_SUBST['matlabnb'] = {
        'math':      None,
        'math2':     r'\g<begin>$\g<latexmath>$\g<end>',
        # emphasize goes to _..._ and bold subst afterwards takes it to *...*
        # make a different syntax and fix it in matlabnb_code
        'emphasize': r'\g<begin>^^^X\g<subst>X^^^\g<end>',
        'bold':      r'\g<begin>*\g<subst>*\g<end>',
        # Need a hack to avoid |...| for verbatim to avoid conflict in tables
        'verbatim':  r'\g<begin><<<X\g<subst>X>>>\g<end>',
        'figure':    lambda m: '<<%s>>' % m.group('filename'),
        'movie':     default_movie,
        'linkURL2':  r'\g<link> <\g<url>>',
        'linkURL3':  r'\g<link> <\g<url>>',
        'linkURL2v': r'\g<link> <\g<url>>',
        'linkURL3v': r'\g<link> <\g<url>>',
        'plainURL':  r'<\g<url>>',
        'comment':   r'%% %s',
        'inlinecomment': None,
        # Raw string: '\g' is not a valid escape in a normal string literal
        # (the value is unchanged).
        'colortext': r'\g<text>',
        'title':     r'>>>H \g<subst>\n',
        'author':    matlabnb_author,
        'date':      r'\nDate: \g<subst>\n',
        'chapter':       r'>>>H \g<subst>',
        'section':       r'>>>H \g<subst>',
        'subsection':    r'>>>H \g<subst>',
        'subsubsection': r'>>>H \g<subst>',
        # Same problem with abstract/paragraph as with emphasize, use same trick
        'abstract':      r'\n{{{X\g<type>.X}}} \g<text>\g<rest>',
        'paragraph':     r'{{{X\g<subst>X}}} ',  # extra blank
        'linebreak':     r'\g<text>',
        'footnote':      None,
        'non-breaking-space': ' ',
        'ampersand2':    r' \g<1>&\g<2>',
    }

    CODE['matlabnb'] = matlabnb_code
    from common import DEFAULT_ARGLIST
    ARGLIST['matlabnb'] = DEFAULT_ARGLIST
    FIGURE_EXT['matlabnb'] = {
        'search': ('.png', '.gif', '.jpg', '.jpeg', '.pdf'),  #.pdf?
        'convert': ('.png', '.gif', '.jpg'),
    }
    LIST['matlabnb'] = {
        'itemize':     {'begin': '', 'item': '*', 'end': '\n'},
        'enumerate':   {'begin': '', 'item': '#', 'end': '\n'},
        'description': {'begin': '', 'item': '%s', 'end': '\n'},
        'separator': '\n',
    }
    CROSS_REFS['matlabnb'] = matlabnb_ref_and_label
    from html import html_table
    TABLE['matlabnb'] = html_table
    #TABLE['matlabnb'] = matlabnb_table
    EXERCISE['matlabnb'] = plain_exercise
    INDEX_BIB['matlabnb'] = matlabnb_index_bib
    TOC['matlabnb'] = matlabnb_toc

    from common import indent_lines

    def _boxed(default_title):
        # Environment formatter that forwards block and title to
        # matlabnb_box; the format and text_size arguments are ignored.
        return (lambda block, format, title=default_title,
                text_size='normal': matlabnb_box(block, title))

    ENVIRS['matlabnb'] = {
        'warning':  _boxed('Warning'),
        'notice':   _boxed('Notice'),
        'question': _boxed('Question'),
        'hint':     _boxed('Hint'),
        'summary':  _boxed('Summary'),
        'block':    _boxed('Block'),
        'box':      _boxed('none'),
        'quote':    lambda block, format, title='none', text_size='normal':
                    indent_lines(block, 'matlabnb'),
    }
    QUIZ['matlabnb'] = matlabnb_quiz
def define(
    FILENAME_EXTENSION,
    BLANKLINE,
    INLINE_TAGS_SUBST,
    CODE,
    LIST,
    ARGLIST,
    TABLE,
    EXERCISE,
    FIGURE_EXT,
    CROSS_REFS,
    INDEX_BIB,
    TOC,
    ENVIRS,
    QUIZ,
    INTRO,
    OUTRO,
    filestr,
):
    """Register the ``plain`` output format in the translator tables.

    A ``"plain"`` entry is stored in place in ``FILENAME_EXTENSION``,
    ``BLANKLINE``, ``INLINE_TAGS_SUBST``, ``CODE``, ``ARGLIST``, ``LIST``,
    ``CROSS_REFS``, ``TABLE``, ``EXERCISE``, ``INDEX_BIB``, ``TOC``,
    ``ENVIRS`` and ``QUIZ``.  ``FIGURE_EXT``, ``INTRO``, ``OUTRO`` and
    ``filestr`` are accepted but not touched.  Returns ``None``.
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION["plain"] = ".txt"
    BLANKLINE["plain"] = "\n"
    # replacement patterns for substitutions of inline tags
    INLINE_TAGS_SUBST["plain"] = {
        "math": r"\g<begin>\g<subst>\g<end>",  # drop $ signs
        "math2": r"\g<begin>\g<puretext>\g<end>",
        "emphasize": None,
        "bold": None,
        "figure": None,
        "movie": default_movie,
        "verbatim": r"\g<begin>\g<subst>\g<end>",  # no ` chars
        #'linkURL': r'\g<begin>\g<link> (\g<url>)\g<end>',
        "linkURL2": r"\g<link> (\g<url>)",
        "linkURL3": r"\g<link> (\g<url>)",
        "linkURL2v": r"\g<link> (\g<url>)",
        "linkURL3v": r"\g<link> (\g<url>)",
        "plainURL": r"\g<url>",
        # Raw string: "\g" is not a valid escape in a normal string literal
        # (the value is unchanged).
        "colortext": r"\g<text>",
        # doconce top section, to be substituted later
        "title": r"======= \g<subst> =======\n",
        "author": plain_author,
        "date": r"\nDate: \g<subst>\n",
        # Headings are underlined with a character repeated to title length
        "chapter": lambda m: "%s\n%s" % (m.group("subst"), "%" * len(m.group("subst"))),
        "section": lambda m: "%s\n%s" % (m.group("subst"), "=" * len(m.group("subst"))),
        "subsection": lambda m: "%s\n%s" % (m.group("subst"), "-" * len(m.group("subst"))),
        "subsubsection": lambda m: "%s\n%s\n" % (m.group("subst"), "~" * len(m.group("subst"))),
        "paragraph": r"*\g<subst>*\g<space>",  # extra blank
        "abstract": r"\n*\g<type>.* \g<text>\g<rest>",
        "linebreak": r"\g<text>",
        "footnote": None,
        "non-breaking-space": " ",
        "ampersand2": r" \g<1>&\g<2>",
    }

    from rst import rst_code

    CODE["plain"] = rst_code
    from common import DEFAULT_ARGLIST

    ARGLIST["plain"] = DEFAULT_ARGLIST
    LIST["plain"] = {
        "itemize": {"begin": "", "item": "*", "end": "\n"},
        "enumerate": {"begin": "", "item": "%d.", "end": "\n"},
        "description": {"begin": "", "item": "%s", "end": "\n"},
        "separator": "\n",
    }
    CROSS_REFS["plain"] = plain_ref_and_label
    from rst import rst_table

    TABLE["plain"] = rst_table
    # TABLE['plain'] = plain_table
    EXERCISE["plain"] = plain_exercise
    INDEX_BIB["plain"] = plain_index_bib
    TOC["plain"] = plain_toc

    from common import indent_lines

    def _boxed(default_title):
        # Environment formatter that forwards block and title to plain_box;
        # the format and text_size arguments are ignored.
        return (lambda block, format, title=default_title,
                text_size="normal": plain_box(block, title))

    ENVIRS["plain"] = {
        "warning": _boxed("Warning"),
        "notice": _boxed("Notice"),
        "question": _boxed("Question"),
        "hint": _boxed("Hint"),
        "summary": _boxed("Summary"),
        "block": _boxed("Block"),
        "box": _boxed("none"),
        "quote": lambda block, format, title="none", text_size="normal":
                 indent_lines(block, "plain"),
    }
    QUIZ["plain"] = plain_quiz
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Convert Markdown-typeset DocOnce text into the JSON text of a notebook.

    Steps performed:

    1. Typeset admonition-like environments (!bwarning, !bquote, ...)
       in native Markdown according to the ``--ipynb_admon=`` option
       ('quote', 'paragraph' or 'hrule').
    2. Classify every code block as an executable cell ('cell'), a
       collapsed cell ('cell_hidden') or verbatim Markdown ('markdown'),
       stripping interactive prompts and output where needed.
    3. Collect required auxiliary files in a tar file.
    4. Split the text into text, code and math chunks, wrap math in
       $$ with explicit equation tags, and build notebook cells.
    5. Serialize the notebook and fix labels and equation references
       in the JSON text.

    Parameters:
        filestr: document text with code/math block marker lines.
        code_blocks: list of code block strings (modified in place).
        code_block_types: list with the type name of each code block.
        tex_blocks: list of LaTeX block strings (modified in place).
        format: output format name, passed on to ``indent_lines``.

    Returns:
        The notebook as a JSON string.
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')

    # Remove all !bpop-!epop and slidecell environments (they cause
    # only problems and have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    from doconce import doconce_envirs
    # NOTE(review): the [8:-2] slice presumably selects the
    # admonition-type environments - confirm against doconce_envirs().
    envirs = doconce_envirs()[8:-2]
    for envir in envirs:
        if envir_format not in ('quote', 'paragraph', 'hrule'):
            print('*** error: --ipynb_admon=%s is not supported'
                  % envir_format)
            # Stop here; there is no substitution function to apply
            _abort()

        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)

        # envir is bound as a default argument so the function does not
        # depend on the loop variable's later values
        def subst_envir(m, envir=envir):
            title = m.group(1).strip()
            # Text size specified in parenthesis?
            m2 = re.search(r'^\s*\((.+?)\)', title)

            if title == '' and envir not in ('block', 'quote'):
                title = envir.capitalize() + '.'
            elif title.lower() == 'none':
                title = ''
            elif m2:
                # Strip the matched "(size)" text exactly as written
                # (the size itself is not used in Markdown)
                title = title.replace(m2.group(0), '', 1).strip()
            elif title and title[-1] not in ('.', ':', '!', '?'):
                # Make sure the title ends with punctuation
                title += '.'
            # Recall that this formatting is called very late
            # so native format must be used!
            if title:
                title = '**' + title + '**\n'
                # Could also consider subsubsection formatting
            block = m.group(2)

            # Always use quote typesetting for quotes
            if envir_format == 'quote' or envir == 'quote':
                # Make Markdown quote of the block: lines start with >
                lines = []
                for line in block.splitlines():
                    # Just quote plain text
                    special = (_MATH_BLOCK in line or
                               _CODE_BLOCK in line or
                               line.startswith('FIGURE:') or
                               line.startswith('MOVIE:') or
                               line.startswith('|'))
                    if special:
                        lines.append('\n' + line + '\n')
                    else:
                        lines.append('> ' + line)
                block = '\n'.join(lines) + '\n\n'

                # Add quote and a blank line after title
                if title:
                    title = '> ' + title + '>\n'
            else:
                # Add a blank line after title
                if title:
                    title += '\n'

            if envir_format == 'hrule':
                # Native ------ does not work, use <hr/>
                text = '\n\n<hr/>\n' + title + block + '\n<hr/>\n\n'
            else:
                text = title + block + '\n\n'
            return text

        filestr = re.sub(pattern, subst_envir, filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of
    # a cell.
    # Fix sys environments and use run prog.py so programs can be run in
    # cell.
    # Insert %matplotlib inline in the first block using matplotlib.
    # Only typeset Python code as blocks, otherwise !bc environments
    # become plain indented Markdown.
    from doconce import dofile_basename
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    split_pyshell = not (split_pyshell is None or
                         split_pyshell in ('no', 'False', 'off'))

    ipynb_code_tp = [None] * len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and re.search(
                r'(import|from) +(matplotlib|scitools)', code_blocks[i]):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            if split_pyshell:
                # Split for each output and put in separate cell
                new_code_blocks = []
                last_cell_end = -1
                for j, line in enumerate(lines):
                    if line.startswith('>>>') or line.startswith('... '):
                        lines[j] = line[4:]
                    elif line.startswith('In ['):  # IPython
                        lines[j] = ':'.join(line.split(':')[1:]).strip()
                    elif line.startswith('   ...: '):  # IPython
                        # 8-character continuation prompt
                        lines[j] = line[8:]
                    else:
                        # output (no prefix or Out): ends the current cell
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end + 1:j + 1]))
                        last_cell_end = j
                # Input after the last output line must not be lost
                if last_cell_end + 1 < len(lines):
                    new_code_blocks.append(
                        '\n'.join(lines[last_cell_end + 1:]))
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable
                # in cell
                for j, line in enumerate(lines):
                    if line.startswith('>>> ') or line.startswith('... '):
                        lines[j] = line[4:]
                    elif line.startswith('In ['):
                        lines[j] = ':'.join(line.split(':')[1:]).strip()
                    elif line.startswith('   ...: '):
                        # IPython continuation line is input, not output
                        lines[j] = line[8:]
                    else:
                        # output
                        lines[j] = ''
                code_blocks[i] = '\n'.join(
                    [line for line in lines if line != ''])
                ipynb_code_tp[i] = 'cell'
        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, register the
            # file for the tar file and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j, line in enumerate(lines):
                m = re.search(
                    r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)', line)
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        # The pattern admits no directory part, so the
                        # file itself is what must be shipped
                        src_paths.add(name)
                        lines[j] = '%%run "%s"' % name
                else:
                    found_unix_lines = True
            # src_paths must remain a set here: later sys blocks add to it
            code_blocks[i] = '\n'.join(lines)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = sorted(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the files that the notebook needs
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        print('collected all required additional files in %s '
              'which must be distributed with the notebook' % ipynb_tarfile)
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]

    def finish_text_block():
        # Collapse a pending list of text lines into one stripped string.
        # A previous entry that is already a string (a block marker
        # directly followed by another marker) is left untouched.
        if isinstance(notebook_blocks[-1], list):
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):
            # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or \
               not code_block_tp.endswith('hid'):
                finish_text_block()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            finish_text_block()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    finish_text_block()

    # Add block type info
    pattern = r'(\d+) +%s'
    for i, block in enumerate(notebook_blocks):
        m = re.match(pattern % _CODE_BLOCK, block)
        if m:
            block_tp = ipynb_code_tp[int(m.group(1))]
            if block_tp not in ('cell', 'cell_hidden'):
                block_tp = 'text'
            notebook_blocks[i] = [block_tp, block]
        elif re.match(pattern % _MATH_BLOCK, block):
            notebook_blocks[i] = ['math', block]
        else:
            notebook_blocks[i] = ['text', block]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex_blocks[i] = tex_blocks[i].replace(delimiter, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                print("""\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir)
        # Markdown: add $$ on each side of the equation and make sure
        # there is no newline after the equation
        tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'

        # Add labels for the eqs above the block (for reference)
        if labels:
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label
                                      for label in labels]) + '\n\n' + \
                            tex_blocks[i]

    # notebook_blocks is now a list of text chunks in markdown and
    # math/code line instructions. Insert code and tex blocks.
    for entry in notebook_blocks:
        # start of marker line: number block-indicator code-type
        marker_line = entry[1]
        is_code = _CODE_BLOCK in marker_line
        is_math = _MATH_BLOCK in marker_line
        if is_code or is_math:
            n = int(marker_line.split()[0])
            if is_code:
                entry[1] = code_blocks[n]  # can be list!
            else:
                entry[1] = tex_blocks[n]

    # Make IPython structures
    nb_version = int(option('ipynb_version=', '3'))
    if nb_version == 3:
        from IPython.nbformat.v3 import (
            new_code_cell, new_text_cell, new_worksheet,
            new_notebook, new_metadata, new_author)
        nb = new_worksheet()
        cells = nb.cells
    elif nb_version == 4:
        from IPython.nbformat.v4 import (
            new_code_cell, new_markdown_cell, new_notebook)
        cells = []
    else:
        print('*** error: --ipynb_version=%s is not supported '
              '(must be 3 or 4)' % nb_version)
        _abort()

    def make_code_cell(source, number, collapsed):
        # Create a code cell with the keyword names of the chosen version
        if nb_version == 3:
            return new_code_cell(input=source, prompt_number=number,
                                 collapsed=collapsed)
        return new_code_cell(source=source, execution_count=number)

    mdstr = []  # plain md format of the notebook
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if (block_tp == 'text' or block_tp == 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                cells.append(new_text_cell(u'markdown', source=block))
            else:
                cells.append(new_markdown_cell(source=block))
            mdstr.append(('markdown', block))
        elif block_tp == 'cell' and block != '' and block != []:
            # A split interactive session arrives as a list of cells
            sources = block if isinstance(block, list) else [block]
            for source in sources:
                source = source.rstrip()
                if source != '':
                    cells.append(
                        make_code_cell(source, prompt_number, False))
                    prompt_number += 1
                    mdstr.append(('codecell', source))
        elif block_tp == 'cell_hidden' and block != '':
            block = block.rstrip()
            cells.append(make_code_cell(block, prompt_number, True))
            prompt_number += 1
            mdstr.append(('codecell', block))

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            # NOTE(review): eval runs text taken from the document;
            # replace by explicit parsing if documents can be untrusted.
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson

        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        print('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()
    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)

    # \eqref{} just gives (???) link at this stage, so we use explicit
    # references to the tags inserted in the equations
    def subst_ref(m):
        label = m.group(1)
        if label in label2tag:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        print('*** error: label "%s" is not defined' % label)
        # An unresolved reference is removed from the text
        return ''

    filestr = re.sub(r'\(ref\{(.+?)\}\)', subst_ref, filestr)
    return filestr
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Typeset code and LaTeX blocks for pandoc, GitHub or strict Markdown.

    LaTeX blocks are stripped for single-equation delimiters and wrapped
    in $$. Code blocks become fenced blocks (~~~ with a language class
    for pandoc-extended Markdown, ``` for GitHub-flavored Markdown) or
    plain indented blocks when the strict_markdown_output option is set.
    Finally, labels get a leading backslash and (ref{...}) is turned
    into an eqref command.

    Parameters:
        filestr: document text with !bc/!ec and !bt/!et markers.
        code_blocks: list of code block strings (modified in place
            when strict Markdown is requested).
        code_block_types: not used by this function.
        tex_blocks: list of LaTeX block strings (modified in place).
        format: output format name, passed on to the helper functions.

    Returns:
        The translated document text.
    """
    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, tex_block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex_block = tex_block.replace(delimiter, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + tex_block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    strict_markdown = option('strict_markdown_output')
    if strict_markdown:
        # Code blocks are just indented
        for i, code_block in enumerate(code_blocks):
            code_blocks[i] = indent_lines(code_block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option('github_md')

    if not strict_markdown:
        if github_md:
            # Language names are lower-cased for GitHub; note that this
            # rewrites the shared language2pandoc table in place.
            for key in language2pandoc:
                language2pandoc[key] = language2pandoc[key].lower()

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in language2pandoc:
            if github_md:
                replacement = '\n```%s\n' % language2pandoc[key]
            else:
                # pandoc-extended Markdown
                replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language2pandoc[key]
            filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                             replacement, filestr, flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = '\n```'
        else:
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        filestr = re.sub(r'^!bc.*$', replacement, filestr,
                         flags=re.MULTILINE)

        if github_md:
            replacement = '```\n'
        else:
            replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
        filestr = re.sub(r'^!ec\s*$', replacement, filestr,
                         flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r'^!bc.*$', '', filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!ec\s*$', '', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # The backslash in a re.sub replacement must be doubled: a single
    # backslash followed by a letter is an unknown escape, which newer
    # versions of the re module reject.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Typeset code and LaTeX blocks as rst literal blocks.

    Every block is indented and put back into the text, and the text
    preceding a !bc/!bt marker is extended with '::' through the
    module-level bc_regex_pattern and bt_regex_pattern. The function
    aborts if a !bt or !et marker is still present afterwards.

    Parameters:
        filestr: document text with !bc/!ec and !bt/!et markers.
        code_blocks, tex_blocks: lists of block strings; both are
            modified in place.
        code_block_types: not used by this function.
        format: output format name, passed on to indent_lines.

    Returns:
        The translated document text.
    """
    # Indent everything inside code blocks first, then tex blocks
    for blocks in (code_blocks, tex_blocks):
        for idx, block in enumerate(blocks):
            blocks[idx] = indent_lines(block, format)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # Begin markers: extend the preceding text with ::
    # End markers: replace by a blank line
    # (problems with substituting !bc and !bt may be caused by
    # characters missing in the two regex patterns)
    begin_code = re.compile(bc_regex_pattern, re.MULTILINE | re.DOTALL)
    end_code = re.compile(r'^!ec\n', re.MULTILINE)
    begin_tex = re.compile(bt_regex_pattern, re.MULTILINE)
    end_tex = re.compile(r'^!et *\n', re.MULTILINE)
    filestr = begin_code.sub(r'\g<1>::\n\n', filestr)
    filestr = end_code.sub('\n', filestr)
    filestr = begin_tex.sub(r'\g<1>::\n', filestr)
    filestr = end_tex.sub('\n', filestr)

    # Blocks that follow each other without text in between come out
    # of the substitutions above as !ec:: and !et::, which are removed
    filestr = filestr.replace('!ec::', '').replace('!et::', '')

    # Check that no tex markers are left
    for pattern in ('^!bt', '^!et'):
        if re.search(pattern, filestr, flags=re.MULTILINE):
            print("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes
    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before comments
    comment_after_text = re.compile(r'([.:;?!])\n^\.\. ', re.MULTILINE)
    filestr = comment_after_text.sub(r'\g<1>\n\n.. ', filestr)

    return filestr
def subst_def(m):
    """
    Return the rst footnote definition for a regex match.

    The match must have the named groups 'text' and 'name'. The text is
    indented with three spaces and placed after a '.. [#name] ' marker.
    Note that format is not a parameter here; it is resolved from the
    surrounding scope.
    """
    indented = indent_lines(m.group('text'), format, ' ' * 3)
    marker = '.. [#%s] ' % m.group('name')
    # The first line follows the marker directly, so drop its indent
    return marker + indented.lstrip()
def define(FILENAME_EXTENSION, BLANKLINE, INLINE_TAGS_SUBST, CODE, LIST,
           ARGLIST, TABLE, EXERCISE, FIGURE_EXT, CROSS_REFS, INDEX_BIB,
           TOC, ENVIRS, QUIZ, INTRO, OUTRO, filestr):
    """
    Register the 'matlabnb' format in the translation tables.

    All arguments except filestr are dicts that are extended in place
    with a 'matlabnb' entry; INTRO, OUTRO and filestr are not touched.
    """
    from common import DEFAULT_ARGLIST
    from common import indent_lines
    from html import html_table

    fmt = 'matlabnb'
    encoding = 'utf-8'  # not read anywhere in this function

    FILENAME_EXTENSION[fmt] = '.m'
    BLANKLINE[fmt] = '\n'

    # Replacement patterns shared by several inline tags
    heading = r'>>>H \g<subst>'
    link = r'\g<link> <\g<url>>'

    def typeset_figure(m):
        return '<<%s>>' % m.group('filename')

    INLINE_TAGS_SUBST[fmt] = {
        'math': None,
        'math2': r'\g<begin>$\g<latexmath>$\g<end>',
        # emphasize goes to _..._ and bold subst afterwards takes it to
        # *...*; make a different syntax and fix it in matlabnb_code
        'emphasize': r'\g<begin>^^^X\g<subst>X^^^\g<end>',
        'bold': r'\g<begin>*\g<subst>*\g<end>',
        # Need a hack to avoid |...| for verbatim to avoid conflict
        # in tables
        'verbatim': r'\g<begin><<<X\g<subst>X>>>\g<end>',
        'figure': typeset_figure,
        'movie': default_movie,
        'linkURL2': link,
        'linkURL3': link,
        'linkURL2v': link,
        'linkURL3v': link,
        'plainURL': r'<\g<url>>',
        'comment': r'%% %s',
        'inlinecomment': None,
        'colortext': r'\g<text>',
        'title': heading + '\\n',
        'author': matlabnb_author,
        'date': r'\nDate: \g<subst>\n',
        'chapter': heading,
        'section': heading,
        'subsection': heading,
        'subsubsection': heading,
        # Same problem with abstract/paragraph as with emphasize,
        # use same trick
        'abstract': r'\n{{{X\g<type>.X}}} \g<text>\g<rest>',
        'paragraph': r'{{{X\g<subst>X}}} ',  # extra blank
        'linebreak': r'\g<text>',
        'footnote': None,
        'non-breaking-space': ' ',
        'ampersand2': r' \g<1>&\g<2>',
    }

    CODE[fmt] = matlabnb_code
    ARGLIST[fmt] = DEFAULT_ARGLIST
    FIGURE_EXT[fmt] = {
        'search': ('.png', '.gif', '.jpg', '.jpeg', '.pdf'),  # .pdf?
        'convert': ('.png', '.gif', '.jpg'),
    }

    def list_type(item):
        # Every list type shares empty begin text and newline end text
        return {'begin': '', 'item': item, 'end': '\n'}

    LIST[fmt] = {
        'itemize': list_type('*'),
        'enumerate': list_type('#'),
        'description': list_type('%s'),
        'separator': '\n',
    }

    CROSS_REFS[fmt] = matlabnb_ref_and_label
    TABLE[fmt] = html_table
    EXERCISE[fmt] = plain_exercise
    INDEX_BIB[fmt] = matlabnb_index_bib
    TOC[fmt] = matlabnb_toc

    def boxed(default_title):
        # Make an environment typesetter that differs only in its
        # default title
        def typeset(block, format, title=default_title,
                    text_size='normal'):
            return matlabnb_box(block, title)
        return typeset

    def quote(block, format, title='none', text_size='normal'):
        return indent_lines(block, 'matlabnb')

    environments = {}
    for envir, default_title in (('warning', 'Warning'),
                                 ('notice', 'Notice'),
                                 ('question', 'Question'),
                                 ('hint', 'Hint'),
                                 ('summary', 'Summary'),
                                 ('block', 'Block'),
                                 ('box', 'none')):
        environments[envir] = boxed(default_title)
    environments['quote'] = quote
    ENVIRS[fmt] = environments

    QUIZ[fmt] = matlabnb_quiz
def rst_movie(m):
    """
    Typeset a movie for rst as raw HTML.

    The HTML code returned by html_movie for the match m is indented
    (with the 'sphinx' format) and placed under a '.. raw:: html'
    directive.
    """
    indented_html = indent_lines(html_movie(m), 'sphinx')
    directive = '.. raw:: html\n'
    return directive + indented_html + '\n'
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Typeset code and LaTeX blocks for pandoc, GitHub or strict Markdown.

    LaTeX blocks are stripped for single-equation delimiters and wrapped
    in $$. Code blocks become fenced blocks (~~~ with a language class
    for pandoc-extended Markdown, ``` for GitHub-flavored Markdown) or
    plain indented blocks when the strict_markdown_output option is set.
    Finally, labels get a leading backslash and (ref{...}) is turned
    into an eqref command.

    Parameters:
        filestr: document text with !bc/!ec and !bt/!et markers.
        code_blocks: list of code block strings (modified in place
            when strict Markdown is requested).
        code_block_types: not used by this function.
        tex_blocks: list of LaTeX block strings (modified in place).
        format: output format name, passed on to the helper functions.

    Returns:
        The translated document text.
    """
    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, tex_block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex_block = tex_block.replace(delimiter, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                # Single-argument print works as statement and function
                print("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + tex_block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    strict_markdown = option('strict_markdown_output')
    if strict_markdown:
        # Code blocks are just indented
        for i, code_block in enumerate(code_blocks):
            code_blocks[i] = indent_lines(code_block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option('github_md')

    if not strict_markdown:
        # Mapping of envirs to correct Pandoc verbatim environment
        defs = dict(
            cod='Python', pycod='Python', cppcod='Cpp',
            fcod='Fortran', ccod='C',
            pro='Python', pypro='Python', cpppro='Cpp',
            fpro='Fortran', cpro='C',
            rbcod='Ruby', rbpro='Ruby',
            plcod='Perl', plpro='Perl',
            htmlcod='HTML', htmlpro='HTML',
            # sys, dat, csv, txt: no support for pure text,
            # just use a plain text block
            pyoptpro='Python', pyscpro='Python',
            ipy='Python', pyshell='Python')
        # (the "Python" typesetting is neutral if the text
        # does not parse as python)

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in defs:
            if github_md:
                replacement = '\n```%s\n' % defs[key]
            else:
                # pandoc-extended Markdown
                replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % defs[key]
            filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                             replacement, filestr, flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = '\n```'
        else:
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        filestr = re.sub(r'^!bc.*$', replacement, filestr,
                         flags=re.MULTILINE)

        if github_md:
            replacement = '```\n'
        else:
            replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
        filestr = re.sub(r'^!ec\s*$', replacement, filestr,
                         flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r'^!bc.*$', '', filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!ec\s*$', '', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # The backslash in a re.sub replacement must be doubled: a single
    # backslash followed by a letter is an unknown escape, which newer
    # versions of the re module reject.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Typeset code and LaTeX blocks for pandoc, GitHub or strict Markdown.

    LaTeX blocks are stripped for single-equation delimiters and wrapped
    in $$. Code blocks become fenced blocks (~~~ with a language class
    for pandoc-extended Markdown, ``` for GitHub-flavored Markdown) or
    plain indented blocks when the strict_markdown_output option is set.
    Finally, labels get a leading backslash and (ref{...}) is turned
    into an eqref command.

    Parameters:
        filestr: document text with !bc/!ec and !bt/!et markers.
        code_blocks: list of code block strings (modified in place
            when strict Markdown is requested).
        code_block_types: not used by this function.
        tex_blocks: list of LaTeX block strings (modified in place).
        format: output format name, passed on to the helper functions.

    Returns:
        The translated document text.
    """
    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i, tex_block in enumerate(tex_blocks):
        # Remove latex envir in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex_block = tex_block.replace(delimiter, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                # Single-argument print works as statement and function
                print("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = '$$\n' + tex_block + '$$\n'
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    strict_markdown = option('strict_markdown_output')
    if strict_markdown:
        # Code blocks are just indented
        for i, code_block in enumerate(code_blocks):
            code_blocks[i] = indent_lines(code_block, format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option('github_md')

    if not strict_markdown:
        if github_md:
            # Language names are lower-cased for GitHub; note that this
            # rewrites the shared language2pandoc table in place.
            for key in language2pandoc:
                language2pandoc[key] = language2pandoc[key].lower()

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in language2pandoc:
            if github_md:
                replacement = '\n```%s\n' % language2pandoc[key]
            else:
                # pandoc-extended Markdown
                replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language2pandoc[key]
            filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                             replacement, filestr, flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = '\n```'
        else:
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        filestr = re.sub(r'^!bc.*$', replacement, filestr,
                         flags=re.MULTILINE)

        if github_md:
            replacement = '```\n'
        else:
            replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
        filestr = re.sub(r'^!ec\s*$', replacement, filestr,
                         flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r'^!bc.*$', '', filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!ec\s*$', '', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # The backslash in a re.sub replacement must be doubled: a single
    # backslash followed by a letter is an unknown escape, which newer
    # versions of the re module reject.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r'^(\s+)\*\s+(\[[x ]\])\s+'
        filestr = re.sub(pattern, r'\g<1>- \g<2> ', filestr,
                         flags=re.MULTILINE)

    return filestr
def subst_def(m):
    """
    Build an rst footnote definition from a regex match.

    Reads the named groups "text" and "name" of m. The text is indented
    by three spaces and appended, without its leading whitespace, to
    the ".. [#name] " marker. Note that format is not a parameter here;
    it is resolved from the surrounding scope.
    """
    body = indent_lines(m.group("text"), format, " " * 3)
    footnote_name = m.group("name")
    return "".join([".. [#%s] " % footnote_name, body.lstrip()])
def ipynb_code(filestr, code_blocks, code_block_types,
               tex_blocks, format):
    """
    Convert the Markdown-typeset document ``filestr`` to notebook JSON text.

    Parameters:
      filestr: document text in which code and math blocks are represented
        by one-line markers (lines containing ``_CODE_BLOCK``/``_MATH_BLOCK``
        preceded by the block number).
      code_blocks: list of code block strings; modified in place (an entry
        may become a list of strings when a pyshell/ipy session is split).
      code_block_types: list of type strings, parallel to ``code_blocks``.
      tex_blocks: list of LaTeX block strings; modified in place.
      format: output format name, forwarded to ``indent_lines``.

    Returns the notebook serialized as a JSON string.

    Side effects: may create (via the ``tar`` command) or remove the file
    ``ipynb-<dofile_basename>-src.tar.gz`` in the current directory, and
    reports problems through ``errwarn``/``_abort``.
    """
    # (Expansion of newcommands via html.embed_newcommands is disabled here.)

    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here. All envirs are typeset as pandoc_quote.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')
    if envir_format not in ('quote', 'paragraph', 'hrule'):
        errwarn('*** error: --ipynb_admon=%s is not supported' % envir_format)
        # Without a supported format there is no way to typeset the
        # environments below, so stop like the other error paths do.
        _abort()

    # Remove all !bpop-!epop environments (they cause only problems and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL|re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    for envir in envirs:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)

        def typeset_envir(m, envir=envir):
            # envir is bound as a default so the closure cannot pick up a
            # later loop value.
            title = m.group(1).strip()
            # Text size specified in parenthesis?
            m2 = re.search(r'^\s*\((.+?)\)', title)

            if title == '' and envir not in ('block', 'quote'):
                title = envir.capitalize() + '.'
            elif title.lower() == 'none':
                title = ''
            elif m2:
                # Strip the size specification exactly as it was written
                # (it may contain upper case letters)
                title = title.replace('(%s)' % m2.group(1), '').strip()
            elif title and title[-1] not in ('.', ':', '!', '?'):
                # Make sure the title ends with punctuation
                title += '.'

            # Recall that this formatting is called very late
            # so native format must be used!
            if title:
                title = '**' + title + '**\n'
                # Could also consider subsubsection formatting
            block = m.group(2)

            # Always use quote typesetting for quotes
            if envir_format == 'quote' or envir == 'quote':
                # Make Markdown quote of the block: lines start with >
                lines = []
                for line in block.splitlines():
                    # Just quote plain text
                    is_special = (_MATH_BLOCK in line or
                                  _CODE_BLOCK in line or
                                  line.startswith('FIGURE:') or
                                  line.startswith('MOVIE:') or
                                  line.startswith('|'))
                    if is_special:
                        lines.append('\n' + line + '\n')
                    else:
                        lines.append('> ' + line)
                block = '\n'.join(lines) + '\n\n'

                # Add quote and a blank line after title
                if title:
                    title = '> ' + title + '>\n'
            else:
                # Add a blank line after title
                if title:
                    title += '\n'

            if envir_format == 'hrule':
                # Native ------ does not work, use <hr/>
                text = '\n\n<hr/>\n' + title + \
                       block + '\n<hr/>\n\n'
            else:
                text = title + block + '\n\n'
            return text

        filestr = re.sub(pattern, typeset_envir, filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of a cell
    # Fix sys environments and use run prog.py so programs can be run in cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environments
    # become plain indented Markdown.
    from doconce import dofile_basename
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    # Kept as a set for the whole loop (several sys blocks may add to it)
    src_paths = set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    if split_pyshell is None:
        split_pyshell = False
    elif split_pyshell in ('no', 'False', 'off'):
        split_pyshell = False
    else:
        split_pyshell = True

    # IPython continuation prompt; 8 characters, matching the [8:] slices
    ipy_continuation = '   ...: '

    ipynb_code_tp = [None]*len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and (
            re.search(r'import +matplotlib', code_blocks[i]) or
            re.search(r'from +matplotlib', code_blocks[i]) or
            re.search(r'import +scitools', code_blocks[i]) or
            re.search(r'from +scitools', code_blocks[i])):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            last_cell_end = -1
            if split_pyshell:
                new_code_blocks = []
                # Split for each output and put in separate cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>>') or lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):  # IPython
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith(ipy_continuation):  # IPython
                        lines[j] = lines[j][8:]
                    else:
                        # output (no prefix or Out)
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end+1:j+1]))
                        last_cell_end = j
                # Commands after the last output line must not be lost
                if last_cell_end < len(lines) - 1:
                    new_code_blocks.append(
                        '\n'.join(lines[last_cell_end+1:]))
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable in cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>> ') or lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith(ipy_continuation):
                        lines[j] = lines[j][8:]
                    else:
                        # output
                        lines[j] = ''

                # Drop every empty line (output lines were blanked above)
                lines = [line for line in lines if line != '']
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'

        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, copy the file
            # to separate subdir and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        # NOTE(review): the regex admits no directory part,
                        # so dirname(name) looks like it is always '' here -
                        # confirm what should go into the tar file.
                        src_paths.add(os.path.dirname(name))
                        lines[j] = '%%run "%s"' % name
                else:
                    found_unix_lines = True
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = '\n'.join(lines)
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = sorted(src_paths)  # list in a reproducible order
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        errwarn('collected all required additional files in ' + ipynb_tarfile + ' which must be distributed with the notebook')
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]

    def close_text_block():
        # Join the pending list of text lines into one string. When the
        # previous entry is already a block marker (string), leave it alone:
        # joining a string would interleave newlines between its characters.
        if isinstance(notebook_blocks[-1], list):
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):  # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or not code_block_tp.endswith('hid'):
                close_text_block()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            close_text_block()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    close_text_block()

    # Add block type info
    pattern = r'(\d+) +%s'
    for i in range(len(notebook_blocks)):
        m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i])
        if m:
            idx = int(m.group(1))
            if ipynb_code_tp[idx] == 'cell':
                notebook_blocks[i] = ['cell', notebook_blocks[i]]
            elif ipynb_code_tp[idx] == 'cell_hidden':
                notebook_blocks[i] = ['cell_hidden', notebook_blocks[i]]
            else:
                notebook_blocks[i] = ['text', notebook_blocks[i]]
        elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]):
            notebook_blocks[i] = ['math', notebook_blocks[i]]
        else:
            notebook_blocks[i] = ['text', notebook_blocks[i]]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    # '$$': wrap in $$ ... $$; 'heading': one-line formula inside a ###
    # heading (the equation then looks bigger)
    eq_type = '$$'
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label, 'label{%s} \\tag{%s}' %
                (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        tex_blocks[i] = tex_blocks[i].replace(r'\[', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\]', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\begin{equation*}', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir)
        if eq_type == '$$':
            # Markdown: add $$ on each side of the equation.
            # Make sure there are no newline after equation
            tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        elif eq_type == 'heading':
            # Remove newlines in math expressions to keep everything
            # within a heading
            tex_blocks[i] = '### $ ' + ' '.join(tex_blocks[i].splitlines()) + ' $'

        # Add labels for the eqs above the block (for reference)
        if labels:
            #label_tp = '<a name="%s"></a>'
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label
                                      for label in labels]) + '\n\n' + \
                            tex_blocks[i]

    # blocks is now a list of text chunks in markdown and math/code line
    # instructions. Insert code and tex blocks
    for i in range(len(notebook_blocks)):
        marker = notebook_blocks[i][1]
        if _CODE_BLOCK in marker or _MATH_BLOCK in marker:
            # start of marker: number block-indicator code-type
            n = int(marker.split()[0])
            # Decide from the marker only, never from the inserted content
            if _CODE_BLOCK in marker:
                notebook_blocks[i][1] = code_blocks[n]  # can be list!
            elif _MATH_BLOCK in marker:
                notebook_blocks[i][1] = tex_blocks[n]

    # Make IPython structures

    nb_version = int(option('ipynb_version=', '4'))
    if nb_version == 3:
        try:
            # new_author is needed by the eval of the authors line below
            from IPython.nbformat.v3 import (
                new_code_cell, new_text_cell, new_worksheet,
                new_notebook, new_metadata, new_author)
            nb = new_worksheet()
        except ImportError:
            errwarn('*** error: could not import IPython.nbformat.v3!')
            errwarn(' set --ipynb_version=4 or leave out --ipynb_version=3')
            _abort()
    elif nb_version == 4:
        try:
            from nbformat.v4 import (
                new_code_cell, new_markdown_cell, new_notebook)
        except ImportError:
            # Try old style
            try:
                from IPython.nbformat.v4 import (
                    new_code_cell, new_markdown_cell, new_notebook)
            except ImportError:
                errwarn('*** error: cannot do import nbformat.v4 or IPython.nbformat.v4')
                errwarn(' make sure IPython notebook or Jupyter is installed correctly')
                _abort()
    cells = []
    # Plain (cell type, text) record of the notebook; only useful for
    # dumping the cells in a simple ASCII format (that dump is disabled).
    mdstr = []
    prompt_number = 1

    def add_code_cell(source, number, collapsed):
        # Append one code cell in the data model of the chosen nb_version
        if nb_version == 3:
            nb.cells.append(new_code_cell(
                input=source,
                prompt_number=number,
                collapsed=collapsed))
        elif nb_version == 4:
            cells.append(new_code_cell(
                source=source,
                execution_count=number,
                metadata=dict(collapsed=collapsed)))
        mdstr.append(('codecell', source))

    for block_tp, block in notebook_blocks:
        if (block_tp == 'text' or block_tp == 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            elif nb_version == 4:
                cells.append(new_markdown_cell(source=block))
            mdstr.append(('markdown', block))
        elif block_tp == 'cell' and block != '' and block != []:
            # A split pyshell/ipy session arrives as a list of cell sources
            if isinstance(block, list):
                sources = block
            else:
                sources = [block]
            for source in sources:
                source = source.rstrip()
                if source != '':
                    add_code_cell(source, prompt_number, False)
                    prompt_number += 1
        elif block_tp == 'cell_hidden' and block != '':
            add_code_cell(block.rstrip(), prompt_number, True)
            prompt_number += 1

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            # SECURITY NOTE(review): eval executes text taken from the
            # document (the "authors = [new_author(...)]" line); only safe
            # for trusted input.
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson

        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        # Same preference order as for the nbformat.v4 import above
        try:
            from nbformat import writes
        except ImportError:
            from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        errwarn('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()
    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)
    # \\eqref{} just gives (???) link at this stage - future versions
    # will probably support labels
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    # Now we use explicit references to tags
    def link_to_tag(m):
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError as e:
            # Nothing is returned for an unknown label, only the error
            # message is issued
            errwarn('*** error: label "%s" is not defined' % str(e))

    filestr = re.sub(r'\(ref\{(.+?)\}\)', link_to_tag, filestr)
    # (Alternatives tried earlier and disabled: MathJax #mjx-eqn-<tag>
    # links, which do not work in ipynb, and plain "Eq (label)" text.)
    return filestr
def define(FILENAME_EXTENSION,
           BLANKLINE,
           INLINE_TAGS_SUBST,
           CODE,
           LIST,
           ARGLIST,
           TABLE,
           EXERCISE,
           FIGURE_EXT,
           CROSS_REFS,
           INDEX_BIB,
           TOC,
           ENVIRS,
           QUIZ,
           INTRO,
           OUTRO,
           filestr):
    """
    Register the 'plain' format in the format tables passed as arguments.

    All arguments except ``filestr`` are dicts that are extended in place
    with a ``'plain'`` entry. ``FIGURE_EXT``, ``INTRO``, ``OUTRO`` and
    ``filestr`` are accepted but not touched by this function.
    Returns None.
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION['plain'] = '.txt'
    BLANKLINE['plain'] = '\n'
    # replacement patterns for substitutions of inline tags
    INLINE_TAGS_SUBST['plain'] = {
        'math':      r'\g<begin>\g<subst>\g<end>',  # drop $ signs
        'math2':     r'\g<begin>\g<puretext>\g<end>',
        'emphasize': None,
        'bold':      None,
        'figure':    None,
        'movie':     default_movie,
        'verbatim':  r'\g<begin>\g<subst>\g<end>',  # no ` chars
        #'linkURL':   r'\g<begin>\g<link> (\g<url>)\g<end>',
        'linkURL2':  r'\g<link> (\g<url>)',
        'linkURL3':  r'\g<link> (\g<url>)',
        'linkURL2v': r'\g<link> (\g<url>)',
        'linkURL3v': r'\g<link> (\g<url>)',
        'plainURL':  r'\g<url>',
        'colortext': r'\g<text>',
        # doconce top section, to be substituted later
        'title':     r'======= \g<subst> =======\n',
        'author':    plain_author,
        'date':      r'\nDate: \g<subst>\n',
        # Headings are underlined with a rule as long as the heading text
        'chapter':       lambda m: '%s\n%s' % (m.group('subst'), '%' * len(m.group('subst'))),
        'section':       lambda m: '%s\n%s' % (m.group('subst'), '=' * len(m.group('subst'))),
        'subsection':    lambda m: '%s\n%s' % (m.group('subst'), '-' * len(m.group('subst'))),
        'subsubsection': lambda m: '%s\n%s\n' % (m.group('subst'), '~' * len(m.group('subst'))),
        'paragraph':     r'*\g<subst>*\g<space>',  # extra blank
        'abstract':      r'\n*\g<type>.* \g<text>\g<rest>',
        'linebreak':     r'\g<text>',
        'footnote':      None,
        'non-breaking-space': ' ',
        'ampersand2':    r' \g<1>&\g<2>',
        }

    from rst import rst_code
    CODE['plain'] = rst_code
    from common import DEFAULT_ARGLIST
    ARGLIST['plain'] = DEFAULT_ARGLIST
    LIST['plain'] = {
        'itemize':
        {'begin': '', 'item': '*', 'end': '\n'},

        'enumerate':
        {'begin': '', 'item': '%d.', 'end': '\n'},

        'description':
        {'begin': '', 'item': '%s', 'end': '\n'},

        'separator': '\n',
        }
    CROSS_REFS['plain'] = plain_ref_and_label
    from rst import rst_table
    TABLE['plain'] = rst_table
    #TABLE['plain'] = plain_table
    EXERCISE['plain'] = plain_exercise
    INDEX_BIB['plain'] = plain_index_bib
    TOC['plain'] = plain_toc

    from common import indent_lines

    def boxed(default_title):
        # Typesetter that forwards block and title to plain_box; format and
        # text_size are accepted for a uniform call signature but unused.
        return lambda block, format, title=default_title, text_size='normal': \
            plain_box(block, title)

    ENVIRS['plain'] = {
        'warning':  boxed('Warning'),
        'notice':   boxed('Notice'),
        'question': boxed('Question'),
        'hint':     boxed('Hint'),
        'summary':  boxed('Summary'),
        'block':    boxed('Block'),
        'box':      boxed('none'),
        # Quotes are indented instead of boxed
        'quote':    lambda block, format, title='none', text_size='normal':
                    indent_lines(block, 'plain'),
        }

    QUIZ['plain'] = plain_quiz