def rst_author(authors_and_institutions, auth2index,
               inst2index, index2inst, auth2email):
    """
    Typeset the AUTHOR information for reST output.

    With the ``rst_uio`` option set, the name (and e-mail, if present) of
    the first entry is written as a ``uio-meta`` directive. Otherwise all
    authors are collected in one ``:Authors:`` field, where ``@`` in an
    e-mail address is written out as `` at ``. Institutions are skipped.

    authors_and_institutions is a sequence of 3-item entries: item 0 is
    the author name and item 2 the e-mail (may be empty); the middle item
    is not used. The remaining arguments are not used by this function.

    Returns the typeset text.
    """
    if option('rst_uio'):
        if authors_and_institutions:
            # Use first author and email
            responsible = authors_and_institutions[0][0]
            email = authors_and_institutions[0][2]
            # The directive and each of its options must be on separate
            # lines, hence the explicit newlines.
            # NOTE(review): option indentation follows the e-mail line
            # below - confirm against the consumer of uio-meta.
            text = '\n.. uio-meta::\n :responsible-name: %s\n' % responsible
            if email:
                text += ' :responsible-email: %s\n\n' % email
        else:
            # print(...) with a single argument is valid in Python 2 and 3
            print('*** error: with --rst_uio there must be an AUTHOR:')
            print(' field with (at least) one author w/email who will be')
            print(' listed as the responsible under uio-meta::')
            # NOTE(review): presumably _abort() never returns; if it did,
            # text would be unbound at the return below - confirm.
            _abort()
    else:
        authors = []
        for author, _, email in authors_and_institutions:
            if email:
                email = email.replace('@', ' at ')
                authors.append(author + ' (%s)' % email)
            else:
                authors.append(author)

        # (text is already r-stripped in typeset_authors)
        # we skip institutions in rst
        text = ':Authors: ' + ', '.join(authors)
    return text
def rst_author(authors_and_institutions, auth2index,
               inst2index, index2inst, auth2email):
    """
    Typeset the AUTHOR information for reST output.

    With the ``rst_uio`` option set, the first entry's name (and e-mail,
    if present) is emitted as a ``uio-meta`` directive; a missing AUTHOR
    is reported through errwarn followed by _abort(). Without the option,
    all authors go into a single ``:Authors:`` field with ``@`` in e-mail
    addresses replaced by `` at ``. Institutions are skipped in rst.

    authors_and_institutions is a sequence of 3-item entries (author
    name, an unused item, e-mail). The other arguments are unused here.

    Returns the typeset text.
    """
    if not option('rst_uio'):
        authors = []
        for author, _, email in authors_and_institutions:
            if email:
                author += ' (%s)' % email.replace('@', ' at ')
            authors.append(author)
        # (text is already r-stripped in typeset_authors)
        return ':Authors: ' + ', '.join(authors)

    if not authors_and_institutions:
        errwarn('*** error: with --rst_uio there must be an AUTHOR:')
        errwarn(' field with (at least) one author w/email who will be')
        errwarn(' listed as the responsible under uio-meta::')
        _abort()

    # Use first author and email.
    # NOTE(review): if _abort() could return, the indexing below would
    # fail on the empty sequence (the original would instead hit an
    # unbound `text`); presumably _abort() terminates - confirm.
    responsible = authors_and_institutions[0][0]
    email = authors_and_institutions[0][2]
    # Directive and options must be on separate lines in reST.
    # NOTE(review): option indentation mirrors the e-mail line - confirm
    # against the consumer of uio-meta.
    text = '\n.. uio-meta::\n :responsible-name: %s\n' % responsible
    if email:
        text += ' :responsible-email: %s\n\n' % email
    return text
def rst_author(authors_and_institutions, auth2index, inst2index, index2inst, auth2email):
    """
    Typeset the AUTHOR information for reST output.

    If the ``rst_uio`` option is set, the first author (item 0 of the
    first entry) and e-mail (item 2, optional) become a ``uio-meta``
    directive. Otherwise the authors are joined into an ``:Authors:``
    field; e-mail addresses get ``@`` replaced by `` at ``.
    Institutions are not typeset. Only authors_and_institutions is read.

    Returns the typeset text.
    """
    if option("rst_uio"):
        if authors_and_institutions:
            # Use first author and email
            first = authors_and_institutions[0]
            responsible, email = first[0], first[2]
            # One line per directive/option, as reST requires.
            # NOTE(review): option indentation follows the e-mail line
            # below - confirm against the consumer of uio-meta.
            text = "\n.. uio-meta::\n :responsible-name: %s\n" % responsible
            if email:
                text += " :responsible-email: %s\n\n" % email
        else:
            # Parenthesized single-argument print works on Python 2 and 3
            print("*** error: with --rst_uio there must be an AUTHOR:")
            print(" field with (at least) one author w/email who will be")
            print(" listed as the responsible under uio-meta::")
            # NOTE(review): presumably _abort() does not return; otherwise
            # text is unbound at the return below - confirm.
            _abort()
    else:
        authors = [
            author + " (%s)" % email.replace("@", " at ") if email else author
            for author, _, email in authors_and_institutions
        ]
        # (text is already r-stripped in typeset_authors)
        # we skip institutions in rst
        text = ":Authors: " + ", ".join(authors)
    return text
def rst_bib(filestr, citations, pubfile, pubdata, numbering=True):
    """
    Replace doconce citations and bibliography with reST syntax.
    If numbering is True, the keys used in the bibliography are
    replaced by numbers (RefX). This will often look better.

    citations maps each citation label to its number; when it is empty
    filestr is returned untouched. When pubfile is not None, the line
    starting with ``BIBFILE:`` in filestr is replaced by the text
    returned from bibliography(pubdata, citations, format='rst').

    Returns the modified filestr.
    """
    if not citations:
        return filestr

    filestr = cite_with_multiple_args2multiple_cites(filestr)
    if numbering:
        # Zero-pad all numbers to the width of the largest one
        n = len(str(max(citations.values())))
        cite = '[Ref%%0%dd]' % n  # cannot have blanks in ref label
    for label, number in citations.items():
        if numbering:
            ref = cite % number + '_'
        else:
            ref = '[%s]_' % label
        filestr = filestr.replace('cite{%s}' % label, ref)

    if pubfile is not None:
        # Could use rst format, but we stick to the common doconce format
        bibtext = bibliography(pubdata, citations, format='rst')
        if numbering:
            for label, number in citations.items():
                old, new = '[%s]' % label, cite % number
                try:
                    bibtext = bibtext.replace(old, new)
                except UnicodeDecodeError as e:
                    if "can't decode byte" in str(e):
                        try:
                            bibtext = bibtext.decode('utf-8').replace(old, new)
                        except UnicodeDecodeError as e2:
                            # str(): an exception cannot be concatenated
                            # to a string directly (TypeError)
                            errwarn('UnicodeDecodeError: ' + str(e2))
                            errwarn('*** error: problems in %s' % pubfile)
                            errwarn(' with key ' + label)
                            errwarn(' tried to do decode("utf-8"), but it did not work')
                    else:
                        errwarn(str(e))
                        errwarn('*** error: problems in %s' % pubfile)
                        errwarn(' with key ' + label)
                        _abort()

        # A function replacement inserts bibtext verbatim; as a template
        # string, backslashes in the bibliography would be read as escapes.
        filestr = re.sub(r'^BIBFILE:.+$', lambda m: bibtext, filestr,
                         flags=re.MULTILINE)
    return filestr
def define(FILENAME_EXTENSION,
           BLANKLINE,
           INLINE_TAGS_SUBST,
           CODE,
           LIST,
           ARGLIST,
           TABLE,
           EXERCISE,
           FIGURE_EXT,
           CROSS_REFS,
           INDEX_BIB,
           TOC,
           ENVIRS,
           QUIZ,
           INTRO,
           OUTRO,
           filestr):
    """
    Register the 'rst' (reStructuredText) format in the translation tables.

    All arguments but filestr are dicts that are extended in place with
    an 'rst' entry (OUTRO is accepted but not modified here). filestr is
    only searched: if it contains a non-breaking space, the |nbsp|
    substitution definition is appended to INTRO['rst'], unless the
    document has no TITLE: line and the current format is rst or sphinx,
    in which case an error is reported and _abort() is called.
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION['rst'] = '.rst'
    BLANKLINE['rst'] = '\n'

    def underlined(char, tail=''):
        # Return a substitution function that typesets the matched title
        # followed by a line of char with the same length as the title.
        # (note len(m.group('subst')) gives wrong length for latin-1
        # strings, seems to work for utf-8)
        def typeset(m):
            title = m.group('subst')
            return '%s\n%s%s' % (title, char * len(title), tail)
        return typeset

    INLINE_TAGS_SUBST['rst'] = {
        # math and math2 are redefined below if --rst_mathjax
        'math': r'\g<begin>\g<subst>\g<end>',
        'math2': r'\g<begin>\g<puretext>\g<end>',
        'emphasize': None,  # => just use doconce markup (*emphasized words*)
        'bold': r'\g<begin>**\g<subst>**\g<end>',
        'verbatim': r'\g<begin>``\g<subst>``\g<end>',
        'label': r'\g<subst>',  # should be improved, rst has cross ref
        'reference': r'\g<subst>',
        # colortext cannot employ pure HTML code, use bold instead
        'colortext': r'**\g<text>**',
        # Use anonymous hyperlink references to avoid warnings if the link
        # name appears twice
        'linkURL2': r'`\g<link> <\g<url>>`__',
        'linkURL3': r'`\g<link> <\g<url>>`__',
        'linkURL2v': r'`\g<link> <\g<url>>`__',  # no verbatim, does not work well
        'linkURL3v': r'`\g<link> <\g<url>>`__',  # same
        'plainURL': r'`<\g<url>>`_',
        'inlinecomment': r'color{red}{(**\g<name>**: \g<comment>})',
        'chapter': underlined('%'),
        'section': underlined('='),
        'subsection': underlined('-'),
        'subsubsection': underlined('~', tail='\n'),
        'paragraph': r'**\g<subst>**\n',  # extra newline
        'abstract': rst_abstract,
        'title': None,  # taken care of in ref_and_label_commoncode
        'date': r':Date: \g<subst>\n',
        'author': rst_author,
        'figure': rst_figure,
        'movie': rst_movie,
        # rst does not like empty comment lines, so empty comments are
        # removed (we insert an extra blank first to be safe)
        'comment': lambda c: '' if c.isspace() or c == '' else '\n.. %s\n' % c,
        # '| text' does not work directly: it interferes with tables and
        # requires a final blank line after the block
        'linebreak': r'<linebreakpipe> \g<text>',  # fixed in rst_code/sphinx_code as a hack
        'footnote': rst_footnotes,
        'non-breaking-space': ' |nbsp| ',
        'horizontal-rule': '---------',
        'ampersand2': r' \g<1>&\g<2>',
    }
    if option('rst_mathjax'):
        # rst2html conversion requires four backslashes here for one of them
        # to survive
        INLINE_TAGS_SUBST['rst']['math'] = r'\g<begin>\\\\( \g<subst> \\\\)\g<end>'
        INLINE_TAGS_SUBST['rst']['math2'] = r'\g<begin>\\\\( \g<latexmath> \\\\)\g<end>'

    ENVIRS['rst'] = {
        'quote': rst_quote,
        'warning': rst_warning,
        'question': rst_question,
        'notice': rst_notice,
        'summary': rst_summary,
        'block': rst_block,
        'box': rst_box,
    }

    CODE['rst'] = rst_code  # function for typesetting code

    LIST['rst'] = {
        # lists must end with a blank line - we insert one extra
        'itemize': {'begin': '', 'item': '*', 'end': '\n'},
        'enumerate': {'begin': '', 'item': '%d.', 'end': '\n'},
        'description': {'begin': '', 'item': '%s', 'end': '\n'},
        'separator': '\n',
    }
    from common import DEFAULT_ARGLIST
    ARGLIST['rst'] = DEFAULT_ARGLIST
    FIGURE_EXT['rst'] = {
        'search': ('.png', '.gif', '.jpg', '.jpeg', '.pdf', '.eps', '.ps'),
        'convert': ('.png', '.gif', '.jpg'),
    }
    CROSS_REFS['rst'] = rst_ref_and_label
    INDEX_BIB['rst'] = rst_index_bib

    TABLE['rst'] = rst_table
    EXERCISE['rst'] = plain_exercise
    TOC['rst'] = lambda s: '.. contents:: Table of Contents\n :depth: 2'
    QUIZ['rst'] = rst_quiz
    # A reST comment followed by a blank line.
    # NOTE(review): line layout of this literal is reconstructed - confirm.
    INTRO['rst'] = ('.. Automatically generated reStructuredText file '
                    'from DocOnce source '
                    '(https://github.com/hplgit/doconce/)\n\n')

    # http://stackoverflow.com/questions/11830242/non-breaking-space
    from common import INLINE_TAGS
    if re.search(INLINE_TAGS['non-breaking-space'], filestr):
        # :trim: must be on its own indented line to act as an option.
        # NOTE(review): line layout of this literal is reconstructed - confirm.
        nbsp = '\n.. |nbsp| unicode:: 0xA0\n :trim:\n\n'
        if 'TITLE:' not in filestr:
            import common
            if common.format in ('rst', 'sphinx'):
                errwarn('*** error: non-breaking space character ~ is used,')
                errwarn(' but this will give an error when the document does')
                errwarn(' not have a title.')
                _abort()
        else:
            INTRO['rst'] += nbsp
def ref_and_label_commoncode(section_label2title, format, filestr):
    """
    Prepare headings, labels and the TITLE line in filestr for reST output.

    Steps: apply fix_ref_section_chapter; append a counter to section
    titles that occur more than once (non-unique titles give non-unique
    links in reST) and record the new title in section_label2title for
    labels found there; typeset the TITLE: line as a heading one level
    above the highest section heading (at most 9 ``=``); turn
    ``label{...}`` after a heading into a ``.. _label:`` target before
    it; finally remove all remaining ``label{...}``.

    For format 'sphinx' a title longer than 63 characters is reported
    through errwarn followed by _abort().

    Returns the modified filestr.
    """
    filestr = fix_ref_section_chapter(filestr, format)

    # Deal with the problem of identical titles, which makes problem
    # with non-unique links in reST: add a counter to the title
    debugtext = ''
    section_pattern = r'^\s*(={3,9})(.+?)(={3,9})(\s*label\{(.+?)\})?'
    all_sections = re.findall(section_pattern, filestr, flags=re.MULTILINE)

    # First count the no of titles with the same wording
    titles = {}
    max_heading = 1  # track the top heading level for correct TITLE typesetting
    for heading, title, _, _, label in all_sections:
        titles.setdefault(title, []).append(label or None)
        max_heading = max(max_heading, len(heading))

    # Typeset TITLE so that it gets the highest+1 (but no higher) section level
    max_heading = min(max_heading + 2, 9)  # one level up (2 =)
    pattern = r'^TITLE:\s*(.+)$'
    if format == 'sphinx':
        # Title cannot be more than 63 chars...
        m = re.search(pattern, filestr, flags=re.MULTILINE)
        if m:
            title = m.group(1).strip()
            if len(title) > 63:
                errwarn('*** error: sphinx title cannot be longer than 63 characters')
                errwarn(' current title: "%s" (%d characters)' %
                        (title, len(title)))
                _abort()
    rule = '=' * max_heading
    # \\g: the group reference must reach re.sub as backslash + g
    filestr = re.sub(pattern,
                     '.. Document title:\n\n%s \\g<1> %s\n' % (rule, rule),
                     filestr, flags=re.MULTILINE)

    # Make new titles
    title_counter = {}  # count repeated titles (need to append counter to make unique links)
    sections = []
    for heading, title, _, _, label in all_sections:
        label = label or None
        if len(titles[title]) > 1:
            title_counter[title] = title_counter.get(title, 0) + 1
            # The counter is meant to be recognized and removed after the
            # formats are compiled.
            # NOTE(review): an earlier comment spoke of "much whitespace"
            # around the counter; the literal here has single spaces -
            # confirm against the code that strips the counter.
            new_title = title + ' (%d) ' % title_counter[title]
            sections.append((heading, new_title, label, title))
            if label in section_label2title:
                section_label2title[label] = new_title
        else:
            sections.append((heading, title, label, title))

    # Make replacements
    for heading, title, label, old_title in sections:
        if title != old_title:
            debugtext += '\nchanged title: %s -> %s\n' % (old_title, title)
        # title and label may contain ? () etc., that's why we take re.escape
        title_pattern = r'%s\s*%s\s*%s' % (heading, re.escape(old_title), heading)
        new_heading = '%s %s %s' % (heading, title, heading)
        # The substitution depends on whether we have a label or not
        if label is not None:
            title_pattern += r'\s*label\{%s\}' % re.escape(label)
            new_heading = '.. _%s:\n\n' % label + new_heading
        # Function replacement: no trouble with \t, \n etc. in the title.
        # count=1 replaces at most one heading per section.
        filestr = re.sub(title_pattern, lambda m: new_heading, filestr, count=1)

    # remove label{...} from output: first lines holding only a label,
    # then all the remaining
    filestr = re.sub(r'^label\{[^}]+?\}\s*$', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'label\{[^}]+?\}', '', filestr)

    import doconce
    doconce.debugpr(debugtext)

    return filestr
def rst_code(filestr, code_blocks, code_block_types,
             tex_blocks, format):
    """
    Insert code and TeX blocks into filestr and typeset them for reST.

    In rst syntax, code blocks are typeset with :: (verbatim) followed by
    indented blocks. The entries of code_blocks and tex_blocks are
    indented in place with indent_lines, inserted into filestr, and the
    !bc/!ec/!bt/!et markers are substituted. With the rst_mathjax option
    and math present, TeX blocks are wrapped in ``.. raw:: html``
    directives and the text from mathjax_header is put first in the file.
    If a !bt or !et marker is left at the start of a line, an error is
    reported through errwarn followed by _abort(). Finally headings,
    blank lines around comments and <linebreakpipe> line blocks are
    fixed. code_block_types is not used.

    Returns the modified filestr.
    """
    # Slice assignment keeps the in-place update of the callers' lists
    code_blocks[:] = [indent_lines(block, format) for block in code_blocks]
    tex_blocks[:] = [indent_lines(block, format) for block in tex_blocks]

    # Fix labels
    if option('rst_mathjax'):
        tex_blocks[:] = [block.replace(' label{', ' \\label{')
                         for block in tex_blocks]

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if option('rst_mathjax') and (
            re.search(r'^!bt', filestr, flags=re.MULTILINE) or
            re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        # NOTE(review): presumably the project's own html module; under
        # Python 3 the name html resolves to the standard library - confirm.
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented).
        # NOTE(review): an earlier comment mentions 8 chars of indentation
        # while the $$ literals below carry one space - confirm.
        filestr = re.sub(bt_regex_pattern, r'\g<1>\n\n.. raw:: html\n\n $$',
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', ' $$\n\n', filestr, flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim blocks...)
        # The backslash is doubled: \e alone is a bad escape in a
        # replacement template on Python 3.7+.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code blocks:
        filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just replace these by empty text.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            errwarn(' Still %s left after handling of code and tex blocks. '
                    'Problem is probably that %s is not preceded by text '
                    'which can be extended with :: (required). '
                    % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)

    # Line breaks interfere with tables and need a final blank line too
    marker = '<linebreakpipe>'
    lines = filestr.splitlines()
    inside_block = False
    for i, line in enumerate(lines):
        marked = line.startswith(marker)
        if marked and not inside_block:
            # First line of a block: drop the marker, add a blank line
            inside_block = True
            lines[i] = line.replace(marker + ' ', '') + '\n'
        elif marked:
            lines[i] = '|' + line.replace(marker, '')
        elif inside_block:
            # First unmarked line ends the block
            inside_block = False
            lines[i] = '| ' + line + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
def define(
    FILENAME_EXTENSION,
    BLANKLINE,
    INLINE_TAGS_SUBST,
    CODE,
    LIST,
    ARGLIST,
    TABLE,
    EXERCISE,
    FIGURE_EXT,
    CROSS_REFS,
    INDEX_BIB,
    TOC,
    ENVIRS,
    QUIZ,
    INTRO,
    OUTRO,
    filestr,
):
    """
    Register the "rst" (reStructuredText) format in the translation tables.

    Every argument except filestr is a dict that gets an "rst" entry
    added in place (OUTRO is accepted but left untouched). filestr is
    only searched: when it contains a non-breaking space, the |nbsp|
    substitution definition is appended to INTRO["rst"]; if the document
    then lacks a TITLE: line and the current format is rst or sphinx, an
    error is printed and _abort() is called instead.
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION["rst"] = ".rst"
    BLANKLINE["rst"] = "\n"

    def underlined(char, tail=""):
        # Return a substitution function that typesets the matched title
        # followed by a line of char with the same length as the title.
        # (note len(m.group('subst')) gives wrong length for latin-1
        # strings, seems to work for utf-8)
        def typeset(m):
            title = m.group("subst")
            return "%s\n%s%s" % (title, char * len(title), tail)

        return typeset

    INLINE_TAGS_SUBST["rst"] = {
        # math and math2 are redefined below if --rst_mathjax
        "math": r"\g<begin>\g<subst>\g<end>",
        "math2": r"\g<begin>\g<puretext>\g<end>",
        "emphasize": None,  # => just use doconce markup (*emphasized words*)
        "bold": r"\g<begin>**\g<subst>**\g<end>",
        "verbatim": r"\g<begin>``\g<subst>``\g<end>",
        "label": r"\g<subst>",  # should be improved, rst has cross ref
        "reference": r"\g<subst>",
        # colortext cannot employ pure HTML code, use bold instead
        "colortext": r"**\g<text>**",
        # Use anonymous hyperlink references to avoid warnings if the link
        # name appears twice
        "linkURL2": r"`\g<link> <\g<url>>`__",
        "linkURL3": r"`\g<link> <\g<url>>`__",
        "linkURL2v": r"`\g<link> <\g<url>>`__",  # no verbatim, does not work well
        "linkURL3v": r"`\g<link> <\g<url>>`__",  # same
        "plainURL": r"`<\g<url>>`_",
        "inlinecomment": r"color{red}{(**\g<name>**: \g<comment>})",
        "chapter": underlined("%"),
        "section": underlined("="),
        "subsection": underlined("-"),
        "subsubsection": underlined("~", tail="\n"),
        "paragraph": r"**\g<subst>**\n",  # extra newline
        "abstract": rst_abstract,
        "title": None,  # taken care of in ref_and_label_commoncode
        "date": r":Date: \g<subst>\n",
        "author": rst_author,
        "figure": rst_figure,
        "movie": rst_movie,
        # rst does not like empty comment lines, so empty comments are
        # removed (we insert an extra blank first to be safe)
        "comment": lambda c: "" if c.isspace() or c == "" else "\n.. %s\n" % c,
        # '| text' does not work directly: it interferes with tables and
        # requires a final blank line after the block
        "linebreak": r"<linebreakpipe> \g<text>",  # fixed in rst_code/sphinx_code as a hack
        "footnote": rst_footnotes,
        "non-breaking-space": " |nbsp| ",
        "horizontal-rule": "---------",
        "ampersand2": r" \g<1>&\g<2>",
    }
    if option("rst_mathjax"):
        # rst2html conversion requires four backslashes here for one of them
        # to survive
        INLINE_TAGS_SUBST["rst"]["math"] = r"\g<begin>\\\\( \g<subst> \\\\)\g<end>"
        INLINE_TAGS_SUBST["rst"]["math2"] = r"\g<begin>\\\\( \g<latexmath> \\\\)\g<end>"

    ENVIRS["rst"] = {
        "quote": rst_quote,
        "warning": rst_warning,
        "question": rst_question,
        "notice": rst_notice,
        "summary": rst_summary,
        "block": rst_block,
        "box": rst_box,
    }

    CODE["rst"] = rst_code  # function for typesetting code

    LIST["rst"] = {
        # lists must end with a blank line - we insert one extra
        "itemize": {"begin": "", "item": "*", "end": "\n"},
        "enumerate": {"begin": "", "item": "%d.", "end": "\n"},
        "description": {"begin": "", "item": "%s", "end": "\n"},
        "separator": "\n",
    }
    from common import DEFAULT_ARGLIST

    ARGLIST["rst"] = DEFAULT_ARGLIST
    FIGURE_EXT["rst"] = {
        "search": (".png", ".gif", ".jpg", ".jpeg", ".pdf", ".eps", ".ps"),
        "convert": (".png", ".gif", ".jpg"),
    }
    CROSS_REFS["rst"] = rst_ref_and_label
    INDEX_BIB["rst"] = rst_index_bib

    TABLE["rst"] = rst_table
    EXERCISE["rst"] = plain_exercise
    TOC["rst"] = lambda s: ".. contents:: Table of Contents\n :depth: 2"
    QUIZ["rst"] = rst_quiz
    # A reST comment followed by a blank line.
    # NOTE(review): line layout of this literal is reconstructed - confirm.
    INTRO["rst"] = (
        ".. Automatically generated reStructuredText file "
        "from DocOnce source "
        "(https://github.com/hplgit/doconce/)\n\n"
    )

    # http://stackoverflow.com/questions/11830242/non-breaking-space
    from common import INLINE_TAGS

    if re.search(INLINE_TAGS["non-breaking-space"], filestr):
        # :trim: must be on its own indented line to act as an option.
        # NOTE(review): line layout of this literal is reconstructed - confirm.
        nbsp = "\n.. |nbsp| unicode:: 0xA0\n :trim:\n\n"
        if "TITLE:" not in filestr:
            import common

            if common.format in ("rst", "sphinx"):
                # Single-argument print(...) is valid in Python 2 and 3
                print("*** error: non-breaking space character ~ is used,")
                print(" but this will give an error when the document does")
                print(" not have a title.")
                _abort()
        else:
            INTRO["rst"] += nbsp
def ref_and_label_commoncode(section_label2title, format, filestr):
    """
    Prepare references, headings, labels and the TITLE line for reST.

    Steps: rewrite "section ref{...}"/"chapter ref{...}" phrases (and
    drop Exercise/Project/Problem in front of ref{...}); append a counter
    to section titles that occur more than once and record the new title
    in section_label2title for labels found there; typeset the TITLE:
    line as a heading one level above the highest section heading (at
    most 9 ``=``); turn ``label{...}`` after a heading into a
    ``.. _label:`` target before it; remove all remaining ``label{...}``.

    For format "sphinx" a title longer than 63 characters is reported
    with print followed by _abort().

    Returns the modified filestr.
    """
    # .... see section ref{my:sec} is replaced by
    # see the section "...section heading..."
    filestr = re.sub(r"[Ss]ection(s?)\s+ref\{", r"the section\g<1> ref{", filestr)
    filestr = re.sub(r"[Cc]hapter(s?)\s+ref\{", r"the chapter\g<1> ref{", filestr)
    # Need special adjustment to handle start of sentence (capital) or not.
    filestr = re.sub(
        r"([.?!]\s+|\n\n|[%=~-]\n+)the (sections?|chapters?)\s+ref",
        r"\g<1>The \g<2> ref",
        filestr,
    )
    # Remove Exercise, Project, Problem in references since those words
    # are used in the title of the section too
    filestr = re.sub(
        r"(the\s*)?([Ee]xercises?|[Pp]rojects?|[Pp]roblems?)\s+ref\{",
        r"ref{",
        filestr,
    )

    # Deal with the problem of identical titles, which makes problem
    # with non-unique links in reST: add a counter to the title
    debugtext = ""
    section_pattern = r"^\s*(={3,9})(.+?)(={3,9})(\s*label\{(.+?)\})?"
    all_sections = re.findall(section_pattern, filestr, flags=re.MULTILINE)

    # First count the no of titles with the same wording
    titles = {}
    max_heading = 1  # track the top heading level for correct TITLE typesetting
    for heading, title, _, _, label in all_sections:
        titles.setdefault(title, []).append(label or None)
        max_heading = max(max_heading, len(heading))

    # Typeset TITLE so that it gets the highest+1 (but no higher) section level
    max_heading = min(max_heading + 2, 9)  # one level up (2 =)
    pattern = r"^TITLE:\s*(.+)$"
    if format == "sphinx":
        # Title cannot be more than 63 chars...
        m = re.search(pattern, filestr, flags=re.MULTILINE)
        if m:
            title = m.group(1).strip()
            if len(title) > 63:
                # Single-argument print(...) is valid in Python 2 and 3
                print("*** error: sphinx title cannot be longer than 63 characters")
                print(' current title: "%s" (%d characters)' % (title, len(title)))
                _abort()
    rule = "=" * max_heading
    # \\g: the group reference must reach re.sub as backslash + g
    filestr = re.sub(
        pattern,
        ".. Document title:\n\n%s \\g<1> %s\n" % (rule, rule),
        filestr,
        flags=re.MULTILINE,
    )

    # Make new titles
    title_counter = {}  # count repeated titles
    sections = []
    for heading, title, _, _, label in all_sections:
        label = label or None
        if len(titles[title]) > 1:
            title_counter[title] = title_counter.get(title, 0) + 1
            new_title = title + " (%d) " % title_counter[title]
            sections.append((heading, new_title, label, title))
            if label in section_label2title:
                section_label2title[label] = new_title
        else:
            sections.append((heading, title, label, title))

    # Make replacements
    for heading, title, label, old_title in sections:
        if title != old_title:
            debugtext += "\nchanged title: %s -> %s\n" % (old_title, title)
        # title and label may contain ? () etc., that's why we take re.escape
        title_pattern = r"%s\s*%s\s*%s" % (heading, re.escape(old_title), heading)
        new_heading = "%s %s %s" % (heading, title, heading)
        # The substitution depends on whether we have a label or not
        if label is not None:
            title_pattern += r"\s*label\{%s\}" % re.escape(label)
            new_heading = ".. _%s:\n\n" % label + new_heading
        # Function replacement: no trouble with \t, \n etc. in the title.
        # count=1 replaces at most one heading per section.
        filestr = re.sub(title_pattern, lambda m: new_heading, filestr, count=1)

    # remove label{...} from output: first lines holding only a label,
    # then all the remaining
    filestr = re.sub(r"^label\{[^}]+?\}\s*$", "", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"label\{[^}]+?\}", "", filestr)

    import doconce

    doconce.debugpr(debugtext)

    return filestr
def define(FILENAME_EXTENSION, BLANKLINE, INLINE_TAGS_SUBST, CODE, LIST,
           ARGLIST, TABLE, EXERCISE, FIGURE_EXT, CROSS_REFS, INDEX_BIB, TOC,
           ENVIRS, QUIZ, INTRO, OUTRO, filestr):
    """
    Register the 'rst' (reStructuredText) format in the translation tables.

    The dict arguments are extended in place with an 'rst' entry; OUTRO
    is accepted but not modified. filestr is only searched: a
    non-breaking space in it makes the |nbsp| substitution definition
    get appended to INTRO['rst'], except that a document without a
    TITLE: line (current format rst or sphinx) results in a printed
    error and a call to _abort().
    """
    # all arguments are dicts and accept in-place modifications (extensions)

    FILENAME_EXTENSION['rst'] = '.rst'
    BLANKLINE['rst'] = '\n'

    def underlined(char, tail=''):
        # Return a substitution function that typesets the matched title
        # followed by a line of char with the same length as the title.
        # (note len(m.group('subst')) gives wrong length for latin-1
        # strings, seems to work for utf-8)
        def typeset(m):
            title = m.group('subst')
            return '%s\n%s%s' % (title, char * len(title), tail)
        return typeset

    anonymous_link = r'`\g<link> <\g<url>>`__'

    INLINE_TAGS_SUBST['rst'] = {
        # math and math2 are redefined below if --rst_mathjax
        'math': r'\g<begin>\g<subst>\g<end>',
        'math2': r'\g<begin>\g<puretext>\g<end>',
        'emphasize': None,  # => just use doconce markup (*emphasized words*)
        'bold': r'\g<begin>**\g<subst>**\g<end>',
        'verbatim': r'\g<begin>``\g<subst>``\g<end>',
        'label': r'\g<subst>',  # should be improved, rst has cross ref
        'reference': r'\g<subst>',
        # colortext cannot employ pure HTML code, use bold instead
        'colortext': r'**\g<text>**',
        # Use anonymous hyperlink references to avoid warnings if the link
        # name appears twice
        'linkURL2': anonymous_link,
        'linkURL3': anonymous_link,
        'linkURL2v': anonymous_link,  # no verbatim, does not work well
        'linkURL3v': anonymous_link,  # same
        'plainURL': r'`<\g<url>>`_',
        'inlinecomment': r'color{red}{(**\g<name>**: \g<comment>})',
        'chapter': underlined('%'),
        'section': underlined('='),
        'subsection': underlined('-'),
        'subsubsection': underlined('~', tail='\n'),
        'paragraph': r'**\g<subst>**\n',  # extra newline
        'abstract': rst_abstract,
        'title': None,  # taken care of in ref_and_label_commoncode
        'date': r':Date: \g<subst>\n',
        'author': rst_author,
        'figure': rst_figure,
        'movie': rst_movie,
        # rst does not like empty comment lines, so empty comments are
        # removed (we insert an extra blank first to be safe)
        'comment': lambda c: '' if c.isspace() or c == '' else '\n.. %s\n' % c,
        # '| text' does not work directly: it interferes with tables and
        # requires a final blank line after the block
        'linebreak': r'<linebreakpipe> \g<text>',  # fixed in rst_code/sphinx_code as a hack
        'footnote': rst_footnotes,
        'non-breaking-space': ' |nbsp| ',
        'horizontal-rule': '---------',
        'ampersand2': r' \g<1>&\g<2>',
    }
    if option('rst_mathjax'):
        # rst2html conversion requires four backslashes here for one of them
        # to survive
        INLINE_TAGS_SUBST['rst']['math'] = \
            r'\g<begin>\\\\( \g<subst> \\\\)\g<end>'
        INLINE_TAGS_SUBST['rst']['math2'] = \
            r'\g<begin>\\\\( \g<latexmath> \\\\)\g<end>'

    ENVIRS['rst'] = {
        'quote': rst_quote,
        'warning': rst_warning,
        'question': rst_question,
        'notice': rst_notice,
        'summary': rst_summary,
        'block': rst_block,
        'box': rst_box,
    }

    CODE['rst'] = rst_code  # function for typesetting code

    LIST['rst'] = {
        # lists must end with a blank line - we insert one extra
        'itemize': {'begin': '', 'item': '*', 'end': '\n'},
        'enumerate': {'begin': '', 'item': '%d.', 'end': '\n'},
        'description': {'begin': '', 'item': '%s', 'end': '\n'},
        'separator': '\n',
    }
    from common import DEFAULT_ARGLIST
    ARGLIST['rst'] = DEFAULT_ARGLIST
    FIGURE_EXT['rst'] = {
        'search': ('.png', '.gif', '.jpg', '.jpeg', '.pdf', '.eps', '.ps'),
        'convert': ('.png', '.gif', '.jpg'),
    }
    CROSS_REFS['rst'] = rst_ref_and_label
    INDEX_BIB['rst'] = rst_index_bib

    TABLE['rst'] = rst_table
    EXERCISE['rst'] = plain_exercise
    TOC['rst'] = lambda s: '.. contents:: Table of Contents\n :depth: 2'
    QUIZ['rst'] = rst_quiz
    # A reST comment followed by a blank line.
    # NOTE(review): line layout of this literal is reconstructed - confirm.
    INTRO['rst'] = ('.. Automatically generated reStructuredText file '
                    'from DocOnce source '
                    '(https://github.com/hplgit/doconce/)\n\n')

    # http://stackoverflow.com/questions/11830242/non-breaking-space
    from common import INLINE_TAGS
    if re.search(INLINE_TAGS['non-breaking-space'], filestr):
        # :trim: must be on its own indented line to act as an option.
        # NOTE(review): line layout of this literal is reconstructed - confirm.
        nbsp = '\n.. |nbsp| unicode:: 0xA0\n :trim:\n\n'
        if 'TITLE:' not in filestr:
            import common
            if common.format in ('rst', 'sphinx'):
                # Single-argument print(...) is valid in Python 2 and 3
                print('*** error: non-breaking space character ~ is used,')
                print(' but this will give an error when the document does')
                print(' not have a title.')
                _abort()
        else:
            INTRO['rst'] += nbsp
def ref_and_label_commoncode(section_label2title, format, filestr):
    """Rewrite section references, headings and labels for rst-type output.

    The following steps are performed, in this order:

    1. ``Section ref{...}`` and ``Chapter ref{...}`` are rewritten to
       ``the section ref{...}`` / ``the chapter ref{...}`` (with a capital
       ``The`` at the start of a sentence), and the words Exercise,
       Project and Problem are removed in front of ``ref{``.
    2. The ``TITLE:`` line is typeset as a heading one level (two ``=``
       characters) above the largest heading found, but with no more
       than 9 ``=`` characters.
    3. Headings whose title text occurs more than once get a counter
       ``(1)``, ``(2)``, ... appended to the title.
    4. A heading followed by ``label{name}`` gets a ``.. _name:`` target
       in front of it.
    5. All remaining ``label{...}`` constructions are removed.

    Arguments:

    - ``section_label2title``: dict from label to section title; entries
      whose title gets a counter appended are updated in place.
    - ``format``: output format name; for ``'sphinx'`` the document title
      is checked against a 63 character limit (``_abort()`` is called if
      it is longer).
    - ``filestr``: the document text.

    Return the modified document text.
    """
    # .... see section ref{my:sec} is replaced by
    # see the section "...section heading..."
    filestr = re.sub(r'[Ss]ection(s?)\s+ref\{',
                     r'the section\g<1> ref{', filestr)
    filestr = re.sub(r'[Cc]hapter(s?)\s+ref\{',
                     r'the chapter\g<1> ref{', filestr)
    # Need special adjustment to handle start of sentence (capital) or not.
    filestr = re.sub(
        r'([.?!]\s+|\n\n|[%=~-]\n+)the (sections?|chapters?)\s+ref',
        r'\g<1>The \g<2> ref', filestr)

    # Remove Exercise, Project, Problem in references since those words
    # are used in the title of the section too
    filestr = re.sub(
        r'(the\s*)?([Ee]xercises?|[Pp]rojects?|[Pp]roblems?)\s+ref\{',
        r'ref{', filestr)

    # Deal with the problem of identical titles, which makes problem
    # with non-unique links in reST: add a counter to the title
    debugtext = ''
    section_pattern = r'^\s*(={3,9})(.+?)(={3,9})(\s*label\{(.+?)\})?'
    all_sections = re.findall(section_pattern, filestr, flags=re.MULTILINE)

    # First count the no of titles with the same wording
    titles = {}
    max_heading = 1  # track the top heading level for correct TITLE typesetting
    for heading, title, dummy2, dummy3, label in all_sections:
        entry = None if label == '' else label
        titles.setdefault(title, []).append(entry)
        max_heading = max(max_heading, len(heading))

    # Typeset TITLE so that it gets the highest+1 (but no higher) section level
    max_heading = min(max_heading + 2, 9)  # one level up (2 =), at most 9
    title_line_pattern = r'^TITLE:\s*(.+)$'
    if format == 'sphinx':
        # Title cannot be more than 63 chars...
        m = re.search(title_line_pattern, filestr, flags=re.MULTILINE)
        if m:
            doc_title = m.group(1).strip()
            if len(doc_title) > 63:
                # Single-argument print(...) behaves the same on Python 2 and 3
                print('*** error: sphinx title cannot be longer than 63 characters')
                print(' current title: "%s" (%d characters)'
                      % (doc_title, len(doc_title)))
                _abort()
    title_bar = '=' * max_heading
    filestr = re.sub(title_line_pattern,
                     '.. Document title:\n\n%s \\g<1> %s\n'
                     % (title_bar, title_bar),
                     filestr, flags=re.MULTILINE)

    # Make new titles
    title_counter = {}  # count repeated titles
    sections = []
    for heading, title, dummy2, dummy3, label in all_sections:
        label = None if label == '' else label
        if len(titles[title]) > 1:
            title_counter[title] = title_counter.get(title, 0) + 1
            new_title = title + ' (%d) ' % title_counter[title]
            sections.append((heading, new_title, label, title))
            if label in section_label2title:
                section_label2title[label] = new_title
        else:
            sections.append((heading, title, label, title))

    # Make replacements
    for heading, title, label, old_title in sections:
        if title != old_title:
            debugtext += '\nchanged title: %s -> %s\n' % (old_title, title)
        # The substitution depends on whether we have a label or not.
        # Title and label may contain ? () + etc., so both are passed
        # through re.escape when they are part of the search pattern.
        if label is not None:
            title_pattern = r'%s\s*%s\s*%s\s*label\{%s\}' % (
                heading, re.escape(old_title), heading, re.escape(label))
            new_text = '.. _%s:\n\n' % label + \
                       '%s %s %s' % (heading, title, heading)
        else:
            title_pattern = r'%s\s*%s\s*%s' % (
                heading, re.escape(old_title), heading)
            new_text = '%s %s %s' % (heading, title, heading)
        # A callable replacement inserts new_text literally, which avoids
        # trouble with backslashes (\t, \n, ...) in titles and labels.
        # count=1: only the first occurrence is replaced for each section.
        filestr = re.sub(title_pattern, lambda m, text=new_text: text,
                         filestr, count=1)

    # remove label{...} from output: first labels on a line of their own,
    # then all the remaining
    cpattern = re.compile(r'^label\{[^}]+?\}\s*$', flags=re.MULTILINE)
    filestr = cpattern.sub('', filestr)
    filestr = re.sub(r'label\{[^}]+?\}', '', filestr)

    import doconce
    doconce.debugpr(debugtext)

    return filestr
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset code and TeX blocks in rst syntax and return the new text.

    In rst syntax, code blocks are typeset with ``::`` (verbatim)
    followed by indented blocks. This function indents everything
    inside code (or TeX) blocks, inserts the blocks in the text, and
    replaces the ``!bc``/``!ec``/``!bt``/``!et`` markers.

    Arguments:

    - ``filestr``: the document text.
    - ``code_blocks``: list of code block texts; each element is replaced
      in place by its indented version.
    - ``code_block_types``: not used by this function.
    - ``tex_blocks``: list of TeX block texts; each element is replaced
      in place by its indented version (and, with the ``rst_mathjax``
      option, `` label{`` is turned into `` \\label{``).
    - ``format``: passed on to ``indent_lines``.

    ``_abort()`` is called if a line starting with ``!bt`` or ``!et`` is
    still present after the substitutions.
    """
    for i, block in enumerate(code_blocks):
        code_blocks[i] = indent_lines(block, format)
    for i, block in enumerate(tex_blocks):
        tex_blocks[i] = indent_lines(block, format)

    # Fix labels
    if option('rst_mathjax'):
        for i, block in enumerate(tex_blocks):
            tex_blocks[i] = block.replace(' label{', ' \\label{')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if option('rst_mathjax') and (
            re.search(r'^!bt', filestr, flags=re.MULTILINE) or
            re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        # NOTE(review): presumably the project's own html module (Python 2
        # implicit relative import), not the standard library one - confirm
        # before running under Python 3.
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented)
        # NOTE(review): the original comment mentions 8 chars of
        # indentation, while the $$ literals here carry a single space -
        # confirm the intended indentation.
        filestr = re.sub(bt_regex_pattern,
                         r'\g<1>\n\n.. raw:: html\n\n $$',
                         filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', ' $$\n\n', filestr,
                         flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim blocks...)
        # The backslash is doubled in the replacement template: a lone \e
        # is an unknown escape, which is an error in newer versions of re.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code blocks:
        filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just replace these by empty text.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check that no !bt or !et markers are left
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            message = (
                ' Still %s left after handling of code and tex blocks. '
                'Problem is probably that %s is not preceded by text which '
                'can be extended with :: (required). '
            ) % (pattern, pattern)
            # Single-argument print(...) behaves the same on Python 2 and 3
            print(message)
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)

    # Line breaks interfere with tables and need a final blank line too
    marker = '<linebreakpipe>'
    lines = filestr.splitlines()
    inside_block = False
    for i, line in enumerate(lines):
        if line.startswith(marker):
            if inside_block:
                # continuation line in a block of line breaks
                lines[i] = '|' + line.replace(marker, '')
            else:
                # first marked line: strip the marker, add a blank line
                inside_block = True
                lines[i] = line.replace(marker + ' ', '') + '\n'
        elif inside_block:
            # first line without marker ends the block
            inside_block = False
            lines[i] = '| ' + line + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr