Exemple #1
0
    def getModtext(self, ID='key'):
        """Return ``self.text`` rewritten into cleaner LaTeX.

        The text is pushed through a fixed pipeline. The order of the steps
        matters, because later steps match the output of earlier ones:

        1. literal replacements (accent macros, Greek letters, quote macros,
           example/list environments), then removal of unwanted markup;
        2. decoding of ``[XXX?]`` / ``[XXXX?]`` hexadecimal code-point
           placeholders;
        3. regex clean-up of tables, headings, citations, linguistic
           examples, floats, lists and inline formatting;
        4. splitting off a trailing "References" section, each line of which
           is converted with ``bibtools.Record(line).bibstring``.

        Every regex replacement template escapes its literal backslashes
        (``r"\\\\section{\\1}"`` style).  Unknown letter escapes such as
        ``\\s`` or ``\\c`` in a template raise ``re.error`` on Python 3.7+.

        Args:
            ID: Accepted for interface compatibility but not used; all
                generated labels carry the fixed infix ``key``
                (``ex:key:``, ``tab:key:``, ``fig:key:``, ``sec:key:``).

        Returns:
            The converted text followed by a ``verbatim`` block containing
            the extracted bibliography entries (the block is empty when the
            text does not split into exactly two parts around a
            "References" heading).
        """
        modtext = self.text

        # Both "MSii" example openers expand to the same hint block.
        subexample_opening = (
            "\\ea\\label{ex:key:}\n"
            "%%1st subexample: change \\ea\\label{...} to \\ea\\label{...}\\ea; remove \\z  \n"
            "%%further subexamples: change \\ea to \\ex; remove \\z  \n"
            "%%last subexample: change \\z to \\z\\z \n"
            "\\langinfo{}{}{"
        )

        # (old, new) pairs applied in order with str.replace.
        explicitreplacements = (
            # acute
            (r"\'{a}", "á"),
            (r"\'{e}", "é"),
            (r"\'{i}", "í"),
            (r"\'{o}", "ó"),
            (r"\'{u}", "ú"),
            (r"\'{y}", "ý"),
            (r"\'{m}", "ḿ"),
            (r"\'{n}", "ń"),
            (r"\'{r}", "ŕ"),
            (r"\'{l}", "ĺ"),
            (r"\'{c}", "ć"),
            (r"\'{s}", "ś"),
            (r"\'{z}", "ź"),
            # grave
            (r"\`{a}", "à"),
            (r"\`{e}", "è"),
            (r"\`{i}", "ì"),
            (r"\`{o}", "ò"),
            (r"\`{u}", "ù"),
            (r"\`{y}", "ỳ"),
            (r"\`{n}", "ǹ"),
            # circumflex
            (r"\^{a}", "â"),
            (r"\^{e}", "ê"),
            (r"\^{i}", "î"),
            (r"\^{o}", "ô"),
            (r"\^{u}", "û"),
            (r"\^{y}", "ŷ"),
            (r"\^{c}", "ĉ"),
            (r"\^{s}", "ŝ"),
            (r"\^{z}", "ẑ"),
            # tilde
            (r"\~{a}", "ã"),
            (r"\~{e}", "ẽ"),
            (r"\~{i}", "ĩ"),
            (r"\~{o}", "õ"),
            (r"\~{u}", "ũ"),
            (r"\~{y}", "ỹ"),
            (r"\~{n}", "ñ"),
            # diaeresis
            (r'\"{a}', "ä"),
            (r'\"{e}', "ë"),
            (r'\"{i}', "ï"),
            (r'\"{o}', "ö"),
            (r'\"{u}', "ü"),
            (r'\"{y}', "ÿ"),
            (r'\"{A}', "Ä"),
            (r'\"{E}', "Ë"),
            (r'\"{I}', "Ï"),
            (r'\"{O}', "Ö"),
            (r'\"{U}', "Ü"),
            (r'\"{Y}', "Ÿ"),
            # caron
            (r"\v{a}", "ǎ"),
            (r"\v{e}", "ě"),
            (r"\v{i}", "ǐ"),
            (r"\v{o}", "ǒ"),
            (r"\v{u}", "ǔ"),
            (r"\v{n}", "ň"),
            (r"\v{r}", "ř"),
            (r"\v{c}", "č"),
            (r"\v{s}", "š"),
            (r"\v{z}", "ž"),
            (r"\v{C}", "Č"),
            (r"\v{S}", "Š"),
            (r"\v{Z}", "Ž"),  # was keyed on \v{Y}, which is not Z-caron
            # breve
            (r"\u{a}", "ă"),
            (r"\u{e}", "ĕ"),
            (r"\u{i}", "ĭ"),
            (r"\u{\i}", "ĭ"),
            (r"\u{o}", "ŏ"),
            (r"\u{u}", "ŭ"),
            (r"\u{A}", "Ă"),
            (r"\u{E}", "Ĕ"),
            (r"\u{I}", "Ĭ"),
            (r"\u{O}", "Ŏ"),
            (r"\u{U}", "Ŭ"),
            # macron, braced argument
            (r"\={a}", "ā"),
            (r"\={e}", "ē"),
            (r"\={i}", "ī"),
            (r"\={\i}", "ī"),
            (r"\=\i", "ī"),
            (r"\={o}", "ō"),
            (r"\={u}", "ū"),
            (r"\={A}", "Ā"),
            (r"\={E}", "Ē"),
            (r"\={I}", "Ī"),
            (r"\={O}", "Ō"),
            (r"\={U}", "Ū"),
            # macron, bare argument
            (r"\=a", "ā"),
            (r"\=e", "ē"),
            (r"\=i", "ī"),
            (r"\=o", "ō"),
            (r"\=u", "ū"),
            (r"\=A", "Ā"),
            (r"\=E", "Ē"),
            (r"\=I", "Ī"),
            (r"\=O", "Ō"),
            (r"\=U", "Ū"),
            # lowercase Greek
            (r"$\alpha $", "α"),
            (r"$\beta $", "β"),
            (r"$\gamma $", "γ"),
            (r"$\delta $", "δ"),
            (r"$\epsilon $", "ε"),
            (r"$\zeta $", "ζ"),
            (r"$\eta $", "η"),
            (r"$\theta $", "θ"),
            (r"$\iota $", "ι"),
            (r"$\kappa $", "κ"),
            (r"$\lambda $", "λ"),
            (r"$\mu $", "μ"),
            (r"$\nu $", "ν"),
            (r"$\xi $", "ξ"),
            (r"$\omicron $", "ο"),
            (r"$\pi $", "π"),
            (r"$\rho $", "ρ"),
            (r"$\sigma $", "σ"),
            (r"$\tau $", "τ"),
            (r"$\upsilon $", "υ"),
            (r"$\phi $", "φ"),
            (r"$\chi $", "χ"),
            (r"$\psi $", "ψ"),
            (r"$\omega $", "ω"),
            # uppercase Greek; written as code points so that the capitals
            # cannot be confused with look-alike Latin letters
            (r"$\Alpha $", "\u0391"),
            (r"$\Beta $", "\u0392"),
            (r"$\Gamma $", "\u0393"),
            (r"$\Delta $", "\u0394"),
            (r"$\Epsilon $", "\u0395"),
            (r"$\Zeta $", "\u0396"),
            (r"$\Eta $", "\u0397"),
            (r"$\Theta $", "\u0398"),
            (r"$\Iota $", "\u0399"),
            (r"$\Kappa $", "\u039a"),
            (r"$\Lambda $", "\u039b"),
            (r"$\Mu $", "\u039c"),
            (r"$\Nu $", "\u039d"),
            (r"$\Xi $", "\u039e"),
            (r"$\Omicron $", "\u039f"),
            (r"$\Pi $", "\u03a0"),
            (r"$\Rho $", "\u03a1"),
            (r"$\Sigma $", "\u03a3"),
            (r"$\Tau $", "\u03a4"),
            (r"$\Upsilon $", "\u03a5"),
            (r"$\Phi $", "\u03a6"),
            (r"$\Chi $", "\u03a7"),
            (r"$\Psi $", "\u03a8"),
            (r"$\Omega $", "\u03a9"),
            # quote and comparison macros
            ("{\\textquoteleft}", "`"),
            ("{\\textgreater}", ">"),
            ("{\\textless}", "<"),
            ("{\\textquotedbl}", '"'),
            ("{\\textquotedblleft}", "``"),
            ("{\\textquoteright}", "'"),
            ("{\\textquotedblright}", "''"),
            ("{\\textquotesingle}", "'"),
            ("{\\textquotedouble}", '"'),
            ("\\par}", "}"),
            ("\\clearpage", "\n"),
            ("supertabular", "tabular"),
            ("\\~{}", "{\\textasciitilde}"),
            # example openers wrapped in two list levels (MSi)
            ("\\begin{listWWNumiileveli}\n\\item \n"
             "\\setcounter{listWWNumiilevelii}{0}\n"
             "\\begin{listWWNumiilevelii}\n\\item \n"
             "\\begin{styleLangSciLanginfo}",
             "\\begin{styleLangSciLanginfo}"),
            ("\\begin{listWWNumiileveli}\n\\item \n"
             "\\setcounter{listWWNumiilevelii}{0}\n"
             "\\begin{listWWNumiilevelii}\n\\item \n"
             "\\begin{stylelsLanginfo}",
             "\\begin{stylelsLanginfo}"),
            # example openers wrapped in one list level (MSii)
            ("\\begin{listWWNumiileveli}\n\\item \n\\begin{styleLangSciLanginfo}\n",
             subexample_opening),
            ("\\begin{listWWNumiileveli}\n\\item \n\\begin{stylelsLanginfo}\n",
             subexample_opening),
            # OOi / OOii list wrappers
            ("\\begin{listLangSciLanginfoiileveli}\n\\item \n\\begin{styleLangSciLanginfo}",
             "\\begin{styleLangSciLanginfo}"),
            ("\\begin{listlsLanginfoiileveli}\n\\item \n\\begin{stylelsLanginfo}",
             "\\begin{stylelsLanginfo}"),
            ("\\begin{listLangSciLanginfoiilevelii}\n\\item \n\\begin{styleLangSciLanginfo}",
             "\\begin{styleLangSciLanginfo}"),
            ("\\begin{listlsLanginfoiilevelii}\n\\item \n\\begin{stylelsLanginfo}",
             "\\begin{stylelsLanginfo}"),
            # closing list wrappers after the language-info line
            ("\\end{styleLangSciLanginfo}\n\n\n\\end{listWWNumiilevelii}\n\\end{listWWNumiileveli}",
             "\\end{styleLangSciLanginfo}"),
            ("\\end{stylelsLanginfo}\n\n\n\\end{listWWNumiilevelii}\n\\end{listWWNumiileveli}",
             "\\end{stylelsLanginfo}"),
            ("\\end{styleLangSciLanginfo}\n\n\\end{listWWNumiilevelii}\n\\end{listWWNumiileveli}",
             "\\end{styleLangSciLanginfo}"),
            ("\\end{stylelsLanginfo}\n\n\\end{listWWNumiilevelii}\n\\end{listWWNumiileveli}",
             "\\end{stylelsLanginfo}"),
            # language-info line itself
            ("\\begin{styleLangSciLanginfo}\n", "\\ea\\label{ex:key:}\n\\langinfo{}{}{"),
            ("\\begin{stylelsLanginfo}\n", "\\ea\\label{ex:key:}\n\\langinfo{}{}{"),
            ("\\begin{listWWNumiilevelii}\n\\item \n\\ea\\label{ex:key:}\n", ""),
            # source line / gloss / translation, LangSci styles
            ("\n\\end{styleLangSciLanginfo}\n", "}\\\\\n"),
            ("\\begin{styleLangSciExample}\n", "\n\\gll "),
            ("\\end{styleLangSciExample}\n", "\\\\"),
            ("\\begin{styleLangSciSourceline}\n", "\\gll "),
            ("\\end{styleLangSciSourceline}\n", "\\\\"),
            # source line / gloss / translation, ls styles
            ("\n\\end{stylelsLanginfo}\n", "}\\\\\n"),
            ("\\begin{stylelsExample}\n", "\n\\gll "),
            ("\\end{stylelsExample}\n", "\\\\"),
            ("\\begin{stylelsSourceline}\n", "\\gll "),
            ("\\end{stylelsSourceline}\n", "\\\\"),
            ("\\end{listWWNumiileveli}\n\\gll", "\\gll"),
            ("\\begin{styleLangSciIMT}\n", "     "),
            ("\\end{styleLangSciIMT}\n", "\\\\"),
            ("\\begin{styleLangSciTranslation}\n", "\\glt "),
            ("\\end{styleLangSciTranslation}", "\\z"),
            ("\\begin{styleLangSciTranslationSubexample}\n", "\\glt "),
            ("\\end{styleLangSciTranslationSubexample}", "\\z"),
            ("\\begin{stylelsIMT}\n", "     "),
            ("\\end{stylelsIMT}\n", "\\\\"),
            ("\\begin{stylelsTranslation}\n", "\\glt "),
            ("\\end{stylelsTranslation}", "\\z"),
            ("\\begin{stylelsTranslationSubexample}\n", "\\glt "),
            ("\\end{stylelsTranslationSubexample}", "\\z"),
            # left-over counters and list shells around examples
            ("\\setcounter{listWWNumiileveli}{0}\n\\ea\\label{ex:key:}", ""),
            ("\\begin{listLangSciLanginfoiileveli}\n\\item", "\\ea\\label{ex:key:}"),
            ("\\setcounter{listLangSciLanginfoiilevelii}{0}\n\\ea\\label{ex:key:}", ""),
            ("\n\\end{listLangSciLanginfoiileveli}", ""),
            ("\n\\end{listLangSciLanginfoiilevelii}", ""),
            ("\\begin{listlsLanginfoiileveli}\n\\item", "\\ea\\label{ex:key:}"),
            ("\\setcounter{listlsLanginfoiilevelii}{0}\n\\ea\\label{ex:key:}", ""),
            ("\n\\end{listlsLanginfoiileveli}", ""),
            ("\n\\end{listlsLanginfoiilevelii}", ""),
            ("\n\\glt ~", ""),
            # remaining style names
            ("{styleQuote}", "{quote}"),
            ("{styleAbstract}", "{abstract}"),
            ("textstylelsCategory", "textsc"),
            ("{styleConversationTranscript}", "{lstlisting}"),
            ("\\ ", " "),
            ("\n\n\\item", "\\item"),
            ("\n\n\\end", "\\end"),
            ("[Warning: Draw object ignored]", "%%[Warning: Draw object ignored]\n"),
        )

        # Markup that is deleted outright, in this order.
        yanks = (
            "\\begin{flushleft}",
            "\\end{flushleft}",
            "\\centering",
            "\\raggedright",
            "\\par ",
            "\\tablehead{}",
            "\\textstylepagenumber",
            "\\textstyleCharChar",
            "\\textstyleInternetlink",
            "\\textstylefootnotereference",
            "\\textstyleFootnoteTextChar",
            "\\textstyleappleconvertedspace",
            "\\pagestyle{Standard}",
            "\\hline",
            "\\begin{center}",
            "\\end{center}",
            "\\begin{styleStandard}",
            "\\end{styleStandard}",
            "\\begin{styleBodytextC}",
            "\\end{styleBodytextC}",
            "\\begin{styleBodyTextFirst}",
            "\\end{styleBodyTextFirst}",
            "\\begin{styleIllustration}",
            "\\end{styleIllustration}",
            "\\begin{styleTabelle}",
            "\\end{styleTabelle}",
            "\\begin{styleAbbildung}",
            "\\end{styleAbbildung}",
            "\\begin{styleTextbody}",
            "\\end{styleTextbody}",
            "\\maketitle",
            "\\arraybslash",
            "\\textstyleAbsatzStandardschriftart{}",
            "\\textstyleAbsatzStandardschriftart",
            "[Warning: Image ignored] % Unhandled or unsupported graphics:",
            "%\\setcounter{listWWNumileveli}{0}\n",
            "%\\setcounter{listWWNumilevelii}{0}\n",
            "%\\setcounter{listWWNumiileveli}{0}\n",
            "%\\setcounter{listWWNumiilevelii}{0}\n",
            "%\\setcounter{listLangSciLanginfoiileveli}{0}\n",
            "%\\setcounter{listlsLanginfoiileveli}{0}\n",
            "\\setcounter{itemize}{0}",
            "\\setcounter{page}{1}",
            "\\mdseries ",
        )

        for old, new in explicitreplacements:
            modtext = modtext.replace(old, new)
        for unwanted in yanks:
            modtext = modtext.replace(unwanted, '')

        # Decode "[XXX?]" / "[XXXX?]" hexadecimal code-point placeholders.
        # The four-digit form only accepts a leading 0-D, which intentionally
        # leaves out the private use area.
        def _decode_codepoint(match):
            return chr(int(match.group(1), 16))

        modtext = re.sub(r"\[([0-9A-Fa-f]{3})\?\]", _decode_codepoint, modtext)
        modtext = re.sub(r"\[([0-9A-Da-d][0-9A-Fa-f]{3})\?\]", _decode_codepoint, modtext)

        # Remove formatting that only wraps white space and punctuation.
        modtext = re.sub(r"\\text(it|bf|sc)\{([ .,]*)\}", r"\2", modtext)

        # Remove explicit table widths and font sizes.
        modtext = re.sub(r"m\{-?[0-9.]+(in|cm)\}", "X", modtext)
        modtext = re.sub(r"X\|", "X", modtext)
        modtext = re.sub(r"\|X", "X", modtext)
        modtext = re.sub(r"\\fontsize\{.*?\}\\selectfont", "", modtext)

        # Remove single-column multicolumns; center the remaining ones.
        modtext = modtext.replace("\\multicolumn{1}{l}{}", "")
        modtext = modtext.replace("\\multicolumn{1}{l}", "")
        modtext = modtext.replace("}{X}{", "}{c}{")

        # Turn heading style environments into sectioning commands.
        heading_styles = (
            ("styleLangSciSectioni", "section"),
            ("styleLangSciSectionii", "subsection"),
            ("styleLangSciSectioniii", "subsubsection"),
            ("styleLangSciSectioniv", "subsubsubsection"),
            ("stylelsSectioni", "section"),
            ("stylelsSectionii", "subsection"),
            ("stylelsSectioniii", "subsubsection"),
            ("stylelsSectioniv", "subsubsubsection"),
            ("styleHeadingi", "chapter"),
            ("styleHeadingii", "section"),
            ("styleHeadingiii", "subsection"),
            ("styleHeadingiv", "subsubsection"),
        )
        for style, command in heading_styles:
            modtext = re.sub(
                r"\\begin\{%s\}\n+(.*?)\n+\\end\{%s\}" % (style, style),
                r"\\%s{\1}" % command,
                modtext)

        # Remove the explicit short title of sections, together with a
        # \textbf/\textmd-style command directly wrapping the title.
        # Group 1 holds the run of "sub" prefixes, group 2 the title.
        modtext = re.sub(
            r"\\((?:sub)*)section\[.*?\]\{(?:\\text[bfmd][bfmd])?(.*)\}",
            r"\\\1section{\2}",
            modtext)
        # Move an explicit section number to the end of the line, commented out.
        modtext = re.sub(r"section\{([0-9.]+ )(.*)", r"section{\2 %\1/", modtext)
        modtext = re.sub(r"section\[.*?\]", "section", modtext)

        # Put all cells of a table row on one line.  The first step also
        # rewrites "\&" as "\ & "; the second step restores it.
        modtext = re.sub("[\n ]*&[ \n]*", ' & ', modtext)
        modtext = modtext.replace(r'\ &', r'\&')
        # Collapse newlines around "\\".
        modtext = re.sub(r"\n*\\\\\n*", r"\\\\" + "\n", modtext)

        # Bibliographic references.  Each pattern takes (author, year).
        authorchars = "[A-Z][-a-záéíóúaèìòùâeîôûñäëïöüA-Z]+"
        yearchars = "[12][0-9]{3}[a-z]?"
        citation_rules = (
            # (Smith et al. 2000: 12), (Smith 2000: 12), (Smith et al. 2000), (Smith 2000)
            (r"\((%s) +et al\.? +(%s): *([0-9,-]+)\)", r"\\citep[\3]{\1EtAl\2}"),
            (r"\((%s) +(%s): *([0-9,-]+)\)", r"\\citep[\3]{\1\2}"),
            (r"\((%s) +et al\.? +(%s)\)", r"\\citep{\1EtAl\2}"),
            (r"\((%s) +(%s)\)", r"\\citep{\1\2}"),
            # Smith et al. (2000: 12), Smith (2000: 12), Smith et al. (2000), Smith (2000)
            (r"(%s) +et al\.? +\((%s): *([0-9,-]+)\)", r"\\citet[\3]{\1EtAl\2}"),
            (r"(%s) +\((%s): *([0-9,-]+)\)", r"\\citet[\3]{\1\2}"),
            (r"(%s) +et al\.? +\((%s)\)", r"\\citet{\1EtAl\2}"),
            (r"(%s) +\((%s)\)", r"\\citet{\1\2}"),
            # Smith et al.'s (2000), Smith's (2000)
            (r"(%s) +et al\.?['’]s +\((%s)\)", r"\\citegen{\1EtAl\2}"),
            (r"(%s)['’]s +\((%s)\)", r"\\citegen{\1\2}"),
            # Smith et al.' (2000), Smith' (2000)
            (r"(%s) +et al\.?['’] +\((%s)\)", r"\\citeapo{\1EtAl\2}"),
            (r"(%s)['’] +\((%s)\)", r"\\citeapo{\1\2}"),
            # catch-all: Smith 2000, Smith et al. 2000
            (r"(%s) +(%s)", r"\\citealt{\1\2}"),
            (r"(%s) et al\.? +(%s)", r"\\citealt{\1EtAl\2}"),
        )
        for pattern, template in citation_rules:
            modtext = re.sub(pattern % (authorchars, yearchars), template, modtext)

        # Pull a co-author joined by "\&" or "and" into the citation key.
        for command in ("citet", "citealt"):
            for joiner in (r"\\&", "and"):
                modtext = re.sub(
                    r"(%s) %s \\%s\{" % (authorchars, joiner, command),
                    r"\\%s{\1" % command,
                    modtext)

        # Smith (2000, 2001)
        modtext = re.sub(
            r"(%s) *\((%s), *(%s)\)" % (authorchars, yearchars, yearchars),
            r"\\citet{\1\2,\1\3}",
            modtext)
        # Smith 2000, 2001
        modtext = re.sub(
            r"\\citealt\{(%s)(%s)\}[,;] (%s)" % (authorchars, yearchars, yearchars),
            r"\\citealt{\1\2,\1\3}",
            modtext)
        # Condense adjacent citations into one command.  This is a single
        # pass, so a chain of three or more is merged pairwise only.
        for command in ("citealt", "citet"):
            modtext = re.sub(
                r"(\\%s\{%s%s)\}[,;] \\citealt\{" % (command, authorchars, yearchars),
                r"\1,",
                modtext)

        # Examples: "()" and "(N)" at the start of a line become skeletons.
        # "\newline" is a placeholder that is turned into "\\" further down.
        modtext = modtext.replace("\n()", "\n\\ea \n \\gll \\\\\n   \\\\\n \\glt\n\\z\n\n")
        numbered_example = "\n".join((
            "",
            r"\\ea%\1",
            r"    \\label{ex:key:\1}",
            r"    \\gll\\newline",
            r"        \\newline",
            r"    \\glt",
            r"    \\z",
            "",
            "        ",
        ))
        modtext = re.sub(r"\n\(([0-9]+)\)", numbered_example, modtext)
        bookmark_example = "\n".join((
            r"\\ea%\1",
            r"    \\label{\1}  ",
            r"    \\gll \\newline  ",
            r"        \\newline",
            r"    \\glt",
            r"    \\z",
            "",
            "    ",
        ))
        modtext = re.sub(r"\\label\{(bkm:Ref[0-9]+)\}\(\)", bookmark_example, modtext)

        # Subexamples.
        # NOTE(review): these are plain str.replace calls, so " *" is matched
        # literally (space + asterisk), not as a regex -- confirm the intent.
        modtext = modtext.replace("\n *a. ", "\n% \\ea\n%\\gll \n%    \n%\\glt \n")
        later_subexample = "%\\ex\n%\\gll \\\\\n%    \\\\\n%\\glt \n%\\z\n"
        for letter in "bcd":
            modtext = modtext.replace("\n *%s. " % letter, later_subexample)
        modtext = modtext.replace(r"\newline", r"\\")

        # Table and figure captions, then cross-references.  The "%%please
        # move" hint deliberately omits "}" after "tabular" so that the
        # tabular rewrites below do not match the hint itself.
        table_body = "\n".join((
            r"\\begin{table}",
            r"\\caption{\2}",
            r"\\label{tab:key:\1}",
            r"\\end{table}",
        ))
        modtext = re.sub(
            r"\n\\textit\{Table ([0-9]+)[.:] *(.*?)\}\n",
            r"%%please move \\begin{table} just above \\begin{tabular . " + "\n" + table_body,
            modtext)
        modtext = re.sub(
            r"\nTable ([0-9]+)[.:] *(.*?) *\n",
            r"%%please move \\begin{table} just above \\begin{tabular" + "\n" + table_body,
            modtext)
        modtext = re.sub(r"Table ([0-9]+)", r"\\tabref{tab:key:\1}", modtext)
        figure_body = "\n".join((
            r"\\begin{figure}",
            r"\\caption{\2}",
            r"\\label{fig:key:\1}",
            r"\\end{figure}",
        ))
        modtext = re.sub(r"\nFigure ([0-9]+)[.:] *(.*?)\n", figure_body, modtext)
        modtext = re.sub(r"Figure ([0-9]+)", r"\\figref{fig:key:\1}", modtext)
        modtext = re.sub(r"Section ([0-9.]+)", r"\\sectref{sec:key:\1}", modtext)
        modtext = re.sub(r"§ *([0-9.]+)", r"\\sectref{sec:key:\1}", modtext)
        modtext = re.sub(r" \(([0-9s][0-9]*[a-h]?)\)", r" \\REF{ex:key:\1}", modtext)
        modtext = re.sub(r"\\(begin|end)\{minipage\}.*?\n", '', modtext)
        modtext = re.sub(r"\\begin\{figure\}\[h\]", r"\\begin{figure}", modtext)

        # Add top/bottom rules to tabulars and switch them to tabularx.
        modtext = re.sub(
            r"(begin\{tabular\}[^\n]*)",
            r"\1" + "\n\n" + r"\\lsptoprule",
            modtext)
        modtext = re.sub(
            r"\\end\{tabular\}\n*",
            r"\\lspbottomrule" + "\n" + r"\\end{tabular}" + "\n",
            modtext)
        modtext = modtext.replace("begin{tabular}", "begin{tabularx}{\\textwidth}")
        modtext = modtext.replace("end{tabular}", "end{tabularx}")
        modtext = modtext.replace("\\hhline", "%\\hhline%%replace by cmidrule")

        # Drop counter resets; map generated list names to itemize.
        modtext = re.sub(r"\\setcounter\{[^}]+\}\{0\}", '', modtext)
        modtext = re.sub(r"listWWNum[ivxlc]+level[ivxlc]+", "itemize", modtext)
        modtext = re.sub(r"listL[ivxlc]+level[ivxlc]+", "itemize", modtext)

        # Comment out itemize environments inside table cells.
        modtext = modtext.replace("& \\begin{itemize}\n\\item", "& \n%%\\begin{itemize}\\item\n")
        modtext = modtext.replace("\\end{itemize}\\\\\n", "\\\\\n%%\\end{itemize}\n")
        modtext = modtext.replace("& \\end{itemize}", "& %%\\end{itemize}\n")

        modtext = re.sub(r"\n+\\z", r"\\z", modtext)
        modtext = re.sub("\n\n+", "\n\n", modtext)

        # Merge directly adjacent runs of the same formatting command.
        for command in ("textbf", "textit", "texttt", "emph"):
            modtext = re.sub(
                r"(\\%s\{[^}]+)\}\\%s\{" % (command, command),
                r"\1",
                modtext)

        # Remove every \textit from source lines; each pass removes at most
        # one occurrence per line, so repeat until nothing changes.
        while True:
            modtext, changed = re.subn(r'(\\gll.*)\\textit', r'\1', modtext)
            if not changed:
                break

        # Bold and small caps are used inside examples, so make each command
        # enclose a single word: split at the last space until none is left.
        for command in ('textbf', 'textsc'):
            while True:
                modtext, changed = re.subn(
                    r'\\%s\{([^\}]+) ' % command,
                    r'\\%s{\1} \\%s{' % (command, command),
                    modtext)
                if not changed:
                    break

        modtext = re.sub(
            r"\\includegraphics\[.*?width=\\textwidth\]\{",
            "%please move the includegraphics inside the {figure} environment\n"
            + r"%%\\includegraphics[width=\\textwidth]{figures/",
            modtext)

        modtext = re.sub(r"\\item *\n+", r"\\item ", modtext)
        modtext = re.sub(
            r"\\begin\{itemize\}\n\\item *(\\section\{.*?\})\\end\{itemize\}",
            r"\1",
            modtext)

        modtext = re.sub(r"\\footnote\{ +", r"\\footnote{", modtext)
        # Move spaces out of formatting groups: right edge, then left edge.
        modtext = re.sub(r" +\}", '} ', modtext)
        modtext = re.sub(r"\\text(it|bf|sc|tt|up|rm)\{ +", r" \\text\1{", modtext)
        # Drop formatting that only wraps punctuation.
        modtext = re.sub(r"\\text(it|bf|sc|tt|up|rm)\{([!?()\[\].,>]*)\}", r"\2", modtext)

        # Remove duplicated section names given as optional arguments.
        modtext = re.sub(r"(chapter|section|paragraph)\[.*?\](\{.*\}.*)", r"\1\2", modtext)

        # Split off the reference list.  The split is only honoured when the
        # heading occurs exactly once on a line of its own.
        for heading in (r'\textbf{References}',
                        r'\section{References}',
                        r'\chapter{References}'):
            modtext = modtext.replace(heading, 'References')
        bibliography = ''
        parts = re.split(r"\n\s*References\s*\n", modtext)
        if len(parts) == 2:
            modtext, reference_block = parts
            bibliography = '\n'.join(
                bibtools.Record(line).bibstring
                for line in reference_block.split('\n'))

        return (modtext
                + "\n\\begin{verbatim}%%move bib entries to  localbibliography.bib\n"
                + bibliography
                + '\\end{verbatim}')
            
Exemple #2
0
 def test_record(self):
     bibtests = (
                 ("""Mufwene, Salikoko. 2001. The Ecology of Language Evolution. Cambridge: Cambridge University Press.""",
                     """@book{Mufwene2001,\n\taddress = {Cambridge},\n\tauthor = {Mufwene, Salikoko},\n\tpublisher = {Cambridge University Press},\n\ttitle = {The Ecology of Language Evolution},\n\tyear = {2001}\n}\n"""),
                 ("""Alleyne, Mervyn C. 1996. Syntaxe historique créole. Paris, France: Karthala."""
                             ,"""@book{Alleyne1996,\n\taddress = {Paris, France},\n\tauthor = {Alleyne, Mervyn C.},\n\tpublisher = {Karthala},\n\ttitle = {Syntaxe historique créole},\n\tyear = {1996}\n}\n"""),
                 ("""Allsopp, Richard. (ed.), 2003. Dictionary of Caribbean English usage. Kingston, Jamaica: University of the West Indies Press."""
                             ,"""@book{Allsopp2003,\n\taddress = {Kingston, Jamaica},\n\tbooktitle = {Dictionary of {Caribbean} {English} usage},\n\teditor = {Allsopp, Richard},\n\tpublisher = {University of the West Indies Press},\n\ttitle = {Dictionary of {Caribbean} {English} usage},\n\tyear = {2003}\n}\n"""),
                 ("""Ammon, Matthias. 2013. The functions of oath and pledge in Anglo-Saxon legal culture. Historical Research. 86(233), 515-535."""
                             ,"""@article{Ammon2013,\n\tauthor = {Ammon, Matthias},\n\tjournal = {Historical Research},\n\tnumber = {233},\n\tpages = {515--535},\n\ttitle = {The functions of oath and pledge in {Anglo}-{Saxon} legal culture},\n\tvolume = {86},\n\tyear = {2013}\n}\n"""),
                 ("""Andriotis, Nikolaos. 1995. History of the Greek Language: Four studies. Thessaloniki, Greece: Ίδρυμα Τριανταφυλλίδη."""
                             ,"""@book{Andriotis1995,\n\taddress = {Thessaloniki, Greece},\n\tauthor = {Andriotis, Nikolaos},\n\tpublisher = {Ίδρυμα Τριανταφυλλίδη},\n\ttitle = {History of the {Greek} Language: {{F}}our studies},\n\tyear = {1995}\n}\n"""),
                 ("""Archer, Dawn. 2010. Speech acts. In Andreas H. Jucker & Irma Taavitsainen (eds.), Historical pragmatics, 379-418. Berlin, Germany: Walter de Gruyter GmbH & Co."""
                             ,"""@incollection{Archer2010,\n\taddress = {Berlin, Germany},\n\tauthor = {Archer, Dawn},\n\tbooktitle = {Historical pragmatics},\n\teditor = {Andreas H. Jucker and Irma Taavitsainen},\n\tpages = {379--418},\n\tpublisher = {Walter de Gruyter GmbH & Co},\n\ttitle = {Speech acts},\n\tyear = {2010}\n}\n"""),
                 ("""Auer, Peter & Hinskens, Frans & Kerswill, Paul. (eds.), 2005.  Dialect change: Convergence and divergence in European languages. Cambridge, England: The Cambridge University Press. """
                             ,"""@book{Auer2005,\n\taddress = {Cambridge, England},\n\tbooktitle = {Dialect change: {{C}}onvergence and divergence in {European} languages},\n\teditor = {Auer, Peter and Hinskens, Frans and Kerswill, Paul},\n\tpublisher = {The Cambridge University Press},\n\ttitle = {Dialect change: {{C}}onvergence and divergence in {European} languages},\n\tyear = {2005}\n}\n"""),
                 ("""Awbery, G. M. 1988. Slander and defamation as a source for historical dialectology. In Alan R. Thomas (ed.), Methods in dialectology: Proceedings of the sixth international conference held at the University College of North Wales, 3rd-7th August 1987, 164-174. Clevedon, PA: Multilingual Matters Ltd."""
                             ,"""@incollection{Awbery1988,\n\taddress = {Clevedon, PA},\n\tauthor = {Awbery, G. M.},\n\tbooktitle = {Methods in dialectology: {{P}}roceedings of the sixth international conference held at the University College of {North} {Wales}, 3rd-7th {August} 1987},\n\teditor = {Alan R. Thomas},\n\tpages = {164--174},\n\tpublisher = {Multilingual Matters Ltd},\n\ttitle = {Slander and defamation as a source for historical dialectology},\n\tyear = {1988}\n}\n"""),
                 ("""Bailey, Guy & Ross, Garry. 1988. The shape of the superstrate: Morphosyntactic features of Ship English. English World-Wide. 9(2). 193-212."""
                             ,"""@article{Bailey1988,\n\tauthor = {Bailey, Guy and Ross, Garry},\n\tjournal = {English World-Wide},\n\tnumber = {2},\n\tpages = {193--212},\n\ttitle = {The shape of the superstrate: {{M}}orphosyntactic features of Ship {English}},\n\tvolume = {9},\n\tyear = {1988}\n}\n"""),
                 ("""Baker, Philip & Huber, Magnus. 2001. Atlantic, Pacific, and world-wide features in English-lexicon contact languages. English World-Wide. 22(2), 157-208."""
                             ,"""@article{Baker2001,\n\tauthor = {Baker, Philip and Huber, Magnus},\n\tjournal = {English World-Wide},\n\tnumber = {2},\n\tpages = {157--208},\n\ttitle = {{Atlantic}, {Pacific}, and world-wide features in {English}-lexicon contact languages},\n\tvolume = {22},\n\tyear = {2001}\n}\n"""),
                 ##(""""""  ,""""""),
                 ("""Blevins, Juliette. 2004. Evolutionary phonology. Cambridge: Cambridge University Press."""
                             ,"""@book{Blevins2004,\n\taddress = {Cambridge},\n\tauthor = {Blevins, Juliette},\n\tpublisher = {Cambridge University Press},\n\ttitle = {Evolutionary phonology},\n\tyear = {2004}\n}\n"""),
                 ("""Casali, Roderic F. 1998. Predicting ATR activity. Chicago Linguistic Society (CLS) 34(1). 55-68."""
                             ,"""@article{Casali1998,\n\tauthor = {Casali, Roderic F.},\n\tjournal = {Chicago Linguistic Society (CLS)},\n\tnumber = {1},\n\tpages = {55--68},\n\ttitle = {Predicting {ATR} activity},\n\tvolume = {34},\n\tyear = {1998}\n}\n"""),
                 ("""Chomsky, Noam. 1986. Knowledge of language. New York: Praeger."""
                             ,"""@book{Chomsky1986,\n\taddress = {New York},\n\tauthor = {Chomsky, Noam},\n\tpublisher = {Praeger},\n\ttitle = {Knowledge of language},\n\tyear = {1986}\n}\n"""),
                 ("""van Coetsem, Frans. 2000. A general and unified theory of the transmission process in language contact. Heidelberg: Winter."""
                             ,"""@book{vanCoetsem2000,\n\taddress = {Heidelberg},\n\tauthor = {van Coetsem, Frans},\n\tpublisher = {Winter},\n\ttitle = {A general and unified theory of the transmission process in language contact},\n\tyear = {2000}\n}\n"""),
                 ("""Franks, Steven. 2005. Bulgarian clitics are positioned in the syntax. http://www.cogs.indiana.edu/people/homepages/franks/Bg_clitics_remark_dense.pdf (17 May, 2006.)"""
                             ,"""@misc{Franks2005,\n\tauthor = {Franks, Steven},\n\tnote = {(17 May, 2006.)},\n\ttitle = {{Bulgarian} clitics are positioned in the syntax},\n\turl = {http://www.cogs.indiana.edu/people/homepages/franks/Bg_clitics_remark_dense.pdf},\n\tyear = {2005}\n}\n"""),
                 ("""Iverson, Gregory K. 1983. Korean /s/. Journal of Phonetics 11. 191-200."""
                             ,"""@article{Iverson1983,\n\tauthor = {Iverson, Gregory K.},\n\tjournal = {Journal of Phonetics},\n\tpages = {191--200},\n\ttitle = {{Korean} /s/},\n\tvolume = {11},\n\tyear = {1983}\n}\n"""),
                 ("""Iverson, Gregory K. 1989. On the category supralaryngeal. Phonology 6. 285-303."""
                             ,"""@article{Iverson1989,\n\tauthor = {Iverson, Gregory K.},\n\tjournal = {Phonology},\n\tpages = {285--303},\n\ttitle = {On the category supralaryngeal},\n\tvolume = {6},\n\tyear = {1989}\n}\n"""),
                 ("""Johnson, Kyle, Mark Baker & Ian Roberts. 1989. Passive arguments raised. Linguistic Inquiry 20. 219-251."""
                             ,"""@article{Johnson1989,\n\tauthor = {Johnson, Kyle, Mark Baker and Ian Roberts},\n\tjournal = {Linguistic Inquiry},\n\tpages = {219--251},\n\ttitle = {Passive arguments raised},\n\tvolume = {20},\n\tyear = {1989}\n}\n"""),
                 ("""Lahiri, Aditi (ed.). 2000. Analogy, leveling, markedness: Principles of change in phonology and morphology (Trends in Linguistics 127). Berlin: Mouton de Gruyter."""
                             ,"""@book{Lahiri2000,\n\taddress = {Berlin},\n\tbooktitle = {Analogy, leveling, markedness: {{P}}rinciples of change in phonology and morphology},\n\teditor = {Lahiri, Aditi},\n\tnumber = {127},\n\tpublisher = {Mouton de Gruyter},\n\tseries = {Trends in Linguistics},\n\ttitle = {Analogy, leveling, markedness: {{P}}rinciples of change in phonology and morphology},\n\tyear = {2000}\n}\n"""),
                 ("""McCarthy, John J. & Alan S. Prince. 1999. Prosodic morphology. In John A. Goldsmith (ed.), Phonological theory: The essential readings, 238-288. Malden, MA & Oxford: Blackwell."""
                             ,"""@incollection{McCarthy1999,\n\taddress = {Malden, MA \& Oxford},\n\tauthor = {McCarthy, John J. and Alan S. Prince},\n\tbooktitle = {Phonological theory: {{T}}he essential readings},\n\teditor = {John A. Goldsmith},\n\tpages = {238--288},\n\tpublisher = {Blackwell},\n\ttitle = {Prosodic morphology},\n\tyear = {1999}\n}\n"""),
                 ("""Murray, Robert W. & Theo Vennemann. 1983. Sound change and syllable structure in Germanic phonology. Language 59(3). 514-528."""
                             ,"""@article{Murray1983,\n\tauthor = {Murray, Robert W. and Theo Vennemann},\n\tjournal = {Language},\n\tnumber = {3},\n\tpages = {514--528},\n\ttitle = {Sound change and syllable structure in {Germanic} phonology},\n\tvolume = {59},\n\tyear = {1983}\n}\n"""),
                 #("""Oxford English Dictionary , 2nd edn. 1989. Oxford: Oxford University Press."""
                             #,""""""),
                 ("""Pedersen, Johan. 2005. The Spanish impersonal se-construction: Constructional variation and change. Constructions 1. http://www.constructions-online.de. (3 April, 2007.)"""
                             ,"""@article{Pedersen2005,\n\tauthor = {Pedersen, Johan},\n\tjournal = {Constructions},\n\tnote = {(3 April, 2007.)},\n\ttitle = {The {Spanish} impersonal se-construction: {{C}}onstructional variation and change},\n\turl = {http://www.constructions-online.de},\n\tvolume = {1},\n\tyear = {2005}\n}\n"""),
                 ("""Rissanen, Matti. 1999. Syntax. In Roger Lass (ed.), Cambridge History of the English Language, vol. 3, 187-331. Cambridge & New York: Cambridge University Press."""
                             ,"""@incollection{Rissanen1999,\n\taddress = {Cambridge \& New York},\n\tauthor = {Rissanen, Matti},\n\tbooktitle = {{Cambridge} History of the {English} Language},\n\teditor = {Roger Lass},\n\tpages = {187--331},\n\tpublisher = {Cambridge University Press},\n\ttitle = {Syntax},\n\tvolume = {3},\n\tyear = {1999}\n}\n"""),
                 ("""Stewart, Thomas W., Jr. 2000. Mutation as morphology: Bases, stems, and shapes in Scottish Gaelic. Columbus, OH: The Ohio State University dissertation."""
                             ,"""@thesis{Stewart2000,\n\taddress = {Columbus, OH},\n\tauthor = {Stewart, Thomas W., Jr},\n\tschool = {The Ohio State University},\n\ttitle = {Mutation as morphology: {{B}}ases, stems, and shapes in {Scottish} {Gaelic}},\n\tyear = {2000}\n}\n"""),
                 ("""Webelhuth, Gert (ed.). 1995. Government and binding theory and the minimalist program: Principles and parameters in syntactic theory. Oxford: Blackwell."""
                             ,"""@book{Webelhuth1995,\n\taddress = {Oxford},\n\tbooktitle = {Government and binding theory and the minimalist program: {{P}}rinciples and parameters in syntactic theory},\n\teditor = {Webelhuth, Gert},\n\tpublisher = {Blackwell},\n\ttitle = {Government and binding theory and the minimalist program: {{P}}rinciples and parameters in syntactic theory},\n\tyear = {1995}\n}\n"""),
                 ("""Yu, Alan C. L. 2003. The morphology and phonology of infixation. Berkeley, CA: University of California dissertation."""
                             ,"""@thesis{Yu2003,\n\taddress = {Berkeley, CA},\n\tauthor = {Yu, Alan C. L.},\n\tschool = {University of California},\n\ttitle = {The morphology and phonology of infixation},\n\tyear = {2003}\n}\n"""),
                 #("""Smith, John. 2000. Thoughts on stuff. Newton: Mypress. www.mypress.com/smith""",
                             #"""@book{Smith2000,\n\taddress = {Newton},\n\tauthor = {Smith, John},\n\tpublisher = {Mypress},\n\ttitle = {Thoughts on stuff},\n\turl = {www.mypress.com/smith},\n\tyear = {2000}\n}\n"""),
                 ("""Smith, John. 2000. Thoughts on stuff. Newton: Mypress. www.mypress.com/smith (2021-12-24)""",
                     """@book{Smith2000,\n\taddress = {Newton},\n\tauthor = {Smith, John},\n\tpublisher = {Mypress},\n\ttitle = {Thoughts on stuff},\n\turl = {www.mypress.com/smith},\n\turldate = {2021-12-24},\n\tyear = {2000}\n}\n"""),
                 #("""Smith, John. 2000. Thoughts on stuff. Newton: Mypress. www.mypress.com/smith (2021-12-24) and so on"""      ,""""""),
                 ("""Braun, Sabine & Sara Dicerto. 2016. The use of videoconferencing in proceedings conducted with the assistance of an interpreter. http://www.videoconference-interpreting.net/wp-content/uploads/2016/11/AVIDICUS3_Research_Report.pdf"""
                     ,"""@misc{Braun2016,\n\tauthor = {Braun, Sabine and Sara Dicerto},\n\ttitle = {The use of videoconferencing in proceedings conducted with the assistance of an interpreter},\n\turl = {http://www.videoconference-interpreting.net/wp-content/uploads/2016/11/AVIDICUS3_Research_Report.pdf},\n\tyear = {2016}\n}\n"""),
                 #("""bad Url"""      ,""""""),
                 #("""masters thesis"""     ,""""""),
                 #("Cleirac, Estienne. 1639. Explication des termes de marine employez dans les édicts, ordonnances et reglemens de l’Admirauté. Paris: M. Brunet.",""""""),
                 #("Esquemelin, John. 1678. The buccaneers of America: A true account of the most remarkable assaults committed of late years upon the coasts of the West Indies by the buccaneers of Jamaica and Tortuga (both English and French). New York, NY: Dover Publications. (Reprinted 1967.)",""""""),
                 #("Parkvall, Mikael. 2005. Foreword. In Alan D. Corré (ed.), A glossary of lingua franca. https://minds.wisconsin.edu/bitstream/item/3920/edition2/edition3/foreword.htm (Accessed 2016-10-07).",""""""),
                 #("Smith, John. 1627. A Sea Grammar. Amsterdam: Da Capo Press. (Reprinted 1968).",""""""),
                 #("maitrise",""""""),
                 #("maitrise",""""""),
                 #("maitrise","""""")
                 )
     for s, expected in bibtests:
         record = bibtools.Record(s)
         self.assertEqual(record.bibstring, expected)