def addpacks(cmdline):
    """Run tex2txt over the file named by ``cmdline.add_modules``.

    The package list handed to tex2txt consists of the packages from
    ``cmdline.packages`` (leading / trailing commas removed), followed by
    the module '.yalafi.shell.addpacks'.

    The plain-text result of the conversion is discarded.
    Returns the tuple ``(documentclass[0], packages)``; both names are
    module-level objects defined outside this function.
    NOTE(review): presumably they are filled in as a side effect of the
    tex2txt run -- confirm against the module-level definitions.
    """
    packs = '.yalafi.shell.addpacks'
    user_packs = cmdline.packages.strip(',')
    if user_packs:
        # user packages come first, our own module is appended last
        packs = user_packs + ',' + packs
    opts = tex2txt.Options(defs=cmdline.define, lang=cmdline.language[:2],
                           dcls=cmdline.documentclass, pack=packs)
    f = tex2txt.myopen(cmdline.add_modules, encoding=cmdline.encoding)
    try:
        latex = f.read()
    finally:
        # close the file even if reading fails (e.g., on a decoding error)
        f.close()
    tex2txt.tex2txt(latex, opts)
    return documentclass[0], packages
U_\epsilon(x) &\subset M \quad\text{for all } x \in \Omega, \notag \\ f(x) % LINE 11 &> 0 \quad\text{for all}\ x \in \Omega \label{l1} \\ f(x) &= 0 \quad\text{for all} x \in M \setminus \Omega. \label{l2} \end{align} """ plain_t = r""" We consider a set C-C-C, a domain D-D-D, andx a function E-E-E. With a constant F-F-F, we require V-V-V equal W-W-W for all X-X-X, Y-Y-Y equal Z-Z-Z for all U-U-U U-U-U equal V-V-V for allW-W-W. Thix is a footnote. """ options = tex2txt.Options(lang='en', char=True) plain, nums = tex2txt.tex2txt(latex, options) def test_text(): assert plain == plain_t
from yalafi.shell import addpacks
# NOTE(review): this is a slice of flat script code; the nesting below
# (in particular, 'opts' being set only under 'if cmdline.include:') is
# reconstructed -- confirm against the complete file.

#   obtain document class and package list from the addpacks helper and
#   merge them into the command-line settings
#
dcls, packs = addpacks.addpacks(cmdline, source_defs)
if dcls:
    cmdline.documentclass = dcls
if cmdline.packages:
    # packages from the command line are appended after the collected ones
    packs += cmdline.packages.split(',')
cmdline.packages = ','.join(packs)

#   on option --include: add included files to work list
#   otherwise: remove duplicates
#
if cmdline.include:
    sys.stderr.write('=== checking for file inclusions ... ')
    sys.stderr.flush()
    opts = tex2txt.Options(extr=inclusion_macros, repl=cmdline.replace,
                    defs=cmdline.define, lang=cmdline.language[:2],
                    dcls=cmdline.documentclass, pack=cmdline.packages,
                    nosp=cmdline.no_specials)

def skip_file(fn):
    # does file name match regex from option --skip?
    # - the pattern is anchored with \A and \Z: the whole name must match
    # - result is only used for its truth value: it is either the falsy
    #   cmdline.skip or the result of re.search()
    return cmdline.skip and re.search(r'\A' + cmdline.skip + r'\Z', fn)

#   'todo' is the same object as cmdline.file, so pop(0) consumes that list;
#   'done' collects each file once, in order of first appearance
#
todo = cmdline.file
done = []
while todo:
    f = todo.pop(0)
    if f in done or skip_file(f):
        continue
    done.append(f)
    if not cmdline.include:
        # without --include, the loop only removes duplicates / skipped files
        continue
def run_proofreader_options(tex, source, source_defs, language, disable,
                            enable, disablecategories, enablecategories,
                            lt_options):
    """Convert 'tex' to plain text, proofread it, and collect all matches.

    Returns the tuple (tex, plain, charmap, matches):
    - plain: concatenation of all checked text parts, each followed by
      the delimiter '\\n\\n'
    - charmap: character map parallel to plain; the delimiter positions
      repeat the last map entry of the preceding part
    - matches: proofreader messages, with 'offset' shifted to the position
      in the concatenated plain text, sorted by abs() of the character map
      entry at that offset

    With cmdline.list_unknown (and without cmdline.plain_input), only the
    conversion is performed and the list of matches is empty.
    """
    def extended(base, extra):
        # append comma-separated list 'extra' to list 'base'
        if not extra:
            return base
        if base:
            base += ','
        return base + extra

    converter_opts = tex2txt.Options(
                        char=True, repl=cmdline.replace,
                        defs=cmdline.define, lang=language,
                        extr=cmdline.extract, unkn=cmdline.list_unknown,
                        seqs=cmdline.simple_equations,
                        dcls=cmdline.documentclass,
                        pack=cmdline.packages,
                        nosp=cmdline.no_specials)

    if cmdline.plain_input:
        # input is taken as plain text: map character i to position i + 1
        texts_by_lang = {language: [(tex, list(range(1, len(tex) + 1)))]}
    elif cmdline.list_unknown:
        # only look for unknown macros and environments
        plain, charmap = tex2txt.tex2txt(tex, converter_opts,
                                source=source, source_defs=source_defs)
        return (tex, plain, charmap, [])
    elif cmdline.multi_language:
        def set_continue_threshold(parms):
            parms.ml_continue_thresh = cmdline.ml_continue_threshold
        texts_by_lang = tex2txt.tex2txt(tex, converter_opts,
                                multi_language=True,
                                modify_parms=set_continue_threshold,
                                source=source, source_defs=source_defs)
    else:
        plain, charmap = tex2txt.tex2txt(tex, converter_opts,
                                source=source, source_defs=source_defs)
        texts_by_lang = {language: [(plain, charmap)]}

    #   rule and category lists used for short text parts in
    #   multi-language mode
    #
    short_disable = extended(disable, cmdline.ml_disable)
    short_disablecategories = extended(disablecategories,
                                        cmdline.ml_disablecategories)

    separator = '\n\n'      # NB: issue #6
    all_matches = []
    plain_parts = []
    plain_len = 0           # length of all text collected in plain_parts
    all_charmap = []
    for lang in texts_by_lang:
        for plain, charmap in texts_by_lang[lang]:
            if not plain.strip():
                continue

            #   here, we could dispatch to other tools, see for instance
            #   - https://textgears.com/api
            #   - Python package prowritingaid.python
            #
            if cmdline.textgears:
                matches = run_textgears(plain)
            else:
                if (cmdline.multi_language
                        and len(plain.split()) <= cmdline.ml_rule_threshold):
                    rules_off = short_disable
                    categories_off = short_disablecategories
                else:
                    rules_off = disable
                    categories_off = disablecategories
                matches = run_languagetool(
                        plain, lang, rules_off, enable,
                        categories_off, enablecategories, lt_options)
            matches += checks.create_single_letter_matches(plain, cmdline)
            matches += checks.create_equation_punct_messages(
                                plain, cmdline,
                                equation_replacements_display,
                                equation_replacements_inline,
                                equation_replacements)

            # make offsets relative to the start of the concatenated text
            for match in matches:
                match['offset'] = json_get(match, 'offset', int) + plain_len
            all_matches.extend(matches)

            plain_parts.append(plain)
            plain_parts.append(separator)
            plain_len += len(plain) + len(separator)
            all_charmap.extend(charmap)
            all_charmap.extend([all_charmap[-1]] * len(separator))

    #   sort matches according to position in LaTeX text
    #
    def tex_position(match):
        start = json_get(match, 'offset', int)
        if not 0 <= start < len(all_charmap):
            tex2txt.fatal('run_proofreader():'
                            + ' bad message read from proofreader')
        return abs(all_charmap[start])

    all_matches.sort(key=tex_position)
    return (tex, ''.join(plain_parts), all_charmap, all_matches)
def test_6():
    """Convert latex_6 with default options; result must equal plain_6."""
    default_opts = tex2txt.Options()
    plain, _nums = tex2txt.tex2txt(latex_6, default_opts)
    assert plain == plain_6
def test_4():
    """Convert latex_4 with package '.tests.defs'; result must equal plain_4."""
    opts = tex2txt.Options(pack='.tests.defs')
    plain, _nums = tex2txt.tex2txt(latex_4, opts)
    assert plain == plain_4
def test_3():
    """Convert latex_3 with package setting '*'; result must equal plain_3."""
    opts = tex2txt.Options(pack='*')
    plain, _nums = tex2txt.tex2txt(latex_3, opts)
    assert plain == plain_3
# # - test of option --unkn # from yalafi import tex2txt options = tex2txt.Options(unkn=True) latex_1 = r""" A \newcommand{\zzz}[1]{#1#1} \xxx \begin{\zzz Y} B """ plain_1 = r"""\xxx YY """ def test_1(): plain, nums = tex2txt.tex2txt(latex_1, options) assert plain_1 == plain