Example #1
0
# complete the LanguageTool command line from the script options
#
if cmdline.disable:
    ltcommand.extend(['--disable', cmdline.disable])
if cmdline.lt_options:
    # the first character of the option string is dropped, the rest is
    # split at white space into separate arguments
    # (presumably, the first character is only a placeholder that lets
    # the option value start with '-' -- TODO confirm)
    ltcommand.extend(cmdline.lt_options[1:].split())
# a final '-' is always appended
# (presumably, this makes LanguageTool read from standard input
# -- TODO confirm)
ltcommand.append('-')
if cmdline.lt_server_options:
    # same convention: first character dropped, rest appended after a space
    ltserver_local_cmd = ' '.join((ltserver_local_cmd,
                                   cmdline.lt_server_options[1:]))

# on option --include: add included files to work list
# otherwise: remove duplicates
#
if cmdline.include:
    # progress message on stderr; no newline is written here, and the
    # flush makes the partial line visible immediately
    sys.stderr.write('=== checking for file inclusions ... ')
    sys.stderr.flush()
    # tex2txt options built from inclusion_macros and the command-line
    # settings for replacements, definitions and language
    # (presumably, 'extr' selects the macros whose arguments name the
    # included files -- TODO confirm against tex2txt.Options)
    # NOTE: opts is only bound if option --include is given
    opts = tex2txt.Options(extr=inclusion_macros,
                           repl=cmdline.replace,
                           defs=cmdline.define,
                           lang=cmdline.t2t_lang)


def skip_file(fn):
    # does file name match regex from option --skip?
    #
    # The regular expression has to match the complete file name fn.
    # re.fullmatch() is used instead of enclosing the pattern in
    # anchors for start and end of string: with plain concatenation, a
    # pattern with top-level alternation like 'a|b' would anchor only
    # the start of 'a' and only the end of 'b', so that for instance
    # 'ab' would be skipped, too.
    #
    # Returns a false value (the unset or empty cmdline.skip itself, or
    # None) if the file is not to be skipped, otherwise the match object.
    return cmdline.skip and re.fullmatch(cmdline.skip, fn)


# work list of files that still have to be examined
# NOTE: todo is the same object as cmdline.file (no copy is made), so
# that pop(0) below also removes the entries from cmdline.file
todo = cmdline.file
# files accepted so far, in order of their first occurrence
done = []
while todo:
    # take files from the front of the work list
    f = todo.pop(0)
    # ignore files that were seen before or that match option --skip
    if f in done or skip_file(f):
        continue
    done.append(f)
Example #2
0
\textcolor{red}{redx colour.}}
is lazy.
"""

# expected plain text: test_text() compares it with the first return
# value of tex2txt.tex2txt() for the input in 'latex'
plain_t = r"""
Only few people
is lazy.



We use
redx colour.
"""

# expected second return value of tex2txt.tex2txt(), checked by
# test_nums()
# (with char=True, these are presumably character positions in the
# LaTeX input, one per character of the plain text -- TODO confirm)
nums_t = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 64, 65, 66, 67, 68,
    69, 70, 71, 72, 73, 27, 27, 27, 27, 28, 29, 30, 31, 32, 33, 50, 51, 52, 53,
    54, 55, 56, 57, 58, 59, 60, 61, 63, 74
]

# run the conversion once at import time; both tests below use the result
options = tex2txt.Options(lang='en', char=True)
plain, nums = tex2txt.tex2txt(latex, options)


def test_text():
    # the plain text computed by tex2txt() has to equal the expected
    # text exactly
    expected = plain_t
    assert plain == expected


def test_nums():
    # the number list computed by tex2txt() has to equal the expected
    # list exactly
    expected = nums_t
    assert nums == expected
Example #3
0
#
input_encoding = 'utf-8'
# input_encoding = 'latin-1'

# path of LT java archive and used options
#
# The command is assembled directly as an argument list.  Building one
# string and splitting it at white space would break a path of the
# Java archive that contains blanks into several arguments.
ltjar = '../LT/LanguageTool-4.7/languagetool-commandline.jar'
ltcmd = [
    'java', '-jar', ltjar,
    '--language', 'en-GB',
    '--encoding', 'utf-8',
    '--disable', 'WHITESPACE_RULE',
]

# prepare options for tex2txt()
#
# Only 'char' and 'lang' are set.  The commented-out arguments show how
# replacements and definitions could be read from files; they are
# inactive here.
options = tex2txt.Options(
    char=True,
    #           repl=tex2txt.read_replacements('Tools/LT/repls.txt',
    #                                           encoding=input_encoding),
    #           defs=tex2txt.read_definitions('Tools/LT/defs.py',
    #                                           encoding='utf-8'),
    lang='en')

# process all files given on the command line, one after the other
for file in sys.argv[1:]:

    # announce the current file on stderr
    sys.stderr.write('=== ' + file + '\n')
    sys.stderr.flush()

    # read file and call tex2txt()
    #
    # NOTE(review): if f.read() raises, f.close() is not reached; a
    # with-statement would be safer, provided that the object returned
    # by tex2txt.myopen() supports the context-manager protocol
    # -- TODO confirm
    f = tex2txt.myopen(file, encoding=input_encoding)
    tex = f.read()
    f.close()
    # tex2txt() returns the plain text together with a second value;
    # with char=True in options, this is presumably a map from
    # characters of the plain text to positions in tex -- TODO confirm
    (plain, charmap) = tex2txt.tex2txt(tex, options)