Example #1
0
def run_languagetool(plain):
    if cmdline.server:
        # use Web server hosted by LT or local server
        if cmdline.server == 'lt':
            server = ltserver
        else:
            start_local_lt_server()
            server = ltserver_local
        data = {'text': plain, 'language': cmdline.language}
        if cmdline.disable:
            data['disabledRules'] = cmdline.disable
        if cmdline.lt_options:
            # translate options to entries in HTML request
            ltopts = cmdline.lt_options[1:].split()
            for opt in lt_option_map:
                entry = lt_option_map[opt]
                if not any(s in ltopts for s in entry[0]):
                    continue
                idx = max(ltopts.index(s) for s in entry[0] if s in ltopts)
                if entry[1]:
                    data[opt] = ltopts[idx +
                                       1] if idx + 1 < len(ltopts) else ''
                else:
                    data[opt] = 'true'

        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(server, data=data)
        try:
            reply = urllib.request.urlopen(request)
            out = reply.read()
            reply.close()
        except:
            tex2txt.fatal('error connecting to "' + server + '"')
    else:
        # use local installation
        try:
            out = subprocess.run(ltcommand,
                                 input=plain.encode('utf-8'),
                                 stdout=subprocess.PIPE).stdout
        except:
            tex2txt.fatal('error running "' + ltcommand[0] + '"')

    out = out.decode(encoding='utf-8')
    try:
        dic = json_decoder.decode(out)
    except:
        json_fatal('JSON root element')
    matches = json_get(dic, 'matches', list)
    return matches
Example #2
0
def start_local_lt_server():
    def check_server():
        # check for running server
        global ltserver_local_running
        if ltserver_local_running:
            return True

        data = {'text': '', 'language': 'en'}
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(ltserver_local, data=data)
        try:
            reply = urllib.request.urlopen(request)
            reply.close()
            ltserver_local_running = True
            return True
        except:
            return False

    if check_server():
        return
    try:
        subprocess.Popen(ltserver_local_cmd.split(),
                         cwd=ltdirectory,
                         stdout=subprocess.DEVNULL,
                         stderr=subprocess.DEVNULL)
    except:
        tex2txt.fatal('error running "' + ltserver_local_cmd + '"')

    # wait for server to be available
    #
    sys.stderr.write('=== starting local LT server at "' + ltdirectory +
                     '":\n=== ' + ltserver_local_cmd + ' ')
    sys.stderr.flush()
    for x in range(20):
        time.sleep(0.5)
        sys.stderr.write('.') and sys.stderr.flush()
        if check_server():
            sys.stderr.write('\n') and sys.stderr.flush()
            return
    sys.stderr.write('\n')
    tex2txt.fatal('error starting server "' + ltserver_local_cmd + '"')
Example #3
0
def create_equation_punct_messages(plain):
    if cmdline.equation_punctuation is None:
        return []

    mode = {
        'displayed': equation_replacements_display,
        'inline': equation_replacements_inline,
        'all': equation_replacements,
    }
    k = list(k for k in mode.keys()
             if k.startswith(cmdline.equation_punctuation))
    if len(k) != 1:
        tex2txt.fatal('mode for --equation-punctuation has to determine' +
                      ' one of ' + ', '.join(mode.keys()))
    repls = mode[k[0]]

    def msg(m):
        return create_message(
            m,
            rule='PRIVATE::EQUATION_PUNCTUATION',
            msg='Possibly incorrect punctuation after equation.',
            repl='—')

    def f(m):
        #   return True if equation is followed by
        #   - another equation (possibly after punctuation in punct)
        #   - a dot
        #   - a lower-case word (possibly after punctuation in punct)
        #
        groups = m.groups()
        equ = groups[0]  # another equation follows: not consumed!
        dot = groups[-2]  # a dot follows
        word = groups[-1]  # a word follows
        return equ or dot or word and word[0].islower()

    punct = ',;:'
    equ = r'\b(?:' + repls + r')\b'
    expr = (r'(' + equ + r'(?=\s*[' + punct + r']?\s*' + equ + r'))|' + equ +
            r'\s*(?:(\.)|[' + punct + r']?\s*([^\W0-9_]+))?')
    return list(msg(m) for m in re.finditer(expr, plain) if not f(m))
Example #4
0
def run_textgears(plain):
    data = {
        'key': cmdline.textgears,
        'text': plain,
    }
    data = urllib.parse.urlencode(data).encode(encoding='ascii')
    request = urllib.request.Request(textgears_server, data=data)
    try:
        reply = urllib.request.urlopen(request)
        out = reply.read()
        reply.close()
    except:
        tex2txt.fatal('error connecting to "' + textgears_server + '"')

    out = out.decode(encoding='utf-8')
    try:
        dic = json_decoder.decode(out)
    except:
        json_fatal('JSON root element')

    def f(err):
        offset = json_get(err, 'offset', int)
        length = json_get(err, 'length', int)
        return {
            'message':
            'Error type: ' + json_get(err, 'type', str),
            'offset':
            offset,
            'length':
            length,
            'context':
            create_context(plain, offset, length),
            'replacements':
            list({'value': r} for r in json_get(err, 'better', list)),
            'rule': {
                'id': 'Not available'
            },
        }

    return list(f(err) for err in json_get(dic, 'errors', list))
Example #5
0
def generate_html(tex, charmap, matches, file):

    s = 'File "' + file + '" with ' + str(len(matches)) + ' problem(s)'
    title = protect_html(s)
    anchor = file
    anchor_overlap = file + '-@@@'
    prefix = '<a id="' + anchor + '"></a><H3>' + title + '</H3>\n'

    # collect data for highlighted places
    #
    hdata = []
    for m in matches:
        beg = json_get(m, 'offset', int)
        end = beg + max(1, json_get(m, 'length', int))
        if beg < 0 or end < 0 or beg >= len(charmap) or end >= len(charmap):
            tex2txt.fatal('generate_html():' +
                          ' bad message read from proofreader')
        h = tex2txt.Aux()
        h.unsure = (charmap[beg] < 0 or charmap[max(beg, end - 1)] < 0)
        h.beg = abs(charmap[beg]) - 1
        h.end = abs(charmap[max(beg, end - 1)])  # see issue #21
        if h.unsure or h.end <= h.beg:
            h.end = h.beg + 1

        if (h.end == h.beg + 1 and tex[h.beg] == '\\'
                and re.search(r'(?<!\\)(\\\\)*\Z', tex[:h.beg])):
            # HACK:
            # if matched a single \ that is actually followed by macro name:
            # also highlight the macro name
            s = re.search(r'\A\\[a-zA-Z]+', tex[h.beg:])
            if s:
                h.end = h.beg + len(s.group(0))
        elif h.unsure and tex[h.beg].isalpha():
            # HACK:
            # if unsure: mark till end of word (only letters)
            s = re.search(r'\A.[^\W0-9_]+', tex[h.beg:])
            if s:
                h.end = h.beg + len(s.group(0))

        h.beglin = tex.count('\n', 0, h.beg)
        h.endlin = tex.count('\n', 0, h.end) + 1
        h.lin = h.beglin
        h.m = m
        hdata.append(h)

    # group adjacent matches into regions
    #
    regions = []
    starts = tex2txt.get_line_starts(tex)
    for h in hdata:
        h.beglin = max(h.beglin - cmdline.context, 0)
        h.endlin = min(h.endlin + cmdline.context, len(starts) - 1)
        if not regions or h.beglin >= max(h.endlin for h in regions[-1]):
            # start a new region
            regions.append([h])
        else:
            # match is part of last region
            regions[-1].append(h)

    # produce output
    #
    res_tot = ''
    overlaps = []
    line_numbers = []
    for reg in regions:
        #
        # generate output for one region:
        # collect all matches in that region
        #
        beglin = reg[0].beglin
        endlin = max(h.endlin for h in reg)
        res = ''
        last = starts[beglin]
        for h in reg:
            s = generate_highlight(h.m, tex[h.beg:h.end], h.lin + 1, h.unsure)
            if h.beg < last:
                # overlapping with last message
                overlaps.append((s, h.lin + 1))
                continue
            res += protect_html(tex[last:h.beg])
            res += s
            last = h.end

        res += protect_html(tex[last:starts[endlin]])
        res_tot += res + '<br>\n'
        line_numbers += list(range(beglin, endlin)) + [-1]

    if not line_numbers:
        # no problems found: just display first cmdline.context lines
        endlin = min(cmdline.context, len(starts) - 1)
        res_tot = protect_html(tex[:starts[endlin]])
        line_numbers = list(range(endlin))
    if line_numbers:
        res_tot = add_line_numbers(res_tot, line_numbers)

    postfix = ''
    if overlaps:
        prefix += ('<a href="#' + anchor_overlap + '">' +
                   '<H3>Overlapping message(s) found:' +
                   ' see here</H3></a>\n')
        postfix = ('<a id="' + anchor_overlap + '"></a><H3>' +
                   protect_html('File "' + file + '":') +
                   ' overlapping message(s)</H3>\n')
        postfix += '<table cellspacing="0">\n'
        for (s, lin) in overlaps:
            postfix += ('<tr><td style="' + number_style +
                        '" align="right" valign="top">' + str(lin) +
                        '&nbsp;&nbsp;</td><td>' + s + '</td></tr>\n')
        postfix += '</table>\n'

    return (title, anchor, prefix + res_tot + postfix, len(matches))
Example #6
0
 def f(m):
     beg = json_get(m, 'offset', int)
     if beg < 0 or beg >= len(charmap):
         tex2txt.fatal('run_proofreader():' +
                       ' bad message read from proofreader')
     return abs(charmap[beg])
Example #7
0
def json_fatal(item):
    tex2txt.fatal('error reading JSON output from proofreader, (sub-)item "' +
                  item + '"')
Example #8
0
if cmdline.language is None:
    cmdline.language = default_option_language
if cmdline.t2t_lang is None:
    cmdline.t2t_lang = cmdline.language[:2]
if cmdline.encoding is None:
    cmdline.encoding = default_option_encoding
if cmdline.disable is None:
    cmdline.disable = default_option_disable
if cmdline.context is None:
    cmdline.context = default_option_context
if cmdline.context < 0:
    # huge context: display whole text
    cmdline.context = int(1e8)
if cmdline.server is not None and cmdline.server not in ('lt', 'my', 'stop'):
    tex2txt.fatal('mode for --server has to be one of lt, my, stop')
if cmdline.plain and (cmdline.include or cmdline.replace):
    tex2txt.fatal('cannot handle --plain together with --include or --replace')
if cmdline.single_letters and cmdline.single_letters.endswith('||'):
    cmdline.single_letters += equation_replacements
if cmdline.replace:
    cmdline.replace = tex2txt.read_replacements(cmdline.replace,
                                                encoding=cmdline.encoding)
if cmdline.define:
    cmdline.define = tex2txt.read_definitions(cmdline.define, encoding='utf-8')

# only stop local LT server?
#
if cmdline.server == 'stop':
    done = False
    try: