Ejemplo n.º 1
0
def start_local_lt_server(language):
    """Make sure the local LanguageTool HTTP server is up.

    The server at ``ltserver_local`` is probed first.  If it does not
    answer, ``ltserver_local_cmd`` (plus extra options taken from
    ``cmdline.lt_server_options``) is launched in ``cmdline.lt_directory``
    and the server is polled every 0.5 s, at most 20 times.

    ``tex2txt.fatal()`` is called if the command cannot be launched, if the
    server rejects the probe with an HTTP error, or if it never becomes
    reachable.

    language: language code sent along with the probe request.
    """
    def check_server():
        """Return True if the local server answers a minimal request."""
        global ltserver_local_running
        if ltserver_local_running:
            # a previous probe already succeeded
            return True

        # NB: we need at least one character to check (issue #57)
        data = {'text': ' ', 'language': language}
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(ltserver_local, data=data)
        try:
            # the context manager closes the reply even on errors
            with urllib.request.urlopen(request):
                pass
        except urllib.error.HTTPError as e:
            # as we have no real text, this is probably a wrong language code
            tex2txt.fatal('The server couldn\'t fulfill the request;' +
                          ' error code: ' + repr(e.code) +
                          '\n(probably an unknown language code)')
        except Exception:
            # server not reachable (yet); KeyboardInterrupt and SystemExit
            # are deliberately not swallowed here
            return False
        else:
            ltserver_local_running = True
            return True

    server_cmd = ltserver_local_cmd
    # NOTE(review): the first character of the option string is skipped;
    # presumably a placeholder added during argument parsing - confirm
    extra_options = cmdline.lt_server_options[1:]
    if extra_options:
        server_cmd += ' ' + extra_options
    if check_server():
        return

    # compare issue #12
    start_new_session = sys.platform != 'win32'
    try:
        subprocess.Popen(server_cmd.split(),
                         cwd=cmdline.lt_directory,
                         start_new_session=start_new_session,
                         stdout=subprocess.DEVNULL,
                         stderr=subprocess.DEVNULL)
    except Exception:
        tex2txt.fatal('error running ' + repr(server_cmd) + ' in directory ' +
                      repr(cmdline.lt_directory))

    # wait for server to be available, printing one dot per attempt
    #
    sys.stderr.write('=== starting local LT server at "' +
                     cmdline.lt_directory + '":\n=== ' + server_cmd + ' ')
    sys.stderr.flush()
    for _ in range(20):
        time.sleep(0.5)
        sys.stderr.write('.')
        sys.stderr.flush()
        if check_server():
            sys.stderr.write('\n')
            sys.stderr.flush()
            return
    sys.stderr.write('\n')
    sys.stderr.flush()
    tex2txt.fatal('error starting server "' + server_cmd + '"')
Ejemplo n.º 2
0
def run_textgears(plain):
    """Check ``plain`` with the TextGears service and return its findings.

    The text is posted to ``textgears_server`` together with the API key
    from ``cmdline.textgears``.  Each entry of the reply's 'errors' list is
    converted into a dictionary with the keys 'message', 'offset',
    'length', 'context', 'replacements' and 'rule'.

    ``tex2txt.fatal()`` is called on connection problems, ``json_fatal()``
    if the reply cannot be decoded as UTF-8 encoded JSON.

    plain: the plain text to be checked.
    Returns a list of message dictionaries.
    """
    data = {
        'key': cmdline.textgears,
        'text': plain,
    }
    data = urllib.parse.urlencode(data).encode(encoding='ascii')
    request = urllib.request.Request(textgears_server, data=data)
    try:
        # the context manager closes the reply even if read() fails
        with urllib.request.urlopen(request) as reply:
            out = reply.read()
    except Exception:
        tex2txt.fatal('error connecting to "' + textgears_server + '"')

    try:
        # undecodable bytes are reported like any other malformed reply
        dic = json_decoder.decode(out.decode(encoding='utf-8'))
    except Exception:
        json_fatal('JSON root element')

    def convert(err):
        """Translate one TextGears error entry into a message dictionary."""
        offset = json_get(err, 'offset', int)
        length = json_get(err, 'length', int)
        return {
            'message': 'Error type: ' + json_get(err, 'type', str),
            'offset': offset,
            'length': length,
            'context': checks.create_context(plain, offset, length),
            'replacements': [{'value': r}
                             for r in json_get(err, 'better', list)],
            # the reply is not queried for a rule identifier
            'rule': {
                'id': 'Not available'
            },
        }

    return [convert(err) for err in json_get(dic, 'errors', list)]
Ejemplo n.º 3
0
    def check_server():
        """Return True if the local LT server answers a minimal request.

        A successful probe is remembered in the module-level flag
        ltserver_local_running, so later calls return immediately.
        An HTTP error reply is reported via tex2txt.fatal(); any other
        failure to connect yields False.
        """
        global ltserver_local_running
        if ltserver_local_running:
            # a previous probe already succeeded
            return True

        # NB: we need at least one character to check (issue #57)
        data = {'text': ' ', 'language': language}
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(ltserver_local, data=data)
        try:
            # the context manager closes the reply even on errors
            with urllib.request.urlopen(request):
                pass
        except urllib.error.HTTPError as e:
            # as we have no real text, this is probably a wrong language code
            tex2txt.fatal('The server couldn\'t fulfill the request;' +
                          ' error code: ' + repr(e.code) +
                          '\n(probably an unknown language code)')
        except Exception:
            # server not reachable (yet); KeyboardInterrupt and SystemExit
            # are deliberately not swallowed here
            return False
        else:
            ltserver_local_running = True
            return True
Ejemplo n.º 4
0
def json_fatal(item):
    """Report via tex2txt.fatal() that a JSON (sub-)item was unreadable.

    item: name of the offending item, inserted into the error message.
    """
    message = ('error reading JSON output from proofreader, (sub-)item "'
               + item + '"')
    tex2txt.fatal(message)
Ejemplo n.º 5
0
# Post-processing of parsed command-line options.
#
# Command for a local LanguageTool run: the user-supplied command or the
# default one, always with JSON output and UTF-8 encoding requested.
ltcommand = ((cmdline.lt_command or default_option_lt_command)
                    + ' --json --encoding utf-8')
# A user-supplied command also replaces the command that starts the
# local HTTP server.
if cmdline.lt_command:
    ltserver_local_cmd = (cmdline.lt_command + ' --http --port '
                                + str(ltserver_local_port))
# Working directory for LanguageTool: current directory when a custom
# command is given, otherwise the default directory.
if not cmdline.lt_directory:
    if cmdline.lt_command:
        cmdline.lt_directory = '.'
    else:
        cmdline.lt_directory = default_option_lt_directory

if cmdline.context < 0:
    # huge context: display whole text
    cmdline.context = int(1e8)
# An input file is required unless cmdline.as_server is set or the
# server option is 'stop'.
if not (cmdline.file or cmdline.as_server or cmdline.server == 'stop'):
    tex2txt.fatal('no input file given')
if cmdline.plain_input and (cmdline.include or cmdline.replace):
    tex2txt.fatal('cannot handle --plain-input together with'
                                        + ' --include or --replace')

# Language-dependent settings; the equation replacement strings are
# de-duplicated and joined with '|'
# (presumably for use as regular-expression alternatives - TODO confirm).
lc = parameters.Parameters(cmdline.language).lang_context
equation_replacements_display = r'|'.join(set(
                    lc.math_repl_display + lc.math_repl_display_vowel))
equation_replacements_inline = r'|'.join(set(
                    lc.math_repl_inline + lc.math_repl_inline_vowel))
equation_replacements = r'|'.join(set(
                    lc.math_repl_display + lc.math_repl_display_vowel
                    + lc.math_repl_inline + lc.math_repl_inline_vowel))
# A trailing '||' in the single-letters option selects all equation
# replacements; how repls is used further is not visible in this fragment.
if cmdline.single_letters and cmdline.single_letters.endswith('||'):
    repls = (lc.math_repl_display + lc.math_repl_display_vowel
                    + lc.math_repl_inline + lc.math_repl_inline_vowel)
Ejemplo n.º 6
0
def run_languagetool(plain, language, disable, enable, disablecategories,
                     enablecategories, lt_options):
    """Check ``plain`` with LanguageTool and return the list of matches.

    Depending on ``cmdline.server`` the text is sent to an HTTP server
    (``ltserver`` for 'lt', otherwise ``ltserver_local``), or LanguageTool
    is run as a subprocess that reads the text from stdin.  With
    ``cmdline.server == 'my'`` a failed connection triggers one attempt to
    start the local server before the request is retried.

    plain: text to be checked.
    language: LanguageTool language code.
    disable, enable: rule lists; passed on only if non-empty.
    disablecategories, enablecategories: category lists; passed on only
        if non-empty.
    lt_options: list of additional LanguageTool command-line options; for
        the HTTP server they are translated via ``lt_option_map``.

    Returns the 'matches' list of the decoded JSON reply.
    Calls ``tex2txt.fatal()`` / ``json_fatal()`` on connection, execution
    or decoding problems.
    """
    if cmdline.server:
        # use Web server hosted by LT or local server
        server = ltserver if cmdline.server == 'lt' else ltserver_local
        data = {'text': plain, 'language': language}
        if disable:
            data['disabledRules'] = disable
        if enable:
            data['enabledRules'] = enable
        if disablecategories:
            data['disabledCategories'] = disablecategories
        if enablecategories:
            data['enabledCategories'] = enablecategories
        if lt_options:
            # translate options to entries in HTML request:
            # entry[0] holds the accepted spellings of a command-line
            # option, entry[1] tells whether the option takes a value
            ltopts = lt_options
            for opt in lt_option_map:
                entry = lt_option_map[opt]
                positions = [ltopts.index(s) for s in entry[0]
                             if s in ltopts]
                if not positions:
                    continue
                # the spelling occurring last on the command line wins
                idx = max(positions)
                if entry[1]:
                    data[opt] = (ltopts[idx + 1]
                                 if idx + 1 < len(ltopts) else '')
                else:
                    data[opt] = 'true'

        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(server, data=data)
        for _ in range(2):
            try:
                # the context manager closes the reply even if read() fails
                with urllib.request.urlopen(request) as reply:
                    out = reply.read()
                break
            except Exception:
                if cmdline.server != 'my':
                    tex2txt.fatal('error connecting to "' + server + '"')
                start_local_lt_server(language)
        else:
            # both attempts failed: without this, 'out' would be unbound
            # below and the failure would surface as an UnboundLocalError
            tex2txt.fatal('error connecting to "' + server + '"')
    else:
        # use local installation
        lt_cmd = ltcommand.split() + ['--language', language]
        if disable:
            lt_cmd += ['--disable', disable]
        if enable:
            lt_cmd += ['--enable', enable]
        if disablecategories:
            lt_cmd += ['--disablecategories', disablecategories]
        if enablecategories:
            lt_cmd += ['--enablecategories', enablecategories]
        lt_cmd += lt_options
        lt_cmd.append('-')  # read from stdin
        try:
            out = subprocess.run(lt_cmd,
                                 cwd=cmdline.lt_directory,
                                 input=plain.encode('utf-8'),
                                 stdout=subprocess.PIPE)
            out = out.stdout
        except Exception:
            tex2txt.fatal('error running ' + repr(' '.join(lt_cmd)) +
                          ' in directory ' + repr(cmdline.lt_directory))

    try:
        # undecodable bytes are reported like any other malformed reply
        dic = json_decoder.decode(out.decode(encoding='utf-8'))
    except Exception:
        json_fatal('JSON root element')
    matches = json_get(dic, 'matches', list)
    return matches
Ejemplo n.º 7
0
 def f(m):
     # Look up the match's 'offset' in charmap_tot and return the absolute
     # value of the entry; an offset outside the map is reported as fatal.
     offset = json_get(m, 'offset', int)
     if not 0 <= offset < len(charmap_tot):
         message = 'run_proofreader():' + ' bad message read from proofreader'
         tex2txt.fatal(message)
     return abs(charmap_tot[offset])
Ejemplo n.º 8
0
def generate_html(tex, charmap, matches, file):
    """Create the HTML report part for one file.

    tex: source text in which the problems are highlighted.
    charmap: sequence indexed by the offsets reported in the matches; the
        absolute value of an entry is used as position in ``tex``, a
        negative entry marks the position as unsure.
    matches: list of proofreader messages, each providing 'offset' and
        'length'.
    file: file name, used for the heading and as HTML anchor.

    Returns the tuple (title, anchor, HTML text, number of matches).
    Calls tex2txt.fatal() if a match lies outside of ``charmap``.
    """

    s = 'File "' + file + '" with ' + str(len(matches)) + ' problem(s)'
    title = protect_html(s)
    anchor = file
    anchor_overlap = file + '-@@@'
    prefix = '<a id="' + anchor + '"></a><H3>' + title + '</H3>\n'

    # collect data for highlighted places
    #
    hdata = []
    for m in matches:
        beg = json_get(m, 'offset', int)
        # a reported length of zero is treated as one character
        end = beg + max(1, json_get(m, 'length', int))
        if beg < 0 or end < 0 or beg >= len(charmap) or end >= len(charmap):
            tex2txt.fatal('generate_html():' +
                          ' bad message read from proofreader')
        h = tex2txt.Aux()
        # negative charmap entries flag positions that are not reliable
        h.unsure = (charmap[beg] < 0 or charmap[max(beg, end - 1)] < 0)
        # charmap values are apparently 1-based, h.beg is an index into tex
        h.beg = abs(charmap[beg]) - 1
        h.end = abs(charmap[max(beg, end - 1)])  # see issue #21
        if h.unsure or h.end <= h.beg:
            # fall back to highlighting a single character
            h.end = h.beg + 1

        if h.end == h.beg + 1 and tex[h.beg] == '\\':
            # HACK:
            # if matched a single \ that is actually followed by macro name:
            # also highlight the macro name
            h.end = h.beg + utils.correct_mark_macroname(h.beg, 1, tex)
        elif h.unsure and tex[h.beg].isalpha():
            # HACK:
            # if unsure: mark till end of word (only letters)
            s = re.search(r'\A.[^\W0-9_]+', tex[h.beg:])
            if s:
                h.end = h.beg + len(s.group(0))

        # zero-based index of the line containing h.beg, and one past the
        # index of the line containing h.end
        h.beglin = tex.count('\n', 0, h.beg)
        h.endlin = tex.count('\n', 0, h.end) + 1
        # h.lin keeps the line of the match itself; h.beglin and h.endlin
        # are widened by the context below
        h.lin = h.beglin
        h.m = m
        hdata.append(h)

    # group adjacent matches into regions
    #
    regions = []
    starts = tex2txt.get_line_starts(tex)
    for h in hdata:
        # widen the line range by the requested context, clamped to the text
        h.beglin = max(h.beglin - cmdline.context, 0)
        h.endlin = min(h.endlin + cmdline.context, len(starts) - 1)
        if not regions or h.beglin >= max(h.endlin for h in regions[-1]):
            # start a new region
            regions.append([h])
        else:
            # match is part of last region
            regions[-1].append(h)

    # produce output
    #
    res_tot = ''
    overlaps = []
    line_numbers = []
    for reg in regions:
        #
        # generate output for one region:
        # collect all matches in that region
        #
        beglin = reg[0].beglin
        endlin = max(h.endlin for h in reg)
        res = ''
        # position in tex up to which output has been produced
        last = starts[beglin]
        for h in reg:
            s = generate_highlight(h.m, tex[h.beg:h.end], h.lin + 1, h.unsure)
            if h.beg < last:
                # overlapping with last message
                overlaps.append((s, h.lin + 1))
                continue
            res += protect_html(tex[last:h.beg])
            res += s
            last = h.end

        res += protect_html(tex[last:starts[endlin]])
        res_tot += res + '<br>\n'
        # the -1 entry accounts for the '<br>' line appended after the
        # region; its handling by add_line_numbers() is not visible here
        line_numbers += list(range(beglin, endlin)) + [-1]

    if not line_numbers:
        # no problems found: just display first cmdline.context lines
        endlin = min(cmdline.context, len(starts) - 1)
        res_tot = protect_html(tex[:starts[endlin]])
        line_numbers = list(range(endlin))
    if line_numbers:
        res_tot = add_line_numbers(res_tot, line_numbers)

    # overlapping messages are listed in a separate table after the text,
    # and a link to that table is added below the heading
    postfix = ''
    if overlaps:
        prefix += ('<a href="#' + anchor_overlap + '">' +
                   '<H3>Overlapping message(s) found:' +
                   ' see here</H3></a>\n')
        postfix = ('<a id="' + anchor_overlap + '"></a><H3>' +
                   protect_html('File "' + file + '":') +
                   ' overlapping message(s)</H3>\n')
        postfix += '<table cellspacing="0">\n'
        for (s, lin) in overlaps:
            postfix += ('<tr><td style="' + number_style +
                        '" align="right" valign="top">' + str(lin) +
                        '&nbsp;&nbsp;</td><td>' + s + '</td></tr>\n')
        postfix += '</table>\n'

    return (title, anchor, prefix + res_tot + postfix, len(matches))