def run_languagetool(plain):
    """Run LanguageTool on the plain text and return its list of matches.

    If cmdline.server is set, the text is posted to an HTTP server: the
    address in ltserver for mode 'lt', otherwise the local server at
    ltserver_local, which is started on demand. Without cmdline.server,
    the command in ltcommand is run and receives the text on stdin.

    Returns the value stored under key 'matches' of the decoded JSON
    reply. Failures to reach the server, to run the command, or to decode
    the JSON are reported via tex2txt.fatal() / json_fatal().
    """
    if cmdline.server:
        # use Web server hosted by LT or local server
        if cmdline.server == 'lt':
            server = ltserver
        else:
            start_local_lt_server()
            server = ltserver_local
        data = {'text': plain, 'language': cmdline.language}
        if cmdline.disable:
            data['disabledRules'] = cmdline.disable
        if cmdline.lt_options:
            # translate options to entries in HTML request;
            # the first character of the option string is skipped
            # NOTE(review): presumably a marker character - confirm
            ltopts = cmdline.lt_options[1:].split()
            for opt in lt_option_map:
                entry = lt_option_map[opt]
                # entry[0]: accepted command-line spellings of the option
                # entry[1]: true if the option takes an argument
                if not any(s in ltopts for s in entry[0]):
                    continue
                idx = max(ltopts.index(s) for s in entry[0] if s in ltopts)
                if entry[1]:
                    data[opt] = (ltopts[idx + 1]
                                 if idx + 1 < len(ltopts) else '')
                else:
                    data[opt] = 'true'
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(server, data=data)
        try:
            # 'with' closes the reply even if read() fails
            with urllib.request.urlopen(request) as reply:
                out = reply.read()
        except Exception:
            # not a bare except: Ctrl-C must not be reported as a
            # connection problem
            tex2txt.fatal('error connecting to "' + server + '"')
    else:
        # use local installation
        try:
            out = subprocess.run(ltcommand, input=plain.encode('utf-8'),
                                 stdout=subprocess.PIPE).stdout
        except Exception:
            tex2txt.fatal('error running "' + ltcommand[0] + '"')

    out = out.decode(encoding='utf-8')
    try:
        dic = json_decoder.decode(out)
    except Exception:
        json_fatal('JSON root element')
    matches = json_get(dic, 'matches', list)
    return matches
def start_local_lt_server():
    """Make sure a local LanguageTool server answers at ltserver_local.

    If a probe request already succeeds, nothing is done. Otherwise the
    command ltserver_local_cmd is launched in directory ltdirectory, and
    the server is polled up to 20 times at intervals of 0.5 seconds.
    Progress dots are written to stderr. If the command cannot be run or
    the server does not answer in time, tex2txt.fatal() is called.
    """
    def check_server():
        # check for running server; a positive result is cached in the
        # module-level flag, so later calls skip the HTTP request
        global ltserver_local_running
        if ltserver_local_running:
            return True
        data = {'text': '', 'language': 'en'}
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(ltserver_local, data=data)
        try:
            with urllib.request.urlopen(request):
                pass
        except Exception:
            # not a bare except: Ctrl-C during the probe must propagate
            return False
        ltserver_local_running = True
        return True

    if check_server():
        return
    try:
        subprocess.Popen(ltserver_local_cmd.split(), cwd=ltdirectory,
                         stdout=subprocess.DEVNULL,
                         stderr=subprocess.DEVNULL)
    except Exception:
        tex2txt.fatal('error running "' + ltserver_local_cmd + '"')

    # wait for server to be available
    sys.stderr.write('=== starting local LT server at "' + ltdirectory
                     + '":\n=== ' + ltserver_local_cmd + ' ')
    sys.stderr.flush()
    for _ in range(20):
        time.sleep(0.5)
        # flush explicitly instead of relying on the return value
        # of write()
        sys.stderr.write('.')
        sys.stderr.flush()
        if check_server():
            sys.stderr.write('\n')
            sys.stderr.flush()
            return
    sys.stderr.write('\n')
    tex2txt.fatal('error starting server "' + ltserver_local_cmd + '"')
def create_equation_punct_messages(plain):
    """Create messages for equations with suspicious punctuation.

    Returns an empty list if option --equation-punctuation is not given.
    Otherwise, the option value must be a prefix of exactly one of the
    modes 'displayed', 'inline', 'all'; this selects the pattern of
    equation replacements searched for in the plain text. For every
    equation replacement that is not followed by an accepted
    continuation, a message built by create_message() is returned.
    """
    if cmdline.equation_punctuation is None:
        return []

    replacements_by_mode = {
        'displayed': equation_replacements_display,
        'inline': equation_replacements_inline,
        'all': equation_replacements,
    }
    selected = [name for name in replacements_by_mode
                if name.startswith(cmdline.equation_punctuation)]
    if len(selected) != 1:
        tex2txt.fatal('mode for --equation-punctuation has to determine'
                      + ' one of ' + ', '.join(replacements_by_mode))
    repls = replacements_by_mode[selected[0]]

    def make_message(match):
        return create_message(
            match,
            rule='PRIVATE::EQUATION_PUNCTUATION',
            msg='Possibly incorrect punctuation after equation.',
            repl='—')

    def is_accepted(match):
        # an equation is accepted if it is followed by
        # - another equation (possibly after punctuation in marks)
        # - a dot
        # - a lower-case word (possibly after punctuation in marks)
        groups = match.groups()
        if groups[0]:
            # another equation follows: not consumed!
            return True
        if groups[-2]:
            # a dot follows
            return True
        following_word = groups[-1]
        return bool(following_word) and following_word[0].islower()

    marks = ',;:'
    equation = r'\b(?:' + repls + r')\b'
    pattern = (r'(' + equation + r'(?=\s*[' + marks + r']?\s*'
               + equation + r'))|'
               + equation + r'\s*(?:(\.)|[' + marks
               + r']?\s*([^\W0-9_]+))?')

    messages = []
    for match in re.finditer(pattern, plain):
        if is_accepted(match):
            continue
        messages.append(make_message(match))
    return messages
def run_textgears(plain):
    """Check the plain text with the TextGears server.

    Posts the text together with the key from cmdline.textgears to
    textgears_server and converts each entry of the 'errors' list in the
    JSON reply into a dictionary with keys 'message', 'offset', 'length',
    'context', 'replacements' and 'rule'.

    Connection and JSON decoding failures are reported via
    tex2txt.fatal() / json_fatal().
    """
    data = {
        'key': cmdline.textgears,
        'text': plain,
    }
    data = urllib.parse.urlencode(data).encode(encoding='ascii')
    request = urllib.request.Request(textgears_server, data=data)
    try:
        # 'with' closes the reply even if read() fails
        with urllib.request.urlopen(request) as reply:
            out = reply.read()
    except Exception:
        # not a bare except: Ctrl-C must not be reported as a
        # connection problem
        tex2txt.fatal('error connecting to "' + textgears_server + '"')

    out = out.decode(encoding='utf-8')
    try:
        dic = json_decoder.decode(out)
    except Exception:
        json_fatal('JSON root element')

    def f(err):
        # convert one TextGears error entry into a match dictionary
        offset = json_get(err, 'offset', int)
        length = json_get(err, 'length', int)
        return {
            'message': 'Error type: ' + json_get(err, 'type', str),
            'offset': offset,
            'length': length,
            'context': create_context(plain, offset, length),
            'replacements': [{'value': r}
                             for r in json_get(err, 'better', list)],
            'rule': {'id': 'Not available'},
        }

    return [f(err) for err in json_get(dic, 'errors', list)]
def generate_html(tex, charmap, matches, file):
    """Build the HTML report part for one checked file.

    Arguments:
    tex      the text that is displayed and highlighted
    charmap  sequence indexed by the offsets reported in the matches;
             abs(charmap[i]) - 1 is used as start position in tex,
             and a negative entry marks the position as unsure
    matches  list of proofreader messages; 'offset' and 'length' are
             read from each of them with json_get()
    file     file name, used in the title and as HTML anchor

    Returns the tuple (title, anchor, HTML text, number of matches).
    Calls tex2txt.fatal() if a message refers to a position outside
    of charmap.
    """
    s = 'File "' + file + '" with ' + str(len(matches)) + ' problem(s)'
    title = protect_html(s)
    anchor = file
    anchor_overlap = file + '-@@@'
    prefix = '<a id="' + anchor + '"></a><H3>' + title + '</H3>\n'

    # collect data for highlighted places:
    # one auxiliary object per match with positions and line numbers in tex
    hdata = []
    for m in matches:
        beg = json_get(m, 'offset', int)
        # a match of length zero is treated as having length one
        end = beg + max(1, json_get(m, 'length', int))
        if beg < 0 or end < 0 or beg >= len(charmap) or end >= len(charmap):
            tex2txt.fatal('generate_html():'
                          + ' bad message read from proofreader')
        h = tex2txt.Aux()
        # the match is unsure if its first or last character is marked
        # by a negative charmap entry
        h.unsure = (charmap[beg] < 0 or charmap[max(beg, end - 1)] < 0)
        h.beg = abs(charmap[beg]) - 1
        h.end = abs(charmap[max(beg, end - 1)])  # see issue #21
        if h.unsure or h.end <= h.beg:
            # fall back to highlighting a single character
            h.end = h.beg + 1

        if (h.end == h.beg + 1 and tex[h.beg] == '\\'
                and re.search(r'(?<!\\)(\\\\)*\Z', tex[:h.beg])):
            # HACK:
            # if matched a single \ that is actually followed by macro name:
            # also highlight the macro name
            # (the search above requires an even number of backslashes
            # directly in front of the matched one)
            s = re.search(r'\A\\[a-zA-Z]+', tex[h.beg:])
            if s:
                h.end = h.beg + len(s.group(0))
        elif h.unsure and tex[h.beg].isalpha():
            # HACK:
            # if unsure: mark till end of word (only letters)
            s = re.search(r'\A.[^\W0-9_]+', tex[h.beg:])
            if s:
                h.end = h.beg + len(s.group(0))

        # zero-based line numbers: beglin is the line containing h.beg,
        # endlin is one more than the number of newlines before h.end
        h.beglin = tex.count('\n', 0, h.beg)
        h.endlin = tex.count('\n', 0, h.end) + 1
        # remember the line of the match before context is added below
        h.lin = h.beglin
        h.m = m
        hdata.append(h)

    # group adjacent matches into regions:
    # each match is widened by cmdline.context lines on both sides;
    # a match whose widened start lies before the end of the last region
    # is added to that region
    regions = []
    starts = tex2txt.get_line_starts(tex)
    for h in hdata:
        h.beglin = max(h.beglin - cmdline.context, 0)
        h.endlin = min(h.endlin + cmdline.context, len(starts) - 1)
        if not regions or h.beglin >= max(h.endlin for h in regions[-1]):
            # start a new region
            regions.append([h])
        else:
            # match is part of last region
            regions[-1].append(h)

    # produce output
    res_tot = ''
    overlaps = []
    line_numbers = []
    for reg in regions:
        # generate output for one region:
        # collect all matches in that region
        beglin = reg[0].beglin
        endlin = max(h.endlin for h in reg)
        res = ''
        # position in tex up to which output has been generated
        last = starts[beglin]
        for h in reg:
            s = generate_highlight(h.m, tex[h.beg:h.end], h.lin + 1, h.unsure)
            if h.beg < last:
                # overlapping with last message:
                # shown in a separate table after the text
                overlaps.append((s, h.lin + 1))
                continue
            res += protect_html(tex[last:h.beg])
            res += s
            last = h.end

        res += protect_html(tex[last:starts[endlin]])
        res_tot += res + '<br>\n'
        # -1 is appended after the line numbers of each region
        # NOTE(review): presumably add_line_numbers() treats it as
        # separator for the '<br>' line added above - confirm
        line_numbers += list(range(beglin, endlin)) + [-1]

    if not line_numbers:
        # no problems found: just display first cmdline.context lines
        endlin = min(cmdline.context, len(starts) - 1)
        res_tot = protect_html(tex[:starts[endlin]])
        line_numbers = list(range(endlin))
    if line_numbers:
        res_tot = add_line_numbers(res_tot, line_numbers)

    postfix = ''
    if overlaps:
        # link at the top that jumps to the table of overlapping messages
        prefix += ('<a href="#' + anchor_overlap + '">'
                   + '<H3>Overlapping message(s) found:'
                   + ' see here</H3></a>\n')
        postfix = ('<a id="' + anchor_overlap + '"></a><H3>'
                   + protect_html('File "' + file + '":')
                   + ' overlapping message(s)</H3>\n')
        postfix += '<table cellspacing="0">\n'
        for (s, lin) in overlaps:
            postfix += ('<tr><td style="' + number_style
                        + '" align="right" valign="top">' + str(lin)
                        + ' </td><td>' + s + '</td></tr>\n')
        postfix += '</table>\n'

    return (title, anchor, prefix + res_tot + postfix, len(matches))
def f(m):
    # map the offset of message m via charmap to a non-negative value;
    # an offset outside of charmap is a fatal error
    offset = json_get(m, 'offset', int)
    if not 0 <= offset < len(charmap):
        tex2txt.fatal('run_proofreader():'
                      + ' bad message read from proofreader')
    mapped = charmap[offset]
    return abs(mapped)
def json_fatal(item):
    """Report via tex2txt.fatal() that the given (sub-)item of the
    proofreader's JSON output could not be read."""
    message = 'error reading JSON output from proofreader, (sub-)item "'
    message = message + item + '"'
    tex2txt.fatal(message)
if cmdline.language is None: cmdline.language = default_option_language if cmdline.t2t_lang is None: cmdline.t2t_lang = cmdline.language[:2] if cmdline.encoding is None: cmdline.encoding = default_option_encoding if cmdline.disable is None: cmdline.disable = default_option_disable if cmdline.context is None: cmdline.context = default_option_context if cmdline.context < 0: # huge context: display whole text cmdline.context = int(1e8) if cmdline.server is not None and cmdline.server not in ('lt', 'my', 'stop'): tex2txt.fatal('mode for --server has to be one of lt, my, stop') if cmdline.plain and (cmdline.include or cmdline.replace): tex2txt.fatal('cannot handle --plain together with --include or --replace') if cmdline.single_letters and cmdline.single_letters.endswith('||'): cmdline.single_letters += equation_replacements if cmdline.replace: cmdline.replace = tex2txt.read_replacements(cmdline.replace, encoding=cmdline.encoding) if cmdline.define: cmdline.define = tex2txt.read_definitions(cmdline.define, encoding='utf-8') # only stop local LT server? # if cmdline.server == 'stop': done = False try: