def build_journals_re_kb(fpath):
    """Load journals regexps knowledge base

    Every non-comment line is matched against the module-level
    ``re_kb_line`` pattern; the ``seek`` and ``repl`` groups of each
    matching line are collected in file order.

    @param fpath: either the path of the KB file (a string), or an
        already opened iterable of lines (e.g. a file object). An object
        passed in by the caller is NOT closed here.
    @return: list of ``(seek, repl)`` tuples.
    @raise IOError: when fpath is a path that cannot be opened.
    @see build_journals_kb
    """
    if isinstance(fpath, six.string_types):
        fpath_needs_closing = True
        try:
            fh = open(fpath, "r")
        except IOError:
            raise IOError("Error: Unable to open journal kb '%s'" % fpath)
    else:
        fpath_needs_closing = False
        fh = fpath

    kb = []
    try:
        for rawline in fh:
            # Lines may be byte strings (Python 2 files, binary handles) or
            # already-decoded text; pick the matching comment marker type.
            is_bytes = isinstance(rawline, bytes)
            if rawline.startswith(b'#' if is_bytes else '#'):
                continue
            if is_bytes:
                rawline = rawline.decode('utf-8')
            # Extract the seek->replace terms from this KB line:
            m_kb_line = re_kb_line.search(rawline)
            if not m_kb_line:
                # Blank or malformed line: skip it, as build_authors_kb
                # does, instead of failing with an AttributeError on None.
                continue
            kb.append((m_kb_line.group('seek'), m_kb_line.group('repl')))
    finally:
        # Only close handles this function opened itself.
        if fpath_needs_closing:
            fh.close()

    return kb
def build_authors_kb(fpath):
    """Load the authors knowledge base as a list of replacements

    Every non-comment line is matched against the module-level
    ``re_kb_line`` pattern; lines that do not match are skipped.

    @param fpath: either the path of the KB file (a string), or an
        already opened iterable of lines (e.g. a file object). An object
        passed in by the caller is NOT closed here.
    @return: list of ``(seek, repl)`` tuples in file order.
    @raise IOError: when fpath is a path that cannot be opened; a message
        is also emitted through write_message on sys.stderr beforehand.
    """
    if isinstance(fpath, six.string_types):
        fpath_needs_closing = True
        try:
            fh = open(fpath, "r")
        except IOError:
            # problem opening KB for reading:
            emsg = "Error: Could not build list of authors - failed " \
                   "to read from KB %(kb)s." % {'kb' : fpath}
            write_message(emsg, sys.stderr, verbose=0)
            raise IOError("Error: Unable to open authors kb '%s'" % fpath)
    else:
        fpath_needs_closing = False
        fh = fpath

    replacements = []
    try:
        for rawline in fh:
            # Lines may be byte strings (Python 2 files, binary handles) or
            # already-decoded text; pick the matching comment marker type.
            is_bytes = isinstance(rawline, bytes)
            if rawline.startswith(b'#' if is_bytes else '#'):
                continue
            if is_bytes:
                rawline = rawline.decode('utf-8')
            # Extract the seek->replace terms from this KB line:
            m_kb_line = re_kb_line.search(rawline)
            if m_kb_line:
                seek = m_kb_line.group('seek')
                repl = m_kb_line.group('repl')
                replacements.append((seek, repl))
    finally:
        # Only close handles this function opened itself.
        if fpath_needs_closing:
            fh.close()

    return replacements
def lazy_parser(fh):
    """Lazily yield (seek, repl) pairs parsed from knowledge base lines

    Lines starting with '#' are ignored. Every other line must match the
    module-level ``re_kb_line`` pattern.

    NOTE: the error messages interpolate ``path``, a name that is not a
    parameter of this function and is resolved from a surrounding scope.

    @param fh: iterable of lines (byte strings or text).
    @return: generator of ``(seek, repl)`` tuples.
    @raise ValueError: when a line is not valid UTF-8 or does not match
        the KB line format. ValueError is a StandardError subclass on
        Python 2, so handlers catching StandardError keep working, and it
        also exists on Python 3 where StandardError does not.
    """
    for rawline in fh:
        # Lines may be byte strings (Python 2 files, binary handles) or
        # already-decoded text; pick the matching comment marker type.
        is_bytes = isinstance(rawline, bytes)
        if rawline.startswith(b'#' if is_bytes else '#'):
            continue

        try:
            line = rawline.decode("utf-8") if is_bytes else rawline
        except UnicodeError:
            raise ValueError("Unicode problems in kb %s at line %s"
                             % (path, rawline))
        line = line.rstrip("\n")

        # Test line to ensure that it is a correctly formatted
        # knowledge base line:
        # Extract the seek->replace terms from this KB line
        m_kb_line = re_kb_line.search(line)
        if not m_kb_line:
            raise ValueError("Badly formatted kb '%s' at line %s"
                             % (path, line))
        # good KB line
        yield m_kb_line.group('seek'), m_kb_line.group('repl')