Example #1
0
def build_journals_re_kb(fpath):
    """Load journals regexps knowledge base

    Every line that does not start with '#' is matched against
    re_kb_line; the 'seek' group of the match is the regexp and the
    'repl' group is its replacement.

    @param fpath: path of the knowledge base file, or an already opened
        iterable of lines. A handle passed in by the caller is left open.
    @return: list of (regexp, replacement) tuples, in file order
    @raise IOError: if fpath is a path and the file cannot be opened
    @see build_journals_kb
    """
    kb = []

    if isinstance(fpath, six.string_types):
        fpath_needs_closing = True
        try:
            fh = open(fpath, "r")
        except IOError:
            raise IOError("Error: Unable to open journal kb '%s'" % fpath)
    else:
        fpath_needs_closing = False
        fh = fpath

    try:
        for rawline in fh:
            # Lines are bytes or text depending on the handle and on the
            # Python version; only bytes need (and support) decoding.
            is_bytes = isinstance(rawline, bytes)
            if rawline.startswith(b'#' if is_bytes else u'#'):
                continue
            line = rawline.decode('utf-8') if is_bytes else rawline
            # Extract the seek->replace terms from this KB line:
            m_kb_line = re_kb_line.search(line)
            if m_kb_line is None:
                # Blank or malformed line: skip it, as build_authors_kb
                # does, instead of failing on None.group().
                continue
            kb.append((m_kb_line.group('seek'), m_kb_line.group('repl')))
    finally:
        # Only close handles opened here.
        if fpath_needs_closing:
            fh.close()

    return kb
Example #2
0
def build_authors_kb(fpath):
    """Load the authors knowledge base as a list of replacements

    Every line that does not start with '#' is matched against
    re_kb_line; lines that do not match are ignored.

    @param fpath: path of the knowledge base file, or an already opened
        iterable of lines. A handle passed in by the caller is left open.
    @return: list of (seek, repl) tuples, in file order
    @raise IOError: if fpath is a path and the file cannot be opened
        (a message is also written to sys.stderr via write_message)
    """
    replacements = []

    if isinstance(fpath, six.string_types):
        fpath_needs_closing = True
        try:
            fh = open(fpath, "r")
        except IOError:
            # problem opening KB for reading:
            emsg = "Error: Could not build list of authors - failed " \
                   "to read from KB %(kb)s." % {'kb' : fpath}
            write_message(emsg, sys.stderr, verbose=0)
            raise IOError("Error: Unable to open authors kb '%s'" % fpath)
    else:
        fpath_needs_closing = False
        fh = fpath

    try:
        for rawline in fh:
            # Lines are bytes or text depending on the handle and on the
            # Python version; only bytes need (and support) decoding.
            is_bytes = isinstance(rawline, bytes)
            if rawline.startswith(b'#' if is_bytes else u'#'):
                continue
            line = rawline.decode('utf-8') if is_bytes else rawline

            # Extract the seek->replace terms from this KB line:
            m_kb_line = re_kb_line.search(line)
            if m_kb_line:
                seek = m_kb_line.group('seek')
                repl = m_kb_line.group('repl')
                replacements.append((seek, repl))
    finally:
        # Only close handles opened here.
        if fpath_needs_closing:
            fh.close()

    return replacements
Example #3
0
def build_journals_re_kb(fpath):
    """Load journals regexps knowledge base

    Every line that does not start with '#' is matched against
    re_kb_line; the 'seek' group of the match is the regexp and the
    'repl' group is its replacement.

    @param fpath: path of the knowledge base file, or an already opened
        iterable of lines. A handle passed in by the caller is left open.
    @return: list of (regexp, replacement) tuples, in file order
    @raise IOError: if fpath is a path and the file cannot be opened
    @see build_journals_kb
    """
    kb = []

    if isinstance(fpath, six.string_types):
        fpath_needs_closing = True
        try:
            fh = open(fpath, "r")
        except IOError:
            raise IOError("Error: Unable to open journal kb '%s'" % fpath)
    else:
        fpath_needs_closing = False
        fh = fpath

    try:
        for rawline in fh:
            # Lines are bytes or text depending on the handle and on the
            # Python version; only bytes need (and support) decoding.
            is_bytes = isinstance(rawline, bytes)
            if rawline.startswith(b'#' if is_bytes else u'#'):
                continue
            line = rawline.decode('utf-8') if is_bytes else rawline
            # Extract the seek->replace terms from this KB line:
            m_kb_line = re_kb_line.search(line)
            if m_kb_line is None:
                # Blank or malformed line: skip it, as build_authors_kb
                # does, instead of failing on None.group().
                continue
            kb.append((m_kb_line.group('seek'), m_kb_line.group('repl')))
    finally:
        # Only close handles opened here.
        if fpath_needs_closing:
            fh.close()

    return kb
Example #4
0
def build_authors_kb(fpath):
    """Load the authors knowledge base as a list of replacements

    Every line that does not start with '#' is matched against
    re_kb_line; lines that do not match are ignored.

    @param fpath: path of the knowledge base file, or an already opened
        iterable of lines. A handle passed in by the caller is left open.
    @return: list of (seek, repl) tuples, in file order
    @raise IOError: if fpath is a path and the file cannot be opened
        (a message is also written to sys.stderr via write_message)
    """
    replacements = []

    if isinstance(fpath, six.string_types):
        fpath_needs_closing = True
        try:
            fh = open(fpath, "r")
        except IOError:
            # problem opening KB for reading:
            emsg = "Error: Could not build list of authors - failed " \
                   "to read from KB %(kb)s." % {'kb' : fpath}
            write_message(emsg, sys.stderr, verbose=0)
            raise IOError("Error: Unable to open authors kb '%s'" % fpath)
    else:
        fpath_needs_closing = False
        fh = fpath

    try:
        for rawline in fh:
            # Lines are bytes or text depending on the handle and on the
            # Python version; only bytes need (and support) decoding.
            is_bytes = isinstance(rawline, bytes)
            if rawline.startswith(b'#' if is_bytes else u'#'):
                continue
            line = rawline.decode('utf-8') if is_bytes else rawline

            # Extract the seek->replace terms from this KB line:
            m_kb_line = re_kb_line.search(line)
            if m_kb_line:
                seek = m_kb_line.group('seek')
                repl = m_kb_line.group('repl')
                replacements.append((seek, repl))
    finally:
        # Only close handles opened here.
        if fpath_needs_closing:
            fh.close()

    return replacements
Example #5
0
    def lazy_parser(fh):
        """Generate (seek, repl) pairs from the lines of a knowledge base

        Lines starting with '#' are skipped. Every other line is decoded
        as UTF-8, stripped of trailing newline characters and matched
        against re_kb_line.

        @param fh: iterable of raw knowledge base lines
        @raise StandardError: if a line cannot be decoded, or if it does
            not match re_kb_line
        """
        for raw in fh:
            if not raw.startswith('#'):
                try:
                    text = raw.decode("utf-8").rstrip("\n")
                except UnicodeError:
                    # Decoding failed, so report the undecoded line.
                    raise StandardError("Unicode problems in kb %s at line %s"
                                        % (path, raw))

                # A usable KB line must provide the seek->replace terms.
                found = re_kb_line.search(text)
                if not found:
                    raise StandardError("Badly formatted kb '%s' at line %s"
                                        % (path, text))
                yield found.group('seek'), found.group('repl')
Example #6
0
    def lazy_parser(fh):
        """Lazily parse knowledge base lines into (seek, repl) pairs

        Comment lines (starting with '#') produce nothing. All other
        lines are decoded as UTF-8, have trailing newline characters
        removed and are then searched with re_kb_line.

        @param fh: iterable of raw knowledge base lines
        @raise StandardError: on a line that is not valid UTF-8 or that
            re_kb_line does not match
        """
        for raw in fh:
            is_comment = raw.startswith('#')
            if is_comment:
                continue

            try:
                decoded = raw.decode("utf-8")
                text = decoded.rstrip("\n")
            except UnicodeError:
                # The error message carries the line exactly as read.
                details = (path, raw)
                raise StandardError(
                    "Unicode problems in kb %s at line %s" % details)

            # Only correctly formatted lines yield seek->replace terms.
            hit = re_kb_line.search(text)
            if not hit:
                details = (path, text)
                raise StandardError(
                    "Badly formatted kb '%s' at line %s" % details)
            yield hit.group('seek'), hit.group('repl')