def correct(source, target):
    """Return the first easy, automatic correction that applies to ``target``.

    The checks run in a fixed order and the function returns as soon as one
    of them produces a corrected string:

      1. Ellipses - the target is switched to the form of ellipsis the source
         uses (three dots or the single Unicode ellipsis character).
      2. Whitespace - if the leading or trailing whitespace reported by
         ``decoration.spacestart`` / ``decoration.spaceend`` differs, the
         target is stripped and wrapped in the source's whitespace.
      3. Punctuation - if the target has no ending punctuation according to
         ``decoration.puncend`` but the source does, the source's ending
         punctuation is appended.
      4. Capitalisation - if both strings start with a letter, the case of
         the target's first letter is aligned with the source's.

    An empty target is returned unchanged.  ``None`` is returned when no
    correction applies.
    """
    if target == "":
        return target

    # Ellipses: follow whichever form the source uses.
    three_dots = "..."
    ellipsis_char = u"…"
    if three_dots in source and ellipsis_char in target:
        return target.replace(ellipsis_char, three_dots)
    if ellipsis_char in source and three_dots in target:
        return target.replace(three_dots, ellipsis_char)

    # Whitespace: copy the source's leading and trailing whitespace.
    start_differs = decoration.spacestart(source) != decoration.spacestart(target)
    if start_differs or decoration.spaceend(source) != decoration.spaceend(target):
        leading = decoration.spacestart(source)
        trailing = decoration.spaceend(source)
        return leading + target.strip() + trailing

    # Punctuation: only add what is missing, never replace existing endings.
    endings = (".", ":", ". ", ": ", "?")
    source_ending = decoration.puncend(source, endings)
    target_ending = decoration.puncend(target, endings)
    if not target_ending and source_ending != target_ending:
        return target + source_ending

    # Capitalisation of the first letter.
    source_first = source[:1]
    target_first = target[:1]
    if source_first.isalpha() and target_first.isalpha():
        if source_first.isupper() and target_first.islower():
            return target_first.upper() + target[1:]
        if source_first.islower() and target_first.isupper():
            return target_first.lower() + target[1:]

    return None
# Example #2
# 0
def correct(source, target):
    """Run a set of easy, automatic corrections and return the first hit.

    Corrections, tried in this order:
      - Ellipses: make the target use the source's form of ellipsis (three
        dots or the Unicode ellipsis character).
      - Whitespace: give the target the same leading and trailing whitespace
        as the source.
      - Punctuation: append the source's ending punctuation (.:?) when the
        target has none.
      - Capitalisation: match the case of the first letter to the source.

    Returns the corrected string for the first correction that applies, the
    (empty) target itself when the target is empty, and ``None`` when
    nothing needs correcting.
    """
    if target == "":
        return target

    # (form used by source, form to replace in target)
    for wanted, unwanted in (("...", u"…"), (u"…", "...")):
        if wanted in source and unwanted in target:
            return target.replace(unwanted, wanted)

    if (
        decoration.spacestart(source) != decoration.spacestart(target)
        or decoration.spaceend(source) != decoration.spaceend(target)
    ):
        # Strip whatever the target had and wrap it in the source's spacing.
        body = target.strip()
        return decoration.spacestart(source) + body + decoration.spaceend(source)

    punctuation = (".", ":", ". ", ": ", "?")
    wanted_end = decoration.puncend(source, punctuation)
    actual_end = decoration.puncend(target, punctuation)
    # Only fill in missing punctuation; differing punctuation is left alone.
    if wanted_end != actual_end and not actual_end:
        return target + wanted_end

    head, tail = target[:1], target[1:]
    if not (source[:1].isalpha() and head.isalpha()):
        return None
    if source[:1].isupper() and head.islower():
        return head.upper() + tail
    if source[:1].islower() and head.isupper():
        return head.lower() + tail
    return None
def test_spacestart():
    """Check that spacestart() returns the leading whitespace of a string."""
    cases = (
        # plain ASCII spaces
        ("  Start", "  "),
        # space followed by a non-breaking space
        (u"\u0020\u00a0Start", u"\u0020\u00a0"),
        # non-breaking space followed by a narrow non-breaking space
        (u"\u00a0\u202fStart", u"\u00a0\u202f"),
        # some exotic spaces (U+2000 through U+200A)
        (
            u"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200aStart",
            u"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a",
        ),
    )
    for text, expected in cases:
        assert decoration.spacestart(text) == expected
# Example #4
# 0
def test_spacestart():
    """Verify spacestart() against several kinds of leading whitespace."""
    word = "Start"

    ascii_spaces = "  "
    assert decoration.spacestart(ascii_spaces + word) == ascii_spaces

    # Ordinary space plus a non-breaking space
    space_nbsp = u"\u0020\u00a0"
    assert decoration.spacestart(space_nbsp + word) == space_nbsp

    # Non-breaking space plus a narrow non-breaking space
    nbsp_narrow = u"\u00a0\u202f"
    assert decoration.spacestart(nbsp_narrow + word) == nbsp_narrow

    # Some exotic spaces: every code point from U+2000 to U+200A
    exotic = u"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"
    assert decoration.spacestart(exotic + word) == exotic