def correct(source, target):
    """Apply the first applicable automatic correction to ``target``.

    ``source`` is used as the model. The checks run in the order below and
    the function returns as soon as one of them produces a correction:

    - Ellipses: make the target use the source's form of ellipsis (three
      dots or the single Unicode ellipsis character).
    - Outer whitespace: when the leading or trailing whitespace reported by
      ``decoration.spacestart``/``decoration.spaceend`` differs, strip the
      target and wrap it in the source's leading/trailing whitespace.
    - Missing end punctuation: when the target has no ending punctuation
      (one of ``.``, ``:``, ``?``, optionally space-suffixed for ``.``/``:``)
      but the source does, append the source's.
    - Initial capitalisation: when both strings start with a letter, match
      the case of the target's first character to the source's.

    Returns the corrected string; an empty ``target`` is returned unchanged,
    and ``None`` is returned when no correction applies.
    """
    if target == "":
        return target

    three_dots = "..."
    ellipsis_char = u"…"
    if three_dots in source and ellipsis_char in target:
        return target.replace(ellipsis_char, three_dots)
    if ellipsis_char in source and three_dots in target:
        return target.replace(three_dots, ellipsis_char)

    # The trailing-space comparison is only evaluated when the leading
    # whitespace already agrees.
    outer_space_agrees = decoration.spacestart(source) == decoration.spacestart(
        target
    ) and decoration.spaceend(source) == decoration.spaceend(target)
    if not outer_space_agrees:
        return (
            decoration.spacestart(source)
            + target.strip()
            + decoration.spaceend(source)
        )

    endings = (".", ":", ". ", ": ", "?")
    source_ending = decoration.puncend(source, endings)
    target_ending = decoration.puncend(target, endings)
    # Only add punctuation when the target has none; a *different* ending
    # in the target is left alone.
    if source_ending != target_ending and not target_ending:
        return target + source_ending

    source_initial = source[:1]
    target_initial = target[:1]
    if source_initial.isalpha() and target_initial.isalpha():
        if source_initial.isupper() and target_initial.islower():
            return target_initial.upper() + target[1:]
        if source_initial.islower() and target_initial.isupper():
            return target_initial.lower() + target[1:]

    return None
def correct(source, target):
    """Return an automatically corrected ``target``, modelled on ``source``.

    Only one correction is made per call; the first rule that matches wins:

    1. Ellipses - convert the target's ellipses to the form the source uses
       (three dots or the Unicode ellipsis character).
    2. Whitespace - if the leading or trailing whitespace (as given by
       ``decoration.spacestart`` / ``decoration.spaceend``) differs, the
       target is stripped and given the source's leading and trailing
       whitespace.
    3. Punctuation - if the target lacks ending punctuation (``.``, ``:``,
       ``?``, with an optional trailing space after ``.`` or ``:``) while
       the source has some, the source's ending is appended.
    4. Capitalisation - if both strings begin with a letter, the case of the
       target's first character is aligned with the source's.

    An empty ``target`` is returned as is. ``None`` is returned when none
    of the rules produce a correction.
    """
    if target == "":
        return target

    # (form used by source, form to replace in target), tried in this order.
    ellipsis_forms = (("...", u"…"), (u"…", "..."))
    for wanted, unwanted in ellipsis_forms:
        if wanted in source and unwanted in target:
            return target.replace(unwanted, wanted)

    same_lead = decoration.spacestart(source) == decoration.spacestart(target)
    # Trailing whitespace is only compared when the leading part matches.
    if not (
        same_lead and decoration.spaceend(source) == decoration.spaceend(target)
    ):
        prefix = decoration.spacestart(source)
        core = target.strip()
        return prefix + core + decoration.spaceend(source)

    punctuation = (".", ":", ". ", ": ", "?")
    src_punc = decoration.puncend(source, punctuation)
    tgt_punc = decoration.puncend(target, punctuation)
    # A target that already ends in (different) punctuation is not touched.
    if src_punc != tgt_punc and not tgt_punc:
        return target + src_punc

    src_head, tgt_head = source[:1], target[:1]
    if not (src_head.isalpha() and tgt_head.isalpha()):
        return None
    if src_head.isupper() and tgt_head.islower():
        return tgt_head.upper() + target[1:]
    if src_head.islower() and tgt_head.isupper():
        return tgt_head.lower() + target[1:]
    return None
def test_spacestart():
    """Check decoration.spacestart() against inputs with various leading spaces."""
    # Run of U+2000 through U+200A, used both as the input prefix and as the
    # expected result.
    exotic_spaces = (
        u"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"
    )
    cases = (
        # (input text, expected leading whitespace)
        (" Start", " "),
        (u"\u0020\u00a0Start", u"\u0020\u00a0"),
        # non-breaking space
        (u"\u00a0\u202fStart", u"\u00a0\u202f"),
        # Some exotic spaces
        (exotic_spaces + u"Start", exotic_spaces),
    )
    for text, expected_prefix in cases:
        assert decoration.spacestart(text) == expected_prefix