Example #1
0
def fixstring(str):
    """Clean up text garbled by reading UTF-8 as a Microsoft code page.

    The quote sequences listed below are swapped for plain ASCII quotes,
    then the text is passed through ``cf.convert_entities``,
    ``cf.convert_unicode_u`` and ``html_to_segments`` in that order.
    The final result is returned with surrounding whitespace stripped.
    """
    # (sequence to look for, ASCII replacement) -- applied top to bottom.
    quote_fixes = (
        (u"“", u'"'),
        (u"’", u"'"),
        (u"â€", u'"'),
    )

    cleaned = str
    for broken, plain in quote_fixes:
        cleaned = cleaned.replace(broken, plain)

    cleaned = html_to_segments(cf.convert_unicode_u(cf.convert_entities(cleaned)))
    return cleaned.strip()
Example #2
0
def fixstring(str):
	"""Repair quote characters damaged by a UTF-8 / Microsoft code page mix-up.

	Three literal substitutions turn the affected sequences into ASCII
	quotes. The text then goes through ``cf.convert_entities``,
	``cf.convert_unicode_u`` and ``html_to_segments`` (in that order), and
	the stripped result is returned.
	"""
	# Literal substitutions first, in the same left-to-right order as before.
	unquoted = str.replace(u"“", u'"')
	unquoted = unquoted.replace(u"’", u"'")
	unquoted = unquoted.replace(u"â€", u'"')

	# Hand the text through the conversion helpers one after another.
	without_entities = cf.convert_entities(unquoted)
	without_escapes = cf.convert_unicode_u(without_entities)
	segments = html_to_segments(without_escapes)
	return segments.strip()
Example #3
0
def cleanup(claim):
    """Run a claim string through the ``cf`` conversion helpers.

    ``claim`` is passed to ``cf.convert_entities`` and the result is then
    passed to ``cf.convert_unicode``.

    Returns:
        The value produced by ``cf.convert_unicode``.
    """
    claim = cf.convert_entities(claim)
    claim = cf.convert_unicode(claim)
    # Rebinding the local name is invisible to the caller, so the converted
    # text has to be handed back explicitly; without this the function
    # always evaluated to None.
    return claim
Example #4
0
def fix_string(txt):
	"""Convert ``txt`` with the ``cf`` helpers and decode the result as UTF-8.

	``txt`` goes through ``cf.convert_entities`` and then
	``cf.convert_unicode``; the outcome of ``.decode('utf-8')`` on that
	value is returned.
	"""
	converted = cf.convert_unicode(cf.convert_entities(txt))
	# NOTE(review): calling .decode here presumes cf.convert_unicode hands
	# back a byte string -- confirm against the helper.
	return converted.decode('utf-8')
Example #5
0
def cleanup(claim):
	"""Run a claim string through the ``cf`` conversion helpers.

	``claim`` is passed to ``cf.convert_entities`` and the result is then
	passed to ``cf.convert_unicode``.

	Returns:
		The value produced by ``cf.convert_unicode``.
	"""
	claim = cf.convert_entities(claim)
	claim = cf.convert_unicode(claim)
	# Rebinding the local name is invisible to the caller, so the converted
	# text has to be handed back explicitly; without this the function
	# always evaluated to None.
	return claim
def fix_string(txt):
    """Return ``txt`` after both ``cf`` conversions, decoded as UTF-8.

    The input is handed to ``cf.convert_entities``, that result to
    ``cf.convert_unicode``, and the final value is decoded with the
    ``utf-8`` codec before being returned.
    """
    without_entities = cf.convert_entities(txt)
    raw = cf.convert_unicode(without_entities)
    # NOTE(review): .decode presumes ``raw`` is a byte string -- confirm
    # what cf.convert_unicode actually returns.
    return raw.decode("utf-8")