def processline(s):
    r"""Conform the sort key of an index line to plain ASCII.

    The sort key is the text captured by the first group of the module-level
    pattern ``p``. It is passed through ``dediacriticize`` (removal of LaTeX
    diacritics such as ``{\'{e}}``) and then through ``asciify`` (Unicode to
    ASCII). If the converted key differs from the original one, the change
    is reported on stdout and every ``<key>@`` in the line is replaced by
    ``<converted key>@``.

    Args:
        s (str): the input line

    Returns:
        str: ``s`` itself when it is blank or when its sort key is already
        conformant; otherwise ``s`` with ``<key>@`` rewritten.

    Example (illustrative only; the accepted line format depends on ``p``):
        the sort key ``\v{C}{\'{e}}pl\"o, Slavomír`` is conformed to
        ``Ceplo, Slavomir``
    """
    # NOTE(review): a second definition of processline appears later in this
    # file and replaces this one at import time -- confirm which is intended.

    # Blank lines are passed through untouched.
    if not s.strip():
        return s

    # Find the substring used for sorting.
    match = p.match(s)
    if match is None:
        # Unparseable lines are reported and then handled with an empty key.
        print("%s could not be parsed" % repr(s))
        sortstring = ""
    else:
        # group(1) is the first capture. groups(1)[0] would pass 1 as the
        # *default value* and yield the int 1 for a non-participating group.
        sortstring = match.group(1) or ""

    converted = asciify(dediacriticize(sortstring))
    if sortstring == converted:
        return s

    print("%s => %s" % (sortstring, converted))
    return s.replace("%s@" % sortstring, "%s@" % converted)
def processline(s):
    r"""Conform the sort key of an index line to plain ASCII.

    The first group captured by the module-level pattern ``p`` is split on
    ``@``; the part before the first ``@`` is the sort key. The key is passed
    through ``dediacriticize`` (removal of LaTeX diacritics such as
    ``{\'{e}}``) and then through ``asciify`` (Unicode to ASCII).

    When the converted key differs from the original one:

    * if the captured text already contained ``@``, every ``<key>@`` in the
      line is replaced by ``<converted key>@`` and the resulting line is
      printed;
    * otherwise every occurrence of the key in the line is replaced by
      ``<converted key>@<key>``.

    Each changed key is reported on stdout only once; keys already reported
    are remembered in the module-level dict ``ignoredic``.

    Args:
        s (str): the input line

    Returns:
        str: ``s`` itself when it is blank or its sort key is already
        conformant, the empty string when the line cannot be parsed, and the
        rewritten line otherwise.

    Example (illustrative only; the accepted line format depends on ``p``):
        the sort key ``\v{C}{\'{e}}pl\"o, Slavomír`` is conformed to
        ``Ceplo, Slavomir``
    """
    # Blank lines are passed through untouched.
    if not s.strip():
        return s

    # Find the substring used for sorting (match once, not twice).
    match = p.match(s)
    captured = match.group(1) if match is not None else None
    if captured is None:
        # No match, or group 1 did not participate: report and drop the line.
        print("%s could not be parsed" % repr(s))
        return ""

    items = captured.split("@")
    sortstring = items[0]
    has_at = len(items) > 1

    processedstring = asciify(dediacriticize(sortstring))
    if sortstring == processedstring:
        return s

    # ignoredic is only mutated in place, so no ``global`` statement is needed.
    if sortstring not in ignoredic:
        print("%s => %s" % (sortstring, processedstring))
        ignoredic[sortstring] = True

    if has_at:
        # An explicit sort key is present: swap it for the conformed one.
        result = s.replace("%s@" % sortstring, "%s@" % processedstring)
        print(result)
        return result

    # No explicit sort key yet: prepend the conformed key as "<sort>@<display>".
    return s.replace(sortstring, "%s@%s" % (processedstring, sortstring))