Example #1
0
def processline(s):
  r"""Conform the sort key of an index line and return the conformed line.

  The sort key is the text captured by group 1 of the module-level pattern
  ``p``. LaTeX diacritics like {\'{e}} are first removed from the key with
  ``dediacriticize``; the result is then translated from Unicode to ASCII
  with ``asciify``. If the key changes, every occurrence of ``<key>@`` in
  the line is replaced by ``<converted key>@``.

  Args:
    s (str): the input line

  Returns:
    str: the line with its sort key conformed. ``s`` is returned unchanged
    when it is blank, when it cannot be parsed, or when the key is already
    conformant.

  Example (conversion applied to the key; which lines are accepted depends
  on ``p``, which is defined elsewhere):
    \v{C}{\'{e}}pl\"o, Slavomír  =>  Ceplo, Slavomir
  """

  if s.strip() == '':
    return s
  # find the substring used for sorting
  m = p.match(s)
  # m.group(1) is None when the group did not take part in the match;
  # treat that like a failed match instead of feeding a non-string onwards
  sortstring = m.group(1) if m is not None else None
  if sortstring is None:
    print("%s could not be parsed" % repr(s))
    return s
  tmpstring = asciify(dediacriticize(sortstring))
  if sortstring == tmpstring:
    return s
  print("%s => %s" % (sortstring, tmpstring))
  # only the "key@" part is rewritten; the display text after "@" is kept
  return s.replace("%s@" % sortstring, "%s@" % tmpstring)
Example #2
0
def processline(s):
    r"""Conform the sort key of an index line and return the conformed line.

    The text captured by group 1 of the module-level pattern ``p`` is split
    on "@"; the part before the first "@" is the sort key. LaTeX diacritics
    like {\'{e}} are first removed from the key with ``dediacriticize``; the
    result is then translated from Unicode to ASCII with ``asciify``.

    If the key changes and the captured text already contains an "@", every
    occurrence of ``<key>@`` in the line is replaced by ``<converted key>@``
    and the resulting line is printed. If there is no "@", every occurrence
    of the key is replaced by ``<converted key>@<key>``.

    Each changed key is reported on stdout once; the module-level mapping
    ``ignoredic`` records the keys that have already been reported.

    Args:
      s (str): the input line

    Returns:
      str: the conformed line. ``s`` is returned unchanged when it is blank
      or when the key is already conformant; the empty string is returned
      when the line cannot be parsed.

    Example (conversion applied to the key; which lines are accepted depends
    on ``p``, which is defined elsewhere):
      \v{C}{\'{e}}pl\"o, Slavomír  =>  Ceplo, Slavomir
    """
    if s.strip() == "":
        return s
    # find the substring used for sorting
    m = p.match(s)
    # a failed match and a non-participating group 1 are both "unparsable"
    captured = m.group(1) if m is not None else None
    if captured is None:
        print("%s could not be parsed" % repr(s))
        return ""
    items = captured.split("@")
    sortstring = items[0]
    has_at = len(items) > 1
    processedstring = asciify(dediacriticize(sortstring))
    if sortstring == processedstring:
        return s
    # ignoredic is only mutated, never rebound, so no ``global`` is needed
    if sortstring not in ignoredic:
        print("%s => %s" % (sortstring, processedstring))
        ignoredic[sortstring] = True
    if has_at:
        # an explicit sort key exists: rewrite just the "key@" part
        result = s.replace("%s@" % sortstring, "%s@" % processedstring)
        print(result)
        return result
    # no explicit sort key: prepend the converted key as "key@display"
    return s.replace(sortstring, "%s@%s" % (processedstring, sortstring))