Exemple #1
0
 def generate_key(self, ingr):
     """Generate a generic-looking key from an ingredient string.

     The string is stripped of surrounding whitespace and, unless
     ``langProperties['capitalisedNouns']`` is set, lower-cased as unicode.
     If the result contains no comma, has at least two words and its last
     word is found in ``self.cats``, that word is moved to the front, so
     that e.g. "whole-wheat bread" becomes "bread, whole-wheat".

     ingr -- the ingredient text.  When lower-casing applies, a
             non-unicode value is first decoded as UTF-8.

     Returns the resulting key string.
     """
     timer = TimeAction('keymanager.generate_key 1',3)
     debug("Start generate_key(self,%s)"%ingr,10)
     ingr = ingr.strip()
     # Language specific: lower-casing is skipped for languages such as
     # German, because
     # i) German nouns always start with an uppercase letter, and
     # ii) lower() doesn't appear to work correctly with umlauts.
     if not langProperties['capitalisedNouns']:
         # Use unicode's lower() method rather than the byte-string one.
         if not isinstance(ingr, unicode):
             ingr = ingr.decode('utf8')
         ingr = ingr.lower()
     timer.end()
     timer = TimeAction('keymanager.generate_key 2',3)
     debug("verbless string=%s"%ingr,10)
     if ',' not in ingr:
         # With no commas present, see whether it makes sense to turn,
         # e.g., "whole-wheat bread" into "bread, whole-wheat".
         words = ingr.split()
         if len(words) >= 2 and words[-1] in self.cats:
             ingr = "%s, %s" % (words[-1], " ".join(words[:-1]))
     debug("End generate_key",10)
     timer.end()
     return ingr
Exemple #2
0
 def remove_verbs (self,words):
     """Strip leading text matched by self.ignored_regexp from *words*.

     words -- either a string or a list of words.  A list is joined with
              single spaces before processing.

     Everything from the first ';' onward, and then everything from the
     first '--' onward, is discarded.  After that, for as long as
     ``self.ignored_regexp.match`` succeeds on the remaining text, the
     matched span is cut out.
     NOTE(review): assuming ignored_regexp is a compiled ``re`` pattern,
     match() is anchored at the start of the text, so only leading ignored
     words are removed -- confirm this is the intent.

     Returns a string when a string was passed in; otherwise the remaining
     text split on whitespace into a list.
     """
     debug("Start remove_verbs",10)
     t=TimeAction('remove_verbs',0)
     was_list = isinstance(words, list)
     if was_list:
         words = " ".join(words)
     words = words.split(';')[0] # we ignore everything after a semicolon
     words = words.split("--")[0] # we ignore everything after double dashes too!
     m = self.ignored_regexp.match(words)
     # A zero-width match would leave the text unchanged and make this loop
     # spin forever, so stop as soon as a match removes nothing.
     while m and m.end() > m.start():
         words = words[:m.start()] + words[m.end():]
         m = self.ignored_regexp.match(words)
     t.end()
     if was_list:
         return words.split()
     return words