def generate_key(self, ingr):
    """Generate a generic-looking key from an ingredient string.

    The string is stripped of surrounding whitespace and, unless
    ``langProperties['capitalisedNouns']`` is set, lower-cased as unicode.
    If the result contains no comma, has at least two words and its last
    word is found in ``self.cats``, the last word is moved to the front and
    the remaining words are re-joined with single spaces, so that
    e.g. "whole-wheat bread" becomes "bread, whole-wheat" when "bread" is
    in ``self.cats``.

    Args:
        ingr: The ingredient text; a unicode string or a UTF-8 encoded
            byte string.

    Returns:
        The generated key string.
    """
    timer = TimeAction('keymanager.generate_key 1', 3)
    debug("Start generate_key(self,%s)" % ingr, 10)
    ingr = ingr.strip()
    # Language specific: lower-casing is skipped for languages such as
    # German, because
    #   i) German nouns always start with an uppercase letter, and
    #  ii) lower() doesn't appear to work correctly with umlauts.
    if not langProperties['capitalisedNouns']:
        # We want unicode's lower() method, so decode byte strings first.
        if not isinstance(ingr, unicode):
            ingr = unicode(ingr.decode('utf8'))
        ingr = ingr.lower()
    timer.end()
    timer = TimeAction('keymanager.generate_key 2', 3)
    debug("verbless string=%s" % ingr, 10)
    if ',' not in ingr:
        # With no commas present, see whether it makes sense to turn
        # e.g. "whole-wheat bread" into "bread, whole-wheat".
        words = ingr.split()
        if len(words) >= 2 and words[-1] in self.cats:
            ingr = "%s, %s" % (words[-1], " ".join(words[:-1]))
    debug("End generate_key", 10)
    timer.end()
    return ingr
def remove_verbs(self, words):
    """Strip leading text matched by ``self.ignored_regexp``.

    ``words`` may be a string or a list of words; a list is joined with
    single spaces before processing.  Everything from the first ';' or
    '--' onwards is discarded.  Then, for as long as
    ``self.ignored_regexp.match`` finds a non-empty match, the matched
    span is cut out of the text.  (Assuming ``ignored_regexp`` is a
    compiled ``re`` pattern, ``match`` only matches at the start of the
    text, so only leading matches are removed.)

    Args:
        words: A string, or a list of word strings.

    Returns:
        The remaining text as a string if a string was passed in;
        otherwise a list of the remaining whitespace-separated words.
    """
    debug("Start remove_verbs", 10)
    t = TimeAction('remove_verbs', 0)
    was_list = isinstance(words, list)
    if was_list:
        words = " ".join(words)
    words = words.split(';')[0]   # ignore everything after a semicolon
    words = words.split("--")[0]  # ignore everything after double dashes too
    m = self.ignored_regexp.match(words)
    # A zero-width match removes nothing, so the text would never change;
    # stop on one instead of looping forever.
    while m and m.end() > m.start():
        words = words[:m.start()] + words[m.end():]
        m = self.ignored_regexp.match(words)
    t.end()
    if was_list:
        return words.split()
    return words