def test_clean_phrase(self):
    """Check clean_phrase against a table of (phrase, expected words) cases."""
    expectations = (
        # Ordinary words come back as a list, in order.
        ('abc def ghi', ['abc', 'def', 'ghi']),
        # A phrase made only of 'a' / 'the' / 'an' (any case) yields nothing.
        ('a A tHe an a', []),
        # Empty input yields an empty list.
        ('', []),
        # Results are lowercased; 'the' and 'of' are dropped, while the
        # trailing comma on 'times,' is kept.
        (
            'The Best of times, the blurst of times',
            ['best', 'times,', 'blurst', 'times'],
        ),
    )
    for phrase, expected_words in expectations:
        self.assertEqual(clean_phrase(phrase), expected_words)
def create_key(phrase):
    """
    Turn a phrase into a compact string suitable for use as a key.

    The phrase is run through clean_phrase, at most the first MAX_WORDS
    words are kept, and every character that is not a lowercase ASCII
    letter, a digit, an underscore or a hyphen is stripped -- including the
    spaces between words.

    Example carried over from the original docstring (presumably assumes
    MAX_WORDS is 3 -- confirm against the module constant):

    The quick brown fox jumped --> quickbrownfox
    """
    leading_words = clean_phrase(phrase)[:MAX_WORDS]
    spaced = ' '.join(leading_words)
    # The separating spaces are not in the allowed set, so they vanish here.
    return re.sub('[^a-z0-9_-]', '', spaced)
def partial_complete(phrase):
    """
    Yield the sliding-window chunks of a phrase.

    The phrase is first split into words by clean_phrase. For every window
    size from MIN_WORDS up to min(len(words), MAX_WORDS) (never less than
    MIN_WORDS), each contiguous run of that many words is yielded as a
    single space-joined string, left to right.

    If the phrase has fewer words than the window size, exactly one chunk is
    yielded containing all the words there are; for a phrase with no words
    that chunk is the empty string.

    Example carried over from the original docstring (presumably assumes a
    window size of 3 -- confirm against MIN_WORDS / MAX_WORDS):

    The quick brown fox jumped --> quick brown fox, brown fox jumped
    """
    words = clean_phrase(phrase)
    word_count = len(words)

    # Largest window: capped by MAX_WORDS and the phrase length, but never
    # below MIN_WORDS so the loop below always runs at least once.
    largest_window = max(min(word_count, MAX_WORDS), MIN_WORDS)

    for num_words in range(MIN_WORDS, largest_window + 1):
        # Number of start positions for this window size; clamp to 1 so a
        # phrase shorter than the window still produces one (short) chunk.
        window_count = max(word_count - num_words + 1, 1)
        for start in range(window_count):
            yield ' '.join(words[start:start + num_words])