def test_normalize_list():
    """Check article stripping and de-pluralization in normalize_list()."""
    cases = [
        # Articles are stripped away...
        ('the dog', ['dog']),
        # ...unless the article is the only thing in the text.
        ('the', ['the']),
        # Pluralization is stripped out.
        ('big dogs', ['big', 'dog']),
    ]
    for text, expected in cases:
        eq_(normalize_list(text), expected)
def normalize(text):
    """
    Return a string built from the non-stopword word stems of `text`.

    The stems come from normalize_list() and are joined back into a
    single string with untokenize(). See normalize_list().
    """
    stems = normalize_list(text)
    return untokenize(stems)