def test_tag_and_stem():
    """Check tag_and_stem() output on three short English inputs.

    Judging by the fixtures below, each expected item is a triple of
    (stem, part-of-speech tag, original token).
    """
    # Plural noun: the token 'dogs' is expected with stem 'dog' and tag 'NNS'.
    expected_plural = [
        (u'the', 'DT', u'the'),
        (u'big', 'JJ', u'big'),
        (u'dog', 'NNS', u'dogs'),
    ]
    eq_(tag_and_stem('the big dogs'), expected_plural)

    # Hashtag: the '#' is expected as its own token, tagged 'NN'.
    expected_hashtag = [
        (u'the', 'DT', u'the'),
        (u'#', 'NN', u'#'),
        (u'big', 'JJ', u'big'),
        (u'dog', 'NN', u'dog'),
    ]
    eq_(tag_and_stem('the #big dog'), expected_hashtag)

    # Two sentences: the contraction pieces 'ca' / "n't" are expected to
    # stem to 'can' / 'not', and each sentence-final period is its own item.
    expected_two_sentences = [
        (u'i', 'PRP', u'I'),
        (u'can', 'MD', u'ca'),
        (u'not', 'RB', u"n't"),
        (u'.', '.', u'.'),
        (u'avoid', 'NNP', u'Avoid'),
        (u'fragment', 'NNS', u'fragments'),
        (u'.', '.', u'.'),
    ]
    eq_(tag_and_stem("I can't. Avoid fragments."), expected_two_sentences)
def test_japanese():
    """Check normalize() and tag_and_stem() on a short Japanese sentence."""
    # normalize() is expected to reduce the sentence to the single word below.
    eq_(normalize('これはテストです'), 'テスト')

    # Expected triples for the same sentence with a trailing full stop.
    # NOTE(review): the '~' prefix on some tags presumably marks categories
    # that normalize() drops -- confirm against the tagger implementation.
    expected_triples = [
        ('これ', '~名詞', 'これ'),
        ('は', '~助詞', 'は'),
        ('テスト', '名詞', 'テスト'),
        ('です', '~助動詞', 'です'),
        ('。', '.', '。'),
    ]
    eq_(tag_and_stem('これはテストです。'), expected_triples)