Example #1
0
def test_tag_and_stem():
    """Check ``tag_and_stem`` against three English inputs.

    Each expected entry is a 3-tuple. Judging from the data it reads as
    (stem, tag, surface token) -- NOTE(review): confirm against the
    ``tag_and_stem`` implementation.
    """
    cases = [
        # A plain phrase ending in a plural noun.
        ('the big dogs', [
            (u'the', 'DT', u'the'),
            (u'big', 'JJ', u'big'),
            (u'dog', 'NNS', u'dogs'),
        ]),
        # The '#' is expected to come back as its own entry.
        ('the #big dog', [
            (u'the', 'DT', u'the'),
            (u'#', 'NN', u'#'),
            (u'big', 'JJ', u'big'),
            (u'dog', 'NN', u'dog'),
        ]),
        # Two sentences; "can't" is expected as the pieces 'ca' and "n't".
        ("I can't. Avoid fragments.", [
            (u'i', 'PRP', u'I'),
            (u'can', 'MD', u'ca'),
            (u'not', 'RB', u"n't"),
            (u'.', '.', u'.'),
            (u'avoid', 'NNP', u'Avoid'),
            (u'fragment', 'NNS', u'fragments'),
            (u'.', '.', u'.'),
        ]),
    ]
    # Cases are checked in the order listed; the first mismatch fails the test.
    for text, expected in cases:
        eq_(tag_and_stem(text), expected)
Example #2
0
def test_japanese():
    """Check ``normalize`` and ``tag_and_stem`` on a short Japanese sentence.

    NOTE(review): three of the expected tags carry a '~' prefix, and the
    only token without it ('テスト') is also the whole expected ``normalize``
    result -- presumably '~' marks tokens dropped by ``normalize``; confirm.
    """
    expected_tokens = [('これ', '~名詞', 'これ'), ('は', '~助詞', 'は'),
                       ('テスト', '名詞', 'テスト'), ('です', '~助動詞', 'です'),
                       ('。', '.', '。')]

    # The sentence without the closing '。' should normalize to 'テスト' alone.
    normalized = normalize('これはテストです')
    eq_(normalized, 'テスト')

    # The full sentence, closing '。' included, yields one entry per token.
    tagged = tag_and_stem('これはテストです。')
    eq_(tagged, expected_tokens)
Example #3
0
def test_tag_and_stem():
    """Compare ``tag_and_stem`` output with hand-written expectations.

    The expected values are lists of 3-tuples; from the data they look like
    (stem, tag, surface token) -- NOTE(review): confirm against the
    ``tag_and_stem`` implementation.
    """
    # Simple phrase with a plural noun at the end.
    plain_result = tag_and_stem('the big dogs')
    eq_(plain_result, [(u'the', 'DT', u'the'), (u'big', 'JJ', u'big'),
                       (u'dog', 'NNS', u'dogs')])

    # A '#' in front of a word is expected as a separate entry.
    hashtag_result = tag_and_stem('the #big dog')
    eq_(hashtag_result, [(u'the', 'DT', u'the'), (u'#', 'NN', u'#'),
                         (u'big', 'JJ', u'big'), (u'dog', 'NN', u'dog')])

    # Two sentences, the first containing the contraction "can't".
    fragments_result = tag_and_stem("I can't. Avoid fragments.")
    eq_(fragments_result, [(u'i', 'PRP', u'I'), (u'can', 'MD', u'ca'),
                           (u'not', 'RB', u"n't"), (u'.', '.', u'.'),
                           (u'avoid', 'NNP', u'Avoid'),
                           (u'fragment', 'NNS', u'fragments'),
                           (u'.', '.', u'.')])
Example #4
0
def test_japanese():
    """Exercise ``normalize`` and ``tag_and_stem`` on Japanese text.

    NOTE(review): the '~' prefix on some expected tags is not explained
    here; 'テスト' is the only token without it and is also the expected
    ``normalize`` output -- confirm what the prefix means.
    """
    # Without the sentence-final '。', normalization should leave 'テスト'.
    eq_(normalize('これはテストです'), 'テスト')

    # With the '。', every token is expected back, one tuple each.
    eq_(
        tag_and_stem('これはテストです。'),
        [
            ('これ', '~名詞', 'これ'),
            ('は', '~助詞', 'は'),
            ('テスト', '名詞', 'テスト'),
            ('です', '~助動詞', 'です'),
            ('。', '.', '。'),
        ],
    )