def test_word_analyzer_unigrams():
    """Check the tokens produced by a unigram-only ``WordNGramAnalyzer``.

    Three inputs are analyzed with ``min_n=1, max_n=1, stop_words=None``:
    an accented French unicode sentence, a plain English string literal
    containing newlines, and a file-like ``StringIO`` object.  The expected
    token lists are lowercase, carry no accents or punctuation, and omit
    the single-character words of the inputs.
    """
    analyzer = WordNGramAnalyzer(min_n=1, max_n=1, stop_words=None)

    # Case 1: accented unicode sentence.
    french = u"J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon."
    french_tokens = [
        u"ai",
        u"mange",
        u"du",
        u"kangourou",
        u"ce",
        u"midi",
        u"etait",
        u"pas",
        u"tres",
        u"bon",
    ]
    assert_equal(analyzer.analyze(french), french_tokens)

    # Case 2: plain string literal spanning several lines.
    english = "This is a test, really.\n\n I met Harry yesterday."
    english_tokens = [
        u"this",
        u"is",
        u"test",
        u"really",
        u"met",
        u"harry",
        u"yesterday",
    ]
    assert_equal(analyzer.analyze(english), english_tokens)

    # Case 3: the analyzer is handed a file-like object instead of a string.
    stream = StringIO("This is a test with a file-like object!")
    stream_tokens = [
        u"this",
        u"is",
        u"test",
        u"with",
        u"file",
        u"like",
        u"object",
    ]
    assert_equal(analyzer.analyze(stream), stream_tokens)
def test_word_analyzer_unigrams_and_bigrams():
    """Check the output of a ``WordNGramAnalyzer`` with ``min_n=1, max_n=2``.

    For the accented French sentence, the expected result lists every
    single-word token first, followed by every pair of adjacent tokens
    joined with one space.
    """
    analyzer = WordNGramAnalyzer(min_n=1, max_n=2, stop_words=None)
    sentence = (
        u"J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon."
    )

    # Single-word tokens, in sentence order.
    unigrams = [
        u"ai", u"mange", u"du", u"kangourou", u"ce",
        u"midi", u"etait", u"pas", u"tres", u"bon",
    ]
    # Adjacent-word pairs, in sentence order.
    bigrams = [
        u"ai mange", u"mange du", u"du kangourou",
        u"kangourou ce", u"ce midi", u"midi etait",
        u"etait pas", u"pas tres", u"tres bon",
    ]

    assert_equal(analyzer.analyze(sentence), unigrams + bigrams)
def test_word_analyzer_unigrams():
    """Run a unigram-only ``WordNGramAnalyzer`` over a table of inputs.

    Each case pairs an input document with the exact token list that
    ``analyze`` is expected to return.  The inputs are a unicode sentence
    with accents, a plain multi-line string literal, and a ``StringIO``
    file-like object.
    """
    analyzer = WordNGramAnalyzer(min_n=1, max_n=1, stop_words=None)

    cases = [
        (
            u"J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon.",
            [u'ai', u'mange', u'du', u'kangourou', u'ce',
             u'midi', u'etait', u'pas', u'tres', u'bon'],
        ),
        (
            "This is a test, really.\n\n I met Harry yesterday.",
            [u'this', u'is', u'test', u'really',
             u'met', u'harry', u'yesterday'],
        ),
        (
            StringIO("This is a test with a file-like object!"),
            [u'this', u'is', u'test', u'with',
             u'file', u'like', u'object'],
        ),
    ]

    # Cases are checked in the order listed; the first mismatch fails the test.
    for document, tokens in cases:
        assert_equal(analyzer.analyze(document), tokens)
def test_word_analyzer_unigrams():
    """Verify unigram extraction by ``WordNGramAnalyzer`` (``min_n=max_n=1``).

    The analyzer is applied in turn to an accented unicode sentence, a
    plain string literal with embedded newlines, and a ``StringIO``
    file-like object; each result is compared with a literal token list.
    """
    unigram_analyzer = WordNGramAnalyzer(min_n=1, max_n=1, stop_words=None)

    # Accented unicode sentence: expected tokens are unaccented lowercase.
    actual = unigram_analyzer.analyze(
        u"J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon.")
    assert_equal(
        actual,
        [u'ai', u'mange', u'du', u'kangourou', u'ce',
         u'midi', u'etait', u'pas', u'tres', u'bon'])

    # Plain string literal containing punctuation and blank lines.
    actual = unigram_analyzer.analyze(
        "This is a test, really.\n\n I met Harry yesterday.")
    assert_equal(
        actual,
        [u'this', u'is', u'test', u'really', u'met', u'harry', u'yesterday'])

    # File-like input.
    actual = unigram_analyzer.analyze(
        StringIO("This is a test with a file-like object!"))
    assert_equal(
        actual,
        [u'this', u'is', u'test', u'with', u'file', u'like', u'object'])
def test_word_analyzer_unigrams_and_bigrams():
    """Check combined 1-gram and 2-gram output of ``WordNGramAnalyzer``.

    With ``min_n=1`` and ``max_n=2`` the expected list for the French
    sentence holds the ten single-word tokens followed by the nine
    space-joined pairs of neighbouring tokens.
    """
    ngram_analyzer = WordNGramAnalyzer(min_n=1, max_n=2, stop_words=None)
    document = u"J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon."

    expected_tokens = [
        # unigrams
        u'ai', u'mange', u'du', u'kangourou', u'ce',
        u'midi', u'etait', u'pas', u'tres', u'bon',
        # bigrams
        u'ai mange', u'mange du', u'du kangourou',
        u'kangourou ce', u'ce midi', u'midi etait',
        u'etait pas', u'pas tres', u'tres bon',
    ]

    assert_equal(ngram_analyzer.analyze(document), expected_tokens)
def test_word_analyzer_unigrams_and_bigrams():
    """Verify that ``WordNGramAnalyzer`` emits unigrams and then bigrams.

    The analyzer is built with ``min_n=1, max_n=2, stop_words=None`` and
    applied to one accented French sentence; the result is compared with
    a literal list of single tokens followed by adjacent-token pairs.
    """
    analyzer = WordNGramAnalyzer(min_n=1, max_n=2, stop_words=None)

    # Compute the actual result first, then compare against the literal.
    produced = analyzer.analyze(
        u"J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon.")

    assert_equal(produced, [
        u'ai',
        u'mange',
        u'du',
        u'kangourou',
        u'ce',
        u'midi',
        u'etait',
        u'pas',
        u'tres',
        u'bon',
        u'ai mange',
        u'mange du',
        u'du kangourou',
        u'kangourou ce',
        u'ce midi',
        u'midi etait',
        u'etait pas',
        u'pas tres',
        u'tres bon',
    ])