예제 #1
0
def test__collocation_document():
    """Check ``_collocation_document`` output for the first two sample entries.

    The collocation patterns are collected from ``sample_data`` with
    ``min_freqs=[2, 2]``; the element at index 1 of each of the first two
    entries is then converted and compared against a hard-coded token list.
    """
    collocations = _collect_bigrams_and_trigrams(sample_data, min_freqs=[2, 2])

    first_tokens = _collocation_document(sample_data[0][1], collocations)
    assert first_tokens == [
        u'frank_swank',
        u'tank',
        u'walked',
        u'sassy',
        u'unicorn',
        u'brony',
        u'prancercise',
        u'class',
        u'daily',
        u'prancercise',
        u'tremendously',
        u'popular',
        u'pastime',
        u'sassy_unicorns',
        u'retirees',
        u'alike',
    ]

    second_tokens = _collocation_document(sample_data[1][1], collocations)
    assert second_tokens == [
        u'prancercise',
        u'form',
        u'art',
        u'fitniss',
        u'originally',
        u'invented',
        u'sassy_unicorns',
        u'recently',
        u'popularized',
        u'retired',
        u'celebrities',
        u'frank_swank',
        u'tank',
    ]
예제 #2
0
def test__collocation_document():
    """Verify the token lists produced for sample entries 0 and 1.

    Expected results are listed per entry index; each one is compared with
    what ``_collocation_document`` returns for ``sample_data[index][1]`` using
    the patterns collected with ``min_freqs=[2, 2]``.
    """
    # Expected output, positioned by the index of the entry in sample_data.
    expected_per_entry = [
        [
            u'frank_swank', u'tank', u'walked', u'sassy',
            u'unicorn', u'brony', u'prancercise', u'class',
            u'daily', u'prancercise', u'tremendously', u'popular',
            u'pastime', u'sassy_unicorns', u'retirees', u'alike',
        ],
        [
            u'prancercise', u'form', u'art', u'fitniss',
            u'originally', u'invented', u'sassy_unicorns', u'recently',
            u'popularized', u'retired', u'celebrities', u'frank_swank',
            u'tank',
        ],
    ]

    patterns = _collect_bigrams_and_trigrams(sample_data, min_freqs=[2, 2])

    for entry_index, expected_tokens in enumerate(expected_per_entry):
        produced = _collocation_document(sample_data[entry_index][1], patterns)
        assert produced == expected_tokens
예제 #3
0
def test__collect_bigrams_and_trigrams():
    """Check the ``.pattern`` strings of the first two collected objects.

    ``_collect_bigrams_and_trigrams`` is run on ``sample_data`` with
    ``min_freqs=[2, 2]``; element 0 is expected to carry the two-word
    alternation and element 1 the three-word phrase.
    """
    collected = _collect_bigrams_and_trigrams(sample_data, min_freqs=[2, 2])

    bigram_pattern = collected[0].pattern
    assert bigram_pattern == u'(frank swank|swank tank|sassy unicorns)'

    trigram_pattern = collected[1].pattern
    assert trigram_pattern == u'(frank swank tank)'
예제 #4
0
def test__collect_bigrams_and_trigrams():
    """Compare collected ``.pattern`` values against the expected strings.

    The result of ``_collect_bigrams_and_trigrams(sample_data,
    min_freqs=[2, 2])`` is indexed at positions 0 and 1, in that order, and
    each element's ``pattern`` attribute must equal the listed string.
    """
    # Position in this tuple corresponds to the index in the collected result.
    expected_patterns = (
        u'(frank swank|swank tank|sassy unicorns)',
        u'(frank swank tank)',
    )

    collected = _collect_bigrams_and_trigrams(sample_data, min_freqs=[2, 2])

    for position, expected in enumerate(expected_patterns):
        assert collected[position].pattern == expected