Example #1
0
def test_task4_api5():
    '''
    Test end-to-end functionality.

    Feeds four short tweets to find_top_k_ngrams and checks the single
    top-ranked n-gram (k = 1) for n-gram sizes 2, 3 and 4, in that order.
    '''
    corpus = [
        {'text': 'the cat in the hat'},
        {'text': "don't let the cat on the hat"},
        {'text': "the cat's hat"},
        {'text': "the hat cat"},
    ]

    # (n-gram size, expected top-1 result) pairs, checked in order.
    cases = (
        (2, [(('cat', 'hat'), 2)]),
        (3, [(("don't", 'let', 'cat'), 1)]),
        (4, [(("don't", 'let', 'cat', 'hat'), 1)]),
    )
    for size, wanted in cases:
        assert find_top_k_ngrams(corpus, size, 1) == wanted
Example #2
0
def test_find_top_k_ngrams(params):
    '''
    Parameterised check of analyze.find_top_k_ngrams.

    Reads "expected", "n", "k" and "tweets" from params. Any exception
    raised by the call under test is reported through pytest.fail together
    with a message describing how to re-run the call; otherwise the result
    is handed to compare_tuple_lists.
    '''

    # Normalise the n-gram in each expected entry to a tuple. This rewrites
    # params["expected"] in place, before params is handed to the helpers.
    params["expected"] = [
        (tuple(ngram), count) for ngram, count in params["expected"]
    ]

    # Build the "how to reproduce" text: setup steps followed by the call.
    recreate_msg = setup_tweets(params)
    recreate_msg += "  analyze.find_top_k_ngrams(tweets, {}, {})".format(
        params["n"], params["k"])

    try:
        actual = analyze.find_top_k_ngrams(
            params["tweets"],
            params["n"],
            params["k"],
        )
    except Exception as exc:
        # Turn any error into a test failure that carries the repro text.
        pytest.fail("\n".join([str(exc), recreate_msg]))

    compare_tuple_lists(actual, params, recreate_msg)
Example #3
0
def test_task4_api4():
    '''
    Test emoji removal.

    A tweet whose whole text is ';)' is expected to produce no 2-grams.
    '''
    corpus = [{'text': ';)'}]
    wanted = []
    assert find_top_k_ngrams(corpus, 2, 1) == wanted
Example #4
0
def test_task4_api3():
    '''
    Test that prefix filtering is done properly.

    The tweet '@Dog dog' is expected to produce no 2-grams (presumably
    because the '@'-prefixed word is dropped before n-grams are built).
    '''
    corpus = [{'text': '@Dog dog'}]
    wanted = []
    assert find_top_k_ngrams(corpus, 2, 1) == wanted
Example #5
0
def test_task4_api2():
    '''
    Test letter-case handling.

    'Dog dog' is expected to yield the lower-cased 2-gram ('dog', 'dog')
    with a count of 1.
    '''
    corpus = [{'text': 'Dog dog'}]
    wanted = [(('dog', 'dog'), 1)]
    assert find_top_k_ngrams(corpus, 2, 1) == wanted
Example #6
0
def test_task4_api1():
    '''
    Test punctuation removal.

    'the dog, a dog' is expected to yield the 2-gram ('dog', 'dog') with a
    count of 1, i.e. the comma after the first 'dog' must not survive.
    '''
    corpus = [{'text': 'the dog, a dog'}]
    wanted = [(('dog', 'dog'), 1)]
    assert find_top_k_ngrams(corpus, 2, 1) == wanted
Example #7
0
def test_task4_api0():
    '''
    Test stop-word removal and ordering.

    'the dog a dog' is expected to yield the 2-gram ('dog', 'dog') with a
    count of 1, i.e. 'the' and 'a' must not appear in the result.
    '''
    corpus = [{'text': 'the dog a dog'}]
    wanted = [(('dog', 'dog'), 1)]
    assert find_top_k_ngrams(corpus, 2, 1) == wanted