Example #1
0
def test_word_freq_returns_two_cols_if_not_num_list():
    """Without a number list, the result should have exactly two columns."""
    texts = ['pizza burger', 'pizza sandwitch']
    column_count = word_frequency(texts).shape[1]
    assert column_count == 2
Example #2
0
def test_works_without_numlist_provided():
    """Calling with only a text list should still yield the word 'text'."""
    texts = ['Great Text in a List', 'Greater text as well']
    words = word_frequency(texts)['word']
    assert (words == 'text').any()
Example #3
0
def test_word_freq_uses_regex():
    """With regex='pizza', the entry labelled 0 in 'word' should be 'pizza'."""
    texts = ['pizza burger', 'pizza sandwitch']
    words = word_frequency(texts, regex='pizza')['word']
    assert words[0] == 'pizza'
Example #4
0
def test_extra_info_provided():
    """With extra_info=True the result should carry all eight columns."""
    expected_columns = {
        'word',
        'abs_freq',
        'abs_perc',
        'abs_perc_cum',
        'wtd_freq',
        'wtd_freq_perc',
        'wtd_freq_perc_cum',
        'rel_value',
    }
    result = word_frequency(text_list, num_list, extra_info=True)
    actual_columns = set(result.columns.values)
    assert actual_columns == expected_columns
Example #5
0
def test_works_fine_with_only_stopwords_supplied():
    """Input consisting solely of 'on' should give an empty, four-column result.

    NOTE(review): presumably 'on' is in the default stopword list - confirm
    against word_frequency.
    """
    expected_shape = (0, 4)
    result = word_frequency(['on'], [3])
    assert result.shape == expected_shape
Example #6
0
def test_rm_words_removed():
    """Words passed through rm_words must not appear in the 'word' column."""
    removed = ['one', 'two']
    words = word_frequency(text_list, num_list, rm_words=removed)['word']
    for banned in ('one', 'two'):
        assert not words.eq(banned).any()
Example #7
0
def test_extra_info_not_provided():
    """With extra_info=False only the four basic columns should be present."""
    expected_columns = {'word', 'abs_freq', 'wtd_freq', 'rel_value'}
    result = word_frequency(text_list, num_list, extra_info=False)
    actual_columns = set(result.columns.values)
    assert actual_columns == expected_columns
Example #8
0
def test_rm_words_removed():
    """Words passed through rm_words must not appear in the 'word' column.

    The check is made against the column's values. Using ``in`` directly on
    a pandas Series tests the index labels instead, which would make the
    assertions pass regardless of what words the result contains.
    """
    result = word_frequency(text_list, num_list, rm_words=['one', 'two'])
    assert not result['word'].eq('one').any()
    assert not result['word'].eq('two').any()
Example #9
0
def test_words_separated_with_given_sep():
    """For each separator, the separator itself must not be one of the words.

    Membership is tested against the column's values (via ``tolist()``).
    Using ``in`` directly on a pandas Series tests the index labels, so the
    original assertion never inspected the words at all.
    """
    for sep in separators:
        result = word_frequency(text_list, num_list, sep=sep)
        assert sep not in result['word'].tolist()
def test_word_freq_uses_regex():
    """A regex of 'pizza' should put 'pizza' at label 0 of the 'word' column."""
    frequencies = word_frequency(
        ['pizza burger', 'pizza sandwitch'],
        regex='pizza',
    )
    top_word = frequencies['word'][0]
    assert top_word == 'pizza'
Example #11
0
def test_len_result_one_more_than_len_slots():
    """For each non-None separator, the separator must not be one of the words.

    NOTE(review): the function name does not describe what the body checks;
    it is kept unchanged so test selection by name keeps working.

    Membership is tested against the column's values (via ``tolist()``).
    Using ``in`` directly on a pandas Series tests the index labels, so the
    original assertion never inspected the words at all.
    """
    for sep in sep_list:
        result = word_frequency(text_list, num_list, sep=sep)
        if sep is not None:
            assert sep not in result['word'].tolist()
def test_works_without_numlist_provided():
    """The word 'text' should be found when no number list is supplied."""
    frequencies = word_frequency(
        ['Great Text in a List', 'Greater text as well'],
    )
    matches_text = frequencies['word'] == 'text'
    assert matches_text.any()
def test_works_fine_with_only_stopwords_supplied():
    """A text list holding only 'on' should produce a (0, 4)-shaped result.

    NOTE(review): presumably 'on' is in the default stopword list - confirm
    against word_frequency.
    """
    texts, weights = ['on'], [3]
    shape = word_frequency(texts, weights).shape
    assert shape == (0, 4)
def test_extra_info_provided():
    """extra_info=True should add the percentage and cumulative columns."""
    basic_columns = {'word', 'abs_freq', 'wtd_freq', 'rel_value'}
    extra_columns = {
        'abs_perc',
        'abs_perc_cum',
        'wtd_freq_perc',
        'wtd_freq_perc_cum',
    }
    result = word_frequency(text_list, num_list, extra_info=True)
    assert set(result.columns.values) == basic_columns | extra_columns
def test_extra_info_not_provided():
    """extra_info=False should leave only the four basic columns."""
    wanted = {
        'word',
        'abs_freq',
        'wtd_freq',
        'rel_value',
    }
    frequencies = word_frequency(text_list, num_list, extra_info=False)
    assert set(frequencies.columns.values) == wanted
def test_rm_words_removed():
    """Neither 'one' nor 'two' may remain in 'word' once listed in rm_words."""
    frequencies = word_frequency(text_list, num_list, rm_words=['one', 'two'])
    word_column = frequencies['word']
    has_one = word_column.eq('one').any()
    assert not has_one
    has_two = word_column.eq('two').any()
    assert not has_two