Example #1
0
def test_bow_build_vocab():
    """Verify the vocabulary of a ``BoWField`` before and after ``setup``.

    The field is created with ``min_freq=2`` and ``'<unk>'`` as the unknown
    token, then set up on a two-sentence corpus.
    """
    bow = BoWField(min_freq=2, unk_token='<unk>')

    # Before any data is seen, only the unknown token is expected.
    assert bow.vocab == {'<unk>': 0}

    corpus = ["justo luctus Praesent luctus", "luctus praesent"]
    bow.setup(corpus)

    # 'luctus' appears three times in the corpus; 'justo', 'Praesent' and
    # 'praesent' appear once each. Only 'luctus' is expected to be added.
    # NOTE(review): this presumably relies on 'Praesent' and 'praesent'
    # being counted as distinct tokens -- confirm against BoWField.
    assert bow.vocab == {'<unk>': 0, 'luctus': 1}
Example #2
0
def test_bow_text_process_scale():
    """Verify that ``BoWField(min_freq=2, scale_factor=10)`` raises.

    ``scale_factor`` is supplied while ``normalize`` is not passed; the
    constructor is expected to reject this with ``ValueError``.
    """
    with pytest.raises(ValueError):
        # The instance is never used: construction itself must fail.
        BoWField(min_freq=2, scale_factor=10)
Example #3
0
def test_bow_text_process_normalize():
    """Verify ``process`` output when the field is built with ``normalize=True``.

    The same sentence is used both to set up the field and as the input to
    ``process``.
    """
    sentence = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2, normalize=True)
    bow.setup([sentence])

    # 'justo' and 'praesent' occur twice each in the sentence, 'luctus' once.
    expected = [0, 0.5, 0.5]
    processed = bow.process(sentence)
    assert list(processed) == expected
Example #4
0
def test_bow_text_process_normalize_scale():
    """Verify ``process`` output with ``normalize=True`` and ``scale_factor=10``.

    The same sentence is used both to set up the field and as the input to
    ``process``.
    """
    sentence = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2, normalize=True, scale_factor=10)
    bow.setup([sentence])

    # 'justo' and 'praesent' occur twice each in the sentence, 'luctus' once.
    expected = [0, 5, 5]
    processed = bow.process(sentence)
    assert list(processed) == expected
Example #5
0
def test_bow_text_process():
    """Verify ``process`` output for a field built with only ``min_freq=2``.

    The same sentence is used both to set up the field and as the input to
    ``process``.
    """
    sentence = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2)
    bow.setup([sentence])

    # 'justo' and 'praesent' occur twice each in the sentence, 'luctus' once.
    expected = [0, 2, 2]
    processed = bow.process(sentence)
    assert list(processed) == expected