예제 #1
0
def test_bow_build_vocab():
    """Check ``BoWField.vocab`` before and after calling ``setup``.

    The corpus contains 'luctus' three times, while 'justo', 'Praesent'
    and 'praesent' each appear once. With ``min_freq=2`` the test expects
    only 'luctus' to join the unknown token in the vocabulary.
    """
    unk = '<unk>'
    bow = BoWField(min_freq=2, unk_token=unk)

    # Before setup, the vocabulary must hold just the unknown token.
    assert bow.vocab == {unk: 0}

    corpus = ["justo luctus Praesent luctus", "luctus praesent"]
    bow.setup(corpus)

    # NOTE(review): the expectation implies 'Praesent' and 'praesent' are
    # counted as distinct tokens -- confirm against BoWField's tokenizer.
    expected = {unk: 0, 'luctus': 1}
    assert bow.vocab == expected
예제 #2
0
def test_bow_text_process_normalize():
    """Check ``process`` output when the field is built with ``normalize=True``.

    The sentence has 'justo' and 'praesent' twice each and 'luctus' once.
    After setting the field up on that single sentence, processing it is
    expected to yield ``[0, 0.5, 0.5]``.
    """
    sentence = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2, normalize=True)
    bow.setup([sentence])

    # NOTE(review): presumably slot 0 belongs to the unknown token and the
    # remaining slots to 'justo'/'praesent' -- confirm against BoWField.
    weights = list(bow.process(sentence))
    assert weights == [0, 0.5, 0.5]
예제 #3
0
def test_bow_text_process_normalize_scale():
    """Check ``process`` output with ``normalize=True`` and ``scale_factor=10``.

    Uses a sentence in which 'justo' and 'praesent' occur twice and
    'luctus' once; the processed result is expected to be ``[0, 5, 5]``.
    """
    sentence = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2, normalize=True, scale_factor=10)
    bow.setup([sentence])

    # NOTE(review): the expected values look like normalized weights
    # multiplied by scale_factor -- confirm against BoWField.
    scaled = list(bow.process(sentence))
    assert scaled == [0, 5, 5]
예제 #4
0
def test_bow_text_process():
    """Check the ``process`` output of a ``BoWField`` built with only ``min_freq=2``.

    The sentence repeats 'justo' and 'praesent' twice and contains
    'luctus' once; the processed result is expected to be ``[0, 2, 2]``.
    """
    sentence = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2)
    bow.setup([sentence])

    # NOTE(review): presumably these are raw counts per vocabulary slot,
    # with slot 0 reserved for the unknown token -- confirm against BoWField.
    counts = list(bow.process(sentence))
    assert counts == [0, 2, 2]