def test_bow_build_vocab():
    """Check the vocabulary before and after calling ``setup``.

    A fresh field is expected to hold only the unknown token; after
    ``setup`` on the sample corpus the test expects exactly one extra
    entry, ``'luctus'``.
    """
    bow = BoWField(min_freq=2, unk_token='<unk>')

    # Before setup, the unknown token is the only vocabulary entry.
    assert bow.vocab == {'<unk>': 0}

    corpus = ["justo luctus Praesent luctus", "luctus praesent"]
    bow.setup(corpus)

    # With min_freq=2 the test expects only 'luctus' to be added.
    expected = {'<unk>': 0, 'luctus': 1}
    assert bow.vocab == expected
def test_bow_text_process_scale():
    """Constructing a field with ``scale_factor`` alone must raise ValueError.

    ``normalize`` is not passed here, unlike the sibling test that combines
    ``normalize=True`` with ``scale_factor``.
    """
    with pytest.raises(ValueError):
        # The constructor itself is expected to raise, so the result
        # is intentionally not bound to a name.
        BoWField(min_freq=2, scale_factor=10)
def test_bow_text_process_normalize():
    """With ``normalize=True`` the processed sample must equal [0, 0.5, 0.5]."""
    text = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2, normalize=True)
    bow.setup([text])

    # Materialize the result so it can be compared against a plain list.
    processed = list(bow.process(text))
    assert processed == [0, 0.5, 0.5]
def test_bow_text_process_normalize_scale():
    """With ``normalize=True`` and ``scale_factor=10`` expect [0, 5, 5]."""
    text = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2, normalize=True, scale_factor=10)
    bow.setup([text])

    # Materialize the result so it can be compared against a plain list.
    processed = list(bow.process(text))
    assert processed == [0, 5, 5]
def test_bow_text_process():
    """Without normalization or scaling the processed sample must be [0, 2, 2]."""
    text = "justo praesent luctus justo praesent"

    bow = BoWField(min_freq=2)
    bow.setup([text])

    # Materialize the result so it can be compared against a plain list.
    processed = list(bow.process(text))
    assert processed == [0, 2, 2]