예제 #1
0
def test_explicit_vocab_roundtrip(tmp_path):
    """Write an ``ExplicitVocab`` to disk and check the reloaded vocab equals it.

    The vocab holds the strings "10".."99" as words and is backed by an
    ``ExplicitIndexer`` built from the strings "0".."9".
    """
    indexer_entries = list(map(str, range(10)))
    words = list(map(str, range(10, 100)))
    original = ExplicitVocab(words, indexer=ExplicitIndexer(indexer_entries))

    target = tmp_path / "write_explicit_vocab.fifu"
    original.write(target)

    assert original == load_vocab(target)
예제 #2
0
def test_ff_buckets_lookup(tests_root):
    """Check word lookup and ``idx`` results on the ``ff_buckets.fifu`` fixture."""
    vocab = load_vocab(tests_root / "data" / "ff_buckets.fifu")

    # The first stored word is "one", and looking it up returns its position.
    assert vocab.words[0] == "one"
    assert vocab["one"] == 0

    # Indices expected from ``idx`` for 'tübingen', in ascending order.
    expected_indices = [
        14, 69, 74, 124, 168, 181, 197, 246, 250, 276, 300, 308, 325,
        416, 549, 590, 648, 651, 707, 717, 761, 817, 820, 857, 860, 1007,
    ]
    actual_indices = sorted(vocab.idx('tübingen'))
    assert actual_indices == expected_indices
예제 #3
0
def test_reading(tests_root):
    """Exercise ``load_vocab`` on invalid inputs and on the simple vocab fixture."""
    # Each pair is (exception expected from load_vocab, argument that triggers it).
    failing_inputs = (
        (TypeError, None),
        # 0 opens sys.stdin, should result in an error when trying to read magic
        (FinalfusionFormatError, 0),
        (IOError, "foo"),
    )
    for expected_error, bad_input in failing_inputs:
        with pytest.raises(expected_error):
            finalfusion.vocab.load_vocab(bad_input)

    # A valid file loads, and its first word is "Paris".
    vocab = load_vocab(tests_root / "data" / "simple_vocab.fifu")
    assert vocab.words[0] == "Paris"
예제 #4
0
def test_simple_roundtrip(tests_root, tmp_path):
    """Write the simple vocab fixture to disk and check the reloaded vocab equals it.

    Args:
        tests_root: Fixture giving the root directory of the test suite; the
            input file is read from ``data/simple_vocab.fifu`` below it.
        tmp_path: pytest temporary directory the vocab is written into.
    """
    filename = tmp_path / "write_simple.fifu"
    v = load_vocab(tests_root / "data" / "simple_vocab.fifu")
    v.write(filename)
    # Compare against the original instead of only checking truthiness, so a
    # write/read cycle that loses or alters data fails the test.
    assert v == load_vocab(filename)
예제 #5
0
def test_fifu_buckets_roundtrip(tests_root, tmp_path):
    """Round-trip the ``ff_buckets.fifu`` fixture through ``write`` and ``load_vocab``."""
    source = tests_root / "data" / "ff_buckets.fifu"
    target = tmp_path / "write_ff_buckets.fifu"

    original = load_vocab(source)
    original.write(target)
    reloaded = load_vocab(target)

    assert original == reloaded
예제 #6
0
def test_fasttext_vocab_roundtrip(tmp_path):
    """Write a ``FastTextVocab`` of the words "0".."9" and check it reloads equal."""
    words = list(map(str, range(10)))
    original = FastTextVocab(words)

    target = tmp_path / "write_ft_vocab.fifu"
    original.write(target)

    assert original == load_vocab(target)