def test_merge(merger):
    tmp_dir = tempfile.mkdtemp()
    try:
        file_1 = path.join(tmp_dir, 'test_merger_1.kv')
        file_2 = path.join(tmp_dir, 'test_merger_2.kv')
        file_3 = path.join(tmp_dir, 'test_merger_3.kv')
        merge_file = path.join(tmp_dir, 'merge.kv')

        generate_keyvi(keys_1, file_1)
        generate_keyvi(keys_2, file_2)
        generate_keyvi(keys_3, file_3)

        merger.Add(file_1)
        merger.Add(file_2)
        merger.Add(file_3)
        merger.Merge(merge_file)

        merged_dictionary = Dictionary(merge_file)

        keys = set()
        keys.update(keys_1)
        keys.update(keys_2)
        keys.update(keys_3)

        keys_ordered = sorted(keys)

        for base_key, keyvi_key in zip(keys_ordered, merged_dictionary.GetAllKeys()):
            assert base_key == keyvi_key

    finally:
        shutil.rmtree(tmp_dir)
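# The merger fixture, the keys_* lists and generate_keyvi() are defined outside
# this snippet. A minimal sketch of how generate_keyvi might look for the
# key-only case, assuming KeyOnlyDictionaryCompiler (test data below is
# illustrative, not the original fixtures):
from keyvi.compiler import KeyOnlyDictionaryCompiler

keys_1 = ['aa', 'ab', 'ac']
keys_2 = ['ad', 'ae', 'af']
keys_3 = ['ag', 'ah', 'ai']


def generate_keyvi(keys, file_name):
    # compile a key-only dictionary and write it to file_name
    compiler = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    for key in keys:
        compiler.Add(key)
    compiler.Compile()
    compiler.WriteToFile(file_name)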
Example #2
def test_manifest_for_merger():
    try:
        c = JsonDictionaryCompiler({"memory_limit_mb": "10"})
        c.Add("abc", '{"a" : 2}')
        c.Compile()
        c.SetManifest('{"author": "Zapp Brannigan"}')
        c.WriteToFile('manifest_json_merge1.kv')
        del c

        c2 = JsonDictionaryCompiler({"memory_limit_mb": "10"})
        c2.Add("abd", '{"a" : 3}')
        c2.Compile()
        c2.SetManifest('{"author": "Leela"}')
        c2.WriteToFile('manifest_json_merge2.kv')
        del c2

        merger = JsonDictionaryMerger({"memory_limit_mb": "10"})
        # the two compiled dictionaries have to be added before merging
        merger.Add('manifest_json_merge1.kv')
        merger.Add('manifest_json_merge2.kv')
        merger.SetManifest('{"author": "Fry"}')
        merger.Merge('manifest_json_merged.kv')

        d = Dictionary('manifest_json_merged.kv')
        m = json.loads(d.GetManifest())
        assert m['author'] == "Fry"
        del d

    finally:
        os.remove('manifest_json_merge1.kv')
        os.remove('manifest_json_merge2.kv')
        os.remove('manifest_json_merged.kv')
Example #3
def test_merge(merger):
    tmp_dir = tempfile.mkdtemp()
    try:
        file_1 = path.join(tmp_dir, 'test_merger_1.kv')
        file_2 = path.join(tmp_dir, 'test_merger_2.kv')
        file_3 = path.join(tmp_dir, 'test_merger_3.kv')
        merge_file = path.join(tmp_dir, 'merge.kv')

        generate_keyvi(key_values_1, file_1)
        generate_keyvi(key_values_2, file_2)
        generate_keyvi(key_values_3, file_3)

        merger.Add(file_1)
        merger.Add(file_2)
        merger.Add(file_3)
        merger.Merge(merge_file)

        merged_dictionary = Dictionary(merge_file)

        key_values = {}
        key_values.update(key_values_1)
        key_values.update(key_values_2)
        key_values.update(key_values_3)

        key_values_ordered = collections.OrderedDict(sorted(key_values.items()))

        for (base_key, base_value), (keyvi_key, keyvi_value) in zip(key_values_ordered.items(),
                                                                    merged_dictionary.GetAllItems()):
            assert base_key == keyvi_key
            assert base_value == keyvi_value

    finally:
        shutil.rmtree(tmp_dir)
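# As above, key_values_* and generate_keyvi() live elsewhere in the test module.
# One possible sketch for the JSON case, assuming JsonDictionaryCompiler and
# values that round-trip through GetAllItems() (fixtures below are made up):
import json

from keyvi.compiler import JsonDictionaryCompiler

key_values_1 = {'aa': {'a': 1}, 'ab': {'b': 2}}
key_values_2 = {'ac': {'c': 3}}
key_values_3 = {'ad': {'d': 4}}


def generate_keyvi(key_values, file_name):
    # compile a JSON dictionary from a mapping of key -> value
    compiler = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in key_values.items():
        compiler.Add(key, json.dumps(value))
    compiler.Compile()
    compiler.WriteToFile(file_name)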
Example #4
def test_truncated_file_json():
    c = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    c.Add('a', '{1:2}')
    c.Add('b', '{2:4}')
    c.Add('c', '{4:4}')
    c.Add('d', '{2:3}')
    c.Compile()

    c.WriteToFile(os.path.join(tmp_dir, 'truncation_test.kv'))
    size = os.path.getsize(os.path.join(tmp_dir, 'truncation_test.kv'))

    fd_in = open(os.path.join(tmp_dir, 'truncation_test.kv'), 'rb')
    fd = open(os.path.join(tmp_dir, 'truncation_test1.kv'), 'wb')
    fd.write(fd_in.read(int(size / 2)))
    fd.close()

    # rewind so the second copy is also a truncation of the full file,
    # not just its second half
    fd_in.seek(0)
    fd2 = open(os.path.join(tmp_dir, 'truncation_test2.kv'), 'wb')
    fd2.write(fd_in.read(int(size - 2)))
    fd2.close()
    fd_in.close()

    with pytest.raises(ValueError):
        d = Dictionary(os.path.join(tmp_dir, 'truncation_test1.kv'))
    with pytest.raises(ValueError):
        d = Dictionary(os.path.join(tmp_dir, 'truncation_test2.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test2.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test1.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test.kv'))
Example #5
def dump(args):
    dictionary = Dictionary(args.input_file)
    with open(args.output_file, 'w') as file_out:
        for key, value in dictionary.GetAllItems():
            if args.json_dumps:
                key = json.dumps(key)
            if isinstance(key, bytes):
                key = key.decode()
            file_out.write(key)
            if value:
                if args.json_dumps:
                    value = json.dumps(value)
                file_out.write('\t{}'.format(value))
            file_out.write('\n')
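# dump() only needs an object carrying input_file, output_file and json_dumps
# attributes. A minimal sketch of wiring it to argparse (flag names here are an
# assumption, not keyvi's official CLI):
import argparse

parser = argparse.ArgumentParser(description='dump a keyvi dictionary as key<TAB>value lines')
parser.add_argument('input_file')
parser.add_argument('output_file')
parser.add_argument('--json-dumps', dest='json_dumps', action='store_true',
                    help='re-serialize keys and values as JSON before writing')
dump(parser.parse_args())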
Example #6
def test_manifest_after_compile():
    c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    c.Add("Leela")
    c.Add("Kif")
    c.Compile()
    c.SetManifest('{"author": "Zapp Brannigan"}')
    file_name = os.path.join(tempfile.gettempdir(), 'brannigan_manifest2.kv')
    try:
        c.WriteToFile(file_name)
        d = Dictionary(file_name)
        m = json.loads(d.GetManifest())
        assert m['author'] == "Zapp Brannigan"
        del d
    finally:
        os.remove(file_name)
Example #7
def test_invalid_filemagic():
    with open(os.path.join(tmp_dir, 'broken_file'), 'w') as fd:
        fd.write('dead beef')
    with pytest.raises(ValueError):
        d = Dictionary(os.path.join(tmp_dir, 'broken_file'))
    os.remove(os.path.join(tmp_dir, 'broken_file'))
Example #8
def tmp_dictionary(compiler, file_name):
    tmp_dir = tempfile.gettempdir()
    fq_file_name = os.path.join(tmp_dir, file_name)
    compiler.Compile()
    compiler.WriteToFile(fq_file_name)
    del compiler
    d = Dictionary(fq_file_name)
    yield d
    del d
    os.remove(fq_file_name)
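# tmp_dictionary() is a generator, so it can back a pytest fixture or, as in this
# sketch (an assumption, not the original test setup), be wrapped as a context
# manager around any compiler:
import contextlib

from keyvi.compiler import KeyOnlyDictionaryCompiler

compiler = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
compiler.Add("Nibbler")
with contextlib.contextmanager(tmp_dictionary)(compiler, 'tmp_dictionary_sketch.kv') as d:
    assert "Nibbler" in d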
Example #9
from keyvi.dictionary import Dictionary

query = ""

d = Dictionary("your-own.kv")


def get_lookup_key(query):
    return query


while query != "exit":
    query = input("Query:")
    for m in d.Get(get_lookup_key(query.strip())):
        print("{} {}".format(m.GetMatchedString(), m.GetValueAsString()))
Example #10
from keyvi.dictionary import Dictionary
from keyvi.completion import PrefixCompletion

query = ""

d = Dictionary("prefix-completion.kv")
c = PrefixCompletion(d)


def get_lookup_key(query):
    return query


while query != "exit":
    query = str(input("Query:"))
    for m in c.GetFuzzyCompletions(get_lookup_key(query.strip()), 3):  # max edit distance of 3
        print("{} {}".format(m.GetMatchedString(), m.GetAttribute("weight")))
Example #11
def stats(input_file):
    print(
        json.dumps(Dictionary(input_file).GetStatistics(),
                   indent=4,
                   sort_keys=True))
Example #12
from keyvi.dictionary import Dictionary
from keyvi.completion import MultiWordCompletion

MULTIWORD_QUERY_SEPARATOR = '\x1b'

query = ""

d = Dictionary("mw-completion.kv")
c = MultiWordCompletion(d)

def get_lookup_key(query):
    # sort every token except the last one into bag-of-words order; the token
    # currently being typed stays at the end
    l = query.split(" ")
    l_bow = " ".join(sorted(l[:-1]) + l[-1:])

    return l_bow


while query != "exit":
    query = str(input("Query:"))
    for m in c.GetCompletions(get_lookup_key(query.strip())):
        print("{} {}".format(m.GetMatchedString(), m.GetAttribute("weight")))
Example #13
import sys
from keyvi.dictionary import Dictionary
from keyvi.util import FsaTransform

d = Dictionary("normalization.kv")
n = FsaTransform(d)

for line in sys.stdin:
    print(n.Normalize(line))
Example #14
from keyvi.dictionary import Dictionary

query = ""

d = Dictionary("cities.kv")


def get_lookup_key(query):
    return query


while query != "exit":
    query = input("Query:")
    for m in d.LookupText(get_lookup_key(query.strip())):
        print("{}".format(m.GetMatchedString()))
Example #15
def test_non_existing_file():
    file_name = os.path.join(tmp_dir, 'non_existing_file')
    assert not os.path.exists(file_name)
    with pytest.raises(ValueError):
        d = Dictionary(file_name)
Example #16
def mem():
    D = Dictionary("_temp/kv/g_word_utf8.txt")
    print(type(D))