def test_truncated_file_json():
    """Loading a truncated keyvi file must raise ValueError.

    Compiles a small JSON dictionary, writes two damaged copies (the first
    half, and — because the read continues from the middle — a copy missing
    the leading half), and verifies that opening either one fails cleanly.
    """
    c = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    c.Add('a', '{1:2}')
    c.Add('b', '{2:4}')
    c.Add('c', '{4:4}')
    c.Add('d', '{2:3}')
    c.Compile()
    c.WriteToFile(os.path.join(tmp_dir, 'truncation_test.kv'))
    size = os.path.getsize(os.path.join(tmp_dir, 'truncation_test.kv'))
    # Context managers close all descriptors even if an assertion fails;
    # the original version leaked fd_in.
    with open(os.path.join(tmp_dir, 'truncation_test.kv'), 'rb') as fd_in:
        with open(os.path.join(tmp_dir, 'truncation_test1.kv'), 'wb') as fd:
            fd.write(fd_in.read(int(size / 2)))
        # fd_in is now positioned after the first half, so this copy
        # contains only the tail of the file.
        with open(os.path.join(tmp_dir, 'truncation_test2.kv'), 'wb') as fd2:
            fd2.write(fd_in.read(int(size - 2)))
    with pytest.raises(ValueError):
        keyvi.Dictionary(os.path.join(tmp_dir, 'truncation_test1.kv'))
    with pytest.raises(ValueError):
        keyvi.Dictionary(os.path.join(tmp_dir, 'truncation_test2.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test2.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test1.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test.kv'))
def test_truncated_file_json():
    """Loading a half-truncated keyvi file must raise ValueError.

    Compiles a small JSON dictionary, keeps only the first half of the
    written file, and verifies the truncated copy is rejected on load.
    """
    c = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    c.Add('a', '{1:2}')
    c.Add('b', '{2:4}')
    c.Add('c', '{4:4}')
    c.Add('d', '{2:3}')
    c.Compile()
    c.WriteToFile(os.path.join(tmp_dir, 'truncation_test.kv'))
    size = os.path.getsize(os.path.join(tmp_dir, 'truncation_test.kv'))
    # Context managers close both descriptors; the original leaked fd_in.
    with open(os.path.join(tmp_dir, 'truncation_test.kv'), 'rb') as fd_in:
        with open(os.path.join(tmp_dir, 'truncation_test1.kv'), 'wb') as fd:
            fd.write(fd_in.read(int(size / 2)))
    exception_caught = False
    try:
        keyvi.Dictionary(os.path.join(tmp_dir, 'truncation_test1.kv'))
    except ValueError:
        exception_caught = True
    assert exception_caught
    os.remove(os.path.join(tmp_dir, 'truncation_test1.kv'))
    os.remove(os.path.join(tmp_dir, 'truncation_test.kv'))
def test_merge(merger):
    """Merging three compiled dictionaries yields the union of their keys
    in sorted order."""
    work_dir = tempfile.mkdtemp()
    try:
        merge_file = path.join(work_dir, 'merge.kv')
        # Compile one source dictionary per key list and register it.
        for index, key_list in enumerate((keys_1, keys_2, keys_3), start=1):
            source_file = path.join(work_dir, 'test_merger_{}.kv'.format(index))
            generate_keyvi(key_list, source_file)
            merger.Add(source_file)
        merger.Merge(merge_file)
        merged_dictionary = keyvi.Dictionary(merge_file)
        # keyvi enumerates keys in sorted order, so compare against the
        # sorted union of the three inputs.
        expected_keys = sorted(set(keys_1) | set(keys_2) | set(keys_3))
        for base_key, keyvi_key in zip(expected_keys,
                                       merged_dictionary.GetAllKeys()):
            assert decode_to_unicode(base_key) == decode_to_unicode(keyvi_key)
    finally:
        shutil.rmtree(work_dir)
def test_manifest_for_merger():
    """The manifest set on the merger must override the inputs' manifests.

    Two dictionaries with distinct manifests are compiled; after merging,
    the merged file must carry the merger's own manifest ("Fry").
    """
    try:
        c = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
        c.Add("abc", '{"a" : 2}')
        c.Compile()
        c.SetManifest({"author": "Zapp Brannigan"})
        c.WriteToFile('manifest_json_merge1.kv')
        del c
        c2 = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
        c2.Add("abd", '{"a" : 3}')
        c2.Compile()
        c2.SetManifest({"author": "Leela"})
        c2.WriteToFile('manifest_json_merge2.kv')
        del c2
        merger = keyvi.JsonDictionaryMerger({"memory_limit_mb": "10"})
        # BUG FIX: the two compiled inputs were never handed to the merger,
        # so Merge() previously ran on zero inputs.
        merger.Add('manifest_json_merge1.kv')
        merger.Add('manifest_json_merge2.kv')
        merger.SetManifest({"author": "Fry"})
        merger.Merge('manifest_json_merged.kv')
        d = keyvi.Dictionary('manifest_json_merged.kv')
        m = d.GetManifest()
        assert m['author'] == "Fry"
        del d
    finally:
        os.remove('manifest_json_merge1.kv')
        os.remove('manifest_json_merge2.kv')
        os.remove('manifest_json_merged.kv')
def test_merge(merger):
    """Merging three compiled dictionaries yields the union of their items
    in sorted key order."""
    work_dir = tempfile.mkdtemp()
    try:
        merge_file = path.join(work_dir, 'merge.kv')
        source_maps = (key_values_1, key_values_2, key_values_3)
        # Compile one source dictionary per input mapping and register it.
        for index, key_values in enumerate(source_maps, start=1):
            source_file = path.join(work_dir, 'test_merger_{}.kv'.format(index))
            generate_keyvi(key_values, source_file)
            merger.Add(source_file)
        merger.Merge(merge_file)
        merged_dictionary = keyvi.Dictionary(merge_file)
        # Later inputs win on duplicate keys, mirroring dict.update order.
        combined = {}
        for key_values in source_maps:
            combined.update(key_values)
        # keyvi enumerates items in sorted key order.
        for (base_key, base_value), (keyvi_key, keyvi_value) in zip(
                sorted(combined.items()), merged_dictionary.GetAllItems()):
            assert base_key == keyvi_key
            assert base_value == keyvi_value
    finally:
        shutil.rmtree(work_dir)
def test_invalid_filemagic():
    """A file that is not a keyvi dictionary must be rejected with ValueError."""
    # Context manager replaces the manual open/close pair; the dead
    # `exception_caught` flag (left over from a try/except variant) is gone.
    with open(os.path.join(tmp_dir, 'broken_file'), 'w') as fd:
        fd.write('dead beef')
    with pytest.raises(ValueError):
        keyvi.Dictionary(os.path.join(tmp_dir, 'broken_file'))
    os.remove(os.path.join(tmp_dir, 'broken_file'))
def tmp_dictionary(compiler, file_name):
    """Compile *compiler* into a file under the system temp dir and yield
    the loaded dictionary; the file is removed once the consumer is done."""
    target_path = os.path.join(tempfile.gettempdir(), file_name)
    compiler.Compile()
    compiler.WriteToFile(target_path)
    # Release the compiler before loading so its resources are freed.
    del compiler
    dictionary = keyvi.Dictionary(target_path)
    yield dictionary
    del dictionary
    os.remove(target_path)
def test_invalid_filemagic():
    """Opening a file with a bogus file magic must raise ValueError."""
    broken_path = os.path.join(tmp_dir, 'broken_file')
    with open(broken_path, 'w') as fd:
        fd.write('dead beef')
    caught = False
    try:
        keyvi.Dictionary(broken_path)
    except ValueError:
        caught = True
    assert caught
    os.remove(broken_path)
def dump(args):
    """Dump all key/value pairs of ``args.input_file`` to ``args.output_file``.

    One pair per line, key and value separated by a tab; empty values emit
    the key alone. With ``args.json_dumps`` both sides are JSON-encoded.
    """
    dictionary = keyvi.Dictionary(args.input_file)
    with open(args.output_file, 'w') as file_out:
        for key, value in dictionary.GetAllItems():
            if args.json_dumps:
                key = json.dumps(key)
            # keyvi may hand back raw bytes; decode for text output.
            if isinstance(key, bytes):
                key = key.decode()
            line = key
            if value:
                if args.json_dumps:
                    value = json.dumps(value)
                line += '\t{}'.format(value)
            file_out.write(line + '\n')
def test_manifest_after_compile():
    """A manifest attached after Compile() must survive the round trip to disk."""
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("Leela")
    compiler.Add("Kif")
    compiler.Compile()
    # Manifest is set after compilation on purpose — that ordering is the
    # behavior under test.
    compiler.SetManifest({"author": "Zapp Brannigan"})
    file_name = os.path.join(tempfile.gettempdir(), 'brannigan_manifest2.kv')
    try:
        compiler.WriteToFile(file_name)
        d = keyvi.Dictionary(file_name)
        manifest = d.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
        del d
    finally:
        os.remove(file_name)
# Interactive demo: multi-word completion lookups against a compiled keyvi
# dictionary. NOTE(review): Python 2 script (print statement, raw_input).
import keyvi

# Separator character keyvi uses inside multi-word completion keys.
# NOTE(review): unused in this script — presumably kept for reference;
# confirm against the index-building side.
MULTIWORD_QUERY_SEPARATOR = '\x1b'

query = ""
d = keyvi.Dictionary("mw-completion.keyvi")
c = keyvi.MultiWordCompletion(d)

def get_lookup_key(query):
    # Sort all completed tokens (bag-of-words) but keep the last, still
    # partial token in place at the end.
    l = query.split(" ")
    l_bow = " ".join(sorted(l[:-1]) + l[-1:])
    return l_bow

# Prompt repeatedly until the user types "exit".
while query != "exit":
    query = raw_input("Query:")
    # Print each completion with its "weight" attribute.
    for m in c.GetCompletions(get_lookup_key(query.strip())):
        print "{} {}".format(m.GetMatchedString(), m.GetAttribute("weight"))
# Interactive demo: text lookups against a compiled keyvi dictionary of
# cities. NOTE(review): Python 2 script (print statement, raw_input).
import keyvi

query = ""
d = keyvi.Dictionary("cities.keyvi")

def get_lookup_key(query):
    # Identity transform — kept so the lookup-key step mirrors the other
    # demo scripts and can be customized.
    return query

# Prompt repeatedly until the user types "exit".
while query != "exit":
    query = raw_input("Query:")
    for m in d.LookupText(get_lookup_key(query.strip())):
        print "{}".format(m.GetMatchedString())
# Filter script: normalize each stdin line through a keyvi FSA transform
# and print the result. NOTE(review): Python 2 script (print statement).
import sys
import keyvi

d = keyvi.Dictionary("normalization.keyvi")
n = keyvi.FsaTransform(d)

for line in sys.stdin:
    print n.Normalize(line)
def test_non_existing_file():
    """Opening a dictionary path that does not exist must raise ValueError."""
    file_name = os.path.join(tmp_dir, 'non_existing_file')
    # BUG FIX: the precondition previously checked the relative path
    # 'non_existing_file' while the Dictionary was opened from tmp_dir —
    # both must refer to the same (absent) file.
    assert not os.path.exists(file_name)
    with pytest.raises(ValueError):
        keyvi.Dictionary(file_name)
def stats(input_file):
    """Print the statistics of the keyvi dictionary at *input_file* as
    pretty-printed JSON with sorted keys."""
    statistics = keyvi.Dictionary(input_file).GetStatistics()
    print(json.dumps(statistics, indent=4, sort_keys=True))
# Interactive demo: prefix completion lookups against a compiled keyvi
# dictionary. NOTE(review): Python 2 script (print statement, raw_input).
import keyvi

query = ""
d = keyvi.Dictionary("prefix-completion.keyvi")
c = keyvi.PrefixCompletion(d)

def get_lookup_key(query):
    # Identity transform — kept so the lookup-key step mirrors the other
    # demo scripts and can be customized.
    return query

# Prompt repeatedly until the user types "exit".
while query != "exit":
    query = raw_input("Query:")
    # Print each completion with its "weight" attribute.
    for m in c.GetCompletions(get_lookup_key(query.strip())):
        print "{} ({})".format(m.GetMatchedString(), m.GetAttribute("weight"))
# Interactive demo: exact-match Get() lookups against a user-supplied keyvi
# dictionary. NOTE(review): Python 2 script (print statement, raw_input).
import keyvi

query = ""
d = keyvi.Dictionary("your-own.keyvi")

def get_lookup_key(query):
    # Identity transform — kept so the lookup-key step mirrors the other
    # demo scripts and can be customized.
    return query

# Prompt repeatedly until the user types "exit".
while query != "exit":
    query = raw_input("Query:")
    # Print each match with its stored value.
    for m in d.Get(get_lookup_key(query.strip())):
        print "{} {}".format(m.GetMatchedString(), m.GetValueAsString())