def test_get_value_string():
    """String values written by the compiler round-trip through Match.GetValue()."""
    compiler = keyvi.StringDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("abc", "aaaaa")
    compiler.Add("abd", "bbbbb")
    with tmp_dictionary(compiler, 'match_object_string.kv') as dictionary:
        # Each key must return exactly the value it was compiled with.
        for key, expected in (("abc", "aaaaa"), ("abd", "bbbbb")):
            match = dictionary[key]
            assert decode_to_unicode(match.GetValue()) == decode_to_unicode(expected)
def test_bytes_attributes():
    """Match attributes accept byte-string keys and byte-string values."""
    match = keyvi.Match()
    raw_key = decode_to_unicode("äöü").encode('utf-8')
    raw_value = decode_to_unicode("äöüöäü").encode('utf-8')
    # bytes key with an integer value
    match.SetAttribute(raw_key, 22)
    assert match.GetAttribute(raw_key) == 22
    # text key with a bytes value
    match.SetAttribute("k2", raw_value)
    assert decode_to_unicode(match.GetAttribute("k2")) == decode_to_unicode("äöüöäü")
def test_unicode_attributes():
    """Match attributes accept unicode keys and values; SetScore round-trips as float."""
    match = keyvi.Match()
    match.SetAttribute(decode_to_unicode("küy"), 22)
    assert match.GetAttribute("küy") == 22
    match.SetAttribute("k2", decode_to_unicode(" 吃饭了吗"))
    match.SetScore(99)
    assert decode_to_unicode(match.GetAttribute("k2")) == decode_to_unicode(" 吃饭了吗")
    # score set as int is reported as float
    assert match.GetScore() == 99.0
def test_get_value():
    """JSON values compiled from strings come back as parsed objects via GetValue()."""
    compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    with tmp_dictionary(compiler, 'match_object_json.kv') as dictionary:
        for key, expected in (("abc", {"a": 2}), ("abd", {"a": 3})):
            match = dictionary[key]
            assert decode_to_unicode(match.GetValue()) == decode_to_unicode(expected)
def test_get_all_values():
    """GetAllValues yields values in the same order as the source key/value list."""
    with tmp_dictionary(generate_dictionary_compiler(),
                        'test_get_all_values.kv') as keyvi_dictionary:
        stored_values = keyvi_dictionary.GetAllValues()
        for (_, expected_value), actual_value in zip(key_values, stored_values):
            assert decode_to_unicode(expected_value) == decode_to_unicode(actual_value)
def test_raw_serialization():
    """A Match survives dumps()/loads() with its raw value string intact."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    with tmp_dictionary(compiler, 'match_object_json.kv') as dictionary:
        match = dictionary["abc"]
        assert decode_to_unicode(match.GetValueAsString()) == decode_to_unicode('{"a":2}')
        # serialize and restore under a distinct name (the original rebound the
        # dictionary variable here)
        serialized = match.dumps()
        restored = pykeyvi.Match.loads(serialized)
        assert decode_to_unicode(restored.GetValueAsString()) == decode_to_unicode('{"a":2}')
def test_get_all_items():
    """GetAllItems yields (key, value) pairs matching the source list in order."""
    with tmp_dictionary(generate_dictionary_compiler(),
                        'test_get_all_items.kv') as keyvi_dictionary:
        stored_items = keyvi_dictionary.GetAllItems()
        for (expected_key, expected_value), (actual_key, actual_value) in zip(
                key_values, stored_items):
            assert decode_to_unicode(expected_key) == decode_to_unicode(actual_key)
            assert decode_to_unicode(expected_value) == decode_to_unicode(actual_value)
def test_simple():
    """Add() and the __setitem__ sugar both store entries; duplicates dedupe to one key."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    # use python syntax ala __setitem__
    compiler["abd"] = '{"a" : 3}'
    with tmp_dictionary(compiler, 'simple_json.kv') as dictionary:
        # "abd" was added twice but counts once
        assert len(dictionary) == 2
        for key, raw_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            actual = dictionary[key].GetValueAsString()
            assert decode_to_unicode(actual) == decode_to_unicode(raw_json)
def test_unicode():
    """Unicode keys work for containment, indexing, and get()."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("öäü", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    # use python syntax ala __setitem__
    compiler["abd"] = '{"a" : 3}'
    # create unicode string
    lookup_key = decode_to_unicode("öäü")
    with tmp_dictionary(compiler, 'unicode_json.kv') as dictionary:
        assert lookup_key in dictionary
        expected = decode_to_unicode({"a": 2})
        assert decode_to_unicode(dictionary[lookup_key].GetValue()) == expected
        assert decode_to_unicode(dictionary.get(lookup_key).GetValue()) == expected
def test_simple_snappy():
    """Snappy-compressed dictionaries return the same values and expose the
    compression settings via GetStatistics()."""
    compiler = pykeyvi.JsonDictionaryCompiler({
        "memory_limit_mb": "10",
        'compression': 'snappy',
        'compression_threshold': '0',
    })
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    with tmp_dictionary(compiler, 'simple_json_snappy.kv') as dictionary:
        assert len(dictionary) == 2
        for key, raw_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            actual = dictionary[key].GetValueAsString()
            assert decode_to_unicode(actual) == decode_to_unicode(raw_json)
        # compression options must be recorded in the value-store statistics
        value_store_stats = dictionary.GetStatistics()['Value Store']
        assert value_store_stats['__compression'] == "snappy"
        assert value_store_stats['__compression_threshold'] == "0"
def test_unicode_compile():
    """Keys added as str, decoded unicode, and u-literals are all retrievable."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("üöä", '{"y" : 2}')
    compiler.Add(decode_to_unicode("üüüüüüabd"), '{"a" : 3}')
    compiler.Add(u"ääääädäd", '{"b" : 33}')
    with tmp_dictionary(compiler, 'simple_json.kv') as dictionary:
        assert len(dictionary) == 3
        # plain-str and u-literal lookups hit the same entry
        assert decode_to_unicode(
            dictionary["üöä"].GetValueAsString()) == decode_to_unicode('{"y":2}')
        assert decode_to_unicode(
            dictionary[u"üöä"].GetValueAsString()) == decode_to_unicode('{"y":2}')
        for key, raw_json in (("üüüüüüabd", '{"a":3}'), ("ääääädäd", '{"b":33}')):
            actual = dictionary[key].GetValueAsString()
            assert decode_to_unicode(actual) == decode_to_unicode(raw_json)
def test_merge(merger):
    """Merging three compiled dictionaries yields the union of their entries,
    iterated in sorted key order (later sources win on duplicate keys)."""
    tmp_dir = tempfile.mkdtemp()
    try:
        source_chunks = [key_values_1, key_values_2, key_values_3]
        # compile one .kv file per chunk and feed each into the merger, in order
        for index, chunk in enumerate(source_chunks, start=1):
            source_file = path.join(tmp_dir, 'test_merger_%d.kv' % index)
            generate_keyvi(chunk, source_file)
            merger.Add(source_file)
        merge_file = path.join(tmp_dir, 'merge.kv')
        merger.Merge(merge_file)
        merged_dictionary = pykeyvi.Dictionary(merge_file)

        # expected result: union of all chunks, in sorted key order
        combined = {}
        for chunk in source_chunks:
            combined.update(chunk)
        for (base_key, base_value), (keyvi_key, keyvi_value) in zip(
                sorted(combined.items()), merged_dictionary.GetAllItems()):
            assert decode_to_unicode(base_key) == decode_to_unicode(keyvi_key)
            assert decode_to_unicode(base_value) == decode_to_unicode(keyvi_value)
    finally:
        # always remove the scratch directory, even on assertion failure
        shutil.rmtree(tmp_dir)
def test_zerobyte():
    """Keys containing NUL bytes (leading, embedded, trailing) are stored and
    retrieved intact."""
    entries = (
        ("\x00abc", '["a" : 2]'),
        ("abc\x00def", '["a" : 3]'),
        ("cd\x00", '["a" : 4]'),
    )
    compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in entries:
        compiler.Add(key, value)
    with tmp_dictionary(compiler, 'zerobyte.kv') as dictionary:
        for key, value in entries:
            assert decode_to_unicode(dictionary[key].GetValue()) == decode_to_unicode(value)
        # all three entries survive full iteration as well
        assert sum(1 for _ in dictionary.GetAllItems()) == 3
def test_unicode_lookup():
    """LookupText finds all dictionary keys inside a unicode text, including
    keys with non-ASCII characters."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("Los Angeles", '{"country" : "USA"}')
    compiler.Add("Frankfurt am Main", '{"country" : "Germany"}')
    compiler.Add(decode_to_unicode("Kirchheim bei München"), '{"country" : "Germany"}')
    # unicode text containing every compiled key
    text = decode_to_unicode(
        "From Los Angeles via Frankfurt am Main to Kirchheim bei München it should just work"
    )
    with tmp_dictionary(compiler, 'unicode_json_lookup.kv') as dictionary:
        assert "Kirchheim bei München" in dictionary
        matched_strings = [hit.GetMatchedString() for hit in dictionary.LookupText(text)]
        assert len(matched_strings) == 3
        for city in ("Kirchheim bei München", "Los Angeles", "Frankfurt am Main"):
            assert decode_to_unicode(city) in decode_to_unicode(matched_strings)
def test_boolean_attributes():
    """A boolean attribute set on a Match reads back unchanged."""
    match = keyvi.Match()
    flag_key = decode_to_unicode("def").encode('utf-8')
    match.SetAttribute(flag_key, True)
    assert match.GetAttribute(flag_key) == True
def test_double_attributes():
    """A float attribute set on a Match reads back unchanged."""
    match = keyvi.Match()
    number_key = decode_to_unicode("abc").encode('utf-8')
    match.SetAttribute(number_key, 42.0)
    assert match.GetAttribute(number_key) == 42.0
def test_get_all_keys():
    """GetAllKeys yields keys in the same order as the source key/value list."""
    with tmp_dictionary(generate_dictionary_compiler(),
                        'test_get_all_keys.kv') as keyvi_dictionary:
        stored_keys = keyvi_dictionary.GetAllKeys()
        for (expected_key, _), actual_key in zip(key_values, stored_keys):
            assert decode_to_unicode(expected_key) == decode_to_unicode(actual_key)