Ejemplo n.º 1
0
def test_get_value_string():
    """Check that GetValue() returns the string stored for each key.

    Two key/value pairs are added to a StringDictionaryCompiler, the
    dictionary is opened through ``tmp_dictionary`` and every key is
    looked up again.
    """
    compiler = keyvi.StringDictionaryCompiler({"memory_limit_mb": "10"})
    entries = (("abc", "aaaaa"), ("abd", "bbbbb"))
    for key, value in entries:
        compiler.Add(key, value)

    with tmp_dictionary(compiler, 'match_object_string.kv') as dictionary:
        for key, value in entries:
            match = dictionary[key]
            assert decode_to_unicode(match.GetValue()) == decode_to_unicode(value)
Ejemplo n.º 2
0
def test_bytes_attributes():
    """Set Match attributes using a bytes key and a bytes value and read them back."""
    value_text = "äöüöäü"
    match = keyvi.Match()
    key_as_bytes = bytes(decode_to_unicode("äöü").encode('utf-8'))
    value_as_bytes = bytes(decode_to_unicode(value_text).encode('utf-8'))

    # bytes key, integer value
    match.SetAttribute(key_as_bytes, 22)
    assert match.GetAttribute(key_as_bytes) == 22

    # plain key, bytes value
    match.SetAttribute("k2", value_as_bytes)
    stored = match.GetAttribute("k2")
    assert decode_to_unicode(stored) == decode_to_unicode(value_text)
Ejemplo n.º 3
0
def test_unicode_attributes():
    """Set unicode attribute keys/values and a score on a Match and read them back."""
    match = keyvi.Match()

    # key given as unicode, looked up with a plain literal
    match.SetAttribute(decode_to_unicode("küy"), 22)
    assert match.GetAttribute("küy") == 22

    match.SetAttribute("k2", decode_to_unicode(" 吃饭了吗"))
    match.SetScore(99)

    stored = match.GetAttribute("k2")
    assert decode_to_unicode(stored) == decode_to_unicode(" 吃饭了吗")
    assert match.GetScore() == 99.0
Ejemplo n.º 4
0
def test_get_value():
    """Check that GetValue() on a JSON dictionary match equals the expected dict.

    The values are added as JSON text and compared against Python dicts
    after both sides went through ``decode_to_unicode``.
    """
    compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')

    expectations = (("abc", {"a": 2}), ("abd", {"a": 3}))
    with tmp_dictionary(compiler, 'match_object_json.kv') as dictionary:
        for key, expected in expectations:
            match = dictionary[key]
            assert decode_to_unicode(match.GetValue()) == decode_to_unicode(expected)
Ejemplo n.º 5
0
def test_get_all_values():
    """Compare GetAllValues() with the values of ``key_values``, in order.

    ``key_values`` is iterated as (key, value) pairs; only the values are
    used here. Both sides are passed through ``decode_to_unicode`` before
    comparison.
    """
    with tmp_dictionary(generate_dictionary_compiler(),
                        'test_get_all_values.kv') as keyvi_dictionary:
        expected_values = [base_value for _, base_value in key_values]
        actual_values = list(keyvi_dictionary.GetAllValues())

        # zip() stops at the shorter input, so without this check the loop
        # below would pass even if the dictionary returned too few values.
        assert len(actual_values) == len(expected_values)

        for base_value, keyvi_value in zip(expected_values, actual_values):
            assert decode_to_unicode(base_value) == decode_to_unicode(
                keyvi_value)
Ejemplo n.º 6
0
def test_raw_serialization():
    """Round-trip a Match through dumps()/loads() and compare GetValueAsString()."""
    expected_json = '{"a":2}'

    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb":"10"})
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')

    with tmp_dictionary(compiler, 'match_object_json.kv') as dictionary:
        match = dictionary["abc"]
        assert decode_to_unicode(match.GetValueAsString()) == decode_to_unicode(expected_json)

        # serialize the match and rebuild a second Match from the dump
        serialized = match.dumps()
        restored = pykeyvi.Match.loads(serialized)
        assert decode_to_unicode(restored.GetValueAsString()) == decode_to_unicode(expected_json)
Ejemplo n.º 7
0
def test_get_all_items():
    """Compare GetAllItems() with the (key, value) pairs of ``key_values``, in order.

    Keys and values on both sides are passed through ``decode_to_unicode``
    before comparison.
    """
    with tmp_dictionary(generate_dictionary_compiler(),
                        'test_get_all_items.kv') as keyvi_dictionary:
        expected_items = list(key_values)
        actual_items = list(keyvi_dictionary.GetAllItems())

        # zip() stops at the shorter input, so without this check the loop
        # below would pass even if the dictionary returned too few items.
        assert len(actual_items) == len(expected_items)

        for (base_key, base_value), (keyvi_key, keyvi_value) in zip(
                expected_items, actual_items):
            assert decode_to_unicode(base_key) == decode_to_unicode(keyvi_key)
            assert decode_to_unicode(base_value) == decode_to_unicode(
                keyvi_value)
Ejemplo n.º 8
0
def test_simple():
    """Compile two JSON entries and check the length and the stored value strings."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    # same entry once more, this time through item assignment (__setitem__)
    compiler["abd"] = '{"a" : 3}'

    expectations = (("abc", '{"a":2}'), ("abd", '{"a":3}'))
    with tmp_dictionary(compiler, 'simple_json.kv') as dictionary:
        assert len(dictionary) == 2
        for key, expected_json in expectations:
            actual = dictionary[key].GetValueAsString()
            assert decode_to_unicode(actual) == decode_to_unicode(expected_json)
Ejemplo n.º 9
0
def test_unicode():
    """Look up a non-ASCII key via ``in``, indexing and ``get()``."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("öäü", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')
    # same entry once more, this time through item assignment (__setitem__)
    compiler["abd"] = '{"a" : 3}'

    # the lookup key goes through decode_to_unicode first
    lookup_key = decode_to_unicode("öäü")

    with tmp_dictionary(compiler, 'unicode_json.kv') as dictionary:
        assert lookup_key in dictionary

        indexed_value = dictionary[lookup_key].GetValue()
        assert decode_to_unicode(indexed_value) == decode_to_unicode({"a": 2})

        fetched_value = dictionary.get(lookup_key).GetValue()
        assert decode_to_unicode(fetched_value) == decode_to_unicode({"a": 2})
Ejemplo n.º 10
0
def test_simple_snappy():
    """Compile with snappy compression parameters and check values and statistics.

    Besides the stored value strings, the 'Value Store' section of
    GetStatistics() is checked for the compression settings passed in.
    """
    compiler_params = {
        "memory_limit_mb": "10",
        'compression': 'snappy',
        'compression_threshold': '0',
    }
    compiler = pykeyvi.JsonDictionaryCompiler(compiler_params)
    compiler.Add("abc", '{"a" : 2}')
    compiler.Add("abd", '{"a" : 3}')

    expectations = (("abc", '{"a":2}'), ("abd", '{"a":3}'))
    with tmp_dictionary(compiler, 'simple_json_snappy.kv') as dictionary:
        assert len(dictionary) == 2
        for key, expected_json in expectations:
            actual = dictionary[key].GetValueAsString()
            assert decode_to_unicode(actual) == decode_to_unicode(expected_json)

        value_store_stats = dictionary.GetStatistics()['Value Store']
        assert value_store_stats['__compression'] == "snappy"
        assert value_store_stats['__compression_threshold'] == "0"
Ejemplo n.º 11
0
def test_unicode_compile():
    """Add non-ASCII keys in several literal forms and look each of them up again."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    # keys are given as plain literal, via decode_to_unicode and as u"" literal
    compiler.Add("üöä", '{"y" : 2}')
    compiler.Add(decode_to_unicode("üüüüüüabd"), '{"a" : 3}')
    compiler.Add(u"ääääädäd", '{"b" : 33}')

    # (lookup key, expected value string); the first key is tried in both
    # plain and u"" form
    lookups = (
        ("üöä", '{"y":2}'),
        (u"üöä", '{"y":2}'),
        ("üüüüüüabd", '{"a":3}'),
        ("ääääädäd", '{"b":33}'),
    )
    with tmp_dictionary(compiler, 'simple_json.kv') as dictionary:
        assert len(dictionary) == 3
        for key, expected_json in lookups:
            actual = dictionary[key].GetValueAsString()
            assert decode_to_unicode(actual) == decode_to_unicode(expected_json)
Ejemplo n.º 12
0
def test_merge(merger):
    """Merge three generated keyvi files and compare the result with the inputs.

    Args:
        merger: object providing ``Add(file_name)`` and ``Merge(file_name)``;
            it is supplied by the caller (presumably a pytest fixture or
            parametrization defined outside this block).

    The expected content is the union of ``key_values_1``, ``key_values_2``
    and ``key_values_3`` (later mappings override earlier ones), sorted by
    key. The temporary directory is always removed afterwards.
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        file_1 = path.join(tmp_dir, 'test_merger_1.kv')
        file_2 = path.join(tmp_dir, 'test_merger_2.kv')
        file_3 = path.join(tmp_dir, 'test_merger_3.kv')
        merge_file = path.join(tmp_dir, 'merge.kv')

        generate_keyvi(key_values_1, file_1)
        generate_keyvi(key_values_2, file_2)
        generate_keyvi(key_values_3, file_3)

        merger.Add(file_1)
        merger.Add(file_2)
        merger.Add(file_3)
        merger.Merge(merge_file)

        merged_dictionary = pykeyvi.Dictionary(merge_file)

        key_values = {}
        key_values.update(key_values_1)
        key_values.update(key_values_2)
        key_values.update(key_values_3)

        # A sorted list of pairs already has the required order.
        expected_items = sorted(key_values.items())
        merged_items = list(merged_dictionary.GetAllItems())

        # zip() stops at the shorter input, so without this check the loop
        # below would pass even if the merged dictionary lacked entries.
        assert len(merged_items) == len(expected_items)

        for (base_key, base_value), (keyvi_key, keyvi_value) in zip(
                expected_items, merged_items):
            assert decode_to_unicode(base_key) == decode_to_unicode(keyvi_key)
            assert decode_to_unicode(base_value) == decode_to_unicode(
                keyvi_value)

    finally:
        shutil.rmtree(tmp_dir)
Ejemplo n.º 13
0
def test_zerobyte():
    """Store keys containing a zero byte at the start, middle and end and read them back."""
    entries = (
        ("\x00abc", '["a" : 2]'),
        ("abc\x00def", '["a" : 3]'),
        ("cd\x00", '["a" : 4]'),
    )
    compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in entries:
        compiler.Add(key, value)

    with tmp_dictionary(compiler, 'zerobyte.kv') as dictionary:
        for key, value in entries:
            stored = dictionary[key].GetValue()
            assert decode_to_unicode(stored) == decode_to_unicode(value)

        # every item is unpacked as a (key, value) pair while counting
        item_count = sum(1 for _key, _value in dictionary.GetAllItems())
        assert item_count == 3
Ejemplo n.º 14
0
def test_unicode_lookup():
    """Run LookupText() over a sentence and check that all three keys are matched."""
    compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    compiler.Add("Los Angeles", '{"country" : "USA"}')
    compiler.Add("Frankfurt am Main", '{"country" : "Germany"}')
    compiler.Add(decode_to_unicode("Kirchheim bei München"),
                 '{"country" : "Germany"}')

    # the text to scan goes through decode_to_unicode first
    sentence = decode_to_unicode(
        "From Los Angeles via Frankfurt am Main to Kirchheim bei München it should just work"
    )

    with tmp_dictionary(compiler, 'unicode_json_lookup.kv') as dictionary:
        assert "Kirchheim bei München" in dictionary

        matched_strings = []
        for match in dictionary.LookupText(sentence):
            matched_strings.append(match.GetMatchedString())
        assert len(matched_strings) == 3

        expected_cities = ("Kirchheim bei München", "Los Angeles",
                           "Frankfurt am Main")
        for city in expected_cities:
            assert decode_to_unicode(city) in decode_to_unicode(
                matched_strings)
Ejemplo n.º 15
0
def test_boolean_attributes():
    """Store a boolean attribute under a bytes key on a Match and read it back."""
    match = keyvi.Match()
    key_as_bytes = bytes(decode_to_unicode("def").encode('utf-8'))
    match.SetAttribute(key_as_bytes, True)

    stored = match.GetAttribute(key_as_bytes)
    # compared with ==, so any value that equals True passes
    assert stored == True
Ejemplo n.º 16
0
def test_double_attributes():
    """Store a float attribute under a bytes key on a Match and read it back."""
    match = keyvi.Match()
    key_as_bytes = bytes(decode_to_unicode("abc").encode('utf-8'))
    match.SetAttribute(key_as_bytes, 42.0)

    stored = match.GetAttribute(key_as_bytes)
    assert stored == 42.0
Ejemplo n.º 17
0
def test_get_all_keys():
    """Compare GetAllKeys() with the keys of ``key_values``, in order.

    ``key_values`` is iterated as (key, value) pairs; only the keys are
    used here. Both sides are passed through ``decode_to_unicode`` before
    comparison.
    """
    with tmp_dictionary(generate_dictionary_compiler(),
                        'test_get_all_keys.kv') as keyvi_dictionary:
        expected_keys = [base_key for base_key, _ in key_values]
        actual_keys = list(keyvi_dictionary.GetAllKeys())

        # zip() stops at the shorter input, so without this check the loop
        # below would pass even if the dictionary returned too few keys.
        assert len(actual_keys) == len(expected_keys)

        for base_key, keyvi_key in zip(expected_keys, actual_keys):
            assert decode_to_unicode(base_key) == decode_to_unicode(keyvi_key)