Ejemplo n.º 1
0
def test_float_compaction():
    """Compare value-store sizes with and without single float precision.

    The same float vector is compiled twice: once with
    'floating_point_precision': 'single' and once without that option.
    The value store of the former must be strictly smaller.
    """
    float_vector = '[1.7008715758978892, 1.8094465532317732, 1.6098250864350536, 1.6369107966501981, 1.7736887965234107, 1.606682751740542, 1.6186427703265525, 1.7939763843449683, 1.5973550162469434, 1.6799721708726192, 1.8199786239525833, 1.7956178070065245, 1.7269879953863045]'

    single_builder = JsonDictionaryCompiler({
        "memory_limit_mb": "10",
        'floating_point_precision': 'single'
    })
    plain_builder = JsonDictionaryCompiler({"memory_limit_mb": "10"})

    # identical payload goes into both compilers
    for builder in (single_builder, plain_builder):
        builder.Add('aa', float_vector)

    with tmp_dictionary(single_builder, 'json_single_precision_float.kv') as ds, \
            tmp_dictionary(plain_builder, 'json_double_precision_float.kv') as dd:
        # sanity: exactly one key in each dictionary
        assert len(ds) == 1
        assert len(dd) == 1
        # the single-precision value store shall be the smaller one
        stats_single = ds.GetStatistics()
        stats_plain = dd.GetStatistics()
        size_single = int(stats_single['Value Store']['size'])
        size_plain = int(stats_plain['Value Store']['size'])
        assert size_single < size_plain
Ejemplo n.º 2
0
def test_near_greedy():
    """Check result counts of greedy GetNear and its relation to non-greedy.

    The greedy result list must start with the non-greedy result list.
    """
    entries = [
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
    ]
    builder = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in entries:
        builder.Add(key, value)

    # (query, second GetNear argument, expected number of greedy results)
    expectations = [
        ("zahnarzt:u1q0gkqsenhf", 12, 2),
        ("zahnarzt:u1h0gkqsenhf", 12, 3),
        ("zahnarzt:u1h0gkqsenhf", 13, 0),
        ("zahnarzt:u0h0gkqsenhf", 10, 10),
    ]

    with tmp_dictionary(builder, 'near_greedy.kv') as d:
        for query, n, expected_count in expectations:
            assert len(list(d.GetNear(query, n, True))) == expected_count

        probe = "zahnarzt:u0h0gkqsenhf"
        greedy = [hit.GetMatchedString() for hit in d.GetNear(probe, 10, True)]
        non_greedy = [
            hit.GetMatchedString() for hit in d.GetNear(probe, 10, False)
        ]
        # non-greedy results form a prefix of the greedy results
        assert greedy[:len(non_greedy)] == non_greedy
Ejemplo n.º 3
0
def test_get_all_keys():
    """Keys from GetAllKeys must match the keys of key_values pairwise."""
    builder = generate_dictionary_compiler()
    with tmp_dictionary(builder, 'test_get_all_keys.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllKeys())
        for (expected_key, _), actual_key in paired:
            assert expected_key == actual_key
Ejemplo n.º 4
0
def test_get_fuzzy():
    """GetFuzzy('tüv koid', 2) must yield exactly 'tüv sood' and 'tüv nord'."""
    weighted_phrases = [
        ("türkei news", 23698),
        ("türkei side", 18838),
        ("türkei urlaub", 23424),
        ("türkisch anfänger", 20788),
        ("türkisch für", 21655),
        ("türkisch für anfänger", 20735),
        ("türkçe dublaj", 28575),
        ("türkçe dublaj izle", 16391),
        ("türkçe izle", 19946),
        ("tüv akademie", 9557),
        ("tüv hessen", 7744),
        ("tüv i", 331),
        ("tüv in", 10188),
        ("tüv ib", 10189),
        ("tüv kosten", 11387),
        ("tüv nord", 46052),
        ("tüv sood", 46057),
        ("tüs rhein", 462),
        ("tüs rheinland", 39131),
        ("tüs öffnungszeiten", 15999),
    ]
    builder = keyvi.CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase, weight in weighted_phrases:
        builder.Add(phrase, weight)

    # expected matches, in the order they are compared against the results
    key_values = [
        (u'tüv sood', 46057),
        (u'tüv nord', 46052),
    ]

    with tmp_dictionary(builder, 'get_fuzzy.kv') as d:
        results = d.GetFuzzy('tüv koid', 2)
        for (expected_key, expected_value), match in zip(key_values, results):
            assert expected_key == match.GetMatchedString()
            assert expected_value == match.GetValue()

        # a fresh query must produce exactly two matches
        assert len(list(d.GetFuzzy('tüv koid', 2))) == 2
Ejemplo n.º 5
0
def test_get_all_values():
    """Values from GetAllValues must match the values of key_values pairwise."""
    builder = generate_dictionary_compiler()
    with tmp_dictionary(builder, 'test_get_all_values.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllValues())
        for (_, expected_value), actual_value in paired:
            assert expected_value == actual_value
Ejemplo n.º 6
0
def test_manifest():
    """A JSON manifest string set on the compiler is readable from the dictionary."""
    int_builder = compiler.IntDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in (("Leela", 20), ("Kif", 2)):
        int_builder.Add(key, value)
    int_builder.SetManifest('{"drink": "slurm"}')

    with tmp_dictionary(int_builder, 'slurm.kv') as d:
        manifest = json.loads(d.GetManifest())
        assert manifest['drink'] == "slurm"
Ejemplo n.º 7
0
def test_simple():
    """Two JSON values are stored and returned as compact JSON strings."""
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, raw_json in (("abc", '{"a" : 2}'), ("abd", '{"a" : 3}')):
        builder.Add(key, raw_json)

    with tmp_dictionary(builder, 'simple_json.kv') as d:
        assert len(d) == 2
        for key, compact_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert d[key].GetValueAsString() == compact_json
Ejemplo n.º 8
0
def test_manifest():
    """A manifest dict set on the compiler is returned by GetManifest."""
    builder = pykeyvi.IntDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in (("Leela", 20), ("Kif", 2)):
        builder.Add(key, value)
    builder.SetManifest({"drink": "slurm"})

    with tmp_dictionary(builder, 'slurm.kv') as d:
        manifest = d.GetManifest()
        assert manifest['drink'] == "slurm"
Ejemplo n.º 9
0
def test_near_less_precission():
    """GetNear with a query longer than the stored keys: 2 hits for 12, none for 13."""
    entries = (
        ("zahnarzt:u0we9", '["a" : 2]'),
        ("zahnarzt:u1h2f", '["a" : 3]'),
        ("zahnarzt:u1huf", '["a" : 4]'),
    )
    builder = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in entries:
        builder.Add(key, value)

    query = "zahnarzt:u1h0gkqsenhf"
    with tmp_dictionary(builder, 'near_less_precission.kv') as d:
        for n, expected_count in ((12, 2), (13, 0)):
            assert len(list(d.GetNear(query, n))) == expected_count
Ejemplo n.º 10
0
def test_near_less_precission():
    """GetNear with a query longer than the stored keys: 2 hits for 12, none for 13."""
    entries = (
        ("zahnarzt:u0we9", '["a" : 2]'),
        ("zahnarzt:u1h2f", '["a" : 3]'),
        ("zahnarzt:u1huf", '["a" : 4]'),
    )
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, value in entries:
        builder.Add(key, value)

    query = "zahnarzt:u1h0gkqsenhf"
    with tmp_dictionary(builder, 'near_less_precission.kv') as d:
        for n, expected_count in ((12, 2), (13, 0)):
            assert len(list(d.GetNear(query, n))) == expected_count
Ejemplo n.º 11
0
def test_manifest():
    """A manifest set on a key-only compiler is returned by the dictionary."""
    builder = pykeyvi.KeyOnlyDictionaryCompiler()
    for key in ("Leela", "Kif"):
        builder.Add(key)
    builder.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(builder, 'brannigan_manifest.kv') as d:
        manifest = d.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
Ejemplo n.º 12
0
def test_manifest():
    """A manifest set on a key-only compiler is returned by the dictionary."""
    builder = pykeyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    for key in ("Leela", "Kif"):
        builder.Add(key)
    builder.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(builder, 'brannigan_manifest.kv') as d:
        manifest = d.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
Ejemplo n.º 13
0
def test_float_compaction():
    """Compare value-store sizes with and without single float precision.

    The same float vector is compiled twice: once with
    'floating_point_precision': 'single' and once without that option.
    The value store of the former must be strictly smaller.
    """
    float_vector = '[1.7008715758978892, 1.8094465532317732, 1.6098250864350536, 1.6369107966501981, 1.7736887965234107, 1.606682751740542, 1.6186427703265525, 1.7939763843449683, 1.5973550162469434, 1.6799721708726192, 1.8199786239525833, 1.7956178070065245, 1.7269879953863045]'

    single_builder = pykeyvi.JsonDictionaryCompiler(50000000, {'floating_point_precision': 'single'})
    plain_builder = pykeyvi.JsonDictionaryCompiler(50000000)

    # identical payload goes into both compilers
    for builder in (single_builder, plain_builder):
        builder.Add('aa', float_vector)

    with tmp_dictionary(single_builder, 'json_single_precision_float.kv') as ds, \
            tmp_dictionary(plain_builder, 'json_double_precision_float.kv') as dd:
        # sanity: exactly one key in each dictionary
        assert len(ds) == 1
        assert len(dd) == 1
        # the single-precision value store shall be the smaller one
        stats_single = ds.GetStatistics()
        stats_plain = dd.GetStatistics()
        size_single = int(stats_single['Value Store']['size'])
        size_plain = int(stats_plain['Value Store']['size'])
        assert size_single < size_plain
Ejemplo n.º 14
0
def test_get_value():
    """GetValue on a JSON dictionary match returns the decoded object."""
    builder = pykeyvi.JsonDictionaryCompiler()
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    with tmp_dictionary(builder, 'match_object_json.kv') as d:
        for key, expected in (("abc", {"a": 2}), ("abd", {"a": 3})):
            match = d[key]
            assert match.GetValue() == expected
Ejemplo n.º 15
0
def test_get_value():
    """GetValue on a JSON dictionary match returns the decoded object."""
    builder = pykeyvi.JsonDictionaryCompiler()
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    with tmp_dictionary(builder, 'match_object_json.kv') as d:
        for key, expected in (("abc", {"a": 2}), ("abd", {"a": 3})):
            match = d[key]
            assert match.GetValue() == expected
Ejemplo n.º 16
0
def test_get_value_string():
    """GetValue on a string dictionary match returns the stored string."""
    stored = (("abc", "aaaaa"), ("abd", "bbbbb"))
    builder = pykeyvi.StringDictionaryCompiler()
    for key, value in stored:
        builder.Add(key, value)

    with tmp_dictionary(builder, 'match_object_string.kv') as d:
        for key, value in stored:
            match = d[key]
            assert match.GetValue() == value
Ejemplo n.º 17
0
def test_get_value_key_only():
    """GetValue on a key-only dictionary match returns the empty string."""
    keys = ("abc", "abd")
    builder = pykeyvi.KeyOnlyDictionaryCompiler()
    for key in keys:
        builder.Add(key)

    with tmp_dictionary(builder, 'match_object_key_only.kv') as d:
        for key in keys:
            match = d[key]
            assert match.GetValue() == ''
Ejemplo n.º 18
0
def test_get_value_int():
    """GetValue on a completion dictionary match returns the stored integer."""
    stored = (("abc", 42), ("abd", 21))
    builder = pykeyvi.CompletionDictionaryCompiler()
    for key, number in stored:
        builder.Add(key, number)

    with tmp_dictionary(builder, 'match_object_int.kv') as d:
        for key, number in stored:
            match = d[key]
            assert match.GetValue() == number
Ejemplo n.º 19
0
def test_get_value_string():
    """GetValue on a string dictionary match returns the stored string."""
    stored = (("abc", "aaaaa"), ("abd", "bbbbb"))
    builder = pykeyvi.StringDictionaryCompiler()
    for key, value in stored:
        builder.Add(key, value)

    with tmp_dictionary(builder, 'match_object_string.kv') as d:
        for key, value in stored:
            match = d[key]
            assert match.GetValue() == value
Ejemplo n.º 20
0
def test_get_value_key_only():
    """GetValue on a key-only dictionary match returns the empty string."""
    keys = ("abc", "abd")
    builder = pykeyvi.KeyOnlyDictionaryCompiler()
    for key in keys:
        builder.Add(key)

    with tmp_dictionary(builder, 'match_object_key_only.kv') as d:
        for key in keys:
            match = d[key]
            assert match.GetValue() == ''
Ejemplo n.º 21
0
def test_near_broken_input():
    """GetNear with queries shorter than its second argument must not fail."""
    entries = (
        ("zahnarzt:u0we9", '["a" : 2]'),
        ("zahnarzt:u1h2f", '["a" : 3]'),
        ("zahnarzt:u1huf", '["a" : 4]'),
    )
    builder = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in entries:
        builder.Add(key, value)

    # (query, second GetNear argument, expected number of results)
    expectations = (
        ("zahnarzt:u1h", 12, 2),
        ("zahnarzt:u", 13, 0),
        ("zahnarzt:u1", 12, 0),
    )
    with tmp_dictionary(builder, 'near_broken.kv') as d:
        for query, n, expected_count in expectations:
            assert len(list(d.GetNear(query, n))) == expected_count
Ejemplo n.º 22
0
def test_near_broken_input():
    """GetNear with queries shorter than its second argument must not fail."""
    entries = (
        ("zahnarzt:u0we9", '["a" : 2]'),
        ("zahnarzt:u1h2f", '["a" : 3]'),
        ("zahnarzt:u1huf", '["a" : 4]'),
    )
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, value in entries:
        builder.Add(key, value)

    # (query, second GetNear argument, expected number of results)
    expectations = (
        ("zahnarzt:u1h", 12, 2),
        ("zahnarzt:u", 13, 0),
        ("zahnarzt:u1", 12, 0),
    )
    with tmp_dictionary(builder, 'near_broken.kv') as d:
        for query, n, expected_count in expectations:
            assert len(list(d.GetNear(query, n))) == expected_count
Ejemplo n.º 23
0
def test_get_value_key_only():
    """GetValue on a key-only match equals the empty string after decode_to_unicode."""
    keys = ("abc", "abd")
    builder = pykeyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb":"10"})
    for key in keys:
        builder.Add(key)

    with tmp_dictionary(builder, 'match_object_key_only.kv') as d:
        for key in keys:
            match = d[key]
            assert decode_to_unicode(match.GetValue()) == decode_to_unicode('')
Ejemplo n.º 24
0
def test_get_value_int():
    """GetValue on a completion dictionary match returns the stored integer."""
    stored = (("abc", 42), ("abd", 21))
    builder = pykeyvi.CompletionDictionaryCompiler({"memory_limit_mb":"10"})
    for key, number in stored:
        builder.Add(key, number)

    with tmp_dictionary(builder, 'match_object_int.kv') as d:
        for key, number in stored:
            match = d[key]
            assert match.GetValue() == number
Ejemplo n.º 25
0
def test_get_value_string():
    """GetValue on a string match equals the stored string after decode_to_unicode."""
    builder = pykeyvi.StringDictionaryCompiler({"memory_limit_mb":"10"})
    builder.Add("abc", "aaaaa")
    builder.Add("abd", "bbbbb")

    with tmp_dictionary(builder, 'match_object_string.kv') as d:
        first = d["abc"]
        assert decode_to_unicode(first.GetValue()) == decode_to_unicode("aaaaa")
        second = d["abd"]
        assert decode_to_unicode(second.GetValue()) == decode_to_unicode("bbbbb")
Ejemplo n.º 26
0
def test_get_value():
    """GetValue on a JSON match equals the expected object after decode_to_unicode."""
    builder = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb":"10"})
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    with tmp_dictionary(builder, 'match_object_json.kv') as d:
        first = d["abc"]
        assert decode_to_unicode(first.GetValue()) == decode_to_unicode({"a":2})
        second = d["abd"]
        assert decode_to_unicode(second.GetValue()) == decode_to_unicode({"a":3})
Ejemplo n.º 27
0
def test_manifest():
    """Manifest and a large integer value survive a small-data int compiler round trip."""
    large_value = 9223372036854775
    int_builder = compiler.IntDictionaryCompilerSmallData({"memory_limit_mb": "10"})
    int_builder.Add("Leela", large_value)
    int_builder.Add("Kif", 2)
    int_builder.SetManifest('{"drink": "slurm"}')

    with tmp_dictionary(int_builder, 'slurm.kv') as d:
        manifest = json.loads(d.GetManifest())
        assert large_value == d.get('Leela').GetValue()
        assert manifest['drink'] == "slurm"
Ejemplo n.º 28
0
def test_exact_match_without_completion():
    """Every completion returned for the query 'mr ' must be exactly b'mr'."""
    separator = '\x1b'
    builder = keyvi.CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase in ("mr", "mozilla firefox", "maa"):
        # each key is the phrase, the separator byte, and the phrase again
        builder.Add(phrase + separator + phrase, 80)

    with tmp_dictionary(builder, 'test_exact_match_without_completion.kv') as d:
        completer = keyvi.MultiWordCompletion(d)
        for match in completer.GetCompletions("mr "):
            assert match.GetMatchedString() == b'mr'
Ejemplo n.º 29
0
def test_get_value_key_only():
    """GetValue on a key-only dictionary match returns the empty string."""
    keys = ("abc", "abd")
    builder = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    for key in keys:
        builder.Add(key)

    with tmp_dictionary(builder, 'match_object_key_only.kv') as d:
        for key in keys:
            match = d[key]
            assert match.GetValue() == ''
Ejemplo n.º 30
0
def test_get_value():
    """GetValue on a JSON dictionary match returns the decoded object."""
    builder = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    with tmp_dictionary(builder, 'match_object_json.kv') as d:
        for key, expected in (("abc", {"a": 2}), ("abd", {"a": 3})):
            match = d[key]
            assert match.GetValue() == expected
Ejemplo n.º 31
0
def test_get_value_string():
    """GetValue on a string dictionary match returns the stored string."""
    stored = (("abc", "aaaaa"), ("abd", "bbbbb"))
    builder = StringDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in stored:
        builder.Add(key, value)

    with tmp_dictionary(builder, 'match_object_string.kv') as d:
        for key, value in stored:
            match = d[key]
            assert match.GetValue() == value
Ejemplo n.º 32
0
def test_forward_backward_completion():
    """ForwardBackwardCompletion finds 'munich' both as a prefix and inside a phrase.

    One dictionary holds the phrases as-is, the other holds them reversed.
    """
    weighted_phrases = (
        ("bayern munich vs. real madrid", 80),
        ("munich vs. real madrid", 30),
    )

    forward = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase, weight in weighted_phrases:
        forward.Add(phrase, weight)

    backward = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase, weight in weighted_phrases:
        # the backward dictionary stores every phrase reversed
        backward.Add(phrase[::-1], weight)

    with tmp_dictionary(forward, 'fw_bw_completion.kv') as d, \
            tmp_dictionary(backward, 'fw_bw_completion_bw.kv') as d2:
        completer = ForwardBackwardCompletion(d, d2)
        matches = [(match.GetAttribute('weight'), match.GetMatchedString())
                   for match in completer.GetCompletions("munich")]
        # highest weight first
        matches.sort(reverse=True)
        assert len(matches) == 2
        assert matches[0][1] == 'bayern munich vs. real madrid'
        assert matches[1][1] == 'munich vs. real madrid'
Ejemplo n.º 33
0
def test_simple():
    """Values added via Add and via item assignment are both retrievable."""
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, raw_json in (("abc", '{"a" : 2}'), ("abd", '{"a" : 3}')):
        builder.Add(key, raw_json)
    # item assignment (__setitem__) is exercised as well, re-adding "abd"
    builder["abd"] = '{"a" : 3}'

    with tmp_dictionary(builder, 'simple_json.kv') as d:
        assert len(d) == 2
        for key, compact_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert d[key].GetValueAsString() == compact_json
Ejemplo n.º 34
0
def test_raw_serialization():
    """A match round-tripped through dumps/loads keeps its value string."""
    builder = pykeyvi.JsonDictionaryCompiler()
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    expected = '{"a":2}'
    with tmp_dictionary(builder, 'match_object_json.kv') as dictionary:
        original = dictionary["abc"]
        assert original.GetValueAsString() == expected
        # serialize the match and rebuild it from the serialized form
        serialized = original.dumps()
        restored = pykeyvi.Match.loads(serialized)
        assert restored.GetValueAsString() == expected
Ejemplo n.º 35
0
def test_get_all_items():
    """Items from GetAllItems must match key_values pairwise (key and value)."""
    builder = generate_dictionary_compiler()
    with tmp_dictionary(builder, 'test_get_all_items.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllItems())
        for expected, actual in paired:
            expected_key, expected_value = expected
            actual_key, actual_value = actual
            assert expected_key == actual_key
            assert expected_value == actual_value
Ejemplo n.º 36
0
def test_raw_serialization():
    """A match round-tripped through dumps/loads keeps its value string."""
    builder = pykeyvi.JsonDictionaryCompiler()
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    expected = '{"a":2}'
    with tmp_dictionary(builder, 'match_object_json.kv') as dictionary:
        original = dictionary["abc"]
        assert original.GetValueAsString() == expected
        # serialize the match and rebuild it from the serialized form
        serialized = original.dumps()
        restored = pykeyvi.Match.loads(serialized)
        assert restored.GetValueAsString() == expected
Ejemplo n.º 37
0
def test_raw_serialization():
    """A match round-tripped through dumps/loads keeps its value string."""
    builder = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb":"10"})
    builder.Add("abc", '{"a" : 2}')
    builder.Add("abd", '{"a" : 3}')

    with tmp_dictionary(builder, 'match_object_json.kv') as dictionary:
        original = dictionary["abc"]
        assert decode_to_unicode(original.GetValueAsString()) == decode_to_unicode('{"a":2}')
        # serialize the match and rebuild it from the serialized form
        serialized = original.dumps()
        restored = pykeyvi.Match.loads(serialized)
        assert decode_to_unicode(restored.GetValueAsString()) == decode_to_unicode('{"a":2}')
Ejemplo n.º 38
0
def test_simple():
    """Values added via Add and via item assignment are both retrievable."""
    builder = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, raw_json in (("abc", '{"a" : 2}'), ("abd", '{"a" : 3}')):
        builder.Add(key, raw_json)
    # item assignment (__setitem__) is exercised as well, re-adding "abd"
    builder["abd"] = '{"a" : 3}'

    with tmp_dictionary(builder, 'simple_json.kv') as d:
        assert len(d) == 2
        for key, compact_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert d[key].GetValueAsString() == compact_json
Ejemplo n.º 39
0
def test_zerobyte():
    """Keys containing a zero byte at the start, middle or end are usable."""
    entries = (
        ("\x00abc", '["a" : 2]'),
        ("abc\x00def", '["a" : 3]'),
        ("cd\x00", '["a" : 4]'),
    )
    builder = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, value in entries:
        builder.Add(key, value)

    with tmp_dictionary(builder, 'zerobyte.kv') as d:
        # the test expects each value back exactly as it was added
        for key, value in entries:
            assert d[key].GetValue() == value
        item_count = sum(1 for _key, _value in d.GetAllItems())
        assert item_count == 3
def test_fuzzy_completion():
    """Check the number of results PrefixCompletion.GetFuzzyCompletions returns.

    Each case is a query, the integer passed as second argument (presumably
    the maximum edit distance; confirm against the keyvi API) and the
    expected number of matches.
    """
    weighted_phrases = [
        ("turkei news", 23698),
        ("turkei side", 18838),
        ("turkei urlaub", 23424),
        ("turkisch anfänger", 20788),
        ("turkisch für", 21655),
        ("turkisch für anfänger", 20735),
        ("turkçe dublaj", 28575),
        ("turkçe dublaj izle", 16391),
        ("turkçe izle", 19946),
        ("tuv", 97),
        ("tuv akademie", 9557),
        ("tuv hessen", 7744),
        ("tuv i", 331),
        ("tuv in", 10188),
        ("tuv ib", 10189),
        ("tuv kosten", 11387),
        ("tuv nord", 46052),
        ("tuv sood", 46057),
        ("tus rhein", 462),
        ("tus rheinland", 39131),
        ("tus öffnungszeiten", 15999),
    ]
    builder = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase, weight in weighted_phrases:
        builder.Add(phrase, weight)

    # (query, second GetFuzzyCompletions argument, expected number of matches)
    cases = [
        ('tuv', 0, 9),
        ('tue', 1, 1),
        ('tuv h', 1, 2),
        ('tuv h', 2, 7),
        ('tuk töffnungszeiten', 2, 1),
        ('tuk töffnung', 2, 1),
        ('tuk txyzöff', 5, 1),
        ('tuk txyzöffnung', 5, 1),
        ('tuk txyzvöffnung', 6, 1),
        ('tuk ffnung', 2, 1),
    ]

    with tmp_dictionary(builder, 'fuzzy_completion.kv') as d:
        completer = PrefixCompletion(d)
        for query, max_dist, expected_count in cases:
            matches = [
                m.GetMatchedString()
                for m in completer.GetFuzzyCompletions(query, max_dist)
            ]
            assert len(matches) == expected_count
Ejemplo n.º 41
0
def test_simple_snappy():
    """Compile with snappy compression options and check they show up in the statistics."""
    options = {'compression': 'snappy', 'compression_threshold': '0'}
    builder = pykeyvi.JsonDictionaryCompiler(50000000, options)
    for key, raw_json in (("abc", '{"a" : 2}'), ("abd", '{"a" : 3}')):
        builder.Add(key, raw_json)

    with tmp_dictionary(builder, 'simple_json_snappy.kv') as d:
        assert len(d) == 2
        for key, compact_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert d[key].GetValueAsString() == compact_json
        # the value store statistics report the compression settings
        value_store = d.GetStatistics()['Value Store']
        assert value_store['__compression'] == "snappy"
        assert value_store['__compression_threshold'] == "0"
Ejemplo n.º 42
0
def test_simple_snappy():
    """Compile with snappy compression options and check they show up in the statistics."""
    options = {'compression': 'snappy', 'compression_threshold': '0'}
    builder = pykeyvi.JsonDictionaryCompiler(50000000, options)
    for key, raw_json in (("abc", '{"a" : 2}'), ("abd", '{"a" : 3}')):
        builder.Add(key, raw_json)

    with tmp_dictionary(builder, 'simple_json_snappy.kv') as d:
        assert len(d) == 2
        for key, compact_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert d[key].GetValueAsString() == compact_json
        # the value store statistics report the compression settings
        value_store = d.GetStatistics()['Value Store']
        assert value_store['__compression'] == "snappy"
        assert value_store['__compression_threshold'] == "0"
Ejemplo n.º 43
0
def test_statistics():
    """The 'General' statistics section reports the key count and the manifest."""
    builder = pykeyvi.KeyOnlyDictionaryCompiler()
    for key in ("Leela", "Kif"):
        builder.Add(key)
    builder.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(builder, 'brannigan_statistics.kv') as d:
        general = d.GetStatistics().get('General', {})
        manifest = general.get('manifest', {})
        number_of_keys = int(general.get('number_of_keys', 0))
        assert number_of_keys == 2
        assert manifest.get('author') == "Zapp Brannigan"
Ejemplo n.º 44
0
def test_unicode_compile():
    """Keys with non-ASCII characters compile and can be looked up.

    Keys are added both as plain string literals and as explicit unicode
    literals; lookups use both forms as well.
    """
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("üöä", '{"y" : 2}')
    # explicit unicode literal instead of "...".decode('utf-8'): str has no
    # decode() on Python 3, while the u"" prefix works on Python 2 and 3.3+
    c.Add(u"üüüüüüabd", '{"a" : 3}')
    c.Add(u"ääääädäd", '{"b" : 33}')

    with tmp_dictionary(c, 'simple_json.kv') as d:
        assert len(d) == 3
        assert d["üöä"].GetValueAsString() == '{"y":2}'
        assert d[u"üöä"].GetValueAsString() == '{"y":2}'
        assert d["üüüüüüabd"].GetValueAsString() == '{"a":3}'
        assert d["ääääädäd"].GetValueAsString() == '{"b":33}'
Ejemplo n.º 45
0
def test_unicode():
    """A key added as a plain string literal is found via a unicode key."""
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("öäü", '{"a" : 2}')
    c.Add("abd", '{"a" : 3}')
    # use python syntax ala __setitem__
    c["abd"] = '{"a" : 3}'

    # create unicode string; a u"" literal replaces "...".decode('utf-8'),
    # which fails on Python 3 because str has no decode()
    key = u"öäü"
    with tmp_dictionary(c, 'unicode_json.kv') as d:
        assert key in d
        assert d[key].GetValue() == {"a" : 2}
        assert d.get(key).GetValue() == {"a" : 2}
Ejemplo n.º 46
0
def test_leak():
    """Repeated failing lookups must not grow the process memory usage.

    Memory is sampled via memory_usage_ps() every 100 iterations and has to
    stay below the initial reading plus 15000.
    """
    builder = pykeyvi.JsonDictionaryCompiler()
    builder.Add("something", '["a" : 2]')

    with tmp_dictionary(builder, 'near_simple.kv') as d:
        gc.collect()
        baseline = memory_usage_ps()
        for iteration in range(500000):
            # the key does not exist, so the lookup result must be falsy
            assert not d.get('something_else')
            if iteration % 100 != 0:
                continue
            gc.collect()
            current = memory_usage_ps()
            assert current < baseline + 15000
Ejemplo n.º 47
0
def test_unicode_lookup():
    """LookupText on a unicode text finds all three stored place names."""
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("Los Angeles", '{"country" : "USA"}')
    c.Add("Frankfurt am Main", '{"country" : "Germany"}')
    # u"" literals replace "...".decode('utf-8'), which fails on Python 3
    # because str has no decode(); the prefix works on Python 2 and 3.3+
    c.Add(u"Kirchheim bei München", '{"country" : "Germany"}')

    # create unicode string for lookup
    text = u"From Los Angeles via Frankfurt am Main to Kirchheim bei München it should just work"
    with tmp_dictionary(c, 'unicode_json_lookup.kv') as d:
        assert "Kirchheim bei München" in d
        matched_strings = [x.GetMatchedString() for x in d.LookupText(text)]
        assert len(matched_strings) == 3
        assert "Kirchheim bei München" in matched_strings
        assert "Los Angeles" in matched_strings
        assert "Frankfurt am Main" in matched_strings
Ejemplo n.º 48
0
def test_overlong_completion():
    """Multi-word completion must also return a very long entry.

    Each key is a phrase, MULTIWORD_QUERY_SEPARATOR, and the same phrase
    again; results for "html dis" are checked in descending weight order.
    """
    overlong = "html display=main&referer=3c6120640656e466f726e26616d703b726566657265723d336336313230363436313734363132643631366136313738336432373636363136633733363532373230363837323635363633643237326636363732363136643635326536613733373032373230373436393734366336353364323735333734363137323734373336353639373436353237336535333734363137323734373336353639373436353363326636313365323032363637373433623230336336313230363436313734363132643631366136313738336432373636363136633733363532373230363837323635363632303364323732663733363537323736366336353734326636363666373236353665336636663730363536653436366637323635366535343732363536353364333132363631366437303362363436393733373036633631373933643664363136393665323636313664373033623734363137323637363537343639363433643330323636313664373033623734363137323637363537343734373937303635336433303236363136643730336236333664363433643338333032373230373436393734366336353364323737613735373232363735373536643663336236333662323037613735373232303436366637323635366532363735373536643663336236323635373237333639363336383734323733653436366637323635366532363735373536643663336236323635373237333639363336383734336332663631336532303230323636373734336232303463363536383732363736313665363737333636366637323635366526616d703b616a61783d3126616d703b6d6f62696c653d3026616d703b706167653d3026616d703b6f70656e466f72656e547265653d3127203e204c65687267616e6773666f72656e3c2f613e20&openforentree=1&targetid=130&targettype=1&cmd=6&page=null&fromhistory=1"
    weighted_phrases = (
        ("html disable", 30075),
        ("html disabled", 29650),
        (overlong, 23732),
    )

    builder = pykeyvi.CompletionDictionaryCompiler()
    for phrase, weight in weighted_phrases:
        builder.Add(phrase + MULTIWORD_QUERY_SEPARATOR + phrase, weight)

    with tmp_dictionary(builder, 'mw_overlong_completion.kv') as d:
        completer = pykeyvi.MultiWordCompletion(d)
        matches = [(match.GetAttribute('weight'), match.GetMatchedString())
                   for match in completer.GetCompletions("html dis")]
        # highest weight first
        matches.sort(reverse=True)
        assert len(matches) == 3
        assert matches[0][1] == 'html disable'
        assert matches[1][1] == 'html disabled'
        assert matches[2][1] == overlong
Ejemplo n.º 49
0
def test_near():
    """Check the number of results of two-argument GetNear for several queries."""
    entries = [
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
    ]
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, value in entries:
        builder.Add(key, value)

    # (query, second GetNear argument, expected number of results)
    expectations = [
        ("zahnarzt:u1q0gkqsenhf", 12, 1),
        ("zahnarzt:u1h0gkqsenhf", 12, 3),
        ("zahnarzt:u1h0gkqsenhf", 13, 0),
        ("zahnarzt:u0h0gkqsenhf", 10, 4),
    ]
    with tmp_dictionary(builder, 'near_simple.kv') as d:
        for query, n, expected_count in expectations:
            assert len(list(d.GetNear(query, n))) == expected_count
Ejemplo n.º 50
0
def test_mw_completion():
    """MultiWordCompletion for "mozilla f" returns four matches ordered by weight."""
    separator = '\x1b'
    # (text before the separator, text after the separator, weight)
    entries = [
        ("mozilla firefox", "mozilla firefox", 80),
        ("mozilla footprint", "mozilla footprint", 30),
        ("mozilla fans", "mozilla fans", 43),
        ("mozilla firebird", "mozilla firebird", 12),
        ("internet microsoft explorer", "microsoft internet explorer", 21),
        ("google chrome", "google chrome", 54),
        ("netscape navigator", "netscape navigator", 10),
    ]
    builder = pykeyvi.CompletionDictionaryCompiler()
    for before, after, weight in entries:
        builder.Add(before + separator + after, weight)

    with tmp_dictionary(builder, 'mw_completion.kv') as d:
        completer = pykeyvi.MultiWordCompletion(d)
        matches = [(match.GetAttribute('weight'), match.GetMatchedString())
                   for match in completer.GetCompletions("mozilla f")]
        # highest weight first
        matches.sort(reverse=True)
        assert len(matches) == 4
        assert matches[0][1] == 'mozilla firefox'
        assert matches[1][1] == 'mozilla fans'
        assert matches[2][1] == 'mozilla footprint'
        assert matches[3][1] == 'mozilla firebird'
Ejemplo n.º 51
0
def test_near_greedy():
    """Check result counts of greedy GetNear and its relation to non-greedy.

    The greedy result list must start with the non-greedy result list.
    """
    entries = [
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
    ]
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, value in entries:
        builder.Add(key, value)

    # (query, second GetNear argument, expected number of greedy results)
    expectations = [
        ("zahnarzt:u1q0gkqsenhf", 12, 2),
        ("zahnarzt:u1h0gkqsenhf", 12, 3),
        ("zahnarzt:u1h0gkqsenhf", 13, 0),
        ("zahnarzt:u0h0gkqsenhf", 10, 10),
    ]

    with tmp_dictionary(builder, 'near_greedy.kv') as d:
        for query, n, expected_count in expectations:
            assert len(list(d.GetNear(query, n, True))) == expected_count

        probe = "zahnarzt:u0h0gkqsenhf"
        greedy = [hit.GetMatchedString() for hit in d.GetNear(probe, 10, True)]
        non_greedy = [hit.GetMatchedString() for hit in d.GetNear(probe, 10, False)]
        # non-greedy results form a prefix of the greedy results
        assert greedy[:len(non_greedy)] == non_greedy
Ejemplo n.º 52
0
def test_near_score():
    """Scores of greedy GetNear results: 21 first, then four times 11, then 10."""
    entries = [
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
        ("zahnarzt:u0h0gkqsenhf", '["a" : 11]'),
    ]
    builder = pykeyvi.JsonDictionaryCompiler()
    for key, value in entries:
        builder.Add(key, value)

    with tmp_dictionary(builder, 'near_score.kv') as d:
        results = list(d.GetNear("zahnarzt:u0h0gkqsenhf", 10, True))
        best, runners_up, remainder = results[0], results[1:5], results[5:]
        # the query itself is one of the stored keys and is expected to score 21
        assert best.GetScore() == 21
        for hit in runners_up:
            assert hit.GetScore() == 11
        for hit in remainder:
            assert hit.GetScore() == 10
Ejemplo n.º 53
0
def test_size():
    """len() of a compiled key-only dictionary equals the number of added keys."""
    builder = pykeyvi.KeyOnlyDictionaryCompiler()
    for key in ("Leela", "Kif"):
        builder.Add(key)

    with tmp_dictionary(builder, 'brannigan_size.kv') as d:
        key_count = len(d)
        assert key_count == 2
Ejemplo n.º 54
0
def test_get_all_keys():
    """Keys from GetAllKeys must match the keys of key_values pairwise."""
    builder = generate_dictionary_compiler()
    with tmp_dictionary(builder, 'test_get_all_keys.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllKeys())
        for (expected_key, _), actual_key in paired:
            assert expected_key == actual_key
Ejemplo n.º 55
0
def test_get_all_values():
    """Values from GetAllValues must match the values of key_values pairwise."""
    # The leftover debug `print keyvi_value` statement was removed: it is
    # Python 2 statement syntax (a SyntaxError on Python 3) and only added
    # noise to the test output.
    with tmp_dictionary(generate_dictionary_compiler(), 'test_get_all_values.kv') as keyvi_dictionary:
        for (_, base_value), keyvi_value in zip(key_values, keyvi_dictionary.GetAllValues()):
            assert base_value == keyvi_value
Ejemplo n.º 56
0
def test_get_all_items():
    """Items from GetAllItems must match key_values pairwise (key and value)."""
    builder = generate_dictionary_compiler()
    with tmp_dictionary(builder, 'test_get_all_items.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllItems())
        for expected, actual in paired:
            expected_key, expected_value = expected
            actual_key, actual_value = actual
            assert expected_key == actual_key
            assert expected_value == actual_value
Ejemplo n.º 57
0
def test_compiler_empty():
    """A key-only compiler without any keys produces an empty dictionary."""
    empty_builder = pykeyvi.KeyOnlyDictionaryCompiler()
    with test_tools.tmp_dictionary(empty_builder, 'empty.kv') as d:
        key_count = len(d)
        assert key_count == 0
Ejemplo n.º 58
0
def test_compiler_empty_json():
    """A JSON compiler without any entries produces an empty dictionary."""
    empty_builder = pykeyvi.JsonDictionaryCompiler()
    with test_tools.tmp_dictionary(empty_builder, 'empty_json.kv') as d:
        key_count = len(d)
        assert key_count == 0