コード例 #1
0
def test_float_compaction():
    """Check that single-precision float storage shrinks the value store.

    The same JSON array of floats is added under the same key to two
    compilers that differ only in the 'floating_point_precision' option.
    The 'Value Store' size reported by the single-precision dictionary must
    be strictly smaller than the one reported by the default dictionary.
    """
    floats_json = '[1.7008715758978892, 1.8094465532317732, 1.6098250864350536, 1.6369107966501981, 1.7736887965234107, 1.606682751740542, 1.6186427703265525, 1.7939763843449683, 1.5973550162469434, 1.6799721708726192, 1.8199786239525833, 1.7956178070065245, 1.7269879953863045]'

    single_compiler = JsonDictionaryCompiler({
        "memory_limit_mb": "10",
        'floating_point_precision': 'single'
    })
    double_compiler = JsonDictionaryCompiler({"memory_limit_mb": "10"})

    # Feed the identical payload to both compilers.
    single_compiler.Add('aa', floats_json)
    double_compiler.Add('aa', floats_json)

    with tmp_dictionary(single_compiler, 'json_single_precision_float.kv') as single_dict, \
            tmp_dictionary(double_compiler, 'json_double_precision_float.kv') as double_dict:
        # Sanity check: exactly one key ended up in each dictionary.
        assert len(single_dict) == 1
        assert len(double_dict) == 1

        # The value store is expected to be smaller for single floats.
        single_stats = single_dict.GetStatistics()
        double_stats = double_dict.GetStatistics()
        single_size = int(single_stats['Value Store']['size'])
        double_size = int(double_stats['Value Store']['size'])
        assert single_size < double_size
コード例 #2
0
ファイル: near_test.py プロジェクト: hendrikmuhs/keyvi-server
def test_near_greedy():
    """Check result counts of greedy GetNear() and its prefix relation.

    Ten keys are compiled, the number of greedy matches is checked for four
    queries, and finally the greedy result list must start with the
    non-greedy result list for the same query.
    """
    entries = (
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
    )
    # (query, second GetNear argument, expected number of greedy matches)
    expectations = (
        ("zahnarzt:u1q0gkqsenhf", 12, 2),
        ("zahnarzt:u1h0gkqsenhf", 12, 3),
        ("zahnarzt:u1h0gkqsenhf", 13, 0),
        ("zahnarzt:u0h0gkqsenhf", 10, 10),
    )

    dict_compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, payload in entries:
        dict_compiler.Add(key, payload)

    with tmp_dictionary(dict_compiler, 'near_greedy.kv') as dictionary:
        for query, min_length, expected_count in expectations:
            found = list(dictionary.GetNear(query, min_length, True))
            assert len(found) == expected_count

        greedy = []
        for match in dictionary.GetNear("zahnarzt:u0h0gkqsenhf", 10, True):
            greedy.append(match.GetMatchedString())
        non_greedy = []
        for match in dictionary.GetNear("zahnarzt:u0h0gkqsenhf", 10, False):
            non_greedy.append(match.GetMatchedString())

        # Greedy matching must return the non-greedy matches first.
        assert greedy[:len(non_greedy)] == non_greedy
コード例 #3
0
ファイル: iterators_test.py プロジェクト: subu-cliqz/keyvi
def test_get_all_keys():
    """Compare GetAllKeys() pairwise against the keys in key_values."""
    dict_compiler = generate_dictionary_compiler()
    with tmp_dictionary(dict_compiler, 'test_get_all_keys.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllKeys())
        for (expected_key, _ignored_value), actual_key in paired:
            assert expected_key == actual_key
コード例 #4
0
def test_get_fuzzy():
    """Check that GetFuzzy('tüv koid', 2) returns the two expected matches.

    The matches are compared pairwise (matched string and value) against
    the expected list, and the total number of matches must be 2.
    """
    weighted_keys = (
        ("türkei news", 23698),
        ("türkei side", 18838),
        ("türkei urlaub", 23424),
        ("türkisch anfänger", 20788),
        ("türkisch für", 21655),
        ("türkisch für anfänger", 20735),
        ("türkçe dublaj", 28575),
        ("türkçe dublaj izle", 16391),
        ("türkçe izle", 19946),
        ("tüv akademie", 9557),
        ("tüv hessen", 7744),
        ("tüv i", 331),
        ("tüv in", 10188),
        ("tüv ib", 10189),
        ("tüv kosten", 11387),
        ("tüv nord", 46052),
        ("tüv sood", 46057),
        ("tüs rhein", 462),
        ("tüs rheinland", 39131),
        ("tüs öffnungszeiten", 15999),
    )
    dict_compiler = keyvi.CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for key, weight in weighted_keys:
        dict_compiler.Add(key, weight)

    expected_matches = [
        (u'tüv sood', 46057),
        (u'tüv nord', 46052),
    ]

    with tmp_dictionary(dict_compiler, 'get_fuzzy.kv') as dictionary:
        fuzzy_hits = dictionary.GetFuzzy('tüv koid', 2)
        for (expected_key, expected_value), hit in zip(expected_matches, fuzzy_hits):
            assert expected_key == hit.GetMatchedString()
            assert expected_value == hit.GetValue()

        # A second query confirms that exactly two matches are produced.
        all_hits = list(dictionary.GetFuzzy('tüv koid', 2))
        assert len(all_hits) == 2
コード例 #5
0
def test_get_all_values():
    """Compare GetAllValues() pairwise against the values in key_values."""
    dict_compiler = generate_dictionary_compiler()
    with tmp_dictionary(dict_compiler, 'test_get_all_values.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllValues())
        for (_ignored_key, expected_value), actual_value in paired:
            assert expected_value == actual_value
コード例 #6
0
def test_manifest():
    """Check that a JSON manifest string set on the compiler is readable.

    The manifest is set as a JSON string, read back from the compiled
    dictionary, parsed with json.loads and checked for the 'drink' entry.
    """
    dict_compiler = compiler.IntDictionaryCompiler({"memory_limit_mb": "10"})
    for name, number in (("Leela", 20), ("Kif", 2)):
        dict_compiler.Add(name, number)
    dict_compiler.SetManifest('{"drink": "slurm"}')

    with tmp_dictionary(dict_compiler, 'slurm.kv') as dictionary:
        raw_manifest = dictionary.GetManifest()
        manifest = json.loads(raw_manifest)
        assert manifest['drink'] == "slurm"
コード例 #7
0
def test_simple():
    """Compile two JSON values and check size and compact string output."""
    # (key, JSON as added, JSON string expected from GetValueAsString)
    cases = (
        ("abc", '{"a" : 2}', '{"a":2}'),
        ("abd", '{"a" : 3}', '{"a":3}'),
    )
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    for key, added_json, _expected in cases:
        dict_compiler.Add(key, added_json)

    with tmp_dictionary(dict_compiler, 'simple_json.kv') as dictionary:
        assert len(dictionary) == 2
        for key, _added, expected_json in cases:
            assert dictionary[key].GetValueAsString() == expected_json
コード例 #8
0
def test_manifest():
    """Check that a manifest dict set on the compiler can be read back."""
    dict_compiler = pykeyvi.IntDictionaryCompiler({"memory_limit_mb": "10"})
    for name, number in (("Leela", 20), ("Kif", 2)):
        dict_compiler.Add(name, number)
    dict_compiler.SetManifest({"drink": "slurm"})

    with tmp_dictionary(dict_compiler, 'slurm.kv') as dictionary:
        manifest = dictionary.GetManifest()
        assert manifest['drink'] == "slurm"
コード例 #9
0
ファイル: near_test.py プロジェクト: hendrikmuhs/keyvi-server
def test_near_less_precission():
    """Check GetNear() counts when the query is longer than the stored keys."""
    dict_compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, payload in (
            ("zahnarzt:u0we9", '["a" : 2]'),
            ("zahnarzt:u1h2f", '["a" : 3]'),
            ("zahnarzt:u1huf", '["a" : 4]'),
    ):
        dict_compiler.Add(key, payload)

    with tmp_dictionary(dict_compiler, 'near_less_precission.kv') as dictionary:
        # (second GetNear argument, expected number of matches)
        for min_length, expected_count in ((12, 2), (13, 0)):
            found = list(dictionary.GetNear("zahnarzt:u1h0gkqsenhf", min_length))
            assert len(found) == expected_count
コード例 #10
0
ファイル: near_test.py プロジェクト: ankit-cliqz/keyvi
def test_near_less_precission():
    """Check GetNear() counts when the query is longer than the stored keys."""
    query = "zahnarzt:u1h0gkqsenhf"
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    dict_compiler.Add("zahnarzt:u0we9", '["a" : 2]')
    dict_compiler.Add("zahnarzt:u1h2f", '["a" : 3]')
    dict_compiler.Add("zahnarzt:u1huf", '["a" : 4]')

    with tmp_dictionary(dict_compiler, 'near_less_precission.kv') as dictionary:
        matches_at_12 = list(dictionary.GetNear(query, 12))
        assert len(matches_at_12) == 2
        matches_at_13 = list(dictionary.GetNear(query, 13))
        assert len(matches_at_13) == 0
コード例 #11
0
def test_manifest():
    """Check that a manifest dict on a key-only dictionary can be read back."""
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler()
    for name in ("Leela", "Kif"):
        dict_compiler.Add(name)
    dict_compiler.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(dict_compiler, 'brannigan_manifest.kv') as dictionary:
        manifest = dictionary.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
コード例 #12
0
ファイル: statistics_test.py プロジェクト: skilluck/keyvi
def test_manifest():
    """Check that a manifest dict on a key-only dictionary can be read back."""
    manifest_in = {"author": "Zapp Brannigan"}
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    dict_compiler.Add("Leela")
    dict_compiler.Add("Kif")
    dict_compiler.SetManifest(manifest_in)

    with tmp_dictionary(dict_compiler, 'brannigan_manifest.kv') as dictionary:
        manifest_out = dictionary.GetManifest()
        assert manifest_out['author'] == "Zapp Brannigan"
コード例 #13
0
def test_float_compaction():
    """Check that single-precision float storage shrinks the value store.

    Two compilers are created with the same first argument (50000000); only
    one of them gets the 'floating_point_precision': 'single' option. Both
    receive the same JSON float array, and the single-precision dictionary
    must report a strictly smaller 'Value Store' size.
    """
    floats_json = '[1.7008715758978892, 1.8094465532317732, 1.6098250864350536, 1.6369107966501981, 1.7736887965234107, 1.606682751740542, 1.6186427703265525, 1.7939763843449683, 1.5973550162469434, 1.6799721708726192, 1.8199786239525833, 1.7956178070065245, 1.7269879953863045]'

    single_compiler = pykeyvi.JsonDictionaryCompiler(50000000, {'floating_point_precision': 'single'})
    double_compiler = pykeyvi.JsonDictionaryCompiler(50000000)

    # Feed the identical payload to both compilers.
    single_compiler.Add('aa', floats_json)
    double_compiler.Add('aa', floats_json)

    with tmp_dictionary(single_compiler, 'json_single_precision_float.kv') as single_dict, \
            tmp_dictionary(double_compiler, 'json_double_precision_float.kv') as double_dict:
        # Sanity check: exactly one key ended up in each dictionary.
        assert len(single_dict) == 1
        assert len(double_dict) == 1

        # The value store is expected to be smaller for single floats.
        single_stats = single_dict.GetStatistics()
        double_stats = double_dict.GetStatistics()
        single_size = int(single_stats['Value Store']['size'])
        double_size = int(double_stats['Value Store']['size'])
        assert single_size < double_size
コード例 #14
0
def test_get_value():
    """Check that GetValue() on a JSON match equals the expected dict."""
    # (key, JSON as added, expected value from GetValue)
    cases = (
        ("abc", '{"a" : 2}', {"a": 2}),
        ("abd", '{"a" : 3}', {"a": 3}),
    )
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    for key, added_json, _expected in cases:
        dict_compiler.Add(key, added_json)

    with tmp_dictionary(dict_compiler, 'match_object_json.kv') as dictionary:
        for key, _added, expected_value in cases:
            match = dictionary[key]
            assert match.GetValue() == expected_value
コード例 #15
0
def test_get_value():
    """Check that GetValue() on a JSON match equals the expected dict."""
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    dict_compiler.Add("abc", '{"a" : 2}')
    dict_compiler.Add("abd", '{"a" : 3}')

    with tmp_dictionary(dict_compiler, 'match_object_json.kv') as dictionary:
        first_match = dictionary["abc"]
        assert first_match.GetValue() == {"a": 2}
        second_match = dictionary["abd"]
        assert second_match.GetValue() == {"a": 3}
コード例 #16
0
def test_get_value_string():
    """Check that GetValue() returns the string stored for each key."""
    stored = (("abc", "aaaaa"), ("abd", "bbbbb"))
    dict_compiler = pykeyvi.StringDictionaryCompiler()
    for key, text in stored:
        dict_compiler.Add(key, text)

    with tmp_dictionary(dict_compiler, 'match_object_string.kv') as dictionary:
        for key, text in stored:
            match = dictionary[key]
            assert match.GetValue() == text
コード例 #17
0
def test_get_value_key_only():
    """Check that GetValue() is the empty string in a key-only dictionary."""
    keys = ("abc", "abd")
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler()
    for key in keys:
        dict_compiler.Add(key)

    with tmp_dictionary(dict_compiler, 'match_object_key_only.kv') as dictionary:
        for key in keys:
            match = dictionary[key]
            assert match.GetValue() == ''
コード例 #18
0
def test_get_value_int():
    """Check that GetValue() returns the integer stored for each key."""
    stored = (("abc", 42), ("abd", 21))
    dict_compiler = pykeyvi.CompletionDictionaryCompiler()
    for key, number in stored:
        dict_compiler.Add(key, number)

    with tmp_dictionary(dict_compiler, 'match_object_int.kv') as dictionary:
        for key, number in stored:
            match = dictionary[key]
            assert match.GetValue() == number
コード例 #19
0
def test_get_value_string():
    """Check that GetValue() returns the string stored for each key."""
    dict_compiler = pykeyvi.StringDictionaryCompiler()
    dict_compiler.Add("abc", "aaaaa")
    dict_compiler.Add("abd", "bbbbb")

    with tmp_dictionary(dict_compiler, 'match_object_string.kv') as dictionary:
        first_match = dictionary["abc"]
        assert first_match.GetValue() == "aaaaa"
        second_match = dictionary["abd"]
        assert second_match.GetValue() == "bbbbb"
コード例 #20
0
def test_get_value_key_only():
    """Check that GetValue() is the empty string in a key-only dictionary."""
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler()
    dict_compiler.Add("abc")
    dict_compiler.Add("abd")

    with tmp_dictionary(dict_compiler, 'match_object_key_only.kv') as dictionary:
        first_match = dictionary["abc"]
        assert first_match.GetValue() == ''
        second_match = dictionary["abd"]
        assert second_match.GetValue() == ''
コード例 #21
0
ファイル: near_test.py プロジェクト: hendrikmuhs/keyvi-server
def test_near_broken_input():
    """Check GetNear() counts for queries shorter than the stored keys."""
    dict_compiler = keyvi.JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, payload in (
            ("zahnarzt:u0we9", '["a" : 2]'),
            ("zahnarzt:u1h2f", '["a" : 3]'),
            ("zahnarzt:u1huf", '["a" : 4]'),
    ):
        dict_compiler.Add(key, payload)

    # (query, second GetNear argument, expected number of matches)
    expectations = (
        ("zahnarzt:u1h", 12, 2),
        ("zahnarzt:u", 13, 0),
        ("zahnarzt:u1", 12, 0),
    )
    with tmp_dictionary(dict_compiler, 'near_broken.kv') as dictionary:
        for query, min_length, expected_count in expectations:
            found = list(dictionary.GetNear(query, min_length))
            assert len(found) == expected_count
コード例 #22
0
ファイル: near_test.py プロジェクト: ankit-cliqz/keyvi
def test_near_broken_input():
    """Check GetNear() counts for queries shorter than the stored keys."""
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    dict_compiler.Add("zahnarzt:u0we9", '["a" : 2]')
    dict_compiler.Add("zahnarzt:u1h2f", '["a" : 3]')
    dict_compiler.Add("zahnarzt:u1huf", '["a" : 4]')

    with tmp_dictionary(dict_compiler, 'near_broken.kv') as dictionary:
        found = list(dictionary.GetNear("zahnarzt:u1h", 12))
        assert len(found) == 2
        found = list(dictionary.GetNear("zahnarzt:u", 13))
        assert len(found) == 0
        found = list(dictionary.GetNear("zahnarzt:u1", 12))
        assert len(found) == 0
コード例 #23
0
ファイル: match_object_test.py プロジェクト: skilluck/keyvi
def test_get_value_key_only():
    """Check that key-only matches carry an empty value.

    Both sides of each comparison are passed through decode_to_unicode
    before being compared.
    """
    keys = ("abc", "abd")
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb":"10"})
    for key in keys:
        dict_compiler.Add(key)

    with tmp_dictionary(dict_compiler, 'match_object_key_only.kv') as dictionary:
        for key in keys:
            match = dictionary[key]
            actual = decode_to_unicode(match.GetValue())
            assert actual == decode_to_unicode('')
コード例 #24
0
ファイル: match_object_test.py プロジェクト: skilluck/keyvi
def test_get_value_int():
    """Check that GetValue() returns the integer stored for each key."""
    dict_compiler = pykeyvi.CompletionDictionaryCompiler({"memory_limit_mb":"10"})
    dict_compiler.Add("abc", 42)
    dict_compiler.Add("abd", 21)

    with tmp_dictionary(dict_compiler, 'match_object_int.kv') as dictionary:
        first_match = dictionary["abc"]
        assert first_match.GetValue() == 42
        second_match = dictionary["abd"]
        assert second_match.GetValue() == 21
コード例 #25
0
ファイル: match_object_test.py プロジェクト: skilluck/keyvi
def test_get_value_string():
    """Check that GetValue() returns the string stored for each key.

    Both sides of each comparison are passed through decode_to_unicode
    before being compared.
    """
    stored = (("abc", "aaaaa"), ("abd", "bbbbb"))
    dict_compiler = pykeyvi.StringDictionaryCompiler({"memory_limit_mb":"10"})
    for key, text in stored:
        dict_compiler.Add(key, text)

    with tmp_dictionary(dict_compiler, 'match_object_string.kv') as dictionary:
        for key, text in stored:
            match = dictionary[key]
            actual = decode_to_unicode(match.GetValue())
            assert actual == decode_to_unicode(text)
コード例 #26
0
ファイル: match_object_test.py プロジェクト: skilluck/keyvi
def test_get_value():
    """Check that GetValue() on a JSON match equals the expected dict.

    Both sides of each comparison are passed through decode_to_unicode
    before being compared.
    """
    dict_compiler = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb":"10"})
    dict_compiler.Add("abc", '{"a" : 2}')
    dict_compiler.Add("abd", '{"a" : 3}')

    with tmp_dictionary(dict_compiler, 'match_object_json.kv') as dictionary:
        first_match = dictionary["abc"]
        first_value = decode_to_unicode(first_match.GetValue())
        assert first_value == decode_to_unicode({"a":2})

        second_match = dictionary["abd"]
        second_value = decode_to_unicode(second_match.GetValue())
        assert second_value == decode_to_unicode({"a":3})
コード例 #27
0
def test_manifest():
    """Check a large integer value and the JSON manifest round trip.

    The value 9223372036854775 stored under 'Leela' must be returned
    unchanged, and the manifest JSON string must parse back to a mapping
    containing the 'drink' entry.
    """
    big_value = 9223372036854775
    dict_compiler = compiler.IntDictionaryCompilerSmallData({"memory_limit_mb": "10"})
    dict_compiler.Add("Leela", big_value)
    dict_compiler.Add("Kif", 2)
    dict_compiler.SetManifest('{"drink": "slurm"}')

    with tmp_dictionary(dict_compiler, 'slurm.kv') as dictionary:
        manifest = json.loads(dictionary.GetManifest())
        leela_match = dictionary.get('Leela')
        assert big_value == leela_match.GetValue()
        assert manifest['drink'] == "slurm"
コード例 #28
0
def test_exact_match_without_completion():
    """Check that every completion returned for "mr " matches b'mr'.

    Each key is built as "<text>\\x1b<text>" with the same weight of 80.
    """
    separator = '\x1b'
    dict_compiler = keyvi.CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase in ("mr", "mozilla firefox", "maa"):
        dict_compiler.Add(phrase + separator + phrase, 80)

    with tmp_dictionary(dict_compiler, 'test_exact_match_without_completion.kv') as dictionary:
        completer = keyvi.MultiWordCompletion(dictionary)
        for completion in completer.GetCompletions("mr "):
            assert completion.GetMatchedString() == b'mr'
コード例 #29
0
def test_get_value_key_only():
    """Check that GetValue() is the empty string in a key-only dictionary."""
    keys = ("abc", "abd")
    dict_compiler = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    for key in keys:
        dict_compiler.Add(key)

    with tmp_dictionary(dict_compiler, 'match_object_key_only.kv') as dictionary:
        for key in keys:
            match = dictionary[key]
            assert match.GetValue() == ''
コード例 #30
0
def test_get_value():
    """Check that GetValue() on a JSON match equals the expected dict."""
    # (key, JSON as added, expected value from GetValue)
    cases = (
        ("abc", '{"a" : 2}', {"a": 2}),
        ("abd", '{"a" : 3}', {"a": 3}),
    )
    dict_compiler = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, added_json, _expected in cases:
        dict_compiler.Add(key, added_json)

    with tmp_dictionary(dict_compiler, 'match_object_json.kv') as dictionary:
        for key, _added, expected_value in cases:
            match = dictionary[key]
            assert match.GetValue() == expected_value
コード例 #31
0
def test_get_value_string():
    """Check that GetValue() returns the string stored for each key."""
    stored = (("abc", "aaaaa"), ("abd", "bbbbb"))
    dict_compiler = StringDictionaryCompiler({"memory_limit_mb": "10"})
    for key, text in stored:
        dict_compiler.Add(key, text)

    with tmp_dictionary(dict_compiler, 'match_object_string.kv') as dictionary:
        for key, text in stored:
            match = dictionary[key]
            assert match.GetValue() == text
コード例 #32
0
def test_forward_backward_completion():
    """Check ForwardBackwardCompletion over a forward and a reversed dictionary.

    The same two weighted phrases are compiled once as-is and once with each
    phrase reversed. Completing "munich" must yield both phrases; sorted by
    (weight, matched string) in descending order, the longer phrase is first.
    """
    weighted_phrases = (
        ("bayern munich vs. real madrid", 80),
        ("munich vs. real madrid", 30),
    )

    forward_compiler = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase, weight in weighted_phrases:
        forward_compiler.Add(phrase, weight)

    backward_compiler = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for phrase, weight in weighted_phrases:
        # The backward dictionary stores every phrase reversed.
        backward_compiler.Add(phrase[::-1], weight)

    with tmp_dictionary(forward_compiler, 'fw_bw_completion.kv') as forward_dict, \
            tmp_dictionary(backward_compiler, 'fw_bw_completion_bw.kv') as backward_dict:
        completer = ForwardBackwardCompletion(forward_dict, backward_dict)
        weighted_matches = []
        for match in completer.GetCompletions("munich"):
            weighted_matches.append(
                (match.GetAttribute('weight'), match.GetMatchedString()))
        weighted_matches.sort(reverse=True)

        assert len(weighted_matches) == 2
        assert weighted_matches[0][1] == 'bayern munich vs. real madrid'
        assert weighted_matches[1][1] == 'munich vs. real madrid'
コード例 #33
0
def test_simple():
    """Compile JSON values via Add() and item assignment, then read them back.

    The key "abd" is added twice (once with Add, once with
    ``compiler[key] = value``); the dictionary must still contain two keys.
    """
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    dict_compiler.Add("abc", '{"a" : 2}')
    dict_compiler.Add("abd", '{"a" : 3}')
    # Exercise the __setitem__ syntax as an alternative to Add().
    dict_compiler["abd"] = '{"a" : 3}'

    with tmp_dictionary(dict_compiler, 'simple_json.kv') as dictionary:
        assert len(dictionary) == 2
        for key, expected_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert dictionary[key].GetValueAsString() == expected_json
コード例 #34
0
def test_raw_serialization():
    """Check that a match survives a dumps() / Match.loads() round trip.

    The match for "abc" is serialized with dumps(), restored with
    pykeyvi.Match.loads(), and must still report the same JSON string.
    """
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("abc", '{"a" : 2}')
    c.Add("abd", '{"a" : 3}')
    with tmp_dictionary(c, 'match_object_json.kv') as d:
        m = d["abc"]
        assert m.GetValueAsString() == '{"a":2}'
        # Use a dedicated name instead of rebinding `d`, which would shadow
        # the context-managed dictionary inside its own `with` block.
        serialized = m.dumps()
        m2 = pykeyvi.Match.loads(serialized)
        assert m2.GetValueAsString() == '{"a":2}'
コード例 #35
0
ファイル: iterators_test.py プロジェクト: subu-cliqz/keyvi
def test_get_all_items():
    """Compare GetAllItems() pairwise against the pairs in key_values."""
    dict_compiler = generate_dictionary_compiler()
    with tmp_dictionary(dict_compiler, 'test_get_all_items.kv') as keyvi_dictionary:
        paired = zip(key_values, keyvi_dictionary.GetAllItems())
        for (expected_key, expected_value), (actual_key, actual_value) in paired:
            assert expected_key == actual_key
            assert expected_value == actual_value
コード例 #36
0
def test_raw_serialization():
    """Check that a match survives a dumps() / Match.loads() round trip.

    The match for "abc" is serialized with dumps(), restored with
    pykeyvi.Match.loads(), and must still report the same JSON string.
    """
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("abc", '{"a" : 2}')
    c.Add("abd", '{"a" : 3}')
    with tmp_dictionary(c, 'match_object_json.kv') as d:
        m = d["abc"]
        assert m.GetValueAsString() == '{"a":2}'
        # Use a dedicated name instead of rebinding `d`, which would shadow
        # the context-managed dictionary inside its own `with` block.
        serialized = m.dumps()
        m2 = pykeyvi.Match.loads(serialized)
        assert m2.GetValueAsString() == '{"a":2}'
コード例 #37
0
ファイル: match_object_test.py プロジェクト: skilluck/keyvi
def test_raw_serialization():
    """Check that a match survives a dumps() / Match.loads() round trip.

    The match for "abc" is serialized with dumps(), restored with
    pykeyvi.Match.loads(), and must still report the same JSON string.
    Both sides of each comparison are passed through decode_to_unicode.
    """
    c = pykeyvi.JsonDictionaryCompiler({"memory_limit_mb":"10"})
    c.Add("abc", '{"a" : 2}')
    c.Add("abd", '{"a" : 3}')
    with tmp_dictionary(c, 'match_object_json.kv') as d:
        m = d["abc"]
        assert decode_to_unicode(m.GetValueAsString()) == decode_to_unicode('{"a":2}')
        # Use a dedicated name instead of rebinding `d`, which would shadow
        # the context-managed dictionary inside its own `with` block.
        serialized = m.dumps()
        m2 = pykeyvi.Match.loads(serialized)
        assert decode_to_unicode(m2.GetValueAsString()) == decode_to_unicode('{"a":2}')
コード例 #38
0
def test_simple():
    """Compile JSON values via Add() and item assignment, then read them back.

    The key "abd" is added twice (once with Add, once with
    ``compiler[key] = value``); the dictionary must still contain two keys.
    """
    dict_compiler = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    dict_compiler.Add("abc", '{"a" : 2}')
    dict_compiler.Add("abd", '{"a" : 3}')
    # Exercise the __setitem__ syntax as an alternative to Add().
    dict_compiler["abd"] = '{"a" : 3}'

    with tmp_dictionary(dict_compiler, 'simple_json.kv') as dictionary:
        assert len(dictionary) == 2
        first_json = dictionary["abc"].GetValueAsString()
        assert first_json == '{"a":2}'
        second_json = dictionary["abd"].GetValueAsString()
        assert second_json == '{"a":3}'
コード例 #39
0
def test_zerobyte():
    """Check that keys containing a zero byte can be stored and looked up.

    The zero byte appears at the start, in the middle and at the end of a
    key. Each lookup must return the value as it was added, and iterating
    all items must yield three entries.
    """
    entries = (
        ("\x00abc", '["a" : 2]'),
        ("abc\x00def", '["a" : 3]'),
        ("cd\x00", '["a" : 4]'),
    )
    dict_compiler = JsonDictionaryCompiler({"memory_limit_mb": "10"})
    for key, payload in entries:
        dict_compiler.Add(key, payload)

    with tmp_dictionary(dict_compiler, 'zerobyte.kv') as dictionary:
        for key, payload in entries:
            assert dictionary[key].GetValue() == payload

        item_count = 0
        for _key, _value in dictionary.GetAllItems():
            item_count += 1
        assert item_count == 3
コード例 #40
0
def test_fuzzy_completion():
    """Check the number of fuzzy prefix completions for a set of queries.

    Each case gives a query, the second argument passed to
    GetFuzzyCompletions, and the number of matches expected back.
    """
    weighted_keys = (
        ("turkei news", 23698),
        ("turkei side", 18838),
        ("turkei urlaub", 23424),
        ("turkisch anfänger", 20788),
        ("turkisch für", 21655),
        ("turkisch für anfänger", 20735),
        ("turkçe dublaj", 28575),
        ("turkçe dublaj izle", 16391),
        ("turkçe izle", 19946),
        ("tuv", 97),
        ("tuv akademie", 9557),
        ("tuv hessen", 7744),
        ("tuv i", 331),
        ("tuv in", 10188),
        ("tuv ib", 10189),
        ("tuv kosten", 11387),
        ("tuv nord", 46052),
        ("tuv sood", 46057),
        ("tus rhein", 462),
        ("tus rheinland", 39131),
        ("tus öffnungszeiten", 15999),
    )
    # (query, second GetFuzzyCompletions argument, expected match count)
    cases = (
        ('tuv', 0, 9),
        ('tue', 1, 1),
        ('tuv h', 1, 2),
        ('tuv h', 2, 7),
        ('tuk töffnungszeiten', 2, 1),
        ('tuk töffnung', 2, 1),
        ('tuk txyzöff', 5, 1),
        ('tuk txyzöffnung', 5, 1),
        ('tuk txyzvöffnung', 6, 1),
        ('tuk ffnung', 2, 1),
    )

    dict_compiler = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for key, weight in weighted_keys:
        dict_compiler.Add(key, weight)

    with tmp_dictionary(dict_compiler, 'fuzzy_completion.kv') as dictionary:
        completer = PrefixCompletion(dictionary)
        for query, max_distance, expected_count in cases:
            matched = []
            for match in completer.GetFuzzyCompletions(query, max_distance):
                matched.append(match.GetMatchedString())
            assert len(matched) == expected_count
コード例 #41
0
ファイル: json_dictionary_test.py プロジェクト: mindis/keyvi
def test_simple_snappy():
    """Compile JSON values with snappy compression and verify the result.

    Besides the stored values, the 'Value Store' statistics must report the
    compression name and threshold that were passed to the compiler.
    """
    options = {'compression': 'snappy', 'compression_threshold': '0'}
    dict_compiler = pykeyvi.JsonDictionaryCompiler(50000000, options)
    dict_compiler.Add("abc", '{"a" : 2}')
    dict_compiler.Add("abd", '{"a" : 3}')

    with tmp_dictionary(dict_compiler, 'simple_json_snappy.kv') as dictionary:
        assert len(dictionary) == 2
        for key, expected_json in (("abc", '{"a":2}'), ("abd", '{"a":3}')):
            assert dictionary[key].GetValueAsString() == expected_json

        value_store = dictionary.GetStatistics()['Value Store']
        assert value_store['__compression'] == "snappy"
        assert value_store['__compression_threshold'] == "0"
コード例 #42
0
def test_simple_snappy():
    """Compile JSON values with snappy compression and verify the result.

    Besides the stored values, the 'Value Store' statistics must report the
    compression name and threshold that were passed to the compiler.
    """
    dict_compiler = pykeyvi.JsonDictionaryCompiler(
        50000000,
        {'compression': 'snappy', 'compression_threshold': '0'})
    dict_compiler.Add("abc", '{"a" : 2}')
    dict_compiler.Add("abd", '{"a" : 3}')

    with tmp_dictionary(dict_compiler, 'simple_json_snappy.kv') as dictionary:
        assert len(dictionary) == 2
        first_json = dictionary["abc"].GetValueAsString()
        assert first_json == '{"a":2}'
        second_json = dictionary["abd"].GetValueAsString()
        assert second_json == '{"a":3}'

        statistics = dictionary.GetStatistics()
        value_store = statistics['Value Store']
        assert value_store['__compression'] == "snappy"
        assert value_store['__compression_threshold'] == "0"
コード例 #43
0
def test_statistics():
    """Check key count and manifest in the 'General' statistics section.

    Missing sections fall back to an empty mapping (or 0 for the key count),
    so an absent entry fails the assertions rather than raising KeyError.
    """
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler()
    for name in ("Leela", "Kif"):
        dict_compiler.Add(name)
    dict_compiler.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(dict_compiler, 'brannigan_statistics.kv') as dictionary:
        general = dictionary.GetStatistics().get('General', {})
        manifest = general.get('manifest', {})
        key_count = int(general.get('number_of_keys', 0))
        assert key_count == 2
        assert manifest.get('author') == "Zapp Brannigan"
コード例 #44
0
def test_unicode_compile():
    """Check that keys given as plain and unicode literals compile and resolve.

    Keys are added both as plain string literals and as ``u"..."`` literals,
    then looked up again in both forms.
    """
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("üöä", '{"y" : 2}')
    # A u"" literal replaces "...".decode('utf-8'): str.decode exists only on
    # Python 2, while the literal yields the same unicode key on 2 and 3.3+.
    c.Add(u"üüüüüüabd", '{"a" : 3}')
    c.Add(u"ääääädäd", '{"b" : 33}')

    with tmp_dictionary(c, 'simple_json.kv') as d:
        assert len(d) == 3
        assert d["üöä"].GetValueAsString() == '{"y":2}'
        assert d[u"üöä"].GetValueAsString() == '{"y":2}'
        assert d["üüüüüüabd"].GetValueAsString() == '{"a":3}'
        assert d["ääääädäd"].GetValueAsString() == '{"b":33}'
コード例 #45
0
ファイル: unicode_test.py プロジェクト: hendrik-cliqz/keyvi
def test_unicode():
    """Check membership and lookup with a unicode key.

    The key "öäü" is added as a plain string literal and then looked up as
    a unicode string via ``in``, item access and get().
    """
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("öäü", '{"a" : 2}')
    c.Add("abd", '{"a" : 3}')
    # use python syntax ala __setitem__
    c["abd"] = '{"a" : 3}'

    # Create the unicode lookup key with a u"" literal: str.decode exists
    # only on Python 2, while the literal works on Python 2 and 3.3+.
    key = u"öäü"
    with tmp_dictionary(c, 'unicode_json.kv') as d:
        assert key in d
        assert d[key].GetValue() == {"a" : 2}
        assert d.get(key).GetValue() == {"a" : 2}
コード例 #46
0
def test_leak():
    """Check that repeated failed lookups do not grow memory usage.

    A missing key is looked up 500000 times. Every 100th iteration the
    garbage collector is run and the value reported by memory_usage_ps()
    must stay below the starting value plus 15000.
    """
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    dict_compiler.Add("something", '["a" : 2]')

    with tmp_dictionary(dict_compiler, 'near_simple.kv') as dictionary:
        gc.collect()
        baseline = memory_usage_ps()
        for iteration in range(500000):
            assert not dictionary.get('something_else')
            if iteration % 100:
                continue
            # Only sample memory on every 100th lookup.
            gc.collect()
            current = memory_usage_ps()
            assert current < baseline + 15000
コード例 #47
0
ファイル: unicode_test.py プロジェクト: hendrik-cliqz/keyvi
def test_unicode_lookup():
    """Check LookupText() on a unicode text containing three stored keys.

    One key is added as a unicode string with a non-ASCII character; all
    three keys must be found in the text and reported as matched strings.
    """
    c = pykeyvi.JsonDictionaryCompiler()
    c.Add("Los Angeles", '{"country" : "USA"}')
    c.Add("Frankfurt am Main", '{"country" : "Germany"}')
    # u"" literals replace "...".decode('utf-8'): str.decode exists only on
    # Python 2, while the literal yields the same unicode string on 2 and 3.3+.
    c.Add(u"Kirchheim bei München", '{"country" : "Germany"}')

    # create unicode string for lookup
    text = u"From Los Angeles via Frankfurt am Main to Kirchheim bei München it should just work"
    with tmp_dictionary(c, 'unicode_json_lookup.kv') as d:
        assert "Kirchheim bei München" in d
        matched_strings = [x.GetMatchedString() for x in d.LookupText(text)]
        assert len(matched_strings) == 3
        assert "Kirchheim bei München" in matched_strings
        assert "Los Angeles" in matched_strings
        assert "Frankfurt am Main" in matched_strings
コード例 #48
0
def test_overlong_completion():
    """Check multi-word completion when one of the stored keys is very long.

    Three keys of the form "<phrase><separator><phrase>" are compiled, one
    of them with an overlong phrase. Completing "html dis" must return all
    three; sorted by (weight, matched string) in descending order, the
    overlong phrase comes last.
    """
    overlong_phrase = "html display=main&referer=3c6120640656e466f726e26616d703b726566657265723d336336313230363436313734363132643631366136313738336432373636363136633733363532373230363837323635363633643237326636363732363136643635326536613733373032373230373436393734366336353364323735333734363137323734373336353639373436353237336535333734363137323734373336353639373436353363326636313365323032363637373433623230336336313230363436313734363132643631366136313738336432373636363136633733363532373230363837323635363632303364323732663733363537323736366336353734326636363666373236353665336636663730363536653436366637323635366535343732363536353364333132363631366437303362363436393733373036633631373933643664363136393665323636313664373033623734363137323637363537343639363433643330323636313664373033623734363137323637363537343734373937303635336433303236363136643730336236333664363433643338333032373230373436393734366336353364323737613735373232363735373536643663336236333662323037613735373232303436366637323635366532363735373536643663336236323635373237333639363336383734323733653436366637323635366532363735373536643663336236323635373237333639363336383734336332663631336532303230323636373734336232303463363536383732363736313665363737333636366637323635366526616d703b616a61783d3126616d703b6d6f62696c653d3026616d703b706167653d3026616d703b6f70656e466f72656e547265653d3127203e204c65687267616e6773666f72656e3c2f613e20&openforentree=1&targetid=130&targettype=1&cmd=6&page=null&fromhistory=1"
    weighted_phrases = (
        ("html disable", 30075),
        ("html disabled", 29650),
        (overlong_phrase, 23732),
    )

    dict_compiler = pykeyvi.CompletionDictionaryCompiler()
    for phrase, weight in weighted_phrases:
        dict_compiler.Add(phrase + MULTIWORD_QUERY_SEPARATOR + phrase, weight)

    with tmp_dictionary(dict_compiler, 'mw_overlong_completion.kv') as dictionary:
        completer = pykeyvi.MultiWordCompletion(dictionary)
        weighted_matches = []
        for match in completer.GetCompletions("html dis"):
            weighted_matches.append(
                (match.GetAttribute('weight'), match.GetMatchedString()))
        weighted_matches.sort(reverse=True)

        assert len(weighted_matches) == 3
        assert weighted_matches[0][1] == 'html disable'
        assert weighted_matches[1][1] == 'html disabled'
        assert weighted_matches[2][1] == overlong_phrase
コード例 #49
0
ファイル: near_test.py プロジェクト: ankit-cliqz/keyvi
def test_near():
    """Check the number of non-greedy GetNear() matches for four queries."""
    entries = (
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
    )
    # (query, second GetNear argument, expected number of matches)
    expectations = (
        ("zahnarzt:u1q0gkqsenhf", 12, 1),
        ("zahnarzt:u1h0gkqsenhf", 12, 3),
        ("zahnarzt:u1h0gkqsenhf", 13, 0),
        ("zahnarzt:u0h0gkqsenhf", 10, 4),
    )

    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    for key, payload in entries:
        dict_compiler.Add(key, payload)

    with tmp_dictionary(dict_compiler, 'near_simple.kv') as dictionary:
        for query, min_length, expected_count in expectations:
            found = list(dictionary.GetNear(query, min_length))
            assert len(found) == expected_count
コード例 #50
0
def test_mw_completion():
    """Check multi-word completions for "mozilla f" and their weight order.

    Keys are built as "<lookup form>\\x1b<display form>". The four
    "mozilla f..." entries must be returned; sorted by (weight, matched
    string) in descending order they follow the weights 80, 43, 30, 12.
    """
    separator = '\x1b'
    # (lookup form, display form, weight)
    entries = (
        ("mozilla firefox", "mozilla firefox", 80),
        ("mozilla footprint", "mozilla footprint", 30),
        ("mozilla fans", "mozilla fans", 43),
        ("mozilla firebird", "mozilla firebird", 12),
        ("internet microsoft explorer", "microsoft internet explorer", 21),
        ("google chrome", "google chrome", 54),
        ("netscape navigator", "netscape navigator", 10),
    )
    expected_order = (
        'mozilla firefox',
        'mozilla fans',
        'mozilla footprint',
        'mozilla firebird',
    )

    dict_compiler = pykeyvi.CompletionDictionaryCompiler()
    for lookup_form, display_form, weight in entries:
        dict_compiler.Add(lookup_form + separator + display_form, weight)

    with tmp_dictionary(dict_compiler, 'mw_completion.kv') as dictionary:
        completer = pykeyvi.MultiWordCompletion(dictionary)
        weighted_matches = []
        for match in completer.GetCompletions("mozilla f"):
            weighted_matches.append(
                (match.GetAttribute('weight'), match.GetMatchedString()))
        weighted_matches.sort(reverse=True)

        assert len(weighted_matches) == 4
        for position, expected_text in enumerate(expected_order):
            assert weighted_matches[position][1] == expected_text
コード例 #51
0
ファイル: near_test.py プロジェクト: ankit-cliqz/keyvi
def test_near_greedy():
    """Check result counts of greedy GetNear() and its prefix relation.

    Ten keys are compiled, the number of greedy matches is checked for four
    queries, and finally the greedy result list must start with the
    non-greedy result list for the same query.
    """
    entries = (
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
    )
    # (query, second GetNear argument, expected number of greedy matches)
    expectations = (
        ("zahnarzt:u1q0gkqsenhf", 12, 2),
        ("zahnarzt:u1h0gkqsenhf", 12, 3),
        ("zahnarzt:u1h0gkqsenhf", 13, 0),
        ("zahnarzt:u0h0gkqsenhf", 10, 10),
    )

    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    for key, payload in entries:
        dict_compiler.Add(key, payload)

    with tmp_dictionary(dict_compiler, 'near_greedy.kv') as dictionary:
        for query, min_length, expected_count in expectations:
            found = list(dictionary.GetNear(query, min_length, True))
            assert len(found) == expected_count

        greedy = []
        for match in dictionary.GetNear("zahnarzt:u0h0gkqsenhf", 10, True):
            greedy.append(match.GetMatchedString())
        non_greedy = []
        for match in dictionary.GetNear("zahnarzt:u0h0gkqsenhf", 10, False):
            non_greedy.append(match.GetMatchedString())

        # Greedy matching must return the non-greedy matches first.
        assert greedy[:len(non_greedy)] == non_greedy
コード例 #52
0
ファイル: near_test.py プロジェクト: ankit-cliqz/keyvi
def test_near_score():
    """Check the scores of greedy GetNear() results by position.

    The first result must score 21, results at positions 1-4 must score 11,
    and all remaining results must score 10.
    """
    entries = (
        ("zahnarzt:u0we9yykdyum", '["a" : 2]'),
        ("zahnarzt:u1h2fde2kct3", '["a" : 3]'),
        ("zahnarzt:u1huf1q5cnxn", '["a" : 4]'),
        ("zahnarzt:u0y2dvey61sw", '["a" : 5]'),
        ("zahnarzt:u1hvqmmj801r", '["a" : 6]'),
        ("zahnarzt:u0vvmknrwgmj", '["a" : 7]'),
        ("zahnarzt:u0ypv22fb9q3", '["a" : 8]'),
        ("zahnarzt:u1qcvvw0hxe1", '["a" : 9]'),
        ("zahnarzt:u1xjx6yfvfz2", '["a" : 10]'),
        ("zahnarzt:u1q0gkqsenhf", '["a" : 11]'),
        ("zahnarzt:u0h0gkqsenhf", '["a" : 11]'),
    )
    dict_compiler = pykeyvi.JsonDictionaryCompiler()
    for key, payload in entries:
        dict_compiler.Add(key, payload)

    with tmp_dictionary(dict_compiler, 'near_score.kv') as dictionary:
        results = list(dictionary.GetNear("zahnarzt:u0h0gkqsenhf", 10, True))
        best = results[0]
        assert best.GetScore() == 21
        # (expected score, slice of results that must carry it)
        for expected_score, group in ((11, results[1:5]), (10, results[5:])):
            for match in group:
                assert match.GetScore() == expected_score
コード例 #53
0
def test_size():
    """Check that len() of a dictionary equals the number of added keys."""
    dict_compiler = pykeyvi.KeyOnlyDictionaryCompiler()
    for name in ("Leela", "Kif"):
        dict_compiler.Add(name)

    with tmp_dictionary(dict_compiler, 'brannigan_size.kv') as dictionary:
        key_count = len(dictionary)
        assert key_count == 2
コード例 #54
0
ファイル: iterators_test.py プロジェクト: cliqz-oss/keyvi
def test_get_all_keys():
    """Compare GetAllKeys() pairwise against the keys in key_values."""
    dict_compiler = generate_dictionary_compiler()
    with tmp_dictionary(dict_compiler, 'test_get_all_keys.kv') as keyvi_dictionary:
        actual_keys = keyvi_dictionary.GetAllKeys()
        for expected_pair, actual_key in zip(key_values, actual_keys):
            expected_key, _ignored_value = expected_pair
            assert expected_key == actual_key
コード例 #55
0
ファイル: iterators_test.py プロジェクト: cliqz-oss/keyvi
def test_get_all_values():
    """Compare GetAllValues() pairwise against the values in key_values.

    The leftover debug ``print keyvi_value`` statement was removed: it is
    Python-2-only syntax (a SyntaxError on Python 3) and only added noise.
    """
    with tmp_dictionary(generate_dictionary_compiler(), 'test_get_all_values.kv') as keyvi_dictionary:
        for (_, base_value), keyvi_value in zip(key_values, keyvi_dictionary.GetAllValues()):
            assert base_value == keyvi_value
コード例 #56
0
ファイル: iterators_test.py プロジェクト: cliqz-oss/keyvi
def test_get_all_items():
    """Compare GetAllItems() pairwise against the pairs in key_values."""
    dict_compiler = generate_dictionary_compiler()
    with tmp_dictionary(dict_compiler, 'test_get_all_items.kv') as keyvi_dictionary:
        actual_items = keyvi_dictionary.GetAllItems()
        for expected_pair, actual_pair in zip(key_values, actual_items):
            expected_key, expected_value = expected_pair
            actual_key, actual_value = actual_pair
            assert expected_key == actual_key
            assert expected_value == actual_value
コード例 #57
0
def test_compiler_empty():
    """Check that compiling without adding any key gives an empty dictionary."""
    empty_compiler = pykeyvi.KeyOnlyDictionaryCompiler()
    with test_tools.tmp_dictionary(empty_compiler, 'empty.kv') as dictionary:
        key_count = len(dictionary)
        assert key_count == 0
コード例 #58
0
def test_compiler_empty_json():
    """Check that an empty JSON compiler produces an empty dictionary."""
    empty_compiler = pykeyvi.JsonDictionaryCompiler()
    with test_tools.tmp_dictionary(empty_compiler, 'empty_json.kv') as dictionary:
        key_count = len(dictionary)
        assert key_count == 0