def compile(args):
    """Compile a tab-separated input file into a keyvi dictionary file.

    Each line of ``args.input_file`` is split on tabs and added to the
    compiler selected by ``args.dict_type``. Lines that cannot be parsed or
    added are reported on stdout and skipped. Afterwards the dictionary is
    compiled and written to ``args.output_file``.

    Args:
        args: Object exposing ``compiler_params`` (an iterable of key/value
            pairs passed to the compiler), ``dict_type`` (one of 'json',
            'string', 'int', 'completion', 'key-only'), ``input_file`` and
            ``output_file``.

    Returns:
        The string 'Must never reach here' when ``dict_type`` is not one of
        the supported values; otherwise ``None``.
    """
    params = {key: value for key, value in args.compiler_params}
    dict_type = args.dict_type

    if dict_type == 'json':
        dictionary = keyvi.JsonDictionaryCompiler(params)
    elif dict_type == 'string':
        dictionary = keyvi.StringDictionaryCompiler(params)
    elif dict_type == 'int':
        dictionary = keyvi.IntDictionaryCompiler(params)
    elif dict_type == 'completion':
        dictionary = keyvi.CompletionDictionaryCompiler(params)
    elif dict_type == 'key-only':
        dictionary = keyvi.KeyOnlyDictionaryCompiler(params)
    else:
        return 'Must never reach here'

    with open(args.input_file) as file_in:
        for line in file_in:
            line = line.rstrip('\n')
            try:
                splits = line.split('\t')
                if dict_type == 'key-only':
                    dictionary.Add(splits[0])
                elif dict_type in ('int', 'completion'):
                    # These compilers take an integer as second argument.
                    dictionary.Add(splits[0], int(splits[1]))
                else:
                    dictionary.Add(splits[0], splits[1])
            except Exception:
                # Best effort: report the bad line and continue. Catching
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate so the run can still be aborted.
                print('Can not parse line: {}'.format(line))

    dictionary.Compile()
    dictionary.WriteToFile(args.output_file)
def generate_keyvi(key_values, filename):
    """Build a key-only keyvi dictionary from ``key_values`` and write it.

    Every item of ``key_values`` is added to a KeyOnlyDictionaryCompiler
    (created with a 10 MB memory limit setting), which is then compiled and
    written to ``filename``.
    """
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    add_key = compiler.Add
    for entry in key_values:
        add_key(entry)
    compiler.Compile()
    compiler.WriteToFile(filename)
def test_manifest():
    """A manifest set on the compiler is returned by the dictionary."""
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb":"10"})
    for name in ("Leela", "Kif"):
        compiler.Add(name)
    compiler.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(compiler, 'brannigan_manifest.kv') as dictionary:
        manifest = dictionary.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
def test_get_value_key_only():
    """Matches from a key-only dictionary carry an empty value."""
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    keys = ("abc", "abd")
    for key in keys:
        compiler.Add(key)

    with tmp_dictionary(compiler, 'match_object_key_only.kv') as dictionary:
        # Look up each key in insertion order and compare its value with ''.
        for key in keys:
            match = dictionary[key]
            assert decode_to_unicode(match.GetValue()) == decode_to_unicode('')
def test_statistics():
    """GetStatistics() reports the key count and manifest under 'General'."""
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb":"10"})
    for name in ("Leela", "Kif"):
        compiler.Add(name)
    compiler.SetManifest({"author": "Zapp Brannigan"})

    with tmp_dictionary(compiler, 'brannigan_statistics.kv') as dictionary:
        general = dictionary.GetStatistics().get('General', {})
        manifest = general.get('manifest', {})
        key_count = int(general.get('number_of_keys', 0))
        assert key_count == 2
        assert manifest.get('author') == "Zapp Brannigan"
def test_manifest_after_compile():
    """A manifest set after Compile() is still written to the file."""
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb":"10"})
    for name in ("Leela", "Kif"):
        compiler.Add(name)
    compiler.Compile()
    compiler.SetManifest({"author": "Zapp Brannigan"})

    target_path = os.path.join(tempfile.gettempdir(),'brannigan_manifest2.kv')
    try:
        compiler.WriteToFile(target_path)
        dictionary = keyvi.Dictionary(target_path)
        manifest = dictionary.GetManifest()
        assert manifest['author'] == "Zapp Brannigan"
        # Drop the dictionary reference before the file is removed below.
        del dictionary
    finally:
        os.remove(target_path)
def test_size():
    """len() of the dictionary equals the number of keys added."""
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb":"10"})
    for name in ("Leela", "Kif"):
        compiler.Add(name)

    with tmp_dictionary(compiler, 'brannigan_size.kv') as dictionary:
        assert len(dictionary) == 2
def test_compile_step_missing():
    """WriteToFile() without a preceding Compile() raises RuntimeError."""
    compiler = keyvi.KeyOnlyDictionaryCompiler()
    for key in ("abc", "abd"):
        compiler.Add(key)

    with raises(RuntimeError):
        compiler.WriteToFile("compile_step_missing.kv")
def test_compiler_empty():
    """A compiler that received no keys yields a dictionary of length 0."""
    empty_compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    with test_tools.tmp_dictionary(empty_compiler, 'empty.kv') as dictionary:
        assert len(dictionary) == 0
def test_compiler_no_compile_edge_case_empty():
    """Create a compiler and delete it without adding keys or compiling.

    NOTE(review): presumably guards against errors on teardown of an unused
    compiler -- there is no explicit assertion.
    """
    unused_compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    del unused_compiler
def test_compiler_no_compile_edge_case():
    """Add keys to a compiler and delete it without ever compiling.

    NOTE(review): presumably guards against errors on teardown of a compiler
    holding uncompiled keys -- there is no explicit assertion.
    """
    compiler = keyvi.KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
    for key in ("abc", "abd"):
        compiler.Add(key)
    del compiler