def parseCommon(sentence, model_dir_new, dset_dir, gpu, model_dir, data, model, seg): if "common" == model_dir: data.generate_instance_with_gaz(sentence, 'sentence') decode_results = main.parse_text(model, data, 'raw', gpu, seg) result = data.write_decoded_results_back(decode_results, 'raw') result_output = js.dumps(result) else: model_dir = "common" haveDset = os.path.exists("/app/data/CommonNER/common.dset") haveModel = os.path.exists("/app/data/CommonNER/common.35.model") if haveDset and haveModel: dset_dir = "/app/data/CommonNER/common.dset" one_model_dir = "/app/data/CommonNER/common.35.model" #初始化 data = main.load_data_setting(dset_dir) model = main.load_model(one_model_dir, data, gpu) #处理sentence data.generate_instance_with_gaz(sentence, 'sentence') decode_results = main.parse_text(model, data, 'raw', gpu, seg) result = data.write_decoded_results_back(decode_results, 'raw') result_output = js.dumps(result) else: print "have not predefine model" return model_dir, dset_dir, data, model, result_output
def test_single_noun(self):
    """A single noun yields exactly one token carrying word, reading and POS."""
    tokens = parse_text('東京')
    self.assertEqual(len(tokens), 1)
    token = tokens[0]
    self.assertEqual(token['word'], '東京')
    self.assertEqual(token['reading'], 'トウキョウ')
    self.assertEqual(token['type'], '名詞')
def test_user_dic(self):
    """A user-dictionary entry is parsed as a single token."""
    tokens = parse_text('資源ごみ')
    self.assertEqual(len(tokens), 1)
def test_text(self):
    """A short phrase splits into four tokens with the expected POS tags."""
    tokens = parse_text('プラスチックと燃えるゴミ')
    self.assertEqual(len(tokens), 4)
    for index, expected_pos in ((1, '助詞'), (2, '動詞')):
        self.assertEqual(tokens[index]['type'], expected_pos)
def test_multiple_noun(self):
    """A compound noun is segmented into three tokens."""
    tokens = parse_text('東京スカイツリー')
    self.assertEqual(len(tokens), 3)
def parse(sentence, model_dir_new, dset_dir, gpu, model_dir, data, model, seg): result_output = "" if model_dir_new == model_dir: data.generate_instance_with_gaz(sentence, 'sentence') decode_results = main.parse_text(model, data, 'raw', gpu, seg) result = data.write_decoded_results_back(decode_results, 'raw') result_output = js.dumps(result) else: dset_dir_new = "/app/data/" + model_dir_new + "/" + model_dir_new + ".train.dset" haveModel = os.path.exists("/app/data/" + model_dir_new) haveDset = os.path.exists(dset_dir_new) if haveModel and haveDset: #处理文件路径 dirs = os.listdir("/app/data/" + model_dir_new + "/") maxValue = 0 for file in dirs: strArray = file.split('.') strArrayLen = len(strArray) if strArrayLen > 1 and strArray[strArrayLen - 1] == 'model': index_int = int(strArray[strArrayLen - 2]) if maxValue < index_int: maxValue = index_int one_model_dir_new = "/app/data/" + model_dir_new + "/" + model_dir_new + ".train." + str( maxValue) + ".model" print one_model_dir_new haveModel = os.path.exists(one_model_dir_new) if haveModel: model_dir = model_dir_new dset_dir = dset_dir_new one_model_dir = one_model_dir_new #初始化 data = main.load_data_setting(dset_dir) model = main.load_model(one_model_dir, data, gpu) #处理sentence data.generate_instance_with_gaz(sentence, 'sentence') decode_results = main.parse_text(model, data, 'raw', gpu, seg) result = data.write_decoded_results_back(decode_results, 'raw') result_output = js.dumps(result) else: print "can not fine the model" print "use default model" model_dir, dset_dir, data, model, result_output = parseCommon( sentence, model_dir_new, dset_dir, gpu, model_dir, data, model, seg) else: print "can not fine the model" print "use default model" model_dir, dset_dir, data, model, result_output = parseCommon( sentence, model_dir_new, dset_dir, gpu, model_dir, data, model, seg) return model_dir, dset_dir, data, model, result_output
def test_parse_text(self):
    """parse_text splits the input string into the expected set of phrases."""
    expected = {'one', 'two', 'five six', 'seven 7'}
    actual = main.parse_text('one, two, five six; seven 7')
    self.assertEqual(actual, expected)