Esempio n. 1
0
def parseCommon(sentence, model_dir_new, dset_dir, gpu, model_dir, data, model,
                seg):
    """Decode ``sentence`` with the predefined "common" model.

    If ``model_dir`` is already ``"common"`` the supplied ``data`` and
    ``model`` are reused as-is.  Otherwise the common dset and model files
    under ``/app/data/CommonNER`` are loaded first and replace them.

    Args:
        sentence: Input handed to ``data.generate_instance_with_gaz``.
        model_dir_new: Unused here; kept so the signature mirrors ``parse``.
        dset_dir: Path of the currently loaded dset; returned unchanged
            unless the common model gets loaded.
        gpu: Forwarded to ``main.load_model`` and ``main.parse_text``.
        model_dir: Name of the currently loaded model.
        data: Currently loaded data settings object.
        model: Currently loaded model object.
        seg: Forwarded to ``main.parse_text``.

    Returns:
        Tuple ``(model_dir, dset_dir, data, model, result_output)``.
        ``result_output`` is the decode result serialized with ``js.dumps``,
        or ``""`` when the predefined model files are missing; in that case
        the other four values are returned exactly as they were passed in.
    """
    # Default value so the final return is valid even when nothing is decoded.
    result_output = ""

    if model_dir != "common":
        common_dset = "/app/data/CommonNER/common.dset"
        common_model = "/app/data/CommonNER/common.35.model"

        if not (os.path.exists(common_dset) and os.path.exists(common_model)):
            print("have not predefine model")
            # Leave model_dir untouched: reporting "common" here would make
            # later calls treat the still-loaded old model as the common one.
            return model_dir, dset_dir, data, model, result_output

        # Initialise the common model and make it the current one.
        data = main.load_data_setting(common_dset)
        model = main.load_model(common_model, data, gpu)
        model_dir = "common"
        dset_dir = common_dset

    # Process the sentence with whichever common model is now current.
    data.generate_instance_with_gaz(sentence, 'sentence')
    decode_results = main.parse_text(model, data, 'raw', gpu, seg)
    result = data.write_decoded_results_back(decode_results, 'raw')
    result_output = js.dumps(result)

    return model_dir, dset_dir, data, model, result_output
Esempio n. 2
0
 def test_single_noun(self):
     # '東京' is expected to come back as exactly one token whose
     # word / reading / type fields match the table below.
     tokens = parse_text('東京')
     self.assertEqual(len(tokens), 1)
     expected_fields = (
         ('word', '東京'),
         ('reading', 'トウキョウ'),
         ('type', '名詞'),
     )
     for field, expected in expected_fields:
         self.assertEqual(tokens[0][field], expected)
Esempio n. 3
0
 def test_user_dic(self):
     # '資源ごみ' must be kept as a single token (presumably thanks to a
     # user-dictionary entry, going by the test name -- confirm in setup).
     self.assertEqual(len(parse_text('資源ごみ')), 1)
Esempio n. 4
0
 def test_text(self):
     # The phrase should split into four tokens; the 'type' of the tokens
     # at index 1 and 2 is checked against the expected values.
     tokens = parse_text('プラスチックと燃えるゴミ')
     self.assertEqual(len(tokens), 4)
     for position, expected_type in ((1, '助詞'), (2, '動詞')):
         self.assertEqual(tokens[position]['type'], expected_type)
Esempio n. 5
0
 def test_multiple_noun(self):
     # '東京スカイツリー' is expected to be split into three tokens.
     token_count = len(parse_text('東京スカイツリー'))
     self.assertEqual(token_count, 3)
Esempio n. 6
0
def parse(sentence, model_dir_new, dset_dir, gpu, model_dir, data, model, seg):
    """Decode ``sentence`` with the model named ``model_dir_new``.

    If ``model_dir_new`` equals the currently loaded ``model_dir``, the
    supplied ``data`` and ``model`` are reused.  Otherwise the directory
    ``/app/data/<model_dir_new>`` is searched for ``*.model`` files, the
    highest numeric index found is used to build the path
    ``<model_dir_new>.train.<index>.model``, and that model is loaded
    together with ``<model_dir_new>.train.dset``.  If any of those files is
    missing, the call is delegated to ``parseCommon``.

    Args:
        sentence: Input handed to ``data.generate_instance_with_gaz``.
        model_dir_new: Name of the requested model directory.
        dset_dir: Path of the currently loaded dset.
        gpu: Forwarded to ``main.load_model`` and ``main.parse_text``.
        model_dir: Name of the currently loaded model.
        data: Currently loaded data settings object.
        model: Currently loaded model object.
        seg: Forwarded to ``main.parse_text``.

    Returns:
        Tuple ``(model_dir, dset_dir, data, model, result_output)`` that
        describes the model now loaded plus the decode result serialized
        with ``js.dumps``.
    """
    if model_dir_new != model_dir:
        model_root = "/app/data/" + model_dir_new
        dset_dir_new = model_root + "/" + model_dir_new + ".train.dset"
        one_model_dir_new = None

        if os.path.exists(model_root) and os.path.exists(dset_dir_new):
            # Work out the file path: pick the highest numbered *.model file.
            max_value = 0
            for file_name in os.listdir(model_root + "/"):
                parts = file_name.split('.')
                if len(parts) > 1 and parts[-1] == 'model':
                    try:
                        index_int = int(parts[-2])
                    except ValueError:
                        # e.g. "best.model": no numeric index, so it cannot
                        # take part in the "latest checkpoint" selection.
                        continue
                    if max_value < index_int:
                        max_value = index_int

            candidate = (model_root + "/" + model_dir_new + ".train." +
                         str(max_value) + ".model")
            print(candidate)
            if os.path.exists(candidate):
                one_model_dir_new = candidate

        if one_model_dir_new is None:
            print("can not fine the model")
            print("use default model")
            return parseCommon(sentence, model_dir_new, dset_dir, gpu,
                               model_dir, data, model, seg)

        # Initialise the requested model and make it the current one.
        model_dir = model_dir_new
        dset_dir = dset_dir_new
        data = main.load_data_setting(dset_dir)
        model = main.load_model(one_model_dir_new, data, gpu)

    # Process the sentence with the (possibly freshly loaded) model.
    data.generate_instance_with_gaz(sentence, 'sentence')
    decode_results = main.parse_text(model, data, 'raw', gpu, seg)
    result = data.write_decoded_results_back(decode_results, 'raw')
    result_output = js.dumps(result)

    return model_dir, dset_dir, data, model, result_output
Esempio n. 7
0
 def test_parse_text(self):
     # main.parse_text is expected to return the set of entries found in
     # the comma/semicolon separated input string.
     parsed = main.parse_text('one, two, five six; seven 7')
     expected = {'one', 'two', 'five six', 'seven 7'}
     self.assertEqual(parsed, expected)