Code Example #1
File: main.py Project: gaodan223/demo
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        # while True:
        # try:
        #     line = input("请输入测试句子:")
        #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #     print(result)
        # except Exception as e:
        #     logger.info(e)

        # line = input("请输入测试句子:")
        line = "哮喘古代文献也称“鼻息”、“肩息”、“上气”等。"
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)
        print(line)
        print([(x["word"], x["type"]) for x in result["entities"]])

        line = "喘病是指由于外感或内伤,导致肺失宣降,肺气上逆或气无所主,肾失摄纳,以致呼吸困难,甚则张口抬肩,鼻翼煽动,不能平卧等为主要临床特征的一种病证。 "
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)
        print(line)
        print([(x["word"], x["type"]) for x in result["entities"]])
Code Example #2
File: main.py Project: BenSNW/ChineseNER
def evaluate_file(file, target):
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        with open(file, encoding='u8') as fr, open(target,
                                                   mode='w',
                                                   encoding='u8') as fw:
            for line in fr:
                result = model.evaluate_line(
                    sess, input_from_line(line.strip(), char_to_id), id_to_tag)
                print(result)
                fw.write(json.dumps(result, ensure_ascii=False))
                fw.write("\n")
                fw.flush()

        while True:
            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
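
Each prediction is written out as one JSON object per line, so the target file can be read back as JSON Lines. A minimal sketch (target is the path passed to evaluate_file above):

import json

with open(target, encoding='u8') as f:
    for line in f:
        record = json.loads(line)
        print([(e['word'], e['type']) for e in record['entities']])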
Code Example #3
def evaluate_line(testfile):
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    Test = pd.read_csv(testfile)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        # while True:
        #     line = input("请输入测试句子:")
        #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #     print(result)
        ID = []
        entities = []
        for i in tqdm(range(len(Test))):
            id = Test.iloc[i]['id']
            ID.append(id)
            # entity recognition on the title text
            title_words = []
            sentence_title = str(Test.iloc[i]['title'])
            if sentence_title != 'nan':
                # evaluate_line returns a dict with an "entities" list
                result_title = model.evaluate_line(
                    sess, input_from_line(sentence_title, char_to_id),
                    id_to_tag)
                title_words = [
                    item['word'].strip().strip('"').strip()
                    for item in result_title['entities']
                ]

            # entity recognition on the body text
            text_words = []
            sentence_text = str(Test.iloc[i]['text'])
            if sentence_text != 'nan':
                result_text = model.evaluate_line(
                    sess, input_from_line(sentence_text, char_to_id),
                    id_to_tag)
                text_words = [
                    item['word'].strip().strip('"').strip()
                    for item in result_text['entities']
                ]

            # deduplicate entity words across title + body
            # (the original joined the two result dicts directly, which raises a
            # TypeError and reuses stale results whenever a field is 'nan')
            entities_str = ';'.join(set(title_words + text_words))
            entities.append(entities_str)
        output = pd.DataFrame(list(zip(ID, entities)))
        print(output)
        output.to_csv('Test_result.csv', encoding='utf8')
Code Example #4
File: main.py Project: BenSNW/ChineseNER
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        # txt = input("请输入文件:")
        # with open(txt, encoding='u8') as test_file:
        #     for line in test_file.readlines():
        #         line = line.split(',')
        #         result = model.evaluate_line(sess, input_from_line(line[1], char_to_id), id_to_tag)
        #         print(result)

        while True:
            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
Code Example #5
File: main.py Project: leolle/deep_learning
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    user_path = os.path.expanduser("~")

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        with open(
                user_path +
                '/share/deep_learning/data/knowledge_graph/entity_relation/corpus.yml'
        ) as f:
            corpus = yaml.safe_load(f)  # safe_load: yaml.load without a Loader is deprecated and unsafe
            for key in corpus:
                sentences = corpus[key]
                sentences = list(set(sentences))
                for sen in sentences:
                    sen_strip = sen.replace(' ', '')
                    result = model.evaluate_line(
                        sess, input_from_line(sen_strip, char_to_id),
                        id_to_tag)
                    extract_entity(result, key)
Code Example #6
def evaluate_line(sents):
    global static_model
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    graph = tf.Graph()
    # tf_config = tf.ConfigProto()
    # tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # a fresh graph and session are built on every call; note that create_model
    # must construct its ops in this same graph for sess.run to succeed
    sess = tf.InteractiveSession(graph=graph)

    print("start creating model")
    static_model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                                config, id_to_char, logger)
    print("finished creating model")

    result = static_model.evaluate_line(sess,
                                        input_from_line(sents,
                                                        char_to_id), id_to_tag)
    sess.close()

    return result
Code Example #7
def evaluate_line():
    config = utils.load_config(FLAGS.config_file)  # read the config file
    log_path = os.path.join("evl_log", FLAGS.log_test)
    logger = utils.get_logger(log_path)  # log file name and path
    # limit GPU memory
    tf_config = tf.ConfigProto()  # configuration options for the TensorFlow session
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:  # map_file stores the char<->id and tag<->id mappings
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
        # char_to_id maps each character to its id; id_to_char is its inverse
        # print('char_to_id: ', char_to_id)
        # print('tag_to_id: ', tag_to_id)

    with tf.Session(config=tf_config) as sess:
        model = utils.create_model(sess, Model, FLAGS.ckpt_path,
                                   data_utils.load_word2vec, config,
                                   id_to_char, logger)
        while True:
            try:
                line = input("请输入测试句子:")
                if line == 'exit':
                    break
                result = model.evaluate_line(
                    sess, data_utils.input_from_line(line, char_to_id),
                    id_to_tag)
                print(result)
                logger.debug(result)
            except Exception as e:
                logger.info(e)
Code Example #8
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger, False)
        # while True:
        #     # try:
        #     #     line = input("请输入测试句子:")
        #     #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #     #     print(result)
        #     # except Exception as e:
        #     #     logger.info(e)
        #
        #         line = input("请输入测试句子:")
        #         result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #         print(result)
        line = u"香港的房价已经到达历史巅峰,乌溪沙地铁站上盖由新鸿基地产公司开发的银湖天峰,现在的尺价已经超过一万五千港币。"
        line = u"这是测试语句,国务院加入测试"
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)
        print(result)
Code Example #9
def evaluate_lines(user_query):
    config = load_config("config_file")
    logger = get_logger("train.log")
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open("maps.pkl", "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, "ckpt", load_word2vec, config,
                             id_to_char, logger)

        try:
            results = []
            lines = user_query.split('<SEP>')
            for item in lines:
                result = model.evaluate_line(sess,
                                             input_from_line(item, char_to_id),
                                             id_to_tag)
                results.append(result)
            print(results)
        except Exception as e:
            logger.info(e)

    return results
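
Usage sketch for the function above: the caller joins multiple sentences with a literal <SEP> marker (the sentences here are illustrative):

# two queries in one call, separated by <SEP>
results = evaluate_lines("今天北京天气怎么样<SEP>上海明天会下雨吗")
for r in results:
    print([(e["word"], e["type"]) for e in r["entities"]])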
Code Example #10
File: main.py Project: qq345736500/ppt_run2.0
def evaluate_line(if_ask=False,
                  another_input='',
                  save=None):  #save=FLAGS.ckpt_path
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, save, load_word2vec, config,
                             id_to_char, logger)

        if not if_ask:
            line = input("今天过的如何:\n")
        else:
            line = another_input
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)

        print(result)
        mytext, label_list = result  # sample input: "three boxes of watermelon, apples, 180 cm tall, 90 jin, no alcohol, in a good mood"

        return mytext, label_list
Code Example #11
File: main.py Project: facingwaller/ChineseNER
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        # the original wrapped this block in `while True:` with no break,
        # which would reprocess the same file forever
        f1 = r'F:\PycharmProjects\dl2\deeplearning\data\nlpcc2016\nlpcc-iccpol-2016.kbqa.training.testing-data-all.txt'
        with open(f1 + '.crf.txt', 'w', encoding='utf-8') as o1:
            with codecs.open(f1, 'r', 'utf-8') as fin:  # don't shadow the path variable f1
                for l in fin.readlines():
                    line = l.split('\t')[0]
                    result = model.evaluate_line(
                        sess, input_from_line(line, char_to_id), id_to_tag)
                    print(result)
                    o1.write(l.strip('\n').strip('\r') + '\t')
                    if len(result['entities']) > 0:
                        for k, v in dict(result['entities'][0]).items():
                            o1.write("%s\t%s\t" % (k, v))
                    o1.write('\n')
Code Example #12
File: main.py Project: aiedward/Cner_v1
def predict_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    tf_config = tf.ConfigProto()
    # load the character/tag dictionaries
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        # restore the model from the saved checkpoint
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        while True:
            # repeatedly read sentences and run prediction
            line = input("请输入测试句子:")
            inputs, tag = model.evaluate_line(
                sess, input_from_line(line, char_to_id), id_to_tag)
            if FLAGS.server:
                # when testing from the command line, print the prediction as JSON
                result = result_to_json(inputs, tag)
                result = js.dumps(result,
                                  ensure_ascii=False,
                                  indent=4,
                                  separators=(',', ': '))
                with open('./result/result.json', 'w', encoding='utf-8') as f:
                    f.write(result)
                print("预测结果为:{}".format(result))
Code Example #13
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)
            # unicode() marks this as Python 2 code (cf. print_result_p27); in Python 3, use the string literal directly
            line = unicode('在万达集团的老总王健林的著名采访之后,深圳出现了一家公司叫做赚它一个亿网络科技有限公司',
                           'utf-8')
            # line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print_result_p27(result)
            print(line)
            break
Code Example #14
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        #while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        fr = codecs.open("data/test_for_output1.txt", "r", "utf-8")
        fw = codecs.open("result/output1.txt", "w", "utf-8")
        result = []
        for line in fr:
            result.append(model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag))
        for res in result:
            # each res is a dict; ' '.join(res) would join its keys, so write word/type pairs instead
            fw.write(' '.join('%s/%s' % (x['word'], x['type']) for x in res['entities']) + '\n')
        fr.close()
        fw.close()
        end = time.perf_counter()
        print('Running time: %s Seconds' % (end - start))
Code Example #15
def get_text_input():
    # http://127.0.0.1:5002/?inputStr="最开心"
    text = request.args.get('inputStr')
    # print(text)
    if text and text.strip():  # guard against a missing inputStr parameter
        aa = model.evaluate_line(sess, input_from_line(text, char_to_id), id_to_tag)
        return jsonify(aa)
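
This handler (like Code Examples #27 and #29) depends on module-level globals that the snippet does not show. A minimal sketch of the assumed setup, reusing the loading code from the other examples; the Flask app object and route registration are hypothetical, with the port taken from the comment's URL:

from flask import Flask, request, jsonify

app = Flask(__name__)

# assumed one-time setup at import time
config = load_config(FLAGS.config_file)
logger = get_logger(FLAGS.log_file)
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
with open(FLAGS.map_file, "rb") as f:
    char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
sess = tf.Session(config=tf_config)
model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                     config, id_to_char, logger)

app.add_url_rule('/', 'get_text_input', get_text_input)

if __name__ == '__main__':
    app.run(port=5002)  # matches http://127.0.0.1:5002/?inputStr=...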
Code Example #16
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        f = codecs.open(os.path.join(FLAGS.test_filepath, "127_9.txt"), "r",
                        "utf-8")
        s = f.read()
        line = []
        sent = ''
        for ch in s:
            sent += ch
            if ch == '。':
                line.append(sent)
                sent = ''

        # iterate over the sentences split from the file
        # (the original overwrote `line` with input() here, discarding the sentences)
        for info in line:
            print(info)
            result = model.evaluate_line(sess,
                                         input_from_line(info, char_to_id),
                                         id_to_tag)
            for info1 in result['entities']:
                print(info1)
Code Example #17
def evaluate_predictsamples():
    config = load_config(FLAGS.config_file)
    logger = get_logger('log/test.log')
    print_config(config, logger)
    # limit GPU memory
    logger.info("start predict")
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        index = 1
        with open('data/predict.txt', 'w', encoding='utf-8') as f_write:
            read_file = 'data/test.txt'
            for line in open(read_file, 'r', encoding='utf-8-sig').readlines():
                result = model.evaluate_line(sess,
                                             input_from_line(line, char_to_id),
                                             id_to_tag)
                print(result)
                content = str(index) + ','
                if result is not None:
                    entities = result['entities']
                    for entity in entities:
                        if entity:
                            print(entity['word'] + '\t' + entity['type'])
                            # content += entity['word'] + '\t' + str(entity['start']) + '\t' + str(entity['end']) + '\t' + entity['type']+ ';'
                            content += entity['word'] + '\t' + entity[
                                'type'] + ';'
                f_write.write(content + '\n')
                index += 1
Code Example #18
    def predict_from_pb(self, document):
        row = {'content': document}
        df = pandas.DataFrame([row])  # DataFrame.append was removed in pandas 2.0
        filename = "data/{}.csv".format(time.time())
        df.to_csv(filename, index=False, escapechar="\\", columns=['content'])

        with tf.Graph().as_default():
            output_graph_def = tf.GraphDef()
            with open(os.path.join(args.model_folder, 'money_model.pb'), "rb") as f:
                output_graph_def.ParseFromString(f.read())
                tf.import_graph_def(output_graph_def, name="")

            with tf.Session() as sess:  # config=config
                sess.run(tf.global_variables_initializer())

                # Get the input placeholders from the graph by name
                char_input = sess.graph.get_tensor_by_name('CharInputs:0')
                seg_input = sess.graph.get_tensor_by_name('SegInputs:0')
                drop_keep_prob = sess.graph.get_tensor_by_name('Dropout:0')
                # Tensors we want to evaluate,outputs
                lengths = sess.graph.get_tensor_by_name('lengths:0')
                logits = sess.graph.get_tensor_by_name("project/logits_outputs:0")
                # predictions = sess.graph.get_tensor_by_name("Accuracy/predictions:0")

                trans = sess.graph.get_tensor_by_name("crf_loss/transitions:0")

                # fo = open(test_data_path, "r", encoding='utf8')
                # all_data = fo.readlines()
                # fo.close()
                # for line in all_data:  # iterate line by line
                lines = document.split("\n")  # assumption: split on real newlines; the original r"\n" split on a literal backslash-n
                lines = [line for line in lines if len(line) > 0]

                list_original = []
                list_amounts = []
                for line in lines:
                    input_batch = input_from_line(line, self.char_to_id)  # format the test input
                    feed_dict = create_feed_dict(input_batch, char_input, seg_input, drop_keep_prob)  # build the input feed_dict
                    seq_len, scores = sess.run([lengths, logits], feed_dict)
                    print('---')

                    transition_matrix = trans.eval()
                    batch_paths = decode(scores, seq_len, transition_matrix)
                    tags = [self.id_to_tag[str(idx)] for idx in batch_paths[0]]
                    #print(tags)
                    result = result_to_json(input_batch[0][0], tags)
                    original = str(result['string'])
                    entities = result['entities']

                    if len(entities) != 0:
                        list_original.extend([original] * len(entities))
                        for entity in entities:
                            # numeric amounts need extra augmentation logic
                            if digit_regex.match(entity['word']) and len(entity['word']) >= 1:
                                aug_word = augment(entity['word'], line)
                                list_amounts.append(aug_word)
                            else:
                                list_amounts.append(entity['word'])
                            #print(entity['word'])
                return {"answer":list_amounts,"line":list_original}
Code Example #19
    def address_segment(self, address):
        # print(address, type(address))  # <class 'str'>
        model, sess, char_to_id, id_to_tag = get_model()
        address_segment_result = model.evaluate_line(
            sess, input_from_line(address, char_to_id), id_to_tag)
        # print(address_segment_result, type(address_segment_result))  # <class 'dict'>
        return address_segment_result
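
get_model() is not shown in this snippet; in this pattern it is usually a cached loader so the checkpoint is restored only once per process. A hypothetical sketch, reusing the setup code from the other examples:

_cached = None

def get_model():
    """Hypothetical cached loader: build the model once, then reuse it."""
    global _cached
    if _cached is None:
        config = load_config(FLAGS.config_file)
        logger = get_logger(FLAGS.log_file)
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
        sess = tf.Session(config=tf_config)
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        _cached = (model, sess, char_to_id, id_to_tag)
    return _cached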
Code Example #20
def evaluate_file(config):
    config = load_config(config["config_file"])
    logger = get_logger(config["log_file"])

    with open(config["map_file"], "rb") as f:
        word_to_id, id_to_word, tag_to_id, id_to_tag = pickle.load(f)

    fout = open("data/result_entities.json", "w+")

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        model = Model(config)

        logger.info("读取现有模型...")
        ckpt = tf.train.get_checkpoint_state(config["ckpt_path"])  # get the checkpoint state from the model path
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        with open("data/result.json") as f:
            for index, line in enumerate(f):
                if index % 1000 == 0:
                    print(index)

                data = json.loads(line.strip())
                answer = data["answers"][0]
                if not answer:
                    answer += " "
                result = model.evaluate_line(sess, input_from_line(answer, word_to_id), id_to_tag)
                entities = list(set([item["word"] for item in result["entities"]]))
                data["entity_answers"] = [entities]
                fout.write(json.dumps(data, ensure_ascii=False) + '\n')

    fout.close()
Code Example #21
def evaluate_ht():
    submit_path_ht = 'submit_sample/hetong.csv'
    submit_path_file = open(submit_path_ht, 'a+', encoding='gbk')
    submit_path_file.write('公告id,甲方,乙方,项目名称,合同名称,合同金额上限,合同金额下限,联合体成员\n')
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        rootdir = '/home/utopia/corpus/FDDC_part2_data/FDDC_announcements_round1_test_a_20180605/重大合同/html/'
        files = os.listdir(rootdir)  # list everything in the directory (renamed from `list`, which shadows the builtin)
        for i in range(0, len(files)):
            htmlpath = os.path.join(rootdir, files[i])
            if os.path.isfile(htmlpath):
                print(htmlpath)
                s_arr = levelText_withtable(htmlpath)
                candidates = []
                for j in range(len(s_arr)):
                    sen = s_arr[j]
                    result = model.evaluate_line(
                        sess, input_from_line(sen, char_to_id), id_to_tag)
                    entities = result.get('entities')
                    if len(entities) > 0:
                        for en in entities:
                            en['sid'] = j
                            en['pid'] = files[i]
                            candidates.append(en)
                org_ht(candidates, submit_path_file)
                print('-------------------------------------------------')
Code Example #22
def evaluate_line():
    # hard-coded paths
    FLAGS.config_file = 'forum_config/config_file'
    FLAGS.log_file = 'forum_config/log/train.log'
    FLAGS.ckpt_path = 'forum_ckpt/'
    # FLAGS.ckpt_path = 'ckpt/'
    FLAGS.map_file = 'forum_config/maps.pkl'

    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("请输入测试句子:")
            result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            print(result)
Code Example #23
def content_ner():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        for i in range(302):
            newslist = []
            m = i + 1
            f = open('newsLists/%d.txt' % m, 'r')
            for line in f.readlines():
                newslist.append(int(line))
            fout = codecs.open('content_ner/%d.json' % m,
                               'w',
                               encoding='utf-8')
            for j in range(len(newslist)):  # distinct index; the original reused i, shadowing the outer loop variable
                day = newslist[j]
                f = codecs.open('D:/PycharmProjects/news_data/%d.json' % day,
                                encoding='utf-8')
                f_d = json.load(f)
                content = f_d["content"]
                content = strip_tags(content)
                result = model.evaluate_line(
                    sess, input_from_line(content, char_to_id), id_to_tag)
                dicObj = json.dumps(result)
                fout.write(dicObj)
                fout.write("\n")
            fout.close()
Code Example #24
File: main2.py Project: lzx00000/mynew
def evaluate_line():
    config = load_config(FLAGS.config_file)  # read the configuration from config_file
    # e.g. {'model_type': 'idcnn', 'num_chars': 3538, 'char_dim': 100, 'num_tags': 51, 'seg_dim': 20, 'lstm_dim': 100, 'batch_size': 20, 'emb_file': 'E:\\pythonWork3.6.2\\NERuselocal\\NERuselocal\\data\\vec.txt', 'clip': 5, 'dropout_keep': 0.5, 'optimizer': 'adam', 'lr': 0.001, 'tag_schema': 'iobes', 'pre_emb': True, 'zeros': True, 'lower': False}
    logger = get_logger(FLAGS.log_file)  # log file name: train.log
    # limit GPU memory
    tf_config = tf.ConfigProto()  # session configuration object, passed when the session is created
    tf_config.gpu_options.allow_growth = True  # grow GPU memory on demand; alternatively cap usage with config.gpu_options.per_process_gpu_memory_fraction = 0.4
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
Code Example #25
File: main.py Project: sunyiwei24601/WebDb_Linux
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # the map_file path may need to be changed
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            # TODO: change to file-in/file-out
            line = input("请输入测试句子:")
            result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            print(result)
Code Example #26
def get_slot_dl(text):
    """
    获得一个句子的slot_table
    :param text: 用户输入的句子
    # :param tf_sess: tensorflow 的sessiong
    :return:
    """
    tf.reset_default_graph()   # FIXME: 为了可以保证重复多次输入(不过会很慢)整合代码时,可以尝试将NLU和DM分离,使用统一的文件管理。
    FLAGS.config_file = '/forum_config/config_file'
    FLAGS.log_file = '/forum_config/log/train.log'
    FLAGS.ckpt_path = '/forum_ckpt/'
    FLAGS.map_file = '/forum_config/maps.pkl'
    
    file_path = os.path.dirname(__file__)
    config = load_config(file_path+FLAGS.config_file)
    logger = get_logger(file_path+FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    with open(file_path+FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, file_path + FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)  # prefix ckpt_path with file_path, like the other paths above
        result = model.evaluate_line(sess, input_from_line(text, char_to_id), id_to_tag)
    return result
Code Example #27
File: main.py Project: zyq11223/NLP_basis
def get_text_input():

    text = request.args.get('inputStr')

    if text:
        aa = model.evaluate_line(sess, input_from_line(text, char_to_id),
                                 id_to_tag)
        return jsonify(aa)
Code Example #28
File: main.py Project: rongyuer/Task
def nlp():
    #print(request.headers)
    line = request.form.get('text')
    result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
    #print(result)
    answer = ''
    for i in range(len(result['entities'])):
        answer = answer + result['entities'][i]['word'] + '\n'  # '/n' was a typo for the newline escape
    return answer
Code Example #29
def get_text_input():
    # logging.info("connect successfully")
    # tf_config = tf.ConfigProto()
    # tf_config.gpu_options.allow_growth = True   
    # http://127.0.0.1:5002/?inputStr="..."
    text = request.args.get('inputStr')
    if text:     
        aa = model.evaluate_line(sess, input_from_line(text, char_to_id), id_to_tag)
        return jsonify(aa)
Code Example #30
def evaluate2(line_lis, sess):
    all_dict = []
    for i in line_lis:
        result = model.evaluate_line(sess, input_from_line(i, char_to_id), id_to_tag)
        mytext, label_list = result  # sample input: "three boxes of watermelon, apples, 180 cm tall, 90 jin, no alcohol, in a good mood"
        dii = dictionary(label_list=label_list, mytext=mytext)
        all_dict.append(dii)
    return all_dict
Code Example #31
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("请输入测试句子:")
            result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            print(result)