Esempio n. 1
0
def test_text():
    """Run the whole pipeline on one hard-coded sentence and write the JSON.

    Builds the corpus graph, saves it as JSON, builds a text graph for a
    single sentence, fills its edge weights from the corpus graph and
    writes the resulting JSON to ``./data/text.json``.
    """
    corpus_graph = CorpusGraph()

    # Alternative (disabled): read the corpus model from a JSON file
    # corpus_graph.load_from_json()

    # Build the corpus model (per the original note, this connects to MongoDB)
    corpus_graph.build_corpus()

    # Persist the corpus model as a JSON file
    corpus_graph.save_as_json()

    text_graph = TextGraph()

    # Alternative (disabled): read the sentences to segment from MongoDB
    # sentences = text_graph.get_sentences(isRandom=False)

    # Build the graph model for the list of sentences
    text_graph.build(["准许原告肖振明撤回起诉"])

    # Fill in the edge weights
    text_graph.fill_edge(corpus_graph)

    # Emit the JSON needed for the sentence graph; per the original note,
    # a path of None returns the JSON instead of saving it to disk
    output_path = './data/text.json'
    text_graph.make_json(corpus_graph, path=output_path)
def test_text():
    """Build the corpus graph and produce the text-graph JSON for fetched sentences.

    The sentences come from ``TextGraph.get_sentences(isRandom=False)``;
    ``make_json`` is called without a path.
    """
    corpus_graph = CorpusGraph()
    corpus_graph.build_corpus()

    # The return value of this lookup is not used.
    corpus_graph.get_sorted_neighbour('一')

    # Disabled debugging output:
    # print("###############")
    # for edge in corpus_graph.corpus.edges:
    #     print(edge)
    # break
    # print('###', corpus_graph.corpus['朝'])

    text_graph = TextGraph()
    text_graph.build(text_graph.get_sentences(isRandom=False))
    text_graph.fill_edge(corpus_graph)
    text_graph.make_json(corpus_graph)
Esempio n. 3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from Network import CorpusGraph
from Network import TextGraph
from ResultReference import JiebaChecker
from ResultReference import ThulacChecker
from IO import DisIO

# Module-level corpus graph, read by tokenize(); populated from a JSON file.
cg = CorpusGraph()
# Disabled alternative: build the corpus and save it as JSON instead of loading it.
# cg.build_corpus()
# cg.save_as_json('./data/ten.json')
cg.load_from_json('./data/corpus_50k.json')
# Checkers that tokenize() calls with the sentence and its segmentation result.
jieba_checker = JiebaChecker()
thulac_checker = ThulacChecker()

def tokenize(sentence):
    """Segment one sentence and run the jieba and THULAC checkers on the result.

    Builds a TextGraph from ``sentence``, fills its edges from the
    module-level corpus graph ``cg``, cuts it, and passes the first cut
    result to both checkers.

    NOTE(review): the visible code returns nothing and never uses the four
    values extracted at the end; the function may continue beyond this
    excerpt. Confirm against the full file.
    """
    tg = TextGraph()
    tg.build([sentence])
    tg.fill_edge(cg)

    # Only a single sentence is segmented for now, so take the first result.
    result = tg.cut()[0]
    jieba_check = jieba_checker.check(sentence, result)
    thulac_check = thulac_checker.check(sentence, result)

    # Unpack each checker's own result and its "overlap" entry.
    jieba_result = jieba_check["jieba_result"]
    jieba_overlap = jieba_check["overlap"]

    thulac_result = thulac_check["thulac_result"]
    thulac_overlap = thulac_check["overlap"]
Esempio n. 4
0
from flask import Flask
from flask import request
from flask import send_from_directory
from flask import send_file

from IO import RemoteIO
from Network import CorpusGraph
from Network import TextGraph
from ResultReference import JiebaChecker, ThulacChecker
from utl import count as time_count
import os
import json

# Build the corpus graph model from a JSON file
cg = CorpusGraph()
cg.load_from_json()

# Checkers for proofreading segmentation results
jieba_checker = JiebaChecker()
thulac_checker = ThulacChecker()

rio = RemoteIO()

# Flask application; templates and static files both live in ./presentation
app = Flask(__name__, template_folder='./presentation', static_folder='./presentation')


@app.route('/')
def hello_world():
    """Serve the WordLink HTML page for the site root."""
    page_path = './presentation/WordLink.html'
    return send_file(page_path)

def make_corpus():
    """Build a corpus graph, then load the JSON corpus into the same object.

    Works on a function-local CorpusGraph instance; nothing is returned.
    """
    local_graph = CorpusGraph()
    local_graph.build_corpus()
    # Saving to JSON is currently disabled:
    # local_graph.save_as_json()
    local_graph.load_from_json()
Esempio n. 6
0
import sys

from Network import CorpusGraph

# Raw command-line arguments (including the script name at index 0).
cmds = sys.argv

cg = CorpusGraph()

# Build the corpus and dump it to JSON only when both flags are present.
if all(flag in cmds for flag in ("build", "toJson")):
    cg.build_corpus()
    cg.save_as_json()