def test_text():
    cg = CorpusGraph()
    # Load the corpus model from a JSON file
    # cg.load_from_json()
    # Connect to MongoDB and build the corpus model
    cg.build_corpus()
    # Save the corpus model as a JSON file
    cg.save_as_json()
    tg = TextGraph()
    # Read sentences from MongoDB for tokenization
    # sentences = tg.get_sentences(isRandom=False)
    sentences = ["准许原告肖振明撤回起诉"]
    # Build the graph model for the list of sentences
    tg.build(sentences)
    # Fill in the edge weights
    tg.fill_edge(cg)
    # Emit the JSON file needed by the sentence graph; if path is None,
    # the JSON is returned instead of being written to disk
    tg.make_json(cg, path='./data/text.json')
def test_text():
    cg = CorpusGraph()
    cg.build_corpus()
    cg.get_sorted_neighbour('一')
    tg = TextGraph()
    sentences = tg.get_sentences(isRandom=False)
    tg.build(sentences)
    tg.fill_edge(cg)
    tg.make_json(cg)
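# A minimal sketch of the faster startup path hinted at in test_text()
# above: load the corpus written by cg.save_as_json() instead of
# rebuilding it from MongoDB. The function name is hypothetical; the
# sample sentence is the one used in test_text().
def test_text_from_json():
    cg = CorpusGraph()
    cg.load_from_json()  # reuse the corpus saved by cg.save_as_json()
    tg = TextGraph()
    tg.build(["准许原告肖振明撤回起诉"])
    tg.fill_edge(cg)
    print(tg.cut()[0])  # the segmented sentence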
def tokenize():
    if request.method == 'GET':
        tg = TextGraph()
        sentence = "没有输入"  # default used when no input is given
        # Read the sentence to tokenize from the query string
        if request.args.get('sentence', '') != "":
            sentence = request.args.get('sentence', '')
        tg.build([sentence])
        tg.fill_edge(cg)
        # For now only a single sentence is tokenized
        time_count(print_to_console=False)
        result = tg.cut()[0]
        time_count("tokenization finished")
        check_jieba = jieba_checker.check(sentence, result)
        time_count("jieba check finished")
        check_thulac = thulac_checker.check(sentence, result)
        time_count("thulac check finished")
        # jieba's and thulac's segmentation results and overlap ratios
        jieba_result = check_jieba["jieba_result"]
        jieba_overlap = check_jieba["overlap"]
        thulac_result = check_thulac["thulac_result"]
        thulac_overlap = check_thulac["overlap"]
        res = json.dumps(
            {
                "graph": tg.make_json(cg, path=None),
                "result": result,
                "jieba": {
                    "words": jieba_result,
                    "overlap": "%.2f" % jieba_overlap
                },
                "thulac": {
                    "words": thulac_result,
                    "overlap": "%.2f" % thulac_overlap
                }
            },
            ensure_ascii=False)
        print("server returned")
        return res
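# A minimal sketch of how the GET handler above could be wired into
# Flask. The '/tokenize' route path, the port, and the
# JiebaChecker/ThulacChecker constructor names are assumptions for
# illustration; only request, cg, jieba_checker, thulac_checker, and
# the tokenize() view itself come from the handler above.
from flask import Flask, request

app = Flask(__name__)
cg = CorpusGraph()
cg.build_corpus()                 # shared corpus graph for all requests
jieba_checker = JiebaChecker()    # hypothetical checker constructors
thulac_checker = ThulacChecker()

# Register the tokenize() view defined above under GET /tokenize
app.add_url_rule('/tokenize', 'tokenize', tokenize, methods=['GET'])

if __name__ == '__main__':
    app.run(port=5000)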
def tokenize(sentence):
    tg = TextGraph()
    tg.build([sentence])
    tg.fill_edge(cg)
    # For now only a single sentence is tokenized
    result = tg.cut()[0]
    check = checker.check(sentence, result)
    jieba_result = check["jieba_result"]
    overlap = check["overlap"]
    res = json.dumps(
        {"result": result, "jieba": jieba_result, "overlap": overlap},
        ensure_ascii=False)
    return res
def tokenize(sentence):
    tg = TextGraph()
    tg.build([sentence])
    tg.fill_edge(cg)
    # For now only a single sentence is tokenized
    result = tg.cut()[0]
    jieba_check = jieba_checker.check(sentence, result)
    thulac_check = thulac_checker.check(sentence, result)
    jieba_result = jieba_check["jieba_result"]
    jieba_overlap = jieba_check["overlap"]
    thulac_result = thulac_check["thulac_result"]
    thulac_overlap = thulac_check["overlap"]
    res = {
        "sentence": sentence,
        "result": result,
        "jieba": jieba_result,
        "jieba_overlap": jieba_overlap,
        "thulac": thulac_result,
        "thulac_overlap": thulac_overlap,
    }
    return res
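# Usage sketch for the dict-returning tokenize() above, assuming cg and
# the two checkers are initialized at module level as in the Flask
# sketch; the sample sentence comes from test_text(). The overlap fields
# are the agreement ratios reported by the checkers.
if __name__ == '__main__':
    out = tokenize("准许原告肖振明撤回起诉")
    print(out["result"])         # segmentation from the graph-based cutter
    print(out["jieba_overlap"])  # agreement ratio with jieba's result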