def test_find_with_rules():
    """Regression test: entity extraction via rule patterns.

    Captures everything printed into "<name>_current" and compares it
    against the pre-recorded golden file "<name>_expected".
    """
    from contextlib import redirect_stdout
    from harvesttext.match_patterns import UpperFirst, AllEnglish, Contains, StartsWith, EndsWith  # some more patterns is provided

    fname = get_current_function_name()
    # Read the golden output up front and close the handle (the original
    # leaked it via a bare open(...).read()).
    with open(fname + "_expected") as f:
        expected = f.read()
    # redirect_stdout guarantees sys.stdout is restored even if an
    # assertion/exception fires — the original reassigned sys.stdout and
    # never put it back, breaking printing for every subsequent test.
    with open(fname + "_current", "w") as out, redirect_stdout(out):
        text0 = "我喜欢Python,因为requests库很适合爬虫"
        ht0 = HarvestText()
        found_entities = ht0.find_entity_with_rule(text0, rulesets=[AllEnglish()], type0="英文名")
        print(found_entities)
        print(ht0.posseg(text0))
        print(ht0.mention2entity("Python"))
        # Satisfying one of the rules
        ht0.clear()
        found_entities = ht0.find_entity_with_rule(text0, rulesets=[AllEnglish(), Contains("爬")], type0="技术")
        print(found_entities)
        print(ht0.posseg(text0))
        # Satisfying a couple of rules [using tuple]
        ht0.clear()
        found_entities = ht0.find_entity_with_rule(text0, rulesets=[(AllEnglish(), UpperFirst())], type0="专有英文词")
        print(found_entities)
        print(ht0.posseg(text0))
    # Compare captured output with the golden file (handle closed via with).
    with open(fname + "_current") as f:
        assert f.read() == expected
def find_with_rules():
    """Demo: extract entities that match rule patterns and register them by type."""
    from harvesttext.match_patterns import UpperFirst, AllEnglish, Contains, StartsWith, EndsWith  # some more patterns is provided

    sample = "我喜欢Python,因为requests库很适合爬虫"
    ht = HarvestText()

    # Single rule: any all-English token is tagged with the given type.
    hits = ht.find_entity_with_rule(sample, rulesets=[AllEnglish()], type0="英文名")
    print(hits)
    print(ht.posseg(sample))
    print(ht.mention2entity("Python"))

    # Satisfying one of the rules
    ht.clear()
    hits = ht.find_entity_with_rule(
        sample, rulesets=[AllEnglish(), Contains("爬")], type0="技术")
    print(hits)
    print(ht.posseg(sample))

    # Satisfying a couple of rules [using tuple]
    ht.clear()
    hits = ht.find_entity_with_rule(
        sample, rulesets=[(AllEnglish(), UpperFirst())], type0="专有英文词")
    print(hits)
    print(ht.posseg(sample))
def using_typed_words():
    """Demo: load the Tsinghua (QH) typed-word lexicon and Baidu stopwords, then POS-tag."""
    from harvesttext.resources import get_qh_typed_words, get_baidu_stopwords

    ht = HarvestText()
    typed_words = get_qh_typed_words()
    stopwords = get_baidu_stopwords()
    ht.add_typed_words(typed_words)

    print("加载清华领域词典,并使用停用词")
    print("全部类型", typed_words.keys())
    sentence = "THUOCL是自然语言处理的一套中文词库,词表来自主流网站的社会标签、搜索热词、输入法词库等。"
    print(sentence)
    # Stopwords are filtered out during POS tagging.
    print(ht.posseg(sentence, stopwords=stopwords))
    print("一些词语被赋予特殊类型IT,而“是”等词语被筛出。")
def test_using_typed_words():
    """Regression test: typed-word lexicon + stopword filtering.

    Captures everything printed into "<name>_current" and compares it
    against the pre-recorded golden file "<name>_expected".
    """
    from contextlib import redirect_stdout
    from harvesttext.resources import get_qh_typed_words, get_baidu_stopwords

    fname = get_current_function_name()
    # Read the golden output up front and close the handle (the original
    # leaked it via a bare open(...).read()).
    with open(fname + "_expected") as f:
        expected = f.read()
    # redirect_stdout guarantees sys.stdout is restored even if an
    # assertion/exception fires — the original reassigned sys.stdout and
    # never put it back, breaking printing for every subsequent test.
    with open(fname + "_current", "w") as out, redirect_stdout(out):
        ht0 = HarvestText()
        typed_words, stopwords = get_qh_typed_words(), get_baidu_stopwords()
        ht0.add_typed_words(typed_words)
        print("加载清华领域词典,并使用停用词")
        print("全部类型", typed_words.keys())
        sentence = "THUOCL是自然语言处理的一套中文词库,词表来自主流网站的社会标签、搜索热词、输入法词库等。"
        print(sentence)
        print(ht0.posseg(sentence, stopwords=stopwords))
        print("一些词语被赋予特殊类型IT,而“是”等词语被筛出。")
    # Compare captured output with the golden file (handle closed via with).
    with open(fname + "_current") as f:
        assert f.read() == expected
def test_english():
    """Smoke-test the English pipeline: sentence cut, seg/posseg, sentiment, paragraph cut."""
    # ♪ "Until the Day" by JJ Lin
    lyrics = """
    In the middle of the night.
    Lonely souls travel in time.
    Familiar hearts start to entwine.
    We imagine what we'll find, in another life.
    """.lower()

    ht_eng = HarvestText(language="en")
    sents = ht_eng.cut_sentences(lyrics)
    print("\n".join(sents))
    print(ht_eng.seg(sents[-1]))
    print(ht_eng.posseg(sents[0], stopwords={"in"}))

    # Build a sentiment dictionary from seed words, then score each sentence.
    sent_dict = ht_eng.build_sent_dict(sents, pos_seeds=["familiar"], neg_seeds=["lonely"],
                                       min_times=1, stopwords={'in', 'to'})
    print("Sentiment analysis")
    for line in sents:
        print(line, "%.3f" % ht_eng.analyse_sent(line))

    print("Segmentation")
    print("\n".join(ht_eng.cut_paragraphs(lyrics, num_paras=2)))