def test_find_with_rules():
    """Check rule-based entity discovery against a recorded output file.

    Everything printed by the scenario below is captured into the file
    ``<name>_current`` and then compared with the contents of
    ``<name>_expected``, where ``<name>`` is whatever
    ``get_current_function_name()`` returns for this test.

    Raises:
        AssertionError: if the captured output differs from the expected file.
        OSError: if the expected file cannot be read or the current file
            cannot be written.
    """
    from contextlib import redirect_stdout
    from harvesttext.match_patterns import UpperFirst, AllEnglish, Contains, StartsWith, EndsWith
    # some more patterns are provided

    # Resolve the name once, directly in this function's body.
    # NOTE(review): the helper presumably inspects its caller's frame, so it
    # must not be moved into a nested function or comprehension - confirm.
    name = get_current_function_name()

    with open(name + "_expected") as expected_file:
        expected = expected_file.read()

    # redirect_stdout restores sys.stdout on exit (even on failure), and the
    # with-statement closes the capture file, so later prints are unaffected.
    with open(name + "_current", "w") as current_file, redirect_stdout(current_file):
        text0 = "我喜欢Python,因为requests库很适合爬虫"
        ht0 = HarvestText()
        found_entities = ht0.find_entity_with_rule(text0, rulesets=[AllEnglish()], type0="英文名")
        print(found_entities)
        print(ht0.posseg(text0))
        print(ht0.mention2entity("Python"))

        # Satisfying one of the rules
        ht0.clear()
        found_entities = ht0.find_entity_with_rule(text0, rulesets=[AllEnglish(), Contains("爬")], type0="技术")
        print(found_entities)
        print(ht0.posseg(text0))

        # Satisfying a couple of rules [using tuple]
        ht0.clear()
        found_entities = ht0.find_entity_with_rule(text0, rulesets=[(AllEnglish(), UpperFirst())], type0="专有英文词")
        print(found_entities)
        print(ht0.posseg(text0))

    with open(name + "_current") as current_file:
        actual = current_file.read()
    assert actual == expected
def find_with_rules():
    """Print the results of rule-based entity discovery on a sample sentence.

    Three rule configurations are run in turn on the same ``HarvestText``
    instance, which is cleared between runs: a single rule, a list of two
    rules, and a tuple of two rules. After each run the found entities and
    the ``posseg`` output for the sentence are printed; the first run also
    prints ``mention2entity("Python")``.
    """
    from harvesttext.match_patterns import UpperFirst, AllEnglish, Contains, StartsWith, EndsWith
    # some more patterns are provided

    sentence = "我喜欢Python,因为requests库很适合爬虫"
    harvester = HarvestText()

    def report(rules, entity_type):
        # Run one discovery pass, then print the entities and the tagging.
        entities = harvester.find_entity_with_rule(sentence, rulesets=rules, type0=entity_type)
        print(entities)
        print(harvester.posseg(sentence))

    report([AllEnglish()], "英文名")
    print(harvester.mention2entity("Python"))

    # Satisfying one of the rules
    harvester.clear()
    report([AllEnglish(), Contains("爬")], "技术")

    # Satisfying a couple of rules [using tuple]
    harvester.clear()
    report([(AllEnglish(), UpperFirst())], "专有英文词")