# Copyright (c) 2020 EyeOnText, All Rights Reserved. # NOTICE: All information contained herein is, and remains the property of EyeOnText. from eot.wowool.native import Analyzer, Domain from eot.wowool.error import Error try: english = Analyzer(language="dutch") entities = Domain("dutch-entity") # scope is the range of sentence, in this case 3 before until the current sentence. # concept id the type of annotation we are looking for. # required means that we must have that attribute. in this case 'hij' must be male. # you can also add relevancy if you have seen a attribute, ex: relevancy="_ana:referent:6.1" # this means that if we have a candidate with a attribute _ana="referent" we add more relevancy. anaphora = Domain(source=""" namespace entity { rule:{ Person } = Person; } namespace anaphora { rule: { <'hij'>} = wow::anaphora@( concept="Person" , scope="-3:0" , inherit="true", required="gender:male" ); } """) doc = english( "Jan Van Den Berg werkte als hoofdarts samen met Pol Jannsens bij Omega Pharma. Hij is ook de CEO." ) doc = entities(doc)
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# !!! Make sure you build helloworld.dom first, from the domains folder:
#         pywoc -t -l en -o helloworld.dom helloworld.wow --verbose debug
#     or using scons:
#         scons .
#
# Example: load the compiled helloworld domain and apply it to a short text.
from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error
from pathlib import Path

this_folder = Path(__file__).parent

try:
    # The compiled domain lives three directories up, in the 'domains' folder.
    domain_file = this_folder / ".." / ".." / ".." / "domains" / "helloworld.dom"

    dutch = Analyzer(language="dutch")
    helloworld = Domain(domain_file)

    # Analyze the text first, then let the domain annotate the result.
    analyzed = dutch("greetings world.")
    annotated = helloworld(analyzed)
    print(annotated)
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
} , { "from" : { "slot" : "USER" , "label": "USER"}, "to" : { "expr" : "Person"}, "relation" : { "label": "Mentions" } }, { "from" : { "expr" : "USER" }, "to" : { "slot" : "Document", "label": "Document"} , "relation" : { "label": "Mentions" } } ] } try: english = Analyzer(language="dutch") entities = Domain( "dutch-entity" ) myrule = Domain( source = """ rule:{ 'user' '\:' {(<>)+}=USER }; """) doc = english("user:John \n\nJan Van Den Berg werkte als hoofdarts bij Omega Pharma.") doc = entities(doc) doc = myrule(doc) print(doc) graphit = EntityGraph( graph_config ) # returns a panda dataframe. graphit.slots['Document'] = {"data":"hello"} results = graphit(doc) print( results.df_from) print( results.df_relation) print( results.df_to) from eot.wowool.tool.entity_graph.cypher import CypherStream
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Example: run the English language analysis followed by the english-entity
# domain on two sentences and print the resulting document.
from eot.wowool.native import Language, Domain
from eot.wowool.error import Error

SAMPLE_TEXT = "John Smith was in London on the 3/11/2020. He took a cab to the central station."

try:
    english = Language("english")
    entities = Domain("english-entity")

    # Language analysis feeds directly into the entity domain.
    annotated = entities(english(SAMPLE_TEXT))

    separator = "-" * 80
    print(separator)
    print(annotated)
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Copyright (c) 2020 EyeOnText, All Rights Reserved. # NOTICE: All information contained herein is, and remains the property of EyeOnText. from eot.wowool.native import Analyzer, Domain from eot.wowool.annotation import Concept from eot.wowool.error import Error try: english = Analyzer(language="dutch") entities = Domain("dutch-entity") doc = english("Jan Van Den Berg werkte als hoofdarts bij Omega Pharma.") doc = entities(doc) # filter some concepts requested_concepts = set(['Person', 'Position', 'Company']) concept_filter = lambda concept: concept.uri in requested_concepts for concept in Concept.iter(doc, concept_filter): print(f"literal: {concept.literal:<20}, stem={concept.stem}") # flatten concepts into dict print('-' * 40) for concept in Concept.iter(doc, concept_filter): print({**concept}) print('-' * 40) for person in Concept.iter(doc, lambda concept: concept.uri == "Person"): for person_parts in Concept.iter( person, lambda concept: concept.uri.startswith("Person")): print( f"Person: {person_parts.uri:<20}, stem={person_parts.literal}")
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Example: walk over the sentences of an analyzed document and print every
# token and concept annotation together with its offsets.
from eot.wowool.native import Language, Domain
from eot.wowool.error import Error

try:
    english = Language("english")
    entities = Domain("english-entity")
    mydomain = Domain(source=""" rule:{ Person .. City }= PersonCity; """)

    # Pipeline: language analysis -> entity domain -> custom rule domain.
    analyzed = english("John Smith was in London on the 3/11/2020.")
    with_entities = entities(analyzed)
    doc = mydomain(with_entities)

    for sentence in doc.analysis:
        print(f"S:({sentence.begin_offset},{sentence.end_offset})")
        for annotation in sentence:
            if annotation.is_token:
                print(
                    f" T:({annotation.begin_offset},{annotation.end_offset}): {annotation.literal}, {annotation.stem}, {annotation.pos}, {annotation.properties}"
                )
                continue
            if not annotation.is_concept:
                # Neither a token nor a concept: nothing to print.
                continue
            print(
                f"""C:[{annotation.begin_offset},{annotation.end_offset}]: {annotation.uri}, attributes={annotation.attributes}, literal="{annotation.literal}" """
            )
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Example (Dutch): apply an inline 'conjecture' rule domain on top of the
# dutch-entity domain and print the annotated document.
from eot.wowool.native import Analyzer, Domain
from eot.wowool.error import Error

try:
    # Named after the language it actually analyzes (this used to be called
    # `english` although it is configured for Dutch).
    dutch = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    # The rule anchors on the literals 'het' and 'bedrijf'; the Company
    # annotation gets an `info` attribute computed from the Info capture.
    conjecture = Domain(source=""" namespace conjecture { rule : { 'het' { <> }= Info 'bedrijf' {(Prop)+} = Company@(info=f"{rule.Info.stem().upper()}") }; } """)

    doc = dutch("Het Vlaams bedrijf NietGekent werkt same met EyeOnText.")
    doc = entities(doc)
    doc = conjecture(doc)
    print(doc)
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Example (English): apply an inline 'conjecture' rule domain on top of the
# english-entity domain and print the annotated document.
from eot.wowool.native import Language, Domain
from eot.wowool.error import Error

# The rule anchors on the literals 'the' and 'company'; the Company annotation
# gets an `info` attribute computed from the Info capture.
CONJECTURE_SOURCE = """ namespace conjecture { rule : { 'the' { <> }= Info 'company' {(Prop)+} = Company@(info=f"{rule.Info.stem().upper()}") }; } """

try:
    english = Language("english")
    entities = Domain("english-entity")
    conjecture = Domain(source=CONJECTURE_SOURCE)

    # Pipeline: language analysis -> entity domain -> conjecture domain.
    analyzed = english("The Flemish company NietGekent is located in Antwerp.")
    annotated = conjecture(entities(analyzed))
    print(annotated)
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Self-checking example: run a rule that calls into a Python plugin
# (myplugin.call_this), filter the document down to a few concept types and
# assert that the expected annotations are present.
from eot.wowool.native import Language, Domain, Filter
from eot.wowool.annotation import Concept

english = Language("english")
companies = Domain("english-company")
doc = companies(english("this is a EyeOnText."))

# First rule hands every Company match to the plugin; the second one adds an
# 'Other' concept carrying the matched literal in its `name` attribute.
call_plugin = Domain(source=""" rule:{ Company }= ::python::myplugin::call_this; rule:{ Company }= Other@(name=f"{rule.literal()}" ); """)
doc = call_plugin(doc)

# Named `concept_filter` so the builtin `filter` is not shadowed.
concept_filter = Filter(['Other', 'Company', 'PLUGIN_COMPANY'])
doc = concept_filter(doc)

concepts = list(Concept.iter(doc))
uris = [concept.uri for concept in concepts]
print(doc)

# PLUGIN_COMPANY is presumably the annotation added by myplugin.call_this;
# the plugin itself is not part of this file.
assert "PLUGIN_COMPANY" in uris, "Missing some plugin annotation"
assert "Company" in uris, "Missing the Company annotation"
assert concepts[0].attributes["name"][0] == "EyeOnText", "Missing attributes."
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# !!! Make sure you build helloworld.dom first, from the domains folder:
#         pywoc -t -l en -o helloworld.dom helloworld.wow --verbose debug
#     or using scons:
#         scons .
#
# Example: load the compiled extra_greeting domain and apply it to a short text.
from eot.wowool.native import Language, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error
from pathlib import Path

this_folder = Path(__file__).parent

try:
    dutch = Language("dutch")

    # The compiled domain lives three directories up, in the 'domains' folder.
    domains_folder = this_folder.joinpath("..", "..", "..", "domains")
    helloworld = Domain(domains_folder.joinpath("extra_greeting.dom"))

    # Analyze the text first, then let the domain annotate the result.
    annotated = helloworld(dutch("greetings world."))
    print(annotated)
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Example (Dutch): combine entity concepts into relation concepts with an
# inline rule domain, then print the URI and literal of the selected concepts.
from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error

try:
    dutch = Analyzer(language="dutch")
    entities = Domain("dutch-entity")
    doc = dutch("Jan Van Den Berg werkte als hoofdarts bij Omega Pharma.")
    doc = entities(doc)

    # Two rules over the entity annotations: one that requires the stem
    # 'werken' between Person and Company, and one without that requirement.
    mydomain = Domain(source=r""" rule:{ Person .. <'werken'> .. Company }= PersonWorkCompany@(verb="work"); rule:{ Person .. Company }= PersonCompany; """)
    doc = mydomain(doc)

    # Only these concept URIs are printed.
    requested_concepts = {'Person', 'Company', 'PersonWorkCompany', 'PersonCompany'}

    def concept_filter(concept):
        """Return True when the concept's URI is one of the requested ones."""
        return concept.uri in requested_concepts

    for concept in Concept.iter(doc, concept_filter):
        print(f"{concept.uri} -> {concept.literal}")
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
try:
    # Example (Dutch compounds): match words by compound component or by
    # compound head, and capture the price that follows an insurance word.
    analyzer = Analyzer(language="dutch")

    # NOTE(review): the '//' line comments in the rule source need their own
    # lines, so the line breaks below are restored; confirm the exact layout
    # against the original file.
    rule_source = """
// Compound Sample:
// capture all the word with verzekering
lexicon:(input="component"){ verzekering } = INSURANCE_COMP;
// capture only the real verzekering not verzekeringsmaatschapijen
lexicon:(input="head"){ verzekering } = INSURANCE_HEAD;
// capture the cost of the insurance.
rule:{ h'verzekering' { <+currency> } = INSURANCE_PRICE };
"""
    compounds = Domain(source=rule_source)

    # Named `text` so the builtin `input` is not shadowed.
    text = "Er zijn verzekeringsmaatschapijen €40.000.000 en verzekeringen: autoverzekeringen €100, fietsverzekering €10"
    doc = compounds(analyzer(text))

    separator = "-" * 80
    print(separator)
    print(rule_source)
    print(separator)
    print(text)
    print(separator)
    # Table header; the column widths match the rows printed below.
    print(f"{'uri':<20s} | {'literal':<30s} | {'stem'}")
    print(separator)
    for concept in Concept.iter(
            doc, lambda concept: concept.uri.startswith("INSURANCE")):
        print(f"{concept.uri:<20s} | {concept.literal:<30s} | {concept.stem}")
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Copyright (c) 2020 EyeOnText, All Rights Reserved.
# NOTICE: All information contained herein is, and remains the property of EyeOnText.
#
# Example: compile helloworld.wow plus one inline rule into
# extra_greeting.dom, then load the compiled domain and apply it to a text.
import sys
from eot.wowool.native import Language, Domain, Compiler
from eot.wowool.annotation import Concept
from eot.wowool.error import Error
from pathlib import Path

this_folder = Path(__file__).parent

try:
    project_folder = this_folder / '..' / '..' / '..' / 'domains'
    # Single definition of the output path, used for both saving and loading.
    compiled_domain = project_folder / 'extra_greeting.dom'

    compiler = Compiler()
    compiler.add_file(project_folder / 'helloworld.wow')
    compiler.add_source(""" rule:{ GREETING } = EXTRA_GREETING; """)
    results = compiler.save(compiled_domain)
    if not results.status:
        print(results)
        # sys.exit instead of the `exit` helper, which the `site` module
        # provides for interactive sessions and may not exist in every run.
        sys.exit(-1)

    dutch = Language("dutch")
    helloworld = Domain(compiled_domain)
    doc = dutch("greetings world.")
    doc = helloworld(doc)
    print(doc)
except Error as ex:
    # Errors raised by the wowool SDK are reported, not propagated.
    print("Exception:", ex)
# Check if external configuration exists # Check for HEFrules.dom in /mnt/inout/config/ # Check for graphconfig.py in /mnt/inout/config/ external_config_file_path = "/mnt/inout/config/" rule_file_name = "HEFrules.dom" external_config_file_name = "graphconfig.py" output_directory = "/mnt/inout/output/hef-graph-cypher/" input_docs_directory = "/mnt/inout/output/scraper/txt/" if not os.path.exists(output_directory): pathlib.Path(output_directory).mkdir(parents=True, exist_ok=True) if pathlib.Path(external_config_file_path + rule_file_name).is_file(): print("External domain found") myrule = Domain(external_config_file_path + rule_file_name) else: print("use default domain") myrule = Domain(os.path.dirname(__file__) + "/" + rule_file_name) if pathlib.Path(external_config_file_path + external_config_file_name).is_file(): print("External config found") import importlib.util spec = importlib.util.spec_from_file_location( "hef-config", external_config_file_path + external_config_file_name) config_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(config_module) try: graph_config = config_module.graph_config