Exemple #1
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Analyzer, Domain
from eot.wowool.error import Error

# Anaphora sample: create a Dutch analyzer, the Dutch entity domain and an
# inline anaphora domain, then analyze a two-sentence text.
# NOTE(review): the matching `except` clause is not visible in this excerpt.
try:
    # NOTE(review): the variable is named `english`, but the analyzer is
    # created with language="dutch".
    english = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    # Options used by the wow::anaphora rule below (as described by the author):
    # - scope: the range of sentences to search, here from 3 sentences back up
    #   to and including the current sentence.
    # - concept: the type of annotation we are looking for.
    # - required: an attribute the candidate must have; here the referent of
    #   'hij' must be male.
    # - relevancy (optional, not used here): boosts candidates that carry a
    #   given attribute, e.g. relevancy="_ana:referent:6.1" adds relevancy to a
    #   candidate that has the attribute _ana="referent".
    anaphora = Domain(source="""
    namespace entity {
         rule:{ Person } = Person;
    }

    namespace anaphora {
        rule: { <'hij'>} =
            wow::anaphora@( concept="Person" , scope="-3:0" , inherit="true", required="gender:male" );
    }

""")

    # Analyze the text, then annotate it with the entity domain.
    doc = english(
        "Jan Van Den Berg werkte als hoofdarts samen met Pol Jannsens bij Omega Pharma. Hij is ook de CEO."
    )
    doc = entities(doc)
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.
# !!! make sure you build the helloworld.dom
# from the domains folder pywoc -t -l en -o helloworld.dom helloworld.wow --verbose debug
# or using scons
# scons .

from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error
from pathlib import Path

# Directory that contains this script; the compiled domain is looked up
# relative to it.
this_folder = Path(__file__).parent

try:
    # Dutch analyzer plus the pre-built hello-world domain file.
    dutch = Analyzer(language="dutch")
    domain_file = this_folder / '..' / '..' / '..' / 'domains' / 'helloworld.dom'
    helloworld = Domain(domain_file)

    # Analyze the text, apply the domain, and show the annotated document.
    doc = dutch("greetings world.")
    doc = helloworld(doc)
    print(doc)
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
          }
          ,
          {   "from"      : { "slot" : "USER" ,  "label": "USER"},
              "to"        : { "expr" : "Person"},
              "relation"  : { "label": "Mentions"  }
          },
          {   "from"      : { "expr" : "USER" },
              "to"        : { "slot" : "Document", "label": "Document"} ,
              "relation"  : { "label": "Mentions"  }
          }
      ]
}

try:
    english = Analyzer(language="dutch")
    entities = Domain( "dutch-entity" )
    myrule = Domain( source = """ rule:{ 'user' '\:' {(<>)+}=USER }; """)
    doc = english("user:John \n\nJan Van Den Berg werkte als hoofdarts bij Omega Pharma.")
    doc = entities(doc)
    doc = myrule(doc)
    print(doc)
    graphit = EntityGraph( graph_config )
    # returns a panda dataframe.
    graphit.slots['Document'] = {"data":"hello"}
    results = graphit(doc)

    print( results.df_from)
    print( results.df_relation)
    print( results.df_to)

    from eot.wowool.tool.entity_graph.cypher import CypherStream
Exemple #4
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Language, Domain
from eot.wowool.error import Error

try:
    # English language object and the English entity domain.
    english = Language("english")
    entities = Domain("english-entity")

    text = "John Smith was in London on the 3/11/2020. He took a cab to the central station."
    # Analyze the text first, then let the entity domain annotate the result.
    doc = english(text)
    doc = entities(doc)

    separator = '-' * 80
    print(separator)
    print(doc)
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
Exemple #5
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error

# Concept iteration sample: annotate a Dutch sentence and walk over the
# resulting concepts in three different ways.
# NOTE(review): the matching `except` clause is not visible in this excerpt.
try:
    # NOTE(review): the variable is named `english`, but the analyzer is
    # created with language="dutch".
    english = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    doc = english("Jan Van Den Berg werkte als hoofdarts bij Omega Pharma.")
    doc = entities(doc)

    # Keep only concepts whose uri is one of the requested ones.
    requested_concepts = set(['Person', 'Position', 'Company'])
    concept_filter = lambda concept: concept.uri in requested_concepts
    # 1) Print literal and stem of every matching concept.
    for concept in Concept.iter(doc, concept_filter):
        print(f"literal: {concept.literal:<20}, stem={concept.stem}")
    # 2) Flatten each matching concept into a dict via mapping unpacking.
    print('-' * 40)
    for concept in Concept.iter(doc, concept_filter):
        print({**concept})

    # 3) For every Person, iterate again inside that Person and print the
    #    nested concepts whose uri starts with "Person".
    print('-' * 40)
    for person in Concept.iter(doc, lambda concept: concept.uri == "Person"):
        for person_parts in Concept.iter(
                person, lambda concept: concept.uri.startswith("Person")):
            # NOTE(review): the label says `stem=` but the value printed is
            # `.literal` - confirm which one is intended.
            print(
                f"Person: {person_parts.uri:<20}, stem={person_parts.literal}")
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Language, Domain
from eot.wowool.error import Error

try:
    # English language object, the stock entity domain and an inline rule
    # domain that produces PersonCity annotations.
    english = Language("english")
    entities = Domain("english-entity")
    mydomain = Domain(source=""" rule:{ Person .. City }= PersonCity; """)

    # Run the pipeline step by step: analysis -> entities -> custom rule.
    doc = english("John Smith was in London on the 3/11/2020.")
    doc = entities(doc)
    doc = mydomain(doc)

    # Dump every sentence with its offsets, followed by its annotations.
    for sentence in doc.analysis:
        print(f"S:({sentence.begin_offset},{sentence.end_offset})")
        for annotation in sentence:
            if annotation.is_token:
                # Tokens: offsets, literal, stem, part of speech, properties.
                print(
                    f"  T:({annotation.begin_offset},{annotation.end_offset}): {annotation.literal}, {annotation.stem}, {annotation.pos}, {annotation.properties}"
                )
                continue
            if annotation.is_concept:
                # Concepts: offsets, uri, attributes and the covered literal.
                print(
                    f"""C:[{annotation.begin_offset},{annotation.end_offset}]: {annotation.uri}, attributes={annotation.attributes}, literal="{annotation.literal}" """
                )

except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Analyzer, Domain
from eot.wowool.error import Error

try:
    # Dutch analyzer and the stock Dutch entity domain.
    dutch = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    # Inline rule source. It appears to capture the token between 'het' and
    # 'bedrijf' as Info and to annotate the following Prop tokens as a Company
    # carrying an `info` attribute - confirm against the wowool rule docs.
    conjecture_source = """


namespace conjecture {

    rule :
    {
        'het' { <> }= Info
        'bedrijf'
    {(Prop)+} = Company@(info=f"{rule.Info.stem().upper()}")
    };
}
    """
    conjecture = Domain(source=conjecture_source)

    # Analyze the sentence, then apply the domains in order.
    doc = dutch("Het Vlaams bedrijf NietGekent werkt same met EyeOnText.")
    for domain in (entities, conjecture):
        doc = domain(doc)
    print(doc)
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Language, Domain
from eot.wowool.error import Error

try:
    # English language object and the stock English entity domain.
    english = Language("english")
    entities = Domain("english-entity")

    # Inline rule source. It appears to capture the token between 'the' and
    # 'company' as Info and to annotate the following Prop tokens as a Company
    # carrying an `info` attribute - confirm against the wowool rule docs.
    conjecture_source = """


namespace conjecture {

    rule :
    {
        'the' { <> }= Info
        'company'
        {(Prop)+} = Company@(info=f"{rule.Info.stem().upper()}")
    };
}
    """
    conjecture = Domain(source=conjecture_source)

    # Analyze the sentence, then apply the domains in order.
    doc = english("The Flemish company NietGekent is located in Antwerp.")
    for domain in (entities, conjecture):
        doc = domain(doc)
    print(doc)
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
Exemple #9
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Language, Domain, Filter
from eot.wowool.annotation import Concept

# Plugin sample: annotate a sentence with the company domain, run an inline
# domain that calls a python plugin, filter the annotations and verify them.
a = Language("english")
dc = Domain("english-company")
doc = dc(a("this is a EyeOnText."))

# Two rules on Company: the first one calls `myplugin.call_this`, the second
# one adds an `Other` annotation carrying the matched literal as `name`.
call_plugin = Domain(source="""
rule:{ Company }= ::python::myplugin::call_this;
rule:{ Company }= Other@(name=f"{rule.literal()}" );

""")

doc = call_plugin(doc)
# Named `concept_filter` (not `filter`) so the builtin `filter` is not shadowed
# for the rest of the module.
concept_filter = Filter(['Other', 'Company', 'PLUGIN_COMPANY'])
doc = concept_filter(doc)
concepts = list(Concept.iter(doc))
uris = [c.uri for c in concepts]
print(doc)

# Sanity checks on the annotations that survived the filter.
# NOTE(review): PLUGIN_COMPANY is presumably emitted by the plugin - confirm.
assert "PLUGIN_COMPANY" in uris, "Missing some plugin annotation"
assert "Company" in uris, "Missing some plugin annotation"
assert concepts[0].attributes["name"][0] == "EyeOnText", "Missing attributes."
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.
# !!! make sure you build the helloworld.dom
# from the domains folder pywoc -t -l en -o helloworld.dom helloworld.wow --verbose debug
# or using scons
# scons .

from eot.wowool.native import Language, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error
from pathlib import Path

# Directory that contains this script; the compiled domain is looked up
# relative to it.
this_folder = Path(__file__).parent

try:
    # Dutch language object plus the compiled extra_greeting domain file.
    dutch = Language("dutch")
    domain_file = this_folder / '..' / '..' / '..' / 'domains' / 'extra_greeting.dom'
    helloworld = Domain(domain_file)

    # Analyze the text, apply the domain, and show the annotated document.
    doc = dutch("greetings world.")
    doc = helloworld(doc)
    print(doc)
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error

try:
    # Dutch analyzer and the stock Dutch entity domain.
    dutch = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    # Analyze the sentence and annotate it with entities.
    doc = dutch("Jan Van Den Berg werkte als hoofdarts bij Omega Pharma.")
    doc = entities(doc)

    # Inline rules that relate a Person to a Company, with and without the
    # verb 'werken' in between.
    mydomain = Domain(source=r"""
        rule:{ Person .. <'werken'> .. Company }= PersonWorkCompany@(verb="work");
        rule:{ Person .. Company }= PersonCompany;
    """)
    doc = mydomain(doc)

    # Only report the concepts we are interested in.
    requested_concepts = {
        'Person', 'Company', 'PersonWorkCompany', 'PersonCompany'}

    def concept_filter(concept):
        """Return True when the concept's uri is one of the requested ones."""
        return concept.uri in requested_concepts

    for concept in Concept.iter(doc, concept_filter):
        print(f"{concept.uri} -> {concept.literal}")

except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
Exemple #12
0
# Compound sample: run lexicon and rule definitions over Dutch compound words
# and print every INSURANCE* concept that was found.
try:
    analyzer = Analyzer(language="dutch")
    # Rule source; it is also printed verbatim below, so it is kept as is.
    rule_source = """
// Compound Sample:
// capture all the word with verzekering
lexicon:(input="component"){
    verzekering } = INSURANCE_COMP;

// capture only the real verzekering not verzekeringsmaatschapijen
lexicon:(input="head"){
    verzekering } = INSURANCE_HEAD;

// capture the cost of the insurance.
rule:{ h'verzekering' { <+currency> } = INSURANCE_PRICE };
    """
    compounds = Domain(source=rule_source)
    # Named `text` (not `input`) so the builtin `input` is not shadowed.
    text = "Er zijn verzekeringsmaatschapijen €40.000.000 en verzekeringen: autoverzekeringen €100, fietsverzekering €10"
    doc = compounds(analyzer(text))

    # Echo the rules and the input text, each framed by a separator line.
    separator = "-" * 80
    print(separator)
    print(rule_source)
    print(separator)
    print(text)
    print(separator)
    # Table header followed by one row per INSURANCE* concept.
    print(f"{'uri':<20s} | {'literal':<30s} | {'stem'}")
    print(separator)
    for concept in Concept.iter(
            doc, lambda concept: concept.uri.startswith("INSURANCE")):
        print(f"{concept.uri:<20s} | {concept.literal:<30s} | {concept.stem}")
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
Exemple #13
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.
from eot.wowool.native import Language, Domain, Compiler
from eot.wowool.annotation import Concept
from eot.wowool.error import Error
from pathlib import Path

# Compiler sample: build extra_greeting.dom from helloworld.wow plus one extra
# inline rule, then load the compiled domain and apply it to a text.
this_folder = Path(__file__).parent
try:
    project_folder = this_folder / '..' / '..' / '..' / 'domains'

    # Combine the existing source file with an additional inline rule.
    compiler = Compiler()
    compiler.add_file(project_folder / 'helloworld.wow')
    compiler.add_source(""" rule:{ GREETING } = EXTRA_GREETING; """)
    results = compiler.save(project_folder / 'extra_greeting.dom')
    if not results.status:
        # Compilation failed: show the compiler results and stop with the same
        # exit status as before. SystemExit is raised directly because the
        # `exit` helper is injected by the `site` module for interactive use
        # and is not guaranteed to exist.
        print(results)
        raise SystemExit(-1)

    dutch = Language("dutch")
    helloworld = Domain(project_folder / 'extra_greeting.dom')

    # Analyze the text, apply the freshly compiled domain, and show the result.
    doc = dutch("greetings world.")
    doc = helloworld(doc)
    print(doc)
except Error as ex:
    # Report library errors instead of letting the script crash.
    print("Exception:", ex)
# Locations of the optional external configuration and of the input/output
# folders:
# - HEFrules.dom in /mnt/inout/config/ overrides the bundled rule domain.
# - graphconfig.py in /mnt/inout/config/ overrides the graph configuration.
external_config_file_path = "/mnt/inout/config/"
rule_file_name = "HEFrules.dom"
external_config_file_name = "graphconfig.py"

output_directory = "/mnt/inout/output/hef-graph-cypher/"
input_docs_directory = "/mnt/inout/output/scraper/txt/"

# Make sure the output folder exists before anything is written to it.
if not os.path.exists(output_directory):
    pathlib.Path(output_directory).mkdir(parents=True, exist_ok=True)

# Prefer the externally supplied rule domain; otherwise fall back to the one
# shipped next to this script.
if pathlib.Path(external_config_file_path + rule_file_name).is_file():
    print("External domain found")
    myrule = Domain(external_config_file_path + rule_file_name)
else:
    print("use default domain")
    # Resolve the script folder to an absolute path first: with a bare
    # relative __file__, dirname() is "" and plain concatenation would point
    # at "/HEFrules.dom" in the filesystem root.
    myrule = Domain(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), rule_file_name))

if pathlib.Path(external_config_file_path +
                external_config_file_name).is_file():
    print("External config found")
    import importlib.util
    spec = importlib.util.spec_from_file_location(
        "hef-config", external_config_file_path + external_config_file_name)
    config_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config_module)

    try:
        graph_config = config_module.graph_config