def test_passive(self):
    """Passive voice: 'eaten' takes the rabbit as PATIENT and 'me' as AGENT."""
    parsed = Drs.create_from_natural_language('the rabbit is eaten by me')
    pattern = Drs.create_from_predicates_string(
        "{'entity': '', 'compound': 'rabbit', 'word': 'rabbit', 'tag': 'n'}(v1), {'entity': '', 'compound': 'eaten', 'word': 'eaten', 'tag': 'v'}(v3), {'entity': '', 'compound': 'me', 'word': 'me', 'tag': 'PRP'}(v5), {'type': 'PATIENT'}(v3,v1), {'type': 'AGENT'}(v3,v5)")
    matches = parsed.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 1)
def test_modal(self):
    """Modal 'can' must be linked to its verb through a MODAL edge."""
    parsed = Drs.create_from_natural_language('alberto can dance')
    pattern = Drs.create_from_predicates_string(
        '{"tag": "MD", "word": "can"}(2), {"type": "MODAL"}(3,2), {"tag": "v"}(3)')
    matches = parsed.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 0)
def test_copula(self):
    """Copular 'is' must carry AGENT (subject) and ATTR (predicate) edges."""
    parsed = Drs.create_from_natural_language('this is a test')
    pattern = Drs.create_from_predicates_string(
        "{'word': 'is', 'compound': 'is', 'tag': 'v', 'entity': ''}(v1), {'word': 'this', 'compound': 'this', 'tag': 'DT', 'entity': ''}(v0), {'word': 'test', 'compound': 'test', 'tag': 'n', 'entity': ''}(v3), {'type': 'AGENT'}(v1,v0), {'type': 'ATTR'}(v1,v3)")
    matches = parsed.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 1)
def test_creation_from_drt_with_preposition(self):
    """Two DRSs built from the same predicates string (with an 'of' edge) must match."""
    predicates = "{'word': 'ideas', 'entity': '', 'tag': 'n', 'compound': 'ideas'}(v0), {'word': 'Jim', 'entity': '', 'tag': 'n', 'compound': 'Jim'}(v2), {'type': 'of'}(v0,v2)"
    left = Drs.create_from_predicates_string(predicates)
    right = Drs.create_from_predicates_string(predicates)
    matches = left.apply(DrsMatcher(right, metric))
    self.assertTrue(len(matches) > 1)
def test_compound_nouns_gender_guess(self):
    """'Jane Smith' must be compounded into one node with a female gender guess."""
    parsed = Drs.create_from_natural_language("Jane Smith is an engineer")
    pattern = Drs.create_from_predicates_string(
        "{'compound': 'Jane Smith', 'gender_guess': 'f'}(a)")
    matches = parsed.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 1)
def test_snippet_2(self):
    """Conjunction rules: both conjoined nominals become ATTRs of the copula."""
    parsed = Drs.create_from_natural_language("Jon is a carpenter and an engineer")
    pattern = Drs.create_from_predicates_string(""" {'word': 'is', 'tag': 'v', 'compound': 'is', 'entity': '', 'lemma': 'be', 'gender_guess': None, 'is_head_token': True, 'refers_to': None, 'negated': 'false'}(v1), {'word': 'Jon', 'tag': 'n', 'compound': 'Jon', 'entity': 'PERSON', 'lemma': 'Jon', 'gender_guess': 'm', 'is_head_token': False, 'refers_to': None, 'negated': 'false'}(v0), {'word': 'carpenter', 'tag': 'n', 'compound': 'carpenter', 'entity': '', 'lemma': 'carpenter', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false'}(v3), {'word': 'engineer', 'tag': 'n', 'compound': 'engineer', 'entity': '', 'lemma': 'engineer', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false'}(v6), {'type': 'AGENT'}(v1,v0), {'type': 'ATTR'}(v1,v6), {'type': 'ATTR'}(v1,v3), {'type': 'ATTR'}(v1,v6) """)
    matches = parsed.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 1)
def test_relation_rules(self):
    """The generic relation rules must infer WORKS_AT from 'Jim works at Microsoft'."""
    data_drs = Drs.create_from_natural_language('Jim works at Microsoft')
    knowledge = Knowledge(metric)
    # Use a context manager so the rules file handle is closed deterministically
    # (the original open(...).read() leaked the handle).
    with open(os.path.join(_path, '../rules/generic_relations.rules')) as rules_file:
        knowledge.add_rules(rules_file.read())
    inference = ForwardInference(data_drs, knowledge)
    end_drs = inference.compute()
    expected_drs = Drs.create_from_predicates_string('{}(1), {"text": "WORKS_AT"}(1,2), {}(2)')
    is_match = False
    for drs in end_drs:
        lst = drs[0].apply(DrsMatcher(expected_drs, metric))
        if len(lst) > 0:
            is_match = True
            break
    self.assertTrue(is_match)
def test_coreference_is_joined_in_graph(self):
    """Coreferent mentions across sentences must be linked by a REFERS_TO edge."""
    discourse = Discourse("John is ginger. He is a carpenter. test. Jane is blond. She is a carpenter. ")
    pattern = Drs.create_from_predicates_string(
        "{}(a), {}(b), {'type': 'REFERS_TO'}(a,b)")
    matches = discourse._discourse.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 1)
def test_birth_date2(self):
    """Inference must extract a BIRTH_DAY triplet for a full day-month-year date."""
    parsed = Drs.create_from_natural_language('John was born in 10 August 1582')
    inferred = ForwardInference(parsed, _knowledge).compute()
    triplets = inferred[0][0].apply(RelationTripletsWriter())
    self.assertEqual(triplets, [('John', 'BIRTH_DAY', '10_August_1582')])
def test_personal_pronouns(self):
    """'I have a red dog' must yield an OWN relation anchored on the pronoun."""
    parsed = Drs.create_from_natural_language('I have a red dog')
    inferred = ForwardInference(parsed, _knowledge).compute()
    triplets = inferred[0][0].apply(RelationTripletsWriter())
    self.assertEqual(triplets, [('I', 'OWN', 'dog')])
def visit(self, g):
    """Blank the 'entity' attribute of every vertex whose word is excluded.

    Args:
        g: an igraph.Graph whose vertices carry 'word' and 'entity' attributes.

    Returns:
        A Drs wrapping the (mutated in place) graph.

    Raises:
        TypeError: if *g* is not an igraph.Graph.
    """
    if not isinstance(g, Graph):
        # Fixed: the old message referenced a nonexistent 'visit_to_graph()' method.
        raise TypeError(
            "DrsRule.visit() needs an igraph.Graph as an argument"
        )
    for vertex in g.vs:
        if vertex['word'] in self._words_without_entity:
            vertex['entity'] = ''
    return Drs(g)
def test_single_clause(self):
    """A single MATCH/CREATE rule must fire on a sentence with matching entities."""
    rule = """ MATCH "{PERSON}#1 works at {ORG}#2" CREATE {}(1), {"type": "WORKS_AT"}(1,2), {}(2) """
    knowledge = Knowledge(metric)
    knowledge.add_rules(rule)
    source = Drs.create_from_natural_language('Jim works at Microsoft')
    results = ForwardInference(source, knowledge).compute()
    pattern = Drs.create_from_predicates_string('{}(1), {"type": "WORKS_AT"}(1,2), {}(2)')
    matched = False
    for candidate in results:
        if len(candidate[0].apply(DrsMatcher(pattern, metric))) > 0:
            matched = True
            break
    self.assertTrue(matched)
def test_possesive_pronouns(self):
    """Possessive 'My' must resolve to an OWN relation from 'me' to 'dog'."""
    drs = Drs.create_from_natural_language('My dog is red')
    knowledge = Knowledge()
    # Use a context manager so the rules file handle is closed deterministically
    # (the original open(...).read() leaked the handle).
    with open(os.path.join(_path, '../rules/test.rules')) as rules_file:
        knowledge.add_rules(rules_file.read())
    fi = ForwardInference(drs, knowledge)
    drs_and_weight = fi.compute()
    writer = RelationTripletsWriter()
    lst = drs_and_weight[0][0].apply(writer)
    expected_list = [('me', 'OWN', 'dog')]
    self.assertEqual(lst, expected_list)
def test_drt_graph(self):
    """The DRT writer must serialize nodes and labelled edges for 'John is tall'."""
    parsed = Drs.create_from_natural_language('John is tall')
    graph = parsed.apply(DRTTripletsWriter())
    expected = {
        'edges': [
            {'arrows': 'to', 'from': 'v1', 'label': 'AGENT', 'to': 'v0'},
            {'arrows': 'to', 'from': 'v1', 'label': 'ADJECTIVE', 'to': 'v2'},
        ],
        'nodes': [
            {'id': 'v1', 'label': 'is'},
            {'id': 'v0', 'label': 'John'},
            {'id': 'v2', 'label': 'tall'},
        ],
    }
    self.assertEqual(graph, expected)
def test_asimov_wiki(self):
    """End-to-end extraction over the Asimov wiki text.

    Checks the extracted relation triplets and that the discourse graph
    contains an 'at Boston_University' attachment.
    """
    # Context manager avoids leaking the file handle.
    with open(os.path.join(_path, '../data/wiki_asimov.txt')) as wiki_file:
        text = wiki_file.read()
    discourse = Discourse(text)
    extractor = Extractor(discourse, _knowledge)
    triplets = extractor.extract()
    expected_triplets = [('Isaac_Asimov_0|Asimov_0', 'JOB_TITLE', 'writer'),
                         ('Isaac_Asimov_0|Asimov_0', 'OWNS', 'works'),
                         ('Isaac_Asimov_0|Asimov_0', 'OWNS', 'books')]
    # Fixed: the original called assertTrue(triplets, expected_triplets), where the
    # second argument is only the failure *message* — nothing was ever compared.
    self.assertEqual(triplets, expected_triplets)
    expected_drs = Drs.create_from_predicates_string(
        "{}(a), {'word': 'Boston_University'}(b), {'type': 'at'}(a,b)")
    lst = discourse._discourse.apply(DrsMatcher(expected_drs, metric))
    is_match = len(lst) > 1
    self.assertTrue(is_match)
def _substitute_text_in_match_statement_with_graph(text, substitution_triggers):
    """Replace the quoted sentence in each MATCH statement with its parsed DRS.

    Tries double-quoted MATCH strings first, then single-quoted ones.

    Fixed: the original always rebuilt the replacement target with double
    quotes ('"' + item + '"'), so sentences captured by the single-quote
    pattern were never actually substituted. The quote character now follows
    the pattern that matched.
    """
    drs_cleaner = DrsNERCleaner(substitution_triggers)
    quote = '"'
    p = re.compile(r'MATCH.*"(.*)"')
    lst = p.findall(text)
    if not lst:
        quote = "'"
        p = re.compile(r"MATCH.*'(.*)'")
        lst = p.findall(text)
    for item in lst:
        try:
            drs = Discourse(item).connected_components[0]
        except IndexError:
            # Lazy %-style args keep formatting off the hot path.
            _logger.warning('Cannot use Discourse on %s', item[:200])
            drs = Drs.create_from_natural_language(item)
        drs = drs.apply(drs_cleaner)
        text = text.replace(quote + item + quote, str(drs))
    return text
import os

from pynsett.discourse import Discourse
from pynsett.drt import Drs
from pynsett.extractor import Extractor
from pynsett.knowledge import Knowledge

# Directory of this script; used to locate sibling resource files.
_path = os.path.dirname(__file__)

# Scratch script: parse a negated copular sentence and inspect the resulting DRS.
text = "John Smith is not blond"
drs = Drs.create_from_natural_language(text)
drs.plot()  # presumably renders the DRS graph for visual inspection — confirm
print(drs)
_path = os.path.dirname(__file__)

# Scratch sentences kept for manual experimentation; swap the active one below.
#sentence = "John has a house"
#sentence = "Centuries later John wrote to his brother Theo"
#sentence = "Hans was born in 1582 or 1583 in Antwerp, then in the Spanish Netherlands, as the son of cloth merchant Franchois Fransz Hals van Mechelen (c.1542–1610) and his second wife Adriaentje van Geertenryck.[2] Like many, Hals' parents fled during[citation needed] the Fall of Antwerp (1584–1585) from the south to Haarlem in the new Dutch Republic in the north, where he lived for the remainder of his life. Hals studied under Flemish émigré Karel van Mander,[2][3] whose Mannerist influence, however, is barely noticeable in Hals' work."
#sentence = 'It is not known whether Hals ever painted landscapes'
#sentence = 'John drove home where he has a cat.'
#sentence = 'His dog was red'
#sentence = 'My own dog is red'
#sentence = 'Jane has a bicycle'
#sentence = 'Jane Smith is an engineer'
#sentence = 'Jane works as an engineer'
#sentence = 'Jane works for Google'
sentence = "John was born in 1582 or 1583 in Antwerp"

drs = Drs.create_from_natural_language(sentence)
print(drs)

# Resolve intra-sentence anaphora, then mark head tokens, and show the result.
anaphora = SingleSentenceAnaphoraVisitor()
drs.apply(anaphora)
head_token_visitor = HeadTokenVisitor(1)
drs.apply(head_token_visitor)
print('---')
print(drs)

knowledge = Knowledge()
# Context manager closes the rules file deterministically
# (the original open(...).read() leaked the handle).
with open(os.path.join(_path, '../rules/test.rules')) as rules_file:
    knowledge.add_rules(rules_file.read())
#knowledge.add_rules(open(os.path.join(_path, '../rules/generic_relations.rules')).read())
def test_if_rule(self):
    """An 'If ... then ...' sentence must produce a CONDITION edge."""
    parsed = Drs.create_from_natural_language('If I breathe I am alive')
    pattern = Drs.create_from_predicates_string('{}(a), {"type": "CONDITION"}(a,b), {}(b)')
    matches = parsed.apply(DrsMatcher(pattern, metric))
    self.assertTrue(len(matches) > 0)
def test_snippet_1(self):
    """ Mostly to check conjunction rules, over a long multi-sentence wiki paragraph. """
    # Paragraph exercising 'and', comma-conjunction and 'as well as' handling.
    text = "Asimov also wrote mysteries and fantasy, as well as much nonfiction. Most of his popular science books explain concepts in a historical way, going as far back as possible to a time when the science in question was at its simplest stage. Examples include Guide to Science, the three-volume set Understanding Physics, and Asimov's Chronology of Science and Discovery. He wrote on numerous other scientific and non-scientific topics, such as chemistry, astronomy, mathematics, history, biblical exegesis, and literary criticism."
    drs = Drs.create_from_natural_language(text)
    # Full expected DRT: one predicate per token (word/tag/compound/entity/...)
    # followed by the dependency and semantic edges between the vertices.
    expected_drs = Drs.create_from_predicates_string("""
{'word': 'wrote', 'tag': 'v', 'compound': 'wrote', 'entity': '', 'lemma': 'write', 'gender_guess': None, 'is_head_token': True, 'refers_to': None, 'negated': 'false', 'type': None}(v2), {'word': 'Asimov', 'tag': 'n', 'compound': 'Asimov', 'entity': 'PERSON', 'lemma': 'Asimov', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v0), {'word': 'also', 'tag': 'RB', 'compound': 'also', 'entity': '', 'lemma': 'also', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v1), {'word': 'mysteries', 'tag': 'n', 'compound': 'mysteries', 'entity': '', 'lemma': 'mystery', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v3), {'word': 'fantasy', 'tag': 'n', 'compound': 'fantasy', 'entity': '', 'lemma': 'fantasy', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v5), {'word': 'nonfiction', 'tag': 'n', 'compound': 'nonfiction', 'entity': '', 'lemma': 'nonfiction', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v11), {'word': 'as', 'tag': 'RB', 'compound': 'as', 'entity': '', 'lemma': 'as', 'gender_guess': None, 'is_head_token': False, 'refers_to': 
None, 'negated': 'false', 'type': None}(v7), {'word': 'well', 'tag': 'RB', 'compound': 'well', 'entity': '', 'lemma': 'well', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v8), {'word': 'much', 'tag': 'j', 'compound': 'much', 'entity': '', 'lemma': 'much', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v10), {'word': 'Most', 'tag': 'j', 'compound': 'Most', 'entity': '', 'lemma': 'Most', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v13), {'word': 'books', 'tag': 'n', 'compound': 'books', 'entity': '', 'lemma': 'book', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v18), {'word': 'he', 'tag': 'PRP$', 'compound': 'he', 'entity': '', 'lemma': 'he', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v15), {'word': 'popular', 'tag': 'j', 'compound': 'popular', 'entity': '', 'lemma': 'popular', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v16), {'word': 'science', 'tag': 'n', 'compound': 'science', 'entity': '', 'lemma': 'science', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v17), {'word': 'explain', 'tag': 'v', 'compound': 'explain', 'entity': '', 'lemma': 'explain', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v19), {'word': 'concepts', 'tag': 'n', 'compound': 'concepts', 'entity': '', 'lemma': 'concept', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v20), {'word': 'going', 'tag': 'v', 'compound': 'going', 'entity': '', 'lemma': 'go', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v26), {'word': 'way', 'tag': 'n', 'compound': 'way', 'entity': '', 
'lemma': 'way', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v24), {'word': 'historical', 'tag': 'j', 'compound': 'historical', 'entity': '', 'lemma': 'historical', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v23), {'word': 'back', 'tag': 'RB', 'compound': 'back', 'entity': '', 'lemma': 'back', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v29), {'word': 'to', 'tag': 'IN', 'compound': 'to', 'entity': '', 'lemma': 'to', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'to'}(v32), {'word': 'far', 'tag': 'RB', 'compound': 'far', 'entity': '', 'lemma': 'far', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v28), {'word': 'as', 'tag': 'RB', 'compound': 'as', 'entity': '', 'lemma': 'as', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v27), {'word': 'as', 'tag': 'IN', 'compound': 'as', 'entity': '', 'lemma': 'as', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'as'}(v30), {'word': 'possible', 'tag': 'j', 'compound': 'possible', 'entity': '', 'lemma': 'possible', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v31), {'word': 'time', 'tag': 'n', 'compound': 'time', 'entity': '', 'lemma': 'time', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v34), {'word': 'was', 'tag': 'v', 'compound': 'was', 'entity': '', 'lemma': 'be', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v40), {'word': 'science', 'tag': 'n', 'compound': 'science', 'entity': '', 'lemma': 'science', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v37), {'word': 
'in', 'tag': 'IN', 'compound': 'in', 'entity': '', 'lemma': 'in', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'in'}(v38), {'word': 'question', 'tag': 'n', 'compound': 'question', 'entity': '', 'lemma': 'question', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v39), {'word': 'when', 'tag': 'WRB', 'compound': 'when', 'entity': '', 'lemma': 'when', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v35), {'word': 'at', 'tag': 'IN', 'compound': 'at', 'entity': '', 'lemma': 'at', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'at'}(v41), {'word': 'stage', 'tag': 'n', 'compound': 'stage', 'entity': '', 'lemma': 'stage', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v44), {'word': 'its', 'tag': 'PRP$', 'compound': 'its', 'entity': '', 'lemma': 'its', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v42), {'word': 'simplest', 'tag': 'j', 'compound': 'simplest', 'entity': '', 'lemma': 'simple', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v43), {'word': 'include', 'tag': 'v', 'compound': 'include', 'entity': '', 'lemma': 'include', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v47), {'word': 'Examples', 'tag': 'n', 'compound': 'Examples', 'entity': '', 'lemma': 'example', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v46), {'word': 'Guide', 'tag': 'n', 'compound': 'Guide', 'entity': '', 'lemma': 'Guide', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v48), {'word': '.', 'tag': '.', 'compound': '.', 'entity': '', 'lemma': '.', 'gender_guess': None, 'is_head_token': False, 
'refers_to': None, 'negated': 'false', 'type': None}(v68), {'word': 'to', 'tag': 'IN', 'compound': 'to', 'entity': '', 'lemma': 'to', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v49), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'and'}(v51), {'word': 'set', 'tag': 'v', 'compound': 'set', 'entity': '', 'lemma': 'set', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v56), {'word': 'Science', 'tag': 'n', 'compound': 'Science', 'entity': '', 'lemma': 'Science', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v50), {'word': 'volume', 'tag': 'n', 'compound': 'volume', 'entity': '', 'lemma': 'volume', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v55), {'word': 'three', 'tag': 'CD', 'compound': 'three', 'entity': 'CARDINAL', 'lemma': 'three', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v53), {'word': '-', 'tag': 'HYPH', 'compound': '-', 'entity': '', 'lemma': '-', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v54), {'word': 'Physics', 'tag': 'n', 'compound': 'Understanding_Physics', 'entity': 'ORG', 'lemma': 'Physics', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v58), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v59), {'word': 'and', 'tag': 'CC', 'compound': 'and', 'entity': '', 'lemma': 'and', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'and'}(v60), {'word': 'Chronology', 'tag': 'n', 'compound': 'Chronology', 
'entity': 'WORK_OF_ART', 'lemma': 'Chronology', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v63), {'word': 'Asimov', 'tag': 'n', 'compound': 'Asimov', 'entity': 'PERSON', 'lemma': 'Asimov', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v61), {'word': 'of', 'tag': 'IN', 'compound': 'of', 'entity': 'WORK_OF_ART', 'lemma': 'of', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v64), {'word': 'Science', 'tag': 'n', 'compound': 'Science', 'entity': 'WORK_OF_ART', 'lemma': 'Science', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v65), {'word': 'and', 'tag': 'CC', 'compound': 'and', 'entity': 'WORK_OF_ART', 'lemma': 'and', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'and'}(v66), {'word': 'Discovery', 'tag': 'n', 'compound': 'Discovery', 'entity': 'WORK_OF_ART', 'lemma': 'Discovery', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v67), {'word': 'wrote', 'tag': 'v', 'compound': 'wrote', 'entity': '', 'lemma': 'write', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v70), {'word': 'He', 'tag': 'PRP', 'compound': 'He', 'entity': 'PERSON', 'lemma': 'he', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v69), {'word': 'on', 'tag': 'IN', 'compound': 'on', 'entity': '', 'lemma': 'on', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v71), {'word': '.', 'tag': '.', 'compound': '.', 'entity': '', 'lemma': '.', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v97), {'word': 'topics', 'tag': 'n', 'compound': 'topics', 'entity': '', 'lemma': 'topic', 'gender_guess': None, 
'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v79), {'word': 'scientific', 'tag': 'j', 'compound': 'scientific', 'entity': '', 'lemma': 'scientific', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v74), {'word': '-', 'tag': 'j', 'compound': '-', 'entity': '', 'lemma': '-', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v77), {'word': 'non', 'tag': 'j', 'compound': 'non', 'entity': '', 'lemma': 'non', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v76), {'word': 'scientific', 'tag': 'j', 'compound': 'scientific', 'entity': '', 'lemma': 'scientific', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v78), {'word': 'numerous', 'tag': 'j', 'compound': 'numerous', 'entity': '', 'lemma': 'numerous', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v72), {'word': 'other', 'tag': 'j', 'compound': 'other', 'entity': '', 'lemma': 'other', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v73), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v80), {'word': 'as', 'tag': 'IN', 'compound': 'as', 'entity': '', 'lemma': 'as', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'as'}(v82), {'word': 'such', 'tag': 'j', 'compound': 'such', 'entity': '', 'lemma': 'such', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v81), {'word': 'chemistry', 'tag': 'n', 'compound': 'chemistry', 'entity': '', 'lemma': 'chemistry', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v83), {'word': ',', 'tag': ',', 
'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': 'and'}(v84), {'word': 'astronomy', 'tag': 'n', 'compound': 'astronomy', 'entity': '', 'lemma': 'astronomy', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v85), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v86), {'word': 'mathematics', 'tag': 'n', 'compound': 'mathematics', 'entity': '', 'lemma': 'mathematic', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v87), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v88), {'word': 'history', 'tag': 'n', 'compound': 'history', 'entity': '', 'lemma': 'history', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v89), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v90), {'word': 'exegesis', 'tag': 'n', 'compound': 'exegesis', 'entity': '', 'lemma': 'exegesis', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v92), {'word': 'biblical', 'tag': 'j', 'compound': 'biblical', 'entity': '', 'lemma': 'biblical', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v91), {'word': ',', 'tag': ',', 'compound': ',', 'entity': '', 'lemma': ',', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v93), {'word': 'criticism', 'tag': 'n', 'compound': 'criticism', 'entity': '', 'lemma': 'criticism', 'gender_guess': None, 'is_head_token': False, 
'refers_to': None, 'negated': 'false', 'type': None}(v96), {'word': 'literary', 'tag': 'j', 'compound': 'literary', 'entity': '', 'lemma': 'literary', 'gender_guess': None, 'is_head_token': False, 'refers_to': None, 'negated': 'false', 'type': None}(v95), {'type': 'AMOD'}(v24, v23), {'type': 'ADVMOD'}(v26, v29), {'type': 'ADVMOD'}(v28, v27), {'type': 'ADVMOD'}(v29, v28), {'type': 'NSUBJ'}(v40, v37), {'type': 'POSS'}(v44, v42), {'type': 'AMOD'}(v44, v43), {'type': 'NSUBJ'}(v47, v46), {'type': 'DOBJ'}(v47, v48), {'type': 'PUNCT'}(v47, v68), {'type': 'PREP'}(v48, v49), {'type': 'POBJ'}(v49, v50), {'type': 'PUNCT'}(v55, v54), {'type': 'NPADVMOD'}(v56, v55), {'type': 'DOBJ'}(v56, v58), {'type': 'PUNCT'}(v56, v59), {'type': 'POSS'}(v63, v61), {'type': 'PREP'}(v63, v64), {'type': 'POBJ'}(v64, v65), {'type': 'NSUBJ'}(v70, v69), {'type': 'PREP'}(v70, v71), {'type': 'PUNCT'}(v70, v97), {'type': 'POBJ'}(v71, v79), {'type': 'SUBTOK'}(v77, v76), {'type': 'SUBTOK'}(v78, v77), {'type': 'AMOD'}(v79, v72), {'type': 'AMOD'}(v79, v73), {'type': 'AMOD'}(v79, v74), {'type': 'AMOD'}(v79, v78), {'type': 'PUNCT'}(v79, v80), {'type': 'POBJ'}(v82, v83), {'type': 'PUNCT'}(v85, v86), {'type': 'CONJ'}(v85, v87), {'type': 'PUNCT'}(v87, v88), {'type': 'CONJ'}(v87, v89), {'type': 'PUNCT'}(v89, v90), {'type': 'CONJ'}(v89, v92), {'type': 'AMOD'}(v92, v91), {'type': 'PUNCT'}(v92, v93), {'type': 'CONJ'}(v92, v96), {'type': 'AMOD'}(v96, v95), {'type': 'AGENT'}(v2, v0), {'type': 'ADVOCATIVE_CLAUSE'}(v19, v26), {'type': 'when'}(v34, v40), {'type': 'of'}(v13, v18), {'type': 'AGENT'}(v19, v13), {'type': 'PATIENT'}(v19, v20), {'type': 'in'}(v19, v24), {'type': 'NUMBER'}(v55, v53), {'type': 'OWNS'}(v61, v63), {'type': 'OWNS'}(v15, v18), {'type': 'PATIENT'}(v2, v5), {'type': 'and'}(v56, v63), {'type': 'and'}(v48, v56), {'type': 'and'}(v65, v67), {'type': 'and'}(v83, v85), {'type': 'in'}(v37, v39), {'type': 'to'}(v26, v34), {'type': 'as'}(v29, v31), {'type': 'at'}(v40, v44), {'type': 'as'}(v79, v81), {'type': 'of'}(v18, v17), {'type': 'ADJECTIVE'}(v11, v10), {'type': 'ADVERB'}(v2, v1), {'type': 'ADJECTIVE'}(v18, v16), {'type': 'as'}(v2, v3), {'type': 'as'}(v2, v5)""")
    lst = drs.apply(DrsMatcher(expected_drs, metric))
    # More than one returned match indicates the expected graph was found.
    is_match = len(lst) > 1
    self.assertTrue(is_match)
def test_entity_parsing(self):
    """Entity placeholders like {PERSON}/{GPE} must match concrete named entities."""
    placeholder_drs = Drs.create_from_natural_language('{PERSON} is in {GPE}')
    concrete_drs = Drs.create_from_natural_language('John is in London')
    matches = placeholder_drs.apply(DrsMatcher(concrete_drs, metric))
    self.assertTrue(len(matches) > 0)
def test_negation_matches(self):
    """A negated sentence must still match an identical negated sentence."""
    first = Drs.create_from_natural_language('John Smith is not blond')
    second = Drs.create_from_natural_language('John Smith is not blond')
    matches = first.apply(DrsMatcher(second, metric))
    self.assertTrue(len(matches) > 0)
def test_sub_isomorphism(self):
    """A smaller DRS must be found as a sub-graph of a larger one."""
    container = Drs.create_from_natural_language('The ideas#1 of Jim#2 are silly')
    fragment = Drs.create_from_natural_language('ideas#3 of Jim#4')
    matches = container.apply(DrsMatcher(fragment, metric))
    self.assertTrue(len(matches) > 0)