Ejemplo n.º 1
0
def get_drt():
    """Return the DRT triplets for the text carried by a POST request.

    The request body is parsed as JSON and its ``'text'`` entry is turned
    into a ``Discourse``; a ``DRTTripletsWriter`` is applied to it and the
    result is passed to ``jsonify``.  Any non-POST request gets an empty
    list back.
    """
    if request.method == 'POST':
        payload = json.loads(request.data)
        parsed_discourse = Discourse(payload['text'])
        triplets_writer = DRTTripletsWriter()
        return jsonify(parsed_discourse.apply(triplets_writer))
    return []
Ejemplo n.º 2
0
 def test_multi_sentence_anaphora_feminine_names(self):
     """Extraction over two sentences yields the single expected triplet.

     The second sentence refers to "Jane" only through the pronoun "She".
     """
     expected = [('Jane_0', 'HAS_ROLE', 'carpenter')]
     parsed = Discourse("Jane is happy. She is a carpenter")
     extracted = Extractor(parsed, _knowledge).extract()
     self.assertEqual(extracted, expected)
Ejemplo n.º 3
0
 def test_pronoun_coreference(self):
     """Extraction over one sentence using "he" yields the expected triplet."""
     expected = [('John_0', 'OWN', 'cat')]
     parsed = Discourse('John drove home where he has a cat.')
     extracted = Extractor(parsed, _knowledge).extract()
     self.assertEqual(extracted, expected)
Ejemplo n.º 4
0
def _substitute_text_in_match_statement_with_graph(text,
                                                   substitution_triggers):
    """Replace quoted text inside MATCH statements with its DRS string.

    Double-quoted text following ``MATCH`` is looked for first; only when
    none is found is the single-quoted form tried.  Each captured string is
    converted to a DRS, passed through a ``DrsNERCleaner`` built from
    ``substitution_triggers``, and the quoted original (delimiters included)
    is replaced in ``text`` by ``str(drs)``.

    :param text: the text containing the MATCH statements.
    :param substitution_triggers: forwarded to ``DrsNERCleaner``.
    :return: ``text`` with every matched quoted string substituted.
    """
    drs_cleaner = DrsNERCleaner(substitution_triggers)
    # Remember which delimiter produced the matches: the replacement below
    # must look for the same quote character, otherwise single-quoted
    # statements are parsed but never actually substituted.
    quote = '"'
    lst = re.findall(r'MATCH.*"(.*)"', text)
    if not lst:
        quote = "'"
        lst = re.findall(r"MATCH.*'(.*)'", text)
    for item in lst:
        try:
            drs = Discourse(item).connected_components[0]
        except IndexError:
            # No connected component available: fall back to building the
            # DRS directly from the natural-language string.
            _logger.warning('Cannot use Discourse on %s', item[:200])
            drs = Drs.create_from_natural_language(item)
        drs = drs.apply(drs_cleaner)
        text = text.replace(quote + item + quote, str(drs))
    return text
Ejemplo n.º 5
0
 def test_coreference_is_joined_in_graph(self):
     """The discourse graph matches a REFERS_TO edge pattern more than once."""
     parsed = Discourse(
         "John is ginger. He is a carpenter. test. Jane is blond. She is a carpenter. ")
     pattern = Drs.create_from_predicates_string(
         "{}(a), {}(b), {'type': 'REFERS_TO'}(a,b)")
     matches = parsed._discourse.apply(DrsMatcher(pattern, metric))
     # The test requires strictly more than one match.
     self.assertTrue(len(matches) > 1)
Ejemplo n.º 6
0
 def test_multiple_people_same_rule(self):
     """The extractor output must equal one HAS_ROLE triplet per person."""
     text = "John is ginger. He is a carpenter. test. Jane is blond. She is a carpenter. "
     discourse = Discourse(text)
     extractor = Extractor(discourse, _knowledge)
     triplets = extractor.extract()
     expected_triplets = [('John_0', 'HAS_ROLE', 'carpenter'),
                          ('Jane_1', 'HAS_ROLE', 'carpenter')]
     # assertEqual, not assertTrue: assertTrue(a, b) uses b only as the
     # failure message and would pass for any non-empty result.
     self.assertEqual(triplets, expected_triplets)
Ejemplo n.º 7
0
def get_triplets():
    """Return the extracted triplets for the text carried by a POST request.

    The request body is parsed as JSON and its ``'text'`` entry is turned
    into a ``Discourse``; an ``Extractor`` built with the module-level
    ``knowledge`` extracts the triplets, which are passed through
    ``transform_triplets_into_api_edges_and_nodes`` and then ``jsonify``.
    Any non-POST request gets an empty list back.
    """
    if request.method == 'POST':
        payload = json.loads(request.data)
        parsed_discourse = Discourse(payload['text'])
        extracted = Extractor(parsed_discourse, knowledge).extract()
        return jsonify(transform_triplets_into_api_edges_and_nodes(extracted))
    return []
Ejemplo n.º 8
0
 def test_multiple_matcing(self):
     """Run extraction over a five-sentence text about two named people."""
     expected = [('John_Smith_0', 'HAS_BLOND_ROLE', 'carpenter'),
                 ('Sarah_Doe_1', 'HAS_GINGER_ROLE', 'carpenter'),
                 ('John_0', 'HAS_ROLE', 'carpenter'),
                 ('Jane_1', 'HAS_ROLE', 'carpenter')]
     parsed = Discourse(
         "John Smith is blond. He is a carpenter. There is no reason to panic. Sarah Doe is ginger. She is a carpenter.")
     extracted = Extractor(parsed, _knowledge).extract()
     # NOTE(review): assertTrue uses its second argument as the failure
     # message only, so this checks that the result is truthy and never
     # compares it with `expected`. The expected list also names "Jane_1",
     # who does not appear in the text -- confirm the intended values
     # before switching to assertEqual.
     self.assertTrue(extracted, expected)
Ejemplo n.º 9
0
    def test_asimov_wiki(self):
        """Run extraction and a graph match over the Asimov wiki fixture."""
        # Read through a context manager so the fixture's file handle is
        # closed as soon as the text has been loaded.
        with open(os.path.join(_path, '../data/wiki_asimov.txt')) as wiki_file:
            text = wiki_file.read()
        discourse = Discourse(text)
        extractor = Extractor(discourse, _knowledge)
        triplets = extractor.extract()
        expected_triplets = [('Isaac_Asimov_0|Asimov_0', 'JOB_TITLE', 'writer'),
                             ('Isaac_Asimov_0|Asimov_0', 'OWNS', 'works'),
                             ('Isaac_Asimov_0|Asimov_0', 'OWNS', 'books')]
        # NOTE(review): assertTrue uses its second argument as the failure
        # message only, so this checks that `triplets` is truthy and never
        # compares it with `expected_triplets` -- confirm the expected
        # values before tightening this to assertEqual.
        self.assertTrue(triplets, expected_triplets)

        expected_drs = Drs.create_from_predicates_string(
            "{}(a), {'word': 'Boston_University'}(b), {'type': 'at'}(a,b)")
        lst = discourse._discourse.apply(DrsMatcher(expected_drs, metric))
        # The pattern must be matched strictly more than once.
        is_match = len(lst) > 1
        self.assertTrue(is_match)
Ejemplo n.º 10
0
import os

from pynsett.discourse import Discourse
from pynsett.extractor import Extractor
from pynsett.knowledge import Knowledge

# Directory of this script; the rules file is located relative to it.
_path = os.path.dirname(__file__)

text = "Jane was born on 10 August 1979."

knowledge = Knowledge()
# Load the rules through a context manager so the file handle is closed
# once its contents have been handed to the knowledge base.
with open(os.path.join(_path, '../rules/test.rules')) as rules_file:
    knowledge.add_rules(rules_file.read())

discourse = Discourse(text)
extractor = Extractor(discourse, knowledge)
triplets = extractor.extract()

# Print each extracted triplet on its own line.
for triplet in triplets:
    print(triplet)