Example #1
0
 def process_element(self, element):
     """Run the Blinker rules on one element of the document.

     A TarsqiTree is built for the element from self.tarsqidoc. The tree and
     the element are stored on the instance as self.doctree and
     self.docelement, after which self._run_blinker() is invoked."""
     tarsqi_tree = create_tarsqi_tree(self.tarsqidoc, element)
     self.doctree = tarsqi_tree
     self.docelement = element
     # For debugging, the tree can be printed here: self.pp_doctree(BLINKER)
     self._run_blinker()
Example #2
0
File: main.py Project: tarsqi/ttk
 def process_element(self, element):
     """Build the TarsqiTree for the given element and run Blinker on it.

     The freshly created tree is kept in self.doctree and the element itself
     in self.docelement; the Blinker rules are then applied by calling
     self._run_blinker()."""
     element_tree = create_tarsqi_tree(self.tarsqidoc, element)
     self.doctree = element_tree
     self.docelement = element
     # Debugging aid, disabled by default: self.pp_doctree(BLINKER)
     self._run_blinker()
Example #3
0
 def process(self):
     """Run Slinket on each docelement slice of the TarsqiDocument.

     The events and timexes in the document's tag repository are indexed
     first. Next, a TarsqiTree is created for every element and given to a
     new Slinket instance, whose processing updates the tags in the
     TarsqiDocument when slinks are added."""
     document = self.document
     document.tags.index_events()
     document.tags.index_timexes()
     for docelement in document.elements():
         element_tree = tree.create_tarsqi_tree(document, docelement)
         slinket = Slinket()
         slinket.process_doctree(element_tree)
Example #4
0
def collect_tarsqidoc_vectors(tarsqidoc):
    """Gather the vectors for all elements of a TarsqiDocument.

    A TarsqiTree is built for every element of the document and each
    sentence in that tree is handed to _create_vectors_for_sentence(),
    together with two accumulator lists that are passed along on every call.

    Returns a pair with those two lists, in the order (ee_vectors,
    et_vectors); both are empty when the document has no elements."""
    ee_collected, et_collected = [], []
    for docelement in tarsqidoc.elements():
        doctree = create_tarsqi_tree(tarsqidoc, docelement)
        for sentence in doctree:
            _create_vectors_for_sentence(
                tarsqidoc, docelement, sentence, ee_collected, et_collected)
    return ee_collected, et_collected
Example #5
0
def collect_tarsqidoc_vectors(tarsqidoc):
    """Collect two lists of vectors from a TarsqiDocument.

    Every element of the document is turned into a TarsqiTree; each sentence
    of the tree is then passed to _create_vectors_for_sentence() along with
    the two result lists, which are shared across all calls.

    Returns the tuple (ee_vectors, et_vectors). Both lists stay empty if the
    document has no elements."""
    ee_result = []
    et_result = []
    for docelement in tarsqidoc.elements():
        element_tree = create_tarsqi_tree(tarsqidoc, docelement)
        for sentence in element_tree:
            _create_vectors_for_sentence(
                tarsqidoc, docelement, sentence, ee_result, et_result)
    return ee_result, et_result
Example #6
0
 def process_element(self):
     """Look for events in the current element slice of the TarsqiDocument.

     A TarsqiTree is created from self.tarsqidoc and self.docelement and
     stored in self.doctree. Every sentence of the tree is logged at debug
     level, and createEvent() is called on each node in the sentence whose
     checkedEvents flag is not set; that is the step where events are
     added to the tag repository."""
     doctree = create_tarsqi_tree(self.tarsqidoc, self.docelement)
     self.doctree = doctree
     for sentence in doctree:
         sentence_text = get_words_as_string(sentence)
         logger.debug("SENTENCE: %s" % sentence_text)
         for node in sentence:
             # The flag is tested right before each call rather than up
             # front, since it is read anew for every node.
             if node.checkedEvents:
                 continue
             node.createEvent()
Example #7
0
 def process(self):
     """Try to add TLINKS for all the SLINKS in each element.

     Events and timexes are indexed first, then a TarsqiTree with links is
     built for every element and passed to a new Slink2Tlink instance."""
     # NOTE: the indexing is repeated here and in Slinket because it is not
     # done when the TarsqiDocument and its repository are first created (at
     # which point there usually are no times and events yet). Components
     # that need the indexes therefore build them themselves instead of
     # relying on an earlier component having done so.
     document = self.document
     document.tags.index_events()
     document.tags.index_timexes()
     for docelement in document.elements():
         linked_tree = tree.create_tarsqi_tree(document, docelement, links=True)
         converter = Slink2Tlink()
         converter.process_doctree(linked_tree)
Example #8
0
 def process(self):
     """Try to add TLINKS for all the SLINKS in each element.

     After indexing the events and timexes of the document, each element is
     turned into a TarsqiTree (created with links=True) which is processed
     by a newly created Slink2Tlink instance."""
     # NOTE: indexing happens both here and in Slinket. It is not performed
     # when the TarsqiDocument and its repository are created, because there
     # usually are no times and events at that point, so every component
     # that needs the indexes creates them without assuming that this was
     # already done.
     tarsqidoc = self.document
     tarsqidoc.tags.index_events()
     tarsqidoc.tags.index_timexes()
     for docelement in tarsqidoc.elements():
         element_tree = tree.create_tarsqi_tree(
             tarsqidoc, docelement, links=True)
         slink2tlink = Slink2Tlink()
         slink2tlink.process_doctree(element_tree)
Example #9
0
File: main.py Project: tarsqi/ttk
 def process_element(self):
     """Look for events in the current element slice of the TarsqiDocument.

     A TarsqiTree is created from self.tarsqidoc and self.docelement and
     stored in self.doctree. Every sentence is logged at debug level. Nodes
     that already are events, or whose checkedEvents flag is set, are
     skipped; for all other nodes createEvent() is called with
     self.imported_events, which is the step where events are added to the
     tag repository."""
     doctree = create_tarsqi_tree(self.tarsqidoc, self.docelement)
     self.doctree = doctree
     for sentence in doctree:
         sentence_text = get_words_as_string(sentence)
         logger.debug("SENTENCE: %s" % sentence_text)
         for node in sentence:
             # isEvent() is consulted first; checkedEvents is only read for
             # nodes that are not events themselves.
             if node.isEvent() or node.checkedEvents:
                 continue
             node.createEvent(imported_events=self.imported_events)
Example #10
0
 def _update_element(self, element):
     """Update the chunks of an element using the orphans in its TarsqiTree.

     For each orphan the containing sentence is looked up. When there is
     none, a warning is logged and the orphan is skipped. Otherwise the
     nodes of the sentence that overlap with the orphan, leaving out the
     sentence itself and all tokens, are handed to
     self._remove_overlapping_chunks(). When all orphans have been dealt
     with, self._add_chunks_for_timexes() is called for the element."""
     # NOTE: the name sounds generic, but this is really only meant for
     # timexes
     # TODO: maybe rename while the above is the case
     doctree = create_tarsqi_tree(self.tarsqidoc, element)
     for orphan in doctree.orphans:
         sentence = self._get_containing_sentence(doctree, orphan)
         if sentence is None:
             logger.warn("No sentence contains %s" % orphan)
         else:
             overlapping = [node for node in sentence.all_nodes()
                            if node.overlaps(orphan)]
             chunks = [node for node in overlapping
                       if not (node is sentence or node.isToken())]
             self._remove_overlapping_chunks(chunks)
     self._add_chunks_for_timexes(element)
Example #11
0
 def _update_element(self, element):
     """Use the orphans in the element's TarsqiTree to update its chunks.

     Orphans without a containing sentence are reported with a warning and
     skipped. For the others, the nodes of the containing sentence that
     overlap with the orphan are collected, the sentence node itself and
     any tokens are filtered out, and the remainder is passed to
     self._remove_overlapping_chunks(). Finally
     self._add_chunks_for_timexes() is run on the element."""
     # NOTE: despite the generic name this is really only meant for timexes
     # TODO: maybe rename while the above is the case
     element_tree = create_tarsqi_tree(self.tarsqidoc, element)
     for orphan in element_tree.orphans:
         container = self._get_containing_sentence(element_tree, orphan)
         if container is None:
             logger.warn("No sentence contains %s" % orphan)
         else:
             candidates = [node for node in container.all_nodes()
                           if node.overlaps(orphan)]
             candidates = [node for node in candidates
                           if not (node is container or node.isToken())]
             self._remove_overlapping_chunks(candidates)
     self._add_chunks_for_timexes(element)
Example #12
0
 def _add_chunks_for_timexes(self, element):
     """Wrap timexes that hang directly off a sentence in a noun chunk tag.

     A new TarsqiTree is created for the element. For every timex node whose
     parent is a sentence, an NG tag spanning the timex (t.begin to t.end)
     is added to self.tarsqidoc.tags, with GUTIME recorded as its origin."""
     # At this point we have removed chunks that overlap with timexes. The
     # TarsqiTree used in the calling method did not know that (because the
     # changes were made to the TagRepository), so we create a new tree.
     doctree = create_tarsqi_tree(self.tarsqidoc, element)
     # Note that it is perhaps not enough to just create a chunk for the
     # orphan because removing an overlapping chunk could remove a chunk that
     # also embeds another timex, which will then have no chunk around it. A
     # more primary question by the way is, do we need timexes to be inside
     # chunks or can they stand by themselves for later processing?
     # TODO: check whether we can go without chunks or whether we need
     # timexes to be inside chunks.
     for sentence in doctree.get_sentences():
         # Only the timexes are needed; the list of noun chunks that used to
         # be built here was never read.
         timexes = [n for n in sentence.all_nodes() if n.isTimex()]
         for t in timexes:
             # if the timex's parent is a sentence, then add a Tag with
             # tagname ng to the TagRepository
             if t.parent.isSentence():
                 # for now we will credit GUTIME with this chunk; a fresh
                 # dictionary is created for every tag that is added
                 attrs = {'origin': GUTIME}
                 self.tarsqidoc.tags.add_tag(NG, t.begin, t.end, attrs)
Example #13
0
 def _add_chunks_for_timexes(self, element):
     """Add an NG tag around each timex whose parent node is a sentence.

     The TarsqiTree for the element is rebuilt first. Then, for all timex
     nodes in every sentence of that tree, a tag named NG with the offsets
     of the timex and GUTIME as its origin is added to self.tarsqidoc.tags,
     but only if the parent of the timex is a sentence."""
     # At this point we have removed chunks that overlap with timexes. The
     # TarsqiTree used in the calling method did not know that (because the
     # changes were made to the TagRepository), so we create a new tree.
     doctree = create_tarsqi_tree(self.tarsqidoc, element)
     # Note that it is perhaps not enough to just create a chunk for the
     # orphan because removing an overlapping chunk could remove a chunk that
     # also embeds another timex, which will then have no chunk around it. A
     # more primary question by the way is, do we need timexes to be inside
     # chunks or can they stand by themselves for later processing?
     # TODO: check whether we can go without chunks or whether we need
     # timexes to be inside chunks.
     for sentence in doctree.get_sentences():
         # The unused list of noun chunks is no longer computed here; only
         # the timexes of the sentence are used below.
         timexes = [n for n in sentence.all_nodes() if n.isTimex()]
         for t in timexes:
             # if the timex's parent is a sentence, then add a Tag with
             # tagname ng to the TagRepository
             if t.parent.isSentence():
                 # for now we will credit GUTIME with this chunk
                 attrs = {'origin': GUTIME}
                 self.tarsqidoc.tags.add_tag(NG, t.begin, t.end, attrs)