Example #1
 def process(self, infile, outfile):
     """Process a fragment file and add TIMEX3 tags that were
     missed by Tempex.
     Arguments:
        infile - an absolute path
        outfile - an absolute path"""
     xmldoc = Parser().parse_file(open(infile,'r'))
     self.doctree = FragmentConverter(xmldoc, infile).convert()
     #self.print_doctree(BTIME)
     self.find_timexes()
     self.doctree.printOut(outfile)
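
Every component shown in these examples follows the same process() shape: parse the fragment file with the XML parser, convert it into a shallow document tree with the fragment converter, run the component-specific pass over that tree, and write the result to the output file. The sketch below shows only that shape; the classes and helpers in it are hypothetical stand-ins, not the real tarsqi Parser, FragmentConverter or Document.

# Minimal sketch of the shared process() pipeline. Everything here is a
# stand-in: a real component would use Parser, FragmentConverter and the
# toolkit's Document class instead.

class StubDoctree:
    """Stand-in for the Document produced by FragmentConverter.convert()."""

    def __init__(self, sentences):
        self.sentences = sentences        # a list of token lists

    def __iter__(self):
        return iter(self.sentences)

    def printOut(self, outfile):
        with open(outfile, 'w') as fh:
            for sentence in self.sentences:
                fh.write(' '.join(sentence) + '\n')


def process(infile, outfile, component_pass):
    """Parse, convert, run the component-specific pass, write the result."""
    with open(infile) as fh:
        sentences = [line.split() for line in fh]   # stands in for parsing
    doctree = StubDoctree(sentences)                # stands in for converting
    component_pass(doctree)                         # e.g. find_timexes, extractEvents
    doctree.printOut(outfile)
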
Example #2
class BTime(TarsqiComponent):

    """Fledgling component to add Timex tags to a document. It now only
    serves to fill in some timexes that GUTime does not get; doing it
    this way is easier than modifying GUTime.

    Instance variables:
       name - a string
       xmldoc - an XmlDocument
       doctree - a Document"""

    
    def __init__(self):
        """Set the NAME instance variable."""
        self.NAME = BTIME


    def process(self, infile, outfile):
        """Process a fragment file and add TIMEX3 tags that were
        missed by Tempex.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        xmldoc = Parser().parse_file(open(infile,'r'))
        self.doctree = FragmentConverter(xmldoc, infile).convert()
        #self.print_doctree(BTIME)
        self.find_timexes()
        self.doctree.printOut(outfile)

    def find_timexes(self):
        """Loop through all sentences in self.doctree and through all nodes in
        each sentence and search for missed timexes in noun groups."""
        for sentence in self.doctree:
            for node in sentence:
                if node.isNounChunk():
                    self.find_timex_in_noungroup(node)

    def find_timex_in_noungroup(self, noungroup):
        """Find missing timexes in noun groups. Searches for
        years. Assumes that the noun group is not contained in a
        Timex tag."""
        idx = 0
        for node in noungroup:
            if node.isToken() and node.pos == POS_CD:
                text = node.getText()
                if text.isdigit() and len(text) == 4:
                    attrs = { 'TYPE': 'DATE', 'VAL': text }
                    timex = TimexTag(attrs)
                    timex.dtrs = [node]
                    # this changes doctree.sentenceList
                    noungroup[idx] = timex
                    # and this changes doctree.nodeList
                    self.doctree.addTimexTag(timex)
            idx += 1
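
The year heuristic in find_timex_in_noungroup above comes down to one check: a token inside a noun group whose POS tag is CD and whose text is exactly four digits is wrapped in a TIMEX3 tag of type DATE, with the digit string as its VAL. A self-contained sketch of that check, over plain (text, pos) pairs rather than the toolkit's node and TimexTag classes:

# Sketch of the four-digit-year heuristic, independent of the toolkit's
# node and TimexTag classes. Tokens are plain (text, pos) pairs here.

POS_CD = 'CD'   # Penn Treebank tag for cardinal numbers

def find_year_timexes(noungroup):
    """Return (index, attrs) pairs for tokens that look like a year."""
    hits = []
    for idx, (text, pos) in enumerate(noungroup):
        if pos == POS_CD and text.isdigit() and len(text) == 4:
            hits.append((idx, {'TYPE': 'DATE', 'VAL': text}))
    return hits

print(find_year_timexes([('early', 'JJ'), ('1998', 'CD'), ('results', 'NNS')]))
# [(1, {'TYPE': 'DATE', 'VAL': '1998'})]
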
Example #3
File: main.py Project: tuandnvn/ttk
class Evita(TarsqiComponent):
    """Class that implements Evita's event recognizer.

    Instance variables:
       NAME - a string
       doctree - a Document instance """
    def __init__(self):
        """Set the NAME instance variable."""
        self.NAME = EVITA

    def process(self, infile, outfile):
        """Process a fragment file and write a file with EVENT tags.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        use_old = True
        use_old = False
        if use_old:
            #logger.out('start event parser ', time.time())
            self.doctree = parseFile(infile)
            #logger.out('end event parser   ', time.time())
        else:

            xmldoc = Parser().parse_file(open(infile, 'r'))

            # creating the document tree takes way too long, needs
            # to be optimized
            self.doctree = FragmentConverter(xmldoc, infile).convert()
        #xmldoc.pretty_print()
        #self.print_doctree(EVITA)
        self.extractEvents()
        self.doctree.printOut(outfile)

    def extractEvents(self):
        """Loop through all sentences in self.doctree and through all nodes in
        each sentence and determine if the node contains an event."""
        #print self.doctree
        for sentence in self.doctree:
            logger.debug("> SENTENCE:" + str(getWordList(sentence)))
            #print (sentence)
            for node in sentence:
                print(node)
                #print
                #node.pretty_print()
                wordlist = str(getWordList(node))
                poslist = str(getPOSList(node))
                #logger.debug("> NODE:" + wordlist + poslist + " checked=" +
                #             str(node.flagCheckedForEvents))
                if not node.flagCheckedForEvents:
                    #logger.out(node.__class__.__name__)
                    node.createEvent()
                else:
                    logger.debug("PASSING, already checked!")
Example #4
class Evita(TarsqiComponent):

    """Class that implements Evita's event recognizer.

    Instance variables:
       NAME - a string
       doctree - a Document instance """

    def __init__(self):
        """Set the NAME instance variable."""
        self.NAME = EVITA

    def process(self, infile, outfile):
        """Process a fragment file and write a file with EVENT tags.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        use_old = True
        use_old = False
        if use_old:
            # logger.out('start event parser ', time.time())
            self.doctree = parseFile(infile)
            # logger.out('end event parser   ', time.time())
        else:
            xmldoc = Parser().parse_file(open(infile, "r"))
            # creating the document tree takes way too long, needs
            # to be optimized
            self.doctree = FragmentConverter(xmldoc, infile).convert()
        # xmldoc.pretty_print()
        # self.print_doctree(EVITA)
        self.extractEvents()
        self.doctree.printOut(outfile)

    def extractEvents(self):
        """Loop through all sentences in self.doctree and through all nodes in
        each sentence and determine if the node contains an event."""
        for sentence in self.doctree:
            logger.debug("> SENTENCE:" + str(getWordList(sentence)))
            for node in sentence:
                # print
                # node.pretty_print()
                wordlist = str(getWordList(node))
                poslist = str(getPOSList(node))
                # logger.debug("> NODE:" + wordlist + poslist + " checked=" +
                #             str(node.flagCheckedForEvents))
                if not node.flagCheckedForEvents:
                    # logger.out(node.__class__.__name__)
                    node.createEvent()
                else:
                    logger.debug("PASSING, already checked!")
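
The traversal in extractEvents is the same two-level loop BTime uses: iterate over the sentences of the doctree, then over the nodes of each sentence, and let flagCheckedForEvents act as a check-once guard so a node is only handed to createEvent a single time. A small sketch of that guard, with a hypothetical Node class standing in for the toolkit's chunk and token classes:

# Sketch of the check-once traversal in extractEvents. Node is a
# hypothetical stand-in for the toolkit's chunk/token classes.

class Node:

    def __init__(self, text):
        self.text = text
        self.flagCheckedForEvents = False

    def createEvent(self):
        self.flagCheckedForEvents = True   # never offer this node again
        print("checking %r for an event" % self.text)


def extract_events(doctree):
    """doctree is assumed to iterate over sentences, each a list of nodes."""
    for sentence in doctree:
        for node in sentence:
            if not node.flagCheckedForEvents:
                node.createEvent()


shared = Node('dropped')
extract_events([[Node('Fares'), shared], [shared]])
# 'dropped' is offered to createEvent only once
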
Example #5
 def process(self, infile, outfile):
     """Process a fragment file and write a file with EVENT tags.
     Arguments:
        infile - an absolute path
        outfile - an absolute path"""
     use_old = True
     use_old = False
     if use_old:
         #logger.out('start event parser ', time.time())
         self.doctree = parseFile(infile)
         #logger.out('end event parser   ', time.time())
     else:
         xmldoc = Parser().parse_file(open(infile, 'r'))
         # creating the document tree takes way too long, needs
         # to be optimized
         self.doctree = FragmentConverter(xmldoc, infile).convert()
     #xmldoc.pretty_print()
     #self.print_doctree(EVITA)
     self.extractEvents()
     self.doctree.printOut(outfile)
Example #6
 def process(self, infile, outfile):
     """Run Slinket on the input file and write the results to the output
     file. Both the input and output files are fragments. Uses the xml
     parser as well as the fragment converter to prepare the input
     and create the shallow tree that Slinket requires.
     Arguments:
        infile - an absolute path
        outfile - an absolute path"""
     use_old = True
     use_old = False
     if use_old:
         self.doctree = eventParser.readFileWithEvents(infile)
     else:
         xmldoc = Parser().parse_file(open(infile, 'r'))
         self.doctree = FragmentConverter(xmldoc,
                                          infile).convert(user=SLINKET)
     #self.print_doctree(SLINKET)
     #logger.debug("Number of sentences in file: " + str(len(self.doctree)))
     for sentence in self.doctree:
         self._find_links(self.doctree, sentence)
     self.doctree.printOut(outfile)
Example #7
 def process(self, infile, outfile):
     """Apply all S2T rules to the input file.
     Parses the xml file with xml_parser.Parser and converts it to a shallow tree
     with converter.FragmentConverter. Then calls createTLinksFromSLinks."""
     xmlfile = open(infile, "r")
     self.xmldoc = Parser().parse_file(xmlfile)
     self.doctree = FragmentConverter(self.xmldoc, infile).convert()
     #self.print_doctree(S2T)
     self.alinks = self.doctree.alink_list
     self.slinks = self.doctree.slink_list
     self.tlinks = self.doctree.tlink_list
     #self.createTLinksFromALinks()
     self.createTLinksFromSLinks()
     self.xmldoc.save_to_file(outfile)
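
S2T differs from the other components in two ways: it keeps the parsed XmlDocument around and writes the output from it with save_to_file rather than from the doctree, and its real work is turning existing SLINKs into TLINKs. The actual S2T rules are not shown in this example; purely to illustrate the shape of a createTLinksFromSLinks pass, a hypothetical mapping loop over SLINK-like dictionaries might look like this:

# Purely illustrative sketch of an SLINK-to-TLINK mapping loop. The rule
# table and the choice of relType values are assumptions, not the real
# S2T rules; only the TimeML attribute names are taken from the spec.

def create_tlinks_from_slinks(slinks, rules):
    """For each SLINK whose relType has a rule, build a TLINK-like dict."""
    tlinks = []
    for slink in slinks:
        tlink_reltype = rules.get(slink['relType'])   # hypothetical relType table
        if tlink_reltype is None:
            continue                                  # no rule for this SLINK type
        tlinks.append({'relType': tlink_reltype,
                       'eventInstanceID': slink['eventInstanceID'],
                       'relatedToEventInstance': slink['subordinatedEventInstance']})
    return tlinks
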
Example #8
 def process(self, infile, outfile):
     """Run Slinket on the input file and write the results to the output
     file. Both the input and output files are fragments. Uses the xml
     parser as well as the fragment converter to prepare the input
     and create the shallow tree that Slinket requires.
     Arguments:
        infile - an absolute path
        outfile - an absolute path"""
     use_old = True
     use_old = False
     if use_old:
         self.doctree = eventParser.readFileWithEvents(infile)
     else:
         xmldoc = Parser().parse_file(open(infile,'r'))
         self.doctree = FragmentConverter(xmldoc, infile).convert(user=SLINKET)
     #self.print_doctree(SLINKET)
     #logger.debug("Number of sentences in file: " + str(len(self.doctree))) 
     for sentence in self.doctree:
         self._find_links(self.doctree, sentence)
     self.doctree.printOut(outfile)
Example #9
 def process(self, infile, outfile):
     """Process a fragment file and write a file with EVENT tags.
     Arguments:
        infile - an absolute path
        outfile - an absolute path"""
     use_old = True
     use_old = False
     if use_old:
         # logger.out('start event parser ', time.time())
         self.doctree = parseFile(infile)
         # logger.out('end event parser   ', time.time())
     else:
         xmldoc = Parser().parse_file(open(infile, "r"))
         # creating the document tree takes way too long, needs
         # to be optimized
         self.doctree = FragmentConverter(xmldoc, infile).convert()
     # xmldoc.pretty_print()
     # self.print_doctree(EVITA)
     self.extractEvents()
     self.doctree.printOut(outfile)
Example #10
class Slinket(TarsqiComponent):
    """Class that implements the Slinket SLINK and ALINK parser.

    Instance variables:
       NAME - a string
       doctree - a Document"""
    def __init__(self):
        """Load the Slinket dictionaries if they have not been loaded yet."""
        self.NAME = SLINKET
        self.doctree = None
        SLINKET_DICTS.load()

    def process(self, infile, outfile):
        """Run Slinket on the input file and write the results to the output
        file. Both the input and output files are fragments. Uses the xml
        parser as well as the fragment converter to prepare the input
        and create the shallow tree that Slinket requires.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        use_old = True
        use_old = False
        if use_old:
            self.doctree = eventParser.readFileWithEvents(infile)
        else:
            xmldoc = Parser().parse_file(open(infile, 'r'))
            self.doctree = FragmentConverter(xmldoc,
                                             infile).convert(user=SLINKET)
        #self.print_doctree(SLINKET)
        #logger.debug("Number of sentences in file: " + str(len(self.doctree)))
        for sentence in self.doctree:
            self._find_links(self.doctree, sentence)
        self.doctree.printOut(outfile)

    def _find_links(self, doc, sentence):
        """For each event in the sentence, check whether an Alink or Slink can
        be created for it."""
        self.currSent = sentence
        #print; print "SENTENCE\n"; sentence.pretty_print(); print
        #logger.out("ELIST", self.currSent.eventList)
        eventNum = -1
        for (eLocation, eId) in self.currSent.eventList:
            eventNum += 1
            #logger.out("< %d %d %s >" % (eventNum, eLocation, eId))
            event_expr = EventExpression(eId, eLocation, eventNum,
                                         doc.taggedEventsDict[eId])
            # alinks
            if event_expr.can_introduce_alink():
                logger.debug("EVENT: '" + event_expr.form +
                             "' is candidate to Alinking")
                self._find_alinks(event_expr)
            # lexical slinks
            if event_expr.can_introduce_slink():
                logger.debug("EVENT: '" + event_expr.form +
                             "' is candidate to Slinking")
                self._find_lexically_based_slinks(event_expr)
            # syntactic slinks
            self._find_purpose_clause_slinks(event_expr)
            self._find_conditional_slinks(event_expr)

    def _find_alinks(self, event_expr):
        """Try to create an Alink for the event, using the forward FSA
        patterns first and the backward patterns only if that fails."""
        evNode = self.currSent[event_expr.locInSent]
        if evNode is None:
            logger.warning("No event node found at locInSent")
            return
        forwardFSAs = event_expr.alinkingContexts('forward')
        if forwardFSAs:
            logger.debug("PROCESS for FORWARD alinks")
            evNode.createForwardAlink(forwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0
                return
        backwardFSAs = event_expr.alinkingContexts('backwards')
        if backwardFSAs:
            logger.debug("PROCESS for BACKWARD alinks")
            evNode.createBackwardAlink(backwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0

    def _find_lexically_based_slinks(self, event_expr):
        """Try to find lexically based Slinks using forward, backward and
        reporting FSA patterns. There is no return value; if a Slink is
        found, it will be created by the chunk that embeds the
        Slink-triggering event.
        Arguments:
           event_expr - an EventExpression"""

        evNode = self.currSent[event_expr.locInSent]
        #logger.out('trying slink')
        if evNode is None:
            logger.error("No event node found at locInSent")
            return

        forwardFSAs = event_expr.slinkingContexts('forward')
        if forwardFSAs:
            #logger.out('found', len(forwardFSAs[0]), 'groups of forwardFSAs')
            evNode.find_forward_slink(forwardFSAs)
            if evNode.createdLexicalSlink:
                #logger.out('created slink')
                evNode.createdLexicalSlink = 0
                return

        backwardFSAs = event_expr.slinkingContexts('backwards')
        if backwardFSAs:
            #logger.out('found', len(backwardFSAs[0]), 'groups of backwardFSAs')
            logger.debug("PROCESS for BACKWARD slinks")
            evNode.find_backward_slink(backwardFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0
                return

        reportingFSAs = event_expr.slinkingContexts('reporting')
        if reportingFSAs:
            #logger.out('found', len(reportingFSAs[0]), 'groups of reportingFSAs')
            logger.debug("PROCESS for REPORTING slinks")
            evNode.find_reporting_slink(reportingFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0

    def _find_purpose_clause_slinks(self, event_expr):
        """Not yet implemented. But note that some purpose clause SLINKS are
        already introduced in the lexically-triggered process. This is
        so for those events that discursively tend to appear modified
        by a Purpose Clause (e.g., 'address').  The data are based on
        TimeBank."""
        pass

    def _find_conditional_slinks(self, event_expr):
        """Not yet implemented."""
        pass
Example #11
class Slinket(TarsqiComponent):

    """Class that implements the Slinket SLINK and ALINK parser.

    Instance variables:
       NAME - a string
       doctree - a Document"""
    
    def __init__(self):
        """Load the Slinket dictionaries if they have not been loaded yet."""
        self.NAME = SLINKET
        self.doctree = None
        SLINKET_DICTS.load()

    def process(self, infile, outfile):
        """Run Slinket on the input file and write the results to the output
        file. Both the input and output files are fragments. Uses the xml
        parser as well as the fragment converter to prepare the input
        and create the shallow tree that Slinket requires.
        Arguments:
           infile - an absolute path
           outfile - an absolute path"""
        use_old = True
        use_old = False
        if use_old:
            self.doctree = eventParser.readFileWithEvents(infile)
        else:
            xmldoc = Parser().parse_file(open(infile,'r'))
            self.doctree = FragmentConverter(xmldoc, infile).convert(user=SLINKET)
        #self.print_doctree(SLINKET)
        #logger.debug("Number of sentences in file: " + str(len(self.doctree))) 
        for sentence in self.doctree:
            self._find_links(self.doctree, sentence)
        self.doctree.printOut(outfile)

    def _find_links(self, doc, sentence):
        """For each event in the sentence, check whether an Alink or Slink can
        be created for it."""
        self.currSent = sentence
        #print; print "SENTENCE\n"; sentence.pretty_print(); print
        #logger.out("ELIST", self.currSent.eventList)
        eventNum = -1
        for (eLocation, eId) in self.currSent.eventList:
            eventNum += 1
            #logger.out("< %d %d %s >" % (eventNum, eLocation, eId))
            event_expr = EventExpression(eId, eLocation, eventNum, doc.taggedEventsDict[eId])
            # alinks
            if event_expr.can_introduce_alink():
                logger.debug("EVENT: '"+event_expr.form+"' is candidate to Alinking")
                self._find_alinks(event_expr)
            # lexical slinks
            if event_expr.can_introduce_slink():
                logger.debug("EVENT: '"+event_expr.form+"' is candidate to Slinking")
                self._find_lexically_based_slinks(event_expr)
            # syntactic slinks
            self._find_purpose_clause_slinks(event_expr)
            self._find_conditional_slinks(event_expr)

    def _find_alinks(self, event_expr):
        """Try to create an Alink for the event, using the forward FSA
        patterns first and the backward patterns only if that fails."""
        evNode = self.currSent[event_expr.locInSent]
        if evNode is None:
            logger.warning("No event node found at locInSent")
            return
        forwardFSAs = event_expr.alinkingContexts('forward')
        if forwardFSAs:
            logger.debug("PROCESS for FORWARD alinks")
            evNode.createForwardAlink(forwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0
                return 
        backwardFSAs = event_expr.alinkingContexts('backwards')
        if backwardFSAs:
            logger.debug("PROCESS for BACKWARD alinks")
            evNode.createBackwardAlink(backwardFSAs)
            if evNode.createdAlink:
                evNode.createdAlink = 0

                
    def _find_lexically_based_slinks(self, event_expr):

        """Try to find lexically based Slinks using forward, backward and
        reporting FSA patterns. There is no return value; if a Slink is
        found, it will be created by the chunk that embeds the
        Slink-triggering event.
        Arguments:
           event_expr - an EventExpression"""

        evNode = self.currSent[event_expr.locInSent]
        #logger.out('trying slink')
        if evNode is None:
            logger.error("No event node found at locInSent")
            return
            
        forwardFSAs = event_expr.slinkingContexts('forward')
        if forwardFSAs:
            #logger.out('found', len(forwardFSAs[0]), 'groups of forwardFSAs')
            evNode.find_forward_slink(forwardFSAs)
            if evNode.createdLexicalSlink:
                #logger.out('created slink')
                evNode.createdLexicalSlink = 0
                return
            
        backwardFSAs = event_expr.slinkingContexts('backwards')
        if backwardFSAs:
            #logger.out('found', len(backwardFSAs[0]), 'groups of backwardFSAs')
            logger.debug("PROCESS for BACKWARD slinks")
            evNode.find_backward_slink(backwardFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0
                return
            
        reportingFSAs = event_expr.slinkingContexts('reporting')
        if reportingFSAs:
            #logger.out('found', len(reportingFSAs[0]), 'groups of reportingFSAs')
            logger.debug("PROCESS for REPORTING slinks")
            evNode.find_reporting_slink(reportingFSAs)
            if evNode.createdLexicalSlink:
                evNode.createdLexicalSlink = 0

                
    def _find_purpose_clause_slinks(self, event_expr):
        """Not yet implemented. But note that some purpose clause SLINKS are
        already introduced in the lexically-triggered process. This is
        so for those events that discursively tend to appear modified
        by a Purpose Clause (e.g., 'address').  The data are based on
        TimeBank."""
        pass

    def _find_conditional_slinks(self, event_expr):
        """Not yet implemented."""
        pass
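
Stepping back from the details, _find_alinks and _find_lexically_based_slinks follow the same pattern: ask the EventExpression for the FSA contexts of each direction in a fixed order (forward, then backward, and for Slinks also reporting), try each one, and stop as soon as a link has been created. The real methods signal success through flags on the chunk node rather than return values, but the ordering is the same. A self-contained sketch of that first-success dispatch, with hypothetical context and matcher functions standing in for the FSA machinery:

# Sketch of the first-success ordering used by _find_alinks and
# _find_lexically_based_slinks. The direction names come from the code
# above; get_contexts and the matchers are hypothetical stand-ins.

def find_link(get_contexts, matchers,
              directions=('forward', 'backwards', 'reporting')):
    """Try each direction in order and return the first link created."""
    for direction in directions:
        fsas = get_contexts(direction)     # e.g. event_expr.slinkingContexts(direction)
        if not fsas:
            continue                       # no patterns defined for this direction
        link = matchers[direction](fsas)   # e.g. evNode.find_forward_slink(fsas)
        if link is not None:
            return link                    # stop at the first successful match
    return None

# toy usage: only the reporting patterns produce a link here
link = find_link(lambda d: ['fsa'] if d == 'reporting' else [],
                 {'forward': lambda fsas: None,
                  'backwards': lambda fsas: None,
                  'reporting': lambda fsas: 'SLINK'})
print(link)   # SLINK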