def lookForAtlinks(self):
    """Examine whether the Alink can generate a Tlink and, if it can, run
    the A2T patterns on it."""
    is_candidate = self.is_a2t_candidate()
    label = "candidate" if is_candidate else "not a candidate"
    logger.debug("A2T Alink " + label + " " + self.attrs['lid']
                 + " " + self.attrs['relType'])
    if is_candidate:
        apply_patterns(self)
def _process_opening_tag(self, element):
    """Process an opening tag, dispatching to the handler that matches
    the tag name."""
    logger.debug(">>" + element.content)
    tag = element.tag
    if tag == SENTENCE:
        # sentences are special: a new Sentence is created and registered
        self.currentSentence = Sentence()
        self.doc.addSentence(self.currentSentence)
    elif tag == NOUNCHUNK:
        self._process_opening_chunk(NounChunk, element)
    elif tag == VERBCHUNK:
        self._process_opening_chunk(VerbChunk, element)
    else:
        # all remaining tags take the element as their only argument
        handlers = {
            TOKEN: self._process_opening_lex,
            EVENT: self._process_opening_event,
            INSTANCE: self._process_opening_make_instance,
            TIMEX: self._process_opening_timex,
            ALINK: self._process_alink,
            SLINK: self._process_slink,
            TLINK: self._process_tlink}
        handler = handlers.get(tag)
        if handler is not None:
            handler(element)
def _find_slink(self, event_context, fsa_lists, reltype_list):
    """Try to find an slink in the given event_context using lists of
    FSAs. If the context matches an FSA, then create an slink and insert
    it in the document."""
    for i, fsa_list in enumerate(fsa_lists):
        # fall back on the last element of the reltype list when it
        # happens to be shorter than fsa_lists
        reltype = reltype_list[i] if i < len(reltype_list) else reltype_list[-1]
        result = self._look_for_link(event_context, fsa_list)
        if not result:
            logger.debug(".....................REJECTED SLINK by FSA: "
                         + str(i) + ".?")
            continue
        (length_of_match, fsa_num) = result
        fsa = fsa_list[fsa_num]
        logger.debug(21*"."+"ACCEPTED SLINK!!! LENGTH: "
                     + str(length_of_match) + " " + str(reltype)
                     + " || FSA: " + str(i) + "." + str(fsa_num)
                     + " PatternName: " + fsa.fsaname)
        slinkAttrs = {
            'eventInstanceID': self.eiid,
            'subordinatedEventInstance':
                event_context[length_of_match-1].eiid,
            'relType': reltype,
            'syntax': fsa.fsaname}
        self.document().addLink(slinkAttrs, SLINK)
        self.createdLexicalSlink = 1
        break
def _find_alink(self, event_context, fsa_lists, reltype_list):
    """Try to create an alink using the context and patterns from the
    dictionary. Alinks are created as a side effect. Returns True if an
    alink was created, False otherwise."""
    for i, fsa_list in enumerate(fsa_lists):
        result = self._look_for_link(event_context, fsa_list)
        if result:
            (length_of_match, fsa_num) = result
            fsa = fsa_list[fsa_num]
            reltype = get_reltype(reltype_list, i)
            eiid = event_context[length_of_match - 1].eiid
            alinkAttrs = {
                EVENT_INSTANCE_ID: self.eiid,
                RELATED_TO_EVENT_INSTANCE: eiid,
                RELTYPE: reltype,
                SYNTAX: fsa.fsaname}
            self.tree.addLink(alinkAttrs, ALINK)
            logger.debug("ALINK CREATED")
            return True
        logger.debug("REJECTED ALINK by FSA: " + str(i))
    return False
def createSlink(self, slinkedEventContext, syntaxPatternLists, relTypeList):
    """Only used if doc is chunked with Alembic; that is, Adj tokens do
    not belong to any chunk."""
    for i in range(len(syntaxPatternLists)):
        self._printSequence(slinkedEventContext, 1)  # DEBUGGING method
        substring = self._lookForLink(slinkedEventContext,
                                      syntaxPatternLists[i])
        if not substring:
            logger.debug(".....................REJECTED SLINK by FSA: "
                         + str(i) + ".?")
            continue
        (substringLength, subpatternNum) = substring
        relType = relTypeList[i]
        # the subpatternNum-th FSA in the i-th nested pattern list
        patternName = syntaxPatternLists[i][subpatternNum]
        logger.debug(21*"."+"ACCEPTED SLINK!!! LENGTH: "
                     + str(substringLength) + " " + str(relType)
                     + " || FSA: " + str(i) + "." + str(subpatternNum)
                     + " PatternName: " + patternName.fsaname)
        slinkAttrs = {
            'eventInstanceID': self.eiid,
            'subordinatedEventInstance':
                slinkedEventContext[substringLength-1].eiid,
            'relType': relType,
            'syntax': patternName.fsaname}
        self.doc().addLink(slinkAttrs, SLINK)
        self.createdLexicalSlink = 1
        break
def process_fragments(self):
    """Set fragment names, create the vectors for each fragment, run
    the classifier and add links from the classifier to the fragments.

    Fix: the original assigned `perl` three times in a row; the first two
    (a hard-coded ActivePerl path and the bare 'perl') were dead and are
    removed — only the configured interpreter was ever used."""
    os.chdir(self.DIR_LINK_MERGER + os.sep + 'sputlink')
    perl = self.tarsqi_instance.getopt_perl()
    for fragment in self.fragments:
        # set fragment names
        base = fragment[0]
        in_fragment = os.path.join(self.DIR_DATA,
                                   base + '.' + self.CREATION_EXTENSION)
        tmp_fragment = os.path.join(self.DIR_DATA,
                                    base + '.' + self.TMP_EXTENSION)
        out_fragment = os.path.join(self.DIR_DATA,
                                    base + '.' + self.RETRIEVAL_EXTENSION)
        # process them
        # NOTE(review): os.popen3 is deprecated; subprocess would be the
        # modern replacement — left as-is to preserve behavior
        command = "%s merge.pl %s %s" % (perl, in_fragment, tmp_fragment)
        (i, o, e) = os.popen3(command)
        for line in e:
            if line.lower().startswith('warn'):
                logger.warn('MERGING: ' + line)
            else:
                logger.error('MERGING: ' + line)
        for line in o:
            logger.debug('MERGING: ' + line)
        self._add_tlinks_to_fragment(in_fragment, tmp_fragment,
                                     out_fragment)
    os.chdir(TTK_ROOT)
def createForwardSlink(self, forwardSlinks):
    """Only used if doc is chunked with Alembic; that is, Adj tokens do
    not belong to any chunk."""
    # the context is everything following the current event;
    # forwardSlinks[0] holds the pattern names, forwardSlinks[1] the relType
    context = self.parent[self.position+1:]
    self.createSlink(context, forwardSlinks[0], forwardSlinks[1])
    if self.createdLexicalSlink:
        logger.debug("FORWARD SLINK CREATED")
def _distributeNodes(self):
    """Distribute the item's information over the lists in the
    VChunkFeaturesLists."""
    # TODO: figure out whether to keep remove_interjections
    tempNodes = remove_interjections(self)
    joined = '-'.join([n.getText() for n in tempNodes])
    debug("\n" + joined)
    logger.debug(joined)
    for itemCounter, item in enumerate(tempNodes):
        message_prefix = "  %s %s/%s" % (itemCounter, item.getText(),
                                         item.pos)
        # order of the tests matters: NEG is checked before MD and V
        if item.pos == 'TO':
            debug('%s ==> TO' % message_prefix)
            self._distributeNode_TO(item, itemCounter)
        elif item.getText() in forms.negative:
            debug('%s ==> NEG' % message_prefix)
            self._distributeNode_NEG(item)
        elif item.pos == 'MD':
            debug('%s ==> MD' % message_prefix)
            self._distributeNode_MD(item)
        elif item.pos[0] == 'V':
            debug('%s ==> V' % message_prefix)
            self._distributeNode_V(item, tempNodes, itemCounter)
        elif item.pos in forms.partAdv:
            debug('%s ==> ADV' % message_prefix)
            self._distributeNode_ADV(item, tempNodes, itemCounter)
        else:
            debug('%s ==> None' % message_prefix)
    if DEBUG:
        self.print_ChunkLists()
def _find_slink(self, event_context, fsa_lists, reltype_list):
    """Try to find an slink in the given event_context using lists of
    FSAs. If the context matches an FSA, then create an slink and insert
    it in the tree."""
    for i, fsa_list in enumerate(fsa_lists):
        result = self._look_for_link(event_context, fsa_list)
        if result:
            (length_of_match, fsa_num) = result
            fsa = fsa_list[fsa_num]
            reltype = get_reltype(reltype_list, i)
            eiid = event_context[length_of_match - 1].eiid
            slinkAttrs = {
                EVENT_INSTANCE_ID: self.eiid,
                SUBORDINATED_EVENT_INSTANCE: eiid,
                RELTYPE: reltype,
                SYNTAX: fsa.fsaname}
            self.tree.addLink(slinkAttrs, SLINK)
            logger.debug("SLINK CREATED")
            return True
        logger.debug("REJECTED SLINK by FSA: " + str(i))
    return False
def find_forward_alink(self, fsa_reltype_groups):
    """Search for an alink to the right of the event. Return True if an
    alink was found, False otherwise."""
    logger.debug("len(fsa_reltype_groups) = %s" % len(fsa_reltype_groups))
    fsa_lists = fsa_reltype_groups[0]
    reltypes_list = fsa_reltype_groups[1]
    # context is everything to the right of the event
    context = self.parent[self.position + 1:]
    return self._find_alink(context, fsa_lists, reltypes_list)
def _processDoubleEventInMultiAChunk(self, features, substring):
    """Tagging EVENT in both VerbChunk and AdjectiveToken. In this case
    the adjective will not be given the verb features."""
    logger.debug("[V_2Ev] " + features.as_verbose_string())
    self._conditionallyAddEvent(features)
    # the adjective is the last element of the matched substring
    substring[-1].createAdjEvent()
    for element in substring:
        update_event_checked_marker(element)
def _processDoubleEventInMultiAChunk(self, GramVCh, substring):
    """Tag an EVENT in the VChunk and a second one in the trailing
    AdjToken."""
    logger.debug("[V_2Ev] " + GramVCh.as_extended_string())
    # event on the verb chunk itself
    self._processEventInChunk(GramVCh)
    # event on the adjective token, the last element of the match
    adjToken = substring[-1]
    adjToken.createAdjEvent()
    self._updateFlagCheckedForEvents(substring)
def createTLinksFromSLinks(self):
    """Calls lookForStlinks for a given Slink object.

    Fix: the original used a bare `except:` which swallowed everything
    (including KeyboardInterrupt/SystemExit) and logged no detail; catch
    Exception and include the error in the log message."""
    logger.debug("Number of SLINKs in file: " + str(len(self.slinks)))
    for slinkTag in self.slinks:
        try:
            slink = Slink(self.xmldoc, self.doctree, slinkTag)
            slink.match_rules(self.rules)
        except Exception as e:
            logger.error("Error processing SLINK: %s" % e)
def _createEventOnHave(self, features):
    """Try the toHave multi-chunk pattern; process the multi-chunk event
    on a match, otherwise conditionally add a single-chunk event."""
    logger.debug("Checking for toHave pattern...")
    match = self._lookForMultiChunk(patterns.HAVE_FSAs)
    if not match:
        self.dribble("HAVE-2", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("HAVE-1", self.getText())
        self._processEventInMultiVChunk(match)
def _createEventOnFutureGoingTo(self, features):
    """Try the futureGoingTo multi-chunk pattern; process the multi-chunk
    event on a match, otherwise conditionally add a single-chunk event."""
    logger.debug("Checking for futureGoingTo pattern...")
    match = self._lookForMultiChunk(patterns.GOINGto_FSAs)
    if not match:
        self.dribble("GOING", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("GOING-TO", self.getText())
        self._processEventInMultiVChunk(match)
def _createEventOnDoAuxiliar(self, features):
    """Try the doAuxiliar multi-chunk pattern; process the multi-chunk
    event on a match, otherwise conditionally add a single-chunk event."""
    logger.debug("Checking for doAuxiliar pattern...")
    match = self._lookForMultiChunk(patterns.DO_FSAs)
    if not match:
        self.dribble("DO", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("DO-AUX", self.getText())
        self._processEventInMultiVChunk(match)
def _run_classifier(self, lemma):
    """Run the classifier on lemma, using features from the GramNChunk."""
    features = []
    if EVITA_NOM_CONTEXT:
        definiteness = 'DEF' if self.isDefinite() else 'INDEF'
        features = [definiteness, self.features.head.pos]
    is_event = nomEventRec.isEvent(lemma, features)
    logger.debug("  nomEventRec.isEvent(%s) ==> %s" % (lemma, is_event))
    return is_event
def _debug_vcf(vcf_list): logger.debug("len(features_list) = %d" % len(vcf_list)) if len(vcf_list) > 0 and DEBUG: for vcf in vcf_list: if DEBUG: print ' ', vcf.pp() for vcf in vcf_list: logger.debug(vcf.as_verbose_string())
def _createEventOnPastUsedTo(self, features):
    """Try the pastUsedTo multi-chunk pattern; process the multi-chunk
    event on a match, otherwise conditionally add a single-chunk event."""
    logger.debug("Checking for pastUsedTo pattern...")
    match = self._lookForMultiChunk(patterns.USEDto_FSAs)
    if not match:
        self.dribble("USED-2", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("USED-1", self.getText())
        self._processEventInMultiVChunk(match)
def _identifySubstringInSentence(self, tokenSentence, FSAset): fsaCounter=-1 # DEBUGGING purposes for fsa in FSAset: fsaCounter = fsaCounter + 1 logger.debug("FSA:\n"+str(fsa)) lenSubstring = fsa.acceptsShortestSubstringOf(tokenSentence) if lenSubstring: return (lenSubstring, fsaCounter) else: return (0, fsaCounter)
def _process_opening_timex(self, element):
    """Creates a TimeTag and embed it in the current chunk if there is
    one, otherwise add it to the sentence."""
    timex = TimexTag(element.attrs)
    self.currentTimex = timex
    logger.debug(str(timex.__dict__))
    logger.debug("TYPE:" + str(type(timex)))
    if self.currentChunk != None:
        self.currentChunk.addToken(timex)
    else:
        self.currentSentence.add(timex)
def createAdjEvent(self, gramvchunk=None):
    """Processes the adjective after a copular verb and make it an event
    if the adjective has an event class."""
    logger.debug("AdjectiveToken.createAdjEvent(gramvchunk)")
    # only adjectives directly under a sentence are handled here
    if self.parent.__class__.__name__ != 'Sentence':
        logger.warn("Unexpected syntax tree")
        return
    self.gramchunk = GramAChunk(self, gramvchunk)
    logger.debug(self.gramchunk.as_verbose_string())
    self._conditionallyAddEvent()
def createBackwardAlink(self, backwardAlinks):
    """Backward Alinks also check for the adequacy (e.g., in terms of
    TENSE or ASPECT) of the Subordinating Event. For cases such as 'the
    <EVENT>transaction</EVENT> has been <EVENT>initiated</EVENT>'."""
    logger.debug("TRYING backward alink")
    # reverse the left context so the FSAs see it outward from the event
    context = self.parent[:self.position+1]
    context.reverse()
    self.createAlink(context, backwardAlinks[0], backwardAlinks[1])
    if self.createdAlink:
        logger.debug("BACKWARD ALINK CREATED")
def getPolarity(self):
    """Return 'NEG' for a negated chunk and 'POS' otherwise, with the
    exception that negated chunks containing 'not only' count as
    positive."""
    if not self.negative:
        return "POS"
    for item in self.adverbsPre:
        if item.getText() == "only":
            logger.debug("'only' in self.adverbsPre:")
            # verbal chunks containing 'not only' have polarity='POS'
            return "POS"
    return "NEG"
def _getRestSent(self, structure): """Obtaining the rest of the sentence, which can be in a flat, token-based structure, or chunked.""" logger.debug("Entering _getRestSent") if structure == 'flat': restSentence = self.getTokens(self.parent[self.position+1:]) elif structure == 'chunked': restSentence = self.parent[self.position+1:] else: raise "ERROR: unknown structure value" return restSentence
def _conditionallyAddEvent(self, features=None):
    """Perform a few little checks on the head and check whether there is
    an event class, then add the event to the tree. When called on a
    NounChunk no features are handed in and the features instance
    variable is used; VerbChunk hands in the verb's features
    explicitly."""
    # TODO: split those cases to make for two simpler methods
    logger.debug("Conditionally adding nominal")
    if features is None:
        chunk_features = self.features
    else:
        chunk_features = features
    if self._acceptable_chunk_features(chunk_features):
        self.tree.addEvent(Event(chunk_features))
def _createEventOnContinue(self, features):
    """Look for CONTINUE + ADJ Predicative Complement, e.g. 'Interest
    rate continued low'; tag a double event on a match, otherwise
    conditionally add a single event."""
    logger.debug("Checking for CONTINUE + ADJ...")
    match = self._lookForMultiChunk(patterns.CONTINUE_A_FSAs, 'chunked')
    if not match:
        self.dribble("CONTINUE", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("CONTINUE-ADJ", self.getText())
        self._processDoubleEventInMultiAChunk(features, match)
def _createEventOnBecome(self, features):
    """Look for BECOME + ADJ Predicative Complement, e.g. 'He became
    famous at the age of 21'; tag a double event on a match, otherwise
    conditionally add a single event."""
    logger.debug("Checking for BECOME + ADJ Predicative Complement...")
    match = self._lookForMultiChunk(patterns.BECOME_A_FSAs, 'chunked')
    if not match:
        self.dribble("BECOME", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("BECOME-ADJ", self.getText())
        self._processDoubleEventInMultiAChunk(features, match)
def _createEventOnKeep(self, features):
    """Look for KEEP + ADJ Predicative Complement, e.g. 'The announcement
    kept everybody Adj'; tag a double event on a match, otherwise
    conditionally add a single event."""
    logger.debug("Checking for KEEP + [NChunk] + ADJ...")
    match = self._lookForMultiChunk(patterns.KEEP_A_FSAs, 'chunked')
    if not match:
        self.dribble("KEEP", self.getText())
        self._conditionallyAddEvent(features)
    else:
        self.dribble("KEEP-N-ADJ", self.getText())
        self._processDoubleEventInMultiAChunk(features, match)
def _createEventOnModal(self):
    """Try to create an event when the head of the chunk is a modal.
    Check the right context and see if you can extend the chunk into a
    complete verb group with modal verb and main verb. If so, process the
    merged constituents as a composed verb chunk."""
    # NOTE: this does not tend to apply since the chunker usually groups
    # the modal in with the rest of the verb group.
    logger.debug("Checking for modal pattern...")
    match = self._lookForMultiChunk(patterns.MODAL_FSAs)
    if match:
        self.dribble("MODAL", self.getText())
        self._processEventInMultiVChunk(match)
def isEvent(self, lemma, features):
    """Return True if lemma is an event, False if it is not, and raise a
    DisambiguationError if there is not enough data for a decision."""
    raw_counts = self.senseProbs.get(lemma, [0.0, 0.0])
    logger.debug("BayesEventRecognizer.isEvent(" + lemma + ")")
    logger.debug("\traw counts: " + str(raw_counts))
    # refuse to do anything if the observed frequencies are too low
    frequency = raw_counts[0] + raw_counts[1]
    if frequency < MINIMAL_FREQUENCY:
        raise DisambiguationError('sparse data for "' + lemma + '"')
    probEvent = raw_counts[1] / frequency
    probNonEvent = 1 - probEvent
    logger.debug("\tprobabilities: non-event=%s event=%s"
                 % (probNonEvent, probEvent))
    # absolute probabilities settle the question immediately
    if probEvent == 1:
        return True
    if probNonEvent == 1:
        return False
    # adjust probabilities with those of contextual features, ignoring
    # features for which we have no data
    for feature in features:
        try:
            probs = self.condProbs[lemma][feature]
        except KeyError:
            continue
        logger.debug("\tfeature prob: %s=%s" % (feature, probs))
        probEvent *= probs[1]
        probNonEvent *= probs[0]
    logger.debug("\tadjusted probabilities: non-event=%s event=%s"
                 % (probNonEvent, probEvent))
    return probEvent > probNonEvent
def _find_lexically_based_slinks(self, event_expr):
    """Try to find lexically based Slinks using forward, backward and
    reporting FSA patterns. No return value; if an Slink is found, it
    will be created by the chunk that embeds the Slink triggering event.
    Arguments:
       event_expr - an EventExpression

    Fix: when no event node was found the original logged an error but
    fell through and dereferenced evNode anyway, crashing with an
    AttributeError; it now returns after logging."""
    evNode = self.currSent[event_expr.locInSent]
    if evNode is None:
        logger.error("No event node found at locInSent")
        return
    forwardFSAs = event_expr.slinkingContexts('forward')
    if forwardFSAs:
        evNode.find_forward_slink(forwardFSAs)
        if evNode.createdLexicalSlink:
            evNode.createdLexicalSlink = 0
            return
    backwardFSAs = event_expr.slinkingContexts('backwards')
    if backwardFSAs:
        logger.debug("PROCESS for BACKWARD slinks")
        evNode.find_backward_slink(backwardFSAs)
        if evNode.createdLexicalSlink:
            evNode.createdLexicalSlink = 0
            return
    reportingFSAs = event_expr.slinkingContexts('reporting')
    if reportingFSAs:
        logger.debug("PROCESS for REPORTING slinks")
        evNode.find_reporting_slink(reportingFSAs)
        if evNode.createdLexicalSlink:
            evNode.createdLexicalSlink = 0
def find_reporting_slink(self, fsa_reltype_groups):
    """Reporting Slinks are applied to reporting predicates ('say',
    'told', etc) that link an event in a preceeding quoted sentence which
    is separated from the clause of the reporting event by a comma; e.g.,
    ``I <EVENT>want</EVENT> a referendum,'' Howard <EVENT
    class='REPORTING'>said</EVENT>. Slinket assumes that these quoted
    clauses always initiate the main sentence, so the first item in the
    sentence must be quotation marks."""
    fsa_lists = fsa_reltype_groups[0]
    reltypes_list = fsa_reltype_groups[1]
    sentenceBeginning = self.parent[:self.position]
    if sentenceBeginning and sentenceBeginning[0].getText() == "``":
        # the quotation does not contain the quotation marks
        quotation = self._extract_quotation(sentenceBeginning)
        if quotation is not None:
            logger.debug("TRYING reporting slink")
            return self._find_slink(quotation, fsa_lists, reltypes_list)
    return False
def createSlink(self, slinkedEventContext, syntaxPatternLists, relTypeList):
    """Only used if doc is chunked with Alembic; that is, Adj tokens do
    not belong to any chunk."""
    for i, patternList in enumerate(syntaxPatternLists):
        self._printSequence(slinkedEventContext, 1)  # DEBUGGING method
        substring = self._lookForLink(slinkedEventContext, patternList)
        if not substring:
            logger.debug(".....................REJECTED SLINK by FSA: "
                         + str(i) + ".?")
            continue
        (substringLength, subpatternNum) = substring
        relType = relTypeList[i]
        # the accepting FSA within the i-th nested pattern list
        patternName = patternList[subpatternNum]
        logger.debug(21 * "." + "ACCEPTED SLINK!!! LENGTH: "
                     + str(substringLength) + " " + str(relType)
                     + " || FSA: " + str(i) + "." + str(subpatternNum)
                     + " PatternName: " + patternName.fsaname)
        slinkAttrs = {
            'eventInstanceID': self.eiid,
            'subordinatedEventInstance':
                slinkedEventContext[substringLength - 1].eiid,
            'relType': relType,
            'syntax': patternName.fsaname}
        self.doc().addLink(slinkAttrs, SLINK)
        self.createdLexicalSlink = 1
        break
def _lookForMultiChunk(self, FSA_set, STRUCT='flat'):
    """Default argument 'STRUCT' specifies the structural format of the
    rest of the sentence: either a flat, token-level representation or a
    chunked one. Returns the matched prefix of the rest of the sentence,
    or 0 when no FSA accepts."""
    logger.debug("Entering _lookForMultiChunk")
    restSentence = self._getRestSent(STRUCT)
    if STRUCT == 'flat':
        for item in restSentence:
            logger.debug("\t " + item.getText() + " " + item.pos)
    lenSubstring, fsaNum = self._identify_substring(restSentence, FSA_set)
    if not lenSubstring:
        logger.debug("REJECTED by FSA:" + str(fsaNum))
        return 0
    logger.debug("ACCEPTED by FSA, LENGTH:" + str(lenSubstring)
                 + "FSA:" + str(fsaNum))
    return restSentence[:lenSubstring]
def createAdjEvent(self, verbGramFeat='nil'):
    """(Evita method) only for tokens that are not in a chunk."""
    logger.debug("createAdjEvent in AdjToken")
    if self.parent.__class__.__name__ != 'Sentence':
        return
    GramACh = self.gramChunk()
    if verbGramFeat != 'nil':
        # percolate grammatical features from the copular verb
        GramACh.tense = verbGramFeat['tense']
        GramACh.aspect = verbGramFeat['aspect']
        GramACh.modality = verbGramFeat['modality']
        GramACh.polarity = verbGramFeat['polarity']
        logger.debug("Accepted Adjective")
        logger.debug("[A_APC] " + GramACh.as_extended_string())
    else:
        logger.debug("[A_2Ev] " + GramACh.as_extended_string())
    self._processEventInToken(GramACh)
def _passes_semantics_test(self):
    """Return True if the nominal can be an event semantically. Depending
    on user settings this is done by a mixture of wordnet lookup and
    using a simple classifier.

    Fixes: (1) modernized the old `except E, (e)` syntax; (2) added the
    WordNet-sense fallback that the sibling implementations of this
    method have — the original fell off the end and returned None after a
    DisambiguationError or when the classifier was disabled."""
    logger.debug("event candidate?")
    lemma = self.features.head_lemma
    # return True if all WordNet senses are events, no classifier needed
    is_event = wordnet.allSensesAreEvents(lemma)
    logger.debug("  all WordNet senses are events ==> %s" % is_event)
    if is_event:
        return True
    # run the classifier if required, fall through on disambiguation error
    if EVITA_NOM_DISAMB:
        try:
            is_event = self._run_classifier(lemma)
            logger.debug("  baysian classifier result ==> %s" % is_event)
            return is_event
        except bayes.DisambiguationError as e:
            logger.debug("  DisambiguationError: %s" % e)
    # check whether primary sense or some of the senses are events,
    # consistent with the other implementations of this method
    if EVITA_NOM_WNPRIMSENSE_ONLY:
        is_event = wordnet.primarySenseIsEvent(lemma)
        logger.debug("  primary WordNet sense is event ==> %s" % is_event)
    else:
        is_event = wordnet.someSensesAreEvents(lemma)
        logger.debug("  some WordNet sense is event ==> %s" % is_event)
    return is_event
def _massage_sequence(sequence, indent=0):
    """Traverse down the tree, seeking tokens."""
    pad = indent * ' '
    for index, item in enumerate(sequence):
        if item.isToken():
            logger.debug("%s%d Token" % (pad, index))
            _massage_token(item, sequence, index)
        elif item.isChunk():
            logger.debug("%s%d Chunk" % (pad, index))
            _massage_sequence(item.dtrs, indent + 3)
        elif item.isTimex():
            logger.debug("%s%d Timex" % (pad, index))
            _massage_sequence(item.dtrs, indent + 3)
def _find_links(self, doc, sentence):
    """For each event in the sentence, check whether an Alink or Slink
    can be created for it."""
    for eventNum, (eLocation, eid) in enumerate(sentence.eventList):
        event_expr = EventExpression(eid, eLocation, eventNum,
                                     doc.events[eid])
        logger.debug(event_expr.as_verbose_string())
        if event_expr.can_introduce_alink():
            logger.debug("Alink candidate: " + event_expr.form)
            self._find_alink(sentence, event_expr)
        if event_expr.can_introduce_slink():
            logger.debug("Slink candidate: " + event_expr.form)
            self._find_lexically_based_slink(sentence, event_expr)
def _createEventOnBe(self, features, imported_events=None):
    """Try the BE + NOM, BE + ADJ and BE + VERB predicative complement
    patterns in that order; the first pattern that matches wins."""
    logger.debug("Checking for BE + NOM Predicative Complement...")
    match = self._lookForMultiChunk(patterns.BE_N_FSAs, 'chunked')
    if match:
        self.dribble("BE-NOM", self.getText())
        self._processEventInMultiNChunk(features, match, imported_events)
        return
    logger.debug("Checking for BE + ADJ Predicative Complement...")
    match = self._lookForMultiChunk(patterns.BE_A_FSAs, 'chunked')
    if match:
        matched_texts = [s.getText() for s in match]
        matched = self.getText() + ' ' + ' '.join(matched_texts)
        self.dribble("BE-ADJ", matched)
        self._processEventInMultiAChunk(features, match)
        return
    logger.debug("Checking for BE + VERB Predicative Complement...")
    match = self._lookForMultiChunk(patterns.BE_FSAs)
    if match:
        self.dribble("BE-V", self.getText())
        self._processEventInMultiVChunk(match)
def _matchChunk(self, chunkDescription):
    """Match chunk to the patterns in chunkDescription, a dictionary with
    key-value pairs that match instance variables and their values on
    GramChunks. The value in a key-value pair can be:
    - an atomic value, e.g. {..., 'headForm': 'is', ...}
    - a list of possible values, e.g. {..., 'headForm': forms.have, ...};
      the chunk feature must be included in this list
    - a negated value, given as a 2-position tuple whose first element is
      the caret symbol '^', e.g. {..., 'headPos': ('^', 'MD'), ...}; the
      chunk feature must NOT match the value (which may itself be a list)
    Returns 1 if every pair matches and 0 otherwise.
    This method is also implemented in the Chunk.Constituent class.

    Bug fixes relative to the original: (1) negated values were unpacked
    but never compared, because the if/elif/else chain was skipped once
    the tuple branch had been taken — negation is now actually tested;
    (2) a string was raised instead of an exception object."""
    debug("......entering _matchChunk()")
    for feat in chunkDescription.keys():
        value = chunkDescription[feat]
        debug("\n......PAIR <" + str(feat) + " " + str(value) + ">")
        negated = False
        if isinstance(value, tuple):
            if value[0] == '^':
                negated = True
                value = value[1]
            else:
                raise ValueError("error specifying description of pattern")
        if isinstance(value, list):
            matched = self.__getattr__(feat) in value
        else:
            matched = self.__getattr__(feat) == value
        # with negation, a match is a failure and vice versa
        if matched == negated:
            logger.debug("FEAT " + feat + " does not match")
            return 0
    logger.debug("Matched! (10)")
    return 1
def charData(string):
    """If string is event expression (i.e., within EVENT tag), add form
    into currentDoc.taggedEventDict"""
    # NOTE: relies on the module-level globals currentEvent, currentToken
    # and currentDoc maintained by the surrounding SAX-style handlers
    if currentEvent is not None and currentToken is not None:
        eid = currentEvent.attrs[EID]
        logger.debug("Storing Event Values - charData: " + string)
        """To avoid storing the form of adverbs or other particles following the lexical item tagged as EVENT"""
        if currentDoc.hasEventWithAttribute(eid, FORM):
            logger.debug("FORM already there. Hence, not storing: " + str(string))
        else:
            logger.debug("STORING: " + str(string))
            currentDoc.storeEventValues({EID: eid, FORM: string})
    """Regardless whether the string is an event expression, add it into the Document object """
    if currentToken is not None:
        # a token that already points at a text node appends to the last
        # node; otherwise it is pointed at a newly added node
        if currentToken.textIdx:
            currentDoc.nodeList[-1] = currentDoc.nodeList[-1] + string
        else:
            currentToken.setTextNode(currentDoc.nodeCounter)
            currentDoc.addDocNode(string)
    else:
        # no current token: the text goes straight into the document
        currentDoc.addDocNode(string)
class NounChunk(Chunk):

    """Behaviour specific to noun chunks, most notably the NounChunk
    specific code to create events.

    Fix: the Python 2 `except bayes.DisambiguationError, (strerror)`
    syntax in _passes_semantics_test was replaced with the `as e` form
    used by the sibling implementation of this method elsewhere in the
    codebase; a commented-out print was removed from createEvent."""

    def __init__(self):
        # the constituent sets tree, parent, position, dtrs, begin, and end
        Chunk.__init__(self, NOUNCHUNK)

    def isNounChunk(self):
        """Returns True"""
        return True

    def isDefinite(self):
        """Return True if self includes a Token that is a POS, PRP$ or a
        definite determiner."""
        for token in self.dtrs[:self.head]:
            # sometimes the daughter is not a token but a timex, skip it
            if not token.isToken():
                continue
            if (token.pos == forms.possessiveEndingTag
                    or token.pos == forms.possessivePronounTag
                    or (token.pos in forms.determinerTags
                        and token.getText() in forms.definiteDeterminers)):
                return True
        return False

    def isEmpty(self):
        """Return True if the chunk is empty, False otherwise."""
        return False if self.dtrs else True

    def createEvent(self, verbfeatures=None, imported_events=None):
        """Try to create an event in the NounChunk. Checks whether the
        nominal is an event candidate, then conditionally adds it. The
        verbfeatures dictionary is used when a governing verb hands in
        its features to a nominal in a predicative complement. The
        imported_events is handed in when Tarsqi tries to import events
        from a previous annotation."""
        logger.debug("NounChunk.createEvent(verbfeatures=%s)" % verbfeatures)
        if self.isEmpty():
            # this happened at some point due to a crazy bug in some old
            # code that does not exist anymore, log a warning in case
            # this returns
            logger.warn("There are no dtrs in the NounChunk")
        else:
            self.features = NChunkFeatures(self, verbfeatures)
            logger.debug(self.features.as_verbose_string())
            # don't bother if the head already is an event
            if self.features.head.isEvent():
                logger.debug("Nominal already contains an event")
            # Even if preceded by a BE or a HAVE form, only tagging
            # NounChunks headed by an eventive noun, so "was an intern"
            # will NOT be tagged
            elif self._passes_syntax_test():
                imported_event = self._get_imported_event_for_chunk(
                    imported_events)
                if imported_event is not None:
                    self._conditionally_add_imported_event(imported_event)
                elif self._passes_semantics_test():
                    self._conditionallyAddEvent()

    def _passes_syntax_test(self):
        """Return True if the nominal is syntactically able to be an
        event, return False otherwise. An event candidate syntactically
        has to have a head which cannot be a timex and the head has to be
        either a noun or a common noun, depending on the value of
        INCLUDE_PROPERNAMES."""
        if self.features.head.isTimex():
            return False
        if INCLUDE_PROPERNAMES:
            return self.head_is_noun()
        else:
            return self.head_is_common_noun()

    def _passes_semantics_test(self):
        """Return True if the nominal can be an event semantically.
        Depending on user settings this is done by a mixture of wordnet
        lookup and using a simple classifier."""
        logger.debug("event candidate?")
        lemma = self.features.head_lemma
        # return True if all WordNet senses are events, no classifier
        # needed
        is_event = wordnet.allSensesAreEvents(lemma)
        logger.debug("  all WordNet senses are events ==> %s" % is_event)
        if is_event:
            return True
        # run the classifier if required, fall through on disambiguation
        # error
        if EVITA_NOM_DISAMB:
            try:
                is_event = self._run_classifier(lemma)
                logger.debug("  baysian classifier result ==> %s" % is_event)
                return is_event
            except bayes.DisambiguationError as e:
                logger.debug("  DisambiguationError: %s" % e)
        # check whether primary sense or some of the senses are events
        if EVITA_NOM_WNPRIMSENSE_ONLY:
            is_event = wordnet.primarySenseIsEvent(lemma)
            logger.debug("  primary WordNet sense is event ==> %s" % is_event)
        else:
            is_event = wordnet.someSensesAreEvents(lemma)
            logger.debug("  some WordNet sense is event ==> %s" % is_event)
        return is_event
def _add_link(self, tagname, attrs):
    """Add the link to the TagRepository instance on the TarsqiDocument."""
    logger.debug("Adding %s: %s" % (tagname, attrs))
    repository = self.doctree.tarsqidoc.tags
    repository.add_tag(tagname, -1, -1, attrs)
def _add_link(self, tagname, attrs):
    """Add the link to the TagRepository instance on the TarsqiDocument,
    marking Slinket as its origin."""
    attrs[LIBRARY.timeml.ORIGIN] = SLINKET
    logger.debug("Adding %s: %s" % (tagname, attrs))
    repository = self.doctree.tarsqidoc.tags
    repository.add_tag(tagname, -1, -1, attrs)
def _process_element(self, element):
    """Non-tags are treated as text nodes and added to the current token
    if there is one.

    Fix: the None check now uses the idiomatic identity test instead of
    `!= None`."""
    logger.debug('>>"' + element.content + '"')
    if self.currentToken is not None:
        self.currentToken.setTextNode(self.doc.nodeCounter)
def createEvent(self):
    """No-op: adjectives arriving here have already been handled by the
    main createEvent loop."""
    logger.debug("createEvent in AdjToken")
def _createEventOnOtherVerb(self, features):
    """Fallback for verbs that matched none of the specific patterns:
    record the text in the dribble file, then conditionally add an
    event using the given features."""
    chunk_text = self.getText()
    self.dribble("OTHER", chunk_text)
    logger.debug("General case")
    self._conditionallyAddEvent(features)
def _passes_semantics_test(self):
    """Return True if the nominal can be an event semantically. Depending
    on user settings this is done by a mixture of wordnet lookup and
    using a simple classifier."""
    logger.debug("event candidate?")
    lemma = self.features.head_lemma
    # if every WordNet sense is an event the answer is decisive and the
    # classifier is not needed
    all_senses_events = wordnet.allSensesAreEvents(lemma)
    logger.debug(" all WordNet senses are events ==> %s" % all_senses_events)
    if all_senses_events:
        return True
    # run the classifier if required; on a disambiguation error fall
    # through to the WordNet sense checks below
    if EVITA_NOM_DISAMB:
        try:
            answer = self._run_classifier(lemma)
            logger.debug(" baysian classifier result ==> %s" % answer)
            return answer
        except bayes.DisambiguationError as e:
            logger.debug(" DisambiguationError: %s" % e)
    # check whether the primary sense or just some of the senses are events
    if EVITA_NOM_WNPRIMSENSE_ONLY:
        answer = wordnet.primarySenseIsEvent(lemma)
        logger.debug(" primary WordNet sense is event ==> %s" % answer)
    else:
        answer = wordnet.someSensesAreEvents(lemma)
        logger.debug(" some WordNet sense is event ==> %s" % answer)
    return answer
def doc(self):
    """Return the document, delegating the lookup to the parent."""
    logger.debug("RETURNING document")
    document = self.parent.document()
    return document
def pretty_print(self):
    """Print all attributes to the log file, one per line."""
    logger.debug(str(self))
    # "epos" is logged for the attribute historically named nf_morph
    attributes = [
        ("eid", self.eid),
        ("eiid", self.eiid),
        ("tense", self.tense),
        ("aspect", self.aspect),
        ("epos", self.nf_morph),
        ("polarity", self.polarity),
        ("modality", self.modality),
        ("evClass", self.evClass),
        ("pos", self.pos),
        ("form", self.form),
        ("locInSent", self.locInSent),
        ("eventNum", self.eventNum)]
    for label, value in attributes:
        logger.debug("%s: %s" % (label, value))
def _lookForMultiChunk(self, FSA_set, structure_type='flat'):
    """Return the prefix of the rest of the sentence if it matches one of
    the FSAs in FSA_set, 0 otherwise. The structure_type argument selects
    the structural format of the rest of the sentence: either a flat,
    token-level representation or a chunked one. This method is used for
    finding specific right contexts of verb chunks."""
    logger.debug("Entering _lookForMultiChunk for '%s' with %d FSAs"
                 % (self.getText().strip(), len(FSA_set)))
    logger.debug("\tstructure_type = %s" % structure_type)
    rest = self._getRestSent(structure_type)
    logger.debug("\trest = %s"
                 % ' '.join([item.__class__.__name__ for item in rest]))
    tokens = utils.get_tokens(rest)
    logger.debug("\trest = %s"
                 % ' '.join(["%s/%s" % (tok.getText(), tok.pos)
                             for tok in tokens]))
    match_length, fsa_index = self._identify_substring(rest, FSA_set)
    if not match_length:
        logger.debug("\tREJECTED by all FSAs")
        return 0
    logger.debug("\tACCEPTED by FSA %d, LENGTH=%d" % (fsa_index, match_length))
    return rest[:match_length]
def _createEventOnRightmostVerb(self, GramVCh):
    """Try to create an event for the verb chunk, first testing a series
    of multi-chunk right-context patterns (modal, be, have, going to,
    used to, do, become, continue, keep) and falling back on creating the
    event in the chunk itself. GramVCh appears to be the grammatical
    features object of the verb chunk -- TODO confirm. No return value;
    events/links are added as a side effect by the _processEvent*
    methods."""
    if GramVCh.nodeIsNotEventCandidate():
        # not a candidate at all, nothing to do
        pass
    elif GramVCh.nodeIsModalForm(self.nextNode()):
        logger.debug("Entering checking for modal pattern............")
        substring = self._lookForMultiChunk(patterns.MODAL_FSAs)
        if substring:
            self._processEventInMultiVChunk(substring)
    elif GramVCh.nodeIsBeForm(self.nextNode()):
        # BE forms cascade through three patterns: nominal complement,
        # adjectival complement, then additional verbal structure
        logger.debug("Entering checking for toBe pattern............")
        """Looking for BE + NOM Predicative Complement """
        logger.debug("Looking for BE + NOM Predicative Complement ")
        substring = self._lookForMultiChunk(patterns.BE_N_FSAs, 'chunked')
        if substring:
            self._processEventInMultiNChunk(GramVCh, substring)
        else:
            """Looking for BE + ADJ Predicative Complement """
            logger.debug("Looking for BE + ADJ Predicative Complement ")
            substring = self._lookForMultiChunk(patterns.BE_A_FSAs, 'chunked')
            if substring:
                self._processEventInMultiAChunk(GramVCh, substring)
            else:
                """Looking for BE + additional VERBAL structure """
                logger.debug(
                    "Looking for BE + VERB Predicative Complement ")
                substring = self._lookForMultiChunk(patterns.BE_FSAs)
                if substring:
                    self._processEventInMultiVChunk(substring)
    elif GramVCh.nodeIsHaveForm():
        logger.debug("Entering checking for toHave pattern............")
        substring = self._lookForMultiChunk(patterns.HAVE_FSAs)
        if substring:
            self._processEventInMultiVChunk(substring)
        else:
            self._processEventInChunk(GramVCh)
    elif GramVCh.nodeIsFutureGoingTo():
        logger.debug(
            "Entering checking for futureGoingTo pattern............")
        substring = self._lookForMultiChunk(patterns.GOINGto_FSAs)
        if substring:
            self._processEventInMultiVChunk(substring)
        else:
            self._processEventInChunk(GramVCh)
    elif GramVCh.nodeIsPastUsedTo():
        logger.debug(
            "Entering checking for pastUsedTo pattern............")
        substring = self._lookForMultiChunk(patterns.USEDto_FSAs)
        if substring:
            self._processEventInMultiVChunk(substring)
        else:
            self._processEventInChunk(GramVCh)
    elif GramVCh.nodeIsDoAuxiliar():
        logger.debug(
            "Entering checking for doAuxiliar pattern............")
        substring = self._lookForMultiChunk(patterns.DO_FSAs)
        if substring:
            self._processEventInMultiVChunk(substring)
        else:
            self._processEventInChunk(GramVCh)
    elif GramVCh.nodeIsBecomeForm(self.nextNode()):
        """Looking for BECOME + ADJ Predicative Complement e.g., He became
        famous at the age of 21"""
        logger.debug("Looking for BECOME + ADJ")
        substring = self._lookForMultiChunk(patterns.BECOME_A_FSAs, 'chunked')
        if substring:
            logger.debug("BECOME + ADJ found")
            # BECOME patterns create events on both verbs
            self._processDoubleEventInMultiAChunk(GramVCh, substring)
        else:
            self._processEventInChunk(GramVCh)
    elif GramVCh.nodeIsContinueForm(self.nextNode()):
        """Looking for CONTINUE + ADJ Predicative Complement e.g., Interest
        rate continued low."""
        logger.debug("Looking for CONTINUE + ADJ")
        substring = self._lookForMultiChunk(patterns.CONTINUE_A_FSAs, 'chunked')
        if substring:
            logger.debug("CONTINUE + ADJ found")
            self._processDoubleEventInMultiAChunk(GramVCh, substring)
        else:
            self._processEventInChunk(GramVCh)
    elif GramVCh.nodeIsKeepForm(self.nextNode()):
        """Looking for KEEP + ADJ Predicative Complement e.g., The
        announcement kept everybody Adj."""
        logger.debug("Looking for KEEP + [NChunk] + ADJ ")
        substring = self._lookForMultiChunk(patterns.KEEP_A_FSAs, 'chunked')
        if substring:
            logger.debug("KEEP + ADJ found")
            self._processDoubleEventInMultiAChunk(GramVCh, substring)
        else:
            self._processEventInChunk(GramVCh)
    else:
        # no special pattern applies, process the chunk as-is
        logger.debug("[1] " + GramVCh.as_extended_string())
        self._processEventInChunk(GramVCh)
def _find_lexically_based_slink(self, sentence, event_expr):
    """Try to find lexically based Slinks for an instance of
    EventExpression using forward, backward and reporting FSA patterns,
    trying each direction in turn until one of them creates an Slink.
    No return value; if an Slink is found, it will be created by the
    chunk that embeds the Slink triggering event."""
    evNode = sentence[event_expr.locInSent]
    if evNode is None:
        logger.error("No node found at locInSent=%s" % event_expr.locInSent)
        return
    logger.debug("Sentence element class: %s" % evNode.__class__.__name__)
    # (context key, label used in log messages, slink finder method)
    directions = (
        ('forward', 'forward', evNode.find_forward_slink),
        ('backwards', 'backward', evNode.find_backward_slink),
        ('reporting', 'reporting', evNode.find_reporting_slink))
    slink_created = False
    for context_key, label, find_slink in directions:
        if slink_created:
            break
        fsas = event_expr.slinkingContexts(context_key)
        if fsas:
            logger.debug("Applying %s slink FSAs" % label.upper())
            slink_created = find_slink(fsas)
            logger.debug("%s slink created = %s" % (label, slink_created))
def _process_opening_lex(self, element):
    """Creates a Token or AdjectiveToken and adds it to the current timex
    if there is one, otherwise to the current chunk if there is one, and
    otherwise to the current sentence if there is one."""
    logger.debug(' opening lex')
    pos = element.attrs[POS]
    if pos.startswith(POS_ADJ):
        self.currentToken = AdjectiveToken(self.doc, pos)
    else:
        self.currentToken = Token(self.doc, pos)
    # this is needed for later when tokens and events are swapped,
    # the default is that a token does not contain an event, this
    # can be changed when an event tag is processed
    self.currentToken.contains_event = False
    self.currentToken.lex_tag = element
    logger.debug(' current chunk ' + str(self.currentChunk))
    logger.debug(' current sent ' + str(self.currentSentence))
    # previously, just checked the truth of self.currentChunk and
    # self.currentSentence, which worked fine for the latter but
    # not for the former (no idea why that was the case, MV)
    if self.currentTimex is not None:
        # fixed: this branch used to log 'adding token to chunk',
        # copied from the branch below
        logger.debug(' adding token to timex')
        self.currentTimex.add(self.currentToken)
    elif self.currentChunk is not None:
        logger.debug(' adding token to chunk')
        self.currentChunk.addToken(self.currentToken)
    elif self.currentSentence is not None:
        logger.debug(' adding token to sentence')
        self.currentSentence.add(self.currentToken)
def apply_patterns(alink):
    """Loop through the TLINKs of the document and, for each TLINK that
    relates two event instances and shares an event instance with the
    given alink, create a new TLINK via createTlink. The third argument
    to createTlink records which side of the TLINK matched (1 for
    eventInstanceID, 2 for relatedToEventInstance)."""
    logger.debug("ALINK Properties:")
    logger.debug(alink.attrs['lid'] + " " + alink.attrs['eventInstanceID']
                 + " " + alink.attrs['relatedToEventInstance']
                 + " " + alink.attrs['relType'])
    # BUG FIX: this is a module-level function, so the original reference
    # to self.doctree raised a NameError; the caller hands in the alink,
    # which carries the document tree. Also, the loop clearly walks
    # TLINKs (see the docstring, the 'Current TLINK ID' message and the
    # relatedToTime/timeID checks below), so iterate tlink_list rather
    # than alink_list -- TODO confirm doctree exposes tlink_list.
    for tlink in alink.doctree.tlink_list:
        logger.debug("Current TLINK ID: " + tlink.attrs['lid'])
        if 'relatedToTime' not in tlink.attrs and 'timeID' not in tlink.attrs:
            # event-to-event TLINK; see if it shares an instance with the
            # alink on either side
            if alink.attrs['eventInstanceID'] == tlink.attrs[
                    'eventInstanceID']:
                logger.debug("Matched TLINK Properties:")
                logger.debug(tlink.attrs['lid'] + " "
                             + tlink.attrs['eventInstanceID'] + " "
                             + tlink.attrs['relatedToEventInstance'] + " "
                             + tlink.attrs['relType'])
                createTlink(alink, tlink, 1)
            elif alink.attrs['eventInstanceID'] == tlink.attrs[
                    'relatedToEventInstance']:
                logger.debug("Matched TLINK Properties:")
                logger.debug(tlink.attrs['lid'] + " "
                             + tlink.attrs['eventInstanceID'] + " "
                             + tlink.attrs['relatedToEventInstance'] + " "
                             + tlink.attrs['relType'])
                createTlink(alink, tlink, 2)
            else:
                logger.debug("No TLINK match")
        else:
            logger.debug("TLINK with Time, no match")
def createEvent(self):
    """No-op default for constituents; used in Evita only."""
    # fixed typo in the debug message: "Consituent" -> "Constituent"
    logger.debug("CreateEvent in Constituent")