def _getRestSent(self, structure_type):
    """Return the part of the sentence that follows this constituent.

    Arguments:
       structure_type - 'flat' or 'chunked'

    The rest of the sentence is the slice of self.parent that starts right
    after self.position. For 'flat' that slice is handed to
    utils.get_tokens() and the result is returned; for 'chunked' the slice
    itself is returned. Any other value is logged as a warning and treated
    like 'chunked', so a list of constituents is returned."""
    # Everything in the parent to the right of this constituent.
    rest = self.parent[self.position + 1:]
    if structure_type == 'flat':
        return utils.get_tokens(rest)
    if structure_type != 'chunked':
        # Unknown type: warn and fall back to the chunked representation
        # rather than failing on a local that was never assigned.
        logger.warn("unknown structure type: %s" % structure_type)
    return rest
def dribble(self, header, text):
    """Write one line about the sentence that an event was added to.

    Does nothing unless the module-level DRIBBLE flag is true. Otherwise a
    tab-separated line is written to VerbChunk.DRIBBLE_FH with the header,
    the whitespace-normalized text, the whitespace-normalized source text
    spanning the tokens of the parent's daughters, and the begin:end
    offsets of this chunk's last daughter."""
    if not DRIBBLE:
        return
    # Offsets of the enclosing sentence, from its first and last token.
    sentence_tokens = utils.get_tokens(self.parent.dtrs)
    sent_begin = sentence_tokens[0].begin
    sent_end = sentence_tokens[-1].end
    # Offsets of the last daughter of this chunk.
    event_begin = self.dtrs[-1].begin
    event_end = self.dtrs[-1].end
    # Collapse all runs of whitespace so each field stays on one line.
    text = ' '.join(text.split())
    source_text = self.tree.tarsqidoc.sourcedoc.text
    sentence = ' '.join(source_text[sent_begin:sent_end].split())
    fields = (header, text, sentence, event_begin, event_end)
    VerbChunk.DRIBBLE_FH.write("%s\t%s\t%s\t%s:%s\n" % fields)
def _lookForMultiChunk(self, FSA_set, structure_type='flat'):
    """Return the prefix of the rest of the sentence if it matches one of
    the FSAs in FSA_set, and 0 otherwise.

    Arguments:
       FSA_set - the FSAs handed to self._identify_substring()
       structure_type - passed to self._getRestSent() to select the format
          of the rest of the sentence ('flat' by default)

    The rest of the sentence is matched with self._identify_substring(),
    which yields a (length, FSA number) pair. A true length means the first
    length elements are returned. This method is used for finding specific
    right contexts of verb chunks."""
    logger.debug("Entering _lookForMultiChunk for '%s' with %d FSAs"
                 % (self.getText().strip(), len(FSA_set)))
    logger.debug("\tstructure_type = %s" % structure_type)
    rest = self._getRestSent(structure_type)
    # Trace the right context twice: as class names and as text/pos pairs.
    class_names = [element.__class__.__name__ for element in rest]
    logger.debug("\trest = %s" % ' '.join(class_names))
    tagged = ["%s/%s" % (tok.getText(), tok.pos)
              for tok in utils.get_tokens(rest)]
    logger.debug("\trest = %s" % ' '.join(tagged))
    match_length, fsa_index = self._identify_substring(rest, FSA_set)
    if not match_length:
        logger.debug("\tREJECTED by all FSAs")
        return 0
    logger.debug("\tACCEPTED by FSA %d, LENGTH=%d"
                 % (fsa_index, match_length))
    return rest[:match_length]
def _lookForMultiChunk(self, FSA_set, structure_type='flat'):
    """Find a right context of this verb chunk that is accepted by an FSA.

    The rest of the sentence is obtained from self._getRestSent() in the
    format selected by structure_type and then given, together with
    FSA_set, to self._identify_substring(). If the length that comes back
    is true, the prefix of that length of the rest of the sentence is
    returned; otherwise the return value is 0."""
    chunk_text = self.getText().strip()
    logger.debug("Entering _lookForMultiChunk for '%s' with %d FSAs"
                 % (chunk_text, len(FSA_set)))
    logger.debug("\tstructure_type = %s" % structure_type)
    remainder = self._getRestSent(structure_type)

    # Debugging output: first the class of each element, then the tokens.
    kinds = ' '.join([item.__class__.__name__ for item in remainder])
    logger.debug("\trest = %s" % kinds)
    words = ' '.join(["%s/%s" % (token.getText(), token.pos)
                      for token in utils.get_tokens(remainder)])
    logger.debug("\trest = %s" % words)

    length, fsa_number = self._identify_substring(remainder, FSA_set)
    result = 0
    if length:
        logger.debug("\tACCEPTED by FSA %d, LENGTH=%d" % (fsa_number, length))
        result = remainder[:length]
    else:
        logger.debug("\tREJECTED by all FSAs")
    return result
def _processEventInMultiVChunk(self, substring):
    """Process an event in a verb chunk extended with a right context.

    Arguments:
       substring - a list of elements following this chunk

    Builds a VChunkFeaturesList from the tokens of this chunk followed by
    substring, hands its first element to self._conditionallyAddEvent(),
    and then calls update_event_checked_marker() on every element of
    substring."""
    token_list = utils.get_tokens(self) + substring
    verbfeatureslist = VChunkFeaturesList(tokens=token_list)
    GramMultiVChunk = verbfeatureslist[0]
    self._conditionallyAddEvent(GramMultiVChunk)
    # Explicit loop rather than map(): map() is lazy on Python 3, so using
    # it only for its side effects would never update the markers.
    for element in substring:
        update_event_checked_marker(element)
def _processEventInMultiVChunk(self, substring):
    """Process an event in a verb chunk extended with a right context.

    Arguments:
       substring - a list of elements following this chunk

    Builds a GramVChunkList from the tokens of this chunk followed by
    substring, hands its first element to self._conditionallyAddEvent(),
    and then calls update_event_checked_marker() on every element of
    substring."""
    chunk_list = utils.get_tokens(self) + substring
    gramvchunklist = GramVChunkList(tokens=chunk_list)
    GramMultiVChunk = gramvchunklist[0]
    self._conditionallyAddEvent(GramMultiVChunk)
    # Explicit loop rather than map(): map() is lazy on Python 3, so using
    # it only for its side effects would never update the markers.
    for element in substring:
        update_event_checked_marker(element)