def _get_turn_info(self, anno):
    """
    Return the speaker and turn id for a unit-level annotation.

    The turns in ``self.turns`` whose span encloses ``anno.span`` are
    collected; the first of them (in ``self.turns`` order) supplies the
    result.

    Returns a ``(speaker, turn_id)`` pair, or ``(None, None)`` when no
    turn encloses the annotation.
    """
    candidates = [turn for turn in self.turns
                  if turn.span.encloses(anno.span)]
    # Guard clause: nothing encloses this annotation.
    if not candidates:
        return None, None
    first = candidates[0]
    return stac_anno.speaker(first), stac_anno.turn_id(first)
def position_of_speaker_first_turn(edu):
    """
    Given an EDU context, return the index (within
    ``edu.dialogue_turns``) of the first turn whose speaker equals the
    EDU's own speaker.

    Raises ``CorpusConsistencyException`` if no turn in the dialogue
    matches the EDU's speaker.
    """
    wanted = edu.speaker()
    # the original author notes the dialogue turns can be assumed sorted
    matching_positions = (idx
                          for idx, turn in enumerate(edu.dialogue_turns)
                          if speaker(turn) == wanted)
    # Indices are ints, so None is a safe "nothing found" sentinel.
    first = next(matching_positions, None)
    if first is None:
        raise CorpusConsistencyException(
            "Implementation error? No turns found which match speaker's turn")
    return first
def players_for_doc(corpus, kdoc):
    """
    Return the set of speakers/addressees associated with a document.

    In STAC, documents are semi-arbitrarily cut into sub-documents for
    technical and possibly ergonomic reasons, ie. the split is
    meaningless as far as we are concerned. To find all speakers we
    therefore look through every corpus entry whose key has
    ``key.doc == kdoc``.

    For each unit of each matching entry:

    * a turn contributes its speaker (skipped when falsy);
    * an EDU contributes everything ``player_addresees`` returns for it;
    * any other unit is ignored.

    :: (Corpus, String) -> Set String

    The result is returned as a ``frozenset``.
    """
    matching_docs = [corpus[key] for key in corpus if key.doc == kdoc]
    found = set()
    for document in matching_docs:
        for unit in document.units:
            if educe.stac.is_turn(unit):
                who = speaker(unit)
                if who:
                    found.add(who)
                # a turn is never also treated as an EDU
                continue
            if educe.stac.is_edu(unit):
                found.update(player_addresees(unit))
    return frozenset(found)
def num_speakers_between(_current, gap, _edu1, _edu2):
    """
    Count the distinct speakers over the turns in ``gap.turns_between``.

    Only ``gap`` is consulted; the other parameters are unused.
    """
    distinct_speakers = {speaker(turn) for turn in gap.turns_between}
    return len(distinct_speakers)
def speaker_started_the_dialogue(_, edu):
    """
    Tell whether the speaker of this EDU's turn is the same as the
    speaker of the first turn in the dialogue.

    The first parameter is unused.
    """
    opening_speaker = speaker(edu.dialogue_turns[0])
    own_speaker = speaker(edu.turn)
    return opening_speaker == own_speaker
def speaker_id(_, edu):
    """
    Get the speaker ID for an EDU, i.e. the speaker of ``edu.turn``.

    The first parameter is unused.
    """
    enclosing_turn = edu.turn
    return speaker(enclosing_turn)
def speaker(self):
    """
    Return the speaker associated with the turn surrounding an edu
    (``self.turn``), as reported by the ``speaker`` helper.
    """
    surrounding_turn = self.turn
    return speaker(surrounding_turn)