def selfdetection(self, inputtier1):
    """
    Self-Repetition detection.

    Walk through the tokens of a single tier, window after window, and
    record the repetitions reported by a Repetitions detector.

    @param inputtier1 (Tier) Tier holding the tokens to analyse.
    @return (tuple) The "Sources" tier and the "Repetitions" tier, both
            filled through self._addrepetition().
    @raise Exception if inputtier1 is empty.

    """
    # Nothing can be detected without tokens.
    if inputtier1.IsEmpty() is True:
        raise Exception("Repetition. Empty input tokens tier.\n")

    # Stop-list: either estimated from the tier itself, or the fixed one.
    stoplist = self.relevancy(inputtier1) if self._use_stopwords is True else self.stopwords

    # The detector is created once and re-used for every window.
    detector = Repetitions()

    # Output tiers, and the number given to the next repetition.
    sources = Tier("Sources")
    echoes = Tier("Repetitions")
    counter = 1

    # The very first annotation is skipped when its label is a dummy one.
    begin = 1 if inputtier1[0].GetLabel().IsDummy() else 0

    while True:
        # Both bounds come from find_next_break(), searching from the
        # token that follows `begin`: with empan=1 for the start of the
        # next window, with empan=self._empan for the end of this one.
        nextbegin = self.find_next_break(inputtier1, begin + 1, empan=1)
        end = self.find_next_break(inputtier1, begin + 1, empan=self._empan)
        if not begin < end:
            break

        # Tokens of the current window, `end` included.
        window = [inputtier1[idx].GetLabel().GetValue()
                  for idx in range(begin, end + 1)]
        speaker = DataSpeaker(window, stoplist)

        # No second speaker is given (None) for a self-repetition.
        detector.detect(speaker, nextbegin - begin, None)

        # Store what was found; the same tier is both source and echo.
        if detector.get_repeats_size() > 0:
            added = self._addrepetition(detector, counter,
                                        inputtier1, inputtier1,
                                        begin, begin,
                                        sources, echoes)
            counter = counter + added

        # The next window starts where the search area stopped.
        begin = nextbegin

    return (sources, echoes)
def otherdetection(self, inputtier1, inputtier2):
    """
    Other-Repetition detection.

    A window of at most 20 tokens is moved over inputtier1. For each
    window, the tokens of inputtier2 that begin at, or after, the first
    token of the window are collected (until self._empan silences were
    met) and a Repetitions detector compares both sequences.

    @param inputtier1 (Tier) Tokens of the speaker producing the sources.
    @param inputtier2 (Tier) Tokens of the echoing speaker.
    @return (tuple) The "OR-Source" tier and the "OR-Repetition" tier,
            both filled through self._addrepetition().
    @raise Exception if inputtier1 is empty.

    """
    # Verifications: is there any data?
    if inputtier1.IsEmpty() is True:
        raise Exception("Repetition. Empty input tokens tier.\n")

    # Update the stoplist
    if self._use_stopwords is True:
        # other-repetition: relevance of the echoing-speaker
        stpw = self.relevancy(inputtier2)
    else:
        stpw = self.stopwords

    # Create repeat objects
    repeatobj = Repetitions()

    # Create output data
    srctier = Tier("OR-Source")
    reptier = Tier("OR-Repetition")
    nbrepeats = 1

    # Maximum number of source tokens examined at once.
    window = 20

    # Initialization of tokstartsrc and tokendsrc; the first annotation
    # is skipped when its label is a dummy one.
    tokstartsrc = 0
    if inputtier1[0].GetLabel().IsDummy():
        tokstartsrc = 1
    tokendsrc = min(window, inputtier1.GetSize() - 1)

    # Detection is here:
    # detect() is applied word by word, from tokstartsrc to tokendsrc
    while tokstartsrc < tokendsrc:

        # Build an array with the source tokens (tokendsrc excluded).
        tokens1 = [inputtier1[i].GetLabel().GetValue()
                   for i in range(tokstartsrc, tokendsrc)]
        speaker1 = DataSpeaker(tokens1, stpw)

        # Create speaker2: tokens of the echoing speaker that do not begin
        # before the first source token of the window.
        tokens2 = list()
        nbbreaks = 0
        tokstartrep = -1
        srcbegin = inputtier1[tokstartsrc].GetLocation().GetBeginMidpoint()

        for (r, b) in enumerate(inputtier2):
            if b.GetLocation().GetBeginMidpoint() >= srcbegin:
                # Remember where the echo sequence starts in inputtier2.
                if tokstartrep == -1:
                    tokstartrep = r
                if b.GetLabel().IsSilence():
                    nbbreaks = nbbreaks + 1
                # Stop as soon as self._empan silences were counted; the
                # silence reaching that count is not appended.
                if nbbreaks == self._empan:
                    break
                tokens2.append(b.GetLabel().GetValue())
        speaker2 = DataSpeaker(tokens2, stpw)

        if DEBUG is True:
            # Written so that it parses on Python 2 and 3 and prints the
            # same text as the statement form `print "SRC : ", speaker1`.
            print(" ".join(("SRC : ", str(speaker1))))
            print(" ".join(("ECHO: ", str(speaker2))))

        # Detect repeats in these data: search if the first token of spk1
        # is the first token of a source.
        repeatobj.detect(speaker1, 1, speaker2)

        # Save repeats
        shift = 1
        if repeatobj.get_repeats_size() > 0:
            if DEBUG is True:
                print(" ----> found : ")
                repeatobj.get_repeat(0).print_echo()
            # Only the end index of the first source is needed.
            _, srcend = repeatobj.get_repeat_source(0)
            n = self._addrepetition(repeatobj, nbrepeats,
                                    inputtier1, inputtier2,
                                    tokstartsrc, tokstartrep,
                                    srctier, reptier)
            if n > 0:
                nbrepeats = nbrepeats + n
            # Continue just after the source that was found.
            shift = srcend + 1

        # Skip what is_token() rejects. The bound is tested first so that
        # no lookup is attempted at index `window`, which is always past
        # the end of speaker1 (it never holds more than `window` tokens).
        # NOTE(review): when the window holds fewer tokens than `window`,
        # the lookup may still go past its end; the outcome then depends
        # on DataSpeaker.get_token() - to be confirmed.
        while shift < window and speaker1.is_token(speaker1.get_token(shift)) is False:
            shift = shift + 1

        # Move the window, without going past the last annotation.
        lasttok = inputtier1.GetSize() - 1
        tokstartsrc = min(tokstartsrc + shift, lasttok)
        tokendsrc = min(tokstartsrc + window, lasttok)

    return (srctier, reptier)