Beispiel #1
0
    def selfdetection(self, inputtier1):
        """
        Self-Repetition detection.

        @param inputtier1 (Tier)

        """
        # Verifications: is there any data?
        if inputtier1.IsEmpty() is True:
            raise Exception("Repetition. Empty input tokens tier.\n")

        # Update the stoplist
        if self._use_stopwords is True:
            stpw = self.relevancy( inputtier1 )
        else:
            stpw = self.stopwords

        # Create repeat objects
        repeatobj = Repetitions( )

        # Create output data
        srctier = Tier("Sources")
        reptier = Tier("Repetitions")
        nbrepeats = 1

        # Initialization of tokstart and tokend
        tokstart = 0
        if inputtier1[0].GetLabel().IsDummy():
            tokstart = 1
        toksearch = self.find_next_break( inputtier1, tokstart+1 , empan=1)
        tokend    = self.find_next_break( inputtier1, tokstart+1 , empan=self._empan)

        # Detection is here:
        while tokstart < tokend:

            # Build an array with the tokens
            tokens1 = list()
            for i in range(tokstart, tokend+1):
                tokens1.append( inputtier1[i].GetLabel().GetValue() )
            speaker1 = DataSpeaker( tokens1, stpw )

            # Detect repeats in these data
            repeatobj.detect( speaker1, toksearch-tokstart, None )

            # Save repeats
            if repeatobj.get_repeats_size()>0:
                n = self._addrepetition(repeatobj, nbrepeats, inputtier1, inputtier1, tokstart, tokstart, srctier, reptier)
                nbrepeats = nbrepeats + n

            # Prepare next search
            tokstart  = toksearch
            toksearch = self.find_next_break( inputtier1 , tokstart+1 , empan=1 )
            tokend    = self.find_next_break( inputtier1 , tokstart+1 , empan=self._empan )

        return (srctier,reptier)
Beispiel #2
0
    def otherdetection(self, inputtier1, inputtier2):
        """
        Other-Repetition detection.

        @param inputtier (Tier)

        """
        # Verifications: is there any data?
        if inputtier1.IsEmpty() is True:
            raise Exception("Repetition. Empty input tokens tier.\n")

        # Update the stoplist
        if self._use_stopwords is True:
            # other-repetition: relevance of the echoing-speaker
            stpw = self.relevancy( inputtier2 )
        else:
            stpw = self.stopwords

        # Create repeat objects
        repeatobj = Repetitions( )

        # Create output data
        srctier = Tier("OR-Source")
        reptier = Tier("OR-Repetition")

        nbrepeats = 1

        # Initialization of tokstart, and tokend
        tokstartsrc = 0
        if inputtier1[0].GetLabel().IsDummy():
            tokstartsrc = 1
        tokendsrc = min(20, inputtier1.GetSize()-1)

        # Detection is here:
        # detect() is applied work by word, from tokstart to tokend
        while tokstartsrc < tokendsrc:

            # Build an array with the tokens
            tokens1 = list()
            for i in range(tokstartsrc, tokendsrc):
                tokens1.append( inputtier1[i].GetLabel().GetValue() )
            speaker1 = DataSpeaker( tokens1, stpw )

            # Create speaker2
            tokens2 = list()
            nbbreaks = 0
            tokstartrep = -1
            a = inputtier1[tokstartsrc]

            for (r,b) in enumerate(inputtier2):
                if b.GetLocation().GetBeginMidpoint() >= a.GetLocation().GetBeginMidpoint():
                    if tokstartrep == -1:
                        tokstartrep = r
                    if b.GetLabel().IsSilence():
                        nbbreaks = nbbreaks + 1
                    if nbbreaks == self._empan:
                        break
                    tokens2.append( b.GetLabel().GetValue() )
            speaker2 = DataSpeaker( tokens2, stpw )

            if DEBUG is True:
                print "SRC : ",speaker1
                print "ECHO: ",speaker2

            # Detect repeats in these data: search if the first token of spk1
            # is the first token of a source.
            repeatobj.detect( speaker1, 1, speaker2 )

            # Save repeats
            shift = 1
            if repeatobj.get_repeats_size()>0:
                if DEBUG is True:
                    print " ----> found : "
                    repeatobj.get_repeat(0).print_echo()
                s,e = repeatobj.get_repeat_source(0)
                n = self._addrepetition(repeatobj, nbrepeats, inputtier1, inputtier2, tokstartsrc, tokstartrep, srctier, reptier)
                if n > 0:
                    nbrepeats = nbrepeats + n
                shift = e + 1


            while speaker1.is_token(speaker1.get_token(shift)) is False and shift < 20:
                shift = shift + 1

            tokstartsrc = tokstartsrc + shift
            tokstartsrc = min(tokstartsrc, inputtier1.GetSize()-1)
            tokendsrc   = min(tokstartsrc + 20, inputtier1.GetSize()-1)

        return (srctier,reptier)