Example #1
    def __init__(self, channel, winlenght=0.01):
        """
        Constructor.

        @param channel (Channel) the input channel object
        @param winlenght (float) duration of the window used to estimate the volume

        """
        self.channel    = channel
        self.volstats   = ChannelVolume( channel, winlenght )
        self.__silences = []
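
A minimal usage sketch for this constructor. The import path of ChannelSilence and the file name are assumptions, not confirmed by the examples on this page:

import audiodata.io
from audiodata.channelsilence import ChannelSilence   # assumed module path

audio = audiodata.io.open("sample.wav")   # hypothetical file name
cidx = audio.extract_channel(0)
channel = audio.get_channel(cidx)

# Estimate volumes on 20 ms windows instead of the default 10 ms.
chansil = ChannelSilence(channel, winlenght=0.02)
print len(chansil)   # no silences searched yet: prints 0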
Example #2
    def test_rms(self):
        audio = audiodata.io.open(sample_1)
        cidx = audio.extract_channel(0)
        channel = audio.get_channel(cidx)

        chanvol  = ChannelVolume(channel)
        audiovol = AudioVolume(audio)

        self.assertEqual(chanvol.volume(), channel.rms())
        self.assertEqual(audiovol.volume(), audio.rms())
        self.assertEqual(chanvol.len(), int(channel.get_duration()/0.01) + 1)
        self.assertEqual(chanvol.min(), audiovol.min())
        self.assertEqual(chanvol.max(), audiovol.max())
        self.assertEqual(int(chanvol.mean()), int(audiovol.mean()))
        self.assertEqual(int(chanvol.variance()), int(audiovol.variance()))
        self.assertEqual(int(chanvol.stdev()), int(audiovol.stdev()))
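
The rms() compared above is the usual root mean square of the integer samples, sqrt(sum(s_i**2)/n). A stdlib-only sketch of that definition on 16-bit frames (illustrative, not the SPPAS implementation):

import math
import struct

def naive_rms(frames, sampwidth=2):
    # Unpack little-endian signed 16-bit samples, then sqrt of the mean square.
    n = len(frames) / sampwidth
    samples = struct.unpack("<%dh" % n, frames)
    return int(math.sqrt(sum(float(s) * s for s in samples) / n))

frames = struct.pack("<4h", 1000, -1000, 2000, -2000)
print naive_rms(frames)   # 1581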
Example #3
    print "  - median:        ", round(audiovol.median(),2)
    print "  - stdev:         ", round(audiovol.stdev(),2)
    print "  - coefvariation: ", round(audiovol.coefvariation(),2)

else:

    for n in range(nc):
        print "Channel %d:"%(n)
        cidx = audio.extract_channel(n)
        channel = audio.get_channel(cidx)

        # Values related to amplitude
        frames = channel.get_frames(channel.get_nframes())
        ca = AudioFrames(frames, channel.get_sampwidth(), 1)
        for i in range(2,9,2):
            f = float(i)/10.
            c = ca.clipping_rate( f ) * 100.
            print "  - factor=%.1f:      %.3f"%(f,c)

        # RMS (=volume)
        cv = ChannelVolume( channel )
        print "  Volume:"
        print "    - min:           ", cv.min()
        print "    - max:           ", cv.max()
        print "    - mean:          ", round(cv.mean(),2)
        print "    - median:        ", round(cv.median(),2)
        print "    - stdev:         ", round(cv.stdev(),2)
        print "    - coefvariation: ", round(cv.coefvariation(),2)

# ----------------------------------------------------------------------------
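
The clipping_rate(f) printed above is, roughly, the proportion of samples whose amplitude reaches f times the full scale; the exact AudioFrames definition may differ, so treat this as an illustrative sketch only:

import struct

def naive_clipping_rate(frames, factor, sampwidth=2):
    # Proportion of 16-bit samples at or beyond factor * full scale.
    # Illustrative definition; the AudioFrames one may differ.
    n = len(frames) / sampwidth
    samples = struct.unpack("<%dh" % n, frames)
    limit = factor * 32767.
    clipped = sum(1 for s in samples if abs(s) >= limit)
    return float(clipped) / n

frames = struct.pack("<4h", 30000, -31000, 5000, -100)
print "%.3f" % naive_clipping_rate(frames, 0.8)   # 0.500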
Example #4
class ChannelSilence( object ):
    """
    @author:       Brigitte Bigi
    @organization: Laboratoire Parole et Langage, Aix-en-Provence, France
    @contact:      [email protected]
    @license:      GPL, v3
    @copyright:    Copyright (C) 2011-2016  Brigitte Bigi
    @summary:      This class implements silence finding on a channel.

    """
    def __init__(self, channel, winlenght=0.01):
        """
        Constructor.

        @param channel (Channel) the input channel object
        @param winlenght (float) duration of the window used to estimate the volume

        """
        self.channel    = channel
        self.volstats   = ChannelVolume( channel, winlenght )
        self.__silences = []

    # ------------------------------------------------------------------

    def get_channel(self):
        return self.channel

    def get_volstats(self):
        return self.volstats

    # ------------------------------------------------------------------
    # DEPRECATED Silence detection. Used until sppas-1.7.8.
    # ------------------------------------------------------------------

    def tracks(self, m):
        """
        Yield (from_pos, to_pos) tuples of the tracks, from the result of get_silence().
        @deprecated

        @param m (float) Minimum track duration in seconds

        """
        from_pos = 0
        if len(self.silence) == 0:
            # No silence: the whole channel is a single track!
            yield 0, self.channel.get_nframes()
            return
            return
        # At least one silence
        for to_pos, next_from in self.silence:
            if (to_pos - from_pos) >= (m * self.channel.get_framerate()):
                # Track is long enough to be considered a track.
                yield int(from_pos), int(to_pos)
            from_pos = next_from

        # Last track after the last silence
        # (if the silence does not end at the end of the channel)
        to_pos = self.channel.get_nframes()
        if (to_pos - from_pos) >= (m * self.channel.get_framerate()):
            yield int(from_pos), int(to_pos)


    def get_silence(self, p=0.250, v=150, s=0.):
        """
        Estimate silences of the channel.
        @deprecated

        @param p (float) Minimum silence duration in seconds
        @param v (int) Expected minimum volume (rms value)
        @param s (float) Shift delta duration in seconds
        The result is stored in self.silence, a list of [start_pos, end_pos]
        frame intervals; the method itself returns nothing.

        """
        self.channel.seek(0)
        self.silence = []

        # Once silence has been found, continue searching in this interval
        nbreadframes = int(self.volstats.get_winlen() * self.channel.get_framerate())
        afterloop_frames = int(nbreadframes/2)
        initpos = i = self.channel.tell()

        # Scan the file window by window: whenever a window's volume
        # is lower than v, the range is recorded as a silence.
        while i < self.channel.get_nframes():

            curframe = self.channel.get_frames(nbreadframes)

            a = AudioFrames( curframe, self.channel.get_sampwidth(), 1)
            volume = a.rms()
            if volume < v:

                # The window is below the threshold: keep reading in
                # smaller steps to find where the silence actually ends.
                while volume < v and self.channel.tell() < self.channel.get_nframes():
                    curframe = self.channel.get_frames(afterloop_frames)

                    a = AudioFrames( curframe, self.channel.get_sampwidth(), 1)
                    volume = a.rms()

                # If the previous silence ends where the new one starts,
                # extend it into one continuous range.
                if self.silence and self.silence[-1][1] == i:
                    self.silence[-1][1] = self.channel.tell()
                else:
                    # Append only if the silence is long enough
                    duree = self.channel.tell() - i
                    nbmin = int( (p+s) * self.channel.get_framerate())
                    if duree > nbmin:
                        # Adjust silence start-pos
                        __startpos = i + ( s * self.channel.get_framerate() )
                        # Adjust silence end-pos
                        __endpos = self.channel.tell() - ( s * self.channel.get_framerate() )
                        self.silence.append([__startpos, __endpos])

            i = self.channel.tell()

        # Return the position in the file to where it was when we got it.
        self.channel.seek(initpos)

    # ------------------------------------------------------------------
    # Utility method
    # ------------------------------------------------------------------

    def track_data(self, tracks):
        """
        Get the track data: a set of frames for each track.

        @param tracks (list of tuples) List of (from_pos,to_pos)

        """
        nframes = self.channel.get_nframes()
        for from_pos, to_pos in tracks:
            if nframes < from_pos:
                # Accept a "DELTA" of 10 frames, in case of corrupted data.
                if nframes < from_pos-10:
                    raise ValueError("Position %d not in range(%d)" % (from_pos, nframes))
                else:
                    from_pos = nframes
            # Go to the provided position
            self.channel.seek(from_pos)
            # Keep in mind the related frames
            yield self.channel.get_frames(to_pos - from_pos)

    # ------------------------------------------------------------------

    def refine(self, pos, threshold, winlenght=0.005, direction=1):
        """
        Refine the position of a silence around a given position.

        @param pos (int) Initial position of the silence
        @param threshold (int) RMS threshold value for a silence
        @param winlenght (float) Window duration used to estimate the RMS
        @param direction (int) 1 to scan forward, -1 to scan backward
        @return the new position

        """
        delta = int(self.volstats.get_winlen() * self.channel.get_framerate())
        from_pos = max(pos-delta,0)
        self.channel.seek( from_pos )
        frames = self.channel.get_frames( delta*2 )
        c = Channel( self.channel.get_framerate(), self.channel.get_sampwidth(), frames )
        volstats = ChannelVolume( c, winlenght )

        if direction==1:
            for i,v in enumerate(volstats):
                if v > threshold:
                    return (from_pos + i*( int(winlenght*self.channel.get_framerate())))
        if direction==-1:
            i=len(volstats)
            for v in reversed(volstats):
                if v > threshold:
                    return (from_pos + (i*( int(winlenght*self.channel.get_framerate()))))
                i = i-1

        return pos
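
    # A worked sketch of the scan above, with hypothetical numbers: at a
    # framerate of 16000 and winlenght=0.005, each sub-window covers 80
    # frames. With direction=1, if the first sub-window whose volume
    # exceeds the threshold has index i=3, the method returns
    # from_pos + 3*80; with direction=-1 the scan runs backwards from the
    # end of the extracted 2*delta slice instead.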

    # ------------------------------------------------------------------

    def extract_tracks(self, mintrackdur=0.300, shiftdurstart=0.010, shiftdurend=0.010):
        """
        Return a list of tuples (from_pos,to_pos) of the tracks.

        @param mintrackdur (float) The minimum duration for a track (in seconds)
        @param shiftdurstart (float) The duration to subtract from the start boundary (in seconds)
        @param shiftdurend (float) The duration to add to the end boundary (in seconds)

        """
        tracks = []

        # No silence: Only one track!
        if len(self.__silences)==0:
            tracks.append( (0,self.channel.get_nframes()) )
            return tracks

        # Convert values: time into frame
        delta      = int(mintrackdur   * self.channel.get_framerate())
        shiftstart = int(shiftdurstart * self.channel.get_framerate())
        shiftend   = int(shiftdurend   * self.channel.get_framerate())
        from_pos = 0

        for to_pos, next_from in self.__silences:

            shift_from_pos = max(from_pos - shiftstart, 0)
            shift_to_pos   = min(to_pos + shiftend, self.channel.get_nframes())

            if (shift_to_pos-shift_from_pos) >= delta:
                # Track is long enough to be considered a track.
                tracks.append( (int(shift_from_pos), int(shift_to_pos)) )

            from_pos = next_from

        # Last track after the last silence
        # (if the silence does not end at the end of the channel)
        to_pos = self.channel.get_nframes()
        if (to_pos - from_pos) >= delta:
            tracks.append( (int(from_pos), int(to_pos)) )

        return tracks
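
    # A numeric sketch with hypothetical values: at a framerate of 16000,
    # mintrackdur=0.3 gives delta=4800 frames and the default shifts give
    # shiftstart = shiftend = 160 frames. A silence list [(20000, 52000)]
    # then yields the track (0, 20160), plus a final track (52000, nframes)
    # if the channel extends at least 4800 frames past the silence.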

    # ------------------------------------------------------------------
    # New silence detection
    # ------------------------------------------------------------------

    def search_threshold_vol(self):
        """
        Try to determine an optimal threshold for the speech/silence
        segmentation, from a simple observation of the distribution
        of RMS values.

        @return (int) volume value

        """
        vmin  = self.volstats.min()
        vmean = self.volstats.mean()
        vcvar = 1.5 * self.volstats.coefvariation()
        alt = (vmean-vmin)/5.
        if alt > vcvar:
            vcvar=alt

        return vmin + int((vmean - vcvar))
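
    # A worked sketch with hypothetical values: if min()=100, mean()=600
    # and coefvariation()=200, then vcvar = 1.5*200 = 300 while
    # alt = (600-100)/5 = 100; vcvar stays at 300 and the returned
    # threshold is 100 + int(600-300) = 400.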

    # ------------------------------------------------------------------

    def search_silences(self, threshold=0, mintrackdur=0.08):
        """
        Search windows with a volume lower than a given threshold.

        @param threshold (int) Expected minimum volume (rms value).
        If threshold is set to 0, search_threshold_vol() will assign a value.
        @param mintrackdur (float) The absolute minimum duration for
        a track (in seconds).

        """
        if threshold == 0:
            threshold = self.search_threshold_vol()

        # Scan the volume values: each long-enough run of windows below
        # the threshold is recorded as a silence.
        self.__silences = []
        inside = False
        idxbegin = 0
        ignored  = 0
        delta = int(mintrackdur / self.volstats.get_winlen())

        for i,v in enumerate(self.volstats):
            if v < threshold:
                if inside is False:
                    # It's the beginning of a block of below-threshold volumes
                    idxbegin = i
                    inside = True
            else:
                if inside is True:
                    # It's the end of a silence block...
                    # unless the block is too short to count!
                    if (i-idxbegin) > delta:
                        inside = False
                        idxend   = i-ignored  # no -1: we keep the end of the window
                        from_pos = int(idxbegin * self.volstats.get_winlen() * self.channel.get_framerate())
                        to_pos   = int(idxend   * self.volstats.get_winlen() * self.channel.get_framerate())

                        # Find the boundaries with a better precision
                        w = self.volstats.get_winlen()/4.
                        from_pos = self.refine(from_pos, threshold, w, direction=-1)
                        to_pos   = self.refine(to_pos,   threshold, w, direction=1)

                        self.__silences.append((from_pos,to_pos))
                        ignored = 0
                    else:
                        ignored += 1

        # Last interval
        if inside is True:
            start_pos = int(idxbegin * self.volstats.get_winlen() * self.channel.get_framerate())
            end_pos   = self.channel.get_nframes()
            self.__silences.append((start_pos,end_pos))

        return threshold

    # ------------------------------------------------------------------

    def filter_silences(self, minsildur=0.200):
        """
        Filter the current silences: remove those shorter than a given duration.

        @param minsildur (float) Minimum silence duration in seconds
        @return (int) Number of silences kept
        """
        if len(self.__silences) == 0:
            return 0

        filteredsil = []
        for (start_pos,end_pos) in self.__silences:
            sildur = float(end_pos-start_pos) / float(self.channel.get_framerate())
            if sildur > minsildur:
                filteredsil.append( (start_pos,end_pos) )

        self.__silences = filteredsil

        return len(self.__silences)

#         print "Number of silences: ",len(self.__silences)
#         print "Silences: "
#         for (s,e) in self.__silences:
#             print " (",float(s)/float(self.channel.get_framerate())," ; ",float(e)/float(self.channel.get_framerate()),")"

    # ------------------------------------------------------------------

    def set_silences(self, silences):
        """
        Set the silences manually.

        @param silences (list of tuples (start_pos,end_pos))

        """
        self.__silences = silences

    # ------------------------------------------------------------------

    def reset_silences(self):
        """
        Reset silences.

        """
        self.__silences = []

    # -----------------------------------------------------------------------
    #
    # -----------------------------------------------------------------------

    def __len__(self):
        return len(self.__silences)

    # -----------------------------------------------------------------------

    def __iter__(self):
        for x in self.__silences:
            yield x

    # -----------------------------------------------------------------------

    def __getitem__(self, i):
        return self.__silences[i]
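
A hedged end-to-end sketch of the non-deprecated workflow of this class. The import path of ChannelSilence and the file name are assumptions to be adapted:

import audiodata.io
from audiodata.channelsilence import ChannelSilence   # assumed module path

audio = audiodata.io.open("sample.wav")           # hypothetical file name
channel = audio.get_channel(audio.extract_channel(0))

chansil = ChannelSilence(channel)
threshold = chansil.search_silences(threshold=0)  # 0 = pick one automatically
chansil.filter_silences(minsildur=0.200)
tracks = chansil.extract_tracks(mintrackdur=0.300)

print "threshold=%d, %d silences, %d tracks" % (threshold, len(chansil), len(tracks))
for frames in chansil.track_data(tracks):
    pass  # raw frames of one inter-silence track, e.g. to write to a file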