Code example #1
    def __init__(self):
        super(IRITStartSeg, self).__init__()

        self._buffer = BufferTable()

        # self.energy = []

        self.max_energy = 0.002 * 2
        self.min_overlap = 20
        self.threshold = 0.12
Code example #2
    def __init__(self):
        super(IRITStartSeg, self).__init__()

        self._buffer = BufferTable()

        # self.energy = []

        self.max_energy = 0.002*2
        self.min_overlap = 20
        self.threshold = 0.12
Code example #3
class IRITStartSeg(Analyzer):
    '''
    Segmentation of recording sessions into 'start' and 'session' segments

    Properties:
    '''
    implements(IAnalyzer)

    @interfacedoc
    def __init__(self):
        super(IRITStartSeg, self).__init__()

        self._buffer = BufferTable()

        # self.energy = []

        self.max_energy = 0.002*2
        self.min_overlap = 20
        self.threshold = 0.12

    @interfacedoc
    def setup(self, channels=None, samplerate=None,
              blocksize=None, totalframes=None):

        super(IRITStartSeg, self).setup(channels,
                                        samplerate,
                                        blocksize,
                                        totalframes)

        self.input_blocksize = int(0.02 * samplerate)
        self.input_stepsize = int(0.008 * samplerate)

        sr = float(samplerate)
        lowFreq = 100.0
        highFreq = sr / 5
        f1 = lowFreq / sr
        f2 = highFreq / sr
        numtaps = 10
        self.filtre = firwin(numtaps=numtaps, cutoff=[f1, f2], pass_zero=False)
        self.filtre_z = lfiltic(b=self.filtre, a=1, y=0)  # Initial conditions

    @staticmethod
    @interfacedoc
    def id():
        return "irit_startseg"

    @staticmethod
    @interfacedoc
    def name():
        return "IRIT Start/Session segmentation"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Labeled Start/session segments"

    @frames_adapter
    def process(self, frames, eod=False):
        '''Band-pass filter the incoming frame and buffer its RMS energy.'''

        #self.energy += [numpy.sqrt(numpy.mean(lfilter(self.filtre,
        #                                              1.0,
        #                                              frames.T[0]) ** 2))]
        # Compute energy
        env, self.filtre_z = lfilter(b=self.filtre, a=1.0, axis=0,
                                     x=frames[:, 0],
                                     zi=self.filtre_z)
        self._buffer.append('energy', numpy.sqrt(numpy.mean(env ** 2)))

        return frames, eod

    def post_process(self):
        '''Normalize the buffered energy, match leading silences against the
        trained prototypes and emit labeled 'Start'/'Session' segments.'''
        # Normalize energy
        self.energy = self._buffer['energy'][:]

        # BAD PATCH !!!
        self.energy[-1] = 0
        if self.energy.max():
            self.energy = self.energy / self.energy.max()

        silences = [1 if e < self.max_energy else 0 for e in self.energy]
        step = float(self.input_stepsize) / float(self.samplerate())

        path = os.path.split(__file__)[0]
        models_dir = os.path.join(path, 'trained_models')

        prototype1_file = os.path.join(models_dir,
                                       'irit_noise_startSilences_proto1.dat')
        prototype2_file = os.path.join(models_dir,
                                       'irit_noise_startSilences_proto2.dat')

        prototype = numpy.load(prototype1_file)
        prototype2 = numpy.load(prototype2_file)

        # Smoothing to remove small segments in either direction
        struct = [1] * len(prototype)
        silences = binary_closing(silences, struct)
        silences = binary_opening(silences, struct)

        seg = [0, -1, silences[0]]
        silencesList = []
        for i, v in enumerate(silences):
            if not (v == seg[2]):
                seg[1] = i
                silencesList.append(tuple(seg))
                seg = [i, -1, v]
        seg[1] = i
        silencesList.append(tuple(seg))
        segments = []
        start = 0.0

        for s in silencesList:
            if s[2] == 1:
                shape = numpy.array(self.energy[s[0]:s[1]])

                d1, _ = computeDist2(prototype, shape)
                d2, _ = computeDist2(prototype2, shape)
                dist = min([d1, d2])

                if dist < self.threshold:
                    s = map(float, s)
                    segments += [(start, s[0] * step - start, 1),
                                 (s[0] * step, (s[1] - s[0]) * step, 0)]
                    start = s[1] * step

        segments += [(start, len(self.energy) * step - start, 1)]

        label = {0: 'Start', 1: 'Session'}
        segs = self.new_result(data_mode='label', time_mode='segment')
        segs.id_metadata.id += '.' + 'segments'
        segs.id_metadata.name += ' ' + 'Segments'
        segs.data_object.label_metadata.label = label
        
        segs.data_object.time, segs.data_object.duration, segs.data_object.label = zip(
            *segments)

        self.add_result(segs)

    def release(self):
        self._buffer.close()
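Note: a minimal usage sketch for the analyzer above, assuming it is registered under its id() 'irit_startseg' and reusing the file_decoder / test-sample pipeline pattern shown in the SpectrogramBuffer doctest later on this page; the sample file and the result key are assumptions, not part of the original source.

# Hedged sketch: running IRITStartSeg in a TimeSide pipe (assumed setup).
from timeside.core import get_processor
from timeside.core.tools.test_samples import samples

audio_source = samples['sweep.wav']          # assumed test file
decoder = get_processor('file_decoder')(uri=audio_source)
startseg = get_processor('irit_startseg')()  # id() of the analyzer above

pipe = (decoder | startseg)
pipe.run()

# post_process() appends '.segments' to the analyzer id, so the labeled
# result is expected under a key like 'irit_startseg.segments'.
print(startseg.results.keys())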
Code example #4
 def __init__(self,
              input_blocksize=2048,
              input_stepsize=None,
              fft_size=None):
     super(SpectrogramBuffer, self).__init__()
     self.values = BufferTable()
Code example #5
class SpectrogramBuffer(Spectrogram):
    """
    Spectrogram image builder with an extensible buffer based on tables

    Parameters
    ----------
    input_blocksize : int, optional
        Block size of the input signal, defaults to 2048.
    input_stepsize : int, optional
        Step size between consecutive blocks, defaults to half the block size.
    fft_size : int, optional
        Size of the FFT, defaults to the block size.

    Examples
    --------
    >>> import timeside
    >>> from timeside.core import get_processor
    >>> from timeside.core.tools.test_samples import samples
    >>> audio_source = samples['sweep.wav']
    >>> decoder = get_processor('file_decoder')(uri=audio_source)
    >>> spectrogram = get_processor('spectrogram_analyzer_buffer')(input_blocksize=2048, input_stepsize=1024)
    >>> pipe = (decoder | spectrogram)
    >>> pipe.run()
    >>> spectrogram.results.keys()
    ['spectrogram_analyzer_buffer']
    >>> result = spectrogram.results['spectrogram_analyzer_buffer']
    >>> result.data.shape
    (344, 1025)

     .. plot::

      import timeside
      from timeside.core import get_processor
      from timeside.core.tools.test_samples import samples
      audio_source = samples['sweep.wav']
      decoder = get_processor('file_decoder')(uri=audio_source)
      spectrogram = get_processor('spectrogram_analyzer_buffer')(input_blocksize=2048,
                                                          input_stepsize=1024)
      pipe = (decoder | spectrogram)
      pipe.run()
      res = spectrogram.results['spectrogram_analyzer_buffer']
      res.render()
    """

    implements(IAnalyzer)

    def __init__(self,
                 input_blocksize=2048,
                 input_stepsize=None,
                 fft_size=None):
        super(SpectrogramBuffer, self).__init__()
        self.values = BufferTable()

    @staticmethod
    @interfacedoc
    def id():
        return "spectrogram_analyzer_buffer"

    @staticmethod
    @interfacedoc
    def name():
        return "Spectrogram Analyzer with extensible buffer"

    @downmix_to_mono
    @frames_adapter
    def process(self, frames, eod=False):
        stft = np.fft.rfft(frames, self.fft_size)
        self.values.append('stft', stft)
        return frames, eod

    def post_process(self):
        spectrogram = self.new_result(data_mode='value', time_mode='framewise')
        spectrogram.parameters = {'fft_size': self.fft_size}
        spectrogram.data_object.value = np.abs(self.values['stft'])
        nb_freq = spectrogram.data_object.value.shape[1]
        spectrogram.data_object.y_value = (np.arange(0, nb_freq) *
                                           self.samplerate() / self.fft_size)
        self.add_result(spectrogram)

    def release(self):
        self.values.close()
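Note: the (344, 1025) shape in the doctest follows from the FFT bookkeeping in process() and post_process(): np.fft.rfft of a real block of fft_size samples yields fft_size // 2 + 1 bins, and bin k is mapped to k * samplerate / fft_size Hz. A standalone sketch of that arithmetic, assuming a 44100 Hz samplerate for the test file:

import numpy as np

fft_size = 2048
samplerate = 44100.0                     # assumed samplerate of sweep.wav

# A real-input FFT keeps only the non-negative frequency bins.
nb_freq = np.fft.rfft(np.zeros(fft_size), fft_size).shape[0]
print(nb_freq)                           # 1025 == fft_size // 2 + 1

# Frequency axis computed exactly as in post_process().
y_value = np.arange(0, nb_freq) * samplerate / fft_size
print(y_value[1], y_value[-1])           # ~21.5 Hz per bin, up to 22050.0 Hz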
Code example #6
File: spectrogram_buffer.py  Project: Eyepea/TimeSide
 def __init__(self, input_blocksize=2048, input_stepsize=None,
              fft_size=None):
     super(SpectrogramBuffer, self).__init__()
     self.values = BufferTable()
Code example #7
File: spectrogram_buffer.py  Project: Eyepea/TimeSide
class SpectrogramBuffer(Spectrogram):
    """
    Spectrogram image builder with an extensible buffer based on tables

    Parameters
    ----------
    input_blocksize : int, optional
        Block size of the input signal, defaults to 2048.
    input_stepsize : int, optional
        Step size between consecutive blocks, defaults to half the block size.
    fft_size : int, optional
        Size of the FFT, defaults to the block size.

    Examples
    --------
    >>> import timeside
    >>> from timeside.core import get_processor
    >>> from timeside.core.tools.test_samples import samples
    >>> audio_source = samples['sweep.wav']
    >>> decoder = get_processor('file_decoder')(uri=audio_source)
    >>> spectrogram = get_processor('spectrogram_analyzer_buffer')(input_blocksize=2048, input_stepsize=1024)
    >>> pipe = (decoder | spectrogram)
    >>> pipe.run()
    >>> spectrogram.results.keys()
    ['spectrogram_analyzer_buffer']
    >>> result = spectrogram.results['spectrogram_analyzer_buffer']
    >>> result.data.shape
    (344, 1025)

     .. plot::

      import timeside
      from timeside.core import get_processor
      from timeside.core.tools.test_samples import samples
      audio_source = samples['sweep.wav']
      decoder = get_processor('file_decoder')(uri=audio_source)
      spectrogram = get_processor('spectrogram_analyzer_buffer')(input_blocksize=2048,
                                                          input_stepsize=1024)
      pipe = (decoder | spectrogram)
      pipe.run()
      res = spectrogram.results['spectrogram_analyzer_buffer']
      res.render()
    """

    implements(IAnalyzer)

    def __init__(self, input_blocksize=2048, input_stepsize=None,
                 fft_size=None):
        super(SpectrogramBuffer, self).__init__()
        self.values = BufferTable()

    @staticmethod
    @interfacedoc
    def id():
        return "spectrogram_analyzer_buffer"

    @staticmethod
    @interfacedoc
    def name():
        return "Spectrogram Analyzer with extensible buffer"

    @downmix_to_mono
    @frames_adapter
    def process(self, frames, eod=False):
        stft = np.fft.rfft(frames, self.fft_size)
        self.values.append('stft', stft)
        return frames, eod

    def post_process(self):
        spectrogram = self.new_result(data_mode='value', time_mode='framewise')
        spectrogram.parameters = {'fft_size': self.fft_size}
        spectrogram.data_object.value = np.abs(self.values['stft'])
        nb_freq = spectrogram.data_object.value.shape[1]
        spectrogram.data_object.y_value = (np.arange(0, nb_freq) *
                                           self.samplerate() / self.fft_size)
        self.add_result(spectrogram)

    def release(self):
        self.values.close()
Code example #8
class IRITStartSeg(Analyzer):
    '''
    Segmentation of recording sessions into 'start' and 'session' segments

    Properties:
    '''
    implements(IAnalyzer)

    @interfacedoc
    def __init__(self):
        super(IRITStartSeg, self).__init__()

        self._buffer = BufferTable()

        # self.energy = []

        self.max_energy = 0.002 * 2
        self.min_overlap = 20
        self.threshold = 0.12

    @interfacedoc
    def setup(self,
              channels=None,
              samplerate=None,
              blocksize=None,
              totalframes=None):

        super(IRITStartSeg, self).setup(channels, samplerate, blocksize,
                                        totalframes)

        self.input_blocksize = int(0.02 * samplerate)
        self.input_stepsize = int(0.008 * samplerate)

        sr = float(samplerate)
        lowFreq = 100.0
        highFreq = sr / 5
        f1 = lowFreq / sr
        f2 = highFreq / sr
        numtaps = 10
        self.filtre = firwin(numtaps=numtaps, cutoff=[f1, f2], pass_zero=False)
        self.filtre_z = lfiltic(b=self.filtre, a=1, y=0)  # Initial conditions

    @staticmethod
    @interfacedoc
    def id():
        return "irit_startseg"

    @staticmethod
    @interfacedoc
    def name():
        return "IRIT Start/Session segmentation"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Labeled Start/session segments"

    @frames_adapter
    def process(self, frames, eod=False):
        '''Band-pass filter the incoming frame and buffer its RMS energy.'''

        #self.energy += [numpy.sqrt(numpy.mean(lfilter(self.filtre,
        #                                              1.0,
        #                                              frames.T[0]) ** 2))]
        # Compute energy
        env, self.filtre_z = lfilter(b=self.filtre,
                                     a=1.0,
                                     axis=0,
                                     x=frames[:, 0],
                                     zi=self.filtre_z)
        self._buffer.append('energy', numpy.sqrt(numpy.mean(env**2)))

        return frames, eod

    def post_process(self):
        '''Normalize the buffered energy, match leading silences against the
        trained prototypes and emit labeled 'Start'/'Session' segments.'''
        # Normalize energy
        self.energy = self._buffer['energy'][:]

        # BAD PATCH !!!
        self.energy[-1] = 0
        if self.energy.max():
            self.energy = self.energy / self.energy.max()

        silences = [1 if e < self.max_energy else 0 for e in self.energy]
        step = float(self.input_stepsize) / float(self.samplerate())

        path = os.path.split(__file__)[0]
        models_dir = os.path.join(path, 'trained_models')

        prototype1_file = os.path.join(models_dir,
                                       'irit_noise_startSilences_proto1.dat')
        prototype2_file = os.path.join(models_dir,
                                       'irit_noise_startSilences_proto2.dat')

        prototype = numpy.load(prototype1_file)
        prototype2 = numpy.load(prototype2_file)

        # Smoothing to remove small segments in either direction
        struct = [1] * len(prototype)
        silences = binary_closing(silences, struct)
        silences = binary_opening(silences, struct)

        seg = [0, -1, silences[0]]
        silencesList = []
        for i, v in enumerate(silences):
            if not (v == seg[2]):
                seg[1] = i
                silencesList.append(tuple(seg))
                seg = [i, -1, v]
        seg[1] = i
        silencesList.append(tuple(seg))
        segments = []
        start = 0.0

        for s in silencesList:
            if s[2] == 1:
                shape = numpy.array(self.energy[s[0]:s[1]])

                d1, _ = computeDist2(prototype, shape)
                d2, _ = computeDist2(prototype2, shape)
                dist = min([d1, d2])

                if dist < self.threshold:
                    s = map(float, s)
                    segments += [(start, s[0] * step - start, 1),
                                 (s[0] * step, (s[1] - s[0]) * step, 0)]
                    start = s[1] * step

        segments += [(start, len(self.energy) * step - start, 1)]

        label = {0: 'Start', 1: 'Session'}
        segs = self.new_result(data_mode='label', time_mode='segment')
        segs.id_metadata.id += '.' + 'segments'
        segs.id_metadata.name += ' ' + 'Segments'
        segs.data_object.label_metadata.label = label

        segs.data_object.time, segs.data_object.duration, segs.data_object.label = zip(
            *segments)

        self.add_result(segs)

    def release(self):
        self._buffer.close()