# NOTE: the import paths below are assumptions based on the TimeSide plugin
# layout; adjust them to the local package structure if needed.
import os

import numpy
from scipy.signal import firwin, lfilter, lfiltic
from scipy.ndimage.morphology import binary_closing, binary_opening

from timeside.core import implements, interfacedoc
from timeside.core.analyzer import Analyzer, IAnalyzer
from timeside.core.tools.buffering import BufferTable
from timeside.plugins.analyzer.preprocessors import frames_adapter
from timeside.plugins.analyzer.utils import computeDist2


class IRITStartSeg(Analyzer):
    '''
    Segmentation of recording sessions into 'start' and 'session' segments.
    '''
    implements(IAnalyzer)

    @interfacedoc
    def __init__(self):
        super(IRITStartSeg, self).__init__()

        self._buffer = BufferTable()

        self.max_energy = 0.002 * 2
        self.min_overlap = 20
        self.threshold = 0.12

    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None,
              totalframes=None):
        super(IRITStartSeg, self).setup(channels, samplerate, blocksize,
                                        totalframes)

        # 20 ms analysis frames with an 8 ms hop
        self.input_blocksize = int(0.02 * samplerate)
        self.input_stepsize = int(0.008 * samplerate)

        # Band-pass FIR filter between 100 Hz and samplerate / 5
        sr = float(samplerate)
        lowFreq = 100.0
        highFreq = sr / 5
        f1 = lowFreq / sr
        f2 = highFreq / sr
        numtaps = 10
        self.filtre = firwin(numtaps=numtaps, cutoff=[f1, f2], pass_zero=False)
        self.filtre_z = lfiltic(b=self.filtre, a=1, y=0)  # Initial conditions

    @staticmethod
    @interfacedoc
    def id():
        return "irit_startseg"

    @staticmethod
    @interfacedoc
    def name():
        return "IRIT Start/Session segmentation"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Labeled Start/Session segments"

    @frames_adapter
    def process(self, frames, eod=False):
        '''Band-pass filter the frame and buffer its RMS energy.'''
        env, self.filtre_z = lfilter(b=self.filtre, a=1.0, axis=0,
                                     x=frames[:, 0],
                                     zi=self.filtre_z)
        self._buffer.append('energy', numpy.sqrt(numpy.mean(env ** 2)))
        return frames, eod

    def post_process(self):
        '''Match low-energy regions against the trained silence prototypes
        and label the recording as 'Start' and 'Session' segments.'''
        # Normalize energy
        self.energy = self._buffer['energy'][:]
        # BAD PATCH !!! Force the last value to zero.
        self.energy[-1] = 0
        if self.energy.max():
            self.energy = self.energy / self.energy.max()

        silences = [1 if e < self.max_energy else 0 for e in self.energy]
        step = float(self.input_stepsize) / float(self.samplerate())

        path = os.path.split(__file__)[0]
        models_dir = os.path.join(path, 'trained_models')
        prototype1_file = os.path.join(models_dir,
                                       'irit_noise_startSilences_proto1.dat')
        prototype2_file = os.path.join(models_dir,
                                       'irit_noise_startSilences_proto2.dat')
        prototype = numpy.load(prototype1_file)
        prototype2 = numpy.load(prototype2_file)

        # Smoothing to remove small segments in either direction
        struct = [1] * len(prototype)
        silences = binary_closing(silences, struct)
        silences = binary_opening(silences, struct)

        # Group consecutive identical values into (start, stop, value) runs
        seg = [0, -1, silences[0]]
        silencesList = []
        for i, v in enumerate(silences):
            if not (v == seg[2]):
                seg[1] = i
                silencesList.append(tuple(seg))
                seg = [i, -1, v]
        seg[1] = i
        silencesList.append(tuple(seg))

        segments = []
        start = 0.0
        for s in silencesList:
            if s[2] == 1:
                # Compare the energy shape of the silence with both prototypes
                shape = numpy.array(self.energy[s[0]:s[1]])
                d1, _ = computeDist2(prototype, shape)
                d2, _ = computeDist2(prototype2, shape)
                dist = min([d1, d2])

                if dist < self.threshold:
                    s = list(map(float, s))  # list() needed under Python 3
                    segments += [(start, s[0] * step - start, 1),
                                 (s[0] * step, (s[1] - s[0]) * step, 0)]
                    start = s[1] * step

        segments += [(start, len(self.energy) * step - start, 1)]

        label = {0: 'Start', 1: 'Session'}
        segs = self.new_result(data_mode='label', time_mode='segment')
        segs.id_metadata.id += '.' + 'segments'
        segs.id_metadata.name += ' ' + 'Segments'
        segs.data_object.label_metadata.label = label
        (segs.data_object.time,
         segs.data_object.duration,
         segs.data_object.label) = zip(*segments)

        self.add_result(segs)

    def release(self):
        self._buffer.close()
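
# Usage sketch for this analyzer (an assumption-based example that mirrors the
# pipeline pattern shown in the SpectrogramBuffer doctest below; 'sweep.wav' is
# just a bundled test sample, and the result key follows the id set in
# post_process above):
#
#     from timeside.core import get_processor
#     from timeside.core.tools.test_samples import samples
#
#     decoder = get_processor('file_decoder')(uri=samples['sweep.wav'])
#     startseg = get_processor('irit_startseg')()
#     (decoder | startseg).run()
#     result = startseg.results['irit_startseg.segments']
#     # result holds the labeled 'Start' / 'Session' segments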
# NOTE: the import paths below are assumptions based on the TimeSide plugin
# layout; Spectrogram, the parent analyzer, is expected to be defined or
# imported earlier in this module.
import numpy as np

from timeside.core import implements, interfacedoc
from timeside.core.analyzer import IAnalyzer
from timeside.core.tools.buffering import BufferTable
from timeside.plugins.analyzer.preprocessors import downmix_to_mono, frames_adapter


class SpectrogramBuffer(Spectrogram):
    """
    Spectrogram image builder with an extensible buffer based on tables

    Parameters
    ----------
    input_blocksize : int, optional
        Blocksize of the input signal, default to 2048
    input_stepsize : int, optional
        Stepsize of the input signal, default to half the blocksize.
    fft_size : int, optional
        The size of the fft, default to the blocksize.

    Examples
    --------
    >>> import timeside
    >>> from timeside.core import get_processor
    >>> from timeside.core.tools.test_samples import samples
    >>> audio_source = samples['sweep.wav']
    >>> decoder = get_processor('file_decoder')(uri=audio_source)
    >>> spectrogram = get_processor('spectrogram_analyzer_buffer')(input_blocksize=2048, input_stepsize=1024)
    >>> pipe = (decoder | spectrogram)
    >>> pipe.run()
    >>> spectrogram.results.keys()
    ['spectrogram_analyzer_buffer']
    >>> result = spectrogram.results['spectrogram_analyzer_buffer']
    >>> result.data.shape
    (344, 1025)

    .. plot::

      import timeside
      from timeside.core import get_processor
      from timeside.core.tools.test_samples import samples
      audio_source = samples['sweep.wav']
      decoder = get_processor('file_decoder')(uri=audio_source)
      spectrogram = get_processor('spectrogram_analyzer_buffer')(input_blocksize=2048, input_stepsize=1024)
      pipe = (decoder | spectrogram)
      pipe.run()
      res = spectrogram.results['spectrogram_analyzer_buffer']
      res.render()
    """
    implements(IAnalyzer)

    def __init__(self, input_blocksize=2048, input_stepsize=None,
                 fft_size=None):
        # Forward the framing parameters to the parent Spectrogram analyzer
        # (assumes the parent accepts the same keyword arguments); without
        # this they would be silently ignored.
        super(SpectrogramBuffer, self).__init__(input_blocksize=input_blocksize,
                                                input_stepsize=input_stepsize,
                                                fft_size=fft_size)
        self.values = BufferTable()

    @staticmethod
    @interfacedoc
    def id():
        return "spectrogram_analyzer_buffer"

    @staticmethod
    @interfacedoc
    def name():
        return "Spectrogram Analyzer with extensible buffer"

    @downmix_to_mono
    @frames_adapter
    def process(self, frames, eod=False):
        # Buffer the complex STFT of each frame in the table-backed store
        stft = np.fft.rfft(frames, self.fft_size)
        self.values.append('stft', stft)
        return frames, eod

    def post_process(self):
        spectrogram = self.new_result(data_mode='value', time_mode='framewise')
        spectrogram.parameters = {'fft_size': self.fft_size}
        spectrogram.data_object.value = np.abs(self.values['stft'])
        nb_freq = spectrogram.data_object.value.shape[1]
        spectrogram.data_object.y_value = (np.arange(0, nb_freq) *
                                           self.samplerate() / self.fft_size)
        self.add_result(spectrogram)

    def release(self):
        self.values.close()
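
# Sanity-check sketch for the frequency axis computed in post_process above
# (standalone numpy, illustrative parameter values only):
#
#     import numpy as np
#
#     fft_size, samplerate = 2048, 44100
#     nb_freq = fft_size // 2 + 1                        # rfft keeps N/2 + 1 bins
#     freqs = np.arange(0, nb_freq) * samplerate / fft_size
#     # freqs[0] == 0.0 Hz, freqs[-1] == 22050.0 Hz (the Nyquist frequency)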