def post_process(self):
    '''
    Build the Yin-confidence and Mono/Poly segment results.

    Reads the ``aubio_pitch.pitch_confidence`` series from the results of
    the ``aubio_pitch`` parent, walks it in windows and, for each window,
    compares ``self.monoLikelihood`` with ``self.polyLikelihood`` evaluated
    on the window mean and variance. Two results are stored through
    ``self.add_result``:

    - ``<id>.yin_confidence``: the pitch confidences, unchanged
      (framewise values);
    - ``<id>.segments``: labelled segments (0: 'Poly', 1: 'Mono') built by
      ``segmentFromValues`` from the per-window decisions.

    Returns nothing.
    '''
    aubio_res_id = 'aubio_pitch.pitch_confidence'
    aubio_uuid = self.parents['aubio_pitch'].uuid()
    aubio_results = self.process_pipe.results[aubio_uuid]
    pitch_confidences = aubio_results[aubio_res_id].data

    # decisionLen / wStep: presumably the number of confidence frames
    # covered by one decision window -- confirm against the class setup.
    nb_frame_decision = int(self.decisionLen / self.wStep)
    half_window = int(nb_frame_decision / 2)
    epsilon = numpy.spacing(pitch_confidences[0])

    is_mono = []
    window_centers = range(half_window,
                           len(pitch_confidences) - half_window,
                           nb_frame_decision)
    for center in window_centers:
        window = pitch_confidences[center - half_window:center + half_window]
        conf_mean = numpy.mean(window)
        # NOTE(review): adding a constant to every sample does not change
        # the variance, so `epsilon` does not keep `conf_var` away from
        # zero. If such a guard was intended, `numpy.var(window) + epsilon`
        # is probably what was meant -- confirm before changing, since it
        # alters the likelihood inputs.
        conf_var = numpy.var(window + epsilon)
        mono_score = self.monoLikelihood(conf_mean, conf_var)
        poly_score = self.polyLikelihood(conf_mean, conf_var)
        is_mono.append(bool(mono_score > poly_score))

    # The result object is created once; the original built it twice and
    # threw the first instance away.
    conf = self.new_result(data_mode='value', time_mode='framewise')
    conf.id_metadata.id += '.' + 'yin_confidence'
    conf.id_metadata.name += ' ' + 'Yin Confidence'
    conf.data_object.value = pitch_confidences
    self.add_result(conf)

    convert = {False: 0, True: 1}
    label = {0: 'Poly', 1: 'Mono'}
    segList = segmentFromValues(is_mono)

    segs = self.new_result(data_mode='label', time_mode='segment')
    segs.id_metadata.id += '.' + 'segments'
    segs.id_metadata.name += ' ' + 'Segments'
    segs.data_object.label_metadata.label = label
    # Each segment `s` is read as s[0] / s[1] (first / last window index,
    # presumably) and s[2] (the boolean decision); indices are scaled by
    # decisionLen to get times and durations.
    segs.data_object.label = [convert[s[2]] for s in segList]
    segs.data_object.time = [(float(s[0] + 0.5) * self.decisionLen)
                             for s in segList]
    segs.data_object.duration = [(float(s[1] - s[0] + 1) * self.decisionLen)
                                 for s in segList]
    self.add_result(segs)
def post_process(self):
    '''
    Build the Yin-confidence and Mono/Poly segment results.

    Reads the ``aubio_pitch.pitch_confidence`` series from the results of
    the ``aubio_pitch`` parent, walks it in windows and, for each window,
    compares ``self.monoLikelihood`` with ``self.polyLikelihood`` evaluated
    on the window mean and variance. Two results are stored through
    ``self.add_result``:

    - ``<id>.yin_confidence``: the pitch confidences, unchanged
      (framewise values);
    - ``<id>.segments``: labelled segments (0: 'Poly', 1: 'Mono') built by
      ``segmentFromValues`` from the per-window decisions.

    Returns nothing.
    '''
    aubio_res_id = 'aubio_pitch.pitch_confidence'
    aubio_uuid = self.parents['aubio_pitch'].uuid()
    aubio_results = self.process_pipe.results[aubio_uuid]
    pitch_confidences = aubio_results[aubio_res_id].data

    # decisionLen / wStep: presumably the number of confidence frames
    # covered by one decision window -- confirm against the class setup.
    nb_frame_decision = int(self.decisionLen / self.wStep)
    half_window = int(nb_frame_decision / 2)
    epsilon = numpy.spacing(pitch_confidences[0])

    is_mono = []
    window_centers = range(half_window,
                           len(pitch_confidences) - half_window,
                           nb_frame_decision)
    for center in window_centers:
        window = pitch_confidences[center - half_window:center + half_window]
        conf_mean = numpy.mean(window)
        # NOTE(review): adding a constant to every sample does not change
        # the variance, so `epsilon` does not keep `conf_var` away from
        # zero. If such a guard was intended, `numpy.var(window) + epsilon`
        # is probably what was meant -- confirm before changing, since it
        # alters the likelihood inputs.
        conf_var = numpy.var(window + epsilon)
        mono_score = self.monoLikelihood(conf_mean, conf_var)
        poly_score = self.polyLikelihood(conf_mean, conf_var)
        is_mono.append(bool(mono_score > poly_score))

    # The result object is created once; the original built it twice and
    # threw the first instance away.
    conf = self.new_result(data_mode='value', time_mode='framewise')
    conf.id_metadata.id += '.' + 'yin_confidence'
    conf.id_metadata.name += ' ' + 'Yin Confidence'
    conf.data_object.value = pitch_confidences
    self.add_result(conf)

    convert = {False: 0, True: 1}
    label = {0: 'Poly', 1: 'Mono'}
    segList = segmentFromValues(is_mono)

    segs = self.new_result(data_mode='label', time_mode='segment')
    segs.id_metadata.id += '.' + 'segments'
    segs.id_metadata.name += ' ' + 'Segments'
    segs.data_object.label_metadata.label = label
    # Each segment `s` is read as s[0] / s[1] (first / last window index,
    # presumably) and s[2] (the boolean decision); indices are scaled by
    # decisionLen to get times and durations.
    segs.data_object.label = [convert[s[2]] for s in segList]
    segs.data_object.time = [(float(s[0] + 0.5) * self.decisionLen)
                             for s in segList]
    segs.data_object.duration = [(float(s[1] - s[0] + 1) * self.decisionLen)
                                 for s in segList]
    self.add_result(segs)
def post_process(self):
    '''
    Derive the confidence and Speech/NonSpeech segment results.

    The values gathered in ``self.entropyValue`` are passed to
    ``computeModulation``; the modulation is then compared with
    ``self.threshold`` to produce two results, both stored through
    ``self.add_result``:

    - ``<id>.confidence``: framewise
      ``(modulation - threshold) / threshold``, with every value above 1
      replaced by 1;
    - ``<id>.segments``: labelled segments (0: 'NonSpeech', 1: 'Speech')
      built by ``segmentFromValues`` from the thresholded modulation after
      a binary opening.

    Returns nothing.
    '''
    entropy_series = array(self.entropyValue)
    window_len = self.modulLen * self.samplerate() / self.blocksize()
    modulation = computeModulation(entropy_series, window_len, False)

    # Confidence: relative distance to the threshold, capped at 1.
    relative = array(modulation - self.threshold) / self.threshold
    relative[relative > 1] = 1

    confidence = self.new_result(data_mode='value', time_mode='framewise')
    confidence.id_metadata.id += '.' + 'confidence'
    confidence.id_metadata.name += ' ' + 'Confidence'
    confidence.data_object.value = relative
    self.add_result(confidence)

    # Binary decision, cleaned with an opening whose structuring element
    # is a run of ones of length 2 * smoothLen.
    above_threshold = modulation > self.threshold
    opened = binary_opening(above_threshold, [1] * (self.smoothLen * 2))

    label_index = {False: 0, True: 1}
    label_names = {0: 'NonSpeech', 1: 'Speech'}
    segments = segmentFromValues(opened)

    result = self.new_result(data_mode='label', time_mode='segment')
    result.id_metadata.id += '.' + 'segments'
    result.id_metadata.name += ' ' + 'Segments'
    result.data_object.label_metadata.label = label_names

    # Each segment is read as seg[0] / seg[1] (first / last frame index,
    # presumably) and seg[2] (the boolean value of the segment).
    labels = []
    for seg in segments:
        labels.append(label_index[seg[2]])
    result.data_object.label = labels

    # Frame indices are scaled by blocksize() / samplerate().
    starts = []
    for seg in segments:
        starts.append(float(seg[0]) * self.blocksize() / self.samplerate())
    result.data_object.time = starts

    durations = []
    for seg in segments:
        durations.append(
            float(seg[1] - seg[0] + 1) * self.blocksize() / self.samplerate())
    result.data_object.duration = durations

    self.add_result(result)
def post_process(self):
    '''
    Derive the confidence and Speech/NonSpeech segment results.

    ``self.entropyValue`` is turned into a modulation series by
    ``computeModulation`` and compared with ``self.threshold``. Two results
    are added to ``self.process_pipe.results``:

    - ``<id>.confidence``: framewise
      ``(modulation - threshold) / threshold``, with every value above 1
      replaced by 1;
    - ``<id>.segments``: labelled segments (0: 'NonSpeech', 1: 'Speech')
      built by ``segmentFromValues`` from the thresholded modulation after
      a binary opening.

    Returns nothing.
    '''
    def scale_frames(frame_count):
        # Presumably converts a number of frames into seconds; evaluated
        # on every call so the arithmetic matches the inline expression.
        return float(frame_count) * self.blocksize() / self.samplerate()

    entropy_series = array(self.entropyValue)
    window_len = self.modulLen * self.samplerate() / self.blocksize()
    modulation = computeModulation(entropy_series, window_len, False)

    # Confidence: relative distance to the threshold, capped at 1.
    relative = array(modulation - self.threshold) / self.threshold
    relative[relative > 1] = 1

    confidence = self.new_result(data_mode='value', time_mode='framewise')
    confidence.id_metadata.id += '.' + 'confidence'
    confidence.id_metadata.name += ' ' + 'Confidence'
    confidence.data_object.value = relative
    self.process_pipe.results.add(confidence)

    # Binary decision, cleaned with an opening whose structuring element
    # is a run of ones of length 2 * smoothLen.
    structure = [1] * (self.smoothLen * 2)
    speech_mask = binary_opening(modulation > self.threshold, structure)

    label_index = {False: 0, True: 1}
    label_names = {0: 'NonSpeech', 1: 'Speech'}
    segments = segmentFromValues(speech_mask)

    result = self.new_result(data_mode='label', time_mode='segment')
    result.id_metadata.id += '.' + 'segments'
    result.id_metadata.name += ' ' + 'Segments'
    result.label_metadata.label = label_names
    # Each segment is read as seg[0] / seg[1] (first / last frame index,
    # presumably) and seg[2] (the boolean value of the segment).
    result.data_object.label = [label_index[seg[2]] for seg in segments]
    result.data_object.time = [scale_frames(seg[0]) for seg in segments]
    result.data_object.duration = [
        scale_frames(seg[1] - seg[0] + 1) for seg in segments
    ]
    self.process_pipe.results.add(result)
def post_process(self):
    '''
    Derive the energy-confidence and Speech/nonSpeech segment results.

    The energies stored in ``self.energy4hz`` are filtered with a
    pass-band FIR filter, summed, optionally normalised by their mean and
    turned into a modulation series by ``computeModulation``. Two results
    are added to ``self.process_pipe.results``:

    - ``<id>.energy_confidence``: framewise
      ``(modulation - threshold) / threshold``, with every value above 1
      replaced by 1;
    - ``<id>.segments``: labelled segments (0: 'nonSpeech', 1: 'Speech')
      built by ``segmentFromValues`` from ``modulation > threshold``.

    As a side effect ``self.energy4hz`` is replaced by its ``array(...)``
    conversion. Returns nothing.
    '''
    def scale_frames(frame_count):
        # Presumably converts a number of frames into seconds; evaluated
        # on every call so the arithmetic matches the inline expression.
        return float(frame_count) * self.blocksize() / self.samplerate()

    # Pass-band filter: band edges are centre -/+ half the width, each
    # divided by samplerate().
    center = self.frequency_center / self.samplerate()
    low_edge = center - (self.frequency_width / 2) / self.samplerate()
    high_edge = center + (self.frequency_width / 2) / self.samplerate()
    taps = firwin(self.orderFilter, [low_edge, high_edge], pass_zero=False)

    # Energy inside the band: filter the transposed energies along axis 0,
    # then collapse them with `sum`.
    self.energy4hz = array(self.energy4hz)
    filtered = lfilter(taps, 1, self.energy4hz.T, 0)
    energy = sum(filtered)

    # Normalise by the mean, skipped when every value is zero.
    if self.normalizeEnergy and energy.any():
        energy = energy / mean(energy)

    # Modulation of the energy over a window of modulLen, expressed in
    # blocks.
    modulation_window = int(
        self.modulLen * self.samplerate() / self.blocksize())
    modulation = computeModulation(energy, modulation_window, True)

    # Confidence index: relative distance to the threshold, capped at 1.
    confidence = array(modulation - self.threshold) / self.threshold
    confidence[confidence > 1] = 1

    energy_result = self.new_result(data_mode='value', time_mode='framewise')
    energy_result.id_metadata.id += '.' + 'energy_confidence'
    energy_result.id_metadata.name += ' ' + 'Energy Confidence'
    energy_result.data_object.value = confidence
    self.process_pipe.results.add(energy_result)

    # Segmentation of the thresholded modulation.
    # Hint: median filtering could improve the smoothness of the result,
    # e.g. by applying scipy.signal.medfilt with a kernel of 31 to
    # `modulation > self.threshold` before building the segments.
    label_index = {False: 0, True: 1}
    label_names = {0: 'nonSpeech', 1: 'Speech'}
    segments = segmentFromValues(modulation > self.threshold)

    seg_result = self.new_result(data_mode='label', time_mode='segment')
    seg_result.id_metadata.id += '.' + 'segments'
    seg_result.id_metadata.name += ' ' + 'Segments'
    seg_result.label_metadata.label = label_names
    # Each segment is read as seg[0] / seg[1] (first / last frame index,
    # presumably) and seg[2] (the boolean value of the segment).
    seg_result.data_object.label = [label_index[seg[2]] for seg in segments]
    seg_result.data_object.time = [scale_frames(seg[0]) for seg in segments]
    seg_result.data_object.duration = [
        scale_frames(seg[1] - seg[0] + 1) for seg in segments
    ]
    self.process_pipe.results.add(seg_result)