def replayAudio(self):
    """Poll forever; each time the echo trigger is set, replay the buffered audio.

    Builds a player for ``self.audio_source`` once, then loops: when
    ``self.trigger_echo`` is set (presumably by another thread -- TODO confirm),
    plays ``self.bdata`` and clears the flag. Sleeps 2 s between polls.
    Never returns.
    """
    self.player = player_for(self.audio_source)
    while True:
        pending = self.trigger_echo
        if pending:
            # Play first, then clear, matching the original ordering.
            self.player.play(self.bdata)
            self.trigger_echo = False
        time.sleep(2)
def __init__(self,
             model_json='../model/final_cnn_model.json',
             model_weights='../model/weights_final_cnn.h5',
             energy_threshold=50):
    """Set up the audio capture pipeline and load the CNN classifier.

    Parameters (all optional; defaults preserve the original hard-coded
    behavior, so existing callers are unaffected):
        model_json:       path to the serialized CNN architecture (JSON).
        model_weights:    path to the CNN weights file (HDF5).
        energy_threshold: energy level the validator uses to flag activity.

    Side effects: opens an audio data source (records up to 4 s), builds a
    player for it, and compiles the loaded model for categorical
    classification with SGD.
    """
    # record=True lets the captured signal be rewound and replayed later.
    self.asource = ADSFactory.ads(record=True, max_time=4)
    # sample_width=2 -> 16-bit samples.
    self.validator = AudioEnergyValidator(sample_width=2,
                                          energy_threshold=energy_threshold)
    # Lengths are in analysis windows; window duration depends on the
    # source's block size -- TODO confirm against ADSFactory defaults.
    self.tokenizer = StreamTokenizer(validator=self.validator,
                                     min_length=20,
                                     max_length=1000,
                                     max_continuous_silence=30)
    self.player = player_for(self.asource)
    self.model = self.load_cnn(model_json, model_weights)
    self.model.compile(loss='categorical_crossentropy',
                       optimizer='sgd',
                       metrics=['accuracy'])
def configure(self, rf):
    """Configure the module and build an audio player.

    Reads an optional ``sampleRate`` entry from *rf* (falling back to
    16000 Hz), creates an audio data source at that rate, and stores a
    matching player on ``self.player``. Returns whatever the base-class
    configure returned.
    """
    result = BaseModule.configure(self, rf)
    sample_rate = rf.find("sampleRate").asInt() if rf.check("sampleRate") else 16000
    # Default ADS is 16000 Hz / 2 bytes. If the audio to be played back
    # was captured with different parameters, pass them to ads() here.
    source = ADSFactory.ads(sampling_rate=sample_rate)
    self.player = player_for(source)
    return result
energy_threshold=65) # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate()) # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 30 == 300 ms tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30) asource.open() tokens = tokenizer.tokenize(asource) # Play detected regions back player = player_for(asource) # Rewind and read the whole signal asource.rewind() original_signal = [] while True: w = asource.read() if w is None: break original_signal.append(w) original_signal = b''.join(original_signal) player.play(original_signal) print("\n ** playing detected regions...\n")
# We set the `record` argument to True so that we can rewind the source asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True) validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65) # Defalut analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate()) # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 30 == 300 ms tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30) asource.open() tokens = tokenizer.tokenize(asource) # Play detected regions back player = player_for(asource) # Rewind and read the whole signal asource.rewind() original_signal = [] while True: w = asource.read() if w is None: break original_signal.append(w) original_signal = b''.join(original_signal) player.play(original_signal)
def find_voice_segments(audio_file, music_time_list):
    """Detect voice segments in *audio_file*, excluding known music regions.

    Two passes over the file:
      1. Compute the per-frame energy of the whole signal to derive an
         adaptive energy threshold (mean + 40% of the mean-to-max span,
         in dB).
      2. Tokenize the file with that threshold and convert each token to
         [begin_ms, end_ms] (frame index * 10 ms).

    Any segment that overlaps an entry of *music_time_list* (a list of
    [music_begin_ms, music_end_ms] pairs) is split around the music region;
    consumed music entries are removed from *music_time_list* in place.

    Returns a list of [begin_ms, end_ms] segments.

    NOTE(review): an empty/unreadable file leaves max_value at 0.0 and
    log10(0) yields -inf -- behavior for that case is undefined upstream.
    """
    segments = []
    formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}

    # --- Pass 1: adaptive threshold from per-frame energies -----------------
    test_source = ADSFactory.ads(filename=audio_file, record=False)
    test_source.open()
    max_value = 0.0
    # Collect energies in a plain list: the original seeded a numpy array
    # with numpy.empty([]), which injected one *uninitialized* garbage value
    # into the mean and skewed the threshold.
    energies = []
    sample_dtype = formats[test_source.get_sample_width()]
    while True:
        frame = test_source.read()
        if frame is None:
            break
        signal = numpy.array(numpy.frombuffer(frame, dtype=sample_dtype),
                             dtype=numpy.float64)
        # Mean squared amplitude of the frame.
        energy = float(numpy.dot(signal, signal)) / len(signal)
        max_value = max(max_value, energy)
        energies.append(energy)
    test_source.close()

    log_max = 10. * numpy.log10(max_value)
    log_mean = 10. * numpy.log10(numpy.mean(energies))
    # Threshold sits 40% of the way from the mean energy up to the peak.
    threshold = log_mean + 0.4 * (log_max - log_mean)

    # --- Pass 2: tokenize with the derived threshold ------------------------
    asource = ADSFactory.ads(filename=audio_file, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=threshold)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=300,
                                max_length=99999999,
                                max_continuous_silence=300)
    asource.open()
    tokens = tokenizer.tokenize(asource)

    for t in tokens:
        # t = (data, start_frame, end_frame); frames are 10 ms apart.
        segment_begin = t[1] * 10
        segment_end = t[2] * 10
        # Iterate over a snapshot: the original removed items from
        # music_time_list while iterating it, which can skip entries.
        for item in list(music_time_list):
            # Music region starts inside this segment: emit the part before
            # the music and continue the segment after it.
            if segment_end > item[0]:
                segments.append([segment_begin, item[0]])
                segment_begin = item[1]
                # Consume the music entry so later segments don't reuse it.
                music_time_list.remove(item)
        segments.append([segment_begin, segment_end])

    asource.close()
    return segments
def runAuditok(self):
    '''
    Capture sound from the audio source specified in self.audioPath and
    stream detected activity to downstream consumers.

    If self.audioPath is None, the built-in microphone is used. For every
    new (non-duplicate) detected token the method sends a trigger, optionally
    saves and/or plays the audio, reshapes it, and forwards it with a
    (start, end, chunk_count) stamp. Blocks inside tokenize() and exits the
    process when the source is exhausted.
    '''
    # `is None` instead of `== None`; the rest of the behavior is unchanged.
    if self.audioPath is None:
        self.asource = ADSFactory.ads(sampling_rate=self.sr)
    else:
        self.asource = ADSFactory.ads(filename=self.audioPath,
                                      sampling_rate=self.sr)
    self.validator = AudioEnergyValidator(
        sample_width=self.asource.get_sample_width(),
        energy_threshold=self.energy)
    self.tokenizer = StreamTokenizer(
        validator=self.validator,
        min_length=self.min_len,
        max_length=self.max_len,
        max_continuous_silence=self.max_con_si)
    self.player = player_for(self.asource)
    self.prev_data = np.zeros([1])

    def audio_callback(data, start, end):
        # Skip exact duplicates of the previously handled token.
        if not np.array_equal(data, self.prev_data):
            self.sendTrigger()  # send notice that audio has been detected
            print("Acoustic activity at: {0}--{1}".format(start, end))
            stamp = (start, end, self.chunk_count)
            if self.record:
                self.saveAudio(data)
            # Each element of `data` is a raw byte frame; view as uint8 rows.
            copied = [np.frombuffer(x, dtype=np.uint8) for x in data]
            data_rs = self.reshapeAudio(np.asarray(copied))
            self.sendAudio(data_rs, stamp)
            self.prev_data = data
            if self.PLAYBACK:
                # Was a Python-2 print statement (`print "playing audio"`),
                # a SyntaxError under Python 3 alongside the print() calls.
                print("playing audio")
                self.playback(data_rs)
            self.chunk_count += 1

    self.asource.open()
    # Send notice that the audio has started to be processed.
    self.sendTrigger()
    self.tokenizer.tokenize(self.asource, callback=audio_callback)
    sys.exit(0)