def wavInfoChk(wave, name):
    """Print basic properties of a wave reader and enforce the expected format.

    Four lines are written to stdout, each prefixed with ``name``: channel
    count, sampling rate, sample width and number of frames.

    Args:
        wave: Object exposing ``getnchannels``, ``getframerate``,
            ``getsampwidth`` and ``getnframes``.
        name: String label prepended to every report line.

    Raises:
        SystemExit: With exit status 1, after an error message on stderr,
            unless the input has 1 channel, a 16000 Hz frame rate and a
            sample width of 2 bytes.
    """
    # (label, getter method name, trailing unit) for each report line.
    report_rows = (
        (" # channels : ", "getnchannels", ""),
        (" sampling : ", "getframerate", " Hz"),
        (" samp width : ", "getsampwidth", " byte"),
        (" length : ", "getnframes", ""),
    )
    for label, getter_name, unit in report_rows:
        value = getattr(wave, getter_name)()
        print(name + label + str(value) + unit)

    # Guard clause: nothing more to do when the format is the expected one.
    if (
        wave.getnchannels() == 1
        and wave.getframerate() == 16000
        and wave.getsampwidth() == 2
    ):
        return

    print("ERROR: Input wav files must be Monaural/16kHz/16bit PCM",
          file=sys.stderr)
    sys.exit(1)
def extractSamples(wave, start, end):
    """Return the 16-bit samples found between ``start`` and ``end`` seconds.

    One frame is read per output sample and the first two bytes of that
    frame are decoded as a little-endian signed 16-bit integer.

    Args:
        wave: An open wave reader (``getframerate``, ``setpos`` and
            ``readframes`` are used).
        start: Start offset in seconds; may be fractional.
        end: End offset in seconds; must be greater than ``start``.

    Returns:
        A list of ``int(duration * framerate)`` integers.

    Raises:
        AssertionError: If ``end - start`` is not positive.
        struct.error: If the reader runs out of data before the requested
            number of frames has been read.
    """
    sampleRate = wave.getframerate()
    duration = end - start
    assert duration > 0
    # setpos() expects an integral frame index; a fractional ``start`` would
    # otherwise hand a float to the underlying file seek.
    wave.setpos(int(start * sampleRate))
    frame_count = int(duration * sampleRate)
    return [
        struct.unpack_from("<h", wave.readframes(1))[0]
        for _ in range(frame_count)
    ]
def get_speech_int_array(wave, start, end):
    """Classify consecutive audio frames between ``start`` and ``end``.

    The reader is rewound, positioned at ``start`` and then read in frames
    of ``SPEECH_FRAME_SEC`` seconds. Each complete frame is passed to
    ``webrtcvad.Vad(3).is_speech``.

    Args:
        wave: An open wave reader (``getframerate``, ``getnframes``,
            ``getsampwidth``, ``getnchannels``, ``rewind``, ``setpos``,
            ``tell`` and ``readframes`` are used).
        start: Start offset in seconds; negative values are clamped to 0.
        end: End offset in seconds; reading also stops at the end of the
            audio data.

    Returns:
        A list with one int per complete frame: 1 when the VAD reports
        speech, 0 otherwise. An empty list is returned when reading or
        classifying a frame raises.
    """
    vad = webrtcvad.Vad(3)
    start = max(0, start)
    samples_per_second = wave.getframerate()
    samples_per_frame = int(SPEECH_FRAME_SEC * samples_per_second)
    bytes_per_frame = (
        samples_per_frame * wave.getsampwidth() * wave.getnchannels()
    )
    total_samples = wave.getnframes()

    wave.rewind()
    try:
        # setpos() expects an integral frame index, while ``start`` is in
        # (possibly fractional) seconds.
        wave.setpos(int(start * samples_per_second))
    except Exception:
        # Best effort: keep reading from the rewound position.
        print("faield to set pos %f" % start)

    stop_sample = min(end * samples_per_second, total_samples)
    wave_view_int = []
    while wave.tell() < stop_sample:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            if len(wav_samples) < bytes_per_frame:
                # A truncated trailing frame would make the VAD raise and
                # throw away every result collected so far; stop instead.
                break
            wave_view_int.append(
                1 if vad.is_speech(wav_samples, samples_per_second) else 0)
        except Exception as ex:
            print("Exception: " + str(ex))
            return []
    return wave_view_int
def play(self):
    """Close any existing output stream and open a new one on ``self.stream``.

    The stream parameters are taken from the module-level names ``p``,
    ``wf`` and ``wave``.

    NOTE(review): the sample width is read from ``wf`` while the channel
    count and frame rate are read from ``wave`` -- confirm that both names
    refer to the intended wave reader.
    """
    previous_stream = self.stream
    if previous_stream:
        previous_stream.close()

    sample_format = p.get_format_from_width(wf.getsampwidth())
    self.stream = p.open(
        format=sample_format,
        channels=wave.getnchannels(),
        rate=wave.getframerate(),
        output=True,
    )
def get_speech_int_array(wave, start, end):
    """Classify consecutive audio frames between ``start`` and ``end``.

    The reader is positioned at ``start`` and read in frames of
    ``SPEECH_FRAME_SEC`` seconds. Each complete frame is passed to
    ``webrtcvad.Vad(3).is_speech``.

    Args:
        wave: An open wave reader (``getframerate``, ``getsampwidth``,
            ``getnchannels``, ``setpos``, ``tell`` and ``readframes`` are
            used).
        start: Start offset in seconds.
        end: End offset in seconds.

    Returns:
        A list with one int per complete frame: 1 when the VAD reports
        speech, 0 otherwise. An empty list is returned when reading or
        classifying a frame raises.

    Raises:
        Whatever ``wave.setpos`` raises for an out-of-range start position.
    """
    vad = webrtcvad.Vad(3)
    samples_per_second = wave.getframerate()
    samples_per_frame = int(SPEECH_FRAME_SEC * samples_per_second)
    bytes_per_frame = (
        samples_per_frame * wave.getsampwidth() * wave.getnchannels()
    )
    # setpos() expects an integral frame index, while ``start`` is in
    # (possibly fractional) seconds.
    wave.setpos(int(start * samples_per_second))

    stop_sample = end * samples_per_second
    wave_view_int = []
    while wave.tell() < stop_sample:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            if len(wav_samples) < bytes_per_frame:
                # End of data (or a truncated trailing frame): the VAD would
                # raise on it and discard every result collected so far.
                break
            wave_view_int.append(
                1 if vad.is_speech(wav_samples, samples_per_second) else 0)
        except Exception:
            # Kept silent on purpose, but no longer swallows
            # KeyboardInterrupt/SystemExit like a bare ``except:`` did.
            return []
    return wave_view_int
def get_speech_int_array(wave, start=0, end=-1):
    """Classify consecutive audio frames between ``start`` and ``end``.

    The reader is positioned at ``start`` and read in frames of
    ``SPEECH_FRAME_SEC`` seconds. Each complete frame is passed to
    ``webrtcvad.Vad(0).is_speech``.

    Args:
        wave: An open wave reader (``getframerate``, ``getnframes``,
            ``getsampwidth``, ``getnchannels``, ``setpos``, ``tell`` and
            ``readframes`` are used).
        start: Start offset in seconds. Defaults to 0.
        end: End offset in seconds. The default of -1 means "up to the last
            frame of the file".

    Returns:
        A list with one int per complete frame: 1 when the VAD reports
        speech, 0 otherwise. An empty list is returned when reading or
        classifying a frame raises.

    Raises:
        Whatever ``wave.setpos`` raises for an out-of-range start position.
    """
    vad = webrtcvad.Vad(0)
    samples_per_second = wave.getframerate()
    samples_per_frame = int(SPEECH_FRAME_SEC * samples_per_second)
    bytes_per_frame = (
        samples_per_frame * wave.getsampwidth() * wave.getnchannels()
    )
    if end == -1:
        # Sentinel: convert the total frame count into a duration in seconds.
        end = float(wave.getnframes()) / samples_per_second
    # setpos() expects an integral frame index, while ``start`` is in
    # (possibly fractional) seconds.
    wave.setpos(int(start * samples_per_second))

    stop_sample = end * samples_per_second
    wave_view_int = []
    while wave.tell() < stop_sample:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            if len(wav_samples) < bytes_per_frame:
                # With the default ``end`` the last read is usually shorter
                # than a full frame; the VAD would raise on it and discard
                # every result collected so far.
                break
            wave_view_int.append(
                1 if vad.is_speech(wav_samples, samples_per_second) else 0)
        except Exception:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("Exception reading frames")
            return []
    return wave_view_int
def trim_silence(wave, output_file_path):
    """Print a 0/1 speech map of ``wave``, one character per 10 ms window.

    Reading starts at the reader's current position and continues to the
    end of the audio data. Each complete window is passed to
    ``webrtcvad.Vad(3).is_speech``; the results are printed as one string
    of ``1`` (speech) and ``0`` (no speech) characters.

    Args:
        wave: An open wave reader (``getframerate``, ``getnframes``,
            ``getsampwidth``, ``getnchannels``, ``tell`` and ``readframes``
            are used).
        output_file_path: Not used by this code.

    Returns:
        ``None`` after printing the map, or an empty list when reading or
        classifying a window raises (nothing but the exception message is
        printed in that case).
    """
    vad = webrtcvad.Vad(3)
    VAD_WINDOW_SEC = 0.01
    samples_per_second = wave.getframerate()
    samples_per_frame = int(VAD_WINDOW_SEC * samples_per_second)
    bytes_per_frame = (
        samples_per_frame * wave.getsampwidth() * wave.getnchannels()
    )
    total_samples = wave.getnframes()

    wave_view_int = []
    while wave.tell() < total_samples:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            if len(wav_samples) < bytes_per_frame:
                # The last read is usually shorter than a full window; the
                # VAD would raise on it and the map would never be printed.
                break
            val = 1 if vad.is_speech(wav_samples, samples_per_second) else 0
            wave_view_int.append(val)
        except Exception as ex:
            print("Exception: " + str(ex))
            return []

    # Build the string once instead of growing it with ``+=`` per window.
    wave_view_str = "".join(str(val) for val in wave_view_int)
    print(wave_view_str)