Esempio n. 1
0
def wavInfoChk(wave, name):
    """Print the format parameters of *wave* and exit unless it is usable.

    Four lines are written to stdout, each prefixed with *name*: channel
    count, sampling rate, sample width and number of frames.  If the audio
    is not 1 channel / 16000 Hz / 2 bytes per sample, an error message is
    written to stderr and the process exits with status 1.
    """
    # (label, getter, unit suffix) for every line of the report, in order.
    report = (
        (" # channels : ", wave.getnchannels, ""),
        (" sampling   : ", wave.getframerate, " Hz"),
        (" samp width : ", wave.getsampwidth, " byte"),
        (" length     : ", wave.getnframes, ""),
    )
    for label, getter, unit in report:
        print(name + label + str(getter()) + unit)

    is_supported = (wave.getnchannels() == 1
                    and wave.getframerate() == 16000
                    and wave.getsampwidth() == 2)
    if is_supported:
        return

    print("ERROR: Input wav files must be Monaural/16kHz/16bit PCM",
          file=sys.stderr)
    sys.exit(1)
Esempio n. 2
0
def extractSamples(wave, start, end):
    """Return the 16-bit samples of *wave* between *start* and *end* seconds.

    Args:
        wave: an open wave reader (``getframerate``, ``getnchannels``,
            ``getsampwidth``, ``setpos`` and ``readframes`` are used).
        start: start of the excerpt, in seconds (int or float).
        end: end of the excerpt, in seconds; must be greater than *start*.

    Returns:
        A list of ints, one per frame: the first two bytes of each frame
        decoded as a little-endian signed 16-bit value.  If the excerpt
        runs past the end of the audio, only the available frames are
        returned.

    Raises:
        AssertionError: if ``end - start`` is not positive.
    """
    sampleRate = wave.getframerate()
    duration = end - start
    # Kept as an assert so the exception type seen by callers is unchanged.
    assert duration > 0

    # Frame positions must be integers: a float position makes the seek
    # performed by the next readframes() call fail on Python 3.
    wave.setpos(int(start * sampleRate))

    # One bulk read instead of one readframes() call per sample.
    frame_size = wave.getnchannels() * wave.getsampwidth()
    raw = wave.readframes(int(duration * sampleRate))

    # Decode the leading 16-bit value of every complete frame.
    return [struct.unpack_from("<h", raw, offset)[0]
            for offset in range(0, len(raw) - frame_size + 1, frame_size)]
Esempio n. 3
0
def get_speech_int_array(wave, start, end):
    """Run voice-activity detection over part of *wave*.

    The audio between *start* and *end* (both in seconds) is read in
    consecutive chunks of ``SPEECH_FRAME_SEC`` seconds and each chunk is
    passed to a ``webrtcvad.Vad(3)`` detector.

    Args:
        wave: an open wave reader; it is rewound and then positioned at
            *start*.
        start: start time in seconds; negative values are treated as 0.
        end: end time in seconds; reading never goes past the last frame.

    Returns:
        A list with one int per chunk: 1 if the detector reported speech,
        0 otherwise.  An empty list is returned if reading or detection
        raises; the exception text is printed in that case.
    """
    vad = webrtcvad.Vad(3)

    start = max(0, start)
    samples_per_second = wave.getframerate()
    samples_per_frame = int(SPEECH_FRAME_SEC * samples_per_second)

    wave.rewind()
    try:
        # Frame positions must be integers: a float position makes the seek
        # performed by the next readframes() call fail on Python 3.
        wave.setpos(int(start * samples_per_second))
    except Exception:
        # Best effort: report it and carry on from the rewound position.
        print("failed to set pos %f" % start)

    # The stop position does not change while reading, so compute it once.
    stop_frame = min(end * samples_per_second, wave.getnframes())

    wave_view_int = []
    while wave.tell() < stop_frame:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            wave_view_int.append(
                1 if vad.is_speech(wav_samples, samples_per_second) else 0)
        except Exception as ex:
            print("Exception: " + str(ex))
            return []

    return wave_view_int
Esempio n. 4
0
 def play(self):
     """Close any existing stream and open a new output stream on ``p``.

     The stream parameters are taken from the module-level ``wf`` (sample
     width) and ``wave`` (channel count, frame rate) objects.
     """
     # NOTE(review): the format is read from `wf` but channels/rate are read
     # from `wave` -- confirm both names refer to the intended wave reader.
     if self.stream:
         self.stream.close()

     sample_format = p.get_format_from_width(wf.getsampwidth())
     channel_count = wave.getnchannels()
     frame_rate = wave.getframerate()

     self.stream = p.open(
         format=sample_format,
         channels=channel_count,
         rate=frame_rate,
         output=True,
     )
Esempio n. 5
0
def get_speech_int_array(wave, start, end):
    """Run voice-activity detection over part of *wave*.

    The audio between *start* and *end* (both in seconds) is read in
    consecutive chunks of ``SPEECH_FRAME_SEC`` seconds and each chunk is
    passed to a ``webrtcvad.Vad(3)`` detector.

    Args:
        wave: an open wave reader; it is positioned at *start*.
        start: start time in seconds.
        end: end time in seconds; reading never goes past the last frame.

    Returns:
        A list with one int per chunk: 1 if the detector reported speech,
        0 otherwise.  An empty list is returned if reading or detection
        raises.
    """
    vad = webrtcvad.Vad(3)

    samples_per_second = wave.getframerate()
    samples_per_frame = int(SPEECH_FRAME_SEC * samples_per_second)

    # Frame positions must be integers: a float position makes the seek
    # performed by the next readframes() call fail on Python 3.
    wave.setpos(int(start * samples_per_second))

    # Never look past the last frame: beyond it readframes() returns no
    # data and tell() stops advancing, so the loop could not finish normally.
    stop_frame = min(end * samples_per_second, wave.getnframes())

    wave_view_int = []
    while wave.tell() < stop_frame:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            wave_view_int.append(
                1 if vad.is_speech(wav_samples, samples_per_second) else 0)
        except Exception:
            # Deliberate best effort: any read/detection failure yields an
            # empty result, but KeyboardInterrupt/SystemExit still propagate.
            return []

    return wave_view_int
Esempio n. 6
0
def get_speech_int_array(wave, start=0, end=-1):
    """Run voice-activity detection over part (or all) of *wave*.

    The audio between *start* and *end* (both in seconds) is read in
    consecutive chunks of ``SPEECH_FRAME_SEC`` seconds and each chunk is
    passed to a ``webrtcvad.Vad(0)`` detector.

    Args:
        wave: an open wave reader; it is positioned at *start*.
        start: start time in seconds (default 0).
        end: end time in seconds; the default -1 means "up to the last
            frame".  Reading never goes past the last frame.

    Returns:
        A list with one int per chunk: 1 if the detector reported speech,
        0 otherwise.  An empty list is returned (after printing a message)
        if reading or detection raises.
    """
    vad = webrtcvad.Vad(0)

    samples_per_second = wave.getframerate()
    samples_per_frame = int(SPEECH_FRAME_SEC * samples_per_second)

    total_frames = wave.getnframes()
    if end == -1:
        # Use the frame count directly instead of converting it to seconds
        # and back, which may not land exactly on the last frame.
        stop_frame = total_frames
    else:
        # Never look past the last frame: beyond it readframes() returns no
        # data and tell() stops advancing.
        stop_frame = min(end * samples_per_second, total_frames)

    # Frame positions must be integers: a float position makes the seek
    # performed by the next readframes() call fail on Python 3.
    wave.setpos(int(start * samples_per_second))

    wave_view_int = []
    while wave.tell() < stop_frame:
        try:
            wav_samples = wave.readframes(samples_per_frame)
            wave_view_int.append(
                1 if vad.is_speech(wav_samples, samples_per_second) else 0)
        except Exception:
            print("Exception reading frames")
            return []

    return wave_view_int
def trim_silence(wave, output_file_path):
    vad = webrtcvad.Vad(3)

    VAD_WINDOW_SEC = 0.01
    samples_per_second = wave.getframerate()
    samples_per_frame = int(VAD_WINDOW_SEC * samples_per_second)
    total_samples = wave.getnframes()

    #print('samples_step: %i' % samples_per_frame)
    wave_view_str = ""
    wave_view_int = []
    while wave.tell() < total_samples:
        #wave_view_str += "1" if vad.is_speech(wave.readframes(samples_to_get), sample_rate) else "0"
        try:
            wav_samples = wave.readframes(samples_per_frame)
            val = 1 if vad.is_speech(wav_samples, samples_per_second) else 0
            wave_view_int.append(val)
            wave_view_str += str(val)
        #print "current_pos: %i" % wave.tell()
        except Exception as ex:
            print("Exception: " + str(ex))
            return []

    print wave_view_str