import subprocess

import numpy as np
import scipy.io.wavfile as wav


def read_sound(fp):
    """
    Create a normalized float array and sample rate from any audio file.
    """
    if fp.endswith('mp3'):
        try:
            oname = 'temp.wav'
            # Decode the mp3 to a temporary wav via lame.
            result = subprocess.call(['lame', '--decode', fp, oname])
            assert result == 0
            samplerate, data = wav.read(oname)
        except Exception:
            print("couldn't run lame")
            try:
                import moviepy.editor as mpy
                aud_clip = mpy.AudioFileClip(fp)
                samplerate = aud_clip.fps
                data = aud_clip.to_soundarray()
            except ImportError:
                print("moviepy not installed?")
    if fp.endswith('aif'):
        from scikits.audiolab import Sndfile
        sf = Sndfile(fp, 'r')
        sf.seek(0)
        data = sf.read_frames(sf.nframes)
        samplerate = sf.samplerate
    if fp.endswith('wav'):
        samplerate, data = wav.read(fp)

    # Keep only the first channel, then normalize to a peak amplitude of 1.
    if len(data.shape) > 1:
        data = data[:, 0]
    data = data.astype('float64')
    data /= np.abs(data).max()
    return data, samplerate
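# Usage sketch (hypothetical file): read_sound returns a mono float array
# scaled to a peak of 1.0, plus the sample rate.
if __name__ == '__main__':
    data, samplerate = read_sound('speech.mp3')
    print(len(data) / float(samplerate), 'seconds at', samplerate, 'Hz')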
    def test_energy_denois(self):
        original_file = self.auxiliary_files_url + "/nai_sample.wav"
        denoised_file = self.auxiliary_files_url + "/nai_sample_energy_denoised.wav"

        result = self.energy_denoise_module.energyDenoise(\
                original_file,\
                0.2,\
                denoised_file,\
                False)
        # The function thinks the denoising succeeded
        self.assertEqual(result, True)
        # Check for the denoised file
        denoised_exists = os.path.isfile(denoised_file)
        self.assertEqual(denoised_exists, True)
        # Check if denoised energy is lower than the initial one
        samp_freq, signal_orig = wavfile.read(original_file)
        energy_orig = 0.0
        for i in signal_orig:
            energy_orig += i * 1.0 * i
        samp_freq, signal_denoised = wavfile.read(denoised_file)
        energy_denoised = 0.0
        for i in signal_denoised:
            energy_denoised += i * 1.0 * i
        self.assertGreater(energy_orig, energy_denoised)

        # erase the denoised file
        os.remove(denoised_file)
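# The per-sample energy loops in the test above can be vectorized; a sketch
# (casting to float64 avoids int16 overflow when squaring):
import numpy as np

def signal_energy(signal):
    return float(np.sum(signal.astype(np.float64) ** 2))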
    def loadReferenceWords(self, word1_path, word2_path):
        fs, self.word1 = wavfile.read(word1_path)
        fs, self.word2 = wavfile.read(word2_path)
        self.word1 = self.scaler(self.word1)
        self.word2 = self.scaler(self.word2)
        self.word1 = self.word1[self.get_startingpoint(self.word1):self.get_endingpoint(self.word1), :]
        self.word2 = self.word2[self.get_startingpoint(self.word2):self.get_endingpoint(self.word2), :]
Example #4
def remove_silence(filename):
    (rate, sig) = wav.read(filename)

    framelength = int(round(FRAMELENGTH * rate))

    # Use float division so the ceil actually rounds up.
    frameamount = int(math.ceil(len(sig) / float(framelength)))

    newsig = np.array([])

    for i in xrange(0, frameamount + 1):
        start = i * framelength
        end = start + framelength
        if (end > len(sig)):
            end = len(sig)
        if (start >= len(sig)):
            start = len(sig) - 1

        # Mean energy of the frame; keep the frame only if above the threshold.
        length = end - start
        energy = 0.0
        for j in xrange(start, end):
            energy = energy + pow(float(sig[j]), 2)
        energy = energy / length
        if (energy >= TRESHOLD):
            newsig = np.concatenate((newsig, sig[start:end]))

    newsig = newsig.astype(sig.dtype)
    print "silence removed, saving: " + filename + ".sr"
    wav.write(filename + ".sr", rate, newsig)
    (rate, sig) = wav.read(filename + ".sr")
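# A vectorized variant of the frame-energy loop above (a sketch assuming the
# same module-level FRAMELENGTH and TRESHOLD constants; the trailing partial
# frame is simply dropped instead of clamped):
def remove_silence_vectorized(filename):
    rate, sig = wav.read(filename)
    framelength = int(round(FRAMELENGTH * rate))
    n = (len(sig) // framelength) * framelength
    frames = sig[:n].reshape(-1, framelength)
    energies = np.mean(frames.astype(np.float64) ** 2, axis=1)
    newsig = frames[energies >= TRESHOLD].ravel().astype(sig.dtype)
    wav.write(filename + '.sr', rate, newsig)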
def cut_video(recording_path, datapack_dir):

    # Read the start/end pattern
    sr1, pattern_wav = wav.read('pattern.wav')

    workingdir = tempfile.mkdtemp()

    # Open the video file
    clip = VideoFileClip(recording_path)

    # Save its audio track temporarily on disk
    clip.audio.write_audiofile(os.path.join(workingdir,"temp_audio.wav"))

    # Read the audio samples and mix down to mono (if necessary)
    sr2, recording_wav = wav.read(os.path.join(workingdir, "temp_audio.wav"))
    if recording_wav.ndim > 1 and recording_wav.shape[1] > 1:
        recording_wav = numpy.mean(recording_wav, 1)

    shutil.rmtree(workingdir)
    # Detect the start and end audio pattern
    start, end = detect_start_end_times(pattern_wav, recording_wav, sr2, 4)

    # Cut the video and write it into two separate video and audio files
    clip.subclip(start+0.4, end).write_videofile(os.path.join(datapack_dir, 'video.mp4'), codec='libx264')
    clip.subclip(start+0.4, end).audio.write_audiofile(os.path.join(datapack_dir,'audio.wav'))
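# detect_start_end_times is not shown here; a minimal cross-correlation
# sketch of the idea, assuming the pattern plays once near the start and once
# near the end of the recording (the extra argument of the real function is
# not reproduced):
import numpy
from scipy.signal import fftconvolve

def detect_start_end_times_sketch(pattern_wav, recording_wav, sr):
    pattern = pattern_wav.astype('float64')
    rec = recording_wav.astype('float64')
    corr = fftconvolve(rec, pattern[::-1], mode='valid')
    mid = len(corr) // 2
    start = numpy.argmax(corr[:mid]) / float(sr)
    end = (mid + numpy.argmax(corr[mid:])) / float(sr)
    return start, end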
def extract_features(recording_files, nr_ceps=12):
    # NOTE: this stub returns dummy cepstra immediately; the code below the
    # return is currently dead and kept only for reference.
    print("skipping features")
    return Ceps()(range(100))
    nr_utt_in_ubm = 300

    win_length_ms = 25  # The window length of the cepstral analysis in milliseconds
    win_shift_ms = 10  # The window shift of the cepstral analysis in milliseconds
    nr_filters = 24  # NOTSURE The number of filter bands
    nr_ceps = nr_ceps  # The number of cepstral coefficients
    f_min = 0.  # NOTSURE The minimal frequency of the filter bank
    f_max = 4000.  # NOTSURE The maximal frequency of the filter bank
    delta_win = 2  # NOTSURE The integer delta value used for computing the first and second order derivatives
    pre_emphasis_coef = 0.97  # NOTSURE The coefficient used for the pre-emphasis
    dct_norm = True  # NOTSURE A factor by which the cepstral coefficients are multiplied
    mel_scale = True  # Tell whether cepstral features are extracted on a linear (LFCC) or Mel (MFCC) scale
    # TODO add feature wrapping

    if glob.has_magic(recording_files):
        recording_files = glob.glob(recording_files)

    rate, ubm_wav = wavfile.read(recording_files.pop())
    for recording_file in recording_files:
        rate, signal = wavfile.read(recording_file)
        ubm_wav = np.append(ubm_wav, signal)
    c = Ceps(rate, win_length_ms, win_shift_ms, nr_filters, nr_ceps, f_min, f_max, delta_win, pre_emphasis_coef,
                    mel_scale, dct_norm)
    ubm_wav = np.cast['float'](ubm_wav)  # vector should be in **float**
    mfcc = c(ubm_wav)
    return mfcc
def get_data(path):
    """
    Gets the data associated with an audio file, converting to wav when necessary.
    :param path: path to audio file
    :return: sample rate, audio data
    """
    if path.endswith(".wav"):
        bee_rate, bee_data = read(path)
    else:
        temp = tempfile.NamedTemporaryFile(suffix=".wav")
        temp.close()
        if path.endswith(".flac"):
            sound = AudioSegment.from_file(path, "flac")
            sound.export(temp.name, format="wav")
        elif path.endswith(".mp3"):
            sound = AudioSegment.from_file(path, "mp3")
            sound.export(temp.name, format="wav")
        bee_rate, bee_data = read(temp.name)
        os.remove(temp.name)
    data_type = np.iinfo(bee_data.dtype)
    dmin = data_type.min
    dmax = data_type.max
    bee_data = bee_data.astype(np.float64)
    bee_data = 2.0 * ((bee_data - dmin) / (dmax - dmin)) - 1.0
    bee_data = bee_data.astype(np.float32)
    return bee_rate, bee_data
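# Usage sketch (hypothetical path): samples come back as float32 in [-1, 1]
# regardless of the source container.
if __name__ == '__main__':
    rate, data = get_data('recordings/hive1.mp3')
    print(rate, data.dtype, float(data.min()), float(data.max()))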
def main(argv):
    if len(argv) == 6:
        window_size = int(argv[1])
        a_fname = argv[2]
        b_fname = argv[3]
        a_phase_b_mag_fname = argv[4]
        b_phase_a_mag_fname = argv[5]
    else:
        print(
            'Usage: %s ' % argv[0] +
            '<WINDOW_WIDTH> <FILE_1> <FILE_2> <OUTFILE_1> <OUTFILE_2>' + (
                '\n\nSwap magnitude and phase of WAV files FILE_1 and FILE_2.'
                '\n\nWINDOW_WIDTH: STFT frame length (integer # of samples)'
                '\nOUTFILE_1: phase of FILE_1, magnitude of FILE_2'
                '\nOUTFILE_2: phase of FILE_2, magnitude of FILE_1'))
        return 1

    a_rate, a = wavfile.read(a_fname)
    b_rate, b = wavfile.read(b_fname)
    assert a_rate == b_rate
    assert a.dtype == b.dtype

    print('Window width: %d samples = %.3f ms' %
          (window_size, 1e3 * window_size / a_rate))

    a_phase_b_mag, b_phase_a_mag = swap_wav_magnitude(a, b, window_size)

    wavfile.write(a_phase_b_mag_fname, a_rate, a_phase_b_mag)
    wavfile.write(b_phase_a_mag_fname, a_rate, b_phase_a_mag)

    return 0
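# swap_wav_magnitude is not shown here; a minimal STFT-based sketch of the
# idea using scipy.signal.stft/istft (output is float, so a cast would be
# needed to round-trip integer WAV dtypes):
import numpy as np
from scipy.signal import stft, istft

def swap_wav_magnitude_sketch(a, b, window_size):
    n = min(len(a), len(b))
    _, _, A = stft(a[:n].astype(np.float64), nperseg=window_size)
    _, _, B = stft(b[:n].astype(np.float64), nperseg=window_size)
    # Recombine: phase of A with |B|, and phase of B with |A|.
    a_phase_b_mag = istft(np.abs(B) * np.exp(1j * np.angle(A)), nperseg=window_size)[1]
    b_phase_a_mag = istft(np.abs(A) * np.exp(1j * np.angle(B)), nperseg=window_size)[1]
    return a_phase_b_mag, b_phase_a_mag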
Example #9
def mainfn():
    print "In comb4.py"

    # Read the four separated channels and stack them into one array.
    base = '/home/akshay/anaconda/ModulesPython/MUSICGEVD/sep_{}.wav'
    channels = []
    for k in range(4):
        fs, rdata = wavfile.read(base.format(k))
        channels.append(rdata)
    print "Sampling frequency: " + str(fs)
    print "shape of each channel: ", str(channels[0].shape)

    rdatat = np.array(channels)
    print "Shape of rdatat: ", str(rdatat.shape)
    return rdatat
def output(partIdx):
  """Uses the student code to compute the output for test cases."""
  outputString = ''

  if partIdx == 0: # This is ScaledFFTdB

    from assignment1 import scaled_fft_db

    r,x = wavfile.read('data/a1_submissionInput.wav')
    X = scaled_fft_db(x)

    for val in X:
        outputString += '%.5f ' % (val)


  elif partIdx == 1: # This is PrototypeFilter

    from assignment2 import prototype_filter

    h = prototype_filter()
      
    # test signal
    s = np.loadtxt('data/a2_submissionInput.txt')
    r = np.convolve(h, s)[4*512:5*512]/2

    for val in r:
        outputString += '%.5f ' % val

  elif partIdx == 2: # This is SubbandFiltering

    from assignment3 import subband_filtering

    r,x = wavfile.read('data/a3_submissionInput.wav')

    h = np.hanning(512)
    X = subband_filtering(x, h)

    for val in X:
        outputString += '%.5f ' % (val)

  elif partIdx == 3: # This is Quantization

    from assignment4 import quantization

    from parameters import EncoderParameters
    params = EncoderParameters(44100, 2, 64)

    val_in = np.loadtxt('data/a4_submissionInput.txt')

    for r,row in enumerate(val_in):
        val = row[0]
        scf = row[1]
        ba = int(row[2])
        QCa = params.table.qca[ba-2]
        QCb = params.table.qcb[ba-2]
        val = quantization(val, scf, ba, QCa, QCb)
        outputString += '%d ' % (val)


  return outputString.strip()
def processing():
    """post-processing of MLSbuf and recBuf, using the matched filter functions"""
    # -- start recording and playback in async. mode
    play_while_recording()
    
    global SAMPLE_RATE

    # -- latency for input and output devs, obtained using portaudio pa_devs script
    inputLatency = 0.0087
    outputLatency = 0.0087
    # -- convert latencies to num. of samples
    latencySamples = int(math.ceil((inputLatency + outputLatency) * SAMPLE_RATE))

    # -- calibration samples
    calSamp = 52
    
    # -- load recording buffer into numpy array
    recData = read("recBuf.wav")
    recBuf = np.array(recData[1],dtype =float)
    # -- index of internal delays & calibration samples to subtract
    interDelaySamp = np.s_[0:(latencySamples + calSamp)]
    recBuf = np.delete(recBuf,interDelaySamp)
    # -- remove excess samples from the recording buffer
    removeExcessSamples = np.s_[6000:]
    recBuf = np.delete(recBuf,removeExcessSamples)
    
    # -- load playback buffer
    MLSdata = read("MLS.wav")
    MLSbuf = np.array(MLSdata[1],dtype =float)
 
    # -- compute delay using Matched Filters & normalize
    xcorr = matched_filter(MLSbuf,recBuf)/50000000000.0

    # -- get gain
    gain = get_gain(MLSbuf,recBuf)
    
    # -- peak detector
    prop_delay = peak_detector(xcorr)

    # -- plot recorded seq. and Tx MLS seq.
    plt.figure(1)
    plt.plot(MLSbuf)
    plt.title("MLS sequence")
    plt.xlabel("samples")
    plt.grid(True)
    
    plt.figure(2)
    plt.plot(recBuf)
    plt.title("Recorded MLS sequence")
    plt.xlabel("samples")
    plt.ylabel("Amplitude")
    plt.grid(True)

    plt.figure(3)
    plt.plot(abs(xcorr))
    plt.title("Matched Filter Output")
    plt.xlabel("delay (samples)")
    plt.ylabel("Rxy")
    plt.grid(True)
    plt.show()
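# matched_filter and peak_detector are not shown here; minimal sketches of
# the intent (correlate against the time-reversed template, then take the
# index of the correlation peak as the propagation delay):
import numpy as np

def matched_filter_sketch(template, received):
    return np.convolve(received, template[::-1], mode='full')

def peak_detector_sketch(xcorr):
    return int(np.argmax(np.abs(xcorr)))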
Example #12
def plot_from_wavfile(file1, file2):
    '''
    Given two wav files, plot their frequency spectrums
    '''
    rate1, data1 = wavefile.read(file1)
    rate2, data2 = wavefile.read(file2)
    plot_from_rawdata(data1, data2, rate1)
Example #13
    def test_write_edge_values(self):
        # Write edge values 1.0
        samples = numpy.ones((441, 1), dtype=numpy.float32)
        dest_file = NamedTemporaryFile(delete=True)
        wfile, infos = wav.open_write_mode(dest_file.name, 44100, 1)
        wav.write_block(wfile, samples)
        wfile._file.flush() # To force the file to be written to the disk

        frame_rate, samples_written = sp_wavfile.read(dest_file.name)
        numpy.testing.assert_array_equal(samples_written, numpy.array([2**15 - 1] * 441, dtype=numpy.int16))
        dest_file.close()

        # Write value 2.0, clipped to 1.0
        samples = numpy.ones((441, 1), dtype=numpy.float32) * 2.0
        dest_file = NamedTemporaryFile(delete=True)
        wfile, infos = wav.open_write_mode(dest_file.name, 44100, 1)
        wav.write_block(wfile, samples)
        wfile._file.flush() # To force the file to be written to the disk

        frame_rate, samples_written = sp_wavfile.read(dest_file.name)
        numpy.testing.assert_array_equal(samples_written, numpy.array([2**15 - 1] * 441, dtype=numpy.int16))
        dest_file.close()

        # Write edge values -1.0
        samples = numpy.ones((441, 1), dtype=numpy.float32) * -1
        dest_file = NamedTemporaryFile(delete=True)
        wfile, infos = wav.open_write_mode(dest_file.name, 44100, 1)
        wav.write_block(wfile, samples)
        wfile._file.flush() # To force the file to be written to the disk

        frame_rate, samples_written = sp_wavfile.read(dest_file.name)
        numpy.testing.assert_array_equal(samples_written, numpy.array([-2**15] * 441, dtype=numpy.int16))
        dest_file.close()
Example #14
    def __init__(self, snd, fps=None, bitrate=3000):

        Clip.__init__(self)

        if isinstance(snd, str):
            if not snd.endswith('.wav'):
                temp = 'temp.wav'
                ffmpeg.extract_sound(snd, temp, fps, bitrate)
                fps, arr = wavfile.read(temp)
                # os.remove(temp)
            else:
                fps, arr = wavfile.read(snd)

            self.array = arr
            self.fps = fps
        else:
            self.array = snd
            self.fps = fps

        self.duration = 1.0 * len(self.array) / self.fps

        def gf(t):
            i = int(self.fps * t)
            if i < 0 or i >= len(self.array):
                return 0
            else:
                return self.array[i]

        self.get_frame = gf
    def test_realFile(self):
        original_file = self.auxiliary_files_url + "/nai_sample.wav"
        denoised_file = self.auxiliary_files_url + "/nai_sample_sox_denoised.wav"
        user = '******'
        audio_type = 'nao_wav_1_ch'
        scale = 0.2

        result = self.sox_denoise_module.soxDenoise(\
                user,\
                audio_type,\
                original_file,\
                denoised_file,\
                scale)
        # The function thinks the denoising succeeded
        self.assertEqual(result, "true")
        # Check for the denoised file
        denoised_exists = os.path.isfile(denoised_file)
        self.assertEqual(denoised_exists, True)
        # Check if denoised energy is lower than the initial one
        samp_freq, signal_orig = wavfile.read(original_file)
        energy_orig = 0.0
        for i in signal_orig:
            energy_orig += i * 1.0 * i
        samp_freq, signal_denoised = wavfile.read(denoised_file)
        energy_denoised = 0.0
        for i in signal_denoised:
            energy_denoised += i * 1.0 * i
        self.assertGreater(energy_orig, energy_denoised)

        # erase the denoised file
        os.remove(denoised_file)
Example #16
def get_offset_wav(wav_filename1, wav_filename2, time_limit=300):
    """Return the offset in seconds between wav_filename1 and
    wav_filename2, which are recordings of the same event with
    potentially different starting times. Returns the number of
    seconds that wav_filename2 starts after wav_filename1
    (possibly negative).

    If time_limit is provided, clip both files to their first
    time_limit seconds; this can substantially speed up offset
    detection."""
    
    rate1, data1 = sp_wav.read(wav_filename1)
    rate2, data2 = sp_wav.read(wav_filename2)
    # the two files must have the same sampling rate
    assert rate1 == rate2
    
    if time_limit is not None:
        data1 = data1[0:rate1 * time_limit]
        data2 = data2[0:rate2 * time_limit]
                
    offset_samples = get_offset_xcorr(data1, data2)
    offset_seconds = offset_samples / float(rate1)
    
    return offset_seconds
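# get_offset_xcorr is not shown here; a minimal sketch (the lag that
# maximizes the full cross-correlation; the sign convention should be
# checked against the caller):
import numpy as np
from scipy.signal import fftconvolve

def get_offset_xcorr_sketch(data1, data2):
    corr = fftconvolve(data1.astype(np.float64),
                       data2[::-1].astype(np.float64), mode='full')
    return int(np.argmax(corr)) - (len(data2) - 1)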
Example #17
def perf_eval(param):
    # param is encoded in base 3000: the low digits give the nperseg value
    nperseg = param % 3000
    # ... and the high digit indexes the number of music segments
    num_of_seg_idx = (param - nperseg) // 3000
    num_of_seg = num_of_segs[num_of_seg_idx]

    input_rate,input_sig=wavfile.read(input_dir+song_name+'.wav')
    output_rate,output_sig=wavfile.read(output_dir+song_name+'.wav')
    
    # the +1 in the denominator is because we exclude the last piece of music,
    # to only consider music pieces of the same size
    input_seg_len = input_sig.shape[0] // (num_of_seg + 1)
    output_seg_len = output_sig.shape[0] // (num_of_seg + 1)
    
    if input_rate != output_rate:
        print("Rate Mismatch!")
        sys.exit(0)
    
    #print (nperseg,nperseg_step,input_seg_len,output_seg_len)
    if np.min((input_seg_len,output_seg_len))*0.7 < nperseg * nperseg_step:
        print ("Nothing to do!")
        sys.exit(0)
        
    res=estim_diff(input_sig, input_seg_len, output_sig, output_seg_len, nperseg, num_of_seg, nperseg_step)

    
    with open('/agbs/cluster/naji/Linear Filters/Echo/out/Winter/Room/' + str(num_of_seg) + '/' + str(nperseg) + '.txt', 'w') as f:
        print(nperseg, file=f)
        print(np.mean(res > 0), file=f)
def test_ubm_var_channel():
    ubm = GMM.load('model/ubm.mixture-32.person-20.immature.model')

    train_duration = 8.
    nr_test = 5
    test_duration = 3.
    audio_files = ['xinyu.vad.wav', 'wyx.wav']
    X_train, y_train, X_test, y_test = [], [], [], []
    for audio_file in audio_files:
        fs, signal = wavfile.read(audio_file)
        signal = monotize_signal(signal)

        train_len = int(fs * train_duration)
        test_len = int(fs * test_duration)

        X_train.append(mix_feature((fs, signal[:train_len])))
        y_train.append(audio_file)

        for i in range(nr_test):
            start = random.randint(train_len, len(signal) - test_len)
            X_test.append(mix_feature((fs, signal[start:start + test_len])))
            y_test.append(audio_file)

    gmmset = GMMSet(32, ubm=ubm)
    gmmset.fit(X_train, y_train)
    y_pred = gmmset.predict_with_reject(X_test)
    for i in xrange(len(y_pred)):
        print y_test[i], y_pred[i], '' if y_test[i] == y_pred[i] else 'wrong'

    for imposter_audio_file in map(
            lambda x: 'test-{}.wav'.format(x), range(5)):
        fs, signal = wavfile.read(imposter_audio_file)
        signal = monotize_signal(signal)
        imposter_x = mix_feature((fs, signal))
        print gmmset.predict_one_with_rejection(imposter_x)
def find_offset(file1, file2, fs=8000, trim=60*15, correl_nframes=1000):
    tmp1 = convert_and_trim(file1, fs, trim)
    tmp2 = convert_and_trim(file2, fs, trim)
    # Suppress the WavFileWarning caused by the non-standard fmt chunk
    # that ffmpeg writes: https://trac.ffmpeg.org/ticket/1843
    warnings.simplefilter("ignore", wavfile.WavFileWarning)
    a1 = wavfile.read(tmp1, mmap=True)[1] / (2.0 ** 15)
    a2 = wavfile.read(tmp2, mmap=True)[1] / (2.0 ** 15)
    # Truncate zeros off the beginning of each signal
    # (only seems to happen with ffmpeg, not with sox)
    a1 = ensure_non_zero(a1)
    a2 = ensure_non_zero(a2)
    mfcc1 = mfcc(a1, nwin=256, nfft=512, fs=fs, nceps=13)[0]
    mfcc2 = mfcc(a2, nwin=256, nfft=512, fs=fs, nceps=13)[0]
    mfcc1 = std_mfcc(mfcc1)
    mfcc2 = std_mfcc(mfcc2)
    c = cross_correlation(mfcc1, mfcc2, nframes=correl_nframes)
    max_k_index = np.argmax(c)
    # The MFCC window overlap is hardcoded in scikits.talkbox
    offset = max_k_index * 160.0 / float(fs) # * over / sample rate
    score = (c[max_k_index] - np.mean(c)) / np.std(c) # standard score of peak
    os.remove(tmp1)
    os.remove(tmp2)
    return offset, score
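# std_mfcc is not shown here; a sketch of the standardization step it likely
# performs (zero mean, unit variance per cepstral coefficient):
import numpy as np

def std_mfcc_sketch(m):
    return (m - np.mean(m, axis=0)) / np.std(m, axis=0)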
Example #20
def load_data(syllable, N, used_samples, snr, sample_order=None):
    """Function that goes through all N samples of syllable and loads its wave data.

    :param syllable: complete path name of syllable (string)
    :param N: number of samples to load
    :param used_samples: number of samples to skip in the beginning
    :param snr: the strength of the noise
    :param sample_order: if not None should be vector of indices of samples to be loaded (default = None)

    :returns syllable_waves: list of N sample waves of syllable
    """

    samples = os.listdir(syllable)
    if sample_order is None:
        sample_order = [i + used_samples for i in range(int(N))]

    syllable_waves = []
    for i in sample_order:
        rate, wave = wav.read(syllable + '/' + samples[i])
        # Additive white noise scaled so that var(wave) / var(noise) == snr.
        noiseLvl = np.sqrt(np.var(wave) / snr) if snr != 0.0 else 0.0
        wave = wave + noiseLvl * np.random.randn(len(wave))
        syllable_waves.append([wave, rate])
    return syllable_waves
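# Usage sketch (hypothetical directory layout): load ten clean samples of a
# syllable, then ten noisy ones at an SNR of 20.
clean = load_data('songs/syllA', N=10, used_samples=0, snr=0.0)
noisy = load_data('songs/syllA', N=10, used_samples=0, snr=20.0)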
Example #21
def estim_diff(percent=256):
    sound_counter=0
    res=np.empty(len(input_file_names))
    for i in range(res.shape[0]):
        input_rate,input_sig=wavfile.read(input_dir+'Segments/'+input_file_names[i])
        output_rate,output_sig=wavfile.read(output_dir+'Segments/'+output_file_names[i])
    
        input_sig=pcm2float(input_sig,'float32')
        output_sig=pcm2float(output_sig,'float32')
        
        min_size=np.min((input_sig[:,0].shape[0],output_sig[:,0].shape[0]))
        #print min_size,min_size*percent
        #S_inp=np.absolute(fft(input_sig[:min_size,0]-np.mean(input_sig[:min_size,0])))
        #S_out=np.absolute(fft(output_sig[:min_size,0]-np.mean(output_sig[:min_size,0])))
    
        t=time()
        nperseg=int(min_size*percent)-np.mod(int(min_size*percent),10)
        real_perc=float(float(nperseg)/int(min_size*percent))
        S_inp=signal.welch(input_sig[:min_size,0],nperseg=nperseg)[1]    
        S_out=signal.welch(output_sig[:min_size,0],nperseg=nperseg)[1]    
        #S_inp=ndim_welch(input_sig[:min_size,0][None,...],nperseg=int(min_size*percent))[1]    
        #S_out=ndim_welch(output_sig[:min_size,0][None,...],nperseg=int(min_size*percent))[1]    
        #print time()-t
        #print S_inp_1,S_inp_2
        res[sound_counter]=delta_estimator_3(S_out/S_inp,S_inp)-delta_estimator_3(S_inp/S_out,S_out)
        #out=float2pcm(output_sig,'int16')
        sound_counter+=1
    return real_perc,int(min_size*percent),res
def generate_mixture(src1, src2, fname, attn1, attn2):
    """
        mixes 10 seconds of two sources of the same sample rate and saves them as fname

    Args:
        src1: filename for the first source
        src2: filename for the second source
        fname: output filename to save as
        attn1: relative attenuation for the first source
        attn2: relative attenuation for the second source

    Returns:

    """
    sr1, data1 = wav.read(src1)
    if data1.dtype == np.dtype("int16"):
        data1 = data1 / float(np.iinfo(data1.dtype).max)

    sr2, data2 = wav.read(src2)
    if data2.dtype == np.dtype("int16"):
        data2 = data2 / float(np.iinfo(data2.dtype).max)

    if sr1 != sr2:
        raise ValueError("Both sources must have the same sample rate")

    attn1 = float(attn1 + 1) / 2
    attn2 = float(attn2 + 1) / 2
    sample1 = data1[0:10 * sr1]
    sample2 = data2[0:10 * sr1]
    left = attenuate(sample1, attn1) + attenuate(sample2, attn2)
    right = attenuate(sample1, 1-attn1) + attenuate(sample2, 1-attn2)

    signal = np.vstack((left, right))
    scipy.io.wavfile.write(fname, sr1, signal.T)
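# attenuate is not shown here; a minimal sketch of the intent (a plain linear
# gain, so attn1/attn2 pan each source between the two output channels):
def attenuate_sketch(data, attn):
    return attn * data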
Example #23
def simple_noise_filter(target, files, method=median_by_intensity, combination=flatten, section_length=4096):
    # load all input .wav files into arrays
    # bin each to a certain length
    #print time()
    feeds = [section_by_length(wavfile.read(file)[1], section_length) for file in files]
    samplerate = wavfile.read(files[0])[0]
    #print time()
    # perform fft on each bin, select median of each 
    max_len = len(max(feeds, key=len))
    sections = []
    for i in range(max_len):
        begin = time()
        freqs = [fft.fft(feed[i], axis=0) for feed in feeds]
        #print "Fourier per ~.1s feed: ",
        #print (time()-begin)/3.
        begin = time()
        #filtered_freqs = [median_by_intensity(freqs, j) for j in range(len(freqs[0]))] # traverse the arrays in parallel
        filtered_freqs = [method(freqs, j) for j in range(len(freqs[0]))]
        #print "Filtering: ",
        #print (time()-begin)
        begin = time()
        sections += [real(fft.ifft(filtered_freqs, axis=0)).astype(feeds[0][0].dtype)]
        #print "Inversing per ~.1s feed: ",
        #print (time() - begin)
    # output
    #print time()
    samples = combination(sections)
    wavfile.write(target, samplerate, samples)
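# median_by_intensity is not shown here; a minimal sketch of the intent: for
# frequency bin j, keep the complex FFT value whose magnitude is the median
# across all feeds.
def median_by_intensity_sketch(freqs, j):
    vals = [f[j] for f in freqs]
    order = sorted(range(len(vals)), key=lambda k: abs(vals[k]))
    return vals[order[len(vals) // 2]]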
def generate_reverb(signal, reverb, fname, iter_range):
    """
    Adds reverb from the path reverb to the data in the path signal and saves it as fname. Applies reverb iteratively over
    iter_range
    :param signal: the filename for the stereo input signal
    :param reverb: the filename for the stereo impulse response
    :param fname: the output filename to save as
    :param iter_range: the max number of iterations to convolve with the signal
    :return:
    """
    sr, data = wav.read(signal)
    if data.dtype == np.dtype("int16"):
        data = data / float(np.iinfo(data.dtype).max)


    sr_ir, data_ir = wav.read(reverb)
    if data_ir.dtype == np.dtype("int16"):
        data_ir = data_ir / float(np.iinfo(data_ir.dtype).max)

    if sr_ir != sr:
        raise ValueError("Impulse Response must have same sample rate as signal")

    prev_data = data
    for i in xrange(0, iter_range+1):
        if i > 0:
            mix = add_reverb(prev_data.T, data_ir.T)
            prev_data = np.copy(mix).T
        else:
            mix = data.T
        if not os.path.exists(os.path.splitext(fname)[0]+'-'+str(i)+'.wav'):
            scipy.io.wavfile.write(os.path.splitext(fname)[0]+'-'+str(i)+'.wav', sr, mix.T)
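# add_reverb is not shown here; a sketch of the intent, assuming
# channels-on-rows input (as passed via .T above): convolve each channel with
# the matching impulse-response channel.
import numpy as np
from scipy.signal import fftconvolve

def add_reverb_sketch(data, ir):
    return np.array([fftconvolve(ch, ir_ch) for ch, ir_ch in zip(data, ir)])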
def mix_files(f1,f2):

    base1 = f1.split('/')[-1].split('.wav')[0]
    base2 = f2.split('/')[-1].split('.wav')[0]

    (fs,sig) = wav.read(f1)
    s1 = sig.reshape((len(sig),1))
    del sig
    (fs,sig) = wav.read(f2)
    s2 = sig.reshape((len(sig),1))
    del sig
    block_length = 5*fs
    s1_blocks = enframe(s1,block_length,block_length)
    s2_blocks = enframe(s2,block_length,block_length)
    del s1, s2
     
    nrg1 = 0.707*np.sqrt(np.sum(np.power(s1_blocks,2),axis=1))
    nrg2 = 0.707*np.sqrt(np.sum(np.power(s2_blocks,2),axis=1))
     
    for i in range(len(nrg1)):
        # log-energies (natural log, despite the "db" names)
        db1 = np.log(nrg1[i])
        db2 = np.log(nrg2[i])
        if (db1 >= 9) and (db2 >= 9) and (0.1 < abs(db1 - db2) < 5):
            sir = '%.2f' % (db1 - db2)
            ovl_name = '/erasable/nxs113020/wav_ovl/'+base1+'_'+base2+'_sir'+sir+'_'+str(i)+'.wav'
            overlapped = s1_blocks[i,:] + s2_blocks[i,:]
            nrg_ovl = 0.707*np.sqrt(np.sum(np.power(overlapped,2)))
            scikits.audiolab.wavwrite(overlapped/nrg_ovl, ovl_name, fs, 'pcm16')
Example #26
    def generate(self, fileList, inputParam1, inputParam2, inputParam3, inputParam4):
        # Add Music objects to pl (playlist): filename, full L/R data, and mono (mean-ed) data.
        for path in fileList:
            data = wav.read(path)[1]  # read each file once and reuse the array
            self.pl.append(Music(path.split("/")[-1], data, data.mean(axis=1)))

        print(self.pl[1].title)
        print(self.pl[1].data)
        print(self.pl[1].avgData)
        print(inputParam1, inputParam2, inputParam3, inputParam4)
Example #27
def wait_for_wav(filename):
    # Super ugly hack! Since Csound might not be finished writing the file,
    # we try to read it, and on failure (i.e. it was not yet closed) wait 50 ms.
    while True:
        try:
            wavfile.read(filename)
            break
        except Exception:
            time.sleep(.05)
    return filename
Example #28
def main():
    fs, bg_signal = wavfile.read(sys.argv[1])
    ltsd = LTSD_VAD()
    ltsd.init_params_by_noise(fs, bg_signal)

    fs, signal = wavfile.read(sys.argv[2])
    vaded_signal = ltsd.filter(signal)

    wavfile.write('vaded.wav', fs, vaded_signal)
Example #29
def training(nfiltbank, orderLPC):
    nSpeaker = 8
    nCentroid = 16
    codebooks_mfcc = np.empty((nSpeaker,nfiltbank,nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    fname = str()

    for i in range(nSpeaker):
        fname = '/s' + str(i+1) + '.wav'
        print('Now speaker ', str(i+1), 'features are being trained' )
        (fs,s) = read(directory + fname)
        mel_coeff = mfcc(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i,:,:] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i,:,:] = lbg(lpc_coeff, nCentroid)
        
        plt.figure(i)
        plt.title('Codebook for speaker ' + str(i+1) + ' with ' + str(nCentroid) +  ' centroids')
        for j in range(nCentroid):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[i,:,j])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[i,:,j])
            plt.setp(markerline,'markerfacecolor','r')
            plt.setp(baseline,'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin = -1, ymax = 1)
            plt.xlabel('Number of features')
    
    plt.show()
    print('Training complete')
    
    # plotting the 5th and 7th MFCC dimensions (indices 4 and 6) on a 2D plane
    # comment out the block below if you don't want to see the codebook plot
    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))
   
    for i in range(2):
        fname = '/s' + str(i+2) + '.wav'
        (fs,s) = read(directory + fname)
        mel_coeff[i,:,:] = mfcc(s, fs, nfiltbank)[:,0:68]
        codebooks[i,:,:] = lbg(mel_coeff[i,:,:], nCentroid)
        
    
    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0,6,:], mel_coeff[0,4,:],s = 100,  color = 'r', marker = 'o')
    c1 = plt.scatter(codebooks[0,6,:], codebooks[0,4,:], s = 100, color = 'r', marker = '+')
    s2 = plt.scatter(mel_coeff[1,6,:], mel_coeff[1,4,:],s = 100,  color = 'b', marker = 'o')
    c2 = plt.scatter(codebooks[1,6,:], codebooks[1,4,:], s = 100, color = 'b', marker = '+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1','Sp2','Sp1 centroids', 'Sp2 centroids'), scatterpoints = 1, loc = 'upper left')    
    plt.show()
   
    
    return (codebooks_mfcc, codebooks_lpc)
Example #30
def run():
	print "Creating file voiceCepstrums.arff..."

	files = []
	output = open("voiceCepstrums.arff", 'w')

	for x in os.walk("AudioRecordings"):
		files.append(x)
	females = files[1][2]
	males = files[2][2]

	output.write("@relation voiceCepstrums\n\n")
	output.write("@attribute coefficient1 Continuous\n")
	output.write("@attribute coefficient2 Continuous\n")
	output.write("@attribute coefficient3 Continuous\n")
	output.write("@attribute coefficient4 Continuous\n")
	output.write("@attribute coefficient5 Continuous\n")
	output.write("@attribute gender {male, female}\n\n")

	output.write("@data\n")

	for filename in males:
		if filename.endswith(".wav") or filename.endswith(".WAV"):
			sampFreq, data = wavfile.read('AudioRecordings/Male/' + filename)
			cepstrum = getCepstrum(data)
			frequencies = {}
			for i in range(0, len(data)):
				coefficient = data[i]
				if coefficient[0] != float('Inf') and coefficient[0] != 0.0:
					frequency = (i*sampFreq)/len(data)
					frequencies[frequency] = coefficient[0];
			sortedFrequencies = sorted(frequencies.items(), key=operator.itemgetter(1), reverse=True);
			for i in range(0, 5):
				output.write(str(sortedFrequencies[i][0]))
				output.write(", ")
			output.write("male\n")

	for filename in females:
		if filename.endswith(".wav") or filename.endswith(".WAV"):
			sampFreq, data = wavfile.read('AudioRecordings/Female/' + filename)
			cepstrum = getCepstrum(data)
			frequencies = {}
			for i in range(0, len(data)):
				coefficient = data[i]
				if coefficient[0] != float('Inf') and coefficient[0] != 0.0:
					frequency = (i*sampFreq)/len(data)
					frequencies[frequency] = coefficient[0];
			sortedFrequencies = sorted(frequencies.items(), key=operator.itemgetter(1), reverse=True);
			for i in range(0, 5):
				output.write(str(sortedFrequencies[i][0]))
				output.write(", ")
			output.write("female\n")
Example #31
from features import ssc
import numpy as np
import scipy.io.wavfile as wav
file_name = "splateroyyo.wav"
(rate, sig) = wav.read(file_name)  # or whatever the filename is
f = ssc(rate, sig)
print f.shape

fd = open("x_train.npy", "a+b")
np.save(fd, f)
fd.close()

# Label by filename prefix: 's' -> positive class, 'e' -> negative class.
if file_name[0] == 's':
    y_train = np.ones((f.shape[0], 1))
elif file_name[0] == 'e':
    y_train = np.zeros((f.shape[0], 1))

fdd = open("y_train.npy", "a+b")
np.save(fdd, y_train)
fdd.close()
Example #32
def load_metadata_from_wavs():
    global background_noise
    background = [
        f for f in os.listdir(join(TRAIN_AUDIO_PATH, '_background_noise_'))
        if f.endswith('.wav')
    ]
    for wav in background:
        samples, sample_rate = librosa.load(join(
            join(TRAIN_AUDIO_PATH, '_background_noise_'), wav),
                                            sr=INPUT_SAMPLES)
        background_noise.append(samples)

    dirs = [
        f for f in os.listdir(TRAIN_AUDIO_PATH)
        if isdir(join(TRAIN_AUDIO_PATH, f))
    ]
    dirs.sort()

    wavs = []
    labels = []
    unknown_wavs = []

    unknown_list = [
        d for d in dirs if d not in TARGET_LIST and d != '_background_noise_'
    ]
    print('target_list : ', end='')
    print(TARGET_LIST)
    print('unknowns_list : ', end='')
    print(unknown_list)
    print('silence : _background_noise_')

    i = 0
    for directory in dirs[1:]:
        waves = [
            f for f in os.listdir(join(TRAIN_AUDIO_PATH, directory))
            if f.endswith('.wav')
        ]

        for j, wav in enumerate(waves):
            # samples, sample_rate = librosa.load(join(join(TRAIN_AUDIO_PATH, directory), wav), sr=16000)
            sample_rate, samples = wavfile.read(
                join(join(TRAIN_AUDIO_PATH, directory), wav))
            # Decimate by 2 (assumes 16 kHz clips) and zero-pad symmetrically
            # to INPUT_SAMPLES; clips that still don't fit are skipped.
            samples = np.concatenate((np.zeros((INPUT_SAMPLES - 8000) // 2,
                                               dtype="float32"), samples[::2],
                                      np.zeros((INPUT_SAMPLES - 8000) // 2,
                                               dtype="float32")))
            if len(samples) != INPUT_SAMPLES:
                continue

            if directory in unknown_list:
                unknown_wavs.append((wav, directory))
            else:
                wavs.append((TYPE_REGULAR, wav))
                labels.append(directory)

    wavc = len(wavs)
    for n in range(NOISE_MULTIPLIER):
        for i in range(wavc):
            wavs.append((TYPE_NOISED, wavs[i][1]))
            labels.append(labels[i])

    for i in range(UNKNOWN_COUNT):
        wavs.append((TYPE_UNKNOWN, random.choice(unknown_wavs)))
        labels.append("unknown")

    for i in range(SILENCE_COUNT):
        wavs.append((TYPE_SILENCE, random.randrange(0, len(background_noise))))
        labels.append("silence")

    return wavs, labels
Example #33
    room_dim = [8, 9]

    # source location
    source = np.array([1, 4.5])

    # create a reverberant shoebox room with sources and mics
    room = pra.ShoeBox(room_dim,
                       fs=16000,
                       max_order=15,
                       absorption=0.35,
                       sigma2_awgn=1e-8)

    # get signals
    signals = [
        np.concatenate(
            [wavfile.read(f)[1].astype(np.float32) for f in source_files])
        for source_files in wav_files
    ]
    delays = [1., 0.]
    locations = [[2.5, 3], [2.5, 6]]

    # add mic and good source to room
    # Add silent signals to all sources
    for sig, d, loc in zip(signals, delays, locations):
        room.add_source(loc, signal=np.zeros_like(sig), delay=d)

    # add microphone array
    room.add_microphone_array(
        pra.MicrophoneArray(np.c_[[6.5, 4.49], [6.5, 4.51]], fs=room.fs))

    # compute RIRs
Example #34
def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    # Load wav files
    fs, audio = wav.read(audio_filename)

    # Get mfcc coefficients
    orig_inputs = mfcc(audio, samplerate=fs, numcep=numcep)

    # For each time slice of the training set, we need to copy the context this makes
    # the numcep dimensions vector into a numcep + 2*numcep*numcontext dimensions
    # because of:
    #  - numcep dimensions for the current mfcc feature set
    #  - numcontext*numcep dimensions for each of the past and future (x2) mfcc feature set
    # => so numcep + 2*numcontext*numcep
    train_inputs = np.array([], np.float32)
    train_inputs.resize((orig_inputs.shape[0], numcep + 2*numcep*numcontext))

    # Prepare pre-fix post fix context (TODO: Fill empty_mfcc with MCFF of silence)
    empty_mfcc = np.array([])
    empty_mfcc.resize((numcep))

    # Prepare train_inputs with past and future contexts
    time_slices = range(train_inputs.shape[0])
    context_past_min   = time_slices[0]  + numcontext
    context_future_max = time_slices[-1] - numcontext
    for time_slice in time_slices:
        ### Reminder: array[start:stop:step]
        ### slices from index |start| up to |stop| (not included), every |step|
        # Pick up to numcontext time slices in the past, and complete with empty
        # mfcc features
        need_empty_past     = max(0, (context_past_min - time_slice))
        empty_source_past   = list(empty_mfcc for empty_slots in range(need_empty_past))
        data_source_past    = orig_inputs[max(0, time_slice - numcontext):time_slice]
        assert(len(empty_source_past) + len(data_source_past) == numcontext)

        # Pick up to numcontext time slices in the future, and complete with empty
        # mfcc features
        need_empty_future   = max(0, (time_slice - context_future_max))
        empty_source_future = list(empty_mfcc for empty_slots in range(need_empty_future))
        data_source_future  = orig_inputs[time_slice + 1:time_slice + numcontext + 1]
        assert(len(empty_source_future) + len(data_source_future) == numcontext)

        if need_empty_past:
            past   = np.concatenate((empty_source_past, data_source_past))
        else:
            past   = data_source_past

        if need_empty_future:
            future = np.concatenate((data_source_future, empty_source_future))
        else:
            future = data_source_future

        past   = np.reshape(past, numcontext*numcep)
        now    = orig_inputs[time_slice]
        future = np.reshape(future, numcontext*numcep)

        train_inputs[time_slice] = np.concatenate((past, now, future))
        assert(len(train_inputs[time_slice]) == numcep + 2*numcep*numcontext)

    # Whiten inputs (TODO: Should we whiten)
    train_inputs = (train_inputs - np.mean(train_inputs))/np.std(train_inputs)

    # Return results
    return train_inputs
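# Usage sketch (hypothetical file): with numcep=26 and numcontext=9, each row
# carries 26 + 2*9*26 = 494 features.
feats = audiofile_to_input_vector('sample.wav', numcep=26, numcontext=9)
print(feats.shape)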
Example #35
def mainfn():

    #Read the file
    wavfil = raw_input("Wav file to be read: ")
    f = wavfile.read(
        '/home/akshay/anaconda/ModulesPython/MUSICGEVD/{}'.format(wavfil))
    rdata = f[1]
    nc = rdata.shape[1]
    print "Number of channels: " + str(nc)
    fs = f[0]
    print "Sampling frequency: " + str(fs)
    (Pxx, freqs, bins, im) = plt.specgram(rdata[:, 0],
                                          NFFT=512,
                                          Fs=fs,
                                          noverlap=160)
    plt.title('Spectrogram')
    plt.xlabel('Time')
    plt.ylabel('Frequency in Hz')
    plt.show()

    tf, az, Nd = readingdat.mainfn()
    #print tf
    #print str(tf.shape)

    # INPUTS:
    N = 512  # Block Size & N point FFT
    M = 160  # Block Increment
    WINDOW = 50  # Time averaging of the CM, WINDOW_TYPE is FUTURE

    ## Plot the audio signal - Channel 1
    ##xaxis = np.arange(len(rdata))
    ##plt.plot(xaxis,rdata[:,0])
    ##plt.title('Channel 1: First microphone')
    #plt.show()

    # Divide into frames with overlap - Use blockd
    ## Assumption: rdata has channels on columns, and samples on rows

    rdata = np.transpose(
        rdata)  #Comment out if samples on col. and channels on rows
    num_blocks = nblocks(rdata[0, :], N, M)

    blockeddata = np.zeros((nc, num_blocks, N))  # blockeddata is a 3D matrix

    for i in range(nc):
        blockeddata[i, :, :] = blockd(rdata[i, :], N, M)
    print "Shape of blocked data: " + str(blockeddata.shape)

    #EACH FRAME COMPUTATION:
    #Each frame has to be taken for FFT

    ##    fftmat = mfft(blockeddata[:,0,:])

    fnum = 0  #fnum is the frame index
    #The correlation matrix is calculated once every PERIOD number of frames.
    # Time averaging is done with WINDOW_TYPE = FUTURE
    #print "Calculating the time-averaged correlation matrices"

    N_avg = noisecormat.mainfn()
    for t in range(int(math.ceil(num_blocks / WINDOW))):
        #fnum is Frame index
        i = 0
        R_tot = np.zeros((nc, nc, N / 2 + 1), dtype=complex)
        print "Localization for frames: ", str(
            t * 50), " to ", str((t + 1) * 50 - 1)
        while i < WINDOW:
            if fnum >= num_blocks:
                break
            else:
                fftmat = mfft(blockeddata[:, fnum, :])
                #print fftmat
                #print "Shape of fftmat: " + str(fftmat.shape)
                R_tot = R_tot + Rmatrix(fftmat)
                fnum = fnum + 1
                i = i + 1
##        print i
##        print fnum
        R_avg = R_tot / (i)
        print "R_avg matrix for freq bin 20: "
        print R_avg[:, :, 20]
        ##        print "Shape of R_avg is: " + str(R_avg.shape)

        print "N_avg shape: ", str(N_avg.shape)
        for fy in range(N / 2 + 1):
            N_avg2 = sl.inv(np.matrix(N_avg[:, :, fy]))
            N_avg2 = sl.sqrtm(N_avg2)
            R_avg[:, :, fy] = N_avg2 * R_avg[:, :, fy]
            R_avg[:, :, fy] = R_avg[:, :, fy] * N_avg2

#Eigen value decomposition is done for each frequency
        w = np.zeros((nc, N / 2 + 1), dtype=complex)
        v = np.zeros((nc, nc, N / 2 + 1), dtype=complex)
        for fy in range(N / 2 + 1):
            w[:, fy], v[:, :, fy] = np.linalg.eig(R_avg[:, :, fy])

            #print "Shape of w: " + str(w.shape)
            #print "Shape of v: " + str(v.shape)

        print "values: ", w[:,
                            5]  #printing the 3rd freq bin's eigen values and eigen vectors
        #print w[:,2].shape
        print "vectors: ", v[:, :, 5]
        ##        print "v shape: ", str(v[:,1,2].shape)

        ##    Calculation of MUSIC spectrum
        print "Calculating MUSIC spectrum"
        powerspec.mainfn(tf, v, t, az, Nd)
    def load_model():
        # rebuild the network (same architecture as used for training)
        model = FunctionSet(l1=F.Linear(2 * (dim * 2 + 1),
                                        n_units,
                                        initialW=initializer),
                            l2=F.Linear(n_units, n_units,
                                        initialW=initializer),
                            l3=F.Linear(n_units, 1, initialW=initializer))
        # Setup optimizer
        optimizer = optimizers.Adam()
        optimizer.setup(model)
        model.to_gpu()

        # Neural net architecture
        def forward(x_data, y_data, ratio=0.5, train=True):
            x, t = Variable(x_data), Variable(y_data)
            h1 = F.dropout(F.sigmoid(model.l1(x)), ratio=ratio, train=train)
            h2 = F.dropout(F.sigmoid(model.l2(h1)), ratio=ratio, train=train)
            y = model.l3(h2)
            return F.mean_squared_error(y, t), y

        with open('selectfrq_estimated_data/pretrain_write1.csv',
                  'r') as csvfile:
            readfile = csv.reader(csvfile)
            for row in readfile:
                print len(row)
                # print row
                l1_W.append(row)
        with open('selectfrq_estimated_data/pretrain_write2.csv',
                  "r") as csvfile:
            reader = csv.reader(csvfile)
            for row in reader:
                # print row
                l2_W.append(row)
        with open('selectfrq_estimated_data/pretrain_write3.csv',
                  "r") as csvfile:
            reader = csv.reader(csvfile)
            for row in reader:
                # print row
                l3_W.append(row)
        #test loop
        SNRList = ["-10dB", "-5dB", "0dB", "5dB", "10dB", "-20dB"]
        for SNRnum, SNR in enumerate(SNRList):  #-10,-5,0,5,10,-20dB
            loss_sum = np.zeros(testsize)
            for idx in np.arange(learnsize + SNRnum * testsize,
                                 learnsize + (SNRnum + 1) * testsize):
                # NOTE: if `read` is scipy.io.wavfile.read, its second
                # argument is the mmap flag, so "r" merely enables mmap.
                fs, signal_data = read(estimated_signal[idx], "r")
                fs, noise_data = read(estimated_noise[idx], "r")
                fs, teacher_data = read(
                    teacher_signal[idx - testsize * SNRnum], "r")
                signal_data = signal_data / np.sqrt(np.mean(signal_data**2))
                noise_data = noise_data / np.sqrt(np.mean(noise_data**2))
                teacher_data = teacher_data / np.sqrt(np.mean(teacher_data**2))

                Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data)
                Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data)
                Tspectrum_, synparam = FFTanalysis.FFTanalysis(teacher_data)

                N_FRAMES = np.shape(Sspectrum_)[0]
                HFFTL = np.shape(Sspectrum_)[1]
                x_data = np.zeros((N_FRAMES, HFFTL * 2))
                y_data = np.zeros((N_FRAMES, HFFTL))

                for nframe in xrange(N_FRAMES):
                    spectrum = np.append(Sspectrum_[nframe],
                                         Nspectrum_[nframe])
                    x_data[nframe] = [
                        np.sqrt(c.real**2 + c.imag**2) for c in spectrum
                    ]  #DNN indata
                    #phaseSpectrum = [np.arctan2(c.imag, c.real) for c in spectrum]
                    Spower = np.array([
                        np.sqrt(c.real**2 + c.imag**2)
                        for c in Sspectrum_[nframe]
                    ])
                    Tpower = np.array([
                        np.sqrt(c.real**2 + c.imag**2)
                        for c in Tspectrum_[nframe]
                    ])
                    for i, x in enumerate(Spower):
                        if x == 0:
                            Spower[i] = 1e-10
                    y_data[nframe] = Tpower / Spower

                calcSNR = np.empty((N_FRAMES, 0), float)
                totalloss = np.zeros(HFFTL, float)
                # testing
                for frq in xrange(HFFTL):
                    model.l1.W.data = cuda.to_gpu(l1_W[frq])
                    model.l2.W.data = cuda.to_gpu(l2_W[frq])
                    model.l3.W.data = cuda.to_gpu(l3_W[frq])
                    # testing
                    x_frqdata = np.zeros(
                        (np.shape(x_data)[0], 2 * (dim * 2 + 1)), float)
                    x_frqdata[:, dim] = x_data[:, frq]
                    x_frqdata[:, dim * 3 + 1] = x_data[:, frq + HFFTL]
                    for j in np.arange(1, dim + 1):
                        if (frq - j) >= 0:
                            x_frqdata[:, dim - j] = x_data[:, frq - j]
                            x_frqdata[:, dim * 3 + 1 - j] = x_data[:, frq +
                                                                   HFFTL - j]
                        if ((HFFTL - 1) - (j + frq)) >= 0:
                            x_frqdata[:, dim + j] = x_data[:, frq + j]
                            x_frqdata[:, dim * 3 + 1 + j] = x_data[:, frq +
                                                                   HFFTL + j]
                    y_frqdata = np.zeros((np.shape(y_data)[0], 1), float)
                    y_frqdata = y_data[:, frq].reshape(np.shape(y_data)[0], 1)

                    x_frqdata = x_frqdata.astype(np.float32)
                    y_frqdata = y_frqdata.astype(np.float32)
                    if args.gpu >= 0:
                        x_frqdata = cuda.to_gpu(x_frqdata)
                        y_frqdata = cuda.to_gpu(y_frqdata)
                    loss, pred = forward(x_frqdata, y_frqdata, train=False)
                    totalloss[frq] = cuda.to_cpu(loss.data)
                    pred = np.reshape(cuda.to_cpu(pred.data), (N_FRAMES, 1))
                    calcSNR = np.append(calcSNR, pred, axis=1)
                fs, teacher_data = read(
                    teacher_signal[idx - testsize * SNRnum], "r")
                if teacher_data.dtype == "int16":
                    teacher_data = teacher_data / norm
                y_out = Sspectrum_ * calcSNR
                wf_signal = FFTanalysis.Synth(y_out, synparam, BPFon=0)
                wf_signal = wf_signal * np.sqrt(
                    np.mean(teacher_data**2) / np.mean(wf_signal**2))
                write(
                    dir + SNR + "/dim{}_DNNbased_No{}.wav".format(
                        dim, idx - testsize * SNRnum), Fs, wf_signal)
    def DNNbasedWienerfilter():

        #pretrain loop
        startexec = time.time()
        for epoch in xrange(pretrain_epoch):
            print "now proc: pretraining epoch{}".format(epoch)
            startepoch = time.time()
            perm = np.random.permutation(learnsize)
            for idx in np.arange(0, pretrainsize, 3):  # 3 training utterances per batch
                # start = time.time()
                x_batch = np.empty((0, HFFTL * 2), float)
                for iter in xrange(3):
                    fs, signal_data = read(estimated_signal[perm[idx + iter]],
                                           "r")
                    fs, noise_data = read(estimated_noise[perm[idx + iter]],
                                          "r")
                    signal_data = signal_data / np.sqrt(np.mean(signal_data**
                                                                2))
                    noise_data = noise_data / np.sqrt(np.mean(noise_data**2))
                    #FFT
                    Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data)
                    Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data)
                    N_FRAMES = np.shape(Sspectrum_)[0]
                    HFFTL = np.shape(Sspectrum_)[1]
                    x_data = np.zeros((N_FRAMES, HFFTL * 2))
                    for nframe in xrange(N_FRAMES):
                        spectrum = np.append(Sspectrum_[nframe],
                                             Nspectrum_[nframe])
                        x_data[nframe] = [
                            np.sqrt(c.real**2 + c.imag**2) for c in spectrum
                        ]  #DNN indata
                    if iter == 0:
                        x_batch = np.append(x_batch, x_data, axis=0)
                    else:
                        x_batch = np.vstack((x_batch, x_data))
                for frq in xrange(HFFTL):
                    x_frqbatch = np.zeros(
                        (np.shape(x_batch)[0], 2 * (dim * 2 + 1)), float)
                    x_frqbatch[:, dim] = x_batch[:, frq]
                    x_frqbatch[:, dim * 3 + 1] = x_batch[:, frq + HFFTL]
                    for j in np.arange(1, dim + 1):
                        if (frq - j) >= 0:
                            x_frqbatch[:, dim - j] = x_batch[:, frq - j]
                            x_frqbatch[:, dim * 3 + 1 - j] = x_batch[:, frq +
                                                                     HFFTL - j]
                        if ((HFFTL - 1) - (j + frq)) >= 0:
                            x_frqbatch[:, dim + j] = x_batch[:, frq + j]
                            x_frqbatch[:, dim * 3 + 1 + j] = x_batch[:, frq +
                                                                     HFFTL + j]

                    x_frqbatch = x_frqbatch.astype(np.float32)
                    if epoch != 0 or idx != 0:  #except first batch
                        modelL1.l1.W.data = cuda.to_gpu(l1_W.pop(0))
                        modelL2.l1.W.data = cuda.to_gpu(l2_W.pop(0))
                        modelL1.l2.W.data = cuda.to_gpu(l1b_W.pop(0))
                        modelL2.l2.W.data = cuda.to_gpu(l2b_W.pop(0))
                    # training
                    if args.gpu >= 0:
                        x_frqbatch = cuda.to_gpu(x_frqbatch)
                    optL1.zero_grads()
                    loss, hidden = pretrain_L1(x_frqbatch, ratio=0.5)
                    loss.backward()
                    optL1.update()
                    optL2.zero_grads()
                    loss, hidden = pretrain_L2(hidden, ratio=0.5)
                    loss.backward()
                    optL2.update()
                    #model parameter saving
                    l1_W.append(cuda.to_cpu(modelL1.l1.W.data))
                    l2_W.append(cuda.to_cpu(modelL2.l1.W.data))
                    l1b_W.append(cuda.to_cpu(modelL1.l2.W.data))
                    l2b_W.append(cuda.to_cpu(modelL2.l2.W.data))
            print 'pretrain epoch time:{0}sec'.format(
                np.round(time.time() - startepoch, decimals=2))
        # learning loop
        model = FunctionSet(l1=F.Linear(2 * (dim * 2 + 1),
                                        n_units,
                                        initialW=initializer),
                            l2=F.Linear(n_units, n_units,
                                        initialW=initializer),
                            l3=F.Linear(n_units, 1, initialW=initializer))
        # Setup optimizer
        optimizer = optimizers.Adam()
        optimizer.setup(model)
        model.to_gpu()

        # Neural net architecture
        def forward(x_data, y_data, ratio=0.5, train=True):
            x, t = Variable(x_data), Variable(y_data)
            h1 = F.dropout(F.sigmoid(model.l1(x)), ratio=ratio, train=train)
            h2 = F.dropout(F.sigmoid(model.l2(h1)), ratio=ratio, train=train)
            y = model.l3(h2)
            return F.mean_squared_error(y, t), y

        startexec = time.time()
        for epoch in xrange(n_epoch):
            print "now proc: learning epoch{}".format(epoch)
            startepoch = time.time()
            perm = np.random.permutation(learnsize)
            for idx in np.arange(0, learnsize, 3):  # 3 training utterances per batch
                # start = time.time()
                x_batch = np.empty((0, HFFTL * 2), float)
                y_batch = np.empty((0, HFFTL), float)
                for iter in xrange(3):
                    fs, signal_data = read(estimated_signal[perm[idx + iter]],
                                           "r")
                    fs, noise_data = read(estimated_noise[perm[idx + iter]],
                                          "r")
                    fs, teacher_data = read(teacher_signal[perm[idx + iter]],
                                            "r")
                    signal_data = signal_data / np.sqrt(np.mean(signal_data**
                                                                2))
                    noise_data = noise_data / np.sqrt(np.mean(noise_data**2))
                    teacher_data = teacher_data / np.sqrt(
                        np.mean(teacher_data**2))

                    #FFT
                    Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data)
                    Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data)
                    Tspectrum_, synparam = FFTanalysis.FFTanalysis(
                        teacher_data)

                    N_FRAMES = np.shape(Sspectrum_)[0]
                    HFFTL = np.shape(Sspectrum_)[1]
                    x_data = np.zeros((N_FRAMES, HFFTL * 2))
                    y_data = np.zeros((N_FRAMES, HFFTL))
                    if epoch == 0:
                        learned_data += N_FRAMES

                    for nframe in xrange(N_FRAMES):
                        spectrum = np.append(Sspectrum_[nframe],
                                             Nspectrum_[nframe])
                        x_data[nframe] = [
                            np.sqrt(c.real**2 + c.imag**2) for c in spectrum
                        ]  # DNN input: signal and noise magnitude spectra
                        #phaseSpectrum = [np.arctan2(c.imag, c.real) for c in spectrum]
                        Spower = np.array([
                            np.sqrt(c.real**2 + c.imag**2)
                            for c in Sspectrum_[nframe]
                        ])
                        Tpower = np.array([
                            np.sqrt(c.real**2 + c.imag**2)
                            for c in Tspectrum_[nframe]
                        ])
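                        # guard against zero magnitudes, then form the per-bin
                        # ratio-mask target |teacher| / |estimated signal|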
                        for i, x in enumerate(Spower):
                            if x == 0:
                                Spower[i] = 1e-10
                        y_data[nframe] = Tpower / Spower
                    # np.append(..., axis=0) and np.vstack are equivalent here
                    x_batch = np.vstack((x_batch, x_data))
                    y_batch = np.vstack((y_batch, y_data))

                for frq in xrange(HFFTL):
                    x_frqbatch = np.zeros(
                        (np.shape(x_batch)[0], 2 * (dim * 2 + 1)), float)
                    x_frqbatch[:, dim] = x_batch[:, frq]
                    x_frqbatch[:, dim * 3 + 1] = x_batch[:, frq + HFFTL]
                    for j in np.arange(1, dim + 1):
                        if (frq - j) >= 0:
                            x_frqbatch[:, dim - j] = x_batch[:, frq - j]
                            x_frqbatch[:, dim * 3 + 1 - j] = x_batch[:, frq +
                                                                     HFFTL - j]
                        if ((HFFTL - 1) - (j + frq)) >= 0:
                            x_frqbatch[:, dim + j] = x_batch[:, frq + j]
                            x_frqbatch[:, dim * 3 + 1 + j] = x_batch[:, frq +
                                                                     HFFTL + j]
                    # target column for this frequency bin
                    y_frqbatch = y_batch[:, frq].reshape(np.shape(y_batch)[0], 1)

                    x_frqbatch = x_frqbatch.astype(np.float32)
                    y_frqbatch = y_frqbatch.astype(np.float32)

                    # l1_W/l2_W/l3_W act as FIFO queues cycled once per frq,
                    # so each frequency bin trains its own set of weights
                    model.l1.W.data = cuda.to_gpu(l1_W.pop(0))
                    model.l2.W.data = cuda.to_gpu(l2_W.pop(0))
                    if epoch != 0 or idx != 0:  # l3_W is still empty on the very first batch
                        model.l3.W.data = cuda.to_gpu(l3_W.pop(0))
                    # training
                    if args.gpu >= 0:
                        x_frqbatch = cuda.to_gpu(x_frqbatch)
                        y_frqbatch = cuda.to_gpu(y_frqbatch)
                    optimizer.zero_grads()
                    loss, pred = forward(x_frqbatch, y_frqbatch, ratio=0.5)
                    loss.backward()
                    optimizer.update()
                    # push this bin's updated weights back onto the queues
                    l1_W.append(cuda.to_cpu(model.l1.W.data))
                    l2_W.append(cuda.to_cpu(model.l2.W.data))
                    l3_W.append(cuda.to_cpu(model.l3.W.data))
            print 'epoch time:{0}sec'.format(
                np.round(time.time() - startepoch, decimals=2))
        # write the per-bin weight lists out as CSV
        for path, W in [('selectfrq_estimated_data/pretrain_write1.csv', l1_W),
                        ('selectfrq_estimated_data/pretrain_write2.csv', l2_W),
                        ('selectfrq_estimated_data/pretrain_write3.csv', l3_W)]:
            with open(path, 'w') as f:
                csv.writer(f).writerows(W)
        #test loop
        SNRList = ["-10dB", "-5dB", "0dB", "5dB", "10dB", "-20dB"]
        for SNRnum, SNR in enumerate(SNRList):  #-10,-5,0,5,10,-20dB
            loss_sum = np.zeros(testsize)
            for idx in np.arange(learnsize + SNRnum * testsize,
                                 learnsize + (SNRnum + 1) * testsize):
                fs, signal_data = read(estimated_signal[idx], "r")
                fs, noise_data = read(estimated_noise[idx], "r")
                fs, teacher_data = read(
                    teacher_signal[idx - testsize * SNRnum], "r")
                signal_data = signal_data / np.sqrt(np.mean(signal_data**2))
                noise_data = noise_data / np.sqrt(np.mean(noise_data**2))
                teacher_data = teacher_data / np.sqrt(np.mean(teacher_data**2))

                Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data)
                Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data)
                Tspectrum_, synparam = FFTanalysis.FFTanalysis(teacher_data)

                N_FRAMES = np.shape(Sspectrum_)[0]
                HFFTL = np.shape(Sspectrum_)[1]
                x_data = np.zeros((N_FRAMES, HFFTL * 2))
                y_data = np.zeros((N_FRAMES, HFFTL))

                for nframe in xrange(N_FRAMES):
                    spectrum = np.append(Sspectrum_[nframe],
                                         Nspectrum_[nframe])
                    x_data[nframe] = [
                        np.sqrt(c.real**2 + c.imag**2) for c in spectrum
                    ]  # DNN input: signal and noise magnitude spectra
                    #phaseSpectrum = [np.arctan2(c.imag, c.real) for c in spectrum]
                    Spower = np.array([
                        np.sqrt(c.real**2 + c.imag**2)
                        for c in Sspectrum_[nframe]
                    ])
                    Tpower = np.array([
                        np.sqrt(c.real**2 + c.imag**2)
                        for c in Tspectrum_[nframe]
                    ])
                    for i, x in enumerate(Spower):
                        if x == 0:
                            Spower[i] = 1e-10
                    y_data[nframe] = Tpower / Spower

                calcSNR = np.empty((N_FRAMES, 0), float)
                totalloss = np.zeros(HFFTL, float)
                # testing
                for frq in xrange(HFFTL):
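                    # restore the weights learned for this frequency bin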
                    model.l1.W.data = cuda.to_gpu(l1_W[frq])
                    model.l2.W.data = cuda.to_gpu(l2_W[frq])
                    model.l3.W.data = cuda.to_gpu(l3_W[frq])
                    # testing
                    x_frqdata = np.zeros(
                        (np.shape(x_data)[0], 2 * (dim * 2 + 1)), float)
                    x_frqdata[:, dim] = x_data[:, frq]
                    x_frqdata[:, dim * 3 + 1] = x_data[:, frq + HFFTL]
                    for j in np.arange(1, dim + 1):
                        if (frq - j) >= 0:
                            x_frqdata[:, dim - j] = x_data[:, frq - j]
                            x_frqdata[:, dim * 3 + 1 - j] = x_data[:, frq +
                                                                   HFFTL - j]
                        if ((HFFTL - 1) - (j + frq)) >= 0:
                            x_frqdata[:, dim + j] = x_data[:, frq + j]
                            x_frqdata[:, dim * 3 + 1 + j] = x_data[:, frq +
                                                                   HFFTL + j]
                    # target column for this frequency bin
                    y_frqdata = y_data[:, frq].reshape(np.shape(y_data)[0], 1)

                    x_frqdata = x_frqdata.astype(np.float32)
                    y_frqdata = y_frqdata.astype(np.float32)
                    if args.gpu >= 0:
                        x_frqdata = cuda.to_gpu(x_frqdata)
                        y_frqdata = cuda.to_gpu(y_frqdata)
                    loss, pred = forward(x_frqdata, y_frqdata, train=False)
                    totalloss[frq] = cuda.to_cpu(loss.data)
                    pred = np.reshape(cuda.to_cpu(pred.data), (N_FRAMES, 1))
                    calcSNR = np.append(calcSNR, pred, axis=1)
                fs, teacher_data = read(
                    teacher_signal[idx - testsize * SNRnum], "r")
                if teacher_data.dtype == "int16":
                    teacher_data = teacher_data / norm  # norm is defined earlier; presumably the int16 full-scale value
                y_out = Sspectrum_ * calcSNR  # apply the predicted per-bin gains to the estimated spectrum
                wf_signal = FFTanalysis.Synth(y_out, synparam, BPFon=0)
                wf_signal = wf_signal * np.sqrt(
                    np.mean(teacher_data**2) / np.mean(wf_signal**2))
                write(
                    dir + SNR + "/dim{}_DNNbased_No{}.wav".format(
                        dim, idx - testsize * SNRnum), Fs, wf_signal)

        print 'exec time:{0}sec'.format(
            np.round(time.time() - startexec, decimals=2))
        print "data: ", learned_data
Example #38
from scipy.io import wavfile
import numpy as np

ii = 0
length = 20
half_length = length//2
frequency = 48000
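# keep a 20-second window from the middle of each 48 kHz clip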

for i in range(0, 46):
    print(i)
    temp_wav = wavfile.read('youtube_downloader/wav/'+str(i)+'.wav')[1]
    wav_length = len(temp_wav)
    if (wav_length > frequency * length):
        mid = wav_length // 2
        temp_wav = temp_wav[mid-half_length * frequency: mid+half_length*frequency]
        if (not np.isnan(temp_wav).any()) and (not np.isinf(temp_wav).any()):
            wavfile.write('48000_wavs/'+str(ii)+'.wav', frequency, temp_wav)
            ii += 1
Example #39
def load_wav_to_torch(full_path):
    sampling_rate, data = read(full_path)
    return torch.FloatTensor(data.astype(np.float32)), sampling_rate
Example #40
 def _read_wav(self, wave_file):
     self.rate, self.data = wf.read(wave_file)
     self.channels = len(self.data.shape)
     self.filename = wave_file
     return self
Example #41
createPath(TEMP_FOLDER)

command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
    FRAME_QUALITY) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
subprocess.call(command, shell=True)

command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
    SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"

subprocess.call(command, shell=True)

command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
f = open(TEMP_FOLDER + "/params.txt", "w")
subprocess.call(command, shell=True, stdout=f)
f.close()  # flush the captured stream info before re-reading the file

sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
audioSampleCount = audioData.shape[0]
maxAudioVolume = getMaxVolume(audioData)

f = open(TEMP_FOLDER + "/params.txt", 'r+')
pre_params = f.read()
f.close()
params = pre_params.split('\n')
for line in params:
    m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
    if m is not None:
        frameRate = float(m.group(1))

samplesPerFrame = sampleRate / frameRate  # audio samples per video frame

audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
Example #42
import numpy as np
import scipy as sp
from scipy.io.wavfile import read
from scipy.io.wavfile import write
from scipy import signal
from scipy.fft import fft
from scipy.signal import wiener  # scipy.signal.signaltools is private in modern SciPy
import matplotlib.pyplot as plt
# get_ipython().magic('matplotlib inline')

(Frequency, array) = read('mimii_dummy.wav')

len(array)

plt.plot(array)
plt.title('Original Signal')  # this plots the raw waveform, so the x-axis is samples
plt.xlabel('Sample')
plt.ylabel('Amplitude')
# plt.show()

FourierTransformation = fft(array)  # sp.fft is a module, not a function, in modern SciPy
# array = fft(array)

scale = np.linspace(0, Frequency, len(array))  # sp.linspace was removed from SciPy

plt.stem(scale[0:5000], np.abs(FourierTransformation[0:5000]), 'r')
# array = FourierTransformation

filteredSignal = signal.wiener(array)
plt.plot(filteredSignal)  # plotting the signal.
plt.title('Wiener-filtered signal')
plt.xlabel('Sample')  # time-domain plot, not a spectrum
Example #43
from scipy.fftpack import fft, ifft
import numpy as np
import matplotlib.pyplot as pl
from scipy.io.wavfile import read


def configFilter(data):
    out = []
    for i in range(1, len(data), 1):
        out.append(data[i] - 0.98 * data[i - 1])
    return out
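
# The loop in configFilter is a first-order pre-emphasis filter,
# y[n] = x[n] - 0.98 * x[n-1]. A minimal vectorized sketch of the same
# operation (config_filter_vec is not part of the original source):
def config_filter_vec(data, alpha=0.98):
    data = np.asarray(data, dtype=float)
    # identical output to the loop above, without Python-level iteration
    return data[1:] - alpha * data[:-1]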


fig, (ax1, ax2, ax3, ax4, ax5, ax6, ax7) = pl.subplots(7, 1, sharex=False)

rate, data = read('Xe.wav')

sizecut = 512
start = int(0.4 * rate)
# start = 100

sg = data[start:start + sizecut]

ax1.plot(sg)
ax1.set_title('Original')

sg = configFilter(sg)

ax2.plot(sg)
ax2.set_title('After Adjust Filter')
Example #44
# coding:utf-8
'''
@time:    Created on  2018-10-19 06:02:12
@author:  Lanqing
@Func:    src.wav
'''
from scipy.io import wavfile
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree, preprocessing
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
import pandas as pd, numpy as np

fs1, data1 = wavfile.read('C:/Users/jhh/Desktop/audio/REC20181019054833.wav')
fs2, data2 = wavfile.read('C:/Users/jhh/Desktop/audio/REC20181019055029.wav')

df1 = pd.DataFrame(data1).iloc[:, 0]
df2 = pd.DataFrame(data2).iloc[:, 0]

df1.plot()
plt.show()
df2.plot()
plt.show()

n = 800
timestep = 1 / 48000

list_df1 = [list(df1[i:i + n].values) for i in range(0, df1.shape[0], n)][:-1]
list_df2 = [list(df2[i:i + n].values) for i in range(0, df2.shape[0], n)][:-1]
Example #45
from numpy import array
from scipy.io import wavfile
import numpy as np


# reconstructed signature; the call commented near the bottom of this example
# ("split_sequence(raw_seq, n_steps)") names the function and its arguments
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence) - 1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


# define input sequence
rate, raw_seq = wavfile.read('songs/hakuna_matata.wav')
raw_seq = raw_seq[np.logical_not(np.isnan(raw_seq))]
raw_seq = raw_seq.astype(int)

print(max(raw_seq))

# choose a number of time steps
n_steps = 1

# sample
#raw_seq = raw_seq # random sample. dev purposes.

# split into samples
X = raw_seq[0:1323000]  #split_sequence(raw_seq, n_steps)
y = raw_seq[1323000:1345050]
# reshape from [samples, timesteps] into [samples, timesteps, features]
Example #46
def audio_fft():

    rate, data = wav.read('31beethovens3a.wav')
    fft_out = fft(data)
    signs = data / np.absolute(data)
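    # note: dividing by np.absolute(data) yields nan/inf wherever data == 0;
    # np.sign(data) would avoid that (returning 0 at zero samples)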
    return [fft_out, signs]
Example #47
import IPython.display as ipd
import os

fulldatasetpath = "D:/Datasets/UrbanSound8K/"
os.chdir(fulldatasetpath)

import librosa
from scipy.io import wavfile as wav
import numpy as np

filename = fulldatasetpath + 'audio/fold9/106955-6-0-0.wav'

librosa_audio, librosa_sample_rate = librosa.load(filename)
scipy_sample_rate, scipy_audio = wav.read(filename)

print('Original sample rate:', scipy_sample_rate)
print('Librosa sample rate:', librosa_sample_rate)

print('Original audio file min~max range:', np.min(scipy_audio), 'to',
      np.max(scipy_audio))
print('Librosa audio file min~max range:', np.min(librosa_audio), 'to',
      np.max(librosa_audio))

import matplotlib.pyplot as plt

# Original audio with 2 channels
#plt.figure(figsize=(12, 4))
#plt.plot(scipy_audio)

mfccs = librosa.feature.mfcc(y=librosa_audio,
                             sr=librosa_sample_rate,
Example #48
    B = np.asarray(B).reshape(y1, x1)

    rgbArray = np.zeros((y1, x1, 3), 'uint8')
    rgbArray[..., 0] = R
    rgbArray[..., 1] = G
    rgbArray[..., 2] = B

    plt.figure(dpi=1200)
    plt.imshow(rgbArray, interpolation='spline16')
    plt.style.use('dark_background')
    plt.axis('off')
    plt.savefig(filename)
    plt.close()


fs, data = wavfile.read('Melulu_rev1_viz.wav')  # load the data
a_0 = data.T[0] / (2.**15)  # two-channel soundtrack: take the first track, scaled to [-1, 1)
n_0 = len(a_0)

sample_period = 0.2  ### in seconds
sample_count = mh.floor(n_0 / (fs * sample_period))

a_1 = chunkIt(a_0, sample_count)

for a in a_1:
    freqArray_top5 = np.empty([])
    n = len(a)
    freqArray, dB = freq_dB(a, n, fs, -300)
    dB_sort = np.argsort(dB)
    freqArray = freqArray[dB_sort]
Example #49
def read_files():
    org = wfile.read('org.wav')
    long = wfile.read('long2.wav')
    short = wfile.read('short2.wav')
    return org, long, short
Example #50
    # (this snippet begins mid-function: the loop below fills the first row
    # of the elastic-distance cost matrix)
    for j in range(1,taille2):
        matrice_distance_elastique[0,j] = matrice_distance_elastique[0,j-1] + cout_horiz*matrice_distance[0,j]

    # fill in the rest of the matrix
    for i in range(1,taille1):
        for j in range(1,taille2):
            chemin_vert = matrice_distance_elastique[i-1,j] + cout_vert*matrice_distance[i,j]
            chemin_horiz = matrice_distance_elastique[i,j-1] + cout_horiz*matrice_distance[i,j]
            chemin_diag = matrice_distance_elastique[i-1,j-1] + cout_diag*matrice_distance[i,j]
            matrice_distance_elastique[i,j] = min(chemin_vert, chemin_horiz, chemin_diag)
    return matrice_distance_elastique[-1,-1] /(taille1+taille2)
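# (the function above fills a DTW-style elastic-distance matrix and returns
# the accumulated cost normalized by the summed sequence lengths)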

#%% test
nbr_prononce = 5
locuteur=liste_nom[0]
essai = 2

N = 10
chemin1 = genere_nom(nbr_prononce, locuteur, essai)
chemin2 = genere_nom(5, liste_nom[0], 2)
samplerate1, data1 = wav.read(chemin1)
samplerate2, data2 = wav.read(chemin2)
if data1.ndim > 1 :
    data1 = data1[:,0]
    
#print(np.shape(calcul_coeff_lpc_fenetre(data[0:160], 10)))
matrice1 =calcul_lpc(samplerate1, data1, 10)
matrice2 =calcul_lpc(samplerate2, data2, 10)
truc_a_print = distance_elastique(matrice1, matrice2)
print(truc_a_print)
#affichage(chemin)
Example #51
def getCaliPower(file):
    fs, data = read(file)
    data = data.astype("float32")
    filtData = bandPass(data, 1000, fs)
    cali = (filtData * filtData).sum()
    return cali
Example #52
        #print "DISTANCE!!!" + str(dist)
        if dist < distmin:
            distmin = dist
            speaker = k

    return speaker


print "|||||STARTING TEST|||||\n"

for i, wave_file in enumerate(wave_files):
    fname = '/' + wave_file
    to_print = 'Speaker [' + str(
        i) + ']   File:' + wave_file + '    Testing features...'
    print to_print
    (fs, s) = read(directory + fname)

    #Passing test file to MFCC
    mel_coefs = mfcc_p(s, fs)
    mel_coefs = mel_coefs.transpose()
    mel_coefs[0, :] = np.zeros(
        mel_coefs.shape[1]
    )  # 0th coefficient does not carry significant information

    #Passing test file to LPC
    lpc_coefs = lpc(s, fs, orderLPC)
    sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
    sp_lpc = minDistance(lpc_coefs, codebooks_lpc)

    print 'Speaker [' + str(i) + '] matches Speaker [' + str(
        sp_mfcc) + ']   ||MFCC||'
Example #53
# imports inferred from the calls below
import numpy
import serial
from scipy import signal
from scipy.io import wavfile
from scipy.ndimage import interpolation

filename      = r'..\Data\TallShips.wav'

port          = 'com4'
baudRate      = 12_000_000
blockSize     = 3
scale         = 2 ** 21     # 75 kHz frequency deviation
lowPassCutOff = 15_000
lowPassOrder  = 11
chunkSize     = 20_000
updatePeriod  = 1000

uart = serial.Serial(port, baudrate = baudRate, bytesize = 8, parity = 'N', stopbits = 1)

print('Loading file \'%s\'...' % filename)

audioRate, wave = wavfile.read(filename)

print('Low-pass filtering...')

b, a = signal.butter(lowPassOrder, lowPassCutOff / (audioRate / 2.0), 'low')

waveL = signal.filtfilt(b, a, wave[:, 0].astype(numpy.float64))  # numpy.float was removed in NumPy 1.24
waveR = signal.filtfilt(b, a, wave[:, 1].astype(numpy.float64))

print('Resampling...')

symbolRate = baudRate / (blockSize * 10)  # 8N1 framing: 10 line bits per byte (start + 8 data + stop)
resampleScale = symbolRate / audioRate
waveL = interpolation.zoom(waveL, zoom = resampleScale, order = 3)
waveR = interpolation.zoom(waveR, zoom = resampleScale, order = 3)
Example #54
#     'F': 'happiness',
#     'T': 'sadness',
#     'N': 'neutral',
#     }

emo = {
    'W': '1',
    'L': '2',
    'E': '3',
    'A': '4',
    'F': '5',
    'T': '6',
    'N': '0',
}

(rate, sig) = wav.read(sys.argv[1])
print("Wave File Read.")
mfcc_feat = mfcc(sig, rate)
print("MFCC Calculated.")
print("Writing to test file....")
rf = open("mfcc_file.te", "w")
for x in mfcc_feat:
    j = 0
    rf.write("0 ")
    while j < 12:
        j += 1
        rf.write(str(j))
        rf.write(":")
        rf.write(str(x[j]))
        rf.write(" ")
    rf.write("\n")
Example #55
def load_wav_to_torch(full_path):
    """
    Loads wavdata into torch array
    """
    sampling_rate, data = read(full_path)
    return torch.from_numpy(data).float(), sampling_rate
Example #56
def getPower(file):
    fs, data = read(file)
    data = data.astype("float32")
    power = (data*data).sum()
    return power
Example #57
def get_sound_and_normalize(file_name):
    rate, sound = read(file_name)
    sound = sound.astype(np.float64)
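    # z-score normalization: zero mean, unit variance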
    sound -= sound.mean()
    sound /= sound.std()
    return rate, sound
Example #58
# args is a command-line argument mapping parsed earlier in the source
natural_speech_wavfile = args['-n']
world_anasyn_speech_wavfile = args['-w']
modified_anasyn_speech_wavfile = args['-m']
labfile = args['-l']
outdir = args['-o']

outdir_for_natural = join(outdir, "natural")
outdir_for_world = join(outdir, "world")
outdir_for_modified = join(outdir, "modified")

for outdir in [outdir_for_natural, outdir_for_world, outdir_for_modified]:
    if not exists(outdir):
        os.makedirs(outdir)

fs, natural = wavfile.read(natural_speech_wavfile)
world_anasyn = wavfile.read(world_anasyn_speech_wavfile)[1]
modified_anasyn = wavfile.read(modified_anasyn_speech_wavfile)[1]

with open(labfile, "r") as f:
    label = f.readlines()

label = [x.split(' ') for x in label]
# int(x) / 10000 -> [ms]
# [ms] / 1000 * fs -> the number of samples
label = [[
    int(int(x[0]) / 10000 / 1000. * fs),
    int(int(x[1]) / 10000 / 1000. * fs), x[2].split()[0]
] for x in label]

for number, lab in enumerate(label):
"""This script tests basic read/write functionality"""


import numpy as np
import sys
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile

print(sys.path)


# write a 1 kHz sinusoid as 32-bit float PCM mono
samplerate = 187321 # samples/s
sinusoid_frequency = 1000 # Hz
length_seconds = 1.0 # seconds


t = np.linspace(0., length_seconds, int(np.rint(length_seconds*samplerate)))
# amplitude = np.iinfo(np.int32).max
amplitude = 1.0
data = amplitude*np.sin(2.*np.pi*sinusoid_frequency*t)
data = data.astype(np.float32)
wavfile.write("..//audio_examples//example_write.wav", samplerate, data)



samplerate2, data2 = wavfile.read("..//audio_examples//OneCD.wav")
nsamples = data2.shape[0]
t2 = np.linspace(0., nsamples/samplerate2, nsamples)
# plt.plot(t,data)
plt.plot(t2,data2[:,0])
Example #60
    # Discard the imaginary component.
    comp_clip = n.fft.ifft(comp_fft).real

    # calculate actual compression ratio based on storage size
    original_size = 16.0 * L  # uncompressed size  (16 bits per sample)
    compressed_size = 8.0 * 2.0 * len(idxs) + 16.0 * len(
        idxs
    ) + 32.0  # sparse 8-bit spectra, 16-bit spectral indices, 32-bit scale factor
    real_comp_ratio = compressed_size / original_size

    return (comp_clip, comp_fft[0:L2], orig_fft, real_comp_ratio)


# read wav file
# this is the audio signal to be compressed
ts = sw.read("original.wav")
sr = ts[0]  # sample rate
clip = ts[1]  # extract audio file as numpy data vector
if len(clip.shape) == 2:  # if stereo, only use one channel
    print("using only one stereo channel")
    clip = ts[1][:, 0]

# compress and decompress audio file
# compression_ratio=0.95 means 95% reduction in
# file size.
cr = 0.95

# compress full length of the clip
W = 100000
n_window = int(n.floor(len(clip) / W))
comp_clip = n.zeros(len(clip))