def read_sound(fp):
    """Create a normalized float array and data rate from any audio file."""
    if fp.endswith('mp3'):
        try:
            oname = 'temp.wav'
            # cmd = 'lame --decode "{0}" {1}'.format(fp, oname)
            result = subprocess.call(['lame', '--decode', fp, oname])
            assert result == 0
            samplerate, data = wav.read(oname)
        except Exception:
            print("couldn't run lame")
            try:
                import moviepy.editor as mpy
                aud_clip = mpy.AudioFileClip(fp)
                samplerate = aud_clip.fps
                data = aud_clip.to_soundarray()
            except Exception:
                print("moviepy not installed?")
    if fp.endswith('aif'):
        # sf = aifc.open(fp)
        oname = fp
        sf = Sndfile(fp, 'r')
        sf.seek(0)
        data = sf.read_frames(sf.nframes)
        samplerate = sf.samplerate
    if fp.endswith('wav'):
        samplerate, data = wav.read(fp)
    if len(data.shape) > 1:
        data = data[:, 0]
    data = data.astype('float64')
    data /= data.max()
    return data, samplerate
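# A minimal usage sketch (hypothetical file name), assuming subprocess and
# scipy.io.wavfile (as `wav`) are imported as read_sound above expects:
data, samplerate = read_sound('speech_sample.wav')
print(samplerate, data.shape, data.max())  # peak amplitude is normalized to 1.0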
def test_energy_denois(self):
    original_file = self.auxiliary_files_url + "/nai_sample.wav"
    denoised_file = self.auxiliary_files_url + "/nai_sample_energy_denoised.wav"
    result = self.energy_denoise_module.energyDenoise(
        original_file,
        0.2,
        denoised_file,
        False)
    # The function reports that the denoising succeeded
    self.assertEqual(result, True)
    # Check for the denoised file
    denoised_exists = os.path.isfile(denoised_file)
    self.assertEqual(denoised_exists, True)
    # Check that the denoised energy is lower than the initial one
    samp_freq, signal_orig = wavfile.read(original_file)
    energy_orig = 0.0
    for i in signal_orig:
        energy_orig += i * 1.0 * i
    samp_freq, signal_denoised = wavfile.read(denoised_file)
    energy_denoised = 0.0
    for i in signal_denoised:
        energy_denoised += i * 1.0 * i
    self.assertGreater(energy_orig, energy_denoised)
    # Erase the denoised file
    os.remove(denoised_file)
def loadReferenceWords(self, word1_path, word2_path):
    fs, self.word1 = wavfile.read(word1_path)
    fs, self.word2 = wavfile.read(word2_path)
    self.word1 = self.scaler(self.word1)
    self.word2 = self.scaler(self.word2)
    self.word1 = self.word1[self.get_startingpoint(self.word1):self.get_endingpoint(self.word1), :]
    self.word2 = self.word2[self.get_startingpoint(self.word2):self.get_endingpoint(self.word2), :]
def remove_silence(filename):
    (rate, sig) = wav.read(filename)
    framelength = int(round(FRAMELENGTH * rate))
    frameamount = int(math.ceil(len(sig) / framelength))
    newsig = np.array([])
    for i in range(0, frameamount + 1):
        start = i * framelength
        end = start + framelength
        print(end)
        if end > len(sig):
            end = len(sig)
        if start >= len(sig):
            start = len(sig) - 1
        length = end - start
        energy = 0.0
        for j in range(start, end):
            energy = energy + pow(float(sig[j]), 2)
        energy = energy / length
        if energy >= TRESHOLD:
            newsig = np.concatenate((newsig, sig[start:end]))
    newsig = newsig.astype(sig.dtype)
    print("silence removed, saving: " + filename + ".sr")
    wav.write(filename + ".sr", rate, newsig)
    (rate, sig) = wav.read(filename + ".sr")
def cut_video(recording_path, datapack_dir):
    # Read the start/end pattern
    sr1, pattern_wav = wav.read('pattern.wav')
    workingdir = tempfile.mkdtemp()
    # Open the video file
    clip = VideoFileClip(recording_path)
    # Save its audio track temporarily on disk
    clip.audio.write_audiofile(os.path.join(workingdir, "temp_audio.wav"))
    # Read the audio samples, mix down to mono (if necessary), and delete the temporary audio track
    sr2, recording_wav = wav.read(os.path.join(workingdir, "temp_audio.wav"))
    if recording_wav.shape[1] > 1:
        recording_wav = numpy.mean(recording_wav, 1)
    shutil.rmtree(workingdir)
    # Detect the start and end audio pattern
    start, end = detect_start_end_times(pattern_wav, recording_wav, sr2, 4)
    # Cut the video and write it into two separate video and audio files
    clip.subclip(start + 0.4, end).write_videofile(os.path.join(datapack_dir, 'video.mp4'), codec='libx264')
    clip.subclip(start + 0.4, end).audio.write_audiofile(os.path.join(datapack_dir, 'audio.wav'))
def extract_features(recording_files, nr_ceps=12):
    # NOTE: this early return short-circuits the function, so the MFCC
    # extraction below is currently unreachable.
    print("skipping features")
    return Ceps()(range(100))
    nr_utt_in_ubm = 300
    win_length_ms = 25        # The window length of the cepstral analysis in milliseconds
    win_shift_ms = 10         # The window shift of the cepstral analysis in milliseconds
    nr_filters = 24           # NOTSURE The number of filter bands
    nr_ceps = nr_ceps         # The number of cepstral coefficients
    f_min = 0.                # NOTSURE The minimal frequency of the filter bank
    f_max = 4000.             # NOTSURE The maximal frequency of the filter bank
    delta_win = 2             # NOTSURE The integer delta value used for computing the first and second order derivatives
    pre_emphasis_coef = 0.97  # NOTSURE The coefficient used for the pre-emphasis
    dct_norm = True           # NOTSURE A factor by which the cepstral coefficients are multiplied
    mel_scale = True          # Tell whether cepstral features are extracted on a linear (LFCC) or Mel (MFCC) scale
    # TODO add feature wrapping
    if glob.has_magic(recording_files):
        recording_files = glob.glob(recording_files)
    rate, ubm_wav = wavfile.read(recording_files.pop())
    for recording_file in recording_files:
        rate, signal = wavfile.read(recording_file)
        ubm_wav = np.append(ubm_wav, signal)
    c = Ceps(rate, win_length_ms, win_shift_ms, nr_filters, nr_ceps, f_min,
             f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm)
    ubm_wav = np.cast['float'](ubm_wav)  # vector should be in **float**
    mfcc = c(ubm_wav)
    return mfcc
def get_data(path):
    """
    Gets the data associated with an audio file, converting to wav when necessary.

    :param path: path to audio file
    :return: sample rate, audio data
    """
    if path.endswith(".wav"):
        bee_rate, bee_data = read(path)
    else:
        temp = tempfile.NamedTemporaryFile(suffix=".wav")
        temp.close()
        if path.endswith(".flac"):
            sound = AudioSegment.from_file(path, "flac")
            sound.export(temp.name, format="wav")
        elif path.endswith(".mp3"):
            sound = AudioSegment.from_file(path, "mp3")
            sound.export(temp.name, format="wav")
        bee_rate, bee_data = read(temp.name)
        os.remove(temp.name)
    data_type = np.iinfo(bee_data.dtype)
    dmin = data_type.min
    dmax = data_type.max
    bee_data = bee_data.astype(np.float64)
    bee_data = 2.0 * ((bee_data - dmin) / (dmax - dmin)) - 1.0
    bee_data = bee_data.astype(np.float32)
    return bee_rate, bee_data
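# A minimal usage sketch (hypothetical path), assuming pydub's AudioSegment,
# scipy.io.wavfile.read, numpy as np, tempfile and os are imported as get_data expects:
rate, samples = get_data("recordings/hive1.mp3")
print(rate, samples.dtype, samples.min(), samples.max())  # float32 samples rescaled to [-1, 1]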
def main(argv):
    if len(argv) == 6:
        window_size = int(argv[1])
        a_fname = argv[2]
        b_fname = argv[3]
        a_phase_b_mag_fname = argv[4]
        b_phase_a_mag_fname = argv[5]
    else:
        print(
            'Usage: %s ' % argv[0] +
            '<WINDOW_WIDTH> <FILE_1> <FILE_2> <OUTFILE_1> <OUTFILE_2>' + (
                '\n\nSwap magnitude and phase of WAV files FILE_1 and FILE_2.'
                '\n\nWINDOW_WIDTH: STFT frame length (integer # of samples)'
                '\nOUTFILE_1: phase of FILE_1, magnitude of FILE_2'
                '\nOUTFILE_2: phase of FILE_2, magnitude of FILE_1'))
        return 1
    a_rate, a = wavfile.read(a_fname)
    b_rate, b = wavfile.read(b_fname)
    assert a_rate == b_rate
    assert a.dtype == b.dtype
    print('Window width: %d samples = %.3f ms' %
          (window_size, 1e3 * window_size / a_rate))
    a_phase_b_mag, b_phase_a_mag = swap_wav_magnitude(a, b, window_size)
    wavfile.write(a_phase_b_mag_fname, a_rate, a_phase_b_mag)
    wavfile.write(b_phase_a_mag_fname, a_rate, b_phase_a_mag)
    return 0
def mainfn():
    print("In comb4.py")
    # Read the files
    f = wavfile.read('/home/akshay/anaconda/ModulesPython/MUSICGEVD/sep_0.wav')
    rdata1 = f[1]
    fs = f[0]
    print("Sampling frequency: " + str(fs))
    print("shape of rdata1: ", str(rdata1.shape))
    # rdata1 = np.matrix(rdata1)
    # print("shape of rdata1: ", str(rdata1.shape))
    g = wavfile.read('/home/akshay/anaconda/ModulesPython/MUSICGEVD/sep_1.wav')
    rdata2 = g[1]
    # rdata2 = np.matrix(rdata2)
    h = wavfile.read('/home/akshay/anaconda/ModulesPython/MUSICGEVD/sep_2.wav')
    rdata3 = h[1]
    # rdata3 = np.matrix(rdata3)
    i = wavfile.read('/home/akshay/anaconda/ModulesPython/MUSICGEVD/sep_3.wav')
    rdata4 = i[1]
    # rdata4 = np.matrix(rdata4)
    rdata = np.array([[rdata1], [rdata2], [rdata3], [rdata4]])
    rdatat = rdata[:, 0, :]
    print("Shape of rdatat: ", str(rdatat.shape))
    return rdatat
def output(partIdx): """Uses the student code to compute the output for test cases.""" outputString = '' if partIdx == 0: # This is ScaledFFTdB from assignment1 import scaled_fft_db r,x = wavfile.read('data/a1_submissionInput.wav') X = scaled_fft_db(x) for val in X: outputString += '%.5f ' % (val) elif partIdx == 1: # This is PrototypeFilter from assignment2 import prototype_filter h = prototype_filter() # test signal s = np.loadtxt('data/a2_submissionInput.txt') r = np.convolve(h, s)[4*512:5*512]/2 for val in r: outputString += '%.5f ' % val elif partIdx == 2: # This is SubbandFiltering from assignment3 import subband_filtering r,x = wavfile.read('data/a3_submissionInput.wav') h = np.hanning(512) X = subband_filtering(x, h) for val in X: outputString += '%.5f ' % (val) elif partIdx == 3: # This is Quantization from assignment4 import quantization from parameters import EncoderParameters params = EncoderParameters(44100, 2, 64) val_in = np.loadtxt('data/a4_submissionInput.txt') for r,row in enumerate(val_in): val = row[0] scf = row[1] ba = int(row[2]) QCa = params.table.qca[ba-2] QCb = params.table.qcb[ba-2] val = quantization(val, scf, ba, QCa, QCb) outputString += '%d ' % (val) return outputString.strip()
def processing(): """post-processing of MLSbuf and recBuf, using the matched filter functions""" # -- start recording and playback in async. mode play_while_recording() global SAMPLE_RATE # -- latency for input and output devs, obtained using portaudio pa_devs script inputLatency = 0.0087 outputLatency = 0.0087 # -- convert latencies to num. of samples latencySamples = math.ceil((inputLatency+outputLatency)*SAMPLE_RATE) # -- calibration samples (uncomment for debugging) calSamp = 52 # -- load recording buffer into numpy array recData = read("recBuf.wav") recBuf = np.array(recData[1],dtype =float) # -- index of internal delays & calibritation samples to subtract interDelaySamp = np.s_[0:(latencySamples + calSamp)] recBuf = np.delete(recBuf,interDelaySamp) # -- remove excess samples from the recording buffer removeExcessSamples = np.s_[6000:] recBuf = np.delete(recBuf,removeExcessSamples) # -- load playback buffer MLSdata = read("MLS.wav") MLSbuf = np.array(MLSdata[1],dtype =float) # -- compute delay using Matched Filters & normalize xcorr = matched_filter(MLSbuf,recBuf)/50000000000.0 # -- get gain gain = get_gain(MLSbuf,recBuf) # -- peak detector prop_delay = peak_detector(xcorr) # -- plot recorded seq, Tx MLS seq. (uncomment for debugging) plt.figure(1) plt.plot(MLSbuf) plt.title("MLS sequence") plt.xlabel("samples") plt.grid(True) plt.figure(2) plt.plot(recBuf) plt.title("Recorded MLS sequence") plt.xlabel("samples") plt.ylabel("Amplitude") plt.grid(True) plt.figure(3) plt.plot(abs(xcorr)) plt.title("Matched Filter Output") plt.xlabel("delay (samples)") plt.ylabel("Rxy") plt.grid(True) plt.show()
def plot_from_wavfile(file1, file2):
    '''
    Given two wav files, plot their frequency spectra
    '''
    rate1, data1 = wavefile.read(file1)
    rate2, data2 = wavefile.read(file2)
    plot_from_rawdata(data1, data2, rate1)
def test_write_edge_values(self):
    # Write edge values 1.0
    samples = numpy.ones((441, 1), dtype=numpy.float32)
    dest_file = NamedTemporaryFile(delete=True)
    wfile, infos = wav.open_write_mode(dest_file.name, 44100, 1)
    wav.write_block(wfile, samples)
    wfile._file.flush()  # To force the file to be written to the disk
    frame_rate, samples_written = sp_wavfile.read(dest_file.name)
    numpy.testing.assert_array_equal(samples_written,
                                     numpy.array([2**15 - 1] * 441, dtype=numpy.int16))
    dest_file.close()

    # Write value 2.0, clipped to 1.0
    samples = numpy.ones((441, 1), dtype=numpy.float32) * 2.0
    dest_file = NamedTemporaryFile(delete=True)
    wfile, infos = wav.open_write_mode(dest_file.name, 44100, 1)
    wav.write_block(wfile, samples)
    wfile._file.flush()  # To force the file to be written to the disk
    frame_rate, samples_written = sp_wavfile.read(dest_file.name)
    numpy.testing.assert_array_equal(samples_written,
                                     numpy.array([2**15 - 1] * 441, dtype=numpy.int16))
    dest_file.close()

    # Write edge values -1.0
    samples = numpy.ones((441, 1), dtype=numpy.float32) * -1
    dest_file = NamedTemporaryFile(delete=True)
    wfile, infos = wav.open_write_mode(dest_file.name, 44100, 1)
    wav.write_block(wfile, samples)
    wfile._file.flush()  # To force the file to be written to the disk
    frame_rate, samples_written = sp_wavfile.read(dest_file.name)
    numpy.testing.assert_array_equal(samples_written,
                                     numpy.array([-2**15] * 441, dtype=numpy.int16))
    dest_file.close()
def __init__(self, snd, fps=None, bitrate=3000):
    Clip.__init__(self)
    if isinstance(snd, str):
        if not snd.endswith('.wav'):
            temp = 'temp.wav'
            ffmpeg.extract_sound(snd, temp, fps, bitrate)
            fps, arr = wavfile.read(temp)
            # os.remove(temp)
        else:
            fps, arr = wavfile.read(snd)
        self.array = arr
        self.fps = fps
    else:
        self.array = snd
        self.fps = fps
    self.duration = 1.0 * len(self.array) / self.fps

    def gf(t):
        i = int(self.fps * t)
        if i < 0 or i >= len(self.array):
            return 0
        else:
            return self.array[i]

    self.get_frame = gf
def test_realFile(self):
    original_file = self.auxiliary_files_url + "/nai_sample.wav"
    denoised_file = self.auxiliary_files_url + "/nai_sample_sox_denoised.wav"
    user = '******'
    audio_type = 'nao_wav_1_ch'
    scale = 0.2
    result = self.sox_denoise_module.soxDenoise(
        user,
        audio_type,
        original_file,
        denoised_file,
        scale)
    # The function reports that the denoising succeeded
    self.assertEqual(result, "true")
    # Check for the denoised file
    denoised_exists = os.path.isfile(denoised_file)
    self.assertEqual(denoised_exists, True)
    # Check that the denoised energy is lower than the initial one
    samp_freq, signal_orig = wavfile.read(original_file)
    energy_orig = 0.0
    for i in signal_orig:
        energy_orig += i * 1.0 * i
    samp_freq, signal_denoised = wavfile.read(denoised_file)
    energy_denoised = 0.0
    for i in signal_denoised:
        energy_denoised += i * 1.0 * i
    self.assertGreater(energy_orig, energy_denoised)
    # Erase the denoised file
    os.remove(denoised_file)
def get_offset_wav(wav_filename1, wav_filename2, time_limit=300):
    """Return the offset in seconds between wav_filename1 and wav_filename2,
    which are recordings of the same event with potentially different starting
    times.

    Returns the number of seconds that wav_filename2 starts after
    wav_filename1 (possibly negative).

    If time_limit is provided, clip the files to the first time_limit seconds.
    This can substantially speed up offset detection."""
    rate1, data1 = sp_wav.read(wav_filename1)
    rate2, data2 = sp_wav.read(wav_filename2)
    # the two files must have the same sampling rate
    assert(rate1 == rate2)
    if time_limit is not None:
        data1 = data1[0:rate1 * time_limit]
        data2 = data2[0:rate2 * time_limit]
    offset_samples = get_offset_xcorr(data1, data2)
    offset_seconds = offset_samples / float(rate1)
    return offset_seconds
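# A minimal usage sketch (hypothetical file names), assuming scipy.io.wavfile is
# imported as sp_wav and get_offset_xcorr is defined as get_offset_wav expects:
offset = get_offset_wav("mic_a.wav", "mic_b.wav", time_limit=60)
print("mic_b.wav starts %.3f s after mic_a.wav" % offset)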
def perf_eval(param):
    # written in base 3000: recover the nperseg value from param
    nperseg = param % 3000
    # written in base 3000: recover the index of the number of music segments
    num_of_seg_idx = (param - nperseg) // 3000
    num_of_seg = num_of_segs[num_of_seg_idx]
    input_rate, input_sig = wavfile.read(input_dir + song_name + '.wav')
    output_rate, output_sig = wavfile.read(output_dir + song_name + '.wav')
    # the +1 in the denominator is because we exclude the last piece of music
    # to only consider music pieces of the same size.
    input_seg_len = input_sig.shape[0] / (num_of_seg + 1)
    output_seg_len = output_sig.shape[0] / (num_of_seg + 1)
    if input_rate != output_rate:
        print("Rate Mismatch!")
        sys.exit(0)
    # print(nperseg, nperseg_step, input_seg_len, output_seg_len)
    if np.min((input_seg_len, output_seg_len)) * 0.7 < nperseg * nperseg_step:
        print("Nothing to do!")
        sys.exit(0)
    res = estim_diff(input_sig, input_seg_len, output_sig, output_seg_len,
                     nperseg, num_of_seg, nperseg_step)
    f = open('/agbs/cluster/naji/Linear Filters/Echo/out/Winter/Room/' +
             str(num_of_seg) + '/' + str(nperseg) + '.txt', 'w')
    print(nperseg, file=f)
    print(np.mean(res > 0), file=f)
def test_ubm_var_channel():
    ubm = GMM.load('model/ubm.mixture-32.person-20.immature.model')
    train_duration = 8.
    nr_test = 5
    test_duration = 3.
    audio_files = ['xinyu.vad.wav', 'wyx.wav']
    X_train, y_train, X_test, y_test = [], [], [], []
    for audio_file in audio_files:
        fs, signal = wavfile.read(audio_file)
        signal = monotize_signal(signal)
        train_len = int(fs * train_duration)
        test_len = int(fs * test_duration)
        X_train.append(mix_feature((fs, signal[:train_len])))
        y_train.append(audio_file)
        for i in range(nr_test):
            start = random.randint(train_len, len(signal) - test_len)
            X_test.append(mix_feature((fs, signal[start:start + train_len])))
            y_test.append(audio_file)
    gmmset = GMMSet(32, ubm=ubm)
    gmmset.fit(X_train, y_train)
    y_pred = gmmset.predict_with_reject(X_test)
    for i in range(len(y_pred)):
        print(y_test[i], y_pred[i], '' if y_test[i] == y_pred[i] else 'wrong')
    for imposter_audio_file in map(lambda x: 'test-{}.wav'.format(x), range(5)):
        fs, signal = wavfile.read(imposter_audio_file)
        signal = monotize_signal(signal)
        imposter_x = mix_feature((fs, signal))
        print(gmmset.predict_one_with_rejection(imposter_x))
def find_offset(file1, file2, fs=8000, trim=60*15, correl_nframes=1000):
    tmp1 = convert_and_trim(file1, fs, trim)
    tmp2 = convert_and_trim(file2, fs, trim)
    # Removing warnings because of 18 bits block size
    # outputted by ffmpeg
    # https://trac.ffmpeg.org/ticket/1843
    warnings.simplefilter("ignore", wavfile.WavFileWarning)
    a1 = wavfile.read(tmp1, mmap=True)[1] / (2.0 ** 15)
    a2 = wavfile.read(tmp2, mmap=True)[1] / (2.0 ** 15)
    # We truncate zeroes off the beginning of each signal
    # (only seems to happen in ffmpeg, not in sox)
    a1 = ensure_non_zero(a1)
    a2 = ensure_non_zero(a2)
    mfcc1 = mfcc(a1, nwin=256, nfft=512, fs=fs, nceps=13)[0]
    mfcc2 = mfcc(a2, nwin=256, nfft=512, fs=fs, nceps=13)[0]
    mfcc1 = std_mfcc(mfcc1)
    mfcc2 = std_mfcc(mfcc2)
    c = cross_correlation(mfcc1, mfcc2, nframes=correl_nframes)
    max_k_index = np.argmax(c)
    # The MFCC window overlap is hardcoded in scikits.talkbox
    offset = max_k_index * 160.0 / float(fs)  # hop size in samples / sample rate
    score = (c[max_k_index] - np.mean(c)) / np.std(c)  # standard score of peak
    os.remove(tmp1)
    os.remove(tmp2)
    return offset, score
def load_data(syllable, N, used_samples, snr, sample_order=None):
    """Function that goes through all N samples of syllable and loads its wave data.

    :param syllable: complete path name of syllable (string)
    :param N: number of samples to load
    :param used_samples: number of samples to skip in the beginning
    :param snr: the strength of the noise
    :param sample_order: if not None should be vector of indices of samples to be loaded (default = None)

    :returns syllable_waves: list of N sample waves of syllable
    """
    samples = [files for files in os.listdir(syllable)]
    syllable_waves = []
    if sample_order is None:
        for i in range(int(N)):
            rate, wave = wav.read(syllable + '/' + samples[i + used_samples])
            if snr != 0.0:
                noiseLvl = np.sqrt(np.var(wave) / snr)
            else:
                noiseLvl = 0.0
            wave = wave + noiseLvl * np.random.randn(len(wave))
            syllable_waves.append([wave, rate])
    else:
        for i in sample_order:
            rate, wave = wav.read(syllable + '/' + samples[i])
            if snr != 0.0:
                noiseLvl = np.sqrt(np.var(wave) / snr)
            else:
                noiseLvl = 0.0
            wave = wave + noiseLvl * np.random.randn(len(wave))
            syllable_waves.append([wave, rate])
    return syllable_waves
def estim_diff(percent=256): sound_counter=0 res=np.empty(len(input_file_names)) for i in range(res.shape[0]): input_rate,input_sig=wavfile.read(input_dir+'Segments/'+input_file_names[i]) output_rate,output_sig=wavfile.read(output_dir+'Segments/'+output_file_names[i]) input_sig=pcm2float(input_sig,'float32') output_sig=pcm2float(output_sig,'float32') min_size=np.min((input_sig[:,0].shape[0],output_sig[:,0].shape[0])) #print min_size,min_size*percent #S_inp=np.absolute(fft(input_sig[:min_size,0]-np.mean(input_sig[:min_size,0]))) #S_out=np.absolute(fft(output_sig[:min_size,0]-np.mean(output_sig[:min_size,0]))) t=time() nperseg=int(min_size*percent)-np.mod(int(min_size*percent),10) real_perc=float(float(nperseg)/int(min_size*percent)) S_inp=signal.welch(input_sig[:min_size,0],nperseg=nperseg)[1] S_out=signal.welch(output_sig[:min_size,0],nperseg=nperseg)[1] #S_inp=ndim_welch(input_sig[:min_size,0][None,...],nperseg=int(min_size*percent))[1] #S_out=ndim_welch(output_sig[:min_size,0][None,...],nperseg=int(min_size*percent))[1] #print time()-t #print S_inp_1,S_inp_2 res[sound_counter]=delta_estimator_3(S_out/S_inp,S_inp)-delta_estimator_3(S_inp/S_out,S_out) #out=float2pcm(output_sig,'int16') sound_counter+=1 return real_perc,int(min_size*percent),res
def generate_mixture(src1, src2, fname, attn1, attn2):
    """
    Mixes 10 seconds of two sources of the same sample rate and saves them as fname.

    Args:
        src1: filename for the first source
        src2: filename for the second source
        fname: output filename to save as
        attn1: relative attenuation for the first source
        attn2: relative attenuation for the second source

    Returns:

    """
    sr1, data1 = wav.read(src1)
    if data1.dtype == np.dtype("int16"):
        data1 = data1 / float(np.iinfo(data1.dtype).max)
    sr2, data2 = wav.read(src2)
    if data2.dtype == np.dtype("int16"):
        data2 = data2 / float(np.iinfo(data2.dtype).max)
    if sr1 != sr2:
        raise ValueError("Both sources must have the same sample rate")
    attn1 = float(attn1 + 1) / 2
    attn2 = float(attn2 + 1) / 2
    sample1 = data1[0:10 * sr1]
    sample2 = data2[0:10 * sr1]
    left = attenuate(sample1, attn1) + attenuate(sample2, attn2)
    right = attenuate(sample1, 1 - attn1) + attenuate(sample2, 1 - attn2)
    signal = np.vstack((left, right))
    scipy.io.wavfile.write(fname, sr1, signal.T)
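# A minimal usage sketch (hypothetical file names), assuming scipy.io.wavfile is
# imported as wav, numpy as np, and attenuate() is defined as generate_mixture expects.
# attn values in [-1, 1] are remapped to [0, 1] left-channel weights, with (1 - attn) on the right:
generate_mixture('vocals.wav', 'guitar.wav', 'mix.wav', attn1=0.5, attn2=-0.5)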
def simple_noise_filter(target, files, method=median_by_intensity, combination=flatten, section_length=4096): # load all .mp3 files into an arrays # bin each to a certain length #print time() feeds = [section_by_length(wavfile.read(file)[1], section_length) for file in files] samplerate = wavfile.read(files[0])[0] #print time() # perform fft on each bin, select median of each max_len = len(max(feeds, key=len)) sections = [] for i in range(max_len): begin = time() freqs = [fft.fft(feed[i], axis=0) for feed in feeds] #print "Fourier per ~.1s feed: ", #print (time()-begin)/3. begin = time() #filtered_freqs = [median_by_intensity(freqs, j) for j in range(len(freqs[0]))] # traverse the arrays in parallel filtered_freqs = [method(freqs, j) for j in range(len(freqs[0]))] #print "Filtering: ", #print (time()-begin) begin = time() sections += [real(fft.ifft(filtered_freqs, axis=0)).astype(feeds[0][0].dtype)] #print "Inversing per ~.1s feed: ", #print (time() - begin) # output #print time() samples = combination(sections) wavfile.write(target, samplerate, samples)
def generate_reverb(signal, reverb, fname, iter_range):
    """
    Adds reverb from the path reverb to the data in the path signal and saves it as fname.
    Applies reverb iteratively over iter_range.

    :param signal: the filename for the stereo input signal
    :param reverb: the filename for the stereo impulse response
    :param fname: the output filename to save as
    :param iter_range: the max number of iterations to convolve with the signal
    :return:
    """
    sr, data = wav.read(signal)
    if data.dtype == np.dtype("int16"):
        data = data / float(np.iinfo(data.dtype).max)
    sr_ir, data_ir = wav.read(reverb)
    if data_ir.dtype == np.dtype("int16"):
        data_ir = data_ir / float(np.iinfo(data_ir.dtype).max)
    if sr_ir != sr:
        raise ValueError("Impulse Response must have same sample rate as signal")
    prev_data = data
    for i in range(0, iter_range + 1):
        if i > 0:
            mix = add_reverb(prev_data.T, data_ir.T)
            prev_data = np.copy(mix).T
        else:
            mix = data.T
        if not os.path.exists(os.path.splitext(fname)[0] + '-' + str(i) + '.wav'):
            scipy.io.wavfile.write(os.path.splitext(fname)[0] + '-' + str(i) + '.wav', sr, mix.T)
def mix_files(f1, f2):
    base1 = f1.split('/')[-1].split('.wav')[0]
    base2 = f2.split('/')[-1].split('.wav')[0]
    (fs, sig) = wav.read(f1)
    s1 = sig.reshape((len(sig), 1))
    del sig
    (fs, sig) = wav.read(f2)
    s2 = sig.reshape((len(sig), 1))
    del sig
    block_length = 5 * fs
    s1_blocks = enframe(s1, block_length, block_length)
    s2_blocks = enframe(s2, block_length, block_length)
    del s1, s2
    nrg1 = 0.707 * np.sqrt(np.sum(np.power(s1_blocks, 2), axis=1))
    nrg2 = 0.707 * np.sqrt(np.sum(np.power(s2_blocks, 2), axis=1))
    for i in range(len(nrg1)):
        db1 = np.log(nrg1[i])
        db2 = np.log(nrg2[i])
        if (db1 >= 9) and (db2 >= 9) and (0.1 < abs(db1 - db2) < 5):
            sir = '%.2f' % (db1 - db2)
            ovl_name = ('/erasable/nxs113020/wav_ovl/' + base1 + '_' + base2 +
                        '_sir' + sir + '_' + str(i) + '.wav')
            overlapped = s1_blocks[i, :] + s2_blocks[i, :]
            nrg_ovl = 0.707 * np.sqrt(np.sum(np.power(overlapped, 2)))
            scikits.audiolab.wavwrite(overlapped / nrg_ovl, ovl_name, fs, 'pcm16')
def generate(self, fileList, inputParam1, inputParam2, inputParam3, inputParam4):
    for path in fileList:
        # add Music objects to pl (playlist). Pass in filename, full L/R data, and mean-ed data
        self.pl.append(Music(path.split("/")[-1],
                             wav.read(path)[1],
                             wav.read(path)[1].mean(axis=1)))
    print(self.pl[1].title)
    print(self.pl[1].data)
    print(self.pl[1].avgData)
    print(inputParam1, inputParam2, inputParam3, inputParam4)
def wait_for_wav(filename):
    # Super ugly hack! Since Csound might not be finished writing to the file,
    # we try to read it, and upon failure (i.e. it was not closed yet) we wait 0.05 seconds.
    while True:
        try:
            wavfile.read(filename)
            break
        except Exception:
            time.sleep(.05)
    return filename
def main():
    fs, bg_signal = wavfile.read(sys.argv[1])
    ltsd = LTSD_VAD()
    ltsd.init_params_by_noise(fs, bg_signal)
    fs, signal = wavfile.read(sys.argv[2])
    vaded_signal = ltsd.filter(signal)
    wavfile.write('vaded.wav', fs, vaded_signal)
def training(nfiltbank, orderLPC): nSpeaker = 8 nCentroid = 16 codebooks_mfcc = np.empty((nSpeaker,nfiltbank,nCentroid)) codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid)) directory = os.getcwd() + '/train'; fname = str() for i in range(nSpeaker): fname = '/s' + str(i+1) + '.wav' print('Now speaker ', str(i+1), 'features are being trained' ) (fs,s) = read(directory + fname) mel_coeff = mfcc(s, fs, nfiltbank) lpc_coeff = lpc(s, fs, orderLPC) codebooks_mfcc[i,:,:] = lbg(mel_coeff, nCentroid) codebooks_lpc[i,:,:] = lbg(lpc_coeff, nCentroid) plt.figure(i) plt.title('Codebook for speaker ' + str(i+1) + ' with ' + str(nCentroid) + ' centroids') for j in range(nCentroid): plt.subplot(211) plt.stem(codebooks_mfcc[i,:,j]) plt.ylabel('MFCC') plt.subplot(212) markerline, stemlines, baseline = plt.stem(codebooks_lpc[i,:,j]) plt.setp(markerline,'markerfacecolor','r') plt.setp(baseline,'color', 'k') plt.ylabel('LPC') plt.axis(ymin = -1, ymax = 1) plt.xlabel('Number of features') plt.show() print('Training complete') #plotting 5th and 6th dimension MFCC features on a 2D plane #comment lines 54 to 71 if you don't want to see codebook codebooks = np.empty((2, nfiltbank, nCentroid)) mel_coeff = np.empty((2, nfiltbank, 68)) for i in range(2): fname = '/s' + str(i+2) + '.wav' (fs,s) = read(directory + fname) mel_coeff[i,:,:] = mfcc(s, fs, nfiltbank)[:,0:68] codebooks[i,:,:] = lbg(mel_coeff[i,:,:], nCentroid) plt.figure(nSpeaker + 1) s1 = plt.scatter(mel_coeff[0,6,:], mel_coeff[0,4,:],s = 100, color = 'r', marker = 'o') c1 = plt.scatter(codebooks[0,6,:], codebooks[0,4,:], s = 100, color = 'r', marker = '+') s2 = plt.scatter(mel_coeff[1,6,:], mel_coeff[1,4,:],s = 100, color = 'b', marker = 'o') c2 = plt.scatter(codebooks[1,6,:], codebooks[1,4,:], s = 100, color = 'b', marker = '+') plt.grid() plt.legend((s1, s2, c1, c2), ('Sp1','Sp2','Sp1 centroids', 'Sp2 centroids'), scatterpoints = 1, loc = 'upper left') plt.show() return (codebooks_mfcc, codebooks_lpc)
def run(): print "Creating file voiceCepstrums.arff..." files = [] output = open("voiceCepstrums.arff", 'w') for x in os.walk("AudioRecordings"): files.append(x) females = files[1][2] males = files[2][2] output.write("@relation voiceCepstrums\n\n") output.write("@attribute coefficient1 Continuous\n") output.write("@attribute coefficient2 Continuous\n") output.write("@attribute coefficient3 Continuous\n") output.write("@attribute coefficient4 Continuous\n") output.write("@attribute coefficient5 Continuous\n") output.write("@attribute gender {male, female}\n\n") output.write("@data\n") for filename in males: if filename.endswith(".wav") or filename.endswith(".WAV"): sampFreq, data = wavfile.read('AudioRecordings/Male/' + filename) cepstrum = getCepstrum(data) frequencies = {} for i in range(0, len(data)): coefficient = data[i] if coefficient[0] != float('Inf') and coefficient[0] != 0.0: frequency = (i*sampFreq)/len(data) frequencies[frequency] = coefficient[0]; sortedFrequencies = sorted(frequencies.items(), key=operator.itemgetter(1), reverse=True); sortedFrequencies = sortedFrequencies for i in range(0, 5): output.write(str(sortedFrequencies[i][0])) output.write(", ") output.write("male\n") for filename in females: if filename.endswith(".wav") or filename.endswith(".WAV"): sampFreq, data = wavfile.read('AudioRecordings/Female/' + filename) # time =float(len(data))/float(sampFreq) # frequency = cepstrum = getCepstrum(data) frequencies = {} for i in range(0, len(data)): coefficient = data[i] if coefficient[0] != float('Inf') and coefficient[0] != 0.0: frequency = (i*44100)/len(data) frequencies[frequency] = coefficient[0]; sortedFrequencies = sorted(frequencies.items(), key=operator.itemgetter(1), reverse=True); sortedFrequencies = sortedFrequencies for i in range(0, 5): output.write(str(sortedFrequencies[i][0])) output.write(", ") output.write("female\n")
from features import ssc
import numpy as np
import scipy.io.wavfile as wav

file_name = "splateroyyo.wav"
(rate, sig) = wav.read(file_name)  # or whatever the filename is
f = ssc(rate, sig)
print(f.shape)

fd = open("x_train.npy", "a+b")
np.save(fd, f)
fd.close()

if file_name[0] == 's':
    y_train = np.ones((f.shape[0], 1))
elif file_name[0] == 'e':
    y_train = np.zeros((f.shape[0], 1))

fdd = open("y_train.npy", "a+b")
np.save(fdd, y_train)
fdd.close()
def load_metadata_from_wavs(): global background_noise background = [ f for f in os.listdir(join(TRAIN_AUDIO_PATH, '_background_noise_')) if f.endswith('.wav') ] for wav in background: samples, sample_rate = librosa.load(join( join(TRAIN_AUDIO_PATH, '_background_noise_'), wav), sr=INPUT_SAMPLES) background_noise.append(samples) dirs = [ f for f in os.listdir(TRAIN_AUDIO_PATH) if isdir(join(TRAIN_AUDIO_PATH, f)) ] dirs.sort() wavs = [] labels = [] unknown_wavs = [] unknown_list = [ d for d in dirs if d not in TARGET_LIST and d != '_background_noise_' ] print('target_list : ', end='') print(TARGET_LIST) print('unknowns_list : ', end='') print(unknown_list) print('silence : _background_noise_') i = 0 for directory in dirs[1:]: waves = [ f for f in os.listdir(join(TRAIN_AUDIO_PATH, directory)) if f.endswith('.wav') ] for j, wav in enumerate(waves): # samples, sample_rate = librosa.load(join(join(TRAIN_AUDIO_PATH, directory), wav), sr=16000) sample_rate, samples = wavfile.read( join(join(TRAIN_AUDIO_PATH, directory), wav)) samples = np.concatenate((np.zeros((INPUT_SAMPLES - 8000) // 2, dtype="float32"), samples[::2], np.zeros((INPUT_SAMPLES - 8000) // 2, dtype="float32"))) if len(samples) != INPUT_SAMPLES: continue if directory in unknown_list: unknown_wavs.append((wav, directory)) else: wavs.append((TYPE_REGULAR, wav)) labels.append(directory) wavc = len(wavs) for n in range(NOISE_MULTIPLIER): for i in range(wavc): wavs.append((TYPE_NOISED, wavs[i][1])) labels.append(labels[i]) for i in range(UNKNOWN_COUNT): wavs.append((TYPE_UNKNOWN, random.choice(unknown_wavs))) labels.append("unknown") for i in range(SILENCE_COUNT): wavs.append((TYPE_SILENCE, random.randrange(0, len(background_noise)))) labels.append("silence") return wavs, labels
room_dim = [8, 9]

# source location
source = np.array([1, 4.5])

# create an anechoic room with sources and mics
room = pra.ShoeBox(room_dim, fs=16000, max_order=15,
                   absorption=0.35, sigma2_awgn=1e-8)

# get signals
signals = [
    np.concatenate(
        [wavfile.read(f)[1].astype(np.float32) for f in source_files])
    for source_files in wav_files
]
delays = [1., 0.]
locations = [[2.5, 3], [2.5, 6]]

# add mic and good source to room
# Add silent signals to all sources
for sig, d, loc in zip(signals, delays, locations):
    room.add_source(loc, signal=np.zeros_like(sig), delay=d)

# add microphone array
room.add_microphone_array(
    pra.MicrophoneArray(np.c_[[6.5, 4.49], [6.5, 4.51]], fs=room.fs))

# compute RIRs
def audiofile_to_input_vector(audio_filename, numcep, numcontext): # Load wav files fs, audio = wav.read(audio_filename) # Get mfcc coefficients orig_inputs = mfcc(audio, samplerate=fs, numcep=numcep) # For each time slice of the training set, we need to copy the context this makes # the numcep dimensions vector into a numcep + 2*numcep*numcontext dimensions # because of: # - numcep dimensions for the current mfcc feature set # - numcontext*numcep dimensions for each of the past and future (x2) mfcc feature set # => so numcep + 2*numcontext*numcep train_inputs = np.array([], np.float32) train_inputs.resize((orig_inputs.shape[0], numcep + 2*numcep*numcontext)) # Prepare pre-fix post fix context (TODO: Fill empty_mfcc with MCFF of silence) empty_mfcc = np.array([]) empty_mfcc.resize((numcep)) # Prepare train_inputs with past and future contexts time_slices = range(train_inputs.shape[0]) context_past_min = time_slices[0] + numcontext context_future_max = time_slices[-1] - numcontext for time_slice in time_slices: ### Reminder: array[start:stop:step] ### slices from indice |start| up to |stop| (not included), every |step| # Pick up to numcontext time slices in the past, and complete with empty # mfcc features need_empty_past = max(0, (context_past_min - time_slice)) empty_source_past = list(empty_mfcc for empty_slots in range(need_empty_past)) data_source_past = orig_inputs[max(0, time_slice - numcontext):time_slice] assert(len(empty_source_past) + len(data_source_past) == numcontext) # Pick up to numcontext time slices in the future, and complete with empty # mfcc features need_empty_future = max(0, (time_slice - context_future_max)) empty_source_future = list(empty_mfcc for empty_slots in range(need_empty_future)) data_source_future = orig_inputs[time_slice + 1:time_slice + numcontext + 1] assert(len(empty_source_future) + len(data_source_future) == numcontext) if need_empty_past: past = np.concatenate((empty_source_past, data_source_past)) else: past = data_source_past if need_empty_future: future = np.concatenate((data_source_future, empty_source_future)) else: future = data_source_future past = np.reshape(past, numcontext*numcep) now = orig_inputs[time_slice] future = np.reshape(future, numcontext*numcep) train_inputs[time_slice] = np.concatenate((past, now, future)) assert(len(train_inputs[time_slice]) == numcep + 2*numcep*numcontext) # Whiten inputs (TODO: Should we whiten) train_inputs = (train_inputs - np.mean(train_inputs))/np.std(train_inputs) # Return results return train_inputs
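# A quick, self-contained shape check for the context windowing described above:
# with numcep MFCC coefficients and numcontext past/future frames, each training
# row holds numcep + 2*numcep*numcontext values (the numbers below are illustrative, not from the source).
numcep, numcontext = 26, 9
row_width = numcep + 2 * numcep * numcontext
print(row_width)  # 26 + 2*26*9 = 494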
def mainfn(): #Read the file wavfil = raw_input("Wav file to be read: ") f = wavfile.read( '/home/akshay/anaconda/ModulesPython/MUSICGEVD/{}'.format(wavfil)) rdata = f[1] nc = rdata.shape[1] print "Number of channels: " + str(nc) fs = f[0] print "Sampling frequency: " + str(fs) (Pxx, freqs, bins, im) = plt.specgram(rdata[:, 0], NFFT=512, Fs=fs, noverlap=160) plt.title('Spectrogram') plt.xlabel('Time') plt.ylabel('Frequency in Hz') plt.show() tf, az, Nd = readingdat.mainfn() #print tf #print str(tf.shape) # INPUTS: N = 512 # Block Size & N point FFT M = 160 # Block Increment WINDOW = 50 # Time averaging of the CM, WINDOW_TYPE is FUTURE ## Plot the audio signal - Channel 1 ##xaxis = np.arange(len(rdata)) ##plt.plot(xaxis,rdata[:,0]) ##plt.title('Channel 1: First microphone') #plt.show() # Divide into frames with overlap - Use blockd ## Assumption: rdata has channels on columns, and samples on rows rdata = np.transpose( rdata) #Comment out if samples on col. and channels on rows num_blocks = nblocks(rdata[0, :], N, M) blockeddata = np.zeros((nc, num_blocks, N)) # blockeddata is a 3D matrix for i in range(nc): blockeddata[i, :, :] = blockd(rdata[i, :], N, M) print "Shape of blocked data: " + str(blockeddata.shape) #EACH FRAME COMPUTATION: #Each frame has to be taken for FFT ## fftmat = mfft(blockeddata[:,0,:]) fnum = 0 #fnum is the frame index #The correlation matrix is calculated once every PERIOD number of frames. # Time averaging is done with WINDOW_TYPE = FUTURE #print "Calculating the time-averaged correlation matrices" N_avg = noisecormat.mainfn() for t in range(int(math.ceil(num_blocks / WINDOW))): #fnum is Frame index i = 0 R_tot = np.zeros((nc, nc, N / 2 + 1), dtype=complex) print "Localization for frames: ", str( t * 50), " to ", str((t + 1) * 50 - 1) while i < WINDOW: if fnum >= num_blocks: break else: fftmat = mfft(blockeddata[:, fnum, :]) #print fftmat #print "Shape of fftmat: " + str(fftmat.shape) R_tot = R_tot + Rmatrix(fftmat) fnum = fnum + 1 i = i + 1 ## print i ## print fnum R_avg = R_tot / (i) print "R_avg matrix for freq bin 20: " print R_avg[:, :, 20] ## print "Shape of R_avg is: " + str(R_avg.shape) print "N_avg shape: ", str(N_avg.shape) for fy in range(N / 2 + 1): N_avg2 = sl.inv(np.matrix(N_avg[:, :, fy])) N_avg2 = sl.sqrtm(N_avg2) R_avg[:, :, fy] = N_avg2 * R_avg[:, :, fy] R_avg[:, :, fy] = R_avg[:, :, fy] * N_avg2 #Eigen value decomposition is done for each frequency w = np.zeros((nc, N / 2 + 1), dtype=complex) v = np.zeros((nc, nc, N / 2 + 1), dtype=complex) for fy in range(N / 2 + 1): w[:, fy], v[:, :, fy] = np.linalg.eig(R_avg[:, :, fy]) #print "Shape of w: " + str(w.shape) #print "Shape of v: " + str(v.shape) print "values: ", w[:, 5] #printing the 3rd freq bin's eigen values and eigen vectors #print w[:,2].shape print "vectors: ", v[:, :, 5] ## print "v shape: ", str(v[:,1,2].shape) ## Calculation of MUSIC spectrum print "Calculating MUSIC spectrum" powerspec.mainfn(tf, v, t, az, Nd)
def load_model(): # learning loop model = FunctionSet(l1=F.Linear(2 * (dim * 2 + 1), n_units, initialW=initializer), l2=F.Linear(n_units, n_units, initialW=initializer), l3=F.Linear(n_units, 1, initialW=initializer)) # Setup optimizer optimizer = optimizers.Adam() optimizer.setup(model) model.to_gpu() # Neural net architecture def forward(x_data, y_data, ratio=0.5, train=True): x, t = Variable(x_data), Variable(y_data) h1 = F.dropout(F.sigmoid(model.l1(x)), ratio=ratio, train=train) h2 = F.dropout(F.sigmoid(model.l2(h1)), ratio=ratio, train=train) y = model.l3(h2) return F.mean_squared_error(y, t), y with open('selectfrq_estimated_data/pretrain_write1.csv', 'r') as csvfile: readfile = csv.reader(csvfile) for row in readfile: print len(row) # print row l1_W.append(row) with open('selectfrq_estimated_data/pretrain_write2.csv', "r") as csvfile: reader = csv.reader(csvfile) for row in reader: # print row l2_W.append(row) with open('selectfrq_estimated_data/pretrain_write3.csv', "r") as csvfile: reader = csv.reader(csvfile) for row in reader: # print row l3_W.append(row) #test loop SNRList = ["-10dB", "-5dB", "0dB", "5dB", "10dB", "-20dB"] for SNRnum, SNR in enumerate(SNRList): #-10,-5,0,5,10,-20dB loss_sum = np.zeros(testsize) for idx in np.arange(learnsize + SNRnum * testsize, learnsize + (SNRnum + 1) * testsize): fs, signal_data = read(estimated_signal[idx], "r") fs, noise_data = read(estimated_noise[idx], "r") fs, teacher_data = read( teacher_signal[idx - testsize * SNRnum], "r") signal_data = signal_data / np.sqrt(np.mean(signal_data**2)) noise_data = noise_data / np.sqrt(np.mean(noise_data**2)) teacher_data = teacher_data / np.sqrt(np.mean(teacher_data**2)) Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data) Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data) Tspectrum_, synparam = FFTanalysis.FFTanalysis(teacher_data) N_FRAMES = np.shape(Sspectrum_)[0] HFFTL = np.shape(Sspectrum_)[1] x_data = np.zeros((N_FRAMES, HFFTL * 2)) y_data = np.zeros((N_FRAMES, HFFTL)) for nframe in xrange(N_FRAMES): spectrum = np.append(Sspectrum_[nframe], Nspectrum_[nframe]) x_data[nframe] = [ np.sqrt(c.real**2 + c.imag**2) for c in spectrum ] #DNN indata #phaseSpectrum = [np.arctan2(c.imag, c.real) for c in spectrum] Spower = np.array([ np.sqrt(c.real**2 + c.imag**2) for c in Sspectrum_[nframe] ]) Tpower = np.array([ np.sqrt(c.real**2 + c.imag**2) for c in Tspectrum_[nframe] ]) for i, x in enumerate(Spower): if x == 0: Spower[i] = 1e-10 y_data[nframe] = Tpower / Spower calcSNR = np.empty((N_FRAMES, 0), float) totalloss = np.zeros(HFFTL, float) # testing for frq in xrange(HFFTL): model.l1.W.data = cuda.to_gpu(l1_W[frq]) model.l2.W.data = cuda.to_gpu(l2_W[frq]) model.l3.W.data = cuda.to_gpu(l3_W[frq]) # testing x_frqdata = np.zeros( (np.shape(x_data)[0], 2 * (dim * 2 + 1)), float) x_frqdata[:, dim] = x_data[:, frq] x_frqdata[:, dim * 3 + 1] = x_data[:, frq + HFFTL] for j in np.arange(1, dim + 1): if (frq - j) >= 0: x_frqdata[:, dim - j] = x_data[:, frq - j] x_frqdata[:, dim * 3 + 1 - j] = x_data[:, frq + HFFTL - j] if ((HFFTL - 1) - (j + frq)) >= 0: x_frqdata[:, dim + j] = x_data[:, frq + j] x_frqdata[:, dim * 3 + 1 + j] = x_data[:, frq + HFFTL + j] y_frqdata = np.zeros((np.shape(y_data)[0], 1), float) y_frqdata = y_data[:, frq].reshape(np.shape(y_data)[0], 1) x_frqdata = x_frqdata.astype(np.float32) y_frqdata = y_frqdata.astype(np.float32) if args.gpu >= 0: x_frqdata = cuda.to_gpu(x_frqdata) y_frqdata = cuda.to_gpu(y_frqdata) loss, pred = forward(x_frqdata, y_frqdata, train=False) totalloss[frq] 
= cuda.to_cpu(loss.data) pred = np.reshape(cuda.to_cpu(pred.data), (N_FRAMES, 1)) calcSNR = np.append(calcSNR, pred, axis=1) fs, teacher_data = read( teacher_signal[idx - testsize * SNRnum], "r") if teacher_data.dtype == "int16": teacher_data = teacher_data / norm y_out = Sspectrum_ * calcSNR wf_signal = FFTanalysis.Synth(y_out, synparam, BPFon=0) wf_signal = wf_signal * np.sqrt( np.mean(teacher_data**2) / np.mean(wf_signal**2)) write( dir + SNR + "/dim{}_DNNbased_No{}.wav".format( dim, idx - testsize * SNRnum), Fs, wf_signal)
def DNNbasedWienerfilter(): #pretrain loop startexec = time.time() for epoch in xrange(pretrain_epoch): print "now proc: pretraining epoch{}".format(epoch) startepoch = time.time() perm = np.random.permutation(learnsize) for idx in np.arange(0, pretrainsize, 3): #utterance Number training dataset # start = time.time() x_batch = np.empty((0, HFFTL * 2), float) for iter in xrange(3): fs, signal_data = read(estimated_signal[perm[idx + iter]], "r") fs, noise_data = read(estimated_noise[perm[idx + iter]], "r") signal_data = signal_data / np.sqrt(np.mean(signal_data** 2)) noise_data = noise_data / np.sqrt(np.mean(noise_data**2)) #FFT Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data) Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data) N_FRAMES = np.shape(Sspectrum_)[0] HFFTL = np.shape(Sspectrum_)[1] x_data = np.zeros((N_FRAMES, HFFTL * 2)) for nframe in xrange(N_FRAMES): spectrum = np.append(Sspectrum_[nframe], Nspectrum_[nframe]) x_data[nframe] = [ np.sqrt(c.real**2 + c.imag**2) for c in spectrum ] #DNN indata if iter == 0: x_batch = np.append(x_batch, x_data, axis=0) else: x_batch = np.vstack((x_batch, x_data)) for frq in xrange(HFFTL): x_frqbatch = np.zeros( (np.shape(x_batch)[0], 2 * (dim * 2 + 1)), float) x_frqbatch[:, dim] = x_batch[:, frq] x_frqbatch[:, dim * 3 + 1] = x_batch[:, frq + HFFTL] for j in np.arange(1, dim + 1): if (frq - j) >= 0: x_frqbatch[:, dim - j] = x_batch[:, frq - j] x_frqbatch[:, dim * 3 + 1 - j] = x_batch[:, frq + HFFTL - j] if ((HFFTL - 1) - (j + frq)) >= 0: x_frqbatch[:, dim + j] = x_batch[:, frq + j] x_frqbatch[:, dim * 3 + 1 + j] = x_batch[:, frq + HFFTL + j] x_frqbatch = x_frqbatch.astype(np.float32) if epoch != 0 or idx != 0: #except first batch modelL1.l1.W.data = cuda.to_gpu(l1_W.pop(0)) modelL2.l1.W.data = cuda.to_gpu(l2_W.pop(0)) modelL1.l2.W.data = cuda.to_gpu(l1b_W.pop(0)) modelL2.l2.W.data = cuda.to_gpu(l2b_W.pop(0)) # training if args.gpu >= 0: x_frqbatch = cuda.to_gpu(x_frqbatch) optL1.zero_grads() loss, hidden = pretrain_L1(x_frqbatch, ratio=0.5) loss.backward() optL1.update() optL2.zero_grads() loss, hidden = pretrain_L2(hidden, ratio=0.5) loss.backward() optL2.update() #model parameter saving l1_W.append(cuda.to_cpu(modelL1.l1.W.data)) l2_W.append(cuda.to_cpu(modelL2.l1.W.data)) l1b_W.append(cuda.to_cpu(modelL1.l2.W.data)) l2b_W.append(cuda.to_cpu(modelL2.l2.W.data)) print 'pretrain epoch time:{0}sec'.format( np.round(time.time() - startepoch, decimals=2)) # learning loop model = FunctionSet(l1=F.Linear(2 * (dim * 2 + 1), n_units, initialW=initializer), l2=F.Linear(n_units, n_units, initialW=initializer), l3=F.Linear(n_units, 1, initialW=initializer)) # Setup optimizer optimizer = optimizers.Adam() optimizer.setup(model) model.to_gpu() # Neural net architecture def forward(x_data, y_data, ratio=0.5, train=True): x, t = Variable(x_data), Variable(y_data) h1 = F.dropout(F.sigmoid(model.l1(x)), ratio=ratio, train=train) h2 = F.dropout(F.sigmoid(model.l2(h1)), ratio=ratio, train=train) y = model.l3(h2) return F.mean_squared_error(y, t), y startexec = time.time() for epoch in xrange(n_epoch): print "now proc: learning epoch{}".format(epoch) startepoch = time.time() perm = np.random.permutation(learnsize) for idx in np.arange(0, learnsize, 3): #utterance Number training dataset # start = time.time() x_batch = np.empty((0, HFFTL * 2), float) y_batch = np.empty((0, HFFTL), float) for iter in xrange(3): fs, signal_data = read(estimated_signal[perm[idx + iter]], "r") fs, noise_data = read(estimated_noise[perm[idx + iter]], "r") fs, 
teacher_data = read(teacher_signal[perm[idx + iter]], "r") signal_data = signal_data / np.sqrt(np.mean(signal_data** 2)) noise_data = noise_data / np.sqrt(np.mean(noise_data**2)) teacher_data = teacher_data / np.sqrt( np.mean(teacher_data**2)) #FFT Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data) Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data) Tspectrum_, synparam = FFTanalysis.FFTanalysis( teacher_data) N_FRAMES = np.shape(Sspectrum_)[0] HFFTL = np.shape(Sspectrum_)[1] x_data = np.zeros((N_FRAMES, HFFTL * 2)) y_data = np.zeros((N_FRAMES, HFFTL)) if epoch == 0: learned_data += N_FRAMES for nframe in xrange(N_FRAMES): spectrum = np.append(Sspectrum_[nframe], Nspectrum_[nframe]) x_data[nframe] = [ np.sqrt(c.real**2 + c.imag**2) for c in spectrum ] #DNN indata #phaseSpectrum = [np.arctan2(c.imag, c.real) for c in spectrum] Spower = np.array([ np.sqrt(c.real**2 + c.imag**2) for c in Sspectrum_[nframe] ]) Tpower = np.array([ np.sqrt(c.real**2 + c.imag**2) for c in Tspectrum_[nframe] ]) for i, x in enumerate(Spower): if x == 0: Spower[i] = 1e-10 y_data[nframe] = Tpower / Spower if iter == 0: x_batch = np.append(x_batch, x_data, axis=0) y_batch = np.append(y_batch, y_data, axis=0) else: x_batch = np.vstack((x_batch, x_data)) y_batch = np.vstack((y_batch, y_data)) for frq in xrange(HFFTL): x_frqbatch = np.zeros( (np.shape(x_batch)[0], 2 * (dim * 2 + 1)), float) x_frqbatch[:, dim] = x_batch[:, frq] x_frqbatch[:, dim * 3 + 1] = x_batch[:, frq + HFFTL] for j in np.arange(1, dim + 1): if (frq - j) >= 0: x_frqbatch[:, dim - j] = x_batch[:, frq - j] x_frqbatch[:, dim * 3 + 1 - j] = x_batch[:, frq + HFFTL - j] if ((HFFTL - 1) - (j + frq)) >= 0: x_frqbatch[:, dim + j] = x_batch[:, frq + j] x_frqbatch[:, dim * 3 + 1 + j] = x_batch[:, frq + HFFTL + j] y_frqbatch = np.zeros((np.shape(y_batch)[0], 1), float) y_frqbatch = y_batch[:, frq].reshape(np.shape(y_batch)[0], 1) x_frqbatch = x_frqbatch.astype(np.float32) y_frqbatch = y_frqbatch.astype(np.float32) model.l1.W.data = cuda.to_gpu(l1_W.pop(0)) model.l2.W.data = cuda.to_gpu(l2_W.pop(0)) if epoch != 0 or idx != 0: #except first batch model.l3.W.data = cuda.to_gpu(l3_W.pop(0)) # training if args.gpu >= 0: x_frqbatch = cuda.to_gpu(x_frqbatch) y_frqbatch = cuda.to_gpu(y_frqbatch) optimizer.zero_grads() loss, pred = forward(x_frqbatch, y_frqbatch, ratio=0.5) loss.backward() optimizer.update() #model parameter saving l1_W.append(cuda.to_cpu(model.l1.W.data)) l2_W.append(cuda.to_cpu(model.l2.W.data)) l3_W.append(cuda.to_cpu(model.l3.W.data)) print 'epoch time:{0}sec'.format( np.round(time.time() - startepoch, decimals=2)) f = open('selectfrq_estimated_data/pretrain_write1.csv', 'w') writer = csv.writer(f) writer.writerows(l1_W) f.close() f = open('selectfrq_estimated_data/pretrain_write2.csv', 'w') writer = csv.writer(f) writer.writerows(l2_W) f.close() f = open('selectfrq_estimated_data/pretrain_write3.csv', 'w') writer = csv.writer(f) writer.writerows(l3_W) f.close() #test loop SNRList = ["-10dB", "-5dB", "0dB", "5dB", "10dB", "-20dB"] for SNRnum, SNR in enumerate(SNRList): #-10,-5,0,5,10,-20dB loss_sum = np.zeros(testsize) for idx in np.arange(learnsize + SNRnum * testsize, learnsize + (SNRnum + 1) * testsize): fs, signal_data = read(estimated_signal[idx], "r") fs, noise_data = read(estimated_noise[idx], "r") fs, teacher_data = read( teacher_signal[idx - testsize * SNRnum], "r") signal_data = signal_data / np.sqrt(np.mean(signal_data**2)) noise_data = noise_data / np.sqrt(np.mean(noise_data**2)) teacher_data = teacher_data / 
np.sqrt(np.mean(teacher_data**2)) Sspectrum_, synparam = FFTanalysis.FFTanalysis(signal_data) Nspectrum_, synparam = FFTanalysis.FFTanalysis(noise_data) Tspectrum_, synparam = FFTanalysis.FFTanalysis(teacher_data) N_FRAMES = np.shape(Sspectrum_)[0] HFFTL = np.shape(Sspectrum_)[1] x_data = np.zeros((N_FRAMES, HFFTL * 2)) y_data = np.zeros((N_FRAMES, HFFTL)) for nframe in xrange(N_FRAMES): spectrum = np.append(Sspectrum_[nframe], Nspectrum_[nframe]) x_data[nframe] = [ np.sqrt(c.real**2 + c.imag**2) for c in spectrum ] #DNN indata #phaseSpectrum = [np.arctan2(c.imag, c.real) for c in spectrum] Spower = np.array([ np.sqrt(c.real**2 + c.imag**2) for c in Sspectrum_[nframe] ]) Tpower = np.array([ np.sqrt(c.real**2 + c.imag**2) for c in Tspectrum_[nframe] ]) for i, x in enumerate(Spower): if x == 0: Spower[i] = 1e-10 y_data[nframe] = Tpower / Spower calcSNR = np.empty((N_FRAMES, 0), float) totalloss = np.zeros(HFFTL, float) # testing for frq in xrange(HFFTL): model.l1.W.data = cuda.to_gpu(l1_W[frq]) model.l2.W.data = cuda.to_gpu(l2_W[frq]) model.l3.W.data = cuda.to_gpu(l3_W[frq]) # testing x_frqdata = np.zeros( (np.shape(x_data)[0], 2 * (dim * 2 + 1)), float) x_frqdata[:, dim] = x_data[:, frq] x_frqdata[:, dim * 3 + 1] = x_data[:, frq + HFFTL] for j in np.arange(1, dim + 1): if (frq - j) >= 0: x_frqdata[:, dim - j] = x_data[:, frq - j] x_frqdata[:, dim * 3 + 1 - j] = x_data[:, frq + HFFTL - j] if ((HFFTL - 1) - (j + frq)) >= 0: x_frqdata[:, dim + j] = x_data[:, frq + j] x_frqdata[:, dim * 3 + 1 + j] = x_data[:, frq + HFFTL + j] y_frqdata = np.zeros((np.shape(y_data)[0], 1), float) y_frqdata = y_data[:, frq].reshape(np.shape(y_data)[0], 1) x_frqdata = x_frqdata.astype(np.float32) y_frqdata = y_frqdata.astype(np.float32) if args.gpu >= 0: x_frqdata = cuda.to_gpu(x_frqdata) y_frqdata = cuda.to_gpu(y_frqdata) loss, pred = forward(x_frqdata, y_frqdata, train=False) totalloss[frq] = cuda.to_cpu(loss.data) pred = np.reshape(cuda.to_cpu(pred.data), (N_FRAMES, 1)) calcSNR = np.append(calcSNR, pred, axis=1) fs, teacher_data = read( teacher_signal[idx - testsize * SNRnum], "r") if teacher_data.dtype == "int16": teacher_data = teacher_data / norm y_out = Sspectrum_ * calcSNR wf_signal = FFTanalysis.Synth(y_out, synparam, BPFon=0) wf_signal = wf_signal * np.sqrt( np.mean(teacher_data**2) / np.mean(wf_signal**2)) write( dir + SNR + "/dim{}_DNNbased_No{}.wav".format( dim, idx - testsize * SNRnum), Fs, wf_signal) print 'exec time:{0}sec'.format( np.round(time.time() - startexec, decimals=2)) print "data: ", learned_data
from scipy.io import wavfile
import numpy as np

ii = 0
length = 20
half_length = length // 2
frequency = 48000
for i in range(0, 46):
    print(i)
    temp_wav = wavfile.read('youtube_downloader/wav/' + str(i) + '.wav')[1]
    wav_length = len(temp_wav)
    if wav_length > frequency * length:
        mid = wav_length // 2
        temp_wav = temp_wav[mid - half_length * frequency: mid + half_length * frequency]
        if (not np.isnan(temp_wav).any()) and (not np.isinf(temp_wav).any()):
            wavfile.write('48000_wavs/' + str(ii) + '.wav', frequency, temp_wav)
            ii += 1
def load_wav_to_torch(full_path):
    sampling_rate, data = read(full_path)
    return torch.FloatTensor(data.astype(np.float32)), sampling_rate
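# A minimal usage sketch (hypothetical path), assuming torch, numpy as np and
# scipy.io.wavfile.read are imported as load_wav_to_torch expects:
audio, sr = load_wav_to_torch('LJ001-0001.wav')
print(sr, audio.shape, audio.dtype)  # torch.float32 tensor of raw samples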
def _read_wav(self, wave_file):
    self.rate, self.data = wf.read(wave_file)
    self.channels = len(self.data.shape)
    self.filename = wave_file
    return self
createPath(TEMP_FOLDER)

command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
    FRAME_QUALITY) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
subprocess.call(command, shell=True)

command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
    SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"
subprocess.call(command, shell=True)

command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
f = open(TEMP_FOLDER + "/params.txt", "w")
subprocess.call(command, shell=True, stdout=f)

sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
audioSampleCount = audioData.shape[0]
maxAudioVolume = getMaxVolume(audioData)

f = open(TEMP_FOLDER + "/params.txt", 'r+')
pre_params = f.read()
f.close()
params = pre_params.split('\n')
for line in params:
    m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
    if m is not None:
        frameRate = float(m.group(1))

samplesPerFrame = sampleRate / frameRate
audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
import numpy as np
import scipy as sp
from scipy.io.wavfile import read
from scipy.io.wavfile import write
from scipy import signal
from scipy.signal import wiener
import matplotlib.pyplot as plt
# get_ipython().magic('matplotlib inline')

(Frequency, array) = read('mimii_dummy.wav')
len(array)

plt.plot(array)
plt.title('Original Signal Spectrum')
plt.xlabel('Frequency(Hz)')
plt.ylabel('Amplitude')
# plt.show()

# np.fft.fft / np.linspace replace the removed sp.fft / sp.linspace function aliases
FourierTransformation = np.fft.fft(array)
# array = np.fft.fft(array)
scale = np.linspace(0, Frequency, len(array))
plt.stem(scale[0:5000], np.abs(FourierTransformation[0:5000]), 'r')
# array = FourierTransformation

filteredSignal = signal.wiener(array)
plt.plot(filteredSignal)  # plotting the signal
plt.title('wiener filter plot')
plt.xlabel('Frequency(Hz)')
from scipy.fftpack import fft, ifft
import numpy as np
import matplotlib.pyplot as pl
from scipy.io.wavfile import read


def configFilter(data):
    # simple pre-emphasis filter: y[n] = x[n] - 0.98 * x[n-1]
    out = []
    for i in range(1, len(data), 1):
        out.append(data[i] - 0.98 * data[i - 1])
    return out


fig, (ax1, ax2, ax3, ax4, ax5, ax6, ax7) = pl.subplots(7, 1, sharex=False)

rate, data = read('Xe.wav')
sizecut = 512
start = int(0.4 * rate)
# start = 100
sg = data[start:start + sizecut]
ax1.plot(sg)
ax1.set_title('Original')

sg = configFilter(sg)
ax2.plot(sg)
ax2.set_title('After Adjust Filter')
# coding:utf-8 ''' @time: Created on 2018-10-19 06:02:12 @author: Lanqing @Func: src.wav ''' from scipy.io import wavfile import matplotlib.pyplot as plt from sklearn.ensemble import RandomForestClassifier from sklearn import tree, preprocessing from sklearn.model_selection import train_test_split import sklearn.metrics as metrics import pandas as pd, numpy as np fs1, data1 = wavfile.read('C:/Users/jhh/Desktop/audio/REC20181019054833.wav') fs2, data2 = wavfile.read('C:/Users/jhh/Desktop/audio/REC20181019055029.wav') df1 = pd.DataFrame(data1).iloc[:, 0] df2 = pd.DataFrame(data2).iloc[:, 0] df1.plot() plt.show() df2.plot() plt.show() n = 800 timestep = 1 / 48000 list_df1 = [list(df1[i:i + n].values) for i in range(0, df1.shape[0], n)][:-1] list_df2 = [list(df2[i:i + n].values) for i in range(0, df2.shape[0], n)][:-1]
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence) - 1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


# define input sequence
rate, raw_seq = wavfile.read('songs/hakuna_matata.wav')
raw_seq = raw_seq[np.logical_not(np.isnan(raw_seq))]
raw_seq = raw_seq.astype(int)
print(max(raw_seq))
# choose a number of time steps
n_steps = 1
# sample
# raw_seq = raw_seq  # random sample. dev purposes.
# split into samples
X = raw_seq[0:1323000]  # split_sequence(raw_seq, n_steps)
y = raw_seq[1323000:1345050]
# reshape from [samples, timesteps] into [samples, timesteps, features]
def audio_fft():
    rate, data = wav.read('31beethovens3a.wav')
    fft_out = fft(data)
    signs = data / np.absolute(data)
    return [fft_out, signs]
import IPython.display as ipd import os fulldatasetpath = "D:/Datasets/UrbanSound8K/" os.chdir(fulldatasetpath) import librosa from scipy.io import wavfile as wav import numpy as np filename = fulldatasetpath + 'audio/fold9/106955-6-0-0.wav' librosa_audio, librosa_sample_rate = librosa.load(filename) scipy_sample_rate, scipy_audio = wav.read(filename) print('Original sample rate:', scipy_sample_rate) print('Librosa sample rate:', librosa_sample_rate) print('Original audio file min~max range:', np.min(scipy_audio), 'to', np.max(scipy_audio)) print('Librosa audio file min~max range:', np.min(librosa_audio), 'to', np.max(librosa_audio)) import matplotlib.pyplot as plt # Original audio with 2 channels #plt.figure(figsize=(12, 4)) #plt.plot(scipy_audio) mfccs = librosa.feature.mfcc(y=librosa_audio, sr=librosa_sample_rate,
B = np.asarray(B).reshape(y1, x1) rgbArray = np.zeros((y1, x1, 3), 'uint8') rgbArray[..., 0] = R rgbArray[..., 1] = G rgbArray[..., 2] = B plt.figure(dpi=1200) plt.imshow(rgbArray, interpolation='spline16') plt.style.use('dark_background') plt.axis('off') plt.savefig(filename) plt.close() fs, data = wavfile.read('Melulu_rev1_viz.wav') # load the data a_0 = data.T[0] / (2.**15 ) # this is a two channel soundtrack,get the first track n_0 = len(a_0) sample_period = 0.2 ### in seconds sample_count = mh.floor(n_0 / (fs * sample_period)) a_1 = chunkIt(a_0, sample_count) for a in a_1: freqArray_top5 = np.empty([]) n = len(a) freqArray, dB = freq_dB(a, n, fs, -300) dB_sort = np.argsort(dB) freqArray = freqArray[dB_sort]
def read_files():
    org = wfile.read('org.wav')
    long = wfile.read('long2.wav')
    short = wfile.read('short2.wav')
    return org, long, short
    for j in range(1, taille2):
        matrice_distance_elastique[0, j] = matrice_distance_elastique[0, j - 1] + cout_horiz * matrice_distance[0, j]
    # fill in the rest of the matrix
    for i in range(1, taille1):
        for j in range(1, taille2):
            chemin_vert = matrice_distance_elastique[i - 1, j] + cout_vert * matrice_distance[i, j]
            chemin_horiz = matrice_distance_elastique[i, j - 1] + cout_horiz * matrice_distance[i, j]
            chemin_diag = matrice_distance_elastique[i - 1, j - 1] + cout_diag * matrice_distance[i, j]
            matrice_distance_elastique[i, j] = min(chemin_vert, chemin_horiz, chemin_diag)
    return matrice_distance_elastique[-1, -1] / (taille1 + taille2)


#%% test
nbr_prononce = 5
locuteur = liste_nom[0]
essai = 2
N = 10
chemin1 = genere_nom(nbr_prononce, locuteur, essai)
chemin2 = genere_nom(5, liste_nom[0], 2)
samplerate1, data1 = wav.read(chemin1)
samplerate2, data2 = wav.read(chemin2)
if data1.ndim > 1:
    data1 = data1[:, 0]
# print(np.shape(calcul_coeff_lpc_fenetre(data[0:160], 10)))
matrice1 = calcul_lpc(samplerate1, data1, 10)
matrice2 = calcul_lpc(samplerate2, data2, 10)
truc_a_print = distance_elastique(matrice1, matrice2)
print(truc_a_print)
# affichage(chemin)
def getCaliPower(file):
    fs, data = read(file)
    data = data.astype("float32")
    filtData = bandPass(data, 1000, fs)
    cali = (filtData * filtData).sum()
    return cali
#print "DISTANCE!!!" + str(dist) if dist < distmin: distmin = dist speaker = k return speaker print "|||||STARTING TEST|||||\n" for i, wave_file in enumerate(wave_files): fname = '/' + wave_file to_print = 'Speaker [' + str( i) + '] File:' + wave_file + ' Testing features...' print to_print (fs, s) = read(directory + fname) #Passing test file to MFCC mel_coefs = mfcc_p(s, fs) mel_coefs = mel_coefs.transpose() mel_coefs[0, :] = np.zeros( mel_coefs.shape[1] ) # 0th coefficient does not carry significant information #Passing test file to LPC lpc_coefs = lpc(s, fs, orderLPC) sp_mfcc = minDistance(mel_coefs, codebooks_mfcc) sp_lpc = minDistance(lpc_coefs, codebooks_lpc) print 'Speaker [' + str(i) + '] matches Speaker [' + str( sp_mfcc) + '] ||MFCC||'
filename = '..\Data\TallShips.wav'
port = 'com4'
baudRate = 12_000_000
blockSize = 3
scale = 2 ** 21  # 75 kHz frequency deviation
lowPassCutOff = 15_000
lowPassOrder = 11
chunkSize = 20_000
updatePeriod = 1000

uart = serial.Serial(port, baudrate=baudRate, bytesize=8, parity='N', stopbits=1)

print('Loading file \'%s\'...' % filename)
audioRate, wave = wavfile.read(filename)

print('Low-pass filtering...')
b, a = signal.butter(lowPassOrder, lowPassCutOff / (audioRate / 2.0), 'low')
waveL = signal.filtfilt(b, a, wave[:, 0].astype(float))
waveR = signal.filtfilt(b, a, wave[:, 1].astype(float))

print('Resampling...')
symbolRate = baudRate / (blockSize * 10)
resampleScale = symbolRate / audioRate
waveL = interpolation.zoom(waveL, zoom=resampleScale, order=3)
waveR = interpolation.zoom(waveR, zoom=resampleScale, order=3)
#     'F': 'happiness',
#     'T': 'sadness',
#     'N': 'neutral',
# }
emo = {
    'W': '1',
    'L': '2',
    'E': '3',
    'A': '4',
    'F': '5',
    'T': '6',
    'N': '0',
}

(rate, sig) = wav.read(sys.argv[1])
print("Wave File Read.")
mfcc_feat = mfcc(sig, rate)
print("MFCC Calculated.")

print("Writing to test file....")
rf = open("mfcc_file.te", "w")
for x in mfcc_feat:
    j = 0
    rf.write("0 ")
    while j < 12:
        j += 1
        rf.write(str(j))
        rf.write(":")
        rf.write(str(x[j]))
        rf.write(" ")
    rf.write("\n")
def load_wav_to_torch(full_path):
    """
    Loads wav data into a torch array
    """
    sampling_rate, data = read(full_path)
    return torch.from_numpy(data).float(), sampling_rate
def getPower(file):
    fs, data = read(file)
    data = data.astype("float32")
    power = (data * data).sum()
    return power
def get_sound_and_normalize(file_name):
    rate, sound = read(file_name)
    sound = sound.astype(np.float64)
    sound -= sound.mean()
    sound /= sound.std()
    return rate, sound
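# A minimal usage sketch (hypothetical file name), assuming numpy as np and
# scipy.io.wavfile.read are imported as get_sound_and_normalize expects:
rate, sound = get_sound_and_normalize('utterance.wav')
print(rate, sound.mean(), sound.std())  # mean ~0 and std ~1 after normalization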
natural_speech_wavfile = args['-n']
world_anasyn_speech_wavfile = args['-w']
modified_anasyn_speech_wavfile = args['-m']
labfile = args['-l']
outdir = args['-o']

outdir_for_natural = join(outdir, "natural")
outdir_for_world = join(outdir, "world")
outdir_for_modified = join(outdir, "modified")
for outdir in [outdir_for_natural, outdir_for_world, outdir_for_modified]:
    if not exists(outdir):
        os.makedirs(outdir)

fs, natural = wavfile.read(natural_speech_wavfile)
world_anasyn = wavfile.read(world_anasyn_speech_wavfile)[1]
modified_anasyn = wavfile.read(modified_anasyn_speech_wavfile)[1]

with open(labfile, "r") as f:
    label = f.readlines()
label = [x.split(' ') for x in label]
# int(x) / 10000 -> [ms]
# [ms] / 1000 * fs -> the number of samples
label = [[
    int(int(x[0]) / 10000 / 1000. * fs),
    int(int(x[1]) / 10000 / 1000. * fs),
    x[2].split()[0]
] for x in label]

for number, lab in enumerate(label):
"""This script tests basic read/write functionality""" import numpy as np import sys import matplotlib.pyplot as plt import scipy.io.wavfile as wavfile print(sys.path) # write sinusoid to 16-bit, 44100 Hz PCM Mono samplerate = 187321 # samples/s sinusoid_frequency = 1000 # Hz length_seconds = 1.0 # seconds t = np.linspace(0., length_seconds, int(np.rint(length_seconds*samplerate))) # amplitude = np.iinfo(np.int32).max amplitude = 1.0 data = amplitude*np.sin(2.*np.pi*sinusoid_frequency*t) data = data.astype(np.float32) wavfile.write("..//audio_examples//example_write.wav",samplerate, data ) samplerate2, data2 = wavfile.read("..//audio_examples//OneCD.wav") nsamples = data2.shape[0] t2 = np.linspace(0., nsamples/samplerate2, nsamples) # plt.plot(t,data) plt.plot(t2,data2[:,0])
    # Discard the imaginary component.
    comp_clip = n.fft.ifft(comp_fft).real

    # calculate actual compression ratio based on storage size
    original_size = 16.0 * L  # uncompressed size (16 bits per sample)
    # sparse 8-bit spectra, 16-bit spectral indices, 32-bit scale factor
    compressed_size = 8.0 * 2.0 * len(idxs) + 16.0 * len(idxs) + 32.0
    real_comp_ratio = compressed_size / original_size
    return (comp_clip, comp_fft[0:L2], orig_fft, real_comp_ratio)


# read wav file
# this is the audio signal to be compressed
ts = sw.read("original.wav")
sr = ts[0]    # sample rate
clip = ts[1]  # extract audio file as numpy data vector
if len(clip.shape) == 2:
    # if stereo, only use one channel
    print("using only one stereo channel")
    clip = ts[1][:, 0]

# compress and decompress audio file
# compression_ratio=0.95 means 95% reduction in
# file size.
cr = 0.95
# compress full length of the clip
W = 100000
n_window = int(n.floor(len(clip) / W))
comp_clip = n.zeros(len(clip))