def gen_chirp_sig(freqs, L, fs, octave=2):
    x = np.arange(0.0, float(L) / float(fs), 1.0 / float(fs))
    data = np.zeros(x.shape)
    for fbase in freqs:
        # linear sweep from fbase up `octave` octaves over the whole signal
        f = np.linspace(fbase, fbase * (2 ** octave), L)
        data += np.sin(2.0 * np.pi * f * x)
    return Signal(data, fs, normalize=True, mono=True)
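# A minimal usage sketch for gen_chirp_sig, assuming numpy (np) and PyMP's
# Signal are imported as in the rest of this file; the sample rate, duration
# and output path below are illustrative values only.
demo_fs = 8000
demo_L = demo_fs  # one second of samples
demo_chirp = gen_chirp_sig([440.0], demo_L, demo_fs, octave=2)
demo_chirp.write('/tmp/chirp_demo.wav')  # hypothetical output path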
def expe2():
    sig = gen_chirp_sig([440.0, 512.0], L, fs)
    # NB: the generated chirp is immediately overridden by the file-based signal
    sig = Signal('/sons/sqam/voicemale.wav', mono=True, normalize=True)
    #sig, c = gen_vibrato_sig([440.0,], L, fs, rate=10, ratio=0.1)
    sk = sketch.CorticoSketch()
    sk.recompute(sig)
    plt.figure()
    plt.subplot(121)
    plt.imshow(np.abs(sk.cort.cor[-1, 0, :, :]))
    plt.subplot(122)
    plt.imshow(np.abs(sk.cort.cor[0, -1, :, :]))
    plt.show()
    plt.figure()
    plt.subplot(211)
    plt.imshow(np.abs(sk.cort.cor[-1, 0, :, :]))
    plt.subplot(212)
    plt.plot(np.sum(np.abs(sk.cort.cor[-1, 0, :, :]), axis=0))
    #plt.show()
    plt.figure()
    plt.subplot(211)
    plt.imshow(np.abs(sk.cort.cor[0, -1, :, :].T))
    plt.subplot(212)
    plt.plot(np.sum(np.abs(sk.cort.cor[0, -1, :, :]), axis=1))
    plt.show()
def get_audio(filepath, seg_start, seg_duration, targetfs=None, verbose=True):
    """ for use only with wav files from the rwc database """
    # rewritten using scikits.audiolab
    import scikits.audiolab as audiolab
    # small hack: search for an alternate extension
    if not op.exists(filepath):
        filepath = op.splitext(filepath)[0] + '.wav'
    if not op.exists(filepath):
        filepath = op.splitext(filepath)[0] + '.WAV'
    sndfile = audiolab.Sndfile(filepath, 'r')
    fs = sndfile.samplerate
    (n, c) = (sndfile.nframes, sndfile.channels)
    if verbose:
        print "Reading"
    # initialize the position, then read the requested segment
    sndfile.seek(int(seg_start * fs), 0, 'r')
    audiodata = sndfile.read_frames(int(seg_duration * fs))
    sndfile.close()
    if verbose:
        print "Done"
    if targetfs is not None and not (targetfs == fs):
        if verbose:
            print "Resampling"
        sig = Signal(audiodata, fs)
        sig.resample(targetfs)
        audiodata = sig.data
        fs = targetfs
        if verbose:
            print "Done"
    return audiodata, fs
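# A hedged usage sketch: read a 5-second excerpt starting at 30 s from one of
# the RWC files used elsewhere in this repository, resampled to 16 kHz; the
# start/duration values are illustrative.
demo_data, demo_fs = get_audio('/sons/rwc/Learn/rwc-g-m01_1.wav',
                               seg_start=30.0, seg_duration=5.0,
                               targetfs=16000, verbose=False)
print "read %d samples at %d Hz" % (len(demo_data), demo_fs)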
def expe_2():
    sig_1_path = '/sons/voxforge/main/Learn/cmu_us_slt_arctic/wav/arctic_a0372.wav'
    sig_2_path = '/sons/voxforge/main/Learn/cmu_us_rms_arctic/wav/arctic_a0372.wav'
    i = 0
    for sig_path in [sig_1_path, sig_2_path]:
        synth_sig = Signal(sig_path, normalize=True, mono=True)
        #synth_sig.crop(0.1*synth_sig.fs, 3.5*synth_sig.fs)
        #synth_sig.resample(32000)
        plt.figure(figsize=(10, 10))
        plt.subplot(211)
        plt.plot(np.arange(.0, synth_sig.length) / float(synth_sig.fs),
                 synth_sig.data)
        plt.xticks([])
        plt.ylim([-1, 1])
        plt.grid()
        plt.subplot(212)
        synth_sig.spectrogram(1024, 64, order=0.5, log=False,
                              cmap=cm.hot, cbar=False)
        plt.savefig(op.join(figure_output_path, 'voice_%d_spectro.pdf' % i))
        synth_sig.write(op.join(audio_output_path, 'voice_%d_spectro.wav' % i))
        i += 1
def runTest(self): """ Creating and manipulating a corticogram """ sig = Signal(audio_test_file, mono=True, normalize=True) # convert to auditory gram = cochleo_tools.Cochleogram(sig.data, load_coch_filt=True) gram.build_aud() # Cortico-gram : 2D complex transform of y5 # we need to define y = gram.y5, para1= vector pf parameters, rv = rate vector, sv = scale vector y = np.array(gram.y5).T cort = cochleo_tools.Corticogram(gram) cort.build_cor() cort.invert() rec_aud = cort.rec plt.figure() plt.subplot(121) plt.imshow(np.abs(rec_aud)) plt.subplot(122) plt.imshow(np.abs(y)) plt.show() rec_aud *= np.max(y) / np.max(rec_aud) print np.linalg.norm(np.abs(y) - np.abs(rec_aud), 'fro') self.assertTrue( np.linalg.norm(np.abs(y) - np.abs(rec_aud), 'fro') < 0.8 * np.linalg.norm(y))
def expe1():
    shifts = [0, ]  # in samples
    fgpts = []
    for shift in shifts:
        sig = Signal(audio_test_file, normalize=True, mono=True)
        sig.crop(shift, shift + L)
        sk = sketch.CorticoIHTSketch()
        sk.recompute(sig)
        sk.sparsify(100)
        fgpts.append(sk.fgpt())
        # sk.represent()
        # plt.suptitle("Shift of %2.2f sec" % (float(shift) / float(fs)))
    colors = ['b', 'r', 'c', 'm']
    score = []
    bin_nnz_ref = np.flatnonzero(fgpts[0])
    #plt.figure()
    for i, fgpt in enumerate(fgpts):
        bin_nnz = np.flatnonzero(fgpt)
        # plt.stem(bin_nnz, [1] * len(bin_nnz), colors[i])
        score.append(len(np.intersect1d(bin_nnz_ref, bin_nnz,
                                        assume_unique=True)))
    print score
def runTest(self):
    sig = Signal(audio_test_file, mono=True, normalize=True)
    gram = cochleo_tools.Cochleogram(sig.data)
    gram._toy2()
    rec_data = gram.invert_y2()
def gen_vibrato_sig(freqs, L, fs, octave=2, rate=0.1, ratio=0.1):
    x = np.arange(0.0, float(L) / float(fs), 1.0 / float(fs))
    data = np.zeros(x.shape)
    from scipy.special import sici
    for fbase in freqs:
        # thanks to Wolfram: sici returns (Si(x), Ci(x)); the cosine
        # integral Ci gives the vibrato-modulated frequency track
        f = fbase * (1.0 + ratio * sici(2.0 * np.pi * rate * x)[1])
        data += np.sin(2.0 * np.pi * f * x)
    return Signal(data, fs, normalize=True, mono=True), f * x
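# A minimal sketch mirroring the commented-out call in expe2 above: a 440 Hz
# tone with a 10 Hz vibrato of 10% depth; fs and duration are assumed values.
demo_fs = 8000
demo_L = 2 * demo_fs  # two seconds of samples
vib_sig, vib_track = gen_vibrato_sig([440.0], demo_L, demo_fs,
                                     rate=10, ratio=0.1)
vib_sig.write('/tmp/vibrato_demo.wav')  # hypothetical output path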
def get_stft(x, wsize=512, tstep=256, sigma=None):
    """ if necessary, load the wav file, then return the STFT """
    if isinstance(x, str):
        sig = Signal(x, mono=True, normalize=True)
        x = sig.data
    if sigma is not None:
        x += sigma * np.random.randn(*x.shape)
    return np.squeeze(stft.stft(x, wsize, tstep))
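# Example call, assuming audio_test_file points to a wav file as elsewhere in
# this repository: STFT of the test signal with light additive noise.
demo_spec = get_stft(audio_test_file, wsize=512, tstep=256, sigma=0.01)
print "STFT shape:", demo_spec.shape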
def runTest(self):
    # test bad call
    sig = Signal(audio_test_file)
    gram = cochleo_tools.Cochleogram(sig.data, load_coch_filt=True)
    self.assertRaises(NotImplementedError, gram.build_aud)
    sig = Signal(audio_test_file, mono=True, normalize=True)
    gram = cochleo_tools.Cochleogram(sig.data, load_coch_filt=True)
    gram.build_aud()
    gram.plot_aud()
    t = time.clock()
    init_rec_data = gram.init_inverse()
    rec_data = gram.invert(init_vec=init_rec_data, nb_iter=10, display=False)
    print "Elapsed :", time.clock() - t
    # the reconstruction is defined up to a sign flip: keep the smaller error
    min_error = min([
        np.sum((rec_data - sig.data[0:rec_data.shape[0]]) ** 2),
        np.sum((rec_data + sig.data[0:rec_data.shape[0]]) ** 2)
    ])
def recons_save_fig_audio(magspec, target_name, n_max_frames, fs=22050,
                          format=(8, 3), nb_gl_iter=30):
    # Griffin-Lim-style reconstruction (gl_recons) from the magnitude
    # spectrogram, starting from a random initialization
    init_vec = np.random.randn(128 * n_max_frames)
    x_recon = transforms.gl_recons(magspec[:, :n_max_frames], init_vec,
                                   nb_gl_iter, 512, 128, display=False)
    rec_sig = Signal(x_recon, fs, normalize=True)
    rec_sig.write(os.path.join(output_audio_path, '%s.wav' % target_name))
    plt.figure(figsize=format)
    rec_sig.spectrogram(512, 128, order=1, log=True, cmap=cm.jet, cbar=False)
    plt.savefig(os.path.join(output_fig_path, '%s.png' % target_name))
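# A hypothetical call: reconstruct the first 900 frames (matching the ISMIR
# scripts below) of a magnitude spectrogram computed with the same 512/128
# STFT parameters; get_stft is the helper defined above and is assumed to be
# importable in this scope.
demo_magspec = np.abs(get_stft(audio_test_file, wsize=512, tstep=128))
recons_save_fig_audio(demo_magspec, 'gl_recons_demo', n_max_frames=900)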
def save_audio(learntype, np_, test_file, n_feat, rescale_str, sigout, fs,
               norm_segments=False):
    """ saving output vector to an audio wav """
    # NB: the second parameter was originally named `np`, which shadowed the
    # numpy module used below; it is renamed `np_` here (unused in this body)
    norm_str = ''
    if norm_segments:
        norm_str = 'normed'
        # equalize the mean energy of every segment before concatenation
        mean_energy = np.mean(
            [np.sum(sig ** 2) / float(len(sig)) for sig in sigout])
        for sig in sigout:
            sig /= np.sum(sig ** 2) / float(len(sig))
            sig *= mean_energy
    rec_sig = Signal(np.concatenate(sigout), fs, normalize=True)
    rec_sig.write('%s/%s_with%s_%dfeats_%s%s.wav' % (
        outputpath, os.path.split(test_file)[-1], learntype, n_feat,
        rescale_str, norm_str))
def save_audio_full_ref(learntype, test_file, n_feat, rescale_str, sigout,
                        fs, norm_segments=False):
    """ do not cut the sounds """
    # first pass for the total length
    max_idx = int(sigout[-1][1] + len(sigout[-1][0])) + 4 * fs
    print "total length of ", max_idx
    sig_data = np.zeros((max_idx, ))
    # seg_energy = np.sum(sigout[-1][0]**2)
    # overlap-add each segment at its original start index
    for (sig, startidx) in sigout:
        # print sig.shape, sig_data[int(startidx):int(startidx)+sig.shape[0]].shape
        sig_data[int(startidx):int(startidx) + sig.shape[0]] += sig  # *seg_energy/np.sum(sig**2)
    rec_sig = Signal(sig_data, fs, normalize=True)
    rec_sig.write('%s/%s_with%s_%dfeats_%s%s.wav' % (
        outputpath, os.path.split(test_file)[-1], learntype, n_feat,
        rescale_str, 'full_ref'))
def expe_1():
    synth_sig = Signal(audio_test_file, normalize=True, mono=True)
    synth_sig.crop(0.1 * synth_sig.fs, 3.5 * synth_sig.fs)
    #synth_sig.resample(32000)
    plt.figure(figsize=(10, 5))
    plt.subplot(211)
    plt.plot(np.arange(.0, synth_sig.length) / float(synth_sig.fs),
             synth_sig.data)
    plt.xticks([])
    plt.ylim([-1, 1])
    plt.grid()
    plt.subplot(212)
    synth_sig.spectrogram(1024, 64, order=0.25, log=False,
                          cmap=cm.hot, cbar=False)
    plt.savefig(op.join(figure_output_path, 'glocs_spectro.pdf'))
    plt.show()
## Initialize the sketchifier
#sk = STFTPeaksSketch(**{'scale': 2048, 'step': 512})
sk = CorticoIndepSubPeaksSketch(**{'fs': fs,
                                   'downsample': fs,
                                   'frmlen': 8,
                                   'shift': 0,
                                   'fac': -2,
                                   'BP': 1})
#sk = CochleoPeaksSketch(**{'fs': fs, 'step': 512, 'downsample': fs})
sk_id = sk.__class__.__name__[:-6]
# initialize the sketch on noise
sk.recompute(Signal(np.random.randn(seg_dur * fs), fs, mono=True))
(N, M) = sk.cort.cor.shape[:2]
sizes = np.zeros((N, M / 2, len(sparsities)))
nkeys = np.zeros((N, M / 2, len(sparsities)))
scores = np.zeros((N, M / 2, len(sparsities)))
cons_scores = np.zeros((N, M / 2, len(sparsities)))
times = []
for sp_ind, sparsity in enumerate(sparsities):
    # we just need a short adaptation
    sk.sparsify(sparsity)
    sc_name = "%s_%s_k%d_%s_%dsec_%dfs_test%d_step%d.mat" % (
        set_id, sk_id, sparsity, sk.get_sig(), int(seg_dur), int(fs),
def find_indexes(startIdx, array, stopvalue):
    """ get the indexes in the (sorted) array such that the elements are
    smaller than stopvalue """
    idxset = []
    idx = startIdx
    while idx <= array.shape[0] - 1 and array[idx] < stopvalue:
        idxset.append(idx)
        idx += 1
        # print idx, array[idx]
    return idxset

original = Signal(audiofile, mono=True)
max_duration = 20  # in seconds
original.crop(0, max_duration * original.fs)
wsize = 1024
tstep = 512
# Get the magnitude spectrum for the given audio file
learn_specs = features.get_stft(original.data, wsize, tstep)
learn_specs = learn_specs.T
# Read the features in the h5 file
h5 = hdf5_getters.open_h5_file_read(h5file)
timbre = hdf5_getters.get_segments_timbre(h5)
loudness_start = hdf5_getters.get_segments_loudness_start(h5)
C = hdf5_getters.get_segments_pitches(h5)
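# A tiny worked example for find_indexes (values are illustrative): starting
# at index 1 in a sorted array, collect indexes while the values stay below 3.
demo_idx = find_indexes(1, np.array([0., 1., 2., 3., 4.]), 3.0)
assert demo_idx == [1, 2]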
learn_magspecs_all = lstruct['learn_magspecs_all']
learn_files = lstruct['learn_files']
add_col_str = 'add_%s_col' % learn_magspecs_all.shape[0]
learn_feats = np.concatenate((learn_feats, learn_feats_all))
learn_magspecs = np.concatenate((learn_magspecs, learn_magspecs_all))
test_feats = Feats[start_t_frame:start_t_frame + nb_test_frames, :]
test_magspecs = Feats[start_t_frame:start_t_frame + nb_test_frames, :]
learn_sample = learn_ratio * Datas.shape[0]
start_l_sample = start_l_ratio * Datas.shape[0]
test_sample = test_ratio * Datas.shape[0]
start_t_sample = start_t_ratio * Datas.shape[0]
ref_learn_data = Datas[start_l_sample:start_l_sample + learn_sample]
sig_learn_ref = Signal(ref_learn_data, sr)
ref_test_data = Datas[start_t_sample:start_t_sample + test_sample]
sig_test_ref = Signal(ref_test_data, sr)
nb_median = 5
nb_iter_gl = 20
l_medfilt = 1
params = {}
params['win_size'] = int(wintime * sr)
params['step_size'] = int(steptime * sr)
res_array = regression.eval_knn(learn_feats, learn_magspecs, test_feats,
                                test_magspecs, ref_test_data, nb_median,
                                nb_iter_gl, l_medfilt, params)
output_path = '/home/manu/workspace/audio-sketch/src/results/'
def runTest(self):
    ''' take the base previously constructed and retrieve the song index
    based on 200 atoms per second '''
    print "------------------ Test6 recognition ---------"
    nbCandidates = 8
    ppdb = STFTPeaksBDB('LargeSTFTdb.db', load=True)
    print 'Large Db of ' + str(ppdb.get_stats()['nkeys']) + ' keys and ' + \
        str(ppdb.get_stats()['ndata']) + ' data entries'
    # Now take a song, decompose it and try to retrieve it
    fileIndex = 6
    RandomAudioFilePath = file_names[fileIndex]
    print 'Working on ' + str(RandomAudioFilePath)
    pySig = Signal(op.join(audio_files_path, RandomAudioFilePath), mono=True)
    pyDico = LODico(sizes)
    segDuration = 5
    offsetDuration = 7
    offset = offsetDuration * pySig.fs
    nbAtom = 50
    segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
    pySig.crop(offset, offset + segmentLength)
    approx, decay = mp.mp(pySig, pyDico, 40, nbAtom, pad=True)
    # plt.figure()
    # approx.plotTF()
    # plt.show()
    res = map(ppdb.get, map(ppdb.kform, approx.atoms),
              [(a.time_position - pyDico.get_pad()) / approx.fs
               for a in approx.atoms])
    # res = map(bdb.get, map(bdb.kform, approx.atoms))
    histogram = np.zeros((600, nbCandidates))
    for i in range(approx.atom_number):
        print res[i]
        histogram[res[i]] += 1
    max1 = np.argmax(histogram[:])
    Offset1 = max1 / nbCandidates
    estFile1 = max1 % nbCandidates
    # candidates, offsets = ppdb.retrieve(approx)
    # print approx.atom_number
    histograms = ppdb.retrieve(approx, offset=0, nbCandidates=8)
    # print histograms, np.max(histograms), np.argmax(histograms, axis=0),
    # np.argmax(histograms, axis=1)
    # plt.figure()
    # plt.imshow(histograms[0:20,:], interpolation='nearest')
    # plt.show()
    maxI = np.argmax(histograms[:])
    OffsetI = maxI / nbCandidates
    estFileI = maxI % nbCandidates
    print fileIndex, offsetDuration, estFileI, OffsetI, estFile1, Offset1, max1, maxI
    import matplotlib.pyplot as plt
    # plt.figure(figsize=(12,6))
    # plt.subplot(121)
    # plt.imshow(histograms, aspect='auto', interpolation='nearest')
    # plt.subplot(122)
    # plt.imshow(histogram, aspect='auto', interpolation='nearest')
    # plt.colorbar()
    # plt.show()
    print maxI, OffsetI, estFileI
    self.assertEqual(histograms[OffsetI, estFileI], np.max(histograms))
    self.assertEqual(fileIndex, estFileI)
    self.assertTrue(abs(offsetDuration - OffsetI) <= 2.5)
def runTest(self):
    print "------------------ Test3 Populate from a true pair of peaks ---------"
    fileIndex = 2
    RandomAudioFilePath = file_names[fileIndex]
    print 'Working on %s' % RandomAudioFilePath
    sizes = [2 ** j for j in range(7, 15)]
    segDuration = 5
    nbAtom = 20
    pySig = Signal(op.join(audio_files_path, RandomAudioFilePath),
                   mono=True, normalize=True)
    segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
    nbSeg = floor(pySig.length / segmentLength)
    # cropping
    pySig.crop(0, segmentLength)
    # create the sparsified matrix of peaks:
    # the easiest is to use the existing peak picking in sketch
    from classes import sketch
    sk = sketch.STFTPeaksSketch()
    sk.recompute(pySig)
    sk.sparsify(100)
    fgpt = sk.fgpt(sparse=True)
    ppdb = STFTPeaksBDB('STFTPeaksdb.db', load=False)
    # ppdb.keyformat = None
    # compute the pairs of peaks
    peak_indexes = np.nonzero(fgpt[0, :, :])
    # Take one peak
    peak_ind = (peak_indexes[0][2], peak_indexes[1][2])
    f_target_width = 2 * sk.params['f_width']
    t_target_width = 2 * sk.params['t_width']
    import matplotlib.pyplot as plt
    plt.figure()
    plt.imshow(np.log(np.abs(
        fgpt[0, peak_ind[0]:peak_ind[0] + f_target_width,
             peak_ind[1]:peak_ind[1] + t_target_width])))
    target_points_i, target_points_j = np.nonzero(
        fgpt[0, peak_ind[0]:peak_ind[0] + f_target_width,
             peak_ind[1]:peak_ind[1] + t_target_width])
    # now we can build a pair of peaks, and thus a key
    f1 = (float(peak_ind[0]) / sk.params['scale']) * pySig.fs
    f2 = (float(peak_ind[0] + target_points_i[1]) / sk.params['scale']) * pySig.fs
    delta_t = float(target_points_j[1] * sk.params['step']) / float(pySig.fs)
    t1 = float(peak_ind[1] * sk.params['step']) / float(pySig.fs)
    key = (f1, f2, delta_t)
    print key, t1
    ppdb.populate(sk.fgpt(), sk.params, fileIndex)
    nKeys = ppdb.get_stats()['ndata']
    # compare the number of keys in the base to the number of atoms
    # print ppdb.get_stats()
    self.assertEqual(nKeys, 116)
    # now try to recover the fileIndex knowing one key
    T, fileI = ppdb.get(key)
    self.assertEqual(fileI[0], fileIndex)
    Tpy = np.array(T)
    print Tpy
    self.assertTrue((np.abs(Tpy - t1)).min() < 0.5)
    # last check: what does a request for a non-existing atom in the base return?
    T, fileI = ppdb.get((11, 120.0, 0.87))
    self.assertEqual(T, [])
    self.assertEqual(fileI, [])
    # now let's just retrieve the atoms from the base and see if they are the same
    histograms = ppdb.retrieve(fgpt, sk.params)
    # plt.figure()
    # plt.imshow(histograms[0:10,:])
    # plt.show()
    del ppdb
    for c in range(n_distance.shape[0]):
        print "Cand %d: " % c, learn_feats[cands[c], 12:15], distance[c]
        cand_DeltaL = learn_feats[cands[c], 13] - learn_feats[cands[c], 12]
        n_distance[c] += lambda_L * np.abs(cand_DeltaL - DeltaL)
        if cand_DeltaL < thresh_lambda and forceAttack:
            n_distance[c] = 0
    b_c = np.argmin(n_distance)
    print "New best candidate is %d with a score of %1.4f" % (
        b_c, n_distance[b_c]), n_distance
    return cands[b_c], n_distance[b_c]

# load the audio data and the features
audio_file_path = '/sons/rwc/Learn/rwc-g-m01_1.wav'
output_path = '/home/manu/workspace/audio-sketch/src/results/audio'
orig_sig = Signal(audio_file_path)
test_file = 'rwc-g-m01_1'
h5_file_path = '/sons/rwc/Learn/hdf5/rwc-g-m01_1.h5'
feats = []
segs = []
get_ten_features_from_file(feats, segs, [], h5_file_path)
# plot part of the audio and the segmentation
seg_starts = segs[0][0]
seg_duration = np.diff(seg_starts)
nseg = 100
max_time = seg_starts[nseg] + seg_duration[nseg]
fs = orig_sig.fs
plt.figure()
plt.subplot(211)
plt.imshow(np.abs(sk.cort.cor[-1, 0, :, :]))
plt.subplot(212)
plt.plot(np.sum(np.abs(sk.cort.cor[-1, 0, :, :]), axis=0))
#plt.show()
plt.figure()
plt.subplot(211)
plt.imshow(np.abs(sk.cort.cor[0, -1, :, :].T))
plt.subplot(212)
plt.plot(np.sum(np.abs(sk.cort.cor[0, -1, :, :]), axis=1))
plt.show()

sig = Signal(os.path.abspath('../reporting/audio/original_surprise.wav'),
             mono=True, normalize=True)
#sig.crop(0, 2*sig.fs)
sk = CorticoSubPeaksSketch(**{'n_inv_iter': 5})
sk.recompute(sig)
#sk.sparsify(100)
#sk.represent()
#plt.show()
combis = [(0, 6), (4, 6), (0, 11), (4, 11)]
for combi in combis:
    # keep only one (rate, scale) sub-plane of the representation
    sk.sp_rep = np.zeros_like(sk.rep)
    sk.sp_rep[combi[0], combi[1], :, :] = sk.rep[combi[0], combi[1], :, :]
    aud_path = os.path.abspath('../reporting/figures/')
    f = plt.figure(figsize=(10, 6))
output_audio_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Audio/'
output_fig_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Figures/'
colormap = cm.jet
format = (8, 3)
# also load Dan Ellis's synthesized version
# The Piano cross-synthesis and the Viterbi-smoothed Musaicing?
# resynthesize using the first N frames
n_max_frames = 900
nb_gl_iter = 30
init_vec = np.random.randn(128 * n_max_frames)
x_recon_median = transforms.gl_recons(median_magspec[:, :n_max_frames],
                                      init_vec, nb_gl_iter, 512, 128,
                                      display=False)
sig_median = Signal(x_recon_median, 22050, normalize=True)
sig_median.write(os.path.join(output_audio_path, '%s_add_median.wav' % t_name))
plt.figure(figsize=format)
sig_median.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_add_median.png' % t_name))
init_vec = np.random.randn(128 * n_max_frames)
x_recon_orig = transforms.gl_recons(orig_spec[:, :n_max_frames], init_vec,
                                    nb_gl_iter, 512, 128, display=False)
sig_orig = Signal(x_recon_orig, 22050, normalize=True)
sig_orig.write(os.path.join(output_audio_path, '%s_original.wav' % t_name))
plt.figure(figsize=format)
sig_orig.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_original.png' % t_name))
init_vec = np.random.randn(128 * n_max_frames)
    synth_sig.spectrogram(1024, 64, order=0.5, log=False,
                          cmap=cm.hot, cbar=False)
    plt.savefig(op.join(figure_output_path, 'voice_%d_spectro.pdf' % i))
    synth_sig.write(op.join(audio_output_path, 'voice_%d_spectro.wav' % i))
    i += 1

sig_1_path = '/sons/voxforge/main/Learn/cmu_us_slt_arctic/wav/arctic_a0372.wav'
sk = sketch.STFTPeaksSketch(**{'scale': 256, 'step': 128})
sk2 = sketch.STFTPeaksSketch(**{'scale': 4096, 'step': 512})
sk.recompute(Signal(sig_1_path, mono=True))
sk2.recompute(Signal(sig_1_path, mono=True))
sk.sparsify(1000)
sk2.sparsify(1000)
sparse_sig = sk.synthesize(sparse=True)
sparse_sig2 = sk2.synthesize(sparse=True)
plt.figure()
#plt.subplot(211)
sparse_sig.spectrogram(256, 128, order=0.5, log=False, cmap=cm.hot, cbar=False)
plt.savefig(op.join(figure_output_path, 'STFTPeaks_voice_256.pdf'))
sparse_sig.write(op.join(audio_output_path, 'STFTPeaks_voice_256.wav'))
plt.figure()
sparse_sig2.spectrogram(256, 128, order=0.5,
plt.show()

############## DEBUG part
# why do we have some NaNs?
#scores = do_feat_invert_test1(1,
#                              100000, 20, 0.032,
#                              16000, [5,],
#                              [5], test_filepath)
l_specs, l_feats = load_learned_database(50000, 1, 0.032, 7)
knn = NearestNeighbors(n_neighbors=3)
knn.fit(l_feats)
t_specs, t_feats, t_data = load_test_datas(test_filepath, 0.032, 7)
distance, neighbs = knn.kneighbors(t_feats, n_neighbors=5,
                                   return_distance=True)
x_recon = reconstruct(l_specs, t_specs, neighbs, 5, int(0.032 * 16000), 10)
sti.stiFromAudio(t_data, x_recon, 16000, calcref=False, downsample=None,
                 name="unnamed")
sig = Signal(x_recon, 16000, normalize=True)
#### the best is: np.unravel_index(np.argmax(masked_sti_scores), masked_sti_scores.shape)
print "Loading ", filepath signalin, fs = get_audio(filepath, ref_audio_start, ref_audio_duration) target_length = target_audio_duration*fs print "Loaded %s length of %d "%( filepath, len(signalin)) print "Stretching to %2.2f"%target_length # adjust the Loudness ? if rescale: rescale_str = 'normed' signalin = signalin.astype(float) signalin /= 8192.0 signalin /= np.max(signalin) # N = float(len(signalin)) # target_loudness = test_feats[test_seg_idx, 13] # adjust = target_loudness - 10*np.log10((1.0/N)*np.sum(signalin**2)) # signalin *= 10**(adjust/10.) signalin *= 8192.0 signalin = signalin.astype(np.int16) sigout[num_neigh].append(time_stretch(signalin, tscale, wsize=1024, tstep=128)[128:-1024]) for num_neigh in range(n_neighbs): rec_sig = Signal(np.concatenate(sigout[num_neigh]), fs, normalize=True) rec_sig.write('/home/manu/workspace/audio-sketch/src/results/audio/%s_with%s_%dfeats_%s_neighbor_%d.wav'%( os.path.split(test_file)[-1], learntype, n_feat, rescale_str, num_neigh))
def expe_1_synth_from_same_sample():
    input_dir = '/sons/rwc/Learn/'
    output_dir = '/sons/rwc/Learn/hdf5/'
    audiofile = input_dir + 'rwc-g-m01_1.wav'
    h5file = output_dir + 'rwc-g-m01_1.h5'
    # load the Echo Nest features
    h5 = hdf5_getters.open_h5_file_read(h5file)
    timbre = hdf5_getters.get_segments_timbre(h5)
    loudness_start = hdf5_getters.get_segments_loudness_start(h5)
    loudness_max = hdf5_getters.get_segments_loudness_max(h5)
    loudness_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
    C = hdf5_getters.get_segments_pitches(h5)
    segments_all = hdf5_getters.get_segments_start(h5)
    learn_feats_all = np.hstack((timbre,
                                 loudness_start.reshape((loudness_start.shape[0], 1)),
                                 C))
    # OK, that was the best possible case; now let us try to find the nearest
    # neighbors, get the segments back and resynthesize!
    learn_duration = 200  # in seconds
    test_start = 200
    test_duration = 5
    # Get the learning data
    learning = Signal(audiofile, mono=True)
    learning.crop(0, learn_duration * learning.fs)
    wsize = 1024
    tstep = 512
    # Get the magnitude spectrum for the given audio file
    learn_specs = features.get_stft(learning.data, wsize, tstep)
    learn_specs = learn_specs.T
    max_l_seg_idx = np.where(segments_all < learn_duration)[0][-1]
    l_segments = segments_all[:max_l_seg_idx]
    l_segment_lengths = (l_segments[1:] - l_segments[0:-1]) * learning.fs
    learn_feats = learn_feats_all[:max_l_seg_idx, :]
    # for each segment index, keep the corresponding indices in the
    # learn_specs matrix
    l_seg_bounds = []
    ref_time = np.arange(0., float(learning.length) / float(learning.fs),
                         float(tstep) / float(learning.fs))
    for segI in range(len(l_segments) - 1):
        startIdx = np.where(ref_time > l_segments[segI])[0][0]
        endIdx = np.where(ref_time > l_segments[segI + 1])[0][0]
        l_seg_bounds.append((startIdx, endIdx))
    l_seg_bounds.append((endIdx, ref_time.shape[0]))
    # Get the testing data (NB: the original cropped with learning.fs here;
    # both signals come from the same file, so the rates are identical)
    testing = Signal(audiofile, mono=True)
    testing.crop(test_start * testing.fs,
                 (test_start + test_duration) * testing.fs)
    # get the testing features
    min_t_seg_idx = np.where(segments_all < test_start)[0][-1]
    max_t_seg_idx = np.where(segments_all < test_start + test_duration)[0][-1]
    t_segments = segments_all[min_t_seg_idx:max_t_seg_idx]
    t_segment_lengths = (t_segments[1:] - t_segments[0:-1]) * testing.fs
    test_feats = learn_feats_all[min_t_seg_idx:max_t_seg_idx, :]
    # find the nearest neighbors
    from sklearn.neighbors import NearestNeighbors
    neigh = NearestNeighbors(1)
    # fit on the learning data
    neigh.fit(learn_feats)
    neighb_segments_idx = neigh.kneighbors(test_feats, return_distance=False)
    # the neighbors form a set of segment indices: get the spectrogram back
    # from the learning data, then fit the new segment lengths
    target_length = int(test_duration * testing.fs)
    neighb_segments = zip(neighb_segments_idx[:, 0],
                          t_segment_lengths.astype(int))
    morphed_spectro = spec_morph(np.abs(learn_specs), target_length,
                                 neighb_segments, l_seg_bounds)
    # retrieve the true stft for comparison
    test_specs = features.get_stft(testing.data, wsize, tstep)
    plt.figure()
    plt.subplot(121)
    plt.imshow(np.log(np.abs(test_specs)), origin='lower')
    plt.colorbar()
    plt.subplot(122)
    plt.imshow(np.log(morphed_spectro.T), origin='lower')
    plt.colorbar()
    plt.show()
    init_vec = np.random.randn(morphed_spectro.shape[0] * tstep)
    rec_method2 = transforms.gl_recons(morphed_spectro.T, init_vec, 10,
                                       wsize, tstep, display=False)
    rec_sig_2 = Signal(rec_method2, testing.fs, mono=True, normalize=True)
    rec_sig_2.write('/sons/tests/rec_sig2.wav')
                                             filter_key=None,
                                             t_name=genre,
                                             n_learn_max=1000)
# Recover the dev data in the Solo Piano case
l_feats_piano, l_segments_piano, n_learn_piano = get_learns_multidir(
    solo_piano_dirs, filter_key=None, t_name=genre, n_learn_max=1000)
# Loading the reference and the Ellis reconstruction
print t_path + '/' + t_name + '.au'
orig, fs = get_audio(t_path + '/' + t_name + '.au', 0, target_duration,
                     targetfs=22050)
sig_orig = Signal(orig, fs, normalize=True)
#sig_orig.write(op.join(recons_audio_path, '_original.wav'))
save_fig_audio(sig_orig, recons_audio_path, recons_fig_path, "original")
print "Working on %s duration of %2.2f" % (
    t_name, np.sum(t_seg_duration[:nb_max_seg]))
orig_spec = np.abs(stft.stft(orig, 512, 128)[0, :, :])
Lmin = orig_spec.shape[1]
sig_ellis = Signal('%sellis_resynth%s.wav' % (output_audio_path, t_name),
                   normalize=True)
#sig_ellis.write(op.join(recons_audio_path, '_ellisrec.wav'))
save_fig_audio(sig_ellis, recons_audio_path, recons_fig_path, "ellisrec")
from PyMP import Signal
from scipy.signal import lfilter, hann

#audio_test_file = '/home/manu/workspace/recup_angelique/Sketches/NLS Toolbox/Hand-made Toolbox/forAngelique/61_sadness.wav'
audio_test_file = op.abspath('./audio/original_surprise.wav')
audio_name = 'surprise'

from classes.sketches.bench import *
from classes.sketches.misc import *
from classes.sketches.cochleo import *
from classes.sketches.cortico import *
from classes.pydb import *

fgpthandle = STFTPeaksBDB(None, **{'wall': False})
sk = STFTPeaksSketch(**{'scale': 2048, 'step': 512})
orig_sig = Signal(audio_test_file, normalize=True, mono=True)
noisy_sig = Signal(orig_sig.data + 0.2 * np.random.randn(orig_sig.length),
                   orig_sig.fs, normalize=True, mono=True)
sk.recompute(orig_sig)
sk.sparsify(20)
plt.figure(figsize=(10, 6))
plt.subplot(221)
orig_sig.spectrogram(512, 128, order=2, log=True, ax=plt.gca(),
                     cmap=cm.bone_r,
sys.path.append('/home/manu/workspace/meeg_denoise')
from src.tools import cochleo_tools
#from classes import sketch
import matplotlib.pyplot as plt
from PyMP import Signal
from scipy.signal import lfilter, hann
from scipy.io import loadmat
#from scipy.fftpack import fft, ifft
from numpy.fft import fft, ifft
plt.switch_backend('Agg')

audio_test_file = '/home/manu/workspace/recup_angelique/Sketches/NLS Toolbox/nsltools/_done.au'
audio_test_file = '/sons/jingles/panzani.wav'

############################### Inversion
sig = Signal(audio_test_file, mono=True, normalize=True)
sig.downsample(8000)
# convert to the auditory representation
params = {'frmlen': 8, 'shift': 0, 'fac': -2, 'BP': 1}
gram = cochleo_tools.Cochleogram(sig.data, **params)
import cProfile
cProfile.runctx('gram.build_aud()', globals(), locals())
cProfile.runctx('gram.build_aud_old()', globals(), locals())
aud = gram.build_aud()
# Corticogram: 2D complex transform of y5.
# We need y = gram.y5, para1 = vector of parameters,
# rv = rate vector, sv = scale vector
y = np.array(gram.y5)
def gen_harmo_sig(freqs, L, fs):
    """ generate the sum of pure sine tones at the given frequencies """
    x = np.arange(0.0, float(L) / float(fs), 1.0 / float(fs))
    data = np.zeros(x.shape)
    for f in freqs:
        data += np.sin(2.0 * np.pi * f * x)
    return Signal(data, fs, normalize=True, mono=True)
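# Example use, with assumed values: one second at 8 kHz of an A-major triad
# of pure tones.
demo_fs = 8000
harmo = gen_harmo_sig([440.0, 554.37, 659.25], demo_fs, demo_fs)
harmo.write('/tmp/harmo_demo.wav')  # hypothetical output path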