def runTest(self):
    """Plot a classic and a TF-masked matching pursuit of the Bach excerpt.

    The first ``5 * fs`` samples of ``Bach_prelude_40s.wav`` are padded and
    decomposed twice with the same three MDCT scales: once with a plain
    ``Dico`` and once with a ``SpreadDico``. The two time-frequency
    plots are drawn side by side, followed by both residual energy decay
    curves expressed in dB relative to the signal energy.
    """
    name = "orchestra"  # only referenced by the disabled savefig call
    scales = [128, 1024, 8192]
    max_atoms = 200
    target = 20

    excerpt = Signal(op.join(audio_filepath, "Bach_prelude_40s.wav"),
                     mono=True, normalize=True)
    excerpt.crop(0, 5 * excerpt.fs)
    excerpt.pad(16384)
    energy = np.sum(excerpt.data ** 2)

    plain_dico = mdct_dico.Dico(scales)
    masked_dico = mdct_dico.SpreadDico(scales, all_scales=True,
                                       penalty=0.1, maskSize=10)

    plain_approx, plain_decay = mp.mp(excerpt, plain_dico, target, max_atoms)
    masked_approx, masked_decay = mp.mp(excerpt, masked_dico, target,
                                        max_atoms, pad=False)

    import matplotlib.pyplot as plt

    # Side-by-side time-frequency views of both approximations.
    plt.figure(figsize=(16, 8))
    panels = (
        (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
        (122, masked_approx,
         "Decomposition with TF masking: 200 atoms 3xMDCT"),
    )
    for position, approximation, caption in panels:
        plt.subplot(position)
        approximation.plot_tf(ylim=[0, 4000])
        plt.title(caption)
    # plt.savefig(name + '_TestTFMasking.eps')

    def decay_in_db(decay):
        # Residual energy relative to the original signal energy, in dB.
        return [10 * np.log10(value / energy) for value in decay]

    plt.figure()
    plt.plot(decay_in_db(plain_decay))
    plt.plot(decay_in_db(masked_decay), "r")
    plt.legend(("Classic decomposition", "Spreading Atoms"))
    plt.ylabel("Residual energy decay(dB)")
    plt.xlabel("Iteration")
def runTest(self):
    """Check that a zero-penalty SpreadDico diverges from a Dico after one atom.

    Both dictionaries are initialized on the same signal. Their first best
    atoms must be equal; after that atom is subtracted from the signal and
    both dictionaries are updated, their second best atoms must differ.

    The projection plans created by ``initialize_plans`` are released in a
    ``finally`` clause so that a failing assertion does not leave them
    allocated for the tests that follow.
    """
    pySig = Signal(op.join(audio_filepath, "glocs.wav"), mono=True)
    pySig.crop(0, 5 * pySig.fs)
    pySig.pad(2048)

    dico = [128, 1024, 8192]
    parallelProjections.initialize_plans(
        np.array(dico), np.array([2] * len(dico)))
    try:
        classicDIco = mdct_dico.Dico(dico)
        spreadDico = mdct_dico.SpreadDico(
            dico, all_scales=True, penalty=0, maskSize=3)
        self.assertEqual(spreadDico.mask_times, [3, 3, 3])

        classicDIco.initialize(pySig)
        spreadDico.initialize(pySig)

        classicDIco.update(pySig, 2)
        spreadDico.update(pySig, 2)

        classicAtom1 = classicDIco.get_best_atom(0)
        spreadAtom1 = spreadDico.get_best_atom(0)
        self.assertEqual(classicAtom1, spreadAtom1)

        # Remove the first atom and recompute the projections.
        pySig.subtract(classicAtom1)
        classicDIco.update(pySig, 2)
        spreadDico.update(pySig, 2)

        classicAtom2 = classicDIco.get_best_atom(0)
        spreadAtom2 = spreadDico.get_best_atom(0)
        self.assertNotEqual(classicAtom2, spreadAtom2)
    finally:
        parallelProjections.clean_plans()
def expe1():
    """Print, for each shift, how many fingerprint bins match the first one.

    For every shift (in samples) the test file is loaded, cropped to
    ``[shift, shift + L)`` and sketched with ``CorticoIHTSketch`` sparsified
    with the argument 100. The score of a shift is the number of non-zero
    fingerprint bins it shares with the fingerprint of the first shift.
    """
    shifts = [0,]  # in samples
    colors = ['b', 'r', 'c', 'm']  # only used by the disabled stem plots

    def fingerprint_for(shift):
        # Build the sparsified sketch of one shifted excerpt.
        sig = Signal(audio_test_file, normalize=True, mono=True)
        sig.crop(shift, shift + L)
        sk = sketch.CorticoIHTSketch()
        sk.recompute(sig)
        sk.sparsify(100)
        # sk.represent()
        # plt.suptitle("Shift of %2.2f sec"%(float(shift)/float(fs)))
        return sk.fgpt()

    fgpts = [fingerprint_for(shift) for shift in shifts]

    bin_nnz_ref = np.flatnonzero(fgpts[0])
    score = [
        len(np.intersect1d(bin_nnz_ref, np.flatnonzero(fgpt),
                           assume_unique=True))
        for fgpt in fgpts
    ]
    print(score)
def runTest(self):
    """Check that a SpreadBlock diverges from a plain Block after one atom.

    With ``penalty=0`` both blocks must select the same first atom. After
    the mask is computed around that atom and the atom is subtracted from
    the signal, the second atoms selected by the two blocks must differ.

    ``clean_plans`` runs in a ``finally`` clause so the projection plans are
    released even when one of the assertions fails.
    """
    def describe(atom):
        # Same fields, order and spacing as the former chained prints.
        return "%s %s %s %s" % (atom.length, atom.frame,
                                atom.freq_bin, atom.mdct_value)

    pySig = Signal(op.join(audio_filepath, "glocs.wav"), mono=True)
    pySig.crop(0, 5 * pySig.fs)
    pySig.pad(2048)

    scale = 1024
    parallelProjections.initialize_plans(np.array([scale]), np.array([2]))
    try:
        classicBlock = mdct_block.Block(scale, pySig, 0, debug_level=3)
        spreadBlock = mdct_block.SpreadBlock(
            scale, pySig, 0, debug_level=3, penalty=0, maskSize=5)

        # compute the projections, should be equivalent
        classicBlock.update(pySig, 0, -1)
        spreadBlock.update(pySig, 0, -1)

        maxClassicAtom1 = classicBlock.get_max_atom()
        print(describe(maxClassicAtom1))
        maxSpreadcAtom1 = spreadBlock.get_max_atom()
        print(describe(maxSpreadcAtom1))

        # assert equality using the inner comparison method of MDCT atoms
        self.assertEqual(maxClassicAtom1, maxSpreadcAtom1)

        # verifying the masking index construction
        mask_frame_width = 2
        mask_bin_width = 1
        spreadBlock.compute_mask(
            maxSpreadcAtom1, mask_bin_width, mask_frame_width, 0.5)

        # Expected mask index ranges; they only feed the assertions that
        # are currently disabled below.
        c_frame = int(np.ceil(maxSpreadcAtom1.time_position / (scale / 2)))
        c_bin = int(maxSpreadcAtom1.reduced_frequency * scale)
        z1 = np.arange(int(c_frame - mask_frame_width),
                       int(c_frame + mask_frame_width) + 1)
        z2 = np.arange(int(c_bin - mask_bin_width),
                       int(c_bin + mask_bin_width) + 1)
        # np.testing.assert_array_equal(spreadBlock.mask_index_x, z1)
        # np.testing.assert_array_equal(spreadBlock.mask_index_y, z2)

        pySig.subtract(maxSpreadcAtom1)

        # recompute the projections
        classicBlock.update(pySig, 0, -1)
        spreadBlock.update(pySig, 0, -1)

        maxClassicAtom2 = classicBlock.get_max_atom()
        print(describe(maxClassicAtom2))
        maxSpreadcAtom2 = spreadBlock.get_max_atom()
        print(describe(maxSpreadcAtom2))

        self.assertNotEqual(maxClassicAtom2, maxSpreadcAtom2)
    finally:
        parallelProjections.clean_plans()
def runTest(self):
    """Plot a classic MP against an MP that spreads only on two scales.

    The glockenspiel excerpt is decomposed with a plain ``Dico`` and with a
    ``SpreadDico`` restricted to the scales 1024 and 8192. Three figures
    are produced: the two time-frequency plots, the residual energy decay
    curves in dB, and, for the blocks at index 1 and 2 of the spread
    dictionary, the mask reshaped as an image above the output of
    ``im_proj_matrix()``.

    The reshape dimensions use explicit floor division so they stay
    integers regardless of the division semantics in effect.
    """
    pySig = Signal(op.join(audio_filepath, "glocs.wav"),
                   mono=True, normalize=True)
    pySig.crop(0, 5 * pySig.fs)
    pySig.pad(16384)
    sigEnergy = np.sum(pySig.data ** 2)

    dico = [128, 1024, 8192]
    nbAtoms = 200
    classicDIco = mdct_dico.Dico(dico, useC=False)
    spreadDico = mdct_dico.SpreadDico(
        dico, all_scales=False, spread_scales=[1024, 8192],
        penalty=0.1, mask_time=2, mask_freq=2)

    approxClassic, decayClassic = mp.mp(pySig, classicDIco, 20, nbAtoms)
    approxSpread, decaySpread = mp.mp(
        pySig, spreadDico, 20, nbAtoms, pad=False)

    plt.figure(figsize=(16, 8))
    plt.subplot(121)
    approxClassic.plot_tf(ylim=[0, 4000])
    plt.title("Classic decomposition : 200 atoms 3xMDCT")
    plt.subplot(122)
    approxSpread.plot_tf(ylim=[0, 4000])
    plt.title("Decomposition with TF masking: 200 atoms 3xMDCT")

    plt.figure()
    plt.plot([10 * np.log10(i / sigEnergy) for i in decayClassic])
    plt.plot([10 * np.log10(i / sigEnergy) for i in decaySpread], "r")
    plt.legend(("Classic decomposition", "Spreading Atoms"))
    plt.ylabel("Residual energy decay(dB)")
    plt.xlabel("Iteration")

    plt.figure()
    for blockI in range(1, 3):
        block = spreadDico.blocks[blockI]
        # The flat mask is viewed as rows of scale/2 values.
        half_scale = block.scale // 2
        n_rows = block.mask.shape[0] // half_scale

        plt.subplot(2, 2, blockI)
        print("%s %s %s" % (block.mask.shape, n_rows, half_scale))
        plt.imshow(
            np.reshape(block.mask, (n_rows, half_scale)),
            interpolation="nearest",
            aspect="auto",
        )
        plt.colorbar()

        plt.subplot(2, 2, blockI + 2)
        block.im_proj_matrix()
        plt.colorbar()
def expe_1():
    """Save and show the waveform and spectrogram of the test excerpt.

    The test file is cropped to the span between ``0.1 * fs`` and
    ``3.5 * fs`` samples. The upper panel shows the waveform against time
    in seconds, the lower panel the spectrogram; the figure is written to
    ``glocs_spectro.pdf`` in ``figure_output_path`` and then displayed.
    """
    synth_sig = Signal(audio_test_file, normalize=True, mono=True)
    first_sample = 0.1 * synth_sig.fs
    last_sample = 3.5 * synth_sig.fs
    synth_sig.crop(first_sample, last_sample)
    # synth_sig.resample(32000)

    plt.figure(figsize=(10, 5))

    # Upper panel: waveform.
    plt.subplot(211)
    seconds = np.arange(.0, synth_sig.length) / float(synth_sig.fs)
    plt.plot(seconds, synth_sig.data)
    plt.xticks([])
    plt.ylim([-1, 1])
    plt.grid()

    # Lower panel: spectrogram.
    plt.subplot(212)
    synth_sig.spectrogram(1024, 64, order=0.25, log=False,
                          cmap=cm.hot, cbar=False)

    output_pdf = op.join(figure_output_path, 'glocs_spectro.pdf')
    plt.savefig(output_pdf)
    plt.show()
""" get the indexes in the (sorted) array such that elements are smaller than value """ idxset = [] idx = startIdx while idx <= array.shape[0] - 1 and array[idx] < stopvalue: idxset.append(idx) idx += 1 # print idx, array[idx] return idxset original = Signal(audiofile, mono=True) max_duration = 20 # in seconds original.crop(0, max_duration * original.fs) wsize = 1024 tstep = 512 # Get the magnitude spectrum for the given audio file learn_specs = features.get_stft(original.data, wsize, tstep) learn_specs = learn_specs.T # Read the features in the h5 file h5 = hdf5_getters.open_h5_file_read(h5file) timbre = hdf5_getters.get_segments_timbre(h5) loudness_start = hdf5_getters.get_segments_loudness_start(h5) C = hdf5_getters.get_segments_pitches(h5) segments_all = hdf5_getters.get_segments_start(h5) learn_feats_all = np.hstack(
import matplotlib.pyplot as plt
import os
from PyMP import Signal, mp, mp_coder
from PyMP.mdct import Dico

# Decompose 4 seconds of ClocheB.wav with a 3-scale MDCT dictionary, then
# encode the approximation with simple_mdct_encoding and plot the result.
abPath = os.path.abspath("../../data/")
sig = Signal(os.path.join(abPath, "ClocheB.wav"), mono=True)  # Load Signal
sig.crop(0, 4.0 * sig.fs)  # Keep only 4 seconds

# atom of scales 8, 64 and 512 ms
# Floor division keeps the sizes integral whatever division semantics
# are in effect.
scales = [(s * sig.fs) // 1000 for s in (8, 64, 512)]

# Dictionary for Standard MP
pyDico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
mpApprox, mpDecay = mp.mp(sig, pyDico, 20, 2000)

SNR, bitrate, quantizedApprox = mp_coder.simple_mdct_encoding(
    mpApprox, 2000, Q=14)

quantizedApprox.plot_tf()
plt.show()
mpl.rcParams['image.interpolation'] = 'Nearest'
# mpl.rcParams['text.usetex'] = True
from PyMP import Signal, mp
from PyMP.mdct import Dico

# Decomposition settings: three MDCT scales, at most 1000 atoms.
sizes = [128, 1024, 8192]
n_atoms = 1000

abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)

# taking only the first musical phrase (3.5 seconds approximately)
sig.crop(0, 3.5 * sig.fs)
sig.pad(8192)

# add some minor noise to avoid null areas
sig.data += 0.0001 * np.random.randn(sig.length)

# create MDCT multiscale dictionary, then run the MP routine on it
dico = Dico(sizes)
approx, decay = mp.mp(sig, dico, 50, n_atoms)

# plotting the results: time axis in seconds, one value per sample
timeVec = np.arange(0, float(sig.length)) / sig.fs
plt.figure(figsize=(10, 6))
def runTest(self):
    """Retrieve a song index and offset from the previously built base.

    A segment of file 6, starting ``offsetDuration`` seconds into the file,
    is decomposed with matching pursuit. The base is then queried twice:
    atom by atom through ``ppdb.get`` (votes accumulated in ``histogram``),
    and in one call through ``ppdb.retrieve``. The test asserts on the
    ``retrieve`` result only: the decoded cell must hold the histogram
    maximum, the decoded file must be the query file, and the decoded
    offset must lie within 2.5 of ``offsetDuration``.

    The flat ``argmax`` indices are decoded with explicit floor division
    because the quotient is used as an array index.
    """
    print("------------------ Test6 recognition ---------")
    nbCandidates = 8
    ppdb = STFTPeaksBDB('LargeSTFTdb.db', load=True)
    print('Large Db of ' + str(ppdb.get_stats()['nkeys']) + ' and ' + str(
        ppdb.get_stats()['ndata']))

    # Now take a song, decompose it and try to retrieve it
    fileIndex = 6
    RandomAudioFilePath = file_names[fileIndex]
    print('Working on ' + str(RandomAudioFilePath))
    pySig = Signal(op.join(audio_files_path, RandomAudioFilePath), mono=True)
    pyDico = LODico(sizes)

    segDuration = 5
    offsetDuration = 7
    offset = offsetDuration * pySig.fs
    nbAtom = 50
    # Round the segment down to a whole number of largest-scale frames.
    segmentLength = ((segDuration * pySig.fs) // sizes[-1]) * sizes[-1]
    pySig.crop(offset, offset + segmentLength)

    approx, decay = mp.mp(pySig, pyDico, 40, nbAtom, pad=True)

    # One base lookup per atom. Built as a list because it is indexed
    # below. NOTE(review): the time offset keeps the original `/`; whether
    # integer or fractional seconds are intended cannot be told from here.
    pad = pyDico.get_pad()
    res = [ppdb.get(ppdb.kform(a), (a.time_position - pad) / approx.fs)
           for a in approx.atoms]

    histogram = np.zeros((600, nbCandidates))
    for i in range(approx.atom_number):
        print(res[i])
        histogram[res[i]] += 1

    # argmax works on the flattened array: decode (offset row, file column)
    max1 = np.argmax(histogram[:])
    Offset1 = max1 // nbCandidates
    estFile1 = max1 % nbCandidates

    histograms = ppdb.retrieve(approx, offset=0, nbCandidates=8)
    maxI = np.argmax(histograms[:])
    OffsetI = maxI // nbCandidates
    estFileI = maxI % nbCandidates

    print(" ".join(str(value) for value in (
        fileIndex, offsetDuration, estFileI, OffsetI,
        estFile1, Offset1, max1, maxI)))
    print(" ".join(str(value) for value in (maxI, OffsetI, estFileI)))

    self.assertEqual(histograms[OffsetI, estFileI], np.max(histograms))
    self.assertEqual(fileIndex, estFileI)
    self.assertTrue(abs(offsetDuration - OffsetI) <= 2.5)
# Positional and keyword arguments common to both resynthesis passes.
resynth_args = (neighb_segments_idx[:, 0], test_segs, t_seg_duration,
                learn_segs, learn_feats, ref_audio_dir, ext, 22050)
resynth_opts = dict(max_synth_idx=max_synth_idx, marge=3, verbose=True)

# Plain pass: no time stretching, no normalization.
sig_out = resynth_sequence(*resynth_args, dotime_stretch=False,
                           normalize=False, **resynth_opts)
# Normalized pass: time stretching and normalization enabled.
sig_out_normalized = resynth_sequence(*resynth_args, dotime_stretch=True,
                                      normalize=True, **resynth_opts)

# sig_viterbi = Signal(sig_out_viterbi, 22050, normalize=True)
rec_sig = Signal(sig_out, 22050, normalize=True)
rec_sig.crop(0, test_segs[max_synth_idx] * rec_sig.fs)
save_fig_audio(rec_sig, '%s_plain_cross_%s' % (test_file, learntype))

rec_sig_normalized = Signal(sig_out_normalized, 22050, normalize=True)
# NOTE(review): the crop end is computed from rec_sig.fs, not
# rec_sig_normalized.fs; both signals are built at 22050.
rec_sig_normalized.crop(0, test_segs[max_synth_idx] * rec_sig.fs)
save_fig_audio(rec_sig_normalized,
               '%s_normalized_cross_%s' % (test_file, learntype))

# load original audio over the duration of the plain reconstruction
orig_data, fs = get_audio(audio_file_path, 0, rec_sig.get_duration(),
                          targetfs=None, verbose=True)
orig_sig = Signal(orig_data, fs, normalize=True)
""" import numpy as np from PyMP.mdct import Dico, atom from PyMP import Signal, approx sig = Signal('../data/glocs.wav', debug_level=3) print sig print sig.data # sig.plot() # sig.write('newDestFile.wav') # editing print 'Before cropping Length of ', sig.length sig.crop(0, 2048) print 'After cropping Length of ', sig.length sub_sig = sig[0:2048] print sub_sig new_sig = Signal(np.ones((8,)), 1) new_sig.data print "Padding" new_sig.pad(4) new_sig.data print "De-Padding" new_sig.depad(4) new_sig.data
def expe_1_synth_from_same_sample():
    """Resynthesize a test excerpt from nearest-neighbour segments of one file.

    Per-segment features (timbre, loudness start, pitches) are read from the
    HDF5 file. Segments starting before ``learn_duration`` form the learning
    set; segments within ``[test_start, test_start + test_duration)`` form
    the test set. Each test segment is matched to its nearest learning
    segment, the matching magnitude spectra are passed to ``spec_morph``,
    and the result is inverted with ``transforms.gl_recons`` and written
    to ``/sons/tests/rec_sig2.wav``. The true and morphed spectrograms are
    displayed side by side.
    """
    input_dir = '/sons/rwc/Learn/'
    output_dir = '/sons/rwc/Learn/hdf5/'
    audiofile = input_dir + 'rwc-g-m01_1.wav'
    h5file = output_dir + 'rwc-g-m01_1.h5'

    # load the Echo Nest features, releasing the file handle afterwards
    h5 = hdf5_getters.open_h5_file_read(h5file)
    try:
        timbre = hdf5_getters.get_segments_timbre(h5)
        loudness_start = hdf5_getters.get_segments_loudness_start(h5)
        C = hdf5_getters.get_segments_pitches(h5)
        segments_all = hdf5_getters.get_segments_start(h5)
    finally:
        h5.close()

    learn_feats_all = np.hstack(
        (timbre,
         loudness_start.reshape((loudness_start.shape[0], 1)),
         C))

    # Ok That was the best possible case, now let us try to find the
    # nearest neighbors, get the segment back and resynthesize!
    learn_duration = 200  # in seconds
    test_start = 200
    test_duration = 5

    # Get learning data
    learning = Signal(audiofile, mono=True)
    learning.crop(0, learn_duration * learning.fs)

    wsize = 1024
    tstep = 512
    # Get the magnitude spectrum for the given audio file
    learn_specs = features.get_stft(learning.data, wsize, tstep)
    learn_specs = learn_specs.T

    max_l_seg_idx = np.where(segments_all < learn_duration)[0][-1]
    l_segments = segments_all[:max_l_seg_idx]
    learn_feats = learn_feats_all[:max_l_seg_idx, :]

    # we must keep in mind for each segment index, the corresponding
    # indices in the learn_spec mat
    l_seg_bounds = []
    ref_time = np.arange(0.,
                         float(learning.length) / float(learning.fs),
                         float(tstep) / float(learning.fs))
    for segI in range(len(l_segments) - 1):
        startIdx = np.where(ref_time > l_segments[segI])[0][0]
        endIdx = np.where(ref_time > l_segments[segI + 1])[0][0]
        l_seg_bounds.append((startIdx, endIdx))
    # The last segment runs to the end of the time axis.
    l_seg_bounds.append((endIdx, ref_time.shape[0]))

    # Get testing data; both crop bounds use the test signal's own rate
    testing = Signal(audiofile, mono=True)
    testing.crop(test_start * testing.fs,
                 (test_start + test_duration) * testing.fs)

    # get the testing features
    min_t_seg_idx = np.where(segments_all < test_start)[0][-1]
    max_t_seg_idx = np.where(
        segments_all < test_start + test_duration)[0][-1]
    t_segments = segments_all[min_t_seg_idx:max_t_seg_idx]
    t_segment_lengths = (t_segments[1:] - t_segments[0:-1]) * testing.fs
    test_feats = learn_feats_all[min_t_seg_idx:max_t_seg_idx, :]

    # find the nearest neighbors
    from sklearn.neighbors import NearestNeighbors
    neigh = NearestNeighbors(n_neighbors=1)
    # fit on the learning data
    neigh.fit(learn_feats)
    neighb_segments_idx = neigh.kneighbors(test_feats,
                                           return_distance=False)

    # kneighs is a set of segment indices, we need to get the spectrogram
    # back from the learning data then fit the new segment lengths
    target_length = int(test_duration * testing.fs)
    # Materialized so spec_morph always receives a real sequence.
    neighb_segments = list(zip(neighb_segments_idx[:, 0],
                               t_segment_lengths.astype(int)))
    morphed_spectro = spec_morph(np.abs(learn_specs), target_length,
                                 neighb_segments, l_seg_bounds)

    # retrieve true stft for comparison
    test_specs = features.get_stft(testing.data, wsize, tstep)

    plt.figure()
    plt.subplot(121)
    plt.imshow(np.log(np.abs(test_specs)), origin='lower')
    plt.colorbar()
    plt.subplot(122)
    plt.imshow(np.log(morphed_spectro.T), origin='lower')
    plt.colorbar()
    plt.show()

    init_vec = np.random.randn(morphed_spectro.shape[0] * tstep)
    rec_method2 = transforms.gl_recons(morphed_spectro.T, init_vec, 10,
                                       wsize, tstep, display=False)
    rec_sig_2 = Signal(rec_method2, testing.fs, mono=True, normalize=True)
    rec_sig_2.write('/sons/tests/rec_sig2.wav')
# Spectrogram settings shared by every figure below.
spectro_opts = dict(order=1, log=True, cmap=colormap, cbar=False)


def _save_spectrogram(sig, png_name):
    # Draw the spectrogram of `sig` in a new figure and save it as PNG.
    plt.figure(figsize=format)
    sig.spectrogram(512, 128, **spectro_opts)
    plt.savefig(os.path.join(output_fig_path, png_name))


# Original signal: audio file and spectrogram.
sig_orig.write(os.path.join(output_audio_path, '%s_original.wav' % t_name))
_save_spectrogram(sig_orig, '%s_original.png' % t_name)

# Signal reconstructed from the max magnitude spectrum.
init_vec = np.random.randn(128 * n_max_frames)
x_recon_max = transforms.gl_recons(max_magspec[:, :n_max_frames], init_vec,
                                   nb_gl_iter, 512, 128, display=False)
sig_max = Signal(x_recon_max, 22050, normalize=True)
sig_max.write(os.path.join(output_audio_path, '%s_add_max.wav' % t_name))
_save_spectrogram(sig_max, '%s_max.png' % t_name)

# Reference resynthesis loaded from disk, cropped to the length of sig_max.
sig_ellis = Signal(
    '/home/manu/workspace/audio-sketch/src/expe_scripts/invert/ellis_resynth%s.wav' % t_name,
    normalize=True)
sig_ellis.crop(0, sig_max.length)
_save_spectrogram(sig_ellis, '%s_ellis.png' % t_name)

# Stacked comparison figure; rows 2 and 3 share their axes with row 1.
plt.figure(figsize=(16, 12))
ax1 = plt.subplot(411)
# plt.imshow(np.log(orig_spec), origin='lower')
sig_orig.spectrogram(512, 128, ax=ax1, **spectro_opts)
ax2 = plt.subplot(412, sharex=ax1, sharey=ax1)
# plt.imshow(np.log(median_magspec), origin='lower')
sig_median.spectrogram(512, 128, ax=ax2, **spectro_opts)
ax3 = plt.subplot(413, sharex=ax1, sharey=ax1)
# plt.imshow(np.log(max_magspec), origin='lower')
sig_max.spectrogram(512, 128, ax=ax3, **spectro_opts)
from PyMP import Signal, mp
from PyMP.mdct import Dico, LODico
import matplotlib as mpl

# Figure defaults, applied in the same order as before.
for rc_key, rc_value in (
        ('lines.linewidth', 1.0),
        ('font.size', 16.0),
        ('legend.fancybox', True),
        ('legend.shadow', True),
        ('image.interpolation', 'Nearest'),
):
    mpl.rcParams[rc_key] = rc_value
# mpl.rcParams['text.usetex'] = True

# Load glockenspiel signal and keep its first 3 * fs samples
abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)
sig.crop(0, 3 * sig.fs)

# Same scales, atom budget and target for both decompositions.
scales = [128, 1024, 8192]
n_atoms = 500
srr = 30

mp_dico = Dico(scales)
lomp_dico = LODico(scales)

mp_approx, mp_decay = mp.mp(sig, mp_dico, srr, n_atoms, pad=True)
lomp_approx, lomp_decay = mp.mp(sig, lomp_dico, srr, n_atoms, pad=False)

# Upper panel: approximation from the plain dictionary.
plt.figure()
plt.subplot(211)
mp_approx.plot_tf()
plt.subplot(212)
# sim_mat[t,:] = np.sum((t_feats - t_feats[t,:])**2, axis=1) # #plt.figure() #plt.imshow(sim_mat, origin='lower') #plt.colorbar() #plt.show() # now try to viterbi decode this shit from tools.learning_tools import Viterbi vit_path = Viterbi(neigh, distance, trans_penalty=0.01, c_value=20) vit_cands = [neigh[ind, neighbind] for ind, neighbind in enumerate(vit_path)] # sig_out_viterbi = resynth_sequence(np.squeeze(vit_cands), t_seg_starts, t_seg_duration, l_segments, l_feats, ref_audio_dir, '.au', 22050, dotime_stretch=True, max_synth_idx=40, normalize=True) sig_viterbi = Signal(sig_out_viterbi, 22050, normalize=True) sig_viterbi.write( '%s/%s_viterbi_%dFeats_%dLearns_Filter%d.wav' % (outputpath, h5files[t_index - 1], nbFeats, n_learn, filter_key)) sig_viterbi.crop(0, 9.5 * sig_viterbi.fs) # #sig_viterbi = save_audio(outputpath, '%s_viterbi'%h5files[t_index], sig_out_viterbi, 22050, norm_segments=False)
def runTest(self):
    """Populate a peak-pair base from one file and look up a known key.

    A segment of file 2 is sketched with ``STFTPeaksSketch``. One peak and
    one target point near it are turned by hand into a key
    ``(f1, f2, delta_t)``. After the base is populated from the full
    fingerprint the test checks that:

    * the base reports 116 data entries;
    * looking up the hand-built key returns the file index and a time
      within 0.5 of the peak time ``t1``;
    * looking up an absent key returns two empty lists.

    The segment length uses explicit floor division so it is a whole
    multiple of the largest scale under any division semantics.
    """
    print("------------------ Test3 Populate from a true pair of peaks ---------")
    fileIndex = 2
    RandomAudioFilePath = file_names[fileIndex]
    print('Working on %s' % RandomAudioFilePath)

    sizes = [2 ** j for j in range(7, 15)]
    segDuration = 5

    pySig = Signal(op.join(audio_files_path, RandomAudioFilePath),
                   mono=True, normalize=True)
    segmentLength = ((segDuration * pySig.fs) // sizes[-1]) * sizes[-1]
    # cropping
    pySig.crop(0, segmentLength)

    # create the sparsified matrix of peaks
    # the easiest is to use the existing PeakPicking in sketch
    from classes import sketch
    sk = sketch.STFTPeaksSketch()
    sk.recompute(pySig)
    sk.sparsify(100)
    fgpt = sk.fgpt(sparse=True)

    ppdb = STFTPeaksBDB('STFTPeaksdb.db', load=False)

    # compute the pairs of peaks
    peak_indexes = np.nonzero(fgpt[0, :, :])
    # Take one peak (the third non-zero entry)
    peak_ind = (peak_indexes[0][2], peak_indexes[1][2])
    f_target_width = 2 * sk.params['f_width']
    t_target_width = 2 * sk.params['t_width']

    # Target zone starting at the chosen peak.
    target_zone = fgpt[0,
                       peak_ind[0]:peak_ind[0] + f_target_width,
                       peak_ind[1]:peak_ind[1] + t_target_width]

    import matplotlib.pyplot as plt
    plt.figure()
    plt.imshow(np.log(np.abs(target_zone)))

    target_points_i, target_points_j = np.nonzero(target_zone)

    # now we can build a pair of peaks , and thus a key
    f1 = (float(peak_ind[0]) / sk.params['scale']) * pySig.fs
    f2 = (float(peak_ind[0] + target_points_i[1]) /
          sk.params['scale']) * pySig.fs
    delta_t = float(target_points_j[1] * sk.params['step']) / float(
        pySig.fs)
    t1 = float(peak_ind[1] * sk.params['step']) / float(pySig.fs)
    key = (f1, f2, delta_t)
    print("%s %s" % (key, t1))

    ppdb.populate(sk.fgpt(), sk.params, fileIndex)
    nKeys = ppdb.get_stats()['ndata']
    # compare the number of keys in the base to the number of atoms
    self.assertEqual(nKeys, 116)

    # now try to recover the fileIndex knowing one key
    T, fileI = ppdb.get(key)
    self.assertEqual(fileI[0], fileIndex)
    Tpy = np.array(T)
    print(Tpy)
    self.assertTrue((np.abs(Tpy - t1)).min() < 0.5)

    # last check: what does a request for non-existing atom in base return?
    T, fileI = ppdb.get((11, 120.0, 0.87))
    self.assertEqual(T, [])
    self.assertEqual(fileI, [])

    # now let's just retrieve the atoms from the base; the result is not
    # checked, the call only has to complete
    histograms = ppdb.retrieve(fgpt, sk.params)

    del ppdb
"""
Tutorial provided as part of PyMP

M. Moussallam
"""
from PyMP.mdct import Dico, LODico
from PyMP.mdct.rand import SequenceDico
from PyMP import mp, mp_coder, Signal

signal = Signal('../data/ClocheB.wav', mono=True)  # Load Signal
signal.crop(0, 4.0 * signal.fs)  # Keep only 4 seconds

# atom of scales 8, 64 and 512 ms
# Floor division keeps the sizes integral whatever division semantics
# are in effect.
scales = [(s * signal.fs) // 1000 for s in (8, 64, 512)]
signal.pad(scales[-1])

# Dictionary for Standard MP
dico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
app, dec = mp.mp(signal, dico, 20, 2000, pad=False)

app.atom_number

# Encode the same approximation with two values of Q and print the
# resulting (snr, bitrate) pair each time.
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=14)
print((snr, bitrate))

print("With Q=5")
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=5)
print((snr, bitrate))