Esempio n. 1
0
    def runTest(self):
        """Plot classic MP against MP with TF masking on a Bach prelude excerpt.

        Both decompositions use the same three MDCT scales and the same stop
        arguments (20 and 200) for ``mp.mp``. Two figures are drawn: the
        time-frequency plots of both approximations side by side, and both
        residual-energy decay curves in dB relative to the energy of the
        cropped and padded signal. Nothing is asserted.
        """
        scales = [128, 1024, 8192]
        max_atoms = 200

        excerpt = Signal(
            op.join(audio_filepath, "Bach_prelude_40s.wav"), mono=True, normalize=True
        )
        excerpt.crop(0, 5 * excerpt.fs)
        excerpt.pad(16384)
        # Reference energy used to express the decay curves in dB.
        ref_energy = np.sum(excerpt.data ** 2)

        plain_dico = mdct_dico.Dico(scales)
        masking_dico = mdct_dico.SpreadDico(
            scales, all_scales=True, penalty=0.1, maskSize=10
        )

        plain_approx, plain_decay = mp.mp(excerpt, plain_dico, 20, max_atoms)
        masked_approx, masked_decay = mp.mp(
            excerpt, masking_dico, 20, max_atoms, pad=False
        )
        import matplotlib.pyplot as plt

        def decay_in_db(decay):
            # Residual energies relative to the reference energy, in dB.
            return [10 * np.log10(energy / ref_energy) for energy in decay]

        # Side-by-side time-frequency views, limited to 0-4000 on the y axis.
        plt.figure(figsize=(16, 8))
        panels = (
            (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
            (122, masked_approx, "Decomposition with TF masking: 200 atoms 3xMDCT"),
        )
        for position, approximation, title in panels:
            plt.subplot(position)
            approximation.plot_tf(ylim=[0, 4000])
            plt.title(title)

        # Decay curves of both runs on a single axis.
        plt.figure()
        plt.plot(decay_in_db(plain_decay))
        plt.plot(decay_in_db(masked_decay), "r")
        plt.legend(("Classic decomposition", "Spreading Atoms"))
        plt.ylabel("Residual energy decay(dB)")
        plt.xlabel("Iteration")
Esempio n. 2
0
    def runTest(self):
        """Compare the best atoms of a plain Dico and a zero-penalty SpreadDico.

        On the untouched signal both dictionaries must select equal atoms.
        Once that atom has been subtracted from the signal and both
        dictionaries updated, their next best atoms are asserted to differ.
        """
        scales = [128, 1024, 8192]

        sig = Signal(op.join(audio_filepath, "glocs.wav"), mono=True)
        sig.crop(0, 5 * sig.fs)
        sig.pad(2048)

        # One plan per scale; the second array holds a 2 for every scale.
        parallelProjections.initialize_plans(
            np.array(scales), np.array([2 for _ in scales])
        )

        plain_dico = mdct_dico.Dico(scales)
        masking_dico = mdct_dico.SpreadDico(
            scales, all_scales=True, penalty=0, maskSize=3
        )

        self.assertEqual(masking_dico.mask_times, [3, 3, 3])

        # Classic dictionary first, spread dictionary second, at every step.
        dictionaries = (plain_dico, masking_dico)
        for dictionary in dictionaries:
            dictionary.initialize(sig)
        for dictionary in dictionaries:
            dictionary.update(sig, 2)

        first_plain, first_masked = [d.get_best_atom(0) for d in dictionaries]
        self.assertEqual(first_plain, first_masked)

        # Remove the selected atom from the signal and project again.
        sig.subtract(first_plain)
        for dictionary in dictionaries:
            dictionary.update(sig, 2)

        second_plain, second_masked = [d.get_best_atom(0) for d in dictionaries]
        self.assertNotEqual(second_plain, second_masked)
def expe1():
    """Print how many non-zero fingerprint bins each shift shares with the first.

    For every shift (in samples) the test file is loaded, cropped to
    ``[shift, shift + L]``, sketched with ``CorticoIHTSketch``, sparsified with
    an argument of 100 and fingerprinted. The printed list holds, per shift,
    the number of non-zero bin indexes in common with the first fingerprint.
    """
    shifts = [0]  # in samples

    def fingerprint_at(shift):
        # Fresh signal and sketch for every shift.
        excerpt = Signal(audio_test_file, normalize=True, mono=True)
        excerpt.crop(shift, shift + L)
        sketcher = sketch.CorticoIHTSketch()
        sketcher.recompute(excerpt)
        sketcher.sparsify(100)
        return sketcher.fgpt()

    fingerprints = [fingerprint_at(shift) for shift in shifts]

    reference_bins = np.flatnonzero(fingerprints[0])
    score = [
        len(np.intersect1d(reference_bins,
                           np.flatnonzero(fingerprint),
                           assume_unique=True))
        for fingerprint in fingerprints
    ]

    print(score)
Esempio n. 4
0
    def runTest(self):
        """Compare the max atoms of a plain Block and a zero-penalty SpreadBlock.

        Before any atom is removed both blocks must return equal max atoms.
        A mask is then computed around that atom on the spread block, the atom
        is subtracted from the signal, both blocks are updated again and their
        new max atoms are asserted to differ.

        The plans created by ``parallelProjections.initialize_plans`` are
        released in a ``finally`` clause, so a failing assertion or an
        exception in between no longer skips ``clean_plans``.
        """

        def show(atom):
            # One pre-formatted argument prints the same under Python 2 and 3.
            print("%s %s %s %s" % (atom.length, atom.frame,
                                   atom.freq_bin, atom.mdct_value))

        pySig = Signal(op.join(audio_filepath, "glocs.wav"), mono=True)
        pySig.crop(0, 5 * pySig.fs)
        pySig.pad(2048)

        scale = 1024
        parallelProjections.initialize_plans(np.array([scale]), np.array([2]))
        try:
            classicBlock = mdct_block.Block(scale, pySig, 0, debug_level=3)
            spreadBlock = mdct_block.SpreadBlock(
                scale, pySig, 0, debug_level=3, penalty=0, maskSize=5)

            # compute the projections, should be equivalent
            classicBlock.update(pySig, 0, -1)
            spreadBlock.update(pySig, 0, -1)

            maxClassicAtom1 = classicBlock.get_max_atom()
            show(maxClassicAtom1)
            maxSpreadAtom1 = spreadBlock.get_max_atom()
            show(maxSpreadAtom1)
            # assert equality using the inner comparison method of MDCT atoms
            self.assertEqual(maxClassicAtom1, maxSpreadAtom1)

            # Build the mask around the selected atom.
            # NOTE(review): the resulting mask indexes are not verified here;
            # the former checks on spreadBlock.mask_index_x / mask_index_y were
            # disabled, so the values feeding them are no longer computed.
            mask_frame_width = 2
            mask_bin_width = 1
            spreadBlock.compute_mask(
                maxSpreadAtom1, mask_bin_width, mask_frame_width, 0.5)

            pySig.subtract(maxSpreadAtom1)

            # recompute the projections
            classicBlock.update(pySig, 0, -1)
            spreadBlock.update(pySig, 0, -1)

            maxClassicAtom2 = classicBlock.get_max_atom()
            show(maxClassicAtom2)
            maxSpreadAtom2 = spreadBlock.get_max_atom()
            show(maxSpreadAtom2)
            self.assertNotEqual(maxClassicAtom2, maxSpreadAtom2)
        finally:
            # Always release the plans, even when an assertion above fails.
            parallelProjections.clean_plans()
Esempio n. 5
0
    def runTest(self):
        """Plot classic MP against MP with masking restricted to two scales.

        The spread dictionary is built with ``all_scales=False`` and
        ``spread_scales=[1024, 8192]``. Three figures are drawn: the
        time-frequency plots of both approximations, the residual-energy decay
        curves in dB, and, for blocks 1 and 2 of the spread dictionary, the
        mask reshaped to two dimensions above the block's projection matrix.
        Nothing is asserted.
        """
        scales = [128, 1024, 8192]
        max_atoms = 200

        sig = Signal(op.join(audio_filepath, "glocs.wav"), mono=True, normalize=True)
        sig.crop(0, 5 * sig.fs)
        sig.pad(16384)
        # Reference energy used to express the decay curves in dB.
        ref_energy = np.sum(sig.data ** 2)

        plain_dico = mdct_dico.Dico(scales, useC=False)
        masking_dico = mdct_dico.SpreadDico(
            scales,
            all_scales=False,
            spread_scales=[1024, 8192],
            penalty=0.1,
            mask_time=2,
            mask_freq=2,
        )

        plain_approx, plain_decay = mp.mp(sig, plain_dico, 20, max_atoms)
        masked_approx, masked_decay = mp.mp(sig, masking_dico, 20, max_atoms, pad=False)

        def decay_in_db(decay):
            # Residual energies relative to the reference energy, in dB.
            return [10 * np.log10(energy / ref_energy) for energy in decay]

        plt.figure(figsize=(16, 8))
        panels = (
            (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
            (122, masked_approx, "Decomposition with TF masking: 200 atoms 3xMDCT"),
        )
        for position, approximation, title in panels:
            plt.subplot(position)
            approximation.plot_tf(ylim=[0, 4000])
            plt.title(title)

        plt.figure()
        plt.plot(decay_in_db(plain_decay))
        plt.plot(decay_in_db(masked_decay), "r")
        plt.legend(("Classic decomposition", "Spreading Atoms"))
        plt.ylabel("Residual energy decay(dB)")
        plt.xlabel("Iteration")

        # Top row: masks of blocks 1 and 2; bottom row: their projections.
        plt.figure()
        for block_idx in (1, 2):
            block = masking_dico.blocks[block_idx]
            plt.subplot(2, 2, block_idx)
            # Plain "/" is kept on purpose: the reshape below needs the
            # integer results Python 2 gives for integer operands.
            half_scale = block.scale / 2
            n_rows = block.mask.shape[0] / half_scale
            print("%s %s %s" % (block.mask.shape, n_rows, half_scale))
            mask_image = np.reshape(block.mask, (n_rows, half_scale))
            plt.imshow(mask_image, interpolation="nearest", aspect="auto")
            plt.colorbar()
            plt.subplot(2, 2, block_idx + 2)
            block.im_proj_matrix()
            plt.colorbar()
Esempio n. 6
0
def expe_1():
    """Save and show a two-panel figure (waveform, spectrogram) of the test file.

    The signal is cropped between 0.1 * fs and 3.5 * fs samples. The figure is
    written to ``glocs_spectro.pdf`` inside ``figure_output_path`` and then
    displayed with ``plt.show()``.
    """
    excerpt = Signal(audio_test_file, normalize=True, mono=True)
    excerpt.crop(0.1 * excerpt.fs, 3.5 * excerpt.fs)

    plt.figure(figsize=(10, 5))

    # Top panel: waveform against sample index divided by the sampling rate.
    plt.subplot(211)
    time_axis = np.arange(.0, excerpt.length) / float(excerpt.fs)
    plt.plot(time_axis, excerpt.data)
    plt.xticks([])
    plt.ylim([-1, 1])
    plt.grid()

    # Bottom panel: spectrogram drawn by the signal object itself.
    plt.subplot(212)
    spectro_options = dict(order=0.25, log=False, cmap=cm.hot, cbar=False)
    excerpt.spectrogram(1024, 64, **spectro_options)

    target = op.join(figure_output_path, 'glocs_spectro.pdf')
    plt.savefig(target)
    plt.show()
Esempio n. 7
0
    """ get the indexes in the (sorted) array such that
    elements are smaller than value """
    idxset = []
    idx = startIdx
    while idx <= array.shape[0] - 1 and array[idx] < stopvalue:
        idxset.append(idx)
        idx += 1


#        print idx, array[idx]
    return idxset

# Load the audio file as a mono signal.
original = Signal(audiofile, mono=True)

max_duration = 20  # in seconds
# Keep at most the first max_duration seconds.
original.crop(0, max_duration * original.fs)

# STFT analysis parameters handed to features.get_stft
# (presumably window size and hop size in samples -- confirm).
wsize = 1024
tstep = 512

# Get the magnitude spectrum for the given audio file
# NOTE(review): other code in this file applies np.abs() to the get_stft
# output, so this may be the complex STFT rather than magnitudes -- confirm.
learn_specs = features.get_stft(original.data, wsize, tstep)
# Transposed so that the two axes are swapped for the later processing.
learn_specs = learn_specs.T
# Read the features in the h5 file
# NOTE(review): the h5 handle is not closed in the visible part of the script.
h5 = hdf5_getters.open_h5_file_read(h5file)
timbre = hdf5_getters.get_segments_timbre(h5)
loudness_start = hdf5_getters.get_segments_loudness_start(h5)
C = hdf5_getters.get_segments_pitches(h5)
segments_all = hdf5_getters.get_segments_start(h5)

learn_feats_all = np.hstack(
Esempio n. 8
0
import matplotlib.pyplot as plt
import os

from PyMP import Signal, mp, mp_coder
from PyMP.mdct import Dico


# Data directory, resolved against the current working directory.
abPath = os.path.abspath("../../data/")
# os.path.join instead of "+ '/...'" keeps the path valid on every platform.
sig = Signal(os.path.join(abPath, "ClocheB.wav"), mono=True)  # Load Signal
sig.crop(0, 4.0 * sig.fs)  # Keep only 4 seconds

# atom of scales 8, 64 and 512 ms
scales = [(s * sig.fs / 1000) for s in (8, 64, 512)]

# Dictionary for Standard MP
pyDico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
mpApprox, mpDecay = mp.mp(sig, pyDico, 20, 2000)

# Encode the approximation with simple_mdct_encoding (arguments 2000, Q=14);
# it returns the SNR, the bitrate and the quantized approximation.
SNR, bitrate, quantizedApprox = mp_coder.simple_mdct_encoding(mpApprox, 2000, Q=14)

# Time-frequency plot of the quantized approximation.
quantizedApprox.plot_tf()
plt.show()
Esempio n. 9
0
# Default interpolation for images drawn by matplotlib in this script.
mpl.rcParams['image.interpolation'] = 'Nearest'
#mpl.rcParams['text.usetex'] = True

from PyMP import Signal, mp
from PyMP.mdct import Dico


# MDCT scales of the dictionary and the atom budget handed to mp.mp.
sizes = [128, 1024, 8192]
n_atoms = 1000


# Data directory, resolved against the current working directory.
abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)

# taking only the first musical phrase (3.5 seconds approximately)
sig.crop(0, 3.5 * sig.fs)
sig.pad(8192)

# add some minor noise to avoid null areas
# (unseeded random draw: the signal differs slightly from run to run)
sig.data += 0.0001 * np.random.randn(sig.length)

# create MDCT multiscale dictionary
dico = Dico(sizes)

# run the MP routine
# (50 is presumably the target SRR in dB, n_atoms the iteration cap -- confirm)
approx, decay = mp.mp(sig, dico, 50, n_atoms)

# plotting the results
# Sample indexes divided by the sampling rate.
timeVec = np.arange(0, float(sig.length)) / sig.fs

plt.figure(figsize=(10, 6))
Esempio n. 10
0
    def runTest(self):
        ''' Use the previously built database to retrieve a song index and offset.

        A 5 second excerpt starting 7 seconds into file number 6 is decomposed
        with at most ``nbAtom`` (50) atoms. The database is queried twice: once
        atom by atom through ``ppdb.get`` and once through ``ppdb.retrieve``.
        Only the ``retrieve`` result is asserted: its maximum must designate
        the right file, with an offset within 2.5 of the true one.

        NOTE(review): this relies on Python 2 semantics (``map`` returning a
        list, ``/`` performing integer division on integers).
        '''
        print "------------------ Test6  recognition ---------"

        nbCandidates = 8
        ppdb = STFTPeaksBDB('LargeSTFTdb.db', load=True)

        print 'Large Db of ' + str(ppdb.get_stats()['nkeys']) + ' and ' + str(
            ppdb.get_stats()['ndata'])
        # Now take a song, decompose it and try to retrieve it
        fileIndex = 6
        RandomAudioFilePath = file_names[fileIndex]
        print 'Working on ' + str(RandomAudioFilePath)
        pySig = Signal(op.join(audio_files_path, RandomAudioFilePath),
                       mono=True)

        pyDico = LODico(sizes)
        segDuration = 5
        offsetDuration = 7
        offset = offsetDuration * pySig.fs
        nbAtom = 50
        # Segment length rounded down to a multiple of the largest scale
        # (needs integer division, i.e. Python 2 with integer operands).
        segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
        pySig.crop(offset, offset + segmentLength)

        approx, decay = mp.mp(pySig, pyDico, 40, nbAtom, pad=True)

        #        plt.figure()
        #        approx.plotTF()
        #        plt.show()
        # One database lookup per atom: ppdb.get(key, time), the time being
        # the atom position minus the dictionary pad, divided by approx.fs.
        res = map(ppdb.get, map(ppdb.kform, approx.atoms),
                  [(a.time_position - pyDico.get_pad()) / approx.fs
                   for a in approx.atoms])
        #
        #res = map(bdb.get, map(bdb.kform, approx.atoms))

        # Vote table with one column per candidate file.
        histogram = np.zeros((600, nbCandidates))

        for i in range(approx.atom_number):
            print res[i]
            # res[i] is used directly as an index into the vote table.
            histogram[res[i]] += 1

        # Flat index of the largest vote count, split into row and column.
        max1 = np.argmax(histogram[:])
        Offset1 = max1 / nbCandidates
        estFile1 = max1 % nbCandidates
        #        candidates , offsets = ppdb.retrieve(approx);
        #        print approx.atom_number
        histograms = ppdb.retrieve(approx, offset=0, nbCandidates=8)
        # print histograms , np.max(histograms) , np.argmax(histograms, axis=0) ,
        # np.argmax(histograms, axis=1)

        #        plt.figure()
        #        plt.imshow(histograms[0:20,:],interpolation='nearest')
        #        plt.show()

        # Same row/column split for the table returned by ppdb.retrieve.
        maxI = np.argmax(histograms[:])
        OffsetI = maxI / nbCandidates
        estFileI = maxI % nbCandidates

        print fileIndex, offsetDuration, estFileI, OffsetI, estFile1, Offset1, max1, maxI
        # NOTE(review): plt is only referenced by the commented-out plotting
        # code below, so this import is currently unused.
        import matplotlib.pyplot as plt
        #        plt.figure(figsize=(12,6))
        #        plt.subplot(121)
        #        plt.imshow(histograms,aspect='auto',interpolation='nearest')
        #        plt.subplot(122)
        #        plt.imshow(histogram,aspect='auto',interpolation='nearest')
        ##        plt.imshow(histograms,aspect='auto',interpolation='nearest')
        ##        plt.colorbar()
        #        plt.show()

        print maxI, OffsetI, estFileI
        # The (row, column) split must point at the maximum of the table.
        self.assertEqual(histograms[OffsetI, estFileI], np.max(histograms))

        self.assertEqual(fileIndex, estFileI)
        self.assertTrue(abs(offsetDuration - OffsetI) <= 2.5)
 # Resynthesis of the nearest-neighbour sequence, first without time
 # stretching and without normalization.
 sig_out =  resynth_sequence(neighb_segments_idx[:,0], test_segs, t_seg_duration, 
             learn_segs, learn_feats, ref_audio_dir, ext, 22050,
             dotime_stretch=False, max_synth_idx=max_synth_idx, normalize=False,
             marge=3, verbose=True)
 
 # Same call with time stretching and normalization switched on.
 sig_out_normalized = resynth_sequence(neighb_segments_idx[:,0], test_segs, t_seg_duration, 
             learn_segs, learn_feats, ref_audio_dir, ext, 22050,
             dotime_stretch=True, max_synth_idx=max_synth_idx, normalize=True,
             marge=3, verbose=True)
 
 
 
 #sig_viterbi = Signal(sig_out_viterbi, 22050, normalize=True)
 
 # Wrap the plain output in a Signal at 22050 Hz and crop it at the value
 # test_segs[max_synth_idx] scaled by the sampling rate.
 rec_sig = Signal(sig_out, 22050, normalize=True)
 rec_sig.crop(0, test_segs[max_synth_idx]*rec_sig.fs)
 
 save_fig_audio(rec_sig, '%s_plain_cross_%s'%(test_file,learntype))
 
 rec_sig_normalized = Signal(sig_out_normalized, 22050, normalize=True)
 # NOTE(review): the crop bound uses rec_sig.fs, not rec_sig_normalized.fs;
 # both signals are created at 22050, so the value is the same here.
 rec_sig_normalized.crop(0, test_segs[max_synth_idx]*rec_sig.fs)
 
 save_fig_audio(rec_sig_normalized, '%s_normalized_cross_%s'%(test_file,learntype))
 
 # load original audio
 # (from 0 up to the duration of the plain resynthesis)
 orig_data, fs = get_audio(audio_file_path, 0, rec_sig.get_duration(),
                           targetfs=None, verbose=True)
 
 
 
 orig_sig = Signal(orig_data, fs, normalize=True)
Esempio n. 12
0
"""
import numpy as np
from PyMP.mdct import Dico, atom
from PyMP import Signal, approx

# Load the example file with debug_level=3 and print the object and its samples.
sig = Signal('../data/glocs.wav', debug_level=3)
print sig
print sig.data

# sig.plot()
# sig.write('newDestFile.wav')
# editing

# Crop to the range (0, 2048) and show the length before and after.
print 'Before cropping Length of ', sig.length
sig.crop(0, 2048)
print 'After cropping Length of ', sig.length

# Signals also support slicing.
sub_sig = sig[0:2048]
print sub_sig

# A signal built from an array of eight ones, with 1 as second argument
# (presumably the sampling rate -- confirm).
new_sig = Signal(np.ones((8,)), 1)
# NOTE: the bare `new_sig.data` expressions only display something in an
# interactive session; run as a script they have no visible effect.
new_sig.data
print "Padding"
new_sig.pad(4)
new_sig.data
print "De-Padding"
new_sig.depad(4)
new_sig.data

Esempio n. 13
0
def expe_1_synth_from_same_sample():
    """Resynthesize an excerpt of a recording from segments of its own beginning.

    The Echo Nest segment features (timbre, loudness at segment start and
    pitches) are read from the HDF5 file that goes with the audio file.
    Segments starting before ``learn_duration`` seconds form the learning set;
    segments located between ``test_start`` and ``test_start + test_duration``
    form the test set. Each test segment is matched to its nearest learning
    segment in feature space, ``spec_morph`` assembles a magnitude spectrogram
    from the matched segments, and ``transforms.gl_recons`` turns it back into
    audio.

    Side effects: shows a figure comparing the log magnitude of the true test
    STFT with the morphed one (blocking ``plt.show()``) and writes the result
    to ``/sons/tests/rec_sig2.wav``. Returns nothing.
    """
    input_dir = '/sons/rwc/Learn/'
    output_dir = '/sons/rwc/Learn/hdf5/'

    audiofile = input_dir + 'rwc-g-m01_1.wav'
    h5file = output_dir + 'rwc-g-m01_1.h5'

    # load the Echo Nest features; the file is closed even if a getter fails
    h5 = hdf5_getters.open_h5_file_read(h5file)
    try:
        timbre = hdf5_getters.get_segments_timbre(h5)
        loudness_start = hdf5_getters.get_segments_loudness_start(h5)
        C = hdf5_getters.get_segments_pitches(h5)
        segments_all = hdf5_getters.get_segments_start(h5)
    finally:
        h5.close()

    # One feature row per segment: timbre, loudness at start, pitches.
    learn_feats_all = np.hstack((timbre,
                                 loudness_start.reshape((loudness_start.shape[0], 1)),
                                 C))

    # Ok That was the best possible case, now let us try to find the nearest
    # neighbors, get the segment back and resynthesize!

    learn_duration = 200  # in seconds
    test_start = 200
    test_duration = 5

    # Get learning data
    learning = Signal(audiofile, mono=True)
    learning.crop(0, learn_duration * learning.fs)

    wsize = 1024
    tstep = 512
    # Get the spectrum for the given audio file (np.abs is applied later)
    learn_specs = features.get_stft(learning.data, wsize, tstep)
    learn_specs = learn_specs.T

    # Index of the last segment starting before learn_duration.
    max_l_seg_idx = np.where(segments_all < learn_duration)[0][-1]
    l_segments = segments_all[:max_l_seg_idx]

    learn_feats = learn_feats_all[:max_l_seg_idx, :]
    # we must keep in mind for each segment index, the corresponding indices
    # in the learn_spec mat
    l_seg_bounds = []
    ref_time = np.arange(0., float(learning.length) / float(learning.fs),
                         float(tstep) / float(learning.fs))
    for segI in range(len(l_segments) - 1):
        startIdx = np.where(ref_time > l_segments[segI])[0][0]
        endIdx = np.where(ref_time > l_segments[segI + 1])[0][0]
        l_seg_bounds.append((startIdx, endIdx))
    # The last segment runs up to the end of the learning spectrogram.
    l_seg_bounds.append((endIdx, ref_time.shape[0]))

    # Get testing data; both crop bounds use the sampling rate of the signal
    # being cropped (the original mixed in learning.fs for the upper bound).
    testing = Signal(audiofile, mono=True)
    testing.crop(test_start * testing.fs,
                 (test_start + test_duration) * testing.fs)

    # get the testing features
    min_t_seg_idx = np.where(segments_all < test_start)[0][-1]
    max_t_seg_idx = np.where(segments_all < test_start + test_duration)[0][-1]
    t_segments = segments_all[min_t_seg_idx:max_t_seg_idx]
    t_segment_lengths = (t_segments[1:] - t_segments[0:-1]) * testing.fs
    test_feats = learn_feats_all[min_t_seg_idx:max_t_seg_idx, :]

    # find the nearest neighbors
    from sklearn.neighbors import NearestNeighbors
    # n_neighbors is passed by keyword: recent scikit-learn releases declare
    # it keyword-only.
    neigh = NearestNeighbors(n_neighbors=1)
    # fit on the learning data
    neigh.fit(learn_feats)
    neighb_segments_idx = neigh.kneighbors(test_feats, return_distance=False)

    # kneighs is a set of segment indices, we need to get the spectrogram back
    # from the learning data then fit the new segment lengths
    target_length = int(test_duration * testing.fs)

    # (learning segment index, target length in samples) pairs. zip stops at
    # the shorter input, i.e. at the number of segment lengths. list() keeps
    # this a real list whatever the Python version.
    neighb_segments = list(zip(neighb_segments_idx[:, 0],
                               t_segment_lengths.astype(int)))

    morphed_spectro = spec_morph(np.abs(learn_specs), target_length,
                                 neighb_segments, l_seg_bounds)

    # retrieve true stft for comparison
    test_specs = features.get_stft(testing.data, wsize, tstep)

    plt.figure()
    plt.subplot(121)
    plt.imshow(np.log(np.abs(test_specs)), origin='lower')
    plt.colorbar()
    plt.subplot(122)
    plt.imshow(np.log(morphed_spectro.T), origin='lower')
    plt.colorbar()
    plt.show()

    # Random starting vector for the reconstruction, run with 10 as the
    # iteration argument.
    init_vec = np.random.randn(morphed_spectro.shape[0] * tstep)
    rec_method2 = transforms.gl_recons(morphed_spectro.T, init_vec, 10,
                                       wsize, tstep, display=False)
    rec_sig_2 = Signal(rec_method2, testing.fs, mono=True, normalize=True)
    rec_sig_2.write('/sons/tests/rec_sig2.wav')
# Original signal: write the audio, then save its spectrogram as a PNG.
sig_orig.write(os.path.join(output_audio_path, '%s_original.wav'%t_name))
# NOTE(review): `format` is defined outside this excerpt and shadows the builtin.
plt.figure(figsize=format)
sig_orig.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_original.png'%t_name))

# Reconstruction from the first n_max_frames columns of max_magspec, starting
# from a random vector of 128 samples per frame.
init_vec = np.random.randn(128*n_max_frames)
x_recon_max = transforms.gl_recons(max_magspec[:,:n_max_frames], init_vec, nb_gl_iter,
                                       512, 128, display=False)
sig_max= Signal(x_recon_max, 22050,normalize=True)
sig_max.write(os.path.join(output_audio_path, '%s_add_max.wav'%t_name))
plt.figure(figsize=format)
sig_max.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_max.png'%t_name))

# Externally produced resynthesis, cropped to the length of sig_max.
# NOTE(review): absolute, user-specific path -- not portable.
sig_ellis = Signal('/home/manu/workspace/audio-sketch/src/expe_scripts/invert/ellis_resynth%s.wav'%t_name, normalize=True)
sig_ellis.crop(0,sig_max.length)
plt.figure(figsize=format)
sig_ellis.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_ellis.png'%t_name))


# Summary figure: stacked spectrograms sharing both axes with the first row.
plt.figure(figsize=(16,12))
ax1 = plt.subplot(411)
#plt.imshow(np.log(orig_spec), origin='lower')
sig_orig.spectrogram(512, 128, order=1, log=True, ax=ax1, cmap=colormap, cbar=False)
ax2 = plt.subplot(412, sharex=ax1, sharey=ax1)
#plt.imshow(np.log(median_magspec), origin='lower')
sig_median.spectrogram(512, 128, order=1, log=True, ax=ax2, cmap=colormap, cbar=False)
ax3 = plt.subplot(413, sharex=ax1, sharey=ax1)
#plt.imshow(np.log(max_magspec), origin='lower')
sig_max.spectrogram(512, 128, order=1, log=True, ax=ax3, cmap=colormap, cbar=False)
Esempio n. 15
0
from PyMP import Signal, mp
from PyMP.mdct import Dico, LODico

import matplotlib as mpl
# Global matplotlib defaults for the figures of this script.
mpl.rcParams['lines.linewidth'] = 1.0
mpl.rcParams['font.size'] = 16.0
mpl.rcParams['legend.fancybox'] = True
mpl.rcParams['legend.shadow'] = True
mpl.rcParams['image.interpolation'] = 'Nearest'
#mpl.rcParams['text.usetex'] = True

# Load glockenspiel signal
abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)

# Keep only the first 3 * fs samples.
sig.crop(0, 3 * sig.fs)

# MDCT scales shared by both dictionaries, and the two stop arguments
# handed to mp.mp (srr and n_atoms).
scales = [128, 1024, 8192]
n_atoms = 500
srr = 30

# Standard dictionary and its LODico counterpart built on the same scales.
mp_dico = Dico(scales)
lomp_dico = LODico(scales)

# The first call asks mp.mp to pad (pad=True), the second one does not.
# NOTE(review): presumably the first call leaves `sig` padded for the second
# one -- confirm against mp.mp.
mp_approx, mp_decay = mp.mp(sig, mp_dico, srr, n_atoms, pad=True)
lomp_approx, lomp_decay = mp.mp(sig, lomp_dico, srr, n_atoms, pad=False)

# Time-frequency plot of the standard approximation in the upper panel.
plt.figure()
plt.subplot(211)
mp_approx.plot_tf()
plt.subplot(212)
Esempio n. 16
0
#    sim_mat[t,:] = np.sum((t_feats - t_feats[t,:])**2, axis=1)
#
#plt.figure()
#plt.imshow(sim_mat, origin='lower')
#plt.colorbar()
#plt.show()

# Now decode a path through the neighbour candidates with Viterbi.
from tools.learning_tools import Viterbi
vit_path = Viterbi(neigh, distance, trans_penalty=0.01, c_value=20)
# For every row index, pick the candidate column selected by the path.
vit_cands = [neigh[ind, neighbind] for ind, neighbind in enumerate(vit_path)]
#
# Resynthesize the selected sequence ('.au' files, 22050 Hz, with time
# stretching and normalization, max_synth_idx fixed to 40).
sig_out_viterbi = resynth_sequence(np.squeeze(vit_cands),
                                   t_seg_starts,
                                   t_seg_duration,
                                   l_segments,
                                   l_feats,
                                   ref_audio_dir,
                                   '.au',
                                   22050,
                                   dotime_stretch=True,
                                   max_synth_idx=40,
                                   normalize=True)

sig_viterbi = Signal(sig_out_viterbi, 22050, normalize=True)
# The output file name encodes the source file and the experiment settings.
sig_viterbi.write(
    '%s/%s_viterbi_%dFeats_%dLearns_Filter%d.wav' %
    (outputpath, h5files[t_index - 1], nbFeats, n_learn, filter_key))
# NOTE: this crop happens after the write, so the file on disk is not cropped.
sig_viterbi.crop(0, 9.5 * sig_viterbi.fs)
#
#sig_viterbi = save_audio(outputpath, '%s_viterbi'%h5files[t_index], sig_out_viterbi, 22050, norm_segments=False)
Esempio n. 17
0
    def runTest(self):
        """Populate a peak-pair database from one file and query it back.

        A sparsified STFT-peaks sketch is computed on the start of file number
        2. One (f1, f2, delta_t) key is built by hand from a peak and a second
        non-zero point in the zone that follows it. The database is then
        populated from the sketch and the test checks the number of stored
        entries, that the hand-built key returns the right file at a time close
        to ``t1``, and that an unknown key returns empty lists.
        """
        print "------------------ Test3  Populate from a true pair of peaks ---------"
        fileIndex = 2
        RandomAudioFilePath = file_names[fileIndex]
        print 'Working on %s' % RandomAudioFilePath
        sizes = [2**j for j in range(7, 15)]
        segDuration = 5
        nbAtom = 20

        pySig = Signal(op.join(audio_files_path, RandomAudioFilePath),
                       mono=True,
                       normalize=True)

        # Segment length rounded down to a multiple of the largest size
        # (needs integer division, i.e. Python 2 with integer operands).
        segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
        # NOTE(review): nbSeg and nbAtom are not used further in this test.
        nbSeg = floor(pySig.length / segmentLength)
        # cropping
        pySig.crop(0, segmentLength)

        # create the sparsified matrix of peaks
        # the easiest is to use the existing PeakPicking in sketch
        from classes import sketch
        sk = sketch.STFTPeaksSketch()
        sk.recompute(pySig)
        sk.sparsify(100)
        fgpt = sk.fgpt(sparse=True)
        # load=False: presumably starts from an empty database -- confirm.
        ppdb = STFTPeaksBDB('STFTPeaksdb.db', load=False)
        #        ppdb.keyformat = None

        # compute the pairs of peaks
        # Index arrays of the non-zero entries of the first plane of fgpt.
        peak_indexes = np.nonzero(fgpt[0, :, :])
        # Take one peak
        # (the third non-zero entry, as a pair of indexes)
        peak_ind = (peak_indexes[0][2], peak_indexes[1][2])
        # The target zone is twice the sketch widths along each axis.
        f_target_width = 2 * sk.params['f_width']
        t_target_width = 2 * sk.params['t_width']
        import matplotlib.pyplot as plt
        plt.figure()
        # Log magnitude of the target zone that starts at the chosen peak.
        plt.imshow(
            np.log(
                np.abs(fgpt[0, peak_ind[0]:peak_ind[0] + f_target_width,
                            peak_ind[1]:peak_ind[1] + t_target_width])))

        # Non-zero points of the target zone, as offsets from the chosen peak.
        target_points_i, target_points_j = np.nonzero(
            fgpt[0, peak_ind[0]:peak_ind[0] + f_target_width,
                 peak_ind[1]:peak_ind[1] + t_target_width])
        # now we can build a pair of peaks , and thus a key
        # The second peak is the non-zero point at index 1 of the target zone.
        f1 = (float(peak_ind[0]) / sk.params['scale']) * pySig.fs
        f2 = (float(peak_ind[0] + target_points_i[1]) /
              sk.params['scale']) * pySig.fs
        delta_t = float(target_points_j[1] * sk.params['step']) / float(
            pySig.fs)
        t1 = float(peak_ind[1] * sk.params['step']) / float(pySig.fs)
        key = (f1, f2, delta_t)
        print key, t1
        ppdb.populate(sk.fgpt(), sk.params, fileIndex)

        nKeys = ppdb.get_stats()['ndata']
        # compare the number of keys in the base to the number of atoms

        #        print ppdb.get_stats()
        # NOTE(review): 116 is a hard-coded expectation for this file and
        # these sketch settings.
        self.assertEqual(nKeys, 116)

        # now try to recover the fileIndex knowing one key
        T, fileI = ppdb.get(key)

        self.assertEqual(fileI[0], fileIndex)
        Tpy = np.array(T)
        print Tpy
        # At least one returned time must be within 0.5 of t1.
        self.assertTrue((np.abs(Tpy - t1)).min() < 0.5)

        # last check: what does a request for non-existing atom in base return?
        T, fileI = ppdb.get((11, 120.0, 0.87))
        self.assertEqual(T, [])
        self.assertEqual(fileI, [])

        # now let's just retrieve the atoms from the base and see if they are
        # the same
        # NOTE(review): the result is only used by the commented-out plot below.
        histograms = ppdb.retrieve(fgpt, sk.params)
        #        plt.figure()
        #        plt.imshow(histograms[0:10,:])
        #        plt.show()
        del ppdb
Esempio n. 18
0
"""

Tutorial provided as part of PyMP

M. Moussallam

"""
from PyMP.mdct import Dico, LODico
from PyMP.mdct.rand import SequenceDico
from PyMP import mp, mp_coder, Signal
signal = Signal('../data/ClocheB.wav', mono=True)  # Load Signal
signal.crop(0, 4.0 * signal.fs)     # Keep only 4 seconds
# atom of scales 8, 64 and 512 ms
# (converted to samples with the sampling rate)
scales = [(s * signal.fs / 1000) for s in (8, 64, 512)]
# Pad by the largest scale here, which is why mp.mp gets pad=False below.
signal.pad(scales[-1])
# Dictionary for Standard MP
dico = Dico(scales)
# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
app, dec = mp.mp(signal, dico, 20, 2000, pad=False)

# NOTE: a bare expression; it only displays a value in an interactive session.
app.atom_number

# Encode the approximation with Q=14 and an argument of 8000, then print the
# resulting (snr, bitrate) pair.
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=14)
print (snr, bitrate)

# Same encoding with Q=5 for comparison.
print "With Q=5"
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=5)
print (snr, bitrate)