Example No. 1
def reconstruct(l_specs, t_specs, neighbs, k, winsize, n_iter_gl):
    """ reconstruct by taking the median of the Knn and GL """
    Y_hat = np.zeros_like(t_specs)
    T = neighbs.shape[0]
    for t in range(T):
        Y_hat[t, :] = np.median(l_specs[neighbs[t, :k], :], 0)
    init_vec = np.random.randn(128 * Y_hat.shape[0])
    x_recon = transforms.gl_recons(Y_hat.T,
                                   init_vec,
                                   n_iter_gl,
                                   winsize,
                                   128,
                                   display=False)
    return x_recon
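A minimal usage sketch with synthetic arrays (all shapes and values below are illustrative assumptions, not from the original experiment):

import numpy as np
# 200 learning frames and 50 test frames of 513-bin magnitude spectra;
# 10 candidate neighbors per test frame, of which the k = 5 nearest are medianed
l_specs = np.abs(np.random.randn(200, 513))
t_specs = np.abs(np.random.randn(50, 513))
neighbs = np.random.randint(0, 200, size=(50, 10))
x = reconstruct(l_specs, t_specs, neighbs, k=5, winsize=1024, n_iter_gl=30)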
Example No. 2
                               t_seg_starts,
                               t_seg_duration,
                               l_segments_all, l_feats, '', '.au', 22050,
                               dotime_stretch=False,
                               max_synth_idx=nb_max_seg,
                               marge=marge, normalize=True)
    # STFT magnitude of the synthesized signal
    magspecs.append(np.abs(stft.stft(sigout, 512, 128)[0, :, :]))
magspecarr = np.array(magspecs)


Lmin = min(Lmin, magspecarr.shape[2])
print("Add-Max")
max_magspec = np.max(magspecarr, 0)
init_vec = np.random.randn(128 * Lmin)
x_recon = gl_recons(max_magspec[:, :Lmin], init_vec, 20,
                    512, 128, display=False)
sig_add_max = Signal(x_recon, fs, normalize=True)
#sig_add_max.write(op.join(recons_audio_path, '_add_max_%s_P%d.wav' % (feat_comb, P)))
save_fig_audio(sig_add_max,
               recons_audio_path,
               recons_fig_path,
               "add_max_%s_P%d" % (feat_comb, P))
 print "KL value %2.2f"%KLspec(orig_spec[:,:Lmin], max_magspec[:,:Lmin])
 print "Add-Mean"
 mean_magspec = np.mean(magspecarr, 0)
 init_vec = np.random.randn(128*Lmin)
 x_recon = gl_recons(mean_magspec[:,:Lmin], init_vec, 20,
                                            512, 128, display=False)
 sig_add_mean = Signal(x_recon, fs, normalize=True)
 save_fig_audio(sig_add_mean,
                recons_audio_path,
Example No. 3
        1, (wsize // 2) + 1).repeat(len(indexes), 0)

plt.figure()
plt.subplot(131)
plt.imshow(np.log(np.abs(learn_specs.T)), origin='lower')
plt.subplot(132)
plt.imshow(np.log(averaged_specs.T), origin='lower')
plt.subplot(133)
plt.imshow(np.log(reconstructed_averaged_specs.T), origin='lower')
plt.show()

# time for resynthesis
init_vec = np.random.randn(original.data.shape[0])
rec_method1 = transforms.gl_recons(reconstructed_averaged_specs.T,
                                   init_vec,
                                   10,
                                   wsize,
                                   tstep,
                                   display=False)
rec_sig = Signal(rec_method1, original.fs, mono=True, normalize=True)
# CONCLUSION: we have kept only one spectrum per "note": a digitized-sounding result

# METHOD 2: use the original waveform to resynthesize
# We have the segmentation and for each segment we have a waveform
# we should directly use the "potentially time-extended" original waveform
# to resynthesize !
#waveform_list = []
#for i in range(1,segments.shape[0]):
#    waveform_list.append(original.data)
# This is a little too easy in this context; we should try it after the nearest-neighbor search (see the sketch below)
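# A minimal sketch of METHOD 2 under stated assumptions (`segments` holds
# segment boundaries in samples and `original` is the source Signal; this is
# an illustration, not the original experiment's code), kept commented out
# like the author's own placeholder above:
#waveform_list = [original.data[int(segments[i - 1]):int(segments[i])]
#                 for i in range(1, segments.shape[0])]
#rec_method2_concat = np.concatenate(waveform_list)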

# METHOD 3: for each feature bag, we have an M x F spectrogram (or magspec)
Example No. 4
def expe_1_synth_from_same_sample():        
    input_dir = '/sons/rwc/Learn/'
    output_dir = '/sons/rwc/Learn/hdf5/'
    
    audiofile = input_dir + 'rwc-g-m01_1.wav'
    h5file = output_dir + 'rwc-g-m01_1.h5'
    
    # load the Echo Nest features
    h5 = hdf5_getters.open_h5_file_read(h5file)
    timbre = hdf5_getters.get_segments_timbre(h5)
    loudness_start = hdf5_getters.get_segments_loudness_start(h5)
    loudness_max = hdf5_getters.get_segments_loudness_max(h5)
    loudness_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
    C = hdf5_getters.get_segments_pitches(h5)
    segments_all = hdf5_getters.get_segments_start(h5)
    
    learn_feats_all = np.hstack((timbre,
                                 loudness_start.reshape((loudness_start.shape[0], 1)),
                                 C))
    
    # OK, that was the best possible case; now let us try to find the nearest
    # neighbors, get the segments back, and resynthesize!
    
    
    learn_duration = 200 # in seconds
    test_start = 200
    test_duration = 5
    
    # Get learning data
    learning = Signal(audiofile, mono=True)
    learning.crop(0, learn_duration*learning.fs)
    
    wsize = 1024
    tstep = 512
    # Get the magnitude spectrum for the given audio file
    learn_specs = features.get_stft(learning.data, wsize, tstep)
    learn_specs = learn_specs.T
    
    max_l_seg_idx = np.where(segments_all < learn_duration)[0][-1]
    l_segments = segments_all[:max_l_seg_idx]
    l_segment_lengths = (l_segments[1:] - l_segments[0:-1])*learning.fs
    
    
    learn_feats = learn_feats_all[:max_l_seg_idx,:]
    # for each segment index, keep track of the corresponding frame indices in the learn_specs matrix
    l_seg_bounds = []
    ref_time = np.arange(0., float(learning.length)/float(learning.fs), float(tstep)/float(learning.fs))
    for segI in range(len(l_segments)-1):
        startIdx = np.where(ref_time > l_segments[segI])[0][0]
        endIdx = np.where(ref_time > l_segments[segI+1])[0][0]
        l_seg_bounds.append((startIdx,endIdx))
    l_seg_bounds.append((endIdx, ref_time.shape[0]))
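    # e.g. with fs = 22050 and tstep = 512, a segment starting at t = 1.0 s
    # gets startIdx 44 (the first frame past 1.0 * 22050 / 512 ~= 43.07);
    # these bounds later let us slice learn_specs segment by segment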
    
    # Get testing data
    testing = Signal(audiofile, mono=True)
    testing.crop(test_start * testing.fs, (test_start + test_duration) * testing.fs)
    
    # get the testing features
    min_t_seg_idx = np.where(segments_all < test_start)[0][-1]
    max_t_seg_idx = np.where(segments_all < test_start + test_duration)[0][-1]
    t_segments = segments_all[min_t_seg_idx:max_t_seg_idx]
    t_segment_lengths = (t_segments[1:] - t_segments[0:-1])*testing.fs
    test_feats = learn_feats_all[min_t_seg_idx:max_t_seg_idx,:]
    
    # find the nearest neighbors
    from sklearn.neighbors import NearestNeighbors
    neigh = NearestNeighbors(n_neighbors=1)
    # fit on the learning data
    neigh.fit(learn_feats)
    neighb_segments_idx = neigh.kneighbors(test_feats, return_distance=False)
    
    # neighb_segments_idx is a set of segment indices; we need to get the
    # spectrograms back from the learning data, then fit the new segment lengths
    
    target_length = int(test_duration*testing.fs)
    
    neighb_segments = list(zip(neighb_segments_idx[:, 0],
                               t_segment_lengths.astype(int)))

    morphed_spectro = spec_morph(np.abs(learn_specs), target_length,
                                 neighb_segments, l_seg_bounds)
    
    
    # retrieve true stft for comparison
    test_specs = features.get_stft(testing.data, wsize, tstep)
    
    plt.figure()
    plt.subplot(121)
    plt.imshow(np.log(np.abs(test_specs)), origin='lower')
    plt.colorbar()
    plt.subplot(122)
    plt.imshow(np.log(morphed_spectro.T), origin='lower')
    plt.colorbar()
    plt.show()
    
    
    init_vec = np.random.randn(morphed_spectro.shape[0]*tstep)
    rec_method2 = transforms.gl_recons(morphed_spectro.T, init_vec, 10, wsize, tstep, display=False)
    rec_sig_2 = Signal(rec_method2, testing.fs, mono=True, normalize=True)
    rec_sig_2.write('/sons/tests/rec_sig2.wav')
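The spec_morph helper called above is not shown in this excerpt. Below is a minimal sketch consistent with the call site, assuming it stretches each matched learning segment's magnitude spectrogram to the matched test segment's duration and stacks the results in time (the nearest-frame resampling and the tstep default are my choices, not the original code):

import numpy as np

def spec_morph_sketch(specs, target_length, neighb_segments, seg_bounds,
                      tstep=512):
    # specs: frames x freq magnitude spectrogram of the learning data
    # neighb_segments: (learning segment index, target length in samples) pairs
    # seg_bounds: (start frame, end frame) of each learning segment in specs
    out = []
    for seg_idx, seg_len in neighb_segments:
        start, end = seg_bounds[seg_idx]
        seg_spec = specs[start:end, :]
        if seg_spec.shape[0] == 0:
            continue
        n_target = max(1, seg_len // tstep)  # frames to fill for this segment
        # nearest-frame resampling of the segment to the target frame count
        idx = (np.arange(n_target) * seg_spec.shape[0]) // n_target
        out.append(seg_spec[idx, :])
    # target_length (total samples) could be used to trim the stacked result;
    # omitted here for brevity
    return np.vstack(out)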
Example No. 5
orig_spec_name = 'origrray_%s_Trial%d_seed%d.npy' % (t_name, min_idx[0], rndseed)
orig_spec = np.load(os.path.join(out_dir,orig_spec_name))


output_audio_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Audio/'
output_fig_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Figures/'
colormap = cm.jet
fig_size = (8, 3)  # renamed from `format` to avoid shadowing the builtin
# also load Dan Ellis's synthesized versions:
# the piano cross-synthesis and the Viterbi-smoothed musaicing?
# resynthesize using the first N frames
n_max_frames = 900
nb_gl_iter = 30
init_vec = np.random.randn(128*n_max_frames)
x_recon_median = transforms.gl_recons(median_magspec[:, :n_max_frames], init_vec,
                                      nb_gl_iter, 512, 128, display=False)

sig_median = Signal(x_recon_median, 22050, normalize=True)
sig_median.write(os.path.join(output_audio_path, '%s_add_median.wav' % t_name))
plt.figure(figsize=fig_size)
sig_median.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_add_median.png' % t_name))

init_vec = np.random.randn(128*n_max_frames)
x_recon_orig = transforms.gl_recons(orig_spec[:, :n_max_frames], init_vec,
                                    nb_gl_iter, 512, 128, display=False)
sig_orig = Signal(x_recon_orig, 22050, normalize=True)
sig_orig.write(os.path.join(output_audio_path, '%s_original.wav' % t_name))
plt.figure(figsize=fig_size)
sig_orig.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_original.png' % t_name))
Example No. 6
print "Took ", time.time() - t , " secs"
#plt.figure();plt.imshow(Ktest_dev); 
#plt.colorbar()
#plt.show()

# optional step: median filtering to smooth the estimate
# (kernel (1, 10): no smoothing across frequency bins, 10 frames along time)
Y_hat = median_filter(Y_hat, (1, 10))

#plt.figure()
#plt.subplot(211)
#plt.imshow(np.log(Y),
#           origin='lower')
#plt.colorbar()
#plt.title('Original')
#plt.subplot(212)
#plt.imshow(np.log(Y_hat),
#           origin='lower')
#plt.colorbar()
#plt.title('Estimation from Nadaraya-Watson')
#plt.show()
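# For reference, the Nadaraya-Watson estimate mentioned above is a kernel-
# weighted average of the learning spectra. A hypothetical sketch (K: a
# test-vs-learn similarity matrix, Y_learn: learning magnitude spectra;
# both names are assumptions, not the original code):
def nadaraya_watson_sketch(K, Y_learn):
    W = K / np.maximum(K.sum(axis=1, keepdims=True), 1e-12)  # row-normalize
    return np.dot(W, Y_learn)  # each estimate is a weighted mean of learning frames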

sig_orig = Signal(test_audiofilepath, normalize=True, mono=True)
#init_vec = np.random.randn(step_size*Y_hat.shape[1])
init_vec = np.random.randn(sig_orig.length)
x_recon = transforms.gl_recons(Y_hat, init_vec, 10, win_size, step_size, display=False)
plt.show()

sig_recon = Signal(x_recon, 32000, normalize=True)

# reconstruction error in dB: 10*log10(residual energy / reference energy)
err = 10.0 * np.log10(np.sum((sig_recon.data - sig_orig.data) ** 2) /
                      np.sum(sig_orig.data ** 2))
Example No. 7
                win_size = int(params['wintime'] * params['sr'])
                step_size = int(params['steptime'] * params['sr'])
                # sliding median filtering ?
                if l_medfilt > 1:
                    estimated_spectrum = median_filter(
                        estimated_spectrum_full + estimated_spectrum_harmo,
                        (1, l_medfilt))

                print "reconstruction"

                #init_vec = np.random.randn(step_size*Y_hat.shape[1])
                init_vec = np.random.randn(step_size *
                                           estimated_spectrum.shape[1])
                x_recon = transforms.gl_recons(estimated_spectrum,
                                               init_vec,
                                               nb_iter_gl,
                                               win_size,
                                               step_size,
                                               display=False)

                # Get the rhythmic part by using all coefficients
                #                res_array = regression.eval_knn( learn_feats[:,0:20], learn_magspecs,
                #                                                 test_feats[:,0:20] ,
                #                                                 test_magspecs, ref_t_data,
                #                                                 nb_median, nb_iter_gl,
                #                                                 l_medfilt, params)
                #
                #                # now get a harmonic candidate by using only the chroma coefficients
                #                res_array_harmo = regression.eval_knn( learn_feats[:,-48:], learn_magspecs,
                #                                                 test_feats[:,-48:] ,
                #                                                 test_magspecs, ref_t_data,
                #                                                 nb_median, nb_iter_gl,
Example No. 8
import sys
import os
# the path appends must come before importing from these workspaces
sys.path.append('/home/manu/workspace/audio-sketch')
sys.path.append('/home/manu/workspace/PyMP')
sys.path.append('/home/manu/workspace/meeg_denoise')
from feat_invert import regression, transforms, features
import stft
# load the sinewave speech
sinewave = Signal('/sons/sqam/vegaSWS.wav', mono=True)

spectro = stft.stft(sinewave.data, wsize=1024, tstep=256)[0, :, :]

init_vec = np.random.randn(sinewave.data.shape[0])

rec_gl_data = transforms.gl_recons(np.abs(spectro),
                                   init_vec,
                                   niter=20,
                                   wsize=1024,
                                   tstep=256)

sig_rec = Signal(rec_gl_data, sinewave.fs, mono=True, normalize=True)
sig_rec.write('/sons/sqam/vegaSWS_gl.wav')
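
# transforms.gl_recons, used throughout these examples, presumably implements
# the classic Griffin-Lim iteration: alternately enforce the target magnitude
# and STFT consistency, starting from random phase. A self-contained sketch
# using scipy.signal (my own illustration, not the library's code):
import numpy as np
from scipy.signal import stft as sp_stft, istft as sp_istft

def griffin_lim_sketch(magspec, n_iter=20, wsize=1024, tstep=256, seed=0):
    # magspec: freq x frames target magnitude spectrogram
    rng = np.random.default_rng(seed)
    phase = np.exp(2j * np.pi * rng.random(magspec.shape))
    for _ in range(n_iter):
        _, x = sp_istft(magspec * phase, nperseg=wsize, noverlap=wsize - tstep)
        _, _, S = sp_stft(x, nperseg=wsize, noverlap=wsize - tstep)
        n = min(S.shape[1], magspec.shape[1])  # guard against frame-count drift
        phase[:, :n] = np.exp(1j * np.angle(S[:, :n]))
    _, x = sp_istft(magspec * phase, nperseg=wsize, noverlap=wsize - tstep)
    return x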

# OK, it's working just fine
# now compare with reconstruction from original spectrogram
original = Signal('/sons/sqam/vega.wav', mono=True)
spectro = stft.stft(original.data, wsize=1024, tstep=256)[0, :, :]
init_vec = np.random.randn(original.data.shape[0])
rec_gl_data = transforms.gl_recons(np.abs(spectro),
                                   init_vec,
                                   niter=20,
                                   wsize=1024,
Example No. 9
knn.fit(learn_feats_all[:, Learnidxs])

distance, neighbs = knn.kneighbors(Feats[:, Learnidxs],
                                   n_neighbors=3,
                                   return_distance=True)

# reconstruct: per test frame, take the median of its 3 nearest learning spectra
Y_hat = np.zeros_like(MagSpectrums)
T = neighbs.shape[0]
for t in range(T):
    Y_hat[t, :] = np.median(learn_magspecs_all[neighbs[t, :], :], 0)

init_vec = np.random.randn(128 * Y_hat.shape[0])
x_recon = transforms.gl_recons(Y_hat.T,
                               init_vec,
                               50,
                               wsize,
                               128,
                               display=False)

import sti

orig_sig = Signal(learn_audiofilepath, mono=True, normalize=True)
orig_sig.downsample(16000)
sig = Signal(x_recon, 16000, normalize=True)

score = sti.stiFromAudio(orig_sig.data,
                         x_recon,
                         16000,
                         calcref=False,
                         downsample=None,
                         name="unnamed")