Example 1
def expe_2():
    sig_1_path = '/sons/voxforge/main/Learn/cmu_us_slt_arctic/wav/arctic_a0372.wav'
    sig_2_path = '/sons/voxforge/main/Learn/cmu_us_rms_arctic/wav/arctic_a0372.wav'
    for i, sig_path in enumerate([sig_1_path, sig_2_path]):
        synth_sig = Signal(sig_path, normalize=True, mono=True)
        #synth_sig.crop(0.1*synth_sig.fs, 3.5*synth_sig.fs)
        #synth_sig.resample(32000)
        plt.figure(figsize=(10, 10))
        plt.subplot(211)
        plt.plot(
            np.arange(.0, synth_sig.length) / float(synth_sig.fs),
            synth_sig.data)
        plt.xticks([])
        plt.ylim([-1, 1])
        plt.grid()
        plt.subplot(212)
        synth_sig.spectrogram(1024,
                              64,
                              order=0.5,
                              log=False,
                              cmap=cm.hot,
                              cbar=False)

        plt.savefig(op.join(figure_output_path, 'voice_%d_spectro.pdf' % i))
        synth_sig.write(op.join(audio_output_path, 'voice_%d_spectro.wav' % i))
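For context, a minimal setup sketch for running this example; the Signal class is assumed to come from the PyMP package, and the output directories are placeholders:

import os.path as op
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from PyMP.signals import Signal  # assumed location of the Signal class

figure_output_path = '/tmp/figures'  # placeholder output directories
audio_output_path = '/tmp/audio'
expe_2()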
Example 2
def recons_save_fig_audio(magspec,
                          target_name,
                          n_max_frames,
                          fs=22050,
                          fig_size=(8, 3),
                          nb_gl_iter=30):

    init_vec = np.random.randn(128 * n_max_frames)
    x_recon = transforms.gl_recons(magspec[:, :n_max_frames],
                                   init_vec,
                                   nb_gl_iter,
                                   512,
                                   128,
                                   display=False)
    rec_sig = Signal(x_recon, fs, normalize=True)

    rec_sig.write(os.path.join(output_audio_path, '%s.wav' % target_name))
    plt.figure(figsize=fig_size)
    rec_sig.spectrogram(512, 128, order=1, log=True, cmap=cm.jet, cbar=False)
    plt.savefig(os.path.join(output_fig_path, '%s.png' % target_name))
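A hypothetical call, assuming the module-level output_audio_path and output_fig_path are set and magspec is a (257, n_frames) magnitude spectrogram matching the hard-coded 512-sample window and 128-sample hop:

magspec = np.abs(np.random.randn(257, 1000))  # stand-in magnitude spectrogram
recons_save_fig_audio(magspec, 'example_target', n_max_frames=500)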
Example 3
def save_audio(learntype,
               np,  # the numpy module, passed in explicitly
               test_file,
               n_feat,
               rescale_str,
               sigout,
               fs,
               norm_segments=False):
    """ saving output vector to an audio wav"""
    norm_str = ''
    if norm_segments:
        norm_str = 'normed'
        mean_energy = np.mean(
            [np.sum(sig**2) / float(len(sig)) for sig in sigout])
        for sig in sigout:
            sig /= np.sum(sig**2) / float(len(sig))
            sig *= mean_energy
    rec_sig = Signal(np.concatenate(sigout), fs, normalize=True)
    rec_sig.write('%s/%s_with%s_%dfeats_%s%s.wav' %
                  (outputpath, os.path.split(test_file)[-1], learntype, n_feat,
                   rescale_str, norm_str))
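Note that the loop above rescales each segment's amplitude by the ratio of the mean energy to its own energy. If the intent is to equalize RMS across segments, a square-root variant would look like this (a sketch of an alternative, not the author's code):

for sig in sigout:
    rms = np.sqrt(np.sum(sig ** 2) / float(len(sig)))
    sig *= np.sqrt(mean_energy) / rms  # bring every segment to the mean RMS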
Example 4
def save_audio_full_ref(learntype,
                        test_file,
                        n_feat,
                        rescale_str,
                        sigout,
                        fs,
                        norm_segments=False):
    """ do not cut the sounds """
    # first pass for total length
    max_idx = int(sigout[-1][1] + len(sigout[-1][0])) + 4 * fs
    print "total length of ", max_idx
    sig_data = np.zeros((max_idx, ))
    #    seg_energy = np.sum(sigout[-1][0]**2)
    for (sig, startidx) in sigout:
        #        print sig.shape, sig_data[int(startidx):int(startidx)+sig.shape[0]].shape
        sig_data[int(startidx):int(startidx) +
                 sig.shape[0]] += sig  #*seg_energy/np.sum(sig**2)

    rec_sig = Signal(sig_data, fs, normalize=True)
    rec_sig.write('%s/%s_with%s_%dfeats_%s%s.wav' %
                  (outputpath, os.path.split(test_file)[-1], learntype, n_feat,
                   rescale_str, 'full_ref'))
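A hypothetical call, assuming sigout is a list of (waveform, start_index) pairs and outputpath is defined at module level:

sigout = [(np.random.randn(2048), 0), (np.random.randn(2048), 1024)]
save_audio_full_ref('knn', 'test01.wav', 20, 'raw', sigout, 22050)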
Example 5
            if dist_idx[-1] * ratio > distorted_spec.shape[0]:
                break
            new_spec = np.abs(learn_specs[[int(j * ratio)
                                           for j in dist_idx], :])
        distorted_spec[dist_idx, :] = new_spec

plt.figure()
plt.imshow(np.log(distorted_spec.T), origin='lower')
plt.show()

# resynthesize
init_vec = np.random.randn(distorted_spec.shape[0] * tstep)
rec_method2 = transforms.gl_recons(distorted_spec.T,
                                   init_vec,
                                   10,
                                   wsize,
                                   tstep,
                                   display=False)
rec_sig_2 = Signal(rec_method2, original.fs, mono=True, normalize=True)
rec_sig_2.write('/sons/tests/rec_sig2.wav')
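For reference, transforms.gl_recons is assumed to implement Griffin-Lim phase reconstruction from a magnitude spectrogram; a rough librosa equivalent (an assumption, not the project's code) would be:

import librosa
# distorted_spec.T must be (freq_bins, n_frames); hop and window match the call above
rec = librosa.griffinlim(distorted_spec.T, n_iter=10,
                         hop_length=tstep, win_length=wsize)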
# let us see if the spectra joined together look alike
#seg_idx = 10
#plt.figure()
#plt.subplot(121)
#plt.imshow(np.log(np.abs(learn_specs[spec_idx[seg_idx],:]).T))
#plt.subplot(122)
#plt.plot(np.median(np.log(np.abs(learn_specs[spec_idx[seg_idx],:]).T), axis=1))
#plt.plot(np.log(np.mean(np.abs(learn_specs[spec_idx[seg_idx],:].T), axis=1)),'k')
#plt.plot(np.mean(np.log(np.abs(learn_specs[spec_idx[seg_idx],:]).T), axis=1),'r')
#plt.show()
Example 6
sig_learn_ref = Signal(ref_learn_data, sr)
ref_test_data = Datas[start_t_sample:start_t_sample + test_sample]
sig_test_ref = Signal(ref_test_data, sr)

nb_median = 5
nb_iter_gl = 20
l_medfilt = 1
params = {}
params['win_size'] = int(wintime * sr)
params['step_size'] = int(steptime * sr)

res_array = regression.eval_knn(learn_feats, learn_magspecs, test_feats,
                                test_magspecs, ref_test_data, nb_median,
                                nb_iter_gl, l_medfilt, params)

output_path = '/home/manu/workspace/audio-sketch/src/results/'
res_sig = Signal(res_array[1], sr, mono=True, normalize=True)

res_sig.write(output_path + 'audio/test_rwc-g-m01_4_learn_%s%s_%dmedian.wav' %
              (add_sample_str, add_col_str, nb_median))
sig_test_ref.write(output_path + 'audio/ref_rwc-g-m04_4_learn_%d.wav' %
                   int(100 * learn_ratio))

#plt.figure()
#plt.plot(res_array[2])
#plt.show()
## terrible idea: use the waveforms directly?
#Xdev = learn_feats
#Ydev = learn_feats
#estimated_windowed_wf = regression.ann(Xdev, Ydev, X, Y, display=False, K=1)
Example 7
def expe_1_synth_from_same_sample():        
    input_dir = '/sons/rwc/Learn/'
    output_dir = '/sons/rwc/Learn/hdf5/'
    
    audiofile = input_dir + 'rwc-g-m01_1.wav'
    h5file = output_dir + 'rwc-g-m01_1.h5'
    
    # load the Echo Nest features
    h5 = hdf5_getters.open_h5_file_read(h5file)
    timbre = hdf5_getters.get_segments_timbre(h5)
    loudness_start = hdf5_getters.get_segments_loudness_start(h5)
    loudness_max = hdf5_getters.get_segments_loudness_max(h5)
    loudness_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
    C = hdf5_getters.get_segments_pitches(h5)
    segments_all = hdf5_getters.get_segments_start(h5)
    
    learn_feats_all = np.hstack((timbre,
                                 loudness_start.reshape((loudness_start.shape[0], 1)),
                                 C))
    
    # OK, that was the best possible case; now let us find the nearest
    # neighbors, get the segments back, and resynthesize!

    learn_duration = 200 # in seconds
    test_start = 200
    test_duration = 5
    
    # Get learning data
    learning = Signal(audiofile, mono=True)
    learning.crop(0, learn_duration*learning.fs)
    
    wsize = 1024
    tstep = 512
    # Get the magnitude spectrum for the given audio file
    learn_specs = features.get_stft(learning.data, wsize, tstep)
    learn_specs = learn_specs.T
    
    max_l_seg_idx = np.where(segments_all < learn_duration)[0][-1]
    l_segments = segments_all[:max_l_seg_idx]
    l_segment_lengths = (l_segments[1:] - l_segments[0:-1])*learning.fs
    
    
    learn_feats = learn_feats_all[:max_l_seg_idx, :]
    # for each segment index, keep track of the corresponding frame indices
    # in the learn_specs matrix
    l_seg_bounds = []
    ref_time = np.arange(0., float(learning.length)/float(learning.fs), float(tstep)/float(learning.fs))
    for segI in range(len(l_segments)-1):
        startIdx = np.where(ref_time > l_segments[segI])[0][0]
        endIdx = np.where(ref_time > l_segments[segI+1])[0][0]
        l_seg_bounds.append((startIdx,endIdx))
    l_seg_bounds.append((endIdx, ref_time.shape[0]))
    
    # Get testing data
    testing = Signal(audiofile, mono=True)
    testing.crop(test_start * testing.fs, (test_start + test_duration) * testing.fs)
    
    # get the testing features
    min_t_seg_idx = np.where(segments_all < test_start)[0][-1]
    max_t_seg_idx = np.where(segments_all < test_start + test_duration)[0][-1]
    t_segments = segments_all[min_t_seg_idx:max_t_seg_idx]
    t_segment_lengths = (t_segments[1:] - t_segments[0:-1])*testing.fs
    test_feats = learn_feats_all[min_t_seg_idx:max_t_seg_idx,:]
    
    # find the nearest neighbors
    from sklearn.neighbors import NearestNeighbors
    neigh = NearestNeighbors(n_neighbors=1)
    # fit on the learning data
    neigh.fit(learn_feats)
    neighb_segments_idx = neigh.kneighbors(test_feats, return_distance=False)
    
    # neighb_segments_idx is a set of segment indices; we need to get the
    # spectrograms back from the learning data, then fit the new segment lengths
    
    target_length = int(test_duration*testing.fs)
    
    neighb_segments = list(zip(neighb_segments_idx[:, 0],
                               t_segment_lengths.astype(int)))

    morphed_spectro = spec_morph(np.abs(learn_specs), target_length,
                                 neighb_segments, l_seg_bounds)

    # retrieve true stft for comparison
    test_specs = features.get_stft(testing.data, wsize, tstep)
    
    plt.figure()
    plt.subplot(121)
    plt.imshow(np.log(np.abs(test_specs)), origin='lower')
    plt.colorbar()
    plt.subplot(122)
    plt.imshow(np.log(morphed_spectro.T), origin='lower')
    plt.colorbar()
    plt.show()

    init_vec = np.random.randn(morphed_spectro.shape[0] * tstep)
    rec_method2 = transforms.gl_recons(morphed_spectro.T, init_vec, 10,
                                       wsize, tstep, display=False)
    rec_sig_2 = Signal(rec_method2, testing.fs, mono=True, normalize=True)
    rec_sig_2.write('/sons/tests/rec_sig2.wav')
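spec_morph is not shown in this listing. A rough sketch of what it is assumed to do, i.e. concatenate the matched segments' spectrogram frames and repeat or truncate each to the target segment length (tstep is taken from the enclosing scope; this is an illustration, not the project's implementation):

def spec_morph(specs, target_length, neighb_segments, seg_bounds):
    """Hypothetical reimplementation, for illustration only."""
    frames = []
    for seg_idx, seg_len in neighb_segments:
        start, end = seg_bounds[seg_idx]
        seg_frames = specs[start:end, :]
        n_target = max(1, int(seg_len) // tstep)  # frames needed for this segment
        idx = np.minimum(np.arange(n_target), seg_frames.shape[0] - 1)
        frames.append(seg_frames[idx, :])  # repeat the last frame if too short
    morphed = np.concatenate(frames, axis=0)
    return morphed[:target_length // tstep, :]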
Example 8
        print "Loading ", filepath
        signalin, fs = get_audio(filepath, ref_audio_start, ref_audio_duration)
        target_length = target_audio_duration*fs
        print "Loaded %s length of %d "%( filepath, len(signalin))
        print "Stretching to %2.2f"%target_length
        
        # adjust the Loudness ?
        if rescale:
            rescale_str = 'normed'
            signalin = signalin.astype(float)
            signalin /= 8192.0
            signalin /= np.max(signalin)
    #        N = float(len(signalin))
    #        target_loudness = test_feats[test_seg_idx, 13]
    #        adjust = target_loudness - 10*np.log10((1.0/N)*np.sum(signalin**2))
    #        signalin *= 10**(adjust/10.)
            signalin *= 8192.0
            signalin = signalin.astype(np.int16)
        sigout[num_neigh].append(
            time_stretch(signalin, tscale, wsize=1024, tstep=128)[128:-1024])


for num_neigh in range(n_neighbs):
    rec_sig = Signal(np.concatenate(sigout[num_neigh]), fs, normalize=True)
    rec_sig.write(
        '/home/manu/workspace/audio-sketch/src/results/audio/%s_with%s_%dfeats_%s_neighbor_%d.wav'
        % (os.path.split(test_file)[-1], learntype, n_feat, rescale_str,
           num_neigh))
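time_stretch above is assumed to be a phase-vocoder time stretcher; a librosa equivalent on a float waveform (an assumption, and the mapping between tscale and rate depends on the convention used) would be:

import librosa
stretched = librosa.effects.time_stretch(signalin.astype(float), rate=1.0 / tscale)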

Example 9
output_audio_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Audio/'
output_fig_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Figures/'
colormap = cm.jet
fig_size = (8, 3)
# also load the Dan Ellis's synthesized version
# The Piano cross-synthesis and the Viterbi smoothed Musaicing?
# resynthesize using the first N frames
n_max_frames = 900
nb_gl_iter = 30
init_vec = np.random.randn(128 * n_max_frames)
x_recon_median = transforms.gl_recons(median_magspec[:, :n_max_frames], init_vec,
                                      nb_gl_iter, 512, 128, display=False)

sig_median = Signal(x_recon_median, 22050, normalize=True)
sig_median.write(os.path.join(output_audio_path, '%s_add_median.wav' % t_name))
plt.figure(figsize=fig_size)
sig_median.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_add_median.png' % t_name))

init_vec = np.random.randn(128 * n_max_frames)
x_recon_orig = transforms.gl_recons(orig_spec[:, :n_max_frames], init_vec,
                                    nb_gl_iter, 512, 128, display=False)
sig_orig = Signal(x_recon_orig, 22050, normalize=True)
sig_orig.write(os.path.join(output_audio_path, '%s_original.wav' % t_name))
plt.figure(figsize=fig_size)
sig_orig.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False)
plt.savefig(os.path.join(output_fig_path, '%s_original.png' % t_name))

init_vec = np.random.randn(128 * n_max_frames)
x_recon_max = transforms.gl_recons(max_magspec[:, :n_max_frames], init_vec,
                                   nb_gl_iter, 512, 128, display=False)
Example 10
sr = 16000
win_size = steptime * 2 * sr
step_size = steptime * sr
# sliding median filtering ?
from scipy.ndimage import median_filter
# (1, 20) footprint: a 20-frame median over time, applied independently
# to each frequency bin
estimated_spectrum_filt = median_filter(estimated_spectrum, (1, 20))

plt.figure()
plt.imshow(np.log(estimated_spectrum_filt), origin='lower')
plt.show()
# reconstruction

#init_vec = np.random.randn(step_size*Y_hat.shape[1])
init_vec = np.random.randn(step_size * estimated_spectrum.shape[1])
#x_recon = transforms.gl_recons(estimated_spectrum_filt, init_vec, 20,
#                               win_size, step_size, display=False)

x_recon = transforms.gl_recons_vary_size(estimated_spectrum,
                                         n_segments_start,
                                         20,
                                         win_size,
                                         step_size,
                                         display=False)

output_path = '/home/manu/workspace/audio-sketch/src/results/'
res_sig = Signal(x_recon, sr, mono=True, normalize=True)

res_sig.write(output_path + 'audio/resynth_%s_%dmedian.wav' %
              (title, nb_median))
#sig_test_ref.write(output_path+'audio/resynth_%s_learn_%d.wav'%int(100*learn_ratio))
Example 11
#    sim_mat[t,:] = np.sum((t_feats - t_feats[t,:])**2, axis=1)
#
#plt.figure()
#plt.imshow(sim_mat, origin='lower')
#plt.colorbar()
#plt.show()

# now try to Viterbi-decode this
from tools.learning_tools import Viterbi
vit_path = Viterbi(neigh, distance, trans_penalty=0.01, c_value=20)
vit_cands = [neigh[ind, neighbind] for ind, neighbind in enumerate(vit_path)]
#
sig_out_viterbi = resynth_sequence(np.squeeze(vit_cands),
                                   t_seg_starts,
                                   t_seg_duration,
                                   l_segments,
                                   l_feats,
                                   ref_audio_dir,
                                   '.au',
                                   22050,
                                   dotime_stretch=True,
                                   max_synth_idx=40,
                                   normalize=True)

sig_viterbi = Signal(sig_out_viterbi, 22050, normalize=True)
sig_viterbi.write(
    '%s/%s_viterbi_%dFeats_%dLearns_Filter%d.wav' %
    (outputpath, h5files[t_index - 1], nbFeats, n_learn, filter_key))
sig_viterbi.crop(0, 9.5 * sig_viterbi.fs)
#
#sig_viterbi = save_audio(outputpath, '%s_viterbi'%h5files[t_index], sig_out_viterbi, 22050, norm_segments=False)
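The Viterbi helper is custom to tools.learning_tools. A minimal sketch of the kind of smoothing assumed here: pick one of the K candidate neighbors per frame so that the accumulated distance plus a penalty for switching candidates is minimized (a sketch under these assumptions, not the project's implementation):

def viterbi_candidates(distance, trans_penalty=0.01):
    """distance: (n_frames, K) candidate costs; returns one index per frame."""
    n_frames, K = distance.shape
    cost = distance[0].copy()
    back = np.zeros((n_frames, K), dtype=int)
    switch_cost = trans_penalty * (1.0 - np.eye(K))  # free to stay, pay to switch
    for t in range(1, n_frames):
        total = cost[:, None] + switch_cost  # total[i, j]: come from i, go to j
        back[t] = np.argmin(total, axis=0)
        cost = total[back[t], np.arange(K)] + distance[t]
    path = np.zeros(n_frames, dtype=int)
    path[-1] = int(np.argmin(cost))
    for t in range(n_frames - 1, 0, -1):
        path[t - 1] = back[t, path[t]]
    return path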
Example 12
        params['forbidden_names'] = [os.path.basename(i) for i in learn_files]
        [test_feats_all, test_magspecs, n_f_test, ref_t_data,
         test_files] = load_yaafedata(params)

        # search for any test file that is already in the learning set
        isinbase = any([
            os.path.basename(p) in params['forbidden_names']
            for p in test_files
        ])

    save_test_name = 'test_audio_seed_%d_%d_trial%s' % (
        learn_seed, params['shuffle'], trialIdx)

    # also save the audio
    res_sig = Signal(ref_t_data, params['sr'], mono=True, normalize=True)
    res_sig.write(output_path + 'audio/' + save_test_name + '.wav')
    for nli in range(len(nb_learns)):
        nb_learn = nb_learns[nli]

        for mfi in range(len(nb_features)):
            nb_feat = nb_features[mfi]

            learn_feats = learn_feats_all[0:nb_learn, 0:nb_feat]
            learn_magspecs = learn_magspecs_all[:, 0:nb_learn]

            test_feats = test_feats_all[:, 0:nb_feat]

            for nmi in range(len(nb_medians)):
                nb_median = nb_medians[nmi]

                # Getting the spectrum with all features considered
Example 13
sys.path.append('/home/manu/workspace/PyMP')
sys.path.append('/home/manu/workspace/meeg_denoise')
import stft
# load the sinewave speech
sinewave = Signal('/sons/sqam/vegaSWS.wav', mono=True)

spectro = stft.stft(sinewave.data, wsize=1024, tstep=256)[0, :, :]

init_vec = np.random.randn(sinewave.data.shape[0])

rec_gl_data = transforms.gl_recons(np.abs(spectro),
                                   init_vec,
                                   niter=20,
                                   wsize=1024,
                                   tstep=256)

sig_rec = Signal(rec_gl_data, sinewave.fs, mono=True, normalize=True)
sig_rec.write('/sons/sqam/vegaSWS_gl.wav')

# OK, it's working just fine
# now compare with reconstruction from original spectrogram
original = Signal('/sons/sqam/vega.wav', mono=True)
spectro = stft.stft(original.data, wsize=1024, tstep=256)[0, :, :]
init_vec = np.random.randn(original.data.shape[0])
rec_gl_data = transforms.gl_recons(np.abs(spectro),
                                   init_vec,
                                   niter=20,
                                   wsize=1024,
                                   tstep=256)
sig_rec = Signal(rec_gl_data, original.fs, mono=True, normalize=True)
sig_rec.write('/sons/sqam/vega_gl.wav')
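The custom stft.stft above is assumed to return a (channels, freq_bins, n_frames) complex array; a rough scipy equivalent for the mono case (an assumption, window and scaling conventions may differ) would be:

import scipy.signal
f, t, spectro = scipy.signal.stft(original.data, nperseg=1024,
                                  noverlap=1024 - 256)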