Example 1
def analyse_rec(sound_files, nsources=1, wind_sec=0.092, min_len=.3,
                recognise=None, output_csv='', output_text_grid=''):
    # segment recordings
    w = []
    for ff in sound_files:
        sr, wi = wavread(ff)
        w.append(wi.T)

    w = np.vstack(w).T
    sys.stderr.write("Read {} files, {} channels, {} samples\n"
                     .format(len(sound_files), w.shape[1], w.shape[0]))
    sys.stderr.write("Segmenting audio\n")
    if nsources > 1:
        seg = MultiChannelSegmenter(w, sr=sr, min_len=min_len)
    else:
        # mix multi-channel audio down to mono before silence detection
        if len(w.shape) > 1:
            w = np.mean(w, axis=1)
        seg = SilenceDetector(w.squeeze(), sr=sr, method='pct05',
                              min_len=min_len, wind_sec=wind_sec)
        seg.label = [1 for tst in seg.tst]
        seg.centers = np.array([[0, 0], [1, 0]])
        
    if recognise:
        seg.recognise(mode=recognise)

    sys.stderr.write("Found {} chunks\n".format(len(seg.label)))

    output_results(seg, output_csv=output_csv,
                   output_text_grid=output_text_grid)
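A minimal invocation sketch (file names hypothetical; MultiChannelSegmenter, SilenceDetector, and output_results come from the surrounding project):

analyse_rec(['take1.wav', 'take2.wav'], nsources=2,
            output_csv='segments.csv')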
Example 2
def compare(control_path, exp_path):
    """
    Compares two wav files and returns a score. Uses mel frequency ceptrum coefficients as well as dynamic time warping.

    :param control_path: the 'correct' wav - what you are comparing to
    :param exp_path: the unknown wav
    """
    (rate,sig) = wavread(control_path)
    (rate2,sig2) = wavread(exp_path)

    x = mfcc(sig,rate)
    y = mfcc(sig2,rate2)

    dist, cost, acc = dtw.dtw(x, y, dist=lambda x, y: dtw.norm(x - y, ord=1))

    return dist
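A minimal usage sketch, assuming two mono wav files exist at the given (hypothetical) paths; smaller distances mean the recordings are more alike:

if __name__ == '__main__':
    # hypothetical file names
    score = compare('reference.wav', 'attempt.wav')
    print('DTW distance over MFCC frames:', score)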
Example 3
def load_wav(filename):
    if filename.endswith('.wav'):
        fs, x = wavread(filename)
        if fs != 8000:
            # resample to the 8 kHz rate the guard above expects
            x = resample(x, int(8000 / fs * len(x)))
        return x
    return np.array([])
Example 4
def load_vgmwav(wav_fp):
  fs, wav = wavread(wav_fp)
  assert fs == 44100
  if wav.ndim == 2:
    wav = wav[:, 0]
  wav = wav.astype(np.float32)
  wav /= 32767.
  return wav
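The inverse mapping, a sketch for writing a normalized float waveform back out as 16-bit PCM with scipy.io.wavfile.write (save_vgmwav is a hypothetical counterpart to load_vgmwav):

import numpy as np
from scipy.io.wavfile import write as wavwrite

def save_vgmwav(wav_fp, wav, fs=44100):
    # hypothetical helper: clip to [-1, 1], then rescale to int16 full scale
    wav = np.clip(wav, -1.0, 1.0)
    wavwrite(wav_fp, fs, (wav * 32767.).astype(np.int16))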
Example 5
def example():
    sig = wavread("ISSpkt.wav")[1]
    NRZIa = nc_afskDemod(sig)
    fig = plt.figure(figsize=(16,4))
    plt.plot(NRZIa)
    NRZI = np.sign(NRZIa)
    packets ,lastflag = detectFrames(NRZI)
    ax = decodeAX25(packets[0])
    print("Dest: %s | Source: %s | Digis: %s | %s |" %(ax.destination ,ax.source ,ax.digipeaters,ax.info))
    print lastflag
Example 6
def get_click_sounds():
    """
    http://127.0.0.1:5000/get_tabla_sounds
    simple! and you get the json data :)
    """
    # read each wav sound and ship its samples as JSON
    output = {}
    for stroke, path in clickStrokes.items():
        fs, data = wavread(path)
        output[stroke] = data.tolist()
    return jsonify(**output)
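In context this is a Flask view; a sketch of how it would typically be wired up (app and clickStrokes assumed defined elsewhere in the project):

from flask import Flask, jsonify

app = Flask(__name__)

# hypothetical registration, following the URL pattern in the docstring
app.add_url_rule('/get_click_sounds', 'get_click_sounds', get_click_sounds)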
Example 7
File: ltsa.py Project: tryan/LTSA
    def __init__(self, _file, channel=0):

        self.ltsa = None

        if isinstance(_file, str) and _file[-4:] == '.wav':
            self.fs, self.signal = wavread(_file)
            if self.signal.ndim > 1:
                self.signal = self.signal[:,channel] # take only one channel
        else:
            raise TypeError('Input is not a path to a .wav file: %s' % str(_file))

        self._init_params()
Example 8
def make_features(wav_dir, mfcc_dir, energy=False, n=13):

    if not os.path.exists(mfcc_dir):
        os.mkdir(mfcc_dir)

    for f in os.listdir(wav_dir):
        if f.endswith('.wav'):
            fs, w = wavread(os.path.join(wav_dir, f))
            m = mfcc(w, samplerate=fs, appendEnergy=energy, numcep=n)
            # standardize each cepstral coefficient to zero mean, unit variance
            mean = m.mean(axis=0)
            std = m.std(axis=0)
            m = (m - mean) / std
            np.save(os.path.join(mfcc_dir, f[:-3] + 'npy'), m)
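Reading the standardized features back is then a matter of np.load on each saved .npy file (a sketch; the matrices have shape (frames, n)):

import os
import numpy as np

def load_features(mfcc_dir):
    feats = {}
    for f in os.listdir(mfcc_dir):
        if f.endswith('.npy'):
            feats[f[:-4]] = np.load(os.path.join(mfcc_dir, f))
    return feats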
Example 9
File: mix.py Project: sduc/urlnn
def mixsounds():
    """Return 9 linear mixtures of sound signals.

    The sound signals have to be in '.../sources/'.
    """

    files = [('../sources/source%i.wav' % i) for i in range(1,10)]
    source = np.zeros((50000,9))
    for i in range(9):
        source[:,i] = wavread(files[i])[1]
    source -= np.mean(source, 0)
    mix = np.random.rand(9,9)
    data = np.dot(source, mix)
    return data
Example 10
def test_decoding():
    # Load ISS Packet
    Qin = Queue.Queue()  # the Python 2 'Queue' module ('queue' in Python 3)
    sig = wavread("ISSpkt_full.wav")[1]
    print(len(sig))
    for n in r_[0:len(sig):1024]:
        Qin.put(sig[n:n+1024])
    Qin.put("END")

    length = 43
    end = False
    count = 1
    while Qin.not_empty:  # note: not_empty is a Condition object (always truthy); the loop exits via 'return'
        buf = np.array([])
        for i in range(length):
            chunk = Qin.get()
            if chunk == "END":
                print(chunk)
                end = True
                break
            else:
                buf = np.append(buf, chunk)
        NRZIa = nc_afskDemod(buf)
        NRZI = np.sign(NRZIa)
        packets, lastflag = detectFrames(NRZI)
        # make recursive?
        while(lastflag > 0):
            for i in range(20):
                chunk = Qin.get()
                if chunk == "END":
                    print(chunk)
                    end = True
                    break
                else:
                    buf = np.append(buf, chunk)
            NRZIa = nc_afskDemod(buf)
            NRZI = np.sign(NRZIa)
            packets, lastflag = detectFrames(NRZI)
            if lastflag>0:
                print(lastflag)

        for p in packets:
            #print "%d. %s"%(count, str(decodeAX25(p)))
            ax = decodeAX25(p)
            print ("%d. Dest: %s | Source: %s | Digis: %s | %s" %(count, ax.destination ,ax.source , ax.digipeaters, ax.info))
            count += 1
        if end:
            return
Example 11
    def __init__(self, file_path, verbose=False):

        if verbose:
            print('Read the audio file:', file_path)
        try:
            sr, sig = wavread(file_path)
        except IOError:
            print("Error: can't read the audio file:", file_path)
        else:
            if verbose:
                print('\tSuccessful read of the audio file:', file_path)
            self.sr = sr
            self.sig_int = sig
            self.sig_float = pcm2float(sig, dtype='float64')
            self.niquist = sr / 2  # Nyquist frequency
            self.file_path = file_path
            self.file_name = basename(file_path)
            self.filtered = False
            self.duration = len(sig) / float(sr)
            self.indices = dict()  # empty dictionary of Index
Example 12
def main(**kwargs):
  outfile = kwargs['outfile'][0]
  infile = kwargs['infile']
  print "Filtering %s to %s" % (infile, outfile)
  rate, sound_samples = wavread(infile)
  mono = sound_samples.ndim == 1  # stereo files load as a 2-D array
  # data,r = ffmpeg_load_audio('32but.wav', 44100, True, dtype=np.float32)
  rate, sound_samples = ffmpeg_load_audio(infile, rate, mono, dtype=np.float32)

  fs = 44100.0
  lowcut = 100.0
  highcut = 3000.0

  # b,a = butter_bandpass(lowcut, highcut, fs, 5)

  # filtered = lfilter(b, a, sound_samples)

  # filtered = butter_bandpass_filter(sound_samples, lowcut, highcut, fs, 5)

  # filtered = butter_bandpass_filter_two(sound_samples, lowcut, highcut, fs, 5)

  wavwrite(outfile, rate, sound_samples)
Example 13
def create_ceps(path):
  sample_rate, X = wavread(path)
  ceps, mspec, spec = mfcc(X)
  write_ceps(ceps, path)
Example 14
all_electrodes = ((0, ), (0, ), (1, ))

waveform_means = [np.random.randn(30, 1) for _ in range(3)]

for spike_times, electrodes, waveform_mean in \
        zip(all_spike_times, all_electrodes, waveform_means):
    nwbfile.add_unit(spike_times=spike_times,
                     electrodes=electrodes,
                     waveform_mean=waveform_mean)

# analog data
# microphone data
# Be careful! This might contain identifying information
mic_path = '/Users/bendichter/Desktop/Chang/video_abstract/word_emphasis.wav'
mic_fs, mic_data = wavread(mic_path)
nwbfile.add_acquisition(
    TimeSeries('microphone',
               mic_data,
               'audio unit',
               rate=float(mic_fs),
               description="audio recording from microphone in room"))
# all analog data can be added like the microphone example (speaker, button press, etc.)
spk_path = '/Users/bendichter/Desktop/Chang/video_abstract/word_emphasis.wav'
spk_fs, spk_data = wavread(spk_path)
nwbfile.add_stimulus(
    TimeSeries('speaker1',
               spk_data,
               'audio unit',
               rate=float(spk_fs),
               description="speaker recording"))
Example 15
#!/usr/bin/env python

import numpy as np

from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write as wavwrite
from sklearn.metrics import mean_squared_error

# Print entire, readable ndarrays
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=np.nan)


f_true, data_true = wavread('umbrella.wav')
f_user, data_user = wavread('cucumberfiltered.wav')
zero_array = np.zeros(3746, dtype=np.float)
data_true = np.concatenate([data_true, zero_array])
print(mean_squared_error(data_true, data_user))

fft_true = np.abs(np.fft.fft(data_true))**2
fft_user = np.abs(np.fft.fft(data_user))**2

print(mean_squared_error(fft_true, fft_user))

f_true, data_true = wavread('umbrella.wav')
f_user, data_user = wavread('umbrellaonefiltered.wav')

zero_array = np.zeros(4770, dtype=np.float)
data_true = np.concatenate([data_true, zero_array])
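The hard-coded pad lengths (3746 and 4770) only line up these particular files; a general helper might zero-pad the shorter signal automatically (a sketch):

def padded_mse(a, b):
    # zero-pad the shorter signal, then compare both in time
    # and in the power spectrum
    n = max(len(a), len(b))
    a = np.pad(a, (0, n - len(a)), 'constant')
    b = np.pad(b, (0, n - len(b)), 'constant')
    mse_time = mean_squared_error(a, b)
    mse_spec = mean_squared_error(np.abs(np.fft.fft(a))**2,
                                  np.abs(np.fft.fft(b))**2)
    return mse_time, mse_spec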
Example 17
        vocoded = lfilter((error_power,), a, vocoded)
        vocoded *= hann(len(block))
        out[idx:idx+len(block)] += deemphasis(vocoded)
    return out

def preemphasis(signal):
    return lfilter([1, -0.70], 1, signal)

def deemphasis(signal):
    return lfilter([1, 0.70], 1, signal)

def rms(signal):
    return sqrt(mean(power(signal, 2)))

if __name__ == "__main__":
    fs, data = wavread('Mann.wav')
    data = array(data, dtype=double)
    data /= amax(absolute(data))
    data = decimate(data, 4)
    fs = int(round(fs / 4))

    block_len = 0.032
    overlap = 0.5
    order = 16

    out = vocode(data, fs, block_len, overlap, order)

    wavwrite('vocoded.wav', fs, array(out/amax(absolute(out)) * (2**15-1), dtype=int16))

    figure()
    plot(data)
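Note that deemphasis above is another FIR filter rather than the exact inverse of preemphasis; the exact inverse would put the same coefficients on the denominator side (a sketch):

def deemphasis_exact(signal):
    # exact IIR inverse of lfilter([1, -0.70], 1, signal)
    return lfilter([1], [1, -0.70], signal)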
Example 18
# built-in imports
import timeit

# 3rd-party imports
import numpy as np
from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write

# local imports
from world import main

fs, x_int16 = wavread('test-mwm.wav')
x = x_int16 / (2**15 - 1)
vocoder = main.World()

# profile
print(
    timeit.timeit("vocoder.encode(fs, x, f0_method='harvest')",
                  globals=globals(),
                  number=1))
Example 19

# imports
# numpy
import numpy as np
# imports of scipy
from scipy.io.wavfile import read as wavread
from scipy.fftpack import fft
from scipy.signal import lfilter, butter
# graphs
import matplotlib.pyplot as plt
#
from pylab import arange

#
[Fs, samples] = wavread("xmitas02.wav")

#Fs = 150.0;  # sampling rate
Ts = 1.0 / Fs  # sampling interval
t = np.arange(0, Fs)  # time vector
nyq = 0.5 * Fs  # Nyquist, for the filter

# generate a band-pass filter at 4 kHz +- 100 Hz
b, a = butter(1, [3900 / nyq, 4100 / nyq], 'bandpass', analog=False)

filtered = lfilter(b, a, samples)

#
plt.plot(t, filtered, 'green', linewidth=.1)  # plotting the spectrum
#
Example 20
                         down_sample_factor=dsf)

    # draw frequency response
    bpf.H0_show(freq_high=20000)

    # draw frequency response, using scipy
    bpf.f_show()

    # load a sample wav
    #path0='wav/400Hz-10dB_44100Hz_400msec.wav'
    #path0='wav/1KHz-10dB_44100Hz_400msec.wav'
    #path0='wav/3KHz-10dB_44100Hz_400msec.wav'
    #path0='wav/5KHz-10dB_44100Hz_400msec.wav'
    path0 = 'wav/1KHz-10dB_44100Hz_400ms-TwoTube_stereo.wav'
    try:
        sr, y = wavread(path0)
    except Exception:
        print('error: wavread ')
        sys.exit()
    else:
        yg = y / (2**15)
        if yg.ndim == 2:  # if stereo
            yg = np.average(yg, axis=1)
        print('sampling rate ', sr)
        print('y.shape', yg.shape)

    y2 = bpf.filtering(yg)  # iir2( yg)

    # Exponential Moving Average with Half-wave rectification
    ema1 = Class_EMA1()
    y3 = ema1(y2)
Example 21
    #     display_sample_rate = f.samplerate

    # sound_time = f.nframes*1.0/f.samplerate
    # sound_data = f.read_frames(f.nframes)
    # samples_to_take = int(math.floor(sound_time * display_sample_rate))
    # time_step_for_samples = f.samplerate*1.0/display_sample_rate

    # wave = []

    # for i in xrange(samples_to_take):
    #     frame_offset = i * time_step_for_samples
    #     if num_channels == 1:
    #         wave.append(sound_data[frame_offset])
    #     else:
    #         wave.append(sound_data[frame_offset][0])
    rate, wave = wavread(infile)

    wavwrite('test.wav', rate, wave)
    (freq, amp) = get_component_frequencies(wave)

            # print type(s)
    # with open('data.txt', 'a') as textOutputFile:
    #     for line in amp:
    #         textOutputFile.write(str(line))
    #         textOutputFile.write(',')

    # Only plot the first 4000 Hz (assumes roughly 1 Hz per FFT bin)

    hz = 4000
    freq = freq[0:hz]
    amp = amp[0:hz]
Example 22
def read_wav(wav_file):
    fr, wav = wavread(wav_file)
    wav = wav/np.max(np.abs(wav))
    return wav, fr
Example 23
                        default='na_1_48k.wav',
                        help='input wav file')
    parser.add_argument('--methodF0',
                        '-m',
                        default='harvest',
                        help='F0 estimation method, harvest or dio ')
    parser.add_argument(
        '--not_requiem',
        action='store_false',
        help='use new waveform generator method from WORLD version 0.2.2')
    args = parser.parse_args()

    # load wav file
    wav_path = Path(args.inFILE)
    print('input wave path ', wav_path)
    fs, x_int16 = wavread(wav_path)
    x = x_int16 / (2**15 - 1)
    print('fs', fs)

    if 0:  # resample
        fs_new = 16000
        x = signal.resample_poly(x, fs_new, fs)
        fs = fs_new

    if 0:  # low-cut
        B = signal.firwin(127, [0.01], pass_zero=False)
        A = np.array([1.0])
        if 0:
            import matplotlib.pyplot as plt
            w, H = signal.freqz(B, A)
Example 24
import os
import librosa
import pyrenn
import IPython
import matplotlib.pyplot as plt
from scipy.io.wavfile import read as wavread
# basic and pYAAPT below come from the amfm_decompy package
import amfm_decompy.basic_tools as basic
import amfm_decompy.pYAAPT as pYAAPT

# Set the folders
speakers = ['awb','bdl','clb','jmk','ksp','rms','slt']
root = os.getcwd()
folderpath = os.path.join(root,'datasets',speakers[0],'wav')
files = sorted(os.listdir(folderpath))


# Read the files
for file in files:
    file = os.path.join(folderpath,file)
    fs,audio = wavread(file)
    break
# IPython.display.Audio(file)

# YAAPT pitches
signal = basic.SignalObj(file)
pitchY = pYAAPT.yaapt(signal, frame_length=25, frame_space=5, f0_min=40, f0_max=300)
Example 25
def slice_signal(path, win_len, hop_len, win_frames, hop_frames, sampling_rate,
                 stream):
    slices = []
    sr, wavform = wavread(path)
    assert sampling_rate == sr
    wavform = torch.from_numpy(normalize_wave_minmax(wavform))
    stft_complex = torch.stft(wavform, win_len, hop_len)
    stft_real_orig, stft_imag_orig = stft_complex[:, :, 0].numpy(
    ), stft_complex[:, :, 1].numpy()

    assert stream in ['in', 'out']
    if stream == 'in':
        stft_real = in_real_scale(stft_real_orig)
        stft_imag = in_imag_scale(stft_imag_orig)
    else:
        stft_real = out_real_scale(stft_real_orig)
        stft_imag = out_imag_scale(stft_imag_orig)

    # print(np.max(np.abs(stft_real_recover - stft_real_orig)))
    # assert stft_real_recover.all() == stft_real_orig.all()
    # assert stft_imag_recover.all() == stft_imag_orig.all()
    # stft_real_recover = inverse_in_real_scale(stft_real)
    # stft_imag_recover = inverse_in_imag_scale(stft_imag)
    #
    # stft_recover = np.stack([stft_real_recover, stft_imag_recover], axis=-1)
    # signal_recover = torch.istft(torch.from_numpy(stft_recover), n_fft=400, hop_length=160)
    # wavwrite('./recover.wav', 16000, signal_recover.numpy())
    # stft_orig = np.stack([stft_real_orig, stft_imag_orig], axis=-1)
    # signal_orig = torch.istft(torch.from_numpy(stft_orig), n_fft=400, hop_length=160)
    # wavwrite('./orig.wav', 16000, signal_orig.numpy())

    # stft_real = inverse_out_real_scale(stft_real)
    # stft_imag = inverse_out_imag_scale(stft_imag)
    # stft = np.stack([np.expand_dims(stft_real, axis=0), np.expand_dims(stft_imag, axis=0)], axis=-1)

    len_frames = stft_complex.size()[-2]
    num_slices = math.floor((len_frames - win_frames) / hop_frames) + 1
    if num_slices > 0:
        for idx_slice in range(num_slices):
            slices.append([
                stft_real[:, idx_slice * hop_frames:idx_slice * hop_frames +
                          win_frames],
                stft_imag[:, idx_slice * hop_frames:idx_slice * hop_frames +
                          win_frames]
            ])
            # slices_imag.append(stft_imag[:, idx_slice * hop_frames : idx_slice * hop_frames + win_frames].numpy())
    # num_slices = len(slices)
    # slices_real, slices_imag = [], []
    # for idx in range(num_slices):
    #     slice_real = slices[idx][0][:, 2]
    #     slice_imag = slices[idx][1][:, 2]
    #     slices_real.append(slice_real)
    #     slices_imag.append(slice_imag)
    #
    # stft_real = np.stack(slices_real)
    # stft_imag = np.stack(slices_imag)
    #
    # stft_real = inverse_out_real_scale(stft_real).T
    # stft_imag = inverse_out_imag_scale(stft_imag).T
    # stft = np.stack([np.expand_dims(stft_real, axis=0), np.expand_dims(stft_imag, axis=0)], axis=-1)
    # wav = torch.istft(torch.from_numpy(stft), 400, 160)
    # wavwrite('../save_wav/test2.wav', 16000, wav.numpy().T)
    return slices
Example 26
import tensorflow as tf
import librosa
import scipy
from scipy.io.wavfile import read as wavread
import numpy as np

# load in audio as an array
data, sample_rate = librosa.load('load.wav', sr=None, mono=False)
_, data2 = wavread('load.wav', True)

print(np.max(np.abs(data2-data.T)))

# sample partially
data = data[:100]

# FFT window size to be power of 2, exactly nonoverlapping
chunk_size = 4

# try manual padding of synthesized data
data = np.array([2,6,0,8,1,9,9,5]).astype(np.float32)

# SciPy
_, _, scipy_stft = scipy.signal.stft(data, window='hann', nperseg=chunk_size,
        noverlap=chunk_size*3//4, nfft=chunk_size, return_onesided=True,
        padded=True, axis=-1)
_, scipy_istft = scipy.signal.istft(scipy_stft, fs=sample_rate, window='hann',
            nperseg=chunk_size, noverlap=chunk_size*3//4, nfft=chunk_size, input_onesided=True)

# librosa
rosa_stft = librosa.stft(data, n_fft=chunk_size, hop_length=chunk_size//4,
        win_length=chunk_size, window='hann', center=True,
Example 27
    def _get_image(self,
                   name,
                   nperseg=126,
                   noverlap=None,
                   mag_scale=np.log10(2**15)):
        """
        From audio in the file name construct the magnitude/phase tensor.

        Parameters
        ----------
        name : string
            Name of the audio file.
        nperseg : int
            Size of each FFT window for the STFT.
        noverlap : int or None
            Size of the overlap to the STFT. If None, then a half-step is
            used.
        mag_scale : float
            Value with which the magnitude will be scaled.

        Returns
        -------
        mag_phase : ndarray, shape (stft_width, stft_height, 2)
            The magnitude/phase tensor.
        """

        # Read the audio and downscale the rate by 2
        rate, audio_sig = wavread(self.im_dir + name)
        rate, audio_sig = self._downsample(rate, audio_sig)

        # Set global rate
        if self.rate is None:
            self.rate = rate

        # Right pad audio to desired size
        if noverlap is None:
            noverlap = (nperseg + 1) // 2

        length_orig = len(audio_sig)
        length_pad = int(np.ceil(length_orig / noverlap) * noverlap)
        audio_sig = np.pad(audio_sig, (0, length_pad - length_orig),
                           'constant')

        # Make a Short time Fourier transform
        frequencies, times, stft = signal.stft(audio_sig,
                                               fs=rate,
                                               nperseg=nperseg,
                                               noverlap=noverlap)

        # Convert to log10 magnitude and phase
        spectrogram = np.log10(np.absolute(stft) + 1e-10)
        phasegram = np.angle(stft) / np.pi  # Scale angles to [-1, 1]

        # Scale the magnitude
        spectrogram /= mag_scale

        if stft.shape[1] != 128:
            # Pad the matrices
            spectrogram = np.pad(spectrogram,
                                 [(0, 0), (0, 128 - stft.shape[1])], 'minimum')
            phasegram = np.pad(phasegram, [(0, 0), (0, 128 - stft.shape[1])],
                               'constant')

        # Join into a two-channel tensor
        return np.stack((phasegram, spectrogram), axis=-1)
Example 28
import os
from os.path import join as pjoin
from scipy.io.wavfile import read as wavread, write as wavwrite
clean_dir = '/nas/staff/data_work/Sure/Edinburg_Speech/clean_testset_wav_16k'
noisy_dir = '/nas/staff/data_work/Sure/Edinburg_Speech/noisy_testset_wav_16k'
noise_dir = '/nas/staff/data_work/Sure/Edinburg_Speech/noise_testset_wav_16k'

filenames = os.listdir(clean_dir)
num_filenames = len(filenames)
file_counter = 0

for filename in filenames:
    file_counter += 1
    print('Processing audio file [{}/{}]: {}'.format(file_counter,
                                                     num_filenames, filename))
    clean_path = pjoin(clean_dir, filename)
    noisy_path = pjoin(noisy_dir, filename)
    noise_path = pjoin(noise_dir, filename)
    fs, clean_waveform = wavread(clean_path)
    _, noisy_waveform = wavread(noisy_path)
    noise_waveform = noisy_waveform - clean_waveform
    wavwrite(noise_path, fs, noise_waveform)
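One caveat: when wavread returns int16 arrays, the subtraction can wrap around; casting to a wider type first avoids that (a sketch):

import numpy as np

# cast to int32 before subtracting to dodge int16 overflow,
# then clip back into range for writing
diff = (noisy_waveform.astype(np.int32)
        - clean_waveform.astype(np.int32))
noise_waveform = np.clip(diff, -32768, 32767).astype(np.int16)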
Example 29
#!/usr/bin/env python

from __future__ import division

import numpy as np
import matplotlib.pyplot as plt

from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write as wavwrite
# Print entire, readable ndarrays
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=np.nan)
f, data = wavread('test.wav')

ps = np.abs(np.fft.fft(data))**2

time_step = 1 / 44100
freqs = np.fft.fftfreq(data.size, time_step)
idx = np.argsort(freqs)

print(ps)

# plot in ascending frequency order using the argsort index
plt.plot(freqs[idx], ps[idx])
plt.show()
Example 30
def show_spectrogram(path):
    sample_rate, X = wavread(path)
    output = specgram(X, Fs=sample_rate)
Example 31
#     #     # when needed, plot the file's audio
#     #     t = np.linspace(0, N/fs, N)
#     #     plt.plot(t, data_file)
#         MFCCsample = librosa.feature.mfcc(y=frameSample, sr=fs, fmin=fmin, fmax=fmax,
#                                                  n_mfcc=n_mfcc, n_mels=n_mels, n_fft=n_fft)

#         frameMFCC[j] = MFCCsample[ofs_mfcc:]

#     kwFeat[i] = frameMFCC

frameMFCC = {}
kwFeat = {}
for i in range(len(df['file'])):
    wavstr = df['file'][
        i]  # extract the string holding the audio file name
    [_, data_file] = wavread('../../' + wavstr)  # read the whole audio file

    data_file = data_file / 32767  # normalize the audio samples to the range [-1, 1]
    N = data_file.shape[0]  # length of the file

    #     # when needed, plot the file's audio
    #     t = np.linspace(0, N/fs, N)
    #     plt.plot(t, data_file)
    for j in range(len(
            iAbre[i])):  # for each audio file, extract the keyword frames and their features
        if iAbre[i][j] < 0:
            break

        frameSample = data_file[iAbre[i][j] - frame_lenD2:iAbre[i][j] +
                                frame_lenD2]
        MFCCsample = librosa.feature.mfcc(y=frameSample,
Example 32
        if len(data.shape) == 1 and self.output_channels != 1:
            # replicate first channel and broadcast to (chan, 1)
            data = np.tile(data, (self.output_channels, 1)).T
        if data.shape != (num_frames, self.output_channels):
            error = 'Can not broadcast array of shape {} to {}'.format(
                data.shape, (num_frames, self.output_channels))
            raise ValueError(error)
        data = data.flatten().tostring()
        err = _pa.Pa_WriteStream(self._stream[0], data, num_frames)
        self._handle_error(err)


if __name__ == '__main__':
    from scipy.io.wavfile import read as wavread
    import time
    fs, wave = wavread('thistle.wav')
    wave = np.array(wave, dtype=np.float32)
    wave /= 2**15
    block_length = 4
    def callback(in_data, frame_count, time_info, status):
        if status != 0:
            print(status)
        return (in_data, continue_flag)
    s = Stream(sample_rate=fs, block_length=block_length, callback=callback)
    s.start()
    # for n in range(int(fs*5/block_length)):
    #     s.write(s.read(block_length))
    # for idx in range(0, wave.size, block_length):
    #     s.write(wave[idx:idx+block_length])
    time.sleep(5)
    s.stop()
Example 33
        current_hop = resolution * round(float(current_hop) / resolution)

    return indicies, ideal_vals


if __name__ == '__main__':
    from scipy.io.wavfile import read as wavread
    import matplotlib.pyplot as plt

    #'''
    fs, y = wavread('New Seal and New Spring_conv.wav')
    #fs, y = wavread('equation9sec.wav')
    y = y[...,0]
    t = y.shape[0] / np.float32(fs)

    #'''

    '''
    f0 = 440
    fs = 48000
    t = 5
    n = np.arange(fs*t)
    y = 0.5*np.cos(2*np.pi*f0*n/float(fs))
    '''

Example 34
import sys
import time
import numpy as np
from scipy.io.wavfile import read as wavread
from pysoundcard import Stream, continue_flag, complete_flag

"""Play an audio file."""

fs, wave = wavread(sys.argv[1])
wave = np.array(wave, dtype=np.float32)
wave /= 2 ** 15  # normalize -max_int16..max_int16 to -1..1
play_position = 0

def callback(in_data, out_data, time_info, status):
    global play_position
    out_data[:] = wave[play_position:play_position + block_length]
    # TODO: handle last (often incomplete) block
    play_position += block_length
    if play_position + block_length < len(wave):
        return continue_flag
    else:
        return complete_flag

block_length = 16
s = Stream(sample_rate=fs, block_length=block_length, callback=callback)
s.start()
while s.is_active():
    time.sleep(0.1)
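One way to resolve the TODO is to zero-pad the final short block before copying it out; a sketch of an alternative callback:

def callback_padded(in_data, out_data, time_info, status):
    global play_position
    block = wave[play_position:play_position + block_length]
    out_data[:len(block)] = block
    out_data[len(block):] = 0  # zero-fill the tail of the last, short block
    play_position += block_length
    if play_position < len(wave):
        return continue_flag
    return complete_flag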
Example 35
def inception_score(audio_fps,
                    k,
                    metagraph_fp,
                    ckpt_fp,
                    batch_size=100,
                    tf_ffmpeg_ext=None,
                    fix_length=False):
    use_tf_ffmpeg = tf_ffmpeg_ext is not None
    if not use_tf_ffmpeg:
        from scipy.io.wavfile import read as wavread

    if len(audio_fps) % k != 0:
        raise Exception(
            'Number of audio files ({}) is not divisible by k ({})'.format(
                len(audio_fps), k))
    group_size = len(audio_fps) // k

    # Restore graph
    graph = tf.Graph()
    with graph.as_default():
        saver = tf.train.import_meta_graph(metagraph_fp)

        if use_tf_ffmpeg:
            x_fp = tf.placeholder(tf.string, [])
            x_bin = tf.read_file(x_fp)
            x_samps = tf.contrib.ffmpeg.decode_audio(x_bin, tf_ffmpeg_ext,
                                                     16000, 1)[:, 0]
    x = graph.get_tensor_by_name('x:0')
    scores = graph.get_tensor_by_name('scores:0')

    # Restore weights
    sess = tf.Session(graph=graph)
    saver.restore(sess, ckpt_fp)

    # Evaluate audio
    _all_scores = []
    for i in range(0, len(audio_fps), batch_size):
        batch = audio_fps[i:i + batch_size]

        # Load audio files
        _xs = []
        for audio_fp in batch:
            if use_tf_ffmpeg:
                _x = sess.run(x_samps, {x_fp: audio_fp})
            else:
                fs, _x = wavread(audio_fp)
                if fs != 16000:
                    raise Exception('Invalid sample rate ({})'.format(fs))
                if _x.dtype == np.int16:
                    _x = _x.astype(np.float32)
                    _x /= 32767.

            if _x.ndim != 1:
                raise Exception('Invalid shape ({})'.format(_x.shape))

            if fix_length:
                _x = _x[:16384]
                #_x = _x[-16384:]
                _x = np.pad(_x, (0, 16384 - _x.shape[0]), 'constant')

            if _x.shape[0] != 16384:
                raise Exception('Invalid number of samples ({})'.format(
                    _x.shape[0]))

            _xs.append(_x)

        # Compute model scores
        _all_scores.append(sess.run(scores, {x: _xs}))

    sess.close()

    # Find labels
    _all_scores = np.concatenate(_all_scores, axis=0)
    _all_labels = np.argmax(_all_scores, axis=1)

    # Compute inception scores
    _inception_scores = []
    for i in range(k):
        _group = _all_scores[i * group_size:(i + 1) * group_size]
        _kl = _group * (np.log(_group) -
                        np.log(np.expand_dims(np.mean(_group, 0), 0)))
        _kl = np.mean(np.sum(_kl, 1))
        _inception_scores.append(np.exp(_kl))

    return np.mean(_inception_scores), np.std(_inception_scores), _all_labels
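The per-group computation above is the usual inception-score formula, IS = exp(E_x[KL(p(y|x) || p(y))]); a toy check on random softmax outputs (a sketch):

import numpy as np

probs = np.random.dirichlet(np.ones(10), size=100)  # stand-in for p(y|x)
kl = probs * (np.log(probs) - np.log(np.mean(probs, 0, keepdims=True)))
print(np.exp(np.mean(np.sum(kl, 1))))  # inception score of the fake batch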
Example 36
    def __init__(self, path0):  # , sampling_rate=48000):
        # initialize
        sr, y = wavread(path0)
        self.yg = y / (2 ** 15)
        self.sr = sr
        print('sampling rate ', sr)
Example 37
    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics, Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980.

    https://www.researchgate.net/publication/261914482_Feature_Extraction_Methods_LPC_PLP_and_MFCC_In_Speech_Recognition

    Source Code
    -----------
    https://github.com/cournape/talkbox
'''

from scipy.io.wavfile import read as wavread
from scikits.talkbox.features import mfcc
from scikits.talkbox.linpred.levinson_lpc import *
# data: raw audio data
# fs: sample rate
sr, signal = wavread('../recordings/obama.wav')
# ceps: cepstral coefficients
coeffs = 13
ceps, mspec, spec = mfcc(signal, nwin=2048, nfft=2048, fs=sr, nceps=coeffs)
print ("************************ MFCC ************************")
print (ceps)

# https://github.com/cournape/talkbox/blob/ee0ec30a6a6d483eb9284f72bdaf26bd99765f80/scikits/talkbox/linpred/levinson_lpc.py
lpcResult = lpc(signal, 1)
print("************************ LPC ************************")
print(lpcResult)
Example 38
import sys
import numpy as np
from scipy.io.wavfile import read as wavread
from pysoundcard import Stream
"""Play an audio file."""

fs, wave = wavread(sys.argv[1])
wave = np.array(wave, dtype=np.int16)

blocksize = 256
s = Stream(samplerate=fs, blocksize=blocksize, dtype='int16')
s.start()
while True:
    # note: this replays the same first chunk forever; playback never advances
    s.write(wave[0:(1024 * 100)])
s.stop()
Example 39
def read_as_mfcc(path):
  sample_rate, X = wavread(path)
  ceps, mspec, spec = mfcc(X)
  return ceps
Example 40
import numpy as np
from scipy.io.wavfile import read as wavread

x = np.loadtxt("out.txt")
fs, audio = wavread("test.wav")
audio = audio / (2**15)
print("Error:", np.mean(np.abs(x - audio)))
Example 41
    def __init__(self, contours, neutral, SHOW_LINGUAGRAM, SHOW_NEUTRAL,
                 SHOW_WAVEFORM, SHOW_SPECTROGRAM):
        '''center points determined by transforming the point (426, 393) several times
           with peterotron, and taking the average.
        '''
        self.static_dir = os.getcwd() + '/'
        #self.centerX = 710
        #self.centerY = 638

        # these come from hand tuning to find the smallest range of y values of polar mags
        self.centerX = 665
        self.centerY = 525

        self.gladefile = self.static_dir + "LinguaViewer.glade"
        self.wTree = gtk.glade.XML(self.gladefile, "window1")
        self.win = self.wTree.get_widget("window1")
        self.win.set_title(contours)
        self.title = contours

        self.mainVBox = self.wTree.get_widget("vbox1")

        dic = {
            "on_window1_destroy": self.onDestroy,
            "on_tbPlay_clicked": self.playSound,
            "on_tbSave_clicked": self.onSave,
            "on_tbLabel_clicked": self.onLabel
        }

        self.wTree.signal_autoconnect(dic)

        self.X, self.Y = self.loadContours(contours)
        self.wavname = contours[:-4] + ".wav"

        #Linguagram
        if (SHOW_LINGUAGRAM == True):
            x1 = array(self.X)
            y1 = array(self.Y)
            Z = []
            for i in range(len(self.X)):
                zs = []
                for j in range(32):
                    zs.append(i + 1)
                Z.append(zs)
            z1 = array(Z)
            self.fig = Figure()
            canvas = FigureCanvas(self.fig)
            #ax = Axes3D(self.fig, rect=[-.23,-.2,1.447,1.4])
            ax = self.fig.add_subplot(1, 1, 1, projection='3d')
            self.fig.subplots_adjust(left=-0.23, bottom=0, right=1.215, top=1)
            ax.mouse_init()
            surf = ax.plot_surface(z1,
                                   -x1,
                                   -y1,
                                   rstride=1,
                                   cstride=1,
                                   cmap=cm.jet)
            ax.view_init(90, -90)

            canvas.show()
            canvas.set_size_request(600, 200)
            self.mainVBox.pack_start(canvas, True, True)

        #Neutral
        if (SHOW_NEUTRAL == True):
            cx, cy = self.getNeutral(neutral)
            cmags = self.makePolar(cx, cy)
            M = self.batchConvert2Polar(self.X, self.Y)
            #D = self.batchGetMinD(M, cmags)
            fakeX = []
            for i in range(len(M)):
                xs = []
                for j in range(1, 33):
                    xs.append(j)
                fakeX.append(xs)

            x1 = array(fakeX)
            y1 = array(M)
            Z = []
            for i in range(len(M)):
                zs = []
                for j in range(32):
                    zs.append(i)
                Z.append(zs)
            z1 = array(Z)

            self.fig3 = Figure()
            canvas3 = FigureCanvas(self.fig3)
            ax = self.fig3.add_subplot(1, 1, 1, projection='3d')
            self.fig3.subplots_adjust(left=-0.23, bottom=0, right=1.215, top=1)
            ax.mouse_init()
            ax.plot_surface(z1, -x1, y1, rstride=1, cstride=1, cmap=cm.jet)
            ax.view_init(90, -90)

            canvas3.show()
            canvas3.set_size_request(600, 200)
            self.mainVBox.pack_start(canvas3, True, True)

        #Waveform
        windowsize = 0
        self.fig2 = Figure()
        canvas2 = FigureCanvas(self.fig2)
        if (SHOW_WAVEFORM == True):
            fs, snd = wavread(self.wavname)
            chan = snd[:, 0]
            t = array(range(len(chan))) / float(fs)
            if SHOW_SPECTROGRAM == True:
                wavax = self.fig2.add_subplot(2, 1, 1)
            else:
                wavax = self.fig2.add_subplot(1, 1, 1)
            wavax.plot(t, chan, 'black')
            wavax.set_xlim(0, max(t))
            windowsize += 200

        #Spectrogram
        if (SHOW_SPECTROGRAM == True):
            '''This calls Praat to get the spectrogram and adds it to the viewer'''
            specname = contours[:-4] + '.Spectrogram'
            cleanname = contours[:-4] + '.clean'
            cmd = [
                '/Applications/Praat.app/Contents/MacOS/Praat',
                self.static_dir + 'makeSpec.praat', self.wavname, specname
            ]
            proc = subprocess.Popen(cmd)
            status = proc.wait()
            cmd2 = [
                'bash', self.static_dir + 'cleanspec.sh', specname, cleanname
            ]
            proc2 = subprocess.Popen(cmd2)
            status2 = proc2.wait()

            f = open(cleanname, 'r').readlines()
            last = len(f) - 1
            x = f[last].split('\t')
            rows = int(x[0])
            cols = int(x[1])

            img = zeros((rows, cols))

            for i in range(len(f)):
                x = f[i][:-1].split('\t')
                img[int(x[0]) - 1, int(x[1]) - 1] = float(x[2])

            img = log(img)
            if SHOW_WAVEFORM == True:
                specax = self.fig2.add_subplot(2, 1, 2)
            else:
                specax = self.fig2.add_subplot(1, 1, 1)
            specax.imshow(img, cmap=cm.gray_r, origin='lower', aspect='auto')
            windowsize += 200

        # show it
        if (SHOW_WAVEFORM == True) or (SHOW_SPECTROGRAM == True):
            canvas2.show()
            canvas2.set_size_request(600, windowsize)
            self.mainVBox.pack_start(canvas2, True, True)

        self.SHOW_LINGUAGRAM = SHOW_LINGUAGRAM
        self.SHOW_NEUTRAL = SHOW_NEUTRAL
        self.SHOW_WAVEFORM = SHOW_WAVEFORM
        self.SHOW_SPECTROGRAM = SHOW_SPECTROGRAM
        self.windowsize = windowsize
Example 42
def PMBSegmentation(argv, nameFileOutputXML):
    inputPath = None
    outputPath = 'out.lab'
    boundariesPath = None
    verbose = False

    # Common parameters
    wLen = 0.016
    wStep = 0.008
    withEntropy = False
    with4Hz = False
    withNBS = False
    withLS = False
    moduLen = 1
    speech_labels = {0: 'Non Speech', 1: 'Speech'}
    music_labels = {0: 'Non Music', 1: 'Music'}

    sort = False
    # entropy
    entropyTh = 0.4

    # 4 Hz
    fcenter = 4.0
    fwidth = 0.5
    normalized = True
    N = 2048
    ordre = 100
    nbFilters = 30
    energyTh = 1.5

    # Music
    musicLen = 1.0
    musicStep = 0.1

    maxSegForLength = 1000

    thLen = 0.04
    thNb = 20

    segments = []
    boundaries = None
    # Parse the arguments
    opts = argv
    #print opts
    i = 0
    while (i < len(argv)):
        #print str(i)
        if opts[i] == '-h':
            printhelp()
        elif opts[i] == '-i':
            i = i + 1
            inputPath = opts[i]
        elif opts[i] == '-o':
            i = i + 1
            outputPath = opts[i]
        elif opts[i] == '-b':
            i = i + 1
            boundariesPath = opts[i]
        elif opts[i] == '-v':
            verbose = True

        elif opts[i] == '--sorted':
            sort = True
        elif opts[i] == '--Entropy':
            withEntropy = True
        elif opts[i] == '--4Hz':
            with4Hz = True
        elif opts[i] == '--NBS':
            withNBS = True
        elif opts[i] == '--LS':
            withLS = True
        elif opts[i] == '-w':
            i = i + 1
            wLen = float(opts[i])
        elif opts[i] == '-s':
            i = i + 1
            wStep = float(opts[i])
        i = i + 1

    if inputPath is None:
        printhelp()
        exit(1)
    else:
        #print "Audio file path : "+ inputPath
        fe, data = wavread(inputPath)
        print "Audio file opened : " + inputPath
        fe = float(fe)
        m = iinfo(data[0]).max
        data = [float(d) / m for d in data]
        demi = int(wLen / 2 * fe)
        timeScale = range(demi, len(data) - demi, int(wStep * fe))
        frames = [data[t - demi:t + demi] for t in timeScale]

    if withEntropy:

        if verbose:
            print("Analysing the entropy modulation")
        entropy_values = [entropy(f) for f in frames]
        entropy_modulation = computeModulation(entropy_values,
                                               moduLen / wStep,
                                               withLog=False)
        with open('entropy.lab', 'w') as f:
            for t, v in zip(timeScale, entropy_modulation):
                f.write('%f\t%f\n' % (float(t) / fe, v))
        entropy_modulation = [(e / entropyTh) - 1 if e < 2 * entropyTh else 1
                              for e in entropy_modulation]

        segments_entropy = decoupe(entropy_modulation)
        segments_entropy = [(s[0] * wStep, s[1] * wStep,
                             speech_labels[s[2]] + ' (Entropy)')
                            for s in segments_entropy]
        segments.extend(segments_entropy)

    if with4Hz:
        if verbose:
            print("Analysing the 4 Hz energy modulation")
        Wo = fcenter / fe
        Wn = [Wo - (fwidth / 2) / fe, Wo + (fwidth / 2) / fe]
        num = firwin(ordre, Wn, pass_zero=False)
        melFilter = melFilterBank(nbFilters, N, fe)
        hw = hamming(wLen * fe)
        energy = [
            dot(abs(rfft(hw * f, n=2 * N)[0:N])**2, melFilter) for f in frames
        ]
        # transpose the list of lists
        energy = lfilter(num, 1, list(map(list, zip(*energy))), 0)
        energy = sum(energy)
        if normalized:
            energy = energy / mean(energy)

        energy_modulation = computeModulation(energy,
                                              moduLen / wStep,
                                              withLog=True)
        with open('energy.lab', 'w') as f:
            for t, v in zip(timeScale, energy_modulation):
                f.write('%f\t%f\n' % (float(t) / fe, v))
        energy_modulation = [(e / energyTh) - 1 if e < 2 * energyTh else 1
                             for e in energy_modulation]
        segments_energy = decoupe(energy_modulation)
        segments_energy = [(s[0] * wStep, s[1] * wStep,
                            speech_labels[s[2]] + ' (4Hz)')
                           for s in segments_energy]
        segments.extend(segments_energy)

    if withLS:

        if verbose:
            print("Analysing segment lengths")

        if boundariesPath is None:
            a, b = segment(data, fe)
            boundaries = [(float(st[0]) / fe, ) for st in a]

        else:
            boundaries = readBoundaries(boundariesPath)

        times = array([b[0] for b in boundaries])
        demi = musicLen / 2
        timeScale = arange(demi, times[-1] - demi, musicStep)

        # take the smallest ones!
        segframes = [
            sorted(diff(times[logical_and(times >= t - demi,
                                          times <= t + demi)]),
                   reverse=True) for t in timeScale
        ]
        lengths = [mean(s[:min([maxSegForLength, len(s)])]) for s in segframes]

        with open('LS.lab', 'w') as f:
            for t, v in zip(timeScale, lengths):
                f.write('%f\t%f\n' % (float(t) / fe, v))

        lengths = [(l / thLen) - 1 if l < 2 * thLen else 1 for l in lengths]

        segments_length = decoupe(lengths)
        segments_length = [(s[0] * musicStep, s[1] * musicStep,
                            music_labels[s[2]] + ' (LS)')
                           for s in segments_length]

        segments.extend(segments_length)

    if withNBS:
        if verbose:
            print("Analysing the number of segments")

        if boundariesPath is None:
            if boundaries is None:
                a, b = segment(data, fe)
                boundaries = [(float(st[0]) / fe, ) for st in a]
        else:
            if boundaries is None:
                boundaries = readBoundaries(boundariesPath)

        times = array([b[0] for b in boundaries])
        demi = musicLen / 2
        timeScale = arange(demi, times[-1] - demi, musicStep)
        segnb = [
            float(npSum(logical_and(times >= t - demi, times <= t + demi)))
            for t in timeScale
        ]
        with open('NBS.lab', 'w') as f:
            for t, v in zip(timeScale, segnb):
                f.write('%f\t%f\n' % (float(t) / fe, v))

        segnb = [-(l / thNb) + 1 if l < 2 * thNb else 1 for l in segnb]

        segments_nb = decoupe(segnb)
        segments_nb = [(s[0] * musicStep, s[1] * musicStep,
                        music_labels[s[2]] + ' (NBS)') for s in segments_nb]
        segments.extend(segments_nb)

    if sort:
        segments = sorted(segments, key=lambda x: x[0])
    v = writeToXML(segments, nameFileOutputXML, withNBS, with4Hz, withLS,
                   withEntropy)
    return v
Example 43
        wav_fps = wav_fps[:args.n]

    # Graph to calculate feats
    x = tf.placeholder(tf.float32, [None])
    x_trim = x[:16384]
    x_trim = tf.pad(x_trim, [[0, 16384 - tf.shape(x_trim)[0]]])
    X = tf.contrib.signal.stft(x_trim, 2048, 128, pad_end=True)
    X_mag = tf.abs(X)
    W_mel = tf.contrib.signal.linear_to_mel_weight_matrix(
        num_mel_bins=128,
        num_spectrogram_bins=1025,
        sample_rate=16000,
        lower_edge_hertz=40.,
        upper_edge_hertz=7800.,
    )
    X_mel = tf.matmul(X_mag, W_mel)
    X_lmel = tf.log(X_mel + 1e-6)
    X_feat = X_lmel

    # Calculate feats for each wav file
    with tf.Session() as sess:
        _X_feats = []
        for wav_fp in tqdm(wav_fps):
            _, _x = wavread(wav_fp)

            _X_feats.append(sess.run(X_feat, {x: _x}))
        _X_feats = np.array(_X_feats)

    with open(args.out_fp, 'wb') as f:
        pickle.dump(_X_feats, f)
Example 44
from scipy.io.wavfile import read as wavread

file_name = "quantized/piano/single/mono_sound/piano_mono_2.wav"
point = 44248
length = 100

rate, data = wavread(file_name)

print("rate", rate)
print("data", data.shape)
print("expected", rate * 4)

left = data[:, 1].tolist()

episode = left[point:point + length]


def find_subpattern(lst, pattern):
    # return every index where `pattern` occurs as a contiguous slice of `lst`
    indices = []
    for i in range(len(lst) - len(pattern)):
        if lst[i:i + len(pattern)] == pattern:
            indices.append(i)
    return indices


indexs = find_subpattern(left, episode)
start = indexs[0]
for i in range(len(indexs) - 1):
    indexs[i + 1] = indexs[i + 1] - start - 44100 * i
print("indexs", indexs)
Example 45
#!/usr/bin/python
import sys
import argparse
import math
import numpy as np
from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write as wavwrite

# Print entire, readable ndarrays
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=np.nan)

rate_one, data_one = wavread('umbrellaonefiltered.wav')
rate_two, data_two = wavread('umbrellatwofiltered.wav')

zero_count = int(math.floor(len(data_one) / 2))
zero_count_two = int(math.floor(len(data_two) / 2))
if zero_count_two > zero_count:
  zero_count = zero_count_two

# Zero pad file one
zero_array = np.zeros(zero_count, dtype=np.float)
data_one = np.concatenate([data_one, zero_array])
length_one = int(math.floor(len(data_one)))
# FFT file one
fft_one = np.fft.rfft(data_one)

# Zero pad file two

length_two = int(math.floor(len(data_two)))
Example 46
        plt.grid()

        fig.tight_layout()
        plt.show()


if __name__ == '__main__':

    from scipy.io.wavfile import read as wavread

    # instance
    w = Class_Wavelet1()

    # load wav sample
    path = 'sample1.wav'
    sr, x = wavread(path)

    # (1) show back to original waveform by transform and inverse transform
    # select switch (filter):  if value is 0.0: then doesn't use the element
    #    s1,  s2,  s3,  s4,  s5,  s6,  s7,  s8
    flt = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]  # use all elements
    lng0 = 2048  # set length of wavelet transform
    y = w.trans_itrans_level8(x[0:lng0], filter=flt, show=True)

    # (2) show comparison with composition from selected elements only
    # select switch (filter):  if value is 0.0: then doesn't use the element
    #    s1,  s2,  s3,  s4,  s5,  s6,  s7,  s8
    flt = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0,
           1.0]  # only s6, s7, and s8 are used
    lng0 = 2048
    y = w.trans_itrans_level8(x[0:lng0], filter=flt, show=True)