def extract(self, segmentation, feature, **kwargs):
    """Extract a feature for each segment in `segmentation`.

    Parameters:
        segmentation: iterable of segments; `segmentation.media` must be
            a filename (str) — in-memory media is not supported.
        feature: feature class, instantiated per segment with the
            segment's audio plus `**kwargs`.

    Raises:
        TypeError: if `segmentation.media` is not a str.
    """
    # isinstance is the idiomatic type check (also accepts str subclasses).
    if not isinstance(segmentation.media, str):
        raise TypeError("Only file segmentation extraction is currently supported.")
    # Read a single sample just to obtain the file's sample rate.
    x, SR, fmt = sound.wavread(segmentation.media, last=1)
    for seg in segmentation:
        # NOTE(review): `last` is duration*SR rather than the end-sample
        # index (start+duration)*SR — confirm sound.wavread's semantics.
        x, xsr, fmt = sound.wavread(
            segmentation.media,
            first=int(seg.time_span.start_time * SR),
            last=int(seg.time_span.duration * SR))
        f = feature(x, **kwargs)
        seg.features.append(f)
def extract(self, segmentation, feature, **kwargs):
    """Compute `feature` over every segment of `segmentation`.

    `feature` is a feature class; extra keyword arguments are forwarded
    to its constructor.  Each result is appended to the corresponding
    segment's `features` list.  Only file-backed media (a str path in
    `segmentation.media`) is supported.
    """
    if type(segmentation.media) is not str:
        raise TypeError(
            "Only file segmentation extraction is currently supported.")
    # Probe the file for its sample rate by reading a single sample.
    _, sample_rate, _ = sound.wavread(segmentation.media, last=1)
    for segment in segmentation:
        start = int(segment.time_span.start_time * sample_rate)
        stop = int(segment.time_span.duration * sample_rate)
        samples, _, _ = sound.wavread(
            segmentation.media, first=start, last=stop)
        segment.features.append(feature(samples, **kwargs))
def play_segs(self, k):
    """Play the audio segments assigned to cluster `k` (zero-based index).

    Parameters:
        k: integer cluster index.

    Raises:
        TypeError: if `k` is None.
    """
    if k is None:
        # Fixed typo in the error message ("integrer" -> "integer").
        raise TypeError("play_segs requires an integer cluster index: k")
    # Map boundary diffs back onto per-segment cluster assignments.
    clusters = self.assigns[self.diffs[:-1]]
    sr = self.feature.sample_rate
    for seg in np.where(clusters == k)[0]:
        # print() function for Python 3 compatibility (was a Py2 statement).
        print(self.segmentation[seg])
        # NOTE(review): `last` is duration*sr rather than the end-sample
        # index (start+duration)*sr — confirm sound.wavread's semantics.
        x = sound.wavread(
            self.media,
            first=int(self.segmentation[seg].time_span.start_time * sr),
            last=int(self.segmentation[seg].time_span.duration * sr))
        sound.play(x[0].T, sr)
def play_segs(self, k):
    """Play the audio segments belonging to cluster `k` (zero-based index).

    Parameters:
        k: integer cluster index.

    Raises:
        TypeError: if `k` is None.
    """
    if k is None:
        # Fixed typo in the error message ("integrer" -> "integer").
        raise TypeError("play_segs requires an integer cluster index: k")
    # Map boundary diffs back onto per-segment cluster assignments.
    clusters = self.assigns[self.diffs[:-1]]
    sr = self.feature.sample_rate
    for seg in np.where(clusters == k)[0]:
        print(self.segmentation[seg])
        # NOTE(review): `last` is duration*sr rather than the end-sample
        # index (start+duration)*sr — confirm sound.wavread's semantics.
        x = sound.wavread(
            self.media,
            first=int(self.segmentation[seg].time_span.start_time * sr),
            last=int(self.segmentation[seg].time_span.duration * sr))
        sound.play(x[0].T, sr)
import numpy as np
import scipy.signal as signal
import sound
import matplotlib.pyplot as plt

# Load the sound file as an array plus sample rate.
[s, r] = sound.wavread('Track32.wav')
# Take only the first channel of the audio.
data = s[:, 0]

# Design an 8-band filter bank with the Parks-McClellan (remez) algorithm.
# Band edges are normalized so that the Nyquist frequency is 0.5; the
# weight vectors penalize stop-band ripple 100x more than pass-band ripple.

# Band 1: low-pass.
h1 = signal.remez(64, [0, 0.0625, 0.0627, 0.5], [1, 0], [1, 100])
# Bands 2-7: band-pass.
h2 = signal.remez(64, [0, 0.0625, 0.0627, 0.125, 0.127, 0.5],
                  [0, 1, 0], [100, 1, 100])
h3 = signal.remez(64, [0, 0.125, 0.127, 0.1875, 0.1877, 0.5],
                  [0, 1, 0], [100, 1, 100])
# NOTE(review): the 0.27 edge below breaks the tight-transition pattern of
# the neighboring bands (would expect ~0.2502) — confirm whether the wide
# transition band is intended.
h4 = signal.remez(64, [0, 0.1875, 0.1877, 0.25, 0.27, 0.5],
                  [0, 1, 0], [100, 1, 100])
h5 = signal.remez(64, [0, 0.25, 0.27, 0.3125, 0.3127, 0.5],
                  [0, 1, 0], [100, 1, 100])
h6 = signal.remez(64, [0, 0.3125, 0.3127, 0.375, 0.377, 0.5],
                  [0, 1, 0], [100, 1, 100])
h7 = signal.remez(64, [0, 0.375, 0.377, 0.4375, 0.4377, 0.5],
                  [0, 1, 0], [100, 1, 100])
# Band 8: high-pass.
h8 = signal.remez(64, [0, 0.4375, 0.4377, 0.5], [0, 1], [100, 1])
# Script for calling the online MDCT implementation analysis and synthesis,
# for time measurement and as a simple implementation example.
# Gerald Schuller, November 2017

from pyrecplayfastMDCT import *
import sound
import os
import time

N = 1024  # Number of subbands and block size
# Initialize filter bank memory:
initFB(N)
# Sine window for the MDCT (length 1.5*N).
fb = np.sin(np.pi / (2 * N) * (np.arange(0, int(1.5 * N)) + 0.5))
X, fs = sound.wavread('test.wav')
print("X.shape:", X.shape)
print("fs=", fs)
# Normalize 16-bit integer samples to the range [-1, 1).
X = X * 1.0 / 2 ** 15
# BUGFIX: use integer (floor) division — in Python 3, len(X)/N is a float
# and would crash np.zeros((blocks, N)) and range(blocks) below.
blocks = len(X) // N
y = np.zeros((blocks, N))
xrek = np.zeros(blocks * N)
startime = time.time()
# Process the signal block by block:
for m in range(blocks):
    # Analysis filter bank:
    y[m, :] = LDFB(X[m * N + np.arange(N)], fb)
    # Synthesis filter bank:
    xrek[m * N + np.arange(N)] = LDFBinv(y[m, :], fb)
# ## Check the frequency response:

# In[3]:

w, H = signal.freqz(b)
# Small epsilon avoids log10(0) at spectral zeros.
magnitude_db = 20 * np.log10(abs(H) + 1e-6)
plt.plot(w, magnitude_db)
plt.title('Filter Magnitude Frequency Response')
plt.xlabel('Normalized Frequency')
plt.ylabel('dB')

# ## Now the **analysis filtering and down sampling:**

# In[4]:

import sound as snd

[s, rate] = snd.wavread('sndfile.wav')
print("length of sound in samples: ", len(s))
plt.plot(s)
plt.title('Original Signal')

# ## Filter implementation:

# In[5]:

filtered = signal.lfilter(b, 1, s)
print("length of filtered sound in samples: ", len(filtered))
plt.plot(filtered)

# ## Play the filtered sound:

# In[6]:
# Gerald Schuller, November 2017

from pyrecplayfastLDFB import *
import sound
import os
import time

N = 512  # Number of subbands and block size
# Initialize filter bank memory:
initFB(N)
fb = np.loadtxt('fb2048t1023d512bbitcs.mat')
# N=1024:
# fb=np.loadtxt('fb4096t2047d1024bbitc.mat')
X, fs = sound.wavread('teststereo.wav')
print("X.shape:", X.shape)
print("fs=", fs)
# Normalize 16-bit integer samples to the range [-1, 1).
X = X * 1.0 / 2 ** 15
# BUGFIX: use integer (floor) division — in Python 3, len(X)/N is a float
# and would crash np.zeros((blocks, 2, N)) and range(blocks) below.
blocks = len(X) // N
y = np.zeros((blocks, 2, N))
xrek = np.zeros((blocks * N, 2))
startime = time.time()
# Analysis filter bank, block by block:
for m in range(blocks):
    # Left channel:
    y[m, 0, :] = LDFB(X[m * N + np.arange(N), 0], fb)
    # Right channel:
    y[m, 1, :] = LDFB(X[m * N + np.arange(N), 1], fb)
import scipy.signal
import numpy as np
import matplotlib.pyplot as plt
import sys
import cv2
# BUGFIX: sound.wavread is called below but `sound` was never imported,
# which raised a NameError at runtime.
import sound

CHUNK = 1024

print("filename=", sys.argv[1])
sndfile = sys.argv[1]
# sndfile = "amfile.wav"
# sndfile = "amfilegs.wav"
# sndfile = "amrec.wav"

# Read in the sound file:
[AM, FS] = sound.wavread(sndfile)
# Length of the sound (np.size: the scipy.size top-level alias is
# deprecated/removed in modern SciPy):
lenAM = np.size(AM)

# Compute the low-pass filter coefficients with a 20 Hz cutoff frequency
# (20.0 normalized against the Nyquist frequency FS/2; the original
# comment said 10 Hz, which did not match the code):
[b, a] = scipy.signal.iirfilter(2, 20.0 / (FS / 2), rp=60, btype='lowpass')
[w, H] = scipy.signal.freqz(b, a)
# np.absolute replaces the removed scipy.absolute alias:
Ha = np.absolute(H)
fig = plt.figure()
# plt.plot(w, Ha)
# In dB on the normalized frequency axis w:
plt.plot(w, 20 * np.log10(Ha))
# Fixed typo in the displayed title ("frequence" -> "frequency").
plt.title('The frequency response of the low pass filter')
plt.xlabel('Normalized frequency (pi=Nyquist freq.)')
plt.ylabel('Magnitude response (dB)')