def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indexes of every segment; shape (0, 2) when no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents (reference 55 Hz). log2(0) is -inf, which the clamp
    #    below maps to 0 cents, so the divide warning is only noise.
    with np.errstate(divide='ignore'):
        f0_cents = np.maximum(1200.0 * np.log2(f0 / 55.0), 0.0)

    # 2. standard deviation of the last winStable frames, current frame included.
    #    Frames without a full window keep +inf so they can never pass the threshold.
    num_frames = len(f0_cents)
    sd = np.full(num_frames, np.inf)
    for i in range(winStable - 1, num_frames):
        sd[i] = np.std(f0_cents[i - winStable + 1:i + 1])

    # 3. apply threshold on standard deviation values to find indexes of the stable points
    stable_indices = np.where(sd < stdThsld)[0]

    # 4. group consecutive stable points; splitting on the index jumps also captures the last run
    all_segments = []
    if stable_indices.size > 0:
        run_breaks = np.where(np.diff(stable_indices) != 1)[0] + 1
        for run in np.split(stable_indices, run_breaks):
            all_segments.append((run[0], run[-1]))

    # 5. apply segment filtering, i.e. remove segments that are too short
    minNoteDurFrames = fs * minNoteDur / float(H)
    kept = [seg for seg in all_segments if seg[1] - seg[0] > minNoteDurFrames]
    segments = np.array(kept, dtype=int).reshape(-1, 2)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indexes of every segment; shape (0, 2) when no segment qualifies.
    """
    import numpy as np  # local import so this block does not rely on a module-level alias

    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents (reference 55 Hz); zero-valued frames are replaced
    #    by machine epsilon so the logarithm stays finite
    tiny = np.finfo(float).eps
    f0_cents = 1200.0 * np.log2(np.where(f0 > 0, f0, tiny) / 55.0)

    # 2. standard deviation of the last winStable frames (current frame included);
    #    +inf marks frames that do not have a full window behind them yet
    num_frames = f0_cents.size
    std_track = np.full(num_frames, np.inf)
    for frame in range(winStable - 1, num_frames):
        std_track[frame] = np.std(f0_cents[frame - winStable + 1:frame + 1])

    # 3. frames whose deviation is below the threshold are the stable points
    stable_frames = np.where(std_track < stdThsld)[0]

    # 4. consecutive stable points belong to the same segment
    runs = []
    if stable_frames.size > 0:
        breaks = np.where(np.diff(stable_frames) != 1)[0] + 1
        runs = np.split(stable_frames, breaks)

    # 5. drop runs shorter than minNoteDur (each frame advances H samples at fs Hz)
    kept = [[run[0], run[-1]] for run in runs if len(run) * H / float(fs) >= minNoteDur]
    return np.array(kept, dtype=int).reshape(-1, 2)
def estimate(inputFile='a7q2-harmonic.wav', window='blackman', M=2101, N=4096, t=-90, minSineDur=0.1, nH=50,
             minf0=100, maxf0=200, f0et=5, harmDevSlope=0.01):
    """
    Analyse a sound with the harmonic model, resynthesize it and report how well the parameters fit

    Input:
        inputFile (string): wav file including the path
        window (string): analysis window
        M (integer): analysis window size
        N (integer): FFT size used in the analysis
        t (float): magnitude threshold passed to the harmonic analysis and f0 detection
        minSineDur (float): minimum sine duration passed to HM.harmonicModelAnal
        nH (integer): number of harmonics passed to HM.harmonicModelAnal
        minf0 (float): minimum fundamental frequency
        maxf0 (float): maximum fundamental frequency
        f0et (float): error threshold used for the f0 computation
        harmDevSlope (float): harmonic deviation slope passed to HM.harmonicModelAnal
    Output:
        diff (float): sum of absolute sample differences between the input and the resynthesis,
            taken over their common length
        std (float): standard deviation of the f0 contour returned by HM.f0Detection
    Side effect:
        prints diff, std and all analysis parameters on one line.
    """
    Ns = 512  # handed to SM.sineModelSynth (presumably the synthesis FFT size -- confirm)
    H = 128   # hop size shared by analysis and synthesis

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # input and resynthesis may differ in length; compare only the overlapping part
    size = min(x.size, y.size)
    diff = np.sum(np.abs(x[:size] - y[:size]))
    std = np.std(f0)

    # single parenthesised argument: valid as a statement on Python 2 and a call on Python 3
    print("diff:{0} & std:{1}, M={2} N={3} t={4} minSineDur={5} nH={6} min/max={7}/{8} f0et={9} harmDevSlope={10}"
          .format(diff, std, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope))
    return diff, std
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indexes of every segment; shape (0, 2) when no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; eps keeps the logarithm finite when f0 is 0
    f0Cents = 1200 * np.log2((f0 + eps) / 55.0)

    # 2. standard deviation of the last winStable samples (current frame included).
    #    Frames lacking a full window hold +inf instead of 0, so they are never reported as
    #    stable and no entries have to be trimmed from the index list afterwards.
    numFrames = f0Cents.size
    stDevs = np.full(numFrames, np.inf)
    for f in range(winStable - 1, numFrames):
        stDevs[f] = np.std(f0Cents[f - winStable + 1:f + 1])

    # 3. apply threshold on standard deviation values to find indexes of the stable points
    stdWhere = np.where(stDevs <= stdThsld)[0]

    # 4. consecutive stable points belong to the same segment; guarded so that "nothing stable"
    #    yields an empty result, and split-based grouping so the last run is kept
    segments = np.empty((0, 2), int)
    if stdWhere.size > 0:
        runBreaks = np.where(np.diff(stdWhere) != 1)[0] + 1
        runs = np.split(stdWhere, runBreaks)
        segments = np.array([[run[0], run[-1]] for run in runs], dtype=int)

    # 5. apply segment filtering, i.e. remove segments shorter than minNoteDur
    segLens = segments[:, 1] - segments[:, 0]
    minNoteDurFrames = int(minNoteDur * fs / H)
    segments = segments[segLens >= minNoteDurFrames, :]

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
            interval t1 and t2; nan when the interval contains no analysis frame.
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSineDur = 0.0
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the segment in which the inharmonicity is computed.
    #    np.ceil returns a float; slice bounds must be integers.
    b1 = int(np.ceil(t1 * float(fs) / H))
    b2 = int(np.ceil(t2 * float(fs) / H))
    bhfreq = hfreq[b1:b2]
    bf0 = f0[b1:b2]

    # 3. Per frame: mean over harmonics of |f_r - r * f0| / r, then the mean over frames
    frameInharm = []
    for harmonics, fundamental in zip(bhfreq, bf0):
        coef = np.arange(1, harmonics.size + 1)
        frameInharm.append(np.mean(np.abs(harmonics - coef * fundamental) / coef))

    if not frameInharm:
        return np.nan  # empty interval: there is nothing to average
    return np.mean(frameInharm)
def segmentStableNotesRegions(inputFile='sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1, winStable=3,
                              window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100, minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indices of every segment; shape (0, 2) when no segment qualifies.
    Side effect:
        calls plotSpectogramF0Segments with the detected segments.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; zero frames become eps so log2 stays finite
    f0[f0 == 0] = eps
    f0cent = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of every full window of winStable samples. Window i covers frames
    #    i .. i+winStable-1, so there are exactly len - winStable + 1 of them.
    numWindows = max(len(f0cent) - winStable + 1, 0)
    f0dev = np.array([np.std(f0cent[i:i + winStable]) for i in range(numWindows)])

    # 3. stable points are reported at the last frame of each stable window
    stable = np.where(f0dev <= stdThsld)[0] + winStable - 1

    # 4. consecutive stable points belong to the same segment. Only stable indices are grouped,
    #    so unstable frames no longer produce one-element [0, 0] groups.
    runs = []
    if stable.size > 0:
        runs = np.split(stable, np.where(np.diff(stable) != 1)[0] + 1)

    # 5. apply segment filtering: keep runs spanning at least minNoteDur worth of frames
    minFrames = fs * minNoteDur / float(H)
    kept = [[run[0], run[-1]] for run in runs if len(run) >= minFrames]
    segments = np.array(kept, dtype=int).reshape(-1, 2)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
            interval t1 and t2.
    """
    # Load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Harmonic analysis followed by a separate f0 track, both on the same frame grid (hop H)
    harmDevSlope = 0.01
    minSineDur = 0.0
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                  harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # Frame range covering [t1, t2): both bounds are rounded up to the next frame
    first_frame = int(np.ceil(t1 * float(fs) / H))
    stop_frame = int(np.ceil(t2 * float(fs) / H))
    seg_harmonics = xhfreq[first_frame:stop_frame]
    seg_f0 = f0[first_frame:stop_frame]

    # One inharmonicity value per frame: average over harmonics of |f_r - r * f0| / r
    per_frame = np.zeros(len(seg_harmonics))
    for k in range(len(seg_harmonics)):
        row = seg_harmonics[k]
        orders = np.arange(1, row.size + 1)
        weighted_dev = np.abs(row - orders * seg_f0[k]) / orders
        per_frame[k] = np.sum(weighted_dev) / len(weighted_dev)

    # Average of the per-frame values
    return np.sum(per_frame) / len(per_frame)
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indices of every segment; shape (0, 2) when no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; values below eps are raised to eps so log2 is finite
    f0[f0 < eps] = eps
    f0_cents = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of the last winStable samples for every frame that has a full window.
    #    A list comprehension (not map) so np.array receives a real sequence on Python 3 as well.
    numFrames = len(f0_cents)
    frameIndex = np.arange(winStable - 1, numFrames)
    sds = np.array([np.std(f0_cents[i + 1 - winStable:i + 1]) for i in frameIndex])

    # 3. sds[k] belongs to frame k + winStable - 1, hence the offset
    stableF0Indices = winStable - 1 + np.where(sds < stdThsld)[0]

    # 4. consecutive stable points belong to the same segment.
    #    groupConsecutiveRuns is defined elsewhere; it is expected to return one indexable
    #    sequence of frame indices per run -- confirm against its definition.
    runs = groupConsecutiveRuns(stableF0Indices)

    # 5. apply segment filtering, i.e. remove runs with fewer frames than minNoteDur allows
    minNoteDurFrames = int(minNoteDur * fs / H)
    kept = [[run[0], run[-1]] for run in runs if len(run) >= minNoteDurFrames]
    segments = np.array(kept, dtype=int).reshape(-1, 2)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indexes of every segment; shape (0, 2) when no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    numFrames = len(f0)

    # Frames with f0 == 0 map to -inf cents; any window containing one has a nan deviation and
    # therefore never compares below the threshold. The warnings this produces are expected.
    with np.errstate(divide='ignore', invalid='ignore'):
        # 1. convert f0 values from Hz to cents
        f0Cents = 1200. * np.log2(f0 / 55.)

        # 2./3. deviation of the last winStable samples (current frame included), thresholded.
        #       Frames without a full window stay False.
        stdBelowTh = np.zeros(np.shape(f0), dtype=bool)
        for i in range(winStable - 1, numFrames):
            stdBelowTh[i] = np.std(f0Cents[i - winStable + 1:i + 1]) < stdThsld

    # 4. consecutive stable points belong to the same segment
    allSegments = []
    currSeg = []
    for i in range(numFrames):
        if stdBelowTh[i]:
            currSeg.append(i)
        elif currSeg:
            allSegments.append([currSeg[0], currSeg[-1]])
            currSeg = []
    if currSeg:  # a run that reaches the last frame is closed here
        allSegments.append([currSeg[0], currSeg[-1]])

    # 5. apply segment filtering, i.e. remove segments shorter than minNoteDur
    minFrames = 1. * fs * minNoteDur / H
    kept = [seg for seg in allSegments if seg[1] - seg[0] >= minFrames]
    segments = np.array(kept, dtype=int).reshape(-1, 2)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indices of every segment; shape (0, 2) when no segment qualifies.
    Side effect:
        calls plotSpectogramF0Segments with the detected segments.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; zero frames become eps so log2 stays finite
    f0[f0 == 0] = eps
    f0c = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of the last winStable samples; +inf where no full window exists
    std = np.full(f0c.size, np.inf)
    for i in range(winStable - 1, f0c.size):
        std[i] = f0c[i - winStable + 1:i + 1].std()

    # 3./4. locate run boundaries on a zero-padded stability mask. The padding guarantees that
    #       every run, including one touching the first or last frame, gets exactly one start
    #       and one end, so the two index arrays always pair up.
    stable = (std < stdThsld).astype(int)
    edges = np.diff(np.concatenate(([0], stable, [0])))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0] - 1
    all_segments = np.array([starts, ends])

    # 5. remove segments shorter than minNoteDur; one frame spans H / fs seconds
    min_segment_size = minNoteDur * fs / float(H)
    long_enough = (all_segments[1, :] - all_segments[0, :]) > min_segment_size
    segments = np.transpose(all_segments[:, long_enough])

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indices of every segment; shape (0, 2) when no segment qualifies.
    Side effect:
        calls plotSpectogramF0Segments with the detected segments.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # Frames with f0 == 0 map to -inf cents; a window containing one yields a nan deviation and
    # is therefore never stable. The resulting floating-point warnings are expected.
    with np.errstate(divide='ignore', invalid='ignore'):
        # 1. convert f0 values from Hz to cents
        f0c = 1200 * np.log2(f0 / 55.0)

        # 2. one deviation per full window; window k covers frames k .. k+winStable-1, so there
        #    are len - winStable + 1 windows (the last one ends on the last frame)
        numWindows = max(len(f0c) - winStable + 1, 0)
        fsd = np.array([np.std(f0c[k:k + winStable]) for k in range(numWindows)])

        # 3. report each stable window at its last frame
        stidx = np.where(fsd < stdThsld)[0] + winStable - 1

    # 4. consecutive stable points belong to the same segment
    grps = []
    if stidx.size > 0:
        grps = np.split(stidx, np.where(np.diff(stidx) > 1)[0] + 1)

    # 5. remove groups shorter than minNoteDur
    seqs = [grp for grp in grps if len(grp) * H / float(fs) >= minNoteDur]
    segments = np.array([[grp[0], grp[-1]] for grp in seqs], dtype=int).reshape(-1, 2)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): value returned by compute_inharmonicity for the harmonic
            tracks between the frames corresponding to t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes.
    #    A separate f0 contour is not needed: only the harmonic tracks are passed on below.
    # NOTE(review): harmDevSlope and minSineDur are left at HM.harmonicModelAnal's defaults
    #    here -- confirm that is intended.
    xhreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et)

    # 2. Frame range for [t1, t2]: start rounded up, end rounded down. float(H) keeps the
    #    division exact even when every operand is an integer.
    starting = int(np.ceil(fs * t1 / float(H)))
    ending = int(np.floor(fs * t2 / float(H)))

    # 3. Compute the mean inharmonicity of the segment (helper defined elsewhere)
    return compute_inharmonicity(xhreq, starting, ending, nH)
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
            interval t1 and t2; nan when the interval contains no analysis frame.
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSineDur = 0.0
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the segment in which the inharmonicity is computed.
    #    The frame bounds are cast to int because np.ceil yields floats, which cannot be used
    #    as slice indices.
    firstFrame = int(np.ceil(t1 * float(fs) / H))
    stopFrame = int(np.ceil(t2 * float(fs) / H))
    segFreqs = hfreq[firstFrame:stopFrame]
    segF0 = f0[firstFrame:stopFrame]

    numSegFrames = len(segFreqs)
    if numSegFrames == 0:
        return np.nan  # nothing to average in an empty interval

    # 3. Per frame: mean over harmonics r of |f_r - r * f0| / r; then the mean over frames
    inhm = np.zeros(numSegFrames)
    for idx in range(numSegFrames):
        harmonics = segFreqs[idx]
        order = np.arange(1, harmonics.size + 1)
        inhm[idx] = np.mean(np.abs(harmonics - order * segF0[idx]) / order)
    return np.mean(inhm)
def detect_f0(audio_path, window_size, Hop_size):
    """
    Estimate the f0 contour of a wav file with fixed analysis settings

    Input:
        audio_path (string): wav file including the path
        window_size (integer): analysis window length in samples
        Hop_size (integer): hop size handed to HM.f0Detection
    Output:
        f0: value returned by HM.f0Detection for the normalised signal
    """
    fs, data = wavfile.read(audio_path)
    # Scale the samples by the factor registered for their dtype in norm_fact (lookup table
    # defined elsewhere; presumably maps integer PCM to the [-1, 1] range -- confirm).
    data = np.float32(data) / norm_fact[data.dtype.name]

    # 'hann' is the canonical window name; the 'hanning' alias is not accepted by recent SciPy.
    w = get_window('hann', window_size)

    # Fixed analysis settings
    N = 2048 * 2   # FFT size
    t = -50        # magnitude threshold
    minf0 = 100    # lowest f0 candidate
    maxf0 = 700    # highest f0 candidate
    f0et = 7       # f0 error threshold
    H = Hop_size

    return HM.f0Detection(data, fs, w, N, H, t, minf0, maxf0, f0et)
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indexes of every segment; shape (0, 2) when no segment qualifies.
    """
    import numpy as np  # local import so this block does not rely on a module-level alias

    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. Hz -> cents (reference 55 Hz). Zero-valued frames are lifted to machine epsilon first
    #    so that the logarithm is finite everywhere.
    safe_f0 = np.where(f0 > 0, f0, np.finfo(float).eps)
    cents = 1200.0 * np.log2(safe_f0 / 55.0)

    # 2./3. a frame is stable when the deviation of the winStable frames ending on it
    #       (inclusive) is below the threshold; frames without a full window are skipped
    stable_frames = [frame for frame in range(winStable - 1, cents.size)
                     if np.std(cents[frame - winStable + 1:frame + 1]) < stdThsld]

    # 4. merge consecutive stable frames into [first, last] runs
    runs = []
    for frame in stable_frames:
        if runs and frame == runs[-1][1] + 1:
            runs[-1][1] = frame
        else:
            runs.append([frame, frame])

    # 5. keep runs lasting at least minNoteDur (each frame advances H samples at fs Hz)
    kept = [run for run in runs if (run[1] - run[0] + 1) * H / float(fs) >= minNoteDur]
    return np.array(kept, dtype=int).reshape(-1, 2)
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indexes of every segment; shape (0, 2) when no segment qualifies.
    Side effect:
        calls plotSpectogramF0Segments with the detected segments.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0
    size = f0.size

    # Step 1: Hz -> cents; frames with f0 == 0 are left at 0 cents
    f0_cents = np.zeros(size)
    voiced = f0 != 0
    f0_cents[voiced] = 1200.0 * np.log2(f0[voiced] / 55.0)

    # Steps 2 and 3: deviation of the last winStable frames, thresholded. Only frames with a
    # full window are evaluated, so the helper never sees a wrapped-around or empty slice.
    # standardDeviation is defined elsewhere in the project.
    stableNote = []
    for i in range(winStable - 1, size):
        frame_sd = standardDeviation(f0_cents[i - winStable + 1:i + 1], winStable)
        if frame_sd < stdThsld:
            stableNote.append(i)

    # Step 4: merge consecutive stable frames into [first, last] runs (the last run included)
    runs = []
    for frame in stableNote:
        if runs and frame == runs[-1][1] + 1:
            runs[-1][1] = frame
        else:
            runs.append([frame, frame])

    # Step 5: keep runs whose frame count covers at least minNoteDur seconds
    start_end = [run for run in runs if (run[1] - run[0] + 1) * H / float(fs) >= minNoteDur]
    segments = np.array(start_end, dtype=int).reshape(-1, 2)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration, in seconds)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and ending
            (inclusive) frame indices of every segment; shape (0, 2) when no segment qualifies.
    Side effect:
        calls plotSpectogramF0Segments with the detected segments.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # Frames with f0 == 0 map to -inf cents; a window containing one yields a nan deviation and
    # is therefore never stable. The resulting floating-point warnings are expected.
    with np.errstate(divide='ignore', invalid='ignore'):
        # 1. convert f0 values from Hz to cents
        f0ct = 1200 * np.log2(f0 / 55.0)

        # 2. deviation of the last winStable samples, written into a preallocated array;
        #    +inf (not 0) marks the leading frames that have no full window
        offset = winStable - 1
        stds = np.full(f0ct.size, np.inf)
        for i in range(offset, f0ct.size):
            stds[i] = np.std(f0ct[i - offset:i + 1])

        # 3. indices of the stable points
        stables = np.where(stds < stdThsld)[0]

    # 4. consecutive stable points belong to the same segment; each index is visited once and
    #    the run still open after the loop is part of the result
    terms = []
    for s in stables:
        if terms and s == terms[-1][1] + 1:
            terms[-1][1] = s
        else:
            terms.append([s, s])

    # 5. remove segments shorter than minNoteDur
    terms_fd = [term for term in terms if (term[1] - term[0]) * H / float(fs) >= minNoteDur]
    segments = np.array(terms_fd, dtype=int).reshape(-1, 2)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Find the regions of an audio file in which the detected f0 stays stable.

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 1. Hz -> cents (reference 55 Hz); values below eps are raised to eps before the log
    f0[f0 < eps] = eps
    cents = 1200 * np.log2(f0 / 55.0)

    # 2. deviation over the winStable frames that precede each frame (current frame excluded);
    #    the first winStable entries stay at 0
    deviation = np.zeros(cents.size)
    for frame in range(winStable, cents.size):
        deviation[frame] = np.std(cents[frame - winStable:frame])

    # 3. frames whose deviation is under the threshold
    stable = np.flatnonzero(deviation < stdThsld)

    # 4. split the stable frames wherever they stop being consecutive; only runs with at
    #    least two frames become a [first, last] row
    cuts = np.flatnonzero(np.diff(stable) != 1) + 1
    rows = [np.array([[run[0], run[-1]]]) for run in np.split(stable, cuts) if run.size > 1]
    segmentsList = np.vstack([np.array([[]], dtype=int).reshape(0, 2)] + rows)

    # 5. keep the rows whose span (in frames) is not below minNoteDur * fs / H
    minSpan = (minNoteDur * fs) / H
    spans = segmentsList[:, 1] - segmentsList[:, 0]
    segmentsList = segmentsList[~(spans < minSpan)]

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segmentsList)  # Plot spectrogram and F0 if needed
    return segmentsList
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048,
                          N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2 (0.0 when the interval holds no frame).
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                  harmDevSlope=0.01, minSineDur=0.0)

    # 2. Frame range [interval_start, interval_end) that covers t1..t2
    interval_start = int(math.ceil(t1 * fs / float(H)))
    interval_end = int(math.ceil(t2 * fs / float(H)))

    # 3. Per-frame inharmonicity, measured against a separately detected f0 contour
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    f0_slice = f0[interval_start:interval_end]
    sliced = xhfreq[interval_start:interval_end]  # assumes one row per frame -- TODO confirm

    # One accumulator slot per frame (not per frame*harmonic), and the mean is taken over
    # the frames that were actually summed.
    num_frames = sliced.shape[0]
    if num_frames == 0:
        return 0.0

    inharmon = np.zeros(num_frames)
    for index, arr in enumerate(sliced):
        # Harmonic numbers 2..R; the first column is skipped, as in the original loop.
        orders = np.arange(2, arr.size + 1)
        deviation = np.abs(arr[1:] - orders * f0_slice[index]) / orders
        inharmon[index] = np.sum(deviation) * (1 / float(nH))

    mean_inharmon = np.sum(inharmon) / num_frames
    return mean_inharmon
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indexes of every
                               segment (the duration-filtered ones).
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; values below eps are raised to eps first
    f0[f0 < eps] = eps
    f0_cent = Hz2Cent(f0)

    # 2. standard deviation of the last winStable samples
    std_val_array = compute_std(f0_cent, winStable)

    # 3. indexes of the stable points in the melody
    stable_frame_index = find_stable_index(std_val_array, stdThsld, winStable)

    # 4. consecutive stable points belong to the same segment
    segments = group_stable_frame_index(stable_frame_index)

    # 5. remove the segments which are < minNoteDur in length.
    #    Consecutive frames are H samples apart, so one frame lasts exactly H / fs seconds.
    #    float() keeps this a true division on every Python version.
    each_frame_duration = H / float(fs)
    filtered_segments = filter_segments_by_min_duration(
        segments, minNoteDur, each_frame_duration)

    # NOTE(review): the plot receives the unfiltered segments while the filtered ones are
    # returned -- confirm this is intended.
    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return filtered_segments
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration), compared against
                            (number of frames) * H / fs, i.e. expressed in seconds
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array of shape (k, 2) containing starting and ending frame
                               indexes (both inclusive) of every segment.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; eps keeps the logarithm finite for f0 == 0
    f0_in_cents = 1200.0 * np.log2(f0 / 55.0 + eps)

    # 2. standard deviation of the last winStable samples (current frame included).
    #    The array is allocated once; frames without a full window keep a value just above
    #    the threshold so that they are never reported as stable.
    num_frames = len(f0_in_cents)
    std_F0 = np.full(num_frames, stdThsld + eps, dtype=float)
    for i in range(winStable - 1, num_frames):
        std_F0[i] = np.std(f0_in_cents[(i - winStable + 1):(i + 1)])

    # 3. indexes of the stable points in the melody
    idx = np.where(std_F0 < stdThsld)[0]

    # 4. consecutive stable points belong to the same segment: cut where the step is not 1.
    #    An empty idx yields no run at all.
    breaks = np.where(np.diff(idx) != 1)[0] + 1
    runs = [run for run in np.split(idx, breaks) if run.size]

    # 5. remove the segments which are < minNoteDur in length (length counted in frames)
    kept = [[run[0], run[-1]] for run in runs if run.size * H / float(fs) >= minNoteDur]
    segments = np.array(kept, dtype=idx.dtype).reshape(-1, 2)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/cello-phrase.wav', stdThsld=20, minNoteDur=0.5,
                              winStable=3, window='hamming', M=1025, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration), compared against
                            (number of frames) * H / fs, i.e. expressed in seconds
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indexes (both
                               inclusive) of every segment; shape (0, 2) when nothing is found.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; values below eps are raised to eps first
    f0[f0 < eps] = eps
    f0_cent = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of the last winStable samples (current frame included).
    #    The window length follows winStable instead of a fixed three frames.
    offset = winStable - 1
    SD_winstable = np.array(
        [np.std(f0_cent[i - offset:i + 1]) for i in range(offset, len(f0_cent))])

    # 3. frame indexes of the stable points; + offset maps a window back to its last frame
    winstable_index = np.where(SD_winstable < stdThsld)[0] + offset

    # 4. consecutive stable points belong to the same segment: cut where the step is not 1.
    #    Each run is a plain index array, the first stable index is part of its run, and a
    #    run that reaches the end of the list is handled like any other.
    breaks = np.where(np.diff(winstable_index) != 1)[0] + 1
    all_segments = [run for run in np.split(winstable_index, breaks) if run.size]

    # 5. remove the segments which are not longer than minNoteDur
    kept = [[seg[0], seg[-1]] for seg in all_segments
            if seg.size * H / float(fs) > minNoteDur]
    segments = np.array(kept) if kept else np.empty((0, 2), dtype=int)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indices of every segment.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents. The result goes into its own array so that
    #    the plot at the end still receives the contour in Hz.
    f0_cents = 1200 * np.log2(f0 / 55.0)
    numFrames = len(f0_cents)

    # 2. standard deviation of the last winStable samples, current frame included. The
    #    first usable frame is therefore winStable - 1 (frames 0..2 for winStable = 3).
    frameIndex = np.arange(winStable - 1, numFrames)
    print(frameIndex)
    f0_sd = np.array([np.std(f0_cents[i - winStable + 1:i + 1]) for i in frameIndex])

    # 3. indices of the stable frames; f0_sd starts at frame winStable - 1, hence the shift
    stidx = np.where(f0_sd < stdThsld)[0] + winStable - 1

    # 4. consecutive stable points belong to the same segment
    segments = groupConsecutiveRuns(stidx)

    # 5. remove the segments that are shorter than minNoteDur. The items are used as
    #    (start, end) pairs below.
    minFrames = int(minNoteDur * fs / H)
    opseg = []
    for item in segments:
        if filterShortSegments(item[0], item[1], minFrames) == True:
            print("Got segment: " + str(item))
            opseg.append(item)
    segments = np.array(opseg)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    print(str(segments))
    return segments
def segmentStableNotesRegions(inputFile = '../sms-tools/sounds/sax-phrase-short.wav', stdThsld=10,
                              minNoteDur=0.1, winStable = 3, window='hamming', M=1024, N=2048, H=256,
                              f0et=5.0, t=-100, minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; zero f0 and negative cent values are set to eps
    f0[np.where(f0 == 0)] = eps
    f0_cents = 1200 * np.log2(f0 / 55)
    f0_cents[np.where(f0_cents < 0)] = eps

    # 2. standard deviation over a window centred on each frame. The loop starts where the
    #    window first fits: a negative lower bound would wrap around and give an empty
    #    slice. For winStable <= 3 the start is frame 1. Frames that are never visited
    #    keep the value eps.
    half_before = winStable // 2
    half_after = (winStable + 1) // 2
    max_idx = len(f0_cents) - half_after
    stdarr = np.full_like(f0, eps)
    for idx in range(max(1, half_before), max_idx):
        stdarr[idx] = np.std(f0_cents[idx - half_before:idx + half_after])

    # 3. indexes of the stable points in the melody
    stable = np.where(stdarr <= stdThsld)[0]

    # 4. consecutive stable points (csp) belong to the same segment.
    #    csp_delta[k] is the step from the previous stable index (circular: entry 0 is
    #    compared with the last one). A step larger than 1 marks the start of a run, and
    #    start - step is the stable index just before it, i.e. the end of the previous run.
    #    Rolling by -1 lines every end up with its own start.
    csp_delta = stable - np.roll(stable, 1)
    csp_loc = np.where(np.abs(csp_delta) > 1)[0]
    csp_start = stable[csp_loc]
    csp_end = np.roll(csp_start - csp_delta[csp_loc], -1)

    # 5. remove the segments that span fewer than minNoteDur * fs / H frames
    min_note_frames = int(minNoteDur * fs / H)
    long_enough = np.where(csp_end - csp_start >= min_note_frames)
    segments = np.vstack((csp_start[long_enough], csp_end[long_enough])).T

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0
    return segments
def segment_stable_notes_monophonic(
        inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1, winStable=3,
        window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100, minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indexes (both
                               inclusive) of every segment; shape (0, 2) when nothing is found.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; values below eps are raised to eps first
    f0[f0 < eps] = eps
    tuning = 55.0  # reference frequency of the cent scale
    cent_f0 = 1200 * np.log2(f0 / tuning)

    # 2. standard deviation of every window of winStable consecutive frames
    std_winStable = np.array([
        np.std(cent_f0[index - winStable:index])
        for index in range(winStable, cent_f0.size + 1)
    ])

    # 3. stable frames. Entry k of std_winStable covers frames k .. k + winStable - 1, so
    #    it is attributed to the last frame of its window.
    stable_frames = np.where(std_winStable < stdThsld)[0] + (winStable - 1)

    # 4. consecutive stable frames belong to the same segment. The runs are reported as
    #    frame indexes (first and last frame of the run), and only runs with at least
    #    three frames are considered.
    breaks = np.where(np.diff(stable_frames) != 1)[0] + 1
    runs = [run for run in np.split(stable_frames, breaks) if run.size >= 3]

    # 5. remove the segments that span fewer than minNoteDur * fs / H frames
    samples_minNoteDur = int(minNoteDur * fs / H)
    kept = [(run[0], run[-1]) for run in runs if run[-1] - run[0] >= samples_minNoteDur]
    segments = np.array(kept) if kept else np.empty((0, 2), dtype=int)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
import os
import sys
import time

import numpy as np
from scipy.signal import get_window

# The model modules are imported by bare name, so their directory (resolved relative to
# this file) has to be on the module search path before the imports below run.
_models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           '../../software/models/')
sys.path.append(_models_dir)

import dftModel as DFT
import utilFunctions as UF
import stft as STFT
import harmonicModel as HM
import sineModel as SM

# Analysis settings for the f0 detection
N = 2 * 2048       # FFT size
H = 1000           # hop size
t = -50            # magnitude threshold
minf0 = 300        # lowest f0 candidate
maxf0 = 500        # highest f0 candidate
f0et = 1           # f0 error threshold
w = get_window('blackman', 2001)

# Read the input sound and estimate its f0 contour
fs, x = UF.wavread('../../sounds/sawtooth-440.wav')
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=5, minNoteDur=0.6,
                              winStable=3, window='hamming', M=1025, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration), compared in
                            samples against minNoteDur * fs, i.e. expressed in seconds
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array of shape (k, 2) containing starting and ending frame
                               indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to cents; eps keeps the logarithm finite for f0 == 0
    f0Cents = 1200 * np.log2((f0 + eps) / 55.0)

    # 2. standard deviation of the last winStable samples; entry k belongs to the window
    #    that ends at frame k + winStable - 1
    offset = winStable - 1
    f0std = np.zeros(max(f0.size - offset, 0))
    for i in range(offset, f0.size):
        f0std[i - offset] = np.std(f0Cents[i - offset:i + 1])

    # 3. positions (in f0std) of the stable points
    stable = np.where(f0std < stdThsld)[0]

    # 4. consecutive stable points belong to the same segment. The runs are collected in a
    #    list, so there is no upper limit on their number, and a run that lasts until the
    #    final entry is closed like any other.
    breaks = np.where(np.diff(stable) != 1)[0] + 1
    runs = [run for run in np.split(stable, breaks) if run.size]

    # 5. remove the segments which are < minNoteDur in length. The length is converted to
    #    samples with the average frame spacing and compared at the file's own sampling rate.
    kept = []
    for run in runs:
        start, end = run[0], run[-1]
        seglen = (end - start + 1) * x.size / f0.size
        if seglen >= minNoteDur * fs:
            # NOTE(review): the +2 / +1 shifts are kept from the original and look tied to
            # winStable = 3 -- confirm before using another window length.
            kept.append([start + 2, end + 1])
    segments = np.array(kept, dtype=int).reshape(-1, 2)

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return segments
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal

    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration), compared against
                            (end - start) * H / fs, i.e. expressed in seconds
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indices of every
                               segment (both inclusive); shape (0, 2) when nothing is found.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # Frames with f0 == 0 become -inf cents and their deviation becomes NaN, which never
    # passes the threshold below. That is the intended outcome, so only the floating point
    # warnings are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        # 1. convert f0 values from Hz to cents (reference frequency 55 Hz)
        f0ct = 1200 * np.log2(f0 / 55.0)

        # 2. standard deviation of the last winStable samples (current frame included).
        #    The first winStable - 1 frames have no full window and keep a deviation of 0.
        offset = winStable - 1
        stds = np.zeros(f0ct.size)
        for i in range(offset, f0ct.size):
            stds[i] = np.std(f0ct[i - offset:i + 1])

        # 3. indices of the frames whose deviation is below the threshold
        stables = np.where(stds < stdThsld)[0]

    # 4. group consecutive stable frames: cut the index list wherever the step is not 1.
    #    Every run, including the last one, yields a [first, last] pair.
    terms = []
    if stables.size:
        breaks = np.where(np.diff(stables) != 1)[0] + 1
        terms = [[run[0], run[-1]] for run in np.split(stables, breaks)]

    # 5. drop the segments that are shorter than minNoteDur
    terms_fd = [seg for seg in terms if (seg[1] - seg[0]) * H / float(fs) >= minNoteDur]
    segments = np.array(terms_fd) if terms_fd else np.empty((0, 2), dtype=int)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
def estimateF0(inputFile = '../../sounds/cello-double-2.wav'):
    """
    Function to estimate fundamental frequency (f0) in an audio signal. This function also plots the
    f0 contour on the spectrogram and synthesizes the f0 contour.

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values; the frames
                          before 0.5 s and from 4.0 s on are set to 0
    """
    ### Change these analysis parameter values marked as XX
    window = 'blackman'
    M = 4001
    N = 4096
    f0et = 11
    t = -80
    minf0 = 130
    maxf0 = 210

    ### Do not modify the code below
    H = 256                                                     # fix hop size

    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window

    ### Method 1
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # np.floor / np.ceil return floats; slice bounds have to be integers.
    startFrame = int(np.floor(0.5*fs/H))
    endFrame = int(np.ceil(4.0*fs/H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0

    # Synthesize the contour and write it next to the script
    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    # frequency range to plot
    maxplotfreq = 500.0
    fontSize = 16
    plot = 1  # 1 shows the figure on screen, any other value saves it to a file

    fig = plt.figure()
    ax = fig.add_subplot(111)

    mX, pX = stft.stftAnal(x, w, N, H)                          # using same params as used for analysis
    mX = np.transpose(mX[:,:int(N*(maxplotfreq/fs))+1])         # keep the bins up to maxplotfreq

    timeStamps = np.arange(mX.shape[1])*H/float(fs)
    binFreqs = np.arange(mX.shape[0])*fs/float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)
    # vertical markers at the 0.5 s and 4.0 s limits
    plt.plot([0.5, 0.5], [0, maxplotfreq], color = 'b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color = 'b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))

    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1]-xLim[0])/(2.0*(yLim[1]-yLim[0])))

    if plot == 1:
        plt.autoscale(tight=True)
        plt.show()
    else:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
# Analysis configuration
inputFile = '../../sounds/cello-phrase.wav'
stdThsld, minNoteDur, winStable = 10, 0.1, 3
window, M, N, H = 'hamming', 1025, 2048, 256
f0et, t, minf0, maxf0 = 5.0, -100, 310, 650

# Read the sound, build the analysis window and estimate the f0 contour
fs, x = UF.wavread(inputFile)
w = get_window(window, M)
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

# 1. Hz -> cents (reference 55 Hz); values below eps are raised to eps before the log
f0[f0 < eps] = eps
f0Cent = 1200 * np.log2(f0 / 55.0)

# 2. deviation over the winStable frames that precede each frame (current frame excluded);
#    the first winStable entries stay at 0
devf0 = np.zeros(f0Cent.size)
for i in range(winStable, f0Cent.size):
    devf0[i] = np.std(f0Cent[i - winStable:i])

# 3. frames whose deviation is under the threshold
stablePts = np.flatnonzero(devf0 < stdThsld)

# 4. grouping the consecutive stable points into segments is not done in this script
def estimateF0(inputFile = '../../sounds/cello-double-2.wav'):
    """
    Estimate the fundamental frequency (f0) contour of an audio file.

    Besides returning the contour, this function synthesizes it as a sinusoid
    (written to 'synthF0Contour.wav') and draws it on top of the spectrogram.

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values;
                          frames before 0.5 s and after 4.0 s are set to 0
    """

    ### Change these analysis parameter values
    window = "blackman"
    M = 4401
    N = 8192
    f0et = 7
    t = -90.0
    minf0 = 140
    maxf0 = 210

    ### Do not modify the code below
    H = 256                                     # fixed hop size

    fs, x = UF.wavread(inputFile)               # reading inputFile
    w = get_window(window, M)                   # obtaining analysis window

    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # np.floor / np.ceil return floats; slice bounds must be integers,
    # otherwise NumPy raises a TypeError when indexing f0.
    startFrame = int(np.floor(0.5*fs/H))
    endFrame = int(np.ceil(4.0*fs/H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0

    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    maxplotfreq = 500.0                         # frequency range to plot
    fontSize = 16
    plot = 1                                    # plot = 1 plots the f0 contour, otherwise saves it to a file.

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # NOTE(review): fs is passed as the second argument here; confirm this
    # matches the signature of the installed stft.stftAnal.
    mX, pX = stft.stftAnal(x, fs, w, N, H)      # using same params as used for analysis
    mX = np.transpose(mX[:, :int(N*(maxplotfreq/fs))+1])

    timeStamps = np.arange(mX.shape[1])*H/float(fs)
    binFreqs = np.arange(mX.shape[0])*fs/float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)
    # Vertical markers at the 0.5 s and 4.0 s boundaries used above.
    plt.plot([0.5, 0.5], [0, maxplotfreq], color = 'b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color = 'b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))

    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1]-xLim[0])/(2.0*(yLim[1]-yLim[0])))

    if plot == 1:
        plt.autoscale(tight=True)
        plt.show()
    else:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): float array of shape (numSegments, 2) holding the first and last
                               (inclusive) frame index of every segment.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to Cents (relative to 55 Hz).
    #    A tiny epsilon is added so that frames with f0 == 0 do not produce log2(0).
    epsilon = 10**-17
    f0c = 1200.0 * np.log2((f0 + epsilon) / 55.0)

    # 2. standard deviation of the last winStable samples, current frame included.
    #    The first frames use however many samples are available.
    sd = np.zeros(len(f0c))
    for i in range(len(f0c)):
        sd[i] = np.std(f0c[max(0, i - winStable + 1):i + 1])

    # 3. indices of the stable points in the melody
    stable = np.where(sd < stdThsld)[0]

    # 4. group consecutive stable indices into runs. Splitting at every gap keeps
    #    the final run as well, which a pairwise look-ahead loop would lose.
    if stable.size:
        gaps = np.where(np.diff(stable) != 1)[0] + 1
        runs = np.split(stable, gaps)
    else:
        runs = []

    # 5. discard runs that are not longer than minNoteDur (expressed in frames)
    minTrackLength = round(fs*minNoteDur/H)
    kept = [run for run in runs if len(run) > minTrackLength]

    segments = np.zeros((len(kept), 2))
    for k, run in enumerate(kept):
        segments[k, 0] = run[0]
        segments[k, 1] = run[-1]

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)   # uncomment to visualise the result
    return segments
import harmonicModel as HM

# --- Input signal and analysis parameters ------------------------------------
fs, x = UF.wavread('../../../sounds/piano.wav')
w = np.blackman(1501)
N = 2048
t = -90
minf0 = 100
maxf0 = 300
f0et = 1
maxnpeaksTwm = 4
H = 128
x1 = x[int(1.5 * fs):int(1.8 * fs)]     # excerpt between 1.5 s and 1.8 s

# --- Spectrogram and f0 analysis ---------------------------------------------
plt.figure(1, figsize=(9, 7))
mX, pX = STFT.stftAnal(x, w, N, H)
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
# NOTE(review): presumably cleans up short f0 fragments; confirm against UF.cleaningTrack.
f0 = UF.cleaningTrack(f0, 5)
yf0 = UF.sinewaveSynth(f0, 0.8, H, fs)

# Frames with f0 == 0 become NaN so the contour line is not drawn there.
f0[f0 == 0] = np.nan

# --- Plot: magnitude spectrogram up to maxplotfreq with the f0 track on top ---
maxplotfreq = 800.0
numFrames = mX.shape[0]
frmTime = np.arange(numFrames) * H / float(fs)
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mX[:, :int(N * maxplotfreq / fs + 1)]))
plt.autoscale(tight=True)

plt.plot(frmTime, f0, linewidth=2, color='k')
plt.autoscale(tight=True)
plt.title('mX + f0 (piano.wav), TWM')

plt.tight_layout()
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): float array of shape (numSegments, 2) holding the first and last
                               (inclusive) frame index of every segment.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to Cents (relative to 55 Hz).
    #    Frames with f0 == 0 give log2(0) = -inf; the warning is silenced and the
    #    value replaced by a large negative constant so later statistics stay finite.
    with np.errstate(divide='ignore'):
        f0inCents = 1200 * np.log2(f0 / 55.0)
    f0inCents[np.isneginf(f0inCents)] = -9999

    # 2. standard deviation of the last winStable samples, current frame included.
    #    Frames without a full window keep +inf and therefore never count as stable.
    stdevs = np.full(f0inCents.size, np.inf)
    for i in range(max(winStable - 1, 0), f0inCents.size):
        stdevs[i] = np.std(f0inCents[i - winStable + 1:i + 1])

    # 3. indexes of the stable points in the melody
    lessThanThsldIndexs = np.where(stdevs < stdThsld)[0]

    # 4. group consecutive stable indexes into runs. Splitting at every gap keeps
    #    both the index right after a gap and the final run.
    if lessThanThsldIndexs.size:
        gaps = np.where(np.diff(lessThanThsldIndexs) != 1)[0] + 1
        runs = np.split(lessThanThsldIndexs, gaps)
    else:
        runs = []

    # 5. keep only the runs whose duration in seconds exceeds minNoteDur
    filteredSegments = [
        [run[0], run[-1]]
        for run in runs
        if (run[-1] - run[0]) * H / float(fs) > minNoteDur
    ]
    # reshape guarantees an (n, 2) result even when no segment survives
    segments = np.array(filteredSegments, dtype=float).reshape(-1, 2)

    # NOTE(review): plotting on every call is kept from the original behaviour;
    # comment it out if only the segment boundaries are needed.
    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed

    return segments