Beispiel #1
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (k, 2); each row holds the first and the last
                               (inclusive) frame index of one stable segment. The dtype is float,
                               as in the previous implementation; the shape is (0, 2) when no
                               segment survives the filtering.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to Cents (reference 55 Hz). Frames with f0 == 0 give
    #    log2(0) = -inf; np.maximum clamps those (and anything below 55 Hz) to 0 cents.
    f0_cents = np.maximum(1200.0 * np.log2(f0 / 55.0), 0.0)

    # 2. standard deviation of the last winStable samples, stored at the index of the LAST
    #    frame of each window. Frames that do not yet have a full window keep +inf so that
    #    they can never pass the threshold (a zero-initialised array marked them as stable).
    numFrames = len(f0_cents)
    sd = np.full(numFrames, np.inf)
    for last in range(winStable - 1, numFrames):
        sd[last] = np.std(f0_cents[last - winStable + 1:last + 1])

    # 3. apply threshold on standard deviation values to find indexes of the stable points
    stable_indices = np.where(sd < stdThsld)[0]

    # 4. group consecutive stable points into runs. Splitting at every gap also yields the
    #    final run, which a "close the segment when a gap is seen" loop never emits.
    if stable_indices.size:
        gaps = np.where(np.diff(stable_indices) != 1)[0] + 1
        runs = np.split(stable_indices, gaps)
    else:
        runs = []

    # 5. segment filtering: keep runs whose (last - first) exceeds minNoteDur expressed in frames
    minNoteDurFrames = fs * minNoteDur / float(H)
    kept = [[run[0], run[-1]] for run in runs if run[-1] - run[0] > minNoteDurFrames]
    segments = np.array(kept, dtype=float).reshape(-1, 2)

    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed

    return segments
Beispiel #2
0
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10,
                              minNoteDur=0.1, winStable=3, window='hamming', M=1024, N=2048,
                              H=256, f0et=5.0, t=-100, minf0=310, maxf0=650):
    """
    Entry point for segmenting the stable note regions of an audio signal.

    Input:
        inputFile (string): path of the wav file to analyse
        stdThsld (float): stability threshold on the f0 contour, in cents
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for the standard deviation
        window (string): name of the analysis window
        M (integer): window size used for the f0 contour
        N (integer): FFT size used for the f0 contour
        H (integer): hop size used for the f0 contour
        f0et (float): error threshold of the f0 computation
        t (float): magnitude threshold in dB for spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        NOTE: the body visible here stops after the f0 estimation, so nothing is
        returned yet; stdThsld, minNoteDur and winStable are not used so far.
    """
    # Load the recording: sampling rate first, then the samples.
    sampleRate, audio = UF.wavread(inputFile)

    # Analysis window of M samples.
    analysisWindow = get_window(window, M)

    # f0 contour of the recording.
    f0 = HM.f0Detection(audio, sampleRate, analysisWindow, N, H, t, minf0, maxf0, f0et)
Beispiel #3
0
def estimate(inputFile='a7q2-harmonic.wav',
             window='blackman',
             M=2101,
             N=4096,
             t=-90,
             minSineDur=0.1,
             nH=50,
             minf0=100,
             maxf0=200,
             f0et=5,
             harmDevSlope=0.01):
    """
    Analyse a sound with the harmonic model, resynthesise it and report how good the fit is.
    Input:
        inputFile (string): wav file including the path
        window (string): analysis window
        M (integer): analysis window size
        N (integer): FFT size used in the analysis
        t (float): magnitude threshold passed to the harmonic analysis and f0 detection
        minSineDur (float): passed to HM.harmonicModelAnal
        nH (integer): passed to HM.harmonicModelAnal
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        f0et (float): error threshold used for the f0 computation
        harmDevSlope (float): passed to HM.harmonicModelAnal
    Output:
        diff (float): sum of absolute sample differences between the input and the
                      resynthesised signal, over their common length
        std (float): standard deviation of the detected f0 contour
    The same two values and all parameters are also printed on one line.
    """
    Ns = 512   # size argument handed to SM.sineModelSynth
    H = 128    # hop size shared by analysis and synthesis

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0,
                                               maxf0, f0et, harmDevSlope,
                                               minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # Input and output may differ in length; compare only the overlapping part.
    size = min(x.size, y.size)
    diff = np.sum(np.abs(x[:size] - y[:size]))
    std = np.std(f0)

    # Single-argument call form: valid as a Python 3 function call and prints the same
    # text under the Python 2 print statement.
    print("diff:{0} & std:{1}, M={2} N={3} t={4} minSineDur={5} nH={6} min/max={7}/{8} f0et={9} harmDevSlope={10}"
          .format(diff, std, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope))

    return diff, std
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable=3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing the starting and the
                               ending (inclusive) frame index of every segment; shape (0, 2)
                               when there is none.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to Cents; eps keeps log2 finite where f0 is 0
    f0Cents = 1200 * np.log2((f0 + eps) / 55.0)

    # 2. standard deviation of the last winStable samples, stored at the last frame of each
    #    window. Frames without a full window stay at +inf so they never count as stable;
    #    this replaces the former positional slice stdWhere[winStable:], which also threw
    #    away one genuine stable frame.
    stDevs = np.full(f0Cents.size, np.inf)
    for f in range(winStable - 1, f0Cents.size):
        stDevs[f] = np.std(f0Cents[f - winStable + 1:f + 1])

    # 3. apply threshold on standard deviation values to find indexes of the stable points
    stdWhere = np.where(stDevs <= stdThsld)[0]
    if stdWhere.size == 0:
        # nothing is stable: return an empty result instead of failing on stdWhere[0]
        return np.empty((0, 2), int)

    # 4. split the stable indexes at every gap; unlike an append-on-gap loop this also
    #    produces the run that reaches the end of the index list
    gaps = np.where(np.diff(stdWhere) != 1)[0] + 1
    runs = np.split(stdWhere, gaps)
    segments = np.array([[run[0], run[-1]] for run in runs], dtype=int)

    # 5. apply segment filtering on (end - start) against minNoteDur expressed in frames
    segLens = segments[:, 1] - segments[:, 0]
    minNoteDurFrames = int(minNoteDur * fs / H)
    segments = segments[segLens >= minNoteDurFrames]

    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed

    return segments
Beispiel #5
0
def estimateInharmonicity(inputFile='../../sounds/piano.wav',
                          t1=0.1,
                          t2=0.5,
                          window='hamming',
                          M=2048,
                          N=2048,
                          H=128,
                          f0et=5.0,
                          t=-90,
                          minf0=130,
                          maxf0=180,
                          nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound
    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2 (nan when the interval has no frame).
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSineDur = 0.0
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0,
                                               maxf0, f0et, harmDevSlope,
                                               minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the segment in which to compute the inharmonicity.
    #    np.ceil returns floats; slice bounds must be integers, so convert explicitly.
    b1 = int(np.ceil(t1 * float(fs) / H))
    b2 = int(np.ceil(t2 * float(fs) / H))
    bhfreq = hfreq[b1:b2]
    bf0 = f0[b1:b2]

    # 3. Per frame: mean over the harmonics of |f_r - r * f0| / r, then mean over the frames.
    frameInharm = []
    for idx, h in enumerate(bhfreq):
        coef = np.arange(1, h.size + 1)   # harmonic numbers 1..R
        frameInharm.append(np.mean(np.abs(h - coef * bf0[idx]) / coef))

    return np.mean(frameInharm)
def segmentStableNotesRegions(inputFile = 'sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): float array of shape (k, 2) containing starting and ending
                               (inclusive) frame indices of every segment.
    Side effect: plotSpectogramF0Segments is called with the result before returning.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents; zeros are replaced in place by eps so that
    #    log2 stays finite (the modified f0 is also what gets plotted below)
    f0[f0 == 0] = eps
    f0cent = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of every full window of winStable samples; entry i covers
    #    frames i .. i + winStable - 1
    numWindows = max(len(f0cent) - winStable + 1, 0)
    f0dev = np.array([np.std(f0cent[i:i + winStable]) for i in range(numWindows)])

    # 3. frame index (last frame of the window) of every stable point. Working with the
    #    index list directly avoids the former marker array, which was shorter than the
    #    f0 contour (out-of-bounds write for a stable final window) and could not tell
    #    "unstable" apart from frame index 0.
    stableFrames = np.where(f0dev <= stdThsld)[0] + winStable - 1

    # 4. create segments of continuous stable points such that consecutive stable points
    #    belong to the same segment
    if stableFrames.size:
        gaps = np.where(np.diff(stableFrames) != 1)[0] + 1
        runs = np.split(stableFrames, gaps)
    else:
        runs = []

    # 5. apply segment filtering: keep runs lasting at least minNoteDur, measured in frames
    minNoteDurFrames = fs * minNoteDur / float(H)
    kept = [[run[0], run[-1]] for run in runs if len(run) >= minNoteDurFrames]
    segments = np.array(kept, dtype=float).reshape(-1, 2)

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    return segments
Beispiel #7
0
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Estimate how inharmonic a sound is between two points in time.

    Input:
        inputFile (string): path of the wav file
        t1 (float): start time of the analysed segment
        t2 (float): end time of the analysed segment
        window (string): analysis window
        M (integer): window size used for the f0 contour
        N (integer): FFT size used for the f0 contour
        H (integer): hop size used for the f0 contour
        f0et (float): error threshold of the f0 computation
        t (float): magnitude threshold in dB for spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics requested from the harmonic analysis
    Output:
        meanInharm (float or np.float): average, over the frames between t1 and t2, of the
                                        per-frame mean of |f_r - r * f0| / r.
    """
    # Audio and analysis window.
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Harmonic tracks and f0 contour; both are computed with the same hop size H.
    harmDevSlope, minSineDur = 0.01, 0.0
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # Frame range [firstFrame, lastFrame) that corresponds to [t1, t2).
    firstFrame = int(np.ceil(t1 * float(fs) / H))
    lastFrame = int(np.ceil(t2 * float(fs) / H))
    segHarmonics = xhfreq[firstFrame:lastFrame]
    segF0 = f0[firstFrame:lastFrame]

    # One inharmonicity value per frame, then their average.
    perFrame = []
    for k, partials in enumerate(segHarmonics):
        order = np.arange(1, partials.size + 1)   # harmonic numbers 1..R
        deviation = np.abs(partials - order * segF0[k]) / order
        perFrame.append(np.sum(deviation) / len(deviation))
    perFrame = np.array(perFrame)

    return np.sum(perFrame) / len(perFrame)
Beispiel #8
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indices of every
                               segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents; values below eps are raised to eps (in place)
    #    so that log2 stays finite
    f0[f0 < eps] = eps
    f0_cents = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of the last winStable samples for every frame that has a full
    #    window. List comprehensions are used instead of map()/filter(): on Python 3 those
    #    return iterators, and np.array() of an iterator is not an array of its items.
    numFrames = len(f0_cents)
    frameIndex = np.arange(winStable - 1, numFrames)
    sds = np.array([np.std(f0_cents[i + 1 - winStable:i + 1]) for i in frameIndex])

    # 3. apply threshold on standard deviation values; adding winStable - 1 maps positions
    #    in sds back to frame indices
    stableF0Indices = winStable - 1 + np.where(sds < stdThsld)[0]

    # 4. create segments of continuous stable points such that consecutive stable points
    #    belong to the same segment
    # NOTE(review): groupConsecutiveRuns is defined outside this block; it is assumed to
    # return a sequence of index runs supporting len() and [0] / [-1] - confirm.
    runs = groupConsecutiveRuns(stableF0Indices)

    # 5. apply segment filtering, i.e. remove runs shorter than minNoteDur (in frames)
    minNoteDurFrames = int(minNoteDur * fs / H)
    segments = np.array([[run[0], run[-1]] for run in runs if len(run) >= minNoteDurFrames])

    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    return segments
Beispiel #9
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing starting and ending
                               (inclusive) frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents. Frames with f0 == 0 become -inf; the standard
    #    deviation of a window containing them is nan, which never passes the "<" test below.
    f0Cents = 1200. * np.log2(f0 / 55.)

    # 2./3. flag, for each frame, whether the standard deviation of the last winStable
    #       samples ending AT that frame is below the threshold. The builtin bool replaces
    #       the removed np.bool alias, and the loop now also reaches the final frame.
    numFrames = len(f0)
    stdBelowTh = np.zeros(numFrames, dtype=bool)
    for last in range(winStable - 1, numFrames):
        stdBelowTh[last] = np.std(f0Cents[last - winStable + 1:last + 1]) < stdThsld

    # 4. consecutive stable frames form one segment. Padding the flags with 0 on both sides
    #    turns every run, including one that lasts until the final frame, into a +1 / -1
    #    pair in the difference signal.
    edges = np.diff(np.concatenate(([0], stdBelowTh.astype(int), [0])))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0] - 1          # inclusive last frame of each run

    # 5. apply segment filtering on (end - start) against minNoteDur expressed in frames
    longEnough = (ends - starts) >= 1. * fs * minNoteDur / H
    segments = np.column_stack((starts, ends))[longEnough]

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed

    return segments
Beispiel #10
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing starting and ending
                               (inclusive) frame indices of every segment.
    Side effect: plotSpectogramF0Segments is called with the result before returning.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents; zeros are replaced in place by eps so that
    #    log2 stays finite
    f0[f0 == 0] = eps
    f0c = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of the last winStable samples, stored at the last frame of each
    #    window; frames without a full window stay at +inf and therefore never look stable
    std = np.full(f0c.size, np.inf)
    for last in range(winStable - 1, f0c.size):
        std[last] = f0c[last - winStable + 1:last + 1].std()

    # 3. stable points are the frames whose deviation is below the threshold
    stable = std < stdThsld

    # 4. locate the runs of stable frames. The flags are padded with 0 on both sides so that
    #    a run touching the first valid frame or the end of the contour still gets both a
    #    start and an end; starts and ends therefore always pair up one to one.
    edges = np.diff(np.concatenate(([0], stable.astype(int), [0])))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0] - 1          # inclusive last frame of each run

    # 5. apply segment filtering. minNoteDur is converted to frames with the hop size;
    #    float() avoids the integer division that truncated the former
    #    x.size / fs / f0.size expression to 0 on Python 2.
    min_segment_size = minNoteDur * fs / float(H)
    longEnough = (ends - starts) > min_segment_size
    segments = np.column_stack((starts, ends))[longEnough]

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    return segments
Beispiel #11
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending (inclusive) frame
                               indices of every segment.
    Side effect: plotSpectogramF0Segments is called with the result before returning.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents. Frames with f0 == 0 become -inf; windows that
    #    contain them have a nan deviation and never pass the "<" test below.
    f0c = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of every full window of winStable samples; entry i covers
    #    frames i .. i + winStable - 1. There are len - winStable + 1 such windows (the
    #    final one used to be skipped). A list comprehension is used because map() returns
    #    an iterator on Python 3.
    numWindows = max(len(f0c) - winStable + 1, 0)
    fsd = np.array([np.std(f0c[i:i + winStable]) for i in range(numWindows)])

    # 3. apply threshold; adding winStable - 1 gives the last frame of each stable window
    stidx = np.where(fsd < stdThsld)[0] + winStable - 1

    # 4. create segments of continuous stable points such that consecutive stable points
    #    belong to the same segment
    grps = np.split(stidx, np.where(np.diff(stidx) > 1)[0] + 1)

    # 5. apply segment filtering, i.e. remove groups shorter than minNoteDur seconds.
    #    np.split returns one empty group when nothing is stable; it is skipped explicitly.
    segments = np.array([[g[0], g[-1]] for g in grps
                         if len(g) > 0 and len(g) * H / float(fs) >= minNoteDur])

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    return segments
Beispiel #12
0
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Estimate how inharmonic a sound is between two points in time.

    Input:
        inputFile (string): path of the wav file
        t1 (float): start time of the analysed segment
        t2 (float): end time of the analysed segment
        window (string): analysis window
        M (integer): window size used for the f0 contour
        N (integer): FFT size used for the f0 contour
        H (integer): hop size used for the f0 contour
        f0et (float): error threshold of the f0 computation
        t (float): magnitude threshold in dB for spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered
    Output:
        meanInharm: whatever compute_inharmonicity returns for the harmonic frequencies and
                    the frame range derived from t1 and t2 (helper defined outside this block).
    """
    # Audio, analysis window and f0 contour.
    sampleRate, audio = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)
    # The f0 contour is computed here but not used further down in this function.
    f0 = HM.f0Detection(audio, sampleRate, analysisWindow, N, H, t, minf0, maxf0, f0et)

    # Harmonic analysis; only the frequencies are needed afterwards.
    harmFreqs, harmMags, harmPhases = HM.harmonicModelAnal(
        audio, sampleRate, analysisWindow, N, H, t, nH, minf0, maxf0, f0et)

    # Frame range for [t1, t2]: start rounded up, end rounded down.
    firstFrame = int(np.ceil(sampleRate * t1 / H))
    lastFrame = int(np.floor(sampleRate * t2 / H))

    return compute_inharmonicity(harmFreqs, firstFrame, lastFrame, nH)
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                            M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound
    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2 (nan when the interval has no frame).
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSineDur = 0.0
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the segment in which to compute the inharmonicity.
    #    The frame bounds are cast to int because np.ceil yields floats, which are not
    #    accepted as slice indices.
    b1 = int(np.ceil(t1 * float(fs) / H))
    b2 = int(np.ceil(t2 * float(fs) / H))
    bhfreq = hfreq[b1:b2]
    bf0 = f0[b1:b2]

    # 3. Mean inharmonicity: average over the harmonics of |f_r - r * f0| / r inside each
    #    frame, then average of those values over the frames.
    frameInharm = []
    for idx, h in enumerate(bhfreq):
        coef = np.arange(1, h.size + 1)   # harmonic numbers 1..R
        frameInharm.append(np.mean(np.abs(h - coef * bf0[idx]) / coef))

    return np.mean(frameInharm)
Beispiel #14
0
def detect_f0(audio_path, window_size, Hop_size):
    """
    Function to estimate the f0 contour of a wav file
    Input:
        audio_path (string): wav file including the path
        window_size (integer): analysis window length in samples
        Hop_size (integer): hop size in samples between analysis frames
    Output:
        f0: whatever HM.f0Detection returns for the normalised signal
            (presumably one f0 value per analysis frame -- confirm against HM)
    """
    fs, data = wavfile.read(audio_path)
    # Scale the raw samples with the per-dtype factor looked up in norm_fact
    # and hand the detector float32 data.
    data = np.float32(data) / norm_fact[data.dtype.name]

    # 'hann' is SciPy's canonical name for this window; the legacy 'hanning'
    # alias is not guaranteed to be accepted by every SciPy release, while
    # 'hann' yields the same window everywhere.
    w = get_window('hann', window_size)

    # Fixed analysis settings for the detector.
    N = 2048 * 2   # FFT size
    t = -50        # magnitude threshold in dB for spectral peak picking
    minf0 = 100    # minimum fundamental frequency in Hz
    maxf0 = 700    # maximum fundamental frequency in Hz
    f0et = 7       # error threshold used for the f0 computation

    return HM.f0Detection(data, fs, w, N, Hop_size, t, minf0, maxf0, f0et)
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Front end of the stable-note segmentation: reads the sound and estimates its f0 contour.

    Only the analysis stage is present in this version; the contour is computed and then
    dropped, so stdThsld, minNoteDur and winStable are accepted but never used.
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents); unused here
        minNoteDur (float): minimum allowed segment length (note duration); unused here
        winStable (integer): number of samples used for computing standard deviation; unused here
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        None: no segments are built or returned by this version.
    """
    # Load the audio, build the analysis window, then run the f0 detector.
    sample_rate, samples = UF.wavread(inputFile)
    analysis_window = get_window(window, M)
    f0_contour = HM.f0Detection(samples, sample_rate, analysis_window, N, H, t, minf0, maxf0, f0et)
Beispiel #16
0
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav',
                              stdThsld=10,
                              minNoteDur=0.1,
                              winStable=3,
                              window='hamming',
                              M=1024,
                              N=2048,
                              H=256,
                              f0et=5.0,
                              t=-100,
                              minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) containing starting and ending
                               frame indexes (both inclusive) of every segment; shape (0, 2)
                               when no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)  # reading inputFile
    w = get_window(window, M)  # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    size = f0.size

    # Step 1: Hz -> cents relative to 55 Hz. Frames with f0 == 0 keep the
    # value 0 so that log2 is never evaluated at zero.
    f0_cents = np.zeros(size)
    voiced = f0 != 0
    f0_cents[voiced] = 1200.0 * np.log2(f0[voiced] / 55.0)

    # Step 2: deviation of the window ending at each frame. Only frames from
    # index winStable onwards are thresholded below, so the helper is no
    # longer called on the wrapped/empty slices that a negative start index
    # produced for the first frames.
    SD_win = np.zeros(size)
    for i in range(winStable, size):
        SD_win[i] = standardDeviation(f0_cents[i - winStable + 1:i + 1], winStable)

    # Step 3: indexes of the stable frames (integers, usable as indexes).
    stableNote = winStable + np.where(SD_win[winStable:] < stdThsld)[0]

    # Steps 4 and 5: merge consecutive stable frames into runs and keep the
    # runs that last at least minNoteDur seconds.
    start_end = []

    def keep_if_long_enough(first, last):
        duration = last - first + 1  # number of frames in the run
        if duration * H / float(fs) >= minNoteDur:
            start_end.append((first, last))

    if stableNote.size > 0:
        run_start = previous = stableNote[0]
        for current in stableNote[1:]:
            if current != previous + 1:
                keep_if_long_enough(run_start, previous)
                run_start = current
            previous = current
        # The run that is still open when the data ends must be emitted too;
        # it used to be silently discarded.
        keep_if_long_enough(run_start, previous)

    segments = np.array(start_end, dtype=int).reshape(-1, 2)

    plotSpectogramF0Segments(x, fs, w, N, H, f0,
                             segments)  # Plot spectrogram and F0 if needed

    return segments
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (k, 2) containing starting and ending frame
                               indices (both inclusive) of every segment; shape (0, 2) when
                               no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    offset = winStable - 1
    # Frames with f0 == 0 map to -inf cents and give a NaN deviation, so they
    # never pass the threshold; only the floating-point warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        # 1. convert f0 values from Hz to Cents (as described in pdf document)
        f0ct = 1200 * np.log2(f0 / 55.0)

        # 2. create an array containing standard deviation of last winStable samples.
        #    Frames that do not have a full window yet start at +inf so they cannot be
        #    mistaken for stable points (a zero fill always passed the threshold).
        stds = np.full(f0ct.size, np.inf)
        for i in range(offset, f0ct.size):
            stds[i] = np.std(f0ct[i - offset:i + 1])

    # 3. apply threshold on standard deviation values to find indices of the stable points in melody
    stables = np.where(stds < stdThsld)[0]

    # 4. create segments of continuous stable points such that consecutive stable points belong to
    #    same segment. A run ends wherever the next stable index is not the direct successor;
    #    the final run ends at the last stable index.
    if stables.size > 0:
        cuts = np.where(np.diff(stables) != 1)[0]
        starts = stables[np.concatenate(([0], cuts + 1))]
        ends = stables[np.concatenate((cuts, [stables.size - 1]))]
    else:
        starts = ends = stables

    # 5. apply segment filtering, i.e. remove segments which are < minNoteDur in length
    #    (length measured from the first to the last frame of the run)
    long_enough = (ends - starts) * H / float(fs) >= minNoteDur
    segments = np.column_stack((starts[long_enough], ends[long_enough]))

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    return segments
Beispiel #18
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Find the regions of an audio signal in which the f0 contour stays stable.
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (k, 2) with the starting and ending frame
                               indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 1. Hz -> cents; values below eps are raised to eps first so log2 stays finite
    f0[f0 < eps] = eps
    cents = 1200 * np.log2(f0 / 55.0)

    # 2. deviation of the winStable frames preceding each frame
    #    (entries before index winStable stay at zero)
    n_frames = cents.size
    spread = np.zeros(n_frames)
    for frame in range(winStable, n_frames):
        spread[frame] = np.std(cents[frame - winStable:frame])

    # 3. frames whose deviation is below the threshold
    stable = np.where(spread < stdThsld)[0]

    # 4. group consecutive stable frames; a run needs at least two frames to count
    runs = np.array([[]], dtype=int).reshape(0, 2)
    if stable.size > 1:
        cuts = np.flatnonzero(np.diff(stable) != 1)          # positions where a run ends
        first = np.concatenate(([0], cuts + 1))              # run starts (positions in stable)
        last = np.concatenate((cuts, [stable.size - 1]))     # run ends (positions in stable)
        multi = last > first                                 # drop isolated single frames
        if multi.any():
            found = np.column_stack((stable[first[multi]], stable[last[multi]]))
            runs = np.vstack([runs, found])

    # 5. discard runs spanning fewer than minNoteDur*fs/H frames between start and end
    too_short = runs[:, 1] - runs[:, 0] < (minNoteDur*fs)/H

    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segmentsList)  # Plot spectrogram and F0 if needed

    return runs[~too_short]
Beispiel #19
0
def estimateInharmonicity(inputFile='../../sounds/piano.wav',
                          t1=0.1,
                          t2=0.5,
                          window='hamming',
                          M=2048,
                          N=2048,
                          H=128,
                          f0et=5.0,
                          t=-90,
                          minf0=130,
                          maxf0=180,
                          nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound
    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2 (0.0 when the interval contains no frame).
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies; magnitudes and
    #    phases are not needed here. The f0 contour is estimated separately
    #    with the same analysis settings.
    xhfreq, _, _ = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                        harmDevSlope=0.01, minSineDur=0.0)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    interval_start = int(math.ceil(t1 * fs / float(H)))
    interval_end = int(math.ceil(t2 * fs / float(H)))
    sliced = xhfreq[interval_start:interval_end]
    f0_slice = f0[interval_start:interval_end]

    if len(sliced) == 0:
        # No frame inside the interval: nothing to average.
        return 0.0

    # 3. Compute the mean inharmonicity of the segment.
    #    For every frame: sum over harmonics r = 1..R of |f_r - r*f0| / r, divided by nH.
    #    Harmonic 1 is part of the sum, so the nH normalisation matches the number of terms
    #    (the first harmonic used to be skipped while still dividing by nH).
    harmonic_numbers = np.arange(1, sliced.shape[1] + 1)
    deviation = np.abs(sliced - np.outer(f0_slice, harmonic_numbers)) / harmonic_numbers
    frame_inharm = deviation.sum(axis=1) / float(nH)

    # Average over the frames actually summed; dividing by
    # interval_end - interval_start + 1 counted one frame too many.
    return frame_inharm.mean()
Beispiel #20
0
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav',
                              stdThsld=10,
                              minNoteDur=0.1,
                              winStable=3,
                              window='hamming',
                              M=1024,
                              N=2048,
                              H=256,
                              f0et=5.0,
                              t=-100,
                              minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments: the value returned by filter_segments_by_min_duration, i.e. the grouped
                  stable segments that survive the minimum-duration filter.
    """
    fs, x = UF.wavread(inputFile)  #reading inputFile
    w = get_window(window, M)  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document);
    #    values below eps are raised to eps before the conversion
    f0[f0 < eps] = eps
    f0_cent = Hz2Cent(f0)

    #2. create an array containing standard deviation of last winStable samples
    std_val_array = compute_std(f0_cent, winStable)

    #3. apply threshold on standard deviation values to find indexes of the stable points in melody
    stable_frame_index = find_stable_index(std_val_array, stdThsld, winStable)

    #4. create segments of continuous stable points such that consecutive stable points belong to same segment
    segments = group_stable_frame_index(stable_frame_index)

    #5. apply segment filtering, i.e. remove segments with are < minNoteDur in length.
    #   Consecutive frames are H samples apart, so one frame lasts H / fs seconds.
    #   Deriving this from x.size / (fs * number of frames) was only an estimate,
    #   truncated to 0 under Python 2 integer division and failed on an empty contour.
    each_frame_duration = H / float(fs)

    filtered_segments = filter_segments_by_min_duration(
        segments, minNoteDur, each_frame_duration)

    # NOTE(review): the plot receives the unfiltered segments while the
    # filtered ones are returned -- confirm this is intended.
    plotSpectogramF0Segments(x, fs, w, N, H, f0,
                             segments)  # Plot spectrogram and F0 if needed

    return filtered_segments
Beispiel #21
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (k, 2) containing starting and ending frame
                               indexes (both inclusive) of every segment; shape (0, 2) when
                               no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w  = get_window(window, M)                                  # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document);
    #    eps keeps the logarithm finite for frames with f0 == 0
    f0_in_cents = 1200.0*np.log2(f0/55.0 + eps)

    # 2. create an array containing standard deviation of last winStable samples
    #    (the current frame and the winStable - 1 frames before it). Frames without a
    #    full window stay at +inf so they can never pass the threshold. The array is
    #    preallocated instead of being grown with np.append on every frame.
    num_frames = len(f0_in_cents)
    std_F0 = np.full(num_frames, np.inf)
    for i in range(winStable - 1, num_frames):
        std_F0[i] = np.std(f0_in_cents[(i - winStable + 1):(i + 1)])

    # 3. apply threshold on standard deviation values to find indexes of the stable points in melody
    idx = np.where(std_F0 < stdThsld)[0]

    # Without any stable frame there is nothing to group. Returning here avoids the
    # phantom (0, 0) run that used to be built and then looked up in the empty idx.
    if idx.size == 0:
        return np.empty((0, 2), dtype=idx.dtype)

    # 4. create segments of continuous stable points such that consecutive stable points belong to
    #    same segment: a run ends wherever the next stable index is not the direct successor
    cuts = np.where(np.diff(idx) != 1)[0]
    segments_Start = idx[np.concatenate(([0], cuts + 1))]
    segments_End = idx[np.concatenate((cuts, [idx.size - 1]))]

    # 5. apply segment filtering, i.e. remove segments which are < minNoteDur in length
    long_enough = (segments_End - segments_Start + 1)*H/float(fs) >= minNoteDur
    segments = np.column_stack((segments_Start[long_enough], segments_End[long_enough]))

    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    return segments
Beispiel #22
0
def segmentStableNotesRegions(inputFile='../../sounds/cello-phrase.wav',
                              stdThsld=20,
                              minNoteDur=0.5,
                              winStable=3,
                              window='hamming',
                              M=1025,
                              N=2048,
                              H=256,
                              f0et=5.0,
                              t=-100,
                              minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (k, 2) containing starting and ending frame
                               indexes (both inclusive) of every segment; shape (0, 2) when
                               no segment qualifies.
    """
    fs, x = UF.wavread(inputFile)  #reading inputFile
    w = get_window(window, M)  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document);
    #    values below eps are raised to eps first so log2 stays finite
    f0[f0 < eps] = eps
    f0_cent = 1200 * np.log2(f0 / 55.0)

    # 2. create an array containing standard deviation of last winStable samples
    #    (the current frame and the winStable - 1 frames before it). The window length
    #    now follows winStable instead of being fixed at three frames; frames without a
    #    full window stay at +inf and are never reported as stable.
    num_frames = len(f0_cent)
    SD_winstable = np.full(num_frames, np.inf)
    for i in range(winStable - 1, num_frames):
        SD_winstable[i] = np.std(f0_cent[i - winStable + 1:i + 1])

    # 3. apply threshold on standard deviation values to find indexes of the stable points in melody
    winstable_index = np.where(SD_winstable < stdThsld)[0]

    # 4. create segments of continuous stable points such that consecutive stable points belong to
    #    same segment. Every stable frame belongs to exactly one run (the first one included),
    #    and a run reaching the last stable frame is closed there instead of indexing past
    #    the end of the array.
    if winstable_index.size > 0:
        cuts = np.where(np.diff(winstable_index) != 1)[0]
        starts = winstable_index[np.concatenate(([0], cuts + 1))]
        ends = winstable_index[np.concatenate((cuts, [winstable_index.size - 1]))]
    else:
        starts = ends = winstable_index

    # 5. apply segment filtering, i.e. keep only segments strictly longer than minNoteDur
    long_enough = (ends - starts + 1) * H / float(fs) > minNoteDur
    segments = np.column_stack((starts[long_enough], ends[long_enough]))

    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed

    return segments
Beispiel #23
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indices of every
                               segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document).
    #    The contour in Hz is kept in f0 so the plot below still receives the detector
    #    output rather than the cents values. Frames with f0 == 0 become -inf here; their
    #    deviation is NaN, which never passes the threshold.
    f0_cents = 1200 * np.log2(f0/55.0)
    numFrames = len(f0_cents)

    # 2. create an array containing standard deviation of last winStable samples.
    #    The window includes the current frame, so the first frame with a full window is
    #    winStable - 1 (for winStable = 3 it covers indexes 0 - 2).
    frameIndex = np.arange(winStable - 1, numFrames)
    print(frameIndex)
    # A list comprehension is used because np.array(map(...)) only builds a numeric array
    # on Python 2; on Python 3 it wraps the map object itself.
    f0_sd = np.array([np.std(f0_cents[i - winStable + 1:i + 1]) for i in frameIndex])

    # 3. apply threshold on standard deviation values to find indices of the stable points in melody.
    #    Positions in f0_sd are shifted by winStable - 1 relative to frame numbers.
    stidx = np.where(f0_sd < stdThsld)[0] + winStable - 1

    # 4. create segments of continuous stable points such that consecutive stable points belong to
    #    same segment
    segments = groupConsecutiveRuns(stidx)

    # 5. apply segment filtering, i.e. remove segments which are < minNoteDur in length.
    #    segments is a sequence of [startidx, endidx] items; keep those the helper accepts.
    minFrames = int(minNoteDur * fs / H)
    opseg = []
    for item in segments:
        if(filterShortSegments(item[0],item[1],minFrames) == True):
            print("Got segment: " + str(item))
            opseg.append(item)

    segments = np.array(opseg)
    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    print(str(segments))
    return segments
Beispiel #24
0
def segmentStableNotesRegions(inputFile = '../sms-tools/sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100, 
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)  
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (numSegments, 2) holding the starting and ending
                               frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window    
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document)
    # Zero f0 values are replaced by eps so that log2 never sees 0; cents below the
    # 55 Hz reference are clamped to eps.
    f0[f0 == 0] = eps
    f0_cents = 1200 * np.log2(f0 / 55)
    f0_cents[f0_cents < 0] = eps

    # 2. create an array containing standard deviation of winStable samples
    # This implementation uses a window placed around the frame:
    # [idx - winStable // 2, idx + (winStable + 1) // 2).
    # Frames for which that window does not fit inside the contour keep the initial
    # value eps.
    half_before = winStable // 2
    half_after = (winStable + 1) // 2
    num_frames = len(f0_cents)
    stdarr = np.full_like(f0, eps)
    # Start at half_before so the slice start is never negative (a negative start
    # would silently produce an empty slice and a NaN deviation), and stop after the
    # last frame whose window still ends inside the contour.
    last_idx_exclusive = min(num_frames, num_frames - half_after + 1)
    for idx in range(half_before, last_idx_exclusive):
        stdarr[idx] = np.std(f0_cents[idx - half_before:idx + half_after])

    # 3. apply threshold on standard deviation values to find indexes of the stable points in melody
    stable_frames = np.where(stdarr <= stdThsld)[0]

    # 4. create segments of continuous stable points(csp) such that consecutive stable points belong to same segment
    if stable_frames.size:
        # run_breaks[j] is the position (inside stable_frames) of the last frame of a
        # run that is followed by a gap.
        run_breaks = np.where(np.diff(stable_frames) > 1)[0]
        csp_start = stable_frames[np.concatenate(([0], run_breaks + 1))]
        csp_end = stable_frames[np.concatenate((run_breaks, [stable_frames.size - 1]))]
    else:
        csp_start = stable_frames
        csp_end = stable_frames

    # 5. apply segment filtering, i.e. remove segments with are < minNoteDur in length
    min_note_frames = int(minNoteDur * fs / H)
    long_enough = (csp_end - csp_start) >= min_note_frames
    segments = np.vstack((csp_start[long_enough], csp_end[long_enough])).T

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed
    return(segments)
Beispiel #25
0
def segment_stable_notes_monophonic(
        inputFile='../../sounds/sax-phrase-short.wav',
        stdThsld=10,
        minNoteDur=0.1,
        winStable=3,
        window='hamming',
        M=1024,
        N=2048,
        H=256,
        f0et=5.0,
        t=-100,
        minf0=310,
        maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)  
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (numSegments, 2) holding the starting and ending
                               frame indexes of every segment (shape (0, 2) when none is found).
    """

    fs, x = UF.wavread(inputFile)  #reading inputFile
    w = get_window(window, M)  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document)
    # Values below eps are raised to eps so that log2 never receives 0.
    tuning = 55.0  # A4=440 Hz -> tuning=A1=55 Hz
    cent_f0 = 1200 * np.log2(np.maximum(f0, eps) / tuning)

    # 2. create an array containing standard deviation of last winStable samples
    # std_winStable[k] covers frames k .. k + winStable - 1.
    std_winStable = np.array([
        np.std(cent_f0[end - winStable:end])
        for end in range(winStable, cent_f0.size + 1)
    ])

    # 3. apply threshold on standard deviation values to find indexes of the stable points in melody
    # Adding winStable - 1 turns a window position into the index of the last frame
    # of that window, so the result is expressed in f0 frame indexes.
    stable_frames = np.where(std_winStable < stdThsld)[0] + winStable - 1
    if stable_frames.size == 0:
        return np.zeros((0, 2), dtype=int)

    # 4. create segments of continuous stable points such that consecutive stable points belong to same segment
    # run_breaks[j] is the position (inside stable_frames) of the last frame of a
    # run that is followed by a gap.
    run_breaks = np.where(np.diff(stable_frames) > 1)[0]
    starts = stable_frames[np.concatenate(([0], run_breaks + 1))]
    ends = stable_frames[np.concatenate((run_breaks, [stable_frames.size - 1]))]

    # 5. apply segment filtering, i.e. remove segments with are < minNoteDur in length
    samples_minNoteDur = int(minNoteDur * fs / H)
    keep = (ends - starts) >= samples_minNoteDur
    segments = np.column_stack((starts[keep], ends[keep]))

    return segments
Beispiel #26
0
import numpy as np
from scipy.signal import get_window
import sys, os, time

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../software/models/'))

import dftModel as DFT
import utilFunctions as UF
import stft as STFT
import harmonicModel as HM
import sineModel as SM

# Load the sawtooth test tone.
fs, x = UF.wavread('../../sounds/sawtooth-440.wav')

# Settings passed to the f0 detector below.
H = 1000                            # hop size
N = 4096                            # FFT size
t = -50                             # magnitude threshold
f0et = 1                            # f0 error threshold
minf0, maxf0 = 300, 500             # f0 search range
w = get_window('blackman', 2001)    # analysis window

# Fundamental-frequency track of the whole recording.
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
Beispiel #27
0
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav',
                              stdThsld=5,
                              minNoteDur=0.6,
                              winStable=3,
                              window='hamming',
                              M=1025,
                              N=2048,
                              H=256,
                              f0et=5.0,
                              t=-100,
                              minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)  
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (numSegments, 2) holding the starting and
                               ending frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)  #reading inputFile
    w = get_window(window, M)  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document)
    # eps is added so that log2 never receives 0.
    f0Cents = 1200 * np.log2((f0 + eps) / 55.0)

    #2. create an array containing standard deviation of last winStable samples
    # f0std[k] covers frames k .. k + winStable - 1.
    numWindows = max(f0.size - winStable + 1, 0)
    f0std = np.array(
        [np.std(f0Cents[k:k + winStable]) for k in range(numWindows)])

    #3. apply threshold on standard deviation values to find indexes of the stable points in melody
    stable = np.where(f0std < stdThsld)[0]
    if stable.size == 0:
        return np.zeros((0, 2), dtype=int)

    #4. create segments of continuous stable points such that consecutive stable points belong to same segment
    # runBreaks[j] is the position (inside stable) of the last window of a run that
    # is followed by a gap; the final run is closed by the last stable window.
    runBreaks = np.where(np.diff(stable) > 1)[0]
    starts = stable[np.concatenate(([0], runBreaks + 1))]
    ends = stable[np.concatenate((runBreaks, [stable.size - 1]))]

    #5. apply segment filtering, i.e. remove segments which are < minNoteDur in length
    # Run length is converted to samples with the average number of samples per
    # frame and compared with the minimum duration at the file's sampling rate.
    seglen = (ends - starts + 1) * x.size / float(f0.size)
    keep = seglen >= (minNoteDur * fs)

    # Window positions are shifted to frame indexes. With the default winStable=3
    # this gives start + 2 and end + 1.
    # NOTE(review): the end offset is one frame smaller than the start offset; this
    # is kept to preserve existing output - confirm it is intended.
    segments = np.column_stack((starts + winStable - 1,
                                ends + winStable - 2))[keep]

    # Plot spectrogram and F0 if needed
    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    # return segments
    return segments
Beispiel #28
0
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav',
                              stdThsld=10,
                              minNoteDur=0.1,
                              winStable=3,
                              window='hamming',
                              M=1024,
                              N=2048,
                              H=256,
                              f0et=5.0,
                              t=-100,
                              minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)  
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array of shape (numSegments, 2) containing starting and
                               ending frame indices of every segment.
    """
    fs, x = UF.wavread(inputFile)  #reading inputFile
    w = get_window(window, M)  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # Frames with f0 == 0 become -inf cents, and every window that contains one has
    # a NaN deviation, which never passes the threshold below. The resulting
    # floating-point warnings are expected and therefore silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        # 1. convert f0 values from Hz to Cents (as described in pdf document)
        f0ct = 1200 * np.log2(f0 / 55.0)

        # 2. create an array containing standard deviation of last winStable samples
        # The first winStable - 1 frames have no complete window; they are set to
        # +inf so they cannot be mistaken for stable frames.
        offset = winStable - 1
        stds = np.full(f0ct.size, np.inf)
        for i in range(offset, f0ct.size):
            stds[i] = np.std(f0ct[i - offset:i + 1])

    # 3. apply threshold on standard deviation values to find indices of the stable points in melody
    stables = np.where(stds < stdThsld)[0]

    if stables.size == 0:
        segments = np.zeros((0, 2), dtype=int)
    else:
        # 4. create segments of continuous stable points such that consecutive stable points
        #    belong to same segment
        # gaps[j] is the position (inside stables) of the last frame of a run that is
        # followed by a gap; the final run is closed by the last stable frame.
        gaps = np.where(np.diff(stables) > 1)[0]
        starts = stables[np.concatenate(([0], gaps + 1))]
        ends = stables[np.concatenate((gaps, [stables.size - 1]))]

        # 5. apply segment filtering, i.e. remove segments with are < minNoteDur in length
        longEnough = (ends - starts) * H / float(fs) >= minNoteDur
        segments = np.column_stack((starts, ends))[longEnough]

    plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)

    # return segments
    return segments
Beispiel #29
0
def estimateF0(inputFile = '../../sounds/cello-double-2.wav'):
    """
    Function to estimate fundamental frequency (f0) in an audio signal. This function also plots the 
    f0 contour on the spectrogram and synthesize the f0 contour.
    The f0 values before 0.5 s and after 4.0 s are set to 0, and the sinusoid synthesized from
    the resulting contour is written to 'synthF0Contour.wav'.
    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values
    """

    ### Change these analysis parameter values marked as XX
    window = 'blackman'
    M = 4001
    N = 4096
    f0et = 11
    t = -80
    minf0 = 130
    maxf0 = 210

    ### Do not modify the code below 
    H = 256                                                     #fix hop size
      
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window    
    
    ### Method 1
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0
    # np.floor / np.ceil return floats; slice bounds must be integers.
    startFrame = int(np.floor(0.5*fs/H))
    endFrame = int(np.ceil(4.0*fs/H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0
    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    # frequency range to plot
    maxplotfreq = 500.0    
    fontSize = 16
    plot = 1            # plot = 1 shows the figure, any other value saves it to a file

    fig = plt.figure()
    ax = fig.add_subplot(111)

    mX, pX = stft.stftAnal(x, w, N, H)                      #using same params as used for analysis
    mX = np.transpose(mX[:,:int(N*(maxplotfreq/fs))+1])     #keep only the bins up to maxplotfreq
    
    timeStamps = np.arange(mX.shape[1])*H/float(fs)                             
    binFreqs = np.arange(mX.shape[0])*fs/float(N)
    
    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)
    # vertical markers at the 0.5 s and 4.0 s limits used above
    plt.plot([0.5, 0.5], [0, maxplotfreq], color = 'b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color = 'b', linewidth=1.5)
    
    
    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))
    
    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1]-xLim[0])/(2.0*(yLim[1]-yLim[0])))    

    if plot == 1: # show the figure on screen
        plt.autoscale(tight=True) 
        plt.show()
    else:         # save the figure instead of showing it
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
Beispiel #30
0
# Segmentation settings.
inputFile = '../../sounds/cello-phrase.wav'
winStable = 3
stdThsld = 10
minNoteDur = 0.1

# f0 analysis settings.
window = 'hamming'
M, N, H = 1025, 2048, 256
minf0, maxf0 = 310, 650
f0et = 5.0
t = -100

fs, x = UF.wavread(inputFile)  # read the input file
w = get_window(window, M)  # build the analysis window
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimate the f0 contour

# Step 1: Hz -> cents relative to 55 Hz; values below eps are raised to eps first
# so that log2 never receives 0.
f0[f0 < eps] = eps
f0Cent = np.log2(f0 / 55.0) * 1200

# Step 2: standard deviation of the winStable frames preceding each frame; the
# first winStable entries stay at 0.
devf0 = np.zeros(f0Cent.size)
for i in range(winStable, f0Cent.size):
    devf0[i] = np.std(f0Cent[i - winStable:i])

# Step 3: indexes of the frames whose deviation is below the threshold.
stablePts = np.flatnonzero(devf0 < stdThsld)

# Step 4 (grouping consecutive stable points into segments) is not implemented in
# this excerpt.
def estimateF0(inputFile = '../../sounds/cello-double-2.wav'):
    """
    Function to estimate fundamental frequency (f0) in an audio signal. This function also plots the 
    f0 contour on the spectrogram and synthesize the f0 contour.
    The f0 values before 0.5 s and after 4.0 s are set to 0, and the sinusoid synthesized from
    the resulting contour is written to 'synthF0Contour.wav'.
    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values
    """

    ### Change these analysis parameter values
    window = "blackman"
    M = 4401
    N = 8192
    f0et = 7
    t = -90.0
    minf0 = 140
    maxf0 = 210

    ### Do not modify the code below 
    H = 256                                                     #fix hop size
      
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window    
    
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0
    # np.floor / np.ceil return floats; slice bounds must be integers.
    startFrame = int(np.floor(0.5*fs/H))
    endFrame = int(np.ceil(4.0*fs/H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0
    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    # frequency range to plot
    maxplotfreq = 500.0    
    fontSize = 16
    plot = 1            # plot = 1 plots the f0 contour, otherwise saves it to a file.  

    fig = plt.figure()
    ax = fig.add_subplot(111)

    mX, pX = stft.stftAnal(x, fs, w, N, H)                      #using same params as used for analysis
    mX = np.transpose(mX[:,:int(N*(maxplotfreq/fs))+1])         #keep only the bins up to maxplotfreq
    
    timeStamps = np.arange(mX.shape[1])*H/float(fs)                             
    binFreqs = np.arange(mX.shape[0])*fs/float(N)
    
    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)
    # vertical markers at the 0.5 s and 4.0 s limits used above
    plt.plot([0.5, 0.5], [0, maxplotfreq], color = 'b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color = 'b', linewidth=1.5)
    
    
    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))
    
    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1]-xLim[0])/(2.0*(yLim[1]-yLim[0])))    

    if plot == 1: # show the figure on screen
        plt.autoscale(tight=True) 
        plt.show()
    else:         # save the figure instead of showing it
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1, 
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100, 
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)  
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer Numpy array of shape (numSegments, 2) containing starting
                               and ending frame indices of every segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window    
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document)
    epsilon = 10**-17 # Add epsilon to f0 values to prevent log(0) errors
    f0c = 1200.0 * np.log2((f0+epsilon)/55.0)
    
    # 2. create an array containing standard deviation of last winStable samples
    # The window ends at the current frame; near the start of the contour it is
    # shortened to the frames that exist.
    sd = np.zeros(len(f0c))
    for i in range(len(f0c)):
        sd[i] = np.std(f0c[max(0, i - winStable + 1):i + 1])
    
    # 3. apply threshold on standard deviation values to find indices of the stable points in melody
    thres = np.where(sd<stdThsld)[0]
    if thres.size == 0:
        return np.zeros((0, 2), dtype=int)
    
    # 4. create segments of continuous stable points such that consecutive stable points belong to 
    #    same segment
    # gaps[j] is the position (inside thres) of the last frame of a run that is
    # followed by a gap; the final run is closed by the last stable frame.
    gaps = np.where(np.diff(thres) > 1)[0]
    starts = thres[np.concatenate(([0], gaps + 1))]
    ends = thres[np.concatenate((gaps, [thres.size - 1]))]

    # 5. apply segment filtering, i.e. remove segments with are < minNoteDur in length
    # A segment is kept when it contains more than minTrackLength frames.
    minTrackLength = round(fs*minNoteDur/H)
    keep = (ends - starts + 1) > minTrackLength
    segments = np.column_stack((starts[keep], ends[keep]))
        
    #plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)
    
    return segments
Beispiel #33
0
import harmonicModel as HM

# Plot the magnitude spectrogram of piano.wav with its detected f0 track on top.
(fs, x) = UF.wavread('../../../sounds/piano.wav')
w = np.blackman(1501)   # analysis window
N = 2048                # FFT size
t = -90                 # magnitude threshold
minf0 = 100             # lower bound of the f0 search range
maxf0 = 300             # upper bound of the f0 search range
f0et = 1                # f0 error threshold
maxnpeaksTwm = 4        # not used by the statements below
H = 128                 # hop size
x1 = x[int(1.5*fs):int(1.8*fs)]   # excerpt from 1.5 s to 1.8 s; not used by the statements below

plt.figure(1, figsize=(9, 7))
mX, pX = STFT.stftAnal(x, w, N, H)
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
f0 = UF.cleaningTrack(f0, 5)
yf0 = UF.sinewaveSynth(f0, .8, H, fs)   # synthesized from the f0 track; not used by the statements below
f0[f0==0] = np.nan   # presumably so that frames without f0 are left blank in the plot
maxplotfreq = 800.0  # only frequencies up to this value are displayed
numFrames = int(mX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)                             
binFreq = fs*np.arange(N*maxplotfreq/fs)/N                        
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:int(N*maxplotfreq/fs+1)]))
plt.autoscale(tight=True)
  
# f0 track drawn over the spectrogram
plt.plot(frmTime, f0, linewidth=2, color='k')
plt.autoscale(tight=True)
plt.title('mX + f0 (piano.wav), TWM')

plt.tight_layout()
Beispiel #34
0
def segmentStableNotesRegions(inputFile='../../sounds/sax-phrase-short.wav',
                              stdThsld=10,
                              minNoteDur=0.1,
                              winStable=3,
                              window='hamming',
                              M=1024,
                              N=2048,
                              H=256,
                              f0et=5.0,
                              t=-100,
                              minf0=310,
                              maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)  
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): integer array of shape (numSegments, 2) holding the starting and
                               ending frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)  #reading inputFile
    w = get_window(window, M)  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to Cents (as described in pdf document)
    # log2(0) yields -inf (the warning is expected and silenced); those frames are
    # then given the placeholder value -9999.
    # NOTE(review): a run of such frames has zero deviation and is therefore
    # reported as stable - confirm this is wanted.
    with np.errstate(divide='ignore'):
        f0inCents = 1200 * np.log2(f0 / 55.0)
    f0inCents[np.isneginf(f0inCents)] = -9999  # avoids -infs

    #2. create an array containing standard deviation of last winStable samples
    # The first winStable - 1 frames have no complete window; they are set to +inf
    # so they never pass the threshold.
    stdevs = np.full(f0inCents.size, np.inf)
    for i in range(winStable - 1, f0inCents.size):
        stdevs[i] = np.std(f0inCents[i - winStable + 1:i + 1])

    #3. apply threshold on standard deviation values to find indexes of the stable points in melody
    lessThanThsldIndexs = np.where(stdevs < stdThsld)[0]

    if lessThanThsldIndexs.size == 0:
        segments = np.zeros((0, 2), dtype=int)
    else:
        #4. create segments of continuous stable points such that consecutive stable points belong to same segment
        # gaps[j] is the position (inside lessThanThsldIndexs) of the last frame of a
        # run that is followed by a gap; the final run is closed by the last stable
        # frame.
        gaps = np.where(np.diff(lessThanThsldIndexs) > 1)[0]
        starts = lessThanThsldIndexs[np.concatenate(([0], gaps + 1))]
        ends = lessThanThsldIndexs[np.concatenate(
            (gaps, [lessThanThsldIndexs.size - 1]))]

        #5. apply segment filtering, i.e. remove segments with are < minNoteDur in length
        longEnough = (ends - starts) * H / float(fs) > minNoteDur
        segments = np.column_stack((starts, ends))[longEnough]

    plotSpectogramF0Segments(x, fs, w, N, H, f0,
                             segments)  # Plot spectrogram and F0 if needed

    # return segments
    return segments