def analyse_cry(dRec, xDB):
    """
    dRec must be a dictionary of the form:
        dInput = {
            'iSampleRate': iSampleRate,
            'aTime': aTime,
            'aAudio': aAudio,
            'aCorr': aCorr
        }
    xDB can be a string (directory name) or a dictionary of the above form.

    Correlation and least-squares comparison between audio files;
    classifications based on the cry classifications by Priscilla Dunstan.
    """
    import numpy as np

    def normalize(aArray):
        iMax = np.max(aArray)
        return [i / iMax for i in aArray]

    def calc_misfit(a1, a2):
        """
        Calculates the misfit between the two arrays and returns a scalar.
        The shortest array length is taken.
        """
        iLen = len(a1) if len(a1) < len(a2) else len(a2)
        iMisfit = 0
        for i in range(iLen):
            iMisfit += (a1[i] - a2[i])**2
        return iMisfit

    def round_sci(iNum, iSigs):
        """ round a number to the given significant figures """
        return float(format(iNum, '.{0}g'.format(iSigs)))

    iRecSampleRate = dRec['iSampleRate']
    aRecTime_NoShift = dRec['aTime']
    aRecAudio = dRec['aAudio']
    aRecCorr_NoShift = dRec['aCorr']

    if type(xDB) == str:
        import common_fxns
        sDBDir = xDB
        (iDBSampleRate, aDBTime_NoShift, aDBAudio,
         aDBCorr_NoShift) = common_fxns.process_dir(sDBDir)[0:4]
    elif type(xDB) == dict:
        iDBSampleRate = xDB['iSampleRate']
        aDBTime_NoShift = xDB['aTime']
        aDBAudio = xDB['aAudio']
        aDBCorr_NoShift = xDB['aCorr']
    else:
        raise ValueError('Please enter a string or dict of the correct format')

    aRecCorr_NoShift = normalize(aRecCorr_NoShift)
    aDBCorr_NoShift = normalize(aDBCorr_NoShift)

    #shift the Gaussian correlations so that the misfit has more meaning:
    #shift such that the maxima line up
    iIndexMax = np.argmax(aRecCorr_NoShift)
    iDBIndexMax = np.argmax(aDBCorr_NoShift)

    #always shift right: therefore shift the signal whose argmax is smaller
    #for y values prepend 0, for time values prepend time increments
    if iDBIndexMax > iIndexMax:
        #shift signal 1 (the recording) to the right
        iShift = iDBIndexMax - iIndexMax
        aRec = np.zeros(len(aRecCorr_NoShift) + iShift)
        aRec[iShift:] = aRecCorr_NoShift
        aRecTime = np.zeros(len(aRecTime_NoShift) + iShift)
        aRecTime[:iShift] = np.array(
            [i / iRecSampleRate for i in range(iShift)])
        aRecTime[iShift:] = aRecTime_NoShift + iShift / iRecSampleRate
        aDB = aDBCorr_NoShift
        aDBTime = aDBTime_NoShift
    else:
        #shift signal 2 (the database entry) to the right
        iShift = iIndexMax - iDBIndexMax
        aDB = np.zeros(len(aDBCorr_NoShift) + iShift)
        aDB[iShift:] = aDBCorr_NoShift
        aDBTime = np.zeros(len(aDBTime_NoShift) + iShift)
        aDBTime[:iShift] = np.array([i / iDBSampleRate for i in range(iShift)])
        aDBTime[iShift:] = aDBTime_NoShift + iShift / iDBSampleRate
        aRec = aRecCorr_NoShift
        aRecTime = aRecTime_NoShift

    #correlation between the 2 signals
    #order matters for the calculations used to get iCorrArea
    bRecLonger = len(aRecTime) > len(aDBTime)
    #fftconvolve needs the longer array first; the time axis must be the longer one
    aCorrTime = aRecTime if bRecLonger else aDBTime
    aLonger = aRec if bRecLonger else aDB
    aShorter = aDB if bRecLonger else aRec
    #np.correlate(aRec, aDB, 'same') takes too long; fftconvolve is faster
    #with near-identical results
    from scipy import signal
    aCorr = signal.fftconvolve(aLonger, aShorter, mode='same')

    #least-squares comparison to get the misfit
    iMisfit = calc_misfit(aRec, aDB)
    iCorrArea = np.trapz(aCorr, aCorrTime)

    #round to 3 significant figures
    iMisfit = round_sci(iMisfit, 3)
    iCorrArea = round_sci(iCorrArea, 3)

    return iMisfit, iCorrArea

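# A minimal, self-contained usage sketch for analyse_cry (not part of the
# original module): it builds two synthetic inputs of the dictionary form
# described in the docstring above and compares them. All arrays below are
# placeholders; a real caller would pass data produced by
# common_fxns.process_dir or by isolate_cries.
def _demo_analyse_cry():
    import numpy as np
    iSampleRate = 8000
    aTime = np.arange(iSampleRate) / iSampleRate
    #two Gaussian correlation curves with slightly offset peaks
    aCorrRec = np.exp(-((aTime - 0.40)**2) / (2 * 0.05**2))
    aCorrDB = np.exp(-((aTime - 0.55)**2) / (2 * 0.05**2))
    dRec = {'iSampleRate': iSampleRate, 'aTime': aTime,
            'aAudio': np.zeros_like(aTime), 'aCorr': aCorrRec}
    dDB = {'iSampleRate': iSampleRate, 'aTime': aTime,
           'aAudio': np.zeros_like(aTime), 'aCorr': aCorrDB}
    iMisfit, iCorrArea = analyse_cry(dRec, dDB)
    print('misfit:', iMisfit, 'correlation area:', iCorrArea)
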
def isolate_cries(sDir, iResample=8000, bSave=False, sCutDir=None, iSigmaSecs=0.05):
    """
    Expected-peak correlation by convolution, then edge detection.

    iResample = 8000 Hz resampling rate

    Hard-coded variables obtained by trial:
        linear smoothing coefficient:    iLinearSmooth = 50 frames
        minima threshold factor:         iThreshFact = 5 times
        horizontal cluster threshold:    iThreshCluster = 0.01 seconds * iSampleRate
        keep-sections threshold factor:  iThreshKeepFact = 4

    Can save each cut to a separate file (bSave=True; sCutDir must then be given).

    Returns the sample rate and, for each kept cut, the smoothed audio and the
    peak data (correlation with a Gaussian).
    """
    import numpy as np
    from scipy import signal
    import common_fxns

    iLinearSmooth = 50
    (iSampleRate, aTime, aAudio, aCorr, aOrigAudio) = common_fxns.process_dir(
        sDir, iSmooth=iLinearSmooth, iResample=iResample, iSigmaSecs=iSigmaSecs)

    iThreshFact = 5.0
    iThreshClusterSecs = 0.01
    iThreshCluster = iThreshClusterSecs * iSampleRate
    iThreshKeepFact = 4.0

    from scipy import ndimage
    aFirstDeriv = ndimage.sobel(aCorr)
    aFirstDeriv = common_fxns.smooth(aFirstDeriv, iLinearSmooth)
    aSecondDeriv = ndimage.sobel(aFirstDeriv)

    #---Get minima and filter
    #intersection of the derivative with the zero line (maxima and minima)
    aZero = np.zeros(np.shape(aSecondDeriv))
    iTolerance = 10.0  #tolerance to zero-intersect
    aZeroIntersect = np.argwhere(
        np.isclose(aZero, aFirstDeriv, atol=iTolerance)).reshape(-1)
    #positive second derivative (minima only)
    aMinimaIndeces = np.array(
        [i for i in aZeroIntersect if aSecondDeriv[i] > 0])

    #apply threshold filter for minima (y-axis filter)
    iMax = np.max(aCorr)
    iMin = np.min(aCorr)
    iAmplThresh = (iMax - iMin) / iThreshFact
    #we only want to keep low minima
    aMinimaIndeces = np.array(
        [i for i in aMinimaIndeces if aCorr[i] < iAmplThresh])

    #apply proximity filter to single out consecutive groups of minima
    lMinimaIndeces = []
    for i in range(len(aMinimaIndeces) - 1):
        if aMinimaIndeces[i + 1] - aMinimaIndeces[i] > iThreshCluster:
            lMinimaIndeces.append(aMinimaIndeces[i])
    if len(aMinimaIndeces) > 0:
        lMinimaIndeces.append(aMinimaIndeces[-1])
    lMinimaIndecesFilt = [int(i) for i in lMinimaIndeces]

    #add the beginning and end of the track
    lMinimaIndecesFilt.insert(0, 0)
    lMinimaIndecesFilt.insert(len(lMinimaIndecesFilt), int(len(aCorr) - 1))

    #get time values from indices
    aTimeIntersects = np.array(aTime)[lMinimaIndecesFilt]

    #cut up the audio according to the minima indices
    iMin = 1
    iCut = 1
    laAudios = []
    laCorrs = []
    while iMin < len(lMinimaIndecesFilt):
        iStart = lMinimaIndecesFilt[iMin - 1]
        iEnd = lMinimaIndecesFilt[iMin]
        aAudioCut = np.array(aAudio[iStart:iEnd])
        aCorrAudioCut = np.array(aCorr[iStart:iEnd])
        aOrigAudioCut = np.array(aOrigAudio[iStart:iEnd])
        if aCorrAudioCut.any():
            #keep only peaks with a minimum amplitude
            bKeep = ((np.max(aCorrAudioCut) - np.min(aCorrAudioCut))
                     > np.max(aCorr) / iThreshKeepFact)
        else:
            bKeep = False
        #throw away sections whose amplitude is less than max/iThreshKeepFact
        if bKeep:
            if bSave:
                sDirCutNum = '{0}/cut{1}.wav'.format(sCutDir, iCut)
                from scipy.io.wavfile import write
                write(sDirCutNum, iSampleRate, aOrigAudioCut)
            laAudios.append(aAudioCut)
            laCorrs.append(aCorrAudioCut)
            iCut += 1
        iMin += 1

    return iSampleRate, laAudios, laCorrs

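# Illustrative usage sketch (not part of the original module) for the saving
# variant of isolate_cries defined above. It assumes a directory of recordings
# that common_fxns.process_dir can read; 'recordings/session1' and 'cuts' are
# placeholder paths.
def _demo_isolate_cries_save():
    import os
    sCutDir = 'cuts'
    os.makedirs(sCutDir, exist_ok=True)
    #isolate the individual cries and write each kept cut to cuts/cutN.wav
    iSampleRate, laAudios, laCorrs = isolate_cries(
        'recordings/session1', iResample=8000, bSave=True, sCutDir=sCutDir)
    print('{0} cries kept at {1} Hz'.format(len(laAudios), iSampleRate))
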
def isolate_cries(sDir, iResample=8000, iSigmaSecs=0.05):
    """
    Expected-peak correlation by convolution, then edge detection.

    iResample = 8000 Hz resampling rate

    Hard-coded variables obtained by trial:
        linear smoothing coefficient:    iLinearSmooth = 50 frames
        minima threshold factor:         iThreshFact = 5 times
        horizontal cluster threshold:    iThreshCluster = 0.01 seconds * iSampleRate
        keep-sections threshold factor:  iThreshKeepFact = 4

    Returns the sample rate and, for each kept cut, the smoothed audio and the
    peak data (correlation with a Gaussian).
    """
    import numpy as np
    from scipy import signal
    import common_fxns

    iLinearSmooth = 50
    (iSampleRate, aTime, aAudio, aCorr, aOrigAudio) = common_fxns.process_dir(
        sDir, iSmooth=iLinearSmooth, iResample=iResample, iSigmaSecs=iSigmaSecs)

    iThreshFact = 5.0
    iThreshClusterSecs = 0.01
    iThreshCluster = iThreshClusterSecs * iSampleRate
    iThreshKeepFact = 4.0

    from scipy import ndimage
    aFirstDeriv = ndimage.sobel(aCorr)
    aFirstDeriv = common_fxns.smooth(aFirstDeriv, iLinearSmooth)
    aSecondDeriv = ndimage.sobel(aFirstDeriv)

    #---Get minima and filter
    #intersection of the derivative with the zero line (maxima and minima)
    aZero = np.zeros(np.shape(aSecondDeriv))
    iTolerance = 10.0  #tolerance to zero-intersect
    aZeroIntersect = np.argwhere(
        np.isclose(aZero, aFirstDeriv, atol=iTolerance)).reshape(-1)
    #positive second derivative (minima only)
    aMinimaIndeces = np.array(
        [i for i in aZeroIntersect if aSecondDeriv[i] > 0])

    #apply threshold filter for minima (y-axis filter)
    iMax = np.max(aCorr)
    iMin = np.min(aCorr)
    iAmplThresh = (iMax - iMin) / iThreshFact
    #we only want to keep low minima
    aMinimaIndeces = np.array(
        [i for i in aMinimaIndeces if aCorr[i] < iAmplThresh])

    #apply proximity filter to single out consecutive groups of minima
    lMinimaIndeces = []
    for i in range(len(aMinimaIndeces) - 1):
        if aMinimaIndeces[i + 1] - aMinimaIndeces[i] > iThreshCluster:
            lMinimaIndeces.append(aMinimaIndeces[i])
    if len(aMinimaIndeces) > 0:
        lMinimaIndeces.append(aMinimaIndeces[-1])
    lMinimaIndecesFilt = [int(i) for i in lMinimaIndeces]

    #add the beginning and end of the track
    lMinimaIndecesFilt.insert(0, 0)
    lMinimaIndecesFilt.insert(len(lMinimaIndecesFilt), int(len(aCorr) - 1))

    #get time values from indices
    aTimeIntersects = np.array(aTime)[lMinimaIndecesFilt]

    #cut up the audio according to the minima indices
    iMin = 1
    iCut = 1
    laAudios = []
    laCorrs = []
    while iMin < len(lMinimaIndecesFilt):
        iStart = lMinimaIndecesFilt[iMin - 1]
        iEnd = lMinimaIndecesFilt[iMin]
        aAudioCut = np.array(aAudio[iStart:iEnd])
        aCorrAudioCut = np.array(aCorr[iStart:iEnd])
        aOrigAudioCut = np.array(aOrigAudio[iStart:iEnd])
        if aCorrAudioCut.any():
            #keep only peaks with a minimum amplitude
            bKeep = ((np.max(aCorrAudioCut) - np.min(aCorrAudioCut))
                     > np.max(aCorr) / iThreshKeepFact)
        else:
            bKeep = False
        #throw away sections whose amplitude is less than max/iThreshKeepFact
        if bKeep:
            laAudios.append(aAudioCut)
            laCorrs.append(aCorrAudioCut)
            iCut += 1
        iMin += 1

    return iSampleRate, laAudios, laCorrs

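# Small self-contained sketch (not from the original code) of the minima
# detection idea used above: differentiate a smooth correlation curve with a
# Sobel filter and keep points where the derivative crosses zero while the
# second derivative is positive. For simplicity the zero crossing here is
# found by a sign change rather than by the np.isclose tolerance used in
# isolate_cries; the two-peak signal is a synthetic stand-in for aCorr.
def _demo_minima_detection():
    import numpy as np
    from scipy import ndimage
    x = np.linspace(0.0, 2.0, 2000)
    aCorr = (np.exp(-((x - 0.5)**2) / (2 * 0.1**2))
             + np.exp(-((x - 1.5)**2) / (2 * 0.1**2)))
    aFirstDeriv = ndimage.sobel(aCorr)         #~first derivative
    aSecondDeriv = ndimage.sobel(aFirstDeriv)  #~second derivative
    #indices where the derivative changes sign from negative to positive
    aSignChange = np.argwhere(
        (aFirstDeriv[:-1] < 0) & (aFirstDeriv[1:] >= 0)).reshape(-1)
    lMinima = [int(i) for i in aSignChange if aSecondDeriv[i] > 0]
    print('minima found near x =', [round(float(x[i]), 2) for i in lMinima])
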
def analyse_cry(xRec, xDB):
    """
    Each input must be either a path string or a dictionary of the form:
        dInput = {
            'iSampleRate': iSampleRate,
            'aTime': aTime,
            'aAudio': aAudio,
            'aCorr': aCorr
        }

    Correlation and least-squares comparison between audio files;
    classifications based on the cry classifications by Priscilla Dunstan.
    """
    import numpy as np
    from scipy import signal
    import common_fxns

    def normalize(aArray):
        iMax = np.max(aArray)
        return [i / iMax for i in aArray]

    def calc_misfit(a1, a2):
        """
        Calculates the misfit between the two arrays and returns a scalar.
        The shortest array length is taken.
        """
        iLen = len(a1) if len(a1) < len(a2) else len(a2)
        iMisfit = 0
        for i in range(iLen):
            iMisfit += (a1[i] - a2[i])**2
        return iMisfit

    def round_sci(iNum, iSigs):
        """ round a number to the given significant figures """
        return float(format(iNum, '.{0}g'.format(iSigs)))

    if type(xRec) == str:
        sRecDir = xRec
        (iRecSampleRate, aRecTime_NoShift, aRecAudio,
         aRecCorr_NoShift) = common_fxns.process_dir(sRecDir)[0:4]
    elif type(xRec) == dict:
        iRecSampleRate = xRec['iSampleRate']
        aRecTime_NoShift = xRec['aTime']
        aRecAudio = xRec['aAudio']
        aRecCorr_NoShift = xRec['aCorr']
    else:
        raise ValueError('Please enter a string or dict of the correct format')

    if type(xDB) == str:
        sDBDir = xDB
        (iDBSampleRate, aDBTime_NoShift, aDBAudio,
         aDBCorr_NoShift) = common_fxns.process_dir(sDBDir)[0:4]
    elif type(xDB) == dict:
        iDBSampleRate = xDB['iSampleRate']
        aDBTime_NoShift = xDB['aTime']
        aDBAudio = xDB['aAudio']
        aDBCorr_NoShift = xDB['aCorr']
    else:
        raise ValueError('Please enter a string or dict of the correct format')

    aRecCorr_NoShift = normalize(aRecCorr_NoShift)
    aDBCorr_NoShift = normalize(aDBCorr_NoShift)

    #shift the Gaussian correlations so that the misfit has more meaning:
    #shift such that the maxima line up
    iIndexMax = np.argmax(aRecCorr_NoShift)
    iDBIndexMax = np.argmax(aDBCorr_NoShift)

    #always shift right: therefore shift the signal whose argmax is smaller
    #for y values prepend 0, for time values prepend time increments
    if iDBIndexMax > iIndexMax:
        #shift signal 1 (the recording) to the right
        iShift = iDBIndexMax - iIndexMax
        aRec = np.zeros(len(aRecCorr_NoShift) + iShift)
        aRec[iShift:] = aRecCorr_NoShift
        aRecTime = np.zeros(len(aRecTime_NoShift) + iShift)
        aRecTime[:iShift] = np.array(
            [i / iRecSampleRate for i in range(iShift)])
        aRecTime[iShift:] = aRecTime_NoShift + iShift / iRecSampleRate
        aDB = aDBCorr_NoShift
        aDBTime = aDBTime_NoShift
    else:
        #shift signal 2 (the database entry) to the right
        iShift = iIndexMax - iDBIndexMax
        aDB = np.zeros(len(aDBCorr_NoShift) + iShift)
        aDB[iShift:] = aDBCorr_NoShift
        aDBTime = np.zeros(len(aDBTime_NoShift) + iShift)
        aDBTime[:iShift] = np.array([i / iDBSampleRate for i in range(iShift)])
        aDBTime[iShift:] = aDBTime_NoShift + iShift / iDBSampleRate
        aRec = aRecCorr_NoShift
        aRecTime = aRecTime_NoShift

    #correlation between the 2 signals
    aCorr = np.correlate(aRec, aDB, 'same')
    aCorrTime = aRecTime if len(aRecTime) > len(aDBTime) else aDBTime

    #least-squares comparison to get the misfit
    iMisfit = calc_misfit(aRec, aDB)
    iCorrArea = np.trapz(aCorr, aCorrTime)

    #round to 3 significant figures
    iMisfit = round_sci(iMisfit, 3)
    iCorrArea = round_sci(iCorrArea, 3)

    return iMisfit, iCorrArea

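# Illustrative pipeline sketch (not part of the original module): compare each
# cry isolated from a recording directory against reference database
# directories and pick the closest match by misfit. 'recordings/session1' and
# the cry_db/* paths are placeholders, and aTime is rebuilt from the sample
# rate because isolate_cries does not return a time axis.
def _demo_classify_cuts():
    import numpy as np
    sRecDir = 'recordings/session1'                #hypothetical input
    lDBDirs = ['cry_db/hungry', 'cry_db/tired']    #hypothetical references
    iSampleRate, laAudios, laCorrs = isolate_cries(sRecDir)
    for aAudio, aCorr in zip(laAudios, laCorrs):
        aTime = np.arange(len(aCorr)) / iSampleRate
        dRec = {'iSampleRate': iSampleRate, 'aTime': aTime,
                'aAudio': aAudio, 'aCorr': aCorr}
        #the misfit is a least-squares distance, so smaller means closer
        lResults = [(analyse_cry(dRec, sDB), sDB) for sDB in lDBDirs]
        (iMisfit, iCorrArea), sBestMatch = min(lResults, key=lambda t: t[0][0])
        print('best match:', sBestMatch, 'misfit:', iMisfit,
              'correlation area:', iCorrArea)
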