def testFeatures():
    wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Denoising_paper_data\Primary_dataset\kiwi\male\male1.wav')
    fs = wavobj.rate
    data = wavobj.data
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(sampleRate=fs, window_width=256, incr=128)
    # The next lines are to get a spectrogram that *should* precisely match the Raven one
    #sg = sp.spectrogram(data, multitaper=False, window_width=256, incr=128, window='Ones')
    #sg = sg ** 2
    sg = sp.spectrogram(data, sgType='Standard', window_width=256, incr=128, window='Hann')

    f = Features(data, fs, 256, 128)
    features = []
    # Loop over the segments (and time slices within?)
    mfcc = f.get_mfcc().tolist()
    # features.append(mfcc.tolist())
    we = f.get_WE()
    we = we.transpose().tolist()

    # how to combine features with different resolution?
    features.append([f.get_Raven_spectrogram_measurements(sg=sg, fs=fs, window_width=256, f1=0, f2=np.shape(sg)[1], t1=0, t2=np.shape(sg)[0]),
                     f.get_Raven_robust_measurements(sg, fs, 0, np.shape(sg)[1], 0, np.shape(sg)[0]),
                     f.get_Raven_waveform_measurements(data, fs, 0, len(data)),
                     f.wiener_entropy(sg)])

    # Will need to think about feature vector length for the librosa features, since they are on fixed windows
    f.get_chroma()
    f.get_mfcc()
    f.get_tonnetz()
    f.get_spectral_features()
    f.get_lpc(data, order=44)
def __init__(self, configdir, filterdir, folderTrain1=None, folderTrain2=None, recogniser=None, imgWidth=0, CLI=False):
    self.filterdir = filterdir
    self.configdir = configdir
    cl = SupportClasses.ConfigLoader()
    self.FilterDict = cl.filters(filterdir, bats=False)
    self.LearningDict = cl.learningParams(os.path.join(configdir, "LearningParams.txt"))
    self.sp = SignalProc.SignalProc(self.LearningDict['sgramWindowWidth'], self.LearningDict['sgramHop'])
    self.imgsize = [self.LearningDict['imgX'], self.LearningDict['imgY']]
    self.tmpdir1 = False
    self.tmpdir2 = False

    self.CLI = CLI
    if CLI:
        self.filterName = recogniser
        self.folderTrain1 = folderTrain1
        self.folderTrain2 = folderTrain2
        self.imgWidth = imgWidth
        self.autoThr = True
        self.correction = True
        self.annotatedAll = True
    else:
        self.autoThr = False
        self.correction = False
        self.imgWidth = imgWidth
def loadFile(filename, duration=0, offset=0, fs=0, denoise=False, f1=0, f2=0):
    """ Read audio file and preprocess as required. """
    if duration == 0:
        duration = None

    sp = SignalProc.SignalProc(256, 128)
    sp.readWav(filename, duration, offset)
    sp.resample(fs)
    sampleRate = sp.sampleRate
    audiodata = sp.data

    # pre-process
    if denoise:
        WF = WaveletFunctions.WaveletFunctions(data=audiodata, wavelet='dmey2', maxLevel=10, samplerate=fs)
        audiodata = WF.waveletDenoise(thresholdType='soft', maxLevel=10)

    if f1 != 0 and f2 != 0:
        # audiodata = sp.ButterworthBandpass(audiodata, sampleRate, f1, f2)
        audiodata = sp.bandpassFilter(audiodata, sampleRate, f1, f2)

    return audiodata
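# Hedged usage sketch for loadFile above: the file path and band limits are
# illustrative assumptions, not values from the original code.
#
# audiodata = loadFile('recordings/kiwi_example.wav', duration=10, offset=0,
#                      fs=16000, denoise=False, f1=1000, f2=7000)
# print(len(audiodata))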
def generateImage(self, audiodata):
    ''' Generate spectrogram image '''
    sp = SignalProc.SignalProc(self.windowwidth, self.inc)
    sp.data = audiodata
    sp.sampleRate = self.fs
    sgRaw = sp.spectrogram(self.windowwidth, self.inc)
    maxg = np.max(sgRaw)
    return np.rot90(sgRaw / maxg).tolist()
def __init__(self, spInfo={}, wavelet='dmey2'):
    self.wavelet = wavelet
    self.spInfo = spInfo
    self.currentSR = 0
    if spInfo != {}:
        # for now, we default to the first subfilter:
        print("Detected %d subfilters in this filter" % len(spInfo["Filters"]))

    self.sp = SignalProc.SignalProc(256, 128)
def __init__(self, data=[], sampleRate=0, window_width=256, incr=128):
    self.data = data
    self.sampleRate = sampleRate
    self.window_width = window_width
    self.incr = incr

    sp = SignalProc.SignalProc(window_width=self.window_width, incr=self.incr)
    sp.data = self.data
    sp.sampleRate = self.sampleRate
    # The next lines are to get a spectrogram that *should* precisely match the Raven one
    self.sg = sp.spectrogram(sgType='Standard', window_width=self.window_width, incr=self.incr, window='Ones')
    self.sg = self.sg ** 2
def get_SAP_features(self, data, fs, window_width=256, incr=128, K=2):
    """ Compute the Sound Analysis Pro features, i.e., Wiener entropy, spectral derivative, and their variants.
    Most of the code is in SignalProc.py """
    sp = SignalProc.SignalProc(sampleRate=fs, window_width=window_width, incr=incr)
    spectral_deriv, sg, freq_mod, wiener_entropy, mean_freq, contours = sp.spectral_derivative(data, fs, window_width=window_width, incr=incr, K=K, threshold=0.5, returnAll=True)
    goodness_of_pitch = sp.goodness_of_pitch(spectral_deriv, sg)

    # Now compute the continuity over time, freq as mean duration of contours in window, mean frequency range
    # TODO
    return spectral_deriv, goodness_of_pitch, freq_mod, contours, wiener_entropy, mean_freq
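# A minimal check of get_SAP_features on synthetic noise (a sketch: it assumes
# numpy as np and the Features class from this file; the signal is made up).
def testSAPFeatures():
    fs = 16000
    data = np.random.randn(2 * fs)  # 2 s of white noise as a stand-in signal
    f = Features(data, fs, 256, 128)
    deriv, gop, fm, contours, went, mfreq = f.get_SAP_features(data, fs, window_width=256, incr=128, K=2)
    print(np.shape(deriv), np.shape(went))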
def __init__(self, data=[], sampleRate=0, window_width=256, incr=128):
    self.data = data
    self.sampleRate = sampleRate
    self.window_width = window_width
    self.incr = incr

    sp = SignalProc.SignalProc(sampleRate=self.sampleRate, window_width=self.window_width, incr=self.incr)
    # The next lines are to get a spectrogram that *should* precisely match the Raven one
    self.sg = sp.spectrogram(data, multitaper=False, window_width=self.window_width, incr=self.incr, window='Ones')
    self.sg = self.sg ** 2
def loadFile(filename, duration=0, offset=0, fs=0, denoise=False, f1=0, f2=0):
    """
    Read audio file and preprocess as required.
    :param filename:
    :param fs:
    :param f1:
    :param f2:
    :return:
    """
    import WaveletFunctions
    import SignalProc

    if offset == 0 and duration == 0:
        wavobj = wavio.read(filename)
    else:
        wavobj = wavio.read(filename, duration, offset)
    sampleRate = wavobj.rate
    audiodata = wavobj.data

    if audiodata.dtype != 'float':
        audiodata = audiodata.astype('float')  # / 32768.0
    if np.shape(np.shape(audiodata))[0] > 1:
        audiodata = audiodata[:, 0]

    if fs != 0 and sampleRate != fs:
        audiodata = librosa.core.audio.resample(audiodata, sampleRate, fs)
        sampleRate = fs

    # pre-process
    if denoise:
        WF = WaveletFunctions.WaveletFunctions(data=audiodata, wavelet='dmey2', maxLevel=10, samplerate=fs)
        audiodata = WF.waveletDenoise(thresholdType='soft', maxLevel=10)

    if f1 != 0 and f2 != 0:
        sp = SignalProc.SignalProc([], 0, 256, 128)
        # audiodata = sp.ButterworthBandpass(audiodata, sampleRate, f1, f2)
        audiodata = sp.bandpassFilter(audiodata, sampleRate, f1, f2)

    return audiodata, sampleRate
def __init__(self, configdir, filterdir, folderTrain1=None, folderTrain2=None, recogniser=None, imgWidth=0, CLI=False):
    # Two important things:
    # 1. LearningParams.txt, which is a dictionary of parameters, including the spectrogram parameters
    # 2. CLI: whether it runs off the command line, which makes picking the ROC curve parameters hard
    # Qn: what is imgWidth? Why not a learning param?
    self.filterdir = filterdir
    self.configdir = configdir
    cl = SupportClasses.ConfigLoader()
    self.FilterDict = cl.filters(filterdir, bats=False)
    self.LearningDict = cl.learningParams(os.path.join(configdir, "LearningParams.txt"))
    self.sp = SignalProc.SignalProc(self.LearningDict['sgramWindowWidth'], self.LearningDict['sgramHop'])
    self.imgsize = [self.LearningDict['imgX'], self.LearningDict['imgY']]
    self.tmpdir1 = False
    self.tmpdir2 = False
    self.ROCdata = {}

    self.CLI = CLI
    if CLI:
        self.filterName = recogniser
        self.folderTrain1 = folderTrain1
        self.folderTrain2 = folderTrain2
        self.imgWidth = imgWidth
        self.autoThr = True
        self.correction = True
        self.annotatedAll = True
    else:
        self.autoThr = False
        self.correction = False
        self.imgWidth = imgWidth
def testMC():
    import wavio
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui

    #wavobj = wavio.read('Sound Files/kiwi_1min.wav')
    wavobj = wavio.read('Sound Files/tril1.wav')
    fs = wavobj.rate
    data = wavobj.data  #[:20*fs]
    if data.dtype != 'float':
        data = data.astype('float')  #/ 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    import SignalProc
    sp = SignalProc.SignalProc(data, fs, 256, 128)
    sg = sp.spectrogram(data=data, window_width=256, incr=128, window='Hann', mean_normalise=True, onesided=True, multitaper=False, need_even=False)
    s = Segment.Segmenter(data, sg, sp, fs)
    #print np.shape(sg)
    #s1 = s.medianClip()
    s1, p, t = s.yin(returnSegs=True)

    app = QtGui.QApplication([])
    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)
    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)

    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10. * np.log10(sg))

    # vb2 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    # im2 = pg.ImageItem(enableMouse=False)
    # vb2.addItem(im2)
    # im2.setImage(c)

    vb3 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    im3 = pg.ImageItem(enableMouse=False)
    vb3.addItem(im3)
    im3.setImage(10. * np.log10(sg))

    vb4 = win.addViewBox(enableMouse=False, enableMenu=False, row=2, col=0)
    im4 = pg.PlotDataItem(enableMouse=False)
    vb4.addItem(im4)
    im4.setData(data)

    for seg in s1:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        #a.setRegion([seg[0],seg[1]])
        vb3.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
def generateFeatures(self, dirName, dataset, hop):
    ''' Read the segment library and generate features for training.
    Similar to SignalProc.generateFeaturesCNN, except this one saves images to disk instead of returning them.
    :param dataset: segments in the form of [[file, [segment], label], ..]
    :param hop:
    :return: save the preferred features into JSON files + save images. Currently the spectrogram images.
    '''
    count = 0
    dhop = hop
    eps = 0.0005
    specFrameSize = len(range(0, int(self.length * self.fs - self.windowwidth), self.inc))
    N = [0 for i in range(len(self.calltypes) + 1)]

    sp = SignalProc.SignalProc(self.windowwidth, self.inc)
    sp.sampleRate = self.fs

    for record in dataset:
        # Compute features, also consider tiny segments because this would be the case for song birds.
        duration = record[1][1] - record[1][0]
        hop = dhop[record[-1]]
        if duration < self.length:
            fileduration = wavio.readFmt(record[0])[1]
            record[1][0] = record[1][0] - (self.length - duration) / 2 - eps
            record[1][1] = record[1][1] + (self.length - duration) / 2 + eps
            if record[1][0] < 0:
                record[1][0] = 0
                record[1][1] = self.length + eps
            elif record[1][1] > fileduration:
                record[1][1] = fileduration
                record[1][0] = fileduration - self.length - eps
            if record[1][0] <= 0 and record[1][1] <= fileduration:
                n = 1
                hop = self.length
                duration = self.length + eps
            else:
                continue
        else:
            n = math.ceil((record[1][1] - record[1][0] - self.length) / hop + 1)
        print('* hop:', hop, 'n:', n, 'label:', record[-1])

        try:
            # load file
            sp.readWav(record[0], len=duration, off=record[1][0])
            sp.resample(self.fs)
            sgRaw = sp.spectrogram()
            # Could bandpass here if relevant:
            # if f1 != 0 and f2 != 0:
            #     audiodata = sp.bandpassFilter(audiodata, sampleRate, f1, f2)
        except Exception as e:
            print("Warning: failed to load audio because:", e)
            continue

        N[record[-1]] += n

        # Frequency masking
        bin_width = self.fs / 2 / np.shape(sgRaw)[1]
        lb = int(np.ceil(self.f1 / bin_width))
        ub = int(np.floor(self.f2 / bin_width))
        sgRaw[:, 0:lb] = 0.0
        sgRaw[:, ub:] = 0.0

        for i in range(int(n)):
            print('**', record[0], self.length, record[1][0] + hop * i, self.fs, '**')

            # Sgram images
            sgstart = int(hop * i * self.fs / sp.incr)
            sgend = sgstart + specFrameSize
            if sgend > np.shape(sgRaw)[0]:
                # Adjust the final frame to be full width
                sgend = np.shape(sgRaw)[0]
                sgstart = np.shape(sgRaw)[0] - specFrameSize
            sgRaw_i = sgRaw[sgstart:sgend, :]

            # Normalize and rotate
            maxg = np.max(sgRaw_i)
            sgRaw_i = np.rot90(sgRaw_i / maxg)

            # Save train data: individual images as npy
            np.save(os.path.join(dirName, str(record[-1]),
                                 str(record[-1]) + '_' + "%06d" % count + '_' + record[0].split(os.sep)[-1][:-4] + '.npy'),
                    sgRaw_i)
            count += 1

    print('\n\nCompleted feature extraction')
    return specFrameSize, N
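# Hedged usage sketch for generateFeatures: the `dataset` records are
# [file, [start, end], label], and `hop` maps each label to a hop length
# in seconds. Paths, labels and the trainer instance `t` are illustrative
# assumptions only.
#
# dataset = [['recs/kiwi1.wav', [12.0, 14.5], 0],
#            ['recs/noise3.wav', [0.0, 3.0], 1]]
# hop = {0: 0.5, 1: 1.0}
# specFrameSize, N = t.generateFeatures('features_dir', dataset, hop)
# print(specFrameSize, N)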
def reconWPT():
    import pylab as pl
    pl.ion()

    filename = 'Sound Files/tril1.wav'
    #filename = 'Sound Files/090811_184501.wav'
    #filename = 'Sound Files/kiwi_1min.wav'
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    data = wavobj.data
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = np.squeeze(data[:, 0])

    tbd = [0, 1, 3, 7, 15, 31]
    #tbd = np.concatenate([np.arange(30),np.arange(50,63)])
    #tbd = np.arange(50)
    listnodes = np.arange(63)
    listnodes = np.delete(listnodes, tbd)

    [lowd, highd, lowr, highr] = np.loadtxt('dmey.txt')
    wavelet = pywt.Wavelet(filter_bank=[lowd, highd, lowr, highr])
    wavelet.orthogonal = True

    wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode='symmetric', maxlevel=5)

    # Make a new tree with these in
    new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode='zero', maxlevel=5)

    # There seems to be a bit of a bug to do with the size of the reconstructed nodes, so prime them.
    # It's worse than that: pywavelets makes the whole tree, so if you don't give it blanks, it copies
    # the details from wp even though it wasn't asked for. And reconstruction with the zeros is
    # different to not reconstructing.
    for level in range(6):
        for n in new_wp.get_level(level, 'natural'):
            n.data = np.zeros(len(wp.get_level(level, 'natural')[0].data))

    # Copy thresholded versions of the leaves into the new wpt
    for l in listnodes:
        ind = convert(l)
        new_wp[ind].data = wp[ind].data

    newdata = new_wp.reconstruct(update=False)

    import SignalProc
    sp = SignalProc.SignalProc(newdata, sampleRate)
    pl.figure()
    pl.subplot(3, 1, 1)
    sg = sp.spectrogram(data, sampleRate)
    pl.imshow(10. * np.log10(sg).T)
    pl.subplot(3, 1, 2)
    sg = sp.spectrogram(newdata, sampleRate)
    pl.imshow(10. * np.log10(sg).T)

    #wavio.write('tril1_d1.wav', data.astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)
    wavobj = wavio.read('Sound Files/tril1_d.wav')
    data = wavobj.data
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = np.squeeze(data[:, 0])
    pl.subplot(3, 1, 3)
    sg = sp.spectrogram(data, sampleRate)
    pl.imshow(10. * np.log10(sg).T)
def showEnergies():
    import pylab as pl
    pl.ion()

    #filename = 'Sound Files/tril1_d1.wav'
    filename = 'Sound Files/tril1.wav'
    #filename = 'Sound Files/090811_184501.wav'
    #filename = 'Sound Files/kiwi_1min.wav'
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    data = wavobj.data
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = np.squeeze(data[:, 0])

    if os.path.isfile(filename + '.data'):
        file = open(filename + '.data', 'r')
        segments = json.load(file)
        file.close()
    if len(segments) > 0:
        if segments[0][0] == -1:
            del segments[0]

    data1 = data[int(segments[0][0] * sampleRate):int(segments[0][1] * sampleRate)]
    data2 = data[int(segments[1][0] * sampleRate):int(segments[1][1] * sampleRate)]
    data3 = data[int(segments[2][0] * sampleRate):int(segments[2][1] * sampleRate)]
    data4 = data[int(segments[3][0] * sampleRate):int(segments[3][1] * sampleRate)]
    data5 = data[int(segments[4][0] * sampleRate):int(segments[4][1] * sampleRate)]

    import SignalProc
    sp = SignalProc.SignalProc(data5, sampleRate)

    pl.figure()
    pl.subplot(5, 1, 1)
    sg = sp.spectrogram(data1, sampleRate)
    pl.imshow(10. * np.log10(sg))
    pl.subplot(5, 1, 2)
    sg = sp.spectrogram(data2, sampleRate)
    pl.imshow(10. * np.log10(sg))
    pl.subplot(5, 1, 3)
    sg = sp.spectrogram(data3, sampleRate)
    pl.imshow(10. * np.log10(sg))
    pl.subplot(5, 1, 4)
    sg = sp.spectrogram(data4, sampleRate)
    pl.imshow(10. * np.log10(sg))
    pl.subplot(5, 1, 5)
    sg = sp.spectrogram(data5, sampleRate)
    pl.imshow(10. * np.log10(sg))

    pl.figure()
    e1 = WaveletSegment.computeWaveletEnergy_1s(data1, 'dmey2')
    pl.subplot(5, 1, 1)
    pl.plot(e1)
    e2 = WaveletSegment.computeWaveletEnergy_1s(data2, 'dmey2')
    pl.subplot(5, 1, 2)
    pl.plot(e2)
    e3 = WaveletSegment.computeWaveletEnergy_1s(data3, 'dmey2')
    pl.subplot(5, 1, 3)
    pl.plot(e3)
    e4 = WaveletSegment.computeWaveletEnergy_1s(data4, 'dmey2')
    pl.subplot(5, 1, 4)
    pl.plot(e4)
    e5 = WaveletSegment.computeWaveletEnergy_1s(data5, 'dmey2')
    pl.subplot(5, 1, 5)
    pl.plot(e5)

    pl.figure()
    pl.plot(e1)
    pl.plot(e2)
    pl.plot(e3)
    pl.plot(e4)
    pl.plot(e5)
    #return e2
    pl.show()
import os
import wavio
from PyQt5.QtGui import QImage
import SignalProc as sp

"""
This script works in batch: it generates sound files from .bmp spectrogram images.
There are two options (the one not used is commented out):
1) Audible file -> frequency shift to hear bat echolocation [ON]
2) Same frequency band -> just the spectrogram inverted [OFF]
NOTE: we need an appropriate number of frequency bins in order to make invertSpectrogram work
"""

#dirName='/home/listanvirg/Data/Bat/BAT/TEST_DATA/'
#dirName='D:\\Desktop\\Documents\\Work\\Data\\Bat\\BAT\\TRAIN_DATA\\NONE'
#dirName='D:\\Desktop\\Documents\\Work\\Data\\Bat\\BAT\\CNN experiment\\TRAIN4'
dirName = "C:\\Users\\Virginia\\Documents\\GitHub\\AviaNZ\\Sound Files\\"

a = sp.SignalProc(window_width=1024, incr=512)

for root, dirs, files in os.walk(str(dirName)):
    for file in files:
        if file.endswith('.bmp'):
            bmpFile = root + '/' + file[:-4]
            imgFile = root + '/' + file
            print(imgFile)
            #img = mpimg.imread(imgFile)  # read image
            #img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
            #img2[-1, :] = 254 * np.ones((np.shape(img2)[1]))  # cut last row
            #from Julius readBmp: this works
            #silent=False
def showSegs():
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui
    import wavio
    import WaveletSegment
    from time import time

    #wavobj = wavio.read('Sound Files/tril1.wav')
    #wavobj = wavio.read('Sound Files/010816_202935_p1.wav')
    #wavobj = wavio.read('Sound Files/20170515_223004 piping.wav')
    wavobj = wavio.read('Sound Files/kiwi_1min.wav')
    fs = wavobj.rate
    data = wavobj.data  #[:20*fs]
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    import SignalProc
    sp = SignalProc.SignalProc(data, fs, 256, 128)
    sg = sp.spectrogram(data, multitaper=False)
    s = Segment(data, sg, sp, fs, 50)

    # FIR: threshold doesn't matter much, but low is better (0.01).
    # Amplitude: not great, will have to work on width and abs if want to use it (threshold about 0.6)
    # Power: OK, but threshold matters (0.5)
    # Median clipping: OK, threshold of 3 fine.
    # Onsets: Threshold of 4.0 was fine, lower not. Still no offsets!
    # Yin: Threshold 0.9 is pretty good
    # Energy: Not great, but thr 1.0
    ts = time()
    s1 = s.checkSegmentLength(s.segmentByFIR(0.1))
    s2 = s.checkSegmentLength(s.segmentByFIR(0.01))
    s3 = s.checkSegmentLength(s.medianClip(3.0))
    s4 = s.checkSegmentLength(s.medianClip(2.0))
    s5, p, t = s.yin(100, thr=0.5, returnSegs=True)
    s5 = s.checkSegmentLength(s5)
    s6 = s.mergeSegments(s2, s4)
    ws = WaveletSegment.WaveletSegment()
    s7 = ws.waveletSegment_test(None, data, fs, None, 'Kiwi', False)
    #print('Took {}s'.format(time() - ts))
    #s7 = s.mergeSegments(s1,s.mergeSegments(s3,s4))

    #s4, samp = s.segmentByFIR(0.4)
    #s4 = s.checkSegmentLength(s4)
    #s2 = s.segmentByAmplitude1(0.6)
    #s5 = s.checkSegmentLength(s.segmentByPower(0.3))
    #s6, samp = s.segmentByFIR(0.6)
    #s6 = s.checkSegmentLength(s6)
    #s7 = []
    #s5 = s.onsets(3.0)
    #s6 = s.segmentByEnergy(1.0,500)
    #s5 = s.Harma(5.0,0.8)
    #s4 = s.Harma(10.0,0.8)
    #s7 = s.Harma(15.0,0.8)
    #s2 = s.segmentByAmplitude1(0.7)
    #s3 = s.segmentByPower(1.)
    #s4 = s.medianClip(3.0)
    #s5 = s.onsets(3.0)
    #s6, p, t = s.yin(100,thr=0.5,returnSegs=True)
    #s7 = s.Harma(10.0,0.8)

    app = QtGui.QApplication([])
    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)
    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)

    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10. * np.log10(sg))

    vb2 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    im2 = pg.ImageItem(enableMouse=False)
    vb2.addItem(im2)
    im2.setImage(10. * np.log10(sg))

    vb3 = win.addViewBox(enableMouse=False, enableMenu=False, row=2, col=0)
    im3 = pg.ImageItem(enableMouse=False)
    vb3.addItem(im3)
    im3.setImage(10. * np.log10(sg))

    vb4 = win.addViewBox(enableMouse=False, enableMenu=False, row=3, col=0)
    im4 = pg.ImageItem(enableMouse=False)
    vb4.addItem(im4)
    im4.setImage(10. * np.log10(sg))

    vb5 = win.addViewBox(enableMouse=False, enableMenu=False, row=4, col=0)
    im5 = pg.ImageItem(enableMouse=False)
    vb5.addItem(im5)
    im5.setImage(10. * np.log10(sg))

    vb6 = win.addViewBox(enableMouse=False, enableMenu=False, row=5, col=0)
    im6 = pg.ImageItem(enableMouse=False)
    vb6.addItem(im6)
    im6.setImage(10. * np.log10(sg))

    vb7 = win.addViewBox(enableMouse=False, enableMenu=False, row=6, col=0)
    im7 = pg.ImageItem(enableMouse=False)
    vb7.addItem(im7)
    im7.setImage(10. * np.log10(sg))

    print("====")
    print(s1)
    for seg in s1:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb1.addItem(a, ignoreBounds=True)
    print(s2)
    for seg in s2:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb2.addItem(a, ignoreBounds=True)
    print(s3)
    for seg in s3:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb3.addItem(a, ignoreBounds=True)
    print(s4)
    for seg in s4:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb4.addItem(a, ignoreBounds=True)
    print(s5)
    for seg in s5:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb5.addItem(a, ignoreBounds=True)
    print(s6)
    for seg in s6:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb6.addItem(a, ignoreBounds=True)
    print(s7)
    for seg in s7:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb7.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
def setData(self):
    # update plot etc
    currseg1 = self.annots1[self.currpage - 1]
    currseg2 = self.annots2[self.currpage - 1]
    wav1 = currseg1.wavname
    wav2 = currseg2.wavname
    print("Showing:", wav1, wav2)

    wav1start = currseg1[0]
    wav2start = currseg2[0]
    wav1len = currseg1[1] - currseg1[0]
    wav2len = currseg2[1] - currseg2[0]
    # want to show equal duration for both spectrograms
    # so choose min, in case one of them is near file end
    wavlen = min(wav1len, wav2len)

    # TODO temporary hacks to avoid trying to read past wav end
    # (this happens if the adjustment forces the matching part out of
    # the file where the matching segment was found)
    if wav1start + wavlen > 900:
        print("Warning: adjusting shown period since requested segment %d-%d is not in file" % (wav1start, wav1start + wavlen))
        wav1start = 900 - wavlen
    if wav2start + wavlen > 900:
        wav2start = 900 - wavlen
        print("Warning: adjusting shown period since requested segment %d-%d is not in file" % (wav2start, wav2start + wavlen))

    self.sp1 = SignalProc.SignalProc(256, 128)
    self.sp1.readWav(wav1, off=wav1start, len=wavlen)
    self.sp2 = SignalProc.SignalProc(256, 128)
    self.sp2.readWav(wav2, off=wav2start, len=wavlen)

    _ = self.sp1.spectrogram()
    _ = self.sp2.spectrogram()
    sg1 = self.sp1.normalisedSpec("Log")
    sg2 = self.sp2.normalisedSpec("Log")
    self.sgMaximum = max(np.max(sg1), np.max(sg2))
    self.sgMinimum = min(np.min(sg1), np.min(sg2))

    self.plot1.setImage(sg1)
    self.plot2.setImage(sg2)
    self.plot1.setLookupTable(self.lut)
    self.plot2.setLookupTable(self.lut)
    # No scroll area yet
    # self.scroll.horizontalScrollBar().setValue(0)

    # axes
    minFreq = 0
    maxFreq = min(self.sp1.sampleRate, self.sp2.sampleRate)
    FreqRange = (maxFreq - minFreq) / 1000.
    SgSize1 = np.shape(self.sp1.sg)[1]
    SgSize2 = np.shape(self.sp2.sg)[1]
    ticks1 = [(0, minFreq / 1000.),
              (SgSize1 / 4, minFreq / 1000. + FreqRange / 4.),
              (SgSize1 / 2, minFreq / 1000. + FreqRange / 2.),
              (3 * SgSize1 / 4, minFreq / 1000. + 3 * FreqRange / 4.),
              (SgSize1, minFreq / 1000. + FreqRange)]
    ticks1 = [[(tick[0], "%.1f" % tick[1]) for tick in ticks1]]
    ticks2 = [(0, minFreq / 1000.),
              (SgSize2 / 4, minFreq / 1000. + FreqRange / 4.),
              (SgSize2 / 2, minFreq / 1000. + FreqRange / 2.),
              (3 * SgSize2 / 4, minFreq / 1000. + 3 * FreqRange / 4.),
              (SgSize2, minFreq / 1000. + FreqRange)]
    ticks2 = [[(tick[0], "%.1f" % tick[1]) for tick in ticks2]]
    self.sg_axis1.setTicks(ticks1)
    self.sg_axis2.setTicks(ticks2)
    self.sg_axis1.setLabel('kHz')
    self.sg_axis2.setLabel('kHz')

    # info fields
    self.labelPair.setText("Showing calls at %.1f-%.1f s and %.1f-%.1f s" % (currseg1[0], currseg1[1], currseg2[0], currseg2[1]))
    # self.labelCurrPage.setText("Page %s of %s" %(self.currpage, len(self.shifts)))
    # i = self.parent.allrecs.index(self.rec1)
    # j = self.parent.allrecs.index(self.rec2)
    # self.connectCheckbox.setChecked(self.parent.recConnections[i, j]==1)
    self.specControls.emitCol()
def cluster_ruru(sampRate):
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui
    import SignalProc
    import wavio

    d = pd.read_csv(r'D:\AviaNZ\Sound_Files\Denoising_paper_data\Primary_dataset\ruru\we2.tsv', sep="\t", header=None)
    data = d.values
    target = data[:, -1]
    fnames = data[:, 0]
    data = data[:, 1:-1]

    # dim reduction before clustering
    # pca = PCA(n_components=0.9)
    # data = pca.fit_transform(data)
    data = TSNE().fit_transform(data)
    learners = Clustering.Clustering(data, target)

    print('\n**************Ruru dataset******************')
    # Only choose algorithms that do not require n_clusters
    m = []
    print('\nDBSCAN--------------------------------------')
    model_dbscan = learners.DBscan(eps=0.5, min_samples=5)
    # print(model_dbscan.labels_)
    print('# clusters', len(set(model_dbscan.labels_)))
    m.append(learners.clusteringScore1(learners.targets, model_dbscan.labels_))

    print('\nBirch----------------------------------------')
    model_birch = learners.birch(threshold=0.88, n_clusters=None)
    # print(model_birch.labels_)
    print('# clusters', len(set(model_birch.labels_)))
    m.append(learners.clusteringScore1(learners.targets, model_birch.labels_))

    print('\nAgglomerative Clustering----------------------')
    # Either set n_clusters=None and compute_full_tree=True, or distance_threshold=None
    model_agg = learners.agglomerativeClustering(n_clusters=None, compute_full_tree=True,
                                                 distance_threshold=4.4, linkage='complete')
    model_agg.fit_predict(learners.features)
    # print(model_agg.labels_)
    print('# clusters', len(set(model_agg.labels_)))
    m.append(learners.clusteringScore1(learners.targets, model_agg.labels_))

    print('\nAffinity Propagation--------------------------')
    model_aff = learners.affinityPropagation(damping=0.8, max_iter=400, convergence_iter=50)
    # print(model_aff.labels_)
    print('# clusters', len(set(model_aff.labels_)))
    m.append(learners.clusteringScore1(learners.targets, model_aff.labels_))

    best_m = np.argmax(m, axis=0).tolist()  # Get the algorithm with the best performance on each index
    best_alg = max(set(best_m), key=best_m.count)  # Get the overall best algorithm

    # Analysis
    if best_alg == 0:
        model_best = model_dbscan
        print('\n***best clustering by: DBSCAN')
        print('predicted:\n', model_dbscan.labels_)
        print('actual:\n', learners.targets)
    elif best_alg == 1:
        model_best = model_birch
        print('\n***best clustering by: Birch')
        print('predicted:\n', model_birch.labels_)
        print('actual:\n', learners.targets)
    elif best_alg == 2:
        model_best = model_agg
        print('\n***best clustering by: Agglomerative')
        print('predicted:\n', model_agg.labels_)
        print('actual:\n', learners.targets)
    elif best_alg == 3:
        model_best = model_aff
        print('\n***best clustering by: Affinity')
        print('predicted:\n', model_aff.labels_)
        print('actual:\n', learners.targets)

    # plot the examples using the best clustering model
    # n_clusters = len(set(model_best.labels_))
    # get indices and plot them
    labels = list(set(model_best.labels_))
    app = QtGui.QApplication([])
    for label in labels:
        inds = np.where(model_best.labels_ == label)[0].tolist()
        mw = QtGui.QMainWindow()
        mw.show()
        mw.resize(1200, 800)
        win = pg.GraphicsLayoutWidget()
        mw.setCentralWidget(win)
        row = 0
        col = 0
        for i in inds:
            wavobj = wavio.read(fnames[i])
            fs = wavobj.rate
            audiodata = wavobj.data
            if audiodata.dtype != 'float':
                audiodata = audiodata.astype('float')
            if np.shape(np.shape(audiodata))[0] > 1:
                audiodata = audiodata[:, 0]
            if fs != sampRate:
                audiodata = librosa.core.audio.resample(audiodata, fs, sampRate)
                fs = sampRate
            sp = SignalProc.SignalProc(audiodata, fs, 128, 128)
            sg = sp.spectrogram(audiodata, multitaper=False)
            vb = win.addViewBox(enableMouse=False, enableMenu=False, row=row, col=col, invertX=True)
            vb2 = win.addViewBox(enableMouse=False, enableMenu=False, row=row + 1, col=col)
            im = pg.ImageItem(enableMouse=False)
            txt = fnames[i].split("/")[-1][:-4]
            lbl = pg.LabelItem(txt, rotateAxis=(1, 0), angle=179)
            vb.addItem(lbl)
            vb2.addItem(im)
            im.setImage(sg)
            im.setBorder('w')
            mw.setWindowTitle("Class " + str(label) + ' - ' + str(np.shape(inds)[0]) + ' calls')
            if row == 8:
                row = 0
                col += 1
            else:
                row += 2
    QtGui.QApplication.instance().exec_()
0]:
    label = 6
elif 'Rooster' in seg[4][0]:
    label = 7
else:
    continue

audiodata = loadFile(filename=os.path.join(root, file),
                     duration=seg[1] - seg[0], offset=seg[0],
                     fs=fs, denoise=False)
# minlen = minlen * fs
start = seg[0]
# start = int(seg[0] * fs)

sp = SignalProc.SignalProc(256, 128)
sp.data = audiodata
sp.sampleRate = fs
_ = sp.spectrogram(256, 128)

segment = Segment.Segmenter(sp, fs)
syls = segment.medianClip(thr=3, medfiltersize=5, minaxislength=9, minSegment=50)
if len(syls) == 0:
    # Sanity check
    segment = Segment.Segmenter(sp, fs)
    syls = segment.medianClip(thr=2, medfiltersize=5, minaxislength=9, minSegment=50)
syls = segment.checkSegmentOverlap(
def loadFile(self, species, anysound=False):
    print(self.filename)
    # Create an instance of the Signal Processing class
    if not hasattr(self, 'sp'):
        self.sp = SignalProc.SignalProc(self.config['window_width'], self.config['incr'])

    # Read audiodata or spectrogram
    if self.method == "Wavelets":
        self.sp.readWav(self.filename)
        self.sampleRate = self.sp.sampleRate
        self.audiodata = self.sp.data
        self.datalength = np.shape(self.audiodata)[0]
        print("Read %d samples, %f s at %d Hz" % (len(self.audiodata), float(self.datalength) / self.sampleRate, self.sampleRate))
    else:
        self.sp.readBmp(self.filename, rotate=False)
        self.sampleRate = self.sp.sampleRate
        self.datalength = self.sp.fileLength

    # Read in stored segments (useful when doing multi-species)
    self.segments = Segment.SegmentList()
    if species == ["Any sound"] or not os.path.isfile(self.filename + '.data') or self.method == "Click":
        # Initialize default metadata values
        self.segments.metadata = dict()
        self.segments.metadata["Operator"] = "Auto"
        self.segments.metadata["Reviewer"] = ""
        self.segments.metadata["Duration"] = float(self.datalength) / self.sampleRate
        # wipe all segments:
        print("Wiping all previous segments")
        self.segments.clear()
    else:
        self.segments.parseJSON(self.filename + '.data', float(self.datalength) / self.sampleRate)
        # wipe same species:
        for sp in species:
            # shorthand for double-checking that it's not "Any Sound" etc
            if sp in self.FilterDicts:
                spname = self.FilterDicts[sp]["species"]
                print("Wiping species", spname)
                oldsegs = self.segments.getSpecies(spname)
                for i in reversed(oldsegs):
                    wipeAll = self.segments[i].wipeSpecies(spname)
                    if wipeAll:
                        del self.segments[i]
        print("%d segments loaded from .data file" % len(self.segments))

    if self.method != "Click":
        # Do impulse masking by default
        if anysound:
            self.sp.data = self.sp.impMask(engp=70, fp=0.50)
        else:
            self.sp.data = self.sp.impMask()
        self.audiodata = self.sp.data
        del self.sp
    gc.collect()
def detectFile(self, speciesStr, filters):
    """ Actual worker for a file in the detection loop.
    Does not return anything - for use with external try/catch. """
    # Segment over pages separately, to allow dealing with large files smoothly:
    # TODO: page size fixed for now
    samplesInPage = 900 * 16000
    # (ceil division for large integers)
    numPages = (self.datalength - 1) // samplesInPage + 1

    # Actual segmentation happens here:
    for page in range(numPages):
        print("Segmenting page %d / %d" % (page + 1, numPages))
        start = page * samplesInPage
        end = min(start + samplesInPage, self.datalength)
        thisPageLen = (end - start) / self.sampleRate

        if thisPageLen < 2 and self.method != "Click":
            print("Warning: can't process short file ends (%.2f s)" % thisPageLen)
            continue

        # Process
        if speciesStr == "Any sound":
            # Create spectrogram for median clipping etc
            if not hasattr(self, 'sp'):
                self.sp = SignalProc.SignalProc(self.config['window_width'], self.config['incr'])
            self.sp.data = self.audiodata[start:end]
            self.sp.sampleRate = self.sampleRate
            _ = self.sp.spectrogram(window='Hann', mean_normalise=True, onesided=True, multitaper=False, need_even=False)
            self.seg = Segment.Segmenter(self.sp, self.sampleRate)
            # thisPageSegs = self.seg.bestSegments()
            thisPageSegs = self.seg.medianClip(thr=3.5)

            # Post-process:
            # 1. Delete windy segments
            # 2. Merge neighbours
            # 3. Delete short segments
            print("Segments detected: ", len(thisPageSegs))
            print("Post-processing...")
            maxgap = int(self.maxgap.value()) / 1000
            minlen = int(self.minlen.value()) / 1000
            maxlen = int(self.maxlen.value()) / 1000
            post = Segment.PostProcess(configdir=self.configdir,
                                       audioData=self.audiodata[start:end],
                                       sampleRate=self.sampleRate,
                                       segments=thisPageSegs,
                                       subfilter={}, cert=0)
            if self.wind:
                post.wind()
            post.joinGaps(maxgap)
            post.deleteShort(minlen)
            # avoid extra long segments (for Isabel)
            post.splitLong(maxlen)

            # adjust segment starts for 15min "pages"
            if start != 0:
                for seg in post.segments:
                    seg[0][0] += start / self.sampleRate
                    seg[0][1] += start / self.sampleRate

            # attach mandatory "Don't Know"s etc and put on self.segments
            self.makeSegments(post.segments)
            del self.seg
            gc.collect()
        else:
            if self.method != "Click":
                # read in the page and resample as needed
                self.ws.readBatch(self.audiodata[start:end], self.sampleRate, d=False, spInfo=filters, wpmode="new")

            data_test = []
            click_label = 'None'
            for speciesix in range(len(filters)):
                print("Working with recogniser:", filters[speciesix])
                if self.method != "Click":
                    # note: using 'recaa' mode = partial antialias
                    thisPageSegs = self.ws.waveletSegment(speciesix, wpmode="new")
                else:
                    click_label, data_test, gen_spec = self.ClickSearch(self.sp.sg, self.filename)
                    print('number of detected clicks = ', gen_spec)
                    thisPageSegs = []

                # Post-process:
                # CNN-classify, delete windy, rainy segments, check for FundFreq, merge gaps etc.
                print("Segments detected (all subfilters): ", thisPageSegs)
                if not self.testmode:
                    print("Post-processing...")
                # postProcess currently operates on a single-level list of segments,
                # so we run it over subfilters for wavelets:
                spInfo = filters[speciesix]
                for filtix in range(len(spInfo['Filters'])):
                    if not self.testmode:
                        # TODO: this is the full post-proc pipeline for birds and bats
                        # -- need to check how this should interact with the testmode
                        CNNmodel = None
                        if 'CNN' in spInfo:
                            if spInfo['CNN']['CNN_name'] in self.CNNDicts.keys():
                                # This list contains the model itself, plus parameters for running it
                                CNNmodel = self.CNNDicts[spInfo['CNN']['CNN_name']]

                        if self.method == "Click":
                            # bat-style CNN:
                            model = CNNmodel[0]
                            thr1 = CNNmodel[5][0]
                            thr2 = CNNmodel[5][1]
                            if click_label == 'Click':
                                # we enter the CNN only if we got a click
                                sg_test = np.ndarray(shape=(np.shape(data_test)[0],
                                                            np.shape(data_test[0][0])[0],
                                                            np.shape(data_test[0][0])[1]),
                                                     dtype=float)
                                spec_id = []
                                print('Number of file spectrograms = ', np.shape(data_test)[0])
                                for j in range(np.shape(data_test)[0]):
                                    maxg = np.max(data_test[j][0][:])
                                    sg_test[j][:] = data_test[j][0][:] / maxg
                                    spec_id.append(data_test[j][1:3])

                                # CNN classification of clicks
                                x_test = sg_test
                                test_images = x_test.reshape(x_test.shape[0], 6, 512, 1)
                                test_images = test_images.astype('float32')

                                # recovering labels
                                predictions = model.predict(test_images)
                                # predictions is an array #images x #classes
                                # whose entries are the probabilities for each class

                                # Create a label (list of dicts with species, certs) for the single segment
                                print('Assessing file label...')
                                label = self.File_label(predictions, thr1=thr1, thr2=thr2)
                                print('CNN detected: ', label)
                                if len(label) > 0:
                                    # Convert the annotation into a full segment in self.segments
                                    thisPageStart = start / self.sampleRate
                                    self.makeSegments([thisPageStart, thisPageLen, label])
                            else:
                                # do not create any segments
                                print("Nothing detected")
                        else:
                            # bird-style CNN and other processing:
                            post = Segment.PostProcess(configdir=self.configdir,
                                                       audioData=self.audiodata[start:end],
                                                       sampleRate=self.sampleRate,
                                                       tgtsampleRate=spInfo["SampleRate"],
                                                       segments=thisPageSegs[filtix],
                                                       subfilter=spInfo['Filters'][filtix],
                                                       CNNmodel=CNNmodel, cert=50)
                            print("Segments detected after WF: ", len(thisPageSegs[filtix]))

                            if self.wind and self.useWindF(spInfo['Filters'][filtix]['FreqRange'][0],
                                                           spInfo['Filters'][filtix]['FreqRange'][1]):
                                post.wind()
                            if CNNmodel:
                                print('Post-processing with CNN')
                                post.CNN()
                            if 'F0' in spInfo['Filters'][filtix] and 'F0Range' in spInfo['Filters'][filtix]:
                                if spInfo['Filters'][filtix]["F0"]:
                                    print("Checking for fundamental frequency...")
                                    post.fundamentalFrq()
                            post.joinGaps(maxgap=spInfo['Filters'][filtix]['TimeRange'][3])
                            post.deleteShort(minlength=spInfo['Filters'][filtix]['TimeRange'][0])

                            # adjust segment starts for 15min "pages"
                            if start != 0:
                                for seg in post.segments:
                                    seg[0][0] += start / self.sampleRate
                                    seg[0][1] += start / self.sampleRate
                            # attach filter info and put on self.segments:
                            self.makeSegments(post.segments, self.species[speciesix], spInfo["species"], spInfo['Filters'][filtix])
                    else:
                        # TODO: this is testmode, not using any bat stuff then,
                        # i.e. testmode is not adapted to bats
                        post = Segment.PostProcess(configdir=self.configdir,
                                                   audioData=self.audiodata[start:end],
                                                   sampleRate=self.sampleRate,
                                                   tgtsampleRate=spInfo["SampleRate"],
                                                   segments=thisPageSegs[filtix],
                                                   subfilter=spInfo['Filters'][filtix],
                                                   CNNmodel=None, cert=50)

                        # adjust segment starts for 15min "pages"
                        if start != 0:
                            for seg in post.segments:
                                seg[0][0] += start / self.sampleRate
                                seg[0][1] += start / self.sampleRate
                        # attach filter info and put on self.segments:
                        self.makeSegments(post.segments, self.species[speciesix], spInfo["species"], spInfo['Filters'][filtix])
def showSpecDerivs():
    import SignalProc
    reload(SignalProc)
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui
    import wavio

    #wavobj = wavio.read('Sound Files/tril1.wav')
    #wavobj = wavio.read('Sound Files/010816_202935_p1.wav')
    #wavobj = wavio.read('Sound Files/20170515_223004 piping.wav')
    wavobj = wavio.read('Sound Files/kiwi_1min.wav')
    fs = wavobj.rate
    data = wavobj.data[:20 * fs]
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(data, fs, 256, 128)
    sg = sp.spectrogram(data, multitaper=False)
    h, v, b = sp.spectralDerivatives()
    h = np.abs(np.where(h == 0, 0.0, 10.0 * np.log10(h)))
    v = np.abs(np.where(v == 0, 0.0, 10.0 * np.log10(v)))
    b = np.abs(np.where(b == 0, 0.0, 10.0 * np.log10(b)))
    s = Segment(data, sg, sp, fs, 50)

    hm = np.max(h[:, 10:], axis=1)
    inds = np.squeeze(np.where(hm > (np.mean(h[:, 10:] + 2.5 * np.std(h[:, 10:])))))
    segmentsh = s.identifySegments(inds, minlength=10)

    vm = np.max(v[:, 10:], axis=1)
    inds = np.squeeze(np.where(vm > (np.mean(v[:, 10:] + 2.5 * np.std(v[:, 10:])))))
    segmentsv = s.identifySegments(inds, minlength=10)

    bm = np.max(b[:, 10:], axis=1)
    segs = np.squeeze(np.where(bm > (np.mean(b[:, 10:] + 2.5 * np.std(b[:, 10:])))))
    segmentsb = s.identifySegments(segs, minlength=10)
    #print np.mean(h), np.max(h)
    #print np.where(h>np.mean(h)+np.std(h))

    app = QtGui.QApplication([])
    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)
    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)

    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10. * np.log10(sg))

    vb2 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    im2 = pg.ImageItem(enableMouse=False)
    vb2.addItem(im2)
    im2.setImage(h)
    for seg in segmentsh:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb2.addItem(a, ignoreBounds=True)

    vb3 = win.addViewBox(enableMouse=False, enableMenu=False, row=2, col=0)
    im3 = pg.ImageItem(enableMouse=False)
    vb3.addItem(im3)
    im3.setImage(v)
    for seg in segmentsv:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb3.addItem(a, ignoreBounds=True)

    vb4 = win.addViewBox(enableMouse=False, enableMenu=False, row=3, col=0)
    im4 = pg.ImageItem(enableMouse=False)
    vb4.addItem(im4)
    im4.setImage(b)
    for seg in segmentsb:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb4.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
def cluster_by_dist(dir, feature='we', n_mels=24, fs=0, minlen=0.2, f_1=0, f_2=0, denoise=False, single=False, distance='dtw', max_clusters=10):
    """ Given wav + annotation files,
    1) identify syllables using median clipping / FIR
    2) generate features WE/MFCC/chroma
    3) calculate DTW distances and decide class / generate new class
    :param dir: directory of audio and annotations
    :param feature: 'WE' or 'MFCC' or 'chroma'
    :param n_mels: number of mel coefs for MFCC
    :param fs: preferred sampling frequency, 0 leads to calculating it from the annotations
    :param minlen: min syllable length in secs
    :param f_1: lower frequency bound, 0 leads to calculating it from the annotations
    :param f_2: upper frequency bound, 0 leads to calculating it from the annotations
    :param denoise: wavelet denoise
    :param single: True means when there are multiple syllables in a segment, add only one syllable to the cluster info
    :param distance: 'dtw' or 'xcor'
    :return: possible clusters
    """
    import Segment
    import SignalProc
    from scipy import signal

    # Get flow and fhigh for bandpass from annotations
    lowlist = []
    highlist = []
    srlist = []
    for root, dirs, files in os.walk(str(dir)):
        for file in files:
            if file.endswith('.wav') and file + '.data' in files:
                wavobj = wavio.read(os.path.join(root, file))
                srlist.append(wavobj.rate)
                # Read the annotation
                segments = Segment.SegmentList()
                segments.parseJSON(os.path.join(root, file + '.data'))
                for seg in segments:
                    lowlist.append(seg[2])
                    highlist.append(seg[3])
    print(lowlist)
    print(highlist)
    print(srlist)

    if f_1 == 0:
        f_1 = np.min(lowlist)
    if f_2 == 0:
        f_2 = np.median(highlist)

    if fs == 0:
        arr = [4000, 8000, 16000]
        pos = np.abs(arr - np.median(highlist) * 2).argmin()
        fs = arr[pos]
    print('fs: ', fs)

    if fs > np.min(srlist):
        print(fs)
        fs = np.min(srlist)

    if fs < f_2 * 2 + 50:
        f_2 = fs // 2 - 50

    minlen_samples = minlen * fs

    print('Frequency band:', f_1, '-', f_2)
    print('fs: ', fs)

    # Find the lower and upper bounds (relevant to the frq range), when the range is given
    if feature == 'mfcc' and f_1 != 0 and f_2 != 0:
        mels = librosa.core.mel_frequencies(n_mels=n_mels, fmin=0.0, fmax=fs / 2, htk=False)
        ind_flow = (np.abs(mels - f_1)).argmin()
        ind_fhigh = (np.abs(mels - f_2)).argmin()
    elif feature == 'we' and f_1 != 0 and f_2 != 0:
        linear = np.linspace(0, fs / 2, 62)
        ind_flow = (np.abs(linear - f_1)).argmin()
        ind_fhigh = (np.abs(linear - f_2)).argmin()

    # Ready for clustering
    max_clusters = max_clusters
    n_clusters = 0
    clusters = []
    for root, dirs, files in os.walk(str(dir)):
        for file in files:
            if file.endswith('.wav') and file + '.data' in files:
                # Read the annotation
                segments = Segment.SegmentList()
                segments.parseJSON(os.path.join(root, file + '.data'))

                # Sort the segments longest to shortest, would be a good idea to avoid
                # making the first class with only one member :)
                if len(segments) > 0 and segments[0][0] == -1:
                    del segments[0]
                segments_len = [seg[1] - seg[0] for seg in segments]
                inds = np.argsort(segments_len)[::-1]
                sortedsegments = [segments[i] for i in inds]

                # Now find syllables within each segment, median clipping
                for seg in sortedsegments:
                    if seg[0] == -1:
                        continue
                    audiodata, sr = loadFile(filename=os.path.join(root, file),
                                             duration=seg[1] - seg[0], offset=seg[0],
                                             fs=fs, denoise=denoise, f1=f_1, f2=f_2)
                    start = int(seg[0] * fs)
                    sp = SignalProc.SignalProc(audiodata, fs, 256, 128)
                    sgRaw = sp.spectrogram(audiodata, 256, 128)
                    segment = Segment.Segmenter(data=audiodata, sg=sgRaw, sp=sp, fs=fs, window_width=256, incr=128)
                    syls = segment.medianClip(thr=3, medfiltersize=5, minaxislength=9, minSegment=50)
                    if len(syls) == 0:
                        # Try again with FIR
                        syls = segment.segmentByFIR(threshold=0.05)
                    syls = segment.checkSegmentOverlap(syls)  # merge overlapped segments
                    syls = [[int(s[0] * sr), int(s[1] * fs)] for s in syls]

                    if len(syls) == 0:
                        # Sanity check, when annotating syllables tight,
                        # median clipping doesn't detect it.
                        syls = [[0, int((seg[1] - seg[0]) * fs)]]
                    if len(syls) > 1:
                        # TODO: samples to seconds
                        syls = segment.joinGaps(syls, minlen_samples)  # Merge short segments
                    if len(syls) == 1 and syls[0][1] - syls[0][0] < minlen_samples:
                        # Sanity check
                        syls = [[0, int((seg[1] - seg[0]) * fs)]]
                    temp = [[np.round((x[0] + start) / fs, 2), np.round((x[1] + start) / fs, 2)] for x in syls]
                    print('\nCurrent:', seg, '--> syllables >', minlen, 'secs ', temp)

                    # Calculate features of the syllables in the current segment.
                    f = []
                    for s in syls:
                        data = audiodata[s[0]:s[1]]
                        if feature == 'mfcc':
                            # MFCC
                            mfcc = librosa.feature.mfcc(y=data, sr=fs, n_mfcc=n_mels)
                            if f_1 != 0 and f_2 != 0:
                                mfcc = mfcc[ind_flow:ind_fhigh, :]  # Limit the frequency to the fixed range [f_1, f_2]
                            mfcc_delta = librosa.feature.delta(mfcc, mode='nearest')
                            mfcc = np.concatenate((mfcc, mfcc_delta), axis=0)
                            mfcc = scale(mfcc, axis=1)
                            # librosa.display.specshow(mfcc, sr=fs, x_axis='time')
                            # m = [i for sublist in mfcc for i in sublist]
                            f.append(mfcc)
                        elif feature == 'we':
                            # Wavelet Energy
                            ws = WaveletSegment.WaveletSegment(spInfo=[])
                            we = ws.computeWaveletEnergy(data=data, sampleRate=fs, nlevels=5, wpmode='new')
                            we = we.mean(axis=1)
                            if f_1 != 0 and f_2 != 0:
                                we = we[ind_flow:ind_fhigh]  # Limit the frequency to a fixed range f_1, f_2
                            f.append(we)
                        elif feature == 'chroma':
                            chroma = librosa.feature.chroma_cqt(y=data, sr=fs)
                            # chroma = librosa.feature.chroma_stft(y=data, sr=fs)
                            chroma = scale(chroma, axis=1)
                            f.append(chroma)

                    matched = False
                    if n_clusters == 0:
                        print('**Case 1: First class')
                        newclass = class_create(label=n_clusters, syl=syls, features=f,
                                                f_low=seg[2], f_high=seg[3],
                                                segs=[(os.path.join(root, file), seg)],
                                                single=single, dist_method=distance)
                        clusters.append(newclass)
                        n_clusters += 1
                        print('Created new class: Class ', "'", newclass["label"], "'",
                              ',\tIn-class_d: ', newclass["d"],
                              '\tf_low: ', newclass["f_low"],
                              '\tf_high: ', newclass["f_high"])
                        matched = True

                    if not matched:
                        # See if the syllables in the current seg match with any existing class
                        min_ds = []  # Keep track of the minimum distances to each class
                        clusters = random.sample(clusters, len(clusters))  # Shuffle the clusters to avoid bias
                        for c in range(len(clusters)):
                            f_c = clusters[c]["features"]  # features of the current class c
                            dist_c = np.zeros((len(f_c), len(f)))  # distances to the current class c
                            for i in range(len(f_c)):
                                for j in range(len(f)):
                                    if distance == 'dtw':
                                        d, _ = librosa.sequence.dtw(f_c[i], f[j], metric='euclidean')
                                        dist_c[i, j] = d[d.shape[0] - 1][d.shape[1] - 1]
                                    elif distance == 'xcor':
                                        corr = signal.correlate(f_c[i], f[j], mode='full')
                                        dist_c[i, j] = np.sum(corr) / max(len(f_c[i]), len(f[j]))

                            # Min distance to the current class
                            print('Distance to Class ', clusters[c]["label"], ': ',
                                  np.amin(dist_c[dist_c != 0]),
                                  '( In-class distance: ', clusters[c]["d"], ')')
                            min_ds.append(np.amin(dist_c[dist_c != 0]))

                        # Now get the clusters sorted according to the min dist
                        ind = np.argsort(min_ds)
                        min_ds = np.sort(min_ds)
                        # make the cluster order
                        clusters = [clusters[i] for i in ind]

                        for c in range(len(clusters)):
                            if (clusters[c]["d"] != 0) and min_ds[c] < (clusters[c]["d"] + clusters[c]["d"] * 0.1):
                                print('**Case 2: Found a match with a class > one syllable')
                                print('Class ', clusters[c]["label"], ', dist ', min_ds[c])
                                # Update this class
                                clusters[c] = class_update(cluster=clusters[c], newfeatures=f,
                                                           newf_low=seg[2], newf_high=seg[3],
                                                           newsyl=syls,
                                                           newseg=(os.path.join(root, file), seg),
                                                           single=single, dist_method=distance)
                                matched = True
                                break  # found a match, exit from the for loop, go to the next segment
                            elif c < len(clusters) - 1:
                                continue  # continue to the next class

                    # Checked most of the classes by now; if still no match found, check the classes
                    # with only one data point (clusters[c]["d"] == 0).
                    # Note the arbitrary thr.
                    if not matched:
                        if distance == 'dtw':
                            thr = 25
                        elif distance == 'xcor':
                            thr = 1000
                        for c in range(len(clusters)):
                            if clusters[c]["d"] == 0 and min_ds[c] < thr:
                                print('**Case 3: In-class dist of ', clusters[c]["label"], '=', clusters[c]["d"],
                                      'and this example < ', thr, ' dist')
                                print('Class ', clusters[c]["label"], ', dist ', min_ds[c])
                                # Update this class
                                clusters[c] = class_update(cluster=clusters[c], newfeatures=f,
                                                           newf_low=seg[2], newf_high=seg[3],
                                                           newsyl=syls,
                                                           newseg=(os.path.join(root, file), seg),
                                                           single=single, dist_method=distance)
                                matched = True
                                break  # Break the search and go to the next segment

                    # If no match found yet, check the max clusters
                    if not matched:
                        if n_clusters == max_clusters:
                            print('**Case 4: Reached max classes, therefore adding current seg to the closest class... ')
                            # min_ind = np.argmin(min_ds)
                            # classes are sorted in ascending order of distance already
                            for c in range(len(clusters)):
                                if min_ds[c] <= 4 * clusters[c]["d"] or clusters[c]["d"] == 0:
                                    print('Class ', clusters[c]["label"], ', dist ', min_ds[c],
                                          '(in-class distance:', clusters[c]["d"], ')')
                                    # Update this class
                                    clusters[c] = class_update(cluster=clusters[c], newfeatures=f,
                                                               newf_low=seg[2], newf_high=seg[3],
                                                               newsyl=syls,
                                                               newseg=(os.path.join(root, file), seg),
                                                               single=single, dist_method=distance)
                                    matched = True
                                    break
                            if not matched:
                                print('Class ', clusters[0]["label"], ', dist ', min_ds[0],
                                      '(in-class distance:', clusters[0]["d"], ')')
                                # Update this class
                                # TODO: don't update the class as it is an outlier?
                                clusters[0] = class_update(cluster=clusters[0], newfeatures=f,
                                                           newf_low=seg[2], newf_high=seg[3],
                                                           newsyl=syls,
                                                           newseg=(os.path.join(root, file), seg),
                                                           single=single, dist_method=distance)
                                matched = True
                            continue  # Continue to next segment

                    # If still no luck, create a new class
                    if not matched:
                        print('**Case 5: None of Case 1-4')
                        newclass = class_create(label=n_clusters, syl=syls, features=f,
                                                f_low=seg[2], f_high=seg[3],
                                                segs=[(os.path.join(root, file), seg)],
                                                single=single, dist_method=distance)
                        print('Created a new class: Class ', n_clusters + 1)
                        clusters.append(newclass)
                        n_clusters += 1
                        print('Created new class: Class ', "'", newclass["label"], "'",
                              ',\tin-class_d: ', newclass["d"],
                              '\tf_low: ', newclass["f_low"],
                              '\tf_high: ', newclass["f_high"])

    print('\n\n--------------Clusters created-------------------')
    clustered_segs = []
    for c in range(len(clusters)):
        print('Class ', clusters[c]['label'], ': ', len(clusters[c]['segs']))
        for s in range(len(clusters[c]['segs'])):
            print('\t', clusters[c]['segs'][s])
            if single:
                clustered_segs.append([clusters[c]['segs'][s][0], clusters[c]['segs'][s][1],
                                       [clusters[c]['features'][s]], clusters[c]['label']])
            else:
                clustered_segs.append([clusters[c]['segs'][s][0], clusters[c]['segs'][s][1],
                                       clusters[c]['label']])

    # Clustered segments
    print('\n\n################### Clustered segments ############################')
    for s in clustered_segs:
        print(s)
    return clustered_segs, fs, n_clusters
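# Hedged usage sketch for cluster_by_dist: the directory is an illustrative
# assumption and should hold .wav files with matching AviaNZ .data annotations.
#
# clustered_segs, fs, n_clusters = cluster_by_dist('recordings/kiwi', feature='we',
#                                                  fs=16000, minlen=0.2,
#                                                  distance='dtw', max_clusters=10)
# print('Found %d clusters over %d segments' % (n_clusters, len(clustered_segs)))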
def detectClicks():
    import SignalProc
    # reload(SignalProc)
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui
    import wavio
    from scipy.signal import medfilt

    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Clicks\1ex\Lake_Thompson__01052018_SOUTH1047849_01052018_High_20180509_'
    #                     r'20180509_183506.wav')  # close kiwi and rain
    wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Clicks\Lake_Thompson__01052018_SOUTH1047849_01052018_High_20180508_'
                        r'20180508_200506.wav')  # very close kiwi with steady wind
    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Clicks\1ex\Murchison_Kelper_Heli_25042018_SOUTH7881_25042018_High_'
    #                     r'20180405_20180405_211007.wav')
    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Noise examples\Noise_10s\Rain_010.wav')
    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Clicks\Ponui_SR2_Jono_20130911_021920.wav')
    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Clicks\CL78_BIRM_141120_212934.wav')
    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Clicks\CL78_BIRD_141120_212934.wav')  # Loud click
    # wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Tier1\Tier1 dataset\positive\DE66_BIRD_141011_005829.wav')  # close kiwi
    # wavobj = wavio.read('Sound Files/010816_202935_p1.wav')
    #wavobj = wavio.read('Sound Files/20170515_223004 piping.wav')
    # wavobj = wavio.read('Sound Files/test/DE66_BIRD_141011_005829.wav')
    #wavobj = wavio.read('/Users/srmarsla/DE66_BIRD_141011_005829_wb.wav')
    #wavobj = wavio.read('/Users/srmarsla/ex1.wav')
    #wavobj = wavio.read('/Users/srmarsla/ex2.wav')

    fs = wavobj.rate
    data = wavobj.data  #[:20*fs]
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(data, fs, 128, 128)
    sg = sp.spectrogram(data, multitaper=False)
    s = Segment(data, sg, sp, fs, 128)

    # for each frq band get sections where energy exceeds some (90%) percentile
    # and generate a binary spectrogram
    sgb = np.zeros((np.shape(sg)))
    for y in range(np.shape(sg)[1]):
        ey = sg[:, y]
        # em = medfilt(ey, 15)
        ep = np.percentile(ey, 90)
        sgb[np.where(ey > ep), y] = 1

    # If lots of frq bands got 1 then predict a click
    clicks = []
    for x in range(np.shape(sg)[0]):
        if np.sum(sgb[x, :]) > np.shape(sgb)[1] * 0.75:
            clicks.append(x)

    app = QtGui.QApplication([])
    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(1200, 500)
    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)
    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(sgb)

    if len(clicks) > 0:
        clicks = s.identifySegments(clicks, minlength=1)

    for seg in clicks:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb1.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
    else:
        y1, _ = spec.sici(np.pi * L * x)
        y2, _ = spec.sici(np.pi * L * x - np.pi)
        y3, _ = spec.sici(np.pi * L * x + np.pi)
        y = 1 / 2 + (1 / (2 * np.pi)) * y1 + (1 / (4 * np.pi)) * y2 + (1 / (4 * np.pi)) * y3
    return y

window_width = 1024
incr = 256
window = "Hann"
reassignment = False
sp = SignalProc.SignalProc(window_width, incr)

main_dir = "C:\\Users\\Virginia\\Documents\\Work\\IF_extraction"
test_fold = "1024_256_Test2"

for f in os.listdir(main_dir):
    if f.endswith('.wav'):
        file_name = f
        data_file = main_dir + "\\" + file_name
        print(file_name)
        if 'song' in data_file:
            song_flag = True
        else:
            song_flag = False
        sp.readWav(data_file)
        fs = sp.sampleRate
        IF = IFreq.IF(method=1)
def reconstructWP2(self, node, antialias=False, antialiasFilter=False):
    """ Inverse of WaveletPacket: returns the signal from a single node.
    Expects our homebrew (non-downsampled) WP.
    Takes Data and Wavelet from current WF instance.
    Antialias option controls freq squashing in final step.
    Return: the reconstructed signal, ndarray.
    """
    wv = self.wavelet
    data = self.tree[node]
    sp = SignalProc.SignalProc()

    lvl = math.floor(math.log2(node + 1))
    # position of node in its level (0-based)
    nodepos = node - (2 ** lvl - 1)
    # Gray-permute node positions (cause wp is not in natural order)
    nodepos = self.graycode(nodepos)
    # positive freq is split into bands 0:1/2^lvl, 1:2/2^lvl,...
    # same for negative freq, so in total 2^lvl * 2 bands.
    numnodes = 2 ** (lvl + 1)

    # do the actual convolutions + upsampling
    if not isinstance(data, np.ndarray):
        data = np.asarray(data, dtype='float64')
    data = ce.reconstruct(data, node, np.array(wv.rec_hi), np.array(wv.rec_lo), lvl)

    if antialias:
        if len(data) > 910 * 16000 and not antialiasFilter:
            print("Size of signal to be reconstructed is", len(data))
            print("ERROR: processing of big data chunks is currently disabled. Recommend splitting files to below 15 min chunks. Enable this only if you know what you're doing.")
            return

        if antialiasFilter:
            # BETTER METHOD for antialiasing
            # essentially same as SignalProc.ButterworthBandpass,
            # just stripped to minimum for speed.
            low = nodepos / numnodes * 2
            high = (nodepos + 1) / numnodes * 2
            print("antialiasing by filtering between %.3f-%.3f FN" % (low, high))
            data = sp.FastButterworthBandpass(data, low, high)
        else:
            # OLD METHOD for antialiasing
            # just setting image frequencies to 0
            print("antialiasing via FFT")
            ft = pyfftw.interfaces.scipy_fftpack.fft(data)
            ll = len(ft)
            # to keep: [nodepos/numnodes : (nodepos+1)/numnodes] x Fs
            # (same for negative freqs)
            ft[:ll * nodepos // numnodes] = 0
            ft[ll * (nodepos + 1) // numnodes:-ll * (nodepos + 1) // numnodes] = 0
            # indexing [-0:] wipes everything
            if nodepos != 0:
                ft[-ll * nodepos // numnodes:] = 0
            data = np.real(pyfftw.interfaces.scipy_fftpack.ifft(ft))

    return data
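# A small worked example of the node-to-band arithmetic in reconstructWP2
# (a sketch: the band edges shown are those before the Gray-code permutation
# that the method applies to nodepos).
def demoNodeBands(node=9):
    import math
    lvl = math.floor(math.log2(node + 1))   # node 9 sits on level 3
    nodepos = node - (2 ** lvl - 1)         # position 2 within that level
    numnodes = 2 ** (lvl + 1)               # 16 bands over positive + negative freqs
    low = nodepos / numnodes * 2            # 0.25 of the Nyquist frequency
    high = (nodepos + 1) / numnodes * 2     # 0.375 of the Nyquist frequency
    print("node %d: level %d, pos %d, band %.3f-%.3f FN" % (node, lvl, nodepos, low, high))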
def generateFeatures(self, dirName, dataset, hop):
    ''' Read the segment library and generate features for training.
    :param dataset: segments in the form of [[file, [segment], label], ..]
    :param hop:
    :return: save the preferred features into JSON files + save images. Currently the spectrogram images.
    '''
    count = 0
    dhop = hop
    eps = 0.0005
    specFrameSize = len(range(0, int(self.length * self.fs - self.windowwidth), self.inc))
    N = [0 for i in range(len(self.calltypes) + 1)]

    for record in dataset:
        # Compute features, also consider tiny segments because this would be the case for song birds.
        duration = record[1][1] - record[1][0]
        hop = dhop[record[-1]]
        if duration < self.length:
            fileduration = wavio.readFmt(record[0])[1]
            record[1][0] = record[1][0] - (self.length - duration) / 2 - eps
            record[1][1] = record[1][1] + (self.length - duration) / 2 + eps
            if record[1][0] < 0:
                record[1][0] = 0
                record[1][1] = self.length + eps
            elif record[1][1] > fileduration:
                record[1][1] = fileduration
                record[1][0] = fileduration - self.length - eps
            if record[1][0] <= 0 and record[1][1] <= fileduration:
                n = 1
                hop = self.length
                duration = self.length + eps
            else:
                continue
        else:
            n = math.ceil((record[1][1] - record[1][0] - self.length) / hop + 1)
        print('* hop:', hop, 'n:', n, 'label:', record[-1])

        try:
            audiodata = self.loadFile(filename=record[0], duration=duration, offset=record[1][0], fs=self.fs, denoise=False)
        except Exception as e:
            print("Warning: failed to load audio because:", e)
            continue

        N[record[-1]] += n

        sp = SignalProc.SignalProc(self.windowwidth, self.inc)
        sp.data = audiodata
        sp.sampleRate = self.fs
        sgRaw = sp.spectrogram(self.windowwidth, self.inc)

        for i in range(int(n)):
            print('**', record[0], self.length, record[1][0] + hop * i, self.fs, '************************************')
            # start = int(hop * i * fs)
            # end = int(hop * i * fs + length * fs)
            # if end > len(audiodata):
            #     end = len(audiodata)
            #     start = int(len(audiodata) - length * fs)
            # audiodata_i = audiodata[start: end]
            # audiodata_i = audiodata_i.tolist()
            # featuresa.append([audiodata_i, record[-1]])

            # Sgram images
            sgstart = int(hop * i * self.fs / sp.incr)
            sgend = sgstart + specFrameSize
            if sgend > np.shape(sgRaw)[0]:
                sgend = np.shape(sgRaw)[0]
                sgstart = np.shape(sgRaw)[0] - specFrameSize
            if sgstart < 0:
                continue
            sgRaw_i = sgRaw[sgstart:sgend, :]

            # Normalize and rotate
            maxg = np.max(sgRaw_i)
            sgRaw_i = np.rot90(sgRaw_i / maxg)
            print(np.shape(sgRaw_i))

            # Save train data: individual images as npy
            np.save(os.path.join(dirName, str(record[-1]),
                                 str(record[-1]) + '_' + "%06d" % count + '_' + record[0].split(os.sep)[-1][:-4] + '.npy'),
                    sgRaw_i)
            count += 1

    print('\n\nCompleted feature extraction')
    return specFrameSize, N