def emptyData(dirName='', trainTest=False, species=''):
    """Create an empty annotation for every unprocessed night-time recording.

    Walks ``dirName`` recursively. A file is handled when its name contains
    two six-digit groups joined by ``_`` (the second group is read as
    HHMMSS), the hour is greater than 18 or less than 6, the name ends with
    ``.wav``, the file is not zero bytes long and no ``<file>.data`` sibling
    exists in the same folder. For each such file a
    ``SupportClasses.exportSegments`` object with no segments is built and
    its ``saveAnnotation()`` is called.

    Args:
        dirName: Root directory to scan.
        trainTest: Passed through unchanged to ``exportSegments``.
        species: Passed through unchanged to ``exportSegments``.

    Returns:
        None. File names and a running count are printed to stdout.
    """
    cnt = 0  # annotations written so far (starts at 0 like the sibling helpers)
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            # Raw string: "\d" inside a plain literal is an invalid escape.
            DOCRecording = re.search(r'(\d{6})_(\d{6})', file)
            if not DOCRecording:
                continue
            startTime = DOCRecording.group(2)
            hour = int(startTime[:2])
            if not (hour > 18 or hour < 6):
                continue  # not a night recording
            if not file.endswith('.wav'):
                continue
            filename = root + '/' + file
            # Skip files with no data (Tier 1 has 0Kb .wavs).
            if os.stat(filename).st_size == 0:
                continue
            # Skip already processed files.
            if file + '.data' in files:
                continue

            print(file)
            sTime = int(startTime[:2]) * 3600 + int(startTime[2:4]) * 60 + int(startTime[4:6])
            out = SupportClasses.exportSegments(segments=[], species=species, startTime=sTime,
                                                dirName=dirName, filename=filename,
                                                datalength=14400000, sampleRate=16000,
                                                method='Wavelets', resolution=60,
                                                trainTest=trainTest, withConf=False)
            out.segments = []
            out.seg_pos = []
            # Save the (empty) annotation
            out.saveAnnotation()
            cnt += 1
            print("current: ", cnt)
def __init__(self, configdir, filterdir, folderTrain1=None, folderTrain2=None, recogniser=None, imgWidth=0, CLI=False):
    """Load filters and learning parameters and record the run settings.

    The learning parameters are read from ``LearningParams.txt`` inside
    ``configdir``; the spectrogram helper and image size are built from
    them. When ``CLI`` is true the training folders and recogniser name
    are stored and automatic thresholding/correction are switched on;
    otherwise only ``autoThr`` and ``correction`` are set (to False).
    """
    self.configdir = configdir
    self.filterdir = filterdir

    loader = SupportClasses.ConfigLoader()
    self.FilterDict = loader.filters(filterdir, bats=False)
    paramsPath = os.path.join(configdir, "LearningParams.txt")
    self.LearningDict = loader.learningParams(paramsPath)

    params = self.LearningDict
    self.sp = SignalProc.SignalProc(params['sgramWindowWidth'], params['sgramHop'])
    self.imgsize = [params['imgX'], params['imgY']]

    self.tmpdir1 = False
    self.tmpdir2 = False

    self.CLI = CLI
    # Stored in both modes.
    self.imgWidth = imgWidth

    if not CLI:
        self.autoThr = False
        self.correction = False
        return

    # Command-line run: everything needed for training is given up front.
    self.filterName = recogniser
    self.folderTrain1 = folderTrain1
    self.folderTrain2 = folderTrain2
    self.autoThr = True
    self.correction = True
    self.annotatedAll = True
def eRatio(dirName):
    """Print ``postProcess.eRatioConfd(seg=None)`` for every .wav under ``dirName``.

    Each file is read with ``wavio``, converted to float64, reduced to its
    first channel when it has more than one dimension, and handed to
    ``SupportClasses.postProcess``.

    Args:
        dirName: Root directory to scan recursively.

    Returns:
        None. One line per file is printed to stdout.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not file.endswith('.wav'):
                continue
            # os.path.join instead of a hard-coded '\\' so this also works
            # outside Windows.
            wavobj = wavio.read(os.path.join(root, file))
            sampleRate = wavobj.rate
            data = wavobj.data
            # Compare the dtype itself; `data is not 'float'` was always true.
            if data.dtype != np.float64:
                data = data.astype('float')  # data / 32768.0
            if data.ndim > 1:
                data = data[:, 0]
            post = SupportClasses.postProcess(data, sampleRate, [])
            print(file, post.eRatioConfd(seg=None))
def __init__(self, configdir, species, calltypes, fs, length, windowwidth, inc, imageheight, imagewidth):
    """Store the target description and image geometry, then load learning params.

    All arguments except ``configdir`` are kept verbatim as attributes of
    the same name. ``configdir`` is only used to locate
    ``LearningParams.txt``, whose parsed content becomes ``self.LearningDict``.
    """
    # What is being recognised
    self.species, self.calltypes = species, calltypes
    # Audio / spectrogram settings
    self.fs, self.length = fs, length
    self.windowwidth, self.inc = windowwidth, inc
    # Output image geometry
    self.imageheight, self.imagewidth = imageheight, imagewidth

    paramsPath = os.path.join(configdir, "LearningParams.txt")
    self.LearningDict = SupportClasses.ConfigLoader().learningParams(paramsPath)
def __init__(self, parent, mode="GUI", configdir='', sdir='', recogniser=None, wind=False):
    """Read config and filters, then set up GUI, CLI or test mode.

    In "GUI" mode ``parent`` must be the owning UI object; it is stored as
    ``self.ui``. In "CLI" and "test" modes the directory, recogniser and
    wind flag are stored and ``self.detect()`` is run immediately. An
    unknown mode, or GUI mode without a parent, prints an error and leaves
    the object only partly initialised.
    """
    # read config and filters from user location
    loader = SupportClasses.ConfigLoader()
    self.configdir = configdir
    self.configfile = os.path.join(configdir, "AviaNZconfig.txt")
    self.ConfigLoader = loader
    self.config = loader.config(self.configfile)
    self.saveConfig = True
    self.filtersDir = os.path.join(configdir, self.config['FiltersDir'])
    self.FilterDicts = loader.filters(self.filtersDir)

    if not (mode == "GUI" or mode == "CLI" or mode == "test"):
        print("ERROR: unrecognized mode ", mode)
        return

    self.CLI = (mode == "CLI")
    self.testmode = (mode == "test")

    if mode == "GUI":
        if parent is None:
            print("ERROR: must provide a parent UI or specify CLI/testmode")
            return
        self.ui = parent

    self.dirName = []

    headless = self.CLI or self.testmode
    if not headless:
        return

    # In CLI/test modes, immediately run detection on init
    self.dirName = sdir
    self.species = [recogniser]
    self.wind = wind
    if self.testmode:
        self.filesDone = []
    self.detect()
def annotation2excel(dirName='', species=''):
    """Generate the Excel output for the annotation files under ``dirName``.

    Every ``.data`` file is processed unless its name carries a
    ``dddddd_dddddd`` stamp whose hour (first two digits of the second
    group) is neither greater than 18 nor less than 6. The JSON content is
    a list of segments; a leading entry whose first element is -1 is
    treated as a header and dropped. Segments labelled ``'Kiwi'`` go to
    both the "possible" and the "with confidence" lists, segments labelled
    ``'Kiwi?'`` only to the "possible" list. ``exportSegments.excel()`` is
    then called twice: once for all possible segments and once with
    ``withConf=True``.

    Args:
        dirName: Root directory to scan recursively.
        species: Currently unused; the species is hard-coded to 'Kiwi'.

    Returns:
        None. A running count is printed to stdout.
    """
    cnt = 0
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            Night = False
            sTime = 0  # start time in seconds; stays 0 without a time stamp
            # Raw string: "\d" inside a plain literal is an invalid escape.
            DOCRecording = re.search(r'(\d{6})_(\d{6})', file)
            if DOCRecording:
                startTime = DOCRecording.group(2)
                hour = int(startTime[:2])
                if hour > 18 or hour < 6:
                    Night = True
                sTime = int(startTime[:2]) * 3600 + int(startTime[2:4]) * 60 + int(startTime[4:6])

            if not (file.endswith('.data') and (Night or not DOCRecording)):
                continue

            filename = root + '/' + file[:-5]
            # Context manager closes the handle even if the JSON is invalid,
            # and avoids rebinding the loop variable.
            with open(filename + '.data', 'r') as annotationFile:
                segments = json.load(annotationFile)

            # Drop the metadata header entry, if present.
            if len(segments) > 0 and segments[0][0] == -1:
                del segments[0]

            # Now separate the segments into possible and with-confidence.
            segments_possible = []
            segments_withConf = []
            for seg in segments:
                if seg[4] == 'Kiwi':
                    segments_possible.append([seg[0], seg[1]])
                    segments_withConf.append([seg[0], seg[1]])
                elif seg[4] == 'Kiwi?':
                    segments_possible.append([seg[0], seg[1]])

            out = SupportClasses.exportSegments(segments=segments_possible, species='Kiwi',
                                                startTime=sTime, dirName=dirName,
                                                filename=filename, datalength=14400000,
                                                sampleRate=16000, method='Wavelets',
                                                resolution=60, trainTest=False, withConf=False)
            out.excel()  # all possible segments
            out.withConf = True
            out.segments = segments_withConf
            out.seg_pos = segments_possible
            out.excel()  # only the confident segments
            cnt += 1
            print("current: ", cnt)
def loadFile(filename):
    """Read a wav file and run the Kiwi pre-processing on its first channel.

    The samples are converted to float64 (no rescaling), reduced to the
    first channel when the array has more than one dimension, and passed
    through ``SupportClasses.preProcess(...).denoise_filter()``.

    Args:
        filename: Path of the wav file, as accepted by ``wavio.read``.

    Returns:
        The ``(audiodata, sampleRate)`` pair returned by ``denoise_filter()``.
    """
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    audiodata = wavobj.data

    # None of the following should be necessary for librosa.
    # Compare the dtype itself; `dtype is not 'float'` was always true.
    if audiodata.dtype != np.float64:
        audiodata = audiodata.astype('float')  # / 32768.0
    if audiodata.ndim > 1:
        audiodata = audiodata[:, 0]

    # pre-process
    sc = SupportClasses.preProcess(audioData=audiodata, sampleRate=sampleRate, species='Kiwi', df=False)
    audiodata, sampleRate = sc.denoise_filter()
    return audiodata, sampleRate
def __init__(self, configdir, filterdir, folderTrain1=None, folderTrain2=None, recogniser=None, imgWidth=0, CLI=False):
    """Load filters and learning parameters and record the run settings.

    Two important things:
      1. ``LearningParams.txt`` (in ``configdir``) is a dictionary of
         parameters, including the spectrogram parameters.
      2. ``CLI`` says whether this runs off the command line, which makes
         picking the ROC curve parameters hard.

    Open question: what is ``imgWidth``, and why is it not a learning param?
    """
    self.configdir = configdir
    self.filterdir = filterdir

    loader = SupportClasses.ConfigLoader()
    self.FilterDict = loader.filters(filterdir, bats=False)
    paramsPath = os.path.join(configdir, "LearningParams.txt")
    self.LearningDict = loader.learningParams(paramsPath)

    params = self.LearningDict
    self.sp = SignalProc.SignalProc(params['sgramWindowWidth'], params['sgramHop'])
    self.imgsize = [params['imgX'], params['imgY']]

    self.tmpdir1 = False
    self.tmpdir2 = False
    self.ROCdata = {}

    self.CLI = CLI
    # Stored in both modes.
    self.imgWidth = imgWidth

    if not CLI:
        self.autoThr = False
        self.correction = False
        return

    # Command-line run: everything needed for training is given up front.
    self.filterName = recogniser
    self.folderTrain1 = folderTrain1
    self.folderTrain2 = folderTrain2
    self.autoThr = True
    self.correction = True
    self.annotatedAll = True
def deleteClick2(dirName):
    """Delete annotation segments that look like rain clicks, using pitch.

    For every ``<name>.data`` file that has its sound file ``<name>`` in the
    same folder, each segment is read from the sound file and its second
    MFCC row is compared with a per-file threshold (mean minus two standard
    deviations of that row over the whole file). A segment whose minimum
    falls below the threshold is denoised and its fundamental frequency is
    estimated with ``Segment.yin()``; it is kept when the mean pitch is
    between 500 and 5000 and removed otherwise. The annotation file is
    rewritten only if something was removed.

    Uses fundamental frequency rather than eRatio (see ``deleteClick``).

    Args:
        dirName: Root directory to scan recursively.

    Returns:
        None. File names and removed segments are printed to stdout.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not (file.endswith('.data') and file[:-5] in files):
                continue
            file = root + '/' + file
            with open(file) as f:
                segments = json.load(f)
            newSegments = copy.deepcopy(segments)

            wavobj = wavio.read(file[:-5])
            audioData = wavobj.data
            # Compare the dtype itself; `audioData is not 'float'` was always true.
            if not np.issubdtype(audioData.dtype, np.floating):
                audioData = audioData / 32768.0
            audioData = audioData[:, 0].squeeze()
            # Rate of the whole-file signal; kept separate from the per-segment
            # rate so the file threshold is always computed with the rate that
            # matches audioData.
            fileRate = wavobj.rate
            if fileRate != 16000:
                audioData = librosa.core.audio.resample(audioData, fileRate, 16000)
                fileRate = 16000
            print(file)

            thr = None  # mfcc1 threshold for the file, computed on first use
            chg = False
            for seg in segments:
                if seg[0] == -1:
                    continue  # metadata entry
                # read the sound segment
                secs = seg[1] - seg[0]
                wavobj = wavio.read(file[:-5], nseconds=secs, offset=seg[0])
                data = wavobj.data
                sampleRate = wavobj.rate
                if not np.issubdtype(data.dtype, np.floating):
                    data = data / 32768.0
                data = data[:, 0].squeeze()

                # check for clicks
                mfcc1 = Features.Features(data, sampleRate).get_mfcc()[1, :]
                if thr is None:
                    fileMfcc1 = Features.Features(audioData, fileRate).get_mfcc()[1, :]
                    thr = np.mean(fileMfcc1) - 2 * np.std(fileMfcc1)
                if not np.min(mfcc1) < thr:
                    continue

                # now check f. frq.
                # down sample to avoid higher frq noise
                if sampleRate != 16000:
                    data = librosa.core.audio.resample(data, sampleRate, 16000)
                    sampleRate = 16000
                # denoise prior to f frq detection
                waveletDenoiser = WaveletFunctions.WaveletFunctions(data=data, wavelet=None, maxLevel=12)
                data = waveletDenoiser.waveletDenoise(data, thresholdType='soft', wavelet='dmey2', maxLevel=12)
                sp = SignalProc.SignalProc([], 0, 512, 256)
                sgRaw = sp.spectrogram(data, 512, 256, mean_normalise=True, onesided=True, multitaper=False)
                segment = Segment.Segment(data, sgRaw, sp, sampleRate, 512, 256)
                pitch, y, minfreq, W = segment.yin()
                ind = np.squeeze(np.where(pitch > minfreq))
                pitch = pitch[ind]
                ff = np.mean(pitch)
                if ff > 500 and ff < 5000:
                    continue
                print(seg)
                newSegments.remove(seg)
                chg = True

            if chg:
                # Context manager so the rewritten annotation is flushed and closed.
                with open(file, 'w') as f:
                    json.dump(newSegments, f)
def deleteClick(dirName):
    """Delete short annotation segments that look like wind/rain clicks.

    For every ``<name>.data`` file that has its sound file ``<name>`` in the
    same folder, each segment is read from the sound file and the minimum
    of its second MFCC row is compared with a per-file threshold (mean
    minus two standard deviations of that row over the whole file). A
    segment below the threshold is removed unless it is longer than 10
    seconds. The annotation file is rewritten only if something was removed.

    Args:
        dirName: Root directory to scan recursively.

    Returns:
        None. File names and removed segments are printed to stdout.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not (file.endswith('.data') and file[:-5] in files):
                continue
            file = root + '/' + file
            with open(file) as f:
                segments = json.load(f)
            newSegments = copy.deepcopy(segments)

            wavobj = wavio.read(file[:-5])
            audioData = wavobj.data
            # Compare the dtype itself; `audioData is not 'float'` was always true.
            if not np.issubdtype(audioData.dtype, np.floating):
                audioData = audioData / 32768.0
            audioData = audioData[:, 0].squeeze()
            sampleRate = wavobj.rate
            print(file)

            # mfcc1 threshold for the file: identical for every segment, so it
            # is computed once, on first use.
            thr = None
            chg = False
            for seg in segments:
                if seg[0] == -1:
                    continue  # metadata entry
                # read the sound segment
                secs = seg[1] - seg[0]
                wavobj = wavio.read(file[:-5], nseconds=secs, offset=seg[0])
                data = wavobj.data
                if not np.issubdtype(data.dtype, np.floating):
                    data = data / 32768.0
                data = data[:, 0].squeeze()

                # check for clicks
                mfcc1 = Features.Features(data, sampleRate).get_mfcc()[1, :]
                if thr is None:
                    fileMfcc1 = Features.Features(audioData, sampleRate).get_mfcc()[1, :]
                    thr = np.mean(fileMfcc1) - 2 * np.std(fileMfcc1)
                if not np.min(mfcc1) < thr:
                    continue

                # just check duration > 10 sec
                if secs > 10:
                    continue
                print(seg)
                newSegments.remove(seg)
                chg = True

            if chg:
                # Context manager so the rewritten annotation is flushed and closed.
                with open(file, 'w') as f:
                    json.dump(newSegments, f)
def deleteWindRain(dirName, windTest=True, rainTest=False, Tmean_wind = 1e-8):
    """Delete annotation segments that appear to be wind or rain.

    Targets moderate wind and above. For every ``<name>.data`` file that
    has its sound file ``<name>`` in the same folder, the Welch power
    spectral density of each segment is computed.

    * Wind test: if the mean PSD in the 100-250 Hz band exceeds
      ``Tmean_wind`` and the segment is not longer than 10 seconds, the
      segment is removed.
    * Rain test: if mean/std of the PSD in the 600-1200 Hz band exceeds 3.5
      and ``post.eRatioConfd(seg)`` is not larger than the value for the 10
      seconds before the segment, the segment is removed.

    The band test follows "Automatic Identification of Rainfall in Acoustic
    Recordings" by Carol Bedoya, Claudia Isaza, Juan M. Daza and
    Jose D. Lopez (Algorithm 2.1).

    Args:
        dirName: Root directory to scan recursively.
        windTest: Run the wind test.
        rainTest: Run the rain test.
        Tmean_wind: Mean-PSD threshold for the wind test.

    Returns:
        None. Decisions and a running file count are printed to stdout.
    """
    # TODO: find thresholds
    Tsnr_rain = 3.5  # SNR threshold

    cnt = 0
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not (file.endswith('.data') and file[:-5] in files):
                continue
            file = root + '/' + file
            with open(file) as f:
                segments = json.load(f)
            newSegments = copy.deepcopy(segments)

            wavobj = wavio.read(file[:-5])
            audioData = wavobj.data
            # Compare the dtype itself; `audioData is not 'float'` was always true.
            if not np.issubdtype(audioData.dtype, np.floating):
                audioData = audioData / 32768.0
            audioData = audioData[:, 0].squeeze()
            sampleRate = wavobj.rate
            post = SupportClasses.postProcess(audioData, sampleRate, [])

            # Band edges in normalised frequency; constant for the whole file.
            wind_lower = 2.0 * 100 / sampleRate
            wind_upper = 2.0 * 250 / sampleRate
            rain_lower = 2.0 * 600 / sampleRate
            rain_upper = 2.0 * 1200 / sampleRate

            chg = False
            for seg in segments:
                if seg[0] == -1:
                    continue  # metadata entry
                # read the sound segment
                secs = seg[1] - seg[0]
                wavobj = wavio.read(file[:-5], nseconds=secs, offset=seg[0])
                data = wavobj.data
                if not np.issubdtype(data.dtype, np.floating):
                    data = data / 32768.0
                data = data[:, 0].squeeze()

                freqs, p = signal.welch(data, fs=sampleRate, window='hamming', nperseg=512, detrend=False)
                # A segment flagged by both tests must only be removed once;
                # a second list.remove() would raise ValueError.
                removed = False

                if windTest:
                    limite_inf = int(round(len(p) * wind_lower))  # lower edge of the wind band (100 Hz)
                    limite_sup = int(round(len(p) * wind_upper))  # upper edge of the wind band (250 Hz)
                    a_wind = p[limite_inf:limite_sup]  # PSD section of interest (step 2)
                    mean_a_wind = np.mean(a_wind)  # mean PSD in the band (step 3)
                    if mean_a_wind > Tmean_wind:
                        # just check duration > 10 sec
                        potentialCall = secs > 10
                        if not potentialCall:
                            print(file, seg, "--> windy")
                            newSegments.remove(seg)
                            removed = True
                            chg = True
                    else:
                        print(file, seg, "--> not windy")

                if rainTest:
                    limite_inf = int(round(len(p) * rain_lower))  # lower edge of the rain band (600 Hz)
                    limite_sup = int(round(len(p) * rain_upper))  # upper edge of the rain band (1200 Hz)
                    a_rain = p[limite_inf:limite_sup]  # PSD section of interest (step 2)
                    mean_a_rain = np.mean(a_rain)
                    std_a_rain = np.std(a_rain)
                    c_rain = mean_a_rain / std_a_rain  # signal to noise ratio (step 3)
                    if c_rain > Tsnr_rain:
                        # check if it is not kiwi
                        eRatio = post.eRatioConfd(seg)
                        eRatioBefore = post.eRatioConfd([seg[0] - 10, seg[0], "", ""])
                        potentialCall = eRatio > eRatioBefore
                        if not potentialCall:
                            print(file, seg, "--> rainy")
                            if not removed:
                                newSegments.remove(seg)
                                removed = True
                            chg = True
                    else:
                        print(file, "--> not rainy")

            if chg:
                # Context manager closes the file and keeps `file` a path, so
                # the progress line below prints the path, not a file object.
                with open(file, 'w') as f:
                    json.dump(newSegments, f)
            cnt += 1
            print(file, cnt)
def __init__(self,testDir,currfilt,configdir,filterdir,CLI=False):
    """Test a recogniser on the annotated recordings in ``testDir``.

    Results are written to ``test-results.txt`` inside ``testDir``. The
    steps are: generate ground-truth files from the annotations, run batch
    processing in "test" mode, report a summary without CNN, and, when the
    filter has a "CNN" entry, report a summary with CNN.

    Args:
        testDir: Folder with .wav files and their .wav.data annotations.
        currfilt: With ``CLI`` true, the recogniser name (optionally ending
            in ".txt") looked up in the loaded filters; otherwise the
            filter dictionary itself.
        configdir: Configuration directory, passed on to batch processing.
        filterdir: Directory the filters and CNN models are loaded from.
        CLI: Whether this was started from the command line.

    After construction ``self.flag`` and ``self.text`` hold the result of
    the non-CNN summary, or ``False`` and ``0`` when no segments of the
    species were found.
    """
    self.testDir = testDir
    self.outfile = open(os.path.join(self.testDir, "test-results.txt"), "w")
    # try/finally: the results file is closed on every exit path, including
    # the early returns below and any exception.
    try:
        if CLI:
            cl = SupportClasses.ConfigLoader()
            self.FilterDict = cl.filters(filterdir, bats=False)
            if currfilt.lower().endswith('.txt'):
                self.currfilt = self.FilterDict[currfilt[:-4]]
            else:
                self.currfilt = self.FilterDict[currfilt]
        else:
            self.currfilt = currfilt

        self.configdir = configdir
        self.filterdir = filterdir
        self.species = self.currfilt['species']
        self.sampleRate = self.currfilt['SampleRate']
        self.calltypes = [fi['calltype'] for fi in self.currfilt['Filters']]

        self.outfile.write("Recogniser name: %s\n" %(self.currfilt))
        self.outfile.write("Species name: %s\n" % (self.species))
        self.outfile.write("Using data: %s\n" % (self.testDir))

        # 0. Generate GT files from annotations in test folder
        self.manSegNum = 0
        self.window = 1
        inc = None
        print('Generating GT...')
        for root, dirs, files in os.walk(self.testDir):
            for file in files:
                wavFile = os.path.join(root, file)
                if file.lower().endswith('.wav') and os.stat(wavFile).st_size != 0 and file + '.data' in files:
                    segments = Segment.SegmentList()
                    segments.parseJSON(wavFile + '.data')
                    self.manSegNum += len(segments.getSpecies(self.species))
                    # Currently, we ignore call types here and just
                    # look for all calls for the target species.
                    segments.exportGT(wavFile, self.species, window=self.window, inc=inc)

        if self.manSegNum == 0:
            print("ERROR: no segments for species %s found" % self.species)
            self.flag = False
            self.text = 0
            return

        # 1. Run Batch Processing up to WF and generate .tmpdata files (no post-proc)
        avianz_batch = AviaNZ_batch.AviaNZ_batchProcess(parent=None, configdir=self.configdir, mode="test",
                                                        sdir=self.testDir, recogniser=self.species, wind=True)

        # 2. Report statistics of WF followed by general post-proc steps
        #    (no CNN but wind-merge neighbours-delete short)
        self.flag, self.text = self.getSummary(avianz_batch, CNN=False)

        # 3. Report statistics of WF followed by post-proc steps
        #    (wind-CNN-merge neighbours-delete short)
        if "CNN" in self.currfilt:
            cl = SupportClasses.ConfigLoader()
            filterlist = cl.filters(self.filterdir, bats=False)
            CNNDicts = cl.CNNmodels(filterlist, self.filterdir, [self.species])
            if self.species in CNNDicts.keys():
                CNNmodel = CNNDicts[self.species]
                # The CNN summary result is not stored on self.
                flag, text = self.getSummary(avianz_batch, CNN=True, CNNmodel=CNNmodel)
            else:
                print("Couldn't find a matching CNN!")
                self.outfile.write("-- End of testing --\n")
                return

        self.outfile.write("-- End of testing --\n")
    finally:
        self.outfile.close()

    # Tidy up
    for root, dirs, files in os.walk(self.testDir):
        for file in files:
            if file.endswith('.tmpdata'):
                os.remove(os.path.join(root, file))

    if CLI:
        print("Output written to " + os.path.join(self.testDir, "test-results.txt"))
def mainlauncher(cli, cheatsheet, zooniverse, infile, imagefile, batchmode, training, testing, sdir1, sdir2, recogniser, wind, width, command):
    """Prepare the user's configuration directory and start AviaNZ.

    The working directory is changed to the application directory. The
    per-user config directory is created if missing, the config and
    learning-parameter files are validated against their JSON schemas
    (defaults are copied over when validation fails), and missing bird
    lists and recognisers are copied from the distributed defaults.

    With ``cli`` true exactly one of batch processing (``batchmode``),
    training (``training``), testing (``testing``) or the
    cheatsheet/zooniverse export is run. Otherwise the Qt GUI is started
    and loops between the start screen and the chosen tool until the
    application exits with code 0.

    Raises:
        RuntimeError: on an unsupported operating system, or when the
            arguments required by the selected CLI mode are missing/invalid.
        Exception: import errors and failures to create or copy essential
            config files are re-raised after printing a message.
    """
    # adapt path to allow this to be launched from wherever
    import os
    import sys
    if getattr(sys, 'frozen', False):
        appdir = sys._MEIPASS
    else:
        appdir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(appdir)

    try:
        import json
        import platform
        import shutil
        from jsonschema import validate
        import SupportClasses
    except Exception:
        print("ERROR: could not import packages")
        raise

    def _fail(message):
        # A bare `raise` outside an except block only produces
        # "RuntimeError: No active exception to reraise"; raise the same
        # exception type explicitly, carrying the real message.
        print(message)
        raise RuntimeError(message)

    # determine location of config file and bird lists
    if platform.system() == 'Windows':
        # Win
        configdir = os.path.expandvars(os.path.join("%APPDATA%", "AviaNZ"))
    elif platform.system() == 'Linux' or platform.system() == 'Darwin':
        # Unix
        configdir = os.path.expanduser("~/.avianz/")
    else:
        _fail("ERROR: what OS is this? %s" % platform.system())

    # if config and bird files not found, copy from distributed backups.
    # so these files will always exist on load (although they could be corrupt)
    # (exceptions here not handled and should always result in crashes)
    if not os.path.isdir(configdir):
        print("Creating config dir %s" % configdir)
        try:
            os.makedirs(configdir)
        except Exception as e:
            print("ERROR: failed to make config dir")
            print(e)
            raise

    # pre-run check of config file validity
    confloader = SupportClasses.ConfigLoader()
    with open("Config/config.schema") as schemafile:
        configschema = json.load(schemafile)
    with open("Config/learnpar.schema") as schemafile:
        learnparschema = json.load(schemafile)
    try:
        config = confloader.config(os.path.join(configdir, "AviaNZconfig.txt"))
        validate(instance=config, schema=configschema)
        learnpar = confloader.learningParams(os.path.join(configdir, "LearningParams.txt"))
        validate(instance=learnpar, schema=learnparschema)
        print("successfully validated config file")
    except Exception as e:
        print("Warning: config file failed validation with:")
        print(e)
        try:
            shutil.copy2("Config/AviaNZconfig.txt", configdir)
            shutil.copy2("Config/LearningParams.txt", configdir)
        except Exception as e:
            print("ERROR: failed to copy essential config files")
            print(e)
            raise

    # check and if needed copy any other necessary files
    necessaryFiles = ["ListCommonBirds.txt", "ListDOCBirds.txt", "ListBats.txt", "LearningParams.txt"]
    for f in necessaryFiles:
        if not os.path.isfile(os.path.join(configdir, f)):
            print("File %s not found in config dir, providing default" % f)
            try:
                shutil.copy2(os.path.join("Config", f), configdir)
            except Exception as e:
                print("ERROR: failed to copy essential config files")
                print(e)
                raise

    # copy over filters to ~/.avianz/Filters/:
    filterdir = os.path.join(configdir, "Filters/")
    if not os.path.isdir(filterdir):
        print("Creating filter dir %s" % filterdir)
        os.makedirs(filterdir)
    for f in os.listdir("Filters"):
        ff = os.path.join("Filters", f)  # Kiwi.txt
        if not os.path.isfile(os.path.join(filterdir, f)):  # ~/.avianz/Filters/Kiwi.txt
            print("Recogniser %s not found, providing default" % f)
            try:
                shutil.copy2(ff, filterdir)  # cp Filters/Kiwi.txt ~/.avianz/Filters/
            except Exception as e:
                # Best effort: a recogniser that cannot be copied is only a warning.
                print("Warning: failed to copy recogniser %s to %s" % (ff, filterdir))
                print(e)

    # run splash screen:
    if cli:
        print("Starting AviaNZ in CLI mode")
        if batchmode:
            import AviaNZ_batch
            if os.path.isdir(sdir1) and recogniser in confloader.filters(filterdir).keys():
                avianzbatch = AviaNZ_batch.AviaNZ_batchProcess(parent=None, mode="CLI", configdir=configdir,
                                                               sdir=sdir1, recogniser=recogniser, wind=wind)
                print("Analysis complete, closing AviaNZ")
            else:
                _fail("ERROR: valid input dir (-d) and recogniser name (-r) are essential for batch processing")
        elif training:
            import Training
            if os.path.isdir(sdir1) and os.path.isdir(sdir2) and recogniser in confloader.filters(filterdir).keys() and width > 0:
                training = Training.CNNtrain(configdir, filterdir, sdir1, sdir2, recogniser, width, CLI=True)
                training.cliTrain()
                print("Training complete, closing AviaNZ")
            else:
                _fail("ERROR: valid input dirs (-d and -e) and recogniser name (-r) are essential for training")
        elif testing:
            import Training
            filts = confloader.filters(filterdir)
            if os.path.isdir(sdir1) and recogniser in filts:
                testing = Training.CNNtest(sdir1, filts[recogniser], recogniser, configdir, filterdir, CLI=True)
                print("Testing complete, closing AviaNZ")
            else:
                _fail("ERROR: valid input dir (-d) and recogniser name (-r) are essential for testing")
        else:
            if (cheatsheet or zooniverse) and isinstance(infile, str):
                # NOTE(review): this calls the imported module object itself;
                # the GUI branch uses AviaNZ_manual.AviaNZ - confirm which
                # class is meant here.
                import AviaNZ
                avianz = AviaNZ(configdir=configdir, CLI=True, cheatsheet=cheatsheet, zooniverse=zooniverse,
                                firstFile=infile, imageFile=imagefile, command=command)
                print("Analysis complete, closing AviaNZ")
            else:
                _fail("ERROR: valid input file (-f) is needed")
    else:
        task = None
        print("Starting AviaNZ in GUI mode")
        from PyQt5.QtWidgets import QApplication
        app = QApplication(sys.argv)
        # a hack to fix default font size (Win 10 suggests 7 pt for QLabels for some reason)
        QApplication.setFont(QApplication.font("QMenu"))

        while True:
            # splash screen?
            if task is None:
                # This screen asks what you want to do, then processes the response
                import Dialogs
                first = Dialogs.StartScreen()
                first.show()
                app.exec_()
                task = first.getValues()

            avianz = None
            if task == 1:
                import AviaNZ_manual
                avianz = AviaNZ_manual.AviaNZ(configdir=configdir)
            elif task == 2:
                import AviaNZ_batch_GUI
                avianz = AviaNZ_batch_GUI.AviaNZ_batchWindow(configdir=configdir)
            elif task == 3:
                import AviaNZ_batch_GUI
                avianz = AviaNZ_batch_GUI.AviaNZ_reviewAll(configdir=configdir)
            elif task == 4:
                import SplitAnnotations
                avianz = SplitAnnotations.SplitData()

            # catch bad initialisation
            if avianz:
                avianz.activateWindow()
            else:
                return

            out = app.exec_()
            QApplication.closeAllWindows()
            QApplication.processEvents()

            # catch exit code to see if restart requested:
            # (note: do not use this for more complicated cleanup,
            #  no guarantees that it is returned before program closes)
            if out == 0:
                # default quit
                break
            elif out == 1:
                # restart to splash screen
                task = None
            elif out == 2:
                # request switch to Splitter
                task = 4
def detect(self):
    """Choose filters and sampling regime, then process every file in the folder.

    When a UI is attached, the species list and directory are taken from
    it. The method is "Default" for "Any sound", "Intermittent sampling"
    for that option, "Click" when "NZ Bats" is selected and "Wavelets"
    otherwise. Outside test mode a log is opened in the target directory,
    the user is asked whether to resume a previous analysis and whether to
    launch, and old DetectionSummary Excel files are removed afterwards.

    Returns:
        0 after processing, 1 when the selected recognisers have differing
        sample rates, 2 when the user declines the launch prompt.
    """
    if hasattr(self, 'ui'):
        self.species = self.ui.species
        self.dirName = self.ui.dirName

    # ---- method, filters and display string ----
    filters = None
    if "Any sound" in self.species:
        self.method = "Default"
        speciesStr = "Any sound"
    elif "Any sound (Intermittent sampling)" in self.species:
        self.method = "Intermittent sampling"
        speciesStr = "Intermittent sampling"
    else:
        # Should bats only be possible alone?
        self.method = "Click" if "NZ Bats" in self.species else "Wavelets"

        # double-check that all Fs are equal (should already be prevented by UI)
        filters = [self.FilterDicts[name] for name in self.species]
        if len({filt["SampleRate"] for filt in filters}) > 1:
            print("ERROR: multiple sample rates found in selected recognisers, change selection")
            return 1

        # convert list to string
        speciesStr = " & ".join(self.species)

        # load target CNN models (currently stored in the same dir as filters)
        # format: {filtername: [model, win, inputdim, output]}
        self.CNNDicts = self.ConfigLoader.CNNmodels(self.FilterDicts, self.filtersDir, self.species)

    # ---- list all files that will be processed (wav, or bmp in Click mode) ----
    wantedExt = '.bmp' if self.method == "Click" else '.wav'
    allwavs = [
        os.path.join(root, filename)
        for root, dirs, files in os.walk(str(self.dirName))
        for filename in files
        if filename.lower().endswith(wantedExt)
    ]
    total = len(allwavs)

    # ---- user-set time window, in seconds since midnight ----
    if self.CLI or self.testmode:
        timeWindow_s = 0
        timeWindow_e = 0
    else:
        def toSeconds(qtime):
            return qtime.hour() * 3600 + qtime.minute() * 60 + qtime.second()

        timeWindow_s = toSeconds(self.ui.w_timeStart.time())
        timeWindow_e = toSeconds(self.ui.w_timeEnd.time())
        self.wind = self.ui.w_wind.isChecked()

    if self.testmode:
        settings = [self.method, timeWindow_s, timeWindow_e, self.wind]
    else:
        # LOG FILE is read here
        # note: important to log all analysis settings here
        if self.method == "Intermittent sampling":
            settings = [self.method, timeWindow_s, timeWindow_e,
                        self.config["protocolSize"], self.config["protocolInterval"]]
        else:
            settings = [self.method, timeWindow_s, timeWindow_e, self.wind]
        logPath = os.path.join(self.dirName, 'LastAnalysisLog.txt')
        self.log = SupportClasses.Log(logPath, speciesStr, settings)

        # Ask for RESUME CONFIRMATION here
        if not self.log.possibleAppend:
            # work on all files
            self.filesDone = []
        else:
            filesExistAndDone = set(self.log.filesDone).intersection(set(allwavs))
            text = "".join([
                "Previous analysis found in this folder (analysed ",
                str(len(filesExistAndDone)),
                " out of ",
                str(total),
                " files in this folder).\nWould you like to resume that analysis?",
            ])
            if self.CLI:
                answer = input(text)
                resume = answer.lower() in ('yes', 'y')
            else:
                resume = self.ui.check_msg("Resume previous batch analysis?", text)
            # either ignore the files already in the log, or process all files
            self.filesDone = filesExistAndDone if resume else []

        # Ask for FINAL USER CONFIRMATION here
        cnt = len(self.filesDone)
        if self.method == "Intermittent sampling":
            text = "Method: " + self.method + ".\nNumber of files to analyse: " + str(total) + "\n"
        else:
            text = ("Species: " + speciesStr + ", method: " + self.method
                    + ".\nNumber of files to analyse: " + str(total)
                    + ", " + str(cnt) + " done so far.\n")
        text += "Output stored in " + self.dirName + "/DetectionSummary_*.xlsx.\n"
        text += "Log file stored in " + self.dirName + "/LastAnalysisLog.txt.\n"
        if speciesStr == "Any sound" or self.method == "Click":
            text += "\nWarning: any previous annotations in these files will be deleted!\n"
        else:
            text += "\nWarning: any previous annotations for the selected species in these files will be deleted!\n"
        text = "Analysis will be launched with these settings:\n" + text + "\nConfirm?"

        if self.CLI:
            reply = input(text)
            print(reply.lower(), )
            confirmedLaunch = reply.lower() in ('yes', 'y')
        else:
            confirmedLaunch = self.ui.check_msg("Launch batch analysis", text)

        if not confirmedLaunch:
            print("Analysis cancelled")
            return 2

        # update log: delete everything (by opening in overwrite mode),
        # reprint old headers,
        # print current header (or old if resuming),
        # print old file list if resuming.
        self.log.file = open(self.log.file, 'w')
        if speciesStr not in ["Any sound", "Intermittent sampling"]:
            self.log.reprintOld()
            # else single-sp runs should be deleted anyway
        self.log.appendHeader(header=None, species=self.log.species, settings=self.log.settings)

    # ---- the long loop ----
    if self.CLI or self.testmode:
        self.mainloop(allwavs, total, speciesStr, filters, settings)
    else:
        # clean up the UI before entering the long loop
        self.ui.clean_UI(total, cnt)
        import pyqtgraph as pg
        with pg.BusyCursor():
            self.mainloop(allwavs, total, speciesStr, filters, settings)

    if not self.testmode:
        # delete old results (xlsx)
        # ! WARNING: any Detection...xlsx files will be DELETED,
        # ! ANYWHERE INSIDE the specified dir, recursively
        # We currently do not export any excels automatically in this mode,
        # the user must do that manually (through Batch Review).
        print("Removing old Excel files...")
        if not self.CLI:
            self.ui.statusBar().showMessage("Removing old Excel files, almost done...")
            self.ui.dlg.setLabelText("Removing old Excel files...")
            self.ui.update()
            self.ui.repaint()

        for root, dirs, files in os.walk(str(self.dirName)):
            for filename in files:
                filenamef = os.path.join(root, filename)
                if not fnmatch.fnmatch(filenamef, '*DetectionSummary_*.xlsx'):
                    continue
                print("Removing excel file %s" % filenamef)
                os.remove(filenamef)

        if not self.CLI:
            self.ui.dlg.setValue(total + 1)

        # END of processing and exporting. Final cleanup
        self.log.file.close()
        if not self.CLI:
            self.ui.endproc(total)

    print("Processed all %d files" % total)
    return 0
def detect(dirName='', trainTest=False, species=''):
    """Run batch detection over the eligible .wav files found under dirName.

    A file is processed when all of the following hold:
      * its name ends with '.wav' and the file is not empty,
      * no '<name>.wav.data' annotation file sits next to it,
      * its name either carries no '<6 digits>_<6 digits>' stamp, or the
        second digit group (read as HHMMSS) has an hour > 17 or < 6.

    Args:
        dirName: root directory; walked recursively.
        trainTest: when true, the detections are turned into a binary vector
            and scored with ``wSeg.fBetaScore`` against ``wSeg.annotation``;
            otherwise they are written out through
            ``SupportClasses.exportSegments(...).saveAnnotation()``.
        species: 'all' selects ``Segment.bestSegments`` on a spectrogram;
            any other value selects ``WaveletSegment.waveletSegment_test``.
            For 'Kiwi' and 'Ruru' the audio is resampled to 16000 Hz first.

    Returns:
        None. Progress is printed as "current: <count>" per processed file.
    """
    cnt = 0
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    stampPattern = re.compile(r'(\d{6})_(\d{6})')

    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not file.endswith('.wav'):
                continue
            filename = root + '/' + file
            # files with no data (Tier 1 has 0Kb .wavs)
            if os.stat(filename).st_size == 0:
                continue

            Night = False
            DOCRecording = stampPattern.search(file)
            if DOCRecording:
                startTime = DOCRecording.group(2)
                startHour = int(startTime[:2])
                Night = startHour > 17 or startHour < 6
            # avoid day recordings; unstamped files are always kept
            if DOCRecording and not Night:
                continue
            # skip already processed files
            if file + '.data' in files:
                continue

            # load wav and annotation
            wSeg = WaveletSegment.WaveletSegment(species=species)
            wSeg.loadData(fName=filename[:-4], trainTest=trainTest)

            if species in ('Kiwi', 'Ruru') and wSeg.sampleRate != 16000:
                # Imported lazily so librosa is only required when a
                # resample is actually needed.
                import librosa
                # Keyword arguments: recent librosa releases reject
                # positional sample rates.
                wSeg.data = librosa.core.audio.resample(
                    wSeg.data, orig_sr=wSeg.sampleRate, target_sr=16000)
                wSeg.sampleRate = 16000
            datalength = np.shape(wSeg.data)[0]

            if species != 'all':
                segments_possible = wSeg.waveletSegment_test(
                    fName=None, data=wSeg.data, sampleRate=wSeg.sampleRate,
                    species=species, trainTest=trainTest, thr=0.5)
                # Some call paths return a tuple; only its first item is used.
                if isinstance(segments_possible, tuple):
                    segments_possible = segments_possible[0]
            else:
                sp = SignalProc.SignalProc()
                sgRaw = sp.spectrogram(
                    data=wSeg.data, window_width=256, incr=128, window='Hann',
                    mean_normalise=True, onesided=True, multitaper=False,
                    need_even=False)
                segmenter = Segment.Segment(wSeg.data, sgRaw, sp, wSeg.sampleRate)
                segments_possible = segmenter.bestSegments()

            if trainTest:
                # turn into binary format to compute with GT
                nFrames = len(wSeg.annotation)
                detected = np.zeros(nFrames)
                for segment in segments_possible:
                    # Mark indices a with floor(start) <= a < ceil(end),
                    # clamped to the vector so negative bounds cannot wrap.
                    first = max(0, math.floor(segment[0]))
                    last = min(nFrames, math.ceil(segment[1]))
                    if first < last:
                        detected[first:last] = 1
                wSeg.fBetaScore(wSeg.annotation, detected)
            else:
                # Start time in seconds from the HHMMSS stamp, 0 if unstamped.
                if DOCRecording:
                    sTime = (int(startTime[:2]) * 3600
                             + int(startTime[2:4]) * 60
                             + int(startTime[4:6]))
                else:
                    sTime = 0
                out = SupportClasses.exportSegments(
                    segments=segments_possible, species=species,
                    startTime=sTime, dirName=dirName, filename=filename,
                    datalength=datalength, sampleRate=wSeg.sampleRate,
                    method='Wavelets', resolution=60, trainTest=trainTest,
                    withConf=False, operator="Nirosha", reviewer="Nirosha",
                    minLen=3)
                # Nothing is confirmed at this stage; every detection is
                # handed over as a segment to check.
                out.confirmedSegments = []
                out.segmentstoCheck = segments_possible
                # Save the annotation
                out.saveAnnotation()

            cnt = cnt + 1
            print("current: ", cnt)
def __init__(self, testDir, currfilt, filtname, configdir, filterdir, CLI=False):
    """Test a recogniser on the annotated files in testDir.

    Writes a report to ``<testDir>/test-results.txt`` and stores the summary
    in ``self.text`` (0 when no annotated segments for the species exist).
    The report file is always closed before this constructor returns or
    raises.

    Args:
        testDir: directory walked recursively for '.wav' files that have a
            matching '.wav.data' annotation file.
        currfilt: the recognizer to be used (dict); the keys 'species',
            'SampleRate' and 'Filters' are read, and the presence of 'CNN'
            enables the CNN summary.
        filtname: recogniser name, written to the report and passed to the
            batch processor and the CNN model lookup.
        configdir: configuration directory passed to the batch processor.
        filterdir: directory from which filters / CNN models are loaded.
        CLI: accepted but not used inside this constructor.
    """
    self.testDir = testDir
    self.outfile = open(os.path.join(self.testDir, "test-results.txt"), "w")
    # try/finally so the report file is closed on every exit path,
    # including the early returns below and any exception.
    try:
        self.currfilt = currfilt
        self.filtname = filtname
        self.configdir = configdir
        self.filterdir = filterdir
        # Note: this is just the species name, unlike the self.species in Batch mode
        species = self.currfilt['species']
        self.sampleRate = self.currfilt['SampleRate']
        self.calltypes = [fi['calltype'] for fi in self.currfilt['Filters']]

        self.outfile.write("Recogniser name: %s\n" % (filtname))
        self.outfile.write("Species name: %s\n" % (species))
        self.outfile.write("Using data: %s\n" % (self.testDir))

        # 0. Generate GT files from annotations in test folder
        self.manSegNum = 0
        self.window = 1
        print('Generating GT...')
        for root, dirs, files in os.walk(self.testDir):
            for file in files:
                wavFile = os.path.join(root, file)
                if (file.lower().endswith('.wav')
                        and os.stat(wavFile).st_size != 0
                        and file + '.data' in files):
                    segments = Segment.SegmentList()
                    segments.parseJSON(wavFile + '.data')
                    self.manSegNum += len(segments.getSpecies(species))
                    # Currently, we ignore call types here and just
                    # look for all calls for the target species.
                    segments.exportGT(wavFile, species, resolution=self.window)

        if self.manSegNum == 0:
            print("ERROR: no segments for species %s found" % species)
            self.text = 0
            return

        # 1. Run Batch Processing upto WF and generate .tempdata files (no post-proc)
        # The returned object is not referenced again in this method; the
        # binding only keeps it alive while the summaries are computed.
        avianz_batch = AviaNZ_batch.AviaNZ_batchProcess(
            parent=None, configdir=self.configdir, mode="test",
            sdir=self.testDir, recogniser=filtname, wind=1)
        # NOTE: will use wind-robust detection

        # 2. Report statistics of WF followed by general post-proc steps
        # (no CNN but wind-merge neighbours-delete short)
        self.text = self.getSummary(CNN=False)

        # 3. Report statistics of WF followed by post-proc steps
        # (wind-CNN-merge neighbours-delete short)
        if "CNN" in self.currfilt:
            cl = SupportClasses.ConfigLoader()
            filterlist = cl.filters(self.filterdir, bats=False)
            CNNDicts = cl.CNNmodels(filterlist, self.filterdir, [filtname])
            # Providing one filter, so only one CNN should be returned:
            if len(CNNDicts) != 1:
                print("ERROR: Couldn't find a unique matching CNN!")
                self.outfile.write("No matching CNN found!\n")
                self.outfile.write("-- End of testing --\n")
                return
            self.text = self.getSummary(CNN=True)

        self.outfile.write("-- End of testing --\n")
    finally:
        self.outfile.close()

    print("Testing output written to " + os.path.join(self.testDir, "test-results.txt"))
def mainlauncher(cli, cheatsheet, zooniverse, infile, imagefile, batchmode, training, testing, sdir1, sdir2, recogniser, wind, width, command):
    """Prepare the user config directory, then start AviaNZ in CLI or GUI mode.

    Setup (always performed): locate the per-user config dir, create it if
    missing, validate ``AviaNZconfig.txt`` against ``Config/config.schema``
    (restoring the distributed default when validation fails), and copy any
    missing bird lists, learning parameters and recognisers from the
    distributed ``Config`` and ``Filters`` directories.

    Args:
        cli: truthy for command-line mode, falsy for GUI mode.
        cheatsheet, zooniverse: CLI flags; with a string ``infile`` they
            start the manual program in CLI mode.
        infile, imagefile, command: forwarded to the manual program as
            ``firstFile``, ``imageFile`` and ``command``.
        batchmode, training, testing: select the CLI task (checked in this
            order).
        sdir1: input directory for batch / training / testing.
        sdir2: second input directory, required for training.
        recogniser: recogniser name; must be a key of the loaded filters.
        wind: forwarded to the batch processor.
        width: forwarded to CNN training; must be > 0.

    Exits the process with status 1 on unrecoverable setup or argument
    errors.
    """
    # determine location of config file and bird lists
    osName = platform.system()
    if osName == 'Windows':
        # Win
        configdir = os.path.expandvars(os.path.join("%APPDATA%", "AviaNZ"))
    elif osName == 'Linux' or osName == 'Darwin':
        # Unix
        configdir = os.path.expanduser("~/.avianz/")
    else:
        print("ERROR: what OS is this? %s" % osName)
        sys.exit(1)

    # if config and bird files not found, copy from distributed backups.
    # so these files will always exist on load (although they could be corrupt)
    # (exceptions here not handled and should always result in crashes)
    if not os.path.isdir(configdir):
        print("Creating config dir %s" % configdir)
        try:
            os.makedirs(configdir)
        except Exception as e:
            print("ERROR: failed to make config dir")
            print(e)
            sys.exit(1)

    # pre-run check of config file validity
    confloader = SupportClasses.ConfigLoader()
    # Context manager so the schema file handle is released promptly.
    with open("Config/config.schema") as schemafile:
        configschema = json.load(schemafile)
    try:
        config = confloader.config(os.path.join(configdir, "AviaNZconfig.txt"))
        validate(instance=config, schema=configschema)
        print("successfully validated config file")
    except Exception as e:
        print("Warning: config file failed validation with:")
        print(e)
        # Fall back to the distributed default config.
        try:
            shutil.copy2("Config/AviaNZconfig.txt", configdir)
        except Exception as copyerr:
            print("ERROR: failed to copy essential config files")
            print(copyerr)
            sys.exit(1)

    # check and if needed copy any other necessary files
    necessaryFiles = ["ListCommonBirds.txt", "ListDOCBirds.txt", "ListBats.txt", "LearningParams.txt"]
    for f in necessaryFiles:
        if not os.path.isfile(os.path.join(configdir, f)):
            print("File %s not found in config dir, providing default" % f)
            try:
                shutil.copy2(os.path.join("Config", f), configdir)
            except Exception as e:
                print("ERROR: failed to copy essential config files")
                print(e)
                sys.exit(1)

    # copy over filters to ~/.avianz/Filters/:
    filterdir = os.path.join(configdir, "Filters/")
    if not os.path.isdir(filterdir):
        print("Creating filter dir %s" % filterdir)
        os.makedirs(filterdir)
    for f in os.listdir("Filters"):
        ff = os.path.join("Filters", f)  # Kiwi.txt
        if not os.path.isfile(os.path.join(filterdir, f)):  # ~/.avianz/Filters/Kiwi.txt
            print("Recogniser %s not found, providing default" % f)
            try:
                shutil.copy2(ff, filterdir)  # cp Filters/Kiwi.txt ~/.avianz/Filters/
            except Exception as e:
                # A missing recogniser is not fatal, so only warn.
                print("Warning: failed to copy recogniser %s to %s" % (ff, filterdir))
                print(e)

    # run splash screen:
    if cli:
        print("Starting AviaNZ in CLI mode")
        if batchmode:
            import AviaNZ_batch
            if os.path.isdir(sdir1) and recogniser in confloader.filters(filterdir):
                AviaNZ_batch.AviaNZ_batchProcess(parent=None, mode="CLI", configdir=configdir, sdir=sdir1, recogniser=recogniser, wind=wind)
                print("Analysis complete, closing AviaNZ")
            else:
                print("ERROR: valid input dir (-d) and recogniser name (-r) are essential for batch processing")
                sys.exit(1)
        elif training:
            import Training
            if os.path.isdir(sdir1) and os.path.isdir(sdir2) and recogniser in confloader.filters(filterdir) and width > 0:
                trainer = Training.CNNtrain(configdir, filterdir, sdir1, sdir2, recogniser, width, CLI=True)
                trainer.cliTrain()
                print("Training complete, closing AviaNZ")
            else:
                print("ERROR: valid input dirs (-d and -e) and recogniser name (-r) are essential for training")
                sys.exit(1)
        elif testing:
            import Training
            if os.path.isdir(sdir1) and recogniser in confloader.filters(filterdir):
                Training.CNNtest(sdir1, recogniser, configdir, filterdir, CLI=True)
                print("Testing complete, closing AviaNZ")
            else:
                print("ERROR: valid input dir (-d) and recogniser name (-r) are essential for testing")
                sys.exit(1)
        else:
            if (cheatsheet or zooniverse) and isinstance(infile, str):
                # Instantiate the class from AviaNZ_manual, as the GUI branch
                # below does; calling an imported module object directly
                # raises TypeError.
                # NOTE(review): assumes AviaNZ_manual.AviaNZ accepts these
                # keyword arguments - confirm against its constructor.
                import AviaNZ_manual
                avianz = AviaNZ_manual.AviaNZ(configdir=configdir, CLI=True, cheatsheet=cheatsheet, zooniverse=zooniverse,
                                              firstFile=infile, imageFile=imagefile, command=command)
                print("Analysis complete, closing AviaNZ")
            else:
                print("ERROR: valid input file (-f) is needed")
                sys.exit(1)
    else:
        print("Starting AviaNZ in GUI mode")
        # This screen asks what you want to do, then processes the response
        import Dialogs
        from PyQt5.QtWidgets import QApplication
        app = QApplication(sys.argv)
        first = Dialogs.StartScreen()
        first.show()
        app.exec_()

        task = first.getValues()

        avianz = None
        if task == 1:
            import AviaNZ_manual
            avianz = AviaNZ_manual.AviaNZ(configdir=configdir)
        elif task == 2:
            import AviaNZ_batch_GUI
            avianz = AviaNZ_batch_GUI.AviaNZ_batchWindow(configdir=configdir)
        elif task == 3:
            import AviaNZ_batch_GUI
            avianz = AviaNZ_batch_GUI.AviaNZ_reviewAll(configdir=configdir)

        if avianz:
            avianz.show()
        else:
            return
        out = app.exec_()
        QApplication.closeAllWindows()

        # restart requested:
        if out == 1:
            # NOTE(review): called without arguments. This only works if the
            # function is wrapped by a CLI decorator that re-parses argv;
            # as a plain function it would raise TypeError - confirm.
            mainlauncher()
        elif out == 2:
            import SplitAnnotations
            avianz = SplitAnnotations.SplitData()
            avianz.show()
            app.exec_()
            print("Processing complete, returning to AviaNZ")
            QApplication.closeAllWindows()