def main():
    """Generate train metrics twice: once for the base template set and
    once for the extended template set, writing each to its own CSV.
    """
    base_dir = '/Users/nkridler/Desktop/whale/'  # Base directory

    ############################## PARAMETERS ###############################
    # Spectrogram parameters shared by both runs.
    params = {'NFFT': 256, 'Fs': 2000, 'noverlap': 192}

    ######################## BUILD TrainData OBJECTS ########################
    # New (data2) and old (data) training sets.
    train = fileio.TrainData(base_dir + 'data2/train2.csv',
                             base_dir + 'data2/train2/')
    old = fileio.TrainData(base_dir + 'data/train.csv',
                           base_dir + 'data/train/')

    # (template file, metrics output file) pairs, processed identically.
    runs = [
        (base_dir + 'moby2/templateBase.csv',
         base_dir + 'workspace/trainMetricsBase.csv'),
        (base_dir + 'moby2/manyMoreTemplates.csv',
         base_dir + 'moby2/trainMetricsMoreTemplates.csv'),
    ]
    for tmpl_file, out_file in runs:
        makeMetrics(tmpl_file, train=train, old=old, params=params,
                    trainOutFile=out_file)
def main(): # baseDir = '/home/nick/whale/' # Base directory baseDir = globalConst.BASE_DIR ###################### SET OUTPUT FILE NAME HERE ######################## trainOutFile = baseDir+'workspace/trainMetrics.csv' ############################## PARAMETERS ############################### dataDir = baseDir+'data/' # Data directory templateDataDir = baseDir+'template_data/' # Data directory params = {'NFFT':256, 'Fs':2000, 'noverlap':192} # Spectogram parameters maxTime = 60 # Number of time slice metrics ######################## BUILD A TrainData OBJECT ####################### train = fileio.TrainData( dataDir+'train.csv',dataDir+'train/') templateTrain = fileio.TrainData(templateDataDir+'train.csv',templateDataDir+'train/') ##################### BUILD A TemplateManager OBJECT #################### tmplFile = baseDir+'moby/templateReduced.csv' tmpl = templateManager.TemplateManager(fileName=tmplFile, trainObj=templateTrain, params=params) ##################### PLOT the templates #################### for ix in range(tmpl.size): print "Plotting template:",ix tmpl.PlotTemplates(index=ix)
def main():
    """Compute template and high-frequency metrics for every test clip and
    write them as a CSV to workspace/testMetrics.csv.
    """
    ###################### WORKING DIRECTORY ########################
    baseDir = globalConst.BASE_DIR
    ###################### SET OUTPUT FILE NAME HERE ########################
    testOutFile = baseDir + 'workspace/testMetrics.csv'
    ############################## PARAMETERS ###############################
    dataDir = baseDir + 'data/'                          # Data directory
    templateDataDir = baseDir + 'template_data/'         # Template data directory
    params = {'NFFT': 256, 'Fs': 2000, 'noverlap': 192}  # Spectrogram parameters
    maxTime = 60                                         # Number of time slice metrics
    ######################## BUILD A TestData OBJECT ########################
    train = fileio.TrainData(dataDir + 'train.csv', dataDir + 'train/')
    templateTrain = fileio.TrainData(templateDataDir + 'train.csv',
                                     templateDataDir + 'train/')
    test = fileio.TestData(dataDir + 'test/')
    ##################### BUILD A TemplateManager OBJECT ####################
    tmplFile = baseDir + 'moby/templateReduced.csv'
    tmpl = templateManager.TemplateManager(fileName=tmplFile,
                                           trainObj=templateTrain, params=params)
    ################## VERTICAL BARS FOR HIFREQ METRICS #####################
    # Three vertical-bar masks of different widths used by highFreqTemplate.
    bar_ = np.zeros((12, 9), dtype='Float32')
    bar1_ = np.zeros((12, 12), dtype='Float32')
    bar2_ = np.zeros((12, 6), dtype='Float32')
    bar_[:, 3:6] = 1.
    bar1_[:, 4:8] = 1.
    bar2_[:, 2:4] = 1.
    ########################### CREATE THE HEADER ###########################
    outHdr = metrics.buildHeader(tmpl)
    hL = []
    ####################### LOOP THROUGH THE FILE ###########################
    for i in range(test.nTest):
        printStatus(i)
        # Test samples are numbered starting from 1, hence i + 1.
        P, freqs, bins = test.TestSample(i + 1, params=params)
        out = metrics.computeMetrics(P, tmpl, bins, maxTime)
        out += metrics.highFreqTemplate(P, bar_)
        out += metrics.highFreqTemplate(P, bar1_)
        out += metrics.highFreqTemplate(P, bar2_)
        hL.append(out)
    hL = np.array(hL)
    ########################## WRITE TO FILE ################################
    # Use a context manager (instead of manual close on a variable that
    # shadowed the 'file' builtin) so the handle is closed even if
    # savetxt raises.
    with open(testOutFile, 'w') as outFile:
        outFile.write(outHdr + "\n")
        np.savetxt(outFile, hL, delimiter=',')
def main():
    """Write correlation (order) metrics for both the train and the test
    prediction files into the moby correlation directory.
    """
    base_dir = globalConst.BASE_DIR
    data_dir = base_dir + 'data/'
    cor_dir = base_dir + 'moby/'

    # Open up the train file (kept for its loading side effects).
    train = fileio.TrainData(data_dir + 'train.csv', data_dir + 'train/')

    preds = pd.read_csv(cor_dir + 'trainPredictions.csv')
    order32 = orderMetric(preds.label, 32)
    order64 = orderMetric(preds.label, 64)

    # NOTE: the historical reordering of these metrics by fileID was
    # disabled because the modified genTrainMetrics now writes rows in
    # chronological order already, so copies are written out unchanged.
    writeToFile(order32.copy(), 'corr32.csv', outdir=cor_dir)
    writeToFile(order64.copy(), 'corr64.csv', outdir=cor_dir)

    # (An earlier interpolation scheme stretching the train metrics onto
    # the 54503-clip test timeline was also retired in favor of computing
    # the metrics directly from testPredictions.csv below.)

    # Test predictions carry either a 'label' column (train.csv-style file)
    # or a 'probability' column (submission-style file).
    preds = pd.read_csv(cor_dir + 'testPredictions.csv')
    if 'label' in preds.columns:
        source = preds.label
    elif 'probability' in preds.columns:
        source = preds.probability
    else:
        raise RuntimeError("label or probability not found in testPredictions.csv")
    writeToFile(orderMetric(source, 32), 'testCorr32.csv', outdir=cor_dir)
    writeToFile(orderMetric(source, 64), 'testCorr64.csv', outdir=cor_dir)
def main():
    """Compute metrics for all labeled training clips (whale first, then
    non-whale) and write them, with truth label and index, to
    workspace/trainMetrics.csv.
    """
    baseDir = '/home/nick/whale/'  # Base directory
    ###################### SET OUTPUT FILE NAME HERE ########################
    trainOutFile = baseDir + 'workspace/trainMetrics.csv'
    ############################## PARAMETERS ###############################
    dataDir = baseDir + 'data/'                          # Data directory
    params = {'NFFT': 256, 'Fs': 2000, 'noverlap': 192}  # Spectrogram parameters
    maxTime = 60                                         # Number of time slice metrics
    ######################## BUILD A TrainData OBJECT #######################
    train = fileio.TrainData(dataDir + 'train.csv', dataDir + 'train/')
    ##################### BUILD A TemplateManager OBJECT ####################
    tmplFile = baseDir + 'moby/templateReduced.csv'
    tmpl = templateManager.TemplateManager(fileName=tmplFile, trainObj=train,
                                           params=params)
    ################## VERTICAL BARS FOR HIFREQ METRICS #####################
    # Three vertical-bar masks of different widths used by highFreqTemplate.
    bar_ = np.zeros((12, 9), dtype='Float32')
    bar1_ = np.zeros((12, 12), dtype='Float32')
    bar2_ = np.zeros((12, 6), dtype='Float32')
    bar_[:, 3:6] = 1.
    bar1_[:, 4:8] = 1.
    bar2_[:, 2:4] = 1.
    ########################### CREATE THE HEADER ###########################
    outHdr = metrics.buildHeader(tmpl)
    ###################### LOOP THROUGH THE FILES ###########################
    hL = []
    # Whale clips first (truth label 1) ...
    for i in range(train.numH1):
        P, freqs, bins = train.H1Sample(i, params=params)
        out = metrics.computeMetrics(P, tmpl, bins, maxTime)
        out += metrics.highFreqTemplate(P, bar_)
        out += metrics.highFreqTemplate(P, bar1_)
        out += metrics.highFreqTemplate(P, bar2_)
        hL.append([1, i] + out)
    # ... then non-whale clips (truth label 0).
    for i in range(train.numH0):
        P, freqs, bins = train.H0Sample(i, params=params)
        out = metrics.computeMetrics(P, tmpl, bins, maxTime)
        out += metrics.highFreqTemplate(P, bar_)
        out += metrics.highFreqTemplate(P, bar1_)
        out += metrics.highFreqTemplate(P, bar2_)
        hL.append([0, i] + out)
    hL = np.array(hL)
    # Use a context manager (instead of manual close on a variable that
    # shadowed the 'file' builtin) so the handle is closed even on error.
    with open(trainOutFile, 'w') as outFile:
        outFile.write("Truth,Index," + outHdr + "\n")
        np.savetxt(outFile, hL, delimiter=',')
def main():
    """Reorder the train correlation metrics to match the H1-then-H0 clip
    order and interpolate them onto the test timeline.
    """
    base_dir = '/home/nick/whale/'
    data_dir = base_dir + 'data/'

    # Open up the train file.
    train = fileio.TrainData(data_dir + 'train.csv', data_dir + 'train/')
    labels = pd.read_csv(data_dir + 'train.csv')
    order32 = orderMetric(labels.label, 32)
    order64 = orderMetric(labels.label, 64)

    # Re-order so rows follow the H1-then-H0 sample order: a clip named
    # 'trainNNNNN.aiff' maps back to chronological index NNNNN - 1.
    reorder32 = order32.copy()
    reorder64 = order64.copy()
    row = 0
    for i in xrange(train.numH1):
        src = int(train.h1[i].split('.')[0][5:]) - 1
        reorder32[row] = order32[src]
        reorder64[row] = order64[src]
        row += 1
    for i in xrange(train.numH0):
        src = int(train.h0[i].split('.')[0][5:]) - 1
        reorder32[row] = order32[src]
        reorder64[row] = order64[src]
        row += 1
    writeToFile(reorder32, 'corr32.csv')
    writeToFile(reorder64, 'corr64.csv')

    # There are 84503 samples in all; stretch the 30000 train metrics onto
    # the 54503-sample test timeline (each clip covers 2 seconds).
    n_train = 30000
    n_test = 54503
    total_time = (n_train + n_test) * 2.
    test_times = np.linspace(0, total_time, n_test)
    train_times = np.linspace(0, total_time, n_train)
    writeToFile(np.interp(test_times, train_times, order32), 'testCorr32.csv')
    writeToFile(np.interp(test_times, train_times, order64), 'testCorr64.csv')
def _copy_clips(fnames, src_dir, dst_dir):
    # Copy each named recording from src_dir into dst_dir.
    for fname in fnames:
        shutil.copyfile(os.path.join(src_dir, fname),
                        os.path.join(dst_dir, fname))


def create_dirs(Ntrain, Nval, Ntest, original_dataset_dir, base_dir):
    """Create a small train/validation/test split of the whale dataset.

    Builds base_dir/{train,validation,test}/{right,nonright} directories and
    copies Ntrain, Nval and Ntest right-whale (h1) and non-right-whale (h0)
    recordings into the respective splits, using disjoint index ranges.
    Any existing base_dir is deleted first.

    Returns:
        partition -- dict mapping 'train'/'validation'/'test' to the list of
                     filenames in that split (right first, then nonright)
        labels    -- dict mapping filename -> 1 (right whale) or 0 (non-right)
    """
    # Start from a clean slate.
    if os.path.exists(base_dir):
        shutil.rmtree(base_dir)
    os.mkdir(base_dir)

    # Define train, validation and test splits, each with right/nonright
    # class subfolders.
    class_dirs = {}
    for split in ('train', 'validation', 'test'):
        split_dir = os.path.join(base_dir, split)
        os.mkdir(split_dir)
        for cls in ('right', 'nonright'):
            class_dir = os.path.join(split_dir, cls)
            os.mkdir(class_dir)
            class_dirs[(split, cls)] = class_dir

    # Work out which files are right/non-right from the labels csv, then
    # copy the smaller set into base_dir.
    original_train_dir = os.path.join(original_dataset_dir, 'train')
    # NOTE(review): the second argument looks like it was meant to be the
    # original train directory; only the csv appears to matter here — confirm.
    train = fileio.TrainData(original_dataset_dir + '/train.csv',
                             base_dir + 'train')

    # Disjoint slices of the positive (h1) and negative (h0) recordings.
    names = {
        ('train', 'right'): [train.h1[i] for i in range(Ntrain)],
        ('train', 'nonright'): [train.h0[i] for i in range(Ntrain)],
        ('validation', 'right'): [train.h1[i]
                                  for i in range(Ntrain, Ntrain + Nval)],
        ('validation', 'nonright'): [train.h0[i]
                                     for i in range(Ntrain, Ntrain + Nval)],
        ('test', 'right'): [train.h1[i]
                            for i in range(Ntrain + Nval, Ntrain + Nval + Ntest)],
        ('test', 'nonright'): [train.h0[i]
                               for i in range(Ntrain + Nval, Ntrain + Nval + Ntest)],
    }
    for key, fnames in names.items():
        _copy_clips(fnames, original_train_dir, class_dirs[key])

    # Filenames per split plus a label (1 = right whale) per filename.
    partition = {split: names[(split, 'right')] + names[(split, 'nonright')]
                 for split in ('train', 'validation', 'test')}
    labels = {}
    for (split, cls), fnames in names.items():
        for fname in fnames:
            labels[fname] = 1 if cls == 'right' else 0
    return partition, labels
def main(): baseDir = globalConst.BASE_DIR ###################### SET OUTPUT FILE NAME HERE ######################## trainOutFile = baseDir+'workspace/trainMetrics.csv' ############################## PARAMETERS ############################### dataDir = baseDir+'data/' # Data directory templateDataDir = baseDir+'template_data/' # Data directory params = {'NFFT':256, 'Fs':2000, 'noverlap':192} # Spectogram parameters maxTime = 60 # Number of time slice metrics ######################## BUILD A TrainData OBJECT ####################### train = fileio.TrainData( dataDir+'train.csv',dataDir+'train/') templateTrain = fileio.TrainData(templateDataDir+'train.csv',templateDataDir+'train/') ##################### BUILD A TemplateManager OBJECT #################### tmplFile = baseDir+'moby/templateReduced.csv' tmpl = templateManager.TemplateManager(fileName=tmplFile, trainObj=templateTrain, params=params) ################## VERTICAL BARS FOR HIFREQ METRICS ##################### bar_ = np.zeros((12,9),dtype='Float32') bar1_ = np.zeros((12,12),dtype='Float32') bar2_ = np.zeros((12,6),dtype='Float32') bar_[:,3:6] = 1. bar1_[:,4:8] = 1. bar2_[:,2:4] = 1. 
########################### CREATE THE HEADER ########################### outHdr = metrics.buildHeader(tmpl) ###################### LOOP THROUGH THE FILES ########################### hL = [] print "\nprocessing whale clips" for i in range(train.numH1): printStatus(i) clip_filename = train.h1[i] fid = fileID(clip_filename) P, freqs, bins = train.H1Sample(i,params=params) out = metrics.computeMetrics(P, tmpl, bins, maxTime) out += metrics.highFreqTemplate(P, bar_) out += metrics.highFreqTemplate(P, bar1_) out += metrics.highFreqTemplate(P, bar2_) #hL.append([1, i] + out) hL.append( (fid, [1,i]+out) ) # NOTE added fid for sorting below print "\nprocessing non-whale clips" for i in range(train.numH0): printStatus(i) clip_filename = train.h0[i] fid = fileID(clip_filename) P, freqs, bins = train.H0Sample(i,params=params) out = metrics.computeMetrics(P, tmpl, bins, maxTime) out += metrics.highFreqTemplate(P, bar_) out += metrics.highFreqTemplate(P, bar1_) out += metrics.highFreqTemplate(P, bar2_) #hL.append([0, i] + out) hL.append( (fid, [0,i]+out) ) # NOTE added fid for sorting below # NOTE Sort by fileID so metrics file lines are in # sequential order (train1.aiff, train2.aiff....) print "\nsorting metrics by fileID" hL_sort = np.array( [metric_lst for fid, metric_lst in sorted(hL)] ) print "writing metrics to:", trainOutFile file = open(trainOutFile,'w') file.write("Truth,Index,"+outHdr+"\n") np.savetxt(file,hL_sort,delimiter=',') file.close()