def buildClassifier(trainFile, testFile, tCats=None, ttCats=None, classType="NaiveBayes", save=False, K=None): ''' Code inspired by Bruce's code ''' dtrain = data.Data(trainFile) dtest = data.Data(testFile) if (tCats != None and ttCats != None): traincatdata = data.Data(tCats) traincats = traincatdata.get_data([traincatdata.get_headers()[0]], traincatdata.get_num_rows()) testcatdata = data.Data(ttCats) testcats = testcatdata.get_data([testcatdata.get_headers()[0]], testcatdata.get_num_rows()) A = dtrain.get_data(dtrain.get_headers(), dtrain.get_num_rows()) B = dtest.get_data(dtest.get_headers(), dtest.get_num_rows()) else: # assume the categories are the last column traincats = dtrain.get_data([dtrain.get_headers()[-1]], dtrain.get_num_rows()) testcats = dtest.get_data([dtest.get_headers()[-1]], dtest.get_num_rows()) A = dtrain.get_data(dtrain.get_headers()[:-1], dtrain.get_num_rows()) B = dtest.get_data(dtest.get_headers()[:-1], dtest.get_num_rows()) #default is a naiveBayes Classifier nbc = classifiers.NaiveBayes() if (classType == "KNN"): if K != None: nbc = classifiers.KNN(K=K) nbc.build(A, traincats) ctraincats, ctrainlabels = nbc.classify(A) ctestcats, ctestlabels = nbc.classify(B) else: #default K of 3 nbc = classifiers.KNN(K=3) nbc.build(A, traincats) ctraincats, ctrainlabels = nbc.classify(A) ctestcats, ctestlabels = nbc.classify(B) else: # build the classifier using the training data nbc.build(A, traincats) # use the classifier on the training data ctraincats, ctrainlabels = nbc.classify(A) ctestcats, ctestlabels = nbc.classify(B) if save == True: ctestcats.tofile('cTestCats.csv', sep=" ", format="%s") ctestlabels.tofile('cTestLabels.csv', sep=" ", format="%s") print "Training Data" print nbc.confusion_matrix_str(nbc.confusion_matrix(traincats, ctraincats)) print "Test Data" print nbc.confusion_matrix_str(nbc.confusion_matrix(testcats, ctestcats)) return nbc
def main(argv):
    """Train a NaiveBayes or KNN classifier and report confusion matrices.

    Reads in a training set and its category labels, possibly as a separate
    file. argv: [script, training file, test file,
    optional training categories, optional test categories, 'KNN'|'NaiveBayes'].
    Writes the train/test data (with a category column appended) to
    datasets/trainData.csv and datasets/testData.csv, and shows graphical
    confusion matrices.
    """
    # usage
    if len(argv) < 3:
        print "usage: python %s <Training File> <Test File> <opt: Training Categories> <opt: Test Categories> <KNN or NaiveBayes>" % (
            argv[0])
        return
    # read the training and test sets
    print "Reading: \n Training: %s\n Test: %s\n KNN/NB: %s\n " % (
        argv[1], argv[2], argv[-1])
    trainData = data.Data(argv[1])
    testData = data.Data(argv[2])  #test data
    # header lists for the confusion-matrix display: [0] = train, [1] = test
    headerList = [1, 2]
    headerList[0] = trainData.getHeaderRaw()
    headerList[1] = testData.getHeaderRaw()
    # print trainData
    # print testData
    headers = []  #header names for cmtx
    # get the categories and the training data A and the test data B
    if len(argv) > 4:
        traincatdata = data.Data(argv[3])
        testcatdata = data.Data(argv[4])
        # needs to be a list
        traincats = traincatdata.getDataNum([traincatdata.getHeaderRaw()[0]])
        testcats = testcatdata.getDataNum([testcatdata.getHeaderRaw()[0]])
        A = trainData.getDataNum(trainData.getHeaderRaw())
        B = testData.getDataNum(testData.getHeaderRaw())
    else:
        # assume the categories are the last columnlen
        traincats = trainData.getDataNum([trainData.getHeaderRaw()[-1]])
        testcats = testData.getDataNum([testData.getHeaderRaw()[-1]])
        A = trainData.getDataNum(trainData.getHeaderRaw()[:-1])
        B = testData.getDataNum(testData.getHeaderRaw()[:-1])
    # anything other than "NaiveBayes" (including typos) falls through to KNN
    if argv[-1] == "NaiveBayes":
        classifier = classifiers.NaiveBayes()
    else:
        classifier = classifiers.KNN()
    classifier.build(A, traincats)
    ctraincats, ctrainlabels = classifier.classify(A)
    # print ctrainlabels[:20]
    # # # print traincats[:20]
    print "Training Data"
    print tabulate(
        classifier.confusionMatrixStr(
            classifier.confusionMatrix(traincats, ctrainlabels),
            headerList[0]))
    # append the true categories as a "codes" column and save the train set
    trainData.addCol("codes", "numeric", traincats.T.tolist()[0])
    #print "data: ", trainData.getDataNum(["Training Cats"])
    # NOTE(review): file handle is never closed; consider a with-statement
    f = open('datasets/trainData.csv', 'w')
    trainData.writeOut(f, trainData.getHeaderRaw(), "numeric")
    print "\n"
    classifier.confusionMatrixGraphic(
        classifier.confusionMatrix(traincats, ctrainlabels),
        headerList[0],
        title="Confusion Matrix of Training Data")
    print "Test Data"
    ctestcats, ctestlabels = classifier.classify(B)
    print tabulate(
        classifier.confusionMatrixStr(
            classifier.confusionMatrix(testcats, ctestlabels), headerList[1]))
    # append the true test categories and save the test set
    testData.addCol("Test Cats", "numeric", testcats.T.tolist()[0])
    #print "data: ", testData.getDataNum(["Training Cats"])
    # NOTE(review): handle rebound without closing the previous one
    f = open('datasets/testData.csv', 'w')
    testData.writeOut(f, testData.getHeaderRaw(), "numeric")
    print "\n"
    classifier.confusionMatrixGraphic(classifier.confusionMatrix(
        testcats, ctestlabels), headerList[1],
        title="Confusion Matrix of Test Data")
def main(argv):
    """Train and evaluate a Naive Bayes ('n') or KNN ('k') classifier.

    argv: [script, training data file, test data file, 'n'|'k',
    optional training categories file, optional test categories file].
    Without category files, the last column of each data file is the
    category. Appends the predicted categories to the test data and writes
    it to heresyourdata.csv.
    """
    if len(argv) < 4 or (argv[3] != 'k' and argv[3] != 'n'):
        print(
            'Usage: python %s <training data file> <test data file> <n for Naive Bayes, k for KNN> <optional training categories file> <optional test categories file>'
            % (argv[0]))
        print(
            ' If categories are not provided as separate files, then the last column is assumed to be the category.'
        )
        exit(-1)
    train_file = argv[1]
    test_file = argv[2]
    knn = argv[3] == 'k'
    dtrain = data.Data(train_file)
    dtest = data.Data(test_file)
    if len(argv) >= 6:
        train_headers = dtrain.get_headers()
        # bug fix: the test headers must come from the test data set
        # (original read dtrain.get_headers(), which breaks whenever the
        # two files have different headers)
        test_headers = dtest.get_headers()
        traincat_file = argv[4]
        testcat_file = argv[5]
        traincats = data.Data(traincat_file)
        traincatdata = traincats.limit_columns(traincats.get_headers())
        testcats = data.Data(testcat_file)
        testcatdata = testcats.limit_columns(testcats.get_headers())
    else:
        train_headers = dtrain.get_headers()[:-1]
        # bug fix: was dtrain.get_headers()[:-1]
        test_headers = dtest.get_headers()[:-1]
        # assume the categories are the last column
        traincatdata = dtrain.limit_columns([dtrain.get_headers()[-1]])
        testcatdata = dtest.limit_columns([dtest.get_headers()[-1]])
    # map arbitrary category labels to contiguous integer codes 0..C-1
    uniquelabels, correctedtraincats = np.unique(
        traincatdata.T.tolist()[0], return_inverse=True)
    correctedtraincats = np.matrix([correctedtraincats]).T
    uniquelabels, correctedtestcats = np.unique(
        testcatdata.T.tolist()[0], return_inverse=True)
    correctedtestcats = np.matrix([correctedtestcats]).T
    if not knn:
        nbc = classifiers.NaiveBayes(dtrain, train_headers, traincatdata)
        print('Naive Bayes Training Set Results')
        A = dtrain.limit_columns(train_headers)
        newcats, newlabels = nbc.classify(A)
        traincats = newcats
        print('making confusion matrix')
        confmtx = nbc.confusion_matrix(correctedtraincats, newcats)
        print(nbc.confusion_matrix_str(confmtx))
        print('Naive Bayes Test Set Results')
        # coerce numeric-looking header names to ints, stopping at the first
        # non-numeric one (assumes numeric headers come first — TODO confirm)
        for i in range(len(test_headers)):
            try:
                test_headers[i] = int(test_headers[i])
            except (ValueError, TypeError):
                break
        A = dtest.limit_columns(test_headers)
        print('classifying with naive bayes classifier')
        newcats, newlabels = nbc.classify(A)
        print('confusion matrix')
        confmtx = nbc.confusion_matrix(correctedtestcats, newcats)
        print(nbc.confusion_matrix_str(confmtx))
    else:
        print('knn')
        print('-----------------')
        print('Building KNN Classifier')
        knnc = classifiers.KNN(dtrain, train_headers, traincatdata, 3)
        print('KNN Training Set Results')
        A = dtrain.limit_columns(train_headers)
        newcats, newlabels = knnc.classify(A)
        traincats = newcats
        confmtx = knnc.confusion_matrix(correctedtraincats, newcats)
        print(knnc.confusion_matrix_str(confmtx))
        print('KNN Test Set Results')
        A = dtest.limit_columns(test_headers)
        newcats, newlabels = knnc.classify(A)
        print('KNN TEST::Correct labels\n', correctedtestcats.T)
        print('KNN TEST:::Predicted labels\n', newcats)
        # print the confusion matrix
        confmtx = knnc.confusion_matrix(correctedtestcats, newcats)
        print(knnc.confusion_matrix_str(confmtx))
    # append the predictions to the test data and write it out
    test_headers.append('predicted')
    dtest.add_header2col('predicted')
    dtest.add_column(newcats.T)
    dtest.write("heresyourdata.csv", test_headers)
    return
def __init__(self, train, test, t1=None, ts1=None):
    # Build a NaiveBayes classifier from the training CSV, print train/test
    # confusion matrices, and write the test data plus predicted categories
    # to 'testmatrix_data.csv'. t1/ts1 are optional train/test category files.
    # read the training and test sets
    dtrain = data.Data(train)
    dtest = data.Data(test)
    """ Bruce KNN test code source starts here, with additional comments for my understanding """
    # get the categories and the training data A and the test data B
    if t1!=None and ts1!=None:
        # NOTE(review): unlike sibling scripts (which use the FIRST column of
        # the category files and the FULL data headers here), this branch
        # reads the LAST column of the category files and still drops the
        # last data column — confirm this is intended.
        traincatdata = data.Data(t1)
        testcatdata = data.Data(ts1)
        traincats = traincatdata.get_data( [traincatdata.get_headers()[-1]] )
        testcats = testcatdata.get_data( [testcatdata.get_headers()[-1]] )
        A = dtrain.get_data( dtrain.get_headers()[:-1] )
        B = dtest.get_data( dtest.get_headers()[:-1] )
    else:
        # assume the categories are the last column
        traincats = dtrain.get_data( [dtrain.get_headers()[-1]] )  # training categories
        testcats = dtest.get_data( [dtest.get_headers()[-1]] )  # test categories
        A = dtrain.get_data( dtrain.get_headers()[:-1] )  # train data matrice
        B = dtest.get_data( dtest.get_headers()[:-1] )  # test data matrice
    # create two classifiers, one using 10 exemplars per class
    nb = classifiers.NaiveBayes()
    # NOTE(review): knnc10 is never used, and despite its name it is another
    # NaiveBayes instance, not a KNN classifier.
    knnc10 = classifiers.NaiveBayes()
    # build the classifiers given data and categories
    nb.build( A, traincats )
    # use the classifiers on the test data, to try classify A
    classcats, alllabels =nb.classify(A)
    """ #Bruce KNN test edited for my project code source ends here """
    # Classify the training set and print out a confusion matrix.
    # build confusion matrix and print it out
    confusion_matrix=nb.confusion_matrix(traincats , classcats )
    # # print out the confusion matrix
    cmtxA=nb.confusion_matrix_str(confusion_matrix)
    #print classcats
    print " ** train set confusion matrix **\n ",cmtxA
    # Classify the test set and print out a confusion matrix.
    print " ** in:",B.shape
    # use the classifiers on the test data, to try classify B
    classcats, alllabels = nb.classify( B )
    print " ** out:",classcats.shape
    # build confusion matrix and print it out
    if len(testcats)!=len(classcats):
        # NOTE(review): substituting the predictions for the true categories
        # makes the matrix below trivially diagonal — the message admits it is
        # wrong; the size mismatch should be fixed upstream instead.
        print "#* Error: Something terribly wrong needs to be fixed. THE CONFUSION MATRIX BELOW IS WRONG"
        testcats=classcats
    confusion_matrix=nb.confusion_matrix(testcats , classcats )
    # # print out the confusion matrix
    cmtx=nb.confusion_matrix_str(confusion_matrix)
    #print classcats
    print " ** test set confusion matrix **\n ",cmtx
    self.cmtx=cmtx
    # write out csv file for test data + predicted categories
    #create a temporary csv file that holds the data
    with open('testmatrix_data.csv', 'wb') as f:
        writer = csv.writer(f)
        #write in my headers
        # NOTE(review): 'list' shadows the builtin of the same name
        list=[]
        headers=dtest.get_headers()[:-1]
        headers.append("Cluster ID")
        list.append(headers)
        writer.writerows(list)
        types=[]
        # asume all numeric data
        for i in range(len(headers)):
            types.append("numeric")
        writer.writerows([types])  # write in the types
        values=[]
        C=B.tolist()
        # append the predicted category to each test-data row
        for i in range(len(C)):
            C[i].append(classcats[i].item(0))
        for row in C:  # row in test data
            values.append(row)
        writer.writerows(values)  # write in all the rows
        print "**** new file created for test data , named 'testmatrix_data.csv'"
        # NOTE(review): redundant — the with-statement already closes f
        f.close()
def main(argv): if len(argv) < 3: print 'Usage: python %s <train data file> <test data file> <optional train categories> <optional test categories>' % ( argv[0]) exit(-1) dtrain = data.Data(argv[1]) dtest = data.Data(argv[2]) if len(argv) > 3: traincatdata = data.Data(argv[3]) traincats = traincatdata.get_data([traincatdata.get_headers()[0]], traincatdata.get_num_rows()) testcatdata = data.Data(argv[4]) testcats = testcatdata.get_data([testcatdata.get_headers()[0]], testcatdata.get_num_rows()) A = dtrain.get_data(dtrain.get_headers(), dtrain.get_num_rows()) B = dtest.get_data(dtest.get_headers(), dtest.get_num_rows()) else: # assume the categories are the last column traincats = dtrain.get_data([dtrain.get_headers()[-1]], dtrain.get_num_rows()) testcats = dtest.get_data([dtest.get_headers()[-1]], dtest.get_num_rows()) A = dtrain.get_data(dtrain.get_headers()[:-1], dtrain.get_num_rows()) B = dtest.get_data(dtest.get_headers()[:-1], dtest.get_num_rows()) # create a new classifier nbc = classifiers.NaiveBayes() # build the classifier using the training data nbc.build(A, traincats) # use the classifier on the training data ctraincats, ctrainlabels = nbc.classify(A) ctestcats, ctestlabels = nbc.classify(B) print "Confusion Matrix" print nbc.confusion_matrix_str(nbc.confusion_matrix(traincats, ctraincats)) print 'Results on Training Set:' print ' True Est' for i in range(ctraincats.shape[0]): if int(traincats[i, 0]) == int(ctraincats[i, 0]): print "%03d: %4d %4d" % (i, int( traincats[i, 0]), int(ctraincats[i, 0])) else: print "%03d: %4d %4d **" % (i, int( traincats[i, 0]), int(ctraincats[i, 0])) print 'Results on Test Set:' print ' True Est' for i in range(ctestcats.shape[0]): if int(testcats[i, 0]) == int(ctestcats[i, 0]): print "%03d: %4d %4d" % (i, int(testcats[i, 0]), int(ctestcats[i, 0])) else: print "%03d: %4d %4d **" % (i, int( testcats[i, 0]), int(ctestcats[i, 0])) return
def main(argv): #usage if len(argv) < 4: print 'Usage: python %s <training data file> <test data file> <nb or knn> <optional training category file> <optional test category file>' % ( argv[0]) exit(-1) #store classifier type classifier = argv[3] if classifier != 'nb' and classifier != 'knn': print 'Usage: python %s <training data file> <test data file> <nb or knn> <optional training category file> <optional test category file>' % ( argv[0]) exit(-1) print '\nReading data files' #read the training and test sets dtrain = data.Data(argv[1]) dtest = data.Data(argv[2]) #get the categories and the training data train and the test data test if len(argv) > 5: traincatdata = data.Data(argv[4]) testcatdata = data.Data(argv[5]) traincats = traincatdata.get_data([traincatdata.get_headers()[0]]) testcats = testcatdata.get_data([testcatdata.get_headers()[0]]) train = dtrain.get_data(dtrain.get_headers()) test = dtest.get_data(dtest.get_headers()) headers = dtest.get_headers() else: #assume the categories are the last column traincats = dtrain.get_data([dtrain.get_headers()[-1]]) testcats = dtest.get_data([dtest.get_headers()[-1]]) train = dtrain.get_data(dtrain.get_headers()[:-1]) test = dtest.get_data(dtest.get_headers()[:-1]) headers = dtest.get_headers()[:-1] #create classifier using training set if classifier == 'knn': #get k k = raw_input( 'How many nearest neighbors? 
(default=3) Type number then press enter: ' ) if k == '': k = 3 else: k = abs(int(k)) #make new KNN classifier knntrain = classifiers.KNN() print '\nTraining the classifier' # build the classifier from training set knntrain.build(train, traincats, k) print '\nClassifying training data' # classify training set print confusion matrix trainCat, trainLab = knntrain.classify(train) print '\nBuilding training confusion matrix' traincmat = knntrain.confusion_matrix(traincats, trainCat) print knntrain.confusion_matrix_str(traincmat) print '\nClassifying testing data' # classify test set and print confusion matrix testCat, testLab = knntrain.classify(test) print '\nBuilding testing confusion matrix' testcmat = knntrain.confusion_matrix(testcats, testCat) print knntrain.confusion_matrix_str(testcmat) #write test data set and categories to CSV file filename = raw_input('Type filename for test data, then press enter: ') print '\nSaving test data' dtest.addColumn('Categories', 'numeric', testCat.T.tolist()[0]) headers.append('Categories') dtest.write(filename, headers) else: # classifier is nb #make new naive bayes classifier nbtrain = classifiers.NaiveBayes() print '\nTraining the classifier' # build the classifier from training set nbtrain.build(train, traincats) print '\nClassifying training data' # classify training set print confusion matrix trainCat, trainLab = nbtrain.classify(train) print '\nBuilding training confusion matrix' traincmat = nbtrain.confusion_matrix(traincats, trainCat) print nbtrain.confusion_matrix_str(traincmat) print '\nClassifying testing data' # classify test set and print confusion matrix testCat, testLab = nbtrain.classify(test) print '\nBuilding testing confusion matrix' testcmat = nbtrain.confusion_matrix(testcats, testCat) print nbtrain.confusion_matrix_str(testcmat) #write test data set and categories to CSV file filename = raw_input('Type filename for test data, then press enter: ') print '\nSaving test data' dtest.addColumn('Categories', 
'numeric', testCat.T.tolist()[0]) headers.append('Categories') dtest.write(filename, headers)
def main(argv):
    """Leave-one-interaction-out decoding of behaviors from single-neuron ISIs.

    For each neuron (spike-times .npy file), for each behavior and each
    held-out interaction, trains a naive Bayes classifier on the remaining
    interactions' ISIs and classifies the held-out ISIs, accumulating a
    confusion matrix. Per neuron, logs a rank (sum of the normalized
    diagonal), saves the matrices to .npz, and writes html/png heatmaps.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--likelihood_model", help="likelihood model",
                        default="inverse_Gaussian")
    # default="exponential")
    parser.add_argument("--behaviors_labels", help="behavioral labels",
                        default='[nonsocial,headtail,conspecific]')
    # default='[approach,following,headhead,headtail,conspecific,rice1,rice2]')
    parser.add_argument("--interactions", help="interaction numbers",
                        default="[1,3,5,6,7,8,9,11,13,14,15]")
    # default="[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]")
    parser.add_argument(
        "--bouttimes_filenames_pattern_pattern",
        help="bouttimes filename pattern pattern",
        default="../../../../data/120120/Behavior/*_int{:d}_bouttimes.npz")
    parser.add_argument("--spikes_times_filenames_pattern",
                        help="spikes times filename pattern",
                        default="../../../../data/120120/Neurons_BLA/*.npy")
    parser.add_argument("--decodings_log_filename_pattern",
                        help="decodings log filename pattern",
                        default="../../logs/decodings_{:s}.log")
    parser.add_argument("--confusion_matrix_filename_pattern",
                        help="confusion matrix filename pattern",
                        default="../../results/confusionMatrix_{:s}_{:s}.npz")
    parser.add_argument("--fig_filename_pattern",
                        help="figure filename pattern",
                        default="../../figures/confusionMatrix_{:s}_{:s}.{:s}")
    args = parser.parse_args()
    likelihood_model = args.likelihood_model
    # labels/interactions arrive as bracketed comma-separated strings,
    # e.g. "[a,b,c]": strip the brackets and split
    behaviors_labels = args.behaviors_labels[1:-1].split(",")
    # NOTE(review): the loop variable 'str' shadows the builtin
    interactions = [int(str) for str in args.interactions[1:-1].split(",")]
    bouttimes_filenames_pattern_pattern = args.bouttimes_filenames_pattern_pattern
    spikes_times_filenames_pattern = args.spikes_times_filenames_pattern
    decodings_log_filename_pattern = args.decodings_log_filename_pattern
    confusion_matrix_filename_pattern = args.confusion_matrix_filename_pattern
    fig_filename_pattern = args.fig_filename_pattern
    decodings_log_filename = decodings_log_filename_pattern.format(
        likelihood_model)
    spikes_times_filenames = glob.glob(spikes_times_filenames_pattern)
    if likelihood_model == "exponential":
        model_class = probabilisticModels.Exponential
    elif likelihood_model == "inverse_Gaussian":
        model_class = probabilisticModels.InverseGaussian
    else:
        raise ValueError(
            "Invalid likelihood_model={:s}".format(likelihood_model))
    nBehaviors = len(behaviors_labels)
    # NOTE(review): initialized once, OUTSIDE the neuron loop, so counts
    # accumulate across neurons; if a per-neuron matrix was intended this
    # should be reset inside the loop — confirm.
    confusion_matrix = np.zeros((nBehaviors, nBehaviors))
    classifier = classifiers.NaiveBayes()
    for i, spikes_times_filename in enumerate(spikes_times_filenames):
        print("Processing {:s}".format(spikes_times_filename))
        # neuron label = filename without directory or extension
        neuron_label = os.path.splitext(
            os.path.basename(spikes_times_filename))[0]
        spikes_times = np.load(spikes_times_filename)
        spikes_times = spikes_times.astype(float)
        for test_behavior_index, test_behavior_label in enumerate(
                behaviors_labels):
            # leave-one-interaction-out: train on all interactions but j
            for j, test_interaction in enumerate(interactions):
                train_interactions = np.delete(interactions, j)
                train_ISIs = utils.get_ISIs_by_behavior_in_interactions(
                    spikes_times=spikes_times,
                    behaviors_labels=behaviors_labels,
                    interactions=train_interactions,
                    bouttimes_filenames_pattern_pattern=
                    bouttimes_filenames_pattern_pattern)
                test_ISIs = utils.get_ISIs_for_behaviors_in_interactions(
                    spikes_times=spikes_times,
                    behaviors_labels=[test_behavior_label],
                    interactions=[test_interaction],
                    bouttimes_filenames_pattern_pattern=
                    bouttimes_filenames_pattern_pattern)
                if test_ISIs is not None:
                    classifier.train(x_by_class=train_ISIs,
                                     model_class=model_class)
                    classified_behavior = classifier.classify(x=test_ISIs)
                    if classified_behavior is not None:
                        classified_behavior_index = np.where(
                            np.array(behaviors_labels) ==
                            classified_behavior)[0][0]
                        # rows = true behavior, columns = decoded behavior
                        confusion_matrix[test_behavior_index,
                                         classified_behavior_index] += 1
                        # NOTE(review): this aString is never printed/logged
                        # before being overwritten below ('behavoir' typo kept)
                        aString = "Test interaction={:02d}. True test behavior={:s}, Classified test behavoir={:s}".format(
                            test_interaction, test_behavior_label,
                            classified_behavior)
        # normalize each row to proportions
        # NOTE(review): a behavior with zero counts gives row_sums == 0 and a
        # divide-by-zero here — confirm inputs guarantee nonzero rows
        row_sums = np.sum(confusion_matrix, axis=1)
        normalized_confusion_matrix = np.matmul(np.diag(1 / row_sums),
                                                confusion_matrix)
        # neuron rank = sum of per-behavior decoding accuracies (diagonal)
        neuron_rank = np.diag(normalized_confusion_matrix).sum()
        aString = "{:s}\t{:f}\n".format(neuron_label, neuron_rank)
        with open(decodings_log_filename, "a") as f:
            f.write(aString)
        print(aString)
        confusion_matrix_filename = confusion_matrix_filename_pattern.format(
            neuron_label, likelihood_model)
        np.savez(confusion_matrix_filename,
                 confusion_matrix=confusion_matrix,
                 normalized_confusion_matrix=normalized_confusion_matrix,
                 behaviors_labels=behaviors_labels)
        fig = px.imshow(normalized_confusion_matrix,
                        labels=dict(x="Decoded Behavior",
                                    y="True Behavior",
                                    color="Proportion"),
                        x=behaviors_labels,
                        y=behaviors_labels,
                        zmin=0.0,
                        zmax=1.0)
        htmlFigFilename = fig_filename_pattern.format(neuron_label,
                                                      likelihood_model,
                                                      "html")
        pngFigFilename = fig_filename_pattern.format(neuron_label,
                                                     likelihood_model, "png")
        fig.write_html(htmlFigFilename)
        fig.write_image(pngFigFilename)
        # NOTE(review): leftover debugging breakpoint — pauses every iteration
        pdb.set_trace()
def main(argv): # usage if len(argv) < 3: print 'Usage: python %s <classtype> <training data file> <test data file> <optional training category file> <optional test category file>' % ( argv[0]) exit(-1) # read the training and test sets dtrain = data.Data(argv[2]) dtest = data.Data(argv[3]) # get the categories and the training data A and the test data B if len(argv) > 5: traincatdata = data.Data(argv[4]) testcatdata = data.Data(argv[5]) traincats = traincatdata.get_data([traincatdata.get_headers()[0]]) testcats = testcatdata.get_data([testcatdata.get_headers()[0]]) A = dtrain.get_data(dtrain.get_headers()) B = dtest.get_data(dtest.get_headers()) else: # assume the categories are the last column traincats = dtrain.get_data([dtrain.get_headers()[-1]]) testcats = dtest.get_data([dtest.get_headers()[-1]]) A = dtrain.get_data(dtrain.get_headers()[:-1]) B = dtest.get_data(dtest.get_headers()[:-1]) if (argv[1] == "KNN"): print "You chose KNN" #create knn classifier knnc = classifiers.KNN() #build knn classifier knnc.build(A, traincats) trainclasscats, trainclasslabels = knnc.classify(A) testclasscats, testclasslabels = knnc.classify(B) #use KNN classifier on test data traincmtx = knnc.confusion_matrix((traincats), (trainclasscats)) traincmtxstr = knnc.confusion_matrix_str(traincmtx) print "Training Confusion Matrix" print traincmtxstr testcmtx = knnc.confusion_matrix(testcats, testclasscats) testcmtxstr = knnc.confusion_matrix_str(testcmtx) print "Testing Confusion Matrix" print testcmtxstr elif (argv[1] == "Naive-Bayes"): print "You chose Naive-Bayes" # create Naive-Bayes classifier nbc = classifiers.NaiveBayes() # build Naive-Bayes classifier nbc.build(A, traincats) # use Naive-Bayes classifier on test data trainclasscats, trainclasslabels = nbc.classify(A) testclasscats, testclasslabels = nbc.classify(B) # use KNN classifier on test data traincmtx = nbc.confusion_matrix(traincats, trainclasscats) traincmtxstr = nbc.confusion_matrix_str(traincmtx) print "Training Data 
Confusion Matrix" print traincmtxstr testcmtx = nbc.confusion_matrix(testcats, testclasscats) testcmtxstr = nbc.confusion_matrix_str(testcmtx) print "Test Data Confusion Matrix" print testcmtxstr dtest.addColumn("Classifiers", testclasscats) dtest.write("writtendatafile.csv")
def classify(trainingSet, testSet, bayes=True, optrainingCats=None, optestCats=None, outputFile="KNN.csv"):
    """Classify testSet using a classifier trained on trainingSet.

    trainingSet/testSet: CSV filenames. bayes: True for NaiveBayes, False
    for KNN (k=5). optrainingCats/optestCats: optional category CSV files;
    without them the last column of each data file is the category.
    Prints train and test confusion matrices and writes the test data plus
    a "predicted categories" column to outputFile.
    """
    print("in classify")
    dtrain = data.Data(trainingSet)
    dtest = data.Data(testSet)
    if optrainingCats is not None:
        trainHeaders = dtrain.get_headers()
        trainCats = data.Data(optrainingCats)
        trainCatsData = trainCats.newMatrix(trainCats.get_headers())
    else:
        trainHeaders = dtrain.get_headers()[:-1]
        trainCatsData = dtrain.newMatrix([dtrain.get_headers()[-1]])
    if optestCats is not None:
        # bug fix: the test headers must come from the test data set
        # (original read dtrain.get_headers())
        testHeaders = dtest.get_headers()
        testCats = data.Data(optestCats)
        testCatsData = testCats.newMatrix(testCats.get_headers())
    else:
        # bug fix: was dtrain.get_headers()[:-1]
        testHeaders = dtest.get_headers()[:-1]
        testCatsData = dtest.newMatrix([dtest.get_headers()[-1]])
    # build the requested classifier; the evaluation and CSV-writing code
    # was duplicated in both branches of the original and is shared below
    if bayes:
        model = classifiers.NaiveBayes(dtrain, trainHeaders, trainCatsData)
        print('Naive Bayes Training Set Results')
    else:
        print('Building KNN Classifier')
        model = classifiers.KNN(dtrain, trainHeaders, trainCatsData, 5)
        print('KNN Training Set Results')
    A = dtrain.newMatrix(trainHeaders)
    newcats, newlabels = model.classify(A)
    # map arbitrary category labels to contiguous integer codes
    uniquelabels, correctedtraincats = np.unique(
        trainCatsData.T.tolist()[0], return_inverse=True)
    correctedtraincats = np.matrix([correctedtraincats]).T
    confmtx = model.confusion_matrix(correctedtraincats, newcats)
    print(model.confusion_matrix_str(confmtx))
    print('Naive Bayes Test Set Results' if bayes else 'KNN Test Set Results')
    A = dtest.newMatrix(testHeaders)
    newcats, newlabels = model.classify(A)
    uniquelabels, correctedtestcats = np.unique(
        testCatsData.T.tolist()[0], return_inverse=True)
    correctedtestcats = np.matrix([correctedtestcats]).T
    # print the confusion matrix
    confmtx = model.confusion_matrix(correctedtestcats, newcats)
    print(model.confusion_matrix_str(confmtx))
    _write_classified_csv(outputFile, testHeaders, A, newcats)


def _write_classified_csv(outputFile, testHeaders, A, newcats):
    """Write matrix A as CSV with a 'predicted categories' column appended.

    Mutates testHeaders by appending the new column name (matches the
    original in-branch behavior).
    """
    with open(outputFile, mode='w') as file:
        dataToWrite = A.tolist()
        writer = csv.writer(file)
        testHeaders.append("predicted categories")
        writer.writerow(testHeaders)
        # type row: assume all columns are numeric
        writer.writerow(["numeric" for i in range(len(testHeaders))])
        for i in range(len(dataToWrite)):
            dataToWrite[i].append(newcats[i, 0])
            writer.writerow(dataToWrite[i])
def main(argv):
    """Resampled two-class decoding (female1 vs female2) from spike ISIs.

    Repeatedly splits each class's ISIs into train/test, trains a naive
    Bayes classifier with the requested likelihood model, and accumulates a
    2x2 confusion matrix over nResamples runs. Optionally shuffles ISIs
    across classes as a control. Writes an html/png heatmap annotated with
    precision/recall/f1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_class_name",
                        help="class name of the probabilistic model used in the naive Bayes classifier",
                        default="probabilityModels.ExponentialModel")
    parser.add_argument("--nResamples",
                        help="number of resamples for confusion_matrix",
                        type=int, default=100)
    parser.add_argument("--percentage_train",
                        help="percentage train for confusionmatrix",
                        type=float, default=.8)
    parser.add_argument("--randomize_ISIs",
                        help="randomize ISI across classes",
                        action="store_true")
    parser.add_argument("--data_filename", help="data filename",
                        default="../../data/66A_int13_14.npz")
    parser.add_argument("--fig_filename_pattern",
                        help="figure filename pattern",
                        default="../../figures/exponential_decoding_randomized_ISIs{:d}.{:s}")
    args = parser.parse_args()
    model_class_name = args.model_class_name
    nResamples = args.nResamples
    percentage_train = args.percentage_train
    randomize_ISIs = args.randomize_ISIs
    data_filename = args.data_filename
    fig_filename_pattern = args.fig_filename_pattern
    # NOTE(review): eval on a CLI string — resolves the dotted class name,
    # but executes arbitrary input; consider importlib instead
    model_class = eval(model_class_name)
    load_res = np.load(data_filename, allow_pickle=True)
    female1_spike_times = load_res["Female1_2_spikes_times"]
    female2_spike_times = load_res["Female2_2_spikes_times"]
    interactions_labels = ["female1", "female2"]
    # inter-spike intervals per class
    female1_ISIs = np.diff(female1_spike_times)
    female1_ISIs[np.where(female1_ISIs==0)[0]] = 1.0 # fixing problem due to storing spike times in milliseconds
    female2_ISIs = np.diff(female2_spike_times)
    female2_ISIs[np.where(female2_ISIs==0)[0]] = 1.0 # fixing problem due to storing spike times in milliseconds
    if randomize_ISIs:
        # control condition: pool and shuffle ISIs across the two classes
        all_ISIs = np.concatenate((female1_ISIs, female2_ISIs))
        suffled_all_ISIs = np.random.permutation(all_ISIs)
        # NOTE(review): the slice bounds look swapped — female1_ISIs is
        # reassigned first, so the second line's len(female1_ISIs) is the NEW
        # length; when the classes differ in size the two slices overlap or
        # leave a gap. Confirm whether [:len] / [len:] were intended the
        # other way around.
        female1_ISIs = suffled_all_ISIs[len(female1_ISIs):]
        female2_ISIs = suffled_all_ISIs[:len(female1_ISIs)]
    # rows = true class, cols = decoded class; counts over nResamples
    confusion_matrix = np.zeros((2,2))
    classifier = classifiers.NaiveBayes()
    for i in range(nResamples):
        # fresh random train/test split of each class per resample
        shuffled_female1_ISIs = np.random.permutation(female1_ISIs)
        shuffled_female2_ISIs = np.random.permutation(female2_ISIs)
        train_female1_ISIs = shuffled_female1_ISIs[:round(len(shuffled_female1_ISIs)*percentage_train)]
        test_female1_ISIs = shuffled_female1_ISIs[round(len(shuffled_female1_ISIs)*percentage_train):]
        train_female2_ISIs = shuffled_female2_ISIs[:round(len(shuffled_female2_ISIs)*percentage_train)]
        test_female2_ISIs = shuffled_female2_ISIs[round(len(shuffled_female2_ISIs)*percentage_train):]
        classifier.train(x=[train_female1_ISIs, train_female2_ISIs],
                         y=interactions_labels,
                         model_class=model_class)
        classified_female1 = classifier.classify(x=test_female1_ISIs)
        if classified_female1==interactions_labels[0]:
            confusion_matrix[0,0] += 1 # TP
        else:
            confusion_matrix[0,1] += 1 # FN
        classified_female2 = classifier.classify(x=test_female2_ISIs)
        if classified_female2==interactions_labels[1]:
            confusion_matrix[1,1] += 1 # TN
        else:
            confusion_matrix[1,0] += 1 # FP (true female2 decoded as female1; was mislabeled FN)
    confusion_matrix_metrics = statMetrics.get_confusion_matrix_metrics(confusion_matrix=confusion_matrix)
    # NOTE(review): rows of confusion_matrix are the TRUE class, which
    # px.imshow maps to the y axis, but the labels dict names y "Decoded" and
    # x "True" — the axis titles may be swapped; confirm.
    fig = px.imshow(confusion_matrix,
                    labels=dict(y="Decoded Interaction", x="True Interaction"),
                    x=interactions_labels, y=interactions_labels,
                    zmin=0.0, zmax=nResamples)
    fig.update_layout(
        title="Precision: {:.02f}, Recall: {:.02f}, f1-score: {:.02f}".format(*confusion_matrix_metrics)
    )
    htmlFigFilename = fig_filename_pattern.format(randomize_ISIs, "html")
    pngFigFilename = fig_filename_pattern.format(randomize_ISIs, "png")
    fig.write_html(htmlFigFilename)
    fig.write_image(pngFigFilename)
    fig.show()
    # NOTE(review): leftover debugging breakpoint
    pdb.set_trace()