예제 #1
0
def buildClassifier(trainFile,
                    testFile,
                    tCats=None,
                    ttCats=None,
                    classType="NaiveBayes",
                    save=False,
                    K=None):
    '''
        Code inspired by Bruce's code
    '''
    dtrain = data.Data(trainFile)
    dtest = data.Data(testFile)

    if (tCats != None and ttCats != None):
        traincatdata = data.Data(tCats)
        traincats = traincatdata.get_data([traincatdata.get_headers()[0]],
                                          traincatdata.get_num_rows())
        testcatdata = data.Data(ttCats)
        testcats = testcatdata.get_data([testcatdata.get_headers()[0]],
                                        testcatdata.get_num_rows())
        A = dtrain.get_data(dtrain.get_headers(), dtrain.get_num_rows())
        B = dtest.get_data(dtest.get_headers(), dtest.get_num_rows())
    else:
        # assume the categories are the last column
        traincats = dtrain.get_data([dtrain.get_headers()[-1]],
                                    dtrain.get_num_rows())
        testcats = dtest.get_data([dtest.get_headers()[-1]],
                                  dtest.get_num_rows())
        A = dtrain.get_data(dtrain.get_headers()[:-1], dtrain.get_num_rows())
        B = dtest.get_data(dtest.get_headers()[:-1], dtest.get_num_rows())

    #default is a naiveBayes Classifier
    nbc = classifiers.NaiveBayes()
    if (classType == "KNN"):
        if K != None:
            nbc = classifiers.KNN(K=K)
            nbc.build(A, traincats)
            ctraincats, ctrainlabels = nbc.classify(A)
            ctestcats, ctestlabels = nbc.classify(B)
        else:
            #default K of 3
            nbc = classifiers.KNN(K=3)
            nbc.build(A, traincats)
            ctraincats, ctrainlabels = nbc.classify(A)
            ctestcats, ctestlabels = nbc.classify(B)
    else:
        # build the classifier using the training data
        nbc.build(A, traincats)

        # use the classifier on the training data
        ctraincats, ctrainlabels = nbc.classify(A)
        ctestcats, ctestlabels = nbc.classify(B)

    if save == True:
        ctestcats.tofile('cTestCats.csv', sep=" ", format="%s")
        ctestlabels.tofile('cTestLabels.csv', sep=" ", format="%s")

    print "Training Data"
    print nbc.confusion_matrix_str(nbc.confusion_matrix(traincats, ctraincats))
    print "Test Data"
    print nbc.confusion_matrix_str(nbc.confusion_matrix(testcats, ctestcats))

    return nbc
예제 #2
0
def main(argv):
    # Reads in a training set and its category labels, possibly as a separate file.

    # usage
    if len(argv) < 3:
        print "usage: python %s <Training File> <Test File> <opt: Training Categories> <opt: Test Categories> <KNN or NaiveBayes>" % (
            argv[0])
        return

    # read the training and test sets
    print "Reading: \n  Training: %s\n  Test: %s\n  KNN/NB: %s\n  " % (
        argv[1], argv[2], argv[-1])

    trainData = data.Data(argv[1])
    testData = data.Data(argv[2])  #test data

    headerList = [1, 2]
    headerList[0] = trainData.getHeaderRaw()
    headerList[1] = testData.getHeaderRaw()

    # print trainData
    # print testData

    headers = []  #header names for cmtx

    # get the categories and the training data A and the test data B
    if len(argv) > 4:
        traincatdata = data.Data(argv[3])
        testcatdata = data.Data(argv[4])

        # needs to be a list
        traincats = traincatdata.getDataNum([traincatdata.getHeaderRaw()[0]])
        testcats = testcatdata.getDataNum([testcatdata.getHeaderRaw()[0]])

        A = trainData.getDataNum(trainData.getHeaderRaw())
        B = testData.getDataNum(testData.getHeaderRaw())
    else:

        # assume the categories are the last columnlen

        traincats = trainData.getDataNum([trainData.getHeaderRaw()[-1]])
        testcats = testData.getDataNum([testData.getHeaderRaw()[-1]])
        A = trainData.getDataNum(trainData.getHeaderRaw()[:-1])
        B = testData.getDataNum(testData.getHeaderRaw()[:-1])

    if argv[-1] == "NaiveBayes":
        classifier = classifiers.NaiveBayes()
    else:
        classifier = classifiers.KNN()

    classifier.build(A, traincats)
    ctraincats, ctrainlabels = classifier.classify(A)

    # print ctrainlabels[:20]
    # #
    # print traincats[:20]

    print "Training Data"
    print tabulate(
        classifier.confusionMatrixStr(
            classifier.confusionMatrix(traincats, ctrainlabels),
            headerList[0]))

    trainData.addCol("codes", "numeric", traincats.T.tolist()[0])
    #print "data: ", trainData.getDataNum(["Training Cats"])
    f = open('datasets/trainData.csv', 'w')
    trainData.writeOut(f, trainData.getHeaderRaw(), "numeric")
    print "\n"

    classifier.confusionMatrixGraphic(
        classifier.confusionMatrix(traincats, ctrainlabels),
        headerList[0],
        title="Confusion Matrix of Training Data")

    print "Test Data"
    ctestcats, ctestlabels = classifier.classify(B)
    print tabulate(
        classifier.confusionMatrixStr(
            classifier.confusionMatrix(testcats, ctestlabels), headerList[1]))

    testData.addCol("Test Cats", "numeric", testcats.T.tolist()[0])
    #print "data: ", testData.getDataNum(["Training Cats"])
    f = open('datasets/testData.csv', 'w')
    testData.writeOut(f, testData.getHeaderRaw(), "numeric")
    print "\n"

    classifier.confusionMatrixGraphic(classifier.confusionMatrix(
        testcats, ctestlabels),
                                      headerList[1],
                                      title="Confusion Matrix of Test Data")
예제 #3
0
파일: task2.py 프로젝트: ejseal21/CS251
def main(argv):
    """Train NaiveBayes or KNN, print confusion matrices for the
    training and test sets, and write the test data plus a 'predicted'
    column to heresyourdata.csv.

    argv: [script, train file, test file, 'n' (Naive Bayes) | 'k' (KNN),
           <optional training categories file>,
           <optional test categories file>]
    """
    # usage / argument validation
    if len(argv) < 4 or (argv[3] != 'k' and argv[3] != 'n'):
        print(
            'Usage: python %s <training data file> <test data file> <n for Naive Bayes, k for KNN> <optional training categories file> <optional test categories file>'
            % (argv[0]))
        print(
            '    If categories are not provided as separate files, then the last column is assumed to be the category.'
        )
        exit(-1)

    train_file = argv[1]
    test_file = argv[2]
    knn = argv[3] == 'k'
    dtrain = data.Data(train_file)
    dtest = data.Data(test_file)

    if len(argv) >= 6:
        train_headers = dtrain.get_headers()
        # NOTE(review): the original reads the *training* file's headers
        # for the test set too -- verify both files share a header row
        test_headers = dtrain.get_headers()

        traincat_file = argv[4]
        testcat_file = argv[5]

        traincats = data.Data(traincat_file)
        traincatdata = traincats.limit_columns(traincats.get_headers())

        testcats = data.Data(testcat_file)
        testcatdata = testcats.limit_columns(testcats.get_headers())
    else:
        train_headers = dtrain.get_headers()[:-1]
        test_headers = dtrain.get_headers()[:-1]

        traincatdata = dtrain.limit_columns([dtrain.get_headers()[-1]])
        testcatdata = dtest.limit_columns([dtest.get_headers()[-1]])

    # Map the raw category values to codes 0..C-1 for the confusion
    # matrices.  BUG FIX: the original computed these only in the
    # last-column branch above, so supplying separate category files
    # raised NameError on correctedtraincats/correctedtestcats below.
    uniquelabels, correctedtraincats = np.unique(traincatdata.T.tolist()[0],
                                                 return_inverse=True)
    correctedtraincats = np.matrix([correctedtraincats]).T

    uniquelabels, correctedtestcats = np.unique(testcatdata.T.tolist()[0],
                                                return_inverse=True)
    correctedtestcats = np.matrix([correctedtestcats]).T

    if not knn:
        nbc = classifiers.NaiveBayes(dtrain, train_headers, traincatdata)

        print('Naive Bayes Training Set Results')
        A = dtrain.limit_columns(train_headers)

        newcats, newlabels = nbc.classify(A)
        traincats = newcats

        print('making confusion matrix')
        confmtx = nbc.confusion_matrix(correctedtraincats, newcats)
        print(nbc.confusion_matrix_str(confmtx))

        print('Naive Bayes Test Set Results')
        # convert leading numeric header names to ints, stopping at the
        # first non-numeric one (BUG FIX: catch only ValueError instead
        # of a bare except that swallowed everything)
        for i in range(len(test_headers)):
            try:
                test_headers[i] = int(test_headers[i])
            except ValueError:
                break

        A = dtest.limit_columns(test_headers)

        print('classifying with naive bayes classifier')
        newcats, newlabels = nbc.classify(A)

        print('confusion matrix')
        confmtx = nbc.confusion_matrix(correctedtestcats, newcats)
        print(nbc.confusion_matrix_str(confmtx))

    else:
        print('knn')
        print('-----------------')
        print('Building KNN Classifier')
        knnc = classifiers.KNN(dtrain, train_headers, traincatdata, 3)

        print('KNN Training Set Results')
        A = dtrain.limit_columns(train_headers)

        newcats, newlabels = knnc.classify(A)
        traincats = newcats
        confmtx = knnc.confusion_matrix(correctedtraincats, newcats)
        print(knnc.confusion_matrix_str(confmtx))

        print('KNN Test Set Results')
        A = dtest.limit_columns(test_headers)

        newcats, newlabels = knnc.classify(A)

        print('KNN TEST::Correct labels\n', correctedtestcats.T)
        print('KNN TEST:::Predicted labels\n', newcats)

        # print the confusion matrix
        confmtx = knnc.confusion_matrix(correctedtestcats, newcats)
        print(knnc.confusion_matrix_str(confmtx))

    # append the predictions to the test data and write it out
    test_headers.append('predicted')

    dtest.add_header2col('predicted')
    dtest.add_column(newcats.T)
    dtest.write("heresyourdata.csv", test_headers)
    return
	def __init__(self, train, test, t1=None, ts1=None):
	
	
		
	
		# read the training and test sets
		dtrain = data.Data(train)
		dtest = data.Data(test)
	
		""" Bruce KNN test code source starts here, with additional comments for my understanding """
	
		# get the categories and the training data A and the test data B
		if t1!=None and	ts1!=None:
			traincatdata = data.Data(t1)
			testcatdata = data.Data(ts1)
			traincats = traincatdata.get_data( [traincatdata.get_headers()[-1]] )
			testcats = testcatdata.get_data( [testcatdata.get_headers()[-1]] )
			A = dtrain.get_data( dtrain.get_headers()[:-1] )
			B = dtest.get_data( dtest.get_headers()[:-1] )
		else:
			# assume the categories are the last column
			traincats = dtrain.get_data( [dtrain.get_headers()[-1]] )  # training categories 
			testcats = dtest.get_data( [dtest.get_headers()[-1]] )	# test categories
			A = dtrain.get_data( dtrain.get_headers()[:-1] )  # train data matrice
			B = dtest.get_data( dtest.get_headers()[:-1] )	# test data matrice

		
		
		
		

	
	
		

		# create two classifiers, one using 10 exemplars per class
		nb = classifiers.NaiveBayes()
		knnc10 = classifiers.NaiveBayes()

		# build the classifiers given data and categories
		nb.build( A, traincats )
	

		# use the classifiers on the test data, to try classify A
		classcats, alllabels =nb.classify(A)
	
	
		""" #Bruce KNN test edited for my project code source ends here """
	
		# Classify the training set and print out a confusion matrix.
		# build confusion matrix and print it out
		confusion_matrix=nb.confusion_matrix(traincats , classcats )  #
		# print out the confusion matrix
		cmtxA=nb.confusion_matrix_str(confusion_matrix)
		#print classcats 
		print "	 **	  train set	  confusion matrix	 **\n ",cmtxA
	
	
		# Classify the test set and print out a confusion matrix.
		
		print " **  in:",B.shape
		
		# use the classifiers on the test data, to try classify B
		classcats, alllabels = nb.classify( B )
	
		print " **  out:",classcats.shape
		
		# build confusion matrix and print it out
		if len(testcats)!=len(classcats):
			print "#* Error:  Something terribly wrong   needs to be fixed. THE  CONFUSION MATRIX BELOW IS WRONG"
			testcats=classcats
		
		
		confusion_matrix=nb.confusion_matrix(testcats , classcats )  #
		# print out the confusion matrix
		cmtx=nb.confusion_matrix_str(confusion_matrix)
		
		#print classcats 
		print " **	test set	 confusion matrix	**\n ",cmtx
		self.cmtx=cmtx
		# write out csv file for test data +  predicted categories
		#create a temporary csv file that holds the data
		
		with open('testmatrix_data.csv', 'wb') as f:
			writer = csv.writer(f)

			#write in my headers
			list=[]
			headers=dtest.get_headers()[:-1]
			headers.append("Cluster ID")
			list.append(headers)
			writer.writerows(list)
		
			types=[] # asume all numeric data
			for i in range(len(headers)):
				types.append("numeric")
			writer.writerows([types])	# write in the types
		
		
		
			values=[]
			C=B.tolist()
		
			for i in range(len(C)):
				C[i].append(classcats[i].item(0))
		
		
			for row in C:  # row in test data
			
				values.append(row)
		
			writer.writerows(values) # write in all the rows
			print "****		new file created for test data , named 'testmatrix_data.csv'"

		f.close()
예제 #5
0
def main(argv):

    if len(argv) < 3:
        print 'Usage: python %s <train data file> <test data file> <optional train categories> <optional test categories>' % (
            argv[0])
        exit(-1)

    dtrain = data.Data(argv[1])
    dtest = data.Data(argv[2])

    if len(argv) > 3:
        traincatdata = data.Data(argv[3])
        traincats = traincatdata.get_data([traincatdata.get_headers()[0]],
                                          traincatdata.get_num_rows())
        testcatdata = data.Data(argv[4])
        testcats = testcatdata.get_data([testcatdata.get_headers()[0]],
                                        testcatdata.get_num_rows())
        A = dtrain.get_data(dtrain.get_headers(), dtrain.get_num_rows())
        B = dtest.get_data(dtest.get_headers(), dtest.get_num_rows())

    else:
        # assume the categories are the last column
        traincats = dtrain.get_data([dtrain.get_headers()[-1]],
                                    dtrain.get_num_rows())
        testcats = dtest.get_data([dtest.get_headers()[-1]],
                                  dtest.get_num_rows())
        A = dtrain.get_data(dtrain.get_headers()[:-1], dtrain.get_num_rows())
        B = dtest.get_data(dtest.get_headers()[:-1], dtest.get_num_rows())

    # create a new classifier
    nbc = classifiers.NaiveBayes()

    # build the classifier using the training data
    nbc.build(A, traincats)

    # use the classifier on the training data
    ctraincats, ctrainlabels = nbc.classify(A)
    ctestcats, ctestlabels = nbc.classify(B)

    print "Confusion Matrix"

    print nbc.confusion_matrix_str(nbc.confusion_matrix(traincats, ctraincats))

    print 'Results on Training Set:'
    print '     True  Est'
    for i in range(ctraincats.shape[0]):
        if int(traincats[i, 0]) == int(ctraincats[i, 0]):
            print "%03d: %4d %4d" % (i, int(
                traincats[i, 0]), int(ctraincats[i, 0]))
        else:
            print "%03d: %4d %4d **" % (i, int(
                traincats[i, 0]), int(ctraincats[i, 0]))

    print 'Results on Test Set:'
    print '     True  Est'
    for i in range(ctestcats.shape[0]):
        if int(testcats[i, 0]) == int(ctestcats[i, 0]):
            print "%03d: %4d %4d" % (i, int(testcats[i,
                                                     0]), int(ctestcats[i, 0]))
        else:
            print "%03d: %4d %4d **" % (i, int(
                testcats[i, 0]), int(ctestcats[i, 0]))
    return
예제 #6
0
def main(argv):
    #usage
    if len(argv) < 4:
        print 'Usage: python %s <training data file> <test data file> <nb or knn> <optional training category file> <optional test category file>' % (
            argv[0])
        exit(-1)

    #store classifier type
    classifier = argv[3]

    if classifier != 'nb' and classifier != 'knn':
        print 'Usage:  python %s <training data file> <test data file> <nb or knn> <optional training category file> <optional test category file>' % (
            argv[0])
        exit(-1)

    print '\nReading data files'

    #read the training and test sets
    dtrain = data.Data(argv[1])
    dtest = data.Data(argv[2])

    #get the categories and the training data train and the test data test
    if len(argv) > 5:
        traincatdata = data.Data(argv[4])
        testcatdata = data.Data(argv[5])

        traincats = traincatdata.get_data([traincatdata.get_headers()[0]])
        testcats = testcatdata.get_data([testcatdata.get_headers()[0]])

        train = dtrain.get_data(dtrain.get_headers())
        test = dtest.get_data(dtest.get_headers())

        headers = dtest.get_headers()
    else:
        #assume the categories are the last column
        traincats = dtrain.get_data([dtrain.get_headers()[-1]])
        testcats = dtest.get_data([dtest.get_headers()[-1]])

        train = dtrain.get_data(dtrain.get_headers()[:-1])
        test = dtest.get_data(dtest.get_headers()[:-1])

        headers = dtest.get_headers()[:-1]

    #create classifier using training set
    if classifier == 'knn':

        #get k
        k = raw_input(
            'How many nearest neighbors? (default=3) Type number then press enter: '
        )
        if k == '':
            k = 3
        else:
            k = abs(int(k))

        #make new KNN classifier
        knntrain = classifiers.KNN()

        print '\nTraining the classifier'
        # build the classifier from training set
        knntrain.build(train, traincats, k)

        print '\nClassifying training data'
        # classify training set print confusion matrix
        trainCat, trainLab = knntrain.classify(train)

        print '\nBuilding training confusion matrix'
        traincmat = knntrain.confusion_matrix(traincats, trainCat)
        print knntrain.confusion_matrix_str(traincmat)

        print '\nClassifying testing data'
        # classify test set and print confusion matrix
        testCat, testLab = knntrain.classify(test)

        print '\nBuilding testing confusion matrix'
        testcmat = knntrain.confusion_matrix(testcats, testCat)
        print knntrain.confusion_matrix_str(testcmat)

        #write test data set and categories to CSV file
        filename = raw_input('Type filename for test data, then press enter: ')

        print '\nSaving test data'
        dtest.addColumn('Categories', 'numeric', testCat.T.tolist()[0])

        headers.append('Categories')

        dtest.write(filename, headers)

    else:  # classifier is nb

        #make new naive bayes classifier
        nbtrain = classifiers.NaiveBayes()

        print '\nTraining the classifier'
        # build the classifier from training set
        nbtrain.build(train, traincats)

        print '\nClassifying training data'
        # classify training set print confusion matrix
        trainCat, trainLab = nbtrain.classify(train)

        print '\nBuilding training confusion matrix'
        traincmat = nbtrain.confusion_matrix(traincats, trainCat)
        print nbtrain.confusion_matrix_str(traincmat)

        print '\nClassifying testing data'
        # classify test set and print confusion matrix
        testCat, testLab = nbtrain.classify(test)

        print '\nBuilding testing confusion matrix'
        testcmat = nbtrain.confusion_matrix(testcats, testCat)
        print nbtrain.confusion_matrix_str(testcmat)

        #write test data set and categories to CSV file
        filename = raw_input('Type filename for test data, then press enter: ')

        print '\nSaving test data'
        dtest.addColumn('Categories', 'numeric', testCat.T.tolist()[0])

        headers.append('Categories')

        dtest.write(filename, headers)
def main(argv):
    """Leave-one-interaction-out decoding of behavior from neuron ISIs.

    For each spikes-times file (one neuron), repeatedly trains a naive
    Bayes classifier on the inter-spike intervals of all but one
    interaction and classifies the held-out interaction's ISIs for each
    behavior, accumulating a confusion matrix.  Per neuron it appends a
    rank (sum of the normalized matrix diagonal) to a log file, saves
    the matrices to an npz file, and writes html/png heatmap figures.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--likelihood_model",
                        help="likelihood model",
                        default="inverse_Gaussian")
    # default="exponential")
    parser.add_argument("--behaviors_labels",
                        help="behavioral labels",
                        default='[nonsocial,headtail,conspecific]')
    # default='[approach,following,headhead,headtail,conspecific,rice1,rice2]')
    parser.add_argument("--interactions",
                        help="interaction numbers",
                        default="[1,3,5,6,7,8,9,11,13,14,15]")
    # default="[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]")
    parser.add_argument(
        "--bouttimes_filenames_pattern_pattern",
        help="bouttimes filename pattern pattern",
        default="../../../../data/120120/Behavior/*_int{:d}_bouttimes.npz")
    parser.add_argument("--spikes_times_filenames_pattern",
                        help="spikes times filename pattern",
                        default="../../../../data/120120/Neurons_BLA/*.npy")
    parser.add_argument("--decodings_log_filename_pattern",
                        help="decodings log filename pattern",
                        default="../../logs/decodings_{:s}.log")
    parser.add_argument("--confusion_matrix_filename_pattern",
                        help="confusion matrix filename pattern",
                        default="../../results/confusionMatrix_{:s}_{:s}.npz")
    parser.add_argument("--fig_filename_pattern",
                        help="figure filename pattern",
                        default="../../figures/confusionMatrix_{:s}_{:s}.{:s}")
    args = parser.parse_args()

    likelihood_model = args.likelihood_model
    # strip the surrounding [] and split the comma-separated lists
    behaviors_labels = args.behaviors_labels[1:-1].split(",")
    # loop variable renamed: the original shadowed the builtin 'str'
    interactions = [int(item) for item in args.interactions[1:-1].split(",")]
    bouttimes_filenames_pattern_pattern = args.bouttimes_filenames_pattern_pattern
    spikes_times_filenames_pattern = args.spikes_times_filenames_pattern
    decodings_log_filename_pattern = args.decodings_log_filename_pattern
    confusion_matrix_filename_pattern = args.confusion_matrix_filename_pattern
    fig_filename_pattern = args.fig_filename_pattern

    decodings_log_filename = decodings_log_filename_pattern.format(
        likelihood_model)
    spikes_times_filenames = glob.glob(spikes_times_filenames_pattern)
    if likelihood_model == "exponential":
        model_class = probabilisticModels.Exponential
    elif likelihood_model == "inverse_Gaussian":
        model_class = probabilisticModels.InverseGaussian
    else:
        raise ValueError(
            "Invalid likelihood_model={:s}".format(likelihood_model))

    nBehaviors = len(behaviors_labels)
    # NOTE(review): this matrix is never reset inside the neuron loop,
    # so later neurons' counts include earlier neurons' -- confirm the
    # accumulation across neurons is intended
    confusion_matrix = np.zeros((nBehaviors, nBehaviors))
    classifier = classifiers.NaiveBayes()
    for i, spikes_times_filename in enumerate(spikes_times_filenames):
        print("Processing {:s}".format(spikes_times_filename))
        neuron_label = os.path.splitext(
            os.path.basename(spikes_times_filename))[0]
        spikes_times = np.load(spikes_times_filename)
        spikes_times = spikes_times.astype(float)

        for test_behavior_index, test_behavior_label in enumerate(
                behaviors_labels):
            for j, test_interaction in enumerate(interactions):
                # hold out one interaction for testing, train on the rest
                train_interactions = np.delete(interactions, j)
                train_ISIs = utils.get_ISIs_by_behavior_in_interactions(
                    spikes_times=spikes_times,
                    behaviors_labels=behaviors_labels,
                    interactions=train_interactions,
                    bouttimes_filenames_pattern_pattern=
                    bouttimes_filenames_pattern_pattern)
                test_ISIs = utils.get_ISIs_for_behaviors_in_interactions(
                    spikes_times=spikes_times,
                    behaviors_labels=[test_behavior_label],
                    interactions=[test_interaction],
                    bouttimes_filenames_pattern_pattern=
                    bouttimes_filenames_pattern_pattern)
                if test_ISIs is not None:
                    classifier.train(x_by_class=train_ISIs,
                                     model_class=model_class)
                    classified_behavior = classifier.classify(x=test_ISIs)
                    if classified_behavior is not None:
                        classified_behavior_index = np.where(
                            np.array(behaviors_labels) ==
                            classified_behavior)[0][0]
                        confusion_matrix[test_behavior_index,
                                         classified_behavior_index] += 1

        # per-neuron rank: sum of the normalized matrix diagonal
        row_sums = np.sum(confusion_matrix, axis=1)
        normalized_confusion_matrix = np.matmul(np.diag(1 / row_sums),
                                                confusion_matrix)
        neuron_rank = np.diag(normalized_confusion_matrix).sum()
        aString = "{:s}\t{:f}\n".format(neuron_label, neuron_rank)
        with open(decodings_log_filename, "a") as f:
            f.write(aString)
        print(aString)
        confusion_matrix_filename = confusion_matrix_filename_pattern.format(
            neuron_label, likelihood_model)
        np.savez(confusion_matrix_filename,
                 confusion_matrix=confusion_matrix,
                 normalized_confusion_matrix=normalized_confusion_matrix,
                 behaviors_labels=behaviors_labels)
        fig = px.imshow(normalized_confusion_matrix,
                        labels=dict(x="Decoded Behavior",
                                    y="True Behavior",
                                    color="Proportion"),
                        x=behaviors_labels,
                        y=behaviors_labels,
                        zmin=0.0,
                        zmax=1.0)
        htmlFigFilename = fig_filename_pattern.format(neuron_label,
                                                      likelihood_model, "html")
        pngFigFilename = fig_filename_pattern.format(neuron_label,
                                                     likelihood_model, "png")
        fig.write_html(htmlFigFilename)
        fig.write_image(pngFigFilename)
    # removed leftover pdb.set_trace() debug breakpoint
예제 #8
0
def main(argv):
    # usage
    if len(argv) < 3:
        print 'Usage: python %s <classtype> <training data file> <test data file> <optional training category file> <optional test category file>' % (
            argv[0])
        exit(-1)

    # read the training and test sets
    dtrain = data.Data(argv[2])
    dtest = data.Data(argv[3])

    # get the categories and the training data A and the test data B
    if len(argv) > 5:
        traincatdata = data.Data(argv[4])
        testcatdata = data.Data(argv[5])
        traincats = traincatdata.get_data([traincatdata.get_headers()[0]])
        testcats = testcatdata.get_data([testcatdata.get_headers()[0]])
        A = dtrain.get_data(dtrain.get_headers())
        B = dtest.get_data(dtest.get_headers())
    else:
        # assume the categories are the last column
        traincats = dtrain.get_data([dtrain.get_headers()[-1]])
        testcats = dtest.get_data([dtest.get_headers()[-1]])
        A = dtrain.get_data(dtrain.get_headers()[:-1])
        B = dtest.get_data(dtest.get_headers()[:-1])

    if (argv[1] == "KNN"):
        print "You chose KNN"
        #create knn classifier
        knnc = classifiers.KNN()
        #build knn classifier
        knnc.build(A, traincats)
        trainclasscats, trainclasslabels = knnc.classify(A)
        testclasscats, testclasslabels = knnc.classify(B)
        #use KNN classifier on test data
        traincmtx = knnc.confusion_matrix((traincats), (trainclasscats))
        traincmtxstr = knnc.confusion_matrix_str(traincmtx)
        print "Training Confusion Matrix"
        print traincmtxstr
        testcmtx = knnc.confusion_matrix(testcats, testclasscats)
        testcmtxstr = knnc.confusion_matrix_str(testcmtx)
        print "Testing Confusion Matrix"
        print testcmtxstr

    elif (argv[1] == "Naive-Bayes"):
        print "You chose Naive-Bayes"
        # create Naive-Bayes classifier
        nbc = classifiers.NaiveBayes()
        # build Naive-Bayes classifier
        nbc.build(A, traincats)
        # use Naive-Bayes classifier on test data

        trainclasscats, trainclasslabels = nbc.classify(A)
        testclasscats, testclasslabels = nbc.classify(B)
        # use KNN classifier on test data
        traincmtx = nbc.confusion_matrix(traincats, trainclasscats)
        traincmtxstr = nbc.confusion_matrix_str(traincmtx)
        print "Training Data Confusion Matrix"
        print traincmtxstr
        testcmtx = nbc.confusion_matrix(testcats, testclasscats)
        testcmtxstr = nbc.confusion_matrix_str(testcmtx)
        print "Test Data Confusion Matrix"
        print testcmtxstr

    dtest.addColumn("Classifiers", testclasscats)
    dtest.write("writtendatafile.csv")
예제 #9
0
def classify(trainingSet,
             testSet,
             bayes=True,
             optrainingCats=None,
             optestCats=None,
             outputFile="KNN.csv"):
    """Train on trainingSet, classify testSet, and save the predictions.

    trainingSet / testSet -- paths to the training and test data files
    bayes                 -- True for NaiveBayes, False for KNN (k=5)
    optrainingCats / optestCats -- optional paths to separate category
                            files; when omitted the last column of the
                            corresponding data file holds the categories
    outputFile            -- CSV file for the test data plus a
                            "predicted categories" column

    Prints confusion matrices for the training and test sets.
    """
    print("in classify")
    dtrain = data.Data(trainingSet)
    dtest = data.Data(testSet)
    if optrainingCats is not None:
        trainHeaders = dtrain.get_headers()
        trainCats = data.Data(optrainingCats)
        trainCatsData = trainCats.newMatrix(trainCats.get_headers())
    else:
        trainHeaders = dtrain.get_headers()[:-1]
        trainCatsData = dtrain.newMatrix([dtrain.get_headers()[-1]])

    if optestCats is not None:
        # NOTE(review): the original reads the *training* file's headers
        # here -- verify this shouldn't be dtest.get_headers()
        testHeaders = dtrain.get_headers()
        testCats = data.Data(optestCats)
        testCatsData = testCats.newMatrix(testCats.get_headers())
    else:
        testHeaders = dtrain.get_headers()[:-1]
        testCatsData = dtest.newMatrix([dtest.get_headers()[-1]])

    # Map the raw category values to codes 0..C-1 for the confusion
    # matrices (hoisted here; the original duplicated this per branch).
    uniquelabels, correctedtraincats = np.unique(trainCatsData.T.tolist()[0],
                                                 return_inverse=True)
    correctedtraincats = np.matrix([correctedtraincats]).T

    uniquelabels, correctedtestcats = np.unique(testCatsData.T.tolist()[0],
                                                return_inverse=True)
    correctedtestcats = np.matrix([correctedtestcats]).T

    if bayes:
        nbc = classifiers.NaiveBayes(dtrain, trainHeaders, trainCatsData)

        print('Naive Bayes Training Set Results')
        A = dtrain.newMatrix(trainHeaders)
        newcats, newlabels = nbc.classify(A)
        confmtx = nbc.confusion_matrix(correctedtraincats, newcats)
        print(nbc.confusion_matrix_str(confmtx))

        print('Naive Bayes Test Set Results')
        A = dtest.newMatrix(testHeaders)
        newcats, newlabels = nbc.classify(A)
        confmtx = nbc.confusion_matrix(correctedtestcats, newcats)
        print(nbc.confusion_matrix_str(confmtx))
    else:
        print('Building KNN Classifier')
        knnc = classifiers.KNN(dtrain, trainHeaders, trainCatsData, 5)

        print('KNN Training Set Results')
        A = dtrain.newMatrix(trainHeaders)
        newcats, newlabels = knnc.classify(A)
        confmtx = knnc.confusion_matrix(correctedtraincats, newcats)
        print(knnc.confusion_matrix_str(confmtx))

        print('KNN Test Set Results')
        A = dtest.newMatrix(testHeaders)
        newcats, newlabels = knnc.classify(A)

        # print the confusion matrix
        confmtx = knnc.confusion_matrix(correctedtestcats, newcats)
        print(knnc.confusion_matrix_str(confmtx))

    # the original duplicated this output block in both branches
    _write_predicted_csv(outputFile, testHeaders, A, newcats)


def _write_predicted_csv(outputFile, testHeaders, A, newcats):
    # Write the test data plus a "predicted categories" column to CSV.
    # newline='' prevents the blank rows csv.writer otherwise emits on
    # Windows under Python 3 (BUG FIX); the written content is unchanged.
    with open(outputFile, mode='w', newline='') as file:
        dataToWrite = A.tolist()
        writer = csv.writer(file)
        testHeaders.append("predicted categories")
        writer.writerow(testHeaders)
        writer.writerow(["numeric" for i in range(len(testHeaders))])
        for i in range(len(dataToWrite)):
            dataToWrite[i].append(newcats[i, 0])
            writer.writerow(dataToWrite[i])
def main(argv):
    """Decode interaction identity (female1 vs female2) from spike ISIs.

    Repeatedly (``--nResamples`` times) splits each class's inter-spike
    intervals (ISIs) into train/test sets, trains a naive Bayes classifier
    with the requested probability model, classifies the held-out ISIs, and
    accumulates a 2x2 confusion matrix.  The matrix and its precision /
    recall / f1 metrics are rendered with plotly and written to HTML and PNG.

    Parameters
    ----------
    argv : list
        Command-line arguments.  NOTE(review): currently unused — argparse
        reads sys.argv directly; confirm whether callers expect ``argv`` to
        be honored before changing ``parse_args()`` to ``parse_args(argv)``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_class_name",
                        help="class name of the probabilistic model used in the naive Bayes classifier",
                        default="probabilityModels.ExponentialModel")
    parser.add_argument("--nResamples",
                        help="number of resamples for confusion_matrix",
                        type=int,
                        default=100)
    parser.add_argument("--percentage_train",
                        help="percentage train for confusionmatrix",
                        type=float,
                        default=.8)
    parser.add_argument("--randomize_ISIs",
                        help="randomize ISI across classes",
                        action="store_true")
    parser.add_argument("--data_filename",
                        help="data filename",
                        default="../../data/66A_int13_14.npz")
    parser.add_argument("--fig_filename_pattern",
                        help="figure filename pattern",
                        default="../../figures/exponential_decoding_randomized_ISIs{:d}.{:s}")
    args = parser.parse_args()

    model_class_name = args.model_class_name
    nResamples = args.nResamples
    percentage_train = args.percentage_train
    randomize_ISIs = args.randomize_ISIs
    data_filename = args.data_filename
    fig_filename_pattern = args.fig_filename_pattern

    # SECURITY: eval() on a user-supplied string executes arbitrary code.
    # Acceptable only for a trusted local research script; consider an
    # explicit name->class mapping (or importlib + getattr) instead.
    model_class = eval(model_class_name)
    load_res = np.load(data_filename, allow_pickle=True)
    female1_spike_times = load_res["Female1_2_spikes_times"]
    female2_spike_times = load_res["Female2_2_spikes_times"]
    interactions_labels = ["female1", "female2"]

    female1_ISIs = np.diff(female1_spike_times)
    female1_ISIs[np.where(female1_ISIs==0)[0]] = 1.0 # fixing problem due to storing spike times in milliseconds
    female2_ISIs = np.diff(female2_spike_times)
    female2_ISIs[np.where(female2_ISIs==0)[0]] = 1.0 # fixing problem due to storing spike times in milliseconds

    if randomize_ISIs:
        # Pool the two classes' ISIs and deal them back out at random,
        # preserving the original per-class sample counts.
        # BUGFIX: the previous code reassigned female1_ISIs first and then
        # sliced female2_ISIs with the NEW len(female1_ISIs), which produced
        # wrong split sizes (overlap or dropped samples) whenever the two
        # classes had different numbers of ISIs.
        n_female1 = len(female1_ISIs)
        all_ISIs = np.concatenate((female1_ISIs, female2_ISIs))
        shuffled_all_ISIs = np.random.permutation(all_ISIs)
        female1_ISIs = shuffled_all_ISIs[:n_female1]
        female2_ISIs = shuffled_all_ISIs[n_female1:]

    confusion_matrix = np.zeros((2,2))
    classifier = classifiers.NaiveBayes()

    for i in range(nResamples):
        # Fresh random train/test split for each resample.
        shuffled_female1_ISIs = np.random.permutation(female1_ISIs)
        shuffled_female2_ISIs = np.random.permutation(female2_ISIs)
        train_female1_ISIs = shuffled_female1_ISIs[:round(len(shuffled_female1_ISIs)*percentage_train)]
        test_female1_ISIs = shuffled_female1_ISIs[round(len(shuffled_female1_ISIs)*percentage_train):]
        train_female2_ISIs = shuffled_female2_ISIs[:round(len(shuffled_female2_ISIs)*percentage_train)]
        test_female2_ISIs = shuffled_female2_ISIs[round(len(shuffled_female2_ISIs)*percentage_train):]
        classifier.train(x=[train_female1_ISIs, train_female2_ISIs],
                         y=interactions_labels,
                         model_class=model_class)
        classified_female1 = classifier.classify(x=test_female1_ISIs)
        if classified_female1==interactions_labels[0]:
            confusion_matrix[0,0] += 1 # TP
        else:
            confusion_matrix[0,1] += 1 # FN
        classified_female2 = classifier.classify(x=test_female2_ISIs)
        if classified_female2==interactions_labels[1]:
            confusion_matrix[1,1] += 1 # TN
        else:
            confusion_matrix[1,0] += 1 # FP (was mislabeled "FN")

    confusion_matrix_metrics = statMetrics.get_confusion_matrix_metrics(confusion_matrix=confusion_matrix)

    fig = px.imshow(confusion_matrix,
                        labels=dict(y="Decoded Interaction", x="True Interaction"),
                        x=interactions_labels,
                        y=interactions_labels,
                        zmin=0.0, zmax=nResamples)
    fig.update_layout(
        title="Precision: {:.02f}, Recall: {:.02f}, f1-score: {:.02f}".format(*confusion_matrix_metrics)
    )

    htmlFigFilename = fig_filename_pattern.format(randomize_ISIs, "html")
    pngFigFilename = fig_filename_pattern.format(randomize_ISIs, "png")
    fig.write_html(htmlFigFilename)
    fig.write_image(pngFigFilename)
    fig.show()
    # Removed leftover debugging breakpoint (pdb.set_trace()) that halted
    # the script after the figure was displayed.