# Sibling modules assumed by this script; getwords and sampletrain are
# expected to come from a docclass-style helper module in the same repo.
import classifier
import naivebayes
import fisherclassifier


def main():
    doc = open("fingerprintGender.txt", 'r')
    wordsDict = getwords(doc.read())
    doc.close()

    # baseline classifier: weighted probability of a feature given a category
    genericClassifier = classifier.classifier(wordsDict)
    genericClassifier.setdb("generic.db")
    sampletrain(genericClassifier)
    print("---genericClassifier---")
    print(genericClassifier.weightedprob('quick rabbit', 'good',
                                         genericClassifier.fprob))

    print("---Naive Bayes---")
    bayesClassifier = naivebayes.naivebayes(wordsDict)
    bayesClassifier.setdb("bayes.db")
    sampletrain(bayesClassifier)
    print(bayesClassifier.prob('quick rabbit', 'good'))
    bayesClassifier.classify('quick money', default='unknown')
    # train on the sample corpus ten more times; with more evidence the
    # classification of 'quick money' can change
    for i in range(10):
        sampletrain(bayesClassifier)
    print(bayesClassifier.classify('quick money', default='unknown'))

    print("---FISHER CLASSIFIER---")
    fisher = fisherclassifier.fisherclassifier(wordsDict)
    fisher.setdb("fisher.db")
    sampletrain(fisher)
    print(fisher.fisherprob('quick rabbit', 'good'))
    print(fisher.weightedprob('money', 'bad', fisher.cprob))
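# The getwords and sampletrain helpers used above are not defined in this
# file. A minimal sketch of what they might look like, following the
# docclass convention from Segaran's "Programming Collective Intelligence"
# (the length thresholds and training sentences are illustrative
# assumptions, not this repo's actual definitions):
import re


def getwords(doc):
    # split on non-alphanumeric characters and keep mid-length words only
    splitter = re.compile(r'\W+')
    words = [s.lower() for s in splitter.split(doc) if 2 < len(s) < 20]
    # each word counts once per document
    return dict.fromkeys(words, 1)


def sampletrain(cl):
    # a tiny hand-labelled corpus: 'good' vs 'bad' documents
    cl.train('Nobody owns the water.', 'good')
    cl.train('the quick rabbit jumps fences', 'good')
    cl.train('buy pharmaceuticals now', 'bad')
    cl.train('make quick money at the online casino', 'bad')
    cl.train('the quick brown fox jumps', 'good')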
# Sibling modules assumed by this script.
import utils
import naivebayes
import logisticregression
import graph


def main():
    # fractions of the dataset used for training at each run
    trainingparams = [0.01, 0.02, 0.03, 0.125, 0.625, 1]
    iterations = 5        # repetitions per training fraction
    eta = 0.001           # logistic-regression learning rate
    epsilon = 0.001       # logistic-regression convergence threshold
    valuerange = 10       # attribute values span 1..10 in this dataset
    params = []

    data = utils.getdata('breast-cancer-wisconsin.data.txt')

    nb = naivebayes.naivebayes(data, trainingparams, iterations, valuerange)
    nb['label'] = 'Naive Bayes'
    lr = logisticregression.logisticregression(data, trainingparams,
                                               iterations, eta, epsilon)
    lr['label'] = 'Logistic Regression'
    params.append(lr)
    params.append(nb)

    # plot accuracy against training-set size for both models
    plot = graph.plot(params, 'Training Set', 'Accuracy')
    plot.show()
    return
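# graph.plot above is assumed to wrap matplotlib. A minimal sketch of such a
# helper, assuming each entry in params carries its x values and accuracies
# under 'x' and 'accuracy' keys alongside 'label' (those two key names are
# assumptions for illustration; only 'label' appears in the code above):
import matplotlib.pyplot as plt


def plot_results(params, xlabel, ylabel):
    # one accuracy curve per model, keyed by its legend label
    for model in params:
        plt.plot(model['x'], model['accuracy'], marker='o',
                 label=model['label'])
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    return plt

# usage, mirroring the call above:
#   plot_results(params, 'Training Set', 'Accuracy').show()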
# data_train is assumed to have been read earlier in this script
# (e.g. via r.readfile); only the test file is read here.
data_test = r.readfile("Text_Data_for_Project1_test_data.txt")
alabel_train, clabel_train, adata_train, cdata_train = p.p_traindata(data_train)
alabel_test, adata_test = p.p_testdata(data_test)

# stack the train and test attributes so both share one dummy-variable index
atraintestmix = np.vstack((adata_train, adata_test))
att_num = np.shape(adata_train)[1]
dat_num = np.shape(adata_train)[0]
adata_traintest_dummy, a_label = p.indexing(atraintestmix, att_num, dat_num,
                                            key="test")
adata_train_dummy = adata_traintest_dummy[:-1, :]
adata_test_dummy = np.array([adata_traintest_dummy[-1, :]])
cdata_train_dummy, c_label = p.indexing(cdata_train, 1, dat_num, key="test")

# presumably the m-estimate smoothing parameters (both 0 means no smoothing);
# renamed from m, p so the parser module p is not shadowed
m_est, p_est = 0, 0
prior_probability = cd.cal_prior(adata_train_dummy, cdata_train_dummy, a_label,
                                 c_label, dat_num, m_est, p_est)
a_probability = cd.cal_prob(adata_train_dummy, dat_num)
c_probability = cd.cal_prob(cdata_train_dummy, dat_num)
result = nb.naivebayes(adata_test_dummy, prior_probability, a_probability,
                       c_probability, a_label, c_label)
print(result)
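# nb.naivebayes above presumably applies the standard decision rule: pick the
# class c maximizing P(c) * prod_i P(x_i | c). A minimal sketch under that
# assumption, with probabilities stored in plain dicts (the dict layout and
# the function name are illustrative, not this repo's actual API):
import math


def naivebayes_sketch(test_row, prior, cond, classes):
    """test_row: list of attribute values; prior[c]: P(c);
    cond[(attr_index, value, c)]: P(attr = value | c)."""
    best, best_score = None, float('-inf')
    for c in classes:
        # sum of logs avoids underflow from multiplying many small numbers
        score = math.log(prior[c])
        for i, v in enumerate(test_row):
            # small floor for attribute/value pairs never seen with class c
            score += math.log(cond.get((i, v, c), 1e-9))
        if score > best_score:
            best, best_score = c, score
    return best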
# Sibling modules assumed by this script.
import docclass
import fisherClassifier
import naivebayes

# Earlier experiments, kept for reference:
# print cl.prob('quick money', 'bad')
# for i in range(10):
#     docclass.sampletrain(cl)
# print cl.classify('quick money', default='unknown')
# print cl.prob('quick money', 'good')
# print cl.prob('quick money', 'bad')

# cl = fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.cprob('quick', 'good')
# print cl.cprob('money', 'bad')

# cl = fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.cprob('quick', 'good')
# print cl.fisherprob('quick rabbit', 'good')
# print cl.fisherprob('quick rabbit', 'bad')

# cl = fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.classify('quick rabbit')
# print cl.classify("quick money")
# cl.setminimum('bad', 0.8)
# print cl.classify("quick money")

# train a Fisher classifier and persist its counts in test1.db
cl = fisherClassifier.FisherClassifier(docclass.getwords)
cl.setdb("test1.db")
docclass.sampletrain(cl)

# a naive Bayes classifier pointed at the same database reuses those counts
cl2 = naivebayes.naivebayes(docclass.getwords)
cl2.setdb("test1.db")
print(cl2.classify('quick money'))
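# The live block above works because setdb persists feature counts in SQLite,
# so the naive Bayes classifier reads what the Fisher classifier stored.
# setdb is assumed to follow the book's persistence pattern; a minimal sketch,
# written as the method body it would be attached to (sqlite3 stands in for
# the original pysqlite import; table names follow the book's convention):
import sqlite3


def setdb(self, dbfile):
    self.con = sqlite3.connect(dbfile)
    # fc: per-feature, per-category counts; cc: per-category document counts
    self.con.execute('create table if not exists fc(feature, category, count)')
    self.con.execute('create table if not exists cc(category, count)')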
from whoareyou import whoareyou
from genTrainFeatures import genTrainFeatures
from naivebayesCL import naivebayesCL
from naivebayes import naivebayes

# train a naive Bayes model and express it as a linear classifier (w, b)
xTr, yTr = genTrainFeatures()
w, b = naivebayesCL(xTr, yTr)
# whoareyou(w, b)

# score a single feature vector with the probabilistic model directly
x1 = xTr[:, 1]
print(x1.shape)
log = naivebayes(xTr, yTr, x1)
print(log)
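# naivebayes(xTr, yTr, x1) above is assumed to return the log-odds
# log( P(x|Y=+1)P(Y=+1) / P(x|Y=-1)P(Y=-1) ) for one test vector, built from
# naivebayesPY and naivebayesPXY. A minimal sketch under that assumption
# (multinomial word-count model, labels in {+1, -1}):
import numpy as np

from naivebayesPY import naivebayesPY
from naivebayesPXY import naivebayesPXY


def naivebayes_logratio(x, y, xtest):
    pos, neg = naivebayesPY(x, y)            # class priors P(Y=+1), P(Y=-1)
    posprob, negprob = naivebayesPXY(x, y)   # per-word P(word | Y)
    xtest = np.asarray(xtest).flatten()
    # log of a product of word probabilities is a dot product of counts
    # against log-probabilities, plus the log prior
    logpos = np.log(pos) + xtest @ np.log(np.asarray(posprob).flatten())
    logneg = np.log(neg) + xtest @ np.log(np.asarray(negprob).flatten())
    return logpos - logneg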
import random
import traceback

import numpy as np

# Functions under test, assumed importable from their homework modules.
from genTrainFeatures import genTrainFeatures
from naivebayesPY import naivebayesPY
from naivebayesPXY import naivebayesPXY
from naivebayes import naivebayes
from naivebayesCL import naivebayesCL


def example_tests():
    # =========================================================================
    # r, ok, s = example_tests()
    #
    # Tests the functions from homework assignment 0.
    # Please make sure that the error statements are instructive.
    #
    # Output:
    #   r  = the number of tests that failed
    #   ok = the number of tests that passed
    #   s  = statements describing the failed tests (s = [] if all succeed)
    # =========================================================================
    # put in any seed below
    random.seed(31415926535)

    # initial outputs
    r = 0
    ok = 0
    s = []  # used to be a MATLAB cell array

    # load in name data
    xTr, yTr = genTrainFeatures()

    print('---------Starting Test 1---------')
    addon = ''
    try:
        # Test 1: check that the class priors sum to 1
        pos, neg = naivebayesPY(xTr, yTr)
        failtest = (np.linalg.norm(pos + neg - 1) > 1e-8)
    except Exception:
        failtest = True
        addon = traceback.format_exc()
    if failtest:
        r = r + 1
        # append the whole message (s += str would add one character at a time)
        s.append('Failed Test 1 naivebayesPY: Probabilities of P(Y) do not sum to 1.\n' + addon + '\n')
        print("failed")
    else:
        ok = ok + 1
    print('---------Completed Test 1---------')

    y = np.matrix([-1, 1])
    x = np.matrix([[0, 1], [1, 0]])

    failtest = False
    print('---------Starting Test 2---------')
    addon = ''
    try:
        # Test 2: test naivebayesPY on a simple matrix
        pos, neg = naivebayesPY(x, y)
        pos0 = 0.5
        neg0 = 0.5
        if (pos != pos0) or (neg != neg0):
            failtest = True
    except Exception:
        failtest = True
        addon = traceback.format_exc()
    if failtest:
        r = r + 1
        s.append('Failed Test 2 naivebayesPY: The calculation of P(Y) seems incorrect.\n' + addon + '\n')
        print("failed")
    else:
        ok = ok + 1
    print('---------Completed Test 2---------')

    failtest = False
    print('---------Starting Test 3---------')
    pospossi0 = np.matrix([[0.66667], [0.33333]])
    negpossi0 = np.matrix([[0.33333], [0.66667]])
    addon = ''
    try:
        # Test 3: calculate the conditional probabilities P(X|Y)
        pospossi, negpossi = naivebayesPXY(x, y)
        print(pospossi)
        print(negpossi)
        if (np.linalg.norm(pospossi - pospossi0) > 1e-3) or \
           (np.linalg.norm(negpossi - negpossi0) > 1e-3):
            failtest = True
    except Exception:
        failtest = True
        addon = traceback.format_exc()
    if failtest:
        r = r + 1
        s.append('Failed Test 3: The calculation of P(X|Y) seems incorrect.\n' + addon + '\n')
        print("failed")
    else:
        ok = ok + 1
    print('---------Finished Test 3---------')

    # Tests 4-8 exercise the naivebayesPXY function.
    # Some are sanity tests that the function returns reasonable answers;
    # some check that it is correct on small cases.
    print('---------Starting Test 4---------')
    xTr, yTr = genTrainFeatures()
    posprob, negprob = naivebayesPXY(xTr, yTr)
    print(posprob.shape)
    print(negprob.shape)
    print('---------Finished Test 4---------')

    # Test 9 is on naivebayes
    print('---------Starting Test 9---------')
    logratio = naivebayes(x, y, np.array([[1], [1]]))
    print(logratio)
    print("---------------------------------")
    logratio = naivebayes(x, y, np.array([[0], [0]]))
    print(logratio)
    print('---------Finished Test 9---------')

    # Tests 10-11 are on naivebayesCL
    print('---------Starting Test 10-11---------')
    w, b = naivebayesCL(xTr, yTr)
    print(w.shape)
    print(b)
    w, b = naivebayesCL(x, y)
    print(w.shape)
    print(b)
    print('---------Finished Tests 10-11---------')

    percentage = ok / (r + ok) * 100
    print("Passing percentage: " + str(percentage))
    return r, ok, s
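# Reference sketches of the two estimators exercised above, with plus-one
# smoothing chosen so they match the expected values in tests 2 and 3
# (illustrative implementations consistent with those tests, not necessarily
# the graded solutions):
import numpy as np


def naivebayesPY(x, y):
    # append one fake positive and one fake negative label (plus-one smoothing)
    y = np.concatenate([np.asarray(y).flatten(), [1, -1]])
    pos = np.mean(y == 1)
    return pos, 1 - pos


def naivebayesPXY(x, y):
    x = np.asarray(x)
    y = np.asarray(y).flatten()
    # add one count of every word to each class (plus-one smoothing)
    poscount = x[y == 1].sum(axis=0) + 1
    negcount = x[y == -1].sum(axis=0) + 1
    posprob = poscount / poscount.sum()
    negprob = negcount / negcount.sum()
    return posprob.reshape(-1, 1), negprob.reshape(-1, 1)

# On the small case in the tests, x = [[0, 1], [1, 0]] with y = [-1, 1]
# gives P(Y=+1) = P(Y=-1) = 0.5 and P(X|Y=+1) = [2/3, 1/3], matching
# pos0/neg0 and pospossi0/negpossi0 above.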