def main():
  doc = open("fingerprintGender.txt",'r')
  wordsDict = getwords(doc.read())  
  
  genericClassifier = classifier.classifier(wordsDict)
  genericClassifier.setdb("generic.db")
  sampletrain(genericClassifier)
  
  print "---genericClassifier---"
  print genericClassifier.weightedprob('quick rabbit','good', genericClassifier.fprob)

  print "---Naive Bayes---"
  bayesClassifier = naivebayes.naivebayes(wordsDict)
  bayesClassifier.setdb("bayes.db")
  sampletrain(bayesClassifier)
  print bayesClassifier.prob('quick rabbit','good')
  bayesClassifier.classify('quick money',default='unknown')
  for i in range(10): sampletrain(bayesClassifier)
  print bayesClassifier.classify('quick money',default='unknown')
  
  print "---FISHER CLASSIFIER---"
  fisher = fisherclassifier.fisherclassifier(wordsDict)
  fisher.setdb("fisher.db")
  sampletrain(fisher)
  print fisher.fisherprob('quick rabbit','good')
  print fisher.weightedprob('money','bad', fisher.cprob)
def main():
    # NOTE(review): this second `main` silently shadows the one defined above
    # in this file — confirm which entry point is intended.
    """Plot Naive Bayes vs. Logistic Regression accuracy over growing training fractions."""
    training_fractions = [0.01, 0.02, 0.03, 0.125, 0.625, 1]
    n_iterations = 5
    learning_rate = 0.001
    tolerance = 0.001
    value_range = 10

    dataset = utils.getdata('breast-cancer-wisconsin.data.txt')

    bayes_curve = naivebayes.naivebayes(dataset, training_fractions,
                                        n_iterations, value_range)
    bayes_curve['label'] = 'Naive Bayes'

    regression_curve = logisticregression.logisticregression(
        dataset, training_fractions, n_iterations, learning_rate, tolerance)
    regression_curve['label'] = 'Logistic Regression'

    # Same ordering as before: logistic regression first, then naive Bayes.
    curves = [regression_curve, bayes_curve]
    graph.plot(curves, 'Training Set', 'Accuracy').show()
# Esempio n. 3
# 0
# Load the held-out test split (`data_train` is expected to be loaded above).
data_test = r.readfile("Text_Data_for_Project1_test_data.txt")

# Split attribute data / class labels for both partitions.
alabel_train, clabel_train, adata_train, cdata_train = p.p_traindata(
    data_train)
alabel_test, adata_test = p.p_testdata(data_test)

# Stack train+test attributes so indexing uses one shared vocabulary.
atraintestmix = np.vstack((adata_train, adata_test))

att_num = np.shape(adata_train)[1]  # number of attribute columns
dat_num = np.shape(adata_train)[0]  # number of training rows

adata_traintest_dummy, a_label = p.indexing(atraintestmix,
                                            att_num,
                                            dat_num,
                                            key="test")

# The last stacked row is the test instance; the rest are training rows.
adata_train_dummy = adata_traintest_dummy[:-1, :]
adata_test_dummy = np.array([adata_traintest_dummy[-1, :]])

cdata_train_dummy, c_label = p.indexing(cdata_train, 1, dat_num, key="test")

# Smoothing parameters for the prior. Renamed from `m, p`: binding `p = 0`
# clobbered the module alias `p` used for p_traindata/p_testdata/indexing above.
m_estimate, p_estimate = 0, 0
prior_probability = cd.cal_prior(adata_train_dummy, cdata_train_dummy, a_label,
                                 c_label, dat_num, m_estimate, p_estimate)

a_probability = cd.cal_prob(adata_train_dummy, dat_num)
c_probability = cd.cal_prob(cdata_train_dummy, dat_num)

# Classify the test instance and report the result.
result = nb.naivebayes(adata_test_dummy, prior_probability, a_probability,
                       c_probability, a_label, c_label)
print(result)
# Esempio n. 4
# 0
# print cl.prob('quick money', 'bad')
# for i in range(10):
#     docclass.sampletrain(cl)
# print cl.classify('quick money', default='unknown')
# print cl.prob('quick money', 'good')
# print cl.prob('quick money', 'bad')

# cl=fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.cprob('quick', 'good')
# print cl.cprob('money', 'bad')

# cl=fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.cprob('quick', 'good')
# print cl.fisherprob('quick rabbit', 'good')
# print cl.fisherprob('quick rabbit', 'bad')

# cl=fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.classify('quick rabbit')
# print cl.classify("quick money")
# cl.setminimum('bad', 0.8)
# print cl.classify("quick money")

cl = fisherClassifier.FisherClassifier(docclass.getwords)
cl.setdb("test1.db")
docclass.sampletrain(cl)
cl2 = naivebayes.naivebayes(docclass.getwords)
cl2.setdb("test1.db")
print cl2.classify('quick money')
from whoareyou import whoareyou
from genTrainFeatures import genTrainFeatures
from naivebayesCL import naivebayesCL

# Build the training features and fit the linear naive Bayes classifier.
xTr, yTr = genTrainFeatures()
w, b = naivebayesCL(xTr, yTr)
#whoareyou(w,b)

from naivebayes import naivebayes

# Score the second feature column and print its log-probability ratio.
x1 = xTr[:, 1]
print(x1.shape)
log = naivebayes(xTr, yTr, x1)
print(log)
# Esempio n. 6
# 0
def example_tests():
    """Run the sanity tests for homework assignment 0.

    Exercises naivebayesPY, naivebayesPXY, naivebayes and naivebayesCL on
    both the generated name features and a tiny hand-built matrix.

    Returns:
        r:  the number of tests that failed
        ok: the number of tests that passed
        s:  list of statements describing each failed test ([] if all succeed)
    """
    # Put in any seed below (fixed so the run is reproducible)
    random.seed(31415926535)
    # initial outputs
    r = 0
    ok = 0
    s = []  # used to be matlab cell array

    # load in name data
    xTr, yTr = genTrainFeatures()
    print('---------Starting Test 1---------')
    addon = ''
    try:
        # Test 1: check if probabilities sum to 1
        pos, neg = naivebayesPY(xTr, yTr)
        failtest = (np.linalg.norm(pos + neg - 1) > 1e-8)
    except Exception:
        # except Exception (not bare except) so KeyboardInterrupt/SystemExit escape
        failtest = True
        addon = traceback.format_exc()

    if failtest:
        r = r + 1
        # append() records one message per failure; the previous `s += str`
        # extended the list with the string's individual characters.
        s.append('Failed Test 1 naivebayesPY: Probabilities of P(Y) do not sum to 1.\n' + addon + '\n')
        print("failed")
    else:
        ok = ok + 1

    print('---------Completed Test 1---------')

    y = np.matrix([-1, 1])
    x = np.matrix([[0, 1], [1, 0]])

    failtest = False
    print('---------Starting Test 2---------')
    addon = ''  # reset so a previous test's traceback is never reported here
    try:
        # Test 2: Test the Naive Bayes function on a simple matrix
        pos, neg = naivebayesPY(x, y)
        pos0 = 0.5
        neg0 = 0.5
        if (pos != pos0) or (neg != neg0):
            failtest = True
    except Exception:
        failtest = True
        addon = traceback.format_exc()

    if failtest:
        r = r + 1
        s.append('Failed Test 2 naivebayesPXY: The calculation of P(Y) seems incorrect.\n' + addon + '\n')
        print("failed")
    else:
        ok = ok + 1
    print('---------Completed Test 2---------')

    failtest = False
    print('---------Starting Test 3---------')
    pospossi0 = np.matrix([[0.66667], [0.33333]])
    negpossi0 = np.matrix([[0.33333], [0.66667]])
    addon = ''
    try:
        # Test 3 calculate conditional probabilities
        pospossi, negpossi = naivebayesPXY(x, y)
        print(pospossi)
        print(negpossi)
        if (np.linalg.norm(pospossi - pospossi0) >
                1e-3) or (np.linalg.norm(negpossi - negpossi0) > 1e-3):
            failtest = True
    except Exception:
        failtest = True
        addon = traceback.format_exc()

    if failtest:
        r = r + 1
        s.append('Failed Test 3: The calculation of P(X|Y) seems incorrect.\n' + addon + '\n')
        print("failed")
    else:
        ok = ok + 1
    print('---------Finished Test 3---------')

    #    Tests 4~8 are testing about the naivebayesPXY function.
    #    Some are sanity tests that the function is returning reasonable answers.
    #    Some are making sure they are correct on small cases

    print('---------Starting Test 4---------')
    xTr, yTr = genTrainFeatures()
    posprob, negprob = naivebayesPXY(xTr, yTr)
    print(posprob.shape)
    print(negprob.shape)
    print('---------Finished Test 4---------')

    #    Tests 9 is on naivebayes
    print('---------Starting Test 9---------')
    logratio = naivebayes(x, y, np.array([[1], [1]]))
    print(logratio)
    print("---------------------------------")
    logratio = naivebayes(x, y, np.array([[0], [0]]))
    print(logratio)
    print('---------Finished Test 9---------')

    #    Tests 10-11 is on naivebayesCL
    print('---------Starting Test 10-11---------')
    w, b = naivebayesCL(xTr, yTr)
    print(w.shape)
    print(b)

    w, b = naivebayesCL(x, y)
    print(w.shape)
    print(b)
    print('---------Finished Test 10-11---------')

    # r + ok is always >= 1 here (every scored test increments one of them)
    percentage = ok / (r + ok) * 100
    print("Passing percentage: " + str(percentage))

    return r, ok, s
# Esempio n. 7
# 0
# print cl.prob('quick money', 'bad')
# for i in range(10):
#     docclass.sampletrain(cl)
# print cl.classify('quick money', default='unknown')
# print cl.prob('quick money', 'good')
# print cl.prob('quick money', 'bad')

# cl=fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.cprob('quick', 'good')
# print cl.cprob('money', 'bad')

# cl=fisherClassifier.FisherClassifier(docclass.getwords)    
# docclass.sampletrain(cl)
# print cl.cprob('quick', 'good')
# print cl.fisherprob('quick rabbit', 'good')
# print cl.fisherprob('quick rabbit', 'bad')

# cl=fisherClassifier.FisherClassifier(docclass.getwords)
# docclass.sampletrain(cl)
# print cl.classify('quick rabbit')
# print cl.classify("quick money")
# cl.setminimum('bad', 0.8)
# print cl.classify("quick money")

cl=fisherClassifier.FisherClassifier(docclass.getwords)
cl.setdb("test1.db")
docclass.sampletrain(cl)
cl2=naivebayes.naivebayes(docclass.getwords)
cl2.setdb("test1.db")
print cl2.classify('quick money')