def applyingSVMToTheMatchmakerDataset(): print '## Applying SVM to the Matchmaker Dataset' from svm import * numericalset=advancedclassify.loadnumerical( ) scaledset,scalef=advancedclassify.scaledata(numericalset) answers,inputs=[r.match for r in scaledset],[r.data for r in scaledset] inputs1=[] for i in inputs: data={}; for m in range(len(i)): data[m+1]=i[m] inputs1.append(data) print inputs1[0] print 'answers[0]',answers[0],'inputs[0]',inputs[0],'all',len(answers) ''' answers inputs <类别号> <索引1>:<特征值1> <索引2>:<特征值2>.''' prob = svm_problem(answers,inputs1) #, isKernel=True param = svm_parameter() print 'param--->',param,'<----' m = libsvm.svm_train(prob, param) newrow=[28.0,-1,-1,26.0,-1,1,2,0.8] # Man doesn't want children, woman does x0, max_idx = gen_svm_nodearray(scalef(newrow)) print '*** ',libsvm.svm_predict(m,x0) newrow=[28.0,-1,1,26.0,-1,1,2,0.8] # Both want children x0, max_idx = gen_svm_nodearray(scalef(newrow)) print '*** ',libsvm.svm_predict(m,x0)
def scalingTheData(): print '## Scaling the Data' reload(advancedclassify) numericalset=advancedclassify.loadnumerical( ) scaledset,scalef=advancedclassify.scaledata(numericalset) avgs=advancedclassify.lineartrain(scaledset) print 'numericalset[0].data',numericalset[0].data print 'numericalset[0].match', numericalset[0].match print advancedclassify.dpclassify(scalef(numericalset[0].data),avgs) print 'numericalset[11].match', numericalset[11].match print advancedclassify.dpclassify(scalef(numericalset[11].data),avgs)
def theKernelTrick1(): print '## The Kernel Trick' numericalset=advancedclassify.loadnumerical( ) scaledset,scalef=advancedclassify.scaledata(numericalset) ssoffset=advancedclassify.getoffset(scaledset) print 'offset',ssoffset print numericalset[0].match print advancedclassify.nlclassify(scalef(numericalset[0].data),scaledset,ssoffset) print numericalset[1].match print advancedclassify.nlclassify(scalef(numericalset[1].data),scaledset,ssoffset) print numericalset[2].match print advancedclassify.nlclassify(scalef(numericalset[2].data),scaledset,ssoffset) newrow=[28.0,-1,-1,26.0,-1,1,2,0.8] # Man doesn't want children, woman does print advancedclassify.nlclassify(scalef(newrow),scaledset,ssoffset) newrow=[28.0,-1,1,26.0,-1,1,2,0.8] # Both want children print advancedclassify.nlclassify(scalef(newrow),scaledset,ssoffset)
def test_pg_197_to_214(): """ Matchmaker Dataset """ import advancedclassify agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True) matchmaker = advancedclassify.loadmatch("matchmaker.csv") """ Difficulties with the data """ # Scatter plot of mans age vs womans age # O is a match # X is not a match advancedclassify.plotagematches(agesonly) """ Basic linear classification """ avgs = advancedclassify.lineartrain(agesonly) print advancedclassify.dpclassify([30,30], avgs) print advancedclassify.dpclassify([30,25], avgs) print advancedclassify.dpclassify([25,40], avgs) print advancedclassify.dpclassify([48,20], avgs) """ Determing distances using Yahoo! maps """ print advancedclassify.milesdistance("cambride, ma", "new york, ny") """ Creating the new dataset """ numericalset = advancedclassify.loadnumerical() print numericalset[0].data """ Scaling the dataset. """ scaledset, scalef = advancedclassify.scaledata(numericalset) avgs = advancedclassify.lineartrain(scaledset) print numericalset[0].data print numericalset[0].match print advancedclassify.dpclassify(scalef(numericalset[0].data), avgs) print numericalset[11].match print advancedclassify.dpclassify(scalef(numericalset[11].data), avgs) """ The kernel trick """ offset = advancedclassify.getoffset(agesonly) print offset print advancedclassify.nlclassify([30, 30], agesonly, offset) print advancedclassify.nlclassify([30, 25], agesonly, offset) print advancedclassify.nlclassify([25, 40], agesonly, offset) # In contrast to linear classification now recognises that # 48, 20 is not a good match print advancedclassify.nlclassify([48, 20], agesonly, offset) ssoffset = advancedclassify.getoffset(scaledset) # 0 print numericalset[0].match # 0 print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset) # 1 print numericalset[1].match # 1 print advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset) # 0 print numericalset[2].match # 0 # print 
advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset) # man doesnt want children, women does, otherwise really gd match newrow=[28.0, -1, -1, 26.0, -1, 1, 2, 0.8] # 0 print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset) # both want children newrow=[28.0, -1, 1, 26.0, -1, 1, 2, 0.8] # 0 print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset)
label = row.match if label == 0: label = False else: label = True actual = arow_model.predict(row.data) if actual == label: correct_count += 1 accuracy = correct_count / len(dataset) print "[%s] correct: %d/%d, accuracy: %f" % ("AROW", correct_count, len(dataset), accuracy) # load and scaling data agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True) numericalset = advancedclassify.loadnumerical('matchmaker.csv') scaledset, scalef = advancedclassify.scaledata(numericalset) # show agematch distribution #advancedclassify.plotagematches(agesonly) #advancedclassify.plotagematches(numericalset) # agesonly.csv linear classification classify_and_validate_dp("agesonly", agesonly) # matchmaker.csv linear classification classify_and_validate_dp("matchmaker", numericalset) classify_and_validate_dp("scaled matchmaker", scaledset) classify_and_validate_arow(scaledset)
agesonly = ad.loadmatch('agesonly.csv', allnum=True) matchmaker = ad.loadmatch('matchmaker.csv') # ad.plotagematches(agesonly) age = [] for line in file('agesonly.csv'): l = [] for w in line.split(','): l.append(int(w)) age.append(l) tree = tr.buildtree(age) tr.printtree(tree) tr.drawtree(tree) print tr.classify(tree, [65, 63]) avgs = ad.lineartrain(agesonly) print avgs print ad.dpclassify([30, 25], avgs.values()) print ad.dpclassify([25, 40], avgs.values()) print ad.dpclassify([48, 20], avgs.values()) print tr.classify(tree, [30, 25]) print tr.classify(tree, [25, 40]) print tr.classify(tree, [48, 20]) numericalset = ad.loadnumerical() numericalset[0].data
def creatingTheNewDataset(): print '## Creating the New Dataset' reload(advancedclassify) numericalset=advancedclassify.loadnumerical( ) print numericalset[0].data
# print('<----load data---->') # agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True) # matchmaker = advancedclassify.loadmatch('matchmaker.csv') # advancedclassify.plotagematches(agesonly) # # # # scikit svm # print('<----Use Scikit Svm---->') # X = [[1, 0, 1], [-1, 0, -1]] # y = [1, -1] # clf = svm.SVC() # print(clf.fit(X, y)) # print(clf.predict([[1, 1, 1]])) # Applying SVM to the Matchmaker Dataset numericalset = advancedclassify.loadnumerical() print(numericalset[0].data) scaledset, scalef = advancedclassify.scaledata(numericalset) answers, input = [r.match for r in scaledset], [r.data for r in scaledset] clf = svm.SVC() print(clf.fit(input, answers)) print(clf.predict([scalef([28, -1, 1, 26, -1, 1, 2, 0])])) # a sample of cross_validating print('<----A sample of cross_validating---->') from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(input, answers, test_size=0.7, random_state=0)
def get_numericalset(): numericalset = advancedclassify.loadnumerical() print ">>> numericalset[0].data" print numericalset[0].data return numericalset
# Interactive-session transcript replayed as a script. The classifier calls
# are bare expressions, so their results are discarded when this is not run
# in a REPL.
import advancedclassify

# Load both datasets; only agesonly is used in the visible code.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum = True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

# Each reload() re-executes the module; presumably it was being edited
# between steps of the session.
reload(advancedclassify)
avgs = advancedclassify.lineartrain(agesonly)

# Linear classification of four age pairs against the trained averages.
reload(advancedclassify)
advancedclassify.dpclassify([30,30], avgs)
advancedclassify.dpclassify([48,32], avgs)
advancedclassify.dpclassify([25,40], avgs)
advancedclassify.dpclassify([48,20], avgs)

# Build the numerical dataset and inspect its first row.
reload(advancedclassify)
numericalset = advancedclassify.loadnumerical()
numericalset[0].data

# Scale the numerical dataset and retrain; ``avgs`` now refers to the scaled
# data's averages.
reload(advancedclassify)
scaledset, scalef = advancedclassify.scaledata(numericalset)
avgs = advancedclassify.lineartrain(scaledset)
numericalset[0].data

# Classify the same four age pairs with nlclassify on the ages-only data.
reload(advancedclassify)
offset = advancedclassify.getoffset(agesonly)
advancedclassify.nlclassify([30,30], agesonly, offset)
advancedclassify.nlclassify([48,32], agesonly, offset)
advancedclassify.nlclassify([25,40], agesonly, offset)
advancedclassify.nlclassify([48,20], agesonly, offset)
def test_pg_197_to_214(): """ Matchmaker Dataset """ import advancedclassify agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True) matchmaker = advancedclassify.loadmatch("matchmaker.csv") """ Difficulties with the data """ # Scatter plot of mans age vs womans age # O is a match # X is not a match advancedclassify.plotagematches(agesonly) """ Basic linear classification """ avgs = advancedclassify.lineartrain(agesonly) print advancedclassify.dpclassify([30, 30], avgs) print advancedclassify.dpclassify([30, 25], avgs) print advancedclassify.dpclassify([25, 40], avgs) print advancedclassify.dpclassify([48, 20], avgs) """ Determing distances using Yahoo! maps """ print advancedclassify.milesdistance("cambride, ma", "new york, ny") """ Creating the new dataset """ numericalset = advancedclassify.loadnumerical() print numericalset[0].data """ Scaling the dataset. """ scaledset, scalef = advancedclassify.scaledata(numericalset) avgs = advancedclassify.lineartrain(scaledset) print numericalset[0].data print numericalset[0].match print advancedclassify.dpclassify(scalef(numericalset[0].data), avgs) print numericalset[11].match print advancedclassify.dpclassify(scalef(numericalset[11].data), avgs) """ The kernel trick """ offset = advancedclassify.getoffset(agesonly) print offset print advancedclassify.nlclassify([30, 30], agesonly, offset) print advancedclassify.nlclassify([30, 25], agesonly, offset) print advancedclassify.nlclassify([25, 40], agesonly, offset) # In contrast to linear classification now recognises that # 48, 20 is not a good match print advancedclassify.nlclassify([48, 20], agesonly, offset) ssoffset = advancedclassify.getoffset(scaledset) # 0 print numericalset[0].match # 0 print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset) # 1 print numericalset[1].match # 1 print advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset) # 0 print numericalset[2].match # 0 # print 
advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset) # man doesnt want children, women does, otherwise really gd match newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8] # 0 print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset) # both want children newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8] # 0 print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset)