Esempio n. 1
0
File: run.py Progetto: wz125/courses
def applyingSVMToTheMatchmakerDataset():
  print '## Applying SVM to the Matchmaker Dataset'
  from svm import *
  numericalset=advancedclassify.loadnumerical( )
  scaledset,scalef=advancedclassify.scaledata(numericalset)
  answers,inputs=[r.match for r in scaledset],[r.data for r in scaledset]
  inputs1=[]
  for i in inputs:
    data={};
    for m in range(len(i)):
      data[m+1]=i[m]
    inputs1.append(data)
  print inputs1[0]
  print 'answers[0]',answers[0],'inputs[0]',inputs[0],'all',len(answers)
  ''' answers inputs <类别号> <索引1>:<特征值1> <索引2>:<特征值2>.'''
  prob = svm_problem(answers,inputs1)   #, isKernel=True
  param = svm_parameter()
  print 'param--->',param,'<----'
  m = libsvm.svm_train(prob, param)
  newrow=[28.0,-1,-1,26.0,-1,1,2,0.8] # Man doesn't want children, woman does
  x0, max_idx = gen_svm_nodearray(scalef(newrow))
  print '*** ',libsvm.svm_predict(m,x0)
  newrow=[28.0,-1,1,26.0,-1,1,2,0.8] # Both want children
  x0, max_idx = gen_svm_nodearray(scalef(newrow))
  print '*** ',libsvm.svm_predict(m,x0)
Esempio n. 2
0
File: run.py Progetto: wz125/courses
def scalingTheData():
  print '## Scaling the Data'
  reload(advancedclassify)
  numericalset=advancedclassify.loadnumerical( )
  scaledset,scalef=advancedclassify.scaledata(numericalset)
  avgs=advancedclassify.lineartrain(scaledset)
  print 'numericalset[0].data',numericalset[0].data
  print 'numericalset[0].match', numericalset[0].match
  print advancedclassify.dpclassify(scalef(numericalset[0].data),avgs)
  print 'numericalset[11].match', numericalset[11].match
  print advancedclassify.dpclassify(scalef(numericalset[11].data),avgs)
Esempio n. 3
0
File: run.py Progetto: wz125/courses
def theKernelTrick1():
  print '## The Kernel Trick'
  numericalset=advancedclassify.loadnumerical( )
  scaledset,scalef=advancedclassify.scaledata(numericalset)
  ssoffset=advancedclassify.getoffset(scaledset)
  print 'offset',ssoffset
  print numericalset[0].match
  print advancedclassify.nlclassify(scalef(numericalset[0].data),scaledset,ssoffset)
  print numericalset[1].match
  print advancedclassify.nlclassify(scalef(numericalset[1].data),scaledset,ssoffset)
  print numericalset[2].match
  print advancedclassify.nlclassify(scalef(numericalset[2].data),scaledset,ssoffset)
  newrow=[28.0,-1,-1,26.0,-1,1,2,0.8] # Man doesn't want children, woman does
  print advancedclassify.nlclassify(scalef(newrow),scaledset,ssoffset)
  newrow=[28.0,-1,1,26.0,-1,1,2,0.8] # Both want children
  print advancedclassify.nlclassify(scalef(newrow),scaledset,ssoffset)
def test_pg_197_to_214():

    """
    Matchmaker Dataset
    """


    import advancedclassify

    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    """
    Difficulties with the data
    """

    # Scatter plot of mans age vs womans age
    # O is a match
    # X is not a match

    advancedclassify.plotagematches(agesonly)

    """
    Basic linear classification
    """

    avgs = advancedclassify.lineartrain(agesonly)

    print advancedclassify.dpclassify([30,30], avgs)
    print advancedclassify.dpclassify([30,25], avgs)
    print advancedclassify.dpclassify([25,40], avgs)
    print advancedclassify.dpclassify([48,20], avgs)

    """
    Determing distances using Yahoo! maps
    """

    print advancedclassify.milesdistance("cambride, ma", "new york, ny")

    """
    Creating the new dataset
    """

    numericalset = advancedclassify.loadnumerical()
    print numericalset[0].data

    """
    Scaling the dataset.
    """

    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print numericalset[0].data
    print numericalset[0].match
    print advancedclassify.dpclassify(scalef(numericalset[0].data), avgs)
    print numericalset[11].match
    print advancedclassify.dpclassify(scalef(numericalset[11].data), avgs)

    """
    The kernel trick
    """

    offset = advancedclassify.getoffset(agesonly)
    print offset
    print advancedclassify.nlclassify([30, 30], agesonly, offset)
    print advancedclassify.nlclassify([30, 25], agesonly, offset)
    print advancedclassify.nlclassify([25, 40], agesonly, offset)
    # In contrast to linear classification now recognises that
    # 48, 20 is not a good match
    print advancedclassify.nlclassify([48, 20], agesonly, offset)

    ssoffset = advancedclassify.getoffset(scaledset)

    # 0
    print numericalset[0].match
    # 0
    print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset)

    # 1
    print numericalset[1].match
    # 1
    print advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset)

    # 0
    print numericalset[2].match
    # 0
    # print advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset)

    # man doesnt want children, women does, otherwise really gd match
    newrow=[28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    # 0
    print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset)

    # both want children
    newrow=[28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    # 0
    print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset)
Esempio n. 5
0
        label = row.match
        if label == 0:
            label = False
        else:
            label = True
        actual = arow_model.predict(row.data)
        if actual == label:
            correct_count += 1

    accuracy = correct_count / len(dataset)
    print "[%s] correct: %d/%d, accuracy: %f" % ("AROW", correct_count,
                                                 len(dataset), accuracy)


# Load both datasets; the matchmaker rows are additionally passed through
# scaledata, which also hands back the scaling function.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
numericalset = advancedclassify.loadnumerical('matchmaker.csv')
scaledset, scalef = advancedclassify.scaledata(numericalset)

# Age-match plots, left disabled:
#advancedclassify.plotagematches(agesonly)
#advancedclassify.plotagematches(numericalset)

# Linear classification runs, in order: agesonly.csv, raw matchmaker.csv,
# then the scaled matchmaker rows.
for run_name, run_data in (("agesonly", agesonly),
                           ("matchmaker", numericalset),
                           ("scaled matchmaker", scaledset)):
    classify_and_validate_dp(run_name, run_data)

# AROW run on the scaled matchmaker rows only.
classify_and_validate_arow(scaledset)
Esempio n. 6
0
agesonly = ad.loadmatch('agesonly.csv', allnum=True)
matchmaker = ad.loadmatch('matchmaker.csv')

# ad.plotagematches(agesonly)

age = []
for line in file('agesonly.csv'):
    l = []
    for w in line.split(','):
        l.append(int(w))
    age.append(l)
tree = tr.buildtree(age)
tr.printtree(tree)
tr.drawtree(tree)

print tr.classify(tree, [65, 63])

avgs = ad.lineartrain(agesonly)
print avgs

print ad.dpclassify([30, 25], avgs.values())
print ad.dpclassify([25, 40], avgs.values())
print ad.dpclassify([48, 20], avgs.values())

print tr.classify(tree, [30, 25])
print tr.classify(tree, [25, 40])
print tr.classify(tree, [48, 20])

numericalset = ad.loadnumerical()
numericalset[0].data
Esempio n. 7
0
File: run.py Progetto: wz125/courses
def creatingTheNewDataset():
  print '## Creating the New Dataset'
  reload(advancedclassify)
  numericalset=advancedclassify.loadnumerical( )
  print numericalset[0].data
Esempio n. 8
0
# print('<----load data---->')
# agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
# matchmaker = advancedclassify.loadmatch('matchmaker.csv')
# advancedclassify.plotagematches(agesonly)
#
#
# # scikit svm
# print('<----Use Scikit Svm---->')
# X = [[1, 0, 1], [-1, 0, -1]]
# y = [1, -1]
# clf = svm.SVC()
# print(clf.fit(X, y))
# print(clf.predict([[1, 1, 1]]))

# Applying SVM to the Matchmaker Dataset
# Load the numerical matchmaker rows and show the first row's feature vector.
numericalset = advancedclassify.loadnumerical()
print(numericalset[0].data)
# scaledata hands back the scaled rows plus a function that is applied to
# new rows below before prediction.
scaledset, scalef = advancedclassify.scaledata(numericalset)
# Split the scaled rows into labels (match) and feature vectors (data).
# NOTE(review): the name `input` shadows the built-in function.
answers, input = [r.match for r in scaledset], [r.data for r in scaledset]
# presumably `svm` is sklearn.svm (imported outside this fragment) -- confirm
clf = svm.SVC()
print(clf.fit(input, answers))
# Predict one hand-written row after running it through the scaling function.
print(clf.predict([scalef([28, -1, 1, 26, -1, 1, 2, 0])]))

# A sample of cross-validation
print('<----A sample of cross_validating---->')
from sklearn.model_selection import train_test_split

# test_size=0.7 puts 70% of the rows in the test split; random_state=0 fixes
# the shuffle.  NOTE(review): the four results are not used in the visible
# lines -- the script presumably continues beyond this fragment.
X_train, X_test, y_train, y_test = train_test_split(input,
                                                    answers,
                                                    test_size=0.7,
                                                    random_state=0)
def get_numericalset():
    numericalset = advancedclassify.loadnumerical()
    print ">>> numericalset[0].data"
    print numericalset[0].data
    return numericalset
Esempio n. 10
0
import advancedclassify
# Load the ages-only and the full matchmaker datasets.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

# advancedclassify is reloaded before every step below.
reload(advancedclassify)
avgs = advancedclassify.lineartrain(agesonly)

# Linear classification of four age pairs; the results are discarded.
reload(advancedclassify)
for age_a, age_b in ((30, 30), (48, 32), (25, 40), (48, 20)):
    advancedclassify.dpclassify([age_a, age_b], avgs)

reload(advancedclassify)
numericalset = advancedclassify.loadnumerical()
# Bare expression: evaluated, value discarded.
numericalset[0].data

# Scale the numerical rows and retrain the linear classifier on them.
reload(advancedclassify)
scaledset, scalef = advancedclassify.scaledata(numericalset)
avgs = advancedclassify.lineartrain(scaledset)
numericalset[0].data

# nlclassify on the same four age pairs; the results are discarded.
reload(advancedclassify)
offset = advancedclassify.getoffset(agesonly)
for age_a, age_b in ((30, 30), (48, 32), (25, 40), (48, 20)):
    advancedclassify.nlclassify([age_a, age_b], agesonly, offset)
def test_pg_197_to_214():
    """
    Matchmaker Dataset
    """

    import advancedclassify

    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")
    """
    Difficulties with the data
    """

    # Scatter plot of mans age vs womans age
    # O is a match
    # X is not a match

    advancedclassify.plotagematches(agesonly)
    """
    Basic linear classification
    """

    avgs = advancedclassify.lineartrain(agesonly)

    print advancedclassify.dpclassify([30, 30], avgs)
    print advancedclassify.dpclassify([30, 25], avgs)
    print advancedclassify.dpclassify([25, 40], avgs)
    print advancedclassify.dpclassify([48, 20], avgs)
    """
    Determing distances using Yahoo! maps
    """

    print advancedclassify.milesdistance("cambride, ma", "new york, ny")
    """
    Creating the new dataset
    """

    numericalset = advancedclassify.loadnumerical()
    print numericalset[0].data
    """
    Scaling the dataset.
    """

    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print numericalset[0].data
    print numericalset[0].match
    print advancedclassify.dpclassify(scalef(numericalset[0].data), avgs)
    print numericalset[11].match
    print advancedclassify.dpclassify(scalef(numericalset[11].data), avgs)
    """
    The kernel trick
    """

    offset = advancedclassify.getoffset(agesonly)
    print offset
    print advancedclassify.nlclassify([30, 30], agesonly, offset)
    print advancedclassify.nlclassify([30, 25], agesonly, offset)
    print advancedclassify.nlclassify([25, 40], agesonly, offset)
    # In contrast to linear classification now recognises that
    # 48, 20 is not a good match
    print advancedclassify.nlclassify([48, 20], agesonly, offset)

    ssoffset = advancedclassify.getoffset(scaledset)

    # 0
    print numericalset[0].match
    # 0
    print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset,
                                      ssoffset)

    # 1
    print numericalset[1].match
    # 1
    print advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset,
                                      ssoffset)

    # 0
    print numericalset[2].match
    # 0
    # print advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset)

    # man doesnt want children, women does, otherwise really gd match
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    # 0
    print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset,
                                      ssoffset)

    # both want children
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    # 0
    print advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset,
                                      ssoffset)