Ejemplo n.º 1
0
Archivo: run.py Proyecto: wz125/courses
def scalingTheData():
    """Train the linear classifier on scaled data and print two predictions.

    Reloads ``advancedclassify``, loads the numerical dataset, scales it and
    trains on the scaled rows. It then prints the raw data and recorded match
    of row 0, and the recorded match of row 11, each followed by the
    classifier's prediction for that row's scaled data.
    """
    print('## Scaling the Data')
    reload(advancedclassify)
    rows = advancedclassify.loadnumerical()
    scaled_rows, scalef = advancedclassify.scaledata(rows)
    class_avgs = advancedclassify.lineartrain(scaled_rows)

    # Row 0: raw data, recorded match, then the prediction.
    first = rows[0]
    print('%s %s' % ('numericalset[0].data', first.data))
    print('%s %s' % ('numericalset[0].match', first.match))
    print(advancedclassify.dpclassify(scalef(first.data), class_avgs))

    # Row 11: recorded match, then the prediction.
    twelfth = rows[11]
    print('%s %s' % ('numericalset[11].match', twelfth.match))
    print(advancedclassify.dpclassify(scalef(twelfth.data), class_avgs))
Ejemplo n.º 2
0
def get_scaledset(numericalset):
    """Scale ``numericalset``, train on it, and echo two sample predictions.

    For rows 0 and 11 this prints a ``>>>`` banner and the recorded match,
    then a second banner and the linear classifier's prediction for the
    scaled row.

    Returns:
        The ``(scaledset, scalef)`` pair produced by
        ``advancedclassify.scaledata``.
    """
    scaled_rows, scale_fn = advancedclassify.scaledata(numericalset)
    class_avgs = advancedclassify.lineartrain(scaled_rows)

    def predict(index):
        # Scale the raw row before classifying, since training used scaled data.
        scaled_point = scale_fn(numericalset[index].data)
        return advancedclassify.dpclassify(scaled_point, class_avgs)

    print(">>> numericalset[0].match")
    print(numericalset[0].match)
    print(">>> advancedclassify.dpclassify(scalef(numericalset[0].data), avgs)")
    print(predict(0))

    print(">>> numericalset[11].match")
    print(numericalset[11].match)
    print(">>> advancedclassify.dpclassify(scalef(numericalset[11].data), avgs)")
    print(predict(11))

    return scaled_rows, scale_fn
Ejemplo n.º 3
0
Archivo: run.py Proyecto: wz125/courses
def basicLinearClassification():
    """Train the linear classifier on ``agesonly``, plot and query it.

    Reloads ``advancedclassify``, trains on the module-level ``agesonly``
    dataset, prints the per-class averages and plots each average point.
    It then prints the classification of four sample age pairs.

    ``plot`` and ``show`` are taken from the enclosing module
    (presumably pylab/matplotlib -- TODO confirm).
    """
    print('## Basic Linear Classification')
    reload(advancedclassify)
    avgs = advancedclassify.lineartrain(agesonly)
    print(avgs)

    # Plot format per class key. The original assigned the marker only for
    # keys 0 and 1, so any other key either raised NameError (first
    # iteration) or silently reused the previous class's marker. A fallback
    # marker makes that case explicit instead.
    markers = {0: 'bo', 1: 'b+'}
    for k, v in avgs.items():
        plot(v[0], v[1], markers.get(k, 'bx'))
    show()

    samples = (
        ('30,30', [30, 30]),
        ('30,25', [30, 25]),
        ('25,40', [25, 40]),
        ('48,20', [48, 20]),
    )
    for label, point in samples:
        print('%s %s' % (label, advancedclassify.dpclassify(point, avgs)))
Ejemplo n.º 4
0
def get_linear_classification(agesonly):
    """Train on ``agesonly`` and echo the classification of four age pairs.

    Each query is printed as a ``>>>`` banner followed by the value returned
    by ``advancedclassify.dpclassify``. A final ``oops!!`` line is printed
    after the queries. Returns nothing.
    """
    class_avgs = advancedclassify.lineartrain(agesonly)

    # Banner text paired with the point it describes, in output order.
    queries = (
        (">>> advancedclassify.dpclassify([30, 30], avgs)", [30, 30]),
        (">>> advancedclassify.dpclassify([30, 25], avgs)", [30, 25]),
        (">>> advancedclassify.dpclassify([25, 40], avgs)", [25, 40]),
        (">>> advancedclassify.dpclassify([48, 20], avgs)", [48, 20]),
    )
    for banner, point in queries:
        print(banner)
        print(advancedclassify.dpclassify(point, class_avgs))

    print("oops!!")
Ejemplo n.º 5
0
def classify_and_validate_dp(name, dataset):
    """Train the linear classifier on ``dataset`` and report its accuracy on it.

    Every row is classified with ``advancedclassify.dpclassify`` and the
    result is compared against ``row.match``. One summary line is printed.

    Args:
        name: Label placed in the printed summary.
        dataset: Sized iterable of rows exposing ``data`` and ``match``.

    Returns:
        None. The summary is printed.

    Note:
        The accuracy is measured on the training rows themselves. An empty
        dataset is reported with accuracy 0.0 instead of raising
        ZeroDivisionError (assuming ``lineartrain`` itself accepts an empty
        dataset -- TODO confirm).
    """
    avgs = advancedclassify.lineartrain(dataset)
    total = len(dataset)
    # Integer count: it is formatted with %d below.
    correct_count = sum(
        1 for row in dataset
        if advancedclassify.dpclassify(row.data, avgs) == row.match
    )
    # float() keeps true division under Python 2; guard the empty dataset.
    accuracy = float(correct_count) / total if total else 0.0
    print("[%s] correct: %d/%d, accuracy: %f" % (name, correct_count,
                                                 total, accuracy))
def test_pg_197_to_214():
    """Run the matchmaker classification examples and print every result.

    Loads the matchmaker CSV files through ``advancedclassify.loadmatch``,
    plots the age data, then trains and queries the linear classifier (on raw
    and on scaled data) and the ``nlclassify`` classifier. Nothing is
    asserted and nothing is returned; the printed output is meant to be
    inspected by eye.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    # Not referenced again in this function.
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age:
    # O is a match, X is not a match.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    avgs = advancedclassify.lineartrain(agesonly)
    for ages in ([30, 30], [30, 25], [25, 40], [48, 20]):
        print(advancedclassify.dpclassify(ages, avgs))

    # --- Determining distances using Yahoo! Maps ---
    # The original query read "cambride, ma"; the misspelt city name is fixed
    # so the lookup is made for the intended place.
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    print(advancedclassify.nlclassify([30, 30], agesonly, offset))
    print(advancedclassify.nlclassify([30, 25], agesonly, offset))
    print(advancedclassify.nlclassify([25, 40], agesonly, offset))
    # In contrast to linear classification, this now recognises that
    # 48, 20 is not a good match.
    print(advancedclassify.nlclassify([48, 20], agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)

    # Row 0: recorded match (expected 0), then the prediction (expected 0).
    print(numericalset[0].match)
    print(advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset))

    # Row 1: recorded match (expected 1), then the prediction (expected 1).
    print(numericalset[1].match)
    print(advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset))

    # Row 2: recorded match (expected 0); the prediction call is left disabled,
    # as in the original.
    print(numericalset[2].match)
    # print advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset)

    # Man doesn't want children, woman does, otherwise a really good match.
    # The original built this row but then classified numericalset[0].data
    # again; the hand-built row is what is classified now.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
Ejemplo n.º 7
0
# Compare the linear classifier (``ad``) with a decision tree (``tr``) on the
# ages-only matchmaker data. Both module aliases come from imports outside
# this snippet.
agesonly = ad.loadmatch('agesonly.csv', allnum=True)
matchmaker = ad.loadmatch('matchmaker.csv')

# ad.plotagematches(agesonly)

# Parse the same CSV into plain integer rows for the tree builder. The file
# is opened in a ``with`` block so the handle is closed deterministically;
# the original ``file(...)`` handle was never closed.
with open('agesonly.csv') as ages_file:
    age = [[int(field) for field in line.split(',')] for line in ages_file]
tree = tr.buildtree(age)
tr.printtree(tree)
tr.drawtree(tree)

print(tr.classify(tree, [65, 63]))

avgs = ad.lineartrain(agesonly)
print(avgs)

# NOTE(review): dpclassify receives avgs.values() rather than avgs itself;
# confirm that ad.dpclassify expects a sequence indexed by class here.
for ages in ([30, 25], [25, 40], [48, 20]):
    print(ad.dpclassify(ages, avgs.values()))

for ages in ([30, 25], [25, 40], [48, 20]):
    print(tr.classify(tree, ages))

numericalset = ad.loadnumerical()
# The original evaluated this expression and discarded it, which shows
# nothing when run as a script; print it like the other results.
print(numericalset[0].data)
Ejemplo n.º 8
0
import advancedclassify

# Load both matchmaker CSVs; only the ages-only data is used below.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

# advancedclassify.plotagematches(agesonly)
avgs = advancedclassify.lineartrain(agesonly)

# Classify four sample age pairs with the trained per-class averages.
for ages in ([30, 30], [30, 25], [25, 40], [48, 20]):
    print(advancedclassify.dpclassify(ages, avgs))

# Look up a street address through the module's geocoding helper.
print(advancedclassify.getlocation('1 alewife center, cambridge, ma'))
Ejemplo n.º 9
0
import advancedclassify
# Interactive-session style walkthrough: the module is reloaded before each
# step, and the classification results are evaluated but not printed, so
# running this as a script produces no output from those calls.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

# Linear classifier trained on the ages-only data.
reload(advancedclassify)
avgs = advancedclassify.lineartrain(agesonly)

reload(advancedclassify)
for ages in ([30, 30], [48, 32], [25, 40], [48, 20]):
    advancedclassify.dpclassify(ages, avgs)

# Numerical version of the full dataset.
reload(advancedclassify)
numericalset = advancedclassify.loadnumerical()
numericalset[0].data

# Scale the numerical data and retrain; ``avgs`` now refers to the scaled set.
reload(advancedclassify)
scaledset, scalef = advancedclassify.scaledata(numericalset)
avgs = advancedclassify.lineartrain(scaledset)
numericalset[0].data

# nlclassify on the ages-only data, using the offset computed from it.
reload(advancedclassify)
offset = advancedclassify.getoffset(agesonly)
for ages in ([30, 30], [48, 32], [25, 40], [48, 20]):
    advancedclassify.nlclassify(ages, agesonly, offset)
def test_pg_197_to_214():
    """Run the matchmaker classification examples and print every result.

    Loads the matchmaker CSV files through ``advancedclassify.loadmatch``,
    plots the age data, then trains and queries the linear classifier (on raw
    and on scaled data) and the ``nlclassify`` classifier. Nothing is
    asserted and nothing is returned; the printed output is meant to be
    inspected by eye.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    # Not referenced again in this function.
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age:
    # O is a match, X is not a match.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    avgs = advancedclassify.lineartrain(agesonly)
    for ages in ([30, 30], [30, 25], [25, 40], [48, 20]):
        print(advancedclassify.dpclassify(ages, avgs))

    # --- Determining distances using Yahoo! Maps ---
    # The original query read "cambride, ma"; the misspelt city name is fixed
    # so the lookup is made for the intended place.
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    print(advancedclassify.nlclassify([30, 30], agesonly, offset))
    print(advancedclassify.nlclassify([30, 25], agesonly, offset))
    print(advancedclassify.nlclassify([25, 40], agesonly, offset))
    # In contrast to linear classification, this now recognises that
    # 48, 20 is not a good match.
    print(advancedclassify.nlclassify([48, 20], agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)

    # Row 0: recorded match (expected 0), then the prediction (expected 0).
    print(numericalset[0].match)
    print(advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset,
                                      ssoffset))

    # Row 1: recorded match (expected 1), then the prediction (expected 1).
    print(numericalset[1].match)
    print(advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset,
                                      ssoffset))

    # Row 2: recorded match (expected 0); the prediction call is left disabled,
    # as in the original.
    print(numericalset[2].match)
    # print advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset)

    # Man doesn't want children, woman does, otherwise a really good match.
    # The original built this row but then classified numericalset[0].data
    # again; the hand-built row is what is classified now.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))