Ejemplo n.º 1
0
def loadnumerical():
    """Convert the rows of 'matchmaker.csv' into all-numeric match rows.

    Each row returned by ``advancedclassify.loadmatch`` is turned into a
    nine-element list: field 0 as a float, fields 1, 2, 5, 6 and 7 passed
    through ``advancedclassify.yesno``, fields 3 and 8 combined by
    ``advancedclassify.matchcount``, fields 4 and 9 combined by
    ``milesdistance``, and finally the row's ``match`` value.  Each list is
    wrapped in ``advancedclassify.matchrow``, printed, and its ``%s``
    rendering is written as one line of 'numerical.txt'.

    Returns:
        list: one ``advancedclassify.matchrow`` per input row, in input order.
    """
    oldrows = advancedclassify.loadmatch('matchmaker.csv')
    newrows = []
    # The context manager closes the output file even if a conversion step
    # raises part-way through the data set.
    with open('numerical.txt', 'w+') as out:
        for row in oldrows:
            d = row.data
            data = [
                float(d[0]),
                advancedclassify.yesno(d[1]),
                advancedclassify.yesno(d[2]),
                advancedclassify.yesno(d[5]),
                advancedclassify.yesno(d[6]),
                advancedclassify.yesno(d[7]),
                advancedclassify.matchcount(d[3], d[8]),
                milesdistance(d[4], d[9]),
                row.match,
            ]
            # Build the row object once and reuse it for both the result
            # list and the progress output.
            numeric_row = advancedclassify.matchrow(data)
            newrows.append(numeric_row)
            print(numeric_row)
            out.write("%s\n" % data)
    return newrows
Ejemplo n.º 2
0
def loadnumerical():
    """Convert the rows of 'matchmaker.csv' into all-numeric match rows.

    Each row returned by ``advancedclassify.loadmatch`` is turned into a
    nine-element list: field 0 as a float, fields 1, 2, 5, 6 and 7 passed
    through ``advancedclassify.yesno``, fields 3 and 8 combined by
    ``advancedclassify.matchcount``, fields 4 and 9 combined by
    ``milesdistance``, and finally the row's ``match`` value.  Each list is
    wrapped in ``advancedclassify.matchrow``, printed, and its ``%s``
    rendering is written as one line of 'numerical.txt'.

    Returns:
        list: one ``advancedclassify.matchrow`` per input row, in input order.
    """
    oldrows = advancedclassify.loadmatch('matchmaker.csv')
    newrows = []
    # The context manager closes the output file even if a conversion step
    # raises part-way through the data set.
    with open('numerical.txt', 'w+') as out:
        for row in oldrows:
            d = row.data
            data = [
                float(d[0]),
                advancedclassify.yesno(d[1]),
                advancedclassify.yesno(d[2]),
                advancedclassify.yesno(d[5]),
                advancedclassify.yesno(d[6]),
                advancedclassify.yesno(d[7]),
                advancedclassify.matchcount(d[3], d[8]),
                milesdistance(d[4], d[9]),
                row.match,
            ]
            # Build the row object once and reuse it for both the result
            # list and the progress output.
            numeric_row = advancedclassify.matchrow(data)
            newrows.append(numeric_row)
            print(numeric_row)
            out.write("%s\n" % data)
    return newrows
Ejemplo n.º 3
0
import advancedclassify
# Load 'agesonly.csv' through loadmatch with allnum=True, then pass the
# resulting rows to plotagematches (presumably the age scatter plot --
# confirm against advancedclassify).
ageonly = advancedclassify.loadmatch('agesonly.csv', allnum = True)
advancedclassify.plotagematches(ageonly)
def test_pg_197_to_214():
    """Walk through the matchmaker examples and print every result.

    Loads the age-only and full matchmaker data sets, plots the age data,
    then prints the output of the linear classifier (``dpclassify``) and the
    non-linear classifier (``nlclassify``) for a handful of sample points.
    Nothing is asserted; the results are only printed for inspection.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age:
    # O is a match, X is not a match.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    avgs = advancedclassify.lineartrain(agesonly)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(advancedclassify.dpclassify([30, 30], avgs))
    print(advancedclassify.dpclassify([30, 25], avgs))
    print(advancedclassify.dpclassify([25, 40], avgs))
    print(advancedclassify.dpclassify([48, 20], avgs))

    # --- Determining distances using Yahoo! maps ---
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    print(advancedclassify.nlclassify([30, 30], agesonly, offset))
    print(advancedclassify.nlclassify([30, 25], agesonly, offset))
    print(advancedclassify.nlclassify([25, 40], agesonly, offset))
    # In contrast to linear classification, this now recognises that
    # 48, 20 is not a good match.
    print(advancedclassify.nlclassify([48, 20], agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)

    # Expected: 0
    print(numericalset[0].match)
    # Expected: 0
    print(advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset))

    # Expected: 1
    print(numericalset[1].match)
    # Expected: 1
    print(advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset))

    # Expected: 0
    print(numericalset[2].match)
    # Expected: 0 (this call was already disabled in the original)
    # print(advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset))

    # Man doesn't want children, woman does; otherwise a really good match.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    # Classify the hand-built row itself; the original re-classified
    # numericalset[0] here and never used newrow.  Original note: 0
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    # NOTE(review): the original "0" annotation described the numericalset[0]
    # call that used to stand here; confirm the expected value for this row.
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
Ejemplo n.º 5
0
import advancedclassify as ad
import treepredict as tr

# Load both data sets through advancedclassify.
agesonly = ad.loadmatch('agesonly.csv', allnum=True)
matchmaker = ad.loadmatch('matchmaker.csv')

# ad.plotagematches(agesonly)

# Re-read the CSV as rows of ints for the decision tree.  open() replaces
# the Python 2-only file() builtin, and the with-block closes the handle.
with open('agesonly.csv') as agefile:
    age = [[int(w) for w in line.split(',')] for line in agefile]
tree = tr.buildtree(age)
tr.printtree(tree)
tr.drawtree(tree)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(tr.classify(tree, [65, 63]))

avgs = ad.lineartrain(agesonly)
print(avgs)

# avgs.values() was a list on Python 2; list(...) reproduces that on
# Python 3, where it would otherwise be a non-indexable view.
print(ad.dpclassify([30, 25], list(avgs.values())))
print(ad.dpclassify([25, 40], list(avgs.values())))
print(ad.dpclassify([48, 20], list(avgs.values())))

# Same three points through the decision tree, for comparison.
print(tr.classify(tree, [30, 25]))
print(tr.classify(tree, [25, 40]))
print(tr.classify(tree, [48, 20]))
Ejemplo n.º 6
0
import advancedclassify

# Load both data sets through advancedclassify.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

# advancedclassify.plotagematches(agesonly)
avgs = advancedclassify.lineartrain(agesonly)

# Classify four sample age pairs with the result of lineartrain.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(advancedclassify.dpclassify([30, 30], avgs))
print(advancedclassify.dpclassify([30, 25], avgs))
print(advancedclassify.dpclassify([25, 40], avgs))
print(advancedclassify.dpclassify([48, 20], avgs))

# Look up a single street address.
print(advancedclassify.getlocation('1 alewife center, cambridge, ma'))
Ejemplo n.º 7
0
Archivo: run.py Proyecto: wz125/courses
def makerDataset():
  """Load the matchmaker data sets and print the first row's attributes.

  Prints a section header, loads 'agesonly.csv' (with ``allnum=True``) and
  'matchmaker.csv' through ``advancedclassify.loadmatch``, then prints the
  first matchmaker row's instance attributes as comma-separated
  ``name:value`` pairs.  Returns nothing.
  """
  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print('##  Matchmaker Dataset')
  import advancedclassify
  # agesonly is loaded but not used further in this function.
  agesonly=advancedclassify.loadmatch('agesonly.csv',allnum=True)
  matchmaker=advancedclassify.loadmatch('matchmaker.csv')
  # Each item is a (name, value) tuple, which fills both %s slots.
  print(', '.join('%s:%s' % item for item in matchmaker[0].__dict__.items()))
Ejemplo n.º 8
0
import advancedclassify
# Load 'agesonly.csv' through loadmatch with allnum=True, then pass the
# resulting rows to plotagematches (presumably the age scatter plot --
# confirm against advancedclassify).
ageonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
advancedclassify.plotagematches(ageonly)
Ejemplo n.º 9
0
def get_dataset():
    """Load both matchmaker data sets.

    Prints a progress message, then loads 'agesonly.csv' (with
    ``allnum=True``) and 'matchmaker.csv' through
    ``advancedclassify.loadmatch``.

    Returns:
        tuple: ``(agesonly, matchmaker)``, the two loaded data sets.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("getting data...")
    agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
    matchmaker = advancedclassify.loadmatch('matchmaker.csv')
    return (agesonly, matchmaker)
def test_pg_197_to_214():
    """Walk through the matchmaker examples and print every result.

    Loads the age-only and full matchmaker data sets, plots the age data,
    then prints the output of the linear classifier (``dpclassify``) and the
    non-linear classifier (``nlclassify``) for a handful of sample points.
    Nothing is asserted; the results are only printed for inspection.
    """
    import advancedclassify

    # --- Matchmaker dataset ---
    agesonly = advancedclassify.loadmatch("agesonly.csv", allnum=True)
    matchmaker = advancedclassify.loadmatch("matchmaker.csv")

    # --- Difficulties with the data ---
    # Scatter plot of man's age vs woman's age:
    # O is a match, X is not a match.
    advancedclassify.plotagematches(agesonly)

    # --- Basic linear classification ---
    avgs = advancedclassify.lineartrain(agesonly)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(advancedclassify.dpclassify([30, 30], avgs))
    print(advancedclassify.dpclassify([30, 25], avgs))
    print(advancedclassify.dpclassify([25, 40], avgs))
    print(advancedclassify.dpclassify([48, 20], avgs))

    # --- Determining distances using Yahoo! maps ---
    print(advancedclassify.milesdistance("cambridge, ma", "new york, ny"))

    # --- Creating the new dataset ---
    numericalset = advancedclassify.loadnumerical()
    print(numericalset[0].data)

    # --- Scaling the dataset ---
    scaledset, scalef = advancedclassify.scaledata(numericalset)
    avgs = advancedclassify.lineartrain(scaledset)
    print(numericalset[0].data)
    print(numericalset[0].match)
    print(advancedclassify.dpclassify(scalef(numericalset[0].data), avgs))
    print(numericalset[11].match)
    print(advancedclassify.dpclassify(scalef(numericalset[11].data), avgs))

    # --- The kernel trick ---
    offset = advancedclassify.getoffset(agesonly)
    print(offset)
    print(advancedclassify.nlclassify([30, 30], agesonly, offset))
    print(advancedclassify.nlclassify([30, 25], agesonly, offset))
    print(advancedclassify.nlclassify([25, 40], agesonly, offset))
    # In contrast to linear classification, this now recognises that
    # 48, 20 is not a good match.
    print(advancedclassify.nlclassify([48, 20], agesonly, offset))

    ssoffset = advancedclassify.getoffset(scaledset)

    # Expected: 0
    print(numericalset[0].match)
    # Expected: 0
    print(advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset,
                                      ssoffset))

    # Expected: 1
    print(numericalset[1].match)
    # Expected: 1
    print(advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset,
                                      ssoffset))

    # Expected: 0
    print(numericalset[2].match)
    # Expected: 0 (this call was already disabled in the original)
    # print(advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset))

    # Man doesn't want children, woman does; otherwise a really good match.
    newrow = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]
    # Classify the hand-built row itself; the original re-classified
    # numericalset[0] here and never used newrow.  Original note: 0
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))

    # Both want children.
    newrow = [28.0, -1, 1, 26.0, -1, 1, 2, 0.8]
    # NOTE(review): the original "0" annotation described the numericalset[0]
    # call that used to stand here; confirm the expected value for this row.
    print(advancedclassify.nlclassify(scalef(newrow), scaledset, ssoffset))
Ejemplo n.º 11
0
# advancedclassifytest.py

import advancedclassify

# Load both data sets; neither name is used again in the visible part of
# this script.
agesonly = advancedclassify.loadmatch('agesonly.csv', allnum=True)
matchmaker = advancedclassify.loadmatch('matchmaker.csv')

# Build the numeric data set and rescale it.  scaledata returns the scaled
# rows together with scalef, which is applied to individual rows below.
numericalset = advancedclassify.loadnumerical()
scaledset, scalef = advancedclassify.scaledata(numericalset)
# avgs is computed but not used in the visible part of this script.
avgs = advancedclassify.lineartrain(scaledset)

# Offset passed to every nlclassify call below.
ssoffset = advancedclassify.getoffset(scaledset)

# For each of the first three rows: print the stored match value, then the
# nlclassify result for the same row after scaling, so the two can be compared.
print('classify numericalset[0] : ')
print(numericalset[0].match)
print(advancedclassify.nlclassify(scalef(numericalset[0].data), scaledset, ssoffset))
print('------------------------------')

print('classify numericalset[1] : ')
print(numericalset[1].match)
print(advancedclassify.nlclassify(scalef(numericalset[1].data), scaledset, ssoffset))
print('------------------------------')

print('classify numericalset[2] : ')
print(numericalset[2].match)
print(advancedclassify.nlclassify(scalef(numericalset[2].data), scaledset, ssoffset))
print('------------------------------')

# Hand-built eight-value sample row; the code that uses it lies beyond the
# visible end of this script.
print('classify newrow0 : ')
newrow0 = [28.0, -1, -1, 26.0, -1, 1, 2, 0.8]