Example #1
0
def main():
    """Search for laptops, show the first ten hits, then run one kNN query.

    The value returned by ``numpredict.knnestimate`` is computed but neither
    printed nor returned.
    """
    laptops = doSearch("laptop")
    print(laptops[0:10])

    # Build the training set and query it with one hand-written spec.
    # NOTE(review): the query carries five values; confirm that matches the
    # length of the 'input' tuples produced by makeLaptopDataset().
    dataset = makeLaptopDataset()
    query = (512, 1000, 14, 40, 1000)
    numpredict.knnestimate(dataset, query)
Example #2
0
def getkNN(data, bName, bData):
    """Print the nearest neighbours of one item for several values of k.

    For each k in 1, 2, 5, 10 and 20 the neighbours reported by
    ``numpredict.knnestimate(data, bData, k)`` are written to standard
    output and appended to ``<bName>.txt``.

    Args:
        data: Dataset passed straight through to ``numpredict.knnestimate``.
        bName: Label of the queried item; also the base name of the log file.
        bData: Query vector passed to ``numpredict.knnestimate``.

    Each neighbour is indexed as a pair: element 0 is printed as-is
    (presumably the distance) and element 1 indexes the module-level
    ``names`` sequence.
    """

    def emit(log, *parts):
        # Mirror one line to stdout and to the log file.
        print(*parts)
        print(*parts, file=log)

    log_path = str(bName) + ".txt"

    for k in (1, 2, 5, 10, 20):
        neighbors = numpredict.knnestimate(data, bData, k)

        # One handle per k, closed deterministically by the context manager.
        # Previously every print opened a new handle that was never closed.
        with open(log_path, 'a+') as log:
            emit(log, "Nearest Neighbors For ", bName, "(k = ", k, ")")
            for neighbor in neighbors:
                emit(log, "\t", names[neighbor[1]], "\t---\t", neighbor[0])
            emit(log, "")
Example #3
0
File: run.py Project: wz125/courses
def codeForKNearest():
  '''Demonstrate numpredict.knnestimate on the module-level data set.

  Translated from the original note: find the k=5 training points closest
  to the input point, then use the average of their prices as the
  predicted price.
  '''
  global data
  print('## Code for k-Nearest Neighbors 计算KNN')
  reload(numpredict)
  # Default-k estimates for three query points.
  for point in ((95.0, 3.0), (99.0, 3.0), (99.0, 5.0)):
    print(numpredict.knnestimate(data, point))
  print(numpredict.wineprice(99.0, 5.0))  # Get the actual price
  print(numpredict.knnestimate(data, (99.0, 5.0), k=1))  # Try with fewer neighbors
Example #4
0
def stemmeda8():
    """Report k-nearest neighbours for two blogs using the stemmed data file.

    Behaves like its non-stemmed counterpart but reads
    ``datafiles/blogtop500_stemmed.txt``.  For the 'F-Measure' blog and the
    'Web Science and Digital Libraries Research Group' blog in turn, the
    blog's own row is removed from the data, ``knnestimate`` is run for
    k = 1, 2, 5, 10 and 20, each result is printed, and a LaTeX table is
    written under ``tables/``.

    The query vector is taken from the row that was removed.  Indexing
    ``data`` after the pop, as was done before, selects whichever row slid
    into that position, and the second half queried the F-Measure position
    instead of the WS-DL row.
    """
    blognames, _words, data = readfile("datafiles/blogtop500_stemmed.txt")
    fmeasure_idk = blognames.index('F-Measure')
    wsdl_idk = blognames.index('Web Science and Digital Libraries Research Group')
    headers = ['k', 'nearest neighbors']

    def report(query, table_path):
        # Run knn for each k, print the result and write the LaTeX table.
        rows = []
        previous_k = 0
        for k in [1, 2, 5, 10, 20]:
            # assumes knnestimate returns a list of strings ordered nearest
            # first (it is sliced and joined below) -- TODO confirm
            neighbors = knnestimate(data, query, k)
            # The table only lists neighbours not already shown for the
            # previous, smaller k.
            rows.append(["%d" % k, ", ".join(neighbors[previous_k:])])
            previous_k = k
            print("The %d nearest neighbors are:" % k, neighbors)
        with open(table_path, "w+") as out:
            out.write(tabulate(rows, headers=headers, tablefmt="latex"))

    # F-Measure: exclude the blog itself from its own neighbour search.
    print("http://f-measure.blogspot.com/")
    takeout = data.pop(fmeasure_idk)
    report(takeout['input'], "tables/fmeasure-knearest-stemmed.text")
    # Restore the row so indices line up with blognames again.
    data.insert(fmeasure_idk, takeout)

    # WS-DL: same procedure, querying with the WS-DL row itself.
    print("http://ws-dl.blogspot.com/")
    wsdl_takeout = data.pop(wsdl_idk)
    report(wsdl_takeout['input'], "tables/wsdl-knearest-stemmed.text")
Example #5
0
def nonstemmeda8():
    """Report k-nearest neighbours for two blogs using the non-stemmed file.

    Reads ``datafiles/blogtop500.txt``.  For the 'F-Measure' blog and the
    'Web Science and Digital Libraries Research Group' blog in turn, the
    blog's own row is removed from the data, ``knnestimate`` is run for
    k = 1, 2, 5, 10 and 20, each result is printed, and a LaTeX table is
    written under ``tables/``.

    The query vector is taken from the row that was removed.  Indexing
    ``data`` after the pop, as was done before, selects whichever row slid
    into that position, and the second half queried the F-Measure position
    instead of the WS-DL row.
    """
    # load the data and locate both blogs
    blognames, _words, data = readfile("datafiles/blogtop500.txt")
    fmeasure_idk = blognames.index('F-Measure')
    wsdl_idk = blognames.index('Web Science and Digital Libraries Research Group')
    headers = ['k', 'nearest neighbors']

    def report(query, table_path):
        # Run knn for each k, print the result and write the LaTeX table.
        rows = []
        previous_k = 0
        for k in [1, 2, 5, 10, 20]:
            # assumes knnestimate returns a list of strings ordered nearest
            # first (it is sliced and joined below) -- TODO confirm
            neighbors = knnestimate(data, query, k)
            # For reporting, keep only the new entries: successive runs
            # repeat the results of smaller k, e.g. k=2 -> [a, b] and
            # k=5 -> [a, b, c, d, e] is tabulated as [a, b] and [c, d, e].
            rows.append(["%d" % k, ", ".join(neighbors[previous_k:])])
            previous_k = k
            print("The %d nearest neighbors are:" % k, neighbors)
        # write out a table in latex
        with open(table_path, "w+") as out:
            out.write(tabulate(rows, headers=headers, tablefmt="latex"))

    # remove F-Measure from the data so it is not its own neighbour
    print("http://f-measure.blogspot.com/")
    takeout = data.pop(fmeasure_idk)
    report(takeout['input'], "tables/fmeasure-knearest-nonstemmed.text")
    # put the row back so indices line up with blognames again
    data.insert(fmeasure_idk, takeout)

    # do the same for wsdl, querying with the WS-DL row itself
    print("http://ws-dl.blogspot.com/")
    wsdl_takeout = data.pop(wsdl_idk)
    report(wsdl_takeout['input'], "tables/wsdl-knearest-nonstemmed.text")
Example #6
0
import numpredict
import clusters

if __name__ == "__main__":
  # Load the blog matrix, then print the kNN result for two hard-coded rows.
  blognames, words, data = clusters.readfile('blogdata.txt')

  # (heading to print, row of that blog in data)
  targets = (
    ('F-Measure', 75),
    ('\nWeb Science and Digital Libraries Research Group', 7),
  )

  for heading, row in targets:
    print(heading)
    for k in (1, 2, 5, 10, 20):
      neighbours = numpredict.knnestimate(data, blognames, data[row], k = k)
      print('k = %d ' % k + str(neighbours))
Example #7
0
def findNearestNeighbour(i, data, k):
    """Print the ``blogs`` entry for each neighbour of ``data[i]``.

    ``numpredict.knnestimate(data, data[i], k)`` supplies the neighbours;
    element 1 of each neighbour is used as an index into the module-level
    ``blogs`` sequence.
    """
    query = data[i]
    for neighbor in numpredict.knnestimate(data, query, k):
        print(blogs[neighbor[1]])
def knn1(d, v):
    """Call numpredict.knnestimate on dataset ``d`` and query ``v`` with k=3."""
    estimate = numpredict.knnestimate(d, v, k=3)
    return estimate
import numpredict

# Demo script: build a sample wine dataset and try the estimators on it.
print('\n<----Building a Sample Dataset--->')
print(numpredict.wineprice(90.0, 41.0))
data = numpredict.wineset1()
print(data[:2])

# K-nearest neighbours: print the estimate, then wineprice for the same
# point, then the weighted estimate.
query = (95.0, 3.0)
print('\n<----K-Nearnest Neighbors---->')
print(numpredict.knnestimate(data, query))
print(numpredict.wineprice(*query))
print('<----Weighted KNN---->')
print(numpredict.weightedknn(data, query))

# Cross-validation: the dataset is rebuilt before the estimators are scored.
print('\n<----Cross-Validation---->')
data = numpredict.wineset1()


def knn1(d, v):
    """Call numpredict.knnestimate on dataset ``d`` and query ``v`` with k=3."""
    estimate = numpredict.knnestimate(d, v, k=3)
    return estimate


def knn2(d, v):
    """Call numpredict.weightedknn on dataset ``d`` and query ``v`` with k=3."""
    estimate = numpredict.weightedknn(d, v, k=3)
    return estimate


# Score both estimators on the same dataset, plain kNN first.
for estimator in (knn1, knn2):
    print(numpredict.crossvalidate(estimator, data))
Example #10
0
    else:
        return int(str_result)

def makeLaptopDataset():
    """Build a kNN training set from laptop search results.

    Every hit of ``doSearch('laptop', categoryID=177)`` is fetched with
    ``getItem``; items for which ``getItem`` returns None are skipped.

    Returns:
        A list of ``{'input': (memory, hard drive capacity, screen size,
        feedback), 'result': price}`` dicts with all values as floats.
        Items whose values cannot be extracted or converted are reported on
        stdout as "<title> failed" and left out.
    """
    result = []
    for r in doSearch('laptop', categoryID=177):
        item = getItem(r['itemId'])
        if item is None:
            continue

        att = item['attributes']
        try:
            data = (float(take_first_number(att['Memory'])),
                    float(take_first_number(att['Hard Drive Capacity'])),
                    float(take_first_number(att['Screen Size'])),
                    float(item['feedback']))
            print(data)
            entry = {'input': data, 'result': float(item['price'])}
            result.append(entry)
        except Exception:
            # Best-effort: skip malformed items.  A bare ``except:`` would
            # also swallow KeyboardInterrupt/SystemExit, so Ctrl-C could not
            # stop a long scrape.
            print(item['title']+' failed')
    return result


data = makeLaptopDataset()
print 'Price=', numpredict.knnestimate(data, (8.0, 512.0, 15.0, 10000.0))
print 'Price=', numpredict.knnestimate(data, (8.0, 256.0, 15.0, 10000.0))
print 'Price=', numpredict.knnestimate(data, (8.0, 256.0, 13.0, 10000.0))

Example #11
0
File: run.py Project: wz125/courses
def knn1(d, v):
  '''Call numpredict.knnestimate on dataset d and query v with k=1.'''
  estimate = numpredict.knnestimate(d, v, k=1)
  return estimate
def knninverse(d, v):
  '''Call numpredict.weightedknn on d and v with weightf=numpredict.inverseweight.'''
  estimate = numpredict.weightedknn(d, v, weightf=numpredict.inverseweight)
  return estimate
Example #12
0
File: run.py Project: wz125/courses
def knn3(d, v):
  '''Call numpredict.knnestimate on dataset d and query v with k=3.'''
  estimate = numpredict.knnestimate(d, v, k=3)
  return estimate
def knn1(d, v):
  '''Call numpredict.knnestimate on dataset d and query v with k=1.'''
  estimate = numpredict.knnestimate(d, v, k=1)
  return estimate
Example #13
0
# Load the tab-separated blog matrix into `vecs` (defined outside this
# snippet): the first column is the blog name, the remaining columns are
# stored, still as strings, under that name.  The context manager closes the
# file, which was previously left open.
with open("blogdata.txt", "r") as f:
    for line in f:
        a = line.strip('\n').split('\t')
        b = a.pop(0)
        vecs[b] = a


fm = 'F-Measure'
ws = 'Web Science and Digital Libraries Research Group'

k = [1, 2, 5, 10, 20]

# Query with F-Measure's own vector against every other blog.  `names` is
# filtered exactly like `temp`, so a neighbour index returned for `temp`
# maps to the right blog name (indexing the unfiltered key list is off by
# one for every row after the removed one).
a = vecs[fm]
fm_index = vecs.keys().index(fm)
temp = vecs.values()
temp.pop(fm_index)
names = vecs.keys()
names.pop(fm_index)

# Ask for as many neighbours as the largest k printed below; asking for only
# 5 left nothing to index for k = 10 and k = 20.
# assumes knnestimate returns (distance, index) pairs, nearest first -- TODO confirm
a = numpredict.knnestimate(temp, a, k=max(k))

print("K Nearest Neighbors for F-Measure:")
for i in k:
    print("When k = " + str(i))
    # Slicing tolerates a result shorter than i.
    for neighbor in a[:i]:
        print(names[neighbor[1]])
print('\n')

# Prepare the same query for the WS-DL blog.
a = vecs[ws]
temp = vecs.values()
temp.pop(vecs.keys().index(ws))
Example #14
0
        attValue = getSingleValue(att, 'ValueLiteral')
        attributes[attID] = attValue
    result['attributes'] = attributes
    return result


# Show the item details for the eighth search result.
print(getItem(laptops[7][0]))
# Build the price-prediction program.


def makeLaptopDataset():
    """Build a kNN training set from laptop search results.

    Every hit of ``doSearch('laptop', categoryID=51148)`` is fetched with
    ``getItem`` using the hit's first element.

    Returns:
        A list of ``{'input': (a12, a26444, a26446, a25710), 'result':
        price}`` dicts, where the input values are the item attributes with
        those IDs converted to float.  Items with a missing or non-numeric
        value are reported on stdout and left out.
    """
    result = []
    for r in doSearch('laptop', categoryID=51148):
        item = getItem(r[0])
        # Skip hits whose details could not be fetched instead of crashing
        # on item['attributes'].
        if item is None:
            continue

        att = item['attributes']
        try:
            data = (float(att['12']), float(att['26444']),
                    float(att['26446']), float(att['25710']))
            entry = {'input': data, 'result': float(item['price'])}
        except (KeyError, TypeError, ValueError):
            # Only lookup and conversion failures are expected here; a bare
            # ``except:`` would also hide KeyboardInterrupt and real bugs.
            print(item['title'] + 'failed')
        else:
            result.append(entry)
    return result


# Build the laptop training set, then print one kNN result for a
# hand-written spec.
set1 = makeLaptopDataset()
import numpredict

# NOTE(review): the query below has five values, while makeLaptopDataset()
# builds four-value 'input' tuples -- confirm knnestimate accepts that.
print(numpredict.knnestimate(set1, (1024, 1000, 14, 40, 1000)))