def test():
    """Evaluate cluster-based rating prediction blended with occupation.

    For each cluster count in ``[8, 16, 32, 64]`` the training matrix loaded
    from ``u.data`` is clustered with ``kMeans`` and the five test splits
    ``u1.test`` .. ``u5.test`` are scored.

    A rating is predicted as the rounded average of the non-zero ratings that
    members of the test user's cluster gave the movie.  When some of those
    raters have the same value in column 1 of their ``occupationLoad()`` row
    as the test user, the lower of the cluster average and the
    same-occupation average is used.  If nobody in the cluster rated the
    movie the prediction falls back to 3.

    Prints, per cluster count: a classification report for the first split
    only, then the mean absolute error and the standard deviation of the
    predictions, each averaged over the five splits.

    Fixes relative to the previous version:
      * the standard deviation was accumulated into an undefined name
        (``standardDeviationError``), which raised before anything was
        reported; it now accumulates into ``standardDeviation``, the value
        that is printed;
      * the same-occupation rating sum was reset once per split instead of
        once per test row, so it leaked between predictions;
      * averages are computed with float division so they are rounded, not
        truncated, when the ratings are integers under Python 2.
    """
    dataSet = loadData.loadTrainingData("u.data")
    occupation = occupationLoad()
    listOfKValues = [8, 16, 32, 64]
    numFolds = 5
    testFile = "u"

    for x in listOfKValues:
        # Only the per-user cluster assignment is needed here.
        _, clusterAssignment = kMeans(dataSet, x)
        print("For Clusters= %d :-" % x)
        avg = 0.0
        standardDeviation = 0.0
        reportPending = True  # the classification report is shown for the first split only

        for fold in range(1, numFolds + 1):
            testData, testLabel = loadData.loadTestData(testFile + str(fold) + ".test")
            m = shape(dataSet)[0]
            totalError = 0
            predictions = []

            for index, t in enumerate(testData):
                # Ids in the test rows are 1-based; the matrix is 0-based.
                user, movie = int(t[0]) - 1, int(t[1]) - 1
                label = testLabel[index]
                clusterNum = clusterAssignment[user]
                userInCluster = [row for row in range(m)
                                 if clusterAssignment[row] == clusterNum]

                sumOfRatings = 0
                count = 0            # cluster members who rated the movie
                occupationSum = 0    # reset per prediction (was per split)
                occupationCount = 0  # ...of those, members sharing the occupation
                for row in userInCluster:
                    rating = dataSet[row][movie]
                    if rating != 0:
                        sumOfRatings += rating
                        count += 1
                        if occupation[row][1] == occupation[user][1]:
                            occupationSum += rating
                            occupationCount += 1

                if count == 0:
                    # Nobody in the cluster rated this movie: neutral default.
                    ratingsPredicted = 3
                else:
                    ratingsPredicted = around(float(sumOfRatings) / count)
                    if occupationCount != 0:
                        occupationAverage = around(float(occupationSum) / occupationCount)
                        ratingsPredicted = min(occupationAverage, ratingsPredicted)

                predictions.append(ratingsPredicted)
                totalError += absolute(ratingsPredicted - label)

            meanError = float(totalError) / len(testData)
            if reportPending:
                print(metrics.classification_report(testLabel, predictions))
                reportPending = False
            avg += meanError
            standardDeviation += std(array(predictions))

        print("%s %s" % ("Mean Error: ", float(avg) / numFolds))
        print("%s %s" % ("Standard Deviation: ", float(standardDeviation) / numFolds))
        print("")
def testKMeans(dataSet):
    """Score k-means based rating prediction on the ``u1``..``u5`` test splits.

    ``dataSet`` is what ``kMeans`` is fitted on; the ratings used for the
    predictions are then reloaded from ``u.data``.  The sweep over
    ``range(50, 61, 2)`` ends after its first value (50 clusters) because of
    the ``break`` that closes the loop body.

    A rating is predicted as the rounded average of the non-zero ratings
    given to the movie by the members of the test user's cluster, or 3 when
    no member rated it.  Prints the mean absolute error and the standard
    deviation of the predictions, both averaged over the five splits.
    """
    for x in range(50, 61, 2):
        _, clusterAssignment = kMeans(dataSet, x)
        dataSet = loadData.loadTrainingData("u.data")
        testFile = "u"
        avg = 0.0
        standardDeviationError = 0

        for fold in range(1, 6):
            testData, testLabel = loadData.loadTestData(testFile + str(fold) + ".test")
            m = shape(dataSet)[0]
            totalError = 0
            predictions = []

            for index, t in enumerate(testData):
                # Ids in the test rows are 1-based.
                user = int(t[0]) - 1
                movie = int(t[1]) - 1
                label = testLabel[index]
                clusterNum = clusterAssignment[user]
                peers = [row for row in range(0, m)
                         if clusterAssignment[row] == clusterNum]

                sumOfRatings = 0
                count = 0  # peers who rated the movie
                for row in peers:
                    if dataSet[row][movie] == 0:
                        continue  # unrated entries are stored as 0
                    sumOfRatings += dataSet[row][movie]
                    count += 1

                # Fall back to 3 when no peer rated the movie.
                ratingsPredicted = around(sumOfRatings / count) if count else 3
                predictions.append(ratingsPredicted)
                totalError += absolute(ratingsPredicted - label)

            meanError = totalError / len(testData)
            avg += meanError
            standardDeviationError += std(array(predictions))

        print("Mean Absolute Error: " + str(float(avg) / 5))
        print("")
        print("Standard Deviation: " + str(float(standardDeviationError / 5)))
        print("")
        # Only the first cluster count of the sweep is evaluated.
        break
def test():
    """Score Pearson-weighted rating prediction on the first 100 rows per split.

    For each of ``u1.test`` .. ``u5.test`` only the first 100 test rows (and
    labels) are used.  For a test row, ``pearson(dataSet, user)`` supplies
    one weight per user (read as ``relation[0, j]``); the prediction is the
    weighted sum of the other users' non-zero ratings for the movie divided
    by the number of such raters, rounded, or 3 when nobody else rated it.

    Prints a classification report per split, then the mean absolute error
    and the standard deviation of the predictions averaged over the splits.
    """
    dataSet = loadData.loadTrainingData("u.data")
    avg = 0.0
    standardDeviation = 0.0

    for x in range(1, 6):
        testSet, testLabel = loadData.loadTestData("u" + str(x) + ".test")
        testLabel = testLabel[:100]
        totalError = 0
        mTest = 0  # rows scored so far; also the index of the current label
        predictions = []

        for t in testSet:
            user, movie = int(t[0]) - 1, int(t[1]) - 1
            label = testLabel[mTest]
            relation = pearson(dataSet, user)

            summation = 0.0
            count = 0
            for j in range(shape(dataSet)[0]):
                # Skip the test user and anyone who has not rated the movie.
                if j == user or dataSet[j, movie] == 0:
                    continue
                summation += dataSet[j, movie] * relation[0, j]
                count += 1

            answer = around(summation / count) if count != 0 else 3
            predictions.append(answer)
            totalError += absolute(answer - label)

            mTest += 1
            if mTest == 100:
                break

        meanError = float(totalError) / mTest
        predictions = array(predictions)
        avg += meanError
        standardDeviation += std(predictions)
        print(metrics.classification_report(testLabel, predictions))
        # NOTE(review): the source layout was lost; this blank line is assumed
        # to follow each per-split report - confirm against the original file.
        print("")

    print("Mean Absolute Error: " + str(float(avg) / 5))
    print("")
    print("Standard Deviation: " + str(float(standardDeviation / 5)))
    print("")
def test():
    """Cluster the first 100 users and print the resulting centroids.

    Loads ``u.data``, runs the three-argument ``kMeans`` on the first 100
    rows (passing the cluster count and the total number of rows) and prints
    the centroids it returns.

    The previous version followed the ``print`` with an unconditional
    ``return`` and then a complete five-split evaluation that could never
    execute, and it built an ``emptyPool`` list that was never read.  That
    dead code has been removed; the printed output is unchanged.
    """
    dataSet = loadData.loadTrainingData("u.data")
    for x in range(20, 40, 2):
        centroids, clusterAssignment = kMeans(dataSet[:100, :], x, shape(dataSet)[0])
        print(centroids)
        # The sweep has always stopped after its first cluster count (20);
        # drop this return to run the remaining values.
        return
def test():
    """Score cluster-based rating prediction using assignments from ``fcm``.

    Calls ``fcm(dataSet, 2, 8, 2)`` on the ``u.data`` matrix, prints the
    first returned value, and uses the second as the per-user cluster
    assignment.  For each of ``u1.test`` .. ``u5.test`` every test row is
    predicted as the rounded average of the non-zero ratings given to the
    movie by the users in the test user's cluster (3 when there are none);
    each prediction is printed as it is made.
    """
    dataSet = loadData.loadTrainingData("u.data")
    u, clusterAssignment = fcm(dataSet, 2, 8, 2)
    print(u)
    testFile = "u"

    for fold in range(1, 6):
        testData, testLabel = loadData.loadTestData(testFile + str(fold) + ".test")
        m = shape(dataSet)[0]
        totalError = 0
        predictions = []

        for index, t in enumerate(testData):
            user, movie = int(t[0]) - 1, int(t[1]) - 1
            label = testLabel[index]
            clusterNum = clusterAssignment[user]
            peers = [row for row in range(0, m)
                     if clusterAssignment[row] == clusterNum]

            sumOfRatings = 0
            count = 0  # peers who rated the movie
            for row in peers:
                if dataSet[row][movie] == 0:
                    continue
                sumOfRatings += dataSet[row][movie]
                count += 1

            if count:
                ratingsPredicted = around(sumOfRatings / count)
            else:
                ratingsPredicted = 3  # no peer rated the movie
            predictions.append(ratingsPredicted)
            print(ratingsPredicted)
            totalError += absolute(ratingsPredicted - label)

        meanError = totalError / len(testData)
        # NOTE(review): the source layout was lost; the error is assumed to be
        # reported once per split - confirm against the original file.
        print(meanError)
def testKMeansForPca(data):
    """Score cluster-based rating prediction with clusters fitted on ``data``.

    A 15-cluster ``KMeans`` model is fitted on ``data`` and the same ``data``
    is passed to ``predict`` to get one cluster label per row.  Ratings are
    read from ``u1.base`` and evaluated on ``u1.test``: each test row is
    predicted as the rounded average of the non-zero ratings given to the
    movie by users with the same cluster label, or 3 when there are none.

    Prints the mean absolute error, then the standard deviation of the
    predictions.
    """
    dataSet = loadData.loadTrainingData("u1.base")
    testData, testLabel = loadData.loadTestData("u1.test")

    clf = KMeans(n_clusters=15)
    clf.fit(data)
    clusterAssignment = clf.predict(data)

    m = shape(dataSet)[0]
    totalError = 0
    predictions = []

    for index, t in enumerate(testData):
        # Ids in the test rows are 1-based.
        user, movie = int(t[0]) - 1, int(t[1]) - 1
        label = testLabel[index]
        clusterNum = clusterAssignment[user]
        peers = [row for row in range(0, m)
                 if clusterAssignment[row] == clusterNum]

        sumOfRatings = 0
        count = 0  # peers who rated the movie
        for row in peers:
            if dataSet[row][movie] == 0:
                continue
            sumOfRatings += dataSet[row][movie]
            count += 1

        ratingsPredicted = around(sumOfRatings / count) if count else 3
        predictions.append(ratingsPredicted)
        totalError += absolute(ratingsPredicted - label)

    meanError = totalError / len(testData)
    print(meanError)
    print(std(predictions))
def mainFunction():
    """Load the ``u1.test`` split and pass its rows and labels to ``classifierTest``."""
    samples, labels = loadData.loadTestData("u1.test")
    classifierTest(samples, labels)
def test():
    """Load the ``u1`` train/test split and run ``som`` on the training data."""
    trainingMatrix = loadData.loadTrainingData("u1.base")
    # The test split is still loaded and unpacked, although only the
    # training matrix is handed to som().
    _testSet, _testLabel = loadData.loadTestData("u1.test")
    som(trainingMatrix)
# Per-user linear model fitted by coordinate-wise gradient descent.
#
# `theta` holds one 19-value weight row per user (943 rows).  `l` is defined
# elsewhere in the file; it is indexed as l[movie][feature] - presumably one
# 19-value feature row per movie (TODO confirm).  Only the first 10 users are
# fitted; every other row of `theta` stays at zero.
#
# Fixes relative to the previous version: the previous weight (`old`) and the
# gradient sum (`sumi`) were set once before the `while True` loop and never
# refreshed, so the stop test compared against the initial weight and the
# gradient kept accumulating across iterations.  The loop therefore either
# stopped after one step or could run forever.  Both are now reset on every
# iteration and the loop is capped at `maxIterations` in case the step size
# makes the updates diverge.
trainData = loadData.loadTrainingData("u.data")
alpha = 0.01          # learning rate
thres = 0.1           # stop once one update moves the weight by less than this
maxIterations = 1000  # safety cap; the original loop had no upper bound
theta = mat(zeros((943, 19)))
print(shape(mat(l)))

for i in range(10):
    for k in range(19):
        for iteration in range(maxIterations):
            old = theta[i, k]
            sumi = 0
            for j in range(1682):
                if trainData[i, j]:  # only movies this user has rated
                    # Prediction error for movie j under the current weights.
                    residual = (mat(l[j]) * theta[i, :].T)[0, 0] - trainData[i, j]
                    sumi += residual * l[j][k]
            theta[i, k] -= alpha * sumi
            if abs(theta[i, k] - old) < thres:
                break
print(theta[0])

# Print the true label next to the model output for every u1.test row.
testData, testLabel = loadData.loadTestData("u1.test")
index = 0
for t in testData:
    user, movie = int(t[0]) - 1, int(t[1]) - 1
    label = testLabel[index]
    rating = theta[user, :] * mat(l[movie]).T
    print("%s %s" % (label, rating))
    index += 1
def test():
    """Grow clusters from a pool of unassigned users, then score predictions.

    Loads ``u.data`` and, for ``clu`` in ``range(74,75,2)`` (i.e. only 74)
    and each test split ``u1.test`` .. ``u5.test``:

    1. ``initialization(dataSet,clu)`` supplies ``clusters`` (lists of
       ``[user, row]`` entries), ``emptyPool`` (user ids) and ``meanList``
       (one mean vector per cluster).
    2. While the pool is non-empty, a random pool user is added to a random
       cluster; before that, a current member may be moved back to the pool
       (see the inline comments).
    3. Every test row is predicted as the average of the non-zero ratings the
       members of the user's cluster gave the movie, and the absolute error
       is accumulated.

    Prints a header, ``shape(testLabel)`` and a classification report, then
    returns - so only the first split is ever processed.

    NOTE(review): the original file's indentation was lost; the nesting below
    is a reconstruction and should be confirmed against the original.
    """
    dataSet=loadData.loadTrainingData("u.data")
    for clu in range(74,75,2):
        avg=0.0
        standardDeviation=0.0
        for te in range(1,6):
            testData,testLabel=loadData.loadTestData("u"+str(te)+".test")
            clusters,emptyPool,meanList=initialization(dataSet,clu)
            while len(emptyPool):
                # Pick a random unassigned user and a random target cluster.
                # NOTE(review): if `random` is numpy.random the upper bound is
                # exclusive and the last element is never chosen; with the
                # stdlib module it is inclusive - confirm which is imported.
                randVar=random.randint(0,len(emptyPool)-1)
                user=emptyPool[randVar]
                randNes=random.randint(0,len(clusters)-1)
                # This value is overwritten in the loop below before it is read.
                mae=float(sum(abs(dataSet[user,:]-meanList[randNes])))/1682
                mini=100000000
                count=0
                threshold=int(0.3*len(clusters[randNes]))
                minPerson=-1
                # Scan the members for the one whose mean absolute difference
                # from the cluster mean is smallest; `count` is the number of
                # times the running minimum improved during the scan.
                for i in range(0,len(clusters[randNes])):
                    mae=float(sum(abs(dataSet[clusters[randNes][i][0],:]-meanList[randNes])))/1682
                    if mae<mini:
                        count+=1
                        mini=mae
                        minPerson=clusters[randNes][i][0]
                if count:
                    if count>=threshold:
                        # Move that member back to the pool: find its position,
                        # take its row out of the mean, then delete the entry.
                        for c in clusters[randNes]:
                            if c[0]==minPerson:
                                q=clusters[randNes].index(c)
                        for t in range(0,1682):
                            # NOTE(review): len(meanList[randNes]) is the length
                            # of the mean vector, not the number of members;
                            # presumably len(clusters[randNes]) was intended -
                            # confirm before relying on the updated means.
                            add=(meanList[randNes][t]*len(meanList[randNes]))-dataSet[minPerson,t]
                            add=add/(len(meanList[randNes])-1)
                            meanList[randNes][t]=add
                        del(clusters[randNes][q])
                        emptyPool.append(minPerson)
                # Add the chosen user to the cluster, fold its row into the
                # mean (same len() caveat as above) and drop it from the pool.
                clusters[randNes].append([user,dataSet[user,:]])
                ind=emptyPool.index(user)
                for t in range(0,1682):
                    add=(meanList[randNes][t]*len(meanList[randNes]))+dataSet[user,t]
                    add=add/(len(meanList[randNes])+1)
                    meanList[randNes][t]=add
                del(emptyPool[ind])
                # var+=1
            # Total number of clustered entries; only used by the disabled print.
            summation=0
            for c in clusters:
                # print len(c)
                summation+=len(c)
            # print summation
            totalError=0
            predictions=[]
            m = shape(dataSet)[0]
            index=0
            for t in testData:
                # Ids in the test rows are 1-based.
                user,movie=int(t[0])-1,int(t[1])-1
                # print user,movie
                label=testLabel[index]
                check=False
                # Locate the cluster holding the test user, then average the
                # non-zero ratings its members gave the movie.
                for i in range(0,len(clusters)):
                    for j in range(0,len(clusters[i])):
                        if clusters[i][j][0]==user:
                            count =0
                            tum=0.0
                            for k in range(0,len(clusters[i])):
                                if dataSet[clusters[i][k][0],movie]!=0:
                                    count+=1
                                    tum+=dataSet[clusters[i][k][0],movie]
                            if count!=0:
                                tum=tum/count
                            check=True
                        if check:
                            break
                    if check:
                        break
                # NOTE(review): if the user is in no cluster, `tum` keeps the
                # previous row's value (or is undefined on the first row).
                # When no member rated the movie the prediction is 0.0.
                predictions.append(tum)
                totalError+=absolute(tum-label)
                index+=1
            meanError=totalError/len(testData)
            print "Precision And Recall: "
            print shape(testLabel)
            print metrics.classification_report(testLabel,predictions)
            # print
            # NOTE(review): this return ends the function after the first
            # split; the statements below it never execute.
            return
            predictions=array(predictions)
            standardDeviation+=std(predictions)
            # print standardDeviation
            # print meanError
            avg+=meanError
            # break
        # break
        print "Standard Deviation: "+str(standardDeviation)