def test():
    """Evaluate occupation-aware k-means rating prediction for several k.

    The training matrix is loaded from "u.data" and clustered with
    ``kMeans`` for each cluster count in [8, 16, 32, 64].  For every
    cluster count the five test folds "u1.test" .. "u5.test" are scored:

    * the rating of (user, movie) is the rounded mean of the non-zero
      ratings given to that movie by users in the same cluster;
    * if some of those raters share ``occupation[...][1]`` with the target
      user, the prediction is the smaller of the cluster mean and the
      same-occupation mean (both rounded);
    * if nobody in the cluster rated the movie, the prediction is 3.

    Prints a classification report for the first fold of each cluster
    count, then the mean absolute error and the standard deviation of the
    predictions, both averaged over the five folds.  Returns None.
    """
    dataSet = loadData.loadTrainingData("u.data")
    occupation = occupationLoad()
    numUsers = shape(dataSet)[0]
    for numClusters in [8, 16, 32, 64]:
        centroids, clusterAssignment = kMeans(dataSet, numClusters)
        print("For Clusters= %d :-" % numClusters)
        avg = 0.0
        standardDeviation = 0.0
        reportPending = True  # print the classification report once per k
        # Cluster peers only depend on the user, so compute them once per
        # user instead of rescanning every user for every test row.
        peersByUser = {}
        for fold in range(1, 6):
            testData, testLabel = loadData.loadTestData("u" + str(fold) + ".test")
            totalError = 0
            predictions = []
            for index, t in enumerate(testData):
                # Ids in the test file are 1-based.
                user, movie = int(t[0]) - 1, int(t[1]) - 1
                label = testLabel[index]
                if user not in peersByUser:
                    clusterNum = clusterAssignment[user]
                    peersByUser[user] = [
                        other for other in range(numUsers)
                        if clusterAssignment[other] == clusterNum
                    ]
                sumOfRatings = 0
                count = 0  # cluster peers who rated the movie
                # Both occupation accumulators are reset per test row; the
                # original only reset the counter, so the sum leaked across
                # rows and inflated the same-occupation mean.
                occupationSum = 0
                occupationCount = 0
                for peer in peersByUser[user]:
                    rating = dataSet[peer][movie]
                    if rating != 0:  # 0 means "not rated"
                        sumOfRatings += rating
                        count += 1
                        if occupation[peer][1] == occupation[user][1]:
                            occupationSum += rating
                            occupationCount += 1
                if count == 0:
                    # Nobody in the cluster rated the movie: neutral default.
                    ratingsPredicted = 3
                else:
                    # float() keeps the mean from being truncated by
                    # Python 2 integer division before it is rounded.
                    ratingsPredicted = around(float(sumOfRatings) / count)
                    if occupationCount != 0:
                        occupationMean = around(float(occupationSum) / occupationCount)
                        ratingsPredicted = min(occupationMean, ratingsPredicted)
                predictions.append(ratingsPredicted)
                totalError += absolute(ratingsPredicted - label)
            meanError = float(totalError) / len(testData)
            if reportPending:
                print(metrics.classification_report(testLabel, predictions))
                reportPending = False
            avg += meanError
            # Accumulate into the variable that was initialised above; the
            # original added to an undefined name here.
            standardDeviation += std(array(predictions))

        print("Mean Error:  %s" % (float(avg) / 5))
        print("Standard Deviation:  %s" % (float(standardDeviation) / 5))
        print("")
def testKMeans(dataSet):
    """Score cluster-mean rating prediction on the five "u<n>.test" folds.

    ``dataSet`` is handed to ``kMeans`` to obtain the cluster assignment;
    the ratings used for prediction come from a fresh load of "u.data",
    which replaces the argument.  Although the outer loop is written over
    range(50, 61, 2), it ends with an unconditional ``break``, so only the
    first cluster count (50) is evaluated.

    Prints the mean absolute error and the standard deviation of the
    predictions, each averaged over the five folds.  Returns None.
    """
    for clusterCount in range(50, 61, 2):
        centroids, clusterAssignment = kMeans(dataSet, clusterCount)
        # From here on the freshly loaded matrix is used, not the argument.
        dataSet = loadData.loadTrainingData("u.data")
        filePrefix = "u"
        errorSum = 0.0
        spreadSum = 0
        for fold in range(1, 6):
            testData, testLabel = loadData.loadTestData(filePrefix + str(fold) + ".test")
            userTotal = shape(dataSet)[0]
            totalError = 0
            predictions = []
            for row, t in enumerate(testData):
                # Ids in the test file are 1-based.
                user = int(t[0]) - 1
                movie = int(t[1]) - 1
                label = testLabel[row]
                clusterNum = clusterAssignment[user]
                peers = [
                    other for other in range(0, userTotal)
                    if clusterAssignment[other] == clusterNum
                ]
                ratingSum = 0
                raters = 0  # peers who rated the movie (0 means unrated)
                for peer in peers:
                    if dataSet[peer][movie] != 0:
                        ratingSum += dataSet[peer][movie]
                        raters += 1
                if raters == 0:
                    ratingsPredicted = 3  # no rater in the cluster
                else:
                    ratingsPredicted = around(ratingSum / raters)
                predictions.append(ratingsPredicted)
                totalError += absolute(ratingsPredicted - label)
            meanError = totalError / len(testData)
            errorSum += meanError
            predictions = array(predictions)
            spreadSum += std(predictions)
        print("Mean Absolute Error: " + str(float(errorSum) / 5))
        print("")
        print("Standard Deviation: " + str(float(spreadSum / 5)))
        print("")
        # Only the first cluster count is ever evaluated.
        break
Example #3
0
def test():
    """Score a Pearson-weighted rating predictor on the five test folds.

    For each fold "u1.test" .. "u5.test" at most the first 100 test rows
    are scored.  The prediction for (user, movie) is the rounded value of
    sum(rating_j * relation[0, j]) / count over every other user j with a
    non-zero rating for the movie, where ``relation`` is the result of
    ``pearson(dataSet, user)``; it is 3 when no other user rated the movie.

    Prints a classification report per fold, then the mean absolute error
    and the standard deviation of the predictions averaged over the five
    folds.  Returns None.
    """
    dataSet = loadData.loadTrainingData("u.data")
    userTotal = shape(dataSet)[0]
    errorSum = 0.0
    spreadSum = 0.0
    for fold in range(1, 6):
        testSet, testLabel = loadData.loadTestData("u" + str(fold) + ".test")
        # Labels are cut to match the 100-row cap applied in the loop below.
        testLabel = testLabel[:100]
        totalError = 0
        processed = 0
        predictions = []
        for position, t in enumerate(testSet):
            # Ids in the test file are 1-based.
            user = int(t[0]) - 1
            movie = int(t[1]) - 1
            label = testLabel[position]
            relation = pearson(dataSet, user)
            weighted = 0.0
            raters = 0
            for other in range(0, userTotal):
                if other == user:
                    continue
                if dataSet[other, movie] != 0:  # 0 means "not rated"
                    weighted += dataSet[other, movie] * relation[0, other]
                    raters += 1
            if raters == 0:
                answer = 3
            else:
                answer = around(weighted / raters)
            predictions.append(answer)
            totalError += absolute(answer - label)
            processed = position + 1
            if processed == 100:
                break

        meanError = float(totalError) / processed
        predictions = array(predictions)
        errorSum += meanError
        spreadSum += std(predictions)
        print(metrics.classification_report(testLabel, predictions))
    print("")
    print("Mean Absolute Error: " + str(float(errorSum) / 5))
    print("")
    print("Standard Deviation: " + str(float(spreadSum / 5)))
    print("")
def test():
	"""Cluster the first 100 rows of "u.data" with kMeans and print the centroids.

	After the clustering calls, the row indices from 100 up to the number
	of rows are collected into ``emptyPool`` (not used afterwards), the
	centroids are printed and the function returns.  The fold-evaluation
	code after the ``return`` is unreachable.

	NOTE(review): this function mixes tab and space indentation.  Under
	Python 2's rule (a tab advances to the next multiple of 8 columns) the
	space-indented statements after the kMeans call sit at function level,
	so kMeans runs for every x before anything is printed.  With a 4-column
	tab display they look like part of the ``for x`` body -- confirm the
	intended nesting.
	"""
	dataSet=loadData.loadTrainingData("u.data")
	for x in range(20,40,2):
		# Passes the first 100 rows, x and the total row count to kMeans.
		centroids,clusterAssignment=kMeans(dataSet[:100,:],x,shape(dataSet)[0])# NOTE(review): original comment said "15 clusters", but x runs over 20, 22, ..., 38
        # Indices of the rows that were not passed to kMeans.
        emptyPool=[]
        for i in range(100,shape(dataSet)[0]):
            emptyPool.append(i)
        print centroids
        return
        # ---- Unreachable below this point (kept as in the original) ----
        # Would score cluster-mean predictions on the folds "u1.test" .. "u5.test".
        testFile="u"
        avg=0.0
        for i in range(1,6):
        	testData,testLabel=loadData.loadTestData(testFile+str(i)+".test")
        	m = shape(dataSet)[0]
        	totalError=0
        	index=0# for test Label no.
        	predictions=[]
        	for t in testData:
        		# Ids in the test file are 1-based.
        		user,movie=int(t[0])-1,int(t[1])-1
        		label=testLabel[index]
        		clusterNum=clusterAssignment[user] # cluster number of the user to test
        		userInCluster=[]
        		# NOTE: this inner loop reuses the fold index name `i`.
        		for i in range(0,m):
        			if clusterAssignment[i]==clusterNum:
        				userInCluster.append(i)
        		sumOfRatings=0
        		count=0 # number of users who've watched the movie
        		for i in userInCluster:
        			if dataSet[i][movie]!=0:# if movie is watched
        				sumOfRatings+=dataSet[i][movie]
        				count+=1
        		if count==0:# if there is no user in the cluster who've watched the movie
        			ratingsPredicted=3
        		else:
        			ratingsPredicted=around(sumOfRatings/count)# average of the ratings, rounded
        		predictions.append(ratingsPredicted)
        		totalError+=absolute(ratingsPredicted-label)
        		index+=1
        	meanError=totalError/len(testData)
        	# print meanError
        	avg+=meanError
        print
        print float(avg)/5
Example #5
0
def test():
    """Score cluster-mean rating prediction using the assignment from fcm.

    Loads "u.data", calls ``fcm(dataSet, 2, 8, 2)`` and prints its first
    result.  For each fold "u1.test" .. "u5.test" every test row is
    predicted as the rounded mean of the non-zero ratings the movie got
    from users whose cluster assignment equals the target user's (3 when
    there are none).  Each prediction is printed, followed by the fold's
    mean absolute error.  Returns None.
    """
    dataSet = loadData.loadTrainingData("u.data")
    u, clusterAssignment = fcm(dataSet, 2, 8, 2)
    print(u)
    userTotal = shape(dataSet)[0]
    for fold in range(1, 6):
        testData, testLabel = loadData.loadTestData("u" + str(fold) + ".test")
        totalError = 0
        predictions = []
        for row, t in enumerate(testData):
            # Ids in the test file are 1-based.
            user = int(t[0]) - 1
            movie = int(t[1]) - 1
            label = testLabel[row]
            clusterNum = clusterAssignment[user]
            peers = [
                other for other in range(0, userTotal)
                if clusterAssignment[other] == clusterNum
            ]
            ratingSum = 0
            raters = 0  # peers who rated the movie (0 means unrated)
            for peer in peers:
                if dataSet[peer][movie] != 0:
                    ratingSum += dataSet[peer][movie]
                    raters += 1
            if raters == 0:
                ratingsPredicted = 3  # no rater in the cluster
            else:
                ratingsPredicted = around(ratingSum / raters)
            predictions.append(ratingsPredicted)
            print(ratingsPredicted)
            totalError += absolute(ratingsPredicted - label)
        meanError = totalError / len(testData)
        print(meanError)
def testKMeansForPca(data):
    """Score cluster-mean rating prediction with clusters fitted on ``data``.

    ``data`` is fitted with ``KMeans(n_clusters=15)`` and the labels
    predicted for the same ``data`` serve as the per-user cluster
    assignment.  Ratings come from "u1.base"; the rows of "u1.test" are
    predicted as the rounded mean of the non-zero ratings the movie got
    from users in the target user's cluster (3 when there are none).

    Prints the mean absolute error and then the standard deviation of the
    predictions.  Returns None.
    """
    dataSet = loadData.loadTrainingData("u1.base")
    testData, testLabel = loadData.loadTestData("u1.test")
    model = KMeans(n_clusters=15)
    model.fit(data)
    clusterAssignment = model.predict(data)
    userTotal = shape(dataSet)[0]
    totalError = 0
    predictions = []
    for row, t in enumerate(testData):
        # Ids in the test file are 1-based.
        user = int(t[0]) - 1
        movie = int(t[1]) - 1
        label = testLabel[row]
        clusterNum = clusterAssignment[user]
        peers = [
            other for other in range(0, userTotal)
            if clusterAssignment[other] == clusterNum
        ]
        ratingSum = 0
        raters = 0  # peers who rated the movie (0 means unrated)
        for peer in peers:
            if dataSet[peer][movie] != 0:
                ratingSum += dataSet[peer][movie]
                raters += 1
        if raters == 0:
            ratingsPredicted = 3  # no rater in the cluster
        else:
            ratingsPredicted = around(ratingSum / raters)
        predictions.append(ratingsPredicted)
        totalError += absolute(ratingsPredicted - label)
    meanError = totalError / len(testData)
    print(meanError)
    standardDeviation = std(predictions)
    print(standardDeviation)
def mainFunction():
    """Load the "u1.test" split and pass its rows and labels to classifierTest."""
    samples, labels = loadData.loadTestData("u1.test")
    classifierTest(samples, labels)
Example #8
0
def test():
    """Load the "u1.base" / "u1.test" split and run ``som`` on the training data.

    The test rows and labels are loaded but not used afterwards.
    """
    trainingMatrix = loadData.loadTrainingData("u1.base")
    unusedRows, unusedLabels = loadData.loadTestData("u1.test")
    som(trainingMatrix)
# Script: fits one 19-entry parameter row per user by per-coordinate gradient
# steps on that user's rated movies, then prints the label and the predicted
# rating for every row of "u1.test".
# NOTE(review): `l` is not defined in this part of the file; it is indexed as
# l[movie][k] and used as mat(l[j]) * theta[i,:].T, so presumably it is a
# per-movie table with 19 values per row -- confirm where it is built.
trainData=loadData.loadTrainingData("u.data")
alpha=0.01  # step size of the update below
# thres=0.001
theta=mat(zeros((943,19)))  # one row per user, initialised to zeros
# while True:
print shape(mat(l))
# return
# NOTE: only the first 10 rows of theta are fitted; every other row stays at
# zero, so the predictions printed below for those users are zero.
for i in range(10):
    for k in range(19):
        old=theta[i,k]
        thres=0.1
        sumi=0
        # T=2
        # NOTE(review): `old` and `sumi` are set once before the loop and are
        # not refreshed inside it, so the stopping test compares against the
        # value before the first step and `sumi` keeps accumulating across
        # passes.  The loop may not terminate -- confirm the intended rule.
        while True:
            for j in range(1682):
                if trainData[i,j]:  # skip movies with a zero (missing) rating
                    # (prediction - rating) times the k-th entry of l[j]
                    sumi+=(mat(l[j])*theta[i,:].T- trainData[i,j])*l[j][k]
            theta[i,k]-=alpha*sumi
            if abs(theta[i,k]-old)<thres:
                break
            # T-=1
print theta[0]
testData,testLabel=loadData.loadTestData("u1.test")
index=0  # position of the current row in testLabel
for t in testData:
    # Ids in the test file are 1-based.
    user,movie=int(t[0])-1,int(t[1])-1
    label=testLabel[index]
    # Predicted rating: product of the user's theta row and l[movie].
    rating=theta[user,:]*mat(l[movie]).T
    print label,rating
    index+=1
Example #10
0
def test():
    """Grow clusters from a pool of unassigned users, then score fold 1.

    For the single cluster count 74 (range(74,75,2)) and each test fold,
    ``initialization`` supplies the starting clusters, the pool of
    unassigned user indices and the per-cluster mean vectors.  Users are
    then moved from the pool into randomly chosen clusters until the pool
    is empty, after which every test row is predicted as the mean of the
    non-zero ratings the movie got inside the user's cluster.

    NOTE: the function returns right after printing the classification
    report of the first fold, so the statements after that ``return`` and
    the remaining folds never run.
    """
    dataSet=loadData.loadTrainingData("u.data")
    for clu in range(74,75,2):
        avg=0.0
        standardDeviation=0.0
        for te in range(1,6):
            testData,testLabel=loadData.loadTestData("u"+str(te)+".test")
            # clusters[c] is a list of [userIndex, ratingsRow] entries,
            # judging by how entries are appended and read below.
            clusters,emptyPool,meanList=initialization(dataSet,clu)
            while len(emptyPool):
                # Pick a random pooled user and a random target cluster.
                randVar=random.randint(0,len(emptyPool)-1)
                user=emptyPool[randVar]
                randNes=random.randint(0,len(clusters)-1)
                # NOTE(review): this value is overwritten in the loop below
                # before it is read, so it has no effect.
                mae=float(sum(abs(dataSet[user,:]-meanList[randNes])))/1682
                mini=100000000
                count=0
                threshold=int(0.3*len(clusters[randNes]))
                minPerson=-1
                # Find the member closest to the cluster mean; `count` is the
                # number of times a new minimum was found during the scan.
                for i in range(0,len(clusters[randNes])):
                    mae=float(sum(abs(dataSet[clusters[randNes][i][0],:]-meanList[randNes])))/1682
                    if mae<mini:
                        count+=1
                        mini=mae
                        minPerson=clusters[randNes][i][0]
                # NOTE(review): when the chosen cluster is empty, count stays
                # 0 and this pass changes nothing; the while loop just retries.
                if count:
                    if count>=threshold:
                        # Evict the closest member back into the pool.
                        # NOTE(review): the list is modified while it is being
                        # iterated -- confirm this is safe for the data.
                        for c in clusters[randNes]:
                            if c[0]==minPerson:
                                q=clusters[randNes].index(c)
                                # NOTE(review): len(meanList[randNes]) is the
                                # length of the mean entry, not the member
                                # count len(clusters[randNes]) -- presumably
                                # the latter was intended; confirm.
                                for t in range(0,1682):
                                    add=(meanList[randNes][t]*len(meanList[randNes]))-dataSet[minPerson,t]
                                    add=add/(len(meanList[randNes])-1)
                                    meanList[randNes][t]=add
                                del(clusters[randNes][q])
                        emptyPool.append(minPerson)
                    # Move the pooled user into the cluster and fold the
                    # user's ratings into the running mean.
                    clusters[randNes].append([user,dataSet[user,:]])
                    ind=emptyPool.index(user)
                    for t in range(0,1682):
                        add=(meanList[randNes][t]*len(meanList[randNes]))+dataSet[user,t]
                        add=add/(len(meanList[randNes])+1)
                        meanList[randNes][t]=add
                    del(emptyPool[ind])

                # var+=1
            # Total number of clustered users; only used by the commented-out
            # print below.
            summation=0
            for c in clusters:
                # print len(c)
                summation+=len(c)
            # print summation
            totalError=0
            predictions=[]
            m = shape(dataSet)[0]
            index=0
            # NOTE(review): the next lines mix spaces and tabs; they line up
            # only under Python 2's 8-column tab rule.
            for t in testData:
            	user,movie=int(t[0])-1,int(t[1])-1
                # print user,movie
            	label=testLabel[index]
                check=False
                # Locate the cluster that holds this user, then average the
                # non-zero ratings of the movie inside that cluster.
                for i in range(0,len(clusters)):
                    for j in range(0,len(clusters[i])):
                        if clusters[i][j][0]==user:
                            count =0
                            tum=0.0
                            for k in range(0,len(clusters[i])):
                                if dataSet[clusters[i][k][0],movie]!=0:
                                    count+=1
                                    tum+=dataSet[clusters[i][k][0],movie]
                            # tum stays 0.0 when nobody in the cluster rated
                            # the movie.
                            if count!=0:
                                tum=tum/count
                            check=True
                        if check:
                            break
                    if check:
                        break
                # NOTE(review): if the user is in no cluster, `tum` keeps the
                # previous row's value (or is undefined on the first row).
            	predictions.append(tum)
            	totalError+=absolute(tum-label)
                index+=1

            meanError=totalError/len(testData)
            print "Precision And Recall: "
            print shape(testLabel)

            print metrics.classification_report(testLabel,predictions)
            # print
            # Everything below this return is unreachable.
            return
            predictions=array(predictions)
            standardDeviation+=std(predictions)
            # print standardDeviation
            # print meanError
            avg+=meanError
            # break
        # break
        print "Standard Deviation: "+str(standardDeviation)