def clusterAllWithKMeans(lowerLimit, upperLimit, featMan, weightMatrix,\
					samePairsSet, differentPairsSet, outDir):
	metrics = {}
	print 'Weight matrix length' ,len(weightMatrix)

	data = featMan.returnKeys()
	for k in range(lowerLimit,upperLimit,2):
		i = len(data)/k
		if i == 0:
			i = 1
		kmeans = KMeans(i,data,weightMatrix,5, 0.1)
		kmeans.cluster();
		clusters = kmeans.getClusters();
		noClus =kmeans.getTermInNoCluster();
		
		if clusters:
                        print 'Found clusters length', len(clusters),'singaltons', len(noClus)
			predictedSamePairsSet, predictedDifferentPairsSet = \
				getPairLabelsFromClusters(clusters,featMan)
			fname = outDir+'_'+str(i)+'.txt'
			oFile = open(fname,'w');
			for entry in clusters:
				if len(entry) > 0:
					oFile.write(toString(entry,featMan)+'\n')
			oFile.write('NO CLUST\t'+toString(noClus,featMan)+'\n');
			oFile.close()
                        print 'Same pair set', len(predictedSamePairsSet)
			#metrics[k] = getRecallPrecision(samePairsSet, \
			#differentPairsSet, predictedSamePairsSet, predictedDifferentPairsSet)
                        metrics[k] = getSamePairPrecisionRecallF1Calculator(samePairsSet,\
                                predictedSamePairsSet)
	for tcount, met in metrics.items():
		print tcount, met
	return metrics
def clusterCatWithKMeans(lowerLimit, upperLimit, featMan, \
						weightMatrix, samePairsSet, \
						differentPairsSet, catQueryDist,\
						outFile = 'cat-clusters-with-mean.txt'):
	metrics = {}
	for termCount in range(lowerLimit, upperLimit):
		i = 1
		fclusters = []
		allCatClusters = []
		oFile = open(outFile+'_'+str(termCount)+'.txt','w')
		for cat, qIdSet in catQueryDist.items():
			if len(qIdSet) > 1:
				k = len(qIdSet)/termCount
				if k == 0:
					k = 1
				print cat, len(qIdSet), k
				if k > 1:
					kmeans = KMeans(k,list(qIdSet),weightMatrix,5, 0.1)
					kmeans.cluster();
					clusters = kmeans.getClusters();
					noClus =kmeans.getTermInNoCluster();
					for entry in clusters:
						if len(entry) > 1:
							allCatClusters.append(entry)
						if len(entry) > 0:
							cStr = toString(entry,featMan)
							fclusters.append(cStr)
							oFile.write(cat+'\t'+cStr+'\n');
					oFile.write(cat+'\t'+'NO CLUST\t'+\
								toString(noClus,featMan)+'\n');
				else:
					cStr = toString(qIdSet,featMan)
					oFile.write(cat+'\t'+cStr+'\n');
					allCatClusters.append(list(qIdSet))
				
				if i % 50 == 0:
					print i
				i+=1	
		predictedSamePairsSet, predictedDifferentPairsSet = \
						getPairLabelsFromClusters(allCatClusters,featMan)
		print 'COUNTS ',termCount, len(allCatClusters), \
		len(predictedSamePairsSet), len(catQueryDist)
		#metrics[termCount] = getRecallPrecision(samePairsSet, \
		#				differentPairsSet,\
		#				predictedSamePairsSet,\
		#				predictedDifferentPairsSet)	
                metrics[termCount] = getSamePairPrecisionRecallF1Calculator(samePairsSet,\
                                predictedSamePairsSet)
		oFile.close()
	for tcount, met in metrics.items():
		print tcount, met
	return metrics