if len(kCentroidsList)>0:
		#canopyCenterKCentroidsDict = {canopyCenter.toString():kCentroidsList}
		canopyCenterKCentroidsDict[canopyCenter] = kCentroidsList


# For every "<canopy center>\t<data point>" line on stdin, print the centroid
# (among those registered for that canopy center) with the smallest
# complexDistance to the data point, followed by the data point itself.
for line in sys.stdin:
	# Exactly two tab-separated fields are expected; the unpack raises
	# ValueError for any other shape.
	(centerText, pointText) = line.strip().split("\t")
	canopyCenter = DataPoint(centerText)
	dataPoint = DataPoint(pointText)

	# Lines whose canopy center has no registered centroids emit nothing.
	if canopyCenter not in canopyCenterKCentroidsDict:
		continue
	kCentroidsList = canopyCenterKCentroidsDict[canopyCenter]
	if not kCentroidsList:
		continue

	# min() keeps the first element that reaches the smallest key, which is
	# the same tie-breaking as a strict "<" scan starting at index 0.
	nearestCentroid = min(kCentroidsList, key=dataPoint.complexDistance)
	print (nearestCentroid.toString() + "\t" + dataPoint.toString())
Esempio n. 2
0
            #kCentroidsList.append(kCentroid.toString())
            kCentroidsList.append(kCentroid)
    if len(kCentroidsList) > 0:
        #canopyCenterKCentroidsDict = {canopyCenter.toString():kCentroidsList}
        canopyCenterKCentroidsDict[canopyCenter] = kCentroidsList

# Read "<canopy center>\t<data point>" records from stdin and, for each one
# whose canopy center has centroids registered, print
# "<centroid with the smallest complexDistance>\t<data point>".
for line in sys.stdin:
    # The record must split into exactly two tab-separated fields, otherwise
    # the tuple unpack raises ValueError.
    (centerField, pointField) = line.strip().split("\t")
    canopyCenter = DataPoint(centerField)
    dataPoint = DataPoint(pointField)

    # Nothing is emitted for unknown canopy centers or empty centroid lists.
    if canopyCenter not in canopyCenterKCentroidsDict:
        continue
    kCentroidsList = canopyCenterKCentroidsDict[canopyCenter]
    if not kCentroidsList:
        continue

    # Linear scan tracking the best centroid seen so far; the strict "<"
    # means the earliest centroid wins when distances tie.
    bestCentroid = kCentroidsList[0]
    bestDistance = dataPoint.complexDistance(bestCentroid)
    for candidate in kCentroidsList[1:]:
        candidateDistance = dataPoint.complexDistance(candidate)
        if candidateDistance < bestDistance:
            bestCentroid = candidate
            bestDistance = candidateDistance
    print(bestCentroid.toString() + "\t" + dataPoint.toString())
	sys.exit(-1)

# Load the centroids from the file named by the first command-line argument,
# opened through hdfs.open. Every line must have the form "<key>\t<value>";
# only the value column is parsed into a DataPoint, the key column is unused.
kCentroids = []

# Named centroidsFile rather than "file" so the Python 2 builtin is not shadowed.
centroidsFile = hdfs.open(sys.argv[1])
try:
	for line in centroidsFile:
		# A line that does not split into exactly two tab-separated fields
		# raises ValueError here, exactly as before.
		(key, value) = line.strip().split("\t")
		kCentroids.append(DataPoint(value))
finally:
	# Close the handle even when a line fails to parse, so it is not leaked.
	centroidsFile.close()

"""
for dataPoint in dataPoints:
	print dataPoint.toString()
"""


# For each data point read from stdin (one per line), print
# "<centroid with the smallest complexDistance>\t<data point>".
for line in sys.stdin:
	dataPoint = DataPoint(line.strip())

	# Start from the first centroid; indexing kCentroids[0] raises IndexError
	# when no centroids were loaded.
	nearestCentroid = kCentroids[0]
	nearestDistance = dataPoint.complexDistance(nearestCentroid)

	# Strict "<" keeps the earliest centroid when several are equally close.
	for candidate in kCentroids[1:]:
		candidateDistance = dataPoint.complexDistance(candidate)
		if candidateDistance < nearestDistance:
			nearestCentroid = candidate
			nearestDistance = candidateDistance

	print(nearestCentroid.toString()+"\t"+dataPoint.toString())