def main():
    """Run k-means iterations on EMR until the centroids stop moving.

    Reads the current centroids (a JSON document) from a fixed S3 location,
    then repeatedly runs one ``MRkMeansIter`` job on EMR, re-reads the
    centroids from the same S3 key, and sums ``dist(new, old)`` over
    centroid positions.  The loop ends once that summed movement is no
    longer greater than 0.01.  The movement is printed after every
    iteration.

    NOTE(review): this assumes each ``MRkMeansIter`` run rewrites the
    centroid file at ``centPath`` -- confirm against the job definition.
    """
    filePath = '/home/mike-bowles/pyWorkspace/mapReducers/src/kMeans3/input.txt'

    # Initializer step (currently disabled).  With it commented out, the
    # starting centroids are whatever is already stored at centPath.
    #mrJob = MRkMeansInit(args=[filePath])
    #mrJob = MRkMeansInit(args=['-r', 'emr', filePath])
    #with mrJob.make_runner() as runner:
    #    runner.run()

    # Pull out the centroid values to compare with values after one iteration.
    centPath = "s3://mike-mrjob/kMeans/centroids/intermediateResults.txt"
    key = EMRJobRunner().get_s3_key(centPath)
    centroidsJson = key.get_contents_as_string()

    # Start above the threshold so the loop body runs at least once.
    delta = 10
    while delta > 0.01:
        # Parse the centroids as they were before this iteration.
        oldCentroids = json.loads(centroidsJson)

        # Run one iteration of the k-means job on EMR.
        mrJob2 = MRkMeansIter(args=['-r', 'emr', filePath])
        with mrJob2.make_runner() as runner:
            runner.run()

        # Re-read the centroids through the same key and compare.
        centroidsJson = key.get_contents_as_string()
        newCentroids = json.loads(centroidsJson)

        # Total movement across all centroids.  The old centroids are looked
        # up by position so a length mismatch still raises instead of being
        # silently truncated.
        delta = sum(
            (dist(centroid, oldCentroids[i])
             for i, centroid in enumerate(newCentroids)),
            0.0,
        )

        # Single-argument call form: same output on Python 2, valid on 3.
        print(delta)