counts[j][c] += 1 for i in xrange(len(strand)): new += maxChar(counts[i]) return new if __name__ == "__main__": dnaFuncs = ks.kMeansInfo(distance, centroids) k = 4 #(data, truths) = dna.makeStrands(k, 100, 5000, 10) (data, truths) = dna.makeStrands(k, 100, 10000, 2) #data = ['ACGT', 'CCTT', 'TCGT', 'CGTT', 'ACGG', 'ATGT'] """ with open("benchmark_data.txt", "w") as fp: fp.write(pickle.dumps(data)) """ with open("benchmark_data.txt", "r") as fp: data = pickle.loads(fp.read()) print hash(fp.read()) obj = ks.kMeansCluster(dnaFuncs, data, k, .01) if (comm.Get_rank() == ROOT): t1 = time.time() res = obj.findClusters() t2 = time.time() #with open("foo.txt", "w") as fp: # fp.write(str(res) + '\n\n' + str(t2 - t1)) print t2 - t1 else: obj.findClusters()
for (x,y) in points: x_sum += x y_sum += y final_x = x_sum / len(points) final_y = y_sum / len(points) return (final_x, final_y) #points gives data in the form [(x1,y1), (x2,y2), ....] if __name__ == "__main__": pointFuncs = ks.kMeansInfo(distance, findCentroids) data = [(-10,-10),(-11, -11), (-10,10),(-11,11), (10,-10),(11,-11), (10,10),(11,11)] obj = ks.kMeansCluster(pointFuncs, data, 4, .01) if (comm.Get_rank() == ROOT): print "Initial cluster" print " ", data res = obj.findClusters() print "Final result:" print " ", res else: obj.findClusters()