g = d['G'] t = d['T'] res = '' if (a >= c and a >= g and a >= t): res = 'A' elif (c >= a and c >= g and c >= t): res = 'C' elif (g >= a and g >= c and g >= t): res = 'G' else: res = 'T' return res new = "" counts = [{'A':0, 'C':0, 'G':0, 'T':0} for i in xrange(len(strands[0]))] for strand in strands: for j in xrange(len(strand)): c = strand[j] counts[j][c] += 1 for i in xrange(len(strand)): new += maxChar(counts[i]) return new dnaFuncs = ks.kMeansInfo(distance, centroids) data = ['ACGT', 'CCTT', 'TCGT', 'CGTT', 'ACGG', 'ATGT'] obj = ks.kMeansCluster(dnaFuncs, data, 2, .01) print obj.findClusters()
import math
import kmeansSequential as ks


def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Only the items at index 0 and 1 of each point are read.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return math.sqrt(dx ** 2 + dy ** 2)


def centroids(points):
    """Return the arithmetic mean of ``points`` as an ``(x, y)`` tuple.

    ``points`` must be a sized collection of 2-item pairs, i.e. data in the
    form ``[(x1, y1), (x2, y2), ...]``.  An empty collection raises
    ``ZeroDivisionError`` because the sums are divided by ``len(points)``.
    """
    # Accumulate into floats so the divisions below are true divisions even
    # when every coordinate is an int (matters under Python 2).
    x_sum = 0.0
    y_sum = 0.0
    for x, y in points:
        x_sum += x
        y_sum += y
    count = len(points)
    return (x_sum / count, y_sum / count)


# Demo driver: hand the 2-D point helpers to the k-means module and cluster
# a small fixed data set.
# NOTE(review): the last two kMeansCluster arguments are presumably the number
# of clusters and a convergence threshold -- confirm against kmeansSequential.
pointFuncs = ks.kMeansInfo(distance, centroids)
data = [(-1, -1), (1, 1), (-1, 1), (1, -1), (2, -2), (-2, 2), (2, 2), (-2, -2)]
obj = ks.kMeansCluster(pointFuncs, data, 4, .01)
# A parenthesised single-argument print behaves the same on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(obj.findClusters())