def problem1():
    """Fit one KMeans model per cluster count on a single generated sample.

    The sample is produced once by ``generate_sample()`` and shared by every
    model. For each K in 2, 3, 4, 5 a ``KMeans(sample, K=K)`` is built and
    ``iterate(20)`` is called on it.

    Returns:
        dict mapping each K to its KMeans instance after the iterate call.
    """
    data = generate_sample()

    def _fit(n_clusters):
        # Build and run a single model; models are fitted one at a time,
        # in ascending K order.
        model = KMeans(data, K=n_clusters)
        model.iterate(20)
        return model

    return {n_clusters: _fit(n_clusters) for n_clusters in (2, 3, 4, 5)}
def problem2_3(rec=None):
    """Cluster ``rec.U`` with KMeans and pretty-print a mapping for 5 centroids.

    Args:
        rec: object exposing ``U`` and ``V`` attributes. When None, the
            result of ``problem2()`` is used instead.

    Returns:
        The KMeans instance built on ``rec.U`` with K=30, after
        ``iterate(10)`` has been called on it.

    Raises:
        AssertionError: if fewer than five centroids remain after filtering.
    """
    if rec is None:
        rec = problem2()
    U, V = rec.U, rec.V

    km = KMeans(U, K=30)
    km.iterate(10)

    # Keep only the centroids that survive masking zero entries and dropna().
    # NOTE(review): presumably km.MU is a pandas object and this discards
    # clusters left empty or partly zero -- confirm against KMeans.
    centroids = km.MU[km.MU != 0].dropna()

    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, and without this guard the sampling loop below would
    # never terminate when fewer than five centroids are available.
    if len(centroids) < 5:
        raise AssertionError('Not enough centroids!')

    # Single-argument print emits the same "<n> centroids" line whether
    # print is a statement (Python 2) or a function (Python 3).
    print('%d centroids' % len(centroids))

    # Draw random centroid labels until five distinct ones are collected.
    # NOTE(review): terminates only if centroids.index holds at least five
    # distinct labels -- verify the index is unique.
    ptypes = set()
    while len(ptypes) < 5:
        ptypes.add(random.choice(centroids.index))

    similarities = map_ptypes(ptypes, centroids, V)
    pprint(similarities)
    return km