コード例 #1
0
def problem1():
    """Build one KMeans model per cluster count on a single generated sample.

    A sample is obtained once from ``generate_sample()``. For each K in
    2, 3, 4 and 5 a ``KMeans(sample, K=K)`` object is created and its
    ``iterate(20)`` method is called.

    Returns:
        dict mapping each K (int) to the corresponding ``KMeans`` object.
    """
    data = generate_sample()
    models_by_k = {}

    for n_clusters in range(2, 6):
        model = KMeans(data, K=n_clusters)
        model.iterate(20)
        models_by_k[n_clusters] = model

    return models_by_k
コード例 #2
0
def problem2_3(rec=None):
    """Cluster ``rec.U`` with KMeans and pretty-print similarities for 5 random centroids.

    Args:
        rec: Object exposing ``U`` and ``V`` attributes. When ``None``, the
            value returned by ``problem2()`` is used instead.

    Returns:
        The ``KMeans`` object built as ``KMeans(rec.U, K=30)`` after
        ``iterate(10)`` has been called on it.

    Raises:
        AssertionError: If fewer than 5 centroids remain after filtering.
        ValueError: If the remaining centroids carry fewer than 5 distinct
            index labels (raised by ``random.sample``).
    """
    if rec is None:
        rec = problem2()

    U, V = rec.U, rec.V
    km = KMeans(U, K=30)
    km.iterate(10)

    # Filter out zero-valued entries of MU, then drop whatever is left as NaN.
    # NOTE(review): presumably zeros mark centroids that never received any
    # points -- confirm against the KMeans implementation.
    centroids = km.MU[km.MU != 0].dropna()

    # Raised explicitly rather than via `assert` so the guard is not stripped
    # under `python -O`; exception type and message are unchanged.
    if len(centroids) < 5:
        raise AssertionError('Not enough centroids!')

    # Single-argument print: same output under Python 2 and Python 3.
    print('%d centroids' % len(centroids))

    # Draw 5 distinct centroid labels in one call; unlike a retry loop this
    # always terminates, even if the index contains duplicate labels.
    labels = list(centroids.index.unique())
    ptypes = set(random.sample(labels, 5))

    similarities = map_ptypes(ptypes, centroids, V)
    pprint(similarities)
    return km