Beispiel #1
0
def prob0():
    """Run verbose 5-means on the labor dataset with fixed starting centers.

    The file is loaded with ``label_count=1``, its first column is sliced
    away (an id column, according to the original author's note), and the
    features of the remaining columns are clustered. The first five rows of
    that feature data are handed to ``KMeans.train`` as ``centers``.
    """
    n_clusters = 5
    labor = Arff('datasets/labor.arff', label_count=1)
    # Keep every column from index 1 onward, i.e. drop the leading id column.
    without_id = labor.create_subset_arff(col_idx=slice(1, None))
    features = without_id.get_features()
    model = KMeans(n_clusters)
    model.train(
        features,
        verbose=True,
        centers=features.data[:n_clusters],
    )
Beispiel #2
0
def prob2wclass():
    """Cluster iris, loaded with ``label_count=0``, using k-means and HAC.

    A fresh ``KMeans`` model is trained for every k from 2 through 7, then a
    single ``HAC(simple=False)`` model is trained with the same k values
    passed as ``printk``. No normalization call is made on the data.

    NOTE(review): ``label_count=0`` presumably makes the class column part of
    the clustered data (hence "wclass") -- confirm against ``Arff``.
    """
    data = Arff('datasets/iris.arff', label_count=0)
    cluster_counts = list(range(2, 8))  # k = 2, 3, ..., 7

    for n_clusters in cluster_counts:
        KMeans(n_clusters).train(data)

    HAC(simple=False).train(data, printk=cluster_counts)
Beispiel #3
0
def prob4():
    """Print the k-means silhouette score on normalized abalone for k = 2..7.

    The dataset is loaded with ``label_count=0`` and normalized in place via
    ``Arff.normalize()``. For each k a fresh ``KMeans`` model is trained and
    the value of ``calc_silhouette_score()`` is printed, one line per k.

    The value returned by ``train`` is not needed here, so it is no longer
    collected into a list that was never read.
    """
    arff = Arff('datasets/abalone.arff', label_count=0)
    arff.normalize()

    for k in np.arange(2, 8):
        km = KMeans(k)
        km.train(arff)
        print(km.calc_silhouette_score())
Beispiel #4
0
def prob2():
    """Cluster the iris features with k-means and HAC for k = 2..7.

    Only the result of ``Arff.get_features()`` is clustered. A fresh
    ``KMeans`` model is trained per k, then one ``HAC(simple=False)`` model
    is trained with the same k values passed as ``printk``. No normalization
    call is made on the features.
    """
    dataset = Arff('datasets/iris.arff')
    feature_data = dataset.get_features()
    cluster_counts = list(range(2, 8))  # k = 2, 3, ..., 7

    for n_clusters in cluster_counts:
        KMeans(n_clusters).train(feature_data)

    HAC(simple=False).train(feature_data, printk=cluster_counts)
Beispiel #5
0
def prob3():
    """Plot k-means and HAC training results on abalone against cluster count.

    The dataset is loaded with ``label_count=0``. For k = 2..7 the value
    returned by ``KMeans.train`` is collected; ``HAC()`` and
    ``HAC(simple=False)`` are each trained once with ``printk`` set to the
    same k range. The three series are drawn on one figure under the labels
    used in the legend (K-Means, single-link HAC, complete-link HAC).

    NOTE(review): the HAC results are reversed before plotting, presumably
    because ``HAC.train`` reports them from the largest k down -- confirm.
    """
    abalone = Arff('datasets/abalone.arff', label_count=0)
    cluster_counts = np.arange(2, 8)

    # One freshly constructed k-means model per cluster count.
    kmeans_sse = [KMeans(k).train(abalone) for k in cluster_counts]

    # Both HAC models are constructed before either one is trained.
    default_hac = HAC()
    non_simple_hac = HAC(simple=False)
    default_hac_sse = default_hac.train(abalone, printk=cluster_counts)
    non_simple_hac_sse = non_simple_hac.train(abalone, printk=cluster_counts)

    curves = (
        (kmeans_sse, "K-Means SSE"),
        (default_hac_sse[::-1], "HAC (Single-Link) SSE"),
        (non_simple_hac_sse[::-1], "HAC (Complete-Link) SSE"),
    )
    for values, legend_label in curves:
        plt.plot(cluster_counts, values, label=legend_label)

    plt.title("Abalone SSE vs # of Clusters")
    plt.xlabel("# of Clusters")
    plt.ylabel('SSE')
    plt.legend()
    plt.show()
Beispiel #6
0
def prob2_v3():
    """Train 4-means on iris five times, each time with a fresh model.

    The dataset is loaded once with ``label_count=0`` and reused for every
    run; nothing returned by ``train`` is kept.
    """
    n_runs = 5
    n_clusters = 4
    data = Arff('datasets/iris.arff', label_count=0)

    for _ in range(n_runs):
        KMeans(n_clusters).train(data)
Beispiel #7
0
def prob1():
    """Run verbose 4-means on the sponge dataset with fixed starting centers.

    The first four rows of the dataset's ``data`` are printed and then
    passed to ``KMeans.train`` as ``centers``.
    """
    n_clusters = 4
    sponge = Arff('datasets/sponge.arff')

    model = KMeans(n_clusters)
    seed_rows = sponge.data[:n_clusters]
    print(seed_rows)
    model.train(sponge, verbose=True, centers=seed_rows)