def prob0() -> None:
    """Run a verbose 5-cluster K-Means on the labor dataset.

    The first column is dropped, only the feature portion is kept, and the
    first five entries of the resulting ``data`` are used as initial centers.
    """
    cluster_count = 5

    labor = Arff('datasets/labor.arff', label_count=1)
    # Trim the id column
    without_id = labor.create_subset_arff(col_idx=slice(1, None))
    features = without_id.get_features()

    model = KMeans(cluster_count)
    model.train(
        features,
        verbose=True,
        centers=features.data[:cluster_count],
    )
def prob2wclass() -> None:
    """Cluster iris (loaded with ``label_count=0``) with K-Means and HAC.

    K-Means is trained once for every k in 2..7, then a single
    ``HAC(simple=False)`` run is asked to report on the same k values.
    """
    dataset = Arff('datasets/iris.arff', label_count=0)
    # NOTE: the data is not normalized here (a normalize() call was
    # previously commented out).

    # Train k means for 2-7
    cluster_counts = list(range(2, 8))
    for cluster_count in cluster_counts:
        KMeans(cluster_count).train(dataset)

    hac = HAC(simple=False)
    hac.train(dataset, printk=cluster_counts)
def prob4() -> None:
    """Print the K-Means silhouette score on normalized abalone for k = 2..7.

    The abalone dataset is loaded with ``label_count=0`` and normalized in
    place; a fresh ``KMeans`` model is then trained for each k and its
    silhouette score is printed.
    """
    arff = Arff('datasets/abalone.arff', label_count=0)
    arff.normalize()

    for k in np.arange(2, 8):
        km = KMeans(k)
        # The value returned by train() used to be appended to a list that
        # was never read, so it is no longer collected.
        km.train(arff)
        print(km.calc_silhouette_score())
def prob2() -> None:
    """Cluster the feature portion of iris with K-Means and HAC.

    K-Means is trained once for every k in 2..7, then a single
    ``HAC(simple=False)`` run is asked to report on the same k values.
    """
    dataset = Arff('datasets/iris.arff').get_features()
    # NOTE: the features are not normalized here (a normalize() call was
    # previously commented out).

    # Train k means for 2-7
    cluster_counts = list(range(2, 8))
    for cluster_count in cluster_counts:
        KMeans(cluster_count).train(dataset)

    hac = HAC(simple=False)
    hac.train(dataset, printk=cluster_counts)
def prob3() -> None:
    """Plot SSE against cluster count (2..7) for K-Means and two HAC runs.

    The abalone dataset is loaded with ``label_count=0``. K-Means is trained
    once per k; ``HAC()`` and ``HAC(simple=False)`` are each trained once with
    ``printk`` set to the full range of k. The HAC results are reversed
    before plotting.
    """
    dataset = Arff('datasets/abalone.arff', label_count=0)
    cluster_counts = np.arange(2, 8)

    # One freshly constructed K-Means model per cluster count.
    kmeans_sse = [KMeans(k).train(dataset) for k in cluster_counts]

    single_link = HAC()
    complete_link = HAC(simple=False)
    single_link_sse = single_link.train(dataset, printk=cluster_counts)
    complete_link_sse = complete_link.train(dataset, printk=cluster_counts)

    # HAC results are reversed so they line up with ascending cluster counts
    # (presumably train() reports them from most to fewest clusters; confirm).
    curves = (
        (kmeans_sse, "K-Means SSE"),
        (single_link_sse[::-1], "HAC (Single-Link) SSE"),
        (complete_link_sse[::-1], "HAC (Complete-Link) SSE"),
    )
    for sse_values, curve_label in curves:
        plt.plot(cluster_counts, sse_values, label=curve_label)

    plt.title("Abalone SSE vs # of Clusters")
    plt.xlabel("# of Clusters")
    plt.ylabel('SSE')
    plt.legend()
    plt.show()
def prob2_v3() -> None:
    """Train a fresh 4-cluster K-Means on iris five times in a row.

    The dataset is loaded once with ``label_count=0`` and shared by all runs.
    """
    repetitions = 5
    cluster_count = 4

    dataset = Arff('datasets/iris.arff', label_count=0)
    for _ in range(repetitions):
        KMeans(cluster_count).train(dataset)
def prob1() -> None:
    """Run a verbose 4-cluster K-Means on the sponge dataset.

    The first four entries of the dataset's ``data`` are printed and then
    used as the initial centers.
    """
    cluster_count = 4

    sponge = Arff('datasets/sponge.arff')
    model = KMeans(cluster_count)

    # Show the seed centers before training starts.
    initial_centers = sponge.data[:cluster_count]
    print(initial_centers)

    model.train(sponge, verbose=True, centers=initial_centers)