def prob0haccomplete():
    """Run HAC with ``simple=False`` on the labor dataset, verbosely.

    Loads ``datasets/labor.arff`` with ``label_count=1``, drops column 0
    (the id column), keeps only the feature columns, and trains the
    clusterer with ``verbose=True`` and ``printk=[5]``.
    """
    labor = Arff('datasets/labor.arff', label_count=1)

    # Column 0 is the id column; keep everything from column 1 onward.
    without_id = labor.create_subset_arff(col_idx=slice(1, None))
    features = without_id.get_features()

    clusterer = HAC(simple=False)
    clusterer.train(features, verbose=True, printk=[5])
def setup():
    """Print HAC distances between two pairs of rows of the labor dataset.

    Loads ``datasets/labor.arff`` with ``label_count=1``, drops column 0
    (the id column) and keeps only the features. A default ``HAC`` instance
    is given the indices of the attributes whose type is ``'nominal'``, then
    the distance between rows 33/44 and rows 25/34 is printed.
    """
    features = (
        Arff('datasets/labor.arff', label_count=1)
        # Trim the id column (column 0).
        .create_subset_arff(col_idx=slice(1, None))
        .get_features()
    )

    hac = HAC()
    # Positions of the nominal attributes; get_distance presumably treats
    # these differently from the other columns -- confirm against HAC.
    is_nominal = np.array(features.attr_types) == 'nominal'
    hac.nominal_indicies = np.where(is_nominal)[0]

    row_pairs = (
        ('33,44', 33, 44),
        ('25,34', 25, 34),
    )
    for label, first, second in row_pairs:
        print(label, hac.get_distance(features.data[first], features.data[second]))
def prob2wclass():
    """Cluster the iris dataset, loaded with ``label_count=0``.

    Trains one ``KMeans`` model for every k from 2 through 7, then trains a
    single ``HAC(simple=False)`` model with ``printk`` set to the same list
    of cluster counts. The data is not normalized.

    NOTE(review): ``label_count=0`` presumably keeps the class column among
    the inputs (hence "wclass") -- confirm against ``Arff``.
    """
    iris = Arff('datasets/iris.arff', label_count=0)
    cluster_counts = list(range(2, 8))

    # K-Means: one independent run per cluster count.
    for k in cluster_counts:
        KMeans(k).train(iris)

    # HAC: a single run that is handed every cluster count at once.
    HAC(simple=False).train(iris, printk=cluster_counts)
def prob2():
    """Cluster the feature columns of the iris dataset.

    Loads ``datasets/iris.arff`` with the default ``Arff`` arguments, takes
    its features, trains one ``KMeans`` model for every k from 2 through 7,
    then trains a single ``HAC(simple=False)`` model with ``printk`` set to
    the same list of cluster counts. The features are not normalized.
    """
    inputs = Arff('datasets/iris.arff').get_features()
    cluster_counts = list(range(2, 8))

    # K-Means: one independent run per cluster count.
    for k in cluster_counts:
        KMeans(k).train(inputs)

    # HAC: a single run that is handed every cluster count at once.
    HAC(simple=False).train(inputs, printk=cluster_counts)
def prob4h():
    """Run both HAC variants on normalized abalone data with silhouette on.

    Loads ``datasets/abalone.arff`` with ``label_count=0``, normalizes it,
    and for each variant prints a banner and trains a fresh ``HAC`` with
    ``printk=np.arange(2, 8)`` and ``silhouette=True``. The default ``HAC()``
    is run under the "single link" banner and ``HAC(simple=False)`` under
    the "complete link" banner.
    """
    abalone = Arff('datasets/abalone.arff', label_count=0)
    abalone.normalize()
    cluster_counts = np.arange(2, 8)

    # (banner, HAC constructor keyword arguments), in execution order.
    variants = (
        ('single link --------------------', {}),
        ('complete link -----------------------', {'simple': False}),
    )
    for banner, hac_kwargs in variants:
        print(banner)
        model = HAC(**hac_kwargs)
        model.train(abalone, printk=cluster_counts, silhouette=True)
def prob3():
    """Plot SSE against cluster count for K-Means and both HAC variants.

    Loads ``datasets/abalone.arff`` with ``label_count=0``. For each k in
    2..7 a ``KMeans(k)`` model is trained and the value returned by
    ``train`` is collected. The default ``HAC()`` and ``HAC(simple=False)``
    are each trained once with ``printk`` set to the same k values. All
    three series are drawn on one matplotlib figure, which is then shown.
    """
    abalone = Arff('datasets/abalone.arff', label_count=0)
    cluster_counts = np.arange(2, 8)

    # One K-Means run per k; train() returns the value plotted as SSE.
    kmeans_sse = [KMeans(k).train(abalone) for k in cluster_counts]

    single_link = HAC()
    complete_link = HAC(simple=False)
    single_sse = single_link.train(abalone, printk=cluster_counts)
    complete_sse = complete_link.train(abalone, printk=cluster_counts)

    # The HAC results are reversed before plotting -- presumably train()
    # reports them from the largest k down to the smallest; confirm.
    series = (
        (kmeans_sse, "K-Means SSE"),
        (single_sse[::-1], "HAC (Single-Link) SSE"),
        (complete_sse[::-1], "HAC (Complete-Link) SSE"),
    )
    for values, legend_label in series:
        plt.plot(cluster_counts, values, label=legend_label)

    plt.title("Abalone SSE vs # of Clusters")
    plt.xlabel("# of Clusters")
    plt.ylabel('SSE')
    plt.legend()
    plt.show()
def prob1haccomplete():
    """Train ``HAC(simple=False)`` on the sponge dataset with ``printk=[4]``.

    The dataset is loaded from ``datasets/sponge.arff`` using the default
    ``Arff`` arguments and passed to ``train`` unchanged.
    """
    sponge = Arff('datasets/sponge.arff')
    HAC(simple=False).train(sponge, printk=[4])
def prob1hac():
    """Train a default ``HAC()`` on the sponge dataset with ``printk=[4]``.

    The dataset is loaded from ``datasets/sponge.arff`` using the default
    ``Arff`` arguments and passed to ``train`` unchanged.
    """
    sponge = Arff('datasets/sponge.arff')
    HAC().train(sponge, printk=[4])