예제 #1
0
def prob0haccomplete():
    """Train HAC(simple=False) on the labor dataset features.

    Loads ``datasets/labor.arff`` with one label column, drops the first
    column, keeps only the features, and trains with ``verbose=True`` and
    ``printk=[5]``.  ``simple=False`` appears to select complete-link
    clustering (see the function name) -- confirm against the HAC class.
    """
    labor = Arff('datasets/labor.arff', label_count=1)
    # Column 0 is the id column: slice it away, then keep features only.
    features = labor.create_subset_arff(col_idx=slice(1, None)).get_features()
    clusterer = HAC(simple=False)
    clusterer.train(features, verbose=True, printk=[5])
예제 #2
0
def setup():
    """Print HAC distances between two fixed pairs of labor-dataset rows.

    Loads ``datasets/labor.arff``, drops the first column, keeps only the
    features, tells a default ``HAC`` instance which attribute indices are
    nominal, and prints ``get_distance`` for rows (33, 44) and (25, 34).
    """
    labor = Arff('datasets/labor.arff', label_count=1)
    # Column 0 is the id column: slice it away, then keep features only.
    features = labor.create_subset_arff(col_idx=slice(1, None)).get_features()

    clusterer = HAC()
    # Boolean mask over attributes whose declared type is 'nominal'.
    nominal_mask = np.array(features.attr_types) == 'nominal'
    clusterer.nominal_indicies = np.where(nominal_mask)[0]

    row_pairs = (
        ('33,44', 33, 44),
        ('25,34', 25, 34),
    )
    for tag, first, second in row_pairs:
        print(tag, clusterer.get_distance(features.data[first], features.data[second]))
예제 #3
0
def prob2wclass():
    """Cluster the iris dataset, loaded with ``label_count=0``.

    Trains one ``KMeans`` model for each k in 2..7, then trains a single
    ``HAC(simple=False)`` with ``printk`` set to the same k values.  No
    normalization is applied.  Loading with ``label_count=0`` presumably
    keeps the class column among the clustered columns -- confirm against
    the Arff class.
    """
    dataset = Arff('datasets/iris.arff', label_count=0)
    cluster_counts = list(range(2, 8))

    # K-Means: a fresh model per cluster count.
    for n_clusters in cluster_counts:
        KMeans(n_clusters).train(dataset)

    # HAC: one run, passing every cluster count via printk.
    HAC(simple=False).train(dataset, printk=cluster_counts)
예제 #4
0
def prob2():
    """Cluster the feature columns of the iris dataset.

    Trains one ``KMeans`` model for each k in 2..7 on the features, then
    trains a single ``HAC(simple=False)`` with ``printk`` set to the same
    k values.  No normalization is applied.
    """
    feature_data = Arff('datasets/iris.arff').get_features()
    cluster_counts = list(range(2, 8))

    # K-Means: a fresh model per cluster count.
    for n_clusters in cluster_counts:
        KMeans(n_clusters).train(feature_data)

    # HAC: one run, passing every cluster count via printk.
    HAC(simple=False).train(feature_data, printk=cluster_counts)
예제 #5
0
def prob4h():
    """Run HAC twice on normalized abalone data with silhouette enabled.

    The first run uses ``HAC()`` under the 'single link' banner, the second
    uses ``HAC(simple=False)`` under the 'complete link' banner.  Both are
    trained with ``printk=np.arange(2, 8)`` and ``silhouette=True``.
    """
    abalone = Arff('datasets/abalone.arff', label_count=0)
    abalone.normalize()
    cluster_counts = np.arange(2, 8)

    # (banner to print, keyword arguments for the HAC constructor)
    runs = (
        ('single link --------------------', {}),
        ('complete link -----------------------', {'simple': False}),
    )
    for banner, hac_options in runs:
        print(banner)
        HAC(**hac_options).train(abalone, printk=cluster_counts, silhouette=True)
예제 #6
0
def prob3():
    """Plot SSE against cluster count on abalone for K-Means and HAC.

    Trains ``KMeans(k)`` for each k in 2..7 and collects each ``train``
    return value, then trains ``HAC()`` and ``HAC(simple=False)`` with
    ``printk`` set to the same k values.  The three result series are
    plotted on one figure, which is then shown.
    """
    abalone = Arff('datasets/abalone.arff', label_count=0)
    cluster_counts = np.arange(2, 8)

    # One K-Means model per k; train() returns the value plotted as SSE.
    kmeans_sse = [KMeans(k).train(abalone) for k in cluster_counts]

    single_link = HAC()
    complete_link = HAC(simple=False)
    single_sse = single_link.train(abalone, printk=cluster_counts)
    complete_sse = complete_link.train(abalone, printk=cluster_counts)

    # HAC results are reversed before plotting -- presumably train() returns
    # them from the largest k down to the smallest; confirm against HAC.
    series = (
        (kmeans_sse, "K-Means SSE"),
        (single_sse[::-1], "HAC (Single-Link) SSE"),
        (complete_sse[::-1], "HAC (Complete-Link) SSE"),
    )
    for values, legend_label in series:
        plt.plot(cluster_counts, values, label=legend_label)

    plt.title("Abalone SSE vs # of Clusters")
    plt.xlabel("# of Clusters")
    plt.ylabel('SSE')
    plt.legend()
    plt.show()
예제 #7
0
def prob1haccomplete():
    """Train ``HAC(simple=False)`` on the sponge dataset with ``printk=[4]``.

    ``simple=False`` appears to select complete-link clustering (see the
    function name) -- confirm against the HAC class.
    """
    sponge = Arff('datasets/sponge.arff')
    HAC(simple=False).train(sponge, printk=[4])
예제 #8
0
def prob1hac():
    """Train a default-configured ``HAC`` on the sponge dataset with ``printk=[4]``."""
    sponge = Arff('datasets/sponge.arff')
    HAC().train(sponge, printk=[4])