plt.show()


def output_image(arr, m, n, filename):
    """Save a flat sequence of labels as an ``m`` x ``n`` black/white image.

    Entries equal to 1 are written as white (255); every other entry is
    written as black (0).

    Args:
        arr: Iterable of ``m * n`` label values.
        m: Number of rows of the output image.
        n: Number of columns of the output image.
        filename: Path passed straight to ``cv2.imwrite``.

    Raises:
        ValueError: Raised by ``reshape`` when ``arr`` does not contain
            exactly ``m * n`` entries.
    """
    # Build the pixels explicitly as uint8. A bare np.array() of Python ints
    # yields the platform default integer dtype (usually int64), which is not
    # among the image depths cv2.imwrite documents as supported. The values
    # are only 0 and 255, so the cast is lossless.
    pixels = np.array([255 if label == 1 else 0 for label in arr],
                      dtype=np.uint8)
    cv2.imwrite(filename, pixels.reshape((m, n)))


if __name__ == "__main__":
    # Script entry point: cluster the points loaded from "data.data" into
    # k groups with k-means and print the loss reported by calc_loss.
    #####################
    # read files
    colors = ["red", "blue"]  # not used in the visible part of this script
    k = 2  # number of clusters requested from KMeans below
    # read_file is defined elsewhere; here it returns a single object that is
    # passed directly to KMeans.fit.
    circs_X = read_file("data.data")

    # Q1 circs
    #####################
    # regular k-means
    # The random_state is itself drawn at random from [0, 100), so successive
    # runs can produce different clusterings.
    # NOTE(review): KMeans is presumably sklearn.cluster.KMeans -- confirm
    # against the file's imports.
    kmeans_model1 = KMeans(n_clusters=k,
                           random_state=np.random.randint(100)).fit(circs_X)
    kmeans_labels1 = kmeans_model1.labels_
    # show(circs_X, kmeans_labels1)
    # calc_loss is defined elsewhere; it receives the data and the labels.
    print(calc_loss(circs_X, kmeans_labels1))

    # spectral clustering
    # Candidate sigma1 values, all currently disabled:
    # sigma1 = 0.01
    # sigma1 = 0.1
    # sigma1 = 1
    # sigma1 = 5
Esempio n. 2
0
        return linear_kernel

    @staticmethod
    def gauss_kernel_generator(sigma2):
        """Build a Gaussian kernel function bound to the given ``sigma2``.

        Args:
            sigma2: Value used in the kernel's denominator ``2 * sigma2``.

        Returns:
            A two-argument function ``gauss_kernel(x1, x2)`` that evaluates
            ``exp(-||x1 - x2||^2 / (2 * sigma2))``.
        """
        def gauss_kernel(x1, x2):
            # Squared norm of the difference between the two inputs.
            gap = x1 - x2
            squared_gap = np.linalg.norm(gap) ** 2
            return np.exp(-squared_gap / (2. * sigma2))

        return gauss_kernel


def distance(hyperplane, point):
    """Return the scaled inner product of a hyperplane vector and a point.

    The numerator is the inner product of the flattened ``hyperplane`` with
    ``point``. The denominator is the norm of ``hyperplane[1:-1]``, i.e. the
    hyperplane with its first and last entries left out.

    NOTE(review): dropping both the first and the last entry from the norm is
    unusual -- confirm the intended layout of ``hyperplane`` with the caller.

    Args:
        hyperplane: NumPy array of hyperplane coefficients.
        point: Array-like with as many entries as the flattened hyperplane.

    Returns:
        The inner product divided by the norm described above.
    """
    projection = np.inner(hyperplane.flatten(), point)
    scale = np.linalg.norm(hyperplane[1:-1])
    return projection / scale


if __name__ == "__main__":
    # Script entry point: load the three data splits and walk a grid of
    # (c, sigma2) pairs. In the visible part the training call is commented
    # out, so each pair is only printed.
    # read data
    # read_file is defined elsewhere; here it returns an (X, Y) pair.
    train_X, train_Y = read_file("train.data")
    validation_X, validation_Y = read_file("validation.data")
    test_X, test_Y = read_file("test.data")

    # Number of rows in each split.
    train_len = train_X.shape[0]
    validation_len = validation_X.shape[0]
    test_len = test_X.shape[0]
    # Grid values tried for c and sigma2.
    c_arr = [1, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8]
    sigma2_arr = [0.1, 1, 10, 100, 1000]
    # s is keyed by the (c, sigma2) tuple in the disabled line below; it
    # stays empty while that line is commented out.
    s = dict()
    for c in c_arr:
        for sigma2 in sigma2_arr:
            # s[c, sigma2] = SVM("spam_train.data", c=c, sigma2=sigma2)
            print("c: ", c, " sigma2: ", sigma2)
Esempio n. 3
0
    @staticmethod
    def predict(dt, X):
        """Predict a label for every row of ``X`` with the tree ``dt``.

        Args:
            dt: Object exposing ``predict_single(x)``.
            X: Indexable collection with a ``shape`` attribute; rows
                ``X[0] .. X[X.shape[0] - 1]`` are classified one at a time.

        Returns:
            A list with one ``dt.predict_single`` result per row, in row
            order.
        """
        row_count = X.shape[0]
        return [dt.predict_single(X[row]) for row in range(row_count)]

    @staticmethod
    def calc_accu(t, test_X, test_Y):
        """Return the fraction of rows in ``test_X`` that ``t`` labels correctly.

        Args:
            t: Tree passed on to ``DecisionTree.predict``.
            test_X: Samples to classify.
            test_Y: NumPy array of expected labels, compared element-wise
                with the predictions.

        Returns:
            Number of matching labels divided by ``test_Y.shape[0]``.
        """
        predicted = DecisionTree.predict(t, test_X)
        hits = np.sum(test_Y == predicted)
        return hits / test_Y.shape[0]


if __name__ == "__main__":
    # Script entry point: build a DecisionTree from the training split and
    # compute its accuracy on the test split.
    # Feature names handed to the DecisionTree constructor below.
    # NOTE(review): these look like the UCI mushroom dataset attributes --
    # confirm that their order matches the columns returned by read_file.
    feature_names = np.array([
        'cap-shape', 'cap-surface', 'cap-color', 'bruises?', 'odor',
        'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
        'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
        'stalk-surface-below-ring', 'stalk-color-above-ring',
        'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
        'ring-type', 'spore-print-color', 'population', 'habitat'
    ])

    # read_file is defined elsewhere; here it returns an (X, Y) pair.
    train_X, train_Y = read_file("train.data")
    test_X, test_Y = read_file("test.data")

    t = DecisionTree(train_X, train_Y, feature_names)
    # accu is computed here but not printed in the visible part of the script.
    accu = DecisionTree.calc_accu(t, test_X, test_Y)
Esempio n. 4
0
        # t subtree
        tsub = t.tsub
        G.add_edge(t.name, tsub.name)
        e = G.get_edge(t.name, tsub.name)
        e.attr['label'] = "t"
        draw_tree_helper(G, tsub)
        # f subtree
        fsub = t.fsub
        G.add_edge(t.name, fsub.name)
        e = G.get_edge(t.name, fsub.name)
        e.attr['label'] = "f"
        draw_tree_helper(G, fsub)


if __name__ == "__main__":
    # Script entry point: run adaboost on the training split, measure its
    # training accuracy and draw each returned predictor.
    # read_file is defined elsewhere; here it returns an (X, Y) pair.
    heart_train_X, heart_train_Y = read_file("train.data")
    heart_test_X, heart_test_Y = read_file("test.data")

    # 1 (a)
    # NOTE(review): the third argument (5) is presumably the number of
    # boosting rounds -- confirm against adaboost's definition.
    epsilons1, alphas1, predictors1 = adaboost(heart_train_X, heart_train_Y, 5)

    # Training accuracy: fraction of training labels the ensemble reproduces.
    ada_pred_Y = adaboost_pred(alphas1, predictors1, heart_train_X)
    accu = np.sum(ada_pred_Y == heart_train_Y) / heart_train_Y.size

    # draw tree
    # Each predictor is drawn under its position in the list, as a string.
    for i, t in enumerate(predictors1):
        draw_tree(t, str(i))

    # 1 (b)
    epsilons2, alphas2, predictors2 = adaboost(heart_train_X, heart_train_Y,
Esempio n. 5
0
    for i in range(X.shape[0]):
        xi = X[i]
        labeli = labels[i]
        s += np.sum((xi - mus[labeli])**2)
    return s


def func_uniform_draw(left, right):
    """Create a sampler for uniform random matrices over ``[left, right)``.

    Args:
        left: Lower bound passed to ``np.random.uniform``.
        right: Upper bound passed to ``np.random.uniform``.

    Returns:
        A function ``draw(m, n)`` that returns an ``m`` x ``n`` array of
        samples taken from NumPy's global random state.
    """
    def draw(m, n):
        shape = (m, n)
        return np.random.uniform(low=left, high=right, size=shape)

    return draw


if __name__ == "__main__":
    # Script entry point: for several cluster counts, fit KMeans and build a
    # GMM from the same randomly drawn initial centres, 20 times per count.
    # read_file is defined elsewhere; here it returns an (X, Y) pair.
    train_X, train_Y = read_file("train.data")
    # Sampler for matrices with entries drawn uniformly from [-3, 3).
    func_draw = func_uniform_draw(-3, 3)
    ks = [12, 18, 24, 36, 42]  # cluster counts to try

    # k-means
    # k1[k] and g1[k] collect the 20 fitted KMeans objects and the 20 GMM
    # results for cluster count k.
    k1, g1 = dict(), dict()
    for k in ks:
        print(k)
        k1[k], g1[k] = [], []
        for i in range(20):
            # One random centre per cluster, with as many columns as train_X.
            init_centres = func_draw(k, train_X.shape[1])
            # The same centres seed both models in this iteration.
            # NOTE(review): KMeans is presumably sklearn.cluster.KMeans and
            # GMM a project-local helper -- confirm against the imports.
            k1[k] += [KMeans(n_clusters=k, init=init_centres).fit(train_X)]
            g1[k] += [GMM(train_X, k, mu=init_centres)]

    # k-means++, GMM++
    # Containers for the second experiment; they are filled beyond the
    # visible part of the script.
    k2, g2 = dict(), dict()