def closure_1_3_4():
    """Fit K-medians with k=4 from 50 starts and save a plot of the best run.

    ``X`` is not a parameter; it is resolved from the surrounding scope.
    Every run is scored with ``model.error(X)`` and the first run that
    achieves the lowest error is kept. Its cluster assignment is drawn with
    ``utils.plot_2dclustering`` and the figure is written to
    ``../figs/kmedians_outliers_best_model.png``.
    """
    k = 4
    min_error = np.inf
    best_model = None

    for _ in range(50):
        candidate = Kmedians(k)
        candidate.fit(X)
        candidate_error = candidate.error(X)
        # Strict comparison: on a tie the earlier run stays selected.
        if candidate_error < min_error:
            best_model, min_error = candidate, candidate_error

    plt.figure()
    assignments = best_model.predict(X)
    utils.plot_2dclustering(X, assignments)

    fname = os.path.join("..", "figs", "kmedians_outliers_best_model.png")
    plt.savefig(fname)
    print("\nFigure saved as '%s'" % fname)
        # Plot y against k_values as a single labelled line, name the axes, and
        # save the figure as ../figs/q13minerror.pdf.
        # NOTE(review): k_values and y are defined outside the visible lines;
        # presumably y holds the minimum error found for each k - confirm.
        plt.plot(k_values,
                 y,
                 label="K-Means minimum error plot against k values")
        plt.xlabel("k values")
        plt.ylabel("Minimum error")
        plt.legend()
        fname = os.path.join("..", "figs", "q13minerror.pdf")
        plt.savefig(fname)

    if question == '1.3.4':
        X = utils.load_dataset('clusterData2')['X']

        # Run K-medians (k=4) 50 times, keeping every fitted model together
        # with its error so the lowest-error run can be selected afterwards.
        candidates = []
        candidate_errors = []
        for _ in range(50):
            candidate = Kmedians(k=4)
            candidates.append(candidate)
            candidate.fit(X)
            # Return value is not used; the call is retained in case predict
            # has side effects - TODO confirm and drop if it does not.
            candidate.predict(X)
            candidate_errors.append(candidate.error(X))

        # Position of the first run that reaches the smallest error.
        best = min(range(len(candidate_errors)),
                   key=candidate_errors.__getitem__)
        model = candidates[best]
        y = model.predict(X)

        utils.plot_2dclustering(X, y)

    if question == '1.4':
        X = utils.load_dataset('clusterData2')['X']

        # Part 2, Part 4: minimum K-medians error for every k in 1..10.
        #
        # k is swept deterministically instead of being drawn at random.
        # With random draws, a k that was never picked kept its -1
        # placeholder, and that placeholder was printed and plotted as if it
        # were a real error. The overall budget is unchanged:
        # 10 values of k x 5 restarts = 50 fits.
        # Replace Kmedians with Kmeans below for the K-means curve.
        k_values = range(1, 11)
        restarts_per_k = 5
        errors = np.full(len(k_values), np.inf)
        for slot, k in enumerate(k_values):
            for _ in range(restarts_per_k):
                model = Kmedians(k=k)
                model.fit(X)
                # min() keeps the stored value unless the new error is
                # strictly smaller, so the first best run wins ties.
                errors[slot] = min(errors[slot], model.error(X))
        print(errors)
        plt.plot(k_values, errors)
        plt.xlabel('k')
        plt.ylabel('Error')
        plt.title('Error vs k')

        fname = os.path.join("..", "figs", "error_vs_kmedians_3_3.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    elif question == '3.4':
        # Elbow-method data for K-medians: for k = 1..10 record the lowest
        # error over 50 fits, then plot that minimum error against k.
        #     	fname3 = "C:\\Users\\wangzhen\\Desktop\\cpsc340\\g5e0b_u7p1b_a2-master\\figs\\q3_3_kmedians.png"
        #     	plt.savefig(fname3)
        #     	print("\nFigure saved as '%s'" % fname3)
        X = load_dataset('clusterData2.pkl')['X']
        # model=Kmeans(k=1)
        # model.fit(X)
        # error=model.error(X)
        # print(error)

        # minError[k] holds the lowest error seen for k + 1 clusters.
        minError = np.zeros(10)
        for k in range(10):
            minError[k] = np.inf
            # NOTE(review): larg_slope and slope_k are re-initialised on every
            # pass of this loop, so the comparison further down only ever
            # tests against 0, and slope_k is never updated in the visible
            # code. If a running maximum drop is intended these belong above
            # the loop - confirm against the rest of the branch.
            larg_slope = 0
            slope_k = 1
            # 50 fits for this k; keep the smallest error.
            for i in range(50):
                model = Kmedians(k=k + 1)
                model.fit(X)
                error = model.error(X)
                if error < minError[k]:
                    minError[k] = error
            # Drop in minimum error relative to the previous k.
            # NOTE(review): for k == 0 this reads minError[-1], which is still
            # the 0 written by np.zeros, so the first value printed is
            # -minError[0] rather than a difference between two fitted k.
            print(-minError[k] + minError[k - 1])
            if k != 0:
                if -minError[k] + minError[k - 1] > larg_slope:
                    larg_slope = -minError[k] + minError[k - 1]

        # x axis: k = 1..10; y axis: the per-k minimum error.
        x1 = [i + 1 for i in range(10)]
        y1 = minError

        plt.figure(1)
        plot1 = plt.plot(x1, y1, 'b', label='minError')
        plt.title(
Exemple #5
0
        # Redraw the current figure, write it to
        # ../figs/kmeans_elbow_method.png and report the path.
        plt.draw()
        fname = os.path.join("..", "figs", "kmeans_elbow_method.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)
        #print(errors)

    # Using K-medians for different clustering problems
    elif module == '4':
        X = load_dataset('clusterData2.pkl')['X']

        # using elbow method to eyeball the best k
        errors = np.zeros(10)
        for i in range(10):
            min_error = np.inf
            for j in range(50):
                model = Kmedians(k=i + 1)
                model.fit(X)
                err = model.error(X)
                if err < min_error:
                    min_error = err
            errors[i] = min_error
        plt.plot(np.arange(1, 11), errors)
        plt.title("k Vs min_error")
        plt.xlabel("k")
        plt.ylabel("Min error across 50 random initializations")
        plt.draw()
        fname = os.path.join("..", "figs", "kmedians_elbow_method.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    # Trying out scikit's DBSCAN