コード例 #1
0
    def create_splits(self, X):
        """Collect, for every feature, the k-means centres of the best run.

        Each column of ``X`` is clustered on its own: one ``Kmeans`` object
        is fit 50 times on that column, and the ``means`` belonging to the
        run with the smallest reported error are kept. The per-column
        results are stored, in column order, in ``self.thresholds``.

        Args:
            X: Two-dimensional array whose ``shape`` unpacks into
                (rows, features) and which supports ``X[:, j]`` slicing.
        """
        # NOTE(review): ``k`` is neither a parameter nor a local of this
        # method, so it has to come from an outer scope -- confirm that it
        # is not meant to be an attribute of ``self``.
        _, n_features = X.shape

        # One entry per feature, appended in column order.
        self.thresholds = []

        for col in range(n_features):
            # Turn the flat column into a single-feature, two-dimensional
            # array before handing it to the model.
            column = X[:, col]
            column = np.reshape(column, (column.size, 1))

            # The same model object is refit on every restart.
            clusterer = Kmeans(k=k)
            lowest_error = np.inf
            best_means = None

            for _ in range(50):
                clusterer.fit(column)
                run_error = clusterer.error(column)
                if run_error < lowest_error:
                    # NOTE(review): this stores whatever object ``means``
                    # currently refers to; if ``fit`` updates that array in
                    # place, a later restart would overwrite it -- verify
                    # against the Kmeans implementation.
                    lowest_error, best_means = run_error, clusterer.means

            self.thresholds.append(best_means)
コード例 #2
0
        def closure_1_3_1():
            """Plot and save the lowest-error clustering out of 50 runs.

            Fits 50 separately constructed ``Kmeans`` models with 4 clusters
            on ``X`` (taken from the enclosing scope), keeps the first model
            that reaches the smallest ``error(X)``, draws its predicted
            labels in a new figure and writes the figure under ../figs.
            """
            n_clusters = 4
            lowest_error = np.inf
            winner = None
            for _ in range(50):
                candidate = Kmeans(n_clusters)
                candidate.fit(X)
                candidate_error = candidate.error(X)
                # Strict comparison: ties keep the earlier model.
                if candidate_error < lowest_error:
                    lowest_error, winner = candidate_error, candidate

            plt.figure()
            labels = winner.predict(X)
            utils.plot_2dclustering(X, labels)

            fig_name = "kmeans_outliers_best_model.png"
            fname = os.path.join("..", "figs", fig_name)
            plt.savefig(fname)
            print("\nFigure saved as '%s'" % fname)
コード例 #3
0
        def closure_1_3_2():
            """Plot the lowest k-means error found for each k from 1 to 10.

            For every k, 50 separately constructed ``Kmeans`` models are fit
            on ``X`` (taken from the enclosing scope) and the smallest value
            returned by ``model.error(X)`` is recorded. The resulting curve
            is drawn in a new figure and saved under ../figs.
            """
            # Single definition of the k values, so the x-axis of the plot
            # cannot drift out of step with the errors that were recorded.
            k_values = range(1, 11)

            minErrs = []
            for k in k_values:
                min_error = np.inf
                for _ in range(50):
                    model = Kmeans(k)
                    model.fit(X)
                    error = model.error(X)
                    # Only the error is plotted, so the fitted model itself
                    # does not need to be kept.
                    if error < min_error:
                        min_error = error

                minErrs.append(min_error)

            plt.figure()
            plt.plot(list(k_values), minErrs)
            plt.xlabel('k')
            plt.ylabel('Error')
            plt.title('k-means training error as k increases')

            fname = os.path.join("..", "figs", "kmeans_err_k_outliers.png")
            plt.savefig(fname)
            print("\nFigure saved as '%s'" % fname)
コード例 #4
0
        fname = os.path.join("..", "figs", "kmeans_basic.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    if question == '1.1':
        # Question 1.1: out of 50 random initialisations of k-means (k=4),
        # keep the clustering with the lowest error, then plot and save it.
        dataset = utils.load_dataset('clusterData')
        X = dataset['X']

        min_error, best_model = np.inf, None
        for _ in range(50):
            candidate = Kmeans(k=4)
            candidate.fit(X)
            candidate_error = candidate.error(X)
            # Strict comparison: on a tie the earlier run is kept.
            if candidate_error < min_error:
                min_error, best_model = candidate_error, candidate

        labels = best_model.predict(X)
        utils.plot_2dclustering(X, labels)

        fname = os.path.join("..", "figs", "kmeans_50_inits.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    if question == '1.2':

        # part 3: plot min error across 50 random inits, as k is varied from 1 to 10
        X = utils.load_dataset('clusterData')['X']
コード例 #5
0
        model.fit(X)
        y = model.predict(X)
        plt.scatter(X[:, 0], X[:, 1], c=y, cmap="jet")

        fname = os.path.join("..", "figs", "kmeans_basic.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    elif question == '5.1':
        X = load_dataset('clusterData.pkl')['X']
        model = Kmeans(k=4)
        low = model.error(X)

        for i in range(49):
            new_model = Kmeans(k=4)
            err = new_model.error(X)
            if err < low:
                model = new_model
                low = err

        utils.plot_2dclustering(X, model['predict'](model, X))
        print("Displaying figure...")
        plt.title("K-Means on clusterData")
        plt.show()

    elif question == '5.2':
        # Only the dataset is loaded in this branch; no model is fit and
        # nothing is plotted or saved here.
        X = load_dataset('clusterData.pkl')['X']

    elif question == '5.3':
        X = load_dataset('clusterData2.pkl')['X']
コード例 #6
0
ファイル: main.py プロジェクト: kthnd/ml-basics
        plt.xlabel("Random clustering k=4")
        fname = os.path.join("..", "figs", "1.png")

        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    if question == '1.1':
        # Question 1.1: fit 50 k-means models (k=4), then plot and save the
        # clustering produced by the one with the smallest error.
        X = utils.load_dataset('clusterData')['X']

        n_runs = 50
        errors = np.zeros(n_runs)
        models = []
        for run in range(n_runs):
            candidate = Kmeans(k=4)
            candidate.fit(X)
            models.append(candidate)
            errors[run] = candidate.error(X)

        # The selected model is removed from ``models``; the remaining
        # fitted models stay in the list.
        best_index = np.argmin(errors)
        model = models.pop(best_index)

        labels = model.predict(X)
        utils.plot_2dclustering(X, labels)
        plt.xlabel("Best of 50 clusterings k=4")

        fname = os.path.join("..", "figs", "1.1.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    if question == '1.2':
        X = utils.load_dataset('clusterData')['X']

        k = np.arange(0, 12)
        min_errors = np.zeros(k.size)
        for kk in k:
            models = []
コード例 #7
0
        plot_2dclustering(X, model.predict(X))

        fname = os.path.join("..", "figs", "kmeans_basic.png")
        # plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    elif question == '3.1':
        X = load_dataset('clusterData.pkl')['X']
        error = float('inf')
        model = Kmeans(k=4)
        model.fit(X)
        error = model.error(X)
        for i in range(49):
            model_2 = Kmeans(k=4)
            model_2.fit(X)
            if model_2.error(X) < error:
                model = model_2
        plot_2dclustering(X, model.predict(X))

        fname = os.path.join("..", "figs", "kmeans_3_1.png")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    elif question == '3.2':
        X = load_dataset('clusterData.pkl')['X']
        errors = np.ones([10, ]) * -1
        for i in range(50):
            kVal = random.randint(1, 10)
            model = Kmeans(k=kVal)
            model.fit(X)
            error = model.error(X)