def test_fit(self):
    """Fitting on four symmetric toy points must yield the two
    expected cluster centers."""
    samples = np.array([[1, 2], [2, 1], [-1, -2], [-2, -1]])
    data = ds.array(samples, block_size=(2, 2))

    model = KMeans(n_clusters=2, random_state=666, verbose=False)
    model.fit(data)

    # The two clusters are the mirrored point pairs, so the centers
    # are their means.
    wanted = np.array([[1.5, 1.5], [-1.5, -1.5]])
    self.assertTrue((model.centers == wanted).all())
def test_predict(self):
    """Labels predicted for unseen points must match the clusters
    learned from the toy training data."""
    train_points = [[1, 2], [2, 1], [-1, -2], [-2, -1]]
    train = ds.array(np.array(train_points), block_size=(2, 2))

    model = KMeans(n_clusters=2, random_state=666)
    model.fit(train)

    # Re-predict the training points plus two far-away outliers.
    test_points = train_points + [[10, 10], [-10, -10]]
    test = ds.array(np.array(test_points), block_size=(2, 2))
    labels = model.predict(test).collect()

    expected = np.array([0, 0, 1, 1, 0, 1])
    self.assertTrue(np.array_equal(labels, expected))
def test_init(self):
    """A user-supplied ``init`` must be stored verbatim in ``_init``
    and must not be mutated by ``fit`` (dense and sparse inputs)."""
    # Build an imbalanced three-blob dataset.
    samples, blob_ids = make_blobs(n_samples=1500, random_state=170)
    filtered = np.vstack((samples[blob_ids == 0][:500],
                          samples[blob_ids == 1][:100],
                          samples[blob_ids == 2][:10]))

    # Dense input.
    dense_data = ds.array(filtered, block_size=(300, 2))
    seed_centers = np.random.random((5, 2))
    model = KMeans(n_clusters=5, init=seed_centers)
    model.fit(dense_data)

    self.assertTrue(np.array_equal(model._init, seed_centers))
    # Fitting should move the centers away from the initial guess.
    self.assertFalse(np.array_equal(model.centers, seed_centers))

    # Sparse input.
    sparse_data = ds.array(csr_matrix(filtered), block_size=(300, 2))
    seed_centers = csr_matrix(np.random.random((5, 2)))
    model = KMeans(n_clusters=5, init=seed_centers)
    model.fit(sparse_data)

    self.assertTrue(np.array_equal(model._init.toarray(),
                                   seed_centers.toarray()))
    self.assertFalse(np.array_equal(model.centers.toarray(),
                                    seed_centers.toarray()))
def main():
    """Run K-means on an input file and print run parameters and timings.

    Parses the command line, loads the training data in SVMLight or
    text/CSV format as a distributed array, fits a ``KMeans`` model,
    and prints ``[centers, arity, block_size, read_time, fit_time]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--svmlight", help="read files in SVMLight format",
                        action="store_true")
    parser.add_argument("-dt", "--detailed_times",
                        help="get detailed execution times (read and fit)",
                        action="store_true")
    parser.add_argument("-a", "--arity", metavar="CASCADE_ARITY", type=int,
                        help="default is 50", default=50)
    parser.add_argument("-c", "--centers", metavar="N_CENTERS", type=int,
                        help="default is 2", default=2)
    parser.add_argument("-b", "--block_size", metavar="BLOCK_SIZE", type=str,
                        help="two comma separated ints that represent the "
                             "size of the blocks in which to divide the input "
                             "data (default is 100,100)",
                        default="100,100")
    parser.add_argument("-i", "--iteration", metavar="MAX_ITERATIONS",
                        type=int, help="default is 5", default=5)
    parser.add_argument("-f", "--features", metavar="N_FEATURES",
                        help="number of features of the input data "
                             "(only for SVMLight files)",
                        type=int, default=None, required=False)
    parser.add_argument("--dense", help="store data in dense format (only "
                                        "for SVMLight files)",
                        action="store_true")
    parser.add_argument("--labeled", help="the last column of the input file "
                                          "represents labels (only for text "
                                          "files)",
                        action="store_true")
    parser.add_argument("train_data",
                        help="input file in CSV or SVMLight format", type=str)

    args = parser.parse_args()

    train_data = args.train_data

    s_time = time.time()
    read_time = 0

    sparse = not args.dense

    bsize = args.block_size.split(",")
    block_size = (int(bsize[0]), int(bsize[1]))

    if args.svmlight:
        x, y = ds.load_svmlight_file(train_data, block_size, args.features,
                                     sparse)
    else:
        x = ds.load_txt_file(train_data, block_size)

    n_features = x.shape[1]

    # Drop the trailing label column when the text input is labeled.
    if args.labeled and not args.svmlight:
        x = x[:, :n_features - 1]

    if args.detailed_times:
        barrier()
        read_time = time.time() - s_time
        s_time = time.time()

    # BUG FIX: the parser defines "--centers" and "--block_size", but the
    # original code read the nonexistent attributes "args.clusters" and
    # "args.part_size", which raised AttributeError at runtime.
    kmeans = KMeans(n_clusters=args.centers, max_iter=args.iteration,
                    arity=args.arity, verbose=True)
    kmeans.fit(x)

    barrier()
    fit_time = time.time() - s_time

    out = [args.centers, args.arity, args.block_size, read_time, fit_time]

    print(out)