예제 #1
0
def test_k_equal_1():
    """Check that asking for a single cluster groups every point together.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    for algorithm in ("linear", "loglinear", "quadratic"):
        # Four identical values: one cluster, centred on the value itself.
        constant = np.array([-2.5, -2.5, -2.5, -2.5])
        fit = ckwrap.ckmeans(constant, 1, method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == [1, 1, 1, 1])
        assert np.allclose(fit.centers, [-2.5])
        assert np.allclose(fit.sizes, [4.0])
        assert np.allclose(fit.withinss, [0.0])

        # The integers 1..100 must all land in the single cluster.
        hundred = np.arange(1, 101)
        fit = ckwrap.ckmeans(hundred, 1, method=algorithm)
        assert np.allclose(fit.sizes, [100.0])
예제 #2
0
def test_estimate_k_set_1():
    """Check the cluster sizes chosen when k is passed as a tuple.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods. The tuple is presumably a (min, max) range of candidate k
    values -- confirm against the ckwrap documentation.
    """
    for algorithm in ("linear", "loglinear", "quadratic"):
        # Two tight groups of three points each.
        ascending = np.array([0.9, 1.0, 1.1, 1.9, 2.0, 2.1])
        fit = ckwrap.ckmeans(ascending, (1, 6), method=algorithm)
        assert np.allclose(fit.sizes, [3.0, 3.0])

        # Same data in reverse order must give the same sizes.
        descending = ascending[::-1]
        fit = ckwrap.ckmeans(descending, (1, 6), method=algorithm)
        assert np.allclose(fit.sizes, [3.0, 3.0])

        # Evenly spaced integers 1..10 are expected to form one cluster.
        evenly_spaced = np.arange(1, 11)
        fit = ckwrap.ckmeans(evenly_spaced, (1, 10), method=algorithm)
        assert np.allclose(fit.sizes, [10.0])
예제 #3
0
def test_n_equal_10_k_equal_3():
    """Check ten points with three distinct values split into three clusters.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    expected_labels = [3, 3, 3, 3, 1, 1, 1, 2, 2, 2]
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.array([3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0])
        fit = ckwrap.ckmeans(samples, 3, method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == expected_labels)
        assert np.allclose(fit.centers, [1.0, 2.0, 3.0])
        assert np.allclose(fit.sizes, [3.0, 3.0, 4.0])
        assert np.allclose(fit.withinss, [0.0, 0.0, 0.0])
예제 #4
0
def test_k_equal_2():
    """Check that the integers 1..10 split into two equal halves for k=2.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    expected_labels = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.arange(1, 11)
        fit = ckwrap.ckmeans(samples, 2, method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == expected_labels)
        assert np.allclose(fit.centers, [3.0, 8.0])
        assert np.allclose(fit.sizes, [5.0, 5.0])
        assert np.allclose(fit.withinss, [10.0, 10.0])
예제 #5
0
def test_n_less_or_equal_to_k():
    """Check that four points with k=4 each get their own cluster.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.array([3.0, 2.0, -5.4, 0.1])
        fit = ckwrap.ckmeans(samples, 4, method=algorithm)
        # Labels are compared 1-based, hence the "+ 1"; the expected
        # labels follow the ascending order of the expected centers.
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == [4, 3, 1, 2])
        assert np.allclose(fit.centers, [-5.4, 0.1, 2.0, 3.0])
        assert np.allclose(fit.sizes, [1.0, 1.0, 1.0, 1.0])
        assert np.allclose(fit.withinss, [0.0, 0.0, 0.0, 0.0])
예제 #6
0
def test_weighted_input():
    """Check clustering results when per-point weights are supplied.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods, first with a fixed k and then with a range of k values.
    """
    for algorithm in ("linear", "loglinear", "quadratic"):
        # Fixed k: the expected sizes are sums of weights, not point counts.
        values = np.array([-1.0, 2.0, 4.0, 5.0, 6.0])
        point_weights = np.array([4.0, 3.0, 1.0, 1.0, 1.0])
        fit = ckwrap.ckmeans(
            values, 3, weights=point_weights, method=algorithm)
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == [1, 2, 3, 3, 3])
        assert np.allclose(fit.centers, [-1.0, 2.0, 5.0])
        assert np.allclose(fit.sizes, [4.0, 3.0, 3.0])
        assert np.allclose(fit.withinss, [0.0, 0.0, 2.0])

        # Range of k values
        values = np.array([-0.9, 1.0, 1.1, 1.9, 2.0, 2.1])
        point_weights = np.array([3.0, 1.0, 2.0, 2.0, 1.0, 1.0])
        fit = ckwrap.ckmeans(
            values, (1, 6), weights=point_weights, method=algorithm)
        # Expected centers are the weighted means of each group.
        expected_centers = [
            -0.9,
            (1.0 + 2.2) / 3.0,
            (1.9 * 2.0 + 2.0 + 2.1) / 4.0,
        ]
        assert np.allclose(fit.centers, expected_centers)
        assert np.allclose(fit.sizes, [3.0, 3.0, 4.0])
예제 #7
0
def test_estimate_k_set_3_cosine():
    """Check the clustering of cos(-10), ..., cos(10) when k is a tuple.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods; two clusters are expected.
    """
    expected_labels = [
        1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
    ]
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.cos(np.arange(-10, 11))
        fit = ckwrap.ckmeans(samples, (1, 9), method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == expected_labels)
        assert np.allclose(fit.centers, [-0.6592474631, 0.6751193405])
        assert np.allclose(fit.sizes, [12.0, 9.0])
        assert np.allclose(fit.withinss, [1.0564793100, 0.6232976959])
예제 #8
0
def test_estimate_k_set_2():
    """Check that three clusters are found when k is given as (2, 5).

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    expected_labels = [3, 3, 3, 3, 1, 1, 1, 2, 2, 2]
    expected_centers = [0.933333333333, 2.066666666667, 3.475]
    expected_withinss = [0.0466666666667, 0.0466666666667, 0.2075]
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.array(
            [3.5, 3.6, 3.7, 3.1, 1.1, 0.9, 0.8, 2.2, 1.9, 2.1])
        fit = ckwrap.ckmeans(samples, (2, 5), method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == expected_labels)
        assert np.allclose(fit.centers, expected_centers)
        assert np.allclose(fit.sizes, [3.0, 3.0, 4.0])
        assert np.allclose(fit.withinss, expected_withinss)
예제 #9
0
파일: kmeans.py 프로젝트: djdt/pewpew
def kmeans1d(x: np.ndarray,
             k: int,
             method: str = "ckmeans1d",
             method_kws: dict = None) -> np.ndarray:
    """1-dim k-means clustering.
    Uses Ckmeans.1d.dp through ``ckwrap`` if it is installed and `method` is
    'ckmeans1d'. If ``ckwrap`` cannot be imported a warning is logged and
    the 'kmeans' method is used instead.

    Args:
        x: flattened to 1d
        k: number of clusters
        method: if 'ckmeans1d' ckwrap is used, otherwise 'kmeans' in 1d
        method_kws: passed through to the implementation used. Recognised
            keys are 'weights' and 'method' (forwarded to ``ckmeans``) and
            'init' and 'max_iterations' (forwarded to ``kmeans``).

    Returns:
        array of labels mapping clusters to objects, with the shape of `x`

    Raises:
        ValueError: if `method` is neither 'ckmeans1d' nor 'kmeans'

    See Also:
        :func:`pewpew.lib.kmeans.kmeans`
    """
    # Defaults for both implementations; each branch reads only its own keys.
    kwargs = {
        "init": "kmeans++",
        "max_iterations": 1000,
        "weights": None,
        "method": "linear",
    }
    if method_kws is not None:
        kwargs.update(method_kws)

    if method == "ckmeans1d":  # pragma: no cover
        # Only the optional import is guarded: an ImportError raised while
        # clustering must not be reported as "package not found".
        try:
            from ckwrap import ckmeans
        except ImportError:
            logger.warning(
                "Unable to use ckmeans1d as ckwrap package not found.")
            return kmeans1d(x, k, method="kmeans", method_kws=method_kws)

        # k is passed as the tuple (k, k) so exactly k clusters are requested.
        idx = ckmeans(
            x.ravel(),
            (k, k),
            weights=kwargs["weights"],
            method=kwargs["method"],
        ).labels
    elif method == "kmeans":
        idx = kmeans(
            x.ravel(),
            k,
            init=kwargs["init"],  # type: ignore
            max_iterations=kwargs["max_iterations"],  # type: ignore
        ).labels
    else:  # pragma: no cover
        raise ValueError(f"Unknown method {method}.")
    # Restore the caller's original shape for the label array.
    return np.reshape(idx, x.shape)
예제 #10
0
def test_unweighted():
    """Check labels, centers, sizes and sum-of-squares fields without weights.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.array(
            [-1.0, 2.0, -1.0, 2.0, 4.0, 5.0, 6.0, -1.0, 2.0, -1.0])
        fit = ckwrap.ckmeans(samples, 3, method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == [1, 2, 1, 2, 3, 3, 3, 1, 2, 1])
        assert np.allclose(fit.centers, [-1.0, 2.0, 5.0])
        assert np.allclose(fit.sizes, [4.0, 3.0, 3.0])
        assert np.allclose(fit.withinss, [0.0, 0.0, 2.0])

        # Total sum of squares about the overall mean, computed by hand.
        overall_mean = np.sum(samples) / samples.size
        expected_totss = np.sum((samples - overall_mean)**2)
        assert np.allclose(fit.totss, expected_totss)
        assert np.allclose(np.sum(fit.withinss), 2.0)
        # Between-cluster part is the total minus the within-cluster part.
        assert np.allclose(np.sum(fit.betweenss), expected_totss - 2.0)
예제 #11
0
def test_estimate_k_set_4_gamma():
    """Check the clustering of scaled ``dgamma`` density values.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods; three clusters are expected with k given as (1, 9).
    """
    expected_labels = [
        3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    ]
    expected_centers = [0.01702193495, 0.15342151455, 0.32441508262]
    expected_withinss = [0.006126754998, 0.004977009034, 0.004883305120]
    for algorithm in ("linear", "loglinear", "quadratic"):
        grid = np.arange(1, 10.5, 0.5)
        # times 2 for 1/2 in stats.dgamma
        samples = dgamma.pdf(grid, 2.0) * 2.0
        fit = ckwrap.ckmeans(samples, (1, 9), method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == expected_labels)
        assert np.allclose(fit.centers, expected_centers)
        assert np.allclose(fit.sizes, [13, 3, 3])
        assert np.allclose(fit.withinss, expected_withinss)
예제 #12
0
def test_n_equal_14_k_equal_8():
    """Check fourteen mixed values split into eight clusters.

    The checks are repeated for the "linear", "loglinear" and "quadratic"
    methods.
    """
    expected_labels = [2, 2, 1, 3, 3, 3, 1, 6, 7, 4, 1, 5, 3, 8]
    expected_centers = [
        -7.266666667, -0.4, 8.5, 32.3, 62.5, 75.0, 82.6, 95.2,
    ]
    expected_sizes = [3.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    expected_withinss = [7.526666667, 13.52, 11.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    for algorithm in ("linear", "loglinear", "quadratic"):
        samples = np.array(
            [-3, 2.2, -6, 7, 9, 11, -6.3, 75, 82.6, 32.3, -9.5, 62.5, 7, 95.2])
        fit = ckwrap.ckmeans(samples, 8, method=algorithm)
        # Labels are compared 1-based, hence the "+ 1".
        one_based_labels = fit.labels + 1
        assert np.all(one_based_labels == expected_labels)
        assert np.allclose(fit.centers, expected_centers)
        assert np.allclose(fit.sizes, expected_sizes)
        assert np.allclose(fit.withinss, expected_withinss)