Example #1
0
def test_labels_assignement_and_inertia():
    # pure numpy implementation as easily auditable reference gold
    # implementation
    rng = np.random.RandomState(42)
    noisy_centers = centers + rng.normal(size=centers.shape)
    labels_gold = - np.ones(n_samples, dtype=np.int)
    mindist = np.empty(n_samples)
    mindist.fill(np.infty)
    for center_id in range(n_clusters):
        dist = np.sum((X - noisy_centers[center_id]) ** 2, axis=1)
        labels_gold[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)
    inertia_gold = mindist.sum()
    assert_true((mindist >= 0.0).all())
    assert_true((labels_gold != -1).all())

    # perform label assignement using the dense array input
    x_squared_norms = (X ** 2).sum(axis=1)
    labels_array, inertia_array = _labels_inertia(
        X, x_squared_norms, noisy_centers)
    assert_array_almost_equal(inertia_array, inertia_gold)
    assert_array_equal(labels_array, labels_gold)

    # perform label assignement using the sparse CSR input
    x_squared_norms_from_csr = csr_row_norm_l2(X_csr)
    labels_csr, inertia_csr = _labels_inertia(
        X_csr, x_squared_norms_from_csr, noisy_centers)
    assert_array_almost_equal(inertia_csr, inertia_gold)
    assert_array_equal(labels_csr, labels_gold)
Example #2
0
def test_labels_assignement_and_inertia():
    # pure numpy implementation as easily auditable reference gold
    # implementation
    rng = np.random.RandomState(42)
    noisy_centers = centers + rng.normal(size=centers.shape)
    labels_gold = -np.ones(n_samples, dtype=np.int)
    mindist = np.empty(n_samples)
    mindist.fill(np.infty)
    for center_id in range(n_clusters):
        dist = np.sum((X - noisy_centers[center_id])**2, axis=1)
        labels_gold[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)
    inertia_gold = mindist.sum()
    assert_true((mindist >= 0.0).all())
    assert_true((labels_gold != -1).all())

    # perform label assignement using the dense array input
    x_squared_norms = (X**2).sum(axis=1)
    labels_array, inertia_array = _labels_inertia(X, x_squared_norms,
                                                  noisy_centers)
    assert_array_almost_equal(inertia_array, inertia_gold)
    assert_array_equal(labels_array, labels_gold)

    # perform label assignement using the sparse CSR input
    x_squared_norms_from_csr = csr_row_norm_l2(X_csr)
    labels_csr, inertia_csr = _labels_inertia(X_csr, x_squared_norms_from_csr,
                                              noisy_centers)
    assert_array_almost_equal(inertia_csr, inertia_gold)
    assert_array_equal(labels_csr, labels_gold)
Example #3
0
def test_minibatch_update_consistency():
    """Check that dense and sparse minibatch update give the same results"""
    rng = np.random.RandomState(42)
    old_centers = centers + rng.normal(size=centers.shape)

    new_centers = old_centers.copy()
    new_centers_csr = old_centers.copy()

    counts = np.zeros(new_centers.shape[0], dtype=np.int32)
    counts_csr = np.zeros(new_centers.shape[0], dtype=np.int32)

    x_squared_norms = (X ** 2).sum(axis=1)
    x_squared_norms_csr = csr_row_norm_l2(X_csr, squared=True)

    buffer = np.zeros(centers.shape[1], dtype=np.double)
    buffer_csr = np.zeros(centers.shape[1], dtype=np.double)

    # extract a small minibatch
    X_mb = X[:10]
    X_mb_csr = X_csr[:10]
    x_mb_squared_norms = x_squared_norms[:10]
    x_mb_squared_norms_csr = x_squared_norms_csr[:10]

    # step 1: compute the dense minibatch update
    old_inertia, incremental_diff = _mini_batch_step(
        X_mb, x_mb_squared_norms, new_centers, counts,
        buffer, 1)
    assert_true(old_inertia > 0.0)

    # compute the new inertia on the same batch to check that it decreased
    labels, new_inertia = _labels_inertia(
        X_mb, x_mb_squared_norms, new_centers)
    assert_true(new_inertia > 0.0)
    assert_true(new_inertia < old_inertia)

    # check that the incremental difference computation is matching the
    # final observed value
    effective_diff = np.sum((new_centers - old_centers) ** 2)
    assert_almost_equal(incremental_diff, effective_diff)

    # step 2: compute the sparse minibatch update
    old_inertia_csr, incremental_diff_csr = _mini_batch_step(
        X_mb_csr, x_mb_squared_norms_csr, new_centers_csr, counts_csr,
        buffer_csr, 1)
    assert_true(old_inertia_csr > 0.0)

    # compute the new inertia on the same batch to check that it decreased
    labels_csr, new_inertia_csr = _labels_inertia(
        X_mb_csr, x_mb_squared_norms_csr, new_centers_csr)
    assert_true(new_inertia_csr > 0.0)
    assert_true(new_inertia_csr < old_inertia_csr)

    # check that the incremental difference computation is matching the
    # final observed value
    effective_diff = np.sum((new_centers_csr - old_centers) ** 2)
    assert_almost_equal(incremental_diff_csr, effective_diff)

    # step 3: check that sparse and dense updates lead to the same results
    assert_array_equal(labels, labels_csr)
    assert_array_almost_equal(new_centers, new_centers_csr)
    assert_almost_equal(incremental_diff, incremental_diff_csr)
    assert_almost_equal(old_inertia, old_inertia_csr)
    assert_almost_equal(new_inertia, new_inertia_csr)
Example #4
0
def test_square_norms():
    x_squared_norms = (X ** 2).sum(axis=1)
    x_squared_norms_from_csr = csr_row_norm_l2(X_csr)
    assert_array_almost_equal(x_squared_norms,
                              x_squared_norms_from_csr, 5)
Example #5
0
def test_minibatch_update_consistency():
    """Check that dense and sparse minibatch update give the same results"""
    rng = np.random.RandomState(42)
    old_centers = centers + rng.normal(size=centers.shape)

    new_centers = old_centers.copy()
    new_centers_csr = old_centers.copy()

    counts = np.zeros(new_centers.shape[0], dtype=np.int32)
    counts_csr = np.zeros(new_centers.shape[0], dtype=np.int32)

    x_squared_norms = (X**2).sum(axis=1)
    x_squared_norms_csr = csr_row_norm_l2(X_csr, squared=True)

    buffer = np.zeros(centers.shape[1], dtype=np.double)
    buffer_csr = np.zeros(centers.shape[1], dtype=np.double)

    # extract a small minibatch
    X_mb = X[:10]
    X_mb_csr = X_csr[:10]
    x_mb_squared_norms = x_squared_norms[:10]
    x_mb_squared_norms_csr = x_squared_norms_csr[:10]

    # step 1: compute the dense minibatch update
    old_inertia, incremental_diff = _mini_batch_step(X_mb, x_mb_squared_norms,
                                                     new_centers, counts,
                                                     buffer, 1)
    assert_true(old_inertia > 0.0)

    # compute the new inertia on the same batch to check that it decreased
    labels, new_inertia = _labels_inertia(X_mb, x_mb_squared_norms,
                                          new_centers)
    assert_true(new_inertia > 0.0)
    assert_true(new_inertia < old_inertia)

    # check that the incremental difference computation is matching the
    # final observed value
    effective_diff = np.sum((new_centers - old_centers)**2)
    assert_almost_equal(incremental_diff, effective_diff)

    # step 2: compute the sparse minibatch update
    old_inertia_csr, incremental_diff_csr = _mini_batch_step(
        X_mb_csr, x_mb_squared_norms_csr, new_centers_csr, counts_csr,
        buffer_csr, 1)
    assert_true(old_inertia_csr > 0.0)

    # compute the new inertia on the same batch to check that it decreased
    labels_csr, new_inertia_csr = _labels_inertia(X_mb_csr,
                                                  x_mb_squared_norms_csr,
                                                  new_centers_csr)
    assert_true(new_inertia_csr > 0.0)
    assert_true(new_inertia_csr < old_inertia_csr)

    # check that the incremental difference computation is matching the
    # final observed value
    effective_diff = np.sum((new_centers_csr - old_centers)**2)
    assert_almost_equal(incremental_diff_csr, effective_diff)

    # step 3: check that sparse and dense updates lead to the same results
    assert_array_equal(labels, labels_csr)
    assert_array_almost_equal(new_centers, new_centers_csr)
    assert_almost_equal(incremental_diff, incremental_diff_csr)
    assert_almost_equal(old_inertia, old_inertia_csr)
    assert_almost_equal(new_inertia, new_inertia_csr)
Example #6
0
def test_square_norms():
    x_squared_norms = (X**2).sum(axis=1)
    x_squared_norms_from_csr = csr_row_norm_l2(X_csr)
    assert_array_almost_equal(x_squared_norms, x_squared_norms_from_csr, 5)