Beispiel #1
0
def test_mem_vec_same_clusters():
    """
    Check that membership vectors have one column per fitted cluster.

    Two scenarios are run against the module-level ``X`` / ``X_test`` data:
    a flat clustering fitted with ``n_clusters=None``, and a second one
    refitted with two fewer clusters than the first fit's labels contain.
    """
    def check_memberships(probs, expected_n_clusters):
        # One column per cluster of the fitted model,
        assert_equal(probs.shape[1], expected_n_clusters)
        # one row per point of the test set,
        assert_equal(len(probs), len(X_test))
        # and every entry is <= 1, up to a tiny floating-point tolerance.
        upper_bound = np.ones(probs.shape) + 1.e-14
        assert_array_less(probs, upper_bound)

    # Scenario 1: n_clusters left unspecified at fit time.
    auto_model = HDBSCAN_flat(X, n_clusters=None)
    auto_probs = membership_vector_flat(auto_model, X_test)
    check_memberships(auto_probs,
                      n_clusters_from_labels(auto_model.labels_))

    # ========================================
    # Scenario 2: explicit n_clusters, two fewer than scenario 1 produced.
    requested = n_clusters_from_labels(auto_model.labels_) - 2
    fixed_model = HDBSCAN_flat(X, n_clusters=requested)
    fixed_probs = membership_vector_flat(fixed_model, X_test)
    check_memberships(fixed_probs, requested)
Beispiel #2
0
def test_mem_vec_diff_clusters():
    """
    Verify membership vector produces as many clusters as requested

    ``membership_vector_flat`` is called with an ``n_clusters`` that differs
    from the one the clusterer was fitted with, first for a clusterer fitted
    with ``n_clusters=None`` and then for one fitted with an explicit value.
    In both cases the result must have one column per requested cluster, one
    row per point in ``X_test``, and no entry greater than 1.
    """
    # Ignore user warnings in this function only.  The filter is installed
    # inside catch_warnings() so that the global warning filters are restored
    # on exit and later tests still see their own UserWarnings.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)

        # Given a flat clustering trained for n_clusters picked by HDBSCAN,
        n_clusters_fit = None
        clusterer = HDBSCAN_flat(X, n_clusters=n_clusters_fit)
        n_clusters_fitted = n_clusters_from_labels(clusterer.labels_)

        # When membership_vector_flat is called with new data for some
        # n_clusters,
        n_clusters_predict = n_clusters_fitted + 3
        memberships = membership_vector_flat(clusterer,
                                             X_test,
                                             n_clusters=n_clusters_predict)

        # Then the number of clusters in memberships should be as requested,
        assert_equal(memberships.shape[1], n_clusters_predict)
        # and the number of points should equal those in the test set
        assert_equal(len(memberships), len(X_test))
        # and all probabilities are <= 1.
        assert_array_less(memberships, np.ones(memberships.shape) + 1.e-14)

        # ========================================
        # Given a flat clustering for a specified n_clusters,
        n_clusters_fit = n_clusters_from_labels(clusterer.labels_) + 2
        clusterer = HDBSCAN_flat(X, n_clusters=n_clusters_fit)

        # When membership_vector_flat is called with new data for some
        # n_clusters,
        n_clusters_predict = n_clusters_fit + 3
        memberships = membership_vector_flat(clusterer,
                                             X_test,
                                             n_clusters=n_clusters_predict)

        # Then the number of clusters in memberships should be as requested,
        assert_equal(memberships.shape[1], n_clusters_predict)
        # and the number of points should equal those in the test set
        assert_equal(len(memberships), len(X_test))
        # and all probabilities are <= 1.
        assert_array_less(memberships, np.ones(memberships.shape) + 1.e-14)