def test_mem_vec_same_clusters():
    """
    Check that membership vectors have one column per fitted cluster.

    Two flat clusterings are exercised: one where n_clusters is left as
    None, and one where an explicit n_clusters is requested. In both cases
    membership_vector_flat is called on X_test without overriding
    n_clusters.
    """

    def check_memberships(vectors, expected_n_clusters):
        # One column per cluster,
        assert_equal(vectors.shape[1], expected_n_clusters)
        # one row per point of the test set,
        assert_equal(len(vectors), len(X_test))
        # and every probability is <= 1 (with a 1e-14 tolerance).
        upper_bound = np.ones(vectors.shape) + 1.e-14
        assert_array_less(vectors, upper_bound)

    # Scenario 1: n_clusters=None, so the number of clusters is whatever
    # the fitted labels contain.
    auto_model = HDBSCAN_flat(X, n_clusters=None)
    check_memberships(
        membership_vector_flat(auto_model, X_test),
        n_clusters_from_labels(auto_model.labels_),
    )

    # ========================================
    # Scenario 2: explicitly request two clusters fewer than scenario 1
    # produced, and expect exactly that many membership columns.
    n_requested = n_clusters_from_labels(auto_model.labels_) - 2
    fixed_model = HDBSCAN_flat(X, n_clusters=n_requested)
    check_memberships(
        membership_vector_flat(fixed_model, X_test),
        n_requested,
    )
def test_mem_vec_diff_clusters():
    """
    Verify membership vector produces as many clusters as requested

    Two flat clusterings are exercised: one fitted with n_clusters=None and
    one fitted with an explicit n_clusters. In both cases
    membership_vector_flat is asked for three more clusters than the fit
    produced, and the result must have exactly that many columns.
    """
    # Ignore user warnings in this function only. catch_warnings restores
    # the previous filter state on exit, so the "ignore" rule does not leak
    # into tests that run afterwards.
    # NOTE(review): presumably the warnings come from requesting a
    # different n_clusters at prediction time than at fit time -- confirm.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)

        # Given a flat clustering trained for n_clusters picked by HDBSCAN,
        n_clusters_fit = None
        clusterer = HDBSCAN_flat(X, n_clusters=n_clusters_fit)
        n_clusters_fitted = n_clusters_from_labels(clusterer.labels_)

        # When membership_vector_flat is called with new data for some
        # n_clusters,
        n_clusters_predict = n_clusters_fitted + 3
        memberships = membership_vector_flat(
            clusterer, X_test, n_clusters=n_clusters_predict)

        # Then the number of clusters in memberships should be as requested,
        assert_equal(memberships.shape[1], n_clusters_predict)
        # and the number of points should equal those in the test set
        assert_equal(len(memberships), len(X_test))
        # and all probabilities are <= 1.
        assert_array_less(memberships,
                          np.ones(memberships.shape) + 1.e-14)

        # ========================================
        # Given a flat clustering for a specified n_clusters,
        n_clusters_fit = n_clusters_from_labels(clusterer.labels_) + 2
        clusterer = HDBSCAN_flat(X, n_clusters=n_clusters_fit)

        # When membership_vector_flat is called with new data for some
        # n_clusters,
        n_clusters_predict = n_clusters_fit + 3
        memberships = membership_vector_flat(
            clusterer, X_test, n_clusters=n_clusters_predict)

        # Then the number of clusters in memberships should be as requested,
        assert_equal(memberships.shape[1], n_clusters_predict)
        # and the number of points should equal those in the test set
        assert_equal(len(memberships), len(X_test))
        # and all probabilities are <= 1.
        assert_array_less(memberships,
                          np.ones(memberships.shape) + 1.e-14)