def test_trustworthiness(input_type, n_samples, n_features, n_components):
    """Check cuML's trustworthiness score against scikit-learn's.

    Blobs are generated, embedded with UMAP, and both implementations score
    the same (data, embedding) pair. The cuML score must fall within a 0.1%
    relative band around the scikit-learn score.

    Data generation and the embedding are seeded so the comparison is
    reproducible; unseeded, the inputs changed on every run and the tight
    tolerance check could fail intermittently.

    NOTE(review): another definition named ``test_trustworthiness`` appears
    later in this module and would shadow this one at import time -- confirm
    which of the two is meant to be collected.

    Parameters
    ----------
    input_type : str
        ``'dataframe'`` hands cudf DataFrames to the cuML scorer; any other
        value passes the arrays through unchanged.
    n_samples : int
        Number of samples to generate; 40% of it (rounded) is used as the
        number of blob centers.
    n_features : int
        Number of features of the generated data.
    n_components : int
        Number of components of the UMAP embedding.
    """
    seed = 32

    def _as_gdf(array):
        # Build the cudf DataFrame one column at a time, keyed by the
        # stringified column index.
        frame = cudf.DataFrame()
        for col in range(array.shape[1]):
            frame[str(col)] = np.asarray(array[:, col], dtype=np.float32)
        return frame

    centers = round(n_samples * 0.4)
    X, _ = make_blobs(n_samples=n_samples, centers=centers,
                      n_features=n_features, random_state=seed)
    X_embedded = UMAP(n_components=n_components,
                      random_state=seed).fit_transform(X)

    # Both scorers are compared on float32 copies of the same inputs.
    X = X.astype(np.float32)
    X_embedded = X_embedded.astype(np.float32)

    if input_type == 'dataframe':
        score = cuml_trustworthiness(_as_gdf(X), _as_gdf(X_embedded))
    else:
        score = cuml_trustworthiness(X, X_embedded)

    sk_score = sklearn_trustworthiness(X, X_embedded)

    # Relative tolerance around the scikit-learn reference value.
    eps = 0.001
    assert sk_score * (1 - eps) <= score <= sk_score * (1 + eps)
def test_trustworthiness(input_type, n_samples, n_features, n_components,
                         batch_size):
    """Compare cuML's batched trustworthiness score with scikit-learn's.

    Seeded blobs are embedded with a seeded UMAP, both arrays are cast to
    float32, and the scikit-learn score is computed as the reference. The
    cuML score, computed with the requested ``batch_size``, must be within
    an absolute distance of 1e-3 from that reference.

    Parameters
    ----------
    input_type : str
        ``'dataframe'`` wraps both inputs in ``cudf.DataFrame`` before they
        are passed to the cuML scorer; any other value passes them as-is.
    n_samples : int
        Number of samples to generate; 40% of it (rounded) is used as the
        number of blob centers.
    n_features : int
        Number of features of the generated data.
    n_components : int
        Number of components of the UMAP embedding.
    batch_size : int
        Forwarded to the cuML scorer as ``batch_size``.
    """
    seed = 32
    tolerance = 1e-3

    n_centers = round(n_samples * 0.4)
    data, _ = make_blobs(n_samples=n_samples, centers=n_centers,
                         n_features=n_features, random_state=seed)
    reducer = UMAP(n_components=n_components, random_state=seed)
    embedding = reducer.fit_transform(data)

    data = data.astype(np.float32)
    embedding = embedding.astype(np.float32)

    # The reference score is taken before any conversion to cudf, because
    # the names below are rebound to DataFrames in the 'dataframe' case.
    expected = sklearn_trustworthiness(data, embedding)

    if input_type == 'dataframe':
        data = cudf.DataFrame(data)
        embedding = cudf.DataFrame(embedding)

    actual = cuml_trustworthiness(data, embedding, batch_size=batch_size)
    assert abs(actual - expected) <= tolerance