Ejemplo n.º 1
0
def test_trustworthiness(input_type, n_samples, n_features, n_components):
    """Compare ``cuml_trustworthiness`` against ``sklearn_trustworthiness``.

    Blob data is generated and embedded with UMAP, then both trustworthiness
    functions are evaluated on the same (data, embedding) pair. The test
    passes when the two scores agree within a relative tolerance of 0.1%.

    Parameters
    ----------
    input_type : str
        ``'dataframe'`` hands ``cuml_trustworthiness`` cudf DataFrames built
        from the float32 arrays; any other value passes the float32 arrays
        themselves.
    n_samples : int
        Number of samples to generate. 40% of it (rounded) is used as the
        number of blob centers.
    n_features : int
        Number of features of the generated data.
    n_components : int
        Number of components requested from UMAP.
    """
    # Fixed seed: without it the data and the embedding differ on every run,
    # so a tolerance failure could not be reproduced.
    seed = 32

    def _to_gdf(array):
        # Build the frame one column at a time, naming columns "0", "1", ...
        frame = cudf.DataFrame()
        for col in range(array.shape[1]):
            frame[str(col)] = np.asarray(array[:, col], dtype=np.float32)
        return frame

    centers = round(n_samples*0.4)
    X, _ = make_blobs(n_samples=n_samples, centers=centers,
                      n_features=n_features, random_state=seed)

    # The embedding is fitted on the data as generated; both arrays are cast
    # to float32 only afterwards, before scoring.
    X_embedded = \
        UMAP(n_components=n_components, random_state=seed).fit_transform(X)
    X = X.astype(np.float32)
    X_embedded = X_embedded.astype(np.float32)

    if input_type == 'dataframe':
        score = cuml_trustworthiness(_to_gdf(X), _to_gdf(X_embedded))
    else:
        score = cuml_trustworthiness(X, X_embedded)

    # The reference score is always computed from the float32 arrays.
    sk_score = sklearn_trustworthiness(X, X_embedded)

    eps = 0.001
    assert sk_score * (1 - eps) <= score <= sk_score * (1 + eps)
Ejemplo n.º 2
0
def test_trustworthiness(input_type, n_samples, n_features, n_components,
                         batch_size):
    """Check ``cuml_trustworthiness`` against ``sklearn_trustworthiness``.

    Seeded blob data is embedded with a seeded UMAP; both scoring functions
    are then evaluated on the same float32 data and embedding, and the two
    scores must differ by at most 1e-3 in absolute value.

    Parameters
    ----------
    input_type : str
        ``'dataframe'`` wraps both arrays in ``cudf.DataFrame`` before they
        are passed to ``cuml_trustworthiness``; any other value passes the
        float32 arrays unchanged.
    n_samples : int
        Number of samples to generate. 40% of it (rounded) is used as the
        number of blob centers.
    n_features : int
        Number of features of the generated data.
    n_components : int
        Number of components requested from UMAP.
    batch_size : int
        Forwarded as ``batch_size`` to ``cuml_trustworthiness``.
    """
    seed = 32
    n_centers = round(n_samples*0.4)
    points, _ = make_blobs(n_samples=n_samples, centers=n_centers,
                           n_features=n_features, random_state=seed)

    # Fit on the data as generated; the float32 casts happen afterwards.
    reducer = UMAP(n_components=n_components, random_state=seed)
    embedding = reducer.fit_transform(points)
    points = points.astype(np.float32)
    embedding = embedding.astype(np.float32)

    # The reference score is taken from the arrays before any DataFrame
    # conversion.
    expected = sklearn_trustworthiness(points, embedding)

    if input_type == 'dataframe':
        points = cudf.DataFrame(points)
        embedding = cudf.DataFrame(embedding)

    actual = cuml_trustworthiness(points, embedding, batch_size=batch_size)

    tolerance = 1e-3
    difference = abs(actual - expected)
    assert difference <= tolerance