# Project the iris measurements with UMAP (default settings) and show the
# first two embedding coordinates as a scatter plot coloured by class label.
import matplotlib.pyplot as plt
import umap
from sklearn.datasets import load_iris

# return_X_y=True yields the (data, target) pair directly instead of a Bunch.
features, labels = load_iris(return_X_y=True)

# Fit the reducer on the features and transform them in one step.
projection = umap.UMAP().fit_transform(features)

# Name the two plotted coordinates before handing them to matplotlib.
first_axis = projection[:, 0]
second_axis = projection[:, 1]
plt.scatter(first_axis, second_axis, c=labels)
plt.show()
# In this example, we create a scikit-learn pipeline that includes a UMAP
# transformer to reduce the dimensionality of the data before passing it to a
# K-Nearest Neighbors classifier. We then train the model and output the
# accuracy on the test set.
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

import umap

digits = load_digits()

# No random_state is passed, so the split (and therefore the reported
# accuracy) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target)

# Preprocessing stage: standardise the features, then reduce them to 30
# components with UMAP.
preprocessor = make_pipeline(StandardScaler(), umap.UMAP(n_components=30))

# Classification stage: 3-nearest-neighbours on the reduced features.
knn = KNeighborsClassifier(n_neighbors=3)

# Chain preprocessing and classifier so fit/score run both stages together.
model = make_pipeline(preprocessor, knn)
model.fit(X_train, y_train)

score = model.score(X_test, y_test)
print("Accuracy:", score)

# Package library: scikit-learn