def _init_neighbors(self, adata):
    """Rebuild the NN-descent search state from a fitted `adata`'s stored
    neighbors metadata (`adata.uns['neighbors']`).

    Sets: `_use_rep`/`_rep` (the representation searched), `_n_pcs` (when a
    PCA representation is used), the umap init/search callables, the
    binarised symmetric `_search_graph`, and `_rp_forest` (or None).

    Uses the pre-0.4 umap-learn API (`make_initialisations` /
    `make_initialized_nnd_search`).
    """
    from umap.distances import named_distances
    from umap.nndescent import (
        make_initialisations,
        make_initialized_nnd_search,
    )

    params = adata.uns['neighbors']['params']

    # Pick the representation the original neighbors run used.
    if 'use_rep' in params:
        self._use_rep = params['use_rep']
        self._rep = adata.X if self._use_rep == 'X' else adata.obsm[self._use_rep]
    elif 'n_pcs' in params:
        self._use_rep = 'X_pca'
        self._n_pcs = params['n_pcs']
        self._rep = adata.obsm['X_pca'][:, : self._n_pcs]
    elif adata.n_vars > N_PCS and 'X_pca' in adata.obsm.keys():
        self._use_rep = 'X_pca'
        self._rep = adata.obsm['X_pca'][:, :N_PCS]
        self._n_pcs = self._rep.shape[1]
    else:
        # BUGFIX: the original chain had no fallback, leaving _use_rep/_rep
        # unset (AttributeError later) when no rep params are stored and
        # n_vars <= N_PCS (or no X_pca). Fall back to the raw data matrix.
        self._use_rep = 'X'
        self._rep = adata.X

    # Metric + optional keyword arguments as a positional tuple, as the
    # pre-0.4 umap nndescent helpers expect.
    if 'metric_kwds' in params:
        dist_args = tuple(params['metric_kwds'].values())
    else:
        dist_args = ()
    dist_func = named_distances[params['metric']]
    self._random_init, self._tree_init = make_initialisations(dist_func, dist_args)
    self._search = make_initialized_nnd_search(dist_func, dist_args)

    # Binarise the stored distance graph and symmetrise it for searching.
    search_graph = adata.uns['neighbors']['distances'].copy()
    search_graph.data = (search_graph.data > 0).astype(np.int8)
    self._search_graph = search_graph.maximum(search_graph.transpose())

    if 'rp_forest' in adata.uns['neighbors']:
        self._rp_forest = _rp_forest_generate(adata.uns['neighbors']['rp_forest'])
    else:
        self._rp_forest = None
def test_nn_search():
    """NN-descent search on held-out queries must recover >= 99% of the
    true 10 nearest neighbors (verified against a KDTree)."""
    reference = nn_data[100:]
    queries = nn_data[:100]

    knn_indices, knn_dists, rp_forest = nearest_neighbors(
        reference, 10, "euclidean", {}, False, np.random
    )
    graph = fuzzy_simplicial_set(
        nn_data, 10, np.random, "euclidean", {},
        knn_indices, knn_dists, False, 1.0, 1.0, False,
    )

    # Sparse binary adjacency over the training points, symmetrised.
    search_graph = sparse.lil_matrix(
        (reference.shape[0], reference.shape[0]), dtype=np.int8
    )
    search_graph.rows = knn_indices
    search_graph.data = (knn_dists != 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose()).tocsr()

    random_init, tree_init = make_initialisations(dist.euclidean, ())
    search = make_initialized_nnd_search(dist.euclidean, ())

    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    init = initialise_search(
        rp_forest, reference, queries, int(10 * 3),
        random_init, tree_init, rng_state,
    )
    result = search(
        reference, search_graph.indptr, search_graph.indices, init, queries
    )
    indices, _ = deheap_sort(result)
    indices = indices[:, :10]

    # Ground truth from an exact KDTree query.
    true_indices = KDTree(reference).query(queries, 10, return_distance=False)

    hits = 0.0
    for found, truth in zip(indices, true_indices):
        hits += np.sum(np.in1d(truth, found))
    percent_correct = hits / (queries.shape[0] * 10)

    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse NN-descent did not get 99% accuracy on nearest neighbors",
    )
def _init_dist_search(self, dist_args):
    """Build the initialise/search callables for NN-descent, dispatching on
    the installed umap-learn version (the nndescent API changed in 0.4).

    Parameters
    ----------
    dist_args
        Extra positional arguments forwarded to the metric function.
    """
    from functools import partial

    from umap.distances import named_distances
    from umap.nndescent import initialise_search

    # Reset all search state before rebuilding it.
    self._random_init = None
    self._tree_init = None
    self._initialise_search = None
    self._search = None
    self._dist_func = None

    metric_fn = named_distances[self._metric]

    if pkg_version('umap-learn') < version.parse("0.4.0"):
        # Legacy (< 0.4) API: factory functions produce the init/search pair.
        from umap.nndescent import (
            make_initialisations,
            make_initialized_nnd_search,
        )

        self._random_init, self._tree_init = make_initialisations(
            metric_fn, dist_args)
        init_fn = partial(
            initialise_search,
            init_from_random=self._random_init,
            init_from_tree=self._tree_init,
        )
        search_fn = make_initialized_nnd_search(metric_fn, dist_args)
    else:
        # 0.4+ API: functions take a jitted two-argument distance directly.
        from numba import njit
        from umap.nndescent import initialized_nnd_search

        @njit
        def partial_dist_func(x, y):
            # Close over the metric and its extra args so callers only
            # supply (x, y).
            return metric_fn(x, y, *dist_args)

        init_fn = partial(initialise_search, dist=partial_dist_func)
        search_fn = partial(initialized_nnd_search, dist=partial_dist_func)
        self._dist_func = partial_dist_func

    self._initialise_search = init_fn
    self._search = search_fn
def get_umap():
    """Return a UMAP embedding for the requested hyper-parameters as JSON.

    Loads a cached, pickled UMAP model when one exists for this
    (latent_dim, n_neighbors, min_dist) combination, re-attaching the
    non-picklable NN-descent callables; otherwise fits a fresh model.
    """
    latent_dim = request.json['latent_dim']
    n_neighbors = request.json['n_neighbors']
    min_dist = request.json['min_dist']

    pkl_path = abs_path('./data/{}/umap/umap{}-nn{}-dist{}.pkl').format(
        dset, latent_dim, n_neighbors, min_dist)

    if not os.path.exists(pkl_path):
        embedding = _fit_umap(latent_dim, n_neighbors, min_dist).embedding_
    else:
        # NOTE(review): pickle.load on a cache file — safe only while the
        # cache directory is fully trusted.
        with open(pkl_path, 'rb') as pkl_file:
            model = pickle.load(pkl_file)
        # The jitted search functions don't survive pickling; rebuild them.
        from umap.nndescent import make_initialisations, make_initialized_nnd_search
        model._random_init, model._tree_init = make_initialisations(
            model._distance_func, model._dist_args)
        model._search = make_initialized_nnd_search(
            model._distance_func, model._dist_args)
        umap_fit['{}-{}'.format(n_neighbors, min_dist)] = model
        embedding = model.embedding_

    return jsonify({'data': embedding.tolist()}), 200
def neighbors_update(adata, adata_new, k=10, queue_size=5, random_state=0):
    """Map each observation in `adata_new` onto the stored kNN graph of
    `adata`, using the pre-0.4 umap-learn NN-descent search.

    Only supports use_rep='X' for now (searches directly on `.X`).

    Returns
    -------
    (indices, dists)
        Arrays of shape (n_new, k): the k nearest training observations
        for every new observation, and their distances.
    """
    import umap.distances as dist
    from umap.nndescent import (
        make_initialisations,
        make_initialized_nnd_search,
        initialise_search,
    )
    from umap.umap_ import INT32_MAX, INT32_MIN
    from umap.utils import deheap_sort

    params = adata.uns['neighbors']['params']
    dist_args = (
        tuple(params['metric_kwds'].values()) if 'metric_kwds' in params else ()
    )
    dist_func = dist.named_distances[params['metric']]

    random_init, tree_init = make_initialisations(dist_func, dist_args)
    search = make_initialized_nnd_search(dist_func, dist_args)

    # Binarise the stored distance graph and symmetrise it for the search.
    search_graph = adata.uns['neighbors']['distances'].copy()
    search_graph.data = (search_graph.data > 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose())  # prune it?

    rng_state = check_random_state(random_state).randint(
        INT32_MIN, INT32_MAX, 3).astype(np.int64)

    if 'rp_forest' in adata.uns['neighbors']:
        rp_forest = _rp_forest_generate(adata.uns['neighbors']['rp_forest'])
    else:
        rp_forest = None

    train, test = adata.X, adata_new.X
    init = initialise_search(
        rp_forest, train, test, int(k * queue_size),
        random_init, tree_init, rng_state)
    result = search(train, search_graph.indptr, search_graph.indices, init, test)
    indices, dists = deheap_sort(result)
    return indices[:, :k], dists[:, :k]