def diffmap(
    data: AnnData,
    n_components: int = 100,
    rep: str = "pca",
    solver: str = "eigsh",
    random_state: int = 0,
    max_t: float = 5000,
) -> None:
    """Calculate Diffusion Map.

    Parameters
    ----------
    data: ``anndata.AnnData``
        Annotated data matrix with rows for cells and columns for genes.

    n_components: ``int``, optional, default: ``100``
        Number of diffusion components to calculate.

    rep: ``str``, optional, default: ``"pca"``
        Embedding representation of data used for calculating the Diffusion Map.
        By default, use PCA coordinates.

    solver: ``str``, optional, default: ``"eigsh"``
        Solver for eigen decomposition:
            * ``"eigsh"``: default setting. Use *scipy* `eigsh <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html>`_ as the solver to find eigenvalues and eigenvectors using the Implicitly Restarted Lanczos Method.
            * ``"randomized"``: Use *scikit-learn* `randomized_svd <https://scikit-learn.org/stable/modules/generated/sklearn.utils.extmath.randomized_svd.html>`_ as the solver to calculate a truncated randomized SVD.

    random_state: ``int``, optional, default: ``0``
        Random seed set for reproducing results.

    max_t: ``float``, optional, default: ``5000``
        pegasus tries to determine the best t to sum up to between ``[1, max_t]``.

    Returns
    -------
    ``None``

    Update ``data.obsm``:
        * ``data.obsm["X_diffmap"]``: Diffusion Map matrix of the data.
        * ``data.obsm["X_phi"]``: Third output (``Phi``) of ``calculate_diffusion_map``, stored as returned.

    Update ``data.uns``:
        * ``data.uns["diffmap_evals"]``: Eigenvalues corresponding to Diffusion Map matrix.

    Examples
    --------
    >>> pg.diffmap(adata)
    """
    # Normalize the representation key first; the affinity matrix lookup
    # depends on the normalized value.
    rep_key = update_rep(rep)
    affinity = W_from_rep(data, rep_key)

    solver_options = {
        "n_components": n_components,
        "solver": solver,
        "random_state": random_state,
        "max_t": max_t,
    }
    diffmap_coords, evals, phi = calculate_diffusion_map(affinity, **solver_options)

    # Results are stored exactly as returned, without dtype or layout conversion.
    data.obsm["X_diffmap"] = diffmap_coords
    data.uns["diffmap_evals"] = evals
    data.obsm["X_phi"] = phi
def net_fle(
    data: MultimodalData,
    file_name: str = None,
    n_jobs: int = -1,
    rep: str = "diffmap",
    K: int = 50,
    full_speed: bool = False,
    target_change_per_node: float = 2.0,
    target_steps: int = 5000,
    is3d: bool = False,
    memory: int = 8,
    random_state: int = 0,
    select_frac: float = 0.1,
    select_K: int = 25,
    select_alpha: float = 1.0,
    net_alpha: float = 0.1,
    polish_target_steps: int = 1500,
    out_basis: str = "net_fle",
) -> None:
    """Construct Net-Force-directed (FLE) graph.

    Net-FLE is an approximated FLE graph using Deep Learning model to improve the speed.

    In specific, the deep model used is MLPRegressor_, the *scikit-learn* implementation of Multi-layer Perceptron regressor.

    See [Li20]_ for details.

    .. _MLPRegressor: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Annotated data matrix with rows for cells and columns for genes.

    file_name: ``str``, optional, default: ``None``
        Temporary file to store the coordinates as the input to forceatlas2. If ``None``, use ``tempfile.mkstemp`` to generate file name.
        The down-sampled layout uses ``file_name + ".small"``.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use. If ``-1``, use all available threads.

    rep: ``str``, optional, default: ``"diffmap"``
        Representation of data used for the calculation. By default, use Diffusion Map coordinates. If ``None``, use the count matrix ``data.X``.

    K: ``int``, optional, default: ``50``
        Number of nearest neighbors to be considered during the computation.

    full_speed: ``bool``, optional, default: ``False``
        * If ``True``, use multiple threads in constructing ``hnsw`` index. However, the kNN results are not reproducible.
        * Otherwise, use only one thread to make sure results are reproducible.

    target_change_per_node: ``float``, optional, default: ``2.0``
        Target change per node to stop ForceAtlas2.

    target_steps: ``int``, optional, default: ``5000``
        Maximum number of iterations before stopping the ForceAtlas2 algorithm.

    is3d: ``bool``, optional, default: ``False``
        If ``True``, calculate 3D force-directed layout.

    memory: ``int``, optional, default: ``8``
        Memory size in GB for the Java FA2 component. By default, use 8GB memory.

    random_state: ``int``, optional, default: ``0``
        Random seed set for reproducing results.

    select_frac: ``float``, optional, default: ``0.1``
        Down sampling fraction on the cells.

    select_K: ``int``, optional, default: ``25``
        Number of neighbors to be used to estimate local density for each data point for down sampling.

    select_alpha: ``float``, optional, default: ``1.0``
        Weight the down sample to be proportional to ``radius ** select_alpha``.

    net_alpha: ``float``, optional, default: ``0.1``
        L2 penalty (regularization term) parameter of the deep regressor.

    polish_target_steps: ``int``, optional, default: ``1500``
        After running the deep regressor to predict new coordinate, Number of ForceAtlas2 iterations.

    out_basis: ``str``, optional, default: ``"net_fle"``
        Key name for calculated FLE coordinates to store.

    Returns
    -------
    ``None``

    Update ``data.obsm``:
        * ``data.obsm['X_' + out_basis]``: Net FLE coordinates of the data.
        * ``data.obsm['X_' + out_basis + '_pred']``: Initial coordinates handed to the final ForceAtlas2 run: the down-sampled layout for selected cells and the regressor predictions for the remaining cells.

    Update ``data.obs``:
        * ``data.obs['ds_diffmap_selected']``: Boolean array to indicate which cells are selected during the down sampling phase. The key name is fixed and does not depend on ``rep``.

    Update ``data.uns``:
        * ``data.uns['X_' + out_basis + '_small']``: FLE coordinates of the down-sampled cells only.
        * ``data.uns['ds_' + rep + '_knn_indices']`` and ``data.uns['ds_' + rep + '_knn_distances']``: kNN results computed on the down-sampled cells, where ``rep`` is the value returned by ``update_rep``.

    Raises
    ------
    ValueError
        If ``knn_is_cached`` reports that the kNN results for ``rep`` with ``select_K`` neighbors are not available.

    Examples
    --------
    >>> pg.net_fle(data)
    """
    if file_name is None:
        import os
        import tempfile

        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Only the path is used below, so close the descriptor
        # immediately instead of leaking it for the life of the process.
        fd, file_name = tempfile.mkstemp()
        os.close(fd)

    n_jobs = effective_n_jobs(n_jobs)
    rep = update_rep(rep)

    # Build the kNN graph / affinity matrix for this representation on demand.
    if ("W_" + rep) not in data.uns:
        neighbors(
            data,
            K=K,
            rep=rep,
            n_jobs=n_jobs,
            random_state=random_state,
            full_speed=full_speed,
        )

    indices_key = rep + "_knn_indices"
    distances_key = rep + "_knn_distances"

    # Down sampling needs cached kNN distances covering at least select_K
    # neighbors (as judged by knn_is_cached).
    if not knn_is_cached(data, indices_key, distances_key, select_K):
        raise ValueError("Please run neighbors first!")

    selected = select_cells(
        data.uns[distances_key],
        select_frac,
        K=select_K,
        alpha=select_alpha,
        random_state=random_state,
    )

    X_full = X_from_rep(data, rep)
    X = X_full[selected, :]

    # Recompute kNN and the affinity matrix on the down-sampled cells only.
    ds_indices_key = "ds_" + rep + "_knn_indices"
    ds_distances_key = "ds_" + rep + "_knn_distances"
    indices, distances = calculate_nearest_neighbors(
        X, K=K, n_jobs=n_jobs, random_state=random_state, full_speed=full_speed
    )
    data.uns[ds_indices_key] = indices
    data.uns[ds_distances_key] = distances

    W = calculate_affinity_matrix(indices, distances)

    # Layout of the down-sampled cells; uses its own file so it does not
    # collide with the final run below.
    X_fle = calc_force_directed_layout(
        W,
        file_name + ".small",
        n_jobs,
        target_change_per_node,
        target_steps,
        is3d,
        memory,
        random_state,
    )

    data.uns["X_" + out_basis + "_small"] = X_fle
    data.obs["ds_diffmap_selected"] = selected

    # Selected cells keep their computed layout; the remaining cells get
    # coordinates predicted by the regressor trained on the selected ones.
    n_components = 2 if not is3d else 3
    Y_init = np.zeros((data.shape[0], n_components), dtype=np.float64)
    Y_init[selected, :] = X_fle
    Y_init[~selected, :] = net_train_and_predict(
        X, X_fle, X_full[~selected, :], net_alpha, random_state, verbose=True
    )

    data.obsm["X_" + out_basis + "_pred"] = Y_init

    # Polish the combined initial coordinates with a ForceAtlas2 run on the
    # full affinity matrix, limited to polish_target_steps iterations.
    data.obsm["X_" + out_basis] = calc_force_directed_layout(
        W_from_rep(data, rep),
        file_name,
        n_jobs,
        target_change_per_node,
        polish_target_steps,
        is3d,
        memory,
        random_state,
        init=Y_init,
    )
def fle(
    data: MultimodalData,
    file_name: str = None,
    n_jobs: int = -1,
    rep: str = "diffmap",
    K: int = 50,
    full_speed: bool = False,
    target_change_per_node: float = 2.0,
    target_steps: int = 5000,
    is3d: bool = False,
    memory: int = 8,
    random_state: int = 0,
    out_basis: str = "fle",
) -> None:
    """Construct the Force-directed (FLE) graph.

    This implementation uses forceatlas2-python_ package, which is a Python wrapper of ForceAtlas2_.

    See [Jacomy14]_ for details on FLE.

    .. _forceatlas2-python: https://github.com/klarman-cell-observatory/forceatlas2-python
    .. _ForceAtlas2: https://github.com/klarman-cell-observatory/forceatlas2

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Annotated data matrix with rows for cells and columns for genes.

    file_name: ``str``, optional, default: ``None``
        Temporary file to store the coordinates as the input to forceatlas2. If ``None``, use ``tempfile.mkstemp`` to generate file name.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use. If ``-1``, use all available threads.

    rep: ``str``, optional, default: ``"diffmap"``
        Representation of data used for the calculation. By default, use Diffusion Map coordinates. If ``None``, use the count matrix ``data.X``.

    K: ``int``, optional, default: ``50``
        Number of nearest neighbors to be considered during the computation.

    full_speed: ``bool``, optional, default: ``False``
        * If ``True``, use multiple threads in constructing ``hnsw`` index. However, the kNN results are not reproducible.
        * Otherwise, use only one thread to make sure results are reproducible.

    target_change_per_node: ``float``, optional, default: ``2.0``
        Target change per node to stop ForceAtlas2.

    target_steps: ``int``, optional, default: ``5000``
        Maximum number of iterations before stopping the ForceAtlas2 algorithm.

    is3d: ``bool``, optional, default: ``False``
        If ``True``, calculate 3D force-directed layout.

    memory: ``int``, optional, default: ``8``
        Memory size in GB for the Java FA2 component. By default, use 8GB memory.

    random_state: ``int``, optional, default: ``0``
        Random seed set for reproducing results.

    out_basis: ``str``, optional, default: ``"fle"``
        Key name for calculated FLE coordinates to store.

    Returns
    -------
    ``None``

    Update ``data.obsm``:
        * ``data.obsm['X_' + out_basis]``: FLE coordinates of the data.

    Examples
    --------
    >>> pg.fle(data)
    """
    if file_name is None:
        import os
        import tempfile

        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Only the path is used below, so close the descriptor
        # immediately instead of leaking it for the life of the process.
        fd, file_name = tempfile.mkstemp()
        os.close(fd)

    n_jobs = effective_n_jobs(n_jobs)
    rep = update_rep(rep)

    # Build the kNN graph / affinity matrix for this representation on demand.
    if ("W_" + rep) not in data.uns:
        neighbors(
            data,
            K=K,
            rep=rep,
            n_jobs=n_jobs,
            random_state=random_state,
            full_speed=full_speed,
        )

    data.obsm["X_" + out_basis] = calc_force_directed_layout(
        W_from_rep(data, rep),
        file_name,
        n_jobs,
        target_change_per_node,
        target_steps,
        is3d,
        memory,
        random_state,
    )
def diffmap(
    data: MultimodalData,
    n_components: int = 100,
    rep: str = "pca",
    solver: str = "eigsh",
    max_t: float = 5000,
    n_jobs: int = -1,
    random_state: int = 0,
) -> None:
    """Calculate Diffusion Map.

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Annotated data matrix with rows for cells and columns for genes.

    n_components: ``int``, optional, default: ``100``
        Number of diffusion components to calculate.

    rep: ``str``, optional, default: ``"pca"``
        Embedding representation of data used for calculating the Diffusion Map.
        By default, use PCA coordinates.

    solver: ``str``, optional, default: ``"eigsh"``
        Solver for eigen decomposition:
            * ``"eigsh"``: default setting. Use *scipy* `eigsh <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html>`_ as the solver to find eigenvalues and eigenvectors using the Implicitly Restarted Lanczos Method.
            * ``"randomized"``: Use *scikit-learn* `randomized_svd <https://scikit-learn.org/stable/modules/generated/sklearn.utils.extmath.randomized_svd.html>`_ as the solver to calculate a truncated randomized SVD.

    max_t: ``float``, optional, default: ``5000``
        pegasus tries to determine the best t to sum up to between ``[1, max_t]``.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use. ``-1`` refers to using all physical CPU cores.

    random_state: ``int``, optional, default: ``0``
        Random seed set for reproducing results.

    Returns
    -------
    ``None``

    Update ``data.obsm``:
        * ``data.obsm["X_diffmap"]``: Diffusion Map matrix of the data, stored as a contiguous ``float32`` array.
        * ``data.obsm["X_phi"]``: Third output (``Phi``) of ``calculate_diffusion_map``, stored as a contiguous ``float32`` array.

    Update ``data.uns``:
        * ``data.uns["diffmap_evals"]``: Eigenvalues corresponding to Diffusion Map matrix, cast to ``float32``.
        * Removes ``"diffmap_knn_indices"``, ``"diffmap_knn_distances"`` and ``"W_diffmap"`` if present.

    Examples
    --------
    >>> pg.diffmap(data)
    """
    rep_key = update_rep(rep)
    diffmap_coords, evals, phi = calculate_diffusion_map(
        W_from_rep(data, rep_key),
        n_components=n_components,
        solver=solver,
        max_t=max_t,
        n_jobs=n_jobs,
        random_state=random_state,
    )

    data.obsm["X_diffmap"] = np.ascontiguousarray(diffmap_coords, dtype=np.float32)
    data.uns["diffmap_evals"] = evals.astype(np.float32)
    data.obsm["X_phi"] = np.ascontiguousarray(phi, dtype=np.float32)

    # Remove previous FLE calculations: cached kNN results and the affinity
    # matrix keyed on "diffmap" were derived from the old diffusion map.
    for stale_key in ("diffmap_knn_indices", "diffmap_knn_distances", "W_diffmap"):
        data.uns.pop(stale_key, None)