Beispiel #1
0
def smoothen_dists(store, z_idx, z_dist, lc: float, bw: float, chunk_size: int = 100000):
    """
    Smoothens KNN distances and stores the resulting edges and weights.

    The KNN table is processed in batches of rows. For every batch the
    distances are passed through UMAP's `smooth_knn_dist` and
    `compute_membership_strengths`, and the returned (row, col, value)
    triplets are appended to the 'edges' and 'weights' datasets created in
    `store`. Once all batches are written, weights equal to 0 are replaced
    by the smallest non-zero weight.

    Args:
        store: Passed as-is to `create_zarr_dataset`; the 'edges' and
            'weights' datasets are created in it.
        z_idx: 2D array-like of shape (n_cells, n_neighbors) supporting row
            slicing (presumably the KNN indices; confirm against caller).
        z_dist: Array-like sliced with the same row ranges as `z_idx`
            (presumably the matching KNN distances; confirm against caller).
        lc: Forwarded to `smooth_knn_dist` as `local_connectivity`.
        bw: Forwarded to `smooth_knn_dist` as `bandwidth`.
        chunk_size: Chunk length of the created datasets. Also sets the
            batch size: each batch holds `chunk_size // n_neighbors` rows,
            but never fewer than one row.

    Returns:
        None

    """
    from umap.umap_ import smooth_knn_dist, compute_membership_strengths

    umap_is_latest = _is_umap_version_new()

    n_cells, n_neighbors = z_idx.shape
    zge = create_zarr_dataset(store, 'edges', (chunk_size,), ('u8', 'u8'),
                              (n_cells * n_neighbors, 2))
    zgw = create_zarr_dataset(store, 'weights', (chunk_size,), 'f8',
                              (n_cells * n_neighbors,))
    last_row = 0
    val_counts = 0
    # A chunk_size smaller than n_neighbors would otherwise give a step of 0,
    # which makes range() raise ValueError.
    step = max(1, chunk_size // n_neighbors)
    for i in tqdm(range(0, n_cells, step), desc='Smoothening KNN distances'):
        stop = min(i + step, n_cells)
        ki, kv = z_idx[i:stop, :], z_dist[i:stop, :]
        kv = kv.astype(np.float32, order='C')
        sigmas, rhos = smooth_knn_dist(kv, k=n_neighbors,
                                       local_connectivity=lc, bandwidth=bw)
        # The number of values returned depends on the installed UMAP version.
        if umap_is_latest:
            rows, cols, vals, _ = compute_membership_strengths(ki, kv, sigmas, rhos)
        else:
            rows, cols, vals = compute_membership_strengths(ki, kv, sigmas, rhos)
        # Row ids are relative to the batch; shift them by the rows already written.
        rows = rows + last_row
        start = val_counts
        end = val_counts + len(rows)
        last_row = rows[-1] + 1
        val_counts += len(rows)
        zge[start:end, 0] = rows
        zge[start:end, 1] = cols
        zgw[start:end] = vals
    # Fixing edges with 0 weights
    w = zgw[:]
    zero_mask = w == 0
    if zero_mask.any():
        non_zero = w[~zero_mask]
        # With no non-zero weight there is no replacement value; leave the
        # weights untouched instead of failing on min() of an empty array.
        if non_zero.size > 0:
            w[zero_mask] = non_zero.min()
            zgw[:] = w
    return None
Beispiel #2
0
def smooth_knn(nn_data, local_connectivity=1.0):
    """Return the per-row sums of the smoothed 10-nearest-neighbour kernel.

    Runs a euclidean 10-nearest-neighbour search on `nn_data`, calibrates
    `sigmas` and `rhos` with `smooth_knn_dist`, and sums
    `exp(-max(d - rho, 0) / sigma)` over the neighbours of every row.

    Args:
        nn_data: Data handed to `nearest_neighbors`.
        local_connectivity: Forwarded to `smooth_knn_dist`.

    Returns:
        1D array with one kernel sum per row of the distance matrix.
    """
    neighbor_result = nearest_neighbors(
        nn_data, 10, "euclidean", {}, False, np.random
    )
    _, dists, _ = neighbor_result

    sigmas, rhos = smooth_knn_dist(
        dists, 10.0, local_connectivity=local_connectivity
    )

    # Distances below rho are clamped to zero after the shift.
    clipped = np.maximum(dists - rhos[:, np.newaxis], 0.0)
    kernel = np.exp(-(clipped / sigmas[:, np.newaxis]))
    return np.sum(kernel, axis=1)
Beispiel #3
0
def _fuzzy_simplicial_set(X, n_neighbors, random_state,
    metric, metric_kwds=None, knn_indices=None, knn_dists=None, angular=False,
    set_op_mix_ratio=1.0, local_connectivity=1.0, apply_set_operations=True,
    verbose=False, return_dists=None):
    '''
    Overwrite the UMAP `fuzzy_simplicial_set` function to allow computation with float64.

    Args:
        X: Input data; `X.shape[0]` fixes the side length of the returned
            square matrices. Only passed to `nearest_neighbors` when the KNN
            arrays are not supplied.
        n_neighbors: Number of neighbours, used for the KNN search and (as a
            float) for `smooth_knn_dist`.
        random_state: Forwarded to `nearest_neighbors`.
        metric: Forwarded to `nearest_neighbors`.
        metric_kwds: Keyword arguments for the metric, forwarded to
            `nearest_neighbors`. `None` (the default) means an empty dict.
        knn_indices: Precomputed neighbour indices. The KNN search is run
            unless both `knn_indices` and `knn_dists` are given.
        knn_dists: Precomputed neighbour distances (see `knn_indices`).
        angular: Forwarded to `nearest_neighbors`.
        set_op_mix_ratio: Weight of the union term; `1 - set_op_mix_ratio`
            weights the intersection (element-wise product) term.
        local_connectivity: Forwarded (as a float) to `smooth_knn_dist`.
        apply_set_operations: When true, combine the directed graph with its
            transpose using `set_op_mix_ratio`.
        verbose: Forwarded to `nearest_neighbors`.
        return_dists: `None` returns a 3-tuple. Any other value returns a
            4-tuple whose last item is a symmetrised distance matrix when
            truthy and `None` when falsy.

    Returns:
        `(result, sigmas, rhos)` if `return_dists` is None, otherwise
        `(result, sigmas, rhos, dists)`.
    '''
    # A fresh dict per call; a `{}` default would be one object shared by every call.
    if metric_kwds is None:
        metric_kwds = {}

    if knn_indices is None or knn_dists is None:
        knn_indices, knn_dists, _ = nearest_neighbors(
            X, n_neighbors, metric, metric_kwds, angular, random_state, verbose=verbose,
        )

    sigmas, rhos = smooth_knn_dist(
        knn_dists, float(n_neighbors), local_connectivity=float(local_connectivity),
    )

    rows, cols, vals, dists = _compute_membership_strengths(
        knn_indices, knn_dists, sigmas, rhos, return_dists
    )

    result = scipy.sparse.coo_matrix(
        (vals, (rows, cols)), shape=(X.shape[0], X.shape[0])
    )
    result.eliminate_zeros()

    if apply_set_operations:
        transpose = result.transpose()

        prod_matrix = result.multiply(transpose)

        # Blend of fuzzy union (a + b - a*b) and fuzzy intersection (a*b).
        result = (
            set_op_mix_ratio * (result + transpose - prod_matrix)
            + (1.0 - set_op_mix_ratio) * prod_matrix
        )

    result.eliminate_zeros()

    if return_dists is None:
        return result, sigmas, rhos

    if return_dists:
        dmat = scipy.sparse.coo_matrix(
            (dists, (rows, cols)), shape=(X.shape[0], X.shape[0])
        )

        # Symmetrise by keeping the larger of the two directed distances.
        dists = dmat.maximum(dmat.transpose()).todok()
    else:
        dists = None

    return result, sigmas, rhos, dists
Beispiel #4
0
def test_smooth_knn_dist_l1norms():
    """Check the row sums of the smoothed kernel built from `smooth_knn_dist`.

    For a euclidean 10-nearest-neighbour graph of `nn_data`, the sum of
    `exp(-max(d - rho, 0) / sigma)` over each row must equal
    `1 + log2(10)` to three decimals.
    """
    knn_indices, knn_dists, _ = nearest_neighbors(
        nn_data, 10, "euclidean", {}, False, np.random
    )
    sigmas, rhos = smooth_knn_dist(knn_dists, 10.0)
    shifted_dists = knn_dists - rhos[:, np.newaxis]
    # Distances below rho are clamped to zero after the shift.
    shifted_dists[shifted_dists < 0.0] = 0.0
    vals = np.exp(-(shifted_dists / sigmas[:, np.newaxis]))
    norms = np.sum(vals, axis=1)

    assert_array_almost_equal(
        norms,
        1.0 + np.log2(10) * np.ones(norms.shape[0]),
        decimal=3,
        # Previously two adjacent literals were joined without a space
        # ("expectednorms").
        err_msg="Smooth knn-dists does not give expected norms",
    )
Beispiel #5
0
def smoothen_dists(store, z_idx, z_dist, lc: float, bw: float,
                   chunk_size: int):
    """
    Smoothens KNN distances and stores the resulting edges and weights.

    The KNN table is processed in batches of `chunk_size` rows. For every
    batch the distances are passed through UMAP's `smooth_knn_dist` and
    `compute_membership_strengths`, and the returned (row, col, value)
    triplets are appended to the "edges" and "weights" datasets created in
    `store`. Weights equal to 0 are replaced afterwards by the smallest
    non-zero weight seen in the batches that contained zeros.

    Args:
        store: Passed as-is to `create_zarr_dataset`; the "edges" and
            "weights" datasets are created in it.
        z_idx: 2D array-like of shape (n_cells, n_neighbors) supporting row
            slicing (presumably the KNN indices; confirm against caller).
        z_dist: Array-like sliced with the same row ranges as `z_idx`
            (presumably the matching KNN distances; confirm against caller).
        lc: Forwarded to `smooth_knn_dist` as `local_connectivity`.
        bw: Forwarded to `smooth_knn_dist` as `bandwidth`.
        chunk_size: Number of rows per batch and chunk length of the created
            datasets.

    Returns:
        None

    """
    from umap.umap_ import smooth_knn_dist, compute_membership_strengths

    umap_is_latest = _is_umap_version_new()

    n_cells, n_neighbors = z_idx.shape
    zge = create_zarr_dataset(store, "edges", (chunk_size, ), ("u8", "u8"),
                              (n_cells * n_neighbors, 2))
    zgw = create_zarr_dataset(store, "weights", (chunk_size, ), "f8",
                              (n_cells * n_neighbors, ))
    last_row = 0
    val_counts = 0
    # One boolean array per batch; much cheaper than a Python list holding
    # one boolean object per edge.
    zero_masks = []
    global_min = 1
    for i in tqdmbar(range(0, n_cells, chunk_size),
                     desc="Smoothening KNN distances"):
        stop = min(i + chunk_size, n_cells)
        ki, kv = z_idx[i:stop, :], z_dist[i:stop, :]
        kv = kv.astype(np.float32, order="C")
        sigmas, rhos = smooth_knn_dist(kv,
                                       k=n_neighbors,
                                       local_connectivity=lc,
                                       bandwidth=bw)
        # The number of values returned depends on the installed UMAP version.
        if umap_is_latest:
            rows, cols, vals, _ = compute_membership_strengths(
                ki, kv, sigmas, rhos)
        else:
            rows, cols, vals = compute_membership_strengths(
                ki, kv, sigmas, rhos)
        # Row ids are relative to the batch; shift them by the rows already written.
        rows = rows + last_row
        start = val_counts
        end = val_counts + len(rows)
        last_row = rows[-1] + 1
        val_counts += len(rows)
        zge[start:end, 0] = rows
        zge[start:end, 1] = cols
        zgw[start:end] = vals

        # Fixing edges with 0 weights
        # We are doing these steps here to have minimum operations outside
        # the scope of a progress bar
        # NOTE(review): the minimum is only updated from batches that contain
        # zero weights (kept as in the original) - confirm this is intended.
        nidx = vals == 0
        if nidx.any():
            non_zero = vals[~nidx]
            # A batch holding only zeros has no minimum to contribute.
            if non_zero.size > 0:
                min_val = non_zero.min()
                if min_val < global_min:
                    global_min = min_val
        zero_masks.append(nidx)

    # The stored weights are copied, patched and written back in full, but
    # only when at least one zero weight was seen.
    if zero_masks:
        null_idx = np.concatenate(zero_masks)
        if null_idx.any():
            w = zgw[:]
            w[null_idx] = global_min
            zgw[:] = w
    return None