Example #1
def moments(data, n_neighbors=30, n_pcs=None, mode='connectivities', method='umap', use_rep=None, copy=False):
    """Computes moments for velocity estimation.

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    n_neighbors: `int` (default: 30)
        Number of neighbors to use.
    n_pcs: `int` (default: None)
        Number of principal components to use.
        If not specified, the full space of a pre-computed PCA is used,
        or 30 components are used when PCA is computed internally.
    mode: `'connectivities'` or `'distances'`  (default: `'connectivities'`)
        Distance metric to use for moment computation.
    method : {{'umap', 'gauss', 'hnsw', 'sklearn', `None`}}  (default: `'umap'`)
        Use 'umap' [McInnes18]_ or 'gauss' (Gauss kernel following [Coifman05]_
        with adaptive width [Haghverdi16]_) for computing connectivities.
    use_rep : `None`, `'X'` or any key for `.obsm` (default: None)
        Use the indicated representation. If `None`, the representation is chosen automatically:
        for .n_vars < 50, .X is used, otherwise 'X_pca' is used.
    copy: `bool` (default: `False`)
        Return a copy instead of writing to adata.

    Returns
    -------
    Returns or updates `adata` with the attributes
    Ms: `.layers`
        dense matrix with first order moments of spliced counts.
    Mu: `.layers`
        dense matrix with first order moments of unspliced counts.
    """
    adata = data.copy() if copy else data

    if 'spliced' not in adata.layers.keys() or 'unspliced' not in adata.layers.keys():
        raise ValueError('Could not find spliced / unspliced counts.')
    if any([not_yet_normalized(adata.layers[layer]) for layer in {'spliced', 'unspliced'}]):
        normalize_per_cell(adata)
    if neighbors_to_be_recomputed(adata, n_neighbors=n_neighbors):
        if use_rep is None: use_rep = 'X_pca'
        neighbors(adata, n_neighbors=n_neighbors, use_rep=use_rep, n_pcs=n_pcs, method=method)
    if mode not in adata.uns['neighbors']:
        raise ValueError('mode can only be \'connectivities\' or \'distances\'')

    logg.info('computing moments based on ' + str(mode), r=True)

    connectivities = get_connectivities(adata, mode, n_neighbors=n_neighbors, recurse_neighbors=False)

    adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).astype(np.float32).A
    adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).astype(np.float32).A
    # if renormalize: normalize_per_cell(adata, layers={'Ms', 'Mu'}, enforce=True)

    logg.info('    finished', time=True, end=' ' if settings.verbosity > 2 else '\n')
    logg.hint(
        'added \n'
        '    \'Ms\' and \'Mu\', moments of spliced/unspliced abundances (adata.layers)')
    return adata if copy else None
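For orientation, a minimal standalone sketch of the smoothing step above (toy matrices, not scVelo's API): first-order moments are just the normalized neighbor graph applied to a counts layer.

import numpy as np
from scipy.sparse import csr_matrix

# toy connectivity graph for 3 cells (rows already sum to 1)
connectivities = csr_matrix(np.array([[0.5, 0.5, 0.0],
                                      [0.5, 0.0, 0.5],
                                      [0.0, 0.5, 0.5]]))
spliced = csr_matrix(np.array([[1.0, 2.0],
                               [3.0, 4.0],
                               [5.0, 6.0]]))

# first-order moment: neighborhood average of spliced counts per cell
Ms = csr_matrix.dot(connectivities, spliced).astype(np.float32).A
print(Ms)  # each row is the weighted mean over that cell's neighbors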
Example #2
def get_moments(adata,
                layer=None,
                second_order=None,
                centered=True,
                mode="connectivities"):
    """Computes moments for a specified layer.

    First and second order moments.
    If centered, that corresponds to means and variances across nearest neighbors.

    Arguments
    ---------
    adata: `AnnData`
        Annotated data matrix.
    layer: `str` (default: `None`)
        Key of layer with abundances to consider for moment computation.
    second_order: `bool` (default: `None`)
        Whether to compute second order moments from abundances.
    centered: `bool` (default: `True`)
        Whether to compute centered (=variance) or uncentered second order moments.
    mode: `'connectivities'` or `'distances'`  (default: `'connectivities'`)
        Distance metric to use for moment computation.

    Returns
    -------
    Mx: first or second order moments
    """

    if "neighbors" not in adata.uns:
        raise ValueError(
            "You need to run `pp.neighbors` first to compute a neighborhood graph."
        )
    connectivities = get_connectivities(adata, mode=mode)
    X = (adata.X if layer is None else
         adata.layers[layer] if isinstance(layer, str) else layer)
    X = (csr_matrix(X)
         if isinstance(layer, str) and layer in {"spliced", "unspliced"} else
         np.array(X) if not issparse(X) else X)
    if not issparse(X):
        X = X[:, ~np.isnan(X.sum(0))]
    if second_order:
        X2 = X.multiply(X) if issparse(X) else X**2
        Mx = (csr_matrix.dot(connectivities, X2)
              if second_order else csr_matrix.dot(connectivities, X))
        if centered:
            mu = csr_matrix.dot(connectivities, X)
            mu2 = mu.multiply(mu) if issparse(mu) else mu**2
            Mx = Mx - mu2
    else:
        Mx = csr_matrix.dot(connectivities, X)
    if issparse(X):
        Mx = Mx.astype(np.float32).A
    return Mx
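A small numeric check (toy, made-up values) of why the centered branch above yields a variance, i.e. Var = E[X^2] - (E[X])^2 under neighbor averaging:

import numpy as np
from scipy.sparse import csr_matrix

connectivities = csr_matrix(np.array([[0.5, 0.5, 0.0],
                                      [1/3, 1/3, 1/3],
                                      [0.0, 0.5, 0.5]]))
X = np.array([[1.0], [3.0], [7.0]])

mu  = csr_matrix.dot(connectivities, X)        # first moment E[X]
m2  = csr_matrix.dot(connectivities, X ** 2)   # uncentered second moment E[X^2]
var = m2 - mu ** 2                             # centered second moment (variance)
print(var.ravel())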
Example #3
def moments(adata, n_neighbors=30, n_pcs=30, mode='connectivities', renormalize=False, copy=False):
    """Computes first order moments for velocity estimation.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        Annotated data matrix.
    n_neighbors: `int` (default: 30)
        Number of neighbors to use.
    n_pcs: `int` (default: 30)
        Number of principal components to use.
    mode: `'connectivities'` or `'distances'`  (default: `'connectivities'`)
        Distance metric to use for moment computation.
    renormalize: `bool` (default: `False`)
        Renormalize the moments by total counts per cell to its median.
    copy: `bool` (default: `False`)
        Return a copy instead of writing to adata.

    Returns
    -------
    Returns or updates `adata` with the attributes
    Ms: `.layers`
        dense matrix with first order moments of spliced counts.
    Mu: `.layers`
        dense matrix with first order moments of unspliced counts.
    """
    if 'neighbors' not in adata.uns.keys() or n_neighbors > adata.uns['neighbors']['params']['n_neighbors']:
        from scanpy.api.pp import neighbors, pca
        if 'X_pca' not in adata.obsm.keys() or n_pcs > adata.obsm['X_pca'].shape[1]:
            pca(adata, n_comps=n_pcs, svd_solver='arpack')
        neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca')

    if mode not in adata.uns['neighbors']:
        raise ValueError('mode can only be \'connectivities\' or \'distances\'')

    logg.info('computing moments', r=True)
    normalize_layers(adata)

    connectivities = get_connectivities(adata, mode)
    #connectivities += connectivities.dot(connectivities*.5)

    adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).A
    adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).A
    if renormalize: normalize_layers(adata, layers={'Ms', 'Mu'})

    logg.info('    finished', time=True, end=' ' if settings.verbosity > 2 else '\n')
    logg.hint(
        'added to `.layers`\n'
        '    \'Ms\', moments of spliced abundances\n'
        '    \'Mu\', moments of unspliced abundances')
    return adata if copy else None
Example #4
def terminal_states(data,
                    vkey='velocity',
                    self_transitions=False,
                    basis=None,
                    weight_diffusion=0,
                    scale_diffusion=1,
                    eps=1e-3,
                    copy=False):
    """Computes terminal states (root and end points) via eigenvalue decomposition.
    """
    adata = data.copy() if copy else data
    connectivities = get_connectivities(adata, 'distances')

    logg.info('computing root cells', r=True, end=' ')
    T = transition_matrix(adata,
                          vkey=vkey,
                          basis=basis,
                          weight_diffusion=weight_diffusion,
                          scale_diffusion=scale_diffusion,
                          self_transitions=self_transitions,
                          backward=True)
    eigvecs = eigs(T, eps=eps, perc=[2, 98])[1]
    eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1)
    eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98))
    adata.obs['root'] = scale(eigvec)
    logg.info('using ' + str(eigvecs.shape[1]) +
              ' eigenvectors with eigenvalue 1.')

    logg.info('computing end points', end=' ')
    T = transition_matrix(adata,
                          vkey=vkey,
                          basis=basis,
                          weight_diffusion=weight_diffusion,
                          scale_diffusion=scale_diffusion,
                          self_transitions=self_transitions,
                          backward=False)
    eigvecs = eigs(T, eps=eps, perc=[2, 98])[1]
    eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1)
    eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98))
    adata.obs['end'] = scale(eigvec)
    logg.info('using ' + str(eigvecs.shape[1]) +
              ' eigenvectors with eigenvalue 1.')

    logg.info('    finished',
              time=True,
              end=' ' if settings.verbosity > 2 else '\n')
    logg.hint(
        'added\n'
        '    \'root\', root cells of Markov diffusion process (adata.obs)\n'
        '    \'end\', end points of Markov diffusion process (adata.obs)')
    return adata if copy else None
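The root/end scores above are read off eigenvectors with eigenvalue 1 of the backward/forward transition matrices. A rough standalone illustration on a toy row-stochastic chain (using numpy's dense eig, not scVelo's `eigs` helper):

import numpy as np

# toy row-stochastic transition matrix with an absorbing state (index 2)
T = np.array([[0.1, 0.8, 0.1],
              [0.2, 0.1, 0.7],
              [0.0, 0.0, 1.0]])

# left eigenvector of T with eigenvalue 1 (= stationary distribution)
vals, vecs = np.linalg.eig(T.T)
idx = np.argmin(np.abs(vals - 1.0))
stationary = np.abs(np.real(vecs[:, idx]))
stationary /= stationary.sum()
print(stationary)  # all mass ends up on the absorbing state, the "end point"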
Example #5
def moments(data, n_neighbors=30, n_pcs=30, mode='connectivities', use_rep=None, recurse_neighbors=False,
            renormalize=False, copy=False):
    """Computes moments for velocity estimation.

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    n_neighbors: `int` (default: 30)
        Number of neighbors to use.
    n_pcs: `int` (default: 30)
        Number of principal components to use.
    mode: `'connectivities'` or `'distances'`  (default: `'connectivities'`)
        Distance metric to use for moment computation.
    renormalize: `bool` (default: `False`)
        Renormalize the moments by total counts per cell to its median.
    copy: `bool` (default: `False`)
        Return a copy instead of writing to adata.

    Returns
    -------
    Returns or updates `adata` with the attributes
    Ms: `.layers`
        dense matrix with first order moments of spliced counts.
    Mu: `.layers`
        dense matrix with first order moments of unspliced counts.
    """
    adata = data.copy() if copy else data

    if 'spliced' not in adata.layers.keys() or 'unspliced' not in adata.layers.keys():
        raise ValueError('Could not find spliced / unspliced counts.')
    if 'neighbors' not in adata.uns.keys() or n_neighbors > adata.uns['neighbors']['params']['n_neighbors']:
        neighbors(adata, n_neighbors=n_neighbors, use_rep=('X_pca' if use_rep is None else use_rep), n_pcs=n_pcs)
    if mode not in adata.uns['neighbors']:
        raise ValueError('mode can only be \'connectivities\' or \'distances\'')

    logg.info('computing moments based on ' + str(mode), r=True)
    normalize_layers(adata)

    connectivities = get_connectivities(adata, mode, n_neighbors=n_neighbors, recurse_neighbors=recurse_neighbors)

    adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).astype(np.float32).A
    adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).astype(np.float32).A
    if renormalize: normalize_layers(adata, layers={'Ms', 'Mu'}, enforce=True)

    logg.info('    finished', time=True, end=' ' if settings.verbosity > 2 else '\n')
    logg.hint(
        'added \n'
        '    \'Ms\' and \'Mu\', moments of spliced/unspliced abundances (adata.layers)')
    return adata if copy else None
Example #6
def collaborative_recommend_method(rated_dict):
    rating_sparse = load_sparse_csr(data_path + "user_rating_matrix_sparse.npz")
    with open(data_path + "movie_id_index", "r") as f:
        movie_id_index = yaml.safe_load(f)
    with open(data_path + "movie_index_id", "r") as f:
        movie_index_id = yaml.safe_load(f)

    col = []
    data = []

    for key, value in rated_dict.items():
        try:
            key = key.strip()
            movie_index = movie_id_index[key]
            col.append(movie_index)
            data.append(float(value))
        except (KeyError, ValueError):
            continue

    row = np.zeros(len(data))  # single user row; length must match col/data
    user_rate = coo_matrix((data, (row, col)), shape=(1, 10197)).tocsr()
    similarities = csr_matrix(cosine_similarity(user_rate, rating_sparse))
    predict_rating = csr_matrix.dot(similarities, rating_sparse)

    predict_rating_sorted = predict_rating.getrow(0).toarray().ravel()  # flatten predicted ratings for the single user row
    predict_rating_top = heapq.nlargest(20, range(len(predict_rating_sorted)), predict_rating_sorted.__getitem__) #fetch top 20 movies
    predict_rating_top_mapped = list(map(lambda x: movie_index_id[str(x)], predict_rating_top))
    predict_rating_top_selected = list(filter(lambda x: x not in list(map(lambda x: x.strip(), rated_dict.keys())), predict_rating_top_mapped))
    predict_rating_top_final = predict_rating_top_selected[:10]

    return predict_rating_top_final
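The recommendation logic reduces to one sparse product: the new user's similarity to all stored users times the stored rating matrix. A self-contained toy version with made-up ratings and no data files:

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

# stored user-item rating matrix (3 users x 4 movies) and one new user's ratings
rating_sparse = csr_matrix(np.array([[5, 0, 3, 0],
                                     [4, 0, 0, 2],
                                     [0, 1, 4, 5]], dtype=float))
user_rate = csr_matrix(np.array([[5, 0, 4, 0]], dtype=float))

# similarity of the new user to every stored user, then weighted rating sum
similarities = csr_matrix(cosine_similarity(user_rate, rating_sparse))
predict_rating = csr_matrix.dot(similarities, rating_sparse)

scores = predict_rating.toarray().ravel()
print(np.argsort(scores)[::-1])  # movie indices ranked by predicted preference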
Example #7
    def backward(ctx, grad_output):
        # This is a pattern that is very convenient - at the top of backward
        # unpack saved_tensors and initialize all gradients w.r.t. inputs to
        # None. Thanks to the fact that additional trailing Nones are
        # ignored, the return statement is simple even when the function has
        # optional inputs.
        sparse_input, weight, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_bias = None

        # These needs_input_grad checks are optional and there only to
        # improve efficiency. If you want to make your code simpler, you can
        # skip them. Returning gradients for inputs that don't require it is
        # not an error.
        # if ctx.needs_input_grad[0]:
        #     grad_input = grad_output.mm(weight)
        grad_output_np = grad_output.data.numpy()
        # print grad_output_np
        sparse_input_np = csr_matrix(sparse_input.data.numpy().T)
        # print sparse_input_np
        if ctx.needs_input_grad[1]:
            grad_weight = torch.autograd.Variable(
                torch.from_numpy((csr_matrix.dot(sparse_input_np,
                                                 grad_output_np)).T).float())
        # if bias is not None and ctx.needs_input_grad[2]:
        #     grad_bias = grad_output.sum(0).squeeze(0)

        return grad_input, grad_weight, grad_bias
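The weight gradient above is the usual linear-layer identity dL/dW = X^T dL/dY with a sparse X. A small numpy/scipy check, independent of PyTorch (toy shapes and values):

import numpy as np
from scipy.sparse import csr_matrix

X = csr_matrix(np.array([[1.0, 0.0, 2.0],
                         [0.0, 3.0, 0.0]]))      # sparse input, shape (batch, in)
grad_out = np.array([[0.1, 0.2],
                     [0.3, 0.4]])                # upstream gradient dL/dY, shape (batch, out)

# backward for the weights of Y = X W: dL/dW = X^T dL/dY
grad_W = csr_matrix.dot(X.T.tocsr(), grad_out)
print(grad_W)                                    # shape (in, out)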
Example #8
def stream(B, a, k, PSI, Y, M, ord1, i, u, is_sparse, beg):
    Q = []
    Q1 = []
    w = []
    if is_sparse == 1:
        PSI = PSI + SM.dot(a.T, a)
    else:
        PSI = PSI + np.dot(a.T, a)
    Z = Alg1(PSI, k)  #Z and PSI are matrices
    for y in Y:
        BB = B[y]
        C = np.dot(np.array(BB), Z)
        s = np.sum(np.power(C, 2), 1)  #line 14
        ord1[y] = s.tolist()
        My = M[y].copy()
        ord2 = ord1[y].copy()
        for mm in range(len(M[y])):
            if u[np.mod(mm, 10000), y] > (s[mm] / (s[mm] + k)):
                My.remove(M[y][mm])
                ord2.remove(ord1[y][mm])
        M[y] = My
        ord1[y] = ord2
    for y in Y:
        if len(M[y]) == 1:
            Q.append(M[y])
            Q1.append(ord1[y])
    for q in Q1:
        w.append(k / (len(Q1) * q[0]))
    return PSI, Q, w, s, M, time.time() - beg
Example #9
 def canonicalCorrelations(self, datasets=None):
     """
     Take :py:attr:`k` datasets and return the :py:attr:`k` canonical correlations.
     :param datasets: A list of numpy.arrays.
     """
     if datasets is not None:
         XZ = [
             csr_matrix.dot(X, Z.transpose())
             for X, Z in zip(datasets, self.ZZ)
         ]
         k = self.getK()
         corrs = np.zeros((k, len(XZ), len(XZ)))
         for i in range(len(XZ)):
             corrs[:, i, i] = np.ones(k)
             for j in range(i + 1, len(XZ)):
                 for cc in range(k):
                     corrs[cc, j, i] = np.corrcoef(XZ[i][:, cc],
                                                   XZ[j][:, cc],
                                                   rowvar=False)[0, 1]
         for cc in range(k):
             # fill in the upper triangles with the transpose of the lower
             corrs[cc, :, :] = corrs[cc, :, :] + np.tril(
                 corrs[cc, :, :], -1).T
     else:
         corrs = None  # TODO: save canonical correlations during CCA.fit
     return np.nan_to_num(corrs)
Example #10
def run_rwr(P, alpha=0.9, eps=1e-4, max_iters=10, verbose=False):
    """
    Run Random Walk with Restarts on a graph.
    *P*: normalized csr scipy sparse matrix
    *alpha*: restart parameter
    *max_iters*: maximum number of iterations
    *eps*: maximum difference of node scores from one iteration to the next
    """

    # initialize with a 1 along each diagonal
    P0 = eye(P.shape[0])
    # matrix of node score vectors
    X = csr_matrix(P.shape)
    prev_X = csr_matrix(P.shape)
    for iters in trange(1,max_iters+1):
        X = alpha*csr_matrix.dot(P, prev_X) + ((1-alpha)*P0)

        max_d = (X - prev_X).max()
        if verbose:
            print("\t\titer %d max score change: %0.6f" % (iters, max_d))
        if max_d < eps:
            # converged!
            break
        prev_X = X.copy()

    if iters == max_iters:
        print("Reached max iters %d" % (max_iters))
    else:
        print("RWR converged after %d iters" % (iters))

    return X
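A hypothetical end-to-end run of the same update rule, X <- alpha * P * X_prev + (1 - alpha) * I, on a tiny normalized graph and without the tqdm progress bar:

import numpy as np
from scipy.sparse import csr_matrix, eye

# tiny column-normalized adjacency matrix (3 nodes)
A = np.array([[0, 1, 1],
              [1, 0, 0],
              [1, 0, 0]], dtype=float)
P = csr_matrix(A / A.sum(axis=0))   # normalize columns so scores are conserved

alpha, eps = 0.9, 1e-6
P0 = eye(P.shape[0])
X, prev_X = csr_matrix(P.shape), csr_matrix(P.shape)
for _ in range(100):
    X = alpha * csr_matrix.dot(P, prev_X) + (1 - alpha) * P0
    if (X - prev_X).max() < eps:
        break                       # converged
    prev_X = X.copy()

print(X.toarray())  # column j holds RWR scores for restarts at node j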
Example #11
 def evaluate(self, t, algo=''):
     assert self.seeds is not None
     assert self.graph_type == 1
     self.graph = self.graph.tocsr()
     colors = self.seeds
     #for i in range(self.total_nodes):
     #    if colors[i] == 0:
     #        colors[i] = random.choice([1,-1])
     for i in range(0, t):
         colors = csr_matrix.dot(self.graph, colors)
     if save:
         if self.only_scc:
             np.save(
                 'saved/epinions_{}_{}_{}_{}_scc.npy'.format(
                     algo, t, sum(abs(self.seeds)), sum(self.targets)),
                 colors)
         else:
             np.save(
                 'saved/epinions_{}_{}_{}_{}.npy'.format(
                     algo, t, sum(abs(self.seeds)), sum(self.targets)),
                 colors)
     res1 = Network.eval_stats_purple(self.targets, self.partitions, colors)
     res2 = Network.eval_stats(self.targets, self.partitions, colors)
     self.seeds = None
     result = (res1['P+ C+'] + res1['P- C-'], res2['P+ C+'] + res2['P- C-'])
     return result  #- result['p+ c-'] - result['p- c+']
Example #12
 def forward(ctx, sparse_input, weight, bias=None):
     sparse_input_np = csr_matrix(sparse_input.data.numpy())
     ctx.save_for_backward(sparse_input, weight, bias)
     weight_np = weight.data.numpy().T
     output = csr_matrix.dot(sparse_input_np, weight_np)
     output = torch.autograd.Variable(torch.from_numpy(output).float())
     if bias is not None:
         output += bias.unsqueeze(0).expand_as(output)
     return output
Example #13
 def _calc_cutX4(laplace_spmat, part_vec):
     '''
     The method calculates and returns 4 times the size of the cut from
     a partition vector.
     '''
     # x'Lx = 4w(E(P1, P2))
     Lx = csr_matrix.dot(laplace_spmat, part_vec)
     cut_x4 = np.dot(part_vec, Lx)
     return cut_x4
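A worked check of the identity x'Lx = 4*cut on a toy 4-cycle with unit edge weights and a ±1 partition vector:

import numpy as np
from scipy.sparse import csr_matrix

# Laplacian L = D - A of a 4-cycle 0-1-2-3-0
A = np.array([[0, 1, 0, 1],
              [1, 0, 1, 0],
              [0, 1, 0, 1],
              [1, 0, 1, 0]], dtype=float)
L = csr_matrix(np.diag(A.sum(1)) - A)

# partition {0,1} vs {2,3}: the cut crosses edges (1,2) and (3,0), so weight 2
x = np.array([1, 1, -1, -1], dtype=float)
Lx = csr_matrix.dot(L, x)
print(np.dot(x, Lx))   # x' L x = 4 * cut = 8.0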
Example #14
def kmeans_plspls1(A,w,eps,V,clus_num,we,alfa_app,is_sparse,is_jl):
        """
        This function runs the kmeans++ initialization algorithm; each point is chosen according to the sinus probability.
        Input:
            A: data matrix, n points, each on a sphere of dimension d.
            k: number of required points to find.
        Output:
            Cents: K initial centroids, each of a dimension d.
        """
        if is_sparse==1:
            A=SM(A)
        if is_jl==1:
            dex=int(clus_num*np.log(A.shape[0]))
    
            ran=np.random.randn(A.shape[1],dex)
            A=SM.dot(A,ran)
            is_sparse=0      #A=np.multiply(w1,A)
        num_of_samples = A.shape[0]
        if any(np.isnan(np.ravel(w)))+any(np.isinf(np.ravel(w))):
            Cents= A[np.random.choice(num_of_samples,size=1),:]   #choosing arbitrary point as the first               
        else: 
            w[w<0]=0               
            Cents= A[np.random.choice(num_of_samples,size=1,p=np.ravel(w)/np.sum(np.ravel(w))),:] #choosing arbitrary point as the first               
        if is_sparse==1:
            PA=make_P(A)
        else:
            PA=make_P_dense(A)
        fcost=alfa_app*1.1
        h1=1
        inds=[]
        while (Cents.shape[0]<clus_num+1):
            Cents2=Cents[h1-1:h1,:] 
            if is_sparse==1:
                Pmina,tags,_=squaredis(PA,Cents2)  
            else:
                Pmina,tags,_=squaredis_dense(PA,Cents2)  
            if h1==1:
                Pmin=Pmina
            else:
                Pmin=np.minimum(Pmin,Pmina)
                Pmin[np.asarray(inds)]=0
            Pmin[Pmin<0]=0
            Pmin00=np.multiply(w,Pmin)
            Pmin0=Pmin00/np.sum(Pmin00)
            if any(np.isnan(np.ravel(Pmin0)))+any(np.isinf(np.ravel(Pmin0))):
                ind=np.random.choice(Pmin.shape[0],1)
            else:
                Pmin0[Pmin0<0]=0
                ind=np.random.choice(Pmin.shape[0],1, p=Pmin0)
            if is_sparse==1:
                Cents=vstack((Cents,A[ind,:]))
            else:
                Cents=np.concatenate((Cents,A[ind,:]),0)
            inds.append(ind)
            h1=h1+1
        return Cents,inds
Example #15
def dot(A, B):

    if type(A) is spm and type(B) is spm:
        return spm.dot(A, B)
    elif is_dense(A) and is_dense(B):
        return np.dot(A, B)
    elif type(A) is DST and type(B) is DST:
        return A.matmul(B)
    else:
        raise NotImplementedError()
Example #16
def calculate_F(w, Xtr, Ytr):
    """
    """
    w = csr_matrix(w)
    wx = csr_matrix.dot(w, Xtr.T)
    ywx = wx.multiply(Ytr)
    constraint = 0
    z = (ywx < 1).toarray()
    constraint = (1 - ywx.toarray()[z]).sum(axis=0)

    f = 0.5 * (np.linalg.norm(w.toarray()))**2 + constraint
    return f
Example #17
def calculate_F(w, Xtr, Ytr):
    """
        calculate value of primal objective
    """
    w = csr_matrix(w)
    wx = csr_matrix.dot(w, Xtr.T)
    ywx = wx.multiply(Ytr)
    # calculate sum of slack variables
    slackSum = (1 - ywx.toarray()[(ywx < 1).toarray()]).sum(axis=0)

    f = 0.5 * (np.linalg.norm(w.toarray()))**2 + slackSum
    return f
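A toy numeric check of the primal objective 0.5*||w||^2 + sum_i max(0, 1 - y_i * w.x_i), comparing the sparse computation above against a plain dense version (made-up data):

import numpy as np
from scipy.sparse import csr_matrix

Xtr = csr_matrix(np.array([[1.0, 0.0],
                           [0.0, 1.0],
                           [1.0, 1.0]]))
Ytr = csr_matrix(np.array([[1.0, -1.0, 1.0]]))   # labels as a 1 x n row
w = csr_matrix(np.array([[0.5, -0.5]]))

wx = csr_matrix.dot(w, Xtr.T)          # 1 x n margins w.x_i
ywx = wx.multiply(Ytr)                 # y_i * w.x_i
slack = (1 - ywx.toarray()[(ywx < 1).toarray()]).sum()
f = 0.5 * np.linalg.norm(w.toarray()) ** 2 + slack

# dense reference: same objective without sparse tricks
f_ref = 0.5 * np.sum(w.toarray() ** 2) + np.maximum(
    0, 1 - Ytr.toarray().ravel() * (Xtr.toarray() @ w.toarray().ravel())).sum()
print(f, f_ref)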
Example #18
def alaa_coreset(wiki0,j,eps,w,is_pca,spar):
    """
    our algorithm, equivalent to Algorithm 1 in the paper.
    input:
        wiki0:data matrix
        j: dimension of the approximated subspace
        eps: determine coreset size
        w: initial weights
        is_pca: 1 coreset for pca, 0 coreset for SVD
        spar: is data in sparse format
    output:
        weighted coreset
    """
    coreset_size=j/eps
    dex=int(j*np.log(wiki0.shape[0]))
    d=wiki0.shape[1]
    if is_pca==1:
        j=j+1
        wiki0=PCA_to_SVD(wiki0,eps,spar)
    if is_jl==1:
        ran=np.random.randn(wiki0.shape[1],dex)
        if spar==1:
            wiki=SM.dot(wiki0,ran)	
        else:
            wiki=np.dot(wiki0,ran)	
    else:
        wiki=wiki0
    w=w/wiki.shape[0]
    sensetivities=[]
    jd=j
    w1=np.reshape(w,(len(w),1))
    wiki1=np.multiply(np.sqrt(w1),wiki)
    k=0
    for i,p in enumerate(wiki1) :
        k=k+1
        sensetivities.append(calc_sens(wiki1,p,jd,eps))
    
    p0=np.asarray(sensetivities)
    if is_pca==1:
        p0=p0+81*eps
    indec=np.random.choice(np.arange(wiki.shape[0]),int(coreset_size),p=p0/np.sum(p0)) #sampling according to the sensitivity
    p=p0/np.sum(p0) #normalizing sensitivities
    w=np.ones(wiki.shape[0])
    u=np.divide(np.sqrt(w),p)/coreset_size #calculating new weights
    u1=u[indec]#picking weights of sampled
    u1=np.reshape(u1,(len(u1),1))
    squ=np.sqrt(u1)   
    if spar==1:        
        C=SM(wiki0)[indec,:d].multiply(squ) #weighted coreset
    else:
        C=np.multiply(squ,wiki0[indec,:d])
    return C
Example #19
    def get_ek(self, psi):
        assert isinstance(psi, Wavefunction)
        v = self.v
        dv = self.grid.dv
        nelect = self.system.nelect
        psi = psi.get_psi()

        # T = psi'*Lap3*psi
        T1 = csr_matrix.dot(v, psi)
        T2 = csc_matrix.dot(csc_matrix(psi), T1)
        T = T2 * nelect * dv
        T = T[0]
        return T
Example #20
def second_order_moments(adata):
    """Computes second order moments for stochastic velocity estimation.

    Arguments
    ---------
    adata: `AnnData`
        Annotated data matrix.

    Returns
    -------
    Mss: Second order moments for spliced abundances
    Mus: Second order moments for spliced with unspliced abundances
    """
    if 'neighbors' not in adata.uns:
        raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.')

    connectivities = get_connectivities(adata, 'connectivities')
    s, u = csr_matrix(adata.layers['spliced']), csr_matrix(adata.layers['unspliced'])
    Mss = csr_matrix.dot(connectivities, s.multiply(s)).A
    Mus = csr_matrix.dot(connectivities, s.multiply(u)).A

    return Mss, Mus
Example #21
def SCNW_classic(A2, k, coreset_size, is_jl):
    """
    This function runs the CNW algorithm, exactly as elaborated in Feldman & Ras

    inputs:
    A: data matrix, n points, each of dimension d.
    k: an algorithm parameter which determines the normalization needed and the error given the coreset size.
    coreset_size: the maximal coreset size (number of rows unequal to zero) demanded for input.
    output:
    SA0: the weighted coreset rows.
    ind: indices of the rows selected into the coreset.
    """
    coreset_size = int(coreset_size)
    if is_jl == 1:
        dex = int(k * np.log(A2.shape[0]))

        ran = np.random.randn(A2.shape[1], dex)
        A1 = SM.dot(A2, ran)
    else:
        A1 = np.copy(A2)
    print('A1.shape', A1.shape)
    epsi = np.sqrt(k / coreset_size)  #
    A, A3 = initializing_data(A1, k)
    print('A.shape', A.shape)
    At = np.transpose(A)
    AtA = np.dot(At, A)
    num_of_channels = A.shape[1]
    ww = np.zeros((int(coreset_size)))
    Z = np.zeros((num_of_channels, num_of_channels))
    X_u = k * np.diag(np.ones(num_of_channels))
    X_l = -k * np.diag(np.ones(num_of_channels))
    delta_u = epsi + 2 * np.power(epsi, 2)
    delta_l = epsi - 2 * np.power(epsi, 2)
    ind = np.zeros(int(coreset_size), dtype=int)

    for j in range(coreset_size):
        if j % 50 == 1:
            print('j=', j)
        X_u = X_u + delta_u * AtA
        X_l = X_l + delta_l * AtA
        Z, jj, t = single_CNW_iteration_classic(A, At, delta_u, delta_l, X_u,
                                                X_l, Z)
        ww[j] = t
        ind[j] = jj
    sqrt_ww = np.sqrt(epsi * ww / k)
    sqrt_ww = np.reshape(sqrt_ww, (len(sqrt_ww), 1))
    if is_jl == 1:
        SA0 = SM(A2)[ind, :].multiply(sqrt_ww)
    else:
        SA0 = np.multiply(A2[ind, :], sqrt_ww)
    return SA0, ind
Example #22
def Nonuniform(AA0,k,is_pca,eps,spar):
        """
        non-uniform sampling competitor to our algorithm, from
        Varadarajan, Kasturi, and Xin Xiao. "On the sensitivity of shape fitting problems." arXiv preprint arXiv:1209.4893 (2012).
        input:
            AA0:data matrix
            k: dimension of the approximated subspace
            is_pca: if 1 will provide a coreset to PCA, 0 will provide coreset for SVD
            eps: determines coreset size
            spar: is data in sparse format
        output:
            weighted coreset
        """
        d=AA0.shape[1]
        if is_pca==1:
                k=k+1
                AA0=PCA_to_SVD(AA0,eps,spar)
        if is_jl==1:
            dex=int(k*np.log(AA0.shape[0]))
            ran=np.random.randn(AA0.shape[1],dex)
            if spar==1:
                AA=SM.dot(AA0,ran)
            else:
                AA=np.dot(AA0,ran)
        else:
            AA=AA0
        size_of_coreset=int(k+k/eps-1) 
        U,D,VT=ssp.linalg.svds(AA,k)       
        V = np.transpose(VT)
        AAV = np.dot(AA, V)
        del V
        del VT    
        x = np.sum(np.power(AA, 2), 1)
        y = np.sum(np.power(AAV, 2), 1)
        P = np.abs(x - y)
        AAV=np.concatenate((AAV,np.zeros((AAV.shape[0],1))),1)
        Ua, _, _ = ssp.linalg.svds(AAV,k)
        U = np.sum(np.power(Ua, 2), 1)
        pro = 2 * P / np.sum(P) + 8 * U
        if is_pca==1:
            pro=pro+81*eps
        pro0 = pro / sum(pro)
        w=np.ones(AA.shape[0])
        u=np.divide(w,pro0)/size_of_coreset
        DMM_ind=np.random.choice(AA.shape[0],size_of_coreset, p=pro0)
        u1=np.reshape(u[DMM_ind],(len(DMM_ind),1))
        if spar==1:
            SA0=SM(AA0)[DMM_ind,:d].multiply(np.sqrt(u1))
        else:
            SA0=np.multiply(np.sqrt(u1),AA0[DMM_ind,:d])
        return SA0   
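Both coreset routines above end with the same step: sample rows with probability proportional to a sensitivity score and reweight by 1/(m*p_i) so the sampled sum stays unbiased. A stripped-down sketch of just that step with toy scores (names are illustrative):

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((1000, 5))            # data matrix
sens = np.sum(A ** 2, axis=1)                 # toy sensitivity scores
p = sens / sens.sum()                         # sampling distribution
m = 100                                       # coreset size

idx = rng.choice(A.shape[0], size=m, p=p)     # importance sampling
weights = 1.0 / (m * p[idx])                  # unbiasedness reweighting
coreset = np.sqrt(weights)[:, None] * A[idx]  # weighted coreset rows

# sanity check: the weighted squared Frobenius norm matches the full one
print(np.linalg.norm(A) ** 2, np.linalg.norm(coreset) ** 2)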
Example #23
    def _transform(self, datasets, outcome_index=None):
        # TODO: use 'outcome_index' to allow user to use d - 1 datasets to predict the
        # remaining one
        ZZ = self.ZZ
        d = len(ZZ)
        assert len(datasets) == d,\
            "number of datasets should be len(self.ZZ)"
        standardization = self.getStandardization()
        if standardization:
            datasets = [X - np.mean(X, axis=0) for X in datasets]

        if outcome_index is not None:
            assert outcome_index >= 0 and outcome_index < d,\
                "outcome_index is not a valid index for datasets"
            XZ = [
                csr_matrix.dot(datasets[j], ZZ[j].transpose())
                for j in [x for x in range(d) if x != outcome_index]
            ]
            XZ_sums = np.sum(XZ, axis=0)
            prediction = np.dot(XZ_sums,
                                pinv(ZZ[outcome_index].todense()).transpose())
        else:
            XZ = [
                csr_matrix.dot(X, Z.transpose()) for X, Z in zip(datasets, ZZ)
            ]
            XZ_sums = []
            for i in range(d):
                for j in range(d):
                    if j != i:
                        if len(XZ_sums) == i:
                            XZ_sums.append(XZ[j])
                        else:
                            XZ_sums[i] += XZ[j]
            prediction = [
                np.dot(XZ_sums[i],
                       pinv(ZZ[i].todense()).transpose()) for i in range(d)
            ]
        return prediction
Example #24
def get_moments(adata, layer=None, second_order=None, centered=True):
    """Computes moments for a specified layer.

    First and second order moments. If centered, that corresponds to means and variances across nearest neighbors.

    Arguments
    ---------
    adata: `AnnData`
        Annotated data matrix.
    layer: `str` (default: `None`)
        Key of layer with abundances to consider for moment computation.
    second_order: `bool` (default: `None`)
        Whether to compute second order (instead of first order) moments from abundances.
    centered: `bool` (default: `True`)
        Whether to compute centered or uncentered second order moments (centered = variance).
    Returns
    -------
    Mx: first or second order moments
    """
    if 'neighbors' not in adata.uns:
        raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.')
    connectivities = get_connectivities(adata)
    X = adata.X if layer is None else adata.layers[layer]
    X = csr_matrix(X) if layer in {'spliced', 'unspliced'} else np.array(X) if not issparse(X) else X
    if not issparse(X):
        X = X[:, ~np.isnan(X.sum(0))]
    if second_order:
        X2 = X.multiply(X) if issparse(X) else X ** 2
        Mx = csr_matrix.dot(connectivities, X2) if second_order else csr_matrix.dot(connectivities, X)
        if centered:
            mu = csr_matrix.dot(connectivities, X)
            mu2 = mu.multiply(mu) if issparse(mu) else mu ** 2
            Mx = Mx - mu2
    else:
        Mx = csr_matrix.dot(connectivities, X)
    if issparse(X):
        Mx = Mx.astype(np.float32).A
    return Mx
Example #25
def compress_graph_from_hard_partition_ts(G,nodes,features,p,partition,node_subset):
    """
    Obtain a sparse tall-skinny matrix and new probabilities from a hard partition of a graph.
    For each point, we only find the distance to its anchor, not to all other anchors.
    -----------
    Parameters:
    G : NetworkX graph
    nodes : sorted list of graph nodes
    features : node feature matrix, rows aligned with `nodes`
    p : probability vector of sorted nodes
    partition : list of sets containing node labels
    node_subset : sorted list of anchor node labels
    -------
    Returns:
    dists : |nodes|x|node_subset| matrix of graph distances from each
                point to the anchor of its block
    fdists : |nodes|x|node_subset| matrix of feature distances from each
                point to the anchor of its block
    membership : |nodes|x|node_subset| membership matrix
    p_subset : vector of aggregated probabilities on anchors
    dists_subset : |node_subset|x|node_subset| matrix of distances between anchors
    """

    # Distances between anchors
    dists_subset = np.zeros((len(node_subset),len(node_subset)))
    for i in range(len(node_subset)):
        for j in range(i+1,len(node_subset)):
            dists_subset[i,j] = shortest_path_length(G,node_subset[i],node_subset[j])
    dists_subset = dists_subset + dists_subset.T

    # Sparse tall-skinny matrix of distances and feature-vector distances from points to their own anchors
    # Also, tall-skinny membership matrix and mass-compression matrix
    row_idx, col_idx, dist_data, mass_data, fdist_data = [], [], [], [], []
    for (aidx,anchor) in enumerate(node_subset):
        bidx = [anchor in v for v in partition].index(True) #block containing current anchor point
        block = partition[bidx]
        for b in block:
            idx = nodes.index(b)
            d = shortest_path_length(G,nodes[idx],anchor)
            fd = pairwise_distances(features[nodes.index(anchor),:].reshape(1,-1),
                                    features[idx,:].reshape(1,-1))[0][0]
            row_idx.append(idx)
            col_idx.append(aidx)
            dist_data.append(d)
            mass_data.append(p[idx])
            fdist_data.append(fd)

    dists = coo_matrix((dist_data, (row_idx, col_idx)),shape=(len(nodes), len(node_subset)))
    fdists = coo_matrix((fdist_data, (row_idx, col_idx)),shape=(len(nodes), len(node_subset)))
    membership = coo_matrix(([1 for v in row_idx], (row_idx, col_idx)),shape=(len(nodes), len(node_subset)))
#     coup = coo_matrix((mass_data, (row_idx, col_idx)),shape=(len(nodes), len(node_subset)))

    p_subset = csr_matrix.dot(p, membership)
    return dists.tocsr(),fdists.tocsr(),membership.tocsr(),p_subset, dists_subset
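The final dot product above aggregates per-point mass onto anchors purely through the membership matrix; a tiny hypothetical check:

import numpy as np
from scipy.sparse import coo_matrix, csr_matrix

# 5 points assigned to 2 anchors: points 0,1,2 -> anchor 0, points 3,4 -> anchor 1
row_idx = [0, 1, 2, 3, 4]
col_idx = [0, 0, 0, 1, 1]
membership = coo_matrix(([1] * 5, (row_idx, col_idx)), shape=(5, 2)).tocsr()

p = np.array([0.1, 0.2, 0.3, 0.25, 0.15])     # probability mass per point
p_subset = csr_matrix.dot(p, membership)      # aggregated mass per anchor
print(p_subset)                               # [0.6, 0.4]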
Example #26
def cut_size(A_spmat, partitions):
    nvert = A_spmat.get_shape()[0]

    cut = 0.0
    for part1 in partitions:
        p1 = np.zeros(nvert)
        for vert_id in part1:
            p1[vert_id] = 1
        not_p1 = np.ones(nvert) - p1

        Ay = csr_matrix.dot(A_spmat, not_p1)
        cut += np.dot(p1, Ay)

    return cut
Example #27
def second_order_moments(adata, adjusted=False):
    """Computes second order moments for stochastic velocity estimation.

    Arguments
    ---------
    adata: `AnnData`
        Annotated data matrix.
    adjusted: `bool` (default: `False`)
        Whether to adjust the second order moments using the stored first order moments `Ms` / `Mu`.

    Returns
    -------
    Mss: Second order moments for spliced abundances
    Mus: Second order moments for spliced with unspliced abundances
    """
    if 'neighbors' not in adata.uns:
        raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.')

    connectivities = get_connectivities(adata)
    s, u = csr_matrix(adata.layers['spliced']), csr_matrix(adata.layers['unspliced'])
    Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).A
    Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).A
    if adjusted:
        Mss = 2 * Mss - adata.layers['Ms'].reshape(Mss.shape)
        Mus = 2 * Mus - adata.layers['Mu'].reshape(Mus.shape)
    return Mss, Mus
Example #28
def squaredis(P,Cent):
    d=Cent.shape[1]
    C=SM((Cent.shape[0],d+2))    
    C[:,1]=1      #C is defined just as in the algorithm you sent me.
    C[:,0] =SM.sum(SM.power(Cent, 2), 1)
    C[:,2:d+2]=Cent
    D=SM.dot(P,C.T)
    D=D.toarray()
    Tags=D.argmin(1) #finding the closest centroid for each point
    if min(D.shape)>1:
        dists=D.min(1)
    else:
        dists=np.ravel(D)
    y=D.argmin(0)
    return dists,Tags,y 
Example #29
def df_to_matrix(df, factors):
    #
    # fill sparse matrix with ratings from a dataframe.
    # matr[userid][movieid] = rating.
    #
    ratings = df['rating'].tolist()
    # user and movie ids are used directly as matrix indices here
    users = [id for id in df['userid'].tolist()]
    movies = [id for id in df['movieidx'].tolist()]

    review_matrix_csr = csr_matrix((ratings, (users, movies)),
                                   shape=(num_reviewers, num_movies + 1))
    u, s, vt = svds(review_matrix_csr, k=factors)
    user_factors = csr_matrix.dot(u, s)
    item_factors = vt
    return user_factors, item_factors, review_matrix_csr
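A minimal toy check of the factorization step: build a small rating matrix, take a truncated SVD, and reconstruct a low-rank approximation of the ratings (made-up values):

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

ratings = [5, 3, 4, 2, 5, 1]
users   = [0, 0, 1, 1, 2, 2]
movies  = [0, 1, 0, 2, 1, 2]
review_matrix_csr = csr_matrix((ratings, (users, movies)), shape=(3, 4))

u, s, vt = svds(review_matrix_csr.astype(float), k=2)
user_factors = u * s            # scale each latent dimension by its singular value
item_factors = vt

approx = user_factors @ item_factors   # rank-2 reconstruction of the ratings
print(np.round(approx, 2))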
Example #30
 def init_totals(self, t, only_target):
     assert self.graph_type == 1
     totals = np.zeros(t)
     colors = np.copy(self.partitions)
     if only_target:
         colors *= self.targets
     self.graph = self.graph.tocsr()
     totals[0] = sum(
         Network.eval_stats(self.targets, self.partitions, colors).values())
     for i in range(1, t):
         colors = csr_matrix.dot(self.graph, colors)
         totals[i] = sum(
             Network.eval_stats(self.targets, self.partitions,
                                colors).values())
     #print(totals)
     return totals
Example #31
def cosine(a, b):
	x = csr_matrix.dot(a, b.T)[0, 0] / (norm2(a) * norm2(b))
	return x
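A self-contained version of the same formula; the `norm2` helper is not shown above, so it is assumed here to be the Euclidean norm of a sparse row vector:

import numpy as np
from scipy.sparse import csr_matrix

def norm2(v):
    # assumption: the original norm2 helper returns the Euclidean norm of a sparse row
    return np.sqrt(csr_matrix.dot(v, v.T)[0, 0])

a = csr_matrix(np.array([[1.0, 0.0, 2.0]]))
b = csr_matrix(np.array([[2.0, 1.0, 0.0]]))
cos_ab = csr_matrix.dot(a, b.T)[0, 0] / (norm2(a) * norm2(b))
print(cos_ab)  # 2 / (sqrt(5) * sqrt(5)) = 0.4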
Example #32
def smoothed_cosine(a, b):
    # calculate set intersection by converting to binary and taking the dot product
    overlap = csr_matrix.dot(binarize(a), binarize(b).T)[0, 0]

    # smooth cosine by discounting by set intersection
    return (overlap / (SMOOTHING + overlap)) * cosine(a, b)
Example #33
 def cosine_dist(x, y):
     x_n = csr_matrix.sqrt(csr_matrix.dot(x, x.T))
     y_n = csr_matrix.sqrt(csr_matrix.dot(y, y.T))
     return 1 - csr_matrix.dot(x, y.T) / (x_n * y_n)