def cos_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = linalg.norm(vec1)
    norm_vec2 = linalg.norm(vec2)
    if norm_vec1 * norm_vec2 != 0:
        return dot_product / (norm_vec1 * norm_vec2)
    else:
        return 0
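A minimal usage sketch (assuming `np` is NumPy and `linalg` here refers to `numpy.linalg`; the snippet's own imports are not shown):

import numpy as np
from numpy import linalg

v1 = np.array([1.0, 2.0, 3.0])
v2 = np.array([2.0, 4.0, 6.0])
print(cos_similarity(v1, v2))  # parallel vectors -> 1.0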
Example #2
def corrSparsityCliques(x, obj, constraints):
    n = len(x)  # size of the independent variable vector
    # Initialise an appropriate identity matrix (lil_matrix format)
    C = identity(n, format='lil')

    # Retrieve cross-terms of the objective function and update the C matrix
    objectiveCrossDependencies = getObjectiveCrossDependencies(obj, x)
    updateCWithCrossDependencies(objectiveCrossDependencies, C)

    # Retrieve co-dependent terms for every constraint and update the C matrix
    for constraint in constraints:
        constraintCrossDependencies = getConstraintCodependencies(constraint, x)
        updateCWithCrossDependencies(constraintCrossDependencies, C)

    # Convert into CSC structure, the most efficient format for the next steps
    C = csc_matrix(C)
    # Ensure strict diagonal dominance of C
    C = C + norm(C, ord=1) * identity(n, format='csc')

    # Generate the clique structure
    cliqueStructure = cliquesFromSpMatD(C)

    return cliqueStructure
Example #3
def l2_normalize(self):
    '''
    L2-normalize all vectors in the matrix.
    '''
    l2norm = linalg.norm(self.matrix, axis=1, ord=2)
    l2norm[l2norm == 0.0] = 1.0  # convert 0 norms to 1 to avoid division by zero
    self.matrix = csr_matrix(self.matrix / l2norm.reshape(len(l2norm), 1))
Example #4
def get_pagerank(adj_mat, theta=.85, epsilon=1e-03, max_iter=20):
    """
    Returns the vector of pagerank scores
    :param adj_mat: (scipy.sparse.csc.csc_matrix) n x n
    :param theta: (numeric) damping factor
    :param epsilon: (numeric) convergence parameter
    :param max_iter: (int) maximum number of iterations
    :return: vector of pagerank scores n x 1
    """
    n = adj_mat.shape[0]
    g_mat = get_gmat(adj_mat, theta)
    pr_vec = sparse.csc_matrix(np.ones(n)) / n
    norm_iter = n  # start above the convergence threshold

    i = 0
    norm = []

    while (norm_iter > epsilon * n) and (i < max_iter):
        pr_iter = pr_vec.dot(g_mat)
        norm_iter = linalg.norm(pr_vec - pr_iter)
        pr_vec = pr_iter

        i += 1
        norm += [norm_iter]
        print("iter {0}: {1}".format(i, norm_iter))
    return pr_vec.T
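The helper `get_gmat` is not shown above. A plausible sketch, assuming the standard damped PageRank ("Google") matrix G = theta * P + (1 - theta)/n * J, with P the row-normalized adjacency matrix and J the all-ones matrix:

import numpy as np
from scipy import sparse

def get_gmat(adj_mat, theta):
    # Hypothetical reconstruction; the original helper is not part of the snippet.
    n = adj_mat.shape[0]
    row_sums = np.asarray(adj_mat.sum(axis=1)).ravel()
    row_sums[row_sums == 0] = 1.0  # dangling nodes: avoid division by zero
    P = sparse.diags(1.0 / row_sums) @ adj_mat  # row-normalize
    return sparse.csc_matrix(theta * P + (1 - theta) / n * np.ones((n, n)))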
Example #5
def normalize_sparse(M, norm="frag", order=1, iterations=3):
    """Applies a normalization type to a sparse matrix.
    """

    try:
        from scipy.sparse import csr_matrix
    except ImportError as e:
        print(str(e))
        print("I am peforming dense normalization by default.")
        return normalize_dense(M.todense())
    r = csr_matrix(M)
    if norm == "SCN":
        for _ in range(1, iterations):
            row_sums = np.array(r.sum(axis=1)).flatten()
            col_sums = np.array(r.sum(axis=0)).flatten()
            row_indices, col_indices = r.nonzero()
            r.data /= row_sums[row_indices] * col_sums[col_indices]

    elif norm == "global":
        try:
            from scipy.sparse import linalg
            r = linalg.norm(M, ord=order)
        except (ImportError, AttributeError) as e:
            print(str(e))
            print("I can't import linalg tools for sparse matrices.")
            print("Please upgrade your scipy version to 0.16.0.")

    elif callable(norm):
        r = norm(M)

    else:
        print("Unknown norm. Returning input as fallback")

    return r
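A usage sketch for the SCN branch (assuming `np` is NumPy; `normalize_dense` belongs to the same module):

import numpy as np
from scipy.sparse import random as sparse_random

M = sparse_random(10, 10, density=0.5, format='csr', random_state=0)
M = M + M.T  # symmetrize, as for a contact map
balanced = normalize_sparse(M, norm="SCN", iterations=10)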
Example #7
def run_iterative(T, I, neg_percent_tr, links_te, signs_te, c,
                  convergence_threshold):
    R = T.copy().asformat('csc')   # initialize with T
    R_ = T.copy().asformat('csc')  # initialize with T
    norm2 = float('inf')
    it = 0
    while norm2 > convergence_threshold:
        if it % 2 == 0:
            R_ = c * (T.dot(R)) + (1 - c) * I
        else:
            R = c * (T.dot(R_)) + (1 - c) * I

        norm2 = norm(R - R_)
        print('Iteration {} and difference {}'.format(it, norm2))
        it += 1

        #uncomment to see progress while converging
        #if it % 2 == 0:
        #    evaluate(R.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
        #else:
        #    evaluate(R_.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)

    #get final evaluation
    #if we quit when it = 2 that means it = 1 was the last to execute
    #and so R was the last result
    if it % 2 == 0:
        evaluate(R.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
    else:
        evaluate(R_.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
Example #8
def normalize_test(self, K_test, feats_test):  # K_test unnormalized
    m = K_test.shape[0]
    # norms_test = np.sum(feats_test * feats_test, axis=1)
    norms_test = norm(feats_test, axis=1)
    # matrix of sqrt(K(xtest, xtest) * K(xtrain, xtrain)); add 1e-40 if division by zero is a concern
    matrix_norms = np.outer(norms_test, self.norms_train)
    K_test = np.divide(K_test, matrix_norms)
    return K_test
Example #9
def to_svd(beg=200, end=2613, jump=300, with_norm=True):
    """
    Runs SVD on the sparse matrix loaded from with_idf.
    Saves the result for different values of k under the name k,
    where k = 1, 2, 3, ..., A.shape - 1 (A is the matrix).

    :param beg: first k
    :param end: last k
    :param jump: step between values of k
    :param with_norm: normalize before saving
    :return: writes the sparse matrices to files
    """

    spare = load_sparse_csr('usage_files/with_idf.npz')
    x = []
    for k in range(beg, end, jump):
        print("Starting: " + str(k))
        U, sigma, V = svds(spare, k=k)
        print("Done svd")
        reconsign = csc_matrix(U) * diags(sigma, format='csc') * csc_matrix(V)
        print("Done multiplication")
        if with_norm:
            x = [(1 / norm(reconsign.getcol(i)))
                 for i in range(reconsign.shape[1])]
            reconsign = reconsign.multiply(csc_matrix(x))
            x = []
        save_sparse('svd/' + str(k), csc_matrix(reconsign))
        print("Saved " + str(k))
Example #10
def lbfgs_step(gfk, k, vecs, props):
    m = props.get("lbfgsMemory", 10)
    q = gfk
    a = {}

    if k == 0:
        return -gfk / linalg.norm(gfk, numpy.inf)

    k = max(vecs.keys()) + 1  # numpy.max fails on dict_keys in Python 3

    bl = max(0, k - m)

    # First loop of the standard L-BFGS two-loop recursion
    for i in range(k - 1, bl - 1, -1):
        (sk, yk, rhok) = vecs[i]
        a[i] = rhok * numpy.dot(sk, q)
        q = q - a[i] * yk

    # Initial Hessian scaling gamma_k = s^T y / y^T y
    (sk, yk, rhok) = vecs[k - 1]
    gammak = numpy.dot(sk, yk) / numpy.dot(yk, yk)

    r = gammak * q

    # Second loop
    for i in range(bl, k):
        (sk, yk, rhok) = vecs[i]
        beta = rhok * numpy.dot(yk, r)
        r = r + sk * (a[i] - beta)

    return -r
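This is the standard L-BFGS two-loop recursion. A minimal invocation sketch, with the layout of `vecs` (iteration index -> (s_k, y_k, rho_k)) inferred from the loop body:

import numpy

gfk = numpy.array([1.0, -2.0, 0.5])             # current gradient
sk = numpy.array([0.1, 0.2, 0.1])               # step s_k
yk = numpy.array([0.3, 0.1, 0.2])               # gradient difference y_k
vecs = {0: (sk, yk, 1.0 / numpy.dot(sk, yk))}   # rho_k = 1 / s_k^T y_k
step = lbfgs_step(gfk, k=1, vecs=vecs, props={})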
Example #11
def orthogonality(A, g):
    """Measure orthogonality between a vector and the null space of a matrix.

    Compute a measure of orthogonality between the null space
    of the (possibly sparse) matrix ``A`` and a given vector ``g``.

    The formula is a simplified (and cheaper) version of formula (3.13)
    from [1]_.
    ``orth =  norm(A g, ord=2)/(norm(A, ord='fro')*norm(g, ord=2))``.

    References
    ----------
    .. [1] Gould, Nicholas IM, Mary E. Hribar, and Jorge Nocedal.
           "On the solution of equality constrained quadratic
            programming problems arising in optimization."
            SIAM Journal on Scientific Computing 23.4 (2001): 1376-1395.
    """
    # Compute vector norms
    norm_g = np.linalg.norm(g)
    # Compute Frobenius norm of the matrix A
    if issparse(A):
        norm_A = linalg.norm(A, ord='fro')
    else:
        norm_A = np.linalg.norm(A, ord='fro')

    # Check if norms are zero
    if norm_g == 0 or norm_A == 0:
        return 0

    norm_A_g = np.linalg.norm(A.dot(g))
    # Orthogonality measure
    orth = norm_A_g / (norm_A * norm_g)
    return orth
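A quick usage sketch (assuming the module-level imports implied by the body: `np` = NumPy, `issparse` from scipy.sparse, and `linalg` = scipy.sparse.linalg):

import numpy as np
from scipy.sparse import random as sparse_random

A = sparse_random(5, 8, density=0.3, format='csr', random_state=0)
g = np.ones(8)
print(orthogonality(A, g))  # near 0 when g lies close to the null space of A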
Example #12
def baseline():
    filename = './data/GSM3067191_08hpf.csv'  # 8hpf Zebrafish embryos
    df = pd.read_csv(filename)

    # Each row now corresponds to a cell (example) and each column to a gene (feature)
    data = df.values[:, 1:].astype(float).T  # np.float was removed in NumPy 1.24
    # Make data sparse
    sata = sparse.dok_matrix(data)

    k = 50
    lsvec, svals, rsvect = la.svd(data, full_matrices=False)  # thin SVD so the shapes align below
    dnorm = sla.norm(sata)
    approx = lsvec.dot(np.diag(svals)).dot(rsvect)
    print("SVD reconstruction error:", la.norm(sata - approx) / dnorm)

    avgs = np.sum(data, axis=0) / data.shape[0]


    plt.plot(avgs)
    plt.show()

    expression_counts = np.sum(sata, axis=0)
    best = np.array(np.argsort(-expression_counts))[0]
    common = best[:k]
    uncommon = best[k:]
    common_norm = la.norm(data[:, common])
    print("Baseline reconstruction error:", np.sqrt(dnorm**2 - common_norm** 2) / dnorm)
Example #13
def cosine_similarities(raw_texts,
                        dictionary_path='dictionary/AmericanDictionary.csv'):
    dictionary = read_csv(dictionary_path, sep=';', on_bad_lines='skip')  # error_bad_lines was removed in pandas 2.0
    documents_per_code = dictionary.groupby('Icd1')['DiagnosisText'].agg(
        lambda x: ' '.join(x))
    tfidf_mapper = TfidfVectorizer()
    dictionary_vectors = tfidf_mapper.fit_transform(documents_per_code)
    raw_texts_vectors = tfidf_mapper.transform(raw_texts)
    products = raw_texts_vectors.dot(dictionary_vectors.T)
    raw_norms = norm(raw_texts_vectors, axis=1)
    dictionary_norms = norm(dictionary_vectors, axis=1)
    raw_norms[raw_norms == 0] = 1.0
    dictionary_norms[dictionary_norms == 0] = 1.0
    d = products / np.expand_dims(raw_norms, axis=1)
    d /= np.expand_dims(dictionary_norms, axis=0)
    return d
Example #14
def gs_solve(D, b, lam, tol=1e-8):
    "Solve the smoothing problem with Gauss-Seidel iteration. Variables are the same as in the assignment."

    (m, n) = np.shape(D)
    ident = spsp.identity(n)
    A = lam * D.T @ D + ident

    # The matrix A is our target; we want to solve Ax = b. Gauss-Seidel splits
    # A into two components: lower triangular and strictly upper triangular.
    L = spsp.tril(A, format='csc')
    U = A - L

    assert spspla.norm(A - (L + U), np.inf) < 1e-3

    Linv = spspla.inv(L)

    # Zero initial guess; `pred_shape` and `tol` were undefined in the original
    # snippet, so (n, 1) and a default tolerance are assumed here.
    guess = spsp.csc_matrix(np.zeros((n, 1)))
    err = 10

    while err > tol:
        temp = b - U @ guess
        guess = Linv @ temp
        result = A @ guess
        err = npla.norm(result - b, 2)

    return guess
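A small driver sketch (assumptions: `spsp` = scipy.sparse, `spspla` = scipy.sparse.linalg, `npla` = numpy.linalg, and a dense right-hand side):

import numpy as np
import scipy.sparse as spsp

D = spsp.eye(4, format='csc') - spsp.eye(4, k=1, format='csc')  # first-difference operator
b = np.ones((4, 1))
x = gs_solve(D, b, lam=1.0)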
Example #15
def propagation(M, adj, alpha=0.7, tol=10e-6):  # TODO equation, M, alpha
    """Network propagation iterative process

    Iterative algorithm that applies propagation using a random walk on a
    network:
        Initialize::
            X1 = M

        Repeat::
            X2 = alpha * X1 * A + (1-alpha) * M
            X1 = X2

        Until::
            norm(X2-X1) < tol

        Where::
            A : degree-normalized adjacency matrix

    Parameters
    ----------
    M : sparse matrix
        Data matrix to be diffused.

    adj : sparse matrix
        Adjacency matrix.

    alpha : float, default: 0.7
        Diffusion/propagation factor with 0 <= alpha <= 1.
        For alpha = 0 : no diffusion.
        For alpha = 1 : pure diffusion (the original signal M is ignored).

    tol : float, default: 10e-6
        Convergence threshold.

    Returns
    -------
    X2 : sparse matrix
        Smoothed matrix.
    """

    n = adj.shape[0]
    # diagonal = 1 -> degree
    # TODO to set diagonal = 0 before applying eye
    adj = adj + sp.eye(n, dtype=np.float32)

    d = sp.dia_matrix((np.array(adj.sum(axis=0))**-1, [0]),
                      shape=(n, n),
                      dtype=np.float32)
    A = adj.dot(d)

    X1 = M.astype(np.float32)
    X2 = alpha * X1.dot(A) + (1 - alpha) * M
    i = 0
    while norm(X2 - X1) > tol:
        X1 = X2
        X2 = alpha * X1.dot(A) + (1 - alpha) * M
        i += 1
        print(' Propagation iteration = {}  ----- {}'.format(
            i,
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
    return X2
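A toy usage sketch (assuming the module imports implied by the body: `sp` = scipy.sparse, `np` = NumPy, `norm` = scipy.sparse.linalg.norm, plus `datetime`):

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1, 0],
                              [1, 0, 1],
                              [0, 1, 0]], dtype=np.float32))
M = sp.csr_matrix(np.eye(3, dtype=np.float32))
X_smoothed = propagation(M, adj, alpha=0.7)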
Example #16
def katz_hungarian_connection(adj,
                              nodes,
                              n_perturbations,
                              threshold=0.000001,
                              nsteps=10000,
                              dataset_name=None):
    graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.Graph)
    rows = nodes[:n_perturbations]
    cols = nodes[n_perturbations:]
    precomputed_path = f'data/tmp/{dataset_name}_katz.pkl'
    if dataset_name is not None and os.path.exists(precomputed_path):
        print("Loading precomputed katz...")
        with open(precomputed_path, 'rb') as ff:  # binary mode; the original opened the file twice
            sigma = pickle.load(ff)
    else:
        D = nx.linalg.laplacianmatrix.laplacian_matrix(graph) + adj
        D_inv = spalg.inv(D)
        D_invA = D_inv * adj
        l, v = spalg.eigs(D_invA, k=1, which="LR")
        lmax = l[0].real
        alpha = (1 / lmax) * 0.9
        sigma = sp.csr_matrix(D_invA.shape, dtype=float)  # np.float was removed in NumPy 1.24
        print('Calculate sigma matrix')
        for i in range(nsteps):
            sigma_new = alpha * D_invA * sigma + sp.identity(
                adj.shape[0], dtype=float, format='csr')
            diff = abs(spalg.norm(sigma, 1) - spalg.norm(sigma_new, 1))
            sigma = sigma_new
            print(diff)
            if diff < threshold:
                break
            print('Number of steps taken: ' + str(i))
        sigma = sigma.toarray().astype('float')
        if dataset_name is not None:
            pickle.dump(sigma, open(precomputed_path, "wb"))

    similarity = {u: {v: sigma[u][v] for v in cols} for u in rows}

    mtx = np.array(
        [np.array(list(similarity[u].values())) for u in similarity])

    i_u = {i: u for i, u in enumerate(similarity)}
    i_v = {i: v for i, v in enumerate(similarity[list(similarity.keys())[0]])}

    u, v = linear_sum_assignment(+mtx)

    return [[i_u[i], i_v[j]] for i, j in zip(u, v)]
Example #18
def _compute_cost(self):
    pattern = self._R != 0
    proj_ABt = pattern.multiply((self._A).dot(self._B.T))
    cost = splinalg.norm((proj_ABt - self._R).multiply(
        self._ptwise_sqrt_W_sparse), 'fro') ** 2
    norm_A = self._Lambda * np.linalg.norm(self._A, 'fro') ** 2
    norm_B = self._Lambda * np.linalg.norm(self._B, 'fro') ** 2
    return cost + norm_A + norm_B
Example #19
def sparse_cosine(x_mat: _SPARSE_SCIPY_TYPES,
                  y_mat: _SPARSE_SCIPY_TYPES) -> 'np.ndarray':
    """Cosine distance between each row in x_mat and each row in y_mat.
    :param x_mat:  scipy.sparse like array with ndim=2
    :param y_mat:  scipy.sparse like array with ndim=2
    :return: np.ndarray  with ndim=2
    """
    from scipy.sparse.linalg import norm

    # we need the np.asarray otherwise we get a np.matrix object that iterates differently
    return 1 - np.clip(
        np.asarray(
            x_mat.dot(y_mat.T) /
            (np.outer(norm(x_mat, axis=1), norm(y_mat, axis=1)))),
        -1,
        1,
    )
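A usage sketch (assuming the module's `_SPARSE_SCIPY_TYPES` alias and `np` import are available):

import numpy as np
from scipy.sparse import csr_matrix

x = csr_matrix(np.array([[1.0, 0.0], [0.0, 1.0]]))
y = csr_matrix(np.array([[1.0, 0.0]]))
print(sparse_cosine(x, y))  # [[0.], [1.]]: identical row -> distance 0, orthogonal row -> 1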
Example #20
def _test_symmetry_W(problem_filename):
    problem = read_fclib_format(problem_filename)[1]
    A = csr_matrix(N.SBM_to_sparse(problem.M)[1])

    #print("A=", A)
    #print("A=", np.transpose(A))
    #print("A-A^T",A-np.transpose(A))
    print("norm A-A^T", linalg.norm(A - np.transpose(A)))
    is_symmetric = False
    symmetry_test = linalg.norm(A - np.transpose(A), np.inf)
    print("symmetry_test === ", symmetry_test)

    is_symmetric = (symmetry_test <= 1e-12)
    is_dd, dd_test = dd(A)
    print("is_dd", is_dd)
    print("is_symmetric", is_symmetric)
    return is_symmetric, symmetry_test, is_dd, dd_test
Example #21
def kernel_function(self, x, y, p=2, sigma=5.0):

    if self.kernel == 'linear_kernel':
        return np.dot(x, y)
    if self.kernel == 'polynomial_kernel':
        return (1 + np.dot(x, y)) ** p
    if self.kernel == 'gaussian_kernel':
        return np.exp(-linalg.norm(x - y) ** 2 / (2 * (sigma ** 2)))
def searchDirection(gk, Hk, epsilon=1e-8):
    # compute the search direction
    pk = -lsqr(Hk.tocsr(), gk.toarray().ravel())[0]
    # ensure that the directional derivative is negative in this direction
    if -pk @ gk <= epsilon * norm(gk) * np.linalg.norm(pk):
        gk = gk.toarray()  # needed here to avoid a failure later on
        return -gk.ravel()
    return pk
Example #23
def __relative_error(A, W, H, trace_AT_A):
    '''Calculate relative error over sparse A matrix'''

    norm = trace_AT_A - 2 * __trace_of_product(
        H.T, W.T @ A) + __trace_of_product(H.T, W.T @ W @ H)
    relative_norm = norm / slg.norm(A)

    return relative_norm
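For reference, the identity behind this computation: ||A - WH||_F^2 = tr(A^T A) - 2 tr(H^T W^T A) + tr(H^T W^T W H), which lets the error be evaluated without ever forming the dense product W @ H.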
Example #24
def testit(problem='cylinderwake', N=None, nu=None, Re=None,
           nnwtnstps=9, npcrdstps=5, palpha=1e-5):


    vel_nwtn_tol = 1e-14
    # prefix for data files
    data_prfx = problem
    # dir to store data
    ddir = 'data/'
    # paraview output
    ParaviewOutput = True
    proutdir = 'results/'

    femp, stokesmatsc, rhsd = dnsps.get_sysmats(problem=problem, N=N, Re=Re,
                                                nu=nu, scheme='TH',
                                                mergerhs=True, bccontrol=True)
    import scipy.sparse.linalg as spsla
    print('get expmats: ||A|| = {0}'.format(spsla.norm(stokesmatsc['A'])))
    print('get expmats: ||Arob|| = {0}'.format(spsla.norm(stokesmatsc['Arob'])))

    stokesmatsc['A'] = stokesmatsc['A'] + 1./palpha*stokesmatsc['Arob']
    b_mat = 0.*1./palpha*stokesmatsc['Brob']
    brhs = 1.5*b_mat[:, :1] - 1.5*b_mat[:, 1:]

    soldict = stokesmatsc  # containing A, J, JT
    soldict.update(femp)  # adding V, Q, invinds, diribcs

    soldict.update(fv=rhsd['fv']+brhs, fp=rhsd['fp'],
                   N=N, nu=nu,
                   vel_nwtn_stps=nnwtnstps,
                   vel_pcrd_stps=npcrdstps,
                   vel_nwtn_tol=vel_nwtn_tol,
                   ddir=ddir, get_datastring=None,
                   clearprvdata=True,
                   data_prfx=data_prfx,
                   paraviewoutput=ParaviewOutput,
                   vfileprfx=proutdir+'vel_',
                   pfileprfx=proutdir+'p_')

#
# compute the uncontrolled steady state Navier-Stokes solution
#
    v_ss_nse, list_norm_nwtnupd = snu.solve_steadystate_nse(**soldict)
def wolfeLineSearch(l,
                    thetas_k,
                    p,
                    fk,
                    gk,
                    pk,
                    c1,
                    c2,
                    rho,
                    ak=1.0,
                    nmaxls=100):
    pkgk = pk @ gk
    # Increase the step length until the Armijo rule is (almost) not satisfied
    while 0.5 * norm(r(l, thetas_k + rho * ak * pk,
                       p)) < fk + c1 * rho * ak * pkgk[0]:
        ak *= rho

    # Use bisection to find the optimal step length
    aU = ak  # upper step length limit
    aL = 0  # lower step length limit
    for i in range(nmaxls):

        # Find the midpoint of aU and aL
        ak = 0.5 * (aU + aL)

        if 0.5 * norm(r(l, thetas_k + ak * pk, p)) > fk + c1 * ak * pkgk:
            # Armijo condition is not satisfied, decrease the upper limit
            aU = ak
            continue

        gk_ak_pk = Jacobi(l, thetas_k + ak * pk).T @ r(l, thetas_k + ak * pk,
                                                       p)
        if pk @ gk_ak_pk > -c2 * pkgk:
            # Upper Wolfe condition is not satisfied, decrease the upper limit
            aU = ak
            continue

        if pk @ gk_ak_pk < c2 * pkgk:
            # Lower Wolfe condition is not satisfied, increase the lower limit
            aL = ak
            continue

        # Otherwise, all conditions are satisfied, stop the search
        break
    return ak
Example #26
def diffusion(M, adj, alpha=0.7, tol=10e-6):  # TODO equation, M, alpha
    """
    Network propagation iterative process

    Iterative algorithm that applies propagation using a random walk on a
    network:
        Initialize::
            X1 = M

        Repeat::
            X2 = alpha * X1 * A + (1-alpha) * M
            X1 = X2

        Until::
            norm(X2-X1) < tol

        Where::
            A : degree-normalized adjacency matrix

    Parameters
    ----------
    M : sparse matrix
        Data matrix to be diffused.

    adj : sparse matrix
        Adjacency matrix.

    alpha : float, default: 0.7
        Diffusion/propagation factor with 0 <= alpha <= 1.
        For alpha = 0 : no diffusion.
        For alpha = 1 : pure diffusion (the original signal M is ignored).

    tol : float, default: 10e-6
        Convergence threshold.

    Returns
    -------
    X2 : sparse matrix
        Smoothed matrix.

    Notes
    -----
    Copied from the stratipy Python library
    """
    n = adj.shape[0]
    adj = adj + sp.eye(n)

    d = sp.dia_matrix((np.array(adj.sum(axis=0))**-1, [0]), shape=(n, n))
    A = adj.dot(d)

    X1 = M
    X2 = alpha * X1.dot(A) + (1 - alpha) * M
    i = 0
    while norm(X2 - X1) > tol:
        X1 = X2
        X2 = alpha * X1.dot(A) + (1 - alpha) * M
        i += 1
    return X2
Example #27
def dominate_value(mx):
    # Power iteration: repeatedly apply mx and renormalize with the infinity
    # norm until the iterate stops changing, yielding the dominant eigenvector.
    x1 = csr_matrix(random.rand(mx.shape[0], 1))
    x2 = x1
    print(x1.todense())
    x1 = mx * x1

    print(norm(x1, ord=float('inf')))
    x1 = x1 / norm(x1, ord=float('inf'))

    while abs(norm(x1 - x2)) > 0.0000001:
        print(x1.todense())
        print('\n')
        x2 = x1
        x1 = mx.dot(x1)
        x1 = x1 / norm(x1, ord=float('inf'))

    print((x1 / norm(x1)).todense())
Example #28
def test_sparse_varfd_1d(self):
    n_points = 100
    degree = 10
    quad = hm.Quad.gauss_hermite(n_points)
    mat1 = quad.varfd('1', degree, [0, 0], sparse=True).matrix
    mat2 = quad.varfd('x', degree, [0], sparse=True).matrix
    bk_ou = mat1 - mat2
    off_diag = bk_ou - sp.diags(bk_ou.diagonal())
    self.assertAlmostEqual(las.norm(off_diag), 0)
Example #29
def _calculate_neighbor_weight_matrix(self,
                                      train_r: csc_matrix) -> np.ndarray:
    l2norm = norm(train_r, ord=2, axis=1)
    l2norm[l2norm == 0] = 1  # handle zero vectors
    U: csc_matrix = train_r.multiply(1 / l2norm.reshape(-1, 1))
    UUT = U.dot(U.transpose()).toarray()  # dense
    W = np.exp(self._tau * np.power(1 - UUT, self._k))
    np.fill_diagonal(W, 0)
    return W
Example #30
def norm_svd(di, q):
    """
    Computes the norm between di and q (q has norm 1)
    :param di:
    :param q:
    :return:
    """
    a = norm(di)
    return ((q.dot(di) / a).toarray())[0][0]
Example #31
def test_basics(self):
    x = np.array([[-2, -1], [-2, 1], [2, 1], [2, -1]])
    knn = KNNDense(n_neighbors=1)
    knn.fit(x)
    truth = np.zeros(16).reshape((4, 4))
    truth[0, 1] = 1
    truth[2, 3] = 1
    truth = sparse.csr_matrix(truth + truth.T)
    self.assertAlmostEqual(norm(truth - knn.adjacency_), 0)
Example #32
def euclidean_distance(vector1, vector2):
    from scipy.sparse import csr_matrix
    import numpy as np
    from scipy.sparse.linalg import norm
    vector1 = csr_matrix(vector1)
    vector2 = csr_matrix(vector2)
    result = norm(vector1 - vector2)
    #print(result)
    return result
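A quick check of the function (self-contained, since it carries its own imports):

print(euclidean_distance([[1.0, 2.0, 3.0]], [[4.0, 6.0, 3.0]]))  # 5.0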
Example #33
def test_basics(self):
    x = np.array([[-2, -1], [-2, 1], [2, 1], [2, -1]])
    knn = KNeighborsTransformer(n_neighbors=1)
    knn.fit(x)
    gt = np.zeros(16).reshape((4, 4))
    gt[0, 1] = 1
    gt[2, 3] = 1
    gt = sparse.csr_matrix(gt + gt.T)
    self.assertAlmostEqual(norm(gt - knn.adjacency_), 0)
Example #34
def solve(A, name, use_umfpack=False):
    # Compute b such that the exact solution of Ax = b is xe = [1, 1, ...]
    n_rows, n_columns = A.shape
    p_non_zero = A.nnz / (n_rows * n_columns)
    xe = np.ones(n_rows)  # scipy.ones was removed from SciPy; assumes `np` = NumPy is imported
    b = A * xe

    # Solve Ax = b
    start_time = datetime.now()
    x = spsolve(A, b, use_umfpack=use_umfpack)
    t = datetime.now() - start_time

    # || xe - x || / ||xe||
    relative_error = norm(x - xe, 2) / norm(xe, 2)

    row = f"{name},{n_rows},{A.nnz},{relative_error}," + \
            f"{t.seconds}.{t.microseconds},{platform.system()}"
    print(row)
def cosine_simil2(A, B):
	""" Returns the cosine similarity between the rows of A
	and B. The output is a Mx1 vector where M is the number
	of rows of A. A and B are sparse matrices.
	"""
	C = A.dot(B.T)
	AN = spla.norm(A, axis = 1).reshape((A.shape[0],1)) # Reshape into a column vector
	BN = np.linalg.norm(B)
	C = C * (1. / (AN * BN))
	return C
def cosine_simil(A, B):
	""" Returns the cosine similarity between the rows of A
	and B. The output is a Mx1 vector where M is the number
	of rows of A. A and B are sparse matrices.
	"""
	if len(A.shape) == 1:
		A = A.reshape((1,A.shape[0]))
	if len(B.shape) == 1:
		B = B.reshape((1,B.shape[0]))
	C = A.dot(B.T)
	# Reshape into a column vector
	AN = spla.norm(A, axis = 1).reshape((A.shape[0],1)) if issparse(A) else np.linalg.norm(A, axis = 1).reshape((A.shape[0],1))
	BN = spla.norm(B) if issparse(B) else np.linalg.norm(B)
	if issparse(C):
		C = C.multiply(1. / (AN * BN))
	else:
		if len(C.shape) == 1:
			C = C.reshape(C.shape[0],1)
		AN *= BN
		C /= AN
	return C
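A usage sketch for cosine_simil (assuming the module imports implied by the body: `spla` = scipy.sparse.linalg, `issparse` from scipy.sparse, `np` = NumPy):

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
B = csr_matrix(np.array([[1.0, 0.0]]))
print(cosine_simil(A, B).todense())  # [[1.], [0.]]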
Example #37
def genRandomDocInTopic(topic):
    K = 20
    maxlen = -10.0
    best_id = -1
    tc.vec.set_params(norm=None)
    for i in range(K):
        tid = random.choice(tc.inv_topic[topic])
        d = tc.vec.transform([tc.corpus[tid]])
        doclen = norm(d)  # renamed from `len`, which shadowed the builtin
        if doclen > maxlen:
            maxlen, best_id = doclen, tid
    tc.vec.set_params(norm='l2')
    return tc.datas[best_id]['body']
Example #38
    def test_sketch_preserves_frobenius_norm(self):
        # Given the probabilistic nature of the sketches
        # we run the test multiple times and check that
        # we pass all/almost all the tries.
        n_errors = 0
        for A in self.test_matrices:
            if issparse(A):
                true_norm = norm(A)
            else:
                true_norm = np.linalg.norm(A)
            for seed in self.seeds:
                sketch = clarkson_woodruff_transform(
                    A, self.n_sketch_rows, seed=seed,
                )
                if issparse(sketch):
                    sketch_norm = norm(sketch)
                else:
                    sketch_norm = np.linalg.norm(sketch)

                if np.abs(true_norm - sketch_norm) > 0.1 * true_norm:
                    n_errors += 1
        assert_(n_errors == 0)
Example #39
def test_norm_axis(self):
    a = np.array([[ 1, 2, 3],
                  [-1, 1, 4]])

    c = csr_matrix(a)
    # Frobenius norm
    assert_equal(norm(c, axis=0), np.sqrt(np.power(np.asmatrix(a), 2).sum(axis=0)))
    assert_equal(norm(c, axis=1), np.sqrt(np.power(np.asmatrix(a), 2).sum(axis=1)))

    assert_equal(norm(c, np.inf, axis=0), max(abs(np.asmatrix(a)).sum(axis=0)))
    assert_equal(norm(c, np.inf, axis=1), max(abs(np.asmatrix(a)).sum(axis=1)))

    assert_equal(norm(c, -np.inf, axis=0), min(abs(np.asmatrix(a)).sum(axis=0)))
    assert_equal(norm(c, -np.inf, axis=1), min(abs(np.asmatrix(a)).sum(axis=1)))

    assert_equal(norm(c, 1, axis=0), abs(np.asmatrix(a)).sum(axis=0))
    assert_equal(norm(c, 1, axis=1), abs(np.asmatrix(a)).sum(axis=1))

    assert_equal(norm(c, -1, axis=0), min(abs(np.asmatrix(a)).sum(axis=0)))
    assert_equal(norm(c, -1, axis=1), min(abs(np.asmatrix(a)).sum(axis=1)))

    # _multi_svd_norm is not implemented for sparse matrices
    assert_raises(NotImplementedError, norm, c, 2, 0)
Example #40
def ksvd(AA, S, max_iter=10):
    """
    Apply KSVD from the starting dictionary AA, given a set of signals S.
    """
    M, N = AA.shape
    P = S.shape[1]
    A = AA.copy()
    X = zeros((N, P))

    i = 0
    while i < max_iter:
        # SMV, and slow:
        # for p in range(P):
        #     X[:, p] = mp.omp(A, S[:, p].reshape(M, 1), 0.1).flatten()

        # MMV
        # Default: no residual tolerance, max nonzeros 10%
        # Assume dictionary A is normalized!
        X = sklearn.linear_model.orthogonal_mp(A, S)

        print(i, linalg.norm(S - dot(A, X)),
              sum([sparse.zero_norm(X[:, p], 0.01) for p in range(P)]))

        for l in range(N):
            nonzeros = sparse.nonzero_idx(X[l, :])  # xs with nonzero in row 'l'
            if len(nonzeros) == 0:
                continue
            E_w = (S[:, nonzeros] - dot(A, X[:, nonzeros])
                   + dot(A[:, l].reshape(M, 1), X[l, nonzeros].reshape(1, len(nonzeros))))

            try:
                u, s, v = scipy.sparse.linalg.svds(E_w, k=1)
            except Exception:
                continue
            A[:, l] = u.flatten()
            X[l, nonzeros] = s * v.flatten()

        i += 1

    return A, X
Example #41
def test_norm(self):
    a = np.arange(9) - 4
    b = a.reshape((3, 3))
    b = csr_matrix(b)

    # Frobenius norm is the default
    assert_equal(norm(b), 7.745966692414834)
    assert_equal(norm(b, 'fro'), 7.745966692414834)

    assert_equal(norm(b, np.inf), 9)
    assert_equal(norm(b, -np.inf), 2)
    assert_equal(norm(b, 1), 7)
    assert_equal(norm(b, -1), 6)

    # _multi_svd_norm is not implemented for sparse matrices
    assert_raises(NotImplementedError, norm, b, 2)
    assert_raises(NotImplementedError, norm, b, -2)
Example #42
def sp_expm(A, p=13, sparse=False):
    """
    Sparse matrix exponential.
    
    Reference
    ---------
    Expokit, ACM-Transactions on Mathematical Software, 24(1):130-156, 1998
    
    """
    if _isdiag(A.indices, A.indptr, A.shape[0]):
        A.data = np.exp(A.data)
        return A
    N = A.shape[0]
    c = np.zeros(p+1,dtype=float)
    # Pade coefficients
    c[0] = 1
    for k in range(p):
        c[k+1] = c[k]*((p-k)/((k+1.0)*(2.0*p-k)))
    # Scaling
    if sparse:
        A = A.tocsc()
        nrm = spla.norm(A, np.inf)
    else:
        A = A.toarray()
        nrm = la.norm(A, np.inf)
    if nrm > 0.5:
        nrm = max(0, np.fix(np.log(nrm)/np.log(2))+2)
        A = 2.0**(-nrm)*A
    # Horner evaluation of the irreducible fraction
    if sparse:
        I = sp.identity(N, dtype=complex, format='csc')
    else:
        I = np.identity(N, dtype=complex)
    A2 = A.dot(A)
    Q = c[-1]*I
    P = c[p]*I
    odd = 1
    for k in range(p-2,-1,-1):
        if odd == 1:
            Q = Q.dot(A2) +c[k]*I
        else:
            P = P.dot(A2) +c[k]*I
        odd = 1-odd
    if odd == 1:
        Q = Q.dot(A)
        Q = Q-P
        if sparse:
            E = -(I+2.0*spla.spsolve(Q,P))
        else:
            E = -(I+2.0*la.solve(Q,P))
    else:
        P = P.dot(A)
        Q = Q-P
        if sparse:
            E = I+2.0*spla.spsolve(Q,P)
        else:
            E = I+2.0*la.solve(Q,P)
    # Squaring
    for k in range(int(nrm)):
        E = E.dot(E)
       
    return sp.csr_matrix(E)
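A usage sketch (note: sp_expm relies on a private `_isdiag` helper and the aliases `sp`, `spla`, `la`, `np` from its host module, none of which are shown here):

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0.0, 1.0], [-1.0, 0.0]]))
E = sp_expm(A, sparse=True)  # expm of a rotation generator: [[cos 1, sin 1], [-sin 1, cos 1]]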
Example #43
    def _ASD(self, M, r = None, reltol=1e-5, maxiter=5000):
        """
        Alternating Steepest Descent (ASD)
        Taken from Low rank matrix completion by alternating steepest descent methods
        Jared Tanner and Ke Wei
        SIAM J. IMAGING SCIENCES (2014)
        
        We have a matrix M with incomplete entries,
        and want to estimate the full matrix
        
        Solves the following relaxation of the problem:
        minimize_{X,Y} \frac{1}{2} ||P_{\Omega}(Z^0) - P_{\Omega}(XY)||_F^2
        Where \Omega represents the set of m observed entries of the matrix M
        and P_{\Omega}() is an operator that represents the observed data. 
        
        Inputs:
         M := Incomplete matrix, with NaN on the unknown matrix
         r := hypothesized rank of the matrix
        
        Usage:
         Just call the function _ASD(M)
        """
    
        # Get shape and Omega
        m, n = M.shape
        if r is None:
            r = min(m-1, n-1, 50)
    
        # Set relative error
        I, J = [], []
        M_list = []
        for i, j in M.keys():
            I.append(i)
            J.append(j)
            M_list.append(M[i,j])
        M_list = np.array(M_list)
        Omega = [I,J]
        frob_norm_data = linalg_s.norm(M)
        relres = reltol * frob_norm_data
    
        # Initialize
        M_omega = M.tocsc()
        U, s, V = linalg_s.svds(M_omega, r)
        S = np.diag(s)
        X = np.dot(U, S)
        Y = V
        itres = np.zeros((maxiter+1, 1))
    
        XY = np.dot(X, Y)
        diff_on_omega = M_list - XY[Omega]
        res = linalg.norm(diff_on_omega)
        iter_c = 0
        itres[iter_c] = res/frob_norm_data 

        while iter_c < maxiter and res >= relres:
            
            # Gradient for X
            diff_on_omega_matrix = np.zeros((m,n))
            diff_on_omega_matrix[Omega] = diff_on_omega
            grad_X = np.dot(diff_on_omega_matrix, np.transpose(Y))
            
            # Stepsize for X
            delta_XY = np.dot(grad_X, Y)
            tx = linalg.norm(grad_X,'fro')**2/linalg.norm(delta_XY)**2
        
            # Update X
            X = X + tx*grad_X
            diff_on_omega = diff_on_omega-tx*delta_XY[Omega]
        
            # Gradient for Y
            diff_on_omega_matrix = np.zeros((m,n))
            diff_on_omega_matrix[Omega] = diff_on_omega
            Xt = np.transpose(X)
            grad_Y = np.dot(Xt, diff_on_omega_matrix)
        
            # Stepsize for Y
            delta_XY = np.dot(X, grad_Y)
            ty = linalg.norm(grad_Y,'fro')**2/linalg.norm(delta_XY)**2
        
            # Update Y
            Y = Y + ty*grad_Y
            diff_on_omega = diff_on_omega-ty*delta_XY[Omega]
            
            res = linalg.norm(diff_on_omega)
            iter_c = iter_c + 1
            itres[iter_c] = res/frob_norm_data
    
        M_out = np.dot(X, Y)
    
        out_info = [iter_c, itres]
    
        return M_out, out_info    
def cosine_distance(x, y):
    xy = x.dot(y.T)         # should be 1x1 mat
    dist = xy / norm(x) / norm(y)
    return 1 - dist[0, 0]   # ~ arccos()
Example #45
def get_svd_res(mat, components):
	usvt = svds(mat, k=components, which='LM')  # compute svd
	if usvt[1][-1] == 0:
		usvt = flip_svd_res(usvt)
	enrgy = usvt[1][-1] / norm(mat)  # energy of the first principal component
	var = enrgy ** 2  # variance of the first principal component
	return usvt, enrgy, var
def compute_depth(mask_array, normal_array, threshold=100):
	index_map = -np.ones(mask_array.shape)

	x = []
	y = []
	ind = 0
	for (xT, value) in np.ndenumerate(mask_array):
		if(value > threshold):
			index_map[xT] = ind
			x.append(xT[1])
			y.append(-xT[0])
			ind = ind+1

	row = []
	col = []
	data = []
	b = []

	i = 0
	for (xT, value) in np.ndenumerate(index_map):
		if value >= 0:
			normal = normal_array[xT]/128.0 - 1.0
			normal = normal/linalg.norm(normal)


			if not np.isnan(np.sum(normal)):
				#x
				iother = index_map[xT[0], xT[1]+1]
				if abs(normal[2]) > 0.01:
					if iother >= 0:
						row.append(i)
						col.append(value)
						data.append(-normal[2])
						row.append(i)
						col.append(iother)
						data.append(normal[2])
						b.append(-normal[0])
						i = i+1


					#y
					iother = index_map[xT[0]-1, xT[1]]
					if iother >= 0:
						row.append(i)
						col.append(value)
						data.append(-normal[2])
						row.append(i)
						col.append(iother)
						data.append(normal[2])
						b.append(-normal[1])
						i = i+1


	mat = scipy.sparse.coo_matrix((data, (row, col)), shape=(i, ind)).tocsc()
	b = np.array(b)

	z = scipy.sparse.linalg.lsqr(mat,b, iter_lim=1000, show=True)
	
	ind = 0
	for (xT, value) in np.ndenumerate(index_map):
		if value >= 0:
			index_map[xT] = z[0][ind]
			ind = ind+1

	return index_map
Example #47
def cosine_distance(x, y):
    xy = x.dot(y.T)
    dist = xy/(norm(x)*norm(y))
    return 1-dist[0,0]
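A quick check (assuming `norm` is scipy.sparse.linalg.norm, as elsewhere on this page):

from scipy.sparse import csr_matrix
x = csr_matrix([[1.0, 0.0]])
y = csr_matrix([[0.0, 1.0]])
print(cosine_distance(x, y))  # orthogonal vectors -> 1.0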
Example #48
def lbfgs(f, xk, gfk, k, vecs, props):
    logger = logging.getLogger("phf.innersolve")
    solve_fraction = props.get("solveFraction", 0.2)
    stepFactor = props.get("innerSolveStepFactor", 0.5)
    average = props.get("innerSolveAverage", False)
    n = len(xk)
    w = lbfgs_step(gfk, k, vecs, props)
    wsum = zeros(n)
    wsum_count = 0
    gnorm = linalg.norm(gfk)
    
    # Each iteration requires two evaluations, so we halve the max iters here
    maxiter = int(ceil(solve_fraction*f.parts/2.0))
    
    pkHpk = 0
    wHw = 0
    
    for i in range(maxiter):
        mv = f.make_mv_rand(xk)
    
        Hw = mv(w)
        ri = Hw + gfk
        
        pk = -lbfgs_step(ri, k+i, vecs, props)

        mpk = mv(pk)
        Hpk = mpk
        pkHpk = dot(pk, Hpk)
        sst = stepFactor*dot(ri, pk) / pkHpk
        wHw = dot(w, Hw)
        
        ###### Update quasi-newton approximation
        kmax = max_key(vecs)
        if pkHpk < 0:
            raise Exception("Hessian is not positive semi-definite. "
                            "Try using the Gauss-Newton approximation to the Hessian. "
                            "If your problem is convex, your gradient "
                            "calculation may just be wrong")
        else:
            vecs[kmax] = (pk, Hpk, 1.0 / numpy.dot(pk,Hpk))
            kmax = kmax + 1
            if wHw > 0:
                vecs[kmax] = (w, Hw, 1.0 / numpy.dot(w,Hw))
    
        wp = w - sst*pk
        
        cosdirection = dot(wp, gfk) / (linalg.norm(wp) * gnorm)
        
        if cosdirection < 0: 
            w = wp
            
            if i == 0 or (i % max(1,maxiter / 10) == 0):
                logger.debug("w: %s", w[0:min(5, n)])
        else:
            logger.debug("Skipping w update to ensure w is a descent direction")
        wnorm = linalg.norm(w)

        if i > maxiter/2:
            wsum += w
            wsum_count += 1

    if average:
        wavg = wsum / wsum_count 
        return wavg
    else:
        return w
Example #49
def ncutW(W, num_eigs=10, kmeans_iters=10, offset = 0.5):
    """Run the normalized cut algorithm on the affinity matrix, W.

    (as implemented in Ng, Jordan, and Weiss, 2002)

    Parameters
    ----------
    W : scipy sparse matrix
        Square matrix with high values for edges to be preserved, and low
        values for edges to be cut.
    num_eigs : int, optional
        Number of eigenvectors of the affinity matrix to use for clustering.
    kmeans_iters : int, optional
        Number of iterations of the k-means algorithm to run when clustering
        eigenvectors.
    offset : float, optional
        Diagonal offset used to stabilise the eigenvector computation.

    Returns
    -------
    labels : array of int
        `labels[i]` is an integer value mapping node/row `i` to the cluster
        ID `labels[i]`.
    eigenvectors : list of array of float
        The computed eigenvectors of `W + offset * I`, where `I` is the
        identity matrix of same size as `W`.
    eigenvalues : array of float
        The corresponding eigenvalues.
    """
    
    n, m = W.shape
    # Add an offset in case some rows are zero
    # We also add the offset below to the diagonal matrix. See (Yu, 2001),
    # "Understanding Popout through Repulsion" for more information.  This
    # helps to stabilize the eigenvector computation.
    W = W + sparse.diags(np.full(n, offset))
    
    d = np.ravel(W.sum(axis=1))
    Dinv2 = sparse.diags(1 / (np.sqrt(d) + offset*np.ones(n)))
    P = Dinv2 @ W @ Dinv2
    
    # Get the eigenvectors and sort by eigenvalue
    eigvals, U = eigs(P, num_eigs, which='LR')
    eigvals = np.real(eigvals)  # it should be real anyway
    U = np.real(U)
    ind = np.argsort(eigvals)[::-1]
    eigvals = eigvals[ind]
    U = U[:, ind]
    
    # Normalize
    for i in range(n):
        U[i, :] /= norm(U[i, :])
    
    # Cluster them into labels, running k-means multiple times
    labels_list = []
    distortion_list = []
    for _iternum in range(kmeans_iters):
        # Cluster
        centroid, labels = vq.kmeans2(U, num_eigs, minit='points')
        # Calculate distortion
        distortion = 0
        for j in range(num_eigs):
            numvals = np.sum(labels == j)
            if numvals == 0:
                continue
            distortion += np.mean([norm(v - centroid[j])**2 for (i, v) in
                                   enumerate(U) if labels[i] == j])
        # Save values
        labels_list.append(labels)
        distortion_list.append(distortion)
    # Use lowest distortion
    labels = labels_list[np.argmin(distortion_list)]
    
    return labels, U, eigvals
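A toy usage sketch (assuming the module imports implied by the body: `sparse` = scipy.sparse, `eigs` from scipy.sparse.linalg, `vq` = scipy.cluster.vq, and a `norm` that accepts dense vectors, e.g. numpy.linalg.norm):

import numpy as np
from scipy import sparse

# Two tight pairs with weak cross links
W = sparse.csr_matrix(np.array([[0.0, 5.0, 0.1, 0.0],
                                [5.0, 0.0, 0.0, 0.1],
                                [0.1, 0.0, 0.0, 5.0],
                                [0.0, 0.1, 5.0, 0.0]]))
labels, eigenvectors, eigenvalues = ncutW(W, num_eigs=2)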