예제 #1
0
def trans_eig(A, partition, k=3, perc_top=99.95, perc_bottom=1, gc=None):
    """
    Compute compartmentalization eigenvectors on trans contact data

    Parameters
    ----------
    A : 2D array
        balanced whole genome contact matrix; must be square. The input is
        copied, so the caller's matrix is not modified by this function.
    partition : sequence of int
        bin offset of each contiguous region to treat separately (e.g.,
        chromosomes or chromosome arms); must increase strictly from 0 to
        the number of bins in A
    k : int
        number of eigenvectors to compute; default = 3
    perc_top : float (percentile)
        filter - clip trans blowout contacts above this cutoff; default = 99.95
    perc_bottom : float (percentile)
        filter - remove bins with trans coverage below this cutoff; default=1
    gc : 1D array, optional
        GC content per bin for reordering and orienting the primary compartment
        eigenvector; not performed if no array is provided

    Returns
    -------
    eigenvalues, eigenvectors

    Raises
    ------
    ValueError
        If A is not a square 2D matrix, or if partition is empty or is not a
        strictly increasing sequence running from 0 to the number of bins.
    """
    # Convert (and copy) before validating so that array-likes such as nested
    # lists are checked instead of failing on a missing ``.shape`` attribute.
    A = np.array(A)
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        raise ValueError("A is not a square 2D matrix")

    A[np.isnan(A)] = 0
    n_bins = A.shape[0]
    if (len(partition) == 0
            or not (partition[0] == 0 and partition[-1] == n_bins
                    and np.all(np.diff(partition) > 0))):
        raise ValueError("Not a valid partition. Must be a monotonic sequence "
                         "from 0 to {}.".format(n_bins))

    # Delete cis data: zero every diagonal block delimited by the partition
    for i0, i1 in zip(partition[:-1], partition[1:]):
        A[i0:i1, i0:i1] = 0

    # Label each bin with the index of its region; two bins are in trans
    # exactly when their labels differ.
    region_sizes = np.diff(partition)
    part_ids = np.repeat(np.arange(len(region_sizes)), region_sizes)
    transmask = (part_ids[:, None] != part_ids[None, :])

    # Filter heatmap
    A = _filter_heatmap(A, transmask, perc_top, perc_bottom)

    # Fake cis and re-balance (two rounds, each followed by balancing)
    A = _fake_cis(A, ~transmask)
    A = numutils.iterativeCorrection(A)[0]
    A = _fake_cis(A, ~transmask)
    A = numutils.iterativeCorrection(A)[0]

    # Compute eig on the matrix expressed as relative deviation from its mean
    Abar = A.mean()
    O = (A - Abar) / Abar
    lam, vecs = _eig(O, k)
    lam, vecs = _orient_eigs(lam, vecs, gc)

    return lam, vecs
def normalize_dense(M, norm="frag", order=1, iterations=3):
    """Normalize a dense matrix and return the symmetrized result.

    Parameters
    ----------
    M : array-like
        Dense input matrix. It is copied as float64; the input itself is
        not modified by the built-in norms.
    norm : str or callable
        Normalization to apply:

        * ``"SCN"``: ``iterations`` rounds of dividing entries by their row
          sum, then by their column sum, skipping positions tied to a zero
          sum.
        * ``"mirnylib"``: ``iterativeCorrection`` from mirnylib; if mirnylib
          cannot be imported, messages are printed and the default norm is
          used instead.
        * ``"frag"`` (default): divide every entry by the outer product of
          the column-wise and row-wise vector norms, skipping zero
          products. The loop runs ``iterations - 1`` times.
        * ``"global"``: divide the whole matrix by its matrix norm.
        * a callable: called as ``norm(M)`` on the original input.
        * anything else: a message is printed and the copy is used as is.
    order : number
        Order passed to ``np.linalg.norm`` ("frag" and "global" only).
    iterations : int
        Iteration count for the iterative norms.

    Returns
    -------
    The normalized matrix averaged with its transpose.
    """

    matrix = np.array(M, np.float64)
    norm_order = np.float64(order)

    if norm == "SCN":
        for _ in range(iterations):

            # Row step: a position is rescaled only if both of its indices
            # have a non-zero row sum.
            row_totals = matrix.sum(axis=1)
            row_ok = row_totals != 0
            row_mask = row_ok[:, None] & row_ok[None, :]
            row_divisors = np.ones(row_totals.shape)[None, :] * row_totals[:, None]
            matrix[row_mask] = matrix[row_mask] / row_divisors[row_mask]

            # Column step, computed on the row-normalized matrix.
            col_totals = matrix.sum(axis=0)
            col_ok = col_totals != 0
            col_mask = col_ok[:, None] & col_ok[None, :]
            col_divisors = np.ones(col_totals.shape)[:, None] * col_totals[None, :]
            matrix[col_mask] = matrix[col_mask] / col_divisors[col_mask]

    elif norm == "mirnylib":
        try:
            from mirnylib import numutils as mirny_utils

            matrix = mirny_utils.iterativeCorrection(matrix, iterations)[0]
        except ImportError as import_error:
            print(str(import_error))
            print("I can't find mirnylib.")
            print("Please install it from https://bitbucket.org/mirnylab/mirnylib")
            print("I will use default norm as fallback.")
            return normalize_dense(M, order=order, iterations=iterations)

    elif norm == "frag":
        # NOTE: starts at 1, so the body executes iterations - 1 times.
        for _ in range(1, iterations):
            col_norms = np.linalg.norm(matrix, ord=norm_order, axis=0)
            row_norms = np.linalg.norm(matrix, ord=norm_order, axis=1)
            scale = np.tensordot(col_norms, row_norms, axes=0)
            usable = scale != 0
            matrix[usable] = matrix[usable] / scale[usable]

    elif norm == "global":
        matrix_norm = np.linalg.norm(matrix, ord=norm_order)
        matrix /= matrix_norm

    elif callable(norm):
        matrix = norm(M)

    else:
        print("Unknown norm. Returning input as fallback")

    symmetrized = (matrix + matrix.T) / 2
    return symmetrized
예제 #3
0
def normalize_dense(M, norm="frag", order=1, iterations=3):
    """Applies one of the many normalization types to input dense matrix.
    Will also apply any callable norms such as a user-made or a lambda function.

    Parameters
    ----------
    M : array-like
        Dense input matrix. A float64 copy is normalized, so integer
        matrices are handled without truncation and the input is not
        modified by the built-in norms.
    norm : str or callable
        "SCN", "mirnylib", "frag" (default), "global", or a callable that is
        invoked as ``norm(M)``. Any other value prints a message and leaves
        the matrix values unchanged. "mirnylib" falls back to the default
        norm (after printing a notice) when mirnylib cannot be imported.
    order : number
        Order passed to ``np.linalg.norm`` ("frag" and "global" only).
    iterations : int
        Iteration count. "SCN" performs ``iterations`` rounds; the "frag"
        loop performs ``iterations - 1`` rounds.

    Returns
    -------
    The normalized matrix averaged with its transpose.
    """

    # Work on a float copy: with an integer dtype the masked assignments
    # below would truncate to integers and ``s /= s_norm`` would raise.
    s = np.array(M, dtype=np.float64)
    floatorder = np.float64(order)

    if norm == "SCN":
        for _ in range(0, iterations):

            # Divide by row sums wherever both indices have a non-zero sum
            sumrows = s.sum(axis=1)
            maskrows = (sumrows != 0)[:, None] * (sumrows != 0)[None, :]
            sums_row = sumrows[:, None] * np.ones(sumrows.shape)[None, :]
            s[maskrows] = s[maskrows] / sums_row[maskrows]

            # Then divide by the column sums of the row-normalized matrix
            sumcols = s.sum(axis=0)
            maskcols = (sumcols != 0)[:, None] * (sumcols != 0)[None, :]
            sums_col = sumcols[None, :] * np.ones(sumcols.shape)[:, None]
            s[maskcols] = s[maskcols] / sums_col[maskcols]

    elif norm == "mirnylib":
        try:
            from mirnylib import numutils as ntls
            s = ntls.iterativeCorrection(s, iterations)[0]
        except ImportError as e:
            print(str(e))
            print("I can't find mirnylib.")
            print("Please install it from https://bitbucket.org/mirnylab/mirnylib")
            print("I will use default norm as fallback.")
            return normalize_dense(M, order=order, iterations=iterations)

    elif norm == "frag":
        # NOTE: the range starts at 1, so this runs iterations - 1 times.
        for _ in range(1, iterations):
            s_norm_x = np.linalg.norm(s, ord=floatorder, axis=0)
            s_norm_y = np.linalg.norm(s, ord=floatorder, axis=1)
            s_norm = np.tensordot(s_norm_x, s_norm_y, axes=0)
            nonzero = s_norm != 0
            s[nonzero] = s[nonzero] / s_norm[nonzero]

    elif norm == "global":
        s_norm = np.linalg.norm(s, ord=floatorder)
        s /= s_norm

    elif callable(norm):
        s = norm(M)

    else:
        print("I don't recognize this norm, I am returning input matrix by default.")

    return (s + s.T) / 2
예제 #4
0
def cis_eig(A, k=3, robust=True, gc=None, classic=False):
    """
    Compute compartment eigenvector on a cis matrix

    Parameters
    ----------
    A : 2D array
        balanced cis contact matrix. The input is copied; non-finite entries
        of the copy are set to zero before any processing.
    k : int
        number of eigenvectors to compute; default = 3
    robust : bool
        Clip values above the 99.9th percentile, overwrite the three central
        diagonals (-1, 0, 1) with twice the mean of the second diagonal, and
        clip the observed/expected matrix at its 99.9th percentile as well
    gc : 1D array, optional
        GC content per bin for choosing and orienting the primary compartment
        eigenvector; not performed if no array is provided
    classic : bool
        Do it old-school: balance the observed/expected matrix with
        ``numutils.iterativeCorrection`` and call ``numutils.EIG`` with its
        default settings, instead of decomposing ``OE - 1`` directly

    Returns
    -------
    eigenvalues, eigenvectors
        ``eigenvalues`` has length ``k``; ``eigenvectors`` has one row per
        eigenvector and one column per bin of ``A``, with NaN at bins that
        have no positive coverage. Both are filled with NaN when the matrix
        has at most 5 bins, at most 5 bins with positive coverage, or (in
        classic mode) when balancing produces non-finite values.
    """
    A = np.array(A)
    A[~np.isfinite(A)] = 0

    n_bins = A.shape[0]
    # Bins with positive total signal; only these enter the decomposition
    mask = A.sum(axis=0) > 0

    if n_bins <= 5 or mask.sum() <= 5:
        return np.full(k, np.nan), np.full((k, n_bins), np.nan)

    if robust:
        A = np.clip(A, 0, np.percentile(A, 99.9))
        fill_value = np.mean(np.diag(A, 2) * 2)
        for d in [-1, 0, 1]:
            numutils.fillDiagonal(A, fill_value, d)
            # Keep excluded bins at zero after each diagonal fill
            A[~mask, :] = 0
            A[:, ~mask] = 0

    OE = numutils.observedOverExpected(A[mask, :][:, mask])

    if robust:
        OE = np.clip(OE, 0, np.percentile(OE, 99.9))

    if classic:
        OE = numutils.iterativeCorrection(OE)[0]
        if (~np.isfinite(OE)).sum() > 0:
            # Same (eigenvalues, eigenvectors) order as every other return
            return np.full(k, np.nan), np.full((k, n_bins), np.nan)
        # mean-centered (subtract mean) -- presumably EIG's default; confirm
        # against numutils.EIG
        eigvecs_compressed, eigvals = numutils.EIG(OE, k)
    else:
        eigvecs_compressed, eigvals = numutils.EIG((OE - 1.0),
                                                   k,
                                                   subtractMean=False,
                                                   divideByMean=False)

    # Restore full eigs: scatter the compressed vectors back onto all bins,
    # leaving NaN where the bin was masked out
    eigvecs = np.full((k, mask.shape[0]), np.nan)
    for i in range(k):
        eigvecs[i, mask] = eigvecs_compressed[i]

    # Orient and reorder
    eigvals, eigvecs = _orient_eigs(eigvals, eigvecs, gc)

    return eigvals, eigvecs
예제 #5
0
def process(t, matrix_name, normalization, order, iterations, exposant,
            gaussian_number, convolution_sigma):
    """Normalize a contact matrix and optionally post-process it.

    Parameters
    ----------
    t : array-like
        Input matrix. It is never modified; all work happens on copies.
    matrix_name : str
        Processing mode. ``"raw"`` returns a plain copy of ``t``.
        ``"normalized"`` stops after normalization and the power/log step.
        Any name containing ``"correlation"`` additionally applies
        ``np.corrcoef`` with negative coefficients set to 0. Any name other
        than ``"correlation"`` must contain ``"convolution"`` to be smoothed
        with a Gaussian filter; otherwise an error message is printed and a
        raw copy of ``t`` is returned.
    normalization : str
        ``"fragment-wise"``, ``"matrix-wise"``, ``"SCN"``, ``"mirnylib"`` or
        ``"sparsity"``. Anything else prints a message and divides by the
        default ``np.linalg.norm`` of the matrix.
    order : number
        Norm order for ``"fragment-wise"`` and ``"matrix-wise"``.
    iterations : int
        Number of passes for ``"SCN"`` and ``"mirnylib"``.
    exposant : number or str
        Power applied element-wise, or the name of a logarithm:
        ``"log10"``/``"log"``/``"ln10"`` (base 10),
        ``"ln"``/``"logarithm"``/``"logarithme"`` (natural) or
        ``"ln2"``/``"log2"`` (base 2). Other strings are ignored with a
        warning.
    gaussian_number : int
        Number of successive Gaussian filter passes (convolution mode).
    convolution_sigma : number
        Sigma handed to the Gaussian filter.

    Returns
    -------
    The processed matrix (float), or a copy of ``t`` for the raw cases.

    Notes
    -----
    ``ntls`` (mirnylib numutils) and ``ndimage`` (assumed to be
    ``scipy.ndimage`` -- confirm against the file's imports) are expected to
    be provided at module level.
    """
    if matrix_name == "raw":
        return np.copy(t)

    # Float working copy: integer input would otherwise be truncated by the
    # in-place updates below. It is independent of ``t`` so that the raw
    # fallback further down really returns unnormalized data.
    s = np.array(t, dtype=np.float64)

    print("Normalizing with " + str(normalization) + " norm...")

    if normalization == "fragment-wise":
        floatorder = np.float64(order)
        s_norm_x = np.linalg.norm(s, ord=floatorder, axis=0)
        s_norm_y = np.linalg.norm(s, ord=floatorder, axis=1)
        s_norm = np.tensordot(s_norm_x, s_norm_y, axes=0)
        nonzero = s_norm != 0
        s[nonzero] = s[nonzero] / s_norm[nonzero]
        print("Normalized " + str(normalization) + " with order " + str(order))

    elif normalization == "matrix-wise":
        floatorder = np.float64(order)
        s_norm = np.linalg.norm(s, ord=floatorder)
        s = s / s_norm
        print("Normalized " + str(normalization) + " with order " + str(order))

    elif normalization == "SCN":
        for iteration in range(iterations):
            # Divide each row with a non-zero sum by that sum ...
            row_sums = s.sum(axis=1)
            rows = row_sums != 0
            s[rows, :] /= row_sums[rows, None]
            # ... then each column with a non-zero sum by its sum.
            col_sums = s.sum(axis=0)
            cols = col_sums != 0
            s[:, cols] /= col_sums[cols]
            done = iteration + 1
            print("Normalized " + str(done) + " time" + ("" if done <= 1 else "s"))

        s = (s + s.T) / 2

    elif normalization == "mirnylib":
        s = ntls.iterativeCorrection(s, iterations)[0]
        print("Normalized " + str(iterations) + " time" + ("" if iterations <= 1 else "s"))

    elif normalization == "sparsity":
        total = s.sum()
        sums = s.sum(axis=0)
        if total != 0:
            # expected[i, j] = sums[i] * sums[j] / total
            expected = np.outer(sums, sums) / total
            covered = expected != 0
            s[covered] /= expected[covered]

        print("Normalized for " + str(normalization))

    else:
        print("Error in normalization, using matrix-wise by default")
        s_norm = np.linalg.norm(s)
        s /= s_norm

    # Apply log or power
    try:
        s = s ** exposant
        print("Applied " + str(exposant) + " power to matrix")
    except (TypeError, ValueError):
        # A non-numeric exponent (e.g. a logarithm name) cannot be used as a
        # power; NumPy signals this with a TypeError.
        if exposant in ["log10", "log", "ln10"]:
            s = np.log10(s.astype(float))
            print("Applied base-10 logarithm to matrix")
        elif exposant in ["ln", "logarithm", "logarithme"]:
            s = np.log(s.astype(float))
            print("Applied natural logarithm to matrix")
        elif exposant in ["ln2", "log2"]:
            s = np.log2(s.astype(float))
            print("Applied base-2 logarithm to matrix")
        else:
            print("Warning, no valid normalization function encounter, ignoring")

    if matrix_name == "normalized":
        return s

    if "correlation" in matrix_name:
        s = np.corrcoef(s)
        s[s < 0] = 0
        print("Applied correlation function")

    if matrix_name != "correlation":

        if "convolution" not in matrix_name:
            print("Error in matrix mode, using raw by default")
            return np.copy(t)

        print("Convoluting...")
        for i in range(gaussian_number):
            s = ndimage.gaussian_filter(s, convolution_sigma)
            print("Convoluted " + str(i + 1) + " time" + ("" if i + 1 <= 1 else "s"))

    return s