def trans_eig(A, partition, k=3, perc_top=99.95, perc_bottom=1, gc=None):
    """
    Compute compartmentalization eigenvectors on trans contact data

    Parameters
    ----------
    A : 2D array
        balanced whole genome contact matrix; any array-like accepted by
        ``np.array`` works. The input is copied and never modified in place.
    partition : sequence of int
        bin offset of each contiguous region to treat separately (e.g.,
        chromosomes or chromosome arms); must start at 0, end at the number
        of bins and be strictly increasing
    k : int
        number of eigenvectors to compute; default = 3
    perc_top : float (percentile)
        filter - clip trans blowout contacts above this cutoff; default = 99.95
    perc_bottom : float (percentile)
        filter - remove bins with trans coverage below this cutoff; default=1
    gc : 1D array, optional
        GC content per bin for reordering and orienting the primary compartment
        eigenvector; not performed if no array is provided

    Returns
    -------
    eigenvalues, eigenvectors

    Raises
    ------
    ValueError
        If `A` is not a square 2D matrix, or if `partition` is empty or is
        not a strictly increasing sequence running from 0 to the number of
        bins.

    """
    # Convert first so that the shape checks also work for nested lists and
    # so that the caller's matrix is never written to.
    A = np.array(A)
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        raise ValueError("A must be a square 2D matrix")
    A[np.isnan(A)] = 0
    n_bins = A.shape[0]

    if len(partition) == 0 or not (
        partition[0] == 0
        and partition[-1] == n_bins
        and np.all(np.diff(partition) > 0)
    ):
        raise ValueError("Not a valid partition. Must be a monotonic sequence "
                         "from 0 to {}.".format(n_bins))

    # Delete cis data and create trans mask
    for i0, i1 in zip(partition[:-1], partition[1:]):
        A[i0:i1, i0:i1] = 0
    # Region index of every bin; two bins are in trans when the ids differ.
    part_ids = np.repeat(np.arange(len(partition) - 1), np.diff(partition))
    transmask = part_ids[:, None] != part_ids[None, :]

    # Filter heatmap
    A = _filter_heatmap(A, transmask, perc_top, perc_bottom)

    # Fake cis and re-balance (two rounds: fill the cis blocks, then balance)
    for _ in range(2):
        A = _fake_cis(A, ~transmask)
        A = numutils.iterativeCorrection(A)[0]

    # Compute eig on the matrix expressed as relative deviation from its mean
    Abar = A.mean()
    centered = (A - Abar) / Abar
    lam, vecs = _eig(centered, k)
    lam, vecs = _orient_eigs(lam, vecs, gc)

    return lam, vecs
def normalize_dense(M, norm="frag", order=1, iterations=3):
    """Normalize a dense matrix and return the symmetrized result.

    The input is copied into a float64 array first, so `M` itself is never
    modified. Whatever the chosen scheme produces is averaged with its own
    transpose before being returned.

    Parameters
    ----------
    M : array_like
        Dense 2D matrix to normalize.
    norm : str or callable
        Normalization scheme:

        * ``"SCN"`` - for `iterations` rounds, divide entries by their row
          sum and then by their column sum (sums recomputed before each
          pass). Only entries whose row index and column index both have a
          non-zero sum are touched.
        * ``"mirnylib"`` - ``mirnylib.numutils.iterativeCorrection``; when
          mirnylib cannot be imported, a notice is printed and the default
          norm is used instead.
        * ``"frag"`` - divide each entry by the outer product of the
          per-axis vector norms; this runs ``iterations - 1`` times.
        * ``"global"`` - divide the whole matrix by its matrix norm.
        * a callable - called as ``norm(M)`` on the original input.

        Anything else prints a notice and leaves the values unchanged.
    order : int or float
        ``ord`` argument handed to ``np.linalg.norm`` for the ``"frag"`` and
        ``"global"`` schemes.
    iterations : int
        Iteration count for the iterative schemes.

    Returns
    -------
    numpy.ndarray
        ``(S + S.T) / 2`` where ``S`` is the normalized matrix.
    """
    mat = np.array(M, dtype=np.float64)
    norm_order = np.float64(order)

    if norm == "SCN":
        for _ in range(iterations):
            # axis=1 is the row pass, axis=0 the column pass.
            for axis in (1, 0):
                totals = mat.sum(axis=axis)
                has_mass = totals != 0
                usable = has_mass[:, None] & has_mass[None, :]
                # Entry (i, j) is divided by totals[i] on the row pass and
                # by totals[j] on the column pass.
                per_entry = totals[:, None] if axis == 1 else totals[None, :]
                per_entry = np.broadcast_to(
                    per_entry, (totals.size, totals.size)
                )
                mat[usable] = mat[usable] / per_entry[usable]

    elif norm == "mirnylib":
        try:
            from mirnylib import numutils as ntls

            mat = ntls.iterativeCorrection(mat, iterations)[0]
        except ImportError as err:
            for line in (
                str(err),
                "I can't find mirnylib.",
                "Please install it from https://bitbucket.org/mirnylab/mirnylib",
                "I will use default norm as fallback.",
            ):
                print(line)
            return normalize_dense(M, order=order, iterations=iterations)

    elif norm == "frag":
        # Starts at 1, so the body executes iterations - 1 times.
        for _ in range(1, iterations):
            axis0_norms = np.linalg.norm(mat, ord=norm_order, axis=0)
            axis1_norms = np.linalg.norm(mat, ord=norm_order, axis=1)
            scale = np.outer(axis0_norms, axis1_norms)
            nonzero = scale != 0
            mat[nonzero] = mat[nonzero] / scale[nonzero]

    elif norm == "global":
        mat /= np.linalg.norm(mat, ord=norm_order)

    elif callable(norm):
        mat = norm(M)

    else:
        print("Unknown norm. Returning input as fallback")

    symmetric_sum = mat + mat.T
    return symmetric_sum / 2
def normalize_dense(M, norm="frag", order=1, iterations=3):
    """Apply one of several normalization types to a dense input matrix.

    Callable norms (a user-made function or a lambda) are supported as well.
    The input is copied into a float64 array before any arithmetic, so
    integer count matrices are normalized with true division instead of
    being truncated, and `M` itself is never modified.

    Parameters
    ----------
    M : array_like
        Dense 2D matrix to normalize.
    norm : str or callable
        * ``"SCN"`` - for `iterations` rounds, divide entries by their row
          sum and then by their (recomputed) column sum. Only entries whose
          row index and column index both have a non-zero sum are divided.
        * ``"mirnylib"`` - ``mirnylib.numutils.iterativeCorrection``; if
          mirnylib cannot be imported a notice is printed and the default
          norm is used as fallback.
        * ``"frag"`` - divide each entry by the outer product of the
          per-axis vector norms; the loop starts at 1 and therefore runs
          ``iterations - 1`` times.
        * ``"global"`` - divide the matrix by its matrix norm.
        * a callable - called as ``norm(M)`` on the original input.

        Any other value prints a notice and leaves the values unchanged.
    order : int or float
        ``ord`` passed to ``np.linalg.norm`` by ``"frag"`` and ``"global"``.
    iterations : int
        Iteration count for the iterative schemes.

    Returns
    -------
    numpy.ndarray
        The normalized matrix averaged with its transpose.
    """
    # float64 working copy: np.copy(M) would keep an integer dtype, making
    # the masked assignments below truncate and `s /= s_norm` raise.
    s = np.array(M, dtype=np.float64)
    floatorder = np.float64(order)

    if norm == "SCN":
        for _ in range(iterations):
            sumrows = s.sum(axis=1)
            rows_ok = sumrows != 0
            maskrows = rows_ok[:, None] * rows_ok[None, :]
            sums_row = sumrows[:, None] * np.ones(sumrows.shape)[None, :]
            s[maskrows] = s[maskrows] / sums_row[maskrows]

            # Column sums are taken after the row pass.
            sumcols = s.sum(axis=0)
            cols_ok = sumcols != 0
            maskcols = cols_ok[:, None] * cols_ok[None, :]
            sums_col = sumcols[None, :] * np.ones(sumcols.shape)[:, None]
            s[maskcols] = s[maskcols] / sums_col[maskcols]

    elif norm == "mirnylib":
        try:
            from mirnylib import numutils as ntls

            s = ntls.iterativeCorrection(s, iterations)[0]
        except ImportError as e:
            print(str(e))
            print("I can't find mirnylib.")
            print("Please install it from https://bitbucket.org/mirnylab/mirnylib")
            print("I will use default norm as fallback.")
            return normalize_dense(M, order=order, iterations=iterations)

    elif norm == "frag":
        for _ in range(1, iterations):
            s_norm_x = np.linalg.norm(s, ord=floatorder, axis=0)
            s_norm_y = np.linalg.norm(s, ord=floatorder, axis=1)
            s_norm = np.tensordot(s_norm_x, s_norm_y, axes=0)
            nonzero = s_norm != 0
            s[nonzero] = s[nonzero] / s_norm[nonzero]

    elif norm == "global":
        s_norm = np.linalg.norm(s, ord=floatorder)
        s /= s_norm

    elif callable(norm):
        s = norm(M)

    else:
        print("I don't recognize this norm, I am returning input matrix by default.")

    return (s + s.T) / 2
def cis_eig(A, k=3, robust=True, gc=None, classic=False):
    """
    Compute compartment eigenvector on a cis matrix

    Parameters
    ----------
    A : 2D array
        balanced cis contact matrix; copied, never modified in place.
        Non-finite entries are treated as 0.
    k : int
        number of eigenvectors to compute; default = 3
    robust : bool
        Clip values to the [0, 99.9th percentile] range (both the input and
        the observed/expected matrix) and overwrite diagonals -1, 0 and 1
        with twice the mean of the second diagonal
    gc : 1D array, optional
        GC content per bin for choosing and orienting the primary compartment
        eigenvector; not performed if no array is provided
    classic : bool
        Do it old-school: iteratively correct the observed/expected matrix
        and let ``numutils.EIG`` use its default centering, instead of
        decomposing ``OE - 1`` directly

    Returns
    -------
    eigenvalues, eigenvectors
        Always in this order. Eigenvectors have one row per eigenvector and
        one column per bin of `A`; bins with zero column sum are NaN. When
        the matrix has 5 or fewer bins, 5 or fewer bins with positive
        coverage, or (classic mode) the corrected matrix contains non-finite
        values, both outputs are filled with NaN.

    """
    A = np.array(A)
    A[~np.isfinite(A)] = 0
    n_bins = A.shape[0]

    # Bins with no signal are left out of the decomposition.
    mask = A.sum(axis=0) > 0

    # Placeholder result shared by every "cannot compute" exit.
    nan_eigvals = np.full(k, np.nan)
    nan_eigvecs = np.full((k, n_bins), np.nan)

    if n_bins <= 5 or mask.sum() <= 5:
        return nan_eigvals, nan_eigvecs

    if robust:
        A = np.clip(A, 0, np.percentile(A, 99.9))
        fill_value = np.mean(np.diag(A, 2) * 2)
        for d in [-1, 0, 1]:
            numutils.fillDiagonal(A, fill_value, d)
        # fillDiagonal also wrote into the empty bins; blank them again.
        A[~mask, :] = 0
        A[:, ~mask] = 0

    OE = numutils.observedOverExpected(A[mask, :][:, mask])

    if robust:
        OE = np.clip(OE, 0, np.percentile(OE, 99.9))

    if classic:
        OE = numutils.iterativeCorrection(OE)[0]
        if (~np.isfinite(OE)).sum() > 0:
            # Same (eigenvalues, eigenvectors) order as every other return;
            # this path used to hand them back swapped.
            return nan_eigvals, nan_eigvecs
        # mean-centered (subtract mean)
        eigvecs_compressed, eigvals = numutils.EIG(OE, k)
    else:
        eigvecs_compressed, eigvals = numutils.EIG(
            (OE - 1.0), k, subtractMean=False, divideByMean=False)

    # Restore full eigs
    eigvecs = []
    for i in range(k):
        v = np.ones(mask.shape[0]) * np.nan
        v[mask] = eigvecs_compressed[i]
        eigvecs.append(v)
    eigvecs = np.array(eigvecs)

    # Orient and reorder
    eigvals, eigvecs = _orient_eigs(eigvals, eigvecs, gc)

    return eigvals, eigvecs
def process(t, matrix_name, normalization, order, iterations, exposant,
            gaussian_number, convolution_sigma):
    """Build the matrix view selected by ``matrix_name`` from a contact map.

    Unless ``matrix_name`` is ``"raw"``, the matrix is normalized, then raised
    to a power (or passed through a logarithm), and finally, depending on
    ``matrix_name``, turned into a correlation matrix and/or smoothed with a
    Gaussian filter. Progress messages are printed along the way.

    Parameters
    ----------
    t : array_like
        Input 2D matrix. It is never modified.
    matrix_name : str
        * ``"raw"`` - return a plain copy of `t`.
        * ``"normalized"`` - stop after normalization and power/log.
        * a name containing ``"correlation"`` - additionally apply
          ``np.corrcoef`` and set negative coefficients to 0.
        * a name containing ``"convolution"`` - additionally apply the
          Gaussian filter `gaussian_number` times.
        * anything else - print an error and return the raw copy.
    normalization : str
        ``"fragment-wise"``, ``"matrix-wise"``, ``"SCN"``, ``"mirnylib"`` or
        ``"sparsity"``. Any other value prints an error and divides by the
        default matrix norm of ``np.linalg.norm``.
    order : int or float
        ``ord`` for ``np.linalg.norm`` in the fragment-wise and matrix-wise
        normalizations.
    iterations : int
        Iteration count for ``"SCN"`` (the loop starts at 1, so it performs
        ``iterations - 1`` passes) and for ``"mirnylib"``.
    exposant : number or str
        Exponent applied element-wise. If it cannot be used as an exponent,
        it is read as a logarithm name: ``"log10"``/``"log"``/``"ln10"``
        (base 10), ``"ln"``/``"logarithm"``/``"logarithme"`` (natural) or
        ``"ln2"``/``"log2"`` (base 2). Other values print a warning and
        leave the matrix unchanged.
    gaussian_number : int
        Number of Gaussian filter passes in convolution mode.
    convolution_sigma : scalar or sequence of scalars
        ``sigma`` handed to ``ndimage.gaussian_filter``.

    Returns
    -------
    numpy.ndarray
        The processed matrix, or an untouched copy of `t` for ``"raw"`` and
        for the unknown-mode fallback.
    """
    # Pristine copy, kept apart from the working matrix so that in-place
    # normalization can never leak into the "raw" result.
    raw = np.copy(t)
    if matrix_name == "raw":
        return raw

    # Float working copy: integer counts would otherwise be truncated by the
    # in-place divisions below (or make them raise).
    s = np.array(t, dtype=np.float64)

    print("Normalizing with " + str(normalization) + " norm...")
    if normalization == "fragment-wise":
        floatorder = np.float64(order)
        s_norm_x = np.linalg.norm(s, ord=floatorder, axis=0)
        s_norm_y = np.linalg.norm(s, ord=floatorder, axis=1)
        s_norm = np.tensordot(s_norm_x, s_norm_y, axes=0)
        nonzero = s_norm != 0
        s[nonzero] = s[nonzero] / s_norm[nonzero]
        print("Normalized " + str(normalization) + " with order " + str(order))

    elif normalization == "matrix-wise":
        floatorder = np.float64(order)
        s = s / np.linalg.norm(s, ord=floatorder)
        print("Normalized " + str(normalization) + " with order " + str(order))

    elif normalization == "SCN":
        # NOTE(review): the loop starts at 1, so `iterations` passes are not
        # what actually runs; kept as in the original - confirm intent.
        for iteration in range(1, iterations):
            # Broadcast each sum over its row/column and skip empty ones.
            sumrow = s.sum(axis=1)[:, None]
            np.divide(s, sumrow, out=s, where=sumrow != 0)
            # Column sums are taken after the row pass.
            sumcols = s.sum(axis=0)[None, :]
            np.divide(s, sumcols, out=s, where=sumcols != 0)
            print("Normalized " + str(iteration + 1) + " time"
                  + str("" if iteration <= 1 else "s"))
        s = (s + s.T) / 2

    elif normalization == "mirnylib":
        s = ntls.iterativeCorrection(s, iterations)[0]
        print("Normalized " + str(iterations) + " time"
              + str("" if iterations <= 1 else "s"))

    elif normalization == "sparsity":
        total = s.sum()
        sums = s.sum(axis=0)
        # An all-zero total would turn every entry into NaN; skip instead.
        if total != 0:
            coverage = np.outer(sums, sums) / total
            covered = coverage != 0
            s[covered] /= coverage[covered]
        print("Normalized for " + str(normalization))

    else:
        print("Error in normalization, using matrix-wise by default")
        s /= np.linalg.norm(s)

    # Apply log or power
    try:
        s = s ** exposant
        print("Applied " + str(exposant) + " power to matrix")
    except (TypeError, ValueError):
        # NumPy raises TypeError for a string exponent, so both exception
        # types lead to the logarithm names.
        if exposant in ["log10", "log", "ln10"]:
            s = np.log10(s.astype(float))
            print("Applied base-10 logarithm to matrix")
        elif exposant in ["ln", "logarithm", "logarithme"]:
            s = np.log(s.astype(float))
            print("Applied natural logarithm to matrix")
        elif exposant in ["ln2", "log2"]:
            s = np.log2(s.astype(float))
            print("Applied base-2 logarithm to matrix")
        else:
            print("Warning, no valid normalization function encounter, ignoring")

    if matrix_name != "normalized":
        if "correlation" in matrix_name:
            s = np.corrcoef(s)
            s[s < 0] = 0
            print("Applied correlation function")

        if matrix_name != "correlation":
            if "convolution" not in matrix_name:
                print("Error in matrix mode, using raw by default")
                s = raw
            else:
                print("Convoluting...")
                for i in range(0, gaussian_number):
                    s = ndimage.gaussian_filter(s, convolution_sigma)
                    print("Convoluted " + str(i + 1) + " time"
                          + str("" if i + 1 <= 1 else "s"))

    return s