def stchMatrix(rawMatrix, percentOfSparseToRemove, graphPath=None, biasValues=False, matrixFilePath=None,
               outputNormalizedMatrixFile=False, fithicOutputType=False, bedOutputType=False):
    """Normalize an in-memory contact matrix and optionally plot/export the result.

    The matrix is first filtered by ``removeZeroDiagonalCSR`` and then balanced
    with the scaling vector returned by ``knightRuizAlg`` (both helpers are
    defined elsewhere): ``normalized = diag(v) * filtered * diag(v)``.

    Parameters:
        rawMatrix: sparse matrix to normalize.
        percentOfSparseToRemove: forwarded unchanged to ``removeZeroDiagonalCSR``.
        graphPath: if not None, a heatmap of the rescaled matrix is drawn there
            through ``plot.plotMatrix``.
        biasValues: if true, a bias vector is derived with ``computeBiasVector``
            and padded with ``addZeroBiases``.
        matrixFilePath: destination passed to ``spsIO.outputBedMatrix``.
        outputNormalizedMatrixFile: master switch for writing the matrix file.
        fithicOutputType, bedOutputType: when either is true (and the master
            switch is on) the rescaled matrix is written by ``outputBedMatrix``.

    Returns:
        ``None`` if the scaling vector contains NaN, otherwise the tuple
        ``(normalizedMatrix, biasWZeros)``. ``biasWZeros`` is ``None`` when
        ``biasValues`` is false.
    """
    # Total of the unfiltered matrix; used below to rescale the plotted/exported copy.
    R = rawMatrix.sum()
    mtxAndRemoved = removeZeroDiagonalCSR(rawMatrix, percentOfSparseToRemove)
    rawMatrix = mtxAndRemoved[0]
    removed = mtxAndRemoved[1]

    # Single-argument print(...) behaves the same as the Python 2 print statement.
    print("Generating Normalized Matrix")
    st = time.time()
    result = knightRuizAlg(rawMatrix)
    colVec = result[0]
    if np.isnan(np.sum(colVec)):
        print("Too few rows/columns removed... try again")
        return None
    x = sps.diags(colVec.flatten(), 0, format='csr')

    # Bind the name unconditionally: it was previously undefined at the return
    # statement whenever biasValues was false (the default).
    biasWZeros = None
    if biasValues:
        bias = computeBiasVector(colVec)
        biasWZeros = addZeroBiases(removed, bias)
    del colVec

    normalizedMatrix = x.dot(rawMatrix.dot(x))
    n = normalizedMatrix.shape[0]
    e = time.time()
    print("Normalization took %s" % (e - st))
    print("Normalized Matrix Generated")

    # Only the plotted/exported copy is rescaled; the returned matrix is not.
    scalar = R / n
    if graphPath is not None:
        from plot import plotMatrix
        print("Generating heatmap for Normalized Matrix")
        plotMatrix((normalizedMatrix * scalar), graphPath, "Normalized.Mtx")
    if outputNormalizedMatrixFile:
        print("Outputting Normalized Matrix")
        # Both output types currently go through the BED writer.
        if bedOutputType or fithicOutputType:
            from spsIO import outputBedMatrix
            outputBedMatrix((normalizedMatrix * scalar), "Normalized.matrix", matrixFilePath)
    return normalizedMatrix, biasWZeros
def smoothedContactCounts(Alpha, Steps, picklePath, R, graphPath=None, matrixFilePath=None, outputSmoothedMatrixFile=False):
    """Run ``randomWalk`` over a grid of (alpha, steps) values and pickle each result.

    ``Alpha`` and ``Steps`` are each indexed as ``(start, stop, count)`` and
    expanded with ``np.linspace``. For every combination, the matrix returned
    by ``randomWalk(alpha, steps, picklePath)`` is scaled in place by
    ``R / size`` (``size`` being the second element of the helper's result),
    optionally plotted and/or written with ``outputBedMatrix``, and finally
    pickled into ``picklePath`` under the name
    ``SmoothedCC.Alpha=<alpha>.t=<steps>``.
    """
    wantPlot = graphPath is not None
    for alpha in np.linspace(Alpha[0], Alpha[1], Alpha[2]).tolist():
        for steps in np.linspace(Steps[0], Steps[1], Steps[2]).tolist():
            walkResult = randomWalk(alpha, steps, picklePath)
            smoothed = walkResult[0]
            size = walkResult[1]
            # In-place scaling, exactly as the helper's result object is reused below.
            smoothed *= R / size

            label = "".join(["SmoothedCC.Alpha=", str(alpha), ".t=", str(steps)])
            if wantPlot:
                from plot import plotMatrix
                plotMatrix(smoothed, graphPath, label)
            if outputSmoothedMatrixFile:
                from spsIO import outputBedMatrix
                outputBedMatrix(smoothed, label, matrixFilePath)

            # The with-block closes the file; no explicit close() is needed.
            with open(os.path.join(picklePath, label), 'wb') as out:
                pickle.dump(smoothed, out)
def loadBed(bedPath, matrixPath, picklePath, graphPath=None):
    """Load a BED bin list plus a triplet matrix file into a symmetric CSR matrix.

    Parameters:
        bedPath: whitespace-delimited text file; the number of rows read from it
            is used as the matrix dimension ``n``.
        matrixPath: whitespace-delimited text file of integer triplets. The
            first two columns are decremented by one and used as row/column
            indices (i.e. they are 1-based in the file); the third column is
            the stored value.
        picklePath: directory that receives the pickled matrix as ``Raw.Mtx``.
        graphPath: if not None, the matrix is plotted there via
            ``plot.plotMatrix``.

    Returns:
        The sum of all entries of the symmetrized matrix.

    The matrix is symmetrized as ``M + M.T``, so any entries listed on the
    diagonal are doubled.
    NOTE(review): presumably the input holds one triangle only - confirm
    against the producer of the matrix file.
    """
    # Single-argument print(...) behaves the same as the Python 2 print statement.
    print("Loading...")
    print("The whole BED file will be read")
    startTime = time.time()

    # ndmin=2 keeps a one-line file two-dimensional; without it loadtxt returns
    # a 1-D array and the column indexing below breaks.
    with open(bedPath, 'r') as bedFile:
        bins = np.loadtxt(bedFile, dtype="str", ndmin=2)
    # Summing per-chromosome row counts equals the total row count.
    n = bins.shape[0]

    with open(matrixPath, 'r') as matrixFile:
        triplets = np.loadtxt(matrixFile, dtype=int, ndmin=2)
    rows = triplets[:, 0] - 1
    cols = triplets[:, 1] - 1
    # Values are stored as floats, matching the dtype the matrix has always had.
    counts = triplets[:, 2].astype(float)
    dirty_mtx = sps.coo_matrix((counts, (rows, cols)), shape=(n, n)).tocsr()

    # Add the transpose to make the matrix symmetric.
    dirty_mtx = dirty_mtx + dirty_mtx.transpose()
    R = dirty_mtx.sum()

    if graphPath is not None:
        from plot import plotMatrix
        plotMatrix(dirty_mtx, graphPath, "Raw.Mtx")

    endTime = time.time()
    print("Loading took %f" % (endTime - startTime))

    # Dump the raw matrix for the later pipeline stages.
    preFname = os.path.join(picklePath, "Raw.Mtx")
    with open(preFname, 'wb') as f:
        pickle.dump(dirty_mtx, f)
    print("Pickled!")
    return R
def smoothedContactCounts(Alpha, Steps, normalizedMtx, R, graphPath=None, matrixFilePath = None, outputSmoothedMatrixFile=False):
    """Run ``randomWalk`` on ``normalizedMtx`` over a grid of (alpha, steps) values.

    ``Alpha`` and ``Steps`` are each indexed as ``(start, stop, count)`` and
    expanded with ``np.linspace``. Each result matrix (first element of the
    helper's return value) is scaled in place by ``R`` divided by the second
    element, then optionally plotted under ``graphPath`` and/or written with
    ``outputBedMatrix``. Nothing is returned.
    """
    for alpha in np.linspace(Alpha[0], Alpha[1], Alpha[2]).tolist():
        for steps in np.linspace(Steps[0], Steps[1], Steps[2]).tolist():
            outcome = randomWalk(alpha, steps, normalizedMtx)
            smoothed = outcome[0]
            dimension = outcome[1]
            # Scale in place so the helper's result object is the one exported.
            smoothed *= R / dimension

            tag = "".join(["SmoothedCC.Alpha=", str(alpha), ".t=", str(steps)])
            if graphPath is not None:
                from plot import plotMatrix
                plotMatrix(smoothed, graphPath, tag)
            if not outputSmoothedMatrixFile:
                continue
            from spsIO import outputBedMatrix
            outputBedMatrix(smoothed, tag, matrixFilePath)
def loadNoble(chrNum, resolution, noblePath, picklePath, lenDic, fragDic, graphPath=None):
    """Load a gzipped five-column contact list into a symmetric sparse matrix.

    Each input line is split on whitespace into
    ``chrA midA chrB midB value``.

    Parameters:
        chrNum: the string ``'whole'`` to load every line, or a chromosome
            name; in the latter case only lines whose two chromosome columns
            both equal ``chrNum`` are used.
        resolution: integer bin size. In single-chromosome mode the bin index
            is ``(mid - resolution // 2) // resolution``.
        noblePath: path of the gzip-compressed input file.
        picklePath: directory that receives the pickled CSR matrix as
            ``Raw.Mtx``.
        lenDic: maps chromosome name to its length (anything ``int()``
            accepts); determines the matrix dimension.
        fragDic: ``fragDic[chromosome][mid]`` gives the matrix index of a bin;
            only consulted in ``'whole'`` mode.
        graphPath: if not None, the matrix is plotted there via
            ``plot.plotMatrix``.

    Returns:
        The sum of all entries of the loaded matrix.

    Values are parsed as floats but stored in an ``int64`` matrix.
    """
    import math

    # Single-argument print(...) behaves the same as the Python 2 print statement.
    print("Loading...")
    startTime = time.time()
    wholeGenome = chrNum == 'whole'
    # Floor division keeps the bin arithmetic integral on every Python version.
    halfRes = resolution // 2
    if wholeGenome:
        n = sum(int(math.ceil(1.0 * int(length) / resolution)) for length in lenDic.values())
    else:
        n = int(math.ceil(1.0 * int(lenDic[chrNum]) / resolution))

    # LIL format is used for the element-wise fill below.
    dirty_mtx = sps.lil_matrix((n, n), dtype=np.int64)

    # The with-block guarantees the gzip handle is released even if parsing fails.
    with gzip.open(noblePath, 'r') as hiCFile:
        for line in hiCFile:
            if not isinstance(line, str):
                # gzip yields bytes where str is a text type; decode so the
                # string comparisons and dict lookups below keep working.
                line = line.decode()
            fileLine = line.rstrip().split()
            if not fileLine:
                continue
            if wholeGenome:
                i = fragDic[fileLine[0]][int(fileLine[1])]
                j = fragDic[fileLine[2]][int(fileLine[3])]
                k = float(fileLine[4])
                dirty_mtx[i, j] = k
                dirty_mtx[j, i] = k
            elif fileLine[0] == chrNum and fileLine[2] == chrNum:
                # Exact match on both columns: a prefix test would also accept
                # e.g. "chr10" when "chr1" is requested. Lines for any other
                # chromosome are skipped instead of being routed through the
                # whole-genome index lookup.
                i = (int(fileLine[1]) - halfRes) // resolution
                j = (int(fileLine[3]) - halfRes) // resolution
                k = float(fileLine[4])
                try:
                    dirty_mtx[i, j] = k
                    dirty_mtx[j, i] = k
                except IndexError:
                    # Bin falls outside the matrix; ignore it (best effort).
                    continue

    if graphPath is not None:
        from plot import plotMatrix
        plotMatrix(dirty_mtx, graphPath, "Raw.Mtx")

    dirty_mtx = dirty_mtx.tocsr()
    R = dirty_mtx.sum()

    endTime = time.time()
    print("Loading took %f" % (endTime - startTime))

    # Dump the raw matrix for the later pipeline stages.
    preFname = os.path.join(picklePath, "Raw.Mtx")
    with open(preFname, 'wb') as f:
        pickle.dump(dirty_mtx, f)
    print("Pickled!")
    return R
def stchMatrix(picklePath, percentOfSparseToRemove, graphPath=None, biasValues=False, matrixFilePath=None,
               outputNormalizedMatrixFile=False, fithicOutputType=False, bedOutputType=False):
    """Normalize the pickled raw matrix in ``picklePath`` and pickle the results.

    Reads ``Raw.Mtx`` from ``picklePath``, filters it with
    ``removeZeroDiagonalCSR``, balances it with the scaling vector returned by
    ``knightRuizAlg`` (``normalized = diag(v) * filtered * diag(v)``; both
    helpers are defined elsewhere) and writes these files into ``picklePath``:

    * ``Bias.Values``    - only when ``biasValues`` is true
    * ``Normalized.Mtx`` - the normalized matrix (not rescaled)
    * ``removed``        - second element of ``removeZeroDiagonalCSR``'s result

    Parameters:
        picklePath: directory holding ``Raw.Mtx`` and receiving the outputs.
        percentOfSparseToRemove: forwarded unchanged to ``removeZeroDiagonalCSR``.
        graphPath: if not None, a heatmap of the rescaled matrix is drawn there
            through ``plot.plotMatrix``.
        biasValues: if true, a bias vector is computed and pickled.
        matrixFilePath: destination passed to ``spsIO.outputBedMatrix``.
        outputNormalizedMatrixFile: master switch for writing the matrix file.
        fithicOutputType, bedOutputType: when either is true (and the master
            switch is on) the rescaled matrix is written by ``outputBedMatrix``.

    Returns:
        None. If the scaling vector contains NaN the function returns early
        without writing any file.
    """
    try:
        import cPickle as pickle
    except ImportError:
        # cPickle does not exist on Python 3; the plain module is equivalent.
        import pickle

    # Single-argument print(...) behaves the same as the Python 2 print statement.
    fname = os.path.join(picklePath, "Raw.Mtx")
    with open(fname, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; picklePath must only
        # contain files produced by this pipeline.
        rawMatrix = pickle.load(f)

    # Total of the unfiltered matrix; used below to rescale the plotted/exported copy.
    R = rawMatrix.sum()
    mtxAndRemoved = removeZeroDiagonalCSR(rawMatrix, percentOfSparseToRemove)
    rawMatrix = mtxAndRemoved[0]
    removed = mtxAndRemoved[1]

    print("Generating Normalized Matrix")
    result = knightRuizAlg(rawMatrix)
    colVec = result[0]
    if np.isnan(np.sum(colVec)):
        print("Too few rows/columns removed... try again")
        return None
    x = sps.diags(colVec.flatten(), 0, format='csr')

    if biasValues:
        bias = computeBiasVector(colVec)
        biasWZeros = addZeroBiases(removed, bias)
        biasFileName = os.path.join(picklePath, "Bias.Values")
        with open(biasFileName, 'wb') as f:
            pickle.dump(biasWZeros, f)
    del colVec

    normalizedMatrix = x.dot(rawMatrix.dot(x))
    n = normalizedMatrix.shape[0]
    print("Normalized Matrix Generated")

    # Only the plotted/exported copy is rescaled; the pickled matrix is not.
    scalar = R / n
    if graphPath is not None:
        from plot import plotMatrix
        print("Generating heatmap for Normalized Matrix")
        plotMatrix((normalizedMatrix * scalar), graphPath, "Normalized.Mtx")
    if outputNormalizedMatrixFile:
        print("Outputting Normalized Matrix")
        # Both output types currently go through the BED writer.
        if bedOutputType or fithicOutputType:
            from spsIO import outputBedMatrix
            outputBedMatrix((normalizedMatrix * scalar), "Normalized.matrix", matrixFilePath)

    fileName = os.path.join(picklePath, "Normalized.Mtx")
    with open(fileName, 'wb') as f:
        pickle.dump(normalizedMatrix, f)
    fileName = os.path.join(picklePath, "removed")
    with open(fileName, 'wb') as f:
        pickle.dump(removed, f)
    print("Normalized Matrix pickled")