Ejemplo n.º 1
0
def stchMatrix(rawMatrix,
               percentOfSparseToRemove,
               graphPath=None,
               biasValues=False,
               matrixFilePath=None,
               outputNormalizedMatrixFile=False,
               fithicOutputType=False,
               bedOutputType=False):
    """Normalize a raw contact matrix and optionally plot/export the result.

    The input is first filtered with ``removeZeroDiagonalCSR``. The filtered
    matrix ``M`` is then normalized as ``X * M * X``, where ``X`` is the
    diagonal matrix built from the first element returned by
    ``knightRuizAlg``.

    Args:
        rawMatrix: Matrix to normalize (presumably a scipy CSR matrix --
            confirm against callers).
        percentOfSparseToRemove: Forwarded unchanged to
            ``removeZeroDiagonalCSR``.
        graphPath: When not None, a heatmap of the rescaled normalized
            matrix is drawn through ``plot.plotMatrix``.
        biasValues: When true, a bias vector is computed with
            ``computeBiasVector`` and expanded with ``addZeroBiases``.
        matrixFilePath: Passed to ``spsIO.outputBedMatrix`` when exporting.
        outputNormalizedMatrixFile: Enables the export step.
        fithicOutputType: Either this flag or ``bedOutputType`` triggers the
            export; both currently produce the same ``outputBedMatrix``
            output.
        bedOutputType: See ``fithicOutputType``.

    Returns:
        ``(normalizedMatrix, biasWZeros)``. ``normalizedMatrix`` is returned
        without the ``R / n`` rescaling (that factor is applied only to the
        plotted/exported copy). ``biasWZeros`` is None unless ``biasValues``
        is true. Returns None when the scaling vector contains NaN.
    """
    # float() keeps the rescaling factor below from being truncated by
    # Python 2 integer division when the matrix stores integer counts.
    R = float(rawMatrix.sum())

    mtxAndRemoved = removeZeroDiagonalCSR(rawMatrix, percentOfSparseToRemove)
    rawMatrix = mtxAndRemoved[0]
    removed = mtxAndRemoved[1]

    print("Generating Normalized Matrix")
    st = time.time()
    colVec = knightRuizAlg(rawMatrix)[0]
    if np.isnan(np.sum(colVec)):
        print("Too few rows/columns removed... try again")
        return None
    x = sps.diags(colVec.flatten(), 0, format='csr')

    # Always bound so the return statement works when biasValues is False.
    biasWZeros = None
    if biasValues:
        bias = computeBiasVector(colVec)
        biasWZeros = addZeroBiases(removed, bias)

    # The scaling vector is no longer needed; release it before the product.
    del colVec

    normalizedMatrix = x.dot(rawMatrix.dot(x))
    n = normalizedMatrix.shape[0]
    e = time.time()
    print("Normalization took %s" % (e - st))
    print("Normalized Matrix Generated")

    # Factor applied to the plotted/exported copy only.
    scalar = R / n

    if graphPath is not None:
        from plot import plotMatrix
        print("Generating heatmap for Normalized Matrix")
        plotMatrix((normalizedMatrix * scalar), graphPath, "Normalized.Mtx")

    if outputNormalizedMatrixFile:
        print("Outputting Normalized Matrix")
        if bedOutputType or fithicOutputType:
            from spsIO import outputBedMatrix
            outputBedMatrix((normalizedMatrix * scalar), "Normalized.matrix",
                            matrixFilePath)

    return normalizedMatrix, biasWZeros
Ejemplo n.º 2
0
def smoothedContactCounts(Alpha,
                          Steps,
                          picklePath,
                          R,
                          graphPath=None,
                          matrixFilePath=None,
                          outputSmoothedMatrixFile=False):
    """Run ``randomWalk`` over a grid of alpha/step values and pickle each result.

    Args:
        Alpha: ``(start, stop, count)`` triple handed to ``np.linspace`` to
            generate the alpha values.
        Steps: ``(start, stop, count)`` triple handed to ``np.linspace`` to
            generate the step values (they reach ``randomWalk`` as floats).
        picklePath: Passed to ``randomWalk`` and also used as the directory
            the per-combination pickles are written to.
        R: Total used to rescale each walk result by ``R / n``.
        graphPath: When not None, each result is plotted via
            ``plot.plotMatrix``.
        matrixFilePath: Passed to ``spsIO.outputBedMatrix`` when exporting.
        outputSmoothedMatrixFile: When true, each result is also exported
            with ``outputBedMatrix``.

    Returns:
        None. Each rescaled result is always pickled to
        ``<picklePath>/SmoothedCC.Alpha=<alpha>.t=<steps>``.
    """
    for alpha in np.linspace(Alpha[0], Alpha[1], Alpha[2]).tolist():
        for steps in np.linspace(Steps[0], Steps[1], Steps[2]).tolist():
            rw = randomWalk(alpha, steps, picklePath)
            walk = rw[0]
            n = rw[1]
            # float() avoids Python 2 integer division truncating the factor
            # when both R and n are integers.
            scalar = float(R) / n
            walk *= scalar
            name = "SmoothedCC.Alpha=" + str(alpha) + ".t=" + str(steps)

            if graphPath is not None:
                from plot import plotMatrix
                plotMatrix(walk, graphPath, name)

            if outputSmoothedMatrixFile:
                from spsIO import outputBedMatrix
                outputBedMatrix(walk, name, matrixFilePath)

            # The with-block closes the file; no explicit close() is needed.
            fname = os.path.join(picklePath, name)
            with open(fname, 'wb') as f:
                pickle.dump(walk, f)
Ejemplo n.º 3
0
def loadBed(bedPath, matrixPath, picklePath, graphPath=None):
    """Build a symmetric CSR matrix from a BED file and a triplet matrix file.

    The BED file only determines the matrix dimension (its number of rows).
    The matrix file is read as integer ``row col value`` triplets with
    1-based indices. The resulting matrix is added to its own transpose and
    pickled to ``<picklePath>/Raw.Mtx``.

    Args:
        bedPath: Path of the whitespace-delimited BED file.
        matrixPath: Path of the whitespace-delimited triplet file.
        picklePath: Directory in which ``Raw.Mtx`` is written.
        graphPath: When not None, the matrix is plotted via
            ``plot.plotMatrix``.

    Returns:
        The sum of all entries of the symmetrized matrix.
    """
    print("Loading...")
    print("The whole BED file will be read")
    startTime = time.time()

    # Summing the row counts of every distinct value in column 0 always
    # equals the total row count, so the dimension is read off directly.
    # ndmin=2 keeps a one-line file two-dimensional.
    with open(bedPath, 'r') as bedFile:
        bedRows = np.loadtxt(bedFile, dtype="str", ndmin=2)
    n = bedRows.shape[0]

    with open(matrixPath, 'r') as matrixFile:
        triplets = np.loadtxt(matrixFile, dtype=int, ndmin=2)

    # Indices in the file are 1-based; keep them integral for scipy.
    rowIdx = triplets[:, 0] - 1
    colIdx = triplets[:, 1] - 1
    # Values are stored as floats so the matrix dtype (and the returned sum)
    # stay floating point.
    values = triplets[:, 2].astype(float)

    dirty_mtx = sps.coo_matrix((values, (rowIdx, colIdx)), shape=(n, n))

    #convert to csr!
    dirty_mtx = dirty_mtx.tocsr()

    #add the transpose to make the matrix symmetric
    # NOTE(review): entries on the diagonal are counted twice by this sum --
    # confirm the input never contains diagonal entries or that this is
    # intended.
    dirty_mtx = dirty_mtx + dirty_mtx.transpose()
    R = dirty_mtx.sum()

    if graphPath is not None:
        from plot import plotMatrix
        plotMatrix(dirty_mtx, graphPath, "Raw.Mtx")

    #done loading!
    endTime = time.time()
    print("Loading took %f" % (endTime - startTime))

    #dump raw matrix
    preFname = os.path.join(picklePath, "Raw.Mtx")
    with open(preFname, 'wb') as f:
        pickle.dump(dirty_mtx, f)

    print("Pickled!")
    return R
Ejemplo n.º 4
0
def smoothedContactCounts(Alpha, Steps, normalizedMtx, R, graphPath=None, matrixFilePath = None, outputSmoothedMatrixFile=False):
    """Run ``randomWalk`` over a grid of alpha/step values and plot/export each result.

    Args:
        Alpha: ``(start, stop, count)`` triple handed to ``np.linspace`` to
            generate the alpha values.
        Steps: ``(start, stop, count)`` triple handed to ``np.linspace`` to
            generate the step values (they reach ``randomWalk`` as floats).
        normalizedMtx: Passed unchanged to ``randomWalk``.
        R: Total used to rescale each walk result by ``R / n``.
        graphPath: When not None, each result is plotted via
            ``plot.plotMatrix``.
        matrixFilePath: Passed to ``spsIO.outputBedMatrix`` when exporting.
        outputSmoothedMatrixFile: When true, each result is exported with
            ``outputBedMatrix``.

    Returns:
        None. Results are only plotted and/or written to disk.
    """
    for alpha in np.linspace(Alpha[0], Alpha[1], Alpha[2]).tolist():
        for steps in np.linspace(Steps[0], Steps[1], Steps[2]).tolist():
            rw = randomWalk(alpha, steps, normalizedMtx)
            walk = rw[0]
            n = rw[1]
            # float() avoids Python 2 integer division truncating the factor
            # when both R and n are integers.
            scalar = float(R) / n
            walk *= scalar
            name = "SmoothedCC.Alpha=" + str(alpha) + ".t=" + str(steps)

            if graphPath is not None:
                from plot import plotMatrix
                plotMatrix(walk, graphPath, name)

            if outputSmoothedMatrixFile:
                from spsIO import outputBedMatrix
                outputBedMatrix(walk, name, matrixFilePath)
Ejemplo n.º 5
0
def loadNoble(chrNum,
              resolution,
              noblePath,
              picklePath,
              lenDic,
              fragDic,
              graphPath=None):
    """Load a gzipped five-column contact file into a symmetric sparse matrix.

    Each non-empty line is split on whitespace into
    ``chrA posA chrB posB count``. The matrix is pickled (as CSR) to
    ``<picklePath>/Raw.Mtx``.

    Args:
        chrNum: ``'whole'`` to load every line, otherwise the chromosome
            name; only lines whose two chromosome fields both equal
            ``chrNum`` are loaded.
        resolution: Bin size used to size the matrix and, in
            single-chromosome mode, to turn positions into bin indices via
            ``(pos - resolution // 2) // resolution``.
        noblePath: Path of the gzip-compressed input file.
        picklePath: Directory in which ``Raw.Mtx`` is written.
        lenDic: Maps chromosome name to its length; used to size the matrix.
        fragDic: ``fragDic[chrom][pos]`` gives the matrix index in
            whole-genome mode.
        graphPath: When not None, the matrix is plotted via
            ``plot.plotMatrix``.

    Returns:
        The sum of all entries of the loaded matrix.
    """
    import math

    print("Loading...")
    startTime = time.time()

    wholeGenome = (chrNum == 'whole')
    halfRes = resolution // 2

    if wholeGenome:
        n = 0
        for key in lenDic:
            n += int(math.ceil(1.0 * int(lenDic[key]) / resolution))
    else:
        n = int(math.ceil(1.0 * int(lenDic[chrNum]) / resolution))

    #construct a sparse matrix of max resolution
    # The matrix is int64, so the float counts parsed below are cast to
    # integers on assignment.
    dirty_mtx = sps.lil_matrix((n, n), dtype=np.int64)

    #load values into the sparse matrix constructed earlier!
    # The with-block guarantees the gzip handle is closed even on errors.
    with gzip.open(noblePath, 'r') as hiCFile:
        for line in hiCFile:
            fileLine = line.rstrip().split()
            if not fileLine:
                continue

            firstCh = fileLine[0]
            secCh = fileLine[2]

            if wholeGenome:
                k = float(fileLine[4])
                i = fragDic[firstCh][int(fileLine[1])]
                j = fragDic[secCh][int(fileLine[3])]
                dirty_mtx[i, j] = k
                dirty_mtx[j, i] = k
                continue

            # Single-chromosome mode: require an exact match on both ends.
            # A prefix test would also accept e.g. "chr10" for "chr1", and
            # other chromosomes must never reach the fragDic lookup, whose
            # indices belong to the whole-genome matrix.
            if firstCh != chrNum or secCh != chrNum:
                continue

            i = (int(fileLine[1]) - halfRes) // resolution
            j = (int(fileLine[3]) - halfRes) // resolution
            k = float(fileLine[4])
            try:
                dirty_mtx[i, j] = k
                dirty_mtx[j, i] = k
            except IndexError:
                # Positions beyond the declared chromosome length are
                # skipped, as before; other errors are no longer hidden.
                continue

    if graphPath is not None:
        from plot import plotMatrix
        plotMatrix(dirty_mtx, graphPath, "Raw.Mtx")

    #convert to csr!
    dirty_mtx = dirty_mtx.tocsr()
    R = dirty_mtx.sum()

    #done loading!
    endTime = time.time()
    print("Loading took %f" % (endTime - startTime))

    #dump raw matrix
    preFname = os.path.join(picklePath, "Raw.Mtx")
    with open(preFname, 'wb') as f:
        pickle.dump(dirty_mtx, f)

    print("Pickled!")
    return R
Ejemplo n.º 6
0
def stchMatrix(picklePath, percentOfSparseToRemove, graphPath=None, biasValues=False, matrixFilePath=None, outputNormalizedMatrixFile=False, fithicOutputType=False, bedOutputType=False):
    """Normalize the pickled raw matrix and pickle the results.

    Reads ``<picklePath>/Raw.Mtx``, filters it with
    ``removeZeroDiagonalCSR`` and normalizes the filtered matrix ``M`` as
    ``X * M * X``, where ``X`` is the diagonal matrix built from the first
    element returned by ``knightRuizAlg``.

    Args:
        picklePath: Directory holding ``Raw.Mtx``; all outputs are written
            here as well.
        percentOfSparseToRemove: Forwarded unchanged to
            ``removeZeroDiagonalCSR``.
        graphPath: When not None, a heatmap of the rescaled normalized
            matrix is drawn through ``plot.plotMatrix``.
        biasValues: When true, a bias vector is computed and pickled to
            ``Bias.Values``.
        matrixFilePath: Passed to ``spsIO.outputBedMatrix`` when exporting.
        outputNormalizedMatrixFile: Enables the export step.
        fithicOutputType: Either this flag or ``bedOutputType`` triggers the
            export; both currently produce the same ``outputBedMatrix``
            output.
        bedOutputType: See ``fithicOutputType``.

    Returns:
        None. On success ``Normalized.Mtx`` (without the ``R / n``
        rescaling) and ``removed`` are pickled into ``picklePath``. If the
        scaling vector contains NaN the function returns early and writes
        nothing.
    """
    try:
        import cPickle as pickle  # faster C implementation on Python 2
    except ImportError:
        import pickle

    # NOTE: pickle.load executes arbitrary code from the file; only point
    # picklePath at directories produced by this pipeline.
    fname = os.path.join(picklePath, "Raw.Mtx")
    with open(fname, 'rb') as f:
        rawMatrix = pickle.load(f)

    # float() keeps the rescaling factor below from being truncated by
    # Python 2 integer division when the matrix stores integer counts.
    R = float(rawMatrix.sum())

    mtxAndRemoved = removeZeroDiagonalCSR(rawMatrix, percentOfSparseToRemove)
    rawMatrix = mtxAndRemoved[0]
    removed = mtxAndRemoved[1]

    print("Generating Normalized Matrix")
    colVec = knightRuizAlg(rawMatrix)[0]
    if np.isnan(np.sum(colVec)):
        print("Too few rows/columns removed... try again")
        return None
    x = sps.diags(colVec.flatten(), 0, format='csr')

    if biasValues:
        bias = computeBiasVector(colVec)
        biasWZeros = addZeroBiases(removed, bias)

        biasFileName = os.path.join(picklePath, "Bias.Values")
        with open(biasFileName, 'wb') as f:
            pickle.dump(biasWZeros, f)

    # The scaling vector is no longer needed; release it before the product.
    del colVec

    normalizedMatrix = x.dot(rawMatrix.dot(x))
    n = normalizedMatrix.shape[0]
    print("Normalized Matrix Generated")

    # Factor applied to the plotted/exported copy only.
    scalar = R / n

    if graphPath is not None:
        from plot import plotMatrix
        print("Generating heatmap for Normalized Matrix")
        plotMatrix((normalizedMatrix*scalar), graphPath, "Normalized.Mtx")

    if outputNormalizedMatrixFile:
        print("Outputting Normalized Matrix")
        if bedOutputType or fithicOutputType:
            from spsIO import outputBedMatrix
            outputBedMatrix((normalizedMatrix*scalar), "Normalized.matrix", matrixFilePath)

    fileName = os.path.join(picklePath, "Normalized.Mtx")
    with open(fileName, 'wb') as f:
        pickle.dump(normalizedMatrix, f)

    fileName = os.path.join(picklePath, "removed")
    with open(fileName, 'wb') as f:
        pickle.dump(removed, f)
    print("Normalized Matrix pickled")