예제 #1
0
def doSaddle(filename, eig, gen):
    """Build one 5x5 saddle matrix per chromosome from a cooler file.

    For each chromosome the unbalanced cis matrix is fetched from the cooler,
    passed through ``observedOverExpected`` and restricted to bins whose
    column sum is positive.  The surviving bins are split into five bands
    delimited by the 0/20/40/60/80/100th percentiles of ``eig`` over those
    bins, and ``saddle[i, j]`` is the mean of the matrix over
    (band ``i``) x (band ``j``).

    Band membership uses strict inequalities on both sides, so a bin whose
    value equals a percentile edge exactly (including the minimum and the
    maximum) belongs to no band.

    Parameters
    ----------
    filename
        Path handed to ``cooler.Cooler`` and to ``getResolution``.
    eig
        Per-bin track indexed genome-wide; it is sliced with
        ``chrmStartsBinCont`` / ``chrmEndsBinCont`` of the genome and then
        filtered with a boolean mask.
    gen : str
        Genome folder name, appended to ``/home/magus/HiC2011/data/``.

    Returns
    -------
    list
        One ``(5, 5)`` float array per chromosome.  A chromosome with five
        or fewer usable bins contributes an all-zero array.
    """
    c = cooler.Cooler(filename)

    gen = Genome("/home/magus/HiC2011/data/" + gen, readChrms=["#", "X"])
    gen.setResolution(getResolution(filename))

    n_bands = 5
    # Percentile edges 0, 20, ..., 100 shared by every chromosome.
    edge_percentiles = [20 * k for k in range(n_bands + 1)]

    saddles = []
    for chrom in range(gen.chrmCount):
        saddle = np.zeros((n_bands, n_bands), dtype=float)
        st = gen.chrmStartsBinCont[chrom]
        end = gen.chrmEndsBinCont[chrom]
        cur = c.matrix(balance=False).fetch(gen.idx2label[chrom])
        cur = observedOverExpected(cur)

        # Keep only bins that have any signal, on both axes.
        mask = np.sum(cur, axis=0) > 0
        cur = cur[mask]
        cur = cur[:, mask]
        GC = eig[st:end]
        GC = GC[mask]

        if len(GC) > 5:
            # The band masks depend only on GC, so build them once per
            # chromosome instead of once per (i, j) cell.
            edges = np.percentile(GC, edge_percentiles)
            band_masks = [
                (GC > low) * (GC < high)
                for low, high in zip(edges[:-1], edges[1:])
            ]
            for i, mask1 in enumerate(band_masks):
                for j, mask2 in enumerate(band_masks):
                    saddle[i, j] += cur[np.ix_(mask1, mask2)].mean()
        saddles.append(saddle)

    return saddles
예제 #2
0
def doSaddleError(filename, eig, gen, correct=False):
    """Accumulate a genome-wide 5x5 saddle plus 100 bootstrap replicates.

    The ``"heatmap"`` entry of the h5dict at ``filename`` is cut into
    per-chromosome cis blocks.  Each block is passed through
    ``observedOverExpected``, restricted to bins with a positive column sum,
    and its bins are split into five bands delimited by the
    0/20/40/60/80/100th percentiles of ``eig`` over those bins.  For every
    band pair the mean of the block is added to ``saddle[i, j]``; the same
    pixels are also resampled with replacement 100 times and each resampled
    mean is added to the matching replicate matrix.

    Band membership uses strict inequalities, so bins whose value equals a
    percentile edge exactly fall in no band.

    Parameters
    ----------
    filename
        Path handed to ``h5dict`` (opened with mode ``'r'``) and to
        ``getResolution``.
    eig
        Per-bin track indexed genome-wide, or the string ``"GC"`` to use the
        concatenated ``GCBin`` of the genome.
    gen : str
        Genome folder name, appended to ``/home/magus/HiC2011/data/``.
    correct : bool, optional
        When true, the heatmap is passed through ``completeIC`` first.

    Returns
    -------
    tuple
        ``(saddle, permutted)``: the summed ``(5, 5)`` float array and a list
        of 100 ``(5, 5)`` float arrays holding the bootstrap sums.
    """
    gen = Genome("/home/magus/HiC2011/data/" + gen, readChrms=["#", "X"])
    data = h5dict(filename, 'r')["heatmap"]
    if correct:
        data = completeIC(data)
    gen.setResolution(getResolution(filename))

    # Check the type first: comparing a NumPy array to a string is an
    # elementwise operation whose truth value is ambiguous.
    if isinstance(eig, str) and eig == "GC":
        eig = np.concatenate(gen.GCBin)

    n_bands = 5
    n_resamples = 100
    edge_percentiles = [20 * k for k in range(n_bands + 1)]

    saddle = np.zeros((n_bands, n_bands), dtype=float)
    permutted = [
        np.zeros((n_bands, n_bands), dtype=float) for _ in range(n_resamples)
    ]

    for chrom in range(gen.chrmCount):
        st = gen.chrmStartsBinCont[chrom]
        end = gen.chrmEndsBinCont[chrom]
        cur = data[st:end, st:end]
        cur = observedOverExpected(cur)

        # Keep only bins that have any signal, on both axes.
        mask = np.sum(cur, axis=0) > 0
        cur = cur[mask]
        cur = cur[:, mask]
        GC = eig[st:end]
        GC = GC[mask]
        if len(GC) <= 5:
            continue

        # The band masks depend only on GC; build them once per chromosome.
        edges = np.percentile(GC, edge_percentiles)
        band_masks = [
            (GC > low) * (GC < high)
            for low, high in zip(edges[:-1], edges[1:])
        ]
        for i, mask1 in enumerate(band_masks):
            for j, mask2 in enumerate(band_masks):
                addition = np.reshape(cur[np.ix_(mask1, mask2)], (-1))
                for k in range(n_resamples):
                    resampled = np.random.choice(
                        addition, len(addition), replace=True)
                    permutted[k][i, j] += resampled.mean()
                saddle[i, j] += addition.mean()
    return saddle, permutted
예제 #3
0
def doEigenvector(filename, genome):
    """Return a per-bin track: GC content or the first eigenvector entry.

    When ``filename`` is the literal string ``"GC"``, the genome is loaded at
    a resolution of 1000000 and its ``GCBin`` arrays are returned
    concatenated.  Otherwise the file is loaded into a ``binnedData`` object
    at the resolution reported by ``getResolution``, run through the filter
    sequence below, decomposed with ``doEig(numPCs=2)``, and element ``0`` of
    the resulting ``EigDict`` entry is returned.

    Parameters
    ----------
    filename
        Either ``"GC"`` or a path accepted by ``getResolution`` and
        ``binnedData.simpleLoad``.
    genome : str
        Genome folder name, appended to ``/home/magus/HiC2011/data/``.
    """
    data_root = "/home/magus/HiC2011/data/"
    chromosomes = ["#", "X"]

    if filename == "GC":
        gc_genome = Genome(data_root + genome, readChrms=chromosomes)
        gc_genome.setResolution(1000000)
        return np.concatenate(gc_genome.GCBin)

    dataset = "bla"  # key under which the single dataset is registered
    binned = binnedData.binnedData(
        getResolution(filename), data_root + genome, chromosomes)
    binned.simpleLoad(filename, dataset)

    # Filtering steps; the call order is kept exactly as-is.
    binned.removeDiagonal()
    binned.removeBySequencedCount(0.5)
    binned.removeCis()
    binned.truncTrans(high=0.0005)
    binned.removePoorRegions(cutoff=1)
    binned.fakeCis()
    binned.removeZeros()

    binned.doEig(numPCs=2)
    binned.restoreZeros(value=0)

    eigenvectors = binned.EigDict[dataset]
    return eigenvectors[0]
예제 #4
0
def byChrEig(filename,
             genome,
             chromosomes="all",
             resolution="auto",
             byArm=True,
             doSmooth=False):
    """Compute ``completeEig`` for each chromosome's cis map in an h5dict.

    Parameters
    ----------
    filename
        Path to an h5dict whose cis maps are stored under keys of the form
        ``"N N"`` (chromosome index repeated).
    genome : str or Genome
        A ``Genome`` instance, or a string passed to the ``Genome``
        constructor.
    chromosomes : "all" or iterable of int, optional
        Chromosome indices to process.  ``"all"`` selects every index below
        ``genome.chrmCount`` that has a key in the file.
    resolution : "auto" or number, optional
        Bin size; ``"auto"`` takes it from ``getResolution(filename)``.
    byArm : bool, optional
        When true, the map is split at the centromere bin and
        ``completeEig`` is run on each arm separately; the two results are
        written into one array of length ``len(genome.GCBin[chrom])``.
    doSmooth : bool, optional
        Forwarded to ``completeEig``.

    Returns
    -------
    list
        One entry per processed chromosome, in the order of ``chromosomes``.

    Raises
    ------
    ValueError
        If ``chromosomes="all"`` and the file contains none of the expected
        keys.
    """
    from mirnylib.genome import Genome
    if resolution == "auto":
        resolution = getResolution(filename)
    if isinstance(genome, str):
        genome = Genome(genome)
    assert isinstance(genome, Genome)
    genome.setResolution(resolution)
    mydict = mirnylib.h5dict.h5dict(filename)

    # Type-check before comparing so an array of indices is never compared
    # elementwise against the string.
    if isinstance(chromosomes, str) and chromosomes == "all":
        chromosomes = [
            i for i in range(genome.chrmCount)
            if "{0} {0}".format(i) in mydict
        ]
        if not chromosomes:
            raise ValueError("No chromosomes left. Check h5dict file.")

    result = []
    for chrom in chromosomes:
        data = mydict["{0} {0}".format(chrom)]
        GC = genome.GCBin[chrom]
        if not byArm:
            result.append(completeEig(data, GC, doSmooth=doSmooth))
            continue

        # Floor division and an int cast: the centromere bin is used as a
        # slice bound, and true division would produce a float.
        cent = int(genome.cntrMids[chrom] // resolution)
        arms = np.zeros(len(GC), dtype=float)
        arms[:cent] = completeEig(data[:cent, :cent],
                                  GC[:cent],
                                  doSmooth=doSmooth)
        arms[cent:] = completeEig(data[cent:, cent:],
                                  GC[cent:],
                                  doSmooth=doSmooth)
        result.append(arms)
    return result