Exemple #1
0
def _turnoverRate(changedLen, unchangedLen):
    """Return changedLen / (changedLen + unchangedLen), or 0.0 if the sum is 0.

    The guard is on the very denominator that is used, so the division can
    never raise ZeroDivisionError.
    """
    total = changedLen + unchangedLen
    if total > 0:
        return float(changedLen) / total
    return 0.0


def _printBranchTurnover(halPath, args, root, child):
    """Compute and print the turnover statistics for the branch root -> child.

    Builds the conserved-element file paths for `child` and `root` from
    args.workDir and the args.conservedBedName pattern, passes each to
    checkFile, derives the per-child output paths inside args.workDir, and
    hands everything to compareConservationOverBranch.  The four lengths it
    returns are printed on one line together with the gain/loss rates and
    the branch length reported by getBranchLength.
    """
    consFile = os.path.join(args.workDir, args.conservedBedName % child)
    checkFile(consFile)
    pconsFile = os.path.join(args.workDir, args.conservedBedName % root)
    checkFile(pconsFile)

    outMappedAlignedBed = os.path.join(args.workDir, child + "_pa.bed")
    outParentSlicedBed = os.path.join(args.workDir, child + "_pslice.bed")
    outMappedGenomeBed = os.path.join(args.workDir, child + "_pm.bed")
    outConservationBed = os.path.join(args.workDir, child + "_int.bed")
    outAlignedBed = os.path.join(args.workDir, child + "_al.bed")
    outGainBed = os.path.join(args.workDir, child + "_gain.bed")
    outLossBed = os.path.join(args.workDir, child + "_loss.bed")

    (conLen, gainLen,
     lossLen, unconLen) = compareConservationOverBranch(
        halPath, child, consFile, pconsFile,
        outMappedAlignedBed, outParentSlicedBed,
        outMappedGenomeBed, outConservationBed, outAlignedBed,
        outGainBed, outLossBed)

    # gain rate = gain / (uncons + gain); loss rate = loss / (cons + loss).
    # Each rate is guarded by its own denominator (the original code had
    # the two guards swapped, which could divide by zero).
    gainRate = _turnoverRate(gainLen, unconLen)
    lossRate = _turnoverRate(lossLen, conLen)

    branchLength = getBranchLength(halPath, child)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    # readTurnoverFile parses this line by whitespace-token position, so
    # the format string must not change.
    print("%s: cons %d  ucons %d  gain %d (%f) loss %d (%f) bl %f" % (
        child,
        conLen,
        unconLen,
        gainLen,
        gainRate,
        lossLen,
        lossRate,
        branchLength))


def getHalTreeTurnover(halPath, args, rootName=None):
    """Print turnover statistics for every branch in the subtree of rootName.

    Recurses depth-first through getHalChildrenNames.  A branch is only
    reported when its parent is NOT the alignment root (as returned by
    getHalRootName); children of the alignment root are skipped but still
    descended into.

    Args:
        halPath: path of the HAL file, forwarded to the hal helper functions.
        args: options object; workDir and conservedBedName (a "%"-pattern
            taking a genome name) are read.
        rootName: genome to start from; None means the alignment root.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        if root != getHalRootName(halPath):
            _printBranchTurnover(halPath, args, root, child)
        getHalTreeTurnover(halPath, args, child)
def readTurnoverFile(halPath, turnoverPath):
    """Parse a turnover report into per-genome model parameters.

    Each non-blank line is split on whitespace and read by token position:
    token 0 is the genome name (surrounding ":" stripped), tokens 2, 4, 6
    and 9 are the cons / ucons / gain / loss values and token 12 is the
    branch length.

    A genome is skipped, with a warning on stderr, when its values cannot
    yield valid probabilities: the total is not positive, cons or ucons is
    negative, or one of the rate denominators (ucons + gain, cons + loss)
    is zero.

    Args:
        halPath: path of the HAL file, forwarded to the hal helper functions.
        turnoverPath: path of the text file to parse.

    Returns:
        dict mapping genome name to a tuple
        ([pi0, pi1], [[1 - pg, pg], [pl, 1 - pl]], t).
    """
    result = dict()
    # The context manager guarantees the file is closed, also on errors.
    with open(turnoverPath, "r") as toFile:
        for line in toFile:
            toks = line.split()
            if not toks:
                # Blank line: nothing to parse.
                continue
            genome = toks[0].strip(":")
            cons = float(toks[2])
            ucons = float(toks[4])
            gain = float(toks[6])
            loss = float(toks[9])
            totalAligned = cons + ucons + gain + loss

            unusable = (totalAligned <= 0 or cons < 0 or ucons < 0 or
                        # pg and pl below divide by these two sums.
                        ucons + gain == 0 or cons + loss == 0)
            if unusable:
                sys.stderr.write("Warning, skipping %s\n" % genome)
                continue

            pi0 = (ucons + loss) / totalAligned
            pi1 = (cons + gain) / totalAligned
            pg = gain / (ucons + gain)
            pl = loss / (cons + loss)
            t = float(toks[12])
            #
            # Incorporate parent branch since it affects turnover
            #
            if genome != getHalRootName(halPath):
                parName = getParentGenomeName(halPath, genome)
                parBranch = getBranchLength(halPath, parName)
                t += float(parBranch)
            assert pi0 >= 0 and pi1 >= 0
            assert pg >= 0 and pl >= 0
            assert t >= 0
            result[genome] = ([pi0, pi1],
                              [[1.0 - pg, pg], [pl, 1.0 - pl]],
                              t)
    return result
def main(argv=None):
    """Command-line entry point: parse options and run halTreeTurnoverParams.

    Args:
        argv: full argument vector in sys.argv form (element 0 is the
            program name).  Defaults to sys.argv.

    Invalid numeric options (--maxIt <= 0, --step <= 0, --retries <= 1)
    are reported through the parser, which prints a usage message and
    exits with status 2.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser()
    parser.add_argument("halFile", type=str,
                        help="Path of hal file")
    parser.add_argument("NITurnoverFile", type=str,
                        help="Output of halTreeNITurnover.py")
    parser.add_argument("--maxIt", type=int, default=100000,
                        help="number of iterations for gradient descent")
    parser.add_argument("--step", type=float, default=0.0001,
                        help="gradient descent step")
    parser.add_argument("--retries", type=int, default=5,
                        help="number of gradient descents to run")
    parser.add_argument("--root", type=str, default=None,
                        help="root of alignment to consider")
    parser.add_argument("--allInternals", action="store_true", default=False,
                        help="estimate params for all subtrees independently,"
                        " in addition to the root")

    # Honour the caller-supplied argv (previously it was silently ignored
    # and sys.argv was always parsed).  Element 0 is the program name.
    args = parser.parse_args(argv[1:])

    # Validate explicitly rather than with assert, which is removed when
    # Python runs with -O.  The accepted ranges are unchanged.
    if not args.maxIt > 0:
        parser.error("--maxIt must be greater than 0")
    if not args.step > 0:
        parser.error("--step must be greater than 0")
    if not args.retries > 1:
        parser.error("--retries must be greater than 1")

    if args.root is None:
        args.root = getHalRootName(args.halFile)

    halTreeTurnoverParams(args.halFile, args.NITurnoverFile,
                          args.root, args.allInternals, args.maxIt,
                          args.step, args.retries)
def getHalTreeBackground(halPath, args, rootName=None):
    """Generate a background BED file for every genome below rootName.

    For each child (and, recursively, each descendant) a shell command is
    built, echoed to stdout and executed with runShellCommand.  When
    args.ar is exactly True the command is halMaskExtract, otherwise
    halStats --bedSequences redirected into the output file.  The output
    path is args.workDir joined with args.backgroundBedName % genome.

    rootName of None means: start at the genome from getHalRootName.
    """
    parent = getHalRootName(halPath) if rootName is None else rootName
    for genome in getHalChildrenNames(halPath, parent):
        bedPath = os.path.join(args.workDir, args.backgroundBedName % genome)
        if args.ar is not True:
            shellCmd = "halStats %s --bedSequences %s > %s" % (
                halPath, genome, bedPath)
        else:
            shellCmd = ("halMaskExtract %s %s --maskFile %s --extend %d "
                        "--extendPct %f") % (halPath, genome, bedPath,
                                             args.arExtend, args.arExtendPct)
        # Echo the command before running it, then descend into the child.
        print(shellCmd)
        runShellCommand(shellCmd)
        getHalTreeBackground(halPath, args, genome)
def getHalTreeConservation(halPath, args, events, rootName=None):
    """Write conserved intervals for every genome below rootName.

    For each child, the background and mutations BED paths are built from
    args.workDir and the args.backgroundBedName / args.mutationsBedName
    patterns and passed to checkFiles.  A BedConservation object then
    computes the background rate and writes the conserved intervals to
    args.workDir / (args.conservedBedName % child).  After the child's own
    subtree has been processed, a one-line summary is printed, so
    descendants are reported before their ancestors.

    Args:
        halPath: path of the HAL file, forwarded to the hal helper functions.
        args: options object; workDir, backgroundBedName, mutationsBedName,
            conservedBedName, pval and cutoff are read.
        events: forwarded unchanged to BedConservation.computeBackgroundRate.
        rootName: genome to start from; None means the alignment root.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        bgFile = os.path.join(args.workDir, args.backgroundBedName % child)
        muFile = os.path.join(args.workDir, args.mutationsBedName % child)
        checkFiles(bgFile, muFile)
        outPath = os.path.join(args.workDir, args.conservedBedName % child)
        # Close (and flush) the output before recursing: otherwise one file
        # handle per tree level stays open for the whole descent.
        with open(outPath, "w") as outFile:
            bc = BedConservation()
            bc.computeBackgroundRate(muFile, bgFile, events)
            bc.identifyConservedIntervals(muFile, outFile,
                                          float(args.pval),
                                          float(args.cutoff))
        getHalTreeConservation(halPath, args, events, child)
        # NOTE(review): the value labelled "pct of genome" is the plain
        # ratio writtenBases / genomeLength (not multiplied by 100); it is
        # left unchanged in case the output is parsed elsewhere.
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s: %d segments with %d bases (%f pct of genome) found. bgrate= %f minDist=%d" % (
            child,
            bc.writtenCount,
            bc.writtenBases,
            float(bc.writtenBases) / float(genomeLength(halPath, child)),
            bc.rate,
            bc.minDistance(float(args.pval)),
        ))