def getHalTreeTurnover(halPath, args, rootName=None):
    """Print conservation turnover statistics for every branch below a genome.

    Walks the tree depth-first starting at ``rootName`` (the alignment root
    when ``rootName`` is None).  For each child whose parent is not the
    alignment root, the child's and the parent's conserved BED files are
    handed to ``compareConservationOverBranch`` and one summary line is
    printed.  Children of the alignment root are not compared, but are still
    descended into.

    Args:
        halPath: path of the HAL file, forwarded to the HAL helper functions.
        args: options object; ``workDir`` and ``conservedBedName`` (a
            ``%``-format pattern taking a genome name) are read from it.
        rootName: genome whose children are processed; None selects the
            alignment root.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        if root != getHalRootName(halPath):
            consFile = os.path.join(args.workDir,
                                    args.conservedBedName % child)
            checkFile(consFile)
            pconsFile = os.path.join(args.workDir,
                                     args.conservedBedName % root)
            checkFile(pconsFile)

            # Per-branch intermediate/output BED files, all in the work dir.
            outMappedAlignedBed = os.path.join(args.workDir, child + "_pa.bed")
            outParentSlicedBed = os.path.join(args.workDir,
                                              child + "_pslice.bed")
            outMappedGenomeBed = os.path.join(args.workDir, child + "_pm.bed")
            outConservationBed = os.path.join(args.workDir, child + "_int.bed")
            outAlignedBed = os.path.join(args.workDir, child + "_al.bed")
            outGainBed = os.path.join(args.workDir, child + "_gain.bed")
            outLossBed = os.path.join(args.workDir, child + "_loss.bed")

            (conLen, gainLen, lossLen,
             unconLen) = compareConservationOverBranch(
                halPath, child, consFile, pconsFile,
                outMappedAlignedBed, outParentSlicedBed,
                outMappedGenomeBed, outConservationBed, outAlignedBed,
                outGainBed, outLossBed)

            # Each rate is guarded by its *own* denominator so that an empty
            # class yields a rate of 0 instead of a ZeroDivisionError.
            gainDenom = unconLen + gainLen
            gainRate = 0
            if gainDenom > 0:
                gainRate = float(gainLen) / gainDenom
            lossDenom = conLen + lossLen
            lossRate = 0
            if lossDenom > 0:
                lossRate = float(lossLen) / lossDenom

            branchLength = getBranchLength(halPath, child)

            # A single parenthesised argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s: cons %d ucons %d gain %d (%f) loss %d (%f) bl %f" % (
                child,
                conLen,
                unconLen,
                gainLen,
                gainRate,
                lossLen,
                lossRate,
                branchLength))
        # Recurse for every child, including the skipped children of the root.
        getHalTreeTurnover(halPath, args, child)
def readTurnoverFile(halPath, turnoverPath):
    """Parse a turnover report into per-genome model parameters.

    Every non-blank line is split on whitespace and read positionally:
    token 0 is the genome name (surrounding ``:`` stripped), tokens 2, 4, 6
    and 9 are the conserved, unconserved, gained and lost counts, and
    token 12 is the branch length.

    Rows are skipped, with a warning on stderr, when the total count is not
    positive, when the conserved or unconserved count is negative, or when
    either turnover probability would need a division by zero.

    Args:
        halPath: path of the HAL file, forwarded to the HAL helper functions.
        turnoverPath: path of the text file to read.

    Returns:
        dict mapping genome name to the tuple
        ``([pi0, pi1], [[1 - pg, pg], [pl, 1 - pl]], t)``.
    """
    result = dict()
    # The context manager closes the file even if a row fails to parse.
    with open(turnoverPath, "r") as toFile:
        for line in toFile:
            toks = line.split()
            if not toks:
                # Blank line: nothing to parse.
                continue
            genome = toks[0].strip(":")
            cons = float(toks[2])
            ucons = float(toks[4])
            gain = float(toks[6])
            loss = float(toks[9])
            totalAligned = cons + ucons + gain + loss
            gainDenom = ucons + gain
            lossDenom = cons + loss
            if (totalAligned <= 0 or cons < 0 or ucons < 0 or
                    gainDenom == 0 or lossDenom == 0):
                # Degenerate row: the probabilities below are undefined.
                sys.stderr.write("Warning, skipping %s\n" % genome)
                continue

            pi0 = (ucons + loss) / totalAligned
            pi1 = (cons + gain) / totalAligned
            pg = gain / gainDenom
            pl = loss / lossDenom
            t = float(toks[12])
            #
            # Incorporate parent branch since it affects turnover
            #
            if genome != getHalRootName(halPath):
                parName = getParentGenomeName(halPath, genome)
                parBranch = getBranchLength(halPath, parName)
                t += float(parBranch)
            assert pi0 >= 0 and pi1 >= 0
            assert pg >= 0 and pl >= 0
            assert t >= 0
            result[genome] = ([pi0, pi1],
                              [[1.0 - pg, pg], [pl, 1.0 - pl]],
                              t)
    return result
def main(argv=None):
    """Command-line entry point: parse options and run halTreeTurnoverParams.

    Args:
        argv: full argument vector with the program name at index 0.
            Defaults to ``sys.argv``.

    Raises:
        SystemExit: raised by argparse on malformed command lines.
        AssertionError: if ``--maxIt`` or ``--step`` is not positive, or
            ``--retries`` is not greater than 1.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser()
    parser.add_argument("halFile", type=str,
                        help="Path of hal file")
    parser.add_argument("NITurnoverFile", type=str,
                        help="Output of halTreeNITurnover.py")
    parser.add_argument("--maxIt", type=int, default=100000,
                        help="number of iterations for gradient descent")
    parser.add_argument("--step", type=float, default=0.0001,
                        help="gradient descent step")
    parser.add_argument("--retries", type=int, default=5,
                        help="number of gradient descents to run")
    parser.add_argument("--root", type=str, default=None,
                        help="root of alignment to consder")
    parser.add_argument("--allInternals", action="store_true", default=False,
                        help="estimate params for all subtrees independently,"
                        " in addition to the root")

    # Parse the vector we were given (minus the program name) so that an
    # explicit argv is honoured instead of silently falling back to sys.argv.
    args = parser.parse_args(argv[1:])

    if args.root is None:
        args.root = getHalRootName(args.halFile)

    assert (args.maxIt > 0 and args.step > 0 and args.retries > 1)

    halTreeTurnoverParams(args.halFile, args.NITurnoverFile,
                          args.root, args.allInternals,
                          args.maxIt, args.step, args.retries)
def getHalTreeBackground(halPath, args, rootName=None):
    """Run the background-BED shell command for every genome below a node.

    Descends depth-first from ``rootName`` (the alignment root when it is
    None).  For each child genome a command line is built, echoed to stdout
    and passed to ``runShellCommand``:

    * when ``args.ar`` is True, ``halMaskExtract`` is invoked with the
      background path as ``--maskFile`` plus ``args.arExtend`` and
      ``args.arExtendPct``;
    * otherwise ``halStats --bedSequences`` is invoked with its output
      redirected to the background path.

    The background path is ``args.backgroundBedName % child`` inside
    ``args.workDir``.
    """
    parent = getHalRootName(halPath) if rootName is None else rootName

    for genome in getHalChildrenNames(halPath, parent):
        bedPath = os.path.join(args.workDir, args.backgroundBedName % genome)

        if args.ar is not True:
            shellLine = "halStats %s --bedSequences %s > %s" % (
                halPath, genome, bedPath)
        else:
            shellLine = (
                "halMaskExtract %s %s --maskFile %s --extend %d --extendPct %f"
                % (halPath, genome, bedPath, args.arExtend, args.arExtendPct))

        # Echo the command before running it.
        print(shellLine)
        runShellCommand(shellLine)

        # Handle this genome's own children before moving to its siblings.
        getHalTreeBackground(halPath, args, genome)
def getHalTreeConservation(halPath, args, events, rootName=None):
    """Write a conserved-intervals BED for every genome below a node.

    Descends depth-first from ``rootName`` (the alignment root when it is
    None).  For each child genome the background and mutations BED files are
    checked, a ``BedConservation`` object computes the background rate and
    writes the conserved intervals to ``args.conservedBedName % child`` in
    ``args.workDir``, the child's subtree is processed, and finally a
    summary line for the child is printed.

    Args:
        halPath: path of the HAL file, forwarded to the HAL helper functions.
        args: options object; ``workDir``, ``backgroundBedName``,
            ``mutationsBedName``, ``conservedBedName``, ``pval`` and
            ``cutoff`` are read from it.
        events: forwarded unchanged to
            ``BedConservation.computeBackgroundRate``.
        rootName: genome whose children are processed; None selects the
            alignment root.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        bgFile = os.path.join(args.workDir, args.backgroundBedName % child)
        muFile = os.path.join(args.workDir, args.mutationsBedName % child)
        checkFiles(bgFile, muFile)
        outPath = os.path.join(args.workDir, args.conservedBedName % child)

        # Close (and therefore flush) the output before recursing, so file
        # handles do not pile up along the depth of the tree.
        # NOTE(review): assumes BedConservation does not write to outFile
        # after identifyConservedIntervals returns -- confirm.
        with open(outPath, "w") as outFile:
            bc = BedConservation()
            bc.computeBackgroundRate(muFile, bgFile, events)
            bc.identifyConservedIntervals(muFile, outFile, float(args.pval),
                                          float(args.cutoff))

        getHalTreeConservation(halPath, args, events, child)

        # The "pct of genome" value is the plain ratio writtenBases / genome
        # length; it is not multiplied by 100.
        print("%s: %d segments with %d bases (%f pct of genome) found."
              " bgrate= %f minDist=%d" % (
                  child,
                  bc.writtenCount,
                  bc.writtenBases,
                  float(bc.writtenBases) / float(genomeLength(halPath, child)),
                  bc.rate,
                  bc.minDistance(float(args.pval)),
              ))