def readTurnoverFile(halPath, turnoverPath):
    """Parse a turnover report into per-genome model parameters.

    Every non-blank line is split on whitespace and is expected to look
    like ``<genome>: cons C ucons U gain G (rate) loss L (rate) bl B``:
    the genome name (trailing ":" stripped) is field 0, the conserved,
    unconserved, gain and loss counts are fields 2, 4, 6 and 9, and the
    branch length is field 12.

    Args:
        halPath: path of the HAL alignment, passed to the tree helpers
            (getHalRootName, getParentGenomeName, getBranchLength).
        turnoverPath: path of the text report to read.

    Returns:
        A dict mapping genome name to a tuple
        ``([pi0, pi1], [[1 - pg, pg], [pl, 1 - pl]], t)`` where
        pi0 = (ucons + loss) / total, pi1 = (cons + gain) / total,
        pg = gain / (ucons + gain), pl = loss / (cons + loss) and t is the
        branch length from the file plus, for every genome other than the
        HAL root, the branch length of its parent genome.

    Rows are skipped with a warning on stderr when the total is not
    positive, when cons or ucons is negative, or when either rate
    denominator is zero (the rate would be undefined).

    Raises:
        IndexError, ValueError: if a non-blank line has too few fields or a
            non-numeric value in a numeric field.
    """
    result = dict()
    # The context manager closes the report even if a line fails to parse.
    with open(turnoverPath, "r") as toFile:
        for line in toFile:
            toks = line.split()
            if not toks:
                # Blank (e.g. trailing) lines carry no record.
                continue
            genome = toks[0].strip(":")
            cons = float(toks[2])
            ucons = float(toks[4])
            gain = float(toks[6])
            loss = float(toks[9])
            totalAligned = cons + ucons + gain + loss
            gainDenom = ucons + gain
            lossDenom = cons + loss
            # A zero denominator would otherwise raise ZeroDivisionError
            # below even though totalAligned is positive.
            if (totalAligned <= 0 or cons < 0 or ucons < 0 or
                    gainDenom == 0 or lossDenom == 0):
                sys.stderr.write("Warning, skipping %s\n" % genome)
                continue
            pi0 = (ucons + loss) / totalAligned
            pi1 = (cons + gain) / totalAligned
            pg = gain / gainDenom
            pl = loss / lossDenom
            t = float(toks[12])
            #
            # Incorporate parent branch since it affects turnover
            #
            if genome != getHalRootName(halPath):
                parName = getParentGenomeName(halPath, genome)
                parBranch = getBranchLength(halPath, parName)
                t += float(parBranch)
            assert pi0 >= 0 and pi1 >= 0
            assert pg >= 0 and pl >= 0
            assert t >= 0
            result[genome] = ([pi0, pi1],
                              [[1.0 - pg, pg], [pl, 1.0 - pl]],
                              t)
    return result
def getHalTreeTurnover(halPath, args, rootName=None):
    """Recursively print conservation turnover for branches of a HAL tree.

    Starting at rootName (or the HAL root when rootName is None), every
    child genome is visited depth-first.  For each child whose parent is
    not the HAL root, the child's and parent's conserved-element BED files
    are compared with compareConservationOverBranch and one line of the
    form ``<child>: cons C ucons U gain G (rate) loss L (rate) bl B`` is
    printed.  Children of the HAL root are not compared, only recursed
    into.

    Args:
        halPath: path of the HAL alignment.
        args: options object; reads ``workDir`` (directory holding the
            input BEDs and receiving the intermediate BEDs) and
            ``conservedBedName`` (a %-template taking a genome name).
        rootName: genome to start from; defaults to the HAL root.

    The gain rate is gain / (ucons + gain) and the loss rate is
    loss / (cons + loss); a rate is reported as 0 when its denominator
    is 0.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        if root != getHalRootName(halPath):
            workDir = args.workDir
            consFile = os.path.join(workDir, args.conservedBedName % child)
            checkFile(consFile)
            pconsFile = os.path.join(workDir, args.conservedBedName % root)
            checkFile(pconsFile)

            # Intermediate outputs of the branch comparison, one per child.
            outMappedAlignedBed = os.path.join(workDir, child + "_pa.bed")
            outParentSlicedBed = os.path.join(workDir, child + "_pslice.bed")
            outMappedGenomeBed = os.path.join(workDir, child + "_pm.bed")
            outConservationBed = os.path.join(workDir, child + "_int.bed")
            outAlignedBed = os.path.join(workDir, child + "_al.bed")
            outGainBed = os.path.join(workDir, child + "_gain.bed")
            outLossBed = os.path.join(workDir, child + "_loss.bed")

            (conLen, gainLen, lossLen,
             unconLen) = compareConservationOverBranch(
                halPath, child, consFile, pconsFile,
                outMappedAlignedBed, outParentSlicedBed,
                outMappedGenomeBed, outConservationBed,
                outAlignedBed, outGainBed, outLossBed)

            # Each rate is guarded by its OWN denominator; guarding with
            # the other rate's denominator can divide by zero or wrongly
            # report 0.
            gainDenom = unconLen + gainLen
            gainRate = 0
            if gainDenom > 0:
                gainRate = float(gainLen) / gainDenom
            lossDenom = conLen + lossLen
            lossRate = 0
            if lossDenom > 0:
                lossRate = float(lossLen) / lossDenom

            branchLength = getBranchLength(halPath, child)
            # Single parenthesised expression: same output under the
            # Python 2 print statement and the Python 3 print function.
            print("%s: cons %d ucons %d gain %d (%f) loss %d (%f) bl %f" % (
                child, conLen, unconLen, gainLen, gainRate,
                lossLen, lossRate, branchLength))
        # Recurse for every child, including children of the HAL root.
        getHalTreeTurnover(halPath, args, child)
def getHalTreeTurnover(halPath, args, rootName=None):
    """Recursively print conservation turnover for branches of a HAL tree.

    Starting at rootName (or the HAL root when rootName is None), every
    child genome is visited depth-first.  For each child whose parent is
    not the HAL root, the child's and parent's conserved-element BED files
    are compared with compareConservationOverBranch and one line of the
    form ``<child>: cons C ucons U gain G (rate) loss L (rate) bl B`` is
    printed.  Children of the HAL root are not compared, only recursed
    into.

    Args:
        halPath: path of the HAL alignment.
        args: options object; reads ``workDir`` (directory holding the
            input BEDs and receiving the intermediate BEDs) and
            ``conservedBedName`` (a %-template taking a genome name).
        rootName: genome to start from; defaults to the HAL root.

    The gain rate is gain / (ucons + gain) and the loss rate is
    loss / (cons + loss); a rate is reported as 0 when its denominator
    is 0.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        if root != getHalRootName(halPath):
            workDir = args.workDir
            consFile = os.path.join(workDir, args.conservedBedName % child)
            checkFile(consFile)
            pconsFile = os.path.join(workDir, args.conservedBedName % root)
            checkFile(pconsFile)

            # Intermediate outputs of the branch comparison, one per child.
            outMappedAlignedBed = os.path.join(workDir, child + "_pa.bed")
            outParentSlicedBed = os.path.join(workDir, child + "_pslice.bed")
            outMappedGenomeBed = os.path.join(workDir, child + "_pm.bed")
            outConservationBed = os.path.join(workDir, child + "_int.bed")
            outAlignedBed = os.path.join(workDir, child + "_al.bed")
            outGainBed = os.path.join(workDir, child + "_gain.bed")
            outLossBed = os.path.join(workDir, child + "_loss.bed")

            (conLen, gainLen, lossLen,
             unconLen) = compareConservationOverBranch(
                halPath, child, consFile, pconsFile,
                outMappedAlignedBed, outParentSlicedBed,
                outMappedGenomeBed, outConservationBed,
                outAlignedBed, outGainBed, outLossBed)

            # Each rate is guarded by its OWN denominator; guarding with
            # the other rate's denominator can divide by zero or wrongly
            # report 0.
            gainDenom = unconLen + gainLen
            gainRate = 0
            if gainDenom > 0:
                gainRate = float(gainLen) / gainDenom
            lossDenom = conLen + lossLen
            lossRate = 0
            if lossDenom > 0:
                lossRate = float(lossLen) / lossDenom

            branchLength = getBranchLength(halPath, child)
            # Single parenthesised expression: same output under the
            # Python 2 print statement and the Python 3 print function.
            print("%s: cons %d ucons %d gain %d (%f) loss %d (%f) bl %f" % (
                child, conLen, unconLen, gainLen, gainRate,
                lossLen, lossRate, branchLength))
        # Recurse for every child, including children of the HAL root.
        getHalTreeTurnover(halPath, args, child)