def markCenterExpression(aFN, wigDir, rn=None, tn=None):
    """Store, per alignment, the share of profile signal near the scan center.

    Alignment records are loaded from ``aFN`` and the wig data from
    ``wigDir``.  For every alignment a scan range ``sLength`` long is derived
    from ``tTcc``, shifted by ``extend`` (25) and the alignment's ``tStart``,
    and the expression profile over that range is fetched.  Three ratios are
    written to ``centerExpression[aID]``: the summed signal at the
    strand-ordered positions ``[8:12]``, ``[7:13]`` and ``[6:14]``, each
    divided by the total signal of the range.

    The ratios stay ``[0.0, 0.0, 0.0]`` when the total signal is zero or when
    the alignment's ``tELevel`` value does not occur among the profile values.
    The records are written back with ``aNX.save()``.

    Args:
        aFN: Passed to ``cgNexusFlat.Nexus`` as the alignment file.
        wigDir: Passed to ``cgWig.loadWigDict``.
        rn, tn: Forwarded unchanged (as ``[rn, tn]``) to ``Nexus.load``.
    """
    extend = 25
    # (start, stop) slices into the strand-ordered positions; one per slot of
    # centerExpression[aID], from the narrowest window to the widest.
    centerWindows = ((8, 12), (7, 13), (6, 14))

    # NOTE(review): the timer is started but never read in this function.
    timer = bioLibCG.cgTimer()
    timer.start()

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['centerExpression', 'tTcc', 'tStart', 'sLength', 'tELevel'],
             [rn, tn])

    # load expression of degradome
    wigDict = cgWig.loadWigDict(wigDir)

    for aID in aNX.centerExpression:
        aNX.centerExpression[aID] = [0.0, 0.0, 0.0]

        chrom, strand, start, end = bioLibCG.tccSplit(aNX.tTcc[aID])
        offset = aNX.tStart[aID]
        sLen = aNX.sLength[aID]

        if strand == '1':
            start = start - extend + offset
            end = start + sLen
        else:
            end = end + extend - offset
            start = end - sLen

        scanRange = bioLibCG.makeTcc(chrom, strand, start, end)
        stretch = cgWig.getExpressionProfile(scanRange, wigDict)

        # make sure peak is in the small range
        peakLevel = aNX.tELevel[aID]
        peakInRange = (peakLevel in stretch.values())

        expressionSum = sum(stretch.values())

        # sorted() works whether keys() yields a list or a view; the positions
        # are walked in reverse order for the '-1' strand.
        sortedKeys = sorted(stretch.keys(), reverse=(strand == '-1'))

        if expressionSum != 0 and peakInRange:
            for slot, (lo, hi) in enumerate(centerWindows):
                # The 0.0 start value keeps the division below a float
                # division even when the profile values are integers.
                windowSum = sum((stretch[key] for key in sortedKeys[lo:hi]),
                                0.0)
                aNX.centerExpression[aID][slot] = windowSum / expressionSum

    aNX.save()
def markCenterExpression(aFN, wigDir, rn=None, tn=None):
    """Compute the centered-signal ratios for every loaded alignment.

    For each alignment in ``aFN`` the function builds a scan range of
    ``sLength`` positions from ``tTcc`` (offset by ``extend`` = 25 and the
    alignment's ``tStart``), fetches the expression profile of that range
    from the wig data in ``wigDir`` and fills ``centerExpression[aID]`` with
    three values: the signal summed over the strand-ordered positions
    ``[8:12]``, ``[7:13]`` and ``[6:14]``, each as a fraction of the signal
    summed over the whole range.

    An alignment keeps ``[0.0, 0.0, 0.0]`` if the range has no signal at all
    or if its ``tELevel`` value is not one of the profile values.  The
    results are persisted through ``aNX.save()``.

    Args:
        aFN: Alignment file handed to ``cgNexusFlat.Nexus``.
        wigDir: Directory handed to ``cgWig.loadWigDict``.
        rn, tn: Passed through to ``Nexus.load`` as ``[rn, tn]``.
    """
    extend = 25
    # Slice bounds over the ordered positions, one pair per output slot.
    windowBounds = [(8, 12), (7, 13), (6, 14)]

    # NOTE(review): nothing in this function reads the timer after start().
    timer = bioLibCG.cgTimer()
    timer.start()

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['centerExpression', 'tTcc', 'tStart', 'sLength', 'tELevel'],
             [rn, tn])

    # load expression of degradome
    wigDict = cgWig.loadWigDict(wigDir)

    for aID in aNX.centerExpression:
        aNX.centerExpression[aID] = [0.0, 0.0, 0.0]

        chrom, strand, start, end = bioLibCG.tccSplit(aNX.tTcc[aID])
        offset = aNX.tStart[aID]
        sLen = aNX.sLength[aID]

        if strand == '1':
            start = start - extend + offset
            end = start + sLen
        else:
            end = end + extend - offset
            start = end - sLen

        scanRange = bioLibCG.makeTcc(chrom, strand, start, end)
        stretch = cgWig.getExpressionProfile(scanRange, wigDict)

        # make sure peak is in the small range
        peakLevel = aNX.tELevel[aID]
        peakInRange = (peakLevel in stretch.values())

        expressionSum = sum(stretch.values())

        # sorted() returns a real list regardless of what keys() returns, so
        # the in-place reverse and the slicing below are always valid.
        sortedKeys = sorted(stretch.keys())
        if strand == '-1':
            sortedKeys.reverse()

        if expressionSum == 0 or not peakInRange:
            continue

        for slot, (lo, hi) in enumerate(windowBounds):
            # Start from 0.0 so the ratio is a float even for integer signal.
            sumE = sum((stretch[key] for key in sortedKeys[lo:hi]), 0.0)
            aNX.centerExpression[aID][slot] = sumE / expressionSum

    aNX.save()
def updateELevel(oFN, wigDir, rn=None, tn=None):
    """Set each degPeak record's ``eLevel`` to the maximum of its wig profile.

    Every record's ``tcc`` is first passed through
    ``bioLibCG.expandTcc(..., 3)``; the expression profile of the resulting
    range is fetched from the wig data in ``wigDir`` and its largest value is
    stored as the record's ``eLevel``.  The records are then saved.

    Args:
        oFN: File handed to ``cgNexusFlat.Nexus``.
        wigDir: Directory handed to ``cgWig.loadWigDict``.
        rn, tn: Passed through to ``Nexus.load`` as ``[rn, tn]``.
    """
    peaks = cgNexusFlat.Nexus(oFN, degPeak.degPeak)
    peaks.load(['tcc', 'eLevel'], [rn, tn])

    wigs = cgWig.loadWigDict(wigDir)

    for peakID in peaks.eLevel:
        widenedTcc = bioLibCG.expandTcc(peaks.tcc[peakID], 3)
        profile = cgWig.getExpressionProfile(widenedTcc, wigs)
        peaks.eLevel[peakID] = max(profile.values())

    peaks.save()
def updateELevel(oFN, wigDir, rn=None, tn=None):
    """Set each OriginRNA record's ``eLevel`` to the maximum of its wig profile.

    Notes carried over from the original author:
      * It does not need to be done by chromosome because it is small enough.
      * The strand does not need to be flipped because the wig is opposite
        as well.

    Args:
        oFN: File handed to ``cgNexusFlat.Nexus``.
        wigDir: Directory handed to ``cgWig.loadWigDict``.
        rn, tn: Passed through to ``Nexus.load`` as ``[rn, tn]``.
    """
    origins = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    origins.load(['tcc', 'eLevel'], [rn, tn])

    expression = cgWig.loadWigDict(wigDir)

    for originID in origins.eLevel:
        # The record's own tcc is used as-is (no expansion) for the lookup.
        profile = cgWig.getExpressionProfile(origins.tcc[originID], expression)
        origins.eLevel[originID] = max(profile.values())

    origins.save()
def updateELevel(oFN, wigDir, rn=None, tn=None):
    """Recompute ``eLevel`` for all loaded degPeak records and save them.

    For each record the ``tcc`` is run through ``bioLibCG.expandTcc`` with an
    argument of 3, the expression profile over the result is looked up in the
    wig data loaded from ``wigDir``, and the highest profile value is written
    to ``eLevel``.

    Args:
        oFN: File handed to ``cgNexusFlat.Nexus``.
        wigDir: Directory handed to ``cgWig.loadWigDict``.
        rn, tn: Passed through to ``Nexus.load`` as ``[rn, tn]``.
    """
    nexus = cgNexusFlat.Nexus(oFN, degPeak.degPeak)
    nexus.load(['tcc', 'eLevel'], [rn, tn])

    wigData = cgWig.loadWigDict(wigDir)

    for recID in nexus.eLevel:
        levels = cgWig.getExpressionProfile(
            bioLibCG.expandTcc(nexus.tcc[recID], 3),
            wigData,
        ).values()
        nexus.eLevel[recID] = max(levels)

    nexus.save()
def getPairInfo2(oFN, aFN, oWigDir, aWigDir, outFN, assembly):
    """Write one tab-separated line per (origin RNA, filtered target) pair.

    For every origin RNA ``oID`` and every alignment ``aID`` listed in its
    ``filteredTargets``, the origin ``tcc`` is widened by the alignment's
    ``tStart`` / ``tLength`` / ``tEnd`` values and the alignment ``tTcc`` is
    widened by 25 on each side.  The expression profiles and sequences of
    both ranges are fetched, and a line with these columns is written to
    ``outFN``:

        oID, aID, origin values (comma-joined), target values (comma-joined),
        tStart, tEnd, origin tcc, target tTcc, origin sequence,
        target sequence, mismatchPositions (comma-joined)

    Profile values are ordered by sorted coordinate and reversed when the
    respective strand is ``'-1'``.

    Args:
        oFN: File handed to ``cgNexusFlat.Nexus`` for the origin RNAs.
        aFN: File handed to ``cgNexusFlat.Nexus`` for the alignments.
        oWigDir: Directory handed to ``cgWig.loadWigDict`` for origin RNAs.
        aWigDir: Directory handed to ``cgWig.loadWigDict`` for alignments.
        outFN: Path of the output file (opened for writing, truncated).
        assembly: Passed to ``gf.GenomeFetch``.
    """
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['tcc', 'filteredTargets'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['tTcc', 'tStart', 'tEnd', 'tLength', 'mismatchPositions'])

    # get expression Dicts for alignments and small RNAs
    oWigDict = cgWig.loadWigDict(oWigDir)
    aWigDict = cgWig.loadWigDict(aWigDir)

    myG = gf.GenomeFetch(assembly)

    # The context manager closes the output file even if a lookup inside the
    # loops raises; the original only closed it on the success path.
    with open(outFN, 'w') as fOut:
        for oID in oNX.tcc:
            for aID in oNX.filteredTargets[oID]:

                # expand the oTcc to fit the target
                oChrom, oStrand, oStart, oEnd = bioLibCG.tccSplit(oNX.tcc[oID])
                if oStrand == '1':
                    oStart -= aNX.tStart[aID]  # 5'
                    oEnd += (aNX.tLength[aID] - 1) - aNX.tEnd[aID]  # 3'
                    oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)
                elif oStrand == '-1':
                    oStart -= (aNX.tLength[aID] - 1) - aNX.tEnd[aID]  # 3'
                    oEnd += aNX.tStart[aID]  # 5'
                    oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)

                # expand the peak tcc to be the target tcc
                # (peak is only 1-4nt long)
                aChrom, aStrand, aStart, aEnd = bioLibCG.tccSplit(aNX.tTcc[aID])
                aStart -= 25
                aEnd += 25
                aTcc = bioLibCG.makeTcc(aChrom, aStrand, aStart, aEnd)

                # get expression
                oCoord_value = cgWig.getExpressionProfile(oTcc, oWigDict)
                aCoord_value = cgWig.getExpressionProfile(aTcc, aWigDict)

                oValues = [oCoord_value[x] for x in sorted(oCoord_value.keys())]
                aValues = [aCoord_value[x] for x in sorted(aCoord_value.keys())]

                # Present minus-strand profiles in the opposite coordinate order.
                if oStrand == '-1':
                    oValues.reverse()
                if aStrand == '-1':
                    aValues.reverse()

                # get sequences
                aSeq = myG.getSequence(aTcc)
                oSeq = myG.getSequence(oTcc)

                pString = [
                    str(oID),
                    str(aID),
                    ','.join([str(x) for x in oValues]),
                    ','.join([str(x) for x in aValues]),
                    str(aNX.tStart[aID]),
                    str(aNX.tEnd[aID]),
                    oNX.tcc[oID],
                    aNX.tTcc[aID],
                    oSeq,
                    aSeq,
                    ','.join([str(x) for x in aNX.mismatchPositions[aID]]),
                ]
                fOut.write('\t'.join(pString) + '\n')
def getPairInfo(oFN, aFN, oWigDir, aWigDir, outFN, assembly):
    """Write scaled expression profiles for each (origin RNA, target) pair.

    For every origin RNA ``oID`` and every alignment ``aID`` listed in its
    ``filteredTargets``, the origin ``tcc`` is widened by the alignment's
    ``tStart`` / ``tLength`` / ``tEnd`` values and the alignment ``tTcc`` is
    widened by 25 on each side.  The expression profiles of both ranges are
    fetched in sorted-coordinate order and rescaled: origin values are
    divided by ``maxO`` (the maximum inside ``oValues[tStart:tEnd]``) and
    multiplied by 20; target values are divided by ``maxA`` (the overall
    maximum) and multiplied by 50.  One tab-separated line is written to
    ``outFN`` with the columns:

        oID, aID, scaled origin values (comma-joined), scaled target values
        (comma-joined), maxO, maxA, tStart, tEnd, origin tcc, target tTcc,
        origin sequence, target sequence

    Debug traces (the ranges, the raw target profile and the
    letter/value pairs) are printed to standard output.

    Args:
        oFN: File handed to ``cgNexusFlat.Nexus`` for the origin RNAs.
        aFN: File handed to ``cgNexusFlat.Nexus`` for the alignments.
        oWigDir: Directory handed to ``cgWig.loadWigDict`` for origin RNAs.
        aWigDir: Directory handed to ``cgWig.loadWigDict`` for alignments.
        outFN: Path of the output file (opened for writing, truncated).
        assembly: Passed to ``gf.GenomeFetch``.
    """
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['tcc', 'filteredTargets', 'sequence'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['tTcc', 'tStart', 'tEnd', 'tLength', 'targetSequence'])

    # get expression Dicts for alignments and small RNAs
    oWigDict = cgWig.loadWigDict(oWigDir)
    aWigDict = cgWig.loadWigDict(aWigDir)

    myG = gf.GenomeFetch(assembly)

    # The context manager closes the output file even if a lookup or a
    # division inside the loops raises; the original leaked the handle then.
    with open(outFN, 'w') as fOut:
        for oID in oNX.tcc:
            for aID in oNX.filteredTargets[oID]:

                # expand the oTcc to fit the target
                oChrom, oStrand, oStart, oEnd = bioLibCG.tccSplit(oNX.tcc[oID])
                if oStrand == '-1':
                    oStart -= (aNX.tLength[aID] - 1) - aNX.tEnd[aID]
                    oEnd += aNX.tStart[aID]
                    oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)
                elif oStrand == '1':
                    oStart -= aNX.tStart[aID]
                    oEnd += (aNX.tLength[aID] - 1) - aNX.tEnd[aID]
                    oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)

                # expand the peak tcc to be the target tcc
                # (peak is only 1-4nt long)
                aChrom, aStrand, aStart, aEnd = bioLibCG.tccSplit(aNX.tTcc[aID])
                aStart -= 25
                aEnd += 25
                aTcc = bioLibCG.makeTcc(aChrom, aStrand, aStart, aEnd)

                # Debug trace.  A single pre-formatted argument prints the
                # same text as the old "print a, b, ..." statement and also
                # parses under Python 3.
                print('%s %s %s %s' % (oNX.tcc[oID], oTcc, aNX.tTcc[aID], aTcc))

                # get expression
                oCoord_value = cgWig.getExpressionProfile(oTcc, oWigDict)
                aCoord_value = cgWig.getExpressionProfile(aTcc, aWigDict)
                for aCoord in sorted(aCoord_value.keys()):
                    print('%s %s' % (aCoord, aCoord_value[aCoord]))

                oValues = [oCoord_value[x] for x in sorted(oCoord_value.keys())]
                aValues = [aCoord_value[x] for x in sorted(aCoord_value.keys())]
                # NOTE(review): unlike getPairInfo2, the value lists are not
                # reversed for the '-1' strand here - confirm this is intended.

                # NOTE(review): a maximum of 0 makes the scaling below raise
                # ZeroDivisionError, and an empty tStart:tEnd slice makes
                # max() raise ValueError - confirm inputs exclude both cases.
                maxO = max(oValues[aNX.tStart[aID]:aNX.tEnd[aID]])
                # might want to change to max only within the small range
                maxA = max(aValues)

                oValues = [(float(x) / maxO) * 20 for x in oValues]
                aValues = [(float(x) / maxA) * 50 for x in aValues]

                # get sequence
                aSeq = myG.getSequence(aTcc)
                oSeq = myG.getSequence(oTcc)

                letter_value = zip(aSeq, aValues)
                for letter, value in letter_value:
                    print('%s %s' % (letter, value))

                pString = [
                    str(oID),
                    str(aID),
                    ','.join([str(x) for x in oValues]),
                    ','.join([str(x) for x in aValues]),
                    str(maxO),
                    str(maxA),
                    str(aNX.tStart[aID]),
                    str(aNX.tEnd[aID]),
                    oNX.tcc[oID],
                    aNX.tTcc[aID],
                    oSeq,
                    aSeq,
                ]
                fOut.write('\t'.join(pString) + '\n')
def getPairInfo2(oFN, aFN, oWigDir, aWigDir, outFN, assembly):
    """Write one tab-separated line per (origin RNA, filtered target) pair.

    For every origin RNA ``oID`` and every alignment ``aID`` listed in its
    ``filteredTargets``, the origin ``tcc`` is widened by the alignment's
    ``tStart`` / ``tLength`` / ``tEnd`` values and the alignment ``tTcc`` is
    widened by 25 on each side.  The expression profiles and sequences of
    both ranges are fetched, and a line with these columns is written to
    ``outFN``:

        oID, aID, origin values (comma-joined), target values (comma-joined),
        tStart, tEnd, origin tcc, target tTcc, origin sequence,
        target sequence, mismatchPositions (comma-joined), origin snrSS,
        origin context, target context

    Profile values are ordered by sorted coordinate and reversed when the
    respective strand is ``'-1'``.

    Args:
        oFN: File handed to ``cgNexusFlat.Nexus`` for the origin RNAs.
        aFN: File handed to ``cgNexusFlat.Nexus`` for the alignments.
        oWigDir: Directory handed to ``cgWig.loadWigDict`` for origin RNAs.
        aWigDir: Directory handed to ``cgWig.loadWigDict`` for alignments.
        outFN: Path of the output file (opened for writing, truncated).
        assembly: Passed to ``gf.GenomeFetch``.
    """
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['snrSS', 'tcc', 'filteredTargets', 'context'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(
        ['tTcc', 'tStart', 'tEnd', 'tLength', 'mismatchPositions', 'context'])

    # get expression Dicts for alignments and small RNAs
    oWigDict = cgWig.loadWigDict(oWigDir)
    aWigDict = cgWig.loadWigDict(aWigDir)

    myG = gf.GenomeFetch(assembly)

    # The context manager closes the output file even if a lookup inside the
    # loops raises; the original only closed it on the success path.
    with open(outFN, 'w') as fOut:
        for oID in oNX.tcc:
            for aID in oNX.filteredTargets[oID]:

                # expand the oTcc to fit the target
                oChrom, oStrand, oStart, oEnd = bioLibCG.tccSplit(oNX.tcc[oID])
                if oStrand == '1':
                    oStart -= aNX.tStart[aID]  # 5'
                    oEnd += (aNX.tLength[aID] - 1) - aNX.tEnd[aID]  # 3'
                    oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)
                elif oStrand == '-1':
                    oStart -= (aNX.tLength[aID] - 1) - aNX.tEnd[aID]  # 3'
                    oEnd += aNX.tStart[aID]  # 5'
                    oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)

                # expand the peak tcc to be the target tcc
                # (peak is only 1-4nt long)
                aChrom, aStrand, aStart, aEnd = bioLibCG.tccSplit(aNX.tTcc[aID])
                aStart -= 25
                aEnd += 25
                aTcc = bioLibCG.makeTcc(aChrom, aStrand, aStart, aEnd)

                # get expression
                oCoord_value = cgWig.getExpressionProfile(oTcc, oWigDict)
                aCoord_value = cgWig.getExpressionProfile(aTcc, aWigDict)

                oValues = [oCoord_value[x] for x in sorted(oCoord_value.keys())]
                aValues = [aCoord_value[x] for x in sorted(aCoord_value.keys())]

                # Present minus-strand profiles in the opposite coordinate order.
                if oStrand == '-1':
                    oValues.reverse()
                if aStrand == '-1':
                    aValues.reverse()

                # get sequences
                aSeq = myG.getSequence(aTcc)
                oSeq = myG.getSequence(oTcc)

                oSNR = oNX.snrSS[oID]
                oContext = oNX.context[oID]
                aContext = aNX.context[aID]

                pString = [
                    str(oID),
                    str(aID),
                    ','.join([str(x) for x in oValues]),
                    ','.join([str(x) for x in aValues]),
                    str(aNX.tStart[aID]),
                    str(aNX.tEnd[aID]),
                    oNX.tcc[oID],
                    aNX.tTcc[aID],
                    oSeq,
                    aSeq,
                    ','.join([str(x) for x in aNX.mismatchPositions[aID]]),
                    str(oSNR),
                    oContext,
                    aContext,
                ]
                fOut.write('\t'.join(pString) + '\n')