def transcriptSetOverlap(aDir, AS):
    """Flag every OriginRNA record under aDir with its transcript-overlap status.

    The records are loaded through cgNexusFlat.dataController, their tccs are
    checked against the gene set read from ensemblAllTranscripts.tsv, and each
    record's ``transcriptOverlap`` attribute is set to True or False before
    the records are committed back through the same controller.

    aDir -- passed to cgNexusFlat.dataController as the data location.
    AS   -- coerced with bool(); when true every tcc is passed through
            cg.convertToAS before it is tested.

    NOTE(review): a later definition with the same name appears in this file
    (it uses cgDB.dataController) and will replace this one when the module
    is loaded -- confirm which of the two is intended.
    """
    useAS = bool(AS)

    transcriptFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    geneSet = cgGenes.createGeneSetFromFile(transcriptFN)

    # note that you need to test the AS peaks, this is the location of the
    # targetted transcript
    controller = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    records = controller.load()

    def testedTcc(record):
        # tcc that is actually compared against the transcripts
        if useAS:
            return cg.convertToAS(record.tcc)
        return record.tcc

    queryTccs = [testedTcc(record) for record in records.values()]

    # transcripts hit by any query tcc, then the query tccs hitting those
    hitTranscripts = geneSet.transcriptOverlaps(queryTccs)
    hitTranscriptTccs = [transcript.tcc for transcript in hitTranscripts]
    hitQueryTccs = compare.compareTwoTcc(queryTccs, hitTranscriptTccs, 1)

    # store the flag on each record and persist
    for record in records.values():
        record.transcriptOverlap = testedTcc(record) in hitQueryTccs

    controller.commit(records)
def transcriptSetOverlap(aDir, AS):
    """Set ``transcriptOverlap`` on each OriginRNA record stored under aDir.

    Records are loaded with cgDB.dataController(aDir, cgOriginRNA.OriginRNA).
    Each record's tcc (run through cg.convertToAS when AS is truthy) is tested
    against the gene set built from ensemblAllTranscripts.tsv; the record gets
    ``transcriptOverlap = True`` when its tcc is among the tccs returned by
    compare.compareTwoTcc, otherwise False.  The records are then committed.

    aDir -- data location handed to cgDB.dataController.
    AS   -- any value; interpreted through bool().
    """
    antisense = bool(AS)
    exonSet = cgGenes.createGeneSetFromFile(
        '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv')

    # get degradome TCCS
    # note that you need to test the AS peaks, this is the location of the
    # targetted transcript
    dc = cgDB.dataController(aDir, cgOriginRNA.OriginRNA)
    idToRNA = dc.load()

    degTccs = []
    for oRNA in idToRNA.values():
        degTccs.append(cg.convertToAS(oRNA.tcc) if antisense else oRNA.tcc)

    # find all overlapping exons/transcripts, then all results sequences
    # that overlap exons
    exonTccs = [exon.tcc for exon in exonSet.transcriptOverlaps(degTccs)]
    overlapping = compare.compareTwoTcc(degTccs, exonTccs, 1)

    # write the flag back onto every record
    for oRNA in idToRNA.values():
        candidate = cg.convertToAS(oRNA.tcc) if antisense else oRNA.tcc
        oRNA.transcriptOverlap = candidate in overlapping

    dc.commit(idToRNA)
def _targetSiteTcc(alignment):
    """Return the tcc of the site derived from one alignment record.

    The site is computed from the record's ``tTcc``, ``tStart`` and
    ``sLength`` attributes with strand-dependent arithmetic.
    """
    chrom, strand, start, end = cg.tccSplit(alignment.tTcc)
    offset = alignment.tStart
    sLen = alignment.sLength

    # NOTE(review): the constant 19 is taken unchanged from the original
    # arithmetic; its meaning is not visible in this file.
    if strand == '1':
        start = start - 19 + offset
        end = start + sLen
    else:
        end = end + 19 - offset
        start = end - sLen

    return cg.makeTcc(chrom, strand, start, end)


def transcriptSetOverlapTargets(aDir):
    """Flag every cgAlignment record under aDir with its transcript-overlap status.

    For each record the site tcc is derived (see _targetSiteTcc), converted
    with cg.convertToAS and tested against the gene set read from
    ensemblAllTranscripts.tsv.  ``transcriptOverlap`` is set to True when the
    converted tcc is among the tccs returned by compare.compareTwoTcc,
    otherwise False, and the records are committed back.

    aDir -- data location handed to cgDB.dataController.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    # note that you need to test the AS peaks, this is the location of the
    # targetted transcript
    aDC = cgDB.dataController(aDir, cgAlignment.cgAlignment)
    id_alignment = aDC.load()

    # Derive each record's site exactly once so that the query list and the
    # per-record update below are guaranteed to use the same coordinates.
    alignments = list(id_alignment.values())
    siteTccs = [_targetSiteTcc(alignment) for alignment in alignments]

    # create list of unique tccs (first-seen order kept; the set only makes
    # the membership test cheap)
    uniqTccs = []
    seen = set()
    for tcc in siteTccs:
        if tcc not in seen:
            seen.add(tcc)
            uniqTccs.append(tcc)

    degTccs = [cg.convertToAS(x) for x in uniqTccs]

    # find all overlapping exons/transcripts, then all results sequences
    # that overlap exons
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = compare.compareTwoTcc(degTccs, overlappingExonTccs, 1)

    # update: each record is judged on its OWN site, not on whichever
    # alignment happened to be processed last in the loop above
    for alignment, tcc in zip(alignments, siteTccs):
        alignment.transcriptOverlap = cg.convertToAS(tcc) in overlappingDegTccs

    aDC.commit(id_alignment)
def profileTargetsHistoAS(tccList, cName, name='boxplot'):
    """Collect sense and antisense profiles around each tcc's peak and box-plot them.

    For every tcc the highest peak is looked up on the tcc itself and on its
    cg.convertToAS counterpart; tccs lacking either peak are skipped.  A
    profile is then taken around the sense peak (svs.profileAroundPoint) for
    both orientations and the per-coordinate values are accumulated over all
    tccs.  The two accumulated histograms are handed to plot.boxPlotHistoAS.

    tccList -- iterable of tccs to profile.
    cName   -- passed through to cgPeaks.stretch and svs.profileAroundPoint.
    name    -- passed to plot.boxPlotHistoAS as ``name``.

    NOTE(review): a later definition with the same name appears in this file
    and will replace this one when the module is loaded.
    """
    profileRange = 50  # passed to svs.profileAroundPoint as its second argument
    histDict = {}  # {coord: [values]}
    histDictAS = {}

    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Get highest peak (sense)
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # AS
        tccAS = cg.convertToAS(tcc)
        tccStretch = cgPeaks.stretch(tccAS, cName)
        tccStretch.createPeaks(span=2)
        highestCoordAS = tccStretch.getHighestPeak()
        if highestCoordAS is None:
            continue

        # profile around point (Sense)
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, profileRange, cName, ratio=True)
        for coord in cProfile:
            # setdefault creates the list on first sight of a coordinate
            # without hiding unrelated errors behind a bare except
            histDict.setdefault(coord, []).append(cProfile[coord])

        # profile around point (AS)
        zPoint = cg.convertToAS(zPoint)
        cProfile = svs.profileAroundPoint(zPoint, profileRange, cName,
                                          ratio=True, ratioCoord=highestCoordAS)
        for coord in cProfile:
            histDictAS.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
def profileTargetsHistoAS(tccList, cName, name='boxplot'):
    """Box-plot sense/antisense profiles gathered around the peak of each tcc.

    A tcc contributes only when a highest peak is found both on the tcc and
    on its cg.convertToAS counterpart.  The sense profile is taken around a
    tcc built from the sense peak coordinate; the antisense profile is taken
    around the cg.convertToAS version of that same tcc, with the antisense
    peak supplied as ``ratioCoord``.  Values are grouped per coordinate and
    the two groupings are passed to plot.boxPlotHistoAS.

    tccList -- iterable of tccs to profile.
    cName   -- passed through to cgPeaks.stretch and svs.profileAroundPoint.
    name    -- passed to plot.boxPlotHistoAS as ``name``.
    """
    window = 50  # second positional argument of svs.profileAroundPoint
    histDict = {}  # {coord: [values]}
    histDictAS = {}

    def highestPeak(someTcc):
        # highest peak coordinate reported for someTcc, or None
        stretch = cgPeaks.stretch(someTcc, cName)
        stretch.createPeaks(span=2)
        return stretch.getHighestPeak()

    def addProfile(histo, profile):
        # append each coordinate's value to that coordinate's list; only a
        # missing key triggers list creation, other errors propagate
        for coord in profile:
            histo.setdefault(coord, []).append(profile[coord])

    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # sense peak first; the antisense lookup is skipped when it is missing
        highestCoord = highestPeak(tcc)
        if highestCoord is None:
            continue

        highestCoordAS = highestPeak(cg.convertToAS(tcc))
        if highestCoordAS is None:
            continue

        # profile around point (Sense)
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        addProfile(histDict,
                   svs.profileAroundPoint(zPoint, window, cName, ratio=True))

        # profile around point (AS)
        zPoint = cg.convertToAS(zPoint)
        addProfile(histDictAS,
                   svs.profileAroundPoint(zPoint, window, cName, ratio=True,
                                          ratioCoord=highestCoordAS))

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
def transcriptSetOverlapDegFileHitmap(degFile, runningChrom, runningStrand):
    """Annotate degFile lines on one chrom/strand with a transcript-overlap flag.

    Every coordinate covered by a transcript on (runningChrom, runningStrand)
    in the gene set read from ensemblAllTranscripts.tsv is stored in a set.
    Each line of degFile whose second tab-separated field, converted with
    cg.convertToAS, lies on that chrom/strand is then given the flag '1' if
    any of its coordinates is in the set and '0' otherwise, via
    cg.appendToLine(line, flag, 3).  Lines on other chroms/strands are
    dropped.  The result is written to
    ``degFile + '.<runningChrom>.<runningStrand>'``.

    degFile       -- path of the tab-separated input file.
    runningChrom  -- chrom value to keep, compared against cg.tccSplit output.
    runningStrand -- strand value to keep, compared against cg.tccSplit output.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    # create hitmap
    coordSet = set()
    for gene in allExons.set.values():
        for transcript in gene.transcripts:
            chrom, strand, start, end = cg.tccSplit(transcript.tcc)
            if chrom != runningChrom or strand != runningStrand:
                continue
            # xrange avoids building a throwaway list for every transcript
            coordSet.update(xrange(start, end + 1))

    # parenthesised single argument prints identically under Python 2
    print('done creating hitmap')

    # find overlapping degTccs
    newLines = []
    with open(degFile, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            degTcc = cg.convertToAS(ls[1])
            chrom, strand, start, end = cg.tccSplit(degTcc)
            if chrom != runningChrom or strand != runningStrand:
                continue

            # any() stops at the first covered coordinate
            if any(i in coordSet for i in xrange(start, end + 1)):
                inTran = '1'
            else:
                inTran = '0'

            # update newLines
            newLines.append(cg.appendToLine(line, inTran, 3))

    with open(degFile + '.%s.%s' % (runningChrom, runningStrand), 'w') as out:
        out.writelines(newLines)
def transcriptSetOverlapDegFile(degFile):
    """Compute a transcript-overlap flag for every line of degFile.

    The second tab-separated field of each line is converted with
    cg.convertToAS and tested against the gene set read from
    ensemblAllTranscripts.tsv.  A line gets the flag '1' when its converted
    tcc is among the tccs returned by compare.compareTwoTcc, otherwise '0';
    the flag is applied with cg.appendToLine(line, flag, 3).

    degFile -- path of the tab-separated input file.

    Returns the list of updated lines, in file order.

    NOTE(review): the original body built each updated line and then
    discarded it, so it had no observable result.  The updated lines are now
    returned rather than written, because the intended output target (the
    sibling hitmap function writes a new suffixed file) is not visible here
    -- confirm whether degFile should be rewritten instead.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    # Read once and keep the raw lines, so the flagging pass below works on
    # exactly the lines that were queried.
    with open(degFile, 'r') as f:
        lines = f.readlines()

    # get degradome TCCS
    # note that you need to test the AS peaks, this is the location of the
    # targetted transcript
    degTccs = [cg.convertToAS(line.strip().split('\t')[1]) for line in lines]

    # find all overlapping exons/transcripts, then all results sequences
    # that overlap exons
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = compare.compareTwoTcc(degTccs, overlappingExonTccs, 1)

    # Pair every line with its own tcc; reusing the fields of the last line
    # read would give all lines the same flag.
    newLines = []
    for line, degTcc in zip(lines, degTccs):
        inTran = '1' if degTcc in overlappingDegTccs else '0'
        newLines.append(cg.appendToLine(line, inTran, 3))

    return newLines