def transcriptSetOverlap(aDir, AS):
    """Set ``transcriptOverlap`` on every OriginRNA record loaded from ``aDir``.

    The records are loaded through ``cgNexusFlat.dataController``; their tccs
    are handed to the gene set built from the hard-coded Ensembl transcript
    file, and each record ends up with ``transcriptOverlap`` set to ``True``
    or ``False`` before the whole mapping is committed back through the same
    controller.

    Args:
        aDir: Passed unchanged to ``cgNexusFlat.dataController`` (presumably
            the directory that stores the records -- confirm against that API).
        AS: Any truthy value routes each record's tcc through
            ``cg.convertToAS`` before it is tested.  The original in-file note
            says the AS peaks are the location of the targeted transcript.

    Returns:
        None.  The result is the ``transcriptOverlap`` attribute written on
        the records and persisted by ``oRNA_DC.commit``.
    """
    AS = bool(AS)

    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    # get degradome TCCS
    oRNA_DC = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = oRNA_DC.load()

    # Derive the tcc to test exactly once per record; the same list feeds the
    # overlap query and the per-record flagging below.
    records = list(id_oRNA.values())
    if AS:
        degTccs = [cg.convertToAS(obj.tcc) for obj in records]
    else:
        degTccs = [obj.tcc for obj in records]

    # find all overlapping exons/transcripts, then all tested tccs that
    # overlap them
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    # A set keeps the membership test below cheap.
    # NOTE(review): assumes tccs are hashable (they look like strings) -- confirm.
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    # flag the records, then persist them
    for obj, degTcc in zip(records, degTccs):
        obj.transcriptOverlap = degTcc in overlappingDegTccs

    oRNA_DC.commit(id_oRNA)
Example #2
0
def transcriptSetOverlap(aDir, AS):
    """Flag each OriginRNA record in ``aDir`` by transcript overlap.

    Loads the records with ``cgDB.dataController``, asks the gene set created
    from the hard-coded Ensembl transcript file which tested tccs overlap a
    transcript, writes ``transcriptOverlap = True/False`` on every record and
    commits the records back through the controller.

    Args:
        aDir: Handed to ``cgDB.dataController`` as-is (presumably the record
            directory -- confirm against that API).
        AS: Truthy to test ``cg.convertToAS(record.tcc)`` instead of
            ``record.tcc``.  The original in-file note says the AS peaks are
            the location of the targeted transcript.

    Returns:
        None; the records are modified and committed.
    """
    AS = bool(AS)

    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    def testedTcc(record):
        """Return the tcc that is tested for ``record``."""
        if AS:
            return cg.convertToAS(record.tcc)
        return record.tcc

    # get degradome TCCS
    oRNA_DC = cgDB.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = oRNA_DC.load()
    degTccs = [testedTcc(x) for x in id_oRNA.values()]

    # find all overlapping exons/transcripts, then all tested tccs that
    # overlap them
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    # Set membership instead of a list scan per record.
    # NOTE(review): assumes tccs are hashable (they look like strings) -- confirm.
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    # update the records and persist them
    for obj in id_oRNA.values():
        obj.transcriptOverlap = testedTcc(obj) in overlappingDegTccs

    oRNA_DC.commit(id_oRNA)
Example #3
0
def transcriptSetOverlapTargets(aDir):
    """Set ``transcriptOverlap`` on every alignment record loaded from ``aDir``.

    For each alignment a site tcc is derived from ``tTcc``, ``tStart`` and
    ``sLength``.  The de-duplicated site tccs are converted with
    ``cg.convertToAS`` and tested against the gene set built from the
    hard-coded Ensembl transcript file.  Every alignment is then flagged
    ``True``/``False`` and the records are committed through the controller.

    Bug fixed: the original update loop iterated ``obj`` but read
    ``alignment`` -- the variable left over from the first loop -- so every
    record received the flag of the last alignment.  Each record is now
    flagged from its own site tcc.

    Args:
        aDir: Passed unchanged to ``cgDB.dataController`` (presumably the
            record directory -- confirm against that API).

    Returns:
        None; the records are modified and committed.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    aDC = cgDB.dataController(aDir, cgAlignment.cgAlignment)
    id_alignment = aDC.load()

    def targetSiteTcc(alignment):
        """Return the site tcc for one alignment (same arithmetic as before)."""
        chrom, strand, start, end = cg.tccSplit(alignment.tTcc)
        offset = alignment.tStart
        sLen = alignment.sLength

        # NOTE(review): 19 is an unexplained fixed shift inherited from the
        # original code -- confirm its meaning with the author.
        if strand == '1':
            start = start - 19 + offset
            end = start + sLen
        else:
            end = end + 19 - offset
            start = end - sLen

        return cg.makeTcc(chrom, strand, start, end)

    # One site tcc per alignment, computed once and reused for the update.
    siteTccs = [(obj, targetSiteTcc(obj)) for obj in id_alignment.values()]

    # create list of unique tccs, keeping first-seen order
    # NOTE(review): assumes tccs are hashable (they look like strings) -- confirm.
    uniqTccs = []
    seen = set()
    for _obj, tcc in siteTccs:
        if tcc not in seen:
            seen.add(tcc)
            uniqTccs.append(tcc)

    degTccs = [cg.convertToAS(x) for x in uniqTccs]

    # find all overlapping exons/transcripts, then all tested tccs that
    # overlap them
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    # update
    for obj, tcc in siteTccs:
        obj.transcriptOverlap = cg.convertToAS(tcc) in overlappingDegTccs

    aDC.commit(id_alignment)
def _alignmentSiteTcc(alignment):
    """Build the site tcc of one alignment from ``tTcc``, ``tStart`` and ``sLength``."""
    chrom, strand, start, end = cg.tccSplit(alignment.tTcc)
    offset = alignment.tStart
    sLen = alignment.sLength

    # NOTE(review): the fixed shift of 19 comes from the original code and is
    # not explained there -- confirm its meaning with the author.
    if strand == '1':
        start = start - 19 + offset
        end = start + sLen
    else:
        end = end + 19 - offset
        start = end - sLen

    return cg.makeTcc(chrom, strand, start, end)


def transcriptSetOverlapTargets(aDir):
    """Flag every alignment in ``aDir`` by whether its site overlaps a transcript.

    Alignments are loaded through ``cgDB.dataController``.  Their site tccs
    (see ``_alignmentSiteTcc``) are de-duplicated, converted with
    ``cg.convertToAS`` and tested against the gene set created from the
    hard-coded Ensembl transcript file.  ``transcriptOverlap`` is then set to
    ``True``/``False`` on each alignment and the records are committed.

    Two defects of the previous version are fixed here:

    * The update loop iterated ``obj`` but read the stale ``alignment``
      variable of the first loop, so all records got the last alignment's
      flag.
    * Indentation mixed tabs and spaces, which Python 3 (and ``python -tt``)
      rejects.

    Args:
        aDir: Passed unchanged to ``cgDB.dataController`` (presumably the
            record directory -- confirm against that API).

    Returns:
        None; the records are modified and committed.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    aDC = cgDB.dataController(aDir, cgAlignment.cgAlignment)
    id_alignment = aDC.load()
    alignments = list(id_alignment.values())

    # create list of unique tccs (first-seen order)
    # NOTE(review): assumes tccs are hashable (they look like strings) -- confirm.
    uniqTccs = []
    alreadyListed = set()
    for alignment in alignments:
        tcc = _alignmentSiteTcc(alignment)
        if tcc in alreadyListed:
            continue
        alreadyListed.add(tcc)
        uniqTccs.append(tcc)

    degTccs = [cg.convertToAS(x) for x in uniqTccs]

    # find all overlapping exons/transcripts, then all tested tccs that
    # overlap them
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    # update: each record is judged by its OWN site tcc
    for obj in alignments:
        degTcc = cg.convertToAS(_alignmentSiteTcc(obj))
        obj.transcriptOverlap = degTcc in overlappingDegTccs

    aDC.commit(id_alignment)
Example #5
0
def profileTargetsHistoAS(tccList, cName, name='boxplot'):
    """Box-plot the sense and AS profiles around the highest peak of each tcc.

    For every tcc in ``tccList`` the highest peak is looked up twice with
    ``cgPeaks.stretch(...)``, ``createPeaks(span=2)`` and
    ``getHighestPeak()``: once for the tcc itself and once for
    ``cg.convertToAS(tcc)``.  A tcc is skipped when either lookup returns
    ``None``.  Otherwise ``svs.profileAroundPoint`` is called for the sense
    point and for its AS conversion, and the returned per-coordinate values
    are collected into two ``{coord: [values]}`` dicts that are finally
    passed to ``plot.boxPlotHistoAS``.

    Args:
        tccList: Iterable of tccs accepted by ``cg.tccSplit``.
        cName: Passed unchanged to ``cgPeaks.stretch`` and
            ``svs.profileAroundPoint``.
        name: Forwarded to ``plot.boxPlotHistoAS`` as ``name``.

    Returns:
        None.
    """
    # Second positional argument of svs.profileAroundPoint.  This local used
    # to be called ``range`` and shadowed the builtin.
    profileRange = 50
    histDict = {}  # {coord: [values]}
    histDictAS = {}

    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Get highest peak (sense)
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # AS
        tccAS = cg.convertToAS(tcc)
        tccStretch = cgPeaks.stretch(tccAS, cName)
        tccStretch.createPeaks(span=2)
        highestCoordAS = tccStretch.getHighestPeak()
        if highestCoordAS is None:
            continue

        # profile around point (Sense): the point runs from the highest
        # sense peak to the end of the original tcc
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        profile = svs.profileAroundPoint(zPoint, profileRange, cName, ratio=True)
        for coord in profile:
            # setdefault replaces the former bare ``except:`` initialisation,
            # which would also have hidden unrelated errors.
            histDict.setdefault(coord, []).append(profile[coord])

        # profile around point (AS)
        zPoint = cg.convertToAS(zPoint)
        profile = svs.profileAroundPoint(zPoint,
                                         profileRange,
                                         cName,
                                         ratio=True,
                                         ratioCoord=highestCoordAS)
        for coord in profile:
            histDictAS.setdefault(coord, []).append(profile[coord])

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
def profileTargetsHistoAS(tccList, cName, name = 'boxplot'):
    """Collect sense/AS profiles around each tcc's highest peak and box-plot them.

    Per tcc: the highest peak of the tcc and of ``cg.convertToAS(tcc)`` is
    obtained from ``cgPeaks.stretch`` (after ``createPeaks(span=2)``).  If
    either is ``None`` the tcc contributes nothing.  Otherwise
    ``svs.profileAroundPoint`` is evaluated for the sense point and for its
    AS conversion, and each returned ``coord -> value`` entry is appended to
    the matching histogram dict.  Both dicts go to ``plot.boxPlotHistoAS``.

    Args:
        tccList: Iterable of tccs accepted by ``cg.tccSplit``.
        cName: Passed unchanged to ``cgPeaks.stretch`` and
            ``svs.profileAroundPoint``.
        name: Forwarded to ``plot.boxPlotHistoAS`` as ``name``.

    Returns:
        None.
    """
    def highestPeak(someTcc):
        """Return ``getHighestPeak()`` of a span-2 peak stretch over ``someTcc``."""
        stretch = cgPeaks.stretch(someTcc, cName)
        stretch.createPeaks(span = 2)
        return stretch.getHighestPeak()

    def accumulate(histo, profile):
        """Append every ``coord -> value`` of ``profile`` to ``histo[coord]``."""
        for coord in profile:
            # Explicit membership test instead of the former bare ``except:``,
            # which swallowed every exception, not just the missing key.
            if coord not in histo:
                histo[coord] = []
            histo[coord].append(profile[coord])

    # Second positional argument of svs.profileAroundPoint (previously a
    # local named ``range`` that shadowed the builtin).
    window = 50
    histDict = {} # {coord: []}
    histDictAS = {}

    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Get highest peak (sense); bail out before touching the AS side
        highestCoord = highestPeak(tcc)
        if highestCoord is None:
            continue

        # AS
        highestCoordAS = highestPeak(cg.convertToAS(tcc))
        if highestCoordAS is None:
            continue

        # profile around point (Sense)
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        accumulate(histDict,
                   svs.profileAroundPoint(zPoint, window, cName, ratio = True))

        # profile around point (AS)
        zPoint = cg.convertToAS(zPoint)
        accumulate(histDictAS,
                   svs.profileAroundPoint(zPoint, window, cName, ratio = True, ratioCoord = highestCoordAS))

    plot.boxPlotHistoAS(histDict, histDictAS, name = name)
def transcriptSetOverlapDegFileHitmap(degFile, runningChrom, runningStrand):
    """Annotate one chromosome/strand of ``degFile`` with a transcript-overlap flag.

    A "hitmap" -- the set of every coordinate covered by a transcript of the
    hard-coded Ensembl gene set on ``runningChrom``/``runningStrand`` -- is
    built first.  Each tab-separated line of ``degFile`` is then read, its
    second field is converted with ``cg.convertToAS``, and lines whose
    converted tcc is on another chromosome or strand are dropped.  The
    remaining lines get ``'1'`` if any coordinate of the converted tcc is in
    the hitmap and ``'0'`` otherwise, added via ``cg.appendToLine(line,
    flag, 3)``.  The result is written to
    ``degFile + '.<runningChrom>.<runningStrand>'``.

    Args:
        degFile: Path of the tab-separated input file; field index 1 holds
            the tcc.
        runningChrom: Chromosome value compared against ``cg.tccSplit``
            output.
        runningStrand: Strand value compared against ``cg.tccSplit`` output
            (for input lines: the strand AFTER ``cg.convertToAS``).

    Returns:
        None.

    Note:
        Python 2 code (``xrange``).  The hitmap holds one integer per covered
        base, so memory grows with the total transcript length on the strand.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    # create hitmap
    coordSet = set()
    for gene in allExons.set.values():
        for transcript in gene.transcripts:
            chrom, strand, start, end = cg.tccSplit(transcript.tcc)
            if chrom != runningChrom or strand != runningStrand:
                continue
            # xrange avoids building a throwaway list per transcript
            coordSet.update(xrange(start, end + 1))

    # Parenthesised single string: prints identically on Python 2 and 3.
    print('done creating hitmap')

    # find overlapping degTccs
    newLines = []
    with open(degFile, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            degTcc = cg.convertToAS(ls[1])
            chrom, strand, start, end = cg.tccSplit(degTcc)
            if chrom != runningChrom or strand != runningStrand:
                continue

            # any() stops at the first covered coordinate
            hit = any(i in coordSet for i in xrange(start, end + 1))
            inTran = '1' if hit else '0'

            # update newLines
            newLines.append(cg.appendToLine(line, inTran, 3))

    # Written only after the input was read completely, as before.
    with open(degFile + '.%s.%s' % (runningChrom, runningStrand), 'w') as f:
        f.writelines(newLines)
Example #8
0
def transcriptSetOverlapDegFileHitmap(degFile, runningChrom, runningStrand):
    """Write a copy of ``degFile`` for one chromosome/strand with an overlap flag.

    Steps:

    1. Collect the tcc of every transcript in the gene set created from the
       hard-coded Ensembl file.
    2. Build a hitmap: the set of all coordinates covered by those
       transcripts on ``runningChrom``/``runningStrand``.
    3. For each line of ``degFile`` take the second tab-separated field,
       convert it with ``cg.convertToAS`` and skip the line unless the
       converted tcc is on ``runningChrom``/``runningStrand``.
    4. Add ``'1'`` (some coordinate is in the hitmap) or ``'0'`` to the line
       through ``cg.appendToLine(line, flag, 3)``.
    5. Write the kept lines to
       ``degFile + '.<runningChrom>.<runningStrand>'``.

    Args:
        degFile: Path of the tab-separated input file (tcc in field index 1).
        runningChrom: Chromosome value compared against ``cg.tccSplit``
            output.
        runningStrand: Strand value compared against ``cg.tccSplit`` output.

    Returns:
        None.

    Note:
        Python 2 code (``xrange``).  Files are now opened with ``with`` so
        the handles are released even when a line fails to parse.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)
    transcriptTccs = [transcript.tcc
                      for gene in allExons.set.values()
                      for transcript in gene.transcripts]

    def onRunningStrand(chrom, strand):
        """True when ``chrom``/``strand`` are the ones being processed."""
        return chrom == runningChrom and strand == runningStrand

    # create hitmap
    coordSet = set()
    for tcc in transcriptTccs:
        chrom, strand, start, end = cg.tccSplit(tcc)
        if onRunningStrand(chrom, strand):
            # xrange: no intermediate list per transcript
            coordSet.update(xrange(start, end + 1))

    # find overlapping degTccs
    # Parenthesised single string: same output on Python 2 and 3.
    print('done creating hitmap')

    newLines = []
    with open(degFile, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            degTcc = cg.convertToAS(ls[1])
            chrom, strand, start, end = cg.tccSplit(degTcc)
            if not onRunningStrand(chrom, strand):
                continue

            inTran = '0'
            for i in xrange(start, end + 1):
                if i in coordSet:
                    inTran = '1'
                    break

            # update newLines
            newLines.append(cg.appendToLine(line, inTran, 3))

    outFN = degFile + '.%s.%s' % (runningChrom, runningStrand)
    with open(outFN, 'w') as f:
        f.writelines(newLines)
def transcriptSetOverlapDegFile(degFile):
    """Compute a transcript-overlap flag for every line of ``degFile``.

    The second tab-separated field of each line is converted with
    ``cg.convertToAS`` and tested against the gene set built from the
    hard-coded Ensembl transcript file.  Each line is then passed through
    ``cg.appendToLine(line, flag, 3)`` with ``'1'`` (overlaps) or ``'0'``.

    Defects of the previous version fixed here:

    * The second pass never re-split the current line; it reused ``ls`` from
      the LAST line of the first pass, so every line got the same flag.
    * The annotated line was computed and then thrown away (``newLines`` was
      never filled).
    * Indentation mixed tabs and spaces, and files were not closed on error.

    Args:
        degFile: Path of the tab-separated input file (tcc in field index 1).

    Returns:
        list: the annotated lines, in file order.  The previous version
        returned ``None`` after discarding them.
        NOTE(review): nothing in this function says where the annotated lines
        should be written, so ``degFile`` is deliberately NOT overwritten --
        confirm the intended destination.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    # get degradome TCCS
    # note that you need to test the AS peaks, this is the location of the
    # targetted transcript
    with open(degFile, 'r') as f:
        lines = f.readlines()
    degTccs = [cg.convertToAS(line.strip().split('\t')[1]) for line in lines]

    # find all overlapping exons/transcripts, then all tested tccs that
    # overlap them
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    # NOTE(review): assumes tccs are hashable (they look like strings) -- confirm.
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    # Pair every line with ITS OWN converted tcc.
    newLines = []
    for line, degTcc in zip(lines, degTccs):
        inTran = '1' if degTcc in overlappingDegTccs else '0'

        # update newLines
        newLines.append(cg.appendToLine(line, inTran, 3))

    return newLines
Example #10
0
def transcriptSetOverlapDegFile(degFile):
    """Flag each line of ``degFile`` by whether its AS tcc overlaps a transcript.

    First pass: the second tab-separated field of every line is converted
    with ``cg.convertToAS`` and the converted tccs are tested against the
    gene set created from the hard-coded Ensembl transcript file.  Second
    pass: every line is re-read, its own converted tcc is looked up, and the
    line is passed through ``cg.appendToLine(line, flag, 3)`` with ``'1'``
    or ``'0'``.

    Fixes relative to the previous version:

    * The second pass used ``ls`` left over from the last line of the first
      pass instead of splitting the current line, so all lines shared one
      flag.
    * ``newLine`` was never stored; ``newLines`` stayed empty and nothing
      left the function.
    * File handles are now managed with ``with``.

    Args:
        degFile: Path of the tab-separated input file (tcc in field index 1).

    Returns:
        list: the annotated lines, in file order (previously ``None``).
        NOTE(review): the destination for these lines is not stated anywhere
        in this function, so no file is written -- confirm what was intended.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    def asTccOf(line):
        """Return ``cg.convertToAS`` of the second tab-separated field of ``line``."""
        return cg.convertToAS(line.strip().split('\t')[1])

    # get degradome TCCS
    # note that you need to test the AS peaks, this is the location of the
    # targetted transcript
    with open(degFile, 'r') as f:
        degTccs = [asTccOf(line) for line in f]

    # find all overlapping exons/transcripts, then all tested tccs that
    # overlap them
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    # NOTE(review): assumes tccs are hashable (they look like strings) -- confirm.
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    newLines = []
    with open(degFile, 'r') as f:
        for line in f:
            # Re-derive the tcc from THIS line (the stale-``ls`` bug).
            degTcc = asTccOf(line)

            inTran = '0'
            if degTcc in overlappingDegTccs:
                inTran = '1'

            # update newLines
            newLines.append(cg.appendToLine(line, inTran, 3))

    return newLines