Ejemplo n.º 1
0
def transcriptSetOverlap(aDir, AS):
    """Flag every OriginRNA record in aDir with whether it overlaps a transcript.

    aDir -- directory handed to cgNexusFlat.dataController
    AS   -- truthy to test the cg.convertToAS form of each record's tcc,
            falsy to test the tcc as stored

    Each loaded record gets transcriptOverlap set to True or False and the
    records are committed back through the data controller.
    """
    useAS = bool(AS)

    transcriptFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    geneSet = cgGenes.createGeneSetFromFile(transcriptFN)

    #the AS peaks are what need testing: that is the location of the
    #targetted transcript
    def degTccOf(oRNA):
        if useAS:
            return cg.convertToAS(oRNA.tcc)
        return oRNA.tcc

    #get degradome TCCS
    controller = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = controller.load()
    degTccs = [degTccOf(oRNA) for oRNA in id_oRNA.values()]

    #exons/transcripts hit by any degradome tcc, then the degradome tccs
    #that overlap those hits
    hits = geneSet.transcriptOverlaps(degTccs)
    hitTccs = [hit.tcc for hit in hits]
    overlappingDegTccs = compare.compareTwoTcc(degTccs, hitTccs, 1)

    #store the result on every record
    for oRNA in id_oRNA.values():
        oRNA.transcriptOverlap = degTccOf(oRNA) in overlappingDegTccs

    controller.commit(id_oRNA)
Ejemplo n.º 2
0
def transcriptSetOverlap(aDir, AS):
    """Mark each OriginRNA record in aDir as overlapping a transcript or not.

    aDir -- directory passed to cgNexusFlat.dataController
    AS   -- coerced with bool(); when true each record's tcc is run through
            cg.convertToAS before it is compared, otherwise it is used as is

    Sets transcriptOverlap (True/False) on every loaded record and commits
    the records back through the data controller.
    """
    #indentation is spaces only: the previous tab/space mix parsed only
    #under Python 2's tab-expansion rule
    AS = bool(AS)

    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    #get degradome TCCS
    #note that you need to test the AS peaks, this is the location of the targetted transcript
    oRNA_DC = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = oRNA_DC.load()
    if AS:
        degTccs = [cg.convertToAS(x.tcc) for x in id_oRNA.values()]
    else:
        degTccs = [x.tcc for x in id_oRNA.values()]

    #find all overlapping exons/transcripts, then all results sequences that overlap exons
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = compare.compareTwoTcc(degTccs, overlappingExonTccs, 1)

    #record the result on every object
    for obj in id_oRNA.values():
        if AS:
            degTcc = cg.convertToAS(obj.tcc)
        else:
            degTcc = obj.tcc

        obj.transcriptOverlap = degTcc in overlappingDegTccs

    oRNA_DC.commit(id_oRNA)
Ejemplo n.º 3
0
def transcriptSetOverlapTargets(aDir):
    """Mark each cgAlignment record in aDir as overlapping a transcript or not.

    aDir -- directory passed to cgNexusFlat.dataController

    For every alignment a window on the target is derived from tTcc, tStart
    and sLength, converted with cg.convertToAS and compared against the
    Ensembl transcript set.  transcriptOverlap (True/False) is set on every
    record and the records are committed back through the data controller.
    """
    #indentation is spaces only: the previous tab/space mix parsed only
    #under Python 2's tab-expansion rule

    def targetWindowTcc(alignment):
        #tcc of the window on the target, before the AS conversion
        chrom, strand, start, end = cg.tccSplit(alignment.tTcc)
        offset = alignment.tStart
        sLen = alignment.sLength
        if strand == '1':
            start = start - 19 + offset
            end = start + sLen
        else:
            end = end + 19 - offset
            start = end - sLen
        return cg.makeTcc(chrom, strand, start, end)

    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    #get degradome TCCS
    #note that you need to test the AS peaks, this is the location of the targetted transcript
    aDC = cgNexusFlat.dataController(aDir, cgAlignment.cgAlignment)
    id_alignment = aDC.load()

    #create list of unique tccs, first-seen order kept; the set only makes
    #the duplicate check cheap (assumes tccs are hashable strings)
    uniqTccs = []
    seenTccs = set()
    for alignment in id_alignment.values():
        tcc = targetWindowTcc(alignment)
        if tcc not in seenTccs:
            seenTccs.add(tcc)
            uniqTccs.append(tcc)

    degTccs = [cg.convertToAS(x) for x in uniqTccs]

    #find all overlapping exons/transcripts, then all results sequences that overlap exons
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    #update
    #the window is computed from obj itself; this loop used to read the
    #loop variable left over from the loop above, so every record was
    #judged on the last alignment's coordinates
    for obj in id_alignment.values():
        degTcc = cg.convertToAS(targetWindowTcc(obj))
        obj.transcriptOverlap = degTcc in overlappingDegTccs

    aDC.commit(id_alignment)
Ejemplo n.º 4
0
def transcriptSetOverlapTargets(aDir):
    """Flag every cgAlignment record in aDir with its transcript-overlap status.

    aDir -- directory passed to cgNexusFlat.dataController

    A window on the target is derived for each alignment from tTcc, tStart
    and sLength, converted with cg.convertToAS and compared against the
    Ensembl transcript set.  transcriptOverlap (True/False) is stored on
    every record before the records are committed.
    """

    def windowTcc(record):
        #tcc of the window on the target, before the AS conversion
        chrom, strand, start, end = cg.tccSplit(record.tTcc)
        offset = record.tStart
        sLen = record.sLength
        if strand == '1':
            start = start - 19 + offset
            end = start + sLen
        else:
            end = end + 19 - offset
            start = end - sLen
        return cg.makeTcc(chrom, strand, start, end)

    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    #get degradome TCCS
    #note that you need to test the AS peaks, this is the location of the targetted transcript
    aDC = cgNexusFlat.dataController(aDir, cgAlignment.cgAlignment)
    id_alignment = aDC.load()

    #create list of unique tccs in first-seen order; the companion set
    #replaces a linear scan of the list (assumes tccs are hashable strings)
    uniqTccs = []
    seenTccs = set()
    for alignment in id_alignment.values():
        tcc = windowTcc(alignment)
        if tcc in seenTccs:
            continue
        seenTccs.add(tcc)
        uniqTccs.append(tcc)

    degTccs = [cg.convertToAS(x) for x in uniqTccs]

    #find all overlapping exons/transcripts, then all results sequences that overlap exons
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = set(compare.compareTwoTcc(degTccs, overlappingExonTccs, 1))

    #update
    #each record is judged on its own coordinates; this loop previously read
    #the variable left over from the loop above, so all records received the
    #result for the last alignment
    for obj in id_alignment.values():
        obj.transcriptOverlap = cg.convertToAS(windowTcc(obj)) in overlappingDegTccs

    aDC.commit(id_alignment)
Ejemplo n.º 5
0
def updateMicroRNAOverlap(aDir, microFN):
    """Store on every OriginRNA record in aDir whether it overlaps a microRNA.

    aDir    -- directory handed to cgNexusFlat.dataController
    microFN -- path to a text file read one coordinate per line

    Each record gets microOverlap set to True or False and the records are
    committed back through the data controller.
    """
    controller = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = controller.load()

    #micro coords come from the file, small coords from the loaded records
    microCoords = []
    microFile = open(microFN, 'r')
    for line in microFile:
        microCoords.append(line.strip())
    microFile.close()

    smallCoords = []
    for record in id_oRNA.values():
        smallCoords.append(record.tcc)

    #overlap them
    smallOverlaps = compare.compareTwoTcc(microCoords, smallCoords, 2)

    #save the overlap value on each sRNA
    for record in id_oRNA.values():
        if record.tcc in smallOverlaps:
            record.microOverlap = True
        else:
            record.microOverlap = False

    controller.commit(id_oRNA)
Ejemplo n.º 6
0
def countForError(oDir, filteredFile):
    """Print the filtered-target total for each of the ten simulation runs.

    oDir         -- not used by this function; kept so existing calls still work
    filteredFile -- path to a text file holding one integer id per line

    For simulation.0 .. simulation.9 the OriginRNA records are loaded, and
    for the records whose id is listed in filteredFile the lengths of
    filteredTargets are summed.  The run index and then the sum are printed.
    Raises ValueError if a line of filteredFile is not an integer.
    """
    #ids to count; a set keeps the membership test inside the loops cheap
    keepIDs = set()
    f = open(filteredFile, 'r')
    try:
        for line in f:
            keepIDs.add(int(line.strip()))
    finally:
        #the handle used to be left open
        f.close()

    for i in range(0, 10):
        #single-argument print(...) gives the same output as the print statement
        print(i)
        simDirRNA = '/home/chrisgre/scripts/simulations/simsk50Filtered/simulation.%s/oRNA' % i
        oDC = cgNexusFlat.dataController(simDirRNA, cgOriginRNA.OriginRNA)
        id_sRNA = oDC.load()

        groupTotal = 0
        for oID, sRNA in id_sRNA.items():
            if oID in keepIDs:
                groupTotal += len(sRNA.filteredTargets)
        print(groupTotal)
Ejemplo n.º 7
0
def countForError(oDir, filteredFile):
    """Print, per simulation run, the summed filtered-target count.

    oDir         -- unused here; retained so the signature stays the same
    filteredFile -- path to a text file with one integer id on each line

    Loads the OriginRNA records of simulation.0 through simulation.9 and,
    for records whose id appears in filteredFile, adds up
    len(filteredTargets).  Prints the run index followed by that total.
    Raises ValueError if a line of filteredFile is not an integer.
    """
    #read the ids to count; try/finally closes the handle, which was
    #previously left open
    f = open(filteredFile, 'r')
    try:
        wantedIDs = set(int(line.strip()) for line in f)
    finally:
        f.close()

    for i in range(0, 10):
        #single-argument print(...) gives the same output as the print statement
        print(i)
        simDirRNA = '/home/chrisgre/scripts/simulations/simsk50Filtered/simulation.%s/oRNA' % i
        oDC = cgNexusFlat.dataController(simDirRNA, cgOriginRNA.OriginRNA)
        id_sRNA = oDC.load()

        #set lookup replaces a list scan for every record
        groupTotal = 0
        for oID, sRNA in id_sRNA.items():
            if oID not in wantedIDs:
                continue
            groupTotal += len(sRNA.filteredTargets)
        print(groupTotal)
Ejemplo n.º 8
0
def updateMicroRNAOverlap(aDir, microFN):
    """Record on each OriginRNA record in aDir whether it overlaps a microRNA.

    aDir    -- directory passed to cgNexusFlat.dataController
    microFN -- path to a text file read one coordinate per line

    Sets microOverlap (True/False) on every loaded record and commits the
    records back through the data controller.
    """
    #indentation is spaces only: the previous tab/space mix parsed only
    #under Python 2's tab-expansion rule
    oRNA_DC = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = oRNA_DC.load()

    #Put micro and small coords into lists
    f = open(microFN, 'r')
    try:
        microCoords = [x.strip() for x in f]
    finally:
        #close the handle even if reading fails
        f.close()
    smallCoords = [x.tcc for x in id_oRNA.values()]

    #overlap them
    smallOverlaps = compare.compareTwoTcc(microCoords, smallCoords, 2)

    #For each sRNA, save overlap value.
    for oRNA in id_oRNA.values():
        oRNA.microOverlap = oRNA.tcc in smallOverlaps

    oRNA_DC.commit(id_oRNA)