def updateContext(editFN, geneSetFN, outFN, refBase='A'):
    """Write the transcript context of every overlapped editing site to outFN.

    Editing sites are loaded with cgEdit.loadEditingSites(editFN, refBase)
    and transcripts with cgGenes3.createGeneSetEditing(geneSetFN).  Each
    site's coordinate is decremented by one and its tcc rebuilt before the
    site tccs are overlapped against the transcript tccs.

    One tab-separated line is written per (site, transcript) pair:

        site ID, transcript.parent, transcript.id, context, coding flag,
        transcript.tType

    The context is '3UTR' or '5UTR' when one of those is among the
    overlapping element types, otherwise the first overlapping element type.
    The coding flag is 'C' only when the context is 'EXON' and the
    transcript's tType contains '_coding'; otherwise it is 'NC'.  A site
    whose transcript list is empty is written as an 'INTER' / 'NC' line with
    'NONE' placeholders.

    NOTE(review): whether sites with no overlap appear at all depends on
    what compareData.getIndividualOverlaps returns -- confirm.

    Parameters:
        editFN    -- editing-site file name, passed to cgEdit.
        geneSetFN -- gene-set file name, passed to cgGenes3.
        outFN     -- path of the output file (opened for writing, truncated).
        refBase   -- passed through to cgEdit.loadEditingSites.

    Raises:
        IndexError -- if a transcript reports no overlapping elements for a
                      site (the code assumes at least one exon/intron hit).
    """
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print(refBase)

    # Load transcripts and editing sites.
    print('loading editing sites')
    eSites = cgEdit.loadEditingSites(editFN, refBase)
    print('loading gene set')
    geneSet = cgGenes3.createGeneSetEditing(geneSetFN)

    # Make the sites 0-based: shift the coordinate and rebuild the tcc so
    # both agree before any overlap is computed.
    for eSite in eSites:
        eSite.coordinate = eSite.coordinate - 1
        eSite.tcc = bioLibCG.makeTcc(eSite.chromosome, eSite.strand,
                                     eSite.coordinate, eSite.coordinate)

    # Index both data sets by tcc.
    print('creating joint dictionaries')
    eJoint = {}  # tcc : eSite
    for eSite in eSites:
        eJoint[eSite.tcc] = eSite

    tJoint = {}  # tcc : [transcript, ...]
    for transcript in geneSet.transcripts:
        tJoint.setdefault(transcript.tcc, []).append(transcript)

    # Overlap the site tccs against the transcript tccs.
    print('overlapping joints')
    tccOverlaps = compareData.getIndividualOverlaps(eJoint.keys(), tJoint.keys(), 1)

    # Translate tcc-level overlaps into {edit site : [transcript, ...]}.
    print('creating final dictionary')
    eSiteTranscripts = {}
    for eTcc in tccOverlaps:
        overlapped = []
        for tTcc in tccOverlaps[eTcc]:
            overlapped.extend(tJoint[tTcc])
        eSiteTranscripts[eJoint[eTcc]] = overlapped

    # Classify every site against every transcript it overlaps.
    print('get context info')
    rowFormat = '%s\t%s\t%s\t%s\t%s\t%s\n'
    # The context manager guarantees the file is flushed and closed even if
    # classification raises part-way through.
    with open(outFN, 'w') as fOut:
        for eSite in eSiteTranscripts:
            transcripts = eSiteTranscripts[eSite]

            if not transcripts:
                # Label intergenic.
                fOut.write(rowFormat % (eSite.ID, 'NONE', 'NONE', 'INTER', 'NC', 'NONE'))
                continue

            for transcript in transcripts:
                codingTranscript = '_coding' in transcript.tType
                tTypes = [x[1] for x in transcript.getOverlappingElements(eSite.tcc)]

                if '3UTR' in tTypes:
                    tType = '3UTR'
                elif '5UTR' in tTypes:
                    tType = '5UTR'
                else:
                    # Has to be one thing: exon or intron.
                    tType = tTypes[0]

                # Checking only for 'EXON' is sufficient because a UTR hit
                # has already taken precedence over EXON above.
                if tType == 'EXON' and codingTranscript:
                    codingFlag = 'C'
                else:
                    codingFlag = 'NC'

                fOut.write(rowFormat % (eSite.ID, transcript.parent, transcript.id,
                                        tType, codingFlag, transcript.tType))
def updateContext(editFN, geneSetFN, outFN, refBase='A'):
    """Annotate editing sites with transcript context and write them to outFN.

    Steps, in order:
      1. Load editing sites via cgEdit.loadEditingSites(editFN, refBase) and
         the gene set via cgGenes3.createGeneSetEditing(geneSetFN).
      2. Subtract one from each site's coordinate and rebuild its tcc.
      3. Overlap site tccs with transcript tccs using
         compareData.getIndividualOverlaps.
      4. For each (site, transcript) pair write one tab-separated line:
         site ID, transcript.parent, transcript.id, context, coding flag,
         transcript.tType.

    Context is '3UTR' if present among the overlapping element types, else
    '5UTR' if present, else the first overlapping element type.  The coding
    flag is 'C' only for an 'EXON' context on a transcript whose tType
    contains '_coding'; everything else is 'NC'.  A site mapped to an empty
    transcript list gets a single 'INTER' / 'NC' line with 'NONE' fields.

    NOTE(review): whether non-overlapping sites are reported depends on the
    keys returned by compareData.getIndividualOverlaps -- confirm.

    Parameters:
        editFN    -- editing-site file name handed to cgEdit.
        geneSetFN -- gene-set file name handed to cgGenes3.
        outFN     -- output path; opened in 'w' mode.
        refBase   -- forwarded unchanged to cgEdit.loadEditingSites.

    Raises:
        IndexError -- when getOverlappingElements returns nothing for a
                      site/transcript pair that was reported as overlapping.
    """
    # Single-argument parenthesised print is valid on both Python 2 and 3.
    print(refBase)

    # Load transcripts and editing sites.
    print('loading editing sites')
    eSites = cgEdit.loadEditingSites(editFN, refBase)
    print('loading gene set')
    geneSet = cgGenes3.createGeneSetEditing(geneSetFN)

    # Make the sites 0-based; the tcc must be regenerated from the shifted
    # coordinate so it stays consistent with it.
    for eSite in eSites:
        eSite.coordinate = eSite.coordinate - 1
        eSite.tcc = bioLibCG.makeTcc(eSite.chromosome, eSite.strand,
                                     eSite.coordinate, eSite.coordinate)

    print('creating joint dictionaries')
    # tcc : eSite
    eJoint = {}
    for eSite in eSites:
        eJoint[eSite.tcc] = eSite
    # tcc : [transcript, ...]
    tJoint = {}
    for transcript in geneSet.transcripts:
        tJoint.setdefault(transcript.tcc, []).append(transcript)

    print('overlapping joints')
    tccOverlaps = compareData.getIndividualOverlaps(eJoint.keys(), tJoint.keys(), 1)

    print('creating final dictionary')
    # edit site : [transcript, ...]
    eSiteTranscripts = {}
    for eTcc in tccOverlaps:
        siteTranscripts = []
        for tTcc in tccOverlaps[eTcc]:
            siteTranscripts.extend(tJoint[tTcc])
        eSiteTranscripts[eJoint[eTcc]] = siteTranscripts

    print('get context info')
    lineTemplate = '%s\t%s\t%s\t%s\t%s\t%s\n'
    # 'with' closes (and flushes) the output even when an exception escapes
    # the loop, which the previous open()/close() pair did not.
    with open(outFN, 'w') as fOut:
        for eSite in eSiteTranscripts:
            siteTranscripts = eSiteTranscripts[eSite]

            if not siteTranscripts:
                # Label intergenic.
                fOut.write(lineTemplate % (eSite.ID, 'NONE', 'NONE', 'INTER', 'NC', 'NONE'))
                continue

            for transcript in siteTranscripts:
                codingTranscript = '_coding' in transcript.tType
                tTypes = [x[1] for x in transcript.getOverlappingElements(eSite.tcc)]

                if '3UTR' in tTypes:
                    tType = '3UTR'
                elif '5UTR' in tTypes:
                    tType = '5UTR'
                else:
                    # Has to be one thing: exon or intron.
                    tType = tTypes[0]

                # UTR already won over EXON above, so an 'EXON' context here
                # means no UTR element was hit.
                if tType == 'EXON' and codingTranscript:
                    codingFlag = 'C'
                else:
                    codingFlag = 'NC'

                fOut.write(lineTemplate % (eSite.ID, transcript.parent, transcript.id,
                                           tType, codingFlag, transcript.tType))
def updateContext(oDir, geneSetFN):
    """Record, on each overlapped origin RNA, the transcripts it overlaps.

    Origin RNAs are loaded through cgDB.dataController(oDir,
    cgOriginRNA.OriginRNA) and transcripts through
    cgGenes3.createGeneSetEditing(geneSetFN).  Their tccs are overlapped
    with compareData.getIndividualOverlaps, and for every oRNA that appears
    in the overlap result four parallel lists are (re)set on the object, one
    entry per overlapping transcript:

        transcriptIDs         -- transcript.id
        transcriptContexts    -- label chosen by the dominantSpotter from
                                 the overlapping element types
        transcriptTypes       -- transcript.tType
        transcriptCodingTypes -- 'C' when the context is 'EXON' or
                                 'EXON_INTRON' and the transcript's tType
                                 contains '_coding', otherwise 'NC'

    Finally the id -> oRNA mapping is handed to oDC.commit.

    oRNAs that are not keys of the overlap result are left untouched.

    Parameters:
        oDir      -- directory handed to cgDB.dataController.
        geneSetFN -- gene-set file name handed to cgGenes3.
    """
    # Single-argument parenthesised print is valid on both Python 2 and 3.
    print('loading oRNA')
    oDC = cgDB.dataController(oDir, cgOriginRNA.OriginRNA)
    id_oRNA = oDC.load()

    print('loading gene set')
    geneSet = cgGenes3.createGeneSetEditing(geneSetFN)

    # Get both data sets in terms of tccs.
    print('Joining')
    oTcc_oRNA = oneToOne(id_oRNA.values(), 'tcc')
    tTcc_transcripts = oneToMany(geneSet.transcripts, 'tcc')

    # Overlap tccs.
    print('overlapping')
    oTcc_tTccs = compareData.getIndividualOverlaps(oTcc_oRNA.keys(), tTcc_transcripts.keys(), 1)

    # Translate tcc-level overlaps into {oRNA : [transcript, ...]}.
    oRNA_transcripts = {}
    for oTcc in oTcc_tTccs:
        overlapped = []
        for tTcc in oTcc_tTccs[oTcc]:
            overlapped.extend(tTcc_transcripts[tTcc])
        oRNA_transcripts[oTcc_oRNA[oTcc]] = overlapped

    # Go through each oRNA and find out what it overlaps, and whether that
    # is a coding region.
    print('get context info')
    # NOTE(review): presumably spotItem returns the first label of this list
    # that is present in its argument -- confirm against bioLibCG.
    ds = bioLibCG.dominantSpotter(['EXON_INTRON', '3UTR', '5UTR', 'EXON', 'INTRON'])
    exonicContexts = ('EXON', 'EXON_INTRON')

    for oRNA in oRNA_transcripts:
        oRNA.transcriptIDs = []
        oRNA.transcriptContexts = []
        oRNA.transcriptTypes = []
        oRNA.transcriptCodingTypes = []

        # An empty transcript list simply leaves the four lists empty.
        for transcript in oRNA_transcripts[oRNA]:
            codingTranscript = '_coding' in transcript.tType
            tTypes = [x[1] for x in transcript.getOverlappingElements(oRNA.tcc)]

            # Categorize border types.
            tType = ds.spotItem(tTypes)

            # Membership test, not "tType == 'EXON' or 'EXON_INTRON'": the
            # latter is always true because a non-empty string is truthy,
            # which flagged UTR/intron hits in coding transcripts as 'C'.
            if tType in exonicContexts and codingTranscript:
                codingFlag = 'C'
            else:
                codingFlag = 'NC'

            oRNA.transcriptIDs.append(transcript.id)
            oRNA.transcriptContexts.append(tType)
            oRNA.transcriptTypes.append(transcript.tType)
            oRNA.transcriptCodingTypes.append(codingFlag)

    oDC.commit(id_oRNA)