Esempio n. 1
0
 def loadGFF_UTR(self,fields,line,transcriptBeginEnd,GFF,
                        transcripts,readOrder,genes):
     """Turn one feature line into an Exon and attach it to its transcript.

     The transcript and gene IDs are extracted from `line`; the Transcript
     and Gene objects are looked up in (or created and added to) the
     `transcripts` and `genes` mappings, and the new exon is appended to
     the transcript's rawExons list or, failing that, to its UTR list.

     Parameters:
       fields -- columns of the line; indices 0..7 are read as substrate,
           source, feature type, begin, end, score, strand and frame;
           any further columns are kept as free text on the exon
       line -- full text of the line, searched for the ID attributes
       transcriptBeginEnd -- mapping from transcript ID to a (begin,end)
           pair; consulted only when a new Transcript is created
       GFF -- not used by this method
       transcripts -- mapping from transcript ID to Transcript; updated
           in place
       readOrder -- value stored on a newly created Transcript
       genes -- mapping from gene ID to Gene; updated in place

     Returns None.

     Raises Exception when neither a transcript ID nor a gene ID can be
     found in `line`.  Errors from the int() conversions propagate.
     """
     # Begin is shifted down by one while end is used as given (presumably
     # 1-based inclusive -> 0-based half-open coordinates; confirm).
     exonBegin=int(fields[3])-1
     exonEnd=int(fields[4])
     exonScore=fields[5]
     strand=fields[6]
     frame=fields[7]

     # Patterns are raw strings so that \s and \S reach the regex engine
     # without relying on Python's handling of unknown string escapes.
     # After a successful find, rex[1] is read as the captured ID.
     rex=Rex()
     transcriptId=None
     if(rex.find(r'transgrp[:=]\s*(\S+)',line)): transcriptId=rex[1]
     elif(rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)):
         transcriptId=rex[1]
     elif(rex.find(r'Parent=([^;,\s]+)',line)): transcriptId=rex[1]
     geneId=None
     if(rex.find(r'genegrp=(\S+)',line)): geneId=rex[1]
     elif(rex.find(r'gene_id[:=]?\s*"?([^\s\;"]+)"?',line)): geneId=rex[1]

     # Each ID stands in for the other when only one of them was found.
     if(transcriptId is None): transcriptId=geneId
     if(geneId is None): geneId=transcriptId
     if(transcriptId is None): 
         raise Exception(line+" : no transcript ID found")        

     # The \S+ captures above can swallow a trailing ';' -- strip it.
     if(rex.find(r"(\S+);$",transcriptId)): transcriptId=rex[1]
     if(rex.find(r"(\S+);$",geneId)): geneId=rex[1]

     # Columns past the eighth are kept verbatim, each followed by a space.
     extra="".join(field+" " for field in fields[8:])

     # Normalize so that begin <= end.
     if(exonBegin>exonEnd): (exonBegin,exonEnd)=(exonEnd,exonBegin)

     transcript=transcripts.get(transcriptId,None)
     if(not transcript):
         transcripts[transcriptId]=transcript= \
             Transcript(transcriptId,strand)
         transcript.setStopCodons(self.stopCodons)
         transcript.readOrder=readOrder
         # NOTE(review): for an int argument this only rebinds the local
         # name; the caller's readOrder is not advanced.
         readOrder+=1
         transcript.substrate=fields[0]
         transcript.source=fields[1]
         if(transcriptBeginEnd.get(transcriptId,None) is not None):
             # Use the extent recorded for this transcript.
             (begin,end)=transcriptBeginEnd[transcriptId]
             transcript.setBegin(begin)
             transcript.setEnd(end)
         else:
             # No recorded extent: start from this exon's coordinates.
             transcript.setBegin(exonBegin)
             transcript.setEnd(exonEnd)
     transcript.geneId=geneId

     gene=genes.get(geneId,None)
     if(gene is None):
         genes[geneId]=gene=Gene(); gene.setId(geneId)
     transcript.setGene(gene)

     exon=Exon(exonBegin,exonEnd,transcript)
     exon.extraFields=extra
     if(transcript.rawExons is not None): 
         # A raw-exon list exists: collect the exon there unconditionally.
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.rawExons.append(exon)
     elif(not transcript.exonOverlapsExon(exon)):
         # Otherwise keep it as UTR only if exonOverlapsExon() reports
         # no overlap for it.
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.UTR.append(exon) # OK -- we sort later
     # Called for every line, also when the transcript was seen before
     # (presumably addTranscript tolerates repeats -- confirm).
     gene.addTranscript(transcript)
Esempio n. 2
0
 def loadGFF_UTR(self,fields,line,transcriptBeginEnd,GFF,
                        transcripts,readOrder,genes):
     """Turn one feature line into an Exon and attach it to its transcript.

     The transcript and gene IDs are extracted from `line`; the Transcript
     and Gene objects are looked up in (or created and added to) the
     `transcripts` and `genes` mappings, and the new exon is appended to
     the transcript's rawExons list or, failing that, to its UTR list.

     Parameters:
       fields -- columns of the line; indices 0..7 are read as substrate,
           source, feature type, begin, end, score, strand and frame;
           any further columns are kept as free text on the exon
       line -- full text of the line, searched for the ID attributes
       transcriptBeginEnd -- mapping from transcript ID to a (begin,end)
           pair; consulted only when a new Transcript is created
       GFF -- not used by this method
       transcripts -- mapping from transcript ID to Transcript; updated
           in place
       readOrder -- value stored on a newly created Transcript
       genes -- mapping from gene ID to Gene; updated in place

     Returns None.

     Raises Exception when neither a transcript ID nor a gene ID can be
     found in `line`.  Errors from the int() conversions propagate.
     """
     # Begin is shifted down by one while end is used as given (presumably
     # 1-based inclusive -> 0-based half-open coordinates; confirm).
     exonBegin=int(fields[3])-1
     exonEnd=int(fields[4])
     exonScore=fields[5]
     strand=fields[6]
     frame=fields[7]

     # Patterns are raw strings so that \s and \S reach the regex engine
     # without relying on Python's handling of unknown string escapes.
     # After a successful find, rex[1] is read as the captured ID.
     rex=Rex()
     transcriptId=None
     if(rex.find(r'transgrp[:=]\s*(\S+)',line)): transcriptId=rex[1]
     elif(rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)):
         transcriptId=rex[1]
     elif(rex.find(r'Parent=([^;,\s]+)',line)): transcriptId=rex[1]
     geneId=None
     if(rex.find(r'genegrp=(\S+)',line)): geneId=rex[1]
     elif(rex.find(r'gene_id[:=]?\s*"?([^\s\;"]+)"?',line)): geneId=rex[1]

     # Each ID stands in for the other when only one of them was found.
     if(transcriptId is None): transcriptId=geneId
     if(geneId is None): geneId=transcriptId
     if(transcriptId is None): 
         raise Exception(line+" : no transcript ID found")        

     # The \S+ captures above can swallow a trailing ';' -- strip it.
     if(rex.find(r"(\S+);$",transcriptId)): transcriptId=rex[1]
     if(rex.find(r"(\S+);$",geneId)): geneId=rex[1]

     # Columns past the eighth are kept verbatim, each followed by a space.
     extra="".join(field+" " for field in fields[8:])

     # Normalize so that begin <= end.
     if(exonBegin>exonEnd): (exonBegin,exonEnd)=(exonEnd,exonBegin)

     transcript=transcripts.get(transcriptId,None)
     if(not transcript):
         transcripts[transcriptId]=transcript= \
             Transcript(transcriptId,strand)
         transcript.setStopCodons(self.stopCodons)
         transcript.readOrder=readOrder
         # NOTE(review): for an int argument this only rebinds the local
         # name; the caller's readOrder is not advanced.
         readOrder+=1
         transcript.substrate=fields[0]
         transcript.source=fields[1]
         # Mapping lookup via .get(): transcriptBeginEnd is indexed by
         # transcript ID on the next line, and mappings have no .find().
         if(transcriptBeginEnd.get(transcriptId,None) is not None):
             (begin,end)=transcriptBeginEnd[transcriptId]
             transcript.setBegin(begin)
             transcript.setEnd(end)
         # NOTE(review): when no extent is recorded, begin/end are left
         # as the Transcript constructor set them -- confirm that this
         # is intended rather than falling back to the exon coordinates.
     transcript.geneId=geneId

     gene=genes.get(geneId,None)
     if(gene is None):
         genes[geneId]=gene=Gene(); gene.setId(geneId)
     transcript.setGene(gene)

     exon=Exon(exonBegin,exonEnd,transcript)
     exon.extraFields=extra
     if(transcript.rawExons is not None): 
         # A raw-exon list exists: collect the exon there unconditionally.
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.rawExons.append(exon)
     elif(not transcript.exonOverlapsExon(exon)):
         # Otherwise keep it as UTR only if exonOverlapsExon() reports
         # no overlap for it.
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.UTR.append(exon) # OK -- we sort later
     # Called for every line, also when the transcript was seen before
     # (presumably addTranscript tolerates repeats -- confirm).
     gene.addTranscript(transcript)
Esempio n. 3
0
 def makeExon(self,root):
     """Build and return an Exon from the entries of `root`.

     `root` is indexed by the keys "begin", "end", "strand", "frame",
     "type", "score", "substrate" and "extra".  The begin and end entries
     are converted with int(); the other named entries are copied onto
     the exon unchanged.  The "extra" entry is iterated and flattened
     into a run of key=value; items stored in exon.extraFields.  The exon
     is constructed with None as its third argument.
     """
     exon=Exon(int(root["begin"]),int(root["end"]),None)
     # Copy the plain attributes in a fixed order, one lookup each.
     for attr in ("strand","frame","type","score","substrate"):
         setattr(exon,attr,root[attr])
     extra=root["extra"]
     # Empty `extra` yields the empty string.
     exon.extraFields="".join(key+"="+extra[key]+";" for key in extra)
     return exon
Esempio n. 4
0
 def makeExon(self, root):
     """Create an Exon populated from `root` and return it.

     Reads root["begin"] and root["end"] (both passed through int()),
     then root["strand"], root["frame"], root["type"], root["score"] and
     root["substrate"], which are assigned to the attributes of the same
     names.  Every key of root["extra"] contributes one key=value; item
     to exon.extraFields, in iteration order.  The Exon constructor
     receives None as its third argument.
     """
     start = int(root["begin"])
     stop = int(root["end"])
     exon = Exon(start, stop, None)

     exon.strand = root["strand"]
     exon.frame = root["frame"]
     exon.type = root["type"]
     exon.score = root["score"]
     exon.substrate = root["substrate"]

     # Serialize the extra attributes; no entries gives "".
     attributes = root["extra"]
     pieces = [name + "=" + attributes[name] + ";" for name in attributes]
     exon.extraFields = "".join(pieces)
     return exon