def parseRecord(self, fields):
    """Convert the split columns of one GFF3 record into a dictionary.

    Parameters:
        fields: sequence of exactly nine strings, unpacked in order as
            substrate, source, type, begin, end, score, strand, frame
            and the attribute column.

    Returns:
        A dict with the keys "substrate", "source", "type", "begin",
        "end", "score", "strand", "frame" and "extra".  "begin" is
        int(begin) - 1 and "end" is int(end); every other column is
        passed through as the original string.  "extra" is a dict built
        from the ";"-separated "key=value" pairs of the attribute column.

    Raises:
        Exception: if there are more than nine fields, or a non-empty
            attribute segment does not match "(.+)=(.+)".
        ValueError: if there are fewer than nine fields, or begin/end
            cannot be converted to int.
    """
    numFields = len(fields)
    if numFields > 9:
        raise Exception("too many fields in GFF3 record: " +
                        "\t".join(fields))
    if numFields < 9:
        # Unpacking a short record raised a bare ValueError before; keep
        # the exception type but say which record was at fault.
        raise ValueError("too few fields in GFF3 record: " +
                         "\t".join(fields))
    (substrate, source, featureType, begin, end, score, strand, frame,
     extra) = fields

    extraHash = {}
    rex = Rex()
    for field in extra.rstrip().split(";"):
        if not field.strip():
            # A trailing ";" (or an empty attribute column) produces an
            # empty segment; it carries no data, so skip it.
            continue
        if not rex.find("(.+)=(.+)", field):
            raise Exception("Can't parse GFF3 field: " + field)
        extraHash[rex[1]] = rex[2]

    return {
        "substrate": substrate,
        "source": source,
        "type": featureType,
        "begin": int(begin) - 1,
        "end": int(end),
        "score": score,
        "strand": strand,
        "frame": frame,
        "extra": extraHash,
    }
def loadGFF_UTR(self, fields, line, transcriptBeginEnd, GFF,
                transcripts, readOrder, genes):
    """Attach the feature described by one record to its transcript.

    Coordinates, score, strand and frame come from `fields`; the
    transcript and gene identifiers are searched for in the raw `line`.
    The Transcript and Gene are created the first time their ID is seen,
    and a new Exon for this feature is appended to the transcript's
    rawExons list when that list is not None, otherwise to its UTR list
    unless exonOverlapsExon() reports an overlap.

    Parameters:
        fields: columns of the record.  Indices 0-7 are read
            individually; index 8 onward are concatenated (each followed
            by one space) into the exon's extraFields.
        line: raw record text searched for the identifiers.
        transcriptBeginEnd: mapping of transcript ID to a (begin, end)
            pair; when an entry exists it supplies the begin/end of a
            newly created transcript, otherwise this feature's
            coordinates are used.
        GFF: not used by this method.
        transcripts: dict of transcript ID to Transcript, updated in
            place.
        readOrder: value stored as readOrder on a newly created
            transcript.
        genes: dict of gene ID to Gene, updated in place.

    Raises:
        Exception: if neither a transcript ID nor a gene ID is found.
        ValueError: if fields[3] or fields[4] is not an integer.
    """
    # The begin column is decremented by one and the end column is not
    # (presumably 1-based inclusive -> 0-based half-open; confirm).
    exonBegin = int(fields[3]) - 1
    exonEnd = int(fields[4])
    exonScore = fields[5]
    strand = fields[6]
    frame = fields[7]

    # Patterns are raw strings so that \s, \S and \; reach the regex
    # engine without being invalid string-literal escape sequences.
    rex = Rex()
    transcriptId = None
    if rex.find(r'transgrp[:=]\s*(\S+)', line):
        transcriptId = rex[1]
    elif rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?', line):
        transcriptId = rex[1]
    elif rex.find(r'Parent=([^;,\s]+)', line):
        transcriptId = rex[1]

    geneId = None
    if rex.find(r'genegrp=(\S+)', line):
        geneId = rex[1]
    elif rex.find(r'gene_id[:=]?\s*"?([^\s\;"]+)"?', line):
        geneId = rex[1]

    # Each identifier stands in for the other when only one is present.
    if transcriptId is None:
        transcriptId = geneId
    if geneId is None:
        geneId = transcriptId
    if transcriptId is None:
        raise Exception(line + " : no transcript ID found")

    # Drop a trailing ";" that the \S+ patterns may have captured.
    if rex.find(r"(\S+);$", transcriptId):
        transcriptId = rex[1]
    if rex.find(r"(\S+);$", geneId):
        geneId = rex[1]

    extra = "".join(field + " " for field in fields[8:])

    if exonBegin > exonEnd:
        exonBegin, exonEnd = exonEnd, exonBegin

    transcript = transcripts.get(transcriptId, None)
    if not transcript:
        transcript = Transcript(transcriptId, strand)
        transcripts[transcriptId] = transcript
        transcript.setStopCodons(self.stopCodons)
        transcript.readOrder = readOrder
        # NOTE(review): this rebinds the parameter; if readOrder is a
        # plain int the caller never sees the increment -- confirm intent.
        readOrder += 1
        transcript.substrate = fields[0]
        transcript.source = fields[1]
        if transcriptBeginEnd.get(transcriptId, None) is not None:
            (begin, end) = transcriptBeginEnd[transcriptId]
            transcript.setBegin(begin)
            transcript.setEnd(end)
        else:
            transcript.setBegin(exonBegin)
            transcript.setEnd(exonEnd)

    transcript.geneId = geneId
    gene = genes.get(geneId, None)
    if gene is None:
        gene = Gene()
        genes[geneId] = gene
        gene.setId(geneId)
    transcript.setGene(gene)

    exon = Exon(exonBegin, exonEnd, transcript)
    exon.extraFields = extra
    if transcript.rawExons is not None:
        destination = transcript.rawExons
    elif not transcript.exonOverlapsExon(exon):
        destination = transcript.UTR  # OK -- we sort later
    else:
        destination = None
    if destination is not None:
        exon.frame = frame
        exon.score = exonScore
        exon.type = fields[2]
        destination.append(exon)

    gene.addTranscript(transcript)
def crear_aldea(nombre, num_rex, num_spinosaurus, num_triceraptors):
    """Create an Aldea called `nombre` and fill it with dinosaurs.

    Adds `num_rex` Rex instances, then `num_spinosaurus` Spinosaurus
    instances, then `num_triceraptors` Triceraptors instances, in that
    order.  Each dinosaur is named with a one-letter prefix ("r", "s" or
    "t") followed by its index within its own group, and is constructed
    with the constant 1000, a value from random.randrange(-200, 200) and
    the Aldea itself (the meaning of those constructor arguments is
    defined by the dinosaur classes, not visible here).

    Returns:
        The populated Aldea.
    """
    aldea = Aldea(nombre)

    def poblar(especie, prefijo, cantidad):
        # Add `cantidad` dinosaurs of class `especie` to the village.
        for indice in range(cantidad):
            etiqueta = prefijo + str(indice)
            valor = random.randrange(-200, 200)
            aldea.add_dinosaurio(especie(etiqueta, 1000, valor, aldea))

    poblar(Rex, "r", num_rex)
    poblar(Spinosaurus, "s", num_spinosaurus)
    poblar(Triceraptors, "t", num_triceraptors)
    return aldea
def loadGFF_transcript(self, fields, line, transcriptBeginEnd, GFF,
                       transcripts, readOrder, genes):
    """Register the transcript described by one record.

    When `line` contains a transcript_id, the record's begin/end are
    remembered in `transcriptBeginEnd`, the Transcript is created if it
    is not yet in `transcripts`, its score is filled in if still unset,
    and it is linked to its Gene (created on first sight).  When no
    transcript_id is found, nothing is recorded.

    Parameters:
        fields: columns of the record.  Indices 0, 1 and 3-6 are read
            individually; index 8 onward are concatenated (each followed
            by one space) into the transcript's extraFields.
        line: raw record text searched for the identifiers.
        transcriptBeginEnd: dict that receives [begin, end] under the
            transcript ID.
        GFF: not used by this method.
        transcripts: dict of transcript ID to Transcript, updated in
            place.
        readOrder: value stored as readOrder on a newly created
            transcript.
        genes: dict of gene ID to Gene, updated in place.

    Raises:
        Exception: if a transcript_id is present but no gene ID is found.
        ValueError: if fields[3] or fields[4] is not an integer, or the
            score column is neither "." nor a number.
    """
    # The begin column is decremented by one and the end column is not
    # (presumably 1-based inclusive -> 0-based half-open; confirm).
    begin = int(fields[3]) - 1
    end = int(fields[4])

    # Patterns are raw strings so that \s, \S and \; reach the regex
    # engine without being invalid string-literal escape sequences.
    rex = Rex()
    if not rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?', line):
        return

    transcriptId = rex[1]
    transcriptBeginEnd[transcriptId] = [begin, end]
    strand = fields[6]
    score = fields[5]
    transcriptExtraFields = "".join(field + " " for field in fields[8:])

    transcript = transcripts.get(transcriptId, None)
    if transcript is None:
        transcript = Transcript(transcriptId, strand)
        transcripts[transcriptId] = transcript
        transcript.setStopCodons(self.stopCodons)
        transcript.readOrder = readOrder
        # NOTE(review): this rebinds the parameter; if readOrder is a
        # plain int the caller never sees the increment -- confirm intent.
        readOrder += 1
        transcript.substrate = fields[0]
        transcript.source = fields[1]
        transcript.setBegin(begin)
        transcript.setEnd(end)

    # "." in the score column means no score was given.
    if transcript.score is None and score != ".":
        transcript.score = float(score)

    geneId = None
    if rex.find(r"genegrp=(\S+)", line):
        geneId = rex[1]
    elif rex.find(r'gene_id[:=]?\s*"?([^\s\;"]+)"?', line):
        geneId = rex[1]
    if not geneId:
        raise Exception("can't parse GTF: " + line)

    transcript.geneId = geneId
    gene = genes.get(geneId, None)
    if not gene:
        gene = Gene()
        genes[geneId] = gene
        gene.setId(geneId)
    transcript.setGene(gene)
    gene.addTranscript(transcript)
    transcript.extraFields = transcriptExtraFields
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public # License (GPL) version 3, as described at www.opensource.org. #========================================================================= from __future__ import (absolute_import, division, print_function, unicode_literals, generators, nested_scopes, with_statement) from builtins import (bytes, dict, int, list, object, range, str, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) # The above imports should allow this program to run in both Python 2 and # Python 3. You might need to update your version of module "future". import sys import ProgramName import gzip from Rex import Rex rex = Rex() from scipy import stats from statsmodels.stats.multitest import multipletests def getCounts(filename, variants, MIN_COUNT): counts = {} with open(filename, "rt") as IN: for line in IN: fields = line.rstrip().split() if (len(fields) != 7): continue (id, chr, pos, ref, alt, refCount, altCount) = fields refCount = int(refCount) altCount = int(altCount) if (refCount + altCount < MIN_COUNT): continue counts[id] = [refCount, altCount]