예제 #1
0
 def parseRecord(self, fields):
     """Convert the nine columns of one GFF3 line into a record dict.

     Parameters:
         fields: sequence of exactly nine strings, in this order:
             substrate, source, type, begin, end, score, strand, frame
             and the attribute column (";"-separated key=value pairs).

     Returns:
         dict with the keys "substrate", "source", "type", "begin",
         "end", "score", "strand", "frame" and "extra".  "begin" is
         int(begin) - 1 and "end" is int(end); "extra" is a dict that
         maps every attribute key to its value; the remaining entries
         are the input strings unchanged.

     Raises:
         ValueError: if `fields` does not hold exactly nine items, or if
             begin/end cannot be converted by int().  ValueError is a
             subclass of Exception, so callers that catch Exception for
             over-long records keep working.
         Exception: if a non-blank attribute does not match key=value.
     """
     # Check the column count up front so short records get a readable
     # message instead of a bare tuple-unpacking error.
     if (len(fields) != 9):
         raise ValueError("wrong number of fields in GFF3 record (got " +
                          str(len(fields)) + ", expected 9): " +
                          "\t".join(str(f) for f in fields))
     # "featureType" rather than "type": avoid shadowing the builtin.
     (substrate, source, featureType, begin, end, score, strand, frame,
      extra) = fields
     extraHash = {}
     rex = Rex()
     for field in extra.rstrip().split(";"):
         # A trailing ";" (or ";;") leaves a blank piece after the split;
         # that is not an attribute, so skip it instead of failing.
         if (not field.strip()):
             continue
         if (not rex.find("(.+)=(.+)", field)):
             raise Exception("Can't parse GFF3 field: " + field)
         extraHash[rex[1]] = rex[2]
     return {
         "substrate": substrate,
         "source": source,
         "type": featureType,
         "begin": int(begin) - 1,
         "end": int(end),
         "score": score,
         "strand": strand,
         "frame": frame,
         "extra": extraHash
     }
예제 #2
0
 def loadGFF_UTR(self,fields,line,transcriptBeginEnd,GFF,
                        transcripts,readOrder,genes):
     """Record one UTR/exon line in its transcript and gene.

     Parameters:
         fields: the columns of the line; fields[0..7] are read as
             substrate, source, type, begin, end, score, strand, frame,
             and everything from fields[8] on is kept as extra text.
         line: the unsplit line, searched for transcript and gene IDs.
         transcriptBeginEnd: dict transcriptId -> (begin,end); when it
             has an entry for the transcript, a newly created Transcript
             takes its begin/end from there instead of from this exon.
         GFF: not used by this method.
         transcripts: dict transcriptId -> Transcript; updated in place.
         readOrder: value stored as readOrder on a new Transcript.
         genes: dict geneId -> Gene; updated in place.

     Raises:
         Exception: if neither a transcript ID nor a gene ID is found.
         ValueError: from int() if fields[3] or fields[4] is not numeric.
     """
     exonBegin=int(fields[3])-1
     exonEnd=int(fields[4])
     exonScore=fields[5]
     strand=fields[6]
     frame=fields[7]
     rex=Rex()
     # Raw strings: "\s", "\S" and "\;" are not valid Python string
     # escapes, so non-raw literals trigger invalid-escape warnings.
     # The pattern text handed to rex.find() is unchanged.
     transcriptId=None
     if(rex.find(r'transgrp[:=]\s*(\S+)',line)): transcriptId=rex[1]
     elif(rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)):
         transcriptId=rex[1]
     elif(rex.find(r'Parent=([^;,\s]+)',line)): transcriptId=rex[1]
     geneId=None
     if(rex.find(r'genegrp=(\S+)',line)): geneId=rex[1]
     elif(rex.find(r'gene_id[:=]?\s*"?([^\s\;"]+)"?',line)): geneId=rex[1]
     # Whichever ID is missing falls back to the other one.
     if(transcriptId is None): transcriptId=geneId
     if(geneId is None): geneId=transcriptId
     if(transcriptId is None):
         raise Exception(line+" : no transcript ID found")
     # Drop a trailing ";" that the \S+ patterns may have captured.
     if(rex.find(r"(\S+);$",transcriptId)): transcriptId=rex[1]
     if(rex.find(r"(\S+);$",geneId)): geneId=rex[1]
     # Each extra column is followed by one space (trailing space kept).
     extra="".join(field+" " for field in fields[8:])
     if(exonBegin>exonEnd): (exonBegin,exonEnd)=(exonEnd,exonBegin)
     transcript=transcripts.get(transcriptId,None)
     # "is None" rather than a truthiness test, matching the gene lookup
     # below: an existing Transcript must never be replaced just because
     # it happens to evaluate as false.
     if(transcript is None):
         transcript=Transcript(transcriptId,strand)
         transcripts[transcriptId]=transcript
         transcript.setStopCodons(self.stopCodons)
         transcript.readOrder=readOrder
         # NOTE(review): this rebinds the local name only; if readOrder
         # is a plain int the caller's counter is not advanced -- confirm.
         readOrder+=1
         transcript.substrate=fields[0]
         transcript.source=fields[1]
         knownSpan=transcriptBeginEnd.get(transcriptId,None)
         if(knownSpan is not None):
             (begin,end)=knownSpan
             transcript.setBegin(begin)
             transcript.setEnd(end)
         else:
             transcript.setBegin(exonBegin)
             transcript.setEnd(exonEnd)
     transcript.geneId=geneId
     gene=genes.get(geneId,None)
     if(gene is None):
         gene=Gene()
         genes[geneId]=gene
         gene.setId(geneId)
     transcript.setGene(gene)
     exon=Exon(exonBegin,exonEnd,transcript)
     exon.extraFields=extra
     # Choose the destination list first; the overlap test is only made
     # when the transcript keeps no rawExons list.
     if(transcript.rawExons is not None):
         destination=transcript.rawExons
     elif(not transcript.exonOverlapsExon(exon)):
         destination=transcript.UTR # OK -- we sort later
     else:
         destination=None # overlapping exon is not stored
     if(destination is not None):
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         destination.append(exon)
     gene.addTranscript(transcript)
예제 #3
0
 def crear_aldea(nombre, num_rex, num_spinosaurus, num_triceraptors):
     """Build an Aldea called `nombre` and fill it with dinosaurs.

     num_rex Rex objects are added first, then num_spinosaurus
     Spinosaurus objects, then num_triceraptors Triceraptors objects.
     Each one is constructed with a name made of a one-letter prefix
     ("r", "s" or "t") plus its index, the constant 1000, a value from
     random.randrange(-200, 200), and the village itself.

     Returns the populated Aldea.
     """
     village = Aldea(nombre)
     # (name prefix, class, how many) in the order they are created.
     roster = (
         ("r", Rex, num_rex),
         ("s", Spinosaurus, num_spinosaurus),
         ("t", Triceraptors, num_triceraptors),
     )
     for prefix, species, count in roster:
         for index in range(count):
             newcomer = species(prefix + str(index), 1000,
                                random.randrange(-200, 200), village)
             village.add_dinosaurio(newcomer)
     return village
예제 #4
0
    def loadGFF_transcript(self,fields,line,transcriptBeginEnd,GFF,
                           transcripts,readOrder,genes):
        """Record one transcript line in the transcript and gene tables.

        Lines without a transcript_id are ignored (nothing is stored and
        None is returned).

        Parameters:
            fields: the columns of the line; fields[0], [1], [3], [4],
                [5] and [6] are read as substrate, source, begin, end,
                score and strand; fields[8:] are kept as extra text.
            line: the unsplit line, searched for transcript/gene IDs.
            transcriptBeginEnd: dict that receives
                transcriptId -> [begin,end], with begin = fields[3]-1.
            GFF: not used by this method.
            transcripts: dict transcriptId -> Transcript; updated in place.
            readOrder: value stored as readOrder on a new Transcript.
            genes: dict geneId -> Gene; updated in place.

        Raises:
            Exception: if a transcript ID is present but no gene ID.
            ValueError: from int()/float() on non-numeric begin, end or
                score (a score of "." is skipped, not converted).
        """
        begin=int(fields[3])-1
        end=int(fields[4])
        rex=Rex()
        # Raw strings: "\s", "\S" and "\;" are not valid Python string
        # escapes, so non-raw literals trigger invalid-escape warnings.
        # The pattern text handed to rex.find() is unchanged.
        if(not rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)):
            return
        transcriptId=rex[1]
        transcriptBeginEnd[transcriptId]=[begin,end]
        strand=fields[6]
        score=fields[5]
        # Each extra column is followed by one space (trailing space kept).
        transcriptExtraFields="".join(field+" " for field in fields[8:])
        transcript=transcripts.get(transcriptId,None)
        if(transcript is None):
            transcript=Transcript(transcriptId,strand)
            transcripts[transcriptId]=transcript
            transcript.setStopCodons(self.stopCodons)
            transcript.readOrder=readOrder
            # NOTE(review): this rebinds the local name only; if readOrder
            # is a plain int the caller's counter is not advanced -- confirm.
            readOrder+=1
            transcript.substrate=fields[0]
            transcript.source=fields[1]
            transcript.setBegin(begin)
            transcript.setEnd(end)
        # Only fill in a score that has not been set yet.
        if(transcript.score is None and score!="."):
            transcript.score=float(score)
        geneId=None
        if(rex.find(r"genegrp=(\S+)",line)): geneId=rex[1]
        elif(rex.find(r'gene_id[:=]?\s*"?([^\s\;"]+)"?',line)):
            geneId=rex[1]
        if(not geneId): raise Exception("can't parse GTF: "+line)
        transcript.geneId=geneId
        gene=genes.get(geneId,None)
        # "is None" rather than a truthiness test, matching the
        # transcript lookup above.
        if(gene is None):
            gene=Gene()
            genes[geneId]=gene
            gene.setId(geneId)
        transcript.setGene(gene)
        gene.addTranscript(transcript)
        transcript.extraFields=transcriptExtraFields
예제 #5
0
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
#=========================================================================
from __future__ import (absolute_import, division, print_function,
                        unicode_literals, generators, nested_scopes,
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import ProgramName
import gzip
from Rex import Rex
rex = Rex()
from scipy import stats
from statsmodels.stats.multitest import multipletests


def getCounts(filename, variants, MIN_COUNT):
    counts = {}
    with open(filename, "rt") as IN:
        for line in IN:
            fields = line.rstrip().split()
            if (len(fields) != 7): continue
            (id, chr, pos, ref, alt, refCount, altCount) = fields
            refCount = int(refCount)
            altCount = int(altCount)
            if (refCount + altCount < MIN_COUNT): continue
            counts[id] = [refCount, altCount]