Python Rexの例、Rex.Rex Pythonの例

コード例 #1

0

ファイルを表示

ファイル: GffTranscriptReader.py プロジェクト: bmajoros/python

    def loadGFF_transcript(self,fields,line,transcriptBeginEnd,GFF,
                           transcripts,readOrder,genes):
        """Register the transcript described by one "transcript" GFF/GTF line.

        fields -- columns of the line; indices 0, 1, 3, 4 and 6 are read
                  here and everything from index 8 on is kept as extra text
        line -- the raw line, searched for transcript_id, genegrp and gene_id
        transcriptBeginEnd -- mapping updated with transcriptId -> [begin,end]
        GFF -- not used by this method
        transcripts -- mapping of transcript ID -> Transcript; a new
                       Transcript is created and stored for an unseen ID
        readOrder -- value stored as readOrder on a newly created Transcript
        genes -- mapping of gene ID -> Gene; a new Gene is created if needed

        A line without a transcript_id attribute is ignored.  Raises
        Exception when a transcript ID is found but no gene ID is.
        """
        begin=int(fields[3])-1
        end=int(fields[4])
        rex=Rex()
        # Raw strings: "\s", "\S" and "\;" are not valid Python string
        # escapes, so the patterns must not go through escape processing.
        if(not rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)): return
        transcriptId=rex[1]
        transcriptBeginEnd[transcriptId]=[begin,end]
        strand=fields[6]
        # Every column from index 8 on, each followed by a single space.
        transcriptExtraFields="".join(field+" " for field in fields[8:])
        transcript=transcripts.get(transcriptId,None)
        if(transcript is None):
            transcripts[transcriptId]=transcript= \
                Transcript(transcriptId,strand)
            transcript.setStopCodons(self.stopCodons)
            transcript.readOrder=readOrder
            # NOTE(review): this only rebinds the local name; if readOrder
            # is a plain int the caller's counter is not advanced -- confirm.
            readOrder+=1
            transcript.substrate=fields[0]
            transcript.source=fields[1]
            transcript.setBegin(begin)
            transcript.setEnd(end)
        # genegrp= takes precedence over gene_id when both are present.
        geneId=None
        if(rex.find(r"genegrp=(\S+)",line)): geneId=rex[1]
        elif(rex.find(r'gene_id[:=]?\s*"?([^\s;"]+)"?',line)):
            geneId=rex[1]
        if(not geneId): raise Exception("can't parse GTF: "+line)
        transcript.geneId=geneId
        gene=genes.get(geneId,None)
        if(not gene):
            genes[geneId]=gene=Gene()
            gene.setId(geneId)
        transcript.setGene(gene)
        gene.addTranscript(transcript)
        transcript.extraFields=transcriptExtraFields

コード例 #2

0

ファイルを表示

ファイル: GFF3Parser.py プロジェクト: bmajoros/python

 def parseRecord(self, fields):
     """Turn the nine columns of one GFF3 record into a dict.

     fields -- sequence of column strings; more than nine raises Exception.

     The ninth column is split on ";" and each piece must match
     "(.+)=(.+)"; the two captured groups become a key/value pair of the
     "extra" dict, otherwise Exception is raised.  "begin" is stored as
     int(begin)-1 and "end" as int(end); all other columns are stored
     unchanged.
     """
     if len(fields) > 9:
         raise Exception("too many fields in GFF3 record" + "\t".join(fields))
     (substrate, source, featureType, begin, end, score, strand, frame,
      attributeText) = fields
     pairs = attributeText.rstrip().split(";")
     attributes = {}
     matcher = Rex()
     for pair in pairs:
         if matcher.find("(.+)=(.+)", pair):
             name = matcher[1]
             attributes[name] = matcher[2]
         else:
             raise Exception("Can't parse GFF3 field: " + pair)
     return {
         "substrate": substrate,
         "source": source,
         "type": featureType,
         "begin": int(begin) - 1,
         "end": int(end),
         "score": score,
         "strand": strand,
         "frame": frame,
         "extra": attributes
     }

コード例 #3

0

ファイルを表示

 def loadGFF_UTR(self,fields,line,transcriptBeginEnd,GFF,
                        transcripts,readOrder,genes):
     """Attach the feature described by one GFF/GTF line to its transcript.

     fields -- columns of the line; indices 0-7 are read here and
               everything from index 8 on is kept as extra text
     line -- the raw line, searched for transcript and gene identifiers
     transcriptBeginEnd -- mapping of transcript ID -> (begin,end), used
                           to set the extent of a newly created Transcript
     GFF -- not used by this method
     transcripts -- mapping of transcript ID -> Transcript
     readOrder -- value stored as readOrder on a newly created Transcript
     genes -- mapping of gene ID -> Gene

     Raises Exception when neither a transcript ID nor a gene ID is found.
     """
     exonBegin=int(fields[3])-1
     exonEnd=int(fields[4])
     exonScore=fields[5]
     strand=fields[6]
     frame=fields[7]
     rex=Rex()
     # Raw strings: "\s" and "\S" are not valid Python string escapes, so
     # the patterns must not go through escape processing.
     # Transcript ID: the first attribute style that matches wins.
     transcriptId=None
     if(rex.find(r'transgrp[:=]\s*(\S+)',line)): transcriptId=rex[1]
     elif(rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)):
         transcriptId=rex[1]
     elif(rex.find(r'Parent=([^;,\s]+)',line)): transcriptId=rex[1]
     geneId=None
     if(rex.find(r'genegrp=(\S+)',line)): geneId=rex[1]
     elif(rex.find(r'gene_id[:=]?\s*"?([^\s;"]+)"?',line)): geneId=rex[1]
     # Each ID falls back to the other when only one of them was found.
     if(transcriptId is None): transcriptId=geneId
     if(geneId is None): geneId=transcriptId
     if(transcriptId is None):
         raise Exception(line+" : no transcript ID found")
     # Drop a trailing ";" that the (\S+) captures may have swallowed.
     if(rex.find(r"(\S+);$",transcriptId)): transcriptId=rex[1]
     if(rex.find(r"(\S+);$",geneId)): geneId=rex[1]
     # Every column from index 8 on, each followed by a single space.
     extra="".join(field+" " for field in fields[8:])
     if(exonBegin>exonEnd): (exonBegin,exonEnd)=(exonEnd,exonBegin)
     transcript=transcripts.get(transcriptId,None)
     if(not transcript):
         transcripts[transcriptId]=transcript= \
             Transcript(transcriptId,strand)
         transcript.setStopCodons(self.stopCodons)
         transcript.readOrder=readOrder
         # NOTE(review): this only rebinds the local name; if readOrder is
         # a plain int the caller's counter is not advanced -- confirm.
         readOrder+=1
         transcript.substrate=fields[0]
         transcript.source=fields[1]
         # Use the recorded transcript extent when there is one, otherwise
         # start from this feature's own coordinates.
         if(transcriptBeginEnd.get(transcriptId,None) is not None):
             (begin,end)=transcriptBeginEnd[transcriptId]
             transcript.setBegin(begin)
             transcript.setEnd(end)
         else:
             transcript.setBegin(exonBegin)
             transcript.setEnd(exonEnd)
     transcript.geneId=geneId
     gene=genes.get(geneId,None)
     if(gene is None):
         genes[geneId]=gene=Gene()
         gene.setId(geneId)
     transcript.setGene(gene)
     exon=Exon(exonBegin,exonEnd,transcript)
     exon.extraFields=extra
     # While rawExons exists the feature goes there; otherwise it is added
     # to UTR only if exonOverlapsExon() reports no overlap.
     if(transcript.rawExons is not None):
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.rawExons.append(exon)
     elif(not transcript.exonOverlapsExon(exon)):
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.UTR.append(exon) # OK -- we sort later
     gene.addTranscript(transcript)

コード例 #4

0

ファイルを表示

ファイル: GffTranscriptReader.py プロジェクト: bmajoros/python

 def loadGFF_UTR(self,fields,line,transcriptBeginEnd,GFF,
                        transcripts,readOrder,genes):
     """Attach the feature described by one GFF/GTF line to its transcript.

     fields -- columns of the line; indices 0-7 are read here and
               everything from index 8 on is kept as extra text
     line -- the raw line, searched for transcript and gene identifiers
     transcriptBeginEnd -- mapping of transcript ID -> (begin,end), used
                           to set the extent of a newly created Transcript
     GFF -- not used by this method
     transcripts -- mapping of transcript ID -> Transcript
     readOrder -- value stored as readOrder on a newly created Transcript
     genes -- mapping of gene ID -> Gene

     Raises Exception when neither a transcript ID nor a gene ID is found.
     """
     exonBegin=int(fields[3])-1
     exonEnd=int(fields[4])
     exonScore=fields[5]
     strand=fields[6]
     frame=fields[7]
     rex=Rex()
     # Raw strings: "\s" and "\S" are not valid Python string escapes, so
     # the patterns must not go through escape processing.
     # Transcript ID: the first attribute style that matches wins.
     transcriptId=None
     if(rex.find(r'transgrp[:=]\s*(\S+)',line)): transcriptId=rex[1]
     elif(rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)):
         transcriptId=rex[1]
     elif(rex.find(r'Parent=([^;,\s]+)',line)): transcriptId=rex[1]
     geneId=None
     if(rex.find(r'genegrp=(\S+)',line)): geneId=rex[1]
     elif(rex.find(r'gene_id[:=]?\s*"?([^\s;"]+)"?',line)): geneId=rex[1]
     # Each ID falls back to the other when only one of them was found.
     if(transcriptId is None): transcriptId=geneId
     if(geneId is None): geneId=transcriptId
     if(transcriptId is None):
         raise Exception(line+" : no transcript ID found")
     # Drop a trailing ";" that the (\S+) captures may have swallowed.
     if(rex.find(r"(\S+);$",transcriptId)): transcriptId=rex[1]
     if(rex.find(r"(\S+);$",geneId)): geneId=rex[1]
     # Every column from index 8 on, each followed by a single space.
     extra="".join(field+" " for field in fields[8:])
     if(exonBegin>exonEnd): (exonBegin,exonEnd)=(exonEnd,exonBegin)
     transcript=transcripts.get(transcriptId,None)
     if(not transcript):
         transcripts[transcriptId]=transcript= \
             Transcript(transcriptId,strand)
         transcript.setStopCodons(self.stopCodons)
         transcript.readOrder=readOrder
         # NOTE(review): this only rebinds the local name; if readOrder is
         # a plain int the caller's counter is not advanced -- confirm.
         readOrder+=1
         transcript.substrate=fields[0]
         transcript.source=fields[1]
         # The mapping is looked up with .get(); it is indexed like a dict
         # on the next line and has no .find() method.
         if(transcriptBeginEnd.get(transcriptId,None) is not None):
             (begin,end)=transcriptBeginEnd[transcriptId]
             transcript.setBegin(begin)
             transcript.setEnd(end)
     transcript.geneId=geneId
     gene=genes.get(geneId,None)
     if(gene is None):
         genes[geneId]=gene=Gene()
         gene.setId(geneId)
     transcript.setGene(gene)
     exon=Exon(exonBegin,exonEnd,transcript)
     exon.extraFields=extra
     # While rawExons exists the feature goes there; otherwise it is added
     # to UTR only if exonOverlapsExon() reports no overlap.
     if(transcript.rawExons is not None):
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.rawExons.append(exon)
     elif(not transcript.exonOverlapsExon(exon)):
         exon.frame=frame
         exon.score=exonScore
         exon.type=fields[2]
         transcript.UTR.append(exon) # OK -- we sort later
     gene.addTranscript(transcript)

コード例 #5

0

ファイルを表示

ファイル: GFF3Parser.py プロジェクト: bmajoros/python

 def parseRecord(self,fields):
     """Turn the nine columns of one GFF3 record into a dict.

     fields -- sequence of column strings; more than nine raises Exception.

     The ninth column is split on ";" and each piece must match
     "(.+)=(.+)"; the captured groups become a key/value pair of the
     "extra" dict, otherwise Exception is raised.  All other columns,
     including begin and end, are stored exactly as given.
     """
     if(len(fields)>9):
         message="too many fields in GFF3 record"+"\t".join(fields)
         raise Exception(message)
     (substrate,source,featureType,begin,end,score,strand,frame,
      attributeText)=fields
     attributeText=attributeText.rstrip()
     pieces=attributeText.split(";")
     attributes={}
     matcher=Rex()
     for piece in pieces:
         matched=matcher.find("(.+)=(.+)",piece)
         if(not matched):
             raise Exception("Can't parse GFF3 field: "+piece)
         attributes[matcher[1]]=matcher[2]
     record={"substrate":substrate,
             "source":source,
             "type":featureType,
             "begin":begin,
             "end":end,
             "score":score,
             "strand":strand,
             "frame":frame,
             "extra":attributes}
     return record

コード例 #6

0

ファイルを表示

 def crear_aldea(nombre, num_rex, num_spinosaurus, num_triceraptors):
     """Build an Aldea called `nombre` and fill it with dinosaurs.

     Adds num_rex Rex, then num_spinosaurus Spinosaurus, then
     num_triceraptors Triceraptors.  Each one is constructed with a name
     made of a one-letter prefix ("r", "s" or "t") plus its index, the
     value 1000, a random integer from random.randrange(-200, 200), and
     the Aldea itself.  Returns the Aldea.
     """
     village = Aldea(nombre)

     def populate(species, prefix, how_many):
         # Create `how_many` dinosaurs of one species and add them in order.
         for index in range(how_many):
             dinosaur = species(prefix + str(index), 1000,
                                random.randrange(-200, 200), village)
             village.add_dinosaurio(dinosaur)

     populate(Rex, "r", num_rex)
     populate(Spinosaurus, "s", num_spinosaurus)
     populate(Triceraptors, "t", num_triceraptors)
     return village

コード例 #7

0

ファイルを表示

    def loadGFF_transcript(self,fields,line,transcriptBeginEnd,GFF,
                           transcripts,readOrder,genes):
        """Register the transcript described by one "transcript" GFF/GTF line.

        fields -- columns of the line; indices 0, 1, 3, 4, 5 and 6 are read
                  here and everything from index 8 on is kept as extra text
        line -- the raw line, searched for transcript_id, genegrp and gene_id
        transcriptBeginEnd -- mapping updated with transcriptId -> [begin,end]
        GFF -- not used by this method
        transcripts -- mapping of transcript ID -> Transcript; a new
                       Transcript is created and stored for an unseen ID
        readOrder -- value stored as readOrder on a newly created Transcript
        genes -- mapping of gene ID -> Gene; a new Gene is created if needed

        A line without a transcript_id attribute is ignored.  Raises
        Exception when a transcript ID is found but no gene ID is.
        """
        begin=int(fields[3])-1
        end=int(fields[4])
        rex=Rex()
        # Raw strings: "\s", "\S" and "\;" are not valid Python string
        # escapes, so the patterns must not go through escape processing.
        if(not rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?',line)): return
        transcriptId=rex[1]
        transcriptBeginEnd[transcriptId]=[begin,end]
        strand=fields[6]
        score=fields[5]
        # Every column from index 8 on, each followed by a single space.
        transcriptExtraFields="".join(field+" " for field in fields[8:])
        transcript=transcripts.get(transcriptId,None)
        if(transcript is None):
            transcripts[transcriptId]=transcript= \
                Transcript(transcriptId,strand)
            transcript.setStopCodons(self.stopCodons)
            transcript.readOrder=readOrder
            # NOTE(review): this only rebinds the local name; if readOrder
            # is a plain int the caller's counter is not advanced -- confirm.
            readOrder+=1
            transcript.substrate=fields[0]
            transcript.source=fields[1]
            transcript.setBegin(begin)
            transcript.setEnd(end)
        # Fill in the score only if the transcript has none yet and the
        # score column is not the "." placeholder.
        if(transcript.score is None and score!="."):
            transcript.score=float(score)
        # genegrp= takes precedence over gene_id when both are present.
        geneId=None
        if(rex.find(r"genegrp=(\S+)",line)): geneId=rex[1]
        elif(rex.find(r'gene_id[:=]?\s*"?([^\s;"]+)"?',line)):
            geneId=rex[1]
        if(not geneId): raise Exception("can't parse GTF: "+line)
        transcript.geneId=geneId
        gene=genes.get(geneId,None)
        if(not gene):
            genes[geneId]=gene=Gene()
            gene.setId(geneId)
        transcript.setGene(gene)
        gene.addTranscript(transcript)
        transcript.extraFields=transcriptExtraFields

コード例 #8

0

ファイルを表示

ファイル: revisions-crypskip-counts.py プロジェクト: ReddyLab/1000Genomes

#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import os
from Rex import Rex
# Module-level Rex instance; loadExpressed() calls find() on it and reads
# the captured group by index.
rex=Rex()
# Hard-coded absolute paths and threshold.  COMBINED, OUTDIR, MIN_COUNT and
# EXPRESSED are not referenced in the visible part of this file.
COMBINED="/home/bmajoros/1000G/assembly/combined"
OUTDIR="/home/bmajoros/1000G/assembly/cryptic"
MIN_COUNT=3
EXPRESSED="/home/bmajoros/1000G/assembly/expressed.txt"

def loadExpressed(filename):
    """Return a dict keyed by the transcript IDs listed in `filename`.

    Each non-blank line is split on whitespace and must have exactly four
    columns (gene, transcript, fpkm, SS); only the transcript column is
    used.  When the pattern ALT\\d+_(\\S+) is found in the transcript ID,
    the captured group is used as the key instead.  Every value is True.

    Blank lines are skipped.  A non-blank line that does not have exactly
    four columns raises ValueError.
    """
    expressed={}
    with open(filename,"rt") as fh:
        for line in fh:
            fields=line.split()
            # A blank (or whitespace-only) line splits to an empty list and
            # would otherwise fail the four-way unpack below.
            if(not fields): continue
            (gene,transcript,fpkm,SS)=fields
            # Raw string: "\d" and "\S" are not valid Python string escapes.
            if(rex.find(r"ALT\d+_(\S+)",transcript)): ###
                transcript=rex[1] ###
            expressed[transcript]=True
    return expressed

コード例 #9

0

ファイルを表示

ファイル: test-alleles.py プロジェクト: BeefSong/POPSTARR2

# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
#=========================================================================
from __future__ import (absolute_import, division, print_function,
                        unicode_literals, generators, nested_scopes,
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import ProgramName
import gzip
from Rex import Rex
rex = Rex()
from scipy import stats
from statsmodels.stats.multitest import multipletests


def getCounts(filename, variants, MIN_COUNT):
    """Collect per-record reference/alternate counts from `filename`.

    Each line is split on whitespace; lines that do not have exactly seven
    columns are skipped.  The last two columns are converted to int, and
    records whose two counts sum to less than MIN_COUNT are skipped.  The
    remaining records are stored as counts[id] = [refCount, altCount].

    NOTE(review): `variants` is not used and nothing is returned in the
    visible part of this function -- the excerpt may be cut off; confirm
    against the full file.
    """
    counts = {}
    with open(filename, "rt") as IN:
        for line in IN:
            fields = line.rstrip().split()
            if (len(fields) != 7): continue
            (id, chr, pos, ref, alt, refCount, altCount) = fields
            refCount = int(refCount)
            altCount = int(altCount)
            if (refCount + altCount < MIN_COUNT): continue
            counts[id] = [refCount, altCount]

コード例 #10

0

ファイルを表示

ファイル: make-trim-slurms.py プロジェクト: ReddyLab/POPSTARR2

# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2017 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import os
import ProgramName
from SlurmWriter import SlurmWriter
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex=Rex()

# Hard-coded settings; none of them is referenced in the visible part of
# this script.
# NOTE(review): MEM, NICE, maxParallel and THREADS look like job-submission
# parameters for SlurmWriter -- confirm against the rest of the file.
ROOT="/home/bmajoros/PopSTARR/graham"
MEM=50000
NICE=500
jobName="TRIM"
maxParallel=1000
THREADS=31
# Command prefix that runs the Trimmomatic 0.33 jar with the "PE" argument.
TRIMMOMATIC="java -jar /data/reddylab/software/Trimmomatic-0.33/Trimmomatic-0.33/trimmomatic-0.33.jar PE"

#=========================================================================
# main()
#=========================================================================
# Exactly four command-line arguments are required; otherwise exit with a
# usage message.
if(len(sys.argv)!=5):
    exit(ProgramName.get()+" <adapters.fasta> <fastq-in> <fastq-out> <full-path-to-slurms>\n")
(adaptersFasta,fastqIn,fastqOut,slurmDir)=sys.argv[1:]

コード例 #11

0

ファイルを表示

ファイル: revisions-get-junctions-nonbroken.py プロジェクト: ReddyLab/1000Genomes

# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from Transcript import Transcript
from Interval import Interval
from GffTranscriptReader import GffTranscriptReader
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex=Rex()

# Exactly seven command-line arguments are required; otherwise exit with a
# usage message.
if(len(sys.argv)!=8):
    exit(sys.argv[0]+
         " <indiv> <hap> <in.broken-sites> <junctions.bed> <in.gff> <in.readcounts> <all-broken-sites.txt>")
(indiv,hap,infile,junctionsFile,gffFile,readCountsFile,masterFile)=sys.argv[1:]

#============================= main() =================================

# Read the readcounts file
totalMappedReads=None
readCounts={}
with open(readCountsFile,"rt") as IN:
    # Read line by line until readline() returns the empty string (EOF).
    # NOTE(review): the loop body is cut off after the EOF check in this
    # excerpt.
    while(True):
        line=IN.readline()
        if(line==""): break

コード例 #12

0

ファイルを表示

ファイル: fdr.py プロジェクト: ReddyLab/1000Genomes

#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from Rex import Rex
# Module-level Rex instance used to pull the value out of each input line.
rex=Rex()

# Exactly one command-line argument (the input file) is required.
# sys.exit is used rather than the interactive helper `exit`, which is only
# present when the site module has been loaded.
if(len(sys.argv)!=2):
    sys.exit(sys.argv[0]+" <p-values.txt>")
infile=sys.argv[1]

# Collect one float per line that contains a token matching the pattern
# (non-whitespace run, a digit, non-whitespace run); other lines are skipped.
values=[]
with open(infile,"rt") as fh:
    for line in fh:
        # Raw string: "\S" and "\d" are not valid Python string escapes.
        if(rex.find(r"(\S+\d\S+)",line)):
            values.append(float(rex[1]))

# Ascending order; L is the number of values read.
values.sort()
L=len(values)
qValues=(0.1,0.05,0.01,0.005,0.001)
for q in qValues:
    bestP=None

コード例 #13

0

ファイルを表示

# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2018 William H. Majoros ([email protected])
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import os
import math
import ProgramName
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex=Rex()
import TempFilename
import getopt

# WARMUP and ALPHA are not referenced in the visible part of this script.
WARMUP=1000
ALPHA=0.05
# Four names obtained from TempFilename.generate(), one per suffix.
# NOTE(review): presumably unique temporary file paths -- confirm against
# the TempFilename module.
STDERR=TempFilename.generate(".stderr")
INPUT_FILE=TempFilename.generate(".staninputs")
INIT_FILE=TempFilename.generate(".staninit")
OUTPUT_TEMP=TempFilename.generate(".stanoutputs")

def printFields(fields,hFile):
    """Write fields[7:] to hFile as tab-separated "<number>=<value>" items.

    Numbering starts at 1 for fields[7].  No trailing tab or newline is
    written; nothing is written when there are fewer than eight fields.
    """
    for number,value in enumerate(fields[7:],1):
        # The separator goes before every item except the first.
        if(number>1): print("\t",end="",file=hFile)
        print(number,"=",value,sep="",end="",file=hFile)

コード例 #14

0

ファイルを表示

ファイル: get-transcripts.py プロジェクト: ReddyLab/1000Genomes

# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex=Rex()

# Process command line
if(len(sys.argv)!=4): exit(sys.argv[0]+" <in.fasta> <in.gff> <out.fasta>")
(fastaFile,gffFile,outFile)=sys.argv[1:]

# Read GFF
reader=GffTranscriptReader()
# NOTE(review): presumably maps substrate name -> transcripts; confirm
# against GffTranscriptReader.hashBySubstrate.  The name shadows the
# builtin hash().
hash=reader.hashBySubstrate(gffFile)

# Open output file
# NOTE(review): opened without a `with` block; no close() is visible in
# this excerpt.
OUT=open(outFile,"wt")
writer=FastaWriter()

# Process each substrate in the FASTA file
# `reader` is rebound here from the GFF reader to a FastaReader.
reader=FastaReader(fastaFile)

コード例 #15

0

ファイルを表示

#=========================================================================
from __future__ import (absolute_import, division, print_function,
                        unicode_literals, generators, nested_scopes,
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import ProgramName
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from Rex import Rex

# Module-level Rex instance used to test each defline below.
rex = Rex()

#=========================================================================
# main()
#=========================================================================
# Exactly two command-line arguments are required; otherwise exit with a
# usage message.
if (len(sys.argv) != 3):
    exit(ProgramName.get() + " <in.fasta> <out.fasta>\n")
(infile, outfile) = sys.argv[1:]

# NOTE(review): opened without a `with` block; no close() is visible in
# this excerpt.
OUT = open(outfile, "wt")
writer = FastaWriter()
reader = FastaReader(infile)
# Read (defline, seq) pairs until nextSequence() yields a falsy defline;
# records whose defline does not contain ">chr" are skipped.
# NOTE(review): the rest of the loop body is cut off in this excerpt.
while (True):
    (defline, seq) = reader.nextSequence()
    if (not defline): break
    if (not rex.find(">chr", defline)): continue

コード例 #16

0

ファイルを表示

ファイル: fifty-two-percent.py プロジェクト: ReddyLab/1000Genomes

#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex=Rex()

# MIN_READS is not referenced in the visible part of this script.
MIN_READS=3
# Hard-coded input locations, both under ASSEMBLY.
ASSEMBLY="/home/bmajoros/1000G/assembly"
READS=ASSEMBLY+"/reads.txt-rev3"
EXPRESSED=ASSEMBLY+"/expressed.txt"

# Build a dict keyed by the second column of every four-column line of
# EXPRESSED; lines with any other column count are skipped.
expressed={}
with open(EXPRESSED,"rt") as fh:
    for line in fh:
        fields=line.split()
        if(len(fields)!=4): continue
        (gene,trans,meanFPKM,SS)=fields
        expressed[trans]=True

# Filled in by code that is cut off in this excerpt.
hasAlts={}
supportedAlts={}
with open(READS,"rt") as fh:

コード例 #17

0

ファイルを表示

ファイル: revisions-get-junctions-broken.py プロジェクト: ReddyLab/1000Genomes

# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from Transcript import Transcript
from Interval import Interval
from GffTranscriptReader import GffTranscriptReader
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex=Rex()

# Exactly four command-line arguments are required; otherwise exit with a
# usage message.
if(len(sys.argv)!=5):
    exit(sys.argv[0]+
         " <in.broken-sites> <junctions.bed> <in.gff> <in.readcounts>")
(infile,junctionsFile,gffFile,readCountsFile)=sys.argv[1:]

#============================= main() =================================

# Read the readcounts file
totalMappedReads=None
readCounts={}
with open(readCountsFile,"rt") as IN:
    # Read line by line until readline() returns the empty string (EOF).
    # NOTE(review): the loop body is cut off after the EOF check in this
    # excerpt.
    while(True):
        line=IN.readline()
        if(line==""): break

コード例 #18

0

ファイルを表示

ファイル: set-defline-dna.py プロジェクト: ReddyLab/1000Genomes

#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import os
import glob
from Rex import Rex
import shutil

# Module-level Rex instance used to recognise defline lines below.
rex=Rex()

# Exactly one command-line argument (the directory) is required.
# sys.exit is used rather than the interactive helper `exit`, which is only
# present when the site module has been loaded.
if(len(sys.argv)!=2):
    sys.exit(sys.argv[0]+" <dir>")
directory=sys.argv[1]

# Rewrite every *.fastb file in the directory: each line in which the
# pattern ">\S+" is found is replaced by ">dna", and every other line is
# copied unchanged.  The result is written to tmp.fastb in the current
# directory and then moved over the original file.
files=glob.glob(directory+"/*.fastb")
for file in files:
    with open("tmp.fastb","wt") as OUT, open(file,"rt") as IN:
        for line in IN:
            # Raw string: "\S" is not a valid Python string escape.
            if(rex.find(r">\S+",line)):
                OUT.write(">dna\n")
            else: OUT.write(line)
    # Move without going through a shell, so paths containing spaces or
    # shell metacharacters are handled and a failure raises instead of
    # being ignored.
    shutil.move("tmp.fastb",file)

コード例 #19

0

ファイルを表示

ファイル: pool-counts.py プロジェクト: ReddyLab/POPSTARR2

#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
#=========================================================================
from __future__ import (absolute_import, division, print_function,
                        unicode_literals, generators, nested_scopes,
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from Rex import Rex
# Module-level Rex instance used to parse each "<allele>=<count>" field.
rex = Rex()

#=========================================================================
# main()
#=========================================================================
# counts maps the first column of each stdin line to a per-ID dict.
counts = {}
for line in sys.stdin:
    fields = line.rstrip().split()
    id = fields[0]
    # Fetch the dict for this ID, creating and storing an empty one the
    # first time the ID is seen.
    alleles = counts.get(id, None)
    if (alleles is None): alleles = counts[id] = {}
    # Every remaining column must match (\S+)=(\d+).
    # NOTE(review): the code that uses `allele` and `count` is cut off in
    # this excerpt.
    for field in fields[1:]:
        if (not rex.find("(\S+)=(\d+)", field)):
            raise Exception("can't parse field: " + field)
        allele = rex[1]
        count = int(rex[2])

コード例 #20

0

ファイルを表示

                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import ProgramName
import gzip
import math
import numpy as np
from sklearn import linear_model
import statsmodels.api as sm
from scipy import stats
from Rex import Rex
# Module-level Rex instance (not used in the visible part of this script).
rex = Rex()

# Settings that are not referenced in the visible part of this script.
# NOTE(review): MAX_P_ADJ / MAX_P look like p-value cut-offs and
# NUM_COVARIATES like a model dimension -- confirm against the rest of the
# file.
MAX_P_ADJ = 1.0
MAX_P = 0.05
NUM_COVARIATES = 11
SKIP_NA = True
# Hard-coded input locations, all under BASE.
BASE = "/home/bmajoros/PopSTARR/sarah"
#EFFECTS=BASE+"/test-lucif-aug28-chr10.txt"
EFFECTS = BASE + "/test-sarah-beta-2sided.txt"
VCF = BASE + "/vcf"
PHENOTYPES = BASE + "/phenotypes.txt"
CHROMS = ("chr10", "chr11", "chr21", "chr5", "chr8")

CENTERS = ("A", "E", "F", "K", "N", "P", "Q")
# Filled in by code that is cut off in this excerpt.
CENTER_CODES = {}
nextCode = 0