Ejemplo n.º 1
0
 def __init__(self, alignerDir, matrixFile, gapOpenPenalty,
              gapExtendPenalty):
     """Record the constructor arguments and create a FastaWriter.

     alignerDir       -- stored as self.alignerDir
     matrixFile       -- stored as self.matrixFile
     gapOpenPenalty   -- stored as self.gapOpen
     gapExtendPenalty -- stored as self.gapExtend
     """
     # Writer instance kept on the object for later use
     self.fastaWriter = FastaWriter()
     # Gap penalties are stored under shortened attribute names
     self.gapOpen, self.gapExtend = gapOpenPenalty, gapExtendPenalty
     self.alignerDir, self.matrixFile = alignerDir, matrixFile
Ejemplo n.º 2
0
 def save(self,fh):
     """Write this object's defline and data to the open file handle fh.

     When self.isDiscrete() is true, the data is handed to
     FastaWriter.addToFasta() under a defline starting with ">".
     Otherwise a defline starting with "%" is written, followed by one
     str()-converted data element per line.
     """
     header=self.id+" "+self.deflineExtra
     if(self.isDiscrete()):
         # The writer is only needed on this branch, so it is built here
         FastaWriter().addToFasta(">"+header,self.data,fh)
         return
     fh.write("%"+header+"\n")
     # Iterate over the elements directly instead of indexing by position
     for value in self.data:
         fh.write(str(value)+"\n")
Ejemplo n.º 3
0
 def save(self, fh):
     """Serialize this object to the writable file handle fh.

     Discrete data (self.isDiscrete() is true) goes through
     FastaWriter.addToFasta() with a ">" defline.  Any other data is
     written as a "%" defline followed by each element of self.data on
     its own line, converted with str().
     """
     defline = self.id + " " + self.deflineExtra
     if (self.isDiscrete()):
         # Only this branch uses a FastaWriter, so create it lazily
         writer = FastaWriter()
         writer.addToFasta(">" + defline, self.data, fh)
     else:
         fh.write("%" + defline + "\n")
         # Walk the elements directly; no positional index is needed
         for element in self.data:
             fh.write(str(element) + "\n")
Ejemplo n.º 4
0
class SmithWaterman:
    """Runs the external "smith-waterman" program on a pair of sequences.

    The two sequences are written to temporary FASTA files, the program
    found under alignerDir is invoked on them, and the CIGAR string found
    in its output is returned as a CigarString.
    """

    def __init__(self, alignerDir, matrixFile, gapOpenPenalty,
                 gapExtendPenalty):
        """Store the aligner settings used to build the command line.

        alignerDir       -- directory containing the "smith-waterman" program
        matrixFile       -- matrix file argument passed to the program
        gapOpenPenalty   -- passed on the command line after the matrix file
        gapExtendPenalty -- passed on the command line after gapOpenPenalty
        """
        self.alignerDir = alignerDir
        self.matrixFile = matrixFile
        self.gapOpen = gapOpenPenalty
        self.gapExtend = gapExtendPenalty
        self.fastaWriter = FastaWriter()

    def writeFile(self, defline, seq):
        """Write seq under defline to a new temp file; return its name."""
        filename = TempFilename.generate("fasta")
        self.fastaWriter.writeFasta(defline, seq, filename)
        return filename

    def swapInsDel(self, cigar):
        """Return cigar with every "I" replaced by "D" and vice versa."""
        # This is done because my aligner defines insertions and deletions
        # opposite to how they're defined in the SAM specification
        swap = {"I": "D", "D": "I"}
        # Single join instead of repeated string concatenation
        return "".join(swap.get(op, op) for op in cigar)

    def align(self, seq1, seq2):
        """Align seq1 (query) against seq2 (reference).

        Returns a CigarString built from the aligner's CIGAR output, with
        insertions and deletions swapped.  Raises Exception when the
        output contains no "CIGAR=" field.  The temporary FASTA files are
        removed even if writing them or running the aligner fails.
        """
        tempFiles = []
        try:
            file1 = self.writeFile("query", seq1)
            tempFiles.append(file1)
            file2 = self.writeFile("reference", seq2)
            tempFiles.append(file2)
            # NOTE(review): the command is assembled by plain string
            # concatenation; if Pipe.run hands it to a shell, paths with
            # spaces or metacharacters will break it -- confirm inputs
            # are trusted.
            cmd = (self.alignerDir + "/smith-waterman -q " +
                   self.matrixFile + " " + str(self.gapOpen) + " " +
                   str(self.gapExtend) + " " + file1 + " " + file2 + " DNA")
            output = Pipe.run(cmd)
        finally:
            # Clean up whichever temp files were actually created
            for filename in tempFiles:
                os.remove(filename)
        if (not rex.find(r"CIGAR=(\S+)", output)):
            raise Exception("Can't parse aligner output: " + output)
        cigar = self.swapInsDel(rex[1])  # because I define cigars differently
        return CigarString(cigar)
Ejemplo n.º 5
0
#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
import random
import ProgramName
import sys
from FastaWriter import FastaWriter

# Usage: <length> <id>
# Writes a random DNA sequence of the given length to stdout in FASTA
# format, using ">"+id as the defline.
if(len(sys.argv)!=3):
    # sys.exit rather than the site-provided exit(), which is meant for
    # interactive use and is absent when the site module is not loaded
    sys.exit(ProgramName.get()+" <length> <id>")
L=int(sys.argv[1])
id=sys.argv[2]
alphabet=("A","C","G","T")
# int(random.random()*4) picks an index in 0..3; the letters are joined
# once instead of growing the string one character at a time
seq="".join(alphabet[int(random.random()*4)] for i in range(L))

writer=FastaWriter()
writer.addToFasta(">"+id,seq,sys.stdout)


Ejemplo n.º 6
0
#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
                        unicode_literals, generators, nested_scopes,
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
import random
import ProgramName
import sys
from FastaWriter import FastaWriter

# Usage: <length> <id>
# Emits one random nucleotide sequence of the requested length on stdout,
# in FASTA format, with ">" + id as its defline.
if (len(sys.argv) != 3):
    # exit() is injected by the site module for interactive sessions;
    # sys.exit() is the form intended for programs
    sys.exit(ProgramName.get() + " <length> <id>")
L = int(sys.argv[1])
id = sys.argv[2]
alphabet = ("A", "C", "G", "T")
# Each letter is chosen by an index in 0..3 drawn from random.random();
# a single join avoids repeated string concatenation
seq = "".join(alphabet[int(random.random() * 4)] for i in range(L))

writer = FastaWriter()
writer.addToFasta(">" + id, seq, sys.stdout)
Ejemplo n.º 7
0
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader
from Rex import Rex
rex=Rex()

# Process command line
if(len(sys.argv)!=4):
    sys.exit(sys.argv[0]+" <in.fasta> <in.gff> <out.fasta>")
(fastaFile,gffFile,outFile)=sys.argv[1:]

# Read GFF; the result is looked up below by the ID parsed from each defline
gffReader=GffTranscriptReader()
transcriptsBySubstrate=gffReader.hashBySubstrate(gffFile)

# Open output file; the with-block closes it on every exit path,
# including the exit taken for an unparseable defline
with open(outFile,"wt") as OUT:
    writer=FastaWriter()

    # Process each substrate in the FASTA file
    reader=FastaReader(fastaFile)
    try:
        while(True):
            [defline,seq]=reader.nextSequence()
            if(not defline): break
            if(not rex.find(r"^\s*>\s*(\S+)",defline)):
                sys.exit("Can't parse defline: "+defline)
            substrateId=rex[1]
            transcripts=transcriptsBySubstrate.get(substrateId,None)
            if(not transcripts): continue
            for transcript in transcripts:
                transSeq=transcript.loadTranscriptSeq(seq)
                writer.addToFasta(">"+transcript.getID(),transSeq,OUT)
    finally:
        # Release the FASTA reader even when the loop exits early
        reader.close()
Ejemplo n.º 8
0
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader

# Usage: <in.fasta> <in.gff> <out.fasta>
# Copies to <out.fasta> every FASTA record whose ID is the substrate of at
# least one GFF transcript whose ID begins with "ALT".
if(len(sys.argv)!=4):
    sys.exit(sys.argv[0]+" <in.fasta> <in.gff> <out.fasta>")
(fastaFile,gffFile,outFile)=sys.argv[1:]

# Collect the substrates of all transcripts whose ID starts with "ALT"
gffReader=GffTranscriptReader()
transcripts=gffReader.loadGFF(gffFile)
keep={transcript.getSubstrate() for transcript in transcripts
      if transcript.getID()[:3]=="ALT"}

reader=FastaReader(fastaFile)
writer=FastaWriter()
try:
    # The with-block closes the output file even if a record fails
    with open(outFile,"wt") as fh:
        while(True):
            (defline,seq)=reader.nextSequence()
            if(not defline): break
            (seqId,_)=FastaReader.parseDefline(defline)
            if(seqId not in keep): continue
            writer.addToFasta(defline,seq,fh)
finally:
    # NOTE(review): assumes FastaReader provides close() -- confirm
    reader.close()
print("[done]",file=sys.stderr)

Ejemplo n.º 9
0
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import ProgramName
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from Rex import Rex

rex = Rex()

#=========================================================================
# main()
#=========================================================================
# Usage: <in.fasta> <out.fasta>
# Copies to <out.fasta> only those records whose defline matches ">chr".
if (len(sys.argv) != 3):
    sys.exit(ProgramName.get() + " <in.fasta> <out.fasta>\n")
(infile, outfile) = sys.argv[1:]

# The with-block guarantees the output file is closed on every exit path
with open(outfile, "wt") as OUT:
    writer = FastaWriter()
    reader = FastaReader(infile)
    try:
        while (True):
            (defline, seq) = reader.nextSequence()
            if (not defline): break
            if (not rex.find(">chr", defline)): continue
            writer.addToFasta(defline, seq, OUT)
    finally:
        # NOTE(review): assumes FastaReader provides close() -- confirm
        reader.close()
Ejemplo n.º 10
0
# Ad-hoc exercise of FastaReader, FastaWriter and Translation.
# Alternative inputs used previously:
#   /home/bmajoros/1000G/assembly/BRCA1-NA19782.fasta
#   /home/bmajoros/1000G/assembly/test.fasta
filename="/Users/bmajoros/python/test/data/subset.fasta"
print(FastaReader.getSize(filename))

[defline,seq]=FastaReader.firstSequence(filename)
print(len(seq))

# Print every defline with its parsed ID and attributes
seqsById=FastaReader.readAllAndKeepDefs(filename)
for entry in seqsById.values():
    [defline,seq]=entry
    print(defline)
    [seqId,attrs]=FastaReader.parseDefline(defline)
    print("id="+seqId)
    # Distinct names here so the loop does not clobber outer variables
    for attrName,attrValue in attrs.items():
        print(attrName+"="+attrValue)

writer=FastaWriter()
writer.writeFasta(">ABCD","ATCGATCGTAGCTAGTCTGCGCGTATCGTCAGTCTCTATCGATCGTACTGCGATCTAGCTAGCTGATCGTAGCTTCTATGACTGCTAGTCATCTAGCTAGCTGATCGTAGCTGCGCGCGATATATTGCATCTATGCTATCATTGCATGCTAGCTCTAGCTAGTCGATGCTATCTTAGCTAC","test1.fasta")

writer.appendToFasta(">XYZ","GATTACA","test1.fasta")

# seq is whatever the loop above bound last (or the first sequence if
# the loop body never ran)
print(Translation.translate(seq))
print("forward:",seq)
print("revcomp: ",Translation.reverseComplement(seq))