def __init__(self, alignerDir, matrixFile, gapOpenPenalty, gapExtendPenalty):
    """Record the aligner configuration and create a FASTA writer.

    :param alignerDir: stored unchanged as ``self.alignerDir``
    :param matrixFile: stored unchanged as ``self.matrixFile``
    :param gapOpenPenalty: stored as ``self.gapOpen``
    :param gapExtendPenalty: stored as ``self.gapExtend``
    """
    (self.alignerDir, self.matrixFile) = (alignerDir, matrixFile)
    (self.gapOpen, self.gapExtend) = (gapOpenPenalty, gapExtendPenalty)
    # One writer instance is kept on the object
    self.fastaWriter = FastaWriter()
def save(self, fh):
    """Write this record to an already-open, writable file handle.

    When ``self.isDiscrete()`` is true the record is emitted through
    ``FastaWriter.addToFasta()`` with a ">" defline.  Otherwise a "%"
    defline is written, followed by each element of ``self.data`` on its
    own line.

    :param fh: writable text file handle; it is not closed here
    """
    defline = self.id + " " + self.deflineExtra
    if self.isDiscrete():
        # The writer is only needed on this branch, so build it here
        FastaWriter().addToFasta(">" + defline, self.data, fh)
        return
    fh.write("%" + defline + "\n")
    fh.writelines(str(value) + "\n" for value in self.data)
def save(self, fh):
    """Write this record to an already-open, writable file handle.

    When ``self.isDiscrete()`` is true the record is emitted through
    ``FastaWriter.addToFasta()`` with a ">" defline.  Otherwise a "%"
    defline is written, followed by each element of ``self.data`` on its
    own line.

    :param fh: writable text file handle; it is not closed here
    """
    defline = self.id + " " + self.deflineExtra
    if self.isDiscrete():
        # The writer is only needed on this branch, so build it here
        FastaWriter().addToFasta(">" + defline, self.data, fh)
        return
    fh.write("%" + defline + "\n")
    fh.writelines(str(value) + "\n" for value in self.data)
class SmithWaterman:
    """Runs the external "smith-waterman" executable on two sequences.

    The sequences are written to temporary FASTA files, the executable
    found under ``alignerDir`` is invoked through ``Pipe.run``, and the
    CIGAR string found in its output is returned as a ``CigarString``.
    """

    # Insertion/deletion codes are exchanged; every other character is kept
    _INS_DEL_SWAP = {"I": "D", "D": "I"}

    def __init__(self, alignerDir, matrixFile, gapOpenPenalty,
                 gapExtendPenalty):
        """Store the aligner location, matrix file and gap penalties.

        :param alignerDir: directory containing the smith-waterman program
        :param matrixFile: matrix file passed on the command line
        :param gapOpenPenalty: gap-open value passed on the command line
        :param gapExtendPenalty: gap-extend value passed on the command line
        """
        self.alignerDir = alignerDir
        self.matrixFile = matrixFile
        self.gapOpen = gapOpenPenalty
        self.gapExtend = gapExtendPenalty
        self.fastaWriter = FastaWriter()

    def writeFile(self, defline, seq):
        """Write one sequence to a new temporary FASTA file.

        :param defline: defline handed to ``FastaWriter.writeFasta``
        :param seq: sequence handed to ``FastaWriter.writeFasta``
        :return: the filename obtained from ``TempFilename.generate``
        """
        filename = TempFilename.generate("fasta")
        self.fastaWriter.writeFasta(defline, seq, filename)
        return filename

    def swapInsDel(self, cigar):
        """Return ``cigar`` with every "I" and "D" exchanged.

        This is done because my aligner defines insertions and deletions
        opposite to how they're defined in the SAM specification.

        :param cigar: CIGAR text as a string
        :return: new string of the same length with I/D swapped
        """
        swap = self._INS_DEL_SWAP
        return "".join(swap.get(op, op) for op in cigar)

    def align(self, seq1, seq2):
        """Align ``seq1`` (query) against ``seq2`` (reference).

        :param seq1: query sequence
        :param seq2: reference sequence
        :return: ``CigarString`` built from the I/D-swapped aligner CIGAR
        :raises Exception: if no "CIGAR=" field is found in the output
        """
        tempFiles = []
        try:
            tempFiles.append(self.writeFile("query", seq1))
            tempFiles.append(self.writeFile("reference", seq2))
            # NOTE(review): arguments are concatenated unquoted; if Pipe.run
            # goes through a shell, paths containing spaces or shell
            # metacharacters would break the command -- confirm.
            cmd = (self.alignerDir + "/smith-waterman -q " + self.matrixFile
                   + " " + str(self.gapOpen) + " " + str(self.gapExtend)
                   + " " + tempFiles[0] + " " + tempFiles[1] + " DNA")
            output = Pipe.run(cmd)
        finally:
            # Always clean up, even when writing or the aligner run fails
            for path in tempFiles:
                os.remove(path)
        if not rex.find(r"CIGAR=(\S+)", output):
            raise Exception("Can't parse aligner output: " + output)
        # Swap because I define cigars differently
        return CigarString(self.swapInsDel(rex[1]))
#!/usr/bin/env python #========================================================================= # This is OPEN SOURCE SOFTWARE governed by the Gnu General Public # License (GPL) version 3, as described at www.opensource.org. # Copyright (C)2016 William H. Majoros ([email protected]). #========================================================================= from __future__ import (absolute_import, division, print_function, unicode_literals, generators, nested_scopes, with_statement) from builtins import (bytes, dict, int, list, object, range, str, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) import random import ProgramName import sys from FastaWriter import FastaWriter if(len(sys.argv)!=3): exit(ProgramName.get()+" <length> <id>") L=int(sys.argv[1]) id=sys.argv[2] seq="" alphabet=("A","C","G","T") for i in range(L): index=int(random.random()*4) nuc=alphabet[index] seq+=nuc writer=FastaWriter() writer.addToFasta(">"+id,seq,sys.stdout)
#!/usr/bin/env python #========================================================================= # This is OPEN SOURCE SOFTWARE governed by the Gnu General Public # License (GPL) version 3, as described at www.opensource.org. # Copyright (C)2016 William H. Majoros ([email protected]). #========================================================================= from __future__ import (absolute_import, division, print_function, unicode_literals, generators, nested_scopes, with_statement) from builtins import (bytes, dict, int, list, object, range, str, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) import random import ProgramName import sys from FastaWriter import FastaWriter if (len(sys.argv) != 3): exit(ProgramName.get() + " <length> <id>") L = int(sys.argv[1]) id = sys.argv[2] seq = "" alphabet = ("A", "C", "G", "T") for i in range(L): index = int(random.random() * 4) nuc = alphabet[index] seq += nuc writer = FastaWriter() writer.addToFasta(">" + id, seq, sys.stdout)
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader
from Rex import Rex
rex = Rex()

# Writes the sequence of every transcript in a GFF file to a FASTA file,
# using the substrate sequences found in the input FASTA file.
# NOTE(review): sys and FastaReader are used below but their imports are
# not visible in this excerpt -- presumably they appear earlier in the file.

# Process command line
if len(sys.argv) != 4:
    exit(sys.argv[0] + " <in.fasta> <in.gff> <out.fasta>")
(fastaFile, gffFile, outFile) = sys.argv[1:]

# Read GFF
transcriptsBySubstrate = GffTranscriptReader().hashBySubstrate(gffFile)

# Open output file; the with-block guarantees it is flushed and closed
writer = FastaWriter()
with open(outFile, "wt") as OUT:
    # Process each substrate in the FASTA file
    reader = FastaReader(fastaFile)
    try:
        while True:
            [defline, seq] = reader.nextSequence()
            if not defline:
                break
            if not rex.find(r"^\s*>\s*(\S+)", defline):
                exit("Can't parse defline: " + defline)
            substrate = rex[1]
            transcripts = transcriptsBySubstrate.get(substrate, None)
            if not transcripts:
                continue  # no transcripts annotated on this substrate
            for transcript in transcripts:
                transSeq = transcript.loadTranscriptSeq(seq)
                writer.addToFasta(">" + transcript.getID(), transSeq, OUT)
    finally:
        # Close the reader even on early exit or error
        reader.close()
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader

# Copies to the output FASTA only those input sequences whose ID is the
# substrate of at least one GFF transcript whose ID begins with "ALT".

if len(sys.argv) != 4:
    exit(sys.argv[0] + " <in.fasta> <in.gff> <out.fasta>")
(fastaFile, gffFile, outFile) = sys.argv[1:]

# Collect the substrates of all "ALT" transcripts
keep = {transcript.getSubstrate()
        for transcript in GffTranscriptReader().loadGFF(gffFile)
        if transcript.getID()[:3] == "ALT"}

reader = FastaReader(fastaFile)
writer = FastaWriter()
try:
    # The with-block closes the output even if a record fails to parse
    with open(outFile, "wt") as fh:
        while True:
            (defline, seq) = reader.nextSequence()
            if not defline:
                break
            (seqId, _attrs) = FastaReader.parseDefline(defline)
            if seqId not in keep:
                continue
            writer.addToFasta(defline, seq, fh)
finally:
    reader.close()
print("[done]", file=sys.stderr)
with_statement) from builtins import (bytes, dict, int, list, object, range, str, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) # The above imports should allow this program to run in both Python 2 and # Python 3. You might need to update your version of module "future". import sys import ProgramName from FastaReader import FastaReader from FastaWriter import FastaWriter from Rex import Rex rex = Rex() #========================================================================= # main() #========================================================================= if (len(sys.argv) != 3): exit(ProgramName.get() + " <in.fasta> <out.fasta>\n") (infile, outfile) = sys.argv[1:] OUT = open(outfile, "wt") writer = FastaWriter() reader = FastaReader(infile) while (True): (defline, seq) = reader.nextSequence() if (not defline): break if (not rex.find(">chr", defline)): continue writer.addToFasta(defline, seq, OUT) OUT.close()
# Ad-hoc exercise of FastaReader, FastaWriter and Translation against a
# local FASTA file.
# NOTE(review): the imports for these three names are not visible in this
# excerpt -- presumably they appear earlier in the file.

# Alternative input: "/home/bmajoros/1000G/assembly/BRCA1-NA19782.fasta"
filename = "/Users/bmajoros/python/test/data/subset.fasta"
print(FastaReader.getSize(filename))
(defline, seq) = FastaReader.firstSequence(filename)
print(len(seq))

# Alternative input: "/home/bmajoros/1000G/assembly/test.fasta"
filename = "/Users/bmajoros/python/test/data/subset.fasta"
records = FastaReader.readAllAndKeepDefs(filename)
# Each stored entry is a [defline, sequence] pair
for (defline, seq) in records.values():
    print(defline)
    (seqId, attrs) = FastaReader.parseDefline(defline)
    print("id=" + seqId)
    for (attrName, attrValue) in attrs.items():
        print(attrName + "=" + attrValue)

# Write one record to a new file, then append a second record to it
writer = FastaWriter()
writer.writeFasta(">ABCD", "ATCGATCGTAGCTAGTCTGCGCGTATCGTCAGTCTCTATCGATCGTACTGCGATCTAGCTAGCTGATCGTAGCTTCTATGACTGCTAGTCATCTAGCTAGCTGATCGTAGCTGCGCGCGATATATTGCATCTATGCTATCATTGCATGCTAGCTCTAGCTAGTCGATGCTATCTTAGCTAC", "test1.fasta")
writer.appendToFasta(">XYZ", "GATTACA", "test1.fasta")

# `seq` is whichever sequence was assigned last above
print(Translation.translate(seq))
print("forward:", seq)
print("revcomp: ", Translation.reverseComplement(seq))