def try_int(s):
    """Convert to integer if possible.

    Returns ``int(s)`` when the conversion succeeds, otherwise returns ``s``
    unchanged.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not an integer". A bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return s


def natsort_key(s):
    """Used internally to get a list by which s is sorted.

    ``s`` is split into alternating runs of digits and non-digits; digit runs
    are converted to integers so that, for example, "seq2" sorts before
    "seq10". Returns a list (empty for an empty string).
    """
    import re
    # Build a concrete list rather than relying on what ``map`` returns, so
    # the key is always directly comparable.
    return [try_int(token) for token in re.findall(r'(\d+|\D+)', s)]


def main():
    """Read fasta records, sort them naturally by header and print them.

    The input is the file named by the first positional argument, or stdin
    when that argument is '-'.
    """
    if len(sys.argv) == 1:
        sys.exit(__doc__)

    usage = "%prog [- <] <input fasta file>"
    parser = OptionParser(usage=usage, version="%prog - Version 1")
    options, args = parser.parse_args(sys.argv)

    # args[0] is the program name; the input must be args[1]. Without this
    # check an argv such as ['prog', '--'] raised IndexError.
    if len(args) < 2:
        sys.exit(__doc__)

    if args[1] == '-':
        seqs = fasta.FastaFile(sys.stdin).readAll()
    else:
        seqs = fasta.FastaFile(args[1]).readAll()

    # Each record is indexed as (header, sequence); sort on the header.
    seqs.sort(key=lambda x: natsort_key(x[0]))

    for h, s in seqs:
        print('>%s' % h)
        print(fasta.pretty(s))
        # Blank separator line after every record.
        print('')


# Guard the script body so try_int/natsort_key can be imported without
# running the command-line program.
if __name__ == '__main__':
    main()
#!/usr/bin/env python
"""
reverse_comp.py <filename>

Prints the reverse complement of a DNA string (in fasta format).
"""

import sys
from mungo import fasta
from mungo import sequence

# Exactly one argument (the fasta filename) is accepted; anything else, or an
# explicit help flag, prints the usage text above and exits.
wrong_arg_count = len(sys.argv) != 2
if wrong_arg_count or '-h' in sys.argv or '--help' in sys.argv:
    sys.exit(__doc__)

# Emit every record in input order: header line first, then the
# reverse-complemented sequence formatted by fasta.pretty.
for header, seq in fasta.FastaFile(sys.argv[1]):
    # The sequence is upper-cased before being handed to reverseComplement.
    revcomp = sequence.reverseComplement(seq.upper())
    print('>%s' % header)
    print(fasta.pretty(revcomp))
"""
fastaExtract.py <fasta file> <accession> <start> <end>

Extract sequence between given start & end coordinates from fasta file.
"""

import sys
from mungo.fasta import FastaFile, pretty

# Print usage on request, or when the two mandatory arguments are missing
# (previously a missing argument crashed with an IndexError).
if '-h' in sys.argv or len(sys.argv) < 3:
    sys.exit(__doc__)

iFilename = sys.argv[1]
accession = sys.argv[2]

# <start> and <end> are effectively optional: if either is absent or is not
# an integer, both are dropped and the whole sequence is printed.
try:
    start = int(sys.argv[3])
    end = int(sys.argv[4])
except (IndexError, ValueError):
    start = None
    end = None

for h, s in FastaFile(iFilename):
    tokens = h.split()
    # The accession is the first whitespace-separated word of the header.
    # A blank header has no words and can never match.
    if tokens and tokens[0] == accession:
        # NOTE(review): without coordinates this header reads
        # '>acc:None-None'; kept as-is for output compatibility.
        print('>%s:%s-%s' % (tokens[0], start, end))
        if start:
            # Coordinates are 1-based and inclusive.
            print(pretty(s[start - 1:end]))
        else:
            print(pretty(s))
        # Only the first matching record is reported.
        break
# Command-line options: optional output file and line width for the output.
parser.add_option("-o", "--output", dest="oFilename",
                  help="Output filename", default=None)
parser.add_option("-w", "--width", dest="width", type="int",
                  help="Sequence width", default=60)
options, args = parser.parse_args(sys.argv)

# args[0] is the program name, so exactly one positional argument is required.
if len(args) != 2:
    sys.exit(__doc__)

# '-' selects standard input instead of a named fasta file.
if args[1] != '-':
    faFile = FastaFile(args[1])
else:
    faFile = FastaFile(sys.stdin)

if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout

try:
    for header, seq in faFile:
        # Result of mungo's sequence.translate; presumably the protein
        # translation of seq - confirm against mungo.
        protein = sequence.translate(seq)
        print >> oFile, '>%s' % header
        print >> oFile, pretty(protein, width=options.width)
finally:
    # Close (and thereby flush) a file we opened ourselves, even if
    # translation fails part-way; never close the shared sys.stdout.
    if oFile is not sys.stdout:
        oFile.close()
from mungo.fasta import FastaFile, pretty
from mungo import sequence

usage = "%prog [options] <fasta file>"
parser = OptionParser(usage=usage)
# Command-line options: optional output file and line width for the output.
parser.add_option("-o", "--output", dest="oFilename",
                  help="Output filename", default=None)
parser.add_option("-w", "--width", dest="width", type="int",
                  help="Sequence width", default=60)
options, args = parser.parse_args(sys.argv)

# args[0] is the program name, so exactly one positional argument is required.
if len(args) != 2:
    sys.exit(__doc__)

# '-' selects standard input instead of a named fasta file.
if args[1] != '-':
    faFile = FastaFile(args[1])
else:
    faFile = FastaFile(sys.stdin)

if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout

try:
    for header, seq in faFile:
        # Result of mungo's sequence.translate; presumably the protein
        # translation of seq - confirm against mungo.
        protein = sequence.translate(seq)
        print >> oFile, '>%s' % header
        print >> oFile, pretty(protein, width=options.width)
finally:
    # Close (and thereby flush) a file we opened ourselves, even if
    # translation fails part-way; never close the shared sys.stdout.
    if oFile is not sys.stdout:
        oFile.close()
"""
fastaExtract.py <fasta file> <accession> <start> <end>

Extract sequence between given start & end coordinates from fasta file.
"""

import sys
from mungo.fasta import FastaFile, pretty

# Print usage on request, or when the two mandatory arguments are missing
# (previously a missing argument crashed with an IndexError).
if '-h' in sys.argv or len(sys.argv) < 3:
    sys.exit(__doc__)

iFilename = sys.argv[1]
accession = sys.argv[2]

# <start> and <end> are effectively optional: if either is absent or is not
# an integer, both are dropped and the whole sequence is printed.
try:
    start = int(sys.argv[3])
    end = int(sys.argv[4])
except (IndexError, ValueError):
    start = None
    end = None

for h, s in FastaFile(iFilename):
    tokens = h.split()
    # The accession is the first whitespace-separated word of the header.
    # A blank header has no words and can never match.
    if tokens and tokens[0] == accession:
        # NOTE(review): without coordinates this header reads
        # '>acc:None-None'; kept as-is for output compatibility.
        print('>%s:%s-%s' % (tokens[0], start, end))
        if start:
            # Coordinates are 1-based and inclusive.
            print(pretty(s[start - 1:end]))
        else:
            print(pretty(s))
        # Only the first matching record is reported.
        break
def try_int(s):
    """Convert to integer if possible.

    Returns ``int(s)`` when the conversion succeeds, otherwise returns ``s``
    unchanged.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not an integer". A bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return s


def natsort_key(s):
    """Used internally to get a list by which s is sorted.

    ``s`` is split into alternating runs of digits and non-digits; digit runs
    are converted to integers so that, for example, "seq2" sorts before
    "seq10". Returns a list (empty for an empty string).
    """
    import re
    # Build a concrete list rather than relying on what ``map`` returns, so
    # the key is always directly comparable.
    return [try_int(token) for token in re.findall(r'(\d+|\D+)', s)]


def main():
    """Read fasta records, sort them naturally by header and print them.

    The input is the file named by the first positional argument, or stdin
    when that argument is '-'.
    """
    if len(sys.argv) == 1:
        sys.exit(__doc__)

    usage = "%prog [- <] <input fasta file>"
    parser = OptionParser(usage=usage, version="%prog - Version 1")
    options, args = parser.parse_args(sys.argv)

    # args[0] is the program name; the input must be args[1]. Without this
    # check an argv such as ['prog', '--'] raised IndexError.
    if len(args) < 2:
        sys.exit(__doc__)

    if args[1] == '-':
        seqs = fasta.FastaFile(sys.stdin).readAll()
    else:
        seqs = fasta.FastaFile(args[1]).readAll()

    # Each record is indexed as (header, sequence); sort on the header.
    seqs.sort(key=lambda x: natsort_key(x[0]))

    for h, s in seqs:
        print('>%s' % h)
        print(fasta.pretty(s))
        # Blank separator line after every record.
        print('')


# Guard the script body so try_int/natsort_key can be imported without
# running the command-line program.
if __name__ == '__main__':
    main()