Example #1
0

def try_int(s):
    """Convert to integer if possible.

    Returns int(s) when the conversion succeeds; otherwise returns s
    itself, unchanged.
    """
    try:
        return int(s)
    # Only conversion failures mean "not an integer".  A bare except would
    # also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return s


def natsort_key(s):
    "Used internally to get the list of int/text chunks by which s is sorted."
    import re
    # Split s into alternating runs of digits and non-digits, then turn
    # each digit run into an int so numbers compare by value.
    chunks = re.findall(r'(\d+|\D+)', s)
    return [try_int(chunk) for chunk in chunks]


if len(sys.argv)==1: sys.exit(__doc__)

usage = "%prog [- <] <input fasta file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args(sys.argv)

if args[1]=='-':
    seqs = fasta.FastaFile(sys.stdin).readAll()
else:
    seqs = fasta.FastaFile(args[1]).readAll()

seqs.sort(key=lambda x: natsort_key(x[0]))
for h,s in seqs:
    print '>%s' % h
    print fasta.pretty(s)
    print
Example #2
0
#!/usr/bin/env python

"""
reverse_comp.py <filename>

Prints the reverse complement of a DNA string (in fasta format).
"""

import sys

from mungo import fasta
from mungo import sequence


if len(sys.argv)!=2 or '-h' in sys.argv or '--help' in sys.argv:
    sys.exit(__doc__)

for h,s in fasta.FastaFile(sys.argv[1]):
    rc = sequence.reverseComplement(s.upper())
    
    print '>%s' % h
    print fasta.pretty(rc)
Example #3
0
"""
fastaExtract.py <fasta file> <accession> <start> <end>

Extract sequence between given start & end coordinates from fasta file.
"""

import sys
from mungo.fasta import FastaFile, pretty

if '-h' in sys.argv:
    sys.exit(__doc__)

iFilename = sys.argv[1]
accession = sys.argv[2]
try:
    start = int(sys.argv[3])
    end = int(sys.argv[4])
except:
    start = None
    end = None

for h,s in FastaFile(iFilename):
    tokens = h.split()
    if tokens[0]==accession:
        print '>%s:%s-%s' % (tokens[0],start,end)
        if start:
            print pretty(s[start-1:end])
        else:
            print pretty(s)
        break
Example #4
0
# -o/--output: where to write the result (default None selects stdout below).
parser.add_option("-o",
                  "--output",
                  dest="oFilename",
                  help="Output filename",
                  default=None)
# -w/--width: integer width passed to pretty() for the sequence lines.
parser.add_option("-w",
                  "--width",
                  dest="width",
                  type="int",
                  help="Sequence width",
                  default=60)

# parse_args is given the full argv, so args[0] is the program name.
options, args = parser.parse_args(sys.argv)

# Exactly one positional argument (the input) is required.
if len(args) != 2: sys.exit(__doc__)

# '-' selects standard input; anything else is handed to FastaFile as given.
if args[1] != '-':
    faFile = FastaFile(args[1])
else:
    faFile = FastaFile(sys.stdin)

if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout

try:
    # Write each record's header followed by its translated sequence.
    for header, seq in faFile:
        protein = sequence.translate(seq)
        print >> oFile, '>%s' % header
        print >> oFile, pretty(protein, width=options.width)
finally:
    # Close (and thereby flush) a file this script opened, even if
    # translation fails part-way; sys.stdout is left open.
    if oFile is not sys.stdout:
        oFile.close()
Example #5
0
from mungo.fasta import FastaFile, pretty
from mungo import sequence


usage = "%prog [options] <fasta file>"
parser = OptionParser(usage=usage)
# -o/--output: where to write the result (default None selects stdout below).
parser.add_option("-o", "--output", dest="oFilename",
  help="Output filename", default=None)
# -w/--width: integer width passed to pretty() for the sequence lines.
parser.add_option("-w", "--width", dest="width", type="int",
  help="Sequence width", default=60)

# parse_args is given the full argv, so args[0] is the program name.
options, args = parser.parse_args(sys.argv)

# Exactly one positional argument (the input) is required.
if len(args)!=2: sys.exit(__doc__)


# '-' selects standard input; anything else is handed to FastaFile as given.
if args[1]!='-':
    faFile = FastaFile(args[1])
else:
    faFile = FastaFile(sys.stdin)

if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout

try:
    # Write each record's header followed by its translated sequence.
    for header,seq in faFile:
        protein = sequence.translate(seq)
        print >> oFile, '>%s' % header
        print >> oFile, pretty(protein, width=options.width)
finally:
    # Close (and thereby flush) a file this script opened, even if
    # translation fails part-way; sys.stdout is left open.
    if oFile is not sys.stdout:
        oFile.close()
Example #6
0
"""
fastaExtract.py <fasta file> <accession> <start> <end>

Extract sequence between given start & end coordinates from fasta file.
"""

import sys
from mungo.fasta import FastaFile, pretty

if '-h' in sys.argv:
    sys.exit(__doc__)

iFilename = sys.argv[1]
accession = sys.argv[2]
try:
    start = int(sys.argv[3])
    end = int(sys.argv[4])
except:
    start = None
    end = None

for h, s in FastaFile(iFilename):
    tokens = h.split()
    if tokens[0] == accession:
        print '>%s:%s-%s' % (tokens[0], start, end)
        if start:
            print pretty(s[start - 1:end])
        else:
            print pretty(s)
        break
Example #7
0
def try_int(s):
    """Convert to integer if possible.

    Returns int(s) when the conversion succeeds; otherwise returns s
    itself, unchanged.
    """
    try:
        return int(s)
    # Only conversion failures mean "not an integer".  A bare except would
    # also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return s


def natsort_key(s):
    "Used internally to get the list of int/text chunks by which s is sorted."
    import re
    # Split s into alternating runs of digits and non-digits, then turn
    # each digit run into an int so numbers compare by value.
    chunks = re.findall(r'(\d+|\D+)', s)
    return [try_int(chunk) for chunk in chunks]


if len(sys.argv) == 1: sys.exit(__doc__)

usage = "%prog [- <] <input fasta file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args(sys.argv)

if args[1] == '-':
    seqs = fasta.FastaFile(sys.stdin).readAll()
else:
    seqs = fasta.FastaFile(args[1]).readAll()

seqs.sort(key=lambda x: natsort_key(x[0]))
for h, s in seqs:
    print '>%s' % h
    print fasta.pretty(s)
    print