Esempio n. 1
0
import os, sys
from mungo.fasta import FastaFile

from optparse import OptionParser
usage = "%prog <Input file1> <Output file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
parser.add_option('-i', '--stdin', dest='stdin', action='store_true', help='Input from stdin')
parser.add_option('-o', '--stdout', dest='stdout', action='store_true', help='Output to stdout')
options, args = parser.parse_args()
if len(args)==0:
    parser.print_help()
    sys.exit()

# The output path is always the second positional argument (even with
# --stdin, where the first positional is ignored). Fail with a usage error
# instead of an IndexError traceback when it is missing.
if not options.stdout and len(args) < 2:
    parser.error('an output file is required unless --stdout is given')

# Input: stdin when -i/--stdin is given, otherwise the first positional.
if options.stdin:
    f = FastaFile(sys.stdin)
else:
    f = FastaFile(args[0])

# Output: stdout when -o/--stdout is given, otherwise the second positional.
if options.stdout:
    w = FastaFile(sys.stdout, 'w')
else:
    w = FastaFile(args[1], 'w')

# Write only the first record seen for each accession, where the accession
# is the first whitespace-delimited token of the header. A set keeps the
# membership test O(1); the previous list made the whole pass quadratic.
seenAccs = set()
for h,s in f:
    acc = h.split()[0]
    if acc not in seenAccs:
        w.write(h,s)
        seenAccs.add(acc)
w.close()
Esempio n. 2
0
fasta_block_split.py <iFilenames> <oFilename>

Author: Tony Papenfuss
Date: Fri Mar 30 12:58:58 EST 2007

"""

import os, sys
from mungo.fasta import FastaFile

from optparse import OptionParser
usage = "%prog [-b <block_size>] <input_file1> [<input_file2> ...] <output_file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
parser.add_option("-b", "--blocksize", action="store", type="int",
    dest="blocksize", default=10000000,
    help="Block size passed to the output writer [default: %default]")

options, args = parser.parse_args()
# At least one input plus the output are required. With a single argument
# that argument would be opened for writing and an existing file clobbered.
if len(args) < 2:
    parser.print_help()
    sys.exit()


# The last positional argument is the output; everything before it is input.
iFilenames = args[:-1]
oFilename = args[-1]

# Honour the -b/--blocksize option (it was previously parsed but ignored in
# favour of a hard-coded value equal to the default).
writer = FastaFile(oFilename, 'w', blockSize=options.blocksize)
for iFilename in iFilenames:
    for h,s in FastaFile(iFilename):
        writer.write(h,s)
writer.close()
Esempio n. 3
0
#!/usr/bin/env python

"""
exciseRepeats.py

Author: Tony Papenfuss
Date: Fri Mar 21 13:25:51 EST 2008

"""

import os, sys, re
from mungo.fasta import FastaFile


# The input path is the first command-line argument; the output is written
# next to it with an '.excised' suffix.
iFilename = sys.argv[1]
oFilename = iFilename + '.excised'

# Copy every record, dropping all upper-case 'N' characters from the sequence.
fi = FastaFile(iFilename)
fo = FastaFile(oFilename, 'w')
for header, seq in fi:
    stripped = seq.replace('N', '')
    fo.write(header, stripped)
fi.close()
fo.close()
Esempio n. 4
0
#!/usr/bin/env python

"""
fasta_truncate.py

Author: Tony Papenfuss
Date: July 2013

"""

import sys
import argparse
from mungo.fasta import FastaFile


usage = "%prog <length> <Input file> <Output file>"

# Three required positionals, declared from a table: name, type, help text.
parser = argparse.ArgumentParser(description='Truncate fasta files')
for argName, argType, argHelp in (
        ('length', int, 'Truncate to length'),
        ('input_filename', str, 'Input filename'),
        ('output_filename', str, 'Output filename')):
    parser.add_argument(argName, type=argType, help=argHelp)
args = parser.parse_args()

# Write each record with its sequence cut down to the first `length` characters.
w = FastaFile(args.output_filename, 'w')
for header, seq in FastaFile(args.input_filename):
    truncated = seq[:args.length]
    w.write(header, truncated)
w.close()
Esempio n. 5
0
"""
fasta2gffAssembly.py <fasta file pattern>

Author: Tony Papenfuss
Date: Mon Feb 25 14:54:57 EST 2008

"""

import os, sys, glob
from mungo.fasta import FastaFile
import mungo.gff as gff


iFilePattern = sys.argv[1]

oFile = open('assembly.gff', 'w')
for filename in glob.iglob(iFilePattern):
    f = FastaFile(filename)
    for h,L in f.lengthGenerator():
        print filename, h, L
        g = gff.Feature()
        g.reference = h
        g.source = 'assembly'
        g.type = 'chrom'
        g.start = 1
        g.end = L
        g.group = 'Reference %s' % h
        oFile.write(str(g) + '\n')
        oFile.flush()
oFile.close()
Esempio n. 6
0
                  '--stdin',
                  dest='stdin',
                  action='store_true',
                  help='Input from stdin')
parser.add_option('-o',
                  '--stdout',
                  dest='stdout',
                  action='store_true',
                  help='Output to stdout')
options, args = parser.parse_args()
if len(args) == 0:
    parser.print_help()
    sys.exit()

# The output path is always the second positional argument (even with
# --stdin, where the first positional is ignored). Fail with a usage error
# instead of an IndexError traceback when it is missing.
if not options.stdout and len(args) < 2:
    parser.error('an output file is required unless --stdout is given')

# Input: stdin when --stdin is given, otherwise the first positional.
if options.stdin:
    f = FastaFile(sys.stdin)
else:
    f = FastaFile(args[0])

# Output: stdout when --stdout is given, otherwise the second positional.
if options.stdout:
    w = FastaFile(sys.stdout, 'w')
else:
    w = FastaFile(args[1], 'w')

# Write only the first record seen for each accession, where the accession
# is the first whitespace-delimited token of the header. A set keeps the
# membership test O(1); the previous list made the whole pass quadratic.
seenAccs = set()
for h, s in f:
    acc = h.split()[0]
    if acc not in seenAccs:
        w.write(h, s)
        seenAccs.add(acc)
w.close()
Esempio n. 7
0
#!/usr/bin/env python

"""
fastaSlice.py <fasta file> <first entry> <number>

Author: Tony Papenfuss
Date: Wed Apr 23 14:49:40 EST 2008

"""

import os, sys
from mungo.fasta import FastaFile


# All three positional arguments (<fasta file> <first entry> <number>) are
# read below, so show the usage text whenever any of them is missing rather
# than failing later with an IndexError.
if len(sys.argv) < 4 or '-h' in sys.argv:
    sys.exit(__doc__)

iFilename = sys.argv[1]
start = int(sys.argv[2])
n = int(sys.argv[3])

# NOTE(review): seek() on an indexed FastaFile presumably positions the
# reader at entry number `start` -- confirm against mungo.fasta.
f = FastaFile(iFilename, indexed=True)
f.seek(start)

# Copy entries to stdout, stopping once n have been written. The counter is
# compared after incrementing, so n <= 0 never matches and every remaining
# entry is written (unchanged from the original behaviour).
w = FastaFile(sys.stdout, 'w')
count = 0
for h,s in f:
    w.write(h,s)
    count += 1
    if count==n: break
# Close the writer as the sibling scripts do, so nothing is left unflushed.
w.close()
Esempio n. 8
0
#!/usr/bin/env python
"""
fastaTotalLength.py

Author: Tony Papenfuss
Date: Wed Jun 25 11:13:57 EST 2008

"""

import os, sys
from mungo.fasta import FastaFile

from optparse import OptionParser
usage = "%prog <Input file1> [<Input file2> ...]"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()
if len(args) == 0:
    parser.print_help()
    sys.exit()

total = 0L
for filename in args:
    f = FastaFile(filename)
    for h, L in f.lengthGenerator():
        total += L
print total
Esempio n. 9
0
#!/usr/bin/env python
"""
fastaOneCharPerLine.py [--stdin] <filename>

Author: Tony Papenfuss
Date: Wed Apr 11 21:41:25 EST 2007

"""

import sys
from optparse import OptionParser
from mungo.fasta import FastaFile

if len(sys.argv) == 1: sys.exit(__doc__)

usage = "%prog [- <] <input fasta file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
parser.add_option('-s', '--stdin', action='store_true', dest='stdin')
options, args = parser.parse_args()

if options.stdin in args:
    faFile = FastaFile(sys.stdin)
else:
    faFile = FastaFile(args[0])

for header, seq in faFile:
    print '>%s' % header
    for b in seq:
        print b
    print
Esempio n. 10
0
parser.add_option("-o", "--output", dest="oFilename",
                  help="Output filename", default=None)
parser.add_option("-w", "--width", dest="width", type="int",
                  help="Sequence width", default=60)

# sys.argv is passed explicitly, so args[0] is the program name and the
# single expected positional (a filename or '-') is args[1].
options, args = parser.parse_args(sys.argv)

if len(args) != 2: sys.exit(__doc__)

# '-' selects stdin; anything else is treated as a fasta filename.
faFile = FastaFile(sys.stdin if args[1] == '-' else args[1])

# Write to the file named by -o/--output when given, otherwise to stdout.
oFile = open(options.oFilename, 'w') if options.oFilename else sys.stdout

# Translate each sequence, then print its header followed by the formatted
# translation at the requested width.
for header, seq in faFile:
    translated = sequence.translate(seq)
    print >> oFile, '>%s' % header
    print >> oFile, pretty(translated, width=options.width)
Esempio n. 11
0
#!/usr/bin/env python
"""
fastaMaxLength.py

Author: Tony Papenfuss
Date: Wed Jul  8 11:53:34 EST 2009

"""

import os, sys
from mungo.fasta import FastaFile
import numpy

iFilename = sys.argv[1]
maxLength = -1
for h, s in FastaFile(iFilename):
    maxLength = max(maxLength, len(s))
print maxLength
Esempio n. 12
0
#!/usr/bin/env python

"""
exciseRepeats.py

Author: Tony Papenfuss
Date: Fri Mar 21 13:25:51 EST 2008

"""

import os, sys, re
from mungo.fasta import FastaFile


# The input path is the first command-line argument; the output is written
# next to it with an ".excised" suffix.
iFilename = sys.argv[1]
oFilename = iFilename + ".excised"

# Copy every record, dropping all upper-case "N" characters from the sequence.
fi = FastaFile(iFilename)
fo = FastaFile(oFilename, "w")
for header, seq in fi:
    fo.write(header, seq.replace("N", ""))
fi.close()
fo.close()
Esempio n. 13
0
#!/usr/bin/env python
"""
fastaHeader.py <fasta file>

Author: Tony Papenfuss
Date: Mon May  7 09:09:27 EST 2007

"""

import os, sys
from mungo.fasta import FastaFile

for h, s in FastaFile(sys.argv[1]):
    print h
Esempio n. 14
0
Author: Tony Papenfuss
Date: Thu Mar  6 11:18:10 EST 2008

"""

import sys
import glob
from mungo.fasta import FastaFile

from optparse import OptionParser
usage = "%prog <Input file1> [<Input file2> ...] <Output file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()
# At least one input plus the output are required. With a single argument
# that argument would be opened for writing and an existing file clobbered.
if len(args) < 2:
    parser.print_help()
    sys.exit()

# The last positional argument is the output; everything before it is input.
iFilenames = args[:-1]
oFilename = args[-1]

# Append every record of every input file, in order, to the one output file.
writer = FastaFile(oFilename, 'w')
for iFilename in iFilenames:
    # Progress goes to stderr.
    print >> sys.stderr, '%s-->%s' % (iFilename, oFilename)
    f = FastaFile(iFilename)
    for h, s in f:
        writer.write(h, s)
    f.close()
    # Flush after each input file.
    writer.flush()
writer.close()
Esempio n. 15
0
#!/usr/bin/env python

"""
fasta_average_length.py

Author: Tony Papenfuss
Date: Wed Mar 26 15:03:28 EST 2008
"""

import sys
from mungo.fasta import FastaFile

from optparse import OptionParser
usage = "%prog <Input file1> [<Input file2> ...]"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()
if len(args)==0:
    parser.print_help()
    sys.exit()


for filename in args:
    n = 0
    average = 0L
    for h,s in FastaFile(filename):
        n += 1
        average += len(s)
    print '%s\t%i' % (filename, float(average)/n)
Esempio n. 16
0
Author: Tony Papenfuss
Date: Thu Mar  6 11:18:10 EST 2008

"""

import sys
import glob
from mungo.fasta import FastaFile

from optparse import OptionParser

usage = "%prog <Input file1> [<Input file2> ...] <Output file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()
# At least one input plus the output are required. With a single argument
# that argument would be opened for writing and an existing file clobbered.
if len(args) < 2:
    parser.print_help()
    sys.exit()

# The last positional argument is the output; everything before it is input.
iFilenames = args[:-1]
oFilename = args[-1]

# Append every record of every input file, in order, to the one output file.
writer = FastaFile(oFilename, "w")
for iFilename in iFilenames:
    # Progress goes to stderr.
    print >>sys.stderr, "%s-->%s" % (iFilename, oFilename)
    f = FastaFile(iFilename)
    for h, s in f:
        writer.write(h, s)
    f.close()
    # Flush after each input file.
    writer.flush()
writer.close()
Esempio n. 17
0
"""
fastaOneLine.py [- <] <filename>

Author: Tony Papenfuss
Date: Wed Apr 11 21:41:25 EST 2007

"""

import sys
from optparse import OptionParser
from mungo.fasta import FastaFile


if len(sys.argv)==1: sys.exit(__doc__)

usage = "%prog [- <] <input fasta file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")

options, args = parser.parse_args(sys.argv)

if '-' in args:
    iFile = sys.stdin
else:
    iFile = open(sys.argv[-1])

for header,seq in FastaFile(iFile):
    print '>%s' % header
    print seq
    print
Esempio n. 18
0
#!/usr/bin/env python
"""
fasta_truncate.py

Author: Tony Papenfuss
Date: July 2013

"""

import sys
import argparse
from mungo.fasta import FastaFile

usage = "%prog <length> <Input file> <Output file>"

# Three required positionals: the target length, then input and output paths.
parser = argparse.ArgumentParser(description='Truncate fasta files')
positionals = [
    ('length', int, 'Truncate to length'),
    ('input_filename', str, 'Input filename'),
    ('output_filename', str, 'Output filename'),
]
for argName, argType, argHelp in positionals:
    parser.add_argument(argName, type=argType, help=argHelp)
args = parser.parse_args()

# Write each record with its sequence cut down to the first `length` characters.
w = FastaFile(args.output_filename, 'w')
for header, seq in FastaFile(args.input_filename):
    w.write(header, seq[:args.length])
w.close()
Esempio n. 19
0
#!/usr/bin/env python
"""
fastaIndex.py <fasta file>

Author: Tony Papenfuss
Date: Wed Apr 23 14:49:40 EST 2008

"""

import os, sys
from mungo.fasta import FastaFile

# Show the module docstring as usage when asked for help or given no arguments.
if '-h' in sys.argv or len(sys.argv) == 1:
    sys.exit(__doc__)

# Open the file in indexed mode; nothing else is done with it here.
# NOTE(review): presumably constructing with indexed=True builds the index
# as a side effect -- confirm against mungo.fasta.
iFilename = sys.argv[1]
f = FastaFile(iFilename, indexed=True)
Esempio n. 20
0
"""
fastaRename.py <iFilename> <oFilename>

Author: Tony Papenfuss
Date: Fri Mar  7 14:17:14 EST 2008

"""

import sys
from mungo.fasta import FastaFile


# Positional arguments: input fasta path, then output fasta path.
iFilename, oFilename = sys.argv[1], sys.argv[2]


def getSpp(line):
    """Return the text inside the last '[' ... ']' pair of *line*.

    The result is whatever follows the last '[' (the whole line if there is
    none), cut at the first ']' that comes after it (or left uncut if there
    is none).
    """
    afterBracket = line.rpartition('[')[2]
    return afterBracket.partition(']')[0]


# Rewrite each header as <first 5 chars of the 1st species word><first 2
# chars of the 2nd species word, title-cased>_<first header token>. The
# species text is the bracketed part of the header, as returned by getSpp.
writer = FastaFile(oFilename, 'w')
for header, seq in FastaFile(iFilename):
    accession = header.split()[0]
    speciesWords = getSpp(header).split()
    genusPart = speciesWords[0][:5]
    speciesPart = speciesWords[1][:2].title()
    newHeader = "%s%s_%s" % (genusPart, speciesPart, accession)
    writer.write(newHeader, seq)
writer.close()