import os, sys
from mungo.fasta import FastaFile
from optparse import OptionParser

# Copy a fasta file, keeping only the first record seen for each accession.
# The accession is the first whitespace-delimited token of the header.

usage = "%prog <Input file1> <Output file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
parser.add_option('-i', '--stdin', dest='stdin', action='store_true',
                  help='Input from stdin')
parser.add_option('-o', '--stdout', dest='stdout', action='store_true',
                  help='Output to stdout')
options, args = parser.parse_args()

# The positional layout is unchanged: args[0] is the input file and args[1]
# the output file (so with --stdin but without --stdout, args[0] is an unused
# placeholder). Work out how many positionals the chosen options really need,
# so a short command line is reported instead of raising IndexError, and so
# "--stdin --stdout" can run with no positionals at all.
if options.stdout:
    if options.stdin:
        required = 0
    else:
        required = 1
else:
    required = 2

if len(args) == 0 and required > 0:
    parser.print_help()
    sys.exit()
if len(args) < required:
    parser.error('expected %i positional argument(s), got %i'
                 % (required, len(args)))

if options.stdin:
    f = FastaFile(sys.stdin)
else:
    f = FastaFile(args[0])

if options.stdout:
    w = FastaFile(sys.stdout, 'w')
else:
    w = FastaFile(args[1], 'w')

# A set gives constant-time membership tests; the original list made the
# whole run quadratic in the number of distinct accessions.
seen = set()
try:
    for h, s in f:
        acc = h.split()[0]
        if acc not in seen:
            seen.add(acc)
            w.write(h, s)
finally:
    w.close()
"""
fasta_block_split.py <iFilenames> <oFilename>

Copy every record of one or more input fasta files into a single output
FastaFile that is opened with the block size given by -b/--blocksize.

Author: Tony Papenfuss
Date: Fri Mar 30 12:58:58 EST 2007
"""

import os, sys
from mungo.fasta import FastaFile
from optparse import OptionParser

usage = "%prog [-b <block_size>] <input_file1> [<input_file2> ...] <output_file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
parser.add_option("-b", "--blocksize", action="store", type="int",
                  dest="blocksize", default=10000000,
                  help="Block size passed to the output FastaFile "
                       "(default: %default)")
options, args = parser.parse_args()

if len(args) == 0:
    parser.print_help()
    sys.exit()
if len(args) < 2:
    # With a single positional the original treated it as the OUTPUT file and
    # opened it for writing, which would clobber what was probably meant to
    # be the input.
    parser.error("need at least one input file and an output file")

iFilenames = args[:-1]
oFilename = args[-1]

# Honour the -b option; the original parsed it but always passed 10000000.
# NOTE(review): the meaning of blockSize is defined by mungo.fasta.FastaFile.
writer = FastaFile(oFilename, 'w', blockSize=options.blocksize)
try:
    for iFilename in iFilenames:
        for h, s in FastaFile(iFilename):
            writer.write(h, s)
finally:
    writer.close()
#!/usr/bin/env python

"""
exciseRepeats.py

Read the fasta file named by the first command-line argument and write a
copy to "<input>.excised" in which every 'N' character has been removed
from each sequence.

Author: Tony Papenfuss
Date: Fri Mar 21 13:25:51 EST 2008
"""

import os, sys, re
from mungo.fasta import FastaFile


source_path = sys.argv[1]
# Cleaned & now masked reads
excised_path = '%s.excised' % source_path

reader = FastaFile(source_path)
excised = FastaFile(excised_path, 'w')
for header, seq in reader:
    # Splitting on 'N' and re-joining drops every 'N' from the sequence.
    excised.write(header, ''.join(seq.split('N')))
reader.close()
excised.close()
#!/usr/bin/env python

"""
fasta_truncate.py

Copy a fasta file, cutting every sequence down to the slice [0:length].

Author: Tony Papenfuss
Date: July 2013
"""

import sys
import argparse
from mungo.fasta import FastaFile


usage = "%prog <length> <Input file> <Output file>"

parser = argparse.ArgumentParser(description='Truncate fasta files')
# The three positionals, declared in command-line order.
for arg_name, arg_type, arg_help in (
        ('length', int, 'Truncate to length'),
        ('input_filename', str, 'Input filename'),
        ('output_filename', str, 'Output filename')):
    parser.add_argument(arg_name, type=arg_type, help=arg_help)
args = parser.parse_args()

limit = args.length
truncated = FastaFile(args.output_filename, 'w')
for header, seq in FastaFile(args.input_filename):
    truncated.write(header, seq[:limit])
truncated.close()
""" fasta2gffAssembly.py <fasta file pattern> Author: Tony Papenfuss Date: Mon Feb 25 14:54:57 EST 2008 """ import os, sys, glob from mungo.fasta import FastaFile import mungo.gff as gff iFilePattern = sys.argv[1] oFile = open('assembly.gff', 'w') for filename in glob.iglob(iFilePattern): f = FastaFile(filename) for h,L in f.lengthGenerator(): print filename, h, L g = gff.Feature() g.reference = h g.source = 'assembly' g.type = 'chrom' g.start = 1 g.end = L g.group = 'Reference %s' % h oFile.write(str(g) + '\n') oFile.flush() oFile.close()
'--stdin', dest='stdin', action='store_true', help='Input from stdin') parser.add_option('-o', '--stdout', dest='stdout', action='store_true', help='Output to stdout') options, args = parser.parse_args() if len(args) == 0: parser.print_help() sys.exit() if options.stdin: f = FastaFile(sys.stdin) else: f = FastaFile(args[0]) if options.stdout: w = FastaFile(sys.stdout, 'w') else: w = FastaFile(args[1], 'w') accList = [] for h, s in f: acc = h.split()[0] if not acc in accList: w.write(h, s) accList.append(acc) w.close()
#!/usr/bin/env python

"""
fastaSlice.py <fasta file> <first entry> <number>

Open <fasta file> as an indexed FastaFile, seek to <first entry> and write
up to <number> consecutive entries to stdout.

Author: Tony Papenfuss
Date: Wed Apr 23 14:49:40 EST 2008
"""

import os, sys
from mungo.fasta import FastaFile


# All three positionals are required; the original only caught the
# no-argument case and raised IndexError when one or two were given.
if len(sys.argv) < 4 or '-h' in sys.argv:
    sys.exit(__doc__)

iFilename = sys.argv[1]
start = int(sys.argv[2])
n = int(sys.argv[3])

f = FastaFile(iFilename, indexed=True)
# NOTE(review): whether <first entry> is 0- or 1-based is decided by
# FastaFile.seek -- confirm against mungo.fasta.
f.seek(start)

w = FastaFile(sys.stdout, 'w')
# A request for zero entries writes nothing. The original compared the count
# only after incrementing it, so n == 0 never matched and every remaining
# entry was written. A negative n still reads to the end, as before.
if n != 0:
    for count, (h, s) in enumerate(f, 1):
        w.write(h, s)
        if count == n:
            break
#!/usr/bin/env python

"""
fastaTotalLength.py

Print the sum of all sequence lengths across the given fasta files.

Author: Tony Papenfuss
Date: Wed Jun 25 11:13:57 EST 2008
"""

import os, sys
from mungo.fasta import FastaFile
from optparse import OptionParser


usage = "%prog <Input file1> [<Input file2> ...]"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()

if len(args) == 0:
    parser.print_help()
    sys.exit()

grand_total = 0
for path in args:
    lengths = (length for _header, length in FastaFile(path).lengthGenerator())
    # Seed sum() with the running total so the additions happen in the same
    # order as a plain accumulation loop.
    grand_total = sum(lengths, grand_total)

print(grand_total)
#!/usr/bin/env python

"""
fastaOneCharPerLine.py [--stdin] <filename>

Print each record as a '>header' line, then the sequence one character per
line, then a blank line.

Author: Tony Papenfuss
Date: Wed Apr 11 21:41:25 EST 2007
"""

import sys
from optparse import OptionParser
from mungo.fasta import FastaFile


if len(sys.argv) == 1:
    sys.exit(__doc__)

# The usage text now names the option that is actually defined; the original
# advertised a bare '-' that this script never handled.
usage = "%prog [-s|--stdin] [<input fasta file>]"
parser = OptionParser(usage=usage, version="%prog - Version 1")
parser.add_option('-s', '--stdin', action='store_true', dest='stdin')
options, args = parser.parse_args()

# The original tested "options.stdin in args", i.e. whether True/None is an
# element of a list of strings. That is always false, so --stdin was ignored
# and "--stdin" on its own crashed on args[0].
if options.stdin:
    faFile = FastaFile(sys.stdin)
else:
    faFile = FastaFile(args[0])

for header, seq in faFile:
    print('>%s' % header)
    for b in seq:
        print(b)
    # Blank separator line after each record.
    print('')
parser.add_option("-o", "--output", dest="oFilename", help="Output filename", default=None)
parser.add_option("-w", "--width", dest="width", type="int", help="Sequence width", default=60)
# sys.argv is passed explicitly, so args[0] is the program name and args[1]
# is the input ('-' selects stdin).
options, args = parser.parse_args(sys.argv)

if len(args) != 2:
    sys.exit(__doc__)

if args[1] != '-':
    faFile = FastaFile(args[1])
else:
    faFile = FastaFile(sys.stdin)

if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout

# Write each record as a '>header' line followed by the translated sequence
# formatted by pretty() at the requested width.
try:
    for header, seq in faFile:
        protein = sequence.translate(seq)
        oFile.write('>%s\n' % header)
        oFile.write(str(pretty(protein, width=options.width)) + '\n')
finally:
    # Close the output only when this script opened it; the original never
    # closed the file created for -o. stdout is left alone.
    if oFile is not sys.stdout:
        oFile.close()
#!/usr/bin/env python

"""
fastaMaxLength.py

Print the length of the longest sequence in the fasta file named by the
first command-line argument (-1 if the file yields no records).

Author: Tony Papenfuss
Date: Wed Jul 8 11:53:34 EST 2009
"""

import os, sys
from mungo.fasta import FastaFile
import numpy


iFilename = sys.argv[1]

longest = -1
for header, seq in FastaFile(iFilename):
    seq_length = len(seq)
    if seq_length > longest:
        longest = seq_length

print(longest)
#!/usr/bin/env python

"""
exciseRepeats.py

Write "<input>.excised": a copy of the input fasta file (first command-line
argument) with every "N" deleted from the sequences.

Author: Tony Papenfuss
Date: Fri Mar 21 13:25:51 EST 2008
"""

import os, sys, re
from mungo.fasta import FastaFile


def _without_n(seq):
    """Return seq with every "N" character removed."""
    return seq.replace("N", "")


in_path = sys.argv[1]
# Cleaned & now masked reads
out_path = in_path + ".excised"

fasta_in = FastaFile(in_path)
fasta_out = FastaFile(out_path, "w")
for name, bases in fasta_in:
    fasta_out.write(name, _without_n(bases))
fasta_in.close()
fasta_out.close()
#!/usr/bin/env python

"""
fastaHeader.py <fasta file>

Print the header of every record in the fasta file, one per line.

Author: Tony Papenfuss
Date: Mon May 7 09:09:27 EST 2007
"""

import os, sys
from mungo.fasta import FastaFile


records = FastaFile(sys.argv[1])
for header, _seq in records:
    print(header)
"""
Usage: <Input file1> [<Input file2> ...] <Output file>

Append the records of each input fasta file, in command-line order, to a
single output fasta file. A "<input>--><output>" progress line is written to
stderr for each input.

Author: Tony Papenfuss
Date: Thu Mar 6 11:18:10 EST 2008
"""

import sys
import glob
from mungo.fasta import FastaFile
from optparse import OptionParser

usage = "%prog <Input file1> [<Input file2> ...] <Output file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()

if len(args) == 0:
    parser.print_help()
    sys.exit()
if len(args) < 2:
    # A lone positional used to be taken as the OUTPUT file and opened for
    # writing, which would overwrite what was most likely meant as the input.
    parser.error("need at least one input file and an output file")

iFilenames = args[:-1]
oFilename = args[-1]

writer = FastaFile(oFilename, 'w')
try:
    for iFilename in iFilenames:
        sys.stderr.write('%s-->%s\n' % (iFilename, oFilename))
        f = FastaFile(iFilename)
        try:
            for h, s in f:
                writer.write(h, s)
        finally:
            f.close()
        writer.flush()
finally:
    # Always release the output, even if an input fails part-way through.
    writer.close()
#!/usr/bin/env python

"""
fasta_average_length.py

For each fasta file given, print "<filename><TAB><average sequence length>",
with the average truncated to an integer. A file with no records is reported
with an average of 0.

Author: Tony Papenfuss
Date: Wed Mar 26 15:03:28 EST 2008
"""

import sys
from mungo.fasta import FastaFile
from optparse import OptionParser

usage = "%prog <Input file1> [<Input file2> ...]"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()

if len(args) == 0:
    parser.print_help()
    sys.exit()

for filename in args:
    n = 0
    total = 0
    for h, s in FastaFile(filename):
        n += 1
        total += len(s)

    if n == 0:
        # The original divided by zero here and aborted the whole run.
        average = 0
    else:
        # Integer floor division gives the same truncated value that '%i'
        # produced from the float quotient, without float rounding on very
        # large totals.
        average = total // n
    print('%s\t%i' % (filename, average))
"""
Usage: <Input file1> [<Input file2> ...] <Output file>

Concatenate fasta files: every record of every input file is written, in
order, to the output file. Progress ("<input>--><output>") goes to stderr.

Author: Tony Papenfuss
Date: Thu Mar 6 11:18:10 EST 2008
"""

import sys
import glob
from mungo.fasta import FastaFile
from optparse import OptionParser

usage = "%prog <Input file1> [<Input file2> ...] <Output file>"
parser = OptionParser(usage=usage, version="%prog - Version 1")
options, args = parser.parse_args()

if len(args) == 0:
    parser.print_help()
    sys.exit()

iFilenames = args[:-1]
oFilename = args[-1]

if not iFilenames:
    # Only one positional was supplied. Previously it was treated as the
    # output and opened in "w" mode, silently truncating a file the user
    # probably intended as input.
    parser.error("at least one input file is required before the output file")

writer = FastaFile(oFilename, "w")
try:
    for iFilename in iFilenames:
        sys.stderr.write("%s-->%s\n" % (iFilename, oFilename))
        f = FastaFile(iFilename)
        try:
            for h, s in f:
                writer.write(h, s)
        finally:
            f.close()
        writer.flush()
finally:
    # Close the output whether or not every input was read successfully.
    writer.close()
""" fastaOneLine.py [- <] <filename> Author: Tony Papenfuss Date: Wed Apr 11 21:41:25 EST 2007 """ import sys from optparse import OptionParser from mungo.fasta import FastaFile if len(sys.argv)==1: sys.exit(__doc__) usage = "%prog [- <] <input fasta file>" parser = OptionParser(usage=usage, version="%prog - Version 1") options, args = parser.parse_args(sys.argv) if '-' in args: iFile = sys.stdin else: iFile = open(sys.argv[-1]) for header,seq in FastaFile(iFile): print '>%s' % header print seq print
#!/usr/bin/env python

"""
fasta_truncate.py

Write a copy of the input fasta file in which each sequence is replaced by
its slice [0:length].

Author: Tony Papenfuss
Date: July 2013
"""

import sys
import argparse
from mungo.fasta import FastaFile


usage = "%prog <length> <Input file> <Output file>"

parser = argparse.ArgumentParser(description='Truncate fasta files')
parser.add_argument('length', help='Truncate to length', type=int)
parser.add_argument('input_filename', help='Input filename', type=str)
parser.add_argument('output_filename', help='Output filename', type=str)
args = parser.parse_args()

# The same slice is applied to every sequence, so build it once.
keep = slice(0, args.length)

source = FastaFile(args.input_filename)
sink = FastaFile(args.output_filename, 'w')
for name, bases in source:
    sink.write(name, bases[keep])
sink.close()
#!/usr/bin/env python

"""
fastaIndex.py <fasta file>

Open <fasta file> as a FastaFile with indexed=True.

Author: Tony Papenfuss
Date: Wed Apr 23 14:49:40 EST 2008
"""

import os, sys
from mungo.fasta import FastaFile


wants_help = '-h' in sys.argv
no_arguments = len(sys.argv) == 1
if wants_help or no_arguments:
    sys.exit(__doc__)

iFilename = sys.argv[1]
# NOTE(review): presumably constructing with indexed=True builds or loads the
# index as a side effect -- confirm against mungo.fasta.
f = FastaFile(iFilename, indexed=True)
""" fastaRename.py <iFilename> <oFilename> Author: Tony Papenfuss Date: Fri Mar 7 14:17:14 EST 2008 """ import sys from mungo.fasta import FastaFile iFilename = sys.argv[1] oFilename = sys.argv[2] def getSpp(line): token = line.split('[')[-1].split(']')[0] return token writer= FastaFile(oFilename, 'w') for h,s in FastaFile(iFilename): tokens = h.split() name = tokens[0] spp = getSpp(h) sppParts = spp.split() h2 = "%s%s_%s" % (sppParts[0][0:5], sppParts[1][0:2].title(), name) writer.write(h2, s) writer.close()