#!/usr/bin/env python
# Flatten every record of a sequence file to one tab-separated line on stdout.
import sys


def record_to_line(record):
    """Return *record* flattened to a single tab-separated line.

    Records exposing a ``desc`` attribute are written as
    ``name, seq, desc, qual`` (presumably FASTQ records -- confirm against
    seqio); every other record is written as ``name, seq``.
    """
    if hasattr(record, "desc"):
        fields = [record.name, record.seq, record.desc, record.qual]
    else:
        fields = [record.name, record.seq]
    return "\t".join(fields)


def main():
    """Print one tab-separated line per record of the file named in argv[1]."""
    # Project-local readers are imported here so record_to_line() stays
    # importable without seqio/nucio on the path.
    from seqio import iteratorFromExtension
    from nucio import fileIterator

    if len(sys.argv) != 2:
        sys.exit("sequencToLine.py in.{fa,fq}\n")

    path = sys.argv[1]
    for record in fileIterator(path, iteratorFromExtension(path)):
        # Single-argument print() behaves identically on Python 2 and 3.
        print(record_to_line(record))


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# Print every record of a sequence file with its sequence reverse-complemented.
import sys


def reverse_complement_record(record, revcomp):
    """Return a copy of *record* with its sequence reverse-complemented.

    Args:
        record: namedtuple-style record (must support ``_replace``) with a
            ``seq`` field and, optionally, a ``qual`` field.
        revcomp: callable mapping a sequence string to its reverse
            complement.

    When the record carries a ``qual`` string it is reversed as well, so
    each quality value stays attached to the base it was reported for.
    """
    changes = {"seq": revcomp(record.seq)}
    qual = getattr(record, "qual", None)
    if qual is not None:
        changes["qual"] = qual[::-1]
    return record._replace(**changes)


def main():
    """Reverse-complement each record of the file named in argv[1]."""
    # Project-local modules are imported here so the helper above stays
    # importable without seqio/nucio/misc on the path.
    from seqio import iteratorFromExtension, recordToString
    from nucio import fileIterator
    from misc import reverse_complement

    if len(sys.argv) != 2:
        sys.exit("reverseComplement.py in.{fa,fq}")

    path = sys.argv[1]
    for record in fileIterator(path, iteratorFromExtension(path)):
        flipped = reverse_complement_record(record, reverse_complement)
        # Single-argument print() behaves identically on Python 2 and 3.
        print(recordToString(flipped))


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# Downsample a library
import sys


def downsample_records(records, max_bases, length_of):
    """Yield records until the requested number of bases has been emitted.

    Args:
        records: iterable of records exposing a ``seq`` string.
        max_bases: base budget; iteration stops once the running total of
            emitted bases reaches it. The last record emitted may carry the
            total past the budget.
        length_of: callable returning the number of bases in a record.

    Records whose sequence contains an ``N`` are skipped and do not count
    toward the budget.
    """
    total_bases = 0
    for record in records:
        # ``>=`` rather than ``>``: landing exactly on the budget must not
        # let one extra record through. Checked before the N filter so no
        # further input is consumed once the budget is met.
        if total_bases >= max_bases:
            break
        if "N" in record.seq:
            continue
        yield record
        total_bases += length_of(record)


def main():
    """Write a subset of the input records totalling genome_size * desired_cov bases."""
    # Project-local modules are imported here so downsample_records() stays
    # importable without nucio/seqio on the path.
    from nucio import typeify, fileIterator
    from seqio import iteratorFromExtension, recordToString, seqlen

    if len(sys.argv) != 5:
        sys.exit("Usage: downsample.py genome_size desired_cov "
                 "input.{fa,fq} output.{fa,fq}\n")

    types = [int, float, str, str]
    raw_args = sys.argv[1:len(types) + 1]
    # typeify presumably casts each argument with the matching entry of
    # ``types`` -- confirm against nucio.
    genome_size, target_cov, infn, outfn = typeify(raw_args, types)

    max_bases = genome_size * target_cov
    records = fileIterator(infn, iteratorFromExtension(infn))
    with open(outfn, "w") as out:
        for record in downsample_records(records, max_bases, seqlen):
            out.write(recordToString(record))
            out.write("\n")


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# Create k-mers: print every k-length substring of each input sequence.
import sys


def iter_kmers(seq, ksize):
    """Yield every contiguous substring of *seq* that is *ksize* long.

    Substrings are produced left to right, one per start offset; a sequence
    shorter than *ksize* yields nothing.

    Raises:
        ValueError: if *ksize* is smaller than 1 (raised on first iteration).
    """
    if ksize < 1:
        raise ValueError("k-size must be a positive integer")
    stop = len(seq) - ksize + 1
    # Advance the window start by hand so no index list is materialised
    # (range() builds a full list on Python 2, which is costly for long
    # sequences).
    start = 0
    while start < stop:
        yield seq[start:start + ksize]
        start += 1


def main():
    """Print, one per line, all k-mers of every record in the input file."""
    # Project-local readers are imported here so iter_kmers() stays
    # importable without seqio/nucio on the path.
    from seqio import iteratorFromExtension
    from nucio import fileIterator

    if len(sys.argv) != 3:
        sys.exit("Usage: kmer.py k-size in.fa\n")

    fn = sys.argv[2]
    try:
        ksize = int(sys.argv[1])
    except ValueError:
        ksize = None
    # Reject non-numeric and non-positive sizes up front instead of failing
    # with a traceback or printing empty lines.
    if ksize is None or ksize < 1:
        sys.exit("k-size must be a positive integer\n")

    for record in fileIterator(fn, iteratorFromExtension(fn)):
        for kmer in iter_kmers(record.seq, ksize):
            # Single-argument print() behaves identically on Python 2 and 3.
            print(kmer)


if __name__ == "__main__":
    main()