# Downsample a library.
#
# Copies records from the input file to the output file until the number of
# bases written exceeds genome_size * desired_cov.
import sys

from nucio import typeify, fileIterator
from seqio import iteratorFromExtension, recordToString, seqlen

if len(sys.argv) != 5:
    sys.exit("Usage: downsample.py genome_size desired_cov input.{fa,fq} output.{fa,fq}\n")

# Convert the four positional arguments to their expected types in one go.
arg_types = [int, float, str, str]
raw_args = sys.argv[1:1 + len(arg_types)]
genome_size, target_cov, infn, outfn = typeify(raw_args, arg_types)

# Base budget for the downsampled output.
max_bases = genome_size * target_cov
bases_written = 0

with open(outfn, "w") as out_fh:
    for record in fileIterator(infn, iteratorFromExtension(infn)):
        record_len = seqlen(record)

        # Records whose sequence contains an "N" are skipped and do not
        # count towards the budget.
        if "N" in record.seq:
            continue

        # The budget is checked before writing, so the record that crosses
        # the threshold is still emitted; the total may exceed max_bases.
        if bases_written > max_bases:
            break

        out_fh.write(recordToString(record) + "\n")
        bases_written += record_len
# Deplex sequence records: every record read from the input files is written
# to an output file named after the part of the record name that precedes
# the delimiter, with a ".fastq" or ".fasta" extension.
import sys

from seqio import iteratorFromExtension, recordToString, FastqRecord
from nucio import fileIterator
from args import parseArgs, getHelpStr, CLArgument

description = (
    "Usage: deplex_pb.py [options] file1.{fa,fq} [file2.{fa,fq} ..]\n\n"
    "Deplexes a file based on some delimiter")

# NOTE(review): the field order is presumably (name, key, type, default,
# help text) -- confirm against the CLArgument definition.
argument_list = [["delim", "delim", str, "/", "Delimiter to split the input"]]

# Built as a concrete list because it is consumed twice below (help text and
# argument parsing); a lazy map object would be exhausted after first use.
arguments = [CLArgument._make(spec) for spec in argument_list]

if len(sys.argv) < 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

(p_arg_map, args_remaining) = parseArgs(sys.argv[1:], arguments)

# Resolve the record parser of every input before anything is read or
# written, as the original eager map() did.
its = [iteratorFromExtension(infn) for infn in args_remaining]

# Output handles keyed by record-name prefix; each is opened on first use.
fh_h = {}
try:
    # A plain nested loop replaces starmap/izip/chain, which were used here
    # without being imported.
    for infn, record_it in zip(args_remaining, its):
        for entry in fileIterator(infn, record_it):
            h = entry.name.split(p_arg_map["delim"])[0]
            if h not in fh_h:
                ext = ".fastq" if isinstance(entry, FastqRecord) else ".fasta"
                fh_h[h] = open(h + ext, "w")
            fh_h[h].write(recordToString(entry))
            fh_h[h].write("\n")
finally:
    # Close every output file, even if reading or writing failed midway.
    for fh in fh_h.values():
        fh.close()
#!/usr/bin/env python
# Print every record of the input file with its sequence replaced by the
# reverse complement.
import sys

from seqio import iteratorFromExtension, recordToString
from nucio import fileIterator
from misc import reverse_complement

if len(sys.argv) != 2:
    sys.exit("reverseComplement.py in.{fa,fq}")

infn = sys.argv[1]
record_parser = iteratorFromExtension(infn)

for record in fileIterator(infn, record_parser):
    # Only the sequence field changes; all other fields are carried over.
    flipped = record._replace(seq=reverse_complement(record.seq))
    # Single-argument print(...) emits the same output as the Python 2
    # print statement.
    print(recordToString(flipped))
#!/usr/bin/env python
# Reverse-complement the sequence of each record in the given file and
# print the resulting records to standard output.
import sys

from seqio import iteratorFromExtension, recordToString
from nucio import fileIterator
from misc import reverse_complement

if len(sys.argv) != 2:
    sys.exit("reverseComplement.py in.{fa,fq}")

path = sys.argv[1]
parser = iteratorFromExtension(path)

for rec in fileIterator(path, parser):
    rc_seq = reverse_complement(rec.seq)
    # _replace returns a copy of the record with only `seq` swapped out.
    rc_record = rec._replace(seq=rc_seq)
    # With one argument, print(...) behaves like the Python 2 print statement.
    print(recordToString(rc_record))
# Deplex sequence records: every record read from the input files is written
# to an output file named after the part of the record name that precedes
# the delimiter, with a ".fastq" or ".fasta" extension.
import sys

from seqio import iteratorFromExtension, recordToString, FastqRecord
from nucio import fileIterator
from args import parseArgs, getHelpStr, CLArgument

description = (
    "Usage: deplex_pb.py [options] file1.{fa,fq} [file2.{fa,fq} ..]\n\n"
    "Deplexes a file based on some delimiter"
)

# NOTE(review): the field order is presumably (name, key, type, default,
# help text) -- confirm against the CLArgument definition.
argument_list = [["delim", "delim", str, "/", "Delimiter to split the input"]]

# Built as a concrete list because it is consumed twice below (help text and
# argument parsing); a lazy map object would be exhausted after first use.
arguments = [CLArgument._make(spec) for spec in argument_list]

if len(sys.argv) < 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

(p_arg_map, args_remaining) = parseArgs(sys.argv[1:], arguments)

# Resolve the record parser of every input before anything is read or
# written, as the original eager map() did.
its = [iteratorFromExtension(infn) for infn in args_remaining]

# Output handles keyed by record-name prefix; each is opened on first use.
fh_h = {}
try:
    # A plain nested loop replaces starmap/izip/chain, which were used here
    # without being imported.
    for infn, record_it in zip(args_remaining, its):
        for entry in fileIterator(infn, record_it):
            h = entry.name.split(p_arg_map["delim"])[0]
            if h not in fh_h:
                ext = ".fastq" if isinstance(entry, FastqRecord) else ".fasta"
                fh_h[h] = open(h + ext, "w")
            fh_h[h].write(recordToString(entry))
            fh_h[h].write("\n")
finally:
    # Close every output file, even if reading or writing failed midway.
    for fh in fh_h.values():
        fh.close()