default=sys.stdout, help='File to output.')
# NOTE(review): the line above is the tail of a call that starts outside this
# fragment; the option it belongs to is not visible here.

# --first is applied before any other filter (see the slice below).
parser.add_argument('--first', type=int, \
    help='How many sequences to analyse, starting with the first.')
# Length bounds: --maxlength has no default (None when omitted), --minlength
# defaults to 0.
parser.add_argument('--maxlength', type=int, \
    help='Maximum sequence length.')
parser.add_argument('--minlength', type=int, \
    default=0, help='Minimum sequence length.')
# argparse opens the coverage file for reading; args.coverage is None when
# the option is omitted.
parser.add_argument('--coverage', type=argparse.FileType('r'), \
    default=None, help='A CSV file with "read_name, coverage".')
# NOTE(review): --maxcov / --mincov are parsed here but not consumed in the
# visible part of the script; presumably used further down -- confirm.
parser.add_argument('--maxcov', type=float, \
    help='Maximum coverage for each sequence.')
parser.add_argument('--mincov', type=float, \
    default=0, help='Minimum coverage for each sequence.')
args = parser.parse_args()

# NOTE(review): infile is opened in binary mode and is not closed anywhere in
# the visible code -- confirm it is closed later or switch to `with`.
infile = open(args.infile, 'rb')
seqs = seqlist(infile)

# Keep only the first N sequences when --first was given.
if args.first is not None:
    seqs = seqs[:args.first]

# With --maxlength, keep sequences whose length lies in
# [minlength, maxlength]; without it, only filter when a positive
# --minlength was requested.
if args.maxlength is not None:
    seqs = [s for s in seqs if len(s) <= args.maxlength \
            and len(s) >= args.minlength]
elif args.minlength > 0:
    seqs = [s for s in seqs if len(s) >= args.minlength]

if args.coverage is not None:
    # Map the text before the comma to the text after it, one entry per line.
    # dict() requires every line to split into exactly two fields.
    n2cov = dict(line.split(',') for line in args.coverage)
    # NOTE(review): indentation was lost in this fragment; getcov is assumed
    # to be defined inside this branch because n2cov only exists here.
    def getcov(nme):
        """Return the coverage recorded for a sequence name as a float.

        The lookup key is the part of ``nme`` before the first space; names
        with no entry in ``n2cov`` yield 0.0.
        """
        return float(n2cov.get(nme.split(' ')[0], 0))
# Optional input files; argparse opens each for reading and leaves the
# attribute as None when the option is omitted.
parser.add_argument(
    "--megan", type=argparse.FileType("r"), default=None, help='A CSV file from Megan with "read_name, taxon_name".'
)
parser.add_argument(
    "--coverage", type=argparse.FileType("r"), default=None, help='A CSV file with "read_name, coverage".'
)
# Used as the fallback coverage source for the "bycontig" analysis below.
parser.add_argument(
    "--assembler",
    choices=("idba", "velvet", "none"),
    default="none",
    help="The assembler used to produce the contigs.",
)
# TODO: loose indexing for gc, bycontig and tetra?
args = parser.parse_args()

# NOTE(review): infile is opened in binary mode and is not closed anywhere in
# the visible code -- confirm it is closed later or switch to `with`.
infile = open(args.infile, "rb")
seqs = seqlist(infile)

# Keep only the first N sequences when --first was given.
if args.first is not None:
    seqs = seqs[: args.first]

# Dispatch on the requested analysis type (the option that sets args.type is
# defined outside this fragment).
if args.type == "bycontig":
    # cov is the open coverage CSV when one was supplied, otherwise the
    # assembler name string ("idba", "velvet" or "none").
    if args.coverage is not None:
        cov = args.coverage
    else:
        cov = args.assembler
    bycontig(seqs, args.outfile, kmer=args.kmer, megan=args.megan, cov=cov)
elif args.type == "summary":
    summary(seqs, args.outfile)
elif args.type == "gc":
    gc(seqs, args.outfile)
elif args.type == "tetra":
    tetra(seqs, args.outfile)
elif args.type == "tetraz":
# Example: write one "name,GC" row per sequence of test.fa to gc.csv,
# first with ochre and then the equivalent using Biopython.
import ochre

with open('gc.csv', 'w') as f:
    for seq in ochre.seqlist('test.fa'):
        # Terminate each record; without the newline every row would be
        # run together on a single line of the CSV.
        f.write(seq.name + ',' + str(seq.gc()) + '\n')

# VERSUS

import Bio.SeqIO
import Bio.SeqUtils

with open('gc.csv', 'w') as f:
    for seq in Bio.SeqIO.parse('test.fa', 'fasta'):
        # SeqIO.parse yields SeqRecord objects; GC() is documented as taking
        # a sequence, so pass the record's .seq rather than the record.
        # NOTE(review): recent Biopython releases deprecate GC() in favour
        # of gc_fraction() -- confirm against the installed version.
        f.write(seq.name + ',' + str(Bio.SeqUtils.GC(seq.seq)) + '\n')