Esempio n. 1
0
      default=sys.stdout, help='File to output.')
    parser.add_argument('--first', type=int, \
      help='How many sequences to analyse, starting with the first.')
    parser.add_argument('--maxlength', type=int, \
      help='Maximum sequence length.')
    parser.add_argument('--minlength', type=int, \
      default=0, help='Minimum sequence length.')
    parser.add_argument('--coverage', type=argparse.FileType('r'), \
      default=None, help='A CSV file with "read_name, coverage".')
    parser.add_argument('--maxcov', type=float, \
      help='Maximum coverage for each sequence.')
    parser.add_argument('--mincov', type=float, \
      default=0, help='Minimum coverage for each sequence.')
    args = parser.parse_args()
    infile = open(args.infile, 'rb')
    seqs = seqlist(infile)
    if args.first is not None:
        seqs = seqs[:args.first]

    if args.maxlength is not None:
        seqs = [s for s in seqs if len(s) <= args.maxlength \
          and len(s) >= args.minlength]
    elif args.minlength > 0:
        seqs = [s for s in seqs if len(s) >= args.minlength]

    if args.coverage is not None:
        n2cov = dict(line.split(',') for line in args.coverage)

        def getcov(nme):
            """Return the coverage stored in ``n2cov`` for a sequence name.

            Only the text before the first space in ``nme`` is used as the
            lookup key. Names absent from ``n2cov`` yield ``0.0``.
            """
            read_id, _, _ = nme.partition(' ')
            raw_cov = n2cov.get(read_id, 0)
            return float(raw_cov)
Esempio n. 2
0
File: stats.py Progetto: bovee/Ochre
 parser.add_argument(
     "--megan", type=argparse.FileType("r"), default=None, help='A CSV file from Megan with "read_name, taxon_name".'
 )
 parser.add_argument(
     "--coverage", type=argparse.FileType("r"), default=None, help='A CSV file with "read_name, coverage".'
 )
 parser.add_argument(
     "--assembler",
     choices=("idba", "velvet", "none"),
     default="none",
     help="The assembler used to produce the contigs.",
 )
 # TODO: loose indexing for gc, bycontig and tetra?
 args = parser.parse_args()
 infile = open(args.infile, "rb")
 seqs = seqlist(infile)
 if args.first is not None:
     seqs = seqs[: args.first]
 if args.type == "bycontig":
     if args.coverage is not None:
         cov = args.coverage
     else:
         cov = args.assembler
     bycontig(seqs, args.outfile, kmer=args.kmer, megan=args.megan, cov=cov)
 elif args.type == "summary":
     summary(seqs, args.outfile)
 elif args.type == "gc":
     gc(seqs, args.outfile)
 elif args.type == "tetra":
     tetra(seqs, args.outfile)
 elif args.type == "tetraz":
Esempio n. 3
0
File: test.py Progetto: bovee/Ochre
import ochre

# Write one "name,gc" row per sequence read from test.fa into gc.csv.
with open('gc.csv', 'w') as f:
    for seq in ochre.seqlist('test.fa'):
        # file.write() adds no line terminator; without the explicit '\n'
        # every record would be fused onto a single line of the CSV.
        f.write(seq.name + ',' + str(seq.gc()) + '\n')

# VERSUS

import Bio.SeqIO
import Bio.SeqUtils

# Write one "name,gc" row per FASTA record in test.fa into gc.csv,
# using Biopython for both the parsing and the GC computation.
with open('gc.csv', 'w') as f:
    for seq in Bio.SeqIO.parse('test.fa', 'fasta'):
        # file.write() adds no line terminator; without the explicit '\n'
        # every record would be fused onto a single line of the CSV.
        f.write(seq.name + ',' + str(Bio.SeqUtils.GC(seq)) + '\n')