# parser.add_argument('--json', default="/dev/null")
parser.add_argument('summary', nargs='?', type=argparse.FileType('w'),
                    default=sys.stdout)
args = parser.parse_args()

import json
from Bio import SeqIO
from postanalysis.variant import Variant

# --- Load references ---------------------------------------------------------
# `seqs` keeps the FASTA records as (id, record) pairs in file order; `sdict`
# is the same data as an id -> record mapping.
seqs = [(s.id, s) for s in SeqIO.parse(args.reffile, 'fasta')]
sdict = dict(seqs)

# --- Load variants -----------------------------------------------------------
# Read every non-header VCF line (header lines start with '#') and group the
# parsed Variant objects by chromosome. The file is read inside a `with` block
# so the handle is closed as soon as the lines are in memory.
# NOTE(review): the 'U' (universal newlines) flag is kept to preserve the
# original newline handling; it was removed from open() in Python 3.11.
variants = {}
with open(args.vcffile, 'rU') as vcf:
    vlines = [line.strip('\n') for line in vcf if not line.startswith('#')]
for line in vlines:
    v = Variant.from_vcf(line)
    v.caller = 'gatk'
    variants.setdefault(v.chrom, []).append(v)

# --- Output summary information ----------------------------------------------
# One tab-separated row per reference, in FASTA order; references with no
# variants are reported with a count of 0. `.write(... + '\n')` emits the same
# bytes as the former `print >>args.summary, ...` statements.
args.summary.write('reference\tvariants\n')
for ref, seq in seqs:
    args.summary.write('%s\t%d\n' % (ref, len(variants.get(ref, ()))))
) ''' Load references ''' seqs = [(s.id, s) for s in SeqIO.parse(args.reffile, 'fasta')] sdict = dict((ref, Reference(name=ref)) for ref, seq in seqs) ''' Summarize coverage data ''' covdata = parse_covdepth_samtools(args.covfile, reflens=dict( (s[0], len(s[1])) for s in seqs)) for ref, seq in seqs: p, m = summarize_coverage(covdata[ref]) sdict[ref].pct_cov = p sdict[ref].mean_cov = m ncvars = find_nocov_variants(covdata[ref], chrom=ref, caller='samdepth') if ncvars is not None: sdict[ref].dips.extend(ncvars) ''' Analyze variants ''' glines = [ l.strip('\n') for l in gzip.open(args.gfffile, 'rb') if not l.startswith('#') ] for l in glines: v = Variant.from_gff(l) v.caller = 'gencons' sdict[v.chrom].variants.append(v) ''' Output summary information ''' print >> args.summary, 'ref\tpct_cov\tmean_cov\tnvars\tndips\tcall' for ref, seq in seqs: call = sdict[ref].make_call(seq) print >> args.summary, '%s\t%s' % (sdict[ref].summary(), call)
return '%s\t%.1f\t%.1f\t%d\t%d' % (self.name, self.pct_cov*100, self.mean_cov, len(self.variants), len(self.dips), ) ''' Load references ''' seqs = [(s.id,s) for s in SeqIO.parse(args.reffile,'fasta')] sdict = dict((ref,Reference(name=ref)) for ref,seq in seqs) ''' Summarize coverage data ''' covdata = parse_covdepth_samtools(args.covfile,reflens=dict((s[0],len(s[1])) for s in seqs)) for ref,seq in seqs: p,m = summarize_coverage(covdata[ref]) sdict[ref].pct_cov = p sdict[ref].mean_cov = m ncvars = find_nocov_variants(covdata[ref],chrom=ref,caller='samdepth') if ncvars is not None: sdict[ref].dips.extend(ncvars) ''' Analyze variants ''' glines = [l.strip('\n') for l in gzip.open(args.gfffile,'rb') if not l.startswith('#')] for l in glines: v = Variant.from_gff(l) v.caller = 'gencons' sdict[v.chrom].variants.append(v) ''' Output summary information ''' print >>args.summary, 'ref\tpct_cov\tmean_cov\tnvars\tndips\tcall' for ref,seq in seqs: call = sdict[ref].make_call(seq) print >>args.summary, '%s\t%s' % (sdict[ref].summary(),call)