def tearDown(self):
    """Check the itemized errors of ``self.ref`` vs ``self.query``.

    Each test is expected to set ``self.ref``, ``self.query`` and
    ``self.expected`` before this runs.  ``self.expected`` is indexed by
    the ``'i'`` value of each itemized error and each of its values
    provides the anticipated ``'ref'`` and ``'query'`` entries.
    """
    # materialize once: the errors are logged, collected into a set and
    # then walked position by position
    errors = list(sequtils.itemize_errors(self.ref, self.query))

    log.debug(sequtils.show_errors(errors))
    log.debug('\n' + pprint.pformat(self.expected))
    log.debug('\n' + pprint.pformat(errors))

    # indices into ref are as expected
    self.assertEqual(
        set(e['i'] for e in errors), set(self.expected.keys()))

    # individual positions are as expected
    for pos in errors:
        want = self.expected[pos['i']]
        self.assertEqual(pos['ref'], want['ref'])
        self.assertEqual(pos['query'], want['query'])
def action(args):
    """Write per-alignment error tallies and an optional homopolymer matrix.

    Attributes read from ``args``:

    * ``aligns`` -- input handed to ``DictReader``; every row must provide
      ``t_name``, ``q_name``, ``t_seq`` and ``q_seq``.
    * ``out`` -- destination handed to ``DictWriter`` for the tallies.
    * ``extra_fields`` -- mapping whose keys become extra output columns
      and whose values are copied into every row.
    * ``output_alignment`` -- when true, add an ``alignment`` column
      holding ``show_errors`` output.
    * ``homopolymer_max`` -- run lengths at or above this value are pooled
      into a single ``geq<N>`` bucket.
    * ``step`` -- when true, wait for Enter after each alignment.
    * ``matrix`` -- object with a ``writerow`` method; when truthy, the
      homopolymer count matrix is written to it.
    """
    fieldnames = ['t_name', 'q_name', 'length', 'snp', 'indel']
    fieldnames += ['homoindel', 'compound', 'total', 'sw_zscore']
    fieldnames += args.extra_fields.keys()

    if args.output_alignment:
        fieldnames += ['alignment']

    # NOTE(review): the errors are consumed up to three times per alignment
    # (counted, rendered, scanned for homopolymers).  They are stored as a
    # list so this also works if itemize_errors returns a one-shot iterator.
    aligns = (
        dict({'errors': list(itemize_errors(a['t_seq'], a['q_seq']))}, **a)
        for a in DictReader(args.aligns))

    tallies = DictWriter(args.out,
                         fieldnames=fieldnames,
                         extrasaction='ignore')
    tallies.writeheader()

    homopolymers = defaultdict(Counter)
    hp_max = args.homopolymer_max
    gtceil = 'geq{}'.format(hp_max)

    # output error counts:
    for a in aligns:
        # start every column after the two name columns at zero
        row = {k: 0 for k in fieldnames[2:]}
        row['q_name'], row['t_name'] = a['q_name'], a['t_name']
        row.update(a)
        row.update(error_count(a['errors']))

        # create total count to output
        row['total'] = (
            row['snp'] + row['indel'] + row['homoindel'] + row['compound'])
        row.update(args.extra_fields)

        if args.output_alignment:
            row['alignment'] = show_errors(a['errors'])

        tallies.writerow(row)

        log.debug(a['q_name'])
        log.debug('\n' + sequtils.format_alignment(a['t_seq'], a['q_seq']))
        log.debug(a['q_seq'].replace('-', '').replace('=', ''))
        log.debug(show_errors(a['errors']))

        if args.step:
            # Python 2 builtin: block until the user presses Enter
            raw_input()

        # create homopolymer matrix
        for e in a['errors']:
            r, q = e['ref'].strip('=-'), e['query'].strip('=-')

            # count indels or homoindels; exclude compound errors and snps
            # (only events whose ref and query share a single base remain)
            base = ''.join(set(r + q))
            if len(base) != 1:
                continue

            # Strict '<' so that a run of exactly hp_max lands in the
            # 'geq' bucket; the matrix below only has integer margins
            # 0..hp_max-1, so an integer key of hp_max would never print.
            ref_count = len(r) if len(r) < hp_max else gtceil
            query_count = len(q) if len(q) < hp_max else gtceil

            homopolymers['total'][(ref_count, query_count)] += 1
            homopolymers[base][(ref_count, query_count)] += 1

    # output homopolymer matrix if specified
    if args.matrix:
        # reference counts in rows, query in column
        ii = list(range(hp_max))  # list() so '+' also works on Python 3
        margins = ii + [gtceil]

        for base in sorted(homopolymers):
            args.matrix.writerow(
                [base] + ['q{}'.format(i) for i in ii] + [gtceil])

            for i_ref in margins:
                cols = [homopolymers[base][i_ref, i_query]
                        for i_query in margins]
                args.matrix.writerow(['r{}'.format(i_ref)] + cols)

            # blank spacer row between the per-base tables
            args.matrix.writerow([''] * 10)
def action(args):
    """Tally alignment errors per query and optionally dump homopolymer counts.

    Reads alignment rows from ``args.aligns`` with ``DictReader`` (each row
    needs ``t_name``, ``q_name``, ``t_seq`` and ``q_seq``) and writes one
    row of error counts per alignment to ``args.out``.  The keys of
    ``args.extra_fields`` are appended as output columns and its values are
    copied into every row; ``args.output_alignment`` adds an ``alignment``
    column.  If ``args.step`` is true the loop waits for Enter after each
    alignment.  If ``args.matrix`` is truthy, a table of reference run
    length by query run length is written to it via ``writerow`` for
    ``'total'`` and for each base seen, with lengths of
    ``args.homopolymer_max`` or more pooled under ``geq<N>``.
    """
    fieldnames = ['t_name', 'q_name', 'length', 'snp', 'indel']
    fieldnames += ['homoindel', 'compound', 'total', 'sw_zscore']
    fieldnames += args.extra_fields.keys()

    if args.output_alignment:
        fieldnames += ['alignment']

    def with_errors(a):
        # NOTE(review): stored as a list because the errors are read more
        # than once below; harmless if itemize_errors already returns one.
        errors = list(itemize_errors(a['t_seq'], a['q_seq']))
        return dict({'errors': errors}, **a)

    tallies = DictWriter(
        args.out, fieldnames=fieldnames, extrasaction='ignore')
    tallies.writeheader()

    homopolymers = defaultdict(Counter)
    ceiling = args.homopolymer_max
    gtceil = 'geq{}'.format(ceiling)

    def bucket(seq):
        # Lengths below the ceiling keep their own integer bin; everything
        # at or above it is pooled.  The comparison is strict so that the
        # integer bins are exactly 0..ceiling-1, matching the matrix margins
        # printed at the end (a length equal to the ceiling used to get an
        # integer bin that was never printed).
        n = len(seq)
        return n if n < ceiling else gtceil

    # output error counts:
    for a in (with_errors(rec) for rec in DictReader(args.aligns)):
        # zero-fill every column after the two name columns
        row = {k: 0 for k in fieldnames[2:]}
        row['q_name'], row['t_name'] = a['q_name'], a['t_name']
        row.update(a)
        row.update(error_count(a['errors']))

        # create total count to output
        total = row['snp'] + row['indel'] + row['homoindel'] + row['compound']
        row.update({'total': total})
        row.update(args.extra_fields)

        if args.output_alignment:
            row.update({'alignment': show_errors(a['errors'])})

        tallies.writerow(row)

        log.debug(a['q_name'])
        log.debug('\n' + sequtils.format_alignment(a['t_seq'], a['q_seq']))
        log.debug(a['q_seq'].replace('-', '').replace('=', ''))
        log.debug(show_errors(a['errors']))

        if args.step:
            # Python 2 builtin: pause until Enter is pressed
            raw_input()

        # create homopolymer matrix
        for e in a['errors']:
            r, q = e['ref'].strip('=-'), e['query'].strip('=-')

            # count indels or homoindels; exclude compound errors and snps
            base = ''.join(set(r + q))
            if len(base) == 1:
                key = (bucket(r), bucket(q))
                homopolymers['total'][key] += 1
                homopolymers[base][key] += 1

    # output homopolymer matrix if specified
    if args.matrix:
        # reference counts in rows, query in column
        ii = list(range(ceiling))  # list() keeps '+' valid on Python 3
        margins = ii + [gtceil]
        header_tail = ['q{}'.format(i) for i in ii] + [gtceil]

        for base in sorted(homopolymers):
            args.matrix.writerow([base] + header_tail)

            for i_ref in margins:
                cols = [
                    homopolymers[base][i_ref, i_query] for i_query in margins
                ]
                args.matrix.writerow(['r{}'.format(i_ref)] + cols)

            # empty separator row after each base's table
            args.matrix.writerow([''] * 10)