class MismatchDbSNP(object):
    """Read filter that caps mismatches, not counting accepted variations.

    A variation is "accepted" when ``self.dbsnp.is_valid_variation`` returns
    a true value for it; accepted variations are tallied separately and do
    not count toward the ``num`` limit.
    """

    def __init__(self, num, fname, verbose=None):
        """Set up the filter.

        num     -- maximum number of unaccepted mismatches (coerced to int)
        fname   -- path handed to ``DBSNP``
        verbose -- verbose mode is enabled only for the exact string
                   ``'verbose'``
        """
        sys.stderr.write('Note: MismatchDbSNP is considered *experimental*\n')

        self.num = int(num)
        self.fname = fname
        self.dbsnp = DBSNP(fname)
        self.verbose = True if verbose == 'verbose' else False

    def filter(self, bam, read):
        """Return True if ``read`` passes the filter, False otherwise.

        Unmapped reads always fail. Reads whose raw mismatch count is already
        within the limit pass without consulting ``self.dbsnp``. When a read
        passes with at least one accepted variation, a ``('ZS', count)`` tag
        is appended to ``read.tags``.
        """
        if read.is_unmapped:
            return False

        # Fast path: nothing to look up when the read is within the limit.
        if read_calc_mismatches(read) <= self.num:
            return True

        chrom = bam.getrname(read.tid)

        accepted = 0
        rejected = 0
        for op, pos, seq in read_calc_variations(read):
            if self.dbsnp.is_valid_variation(chrom, op, pos, seq,
                                             self.verbose):
                accepted += 1
            else:
                rejected += 1

        if rejected > self.num:
            return False

        if accepted:
            read.tags = read.tags + [('ZS', accepted)]

        return True

    def __repr__(self):
        """Describe the filter, e.g. ``>2 mismatches using <file name>``."""
        plural = '' if self.num == 1 else 'es'
        source = os.path.basename(self.fname)
        return '>%s mismatch%s using %s' % (self.num, plural, source)

    def close(self):
        """Close the underlying ``DBSNP`` handle."""
        self.dbsnp.close()
# NOTE(review): this file defines MismatchRefDbSNP twice in a row; the later
# definition is the one that stays bound to the name at import time.
class MismatchRefDbSNP(object):
    """Read filter that caps reference mismatches, not counting accepted ones.

    Mismatches are produced by ``read_calc_mismatches_gen`` against an indexed
    FASTA reference. A mismatch is "accepted" when
    ``self.dbsnp.is_valid_variation`` returns a true value for it; accepted
    mismatches do not count toward the ``num`` limit.
    """

    def __init__(self, num, refname, dbsnpname):
        """Set up the filter.

        num       -- maximum number of unaccepted mismatches (coerced to int)
        refname   -- path of the reference FASTA; ``pysam.faidx`` is run
                     first when ``<refname>.fai`` does not exist
        dbsnpname -- path handed to ``DBSNP``
        """
        sys.stderr.write(
            'Note: MismatchRefDbSNP is considered *experimental*\n')

        self.num = int(num)
        self.refname = refname
        # Kept so that __repr__ can report it; previously this attribute was
        # never assigned and repr() raised AttributeError.
        self.dbsnpname = dbsnpname
        self.dbsnp = DBSNP(dbsnpname)

        if not os.path.exists('%s.fai' % refname):
            pysam.faidx(refname)

        self.ref = pysam.Fastafile(refname)

    def filter(self, bam, read):
        """Return True if ``read`` passes the filter, False otherwise.

        Unmapped reads always fail. When a read passes with at least one
        accepted mismatch, a ``('ZS', count)`` tag is appended to
        ``read.tags``.
        """
        if read.is_unmapped:
            return False

        chrom = bam.getrname(read.tid)

        mm = 0
        snps = 0

        for op, pos, seq in read_calc_mismatches_gen(self.ref, read, chrom):
            if not self.dbsnp.is_valid_variation(chrom, op, pos, seq):
                mm += 1
            else:
                snps += 1

        if mm > self.num:
            return False

        if snps:
            read.tags = read.tags + [('ZS', snps)]

        return True

    def __repr__(self):
        """Describe the filter as ``>N mismatch(es) using <dbsnp>/<ref>``."""
        return '>%s mismatch%s using %s/%s' % (
            self.num,
            '' if self.num == 1 else 'es',
            os.path.basename(self.dbsnpname),
            os.path.basename(self.refname))

    def close(self):
        """Close the reference FASTA and the ``DBSNP`` handle."""
        self.ref.close()
        self.dbsnp.close()
# NOTE(review): MismatchRefDbSNP is also defined immediately above in this
# file; this later definition is the one bound to the name at import time.
class MismatchRefDbSNP(object):
    """Filter reads by mismatches against a reference, skipping accepted ones.

    ``read_calc_mismatches_gen`` yields the mismatches of a read relative to
    an indexed FASTA reference. Each one is checked with
    ``self.dbsnp.is_valid_variation``: a true result counts it as a SNP, a
    false result counts it toward the ``num`` limit.
    """

    def __init__(self, num, refname, dbsnpname):
        """Create the filter.

        num       -- highest tolerated count of unaccepted mismatches
                     (coerced to int)
        refname   -- reference FASTA path; indexed with ``pysam.faidx`` when
                     no ``<refname>.fai`` file exists yet
        dbsnpname -- path passed to ``DBSNP``
        """
        sys.stderr.write('Note: MismatchRefDbSNP is considered *experimental*\n')

        self.num = int(num)
        self.refname = refname
        # __repr__ reports this path. It was never stored before, which made
        # repr() fail with AttributeError.
        self.dbsnpname = dbsnpname
        self.dbsnp = DBSNP(dbsnpname)

        if not os.path.exists('%s.fai' % refname):
            pysam.faidx(refname)

        self.ref = pysam.Fastafile(refname)

    def filter(self, bam, read):
        """Return True when ``read`` passes, False when it is rejected.

        Unmapped reads are rejected. A passing read with one or more accepted
        mismatches gets a ``('ZS', count)`` entry appended to ``read.tags``.
        """
        if read.is_unmapped:
            return False

        chrom = bam.getrname(read.tid)

        mm = 0
        snps = 0

        for op, pos, seq in read_calc_mismatches_gen(self.ref, read, chrom):
            if not self.dbsnp.is_valid_variation(chrom, op, pos, seq):
                mm += 1
            else:
                snps += 1

        if mm > self.num:
            return False

        if snps:
            read.tags = read.tags + [('ZS', snps)]

        return True

    def __repr__(self):
        """Return ``>N mismatch(es) using <dbsnp basename>/<ref basename>``."""
        return '>%s mismatch%s using %s/%s' % (
            self.num,
            '' if self.num == 1 else 'es',
            os.path.basename(self.dbsnpname),
            os.path.basename(self.refname))

    def close(self):
        """Release the reference FASTA handle and the ``DBSNP`` handle."""
        self.ref.close()
        self.dbsnp.close()