Ejemplo n.º 1
0
def filter_snp(general, snp_info, sample_stats, mincov, minsnp, ploidy):
    """Filter one sample's SNP call on coverage and alternate-allele ratio.

    Args:
        general: sequence whose item 2 is the reference allele and item 3 the
            comma-separated alternate allele(s).
        snp_info: mapping that may hold a 'DP4' entry of the form
            "fw.ref,rev.ref,fw.alt,rev.alt" (filtered read counts); a missing
            entry is treated as four zeros.
        sample_stats: sequence whose item 1 is the colon-separated sample
            field with the genotype first (ex: "0/1:48,0,53:14:0:50").
        mincov: minimum total number of DP4 reads required.
        minsnp: minimum percentage of reads supporting the alternate
            allele; it is divided by *ploidy* before the comparison.
        ploidy: when 1, duplicate alleles and the reference allele are
            dropped from the reported genotype.

    Returns:
        '0' when the coverage is below *mincov* (or there are no reads at
        all), '-' when the alternate-allele ratio is too low, otherwise a
        string such as 'T/A (50% of 8)'.
    """
    ref = general[2]
    # The counts are summed AND indexed below, so they must be a real list:
    # a lazy ``map`` object (Python 3) can be consumed once and not indexed.
    dp4 = [int(count) for count in snp_info.get('DP4', '0,0,0,0').split(',')]
    total_reads = sum(dp4)
    # ``total_reads == 0`` protects the ratio below from a division by zero
    # when mincov <= 0; no reads at all is reported like low coverage.
    if total_reads == 0 or total_reads < mincov:
        return '0'
    ratio = 100.0 * (dp4[2] + dp4[3]) / total_reads
    if ratio < minsnp / ploidy:
        return "-"
    alts = [ref] + general[3].split(',')
    # todo: better according to *format* = GT:PL:DP:SP:GQ
    genotype = sample_stats[1].split(':')[0]
    sep = '/' if '/' in genotype else '|'  # phased if |, unphased if /
    genotype = [alts[int(i)] for i in genotype.split(sep)]
    #   Diploid:  GT: '0/1' -> 'T/A (50% of 8)'  [if ref='T' and alt='A']
    #   Haploid:  GT: '0/1' -> 'A (50% of 8)'
    if ploidy == 1:
        # ``unique`` is a helper defined elsewhere in the file.
        genotype = unique(genotype)
        if ref in genotype:
            genotype.remove(ref)
    return "%s (%.0f%% of %d)" % (sep.join(genotype), ratio, total_reads)
Ejemplo n.º 2
0
 def _write_buffer(_buffer, outex):
     """Write one tab-separated line per buffered position to *outex*.

     Each item of *_buffer* unpacks to ``(chrom, pos, refbase, variants, cds,
     strand, ref_codon, shift)``; *variants* holds one string per sample and
     *shift* is the index of the position inside *ref_codon*.  Every
     substitution of the buffer is first applied to a per-sample list of
     codons.  Each position is then written with the translated reference
     codon followed by either ``"indel"`` (when *refbase* is ``"*"``) or the
     translated codons of every sample.

     Nothing is written when *_buffer* is empty.  The helpers ``unique``,
     ``revcomp``, ``iupac``, ``translate`` and ``product`` are taken from the
     enclosing module.
     """
     new_codon = None
     # First pass, one position at a time: build the mutated codons.
     # NOTE(review): presumably all buffered positions belong to one codon;
     # the reference codon and strand of the last item are used afterwards.
     for chrom, pos, refbase, variants, cds, strand, ref_codon, shift in _buffer:
         # Ex: ['G','G','G','T (56% of 167)','G'],  ['A/A','G/G (100% of 7)']
         varbase = list(variants)
         if new_codon is None:
             new_codon = [[ref_codon] for _ in range(len(varbase))]
         per_sample = []  # [[variants sample1], [variants sample2], ...]
         for variant in varbase:
             if variant in ('0', '-'):
                 per_sample.append([refbase])
             else:
                 # Ex: 'C/G (80% of 10)' : heterozygous simple (ref is C)
                 # or double snp (ref is not C)
                 alleles = variant.split()[0]  # Ex: C/G
                 # Phased genotypes are joined with '|' instead of '/'.
                 alleles = unique(alleles.replace('|', '/').split('/'))
                 if refbase in alleles:
                     alleles.remove(refbase)  # Ex: C/G -> 'G' (if ref is C)
                 per_sample.append(alleles)
         for k, alleles in enumerate(per_sample):
             cnumb = len(new_codon[k])
             # One copy of the sample's current codons per remaining allele.
             newc = new_codon[k] * len(alleles)
             for i, allele in enumerate(alleles):
                 for j in range(cnumb):
                     idx = i * cnumb + j
                     newc[idx] = newc[idx][:shift] + allele + newc[idx][shift + 1:]
                     assert ref_codon[
                         shift] == refbase, "bug with shift within codon"
             new_codon[k] = newc
     if new_codon is None:
         return
     if strand == -1:
         ref_codon = revcomp(ref_codon)
         new_codon = [[revcomp(s) for s in c] for c in new_codon]
     # Expand IUPAC codes and translate ONCE.  Doing this inside the output
     # loop re-expanded the already expanded reference codon list on every
     # further position, which produced bogus reference translations.
     ref_codons = [''.join(bases)
                   for bases in product(*[iupac.get(b, b) for b in ref_codon])]
     sample_codons = [[''.join(bases)
                       for codon in sample
                       for bases in product(*[iupac.get(b, b) for b in codon])]
                      for sample in new_codon]
     ref_translated = ','.join([translate.get(c, '?') for c in ref_codons])
     samples_translated = [','.join([translate.get(c, '?') for c in sample])
                           for sample in sample_codons]
     # Second pass: one output line per buffered position.
     for chrom, pos, refbase, variants, cds, strand, _codon, shift in _buffer:
         result = [chrom, pos + 1, refbase] + list(variants) \
                  + [cds, strand, ref_translated]
         if refbase == "*":
             result.append("indel")
         else:
             result.extend(samples_translated)
         outex.write("\t".join([str(r) for r in result]) + "\n")
Ejemplo n.º 3
0
 def _write_buffer(_buffer, outex):
     """Write one tab-separated line per buffered position to *outex*.

     Each item of *_buffer* unpacks to ``(chrom, pos, refbase, variants, cds,
     strand, ref_codon, shift)``; *variants* holds one string per sample and
     *shift* is the index of the position inside *ref_codon*.  Every
     substitution of the buffer is first applied to a per-sample list of
     codons.  Each position is then written with the translated reference
     codon followed by either ``"indel"`` (when *refbase* is ``"*"``) or the
     translated codons of every sample.

     Nothing is written when *_buffer* is empty.  The helpers ``unique``,
     ``revcomp``, ``iupac``, ``translate`` and ``product`` are taken from the
     enclosing module.
     """
     new_codon = None
     # First pass, one position at a time: build the mutated codons.
     # NOTE(review): presumably all buffered positions belong to one codon;
     # the reference codon and strand of the last item are used afterwards.
     for chrom, pos, refbase, variants, cds, strand, ref_codon, shift in _buffer:
         # Ex: ['G','G','G','T (56% of 167)','G'],  ['A/A','G/G (100% of 7)']
         varbase = list(variants)
         if new_codon is None:
             new_codon = [[ref_codon] for _ in range(len(varbase))]
         per_sample = []  # [[variants sample1], [variants sample2], ...]
         for variant in varbase:
             if variant in ('0', '-'):
                 per_sample.append([refbase])
             else:
                 # Ex: 'C/G (80% of 10)' : heterozygous simple (ref is C)
                 # or double snp (ref is not C)
                 alleles = variant.split()[0]  # Ex: C/G
                 # Phased genotypes are joined with '|' instead of '/'.
                 alleles = unique(alleles.replace('|', '/').split('/'))
                 if refbase in alleles:
                     alleles.remove(refbase)  # Ex: C/G -> 'G' (if ref is C)
                 per_sample.append(alleles)
         for k, alleles in enumerate(per_sample):
             cnumb = len(new_codon[k])
             # One copy of the sample's current codons per remaining allele.
             newc = new_codon[k] * len(alleles)
             for i, allele in enumerate(alleles):
                 for j in range(cnumb):
                     idx = i * cnumb + j
                     newc[idx] = newc[idx][:shift] + allele + newc[idx][shift + 1:]
                     assert ref_codon[shift] == refbase, "bug with shift within codon"
             new_codon[k] = newc
     if new_codon is None:
         return
     if strand == -1:
         ref_codon = revcomp(ref_codon)
         new_codon = [[revcomp(s) for s in c] for c in new_codon]
     # Expand IUPAC codes and translate ONCE.  Doing this inside the output
     # loop re-expanded the already expanded reference codon list on every
     # further position, which produced bogus reference translations.
     ref_codons = [''.join(bases)
                   for bases in product(*[iupac.get(b, b) for b in ref_codon])]
     sample_codons = [[''.join(bases)
                       for codon in sample
                       for bases in product(*[iupac.get(b, b) for b in codon])]
                      for sample in new_codon]
     ref_translated = ','.join([translate.get(c, '?') for c in ref_codons])
     samples_translated = [','.join([translate.get(c, '?') for c in sample])
                           for sample in sample_codons]
     # Second pass: one output line per buffered position.
     for chrom, pos, refbase, variants, cds, strand, _codon, shift in _buffer:
         result = [chrom, pos+1, refbase] + list(variants) \
                  + [cds, strand, ref_translated]
         if refbase == "*":
             result.append("indel")
         else:
             result.extend(samples_translated)
         outex.write("\t".join([str(r) for r in result])+"\n")
Ejemplo n.º 4
0
def filter_snp(general,snp_info,sample_stats,mincov,minsnp,ploidy):
    """Filter one sample's SNP call on alternate-allele support and ratio.

    Args:
        general: sequence whose item 2 is the reference allele and item 3 the
            comma-separated alternate allele(s).
        snp_info: mapping that may hold a 'DP4' entry of the form
            "fw.ref,rev.ref,fw.alt,rev.alt" (filtered read counts); a missing
            entry is treated as four zeros.
        sample_stats: sequence whose item 1 is the colon-separated sample
            field with the genotype first (ex: "0/1:48,0,53:14:0:50").
        mincov: minimum number of reads supporting the alternate allele.
        minsnp: minimum percentage of reads supporting the alternate
            allele; it is divided by *ploidy* before the comparison.
        ploidy: when 1, duplicate alleles and the reference allele are
            dropped from the reported genotype.

    Returns:
        '-' when fewer than *mincov* reads support the alternate allele (or
        there are no reads at all), '0' when the alternate-allele ratio is
        too low, otherwise a string such as 'T/A (50% of 8)'.
    """
    ref = general[2]
    # The counts are summed AND indexed below, so they must be a real list:
    # a lazy ``map`` object (Python 3) can be consumed once and not indexed.
    dp4 = [int(count) for count in snp_info.get('DP4','0,0,0,0').split(',')]
    total_reads = sum(dp4)
    alt_reads = dp4[2]+dp4[3]
    # ``total_reads == 0`` protects the ratio below from a division by zero
    # when mincov <= 0; no reads means no read supports the alternate allele.
    if total_reads == 0 or alt_reads < mincov:  # too few supporting alt
        return '-'
    ratio = 100.0*alt_reads/total_reads
    if ratio < minsnp/ploidy:
        return "0"
    alts = [ref]+general[3].split(',')
    # todo: better according to *format* = GT:PL:DP:SP:GQ
    genotype = sample_stats[1].split(':')[0]
    sep = '/' if '/' in genotype else '|'  # phased if |, unphased if /
    genotype = [alts[int(i)] for i in genotype.split(sep)]
    #   Diploid:  GT: '0/1' -> 'T/A (50% of 8)'  [if ref='T' and alt='A']
    #   Haploid:  GT: '0/1' -> 'A (50% of 8)'
    if ploidy == 1:
        # ``unique`` is a helper defined elsewhere in the file.
        genotype = unique(genotype)
        if ref in genotype:
            genotype.remove(ref)
    return "%s (%.0f%% of %d)" % (sep.join(genotype),ratio,total_reads)