Beispiel #1
0
            minor_het.append(hetsort.pop())
            

    while minor_het:
        af[(len(minor_het)) + (len(minor_hom)*2)] = prod([el[0][0] for el in hetsort]) * prod([el[0][1] for el in minor_het]) *  prod([el[0][2] for el in minor_hom])
        minor_hom.append(minor_het.pop())

    return af

def cut_fn(sd):
    """Site filter used when loading the VCF.

    A site passes when its 'indiv_gt' mapping holds more than 10 entries
    (genotyped individuals) and its 'mac' value is at least 1, i.e. there
    is at least one non-reference call.  'mac' is only looked up when the
    individual-count test has already passed.
    """
    if len(sd['indiv_gt']) <= 10:
        return False
    return sd['mac'] >= 1

if __name__ == "__main__":

    # Load the sites accepted by cut_fn; on top of that, individual calls
    # whose max quality (GQ) is below 4 are tossed.
    vcf = variant_detection.load_vcf(
        '/n/hoekstrafs1/test-stampy/110910-lane5_stampy.vcf',
        cutoff_fn=cut_fn,
        indiv_gt_phred_cut=4,
    )

    def dephred(x):
        """Undo phred scaling: return 10 ** (-x / 10)."""
        return 10 ** (x / -10.)

    # Work on a single site (position 9 of vcf.values(); this indexing needs
    # values() to return a list, as in Python 2).  For each individual the
    # comma-separated 'PL' field is de-phred-ed and rescaled to sum to 1, and
    # the individual's record is kept next to the rescaled values.
    l3_li = []
    for indiv_rec in vcf.values()[9]['indiv_gt'].values():
        raw_liks = [dephred(int(field)) for field in indiv_rec['PL'].split(',')]
        lik_total = sum(raw_liks)
        l3_li.append(([lik / lik_total for lik in raw_liks], indiv_rec))

    hetsort, minor_het, minor_hom, af = init_data(l3_li)
    af = af_method1(hetsort, minor_het, minor_hom, af)

    # Plot the af entries in key order.
    af_points = sorted(af.items())
    plot(*Util.dezip(af_points))

    # Interactive sanity check; the result is discarded when run as a script.
    # Can be false, but so far all missing L = 0.0 (so irrelevant).
    len(af) == 2 * len(l3_li)
Beispiel #2
0
    # NOTE(review): this fragment is Python 2 only (dict.has_key, the
    # `print >> stream` statement, builtin reduce, list-returning
    # dict.values()).  `parent_str` is defined outside the visible code.

    # gq = 20  (superseded: gq is read from the command line below)
    # Threshold compared against len(sd['indiv_gt']) in cut_fn.
    min_indiv = 50
    # Upper bound (exclusive) for a site's 'fh' value in cut_fn.
    # NOTE(review): 'fh' is presumably a heterozygosity fraction -- confirm
    # against variant_detection.
    fh = 0.7
    site_before = 32 #polymorphism must occur before this base in a fragment
    # chi2crit = 30  (superseded: chi2crit is read from the command line below)
    
    # Exactly four positional arguments are required.  They arrive as
    # strings; qd, gq and chi2crit are converted with float() where used.
    vcfn,qd,gq,chi2crit = sys.argv[1:]
    
    
    # The output file name is built from the input path minus its extension.
    outbase = os.path.splitext(vcfn)[0]

    # Site filter: the site must carry a 'QD' field that is >= qd, have at
    # least min_indiv entries in 'indiv_gt', and have 'fh' below the fh
    # threshold.
    cut_fn = lambda sd: sd.has_key('QD') and float(sd['QD']) >= float(qd) and len(sd['indiv_gt']) >= min_indiv and sd['fh'] < fh


    print >> sys.stderr, 'loading vcf',vcfn
    vcf = variant_detection.load_vcf(vcfn,cutoff_fn=cut_fn,indiv_gt_phred_cut=float(gq))

    print >> sys.stderr, 'convert to pm/gt matrices'
    pm,gt = extract_genotypes_from_mclgr.genotypes_from_vcf_obj(vcf)

    # Labels 'A' and 'B' are paired, in order, with the comma-separated
    # prefixes held in parent_str.
    parents_prefixes = dict(zip(['A', 'B'],parent_str.split(',')))
    # For each label, collect every key of gt that starts with that prefix.
    parents = dict([(l,[k for k in gt.keys() if k.startswith(p)]) for l,p in parents_prefixes.items()])

    # Only pm entries whose key has an integer second dot-separated field
    # below site_before are passed on.  remove_targets is the concatenation
    # of all parent key lists built above.
    polarized_loci,polarized_geno = extract_genotypes_from_mclgr.genotypes_by_parent(dict([(k,v) for k,v in pm.items() if int(k.split('.')[1]) < site_before]),gt,parents,remove_targets=reduce(lambda x,y: x+y,parents.values()))

    print >> sys.stderr, 'filter X linked, chi2 critical %s' % chi2crit
    xsites,autsites = extract_genotypes_from_mclgr.filter_Xlinked_loci(polarized_loci, polarized_geno,float(chi2crit))
    print >> sys.stderr, '%s X linked, %s autosomal' % (len(xsites),len(autsites))

    print >> sys.stderr, 'write output'
    # Only xsites is handed to the writer; autsites is used solely for the
    # count reported above.  The file name encodes the QD, GQ, site_before
    # and chi2 settings of this run.
    ret = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(xsites,polarized_geno,'%s_QD%s-GQ%s_%sbp_Xchi%s.csv' % (outbase,qd,gq,site_before,chi2crit))
#!/usr/bin/env python

import os, sys
from short_read_analysis import variant_detection

# Command line: <input vcf> <output vcf>; exactly two arguments are required.
invcf, outvcf = sys.argv[1:]

# This is the place to tweak the multiallelic resolution parameters;
# see the variant_detection docstrings for the various multiallelic functions.
multiallelic_params = (-0.01, 0.5, 0.02)
multiallelic_fn = variant_detection.resolve_multiallelic_sd_fn(*multiallelic_params)

# Load invcf with the resolver applied to multiallelic sites; outvcf is
# handed to load_vcf as the write_thresholded_vcf target and store_only is
# an empty list.
load_options = dict(
    multiallelic_sites=multiallelic_fn,
    write_thresholded_vcf=outvcf,
    store_only=[],
)
vcf_obj_ma = variant_detection.load_vcf(invcf, **load_options)