minor_het.append(hetsort.pop()) while minor_het: af[(len(minor_het)) + (len(minor_hom)*2)] = prod([el[0][0] for el in hetsort]) * prod([el[0][1] for el in minor_het]) * prod([el[0][2] for el in minor_hom]) minor_hom.append(minor_het.pop()) return af def cut_fn(sd): #load sites with at least one non-ref site, 10 genotyped individuals return len(sd['indiv_gt']) > 10 and sd['mac'] >= 1 if __name__ == "__main__": #use the above, plus toss any calls with max quality < 4 (GQ) vcf = variant_detection.load_vcf('/n/hoekstrafs1/test-stampy/110910-lane5_stampy.vcf', cutoff_fn=cut_fn, indiv_gt_phred_cut=4) dephred = lambda x: 10**(x/-10.) l3_li = [] for v in vcf.values()[9]['indiv_gt'].values(): l3 = [dephred(int(p)) for p in v['PL'].split(',')] l3_li.append(([l/sum(l3) for l in l3],v)) hetsort,minor_het,minor_hom,af = init_data(l3_li) af = af_method1(hetsort,minor_het,minor_hom,af) plot(*Util.dezip(sorted(af.items()))) len(af) == 2*len(l3_li) #can be false, but so far all missing L = 0.0 (so irrelevant)
# Filter a VCF down to X-linked loci and write them out as cross genotypes.
#
# Command line: <vcf file> <min QD> <min GQ> <chi2 critical value>
#
# NOTE(review): `parent_str` (comma-separated name prefixes for parents A and
# B) is not assigned anywhere in the visible part of this script -- it must be
# defined before this point; confirm against the full file.

# Fixed filter settings.  The commented-out values (gq = 20, chi2crit = 30)
# are kept for reference; both names are now read from the command line.
#gq = 20
min_indiv = 50
fh = 0.7
site_before = 32  # polymorphism must occur before this base in a fragment
#chi2crit = 30

vcfn, qd, gq, chi2crit = sys.argv[1:]

# Convert the numeric arguments once, up front: a malformed value now fails
# immediately instead of after the VCF load and genotype conversion, and
# cut_fn no longer re-parses `qd` on every site.  The original strings are
# kept because they are embedded verbatim in messages and the output name.
qd_min = float(qd)
gq_min = float(gq)
chi2_threshold = float(chi2crit)

outbase = os.path.splitext(vcfn)[0]


def cut_fn(sd):
    """Site filter passed to ``variant_detection.load_vcf``.

    A site dict ``sd`` passes when it has a 'QD' entry whose float value is
    at least the QD argument, has at least ``min_indiv`` entries in
    ``sd['indiv_gt']``, and has ``sd['fh']`` below ``fh``.
    """
    return ('QD' in sd
            and float(sd['QD']) >= qd_min
            and len(sd['indiv_gt']) >= min_indiv
            and sd['fh'] < fh)


print >> sys.stderr, 'loading vcf',vcfn
vcf = variant_detection.load_vcf(vcfn, cutoff_fn=cut_fn, indiv_gt_phred_cut=gq_min)

print >> sys.stderr, 'convert to pm/gt matrices'
pm, gt = extract_genotypes_from_mclgr.genotypes_from_vcf_obj(vcf)

# Map each parent label ('A', 'B') to the gt keys that start with that
# parent's prefix.
parents_prefixes = dict(zip(['A', 'B'], parent_str.split(',')))
parents = dict(
    (label, [k for k in gt.keys() if k.startswith(prefix)])
    for label, prefix in parents_prefixes.items())

# Keep only pm entries whose second '.'-separated key field, read as an int,
# is below site_before.
early_sites = dict(
    (k, v) for k, v in pm.items()
    if int(k.split('.')[1]) < site_before)

# Every parental individual, flattened into a single list.
parent_individuals = [name for names in parents.values() for name in names]

polarized_loci, polarized_geno = extract_genotypes_from_mclgr.genotypes_by_parent(
    early_sites, gt, parents, remove_targets=parent_individuals)

print >> sys.stderr, 'filter X linked, chi2 critical %s' % chi2crit
xsites, autsites = extract_genotypes_from_mclgr.filter_Xlinked_loci(
    polarized_loci, polarized_geno, chi2_threshold)
print >> sys.stderr, '%s X linked, %s autosomal' % (len(xsites),len(autsites))

# Only the X-linked sites are written; the output name records the filter
# settings used.
print >> sys.stderr, 'write output'
ret = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(
    xsites, polarized_geno,
    '%s_QD%s-GQ%s_%sbp_Xchi%s.csv' % (outbase,qd,gq,site_before,chi2crit))
#!/usr/bin/env python
# Load a VCF with multiallelic-site resolution enabled, passing the second
# command-line argument to load_vcf as `write_thresholded_vcf`.
#
# Command line: <input vcf> <output vcf>

import os
import sys

from short_read_analysis import variant_detection

invcf, outvcf = sys.argv[1:]

# Tuning point for multiallelic resolution: these three positional values go
# straight to resolve_multiallelic_sd_fn.  See the variant_detection
# docstrings for what each one controls and for the other multiallelic
# functions available.
resolver_params = (-0.01, 0.5, 0.02)
multiallelic_fn = variant_detection.resolve_multiallelic_sd_fn(*resolver_params)

# NOTE(review): store_only=[] presumably keeps load_vcf from retaining
# per-site fields in memory, since only the written file matters here --
# confirm against the load_vcf docstring.
load_options = dict(
    multiallelic_sites=multiallelic_fn,
    write_thresholded_vcf=outvcf,
    store_only=[],
)
vcf_obj_ma = variant_detection.load_vcf(invcf, **load_options)