def single_validation(fraction=None, test_index=None): '''Run a single validation experiment with fraction% of deleted test genotypes. Returns a validation Experiment object.''' p = im.hutt('hutt.npz') e = im.v.Experiment(p, fraction=fraction, test_index=test_index) phaser = im.phase_main.main_phaser(print_times=True) e.run(phaser) (_, stats) = im.plots.plot_experiment_stats(e) print stats[np.argsort(-stats[:, 2] / stats[:, 1]), :] return e
def plot_two_families(): '''Test ancestor imputation and child POO alignment for two families on chromosome 22.''' # Parameters chrom = 22 plot = False # True save_plot = False # True debug = False # True # Read data p = im.hutt('hutt.phased.npz') q = p.pedigree.quasi_founders # aligned = set(p.haplotype.aligned_samples) t = frozenset([frozenset(im.gt.genotyped_children(p, p.pedigree.find_family_by_child(i, genotyped=False))) for i in q]) num_sibs = map(len, t) print 'Distribution of QF family sizes', util.occur_dict(num_sibs) # plot_hist_num_sibs(num_sibs) # ibd = im.index.segment_index.SegmentIndex(os.environ['OBER_OUT'] + '/index_segments') if plot: P.figure(1) s = set([x for x in t if 1049 in x][0]) - set([1049]) pa, _ = analyze_family(p, s, max_colors=4, title='Haplotype Coloring: Quasi-Founder Sibs, All, Chrom. %d' % (chrom,), plot=plot, debug=debug) if save_plot: P.savefig(os.environ['OBER'] + '/doc/poo/qf_family/hap_colors_poo.png') if plot: P.figure(2) s2 = set([x for x in t if 1049 in x][0]) analyze_family(p, s2, max_colors=4, title='Haplotype Coloring: Quasi-Founder Sibs, POOC Chrom. %d' % (chrom,), plot=plot, debug=debug) if save_plot: P.savefig(os.environ['OBER'] + '/doc/poo/qf_family/hap_colors_all.png') if plot: P.figure(3) f = p.find_family(10, 1414) # 4 children, genotyped parents s3 = f.children analyze_family(p, s3, max_colors=4, title='Haplotype Coloring: Non-Founder Sibs Chrom. %d' % (chrom,), plot=plot, debug=debug) if save_plot: P.savefig(os.environ['OBER'] + '/doc/poo/nf_family/hap_colors.png') if plot: P.show() print im.color.hap_color.best_hap_alignment_to_colors(pa) print 'Regions', pa.num_regions print 'Parental haplotype coverage %', parent_coverage_fraction(pa, p) print 'Children coverage by parental haplotypes', pa.color_sequence_coverage(np.arange(4))
decide whether to genotype them with a dense or sparse Illumina chip. Created on July 15, 2013 @author: Oren Livne <*****@*****.**> ============================================================ ''' import impute as im, os, numpy as np, matplotlib.pyplot as P # Load data ped = im.hutt_pedigree() path = os.environ['OBER_OUT'] + '/kids' chrom = 22 prefix = path + '/cytosnp/chr%d/cytosnp.imputed' % (chrom, ) illumina = im.io.read_npz(prefix + '.phased.npz') affy = im.hutt('hutt.phased.npz') # Large family - with lots of sibs of one of the new Hutt kids #parents = 246, 389 parents = 288, 465 f = ped.find_family(parents[0], parents[1]) # Compare Illumina, Affy IBD sharing pictures P.figure(1) im.plots.plot_family_comparison(affy, f, 1, xaxis='bp') P.savefig(os.environ['OBER'] + '/doc/kids/family_%d_%d_affy.png' % parents) P.figure(2) im.plots.plot_family_comparison(illumina, f, 1, xaxis='bp') P.savefig(os.environ['OBER'] + '/doc/kids/family_%d_%d_illumina.png' % parents)
#!/usr/bin/env python ''' ============================================================ Test GERMLINE IBD on 507's ungenotyped family. Created on September 15, 2012 @author: Oren Livne <*****@*****.**> ============================================================ ''' import impute as im import numpy as np p = im.hutt('hutt.stage3.npz') q = im.hutt('hutt.stage3.npz') phaser = im.phase_distant.family_sib_comparison_phaser() i = 507 phaser.run(q, im.PhaseParam(single_member=i, debug=True)) print np.where(p.haplotype.data[:, i, :] != q.haplotype.data[:, i, :])
def hutt_ibd_segments(hutt_file, i, ai, j, bj, **kwargs):
    '''Load the phasing npz file hutt_file (given relative to the chr22
    directory) and return the result of problem_ibd_segments() for it.

    i, ai, j, bj and all keyword arguments are forwarded unchanged.'''
    problem = im.hutt(hutt_file)
    return problem_ibd_segments(problem, i, ai, j, bj, **kwargs)
# NOTE(review): the next two print statements look like the tail of a function
# body whose header lies outside this chunk (they use pa and p before the main
# block below assigns them) -- confirm their indentation against the full file.
print 'Parental haplotype coverage %', parent_coverage_fraction(pa, p)
print 'Children coverage by parental haplotypes', pa.color_sequence_coverage(np.arange(4))
####################################################################################
if __name__ == '__main__':
    ''' -------------------------------------------------- Main program -------------------------------------------------- '''
    # Disabled: accumulate parental coverage over chromosomes
    # parent_coverage = Counter()
    # for chrom in CHROMOSOMES[-1:]:
    #     print 'Chromosome', chrom
    #     p = im.io.read_npz('%s/phasing/chr%d/hutt.phased.npz' % (os.environ['OBER_OUT'], chrom))
    #     # plot_two_families()
    #     parent_coverage_chrom = qf_families_parent_coverage(p)
    #     parent_coverage += parent_coverage_chrom
    #     print parent_coverage_chrom
    # parents = set(x[0] for x in parent_coverage.iterkeys())
    # a = [(b, (parent_coverage[(b, 0)] + parent_coverage[(b, 1)]) / (2.*sum(ChromDao.TOTAL_BP_TYPED[-1:]))) for b in set(x[0] for x in parent_coverage.iterkeys())]
    # print a

    # Load the phased problem and analyze the genotyped children of the
    # family that sample 640 is a child of
    p = im.hutt('hutt.phased.npz')
    # pa, segments = analyze_family(p, np.setdiff1d(im.gt.genotyped_children(p, f), [1069]))#, debug=True)
    # f = p.find_family_by_child(998, genotyped=False)
    # pa, segments = analyze_family(p, im.gt.genotyped_children(p, f))
    f = p.find_family_by_child(640, genotyped=False)
    pa, segments = analyze_family(p, im.gt.genotyped_children(p, f))
#!/usr/bin/env python
"""
============================================================
Imputation test - chromosome 22.

Created on February 4, 2013
@author: Oren Livne <*****@*****.**>
============================================================
"""
import impute as im
import numpy as np
import os

OBER = os.environ["OBER"]

# Phased problem and the IBD segment set loaded for its genotyped samples
p = im.hutt("hutt.phased.npz")
segment_file = OBER + "/out/segments.out"
ibd = im.smart_segment_set.SmartSegmentSet.load(p.pedigree.num_genotyped, segment_file)

# Imputation set read from the rare-variant npz file
imputation_file = OBER + "/data/impute/rare/rare.npz"
t = im.imputation.ImputationSet.from_file(p.pedigree, imputation_file)  # @UndefinedVariable

# SNP list out of all SNPs in t to impute: those whose chrom field equals 22
on_chrom_22 = t.snp["chrom"] == 22
snps = np.where(on_chrom_22)[0]
im.imputation.iibd.impute(p.haplotype, ibd, t, snp=snps, debug=False)
Plot the recombination rate lambda=lambda(f) where f is the inbreeding coefficient. Per discussion with Mark Abney on IBD HMM for haplotypes. Created on January 23, 2013 @author: Oren Livne <*****@*****.**> ============================================================ ''' import matplotlib.pyplot as P, impute as im, os lam = im.hap_lambda.lambda_vs_f() F, L, S = im.hap_lambda.lambda_mean(lam) out_dir = os.environ['OBER'] + '/doc/ibd' # Bin lambda, f and calculate mean and stddev of each bin so that we can see trends P.close(1) P.figure(1) im.hap_lambda.plot_lambda_vs_f(lam) # P.title('Recombination Rate vs. Inbreeding in the Hutterities') P.show() P.savefig(out_dir + '/lambda_vs_f.eps') # Plot lambda std dev vs. mean lambda in family children P.close(2) problem = im.hutt('hutt.npz') P.figure(2) im.hap_lambda.plot_lambda_std(problem) P.show() P.savefig(out_dir + '/lambda_child.eps')
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program: plot the number of IBD pairs covering a region against
    # the genetic length of that region, and save the plot as a PNG.
    # --------------------------------------------------
    out_dir = os.environ['OBER'] + '/doc/imputation/validation/ibd-optimization'

    # Data format: [region start snp, region stop snp, # IBD pairs in 1415x1415 IBD set]
    # Data obtained manually with commands like
    # i=50; j=3168; cat segments.out | awk -v i=$i -vj=$j '($1 <= i) && (j <= $2)' | wc -l
    # (or into file and then wc -l)
    pairs = [((50, 3168), 1428),
             ((550, 2650), 7554),
             ((1050, 2150), 34630),
             ((1300, 1900), 62017),
             ((1425, 1775), 94152)]

    p = im.hutt('hutt.npz')
    cm = p.info.snp['dist_cm']
    chrom = p.info.snp['chrom'][0]

    # Region length = difference of the 'dist_cm' values at its stop and start SNPs
    region_length = [cm[stop] - cm[start] for (start, stop), _ in pairs]
    num_pairs = [count for _, count in pairs]

    P.figure(1)
    P.clf()
    P.semilogy(region_length, num_pairs, 'bo-')
    P.grid(True)
    P.title('Chromosome %d' % (chrom,))
    P.xlabel('Region Length [cM]')
    P.ylabel('# IBD Pairs')
    # Save BEFORE showing: when show() blocks, the figure is closed by the time
    # it returns, and a later savefig() would write a new, empty figure.
    P.savefig('%s/num_ibd_pairs.chr%d.png' % (out_dir, chrom,))
    P.show()
#!/usr/bin/env python ''' ============================================================ Test GERMLINE IBD on 507's ungenotyped family. Created on September 15, 2012 @author: Oren Livne <*****@*****.**> ============================================================ ''' import impute as im import numpy as np p = im.hutt('hutt.stage3.npz') q = im.hutt('hutt.stage3.npz') phaser = im.phase_distant.family_sib_comparison_phaser() i = 507 phaser.run(q, im.PhaseParam(single_member=i, debug=True)) print np.where(p.haplotype.data[:,i,:] != q.haplotype.data[:,i,:])
'''
============================================================
Plot phasing % after the different phasing stages.

Created on August 16, 2012
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im, matplotlib.pyplot as P, numpy as np, os
from numpy.core.function_base import linspace

P.figure(1)
P.clf()
# NOTE(review): pyplot.hold() no longer exists in Matplotlib 3.x; this call
# only works with older Matplotlib releases.
P.hold(True)

# Draw the fill-fraction curve of each stage on the same axes
p = im.hutt('hutt.phased.npz')
d5 = im.plots.plot_fill_fraction(p, color='b', label='Stage 5')
p = im.hutt('hutt.stage6.npz')
d = im.plots.plot_fill_fraction(p, color='r', label='Stage 6')

zoom = 0.96
ticks = 10

# Lower y limit: 95% of the smaller of the two curves' first values
first_value = min(d[:, 1][0], d5[:, 1][0])
min_y = 0.95 * first_value
# Upper x limit: just past the first stage-6 entry whose value exceeds zoom
above_zoom = np.where(d[:, 1] > zoom)[0]
max_x = above_zoom[0]
P.xlim([0, max_x + 1])
P.ylim([min_y, 1.0])

# Evenly spaced y ticks labeled with three decimals
yticks = linspace(min_y, 1.0, ticks)
tick_labels = ['%.3f' % (t,) for t in yticks]
P.yticks(yticks, tick_labels)
P.title('Hutterites Phasing Coverage, Chromosome 22')
#!/usr/bin/env python
'''
============================================================
Find Lung function study ID sublist of the entire Hutterites
problem set ID list. Output indices into the latter.

Created on Feb 18, 2013
@author: Oren Livne <*****@*****.**>
============================================================
'''
import sys, os, numpy as np, impute as im
#from optparse import OptionParser

####################################################################################
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program: write to stdout, one per line, the indices of the
    # genotyped problem samples whose ID also appears among the genotyped
    # samples of the lung study pedigree.
    # --------------------------------------------------
    d = os.environ['OBER_DATA'] + '/lung'
    s = im.io_pedigree.read(d + '/hutt-lung.pdg.tfam', genotyped_id_file=d + '/hutt-lung-samples.txt')
    p = im.hutt('hutt.stage5.npz')

    # Slice both ID lists once, then do a single vectorized membership test
    # instead of re-slicing and scanning the lung IDs for every problem sample.
    lung_ids = s.sample_id[0:s.num_genotyped]
    problem_ids = p.pedigree.sample_id[0:p.pedigree.num_genotyped]
    in_lung_study = np.in1d(problem_ids, lung_ids)
    np.savetxt(sys.stdout, np.where(in_lung_study)[0], fmt='%d')