def _test_():
    """
    Compare EMMAX permutation thresholds for singleton, doubleton and common SNPs.

    Simulates three SNP classes (m=1000 SNPs each, n=500), stacks them as
    common / singleton / doubleton rows and standardises every SNP (row).
    Traits are then simulated from the stacked matrix and, for each trait:

    - the REML pseudo-heritability of a mixed model with the scaled IBD
      kinship as random effect is printed,
    - EMMAX is run on all SNPs, and a phenotype histogram plus QQ-plots
      split by SNP class are saved under ``$HOME/tmp/test_<trait index>``,
    - a 1000-permutation EMMAX test is run per SNP class (on the
      un-standardised genotypes) and the first ``threshold_05`` entry is
      recorded.

    Finally the mean and standard deviation of the recorded thresholds are
    printed per class (singletons, doubletons, common). Returns nothing.
    """
    singleton_snps = genotypes.simulate_k_tons(n=500, m=1000)
    doubleton_snps = genotypes.simulate_k_tons(k=2, n=500, m=1000)
    common_snps = genotypes.simulate_common_genotypes(500, 1000)

    # Row order (common, singletons, doubletons) determines the p-value
    # slices handed to the QQ-plot call further down.
    snps = sp.vstack([common_snps, singleton_snps, doubleton_snps])
    print(snps)
    # Standardise each SNP (row): zero mean, unit standard deviation.
    snps_t = snps.T
    snps = ((snps_t - sp.mean(snps_t, 0)) / sp.std(snps_t, 0)).T
    print('%s %s' % (snps, snps.shape))

    file_prefix = os.environ['HOME'] + '/tmp/test'
    phen_list = phenotypes.simulate_traits_w_snps_to_hdf5(
        snps, hdf5_file_prefix=file_prefix, num_traits=30, p=0.1)

    # The kinship matrix depends only on the SNPs, so it is built once
    # instead of once per trait.
    # NOTE(review): assumes the lm/kinship helpers do not modify K in place.
    K = kinship.scale_k(kinship.calc_ibd_kinship(snps))

    # 1.0 / (20 * 1000.0) == 0.05 / 1000; presumably the Bonferroni 5% level
    # for the 1000 SNPs of one class, printed next to the permutation result.
    bonf_threshold = 1.0 / (20 * 1000.0)

    singletons_thres = []
    doubletons_thres = []
    common_thres = []
    perm_targets = [
        (singleton_snps, singletons_thres),
        (doubleton_snps, doubletons_thres),
        (common_snps, common_thres),
    ]

    for i, y in enumerate(phen_list['phenotypes']):
        lmm = lm.LinearMixedModel(y)
        lmm.add_random_effect(K)
        r1 = lmm.get_REML()
        print('pseudo_heritability: %s' % (r1['pseudo_heritability'],))

        ex_res = lm.emmax(snps, y, K)

        plt.figure()
        plt.hist(y, 50)
        plt.savefig('%s_%d_phen.png' % (file_prefix, i))
        # close() instead of clf(): clf() only empties the figure and leaves
        # it registered with pyplot, so one figure per trait would pile up.
        plt.close()

        ps = ex_res['ps']
        agr.plot_simple_qqplots_pvals(
            '%s_%d' % (file_prefix, i),
            [ps[:1000], ps[1000:2000], ps[2000:]],
            result_labels=['Common SNPs', 'Singletons', 'Doubletons'],
            line_colors=['b', 'r', 'y'],
            num_dots=200, max_neg_log_val=3)

        # Cholesky permutations..
        for class_snps, thresholds in perm_targets:
            res = lm.emmax_perm_test(class_snps, y, K, num_perm=1000)
            print('%s %s' % (bonf_threshold, res['threshold_05']))
            thresholds.append(res['threshold_05'][0])

        # TODO: ATT permutations (Implement)
        # TODO: PC permutations (Implement)

    for thresholds in (singletons_thres, doubletons_thres, common_thres):
        print('%s %s' % (sp.mean(thresholds), sp.std(thresholds)))
def _test_scz_():
    """
    Run the permutation-threshold simulation with fully polygenic traits.

    Despite its name this function loads no schizophrenia data: it only
    works on simulated genotypes. Three SNP classes are simulated (m=1000
    SNPs each, n=500) and stacked as common / singleton / doubleton rows
    (without standardisation). Traits are simulated from the stacked matrix
    with ``p=1.0`` and, for each trait:

    - the REML pseudo-heritability of a mixed model with the scaled IBD
      kinship as random effect is printed,
    - EMMAX is run on all SNPs, and a phenotype histogram plus QQ-plots
      split by SNP class are saved under the hard-coded prefix
      ``/home/bv25/tmp/test_<trait index>``,
    - a 1000-permutation EMMAX test is run per SNP class and the first
      ``threshold_05`` entry is recorded.

    Finally the mean and standard deviation of the recorded thresholds are
    printed per class (singletons, doubletons, common). Returns nothing.
    """
    # Simulated stand-in data (no real data set is read here).
    singleton_snps = genotypes.simulate_k_tons(n=500, m=1000)
    doubleton_snps = genotypes.simulate_k_tons(k=2, n=500, m=1000)
    common_snps = genotypes.simulate_common_genotypes(500, 1000)

    # Row order (common, singletons, doubletons) determines the p-value
    # slices handed to the QQ-plot call further down.
    snps = sp.vstack([common_snps, singleton_snps, doubleton_snps])
    print(snps)

    phen_list = phenotypes.simulate_traits(
        snps, hdf5_file_prefix='/home/bv25/tmp/test', num_traits=30, p=1.0)

    # The kinship matrix depends only on the SNPs, so it is built once
    # instead of once per trait.
    # NOTE(review): assumes the lm/kinship helpers do not modify K in place.
    K = kinship.scale_k(kinship.calc_ibd_kinship(snps))

    # 1.0 / (20 * 1000.0) == 0.05 / 1000; presumably the Bonferroni 5% level
    # for the 1000 SNPs of one class, printed next to the permutation result.
    bonf_threshold = 1.0 / (20 * 1000.0)

    singletons_thres = []
    doubletons_thres = []
    common_thres = []
    perm_targets = [
        (singleton_snps, singletons_thres),
        (doubleton_snps, doubletons_thres),
        (common_snps, common_thres),
    ]

    for i, y in enumerate(phen_list):
        lmm = lm.LinearMixedModel(y)
        lmm.add_random_effect(K)
        r1 = lmm.get_REML()
        print('pseudo_heritability: %s' % (r1['pseudo_heritability'],))

        ex_res = lm.emmax(snps, y, K)

        plt.figure()
        plt.hist(y, 50)
        plt.savefig('/home/bv25/tmp/test_%d_phen.png' % i)
        # close() instead of clf(): clf() only empties the figure and leaves
        # it registered with pyplot, so one figure per trait would pile up.
        plt.close()

        ps = ex_res['ps']
        agr.plot_simple_qqplots_pvals(
            '/home/bv25/tmp/test_%d' % i,
            [ps[:1000], ps[1000:2000], ps[2000:]],
            result_labels=['Common SNPs', 'Singletons', 'Doubletons'],
            line_colors=['b', 'r', 'y'],
            num_dots=200, max_neg_log_val=3)

        # Now permutations..
        for class_snps, thresholds in perm_targets:
            res = lm.emmax_perm_test(class_snps, y, K, num_perm=1000)
            print('%s %s' % (bonf_threshold, res['threshold_05']))
            thresholds.append(res['threshold_05'][0])

    for thresholds in (singletons_thres, doubletons_thres, common_thres):
        print('%s %s' % (sp.mean(thresholds), sp.std(thresholds)))
def _test_scz_():
    """
    Run the permutation-threshold simulation with fully polygenic traits.

    Despite its name this function loads no schizophrenia data: it only
    works on simulated genotypes. Three SNP classes are simulated (m=1000
    SNPs each, n=500) and stacked as common / singleton / doubleton rows
    (without standardisation). Traits are simulated from the stacked matrix
    with ``p=1.0`` and, for each trait:

    - the REML pseudo-heritability of a mixed model with the scaled IBD
      kinship as random effect is printed,
    - EMMAX is run on all SNPs, and a phenotype histogram plus QQ-plots
      split by SNP class are saved under the hard-coded prefix
      ``/home/bv25/tmp/test_<trait index>``,
    - a 1000-permutation EMMAX test is run per SNP class and the first
      ``threshold_05`` entry is recorded.

    Finally the mean and standard deviation of the recorded thresholds are
    printed per class (singletons, doubletons, common). Returns nothing.
    """
    # Simulated stand-in data (no real data set is read here).
    singleton_snps = genotypes.simulate_k_tons(n=500, m=1000)
    doubleton_snps = genotypes.simulate_k_tons(k=2, n=500, m=1000)
    common_snps = genotypes.simulate_common_genotypes(500, 1000)

    # Row order (common, singletons, doubletons) determines the p-value
    # slices handed to the QQ-plot call further down.
    snps = sp.vstack([common_snps, singleton_snps, doubleton_snps])
    print(snps)

    phen_list = phenotypes.simulate_traits(
        snps, hdf5_file_prefix='/home/bv25/tmp/test', num_traits=30, p=1.0)

    # The kinship matrix depends only on the SNPs, so it is built once
    # instead of once per trait.
    # NOTE(review): assumes the lm/kinship helpers do not modify K in place.
    K = kinship.calc_ibd_kinship(snps)
    K = kinship.scale_k(K)

    # 1.0 / (20 * 1000.0) == 0.05 / 1000; presumably the Bonferroni 5% level
    # for the 1000 SNPs of one class, printed next to the permutation result.
    bonf_threshold = 1.0 / (20 * 1000.0)

    singletons_thres = []
    doubletons_thres = []
    common_thres = []
    # (genotypes of one class, list collecting that class's thresholds)
    perm_targets = [
        (singleton_snps, singletons_thres),
        (doubleton_snps, doubletons_thres),
        (common_snps, common_thres),
    ]

    for i, y in enumerate(phen_list):
        lmm = lm.LinearMixedModel(y)
        lmm.add_random_effect(K)
        r1 = lmm.get_REML()
        print('pseudo_heritability: %s' % (r1['pseudo_heritability'],))

        ex_res = lm.emmax(snps, y, K)

        plt.figure()
        plt.hist(y, 50)
        plt.savefig('/home/bv25/tmp/test_%d_phen.png' % i)
        # close() instead of clf(): clf() only empties the figure and leaves
        # it registered with pyplot, so one figure per trait would pile up.
        plt.close()

        ps = ex_res['ps']
        agr.plot_simple_qqplots_pvals(
            '/home/bv25/tmp/test_%d' % i,
            [ps[:1000], ps[1000:2000], ps[2000:]],
            result_labels=['Common SNPs', 'Singletons', 'Doubletons'],
            line_colors=['b', 'r', 'y'],
            num_dots=200, max_neg_log_val=3)

        # Now permutations..
        for class_snps, thresholds in perm_targets:
            res = lm.emmax_perm_test(class_snps, y, K, num_perm=1000)
            print('%s %s' % (bonf_threshold, res['threshold_05']))
            thresholds.append(res['threshold_05'][0])

    for thresholds in (singletons_thres, doubletons_thres, common_thres):
        print('%s %s' % (sp.mean(thresholds), sp.std(thresholds)))
def perform_cegs_gwas(kinship_type='ibd', phen_type='medians'):
    """
    Perform a simple MLM GWAS for the 8 traits

    The 8 traits are the four phenotypes ('Protein', 'Sugar', 'Triglyceride',
    'weight') in each of the two environments ('mated', 'virgin'). For every
    trait the genotype and phenotype data are coordinated, a kinship matrix
    is computed, an EMMAX F-test scan is run, and a QQ-plot and a Manhattan
    plot are written below /Users/bjarnivilhjalmsson/data/tmp/.

    kinship_type: 'ibs' or 'ibd'; selects kinship.calc_ibs_kinship or
                  kinship.calc_ibd_kinship.
    phen_type:    'means' or 'medians'; selects the 'Y_means' or 'Y_medians'
                  values as the response.

    Raises NotImplementedError for any other kinship_type or phen_type.
    Returns nothing.
    """
    import hdf5_data
    import kinship
    import linear_models as lm
    import time
    import scipy as sp
    from matplotlib import pyplot as plt
    import analyze_gwas_results as agr

    phen_dict = hdf5_data.parse_cegs_drosophila_phenotypes()

    phenotypes = ['Protein', 'Sugar', 'Triglyceride', 'weight']
    envs = ['mated', 'virgin']
    # Chromosomes drawn in cyan; every other chromosome is drawn in magenta.
    cyan_chroms = ['2L', '2LHet', '3L', '3LHet', '4', 'X', 'XHet']

    for phenotype in phenotypes:
        for env in envs:
            print('%s %s' % (phenotype, env))
            s1 = time.time()
            d = hdf5_data.coordinate_cegs_genotype_phenotype(
                phen_dict, phenotype, env)

            print('Calculating kinship')
            if kinship_type == 'ibs':
                K = kinship.calc_ibs_kinship(d['snps'])
            elif kinship_type == 'ibd':
                K = kinship.calc_ibd_kinship(d['snps'])
            else:
                # NotImplemented is a comparison sentinel, not an exception;
                # raising it fails with a TypeError instead.
                raise NotImplementedError(
                    'Unsupported kinship_type: %r' % (kinship_type,))

            if phen_type == 'means':
                lmm = lm.LinearMixedModel(d['Y_means'])
            elif phen_type == 'medians':
                lmm = lm.LinearMixedModel(d['Y_medians'])
            else:
                raise NotImplementedError(
                    'Unsupported phen_type: %r' % (phen_type,))
            lmm.add_random_effect(K)

            print("Running EMMAX")
            res = lmm.emmax_f_test(d['snps'], emma_num=1000)
            print('Mean p-value: %s' % (sp.mean(res['ps']),))

            secs = time.time() - s1
            if secs > 60:
                # Floor division keeps the minute count integral on every
                # Python version.
                mins = int(secs) // 60
                secs = secs - mins * 60
                print('Took %d mins and %f seconds.' % (mins, secs))
            else:
                print('Took %f seconds.' % (secs))

            # Now generating QQ-plots
            label_str = '%s_%s_%s_%s' % (kinship_type, phenotype, env, phen_type)
            agr.plot_simple_qqplots_pvals(
                '/Users/bjarnivilhjalmsson/data/tmp/cegs_qq_%s' % (label_str),
                [res['ps']], result_labels=[label_str], line_colors=['green'],
                num_dots=1000, title=None, max_neg_log_val=6)

            # Lay the chromosomes out side by side on one x-axis: each
            # chromosome's positions are shifted by the summed maxima of the
            # chromosomes placed before it.
            chromosomes = d['positions'][:, 0]
            positions = sp.array(d['positions'][:, 1], 'int32')
            x_positions = []
            y_log_pvals = []
            colors = []
            x_shift = 0
            for chrom in sp.unique(chromosomes):
                colors.append('c' if chrom in cyan_chroms else 'm')
                chrom_filter = sp.in1d(chromosomes, chrom)
                positions_slice = positions[chrom_filter]
                x_positions.append(positions_slice + x_shift)
                x_shift += positions_slice.max()
                y_log_pvals.append(-sp.log10(res['ps'][chrom_filter]))

            m = len(positions)
            # -log10(0.05 / m): height of the dashed horizontal line.
            log_bonf = -sp.log10(1 / (20.0 * m))
            print('%s %s' % (m, log_bonf))

            # Manhattan plot
            plt.figure(figsize=(12, 4))
            plt.axes([0.03, 0.1, 0.95, 0.8])
            for xs, ys, color in zip(x_positions, y_log_pvals, colors):
                plt.plot(xs, ys, c=color, ls='', marker='.')
            xmin, xmax = plt.xlim()
            plt.hlines(log_bonf, xmin, xmax, colors='k', linestyles='--',
                       alpha=0.5)
            plt.title('%s, %s' % (phenotype, env))
            plt.savefig(
                '/Users/bjarnivilhjalmsson/data/tmp/cegs_gwas_%s_%s_%s_%s.png'
                % (kinship_type, phenotype, env, phen_type))
            # Release the figure; otherwise one stays open per trait.
            plt.close()
def perform_cegs_gwas(kinship_type='ibd', phen_type='medians'):
    """
    Run a basic mixed-model GWAS on each of the 8 traits.

    A trait is one of the phenotypes 'Protein', 'Sugar', 'Triglyceride' or
    'weight' measured in one of the environments 'mated' or 'virgin'. Per
    trait, the function coordinates genotypes with phenotypes, computes the
    requested kinship ('ibs' or 'ibd'), runs an EMMAX F-test on the
    'means' or 'medians' values, reports timing, and saves a QQ-plot and a
    Manhattan plot below /Users/bjarnivilhjalmsson/data/tmp/.

    Raises NotImplementedError when kinship_type or phen_type is not one of
    the values listed above. Returns nothing.
    """
    import hdf5_data
    import kinship
    import linear_models as lm
    import time
    import scipy as sp
    from matplotlib import pyplot as plt
    import analyze_gwas_results as agr

    phen_dict = hdf5_data.parse_cegs_drosophila_phenotypes()

    for phenotype in ['Protein', 'Sugar', 'Triglyceride', 'weight']:
        for env in ['mated', 'virgin']:
            print('%s %s' % (phenotype, env))
            started = time.time()
            d = hdf5_data.coordinate_cegs_genotype_phenotype(
                phen_dict, phenotype, env)

            print('Calculating kinship')
            if kinship_type == 'ibd':
                K = kinship.calc_ibd_kinship(d['snps'])
            elif kinship_type == 'ibs':
                K = kinship.calc_ibs_kinship(d['snps'])
            else:
                raise NotImplementedError

            if phen_type == 'medians':
                trait_values = d['Y_medians']
            elif phen_type == 'means':
                trait_values = d['Y_means']
            else:
                raise NotImplementedError
            lmm = lm.LinearMixedModel(trait_values)
            lmm.add_random_effect(K)

            print("Running EMMAX")
            res = lmm.emmax_f_test(d['snps'], emma_num=1000)
            print('%s %s' % ('Mean p-value:', sp.mean(res['ps'])))

            # Report the wall-clock time spent on this trait.
            elapsed = time.time() - started
            if elapsed > 60:
                whole_minutes = int(elapsed) // 60
                remainder = elapsed - whole_minutes * 60
                print('Took %d mins and %f seconds.'
                      % (whole_minutes, remainder))
            else:
                print('Took %f seconds.' % (elapsed))

            # QQ-plot of the scan's p-values.
            label_str = '%s_%s_%s_%s' % (
                kinship_type, phenotype, env, phen_type)
            qq_prefix = ('/Users/bjarnivilhjalmsson/data/tmp/cegs_qq_%s'
                         % (label_str))
            agr.plot_simple_qqplots_pvals(
                qq_prefix, [res['ps']], result_labels=[label_str],
                line_colors=['green'], num_dots=1000, title=None,
                max_neg_log_val=6)

            # One (x, y, colour) track per chromosome; x is the position
            # shifted by the summed maxima of the chromosomes before it.
            chromosomes = d['positions'][:, 0]
            positions = sp.array(d['positions'][:, 1], 'int32')
            tracks = []
            offset = 0
            for chrom in sp.unique(chromosomes):
                if chrom in ['2L', '2LHet', '3L', '3LHet', '4', 'X', 'XHet']:
                    colour = 'c'
                else:
                    colour = 'm'
                mask = sp.in1d(chromosomes, chrom)
                chrom_positions = positions[mask]
                xs = chrom_positions + offset
                offset += chrom_positions.max()
                ys = -sp.log10(res['ps'][mask])
                tracks.append((xs, ys, colour))

            num_snps = len(positions)
            bonf_height = -sp.log10(1 / (20.0 * num_snps))
            print('%s %s' % (num_snps, bonf_height))

            # Manhattan plot with a dashed line at bonf_height.
            plt.figure(figsize=(12, 4))
            plt.axes([0.03, 0.1, 0.95, 0.8])
            for xs, ys, colour in tracks:
                plt.plot(xs, ys, c=colour, ls='', marker='.')
            x_low, x_high = plt.xlim()
            plt.hlines(bonf_height, x_low, x_high, colors='k',
                       linestyles='--', alpha=0.5)
            plt.title('%s, %s' % (phenotype, env))
            out_png = ('/Users/bjarnivilhjalmsson/data/tmp/cegs_gwas_%s_%s_%s_%s.png'
                       % (kinship_type, phenotype, env, phen_type))
            plt.savefig(out_png)
def _test_():
    """
    Compare EMMAX permutation thresholds for singleton, doubleton and common SNPs.

    Simulates three SNP classes (m=1000 SNPs each, n=500), stacks them as
    common / singleton / doubleton rows and standardises every SNP (row).
    Traits are then simulated from the stacked matrix and, for each trait:

    - the REML pseudo-heritability of a mixed model with the scaled IBD
      kinship as random effect is printed,
    - EMMAX is run on all SNPs, and a phenotype histogram plus QQ-plots
      split by SNP class are saved under ``$HOME/tmp/test_<trait index>``,
    - a 1000-permutation EMMAX test is run per SNP class (on the
      un-standardised genotypes) and the first ``threshold_05`` entry is
      recorded.

    Finally the mean and standard deviation of the recorded thresholds are
    printed per class (singletons, doubletons, common). Returns nothing.
    """
    singleton_snps = genotypes.simulate_k_tons(n=500, m=1000)
    doubleton_snps = genotypes.simulate_k_tons(k=2, n=500, m=1000)
    common_snps = genotypes.simulate_common_genotypes(500, 1000)

    # Row order (common, singletons, doubletons) determines the p-value
    # slices handed to the QQ-plot call further down.
    snps = sp.vstack([common_snps, singleton_snps, doubleton_snps])
    print(snps)
    # Standardise each SNP (row): zero mean, unit standard deviation.
    snps = snps.T
    snps = (snps - sp.mean(snps, 0)) / sp.std(snps, 0)
    snps = snps.T
    print('%s %s' % (snps, snps.shape))

    file_prefix = os.environ['HOME'] + '/tmp/test'
    phen_list = phenotypes.simulate_traits_w_snps_to_hdf5(
        snps, hdf5_file_prefix=file_prefix, num_traits=30, p=0.1)

    # The kinship matrix depends only on the SNPs, so it is built once
    # instead of once per trait.
    # NOTE(review): assumes the lm/kinship helpers do not modify K in place.
    K = kinship.calc_ibd_kinship(snps)
    K = kinship.scale_k(K)

    # 1.0 / (20 * 1000.0) == 0.05 / 1000; presumably the Bonferroni 5% level
    # for the 1000 SNPs of one class, printed next to the permutation result.
    bonf_threshold = 1.0 / (20 * 1000.0)

    singletons_thres = []
    doubletons_thres = []
    common_thres = []
    # (genotypes of one class, list collecting that class's thresholds)
    perm_targets = [
        (singleton_snps, singletons_thres),
        (doubleton_snps, doubletons_thres),
        (common_snps, common_thres),
    ]

    for i, y in enumerate(phen_list['phenotypes']):
        lmm = lm.LinearMixedModel(y)
        lmm.add_random_effect(K)
        r1 = lmm.get_REML()
        print('pseudo_heritability: %s' % (r1['pseudo_heritability'],))

        ex_res = lm.emmax(snps, y, K)

        plt.figure()
        plt.hist(y, 50)
        plt.savefig('%s_%d_phen.png' % (file_prefix, i))
        # close() instead of clf(): clf() only empties the figure and leaves
        # it registered with pyplot, so one figure per trait would pile up.
        plt.close()

        ps = ex_res['ps']
        agr.plot_simple_qqplots_pvals(
            '%s_%d' % (file_prefix, i),
            [ps[:1000], ps[1000:2000], ps[2000:]],
            result_labels=['Common SNPs', 'Singletons', 'Doubletons'],
            line_colors=['b', 'r', 'y'],
            num_dots=200, max_neg_log_val=3)

        # Cholesky permutations..
        for class_snps, thresholds in perm_targets:
            res = lm.emmax_perm_test(class_snps, y, K, num_perm=1000)
            print('%s %s' % (bonf_threshold, res['threshold_05']))
            thresholds.append(res['threshold_05'][0])

        # TODO: ATT permutations (Implement)
        # TODO: PC permutations (Implement)

    for thresholds in (singletons_thres, doubletons_thres, common_thres):
        print('%s %s' % (sp.mean(thresholds), sp.std(thresholds)))