def main(p_dict):
    """Run the coordination step and write the result to a new HDF5 file.

    Parses the summary statistics into the output file, coordinates them with
    the genotype data, and prints a summary of the step.

    Keys of ``p_dict`` read directly by this function:
        vbim, vgf, gf: candidate sources for the BIM file of SNPs, tried in
            that priority order ('.bim' is appended to ``vgf`` / ``gf``).
        out: path of the HDF5 file to create; it must not already exist.
        ssf: summary statistics filename (recorded in the summary).
        max_freq_discrep, maf, skip_coordination, debug: forwarded to
            ``coordinate_datasets``.
    ``p_dict`` is also passed whole to ``sum_stats_parsers.parse_sum_stats``.

    Raises:
        Exception: if the output file already exists.
    """
    # Resolve the BIM file: explicit BIM file first, then the validation
    # genotype prefix, then the LD reference genotype prefix.
    bimfile = None
    if p_dict['vbim'] is not None:
        bimfile = p_dict['vbim']
    elif p_dict['vgf'] is not None:
        bimfile = p_dict['vgf'] + '.bim'
    elif p_dict['gf'] is not None:
        bimfile = p_dict['gf'] + '.bim'
    else:
        # Only a warning: processing continues with bimfile left as None.
        print('Set of validation SNPs is missing! Please specify either a validation PLINK genotype file, '
              'or a PLINK BIM file with the SNPs of interest.')

    # Refuse to overwrite an existing output file.
    if os.path.isfile(p_dict['out']):
        print('Output file (%s) already exists! Delete, rename it, or use a different output file.'
              % (p_dict['out']))
        raise Exception('Output file already exists!')

    # Build the summary before creating the output file so that a missing key
    # does not leave an empty file behind.
    summary_dict = {}
    summary_dict[0] = {'name': 'Summary statistics filename:', 'value': p_dict['ssf']}
    summary_dict[1] = {'name': 'LD reference genotypes filename:', 'value': p_dict['gf']}
    summary_dict[3] = {'name': 'Coordinated data output filename:', 'value': p_dict['out']}
    if p_dict['vgf'] is not None:
        summary_dict[2] = {'name': 'Validation genotypes filename:', 'value': p_dict['vgf']}

    # The context manager closes the HDF5 file even when parsing or
    # coordination raises, so the handle is never leaked.
    with h5py.File(p_dict['out'], 'w') as h5f:
        sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile, summary_dict)
        coordinate_datasets(p_dict['gf'], h5f, summary_dict,
                            validation_genotype_file=p_dict['vgf'],
                            max_freq_discrep=p_dict['max_freq_discrep'],
                            min_maf=p_dict['maf'],
                            skip_coordination=p_dict['skip_coordination'],
                            debug=p_dict['debug'])

    reporting.print_summary(summary_dict, 'Summary of coordination step')
def test_parse_sum_stats(self):
    """Parse two LDPRED-format fixture files and check the chromosome 1 beta counts.

    The first case supplies an explicit sample size and an effect type; the
    second sets ``N`` to None, enables ``z_from_se`` and omits ``eff_type``.
    Both are expected to yield 2000 betas for ``chrom_1``.
    """
    with_sample_size = {
        'ssf': os.path.join(TEST_DIR, 'test_data/sim1_0_ss.txt'),
        'ssf_format': 'LDPRED',
        'only_hm3': False,
        'N': 10000,
        'debug': True,
        'z_from_se': False,
        'match_genomic_pos': False,
        'eff_type': 'LINREG'
    }
    with_z_from_se = {
        'ssf': os.path.join(TEST_DIR, 'test_data/sim4_0_ss.txt'),
        'ssf_format': 'LDPRED',
        'only_hm3': False,
        'N': None,
        'debug': True,
        'z_from_se': True,
        'match_genomic_pos': False,
    }
    cases = (
        (with_sample_size, os.path.join(TEST_DIR, 'test_data/sim1_0_test.bim')),
        (with_z_from_se, os.path.join(TEST_DIR, 'test_data/sim4_0_test.bim')),
    )

    # Both cases write to the same scratch file; mode 'w' truncates it each time.
    out = '%s_parse_sum_stats.hdf5' % self.tmp_file_prefix
    for p_dict, bimfile in cases:
        summary_dict = {}
        with h5py.File(out, 'w') as h5f:
            sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile, summary_dict)
            # Read the dataset while the file is still open.
            betas = h5f['sum_stats']['chrom_1']['betas']
            self.assertEqual(len(betas), 2000)
def main(p_dict):
    """Parse summary statistics and coordinate them with genotypes in a new HDF5 file.

    Keys of ``p_dict`` read directly by this function:
        N: sample size; a message is printed when it is None.
        vbim, vgf, gf: candidate sources for the BIM file of SNPs, tried in
            that priority order ('.bim' is appended to ``vgf`` / ``gf``).
        out: path of the HDF5 file to create; it must not already exist.
        maf: minimum MAF; MAF checking is enabled when it is greater than 0.
        skip_coordination, debug: forwarded to the coordination routine.
    ``p_dict`` is also passed whole to ``sum_stats_parsers.parse_sum_stats``.

    When a validation genotype file (``vgf``) is given, coordination uses
    ``coordinate_genotypes_ss_w_ld_ref`` with ``gf`` as the reference;
    otherwise ``coordinate_genot_ss`` is run on ``gf`` alone.

    Raises:
        Exception: if the output file already exists.
    """
    bimfile = None
    if p_dict['N'] is None:
        # Only a message: processing continues regardless.
        print('Please specify an integer value for the sample size used to calculate the GWAS summary statistics.')
    print('Preparing to parse summary statistics')

    # Resolve the BIM file: explicit BIM file first, then the validation
    # genotype prefix, then the LD reference genotype prefix.
    if p_dict['vbim'] is not None:
        bimfile = p_dict['vbim']
    elif p_dict['vgf'] is not None:
        bimfile = p_dict['vgf'] + '.bim'
    elif p_dict['gf'] is not None:
        bimfile = p_dict['gf'] + '.bim'
    else:
        # Only a warning: processing continues with bimfile left as None.
        print('Set of validation SNPs is missing! Please specify either a validation PLINK genotype file, '
              'or a PLINK BIM file with the SNPs of interest.')

    # Refuse to overwrite an existing output file.
    if os.path.isfile(p_dict['out']):
        print('Output file (%s) already exists! Delete, rename it, or use a different output file.'
              % (p_dict['out']))
        raise Exception('Output file already exists!')

    # The context manager closes the HDF5 file even when parsing or
    # coordination raises, so the handle is never leaked.
    with h5py.File(p_dict['out'], 'w') as h5f:
        sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile)
        check_mafs = p_dict['maf'] > 0
        if p_dict['vgf'] is not None:
            coordinate_genotypes_ss_w_ld_ref(genotype_file=p_dict['vgf'],
                                             reference_genotype_file=p_dict['gf'],
                                             check_mafs=check_mafs,
                                             hdf5_file=h5f,
                                             min_maf=p_dict['maf'],
                                             skip_coordination=p_dict['skip_coordination'],
                                             debug=p_dict['debug'])
        else:
            coordinate_genot_ss(genotype_file=p_dict['gf'],
                                check_mafs=check_mafs,
                                hdf5_file=h5f,
                                min_maf=p_dict['maf'],
                                skip_coordination=p_dict['skip_coordination'],
                                debug=p_dict['debug'])
def test_parse_sum_stats(self):
    """Parse a STANDARD-format fixture file and expect 10 betas for chromosome 1."""
    out = '%s_parse_sum_stats.hdf5' % self.tmp_file_prefix
    bimfile = os.path.join(TEST_DIR, 'test_data/LDpred_cc_data_p0.001_train_0.bim')
    ss_path = os.path.join(TEST_DIR, 'test_data/coord_genotypes_ss.txt')
    p_dict = {
        'ssf': ss_path,
        'ssf_format': 'STANDARD',
        'only_hm3': False,
        'N': 10000,
        'debug': True,
        'match_genomic_pos': False,
    }
    summary_dict = {}

    with h5py.File(out, 'w') as h5f:
        sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile, summary_dict)
        # Read the dataset while the file is still open.
        chrom_1_betas = h5f['sum_stats']['chrom_1']['betas']
        self.assertEqual(len(chrom_1_betas), 10)