예제 #1
0
def main(p_dict):
    """Parse summary statistics and coordinate them with the genotype data.

    The BIM file handed to the summary-statistics parser is picked in this
    order of precedence: ``p_dict['vbim']``, then ``p_dict['vgf'] + '.bim'``,
    then ``p_dict['gf'] + '.bim'``. If none of them is set, a message is
    printed and ``None`` is passed on as the BIM file.

    Args:
        p_dict: Parameter dictionary. Keys read directly here are 'vbim',
            'vgf', 'gf', 'out', 'ssf', 'max_freq_discrep', 'maf',
            'skip_coordination' and 'debug'. The whole dictionary is also
            forwarded to ``sum_stats_parsers.parse_sum_stats``.

    Raises:
        Exception: If the output file ``p_dict['out']`` already exists.
    """
    bimfile = None
    if p_dict['vbim'] is not None:
        bimfile = p_dict['vbim']
    elif p_dict['vgf'] is not None:
        bimfile = p_dict['vgf'] + '.bim'
    elif p_dict['gf'] is not None:
        bimfile = p_dict['gf'] + '.bim'
    else:
        # NOTE(review): execution continues with bimfile=None after this
        # message; confirm the downstream calls accept that.
        print('Set of validation SNPs is missing!  Please specify either a validation PLINK genotype file, '
              'or a PLINK BIM file with the SNPs of interest.')

    # Refuse to overwrite an existing output file.
    if os.path.isfile(p_dict['out']):
        print('Output file (%s) already exists!  Delete, rename it, or use a different output file.'
              % (p_dict['out']))
        raise Exception('Output file already exists!')

    # Keys 0, 1, 3 are inserted first and key 2 (if any) last, so the
    # insertion order of the summary entries stays as it was.
    summary_dict = {
        0: {'name': 'Summary statistics filename:', 'value': p_dict['ssf']},
        1: {'name': 'LD reference genotypes filename:', 'value': p_dict['gf']},
        3: {'name': 'Coordinated data output filename:', 'value': p_dict['out']},
    }
    if p_dict['vgf'] is not None:
        summary_dict[2] = {'name': 'Validation genotypes filename:', 'value': p_dict['vgf']}

    # The context manager closes the HDF5 file even when parsing or
    # coordination raises, so the handle is never leaked.
    with h5py.File(p_dict['out'], 'w') as h5f:
        sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile, summary_dict)
        coordinate_datasets(p_dict['gf'], h5f, summary_dict,
                            validation_genotype_file=p_dict['vgf'],
                            max_freq_discrep=p_dict['max_freq_discrep'],
                            min_maf=p_dict['maf'],
                            skip_coordination=p_dict['skip_coordination'],
                            debug=p_dict['debug'])

    reporting.print_summary(summary_dict, 'Summary of coordination step')
예제 #2
0
def main(p_dict):
    """Parse summary statistics and coordinate them with the genotype data.

    The BIM file handed to the summary-statistics parser is picked in this
    order of precedence: ``p_dict['vbim']``, then ``p_dict['vgf'] + '.bim'``,
    then ``p_dict['gf'] + '.bim'``. If none of them is set, a message is
    printed and ``None`` is passed on as the BIM file.

    Args:
        p_dict: Parameter dictionary. Keys read directly here are 'vbim',
            'vgf', 'gf', 'out', 'ssf', 'max_freq_discrep', 'maf',
            'skip_coordination' and 'debug'. The whole dictionary is also
            forwarded to ``sum_stats_parsers.parse_sum_stats``.

    Raises:
        Exception: If the output file ``p_dict['out']`` already exists.
    """
    bimfile = None
    if p_dict['vbim'] is not None:
        bimfile = p_dict['vbim']
    elif p_dict['vgf'] is not None:
        bimfile = p_dict['vgf'] + '.bim'
    elif p_dict['gf'] is not None:
        bimfile = p_dict['gf'] + '.bim'
    else:
        # NOTE(review): execution continues with bimfile=None after this
        # message; confirm the downstream calls accept that.
        print('Set of validation SNPs is missing!  Please specify either a validation PLINK genotype file, '
              'or a PLINK BIM file with the SNPs of interest.')

    # Refuse to overwrite an existing output file.
    if os.path.isfile(p_dict['out']):
        print('Output file (%s) already exists!  Delete, rename it, or use a different output file.'
              % (p_dict['out']))
        raise Exception('Output file already exists!')

    # Keys 0, 1, 3 are inserted first and key 2 (if any) last, so the
    # insertion order of the summary entries stays as it was.
    summary_dict = {
        0: {'name': 'Summary statistics filename:', 'value': p_dict['ssf']},
        1: {'name': 'LD reference genotypes filename:', 'value': p_dict['gf']},
        3: {'name': 'Coordinated data output filename:', 'value': p_dict['out']},
    }
    if p_dict['vgf'] is not None:
        summary_dict[2] = {'name': 'Validation genotypes filename:', 'value': p_dict['vgf']}

    # The context manager closes the HDF5 file even when parsing or
    # coordination raises, so the handle is never leaked.
    with h5py.File(p_dict['out'], 'w') as h5f:
        sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile, summary_dict)
        coordinate_datasets(p_dict['gf'], h5f, summary_dict,
                            validation_genotype_file=p_dict['vgf'],
                            max_freq_discrep=p_dict['max_freq_discrep'],
                            min_maf=p_dict['maf'],
                            skip_coordination=p_dict['skip_coordination'],
                            debug=p_dict['debug'])

    reporting.print_summary(summary_dict, 'Summary of coordination step')
예제 #3
0
    def test_parse_sum_stats(self):
        """Parse two LDPRED-format summary statistics files into HDF5.

        The first scenario supplies a sample size and an effect type; the
        second leaves 'N' as None and sets 'z_from_se'. Both scenarios write
        to the same temporary HDF5 path (the second run truncates the first)
        and must yield 2000 entries under sum_stats/chrom_1/betas.
        """
        scenarios = (
            (
                {
                    'ssf': os.path.join(TEST_DIR, 'test_data/sim1_0_ss.txt'),
                    'ssf_format': 'LDPRED',
                    'only_hm3': False,
                    'N': 10000,
                    'debug': True,
                    'z_from_se': False,
                    'match_genomic_pos': False,
                    'eff_type': 'LINREG'
                },
                os.path.join(TEST_DIR, 'test_data/sim1_0_test.bim'),
            ),
            (
                {
                    'ssf': os.path.join(TEST_DIR, 'test_data/sim4_0_ss.txt'),
                    'ssf_format': 'LDPRED',
                    'only_hm3': False,
                    'N': None,
                    'debug': True,
                    'z_from_se': True,
                    'match_genomic_pos': False,
                },
                os.path.join(TEST_DIR, 'test_data/sim4_0_test.bim'),
            ),
        )
        hdf5_path = '%s_parse_sum_stats.hdf5' % self.tmp_file_prefix

        for params, bim_path in scenarios:
            # Each scenario starts from an empty summary and a fresh file.
            stats_summary = {}
            with h5py.File(hdf5_path, 'w') as store:
                sum_stats_parsers.parse_sum_stats(store, params, bim_path,
                                                  stats_summary)
                betas = store['sum_stats']['chrom_1']['betas']
                self.assertEqual(len(betas), 2000)
예제 #4
0
def main(p_dict):
    """Parse summary statistics and coordinate them with genotype data.

    The BIM file handed to the summary-statistics parser is picked in this
    order of precedence: ``p_dict['vbim']``, then ``p_dict['vgf'] + '.bim'``,
    then ``p_dict['gf'] + '.bim'``. If none of them is set, a message is
    printed and ``None`` is passed on as the BIM file.

    When a validation genotype file (``p_dict['vgf']``) is given, the data
    are coordinated against both it and the reference file ``p_dict['gf']``;
    otherwise only ``p_dict['gf']`` is used.

    Args:
        p_dict: Parameter dictionary. Keys read directly here are 'N',
            'vbim', 'vgf', 'gf', 'out', 'maf', 'skip_coordination' and
            'debug'. The whole dictionary is also forwarded to
            ``sum_stats_parsers.parse_sum_stats``.

    Raises:
        Exception: If the output file ``p_dict['out']`` already exists.
    """
    bimfile = None
    if p_dict['N'] is None:
        # NOTE(review): only a message is printed; execution continues
        # without a sample size.
        print('Please specify an integer value for the sample size used to calculate the GWAS summary statistics.')
    print('Preparing to parse summary statistics')
    if p_dict['vbim'] is not None:
        bimfile = p_dict['vbim']
    elif p_dict['vgf'] is not None:
        bimfile = p_dict['vgf'] + '.bim'
    elif p_dict['gf'] is not None:
        bimfile = p_dict['gf'] + '.bim'
    else:
        print('Set of validation SNPs is missing!  Please specify either a validation PLINK genotype file, '
              'or a PLINK BIM file with the SNPs of interest.')

    # Refuse to overwrite an existing output file.
    if os.path.isfile(p_dict['out']):
        print('Output file (%s) already exists!  Delete, rename it, or use a different output file.'
              % (p_dict['out']))
        raise Exception('Output file already exists!')

    # The context manager closes the HDF5 file even when parsing or
    # coordination raises, so the handle is never leaked.
    with h5py.File(p_dict['out'], 'w') as h5f:
        sum_stats_parsers.parse_sum_stats(h5f, p_dict, bimfile)

        # MAF checks are only requested for a positive MAF threshold.
        check_mafs = p_dict['maf'] > 0
        if p_dict['vgf'] is not None:
            coordinate_genotypes_ss_w_ld_ref(genotype_file=p_dict['vgf'],
                                             reference_genotype_file=p_dict['gf'],
                                             check_mafs=check_mafs,
                                             hdf5_file=h5f,
                                             min_maf=p_dict['maf'],
                                             skip_coordination=p_dict['skip_coordination'],
                                             debug=p_dict['debug'])
        else:
            coordinate_genot_ss(genotype_file=p_dict['gf'],
                                check_mafs=check_mafs,
                                hdf5_file=h5f,
                                min_maf=p_dict['maf'],
                                skip_coordination=p_dict['skip_coordination'],
                                debug=p_dict['debug'])
예제 #5
0
 def test_parse_sum_stats(self):
     """Parse a STANDARD-format summary statistics file into HDF5.

     The parsed output is written to a temporary HDF5 file and must hold
     10 entries under sum_stats/chrom_1/betas.
     """
     params = {
         'ssf': os.path.join(TEST_DIR, 'test_data/coord_genotypes_ss.txt'),
         'ssf_format': 'STANDARD',
         'only_hm3': False,
         'N': 10000,
         'debug': True,
         'match_genomic_pos': False,
     }
     bim_path = os.path.join(
         TEST_DIR, 'test_data/LDpred_cc_data_p0.001_train_0.bim')
     stats_summary = {}
     hdf5_path = '%s_parse_sum_stats.hdf5' % self.tmp_file_prefix

     with h5py.File(hdf5_path, 'w') as store:
         sum_stats_parsers.parse_sum_stats(store, params, bim_path,
                                           stats_summary)
         betas = store['sum_stats']['chrom_1']['betas']
         self.assertEqual(len(betas), 10)