コード例 #1
0
ファイル: test_index.py プロジェクト: jeremymcrae/bgen
    def test_index_opens(self):
        ''' index check is falsy for the 15-bit example, truthy for the 16-bit one
        '''
        unindexed = self.folder / 'example.15bits.bgen'
        bfile = BgenFile(unindexed)
        self.assertFalse(bfile._check_for_index(str(unindexed)))

        indexed = self.folder / 'example.16bits.bgen'
        bfile = BgenFile(indexed)
        self.assertTrue(bfile._check_for_index(str(indexed)))
コード例 #2
0
    def test_context_handler_closed_bgen_length(self):
        ''' len() works inside the with-block and raises ValueError after it
        '''
        bgen_path = self.folder / 'example.16bits.zstd.bgen'
        with BgenFile(bgen_path) as handle:
            n_variants = len(handle)
            self.assertTrue(n_variants > 0)

        # the context has been exited, so asking for the length must fail
        self.assertRaises(ValueError, len, handle)
コード例 #3
0
 def test_zstd_compressed(self):
     ''' variants read from the zstd compressed file match the reference data
     '''
     bgen_path = self.folder / 'example.16bits.zstd.bgen'
     variants = BgenFile(str(bgen_path))
     # pairs each parsed variant with the reference entry at the same position
     for observed, expected in zip(variants, self.gen_data):
         self.assertEqual(expected, observed)
         probs_match = arrays_equal(expected.probabilities,
                                    observed.probabilities, 16)
         self.assertTrue(probs_match)
コード例 #4
0
    def test_fetch(self):
        ''' fetching chromosome '02' from the indexed example gives no variants

        The index check runs first and is expected to be truthy for the
        16-bit example file.
        '''
        path = self.folder / 'example.16bits.bgen'
        bfile = BgenFile(path)
        self.assertTrue(bfile._check_for_index(str(path)))

        # assertEqual reports both operands on failure, unlike assertTrue(a == b)
        self.assertEqual(list(bfile.fetch('02')), [])
コード例 #5
0
    def test_context_handler_closed_bgen_slice(self):
        ''' indexing into an exited BgenFile raises ValueError
        '''
        bgen_path = self.folder / 'example.16bits.zstd.bgen'
        with BgenFile(bgen_path) as handle:
            n_variants = len(handle)
            self.assertTrue(n_variants > 0)

        # item access after the context has exited is expected to fail
        with self.assertRaises(ValueError):
            handle[0]
コード例 #6
0
    def test_context_handler_closed_bgen_with_rsid(self):
        ''' looking up a variant by rsid on an exited BgenFile raises ValueError
        '''
        bgen_path = self.folder / 'example.16bits.zstd.bgen'
        with BgenFile(bgen_path) as handle:
            n_variants = len(handle)
            self.assertTrue(n_variants > 0)

        # the rsid lookup happens after the context has exited
        with self.assertRaises(ValueError):
            handle.with_rsid('rs111')
コード例 #7
0
    def test_context_handler_closed_bgen_at_position(self):
        ''' looking up a variant by position on an exited BgenFile raises ValueError
        '''
        bgen_path = self.folder / 'example.16bits.zstd.bgen'
        with BgenFile(bgen_path) as handle:
            n_variants = len(handle)
            self.assertTrue(n_variants > 0)

        # the position lookup happens after the context has exited
        with self.assertRaises(ValueError):
            handle.at_position(100)
コード例 #8
0
    def test_context_handler_closed_bgen_positions(self):
        ''' positions() works inside the with-block and raises ValueError after it
        '''
        bgen_path = self.folder / 'example.16bits.zstd.bgen'
        with BgenFile(bgen_path) as handle:
            n_positions = len(handle.positions())
            self.assertTrue(n_positions > 0)

        # the context has been exited, so positions() is expected to fail
        with self.assertRaises(ValueError):
            handle.positions()
コード例 #9
0
ファイル: QRankGWAS.py プロジェクト: daverblair/QRankGWAS
    def __init__(self,
                 bgen_file_path,
                 phenotype_file_path,
                 index_column_name,
                 covariate_file_path=None,
                 sample_file_path=None):
        """
        Open the bgen dataset and load the phenotype and covariate tables.

        Args:
            bgen_file_path: path (str) to the bgen file; must exist.
            phenotype_file_path: path to a tab-separated phenotype table.
            index_column_name: column used as the index of both the
                phenotype and the covariate table.
            covariate_file_path: optional path to a tab-separated covariate
                table. When omitted, the phenotype table is reused as the
                covariate table.
            sample_file_path: optional path to the sample file. When omitted,
                it is derived from ``bgen_file_path`` by replacing a trailing
                'bgen' with 'sample'.

        Raises:
            AssertionError: the bgen file, or an explicitly given sample
                file, does not exist.
            FileNotFoundError: the derived sample file, the phenotype file,
                or a given covariate file does not exist.

        Note from the original author: this software is meant to be called
        from the command line. The code is deliberately verbose, in an attempt
        to minimize the number of function calls given the need to perform
        millions of calls.
        """
        self.index_column_name = index_column_name

        # NOTE: asserts are skipped under `python -O`; kept so that the
        # exception type seen by existing callers does not change.
        assert os.path.isfile(bgen_file_path), "bgen file does not exist"

        if not os.path.isfile(bgen_file_path + '.bgi'):
            print(
                "Warning: No bgen index (.bgi) file provided in same directory as bgen file. Initial reading of the bgen is MUCH faster with index file. "
            )

        if sample_file_path is not None:
            assert os.path.isfile(
                sample_file_path
            ), "sample file does not exist at provided location"
        else:
            # str.strip('bgen') removes any of the characters b/g/e/n from
            # BOTH ends (e.g. 'gene.bgen' -> '.'), so drop only the trailing
            # 'bgen' extension text instead.
            extension = 'bgen'
            path_stem = bgen_file_path
            if path_stem.endswith(extension):
                path_stem = path_stem[:-len(extension)]
            sample_file_path = path_stem + 'sample'
            if not os.path.isfile(sample_file_path):
                raise FileNotFoundError(
                    "No sample file at {0:s}. A sample file must be provided.".
                    format(sample_file_path))

        print(
            'Reading bgen file from {0:s} using sample file {1:s}. If these seem like an error, kill program.'
            .format(bgen_file_path, sample_file_path))

        self.bgen_dataset = BgenFile(bgen_file_path,
                                     sample_path=sample_file_path)

        if os.path.isfile(phenotype_file_path):
            self.phenotype_dataset = pd.read_csv(phenotype_file_path,
                                                 sep='\t',
                                                 index_col=index_column_name)
        else:
            raise FileNotFoundError("No phenotype file at provided location")

        if covariate_file_path is not None:
            if os.path.isfile(covariate_file_path):
                self.covariate_dataset = pd.read_csv(
                    covariate_file_path, sep='\t', index_col=index_column_name)
            else:
                raise FileNotFoundError(
                    "No covariate file at provided location")
        else:
            print(
                "No covariate file provided. Will use phenotype file for covariates.\n",
                flush=True)
            # same object as the phenotype table, not a copy
            self.covariate_dataset = self.phenotype_dataset
コード例 #10
0
 def test_load_haplotypes_bgen(self):
     ''' variants read from the haplotypes file match the reference haplotypes
     '''
     bgen_path = self.folder / 'haplotypes.bgen'
     variants = BgenFile(str(bgen_path))
     # pairs each parsed variant with the reference entry at the same position
     for observed, expected in zip(variants, self.haps_data):
         self.assertEqual(expected, observed)
         probs_match = arrays_equal(expected.probabilities,
                                    observed.probabilities, 16)
         self.assertTrue(probs_match)
コード例 #11
0
 def test_v11(self):
     ''' variants read from the v1.1 format file match the reference data
     '''
     bgen_path = self.folder / 'example.v11.bgen'
     variants = BgenFile(str(bgen_path))
     # pairs each parsed variant with the reference entry at the same position
     for observed, expected in zip(variants, self.gen_data):
         self.assertEqual(expected, observed)
         probs_match = arrays_equal(expected.probabilities,
                                    observed.probabilities, 16)
         self.assertTrue(probs_match)
コード例 #12
0
 def test_load_complex_file(self):
     ''' variants, probabilities and ploidy from the complex file match the reference
     '''
     variants = BgenFile(self.folder / 'complex.bgen')
     for observed, expected in zip(variants, self.vcf_data):
         self.assertEqual(expected, observed)
         probs_match = arrays_equal(expected.probabilities,
                                    observed.probabilities, 16)
         self.assertTrue(probs_match)

         # ploidy values are compared pairwise, element by element
         ploidy_pairs = zip(expected.ploidy, observed.ploidy)
         self.assertTrue(all(want == got for want, got in ploidy_pairs))
コード例 #13
0
 def test_load_example_genotypes_bit_depths(self):
     ''' genotypes parse correctly from every 'example.*bits.bgen' file
     '''
     for bgen_path in self.folder.glob('example.*bits.bgen'):
         # the second dot-separated field of the stem carries the bit depth
         depth_field = bgen_path.stem.split('.')[1]
         bit_depth = int(depth_field.strip('bits'))

         variants = BgenFile(str(bgen_path))
         for observed, expected in zip(variants, self.gen_data):
             self.assertEqual(expected, observed)
             probs_match = arrays_equal(expected.probabilities,
                                        observed.probabilities, bit_depth)
             self.assertTrue(probs_match)
コード例 #14
0
    def test_load_complex_files(self):
        ''' genotypes parse correctly from every 'complex.*.bgen' file
        '''
        for bgen_path in self.folder.glob('complex.*.bgen'):
            # the second dot-separated field of the stem carries the bit depth
            depth_field = bgen_path.stem.split('.')[1]
            bit_depth = int(depth_field.strip('bits'))

            variants = BgenFile(bgen_path)
            for observed, expected in zip(variants, self.vcf_data):
                self.assertEqual(expected, observed)
                probs_match = arrays_equal(expected.probabilities,
                                           observed.probabilities, bit_depth)
                self.assertTrue(probs_match)
コード例 #15
0
    def test_fetch_whole_chrom(self):
        ''' fetching just with chrom gives all variants on chromosome

        Fetched variants and the reference data are both sorted by
        (chrom, pos), then compared pairwise on rsid, chrom and pos.
        '''
        chrom = '01'
        bfile = BgenFile(self.folder / 'example.16bits.bgen')

        def sortkey(variant):
            return (variant.chrom, variant.pos)

        # test fetching a whole chromosome
        fetched = sorted(bfile.fetch(chrom), key=sortkey)
        expected = sorted(self.gen_data, key=sortkey)
        for x, y in zip(fetched, expected):
            self.assertEqual(x.rsid, y.rsid)
            self.assertEqual(x.chrom, y.chrom)
            self.assertEqual(x.pos, y.pos)
コード例 #16
0
    def test_fetch_after_position(self):
        ''' fetching variants with chrom and start gives all variants after pos

        The reference data is sorted by (chrom, pos) and filtered to
        positions at or beyond ``start`` before the pairwise comparison.
        '''
        chrom, start = '01', 5000
        bfile = BgenFile(self.folder / 'example.16bits.bgen')

        def sortkey(variant):
            return (variant.chrom, variant.pos)

        gen_vars = [
            x for x in sorted(self.gen_data, key=sortkey) if start <= x.pos
        ]
        fetched = sorted(bfile.fetch(chrom, start), key=sortkey)
        for x, y in zip(fetched, gen_vars):
            self.assertEqual(x.rsid, y.rsid)
            self.assertEqual(x.chrom, y.chrom)
            self.assertEqual(x.pos, y.pos)
コード例 #17
0
ファイル: test_bgenvar.py プロジェクト: jeremymcrae/bgen
 def test_pickling(self):
     ''' a variant survives a pickle round trip with its attributes intact
     '''
     bgen_path = self.folder / 'example.16bits.zstd.bgen'
     compared_attrs = ('varid', 'rsid', 'chrom', 'pos', 'alleles')
     with BgenFile(bgen_path) as bfile:
         for variant in bfile:
             # round trip through pickle; a failure to (un)pickle raises here
             restored = pickle.loads(pickle.dumps(variant))

             # original and restored variant must agree on each attribute
             for attr in compared_attrs:
                 self.assertEqual(getattr(variant, attr),
                                  getattr(restored, attr))
コード例 #18
0
ファイル: test_bgenvar.py プロジェクト: jeremymcrae/bgen
 def test_minor_allele_dosage_v11(self):
     ''' minor_allele_dosage agrees with a dosage recomputed from probabilities
     '''
     bgen_path = self.folder / 'example.v11.bgen'
     with BgenFile(bgen_path) as bfile:
         for variant in bfile:
             observed = variant.minor_allele_dosage
             geno_probs = variant.probabilities

             # dosage per allele: an outer column counted twice plus column 1
             first_allele = geno_probs[:, 0] * 2 + geno_probs[:, 1]
             second_allele = geno_probs[:, 2] * 2 + geno_probs[:, 1]

             # the allele with the smaller NaN-ignoring total is the one
             # compared against; a tie selects the second allele
             if np.nansum(first_allele) >= np.nansum(second_allele):
                 expected = second_allele
             else:
                 expected = first_allele

             # largest absolute deviation, ignoring NaNs, must stay tiny
             worst_delta = np.nanmax(abs(observed - expected))
             self.assertTrue(worst_delta < 7e-5)
コード例 #19
0
    def test_fetch_in_region(self):
        ''' fetch(chrom, start, stop) gives the variants inside the region
        '''
        chrom, start, stop = '01', 5000, 50000
        bfile = BgenFile(self.folder / 'example.16bits.bgen')

        def by_position(variant):
            return (variant.chrom, variant.pos)

        # reference variants whose position lies within [start, stop]
        expected = []
        for candidate in sorted(self.gen_data, key=by_position):
            if start <= candidate.pos <= stop:
                expected.append(candidate)

        fetched = sorted(bfile.fetch(chrom, start, stop), key=by_position)
        for got, want in zip(fetched, expected):
            self.assertEqual(got.rsid, want.rsid)
            self.assertEqual(got.chrom, want.chrom)
            self.assertEqual(got.pos, want.pos)

        # a region scaled far beyond the tested one should hold no variants
        far_region = bfile.fetch(chrom, start * 1000, stop * 1000)
        self.assertEqual(list(far_region), [])
コード例 #20
0
 def test_Path(self):
     ''' BgenFile accepts a path object rather than a str
     '''
     # passing if construction does not raise; the handle itself is not used
     _ = BgenFile(self.folder / 'example.v11.bgen')
コード例 #21
0
 def test_load_missing_file(self):
     ''' opening a path that does not exist raises ValueError
     '''
     self.assertRaises(ValueError, BgenFile, '/zzz/jjj/qqq.bgen')