Esempio n. 1
0
    def test_export_gen_exprs(self):
        """Round-trip ``export_gen`` with caller-supplied field expressions.

        The example GEN data is exported with ``id1``/``id2`` set to the
        stringified column index, ``varid``/``rsid`` set to the stringified
        row index and a constant ``gp``. The written files are re-imported
        and every entry, row and column is checked against those values.
        """
        gen = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            contig_recoding={"01": "1"},
                            reference_genome='GRCh37',
                            min_partitions=3).add_col_index().add_row_index()

        out1 = new_temp_file()
        hl.export_gen(gen,
                      out1,
                      id1=hl.str(gen.col_idx),
                      id2=hl.str(gen.col_idx),
                      missing=0.5,
                      varid=hl.str(gen.row_idx),
                      rsid=hl.str(gen.row_idx),
                      gp=[0.0, 1.0, 0.0])

        in1 = (hl.import_gen(out1 + '.gen',
                             sample_file=out1 + '.sample',
                             min_partitions=3).add_col_index().add_row_index())

        # Every entry must carry the constant GP that was exported.
        gp_fraction = in1.aggregate_entries(
            hl.agg.fraction(in1.GP == [0.0, 1.0, 0.0]))
        self.assertEqual(gp_fraction, 1.0)

        # Every row must carry the row index in both varid and rsid.
        row_fraction = in1.aggregate_rows(
            hl.agg.fraction((in1.varid == hl.str(in1.row_idx))
                            & (in1.rsid == hl.str(in1.row_idx))))
        self.assertEqual(row_fraction, 1.0)

        # Every column must carry the column index as its sample id.
        # Comparing against 1.0 matters: a bare truthiness check would pass
        # as soon as a single sample id matched.
        col_fraction = in1.aggregate_cols(
            hl.agg.fraction(in1.s == hl.str(in1.col_idx)))
        self.assertEqual(col_fraction, 1.0)
Esempio n. 2
0
    def test_import_gen_skip_invalid_loci(self):
        """Check ``import_gen`` behaviour on a file containing invalid loci.

        With ``skip_invalid_loci=True`` the import is expected to keep
        3 rows; without it, the call is expected to raise ``FatalError``
        with a message matching ``'Invalid locus'``.
        """
        gen_path = resource('skip_invalid_loci.gen')
        sample_path = resource('skip_invalid_loci.sample')

        mt = hl.import_gen(gen_path,
                           sample_path,
                           reference_genome='GRCh37',
                           skip_invalid_loci=True)
        # assertEqual reports the actual row count when the check fails.
        self.assertEqual(mt._force_count_rows(), 3)

        with self.assertRaisesRegex(FatalError, 'Invalid locus'):
            hl.import_gen(gen_path, sample_path)
Esempio n. 3
0
    def test_import_gen_skip_invalid_loci(self):
        """Exercise the ``skip_invalid_loci`` flag of ``import_gen``.

        Importing with the flag set should yield exactly 3 rows; importing
        the same files without it should fail with a ``FatalError`` whose
        message matches ``'Invalid locus'``.
        """
        gen_path = resource('skip_invalid_loci.gen')
        sample_path = resource('skip_invalid_loci.sample')

        filtered = hl.import_gen(gen_path,
                                 sample_path,
                                 reference_genome='GRCh37',
                                 skip_invalid_loci=True)
        # Use assertEqual so a failure shows the observed count.
        self.assertEqual(filtered._force_count_rows(), 3)

        with self.assertRaisesRegex(FatalError, 'Invalid locus'):
            hl.import_gen(gen_path, sample_path)
Esempio n. 4
0
    def test_export_gen(self):
        """Check that exporting to GEN and re-importing reproduces the data.

        The example dataset is imported, written out with ``export_gen``,
        read back, and compared to the original with ``_same`` using an
        absolute tolerance of ``3E-4``.
        """
        gen = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            contig_recoding={"01": "1"},
                            reference_genome='GRCh37',
                            min_partitions=3)

        # Write to a fresh temporary path instead of a fixed name under
        # /tmp, so runs cannot collide with each other or with stale output.
        out = new_temp_file()
        hl.export_gen(gen, out)
        gen2 = hl.import_gen(out + '.gen',
                             sample_file=out + '.sample',
                             reference_genome='GRCh37',
                             min_partitions=3)

        self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True))
Esempio n. 5
0
    def test_import_gen_no_reference_specified(self):
        """Check ``import_gen`` with ``reference_genome=None``.

        The ``locus`` field is expected to be a plain struct of a string
        ``contig`` and an int32 ``position``, and the dataset is expected
        to contain 199 rows.
        """
        gen = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            reference_genome=None)

        # assertEqual shows both types when they differ.
        expected_locus_type = hl.tstruct(contig=hl.tstr, position=hl.tint32)
        self.assertEqual(gen.locus.dtype, expected_locus_type)
        self.assertEqual(gen.count_rows(), 199)
Esempio n. 6
0
    def test_import_gen_no_reference_specified(self):
        """Import the example GEN data without a reference genome.

        Asserts that ``locus`` is typed as
        ``struct{contig: str, position: int32}`` and that 199 rows are
        read.
        """
        gen = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            reference_genome=None)

        # Compare with assertEqual so a mismatch reports the actual dtype.
        self.assertEqual(gen.locus.dtype,
                         hl.tstruct(contig=hl.tstr, position=hl.tint32))
        self.assertEqual(gen.count_rows(), 199)
Esempio n. 7
0
 def test_import_gen(self):
     """Import the example GEN data against GRCh37 and check its rows.

     Contig ``"01"`` is recoded to ``"1"``; every row must then sit on
     contig ``"1"``, there must be 199 rows, and ``locus`` must be a
     GRCh37 locus.
     """
     matrix = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            contig_recoding={"01": "1"},
                            reference_genome='GRCh37')
     rows = matrix.rows()

     all_on_contig_1 = rows.all(rows.locus.contig == "1")
     self.assertTrue(all_on_contig_1)
     self.assertEqual(rows.count(), 199)
     self.assertEqual(rows.locus.dtype, hl.tlocus('GRCh37'))
Esempio n. 8
0
 def test_import_gen(self):
     """Check the rows table produced by ``import_gen`` on the example data.

     The import recodes contig ``"01"`` to ``"1"`` and uses GRCh37. The
     test asserts the contig of every row, the row count (199) and the
     ``locus`` type.
     """
     imported = hl.import_gen(resource('example.gen'),
                              sample_file=resource('example.sample'),
                              contig_recoding={"01": "1"},
                              reference_genome='GRCh37')
     row_table = imported.rows()

     contig_is_1 = row_table.locus.contig == "1"
     self.assertTrue(row_table.all(contig_is_1))
     self.assertEqual(row_table.count(), 199)
     self.assertEqual(row_table.locus.dtype, hl.tlocus('GRCh37'))
Esempio n. 9
0
    def test_import_bgen_random(self):
        """Compare the BGEN import of the random dataset with its GEN import.

        The BGEN file is indexed, imported with the ``GT`` and ``GP`` entry
        fields, and must match the GEN import within an absolute tolerance
        of ``1/255``.
        """
        samples = resource('random.sample')
        bgen_path = resource('random.bgen')

        expected = hl.import_gen(resource('random.gen'), samples)

        hl.index_bgen(bgen_path)
        actual = hl.import_bgen(bgen_path, ['GT', 'GP'], samples)

        matches = actual._same(expected, tolerance=1.0 / 255, absolute=True)
        self.assertTrue(matches)
Esempio n. 10
0
    def test_export_gen_exprs(self):
        """Verify that expression overrides passed to ``export_gen`` round-trip.

        Sample ids are exported as the stringified column index, variant
        and rs ids as the stringified row index, and ``gp`` as a constant
        array. After re-importing the written files, all entries, rows and
        columns must carry those values.
        """
        gen = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            contig_recoding={"01": "1"},
                            reference_genome='GRCh37',
                            min_partitions=3).add_col_index().add_row_index()

        out1 = new_temp_file()
        hl.export_gen(gen, out1, id1=hl.str(gen.col_idx), id2=hl.str(gen.col_idx), missing=0.5,
                      varid=hl.str(gen.row_idx), rsid=hl.str(gen.row_idx), gp=[0.0, 1.0, 0.0])

        in1 = (hl.import_gen(out1 + '.gen', sample_file=out1 + '.sample', min_partitions=3)
               .add_col_index()
               .add_row_index())

        entries_ok = in1.aggregate_entries(hl.agg.fraction(in1.GP == [0.0, 1.0, 0.0]))
        self.assertEqual(entries_ok, 1.0)

        rows_ok = in1.aggregate_rows(hl.agg.fraction((in1.varid == hl.str(in1.row_idx)) &
                                                     (in1.rsid == hl.str(in1.row_idx))))
        self.assertEqual(rows_ok, 1.0)

        # Require the fraction to be exactly 1.0: checking only truthiness
        # would accept a result where just some sample ids match.
        cols_ok = in1.aggregate_cols(hl.agg.fraction(in1.s == hl.str(in1.col_idx)))
        self.assertEqual(cols_ok, 1.0)
Esempio n. 11
0
    def test_multiple_files(self):
        """Import three BGEN files together and compare with the GEN import.

        The files are passed in the order b, c, a and read with
        ``n_partitions=3``. The result must match the ``random.gen`` import
        within an absolute tolerance of ``1/255``.
        """
        samples = resource('random.sample')
        expected = hl.import_gen(resource('random.gen'), samples)

        bgen_paths = [resource(name)
                      for name in ('random-b.bgen', 'random-c.bgen', 'random-a.bgen')]
        hl.index_bgen(bgen_paths)
        actual = hl.import_bgen(bgen_paths, ['GT', 'GP'], samples, n_partitions=3)

        matches = actual._same(expected, tolerance=1.0 / 255, absolute=True)
        self.assertTrue(matches)
Esempio n. 12
0
    def test_import_bgen_random(self):
        """Check that ``random.bgen`` imports to the same data as ``random.gen``.

        Both imports share one sample file. The comparison uses ``_same``
        with an absolute tolerance of ``1/255``.
        """
        sample_path = resource('random.sample')
        gen_mt = hl.import_gen(resource('random.gen'), sample_path)

        bgen_path = resource('random.bgen')
        hl.index_bgen(bgen_path)
        bgen_mt = hl.import_bgen(bgen_path, ['GT', 'GP'], sample_path)

        same = bgen_mt._same(gen_mt, tolerance=1.0 / 255, absolute=True)
        self.assertTrue(same)
Esempio n. 13
0
    def test_export_gen(self):
        """Round-trip the example data through ``export_gen`` with shuffled columns.

        Columns are randomly permuted before export so they are not in
        alphabetical order. The re-imported dataset must match the
        permuted one within an absolute tolerance of ``3E-4``.
        """
        gen = hl.import_gen(resource('example.gen'),
                            sample_file=resource('example.sample'),
                            contig_recoding={"01": "1"},
                            reference_genome='GRCh37',
                            min_partitions=3)

        # permute columns so not in alphabetical order!
        import random
        indices = list(range(gen.count_cols()))
        random.shuffle(indices)
        gen = gen.choose_cols(indices)

        # Write to a fresh temporary path instead of a fixed name under
        # /tmp, so runs cannot collide with each other or with stale output.
        out = new_temp_file()
        hl.export_gen(gen, out)
        gen2 = hl.import_gen(out + '.gen',
                             sample_file=out + '.sample',
                             reference_genome='GRCh37',
                             min_partitions=3)

        self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True))
Esempio n. 14
0
    def test_import_bgen_gavin_example(self):
        """Compare the 8-bit example BGEN import with the example GEN import.

        Contigs ``"01"`` to ``"09"`` are recoded to ``"1"`` to ``"9"``.
        The recoding and the GRCh37 reference are given to ``import_gen``
        and to ``index_bgen``. The two datasets must agree within an
        absolute tolerance of ``1/255``.
        """
        contig_map = {'0' + str(digit): str(digit) for digit in range(1, 10)}
        samples = resource('example.sample')

        expected = hl.import_gen(resource('example.gen'),
                                 samples,
                                 contig_recoding=contig_map,
                                 reference_genome="GRCh37")

        bgen_path = resource('example.8bits.bgen')
        hl.index_bgen(bgen_path,
                      contig_recoding=contig_map,
                      reference_genome="GRCh37")
        actual = hl.import_bgen(bgen_path, ['GT', 'GP'], samples)

        matches = actual._same(expected, tolerance=1.0 / 255, absolute=True)
        self.assertTrue(matches)
Esempio n. 15
0
    def test_multiple_files(self):
        """Check that a multi-file BGEN import equals the single GEN import.

        ``random-b``, ``random-c`` and ``random-a`` are indexed and
        imported together, in that order, with ``n_partitions=3``. The
        result is compared to ``random.gen`` with an absolute tolerance of
        ``1/255``.
        """
        sample_path = resource('random.sample')
        gen_mt = hl.import_gen(resource('random.gen'), sample_path)

        part_b = resource('random-b.bgen')
        part_c = resource('random-c.bgen')
        part_a = resource('random-a.bgen')
        bgen_files = [part_b, part_c, part_a]

        hl.index_bgen(bgen_files)
        bgen_mt = hl.import_bgen(bgen_files, ['GT', 'GP'], sample_path, n_partitions=3)

        same = bgen_mt._same(gen_mt, tolerance=1.0 / 255, absolute=True)
        self.assertTrue(same)
Esempio n. 16
0
    def test_import_bgen_gavin_example(self):
        """Check that the 8-bit example BGEN matches the example GEN data.

        Contigs ``"01"`` to ``"09"`` are recoded to ``"1"`` to ``"9"`` in
        both ``import_gen`` and ``import_bgen``. The two imports must agree
        within an absolute tolerance of ``1/255``.
        """
        contig_map = {'0' + str(digit): str(digit) for digit in range(1, 10)}
        sample_path = resource('example.sample')

        gen_mt = hl.import_gen(resource('example.gen'), sample_path, contig_recoding=contig_map)

        bgen_path = resource('example.8bits.bgen')
        hl.index_bgen(bgen_path)
        bgen_mt = hl.import_bgen(bgen_path, ['GT', 'GP'], sample_path, contig_recoding=contig_map)

        same = bgen_mt._same(gen_mt, tolerance=1.0 / 255, absolute=True)
        self.assertTrue(same)