def test_export_gen_exprs(self):
    """Round-trip GEN data through ``hl.export_gen`` with overridden fields.

    The example dataset is exported with both sample ID fields, ``varid``
    and ``rsid`` replaced by stringified column/row indices and with every
    genotype probability set to ``[0.0, 1.0, 0.0]``.  The exported files
    are imported again and *every* entry, row and column must carry the
    overridden value.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3).add_col_index().add_row_index()

    out1 = new_temp_file()
    hl.export_gen(gen, out1,
                  id1=hl.str(gen.col_idx),
                  id2=hl.str(gen.col_idx),
                  missing=0.5,
                  varid=hl.str(gen.row_idx),
                  rsid=hl.str(gen.row_idx),
                  gp=[0.0, 1.0, 0.0])

    in1 = (hl.import_gen(out1 + '.gen',
                         sample_file=out1 + '.sample',
                         min_partitions=3)
           .add_col_index()
           .add_row_index())

    self.assertEqual(
        in1.aggregate_entries(hl.agg.fraction(in1.GP == [0.0, 1.0, 0.0])),
        1.0)
    self.assertEqual(
        in1.aggregate_rows(
            hl.agg.fraction((in1.varid == hl.str(in1.row_idx))
                            & (in1.rsid == hl.str(in1.row_idx)))),
        1.0)
    # The fraction must be compared against 1.0: a bare fraction is truthy
    # as soon as a single column matches, which would hide mismatches.
    self.assertEqual(
        in1.aggregate_cols(hl.agg.fraction(in1.s == hl.str(in1.col_idx))),
        1.0)
def test_import_gen_skip_invalid_loci(self):
    """Check how ``hl.import_gen`` treats loci that are invalid.

    With ``skip_invalid_loci=True`` the import is expected to yield 3 rows.
    Without that flag, reading the same files must fail with a
    ``FatalError`` whose message matches ``'Invalid locus'``.
    """
    mt = hl.import_gen(resource('skip_invalid_loci.gen'),
                       resource('skip_invalid_loci.sample'),
                       reference_genome='GRCh37',
                       skip_invalid_loci=True)
    self.assertEqual(mt._force_count_rows(), 3)

    with self.assertRaisesRegex(FatalError, 'Invalid locus'):
        strict = hl.import_gen(resource('skip_invalid_loci.gen'),
                               resource('skip_invalid_loci.sample'))
        # Force the rows to be read so the error surfaces even if the
        # import itself defers parsing the file.
        strict._force_count_rows()
def test_export_gen(self):
    """Export the example GEN dataset and verify it re-imports unchanged.

    The re-imported dataset must match the original according to
    ``_same`` with an absolute tolerance of ``3E-4``.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3)

    # Use a fresh temporary location rather than a fixed path under /tmp so
    # that concurrent or repeated runs cannot clobber each other's output.
    file = new_temp_file()
    hl.export_gen(gen, file)

    gen2 = hl.import_gen(file + '.gen',
                         sample_file=file + '.sample',
                         reference_genome='GRCh37',
                         min_partitions=3)

    self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True))
def test_import_gen_no_reference_specified(self):
    """Import GEN data with ``reference_genome=None``.

    In that case the ``locus`` field is expected to be a plain struct of
    ``contig`` (string) and ``position`` (32-bit int), and the dataset is
    expected to contain 199 rows.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        reference_genome=None)

    # assertEqual reports both dtypes on failure, unlike assertTrue(a == b).
    self.assertEqual(gen.locus.dtype,
                     hl.tstruct(contig=hl.tstr, position=hl.tint32))
    self.assertEqual(gen.count_rows(), 199)
def test_import_gen(self):
    """Import the example GEN file against GRCh37 and inspect its rows.

    Contig ``"01"`` is recoded to ``"1"``; afterwards every row must lie on
    contig ``"1"``, there must be 199 rows, and ``locus`` must be typed as
    a GRCh37 locus.
    """
    mt = hl.import_gen(
        resource('example.gen'),
        sample_file=resource('example.sample'),
        contig_recoding={"01": "1"},
        reference_genome='GRCh37',
    )
    rows = mt.rows()

    on_contig_one = rows.locus.contig == "1"
    self.assertTrue(rows.all(on_contig_one))
    self.assertEqual(rows.count(), 199)
    self.assertEqual(rows.locus.dtype, hl.tlocus('GRCh37'))
def test_import_bgen_random(self):
    """Compare the BGEN and GEN imports of the ``random`` dataset.

    The BGEN file is indexed and imported with the ``GT`` and ``GP`` entry
    fields; the result must match the GEN import within an absolute
    tolerance of ``1/255``.
    """
    samples = resource('random.sample')
    from_gen = hl.import_gen(resource('random.gen'), samples)

    bgen_path = resource('random.bgen')
    hl.index_bgen(bgen_path)
    from_bgen = hl.import_bgen(bgen_path, ['GT', 'GP'], samples)

    matches = from_bgen._same(from_gen, tolerance=1.0 / 255, absolute=True)
    self.assertTrue(matches)
def test_export_gen_exprs(self):
    """Export GEN data with custom field expressions and re-import it.

    Sample IDs are written as the stringified column index, ``varid`` and
    ``rsid`` as the stringified row index, and all genotype probabilities
    as ``[0.0, 1.0, 0.0]``.  After importing the exported files, the
    fraction of entries, rows and columns holding those values must each
    be exactly 1.0.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3).add_col_index().add_row_index()

    out1 = new_temp_file()
    hl.export_gen(gen, out1,
                  id1=hl.str(gen.col_idx),
                  id2=hl.str(gen.col_idx),
                  missing=0.5,
                  varid=hl.str(gen.row_idx),
                  rsid=hl.str(gen.row_idx),
                  gp=[0.0, 1.0, 0.0])

    in1 = (hl.import_gen(out1 + '.gen',
                         sample_file=out1 + '.sample',
                         min_partitions=3)
           .add_col_index()
           .add_row_index())

    gp_fraction = in1.aggregate_entries(
        hl.agg.fraction(in1.GP == [0.0, 1.0, 0.0]))
    self.assertEqual(gp_fraction, 1.0)

    row_id_fraction = in1.aggregate_rows(
        hl.agg.fraction((in1.varid == hl.str(in1.row_idx))
                        & (in1.rsid == hl.str(in1.row_idx))))
    self.assertEqual(row_id_fraction, 1.0)

    # Previously this fraction was only tested for truthiness, so a single
    # matching sample ID was enough to pass; require all of them to match.
    col_id_fraction = in1.aggregate_cols(
        hl.agg.fraction(in1.s == hl.str(in1.col_idx)))
    self.assertEqual(col_id_fraction, 1.0)
def test_multiple_files(self):
    """Import one dataset split across several BGEN files.

    The three ``random-*.bgen`` files are passed in the order b, c, a,
    indexed, and imported with ``n_partitions=3``.  The combined result
    must match the GEN import of ``random.gen`` within an absolute
    tolerance of ``1/255``.
    """
    samples = resource('random.sample')
    from_gen = hl.import_gen(resource('random.gen'), samples)

    bgen_paths = [resource('random-b.bgen'),
                  resource('random-c.bgen'),
                  resource('random-a.bgen')]
    hl.index_bgen(bgen_paths)
    from_bgen = hl.import_bgen(bgen_paths, ['GT', 'GP'], samples,
                               n_partitions=3)

    matches = from_bgen._same(from_gen, tolerance=1.0 / 255, absolute=True)
    self.assertTrue(matches)
def test_import_bgen_random(self):
    """Check that ``random.bgen`` imports to the same data as ``random.gen``.

    Both imports share one sample file.  The BGEN file is indexed before
    being imported with the ``GT`` and ``GP`` fields, and the comparison
    uses an absolute tolerance of ``1/255``.
    """
    sample_path = resource('random.sample')
    expected = hl.import_gen(resource('random.gen'), sample_path)

    bgen_path = resource('random.bgen')
    hl.index_bgen(bgen_path)
    actual = hl.import_bgen(bgen_path, ['GT', 'GP'], sample_path)

    is_same = actual._same(expected, tolerance=1.0 / 255, absolute=True)
    self.assertTrue(is_same)
def test_export_gen(self):
    """Export a column-permuted GEN dataset and verify the round trip.

    The columns of the example dataset are shuffled before export so that
    the samples are not written in their original order.  The exported
    files are imported again and must match the permuted dataset according
    to ``_same`` with an absolute tolerance of ``3E-4``.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3)

    # permute columns so not in alphabetical order!
    # A fixed seed keeps the permutation identical between runs, so a
    # failure can be reproduced.
    import random
    indices = list(range(gen.count_cols()))
    random.Random(0).shuffle(indices)
    gen = gen.choose_cols(indices)

    # Use a fresh temporary location rather than a fixed path under /tmp so
    # that concurrent or repeated runs cannot clobber each other's output.
    file = new_temp_file()
    hl.export_gen(gen, file)

    gen2 = hl.import_gen(file + '.gen',
                         sample_file=file + '.sample',
                         reference_genome='GRCh37',
                         min_partitions=3)

    self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True))
def test_import_bgen_gavin_example(self):
    """Compare ``example.8bits.bgen`` with ``example.gen`` on GRCh37.

    Contigs ``"01"``..``"09"`` are recoded to ``"1"``..``"9"`` both when
    importing the GEN file and when indexing the BGEN file.  The BGEN
    import (``GT`` and ``GP`` fields) must match the GEN import within an
    absolute tolerance of ``1/255``.
    """
    contig_map = {'0{}'.format(n): str(n) for n in range(1, 10)}
    samples = resource('example.sample')

    from_gen = hl.import_gen(
        resource('example.gen'),
        samples,
        contig_recoding=contig_map,
        reference_genome="GRCh37",
    )

    bgen_path = resource('example.8bits.bgen')
    hl.index_bgen(bgen_path,
                  contig_recoding=contig_map,
                  reference_genome="GRCh37")
    from_bgen = hl.import_bgen(bgen_path, ['GT', 'GP'], samples)

    matches = from_bgen._same(from_gen, tolerance=1.0 / 255, absolute=True)
    self.assertTrue(matches)
def test_multiple_files(self):
    """Check that a list of BGEN files imports like the single GEN file.

    ``random-b.bgen``, ``random-c.bgen`` and ``random-a.bgen`` (in that
    order) are indexed together and imported with three partitions; the
    result is compared against ``random.gen`` with an absolute tolerance
    of ``1/255``.
    """
    sample_path = resource('random.sample')
    expected = hl.import_gen(resource('random.gen'), sample_path)

    parts = [resource('random-{}.bgen'.format(tag)) for tag in ('b', 'c', 'a')]
    hl.index_bgen(parts)
    actual = hl.import_bgen(parts, ['GT', 'GP'], sample_path, n_partitions=3)

    is_same = actual._same(expected, tolerance=1.0 / 255, absolute=True)
    self.assertTrue(is_same)
def test_import_bgen_gavin_example(self):
    """Compare ``example.8bits.bgen`` with ``example.gen`` after recoding.

    Contigs ``"01"``..``"09"`` are recoded to ``"1"``..``"9"`` in both the
    GEN import and the BGEN import; the BGEN file is indexed without extra
    options.  The two datasets must agree within an absolute tolerance of
    ``1/255``.
    """
    contig_map = {'0{}'.format(n): str(n) for n in range(1, 10)}
    sample_path = resource('example.sample')

    expected = hl.import_gen(resource('example.gen'), sample_path,
                             contig_recoding=contig_map)

    bgen_path = resource('example.8bits.bgen')
    hl.index_bgen(bgen_path)
    actual = hl.import_bgen(bgen_path, ['GT', 'GP'], sample_path,
                            contig_recoding=contig_map)

    is_same = actual._same(expected, tolerance=1.0 / 255, absolute=True)
    self.assertTrue(is_same)