def test_export_gen_exprs(self):
    """Round-trip a GEN export that overrides every optional field expression.

    The example dataset is imported with column and row indices attached,
    exported with ``id1``, ``id2``, ``missing``, ``varid``, ``rsid`` and
    ``gp`` supplied explicitly, then re-imported. Every entry, row and
    column of the re-imported data must carry the overridden values.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3).add_col_index().add_row_index()

    out1 = new_temp_file()
    hl.export_gen(gen, out1,
                  id1=hl.str(gen.col_idx),
                  id2=hl.str(gen.col_idx),
                  missing=0.5,
                  varid=hl.str(gen.row_idx),
                  rsid=hl.str(gen.row_idx),
                  gp=[0.0, 1.0, 0.0])

    in1 = (hl.import_gen(out1 + '.gen',
                         sample_file=out1 + '.sample',
                         min_partitions=3)
           .add_col_index()
           .add_row_index())

    # Each check requires the matching fraction to be exactly 1.0; a bare
    # truthiness test would pass as soon as a single record matched.
    self.assertEqual(
        in1.aggregate_entries(hl.agg.fraction(in1.GP == [0.0, 1.0, 0.0])),
        1.0)
    self.assertEqual(
        in1.aggregate_rows(
            hl.agg.fraction((in1.varid == hl.str(in1.row_idx))
                            & (in1.rsid == hl.str(in1.row_idx)))),
        1.0)
    self.assertEqual(
        in1.aggregate_cols(hl.agg.fraction(in1.s == hl.str(in1.col_idx))),
        1.0)
def test_export_gen(self):
    """Export the example GEN dataset and check it re-imports unchanged.

    The re-imported matrix table must match the original within an
    absolute tolerance of 3e-4.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3)

    # Use a fresh temporary path rather than a fixed /tmp location so that
    # concurrent or repeated runs cannot read each other's output.
    out = new_temp_file()
    hl.export_gen(gen, out)

    gen2 = hl.import_gen(out + '.gen',
                         sample_file=out + '.sample',
                         reference_genome='GRCh37',
                         min_partitions=3)

    self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True))
def generate_random_gen():
    """Build a small random dataset and export it in GEN format as 'random'.

    The dataset is a 30 x 10 range matrix table whose rows are keyed by
    locus/alleles on contig '20' and whose columns are keyed by the
    stringified column index. Each entry gets a three-element ``GP`` array,
    and entries are then randomly filtered before export.
    """
    table = hl.utils.range_matrix_table(30, 10)

    # Row key: consecutive positions on contig 20 with fixed alleles.
    table = table.annotate_rows(locus=hl.locus('20', table.row_idx + 1),
                                alleles=['A', 'G'])
    table = table.key_rows_by('locus', 'alleles')

    # Column key: the column index rendered as a string.
    table = table.annotate_cols(s=hl.str(table.col_idx))
    table = table.key_cols_by('s')

    # Using totally random values leads to rounding differences where
    # identical GEN values get rounded differently, leading to
    # differences in the GT call between import_{gen, bgen}. Drawing
    # integers and dividing by 255 avoids that.
    table = table.annotate_entries(a=hl.int32(hl.rand_unif(0.0, 255.0)))
    table = table.annotate_entries(b=hl.int32(hl.rand_unif(0.0, 255.0 - table.a)))
    raw_counts = hl.array([table.a, table.b, 255.0 - table.a - table.b])
    table = table.transmute_entries(GP=raw_counts / 255.0)

    # 20% missing
    keep_entry = hl.rand_bool(0.8)
    table = table.filter_entries(keep_entry)

    hl.export_gen(table, 'random', precision=4)
def generate_random_gen():
    """Generate random genotype probabilities and export them to 'random'.

    Starts from a 30-row, 10-column range matrix table, keys rows by
    ``locus``/``alleles`` and columns by ``s``, fills in a ``GP`` entry
    field, drops a random subset of entries, and writes the result with
    ``hl.export_gen`` at precision 4.
    """
    base = hl.utils.range_matrix_table(30, 10)

    keyed_rows = base.annotate_rows(
        locus=hl.locus('20', base.row_idx + 1),
        alleles=['A', 'G'],
    ).key_rows_by('locus', 'alleles')

    keyed = keyed_rows.annotate_cols(
        s=hl.str(keyed_rows.col_idx),
    ).key_cols_by('s')

    # Using totally random values leads to rounding differences where
    # identical GEN values get rounded differently, leading to
    # differences in the GT call between import_{gen, bgen}; integer
    # draws scaled by 255 are used instead.
    with_a = keyed.annotate_entries(a=hl.int32(hl.rand_unif(0.0, 255.0)))
    with_ab = with_a.annotate_entries(
        b=hl.int32(hl.rand_unif(0.0, 255.0 - with_a.a)))
    with_gp = with_ab.transmute_entries(
        GP=hl.array([with_ab.a, with_ab.b, 255.0 - with_ab.a - with_ab.b]) / 255.0)

    # 20% missing
    sparse = with_gp.filter_entries(hl.rand_bool(0.8))

    hl.export_gen(sparse, 'random', precision=4)
def test_export_gen_exprs(self):
    """Check that ``hl.export_gen`` honours explicitly supplied expressions.

    Imports the example GEN data with column/row indices, exports it with
    custom ``id1``, ``id2``, ``missing``, ``varid``, ``rsid`` and ``gp``
    arguments, re-imports the output, and verifies that all entries, rows
    and columns reflect the supplied values.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3).add_col_index().add_row_index()

    out1 = new_temp_file()
    col_label = hl.str(gen.col_idx)
    row_label = hl.str(gen.row_idx)
    hl.export_gen(gen, out1,
                  id1=col_label,
                  id2=col_label,
                  missing=0.5,
                  varid=row_label,
                  rsid=row_label,
                  gp=[0.0, 1.0, 0.0])

    in1 = (hl.import_gen(out1 + '.gen', sample_file=out1 + '.sample', min_partitions=3)
           .add_col_index()
           .add_row_index())

    gp_fraction = in1.aggregate_entries(
        hl.agg.fraction(in1.GP == [0.0, 1.0, 0.0]))
    self.assertEqual(gp_fraction, 1.0)

    row_id_fraction = in1.aggregate_rows(
        hl.agg.fraction((in1.varid == hl.str(in1.row_idx))
                        & (in1.rsid == hl.str(in1.row_idx))))
    self.assertEqual(row_id_fraction, 1.0)

    # Compare against 1.0 explicitly: asserting truthiness of the fraction
    # would succeed even if only one sample ID matched.
    sample_id_fraction = in1.aggregate_cols(
        hl.agg.fraction(in1.s == hl.str(in1.col_idx)))
    self.assertEqual(sample_id_fraction, 1.0)
def test_export_gen(self):
    """Export a column-permuted GEN dataset and check it re-imports unchanged.

    Columns are shuffled before export so the samples are not written in
    their original order. The re-imported matrix table must match the
    permuted one within an absolute tolerance of 3e-4.
    """
    gen = hl.import_gen(resource('example.gen'),
                        sample_file=resource('example.sample'),
                        contig_recoding={"01": "1"},
                        reference_genome='GRCh37',
                        min_partitions=3)

    # permute columns so not in alphabetical order!
    import random
    indices = list(range(gen.count_cols()))
    random.shuffle(indices)
    gen = gen.choose_cols(indices)

    # Use a fresh temporary path rather than a fixed /tmp location so that
    # concurrent or repeated runs cannot read each other's output.
    out = new_temp_file()
    hl.export_gen(gen, out)

    gen2 = hl.import_gen(out + '.gen',
                         sample_file=out + '.sample',
                         reference_genome='GRCh37',
                         min_partitions=3)

    self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True))