def test_ndarray_transpose():
    """Compare ``.T`` / ``transpose`` with NumPy and check axis validation errors."""
    np_vec = np.array([1, 2, 3])
    np_mat = np.array([[1, 2, 3], [4, 5, 6]])
    np_box = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
    vec, mat, box = (hl._ndarray(arr) for arr in (np_vec, np_mat, np_box))

    swapped_axes = (0, 2, 1)
    assert_ndarrays_eq(
        (vec.T, np_vec.T),
        (vec.T, np_vec),
        (mat.T, np_mat.T),
        (box.transpose(swapped_axes), np_box.transpose(swapped_axes)),
        (box.T, np_box.T),
    )

    # Each entry: the ndarray, the invalid axes, and the expected message.
    rejected = (
        (vec, (1,), "Invalid axis: 1"),
        (box, (1, 1), "Expected 3 axes, got 2"),
        (box, (1, 1, 1), "Axes cannot contain duplicates"),
    )
    for nd, axes, message in rejected:
        with pytest.raises(ValueError) as exc:
            nd.transpose(axes)
        assert message in str(exc.value)
def test_ndarray_reshape():
    """Compare ``reshape`` with NumPy and check the errors raised for bad shapes.

    The message assertions inspect ``exc.value`` (the raised exception)
    rather than the ``ExceptionInfo`` wrapper returned by ``pytest.raises``,
    so they match against the exception's own message.
    """
    np_single = np.array([8])
    single = hl._ndarray([8])

    np_zero_dim = np.array(4)
    zero_dim = hl._ndarray(4)

    np_a = np.array([1, 2, 3, 4, 5, 6])
    a = hl._ndarray(np_a)

    np_cube = np.array([0, 1, 2, 3, 4, 5, 6, 7]).reshape((2, 2, 2))
    cube = hl._ndarray([0, 1, 2, 3, 4, 5, 6, 7]).reshape((2, 2, 2))
    cube_to_rect = cube.reshape((2, 4))
    np_cube_to_rect = np_cube.reshape((2, 4))
    cube_t_to_rect = cube.transpose((1, 0, 2)).reshape((2, 4))
    np_cube_t_to_rect = np_cube.transpose((1, 0, 2)).reshape((2, 4))

    np_hypercube = np.arange(3 * 5 * 7 * 9).reshape((3, 5, 7, 9))
    hypercube = hl._ndarray(np_hypercube)

    assert_ndarrays_eq(
        (single.reshape(()), np_single.reshape(())),

        (zero_dim.reshape(()), np_zero_dim.reshape(())),
        (zero_dim.reshape((1,)), np_zero_dim.reshape((1,))),

        (a.reshape((6,)), np_a.reshape((6,))),
        (a.reshape((2, 3)), np_a.reshape((2, 3))),
        (a.reshape((3, 2)), np_a.reshape((3, 2))),
        (a.reshape((3, -1)), np_a.reshape((3, -1))),
        (a.reshape((-1, 2)), np_a.reshape((-1, 2))),

        (cube_to_rect, np_cube_to_rect),
        (cube_t_to_rect, np_cube_t_to_rect),

        (hypercube.reshape((5, 7, 9, 3)).reshape((7, 9, 3, 5)), np_hypercube.reshape((7, 9, 3, 5))),
        (hypercube.reshape(hl.tuple([5, 7, 9, 3])), np_hypercube.reshape((5, 7, 9, 3)))
    )

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((-1, -1)))
    assert "more than one -1" in str(exc.value)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((20,)))
    assert "requested shape is incompatible with number of elements" in str(exc.value)

    with pytest.raises(FatalError) as exc:
        hl.eval(a.reshape((3,)))
    assert "requested shape is incompatible with number of elements" in str(exc.value)

    with pytest.raises(FatalError) as exc:
        hl.eval(a.reshape(()))
    assert "requested shape is incompatible with number of elements" in str(exc.value)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((0, 2, 2)))
    assert "must contain only positive numbers or -1" in str(exc.value)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((2, 2, -2)))
    assert "must contain only positive numbers or -1" in str(exc.value)
def test_ndarray_ref():
    """Check element indexing, including missing indices and out-of-bounds access.

    The out-of-bounds assertion inspects ``exc.value`` (the raised exception)
    rather than the ``ExceptionInfo`` wrapper returned by ``pytest.raises``.
    """
    scalar = 5.0
    np_scalar = np.array(scalar)
    h_scalar = hl._ndarray(scalar)
    h_np_scalar = hl._ndarray(np_scalar)

    # Zero-dimensional ndarrays are indexed with the empty tuple.
    assert_evals_to(h_scalar[()], 5.0)
    assert_evals_to(h_np_scalar[()], 5.0)

    cube = [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
    h_cube = hl._ndarray(cube)
    h_np_cube = hl._ndarray(np.array(cube))
    missing = hl._ndarray(hl.null(hl.tarray(hl.tint32)))

    assert_all_eval_to(
        (h_cube[0, 0, 1], 1),
        (h_cube[1, 1, 0], 6),
        (h_np_cube[0, 0, 1], 1),
        (h_np_cube[1, 1, 0], 6),
        (hl._ndarray([[[[1]]]])[0, 0, 0, 0], 1),
        (hl._ndarray([[[1, 2]], [[3, 4]]])[1, 0, 0], 3),
        # A missing ndarray or a missing index is expected to evaluate to None.
        (missing[1], None),
        (hl._ndarray([1, 2, 3])[hl.null(hl.tint32)], None),
        (h_cube[0, 0, hl.null(hl.tint32)], None)
    )

    with pytest.raises(FatalError) as exc:
        hl.eval(hl._ndarray([1, 2, 3])[4])
    assert "Index out of bounds" in str(exc.value)
def test_ndarray_eval():
    """Evaluate ndarray expressions and compare the results with NumPy arrays."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    result = hl.eval(hl._ndarray(rows))
    reference = np.array(rows, dtype=np.int32)
    assert np.array_equal(result, reference)
    assert result.strides == reference.strides

    # Empty inner dimensions and fully empty input
    assert hl.eval(hl._ndarray([[], []])).strides == (8, 8)
    assert np.array_equal(hl.eval(hl._ndarray([])), np.array([]))

    # A NumPy literal should round-trip with the same contents and dtype
    zeros = np.zeros((10, 10), dtype=np.int64)
    zeros_back = hl.eval(hl.literal(zeros))
    assert np.array_equal(zeros_back, zeros)
    assert zeros.dtype == zeros_back.dtype

    # Testing from hail arrays
    from_range = hl.eval(hl._ndarray(hl.range(6)))
    assert np.array_equal(from_range, np.arange(6))
    from_scalar = hl.eval(hl._ndarray(hl.int64(4)))
    assert np.array_equal(from_scalar, np.array(4))

    # Testing missing data
    missing = hl._ndarray(hl.null(hl.tarray(hl.tint32)))
    assert hl.eval(missing) is None

    # Ragged nested lists are rejected when the expression is built
    with pytest.raises(ValueError) as exc:
        hl._ndarray([[4], [1, 2, 3], 5])
    assert "inner dimensions do not match" in str(exc.value)
def test_ndarray_sum():
    """Check ``sum`` along each axis and over the whole array against NumPy."""
    np_matrix = np.array([[1, 2], [3, 4]])
    matrix = hl._ndarray(np_matrix)

    per_axis = [(matrix.sum(axis=ax), np_matrix.sum(axis=ax)) for ax in (0, 1)]
    total = (matrix.sum(), np_matrix.sum())

    assert_all_eval_to(*per_axis, total)
def test_ndarray_save():
    """Save ndarrays as ``.npy`` files and verify that NumPy loads them back unchanged.

    Both the dtype and the contents of the loaded array are compared with
    the array that was saved.
    """
    arrs = [
        np.array([[[1, 2, 3], [4, 5, 6]],
                  [[7, 8, 9], [10, 11, 12]]], dtype=np.int32),
        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64),
        np.array(3.0, dtype=np.float32),
        np.array([3.0], dtype=np.float64),
        np.array([True, False, True, True])
    ]

    for expected in arrs:
        with tempfile.NamedTemporaryFile(suffix='.npy') as f:
            hl._ndarray(expected).save(f.name)
            actual = np.load(f.name)

            # ``assert (cond, msg)`` tests a non-empty tuple and always passes;
            # the condition and the message must be separate operands.
            assert expected.dtype == actual.dtype, \
                f'expected: {expected.dtype}, actual: {actual.dtype}'
            assert np.array_equal(expected, actual)
def test_ndarray_map():
    """Check that ``hl.map`` applies a function to every element of an ndarray."""
    source = hl._ndarray([[2, 3, 4], [5, 6, 7]])

    negated = hl.map(lambda elem: -elem, source)
    all_true = hl.map(lambda _: True, source)

    expected_negated = [[-2, -3, -4], [-5, -6, -7]]
    expected_all_true = [[True, True, True], [True, True, True]]

    assert_ndarrays_eq(
        (negated, expected_negated),
        (all_true, expected_all_true),
    )
def test_ndarray_slice():
    """Check that slicing (and sums of slices) matches NumPy's results."""
    np_arr = np.array([[[0, 1, 2, 3],
                        [4, 5, 6, 7],
                        [8, 9, 10, 11]],
                       [[12, 13, 14, 15],
                        [16, 17, 18, 19],
                        [20, 21, 22, 23]]])
    arr = hl._ndarray(np_arr)
    np_mat = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8]])
    mat = hl._ndarray(np_mat)

    def on_both(select, hail_nd, numpy_nd):
        # Apply the same indexing expression to the hail ndarray and its
        # NumPy counterpart, yielding an (actual, expected) pair.
        return select(hail_nd), select(numpy_nd)

    assert_ndarrays_eq(
        on_both(lambda t: t[:, :, :], arr, np_arr),
        on_both(lambda t: t[:, :, 1], arr, np_arr),
        on_both(lambda t: t[:, :, 1:4:2], arr, np_arr),
        on_both(lambda t: t[:, 2, 1:4:2], arr, np_arr),
        on_both(lambda t: t[0, 2, 1:4:2], arr, np_arr),
        on_both(lambda t: t[0, :, 1:4:2] + t[:, :1, 1:4:2], arr, np_arr),
        on_both(lambda t: t[0:, :, 1:4:2] + t[:, :1, 1:4:2], arr, np_arr),
        on_both(lambda t: t[0, 1:4:2] + t[:, 1:4:2], mat, np_mat),
    )
def test_ndarray_shape():
    """Check ``shape`` for plain, broadcast, transposed and missing ndarrays."""
    np_e = np.array(3)
    np_row = np.array([1, 2, 3])
    np_col = np.array([[1], [2], [3]])
    np_m = np.array([[1, 2], [3, 4]])
    np_nd = np.arange(30).reshape((2, 5, 3))

    numpy_inputs = (np_e, np_row, np_col, np_m, np_nd)
    e, row, col, m, nd = [hl._ndarray(arr) for arr in numpy_inputs]
    missing = hl._ndarray(hl.null(hl.tarray(hl.tint32)))

    # Shape of each array on its own
    plain_cases = [
        (hail_nd.shape, numpy_nd.shape)
        for hail_nd, numpy_nd in zip((e, row, col, m, nd), numpy_inputs)
    ]
    # Shapes produced by broadcasting and transposition
    derived_cases = [
        ((row + nd).shape, (np_row + np_nd).shape),
        ((row + col).shape, (np_row + np_col).shape),
        (m.transpose().shape, np_m.transpose().shape),
    ]

    assert_all_eval_to(*plain_cases, *derived_cases, (missing.shape, None))
def test_ndarray_matmul():
    """Compare ``@`` on vectors, matrices and stacked matrices with NumPy.

    NOTE(review): a second ``test_ndarray_matmul`` is defined later in this
    file; if both live in the same module the later definition replaces
    this one -- confirm which version is meant to stay.
    """
    np_v = np.array([1, 2])
    np_m = np.array([[1, 2], [3, 4]])
    np_cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
    np_rect_prism = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])
    np_broadcasted_mat = np.array([[[1, 2], [3, 4]]])

    v = hl._ndarray(np_v)
    m = hl._ndarray(np_m)
    cube = hl._ndarray(np_cube)
    rect_prism = hl._ndarray(np_rect_prism)

    # vector @ vector is compared as a plain evaluated value
    dot_product = hl.eval(v @ v)
    assert dot_product == np_v @ np_v

    product_cases = [
        (m @ m, np_m @ np_m),
        (m @ m.T, np_m @ np_m.T),
        (v @ m, np_v @ np_m),
        (m @ v, np_m @ np_v),
        (cube @ cube, np_cube @ np_cube),
        (cube @ v, np_cube @ np_v),
        (v @ cube, np_v @ np_cube),
        (cube @ m, np_cube @ np_m),
        (m @ cube, np_m @ np_cube),
        (rect_prism @ m, np_rect_prism @ np_m),
        (m @ rect_prism, np_m @ np_rect_prism),
        (m @ rect_prism.T, np_m @ np_rect_prism.T),
        (hl._ndarray(np_broadcasted_mat) @ rect_prism, np_broadcasted_mat @ np_rect_prism),
    ]
    assert_ndarrays_eq(*product_cases)

    # Multiplying by a scalar must be rejected; the operands are built inside
    # the ``raises`` block, exactly where the failure is expected.
    scalar_products = (
        lambda: m @ 5,
        lambda: m @ hl._ndarray(5),
        lambda: cube @ hl._ndarray(5),
    )
    for attempt in scalar_products:
        with pytest.raises(ValueError):
            attempt()
def test_ndarray_matmul():
    """Compare ``@`` with NumPy for 1-D through 6-D operands and check the errors.

    Scalar operands must raise ``ValueError`` when the expression is built;
    incompatible inner dimensions must raise ``FatalError`` on evaluation.
    The message assertions inspect ``exc.value`` (the raised exception)
    rather than the ``ExceptionInfo`` wrapper returned by ``pytest.raises``.
    """
    np_v = np.array([1, 2])
    np_m = np.array([[1, 2], [3, 4]])
    np_r = np.array([[1, 2, 3], [4, 5, 6]])
    np_cube = np.arange(8).reshape((2, 2, 2))
    np_rect_prism = np.arange(12).reshape((3, 2, 2))
    np_broadcasted_mat = np.arange(4).reshape((1, 2, 2))
    np_six_dim_tensor = np.arange(3 * 7 * 1 * 9 * 4 * 5).reshape((3, 7, 1, 9, 4, 5))
    np_five_dim_tensor = np.arange(7 * 5 * 1 * 5 * 3).reshape((7, 5, 1, 5, 3))

    v = hl._ndarray(np_v)
    m = hl._ndarray(np_m)
    r = hl._ndarray(np_r)
    cube = hl._ndarray(np_cube)
    rect_prism = hl._ndarray(np_rect_prism)
    broadcasted_mat = hl._ndarray(np_broadcasted_mat)
    six_dim_tensor = hl._ndarray(np_six_dim_tensor)
    five_dim_tensor = hl._ndarray(np_five_dim_tensor)

    assert_ndarrays_eq(
        (v @ v, np_v @ np_v),
        (m @ m, np_m @ np_m),
        (m @ m.T, np_m @ np_m.T),
        (r @ r.T, np_r @ np_r.T),
        (v @ m, np_v @ np_m),
        (m @ v, np_m @ np_v),
        (cube @ cube, np_cube @ np_cube),
        (cube @ v, np_cube @ np_v),
        (v @ cube, np_v @ np_cube),
        (cube @ m, np_cube @ np_m),
        (m @ cube, np_m @ np_cube),
        (rect_prism @ m, np_rect_prism @ np_m),
        (m @ rect_prism, np_m @ np_rect_prism),
        (m @ rect_prism.T, np_m @ np_rect_prism.T),
        (broadcasted_mat @ rect_prism, np_broadcasted_mat @ np_rect_prism),
        (six_dim_tensor @ five_dim_tensor, np_six_dim_tensor @ np_five_dim_tensor)
    )

    with pytest.raises(ValueError):
        m @ 5

    with pytest.raises(ValueError):
        m @ hl._ndarray(5)

    with pytest.raises(ValueError):
        cube @ hl._ndarray(5)

    with pytest.raises(FatalError) as exc:
        hl.eval(r @ r)
    assert "Matrix dimensions incompatible: 3 2" in str(exc.value)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl._ndarray([1, 2]) @ hl._ndarray([1, 2, 3]))
    assert "Matrix dimensions incompatible" in str(exc.value)
def test_ndarray_to_numpy():
    """Check that ``to_numpy`` returns an array equal to the one it was built from."""
    nd = np.array([[1, 2, 3], [4, 5, 6]])
    # The comparison result must be asserted; a bare ``np.array_equal(...)``
    # call discards it and the test could never fail.
    assert np.array_equal(hl._ndarray(nd).to_numpy(), nd)
def generate_datasets(doctest_namespace, output_dir):
    """Build the example datasets and register them in ``doctest_namespace``.

    The main matrix table is imported from ``data/sample.vcf.bgz``, annotated
    and checkpointed under ``output_dir.name``; tables, expression literals
    and the burden dataset are registered afterwards.

    NOTE(review): further ``generate_datasets`` definitions appear later in
    this file; if they share a module the last one wins -- confirm which
    version is intended.

    :param doctest_namespace: mapping that receives the doctest names
        (only item assignment is used here).
    :param output_dir: object whose ``name`` attribute is the directory the
        checkpoints are written to.
    """
    doctest_namespace['hl'] = hl

    # Remove these directories if they exist
    for stale_dir in ("sample.vds", "sample.qc.vds", "sample.filtered.vds"):
        if os.path.isdir(stale_dir):
            shutil.rmtree(stale_dir)

    example_mt = (
        hl.import_vcf('data/sample.vcf.bgz')
        .sample_rows(0.03)
        .annotate_rows(use_as_marker=hl.rand_bool(0.5),
                       panel_maf=0.1,
                       anno1=5,
                       anno2=0,
                       consequence="LOF",
                       gene="A",
                       score=5.0)
        .annotate_rows(a_index=1)
    )
    example_mt = hl.sample_qc(hl.variant_qc(example_mt))
    example_mt = (
        example_mt
        .annotate_cols(is_case=True,
                       pheno=hl.struct(is_case=hl.rand_bool(0.5),
                                       is_female=hl.rand_bool(0.5),
                                       age=hl.rand_norm(65, 10),
                                       height=hl.rand_norm(70, 10),
                                       blood_pressure=hl.rand_norm(120, 20),
                                       cohort_name="cohort1"),
                       cov=hl.struct(PC1=hl.rand_norm(0, 1)),
                       cov1=hl.rand_norm(0, 1),
                       cov2=hl.rand_norm(0, 1),
                       cohort="SIGMA")
        .annotate_globals(global_field_1=5,
                          global_field_2=10,
                          pli={'SCN1A': 0.999, 'SONIC': 0.014},
                          populations=['AFR', 'EAS', 'EUR', 'SAS', 'AMR', 'HIS'])
        .annotate_rows(gene=['TTN'])
        .annotate_cols(cohorts=['1kg'], pop='EAS')
        .checkpoint(f'{output_dir.name}/example.vds', overwrite=True)
    )

    # The same matrix table is exposed under several names
    for alias in ('ds', 'dataset'):
        doctest_namespace[alias] = example_mt
    doctest_namespace['dataset2'] = example_mt.annotate_globals(global_field=5)
    for alias in ('dataset_to_union_1', 'dataset_to_union_2'):
        doctest_namespace[alias] = example_mt

    variant_metadata = (
        example_mt.rows()
        .annotate_globals(global_field=5)
        .annotate(consequence='SYN')
    )
    doctest_namespace['v_metadata'] = variant_metadata

    sample_metadata = example_mt.cols().annotate(pop='AMR', is_case=False, sex='F')
    for alias in ('s_metadata', 'cols_to_keep', 'cols_to_remove'):
        doctest_namespace[alias] = sample_metadata
    for alias in ('rows_to_keep', 'rows_to_remove'):
        doctest_namespace[alias] = variant_metadata

    # Table
    keyed_table = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    keyed_table = keyed_table.annotate_globals(global_field_1=5, global_field_2=10)
    doctest_namespace['table1'] = keyed_table
    doctest_namespace['other_table'] = keyed_table

    doctest_namespace['table2'] = hl.import_table(
        'data/kt_example2.tsv', impute=True, key='ID')

    struct_types = {
        'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
        'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
        'E': hl.tstruct(A=hl.tint32, B=hl.tint32)
    }
    doctest_namespace['table4'] = hl.import_table(
        'data/kt_example4.tsv', impute=True, types=struct_types)

    doctest_namespace['people_table'] = hl.import_table(
        'data/explode_example.tsv',
        delimiter='\\s+',
        types={'Age': hl.tint32, 'Children': hl.tarray(hl.tstr)},
        key='Name')

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    qc_mt = hl.variant_qc(example_mt)
    doctest_namespace['ds2'] = qc_mt.select_rows(AF=qc_mt.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval("1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv", impute=True)
    doctest_namespace['mt'] = example_mt

    row_table = example_mt.rows()
    doctest_namespace['gnomad_data'] = row_table.select(row_table.info.AF)

    # BGEN
    bgen_mt = hl.import_bgen('data/example.8bits.bgen',
                             entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen_mt.rows()

    # Burden dataset: per-sample burden, per-variant weight and gene intervals
    burden_mt = hl.import_vcf('data/example_burden.vcf')
    burden_table = hl.import_table('data/example_burden.tsv',
                                   key='Sample',
                                   impute=True)
    burden_mt = burden_mt.annotate_cols(burden=burden_table[burden_mt.s])
    burden_mt = burden_mt.annotate_rows(
        weight=hl.float64(burden_mt.locus.position))
    burden_mt = hl.variant_qc(burden_mt)
    gene_intervals = hl.import_locus_intervals('data/gene.interval_list')
    burden_mt = burden_mt.annotate_rows(gene=gene_intervals[burden_mt.locus])
    burden_mt = burden_mt.checkpoint(f'{output_dir.name}/example_burden.vds',
                                     overwrite=True)
    doctest_namespace['burden_ds'] = burden_mt

    print("finished setting up doctest...")
def generate_datasets(doctest_namespace):
    """Register the doctest names, reading the main dataset from ``data/example.vds``.

    Creates ``output/`` when it is missing, removes a few directories if
    they exist, then fills ``doctest_namespace`` with the matrix table, the
    example tables and a set of expression literals.

    NOTE(review): other ``generate_datasets`` definitions appear in this
    file; if they share a module the last one wins -- confirm which version
    is intended.

    :param doctest_namespace: mapping that receives the doctest names
        (only item assignment is used here).
    """
    doctest_namespace['hl'] = hl

    output_missing = not os.path.isdir("output/")
    if output_missing:
        try:
            os.mkdir("output/")
        except OSError:
            # Best effort: a failure to create the directory is ignored here.
            pass

    # Remove these directories if they exist
    for leftover in ("sample.vds", "sample.qc.vds", "sample.filtered.vds"):
        if os.path.isdir(leftover):
            shutil.rmtree(leftover)

    example_mt = hl.read_matrix_table('data/example.vds')

    # The same matrix table is exposed under several names
    for alias in ('ds', 'dataset'):
        doctest_namespace[alias] = example_mt
    doctest_namespace['dataset2'] = example_mt.annotate_globals(global_field=5)
    for alias in ('dataset_to_union_1', 'dataset_to_union_2'):
        doctest_namespace[alias] = example_mt

    row_meta = example_mt.rows().annotate_globals(global_field=5)
    row_meta = row_meta.annotate(consequence='SYN')
    doctest_namespace['v_metadata'] = row_meta

    col_meta = example_mt.cols().annotate(pop='AMR', is_case=False, sex='F')
    for alias in ('s_metadata', 'cols_to_keep', 'cols_to_remove'):
        doctest_namespace[alias] = col_meta
    for alias in ('rows_to_keep', 'rows_to_remove'):
        doctest_namespace[alias] = row_meta

    # Table
    first_table = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    first_table = first_table.annotate_globals(global_field_1=5, global_field_2=10)
    for alias in ('table1', 'other_table'):
        doctest_namespace[alias] = first_table

    second_table = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
    doctest_namespace['table2'] = second_table

    fourth_table = hl.import_table(
        'data/kt_example4.tsv',
        impute=True,
        types={
            'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
            'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
            'E': hl.tstruct(A=hl.tint32, B=hl.tint32)
        })
    doctest_namespace['table4'] = fourth_table

    people = hl.import_table(
        'data/explode_example.tsv',
        delimiter='\\s+',
        types={
            'Age': hl.tint32,
            'Children': hl.tarray(hl.tstr)
        },
        key='Name')
    doctest_namespace['people_table'] = people

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    with_variant_qc = hl.variant_qc(example_mt)
    doctest_namespace['ds2'] = with_variant_qc.select_rows(
        AF=with_variant_qc.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval("1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv", impute=True)
    doctest_namespace['mt'] = example_mt

    rows_table = example_mt.rows()
    doctest_namespace['gnomad_data'] = rows_table.select(rows_table.info.AF)

    # BGEN
    bgen_mt = hl.import_bgen('data/example.8bits.bgen',
                             entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen_mt.rows()

    print("finished setting up doctest...")
def generate_datasets(doctest_namespace):
    """Build every dataset the doctests use and register it in ``doctest_namespace``.

    The main matrix table is imported from ``data/sample.vcf.bgz``, annotated
    and checkpointed to ``output/example.mt``. A small simulated matrix
    table, the example tables, expression literals, the burden dataset and
    the LD-score summary statistics are registered as well.

    :param doctest_namespace: mapping that receives the doctest names
        (only item assignment is used here).
    """
    doctest_namespace['hl'] = hl

    sample_mt = hl.import_vcf('data/sample.vcf.bgz').sample_rows(0.03)
    sample_mt = sample_mt.annotate_rows(
        use_as_marker=hl.rand_bool(0.5),
        panel_maf=0.1,
        anno1=5,
        anno2=0,
        consequence="LOF",
        gene="A",
        score=5.0,
    ).annotate_rows(a_index=1)
    sample_mt = hl.sample_qc(hl.variant_qc(sample_mt))

    phenotype = hl.struct(
        is_case=hl.rand_bool(0.5),
        is_female=hl.rand_bool(0.5),
        age=hl.rand_norm(65, 10),
        height=hl.rand_norm(70, 10),
        blood_pressure=hl.rand_norm(120, 20),
        cohort_name="cohort1",
    )
    sample_mt = sample_mt.annotate_cols(
        is_case=True,
        pheno=phenotype,
        cov=hl.struct(PC1=hl.rand_norm(0, 1)),
        cov1=hl.rand_norm(0, 1),
        cov2=hl.rand_norm(0, 1),
        cohort="SIGMA",
    )
    sample_mt = sample_mt.annotate_globals(
        global_field_1=5,
        global_field_2=10,
        pli={'SCN1A': 0.999, 'SONIC': 0.014},
        populations=['AFR', 'EAS', 'EUR', 'SAS', 'AMR', 'HIS'],
    )
    sample_mt = sample_mt.annotate_rows(gene=['TTN'])
    sample_mt = sample_mt.annotate_cols(cohorts=['1kg'], pop='EAS')
    sample_mt = sample_mt.checkpoint('output/example.mt', overwrite=True)

    # The same matrix table is exposed under several names
    doctest_namespace['ds'] = sample_mt
    doctest_namespace['dataset'] = sample_mt
    doctest_namespace['dataset2'] = sample_mt.annotate_globals(global_field=5)
    doctest_namespace['dataset_to_union_1'] = sample_mt
    doctest_namespace['dataset_to_union_2'] = sample_mt

    rows_with_consequence = (
        sample_mt.rows()
        .annotate_globals(global_field=5)
        .annotate(consequence='SYN')
    )
    doctest_namespace['v_metadata'] = rows_with_consequence

    cols_with_pop = sample_mt.cols().annotate(pop='AMR', is_case=False, sex='F')
    doctest_namespace['s_metadata'] = cols_with_pop
    doctest_namespace['cols_to_keep'] = cols_with_pop
    doctest_namespace['cols_to_remove'] = cols_with_pop
    doctest_namespace['rows_to_keep'] = rows_with_consequence
    doctest_namespace['rows_to_remove'] = rows_with_consequence

    simulated = hl.balding_nichols_model(3, 4, 4)
    doctest_namespace['small_mt'] = simulated.checkpoint('output/small.mt',
                                                         overwrite=True)

    # Table
    id_table = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    id_table = id_table.annotate_globals(global_field_1=5, global_field_2=10)
    doctest_namespace['table1'] = id_table
    doctest_namespace['other_table'] = id_table

    doctest_namespace['table2'] = hl.import_table('data/kt_example2.tsv',
                                                  impute=True,
                                                  key='ID')

    doctest_namespace['table4'] = hl.import_table(
        'data/kt_example4.tsv',
        impute=True,
        types={
            'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
            'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
            'E': hl.tstruct(A=hl.tint32, B=hl.tint32)
        })

    doctest_namespace['people_table'] = hl.import_table(
        'data/explode_example.tsv',
        delimiter='\\s+',
        types={
            'Age': hl.tint32,
            'Children': hl.tarray(hl.tstr)
        },
        key='Name')

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    sample_mt_qc = hl.variant_qc(sample_mt)
    doctest_namespace['ds2'] = sample_mt_qc.select_rows(
        AF=sample_mt_qc.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval("1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv", impute=True)
    doctest_namespace['mt'] = sample_mt

    sample_rows = sample_mt.rows()
    doctest_namespace['gnomad_data'] = sample_rows.select(sample_rows.info.AF)

    # BGEN
    bgen_mt = hl.import_bgen('data/example.8bits.bgen',
                             entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen_mt.rows()

    # Burden dataset: per-sample burden, per-variant weight and gene intervals
    burden_mt = hl.import_vcf('data/example_burden.vcf')
    burden_by_sample = hl.import_table('data/example_burden.tsv',
                                       key='Sample',
                                       impute=True)
    burden_mt = burden_mt.annotate_cols(burden=burden_by_sample[burden_mt.s])
    burden_mt = burden_mt.annotate_rows(
        weight=hl.float64(burden_mt.locus.position))
    burden_mt = hl.variant_qc(burden_mt)
    gene_intervals = hl.import_locus_intervals('data/gene.interval_list')
    burden_mt = burden_mt.annotate_rows(gene=gene_intervals[burden_mt.locus])
    burden_mt = burden_mt.checkpoint('output/example_burden.vds',
                                     overwrite=True)
    doctest_namespace['burden_ds'] = burden_mt

    # LD score regression: summary statistics for a single phenotype
    one_pheno_types = {
        'locus': hl.tlocus('GRCh37'),
        'alleles': hl.tarray(hl.tstr),
        'chi_squared': hl.tfloat64,
        'n': hl.tint32,
        'ld_score': hl.tfloat64,
        'phenotype': hl.tstr,
        'chi_squared_50_irnt': hl.tfloat64,
        'n_50_irnt': hl.tint32,
        'chi_squared_20160': hl.tfloat64,
        'n_20160': hl.tint32
    }
    doctest_namespace['ld_score_one_pheno_sumstats'] = hl.import_table(
        'data/ld_score_regression.one_pheno.sumstats.tsv',
        types=one_pheno_types,
        key=['locus', 'alleles'])

    # LD score regression: all phenotypes, imported as strings and then parsed
    sumstats_mt = hl.import_matrix_table(
        'data/ld_score_regression.all_phenos.sumstats.tsv',
        row_fields={
            'locus': hl.tstr,
            'alleles': hl.tstr,
            'ld_score': hl.tfloat64
        },
        entry_type=hl.tstr)
    sumstats_mt = sumstats_mt.key_cols_by(phenotype=sumstats_mt.col_id)
    sumstats_mt = sumstats_mt.key_rows_by(
        locus=hl.parse_locus(sumstats_mt.locus),
        alleles=sumstats_mt.alleles.split(','))
    sumstats_mt = sumstats_mt.drop('row_id', 'col_id')
    # Each entry is split on "," into a chi-squared value and a sample count
    sumstats_mt = sumstats_mt.annotate_entries(x=sumstats_mt.x.split(","))
    sumstats_mt = sumstats_mt.transmute_entries(
        chi_squared=hl.float64(sumstats_mt.x[0]),
        n=hl.int32(sumstats_mt.x[1]))
    sumstats_mt = sumstats_mt.annotate_rows(
        ld_score=hl.float64(sumstats_mt.ld_score))
    doctest_namespace['ld_score_all_phenos_sumstats'] = sumstats_mt

    print("finished setting up doctest...")
def test_ndarray_ops():
    """Check element-wise arithmetic and broadcasting against NumPy.

    ``+``, ``-``, ``*`` and ``//`` are compared exactly; ``/`` is compared
    with the approximate-equality helper.
    """
    a = 2.0
    b = 3.0

    x = np.array([a, b])
    y = np.array([b, a])
    row_vec = np.array([[1, 2]])
    cube1 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
    cube2 = np.array([[[9, 10], [11, 12]], [[13, 14], [15, 16]]])

    na = hl._ndarray(a)
    nx = hl._ndarray(x)
    ny = hl._ndarray(y)
    nrow_vec = hl._ndarray(row_vec)
    ncube1 = hl._ndarray(cube1)
    ncube2 = hl._ndarray(cube2)

    # with lists/numerics
    mixed_operand_cases = [
        (na + b, np.array(a + b)),
        (b + na, np.array(a + b)),
        (nx + y, x + y),
        (ncube1 + cube2, cube1 + cube2),
    ]

    addition_cases = [
        (na + na, np.array(a + a)),
        (nx + ny, x + y),
        (ncube1 + ncube2, cube1 + cube2),
        # Broadcasting
        (ncube1 + na, cube1 + a),
        (na + ncube1, a + cube1),
        (ncube1 + ny, cube1 + y),
        (ny + ncube1, y + cube1),
        (nrow_vec + ncube1, row_vec + cube1),
        (ncube1 + nrow_vec, cube1 + row_vec),
    ]

    subtraction_cases = [
        (na - na, np.array(a - a)),
        (nx - nx, x - x),
        (ncube1 - ncube2, cube1 - cube2),
        # Broadcasting
        (ncube1 - na, cube1 - a),
        (na - ncube1, a - cube1),
        (ncube1 - ny, cube1 - y),
        (ny - ncube1, y - cube1),
        (ncube1 - nrow_vec, cube1 - row_vec),
        (nrow_vec - ncube1, row_vec - cube1),
    ]

    multiplication_cases = [
        (na * na, np.array(a * a)),
        (nx * nx, x * x),
        (nx * na, x * a),
        (na * nx, a * x),
        (ncube1 * ncube2, cube1 * cube2),
        # Broadcasting
        (ncube1 * na, cube1 * a),
        (na * ncube1, a * cube1),
        (ncube1 * ny, cube1 * y),
        (ny * ncube1, y * cube1),
        (ncube1 * nrow_vec, cube1 * row_vec),
        (nrow_vec * ncube1, row_vec * cube1),
    ]

    floor_division_cases = [
        (na // na, np.array(a // a)),
        (nx // nx, x // x),
        (nx // na, x // a),
        (na // nx, a // x),
        (ncube1 // ncube2, cube1 // cube2),
        # Broadcasting
        (ncube1 // na, cube1 // a),
        (na // ncube1, a // cube1),
        (ncube1 // ny, cube1 // y),
        (ny // ncube1, y // cube1),
        (ncube1 // nrow_vec, cube1 // row_vec),
        (nrow_vec // ncube1, row_vec // cube1),
    ]

    assert_ndarrays_eq(
        *mixed_operand_cases,
        *addition_cases,
        *subtraction_cases,
        *multiplication_cases,
        *floor_division_cases,
    )

    # Division
    division_cases = [
        (na / na, np.array(a / a)),
        (nx / nx, x / x),
        (nx / na, x / a),
        (na / nx, a / x),
        (ncube1 / ncube2, cube1 / cube2),
        # Broadcasting
        (ncube1 / na, cube1 / a),
        (na / ncube1, a / cube1),
        (ncube1 / ny, cube1 / y),
        (ny / ncube1, y / cube1),
        (ncube1 / nrow_vec, cube1 / row_vec),
        (nrow_vec / ncube1, row_vec / cube1),
    ]
    assert_ndarrays_almost_eq(*division_cases)
def init(doctest_namespace):
    """Set up the doctest namespace from inside ``docs/`` and restore the cwd afterwards.

    Generator-style setup/teardown: everything before ``yield`` changes into
    ``docs/`` and fills ``doctest_namespace``; the original working directory
    is restored once the generator resumes. The restore sits in a
    ``finally`` block, so a failure part-way through setup does not leave
    the process in ``docs/``.

    :param doctest_namespace: mapping that receives the doctest names
        (only item assignment is used here).
    """
    # This gets run once per process -- must avoid race conditions
    print("setting up doctest...")

    olddir = os.getcwd()
    os.chdir("docs/")

    try:
        doctest_namespace['hl'] = hl

        if not os.path.isdir("output/"):
            try:
                os.mkdir("output/")
            except OSError:
                # Best effort: a failure to create the directory (for
                # example because it already exists) is ignored.
                pass

        files = ["sample.vds", "sample.qc.vds", "sample.filtered.vds"]
        for f in files:
            if os.path.isdir(f):
                shutil.rmtree(f)

        ds = hl.read_matrix_table('data/example.vds')
        doctest_namespace['ds'] = ds
        doctest_namespace['dataset'] = ds
        doctest_namespace['dataset2'] = ds.annotate_globals(global_field=5)
        doctest_namespace['dataset_to_union_1'] = ds
        doctest_namespace['dataset_to_union_2'] = ds

        v_metadata = ds.rows().annotate_globals(global_field=5).annotate(consequence='SYN')
        doctest_namespace['v_metadata'] = v_metadata

        s_metadata = ds.cols().annotate(pop='AMR', is_case=False, sex='F')
        doctest_namespace['s_metadata'] = s_metadata
        doctest_namespace['cols_to_keep'] = s_metadata
        doctest_namespace['cols_to_remove'] = s_metadata
        doctest_namespace['rows_to_keep'] = v_metadata
        doctest_namespace['rows_to_remove'] = v_metadata

        # Table
        table1 = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
        table1 = table1.annotate_globals(global_field_1=5, global_field_2=10)
        doctest_namespace['table1'] = table1
        doctest_namespace['other_table'] = table1

        table2 = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
        doctest_namespace['table2'] = table2

        table4 = hl.import_table('data/kt_example4.tsv', impute=True,
                                 types={'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
                                        'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
                                        'E': hl.tstruct(A=hl.tint32, B=hl.tint32)})
        doctest_namespace['table4'] = table4

        people_table = hl.import_table('data/explode_example.tsv', delimiter='\\s+',
                                       types={'Age': hl.tint32, 'Children': hl.tarray(hl.tstr)},
                                       key='Name')
        doctest_namespace['people_table'] = people_table

        # TDT
        doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

        ds2 = hl.variant_qc(ds)
        doctest_namespace['ds2'] = ds2.select_rows(AF=ds2.variant_qc.AF)

        # Expressions
        doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
        doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
        doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
        doctest_namespace['t'] = hl.literal(True)
        doctest_namespace['f'] = hl.literal(False)
        doctest_namespace['na'] = hl.null(hl.tbool)
        doctest_namespace['call'] = hl.call(0, 1, phased=False)
        doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
        doctest_namespace['d'] = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
        doctest_namespace['interval'] = hl.interval(3, 11)
        doctest_namespace['locus_interval'] = hl.parse_locus_interval("1:53242-90543")
        doctest_namespace['locus'] = hl.locus('1', 1034245)
        doctest_namespace['x'] = hl.literal(3)
        doctest_namespace['y'] = hl.literal(4.5)
        doctest_namespace['s1'] = hl.literal({1, 2, 3})
        doctest_namespace['s2'] = hl.literal({1, 3, 5})
        doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
        doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
        doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
        doctest_namespace['s'] = hl.literal('The quick brown fox')
        doctest_namespace['interval2'] = hl.Interval(3, 6)
        doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

        # Overview
        doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv", impute=True)
        doctest_namespace['mt'] = ds

        gnomad_data = ds.rows()
        doctest_namespace['gnomad_data'] = gnomad_data.select(gnomad_data.info.AF)

        # BGEN
        bgen = hl.import_bgen('data/example.8bits.bgen',
                              entry_fields=['GT', 'GP', 'dosage'])
        doctest_namespace['variants_table'] = bgen.rows()

        print("finished setting up doctest...")
        yield
    finally:
        # Runs after normal teardown and also when setup raised, so the
        # process never stays inside docs/.
        os.chdir(olddir)