Example #1
def test_ndarray_transpose():
    np_v = np.array([1, 2, 3])
    np_m = np.array([[1, 2, 3], [4, 5, 6]])
    np_cube = np.array([[[1, 2],
                         [3, 4]],
                        [[5, 6],
                         [7, 8]]])
    v = hl._ndarray(np_v)
    m = hl._ndarray(np_m)
    cube = hl._ndarray(np_cube)

    assert_ndarrays_eq(
        (v.T, np_v.T),
        (v.T, np_v),  # transposing a 1-D vector leaves it unchanged
        (m.T, np_m.T),
        (cube.transpose((0, 2, 1)), np_cube.transpose((0, 2, 1))),
        (cube.T, np_cube.T))

    with pytest.raises(ValueError) as exc:
        v.transpose((1,))
    assert "Invalid axis: 1" in str(exc.value)

    with pytest.raises(ValueError) as exc:
        cube.transpose((1, 1))
    assert "Expected 3 axes, got 2" in str(exc.value)

    with pytest.raises(ValueError) as exc:
        cube.transpose((1, 1, 1))
    assert "Axes cannot contain duplicates" in str(exc.value)
Example #2
def test_ndarray_reshape():
    np_single = np.array([8])
    single = hl._ndarray([8])

    np_zero_dim = np.array(4)
    zero_dim = hl._ndarray(4)

    np_a = np.array([1, 2, 3, 4, 5, 6])
    a = hl._ndarray(np_a)

    np_cube = np.array([0, 1, 2, 3, 4, 5, 6, 7]).reshape((2, 2, 2))
    cube = hl._ndarray([0, 1, 2, 3, 4, 5, 6, 7]).reshape((2, 2, 2))
    cube_to_rect = cube.reshape((2, 4))
    np_cube_to_rect = np_cube.reshape((2, 4))
    cube_t_to_rect = cube.transpose((1, 0, 2)).reshape((2, 4))
    np_cube_t_to_rect = np_cube.transpose((1, 0, 2)).reshape((2, 4))

    np_hypercube = np.arange(3 * 5 * 7 * 9).reshape((3, 5, 7, 9))
    hypercube = hl._ndarray(np_hypercube)

    assert_ndarrays_eq(
        (single.reshape(()), np_single.reshape(())),
        (zero_dim.reshape(()), np_zero_dim.reshape(())),
        (zero_dim.reshape((1,)), np_zero_dim.reshape((1,))),
        (a.reshape((6,)), np_a.reshape((6,))),
        (a.reshape((2, 3)), np_a.reshape((2, 3))),
        (a.reshape((3, 2)), np_a.reshape((3, 2))),
        (a.reshape((3, -1)), np_a.reshape((3, -1))),
        (a.reshape((-1, 2)), np_a.reshape((-1, 2))),
        (cube_to_rect, np_cube_to_rect),
        (cube_t_to_rect, np_cube_t_to_rect),
        (hypercube.reshape((5, 7, 9, 3)).reshape((7, 9, 3, 5)), np_hypercube.reshape((7, 9, 3, 5))),
        (hypercube.reshape(hl.tuple([5, 7, 9, 3])), np_hypercube.reshape((5, 7, 9, 3)))
    )

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((-1, -1)))
    assert "more than one -1" in str(exc)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((20,)))
    assert "requested shape is incompatible with number of elements" in str(exc)

    with pytest.raises(FatalError) as exc:
        hl.eval(a.reshape((3,)))
    assert "requested shape is incompatible with number of elements" in str(exc)

    with pytest.raises(FatalError) as exc:
        hl.eval(a.reshape(()))
    assert "requested shape is incompatible with number of elements" in str(exc)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((0, 2, 2)))
    assert "must contain only positive numbers or -1" in str(exc)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl.literal(np_cube).reshape((2, 2, -2)))
    assert "must contain only positive numbers or -1" in str(exc)
Example #3
def test_ndarray_ref():

    scalar = 5.0
    np_scalar = np.array(scalar)
    h_scalar = hl._ndarray(scalar)
    h_np_scalar = hl._ndarray(np_scalar)

    assert_evals_to(h_scalar[()], 5.0)
    assert_evals_to(h_np_scalar[()], 5.0)

    cube = [[[0, 1],
             [2, 3]],
            [[4, 5],
             [6, 7]]]
    h_cube = hl._ndarray(cube)
    h_np_cube = hl._ndarray(np.array(cube))
    missing = hl._ndarray(hl.null(hl.tarray(hl.tint32)))

    assert_all_eval_to(
        (h_cube[0, 0, 1], 1),
        (h_cube[1, 1, 0], 6),
        (h_np_cube[0, 0, 1], 1),
        (h_np_cube[1, 1, 0], 6),
        (hl._ndarray([[[[1]]]])[0, 0, 0, 0], 1),
        (hl._ndarray([[[1, 2]], [[3, 4]]])[1, 0, 0], 3),
        (missing[1], None),
        (hl._ndarray([1, 2, 3])[hl.null(hl.tint32)], None),
        (h_cube[0, 0, hl.null(hl.tint32)], None)
    )

    with pytest.raises(FatalError) as exc:
        hl.eval(hl._ndarray([1, 2, 3])[4])
    assert "Index out of bounds" in str(exc)
Example #4
def test_ndarray_eval():
    data_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    nd_expr = hl._ndarray(data_list)
    evaled = hl.eval(nd_expr)
    np_equiv = np.array(data_list, dtype=np.int32)
    assert(np.array_equal(evaled, np_equiv))
    assert(evaled.strides == np_equiv.strides)

    assert hl.eval(hl._ndarray([[], []])).strides == (8, 8)
    assert np.array_equal(hl.eval(hl._ndarray([])), np.array([]))

    zero_array = np.zeros((10, 10), dtype=np.int64)
    evaled_zero_array = hl.eval(hl.literal(zero_array))

    assert np.array_equal(evaled_zero_array, zero_array)
    assert zero_array.dtype == evaled_zero_array.dtype

    # Testing from hail arrays
    assert np.array_equal(hl.eval(hl._ndarray(hl.range(6))), np.arange(6))
    assert np.array_equal(hl.eval(hl._ndarray(hl.int64(4))), np.array(4))

    # Testing missing data
    assert hl.eval(hl._ndarray(hl.null(hl.tarray(hl.tint32)))) is None

    with pytest.raises(ValueError) as exc:
        hl._ndarray([[4], [1, 2, 3], 5])
    assert "inner dimensions do not match" in str(exc.value)
Example #5
def test_ndarray_sum():
    np_m = np.array([[1, 2], [3, 4]])
    m = hl._ndarray(np_m)

    assert_all_eval_to(
        (m.sum(axis=0), np_m.sum(axis=0)),
        (m.sum(axis=1), np_m.sum(axis=1)),
        (m.sum(), np_m.sum()))
Example #6
def test_ndarray_save():
    arrs = [
        np.array([[[1, 2, 3], [4, 5, 6]],
                  [[7, 8, 9], [10, 11, 12]]], dtype=np.int32),
        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64),
        np.array(3.0, dtype=np.float32),
        np.array([3.0], dtype=np.float64),
        np.array([True, False, True, True])
    ]

    for expected in arrs:
        with tempfile.NamedTemporaryFile(suffix='.npy') as f:
            hl._ndarray(expected).save(f.name)
            actual = np.load(f.name)

            assert expected.dtype == actual.dtype, f'expected: {expected.dtype}, actual: {actual.dtype}'
            assert np.array_equal(expected, actual)
Example #7
def test_ndarray_map():
    a = hl._ndarray([[2, 3, 4], [5, 6, 7]])
    b = hl.map(lambda x: -x, a)
    c = hl.map(lambda x: True, a)

    assert_ndarrays_eq(
        (b, [[-2, -3, -4], [-5, -6, -7]]),
        (c, [[True, True, True],
             [True, True, True]]))
Example #8
def test_ndarray_slice():
    np_arr = np.array([[[0, 1, 2, 3],
                        [4, 5, 6, 7],
                        [8, 9, 10, 11]],
                       [[12, 13, 14, 15],
                        [16, 17, 18, 19],
                        [20, 21, 22, 23]]])
    arr = hl._ndarray(np_arr)
    np_mat = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8]])
    mat = hl._ndarray(np_mat)

    assert_ndarrays_eq(
        (arr[:, :, :], np_arr[:, :, :]),
        (arr[:, :, 1], np_arr[:, :, 1]),
        (arr[:, :, 1:4:2], np_arr[:, :, 1:4:2]),
        (arr[:, 2, 1:4:2], np_arr[:, 2, 1:4:2]),
        (arr[0, 2, 1:4:2], np_arr[0, 2, 1:4:2]),
        (arr[0, :, 1:4:2] + arr[:, :1, 1:4:2], np_arr[0, :, 1:4:2] + np_arr[:, :1, 1:4:2]),
        (arr[0:, :, 1:4:2] + arr[:, :1, 1:4:2], np_arr[0:, :, 1:4:2] + np_arr[:, :1, 1:4:2]),
        (mat[0, 1:4:2] + mat[:, 1:4:2], np_mat[0, 1:4:2] + np_mat[:, 1:4:2]))
Example #9
def test_ndarray_shape():
    np_e = np.array(3)
    np_row = np.array([1, 2, 3])
    np_col = np.array([[1], [2], [3]])
    np_m = np.array([[1, 2], [3, 4]])
    np_nd = np.arange(30).reshape((2, 5, 3))

    e = hl._ndarray(np_e)
    row = hl._ndarray(np_row)
    col = hl._ndarray(np_col)
    m = hl._ndarray(np_m)
    nd = hl._ndarray(np_nd)
    missing = hl._ndarray(hl.null(hl.tarray(hl.tint32)))

    assert_all_eval_to(
        (e.shape, np_e.shape),
        (row.shape, np_row.shape),
        (col.shape, np_col.shape),
        (m.shape, np_m.shape),
        (nd.shape, np_nd.shape),
        ((row + nd).shape, (np_row + np_nd).shape),
        ((row + col).shape, (np_row + np_col).shape),
        (m.transpose().shape, np_m.transpose().shape),
        (missing.shape, None)
    )
Example #10
def test_ndarray_matmul():
    np_v = np.array([1, 2])
    np_m = np.array([[1, 2], [3, 4]])
    np_cube = np.array([[[1, 2],
                         [3, 4]],
                        [[5, 6],
                         [7, 8]]])
    np_rect_prism = np.array([[[1, 2],
                               [3, 4]],
                              [[5, 6],
                               [7, 8]],
                              [[9, 10],
                               [11, 12]]])
    v = hl._ndarray(np_v)
    m = hl._ndarray(np_m)
    cube = hl._ndarray(np_cube)
    rect_prism = hl._ndarray(np_rect_prism)
    np_broadcasted_mat = np.array([[[1, 2],
                                    [3, 4]]])

    assert(hl.eval(v @ v) == np_v @ np_v)

    assert_ndarrays_eq(
        (m @ m, np_m @ np_m),
        (m @ m.T, np_m @ np_m.T),
        (v @ m, np_v @ np_m),
        (m @ v, np_m @ np_v),
        (cube @ cube, np_cube @ np_cube),
        (cube @ v, np_cube @ np_v),
        (v @ cube, np_v @ np_cube),
        (cube @ m, np_cube @ np_m),
        (m @ cube, np_m @ np_cube),
        (rect_prism @ m, np_rect_prism @ np_m),
        (m @ rect_prism, np_m @ np_rect_prism),
        (m @ rect_prism.T, np_m @ np_rect_prism.T),
        (hl._ndarray(np_broadcasted_mat) @ rect_prism, np_broadcasted_mat @ np_rect_prism))

    with pytest.raises(ValueError):
        m @ 5

    with pytest.raises(ValueError):
        m @ hl._ndarray(5)

    with pytest.raises(ValueError):
        cube @ hl._ndarray(5)
Example #11
def test_ndarray_matmul():
    np_v = np.array([1, 2])
    np_m = np.array([[1, 2], [3, 4]])
    np_r = np.array([[1, 2, 3], [4, 5, 6]])
    np_cube = np.arange(8).reshape((2, 2, 2))
    np_rect_prism = np.arange(12).reshape((3, 2, 2))
    np_broadcasted_mat = np.arange(4).reshape((1, 2, 2))
    np_six_dim_tensor = np.arange(3 * 7 * 1 * 9 * 4 * 5).reshape((3, 7, 1, 9, 4, 5))
    np_five_dim_tensor = np.arange(7 * 5 * 1 * 5 * 3).reshape((7, 5, 1, 5, 3))

    v = hl._ndarray(np_v)
    m = hl._ndarray(np_m)
    r = hl._ndarray(np_r)
    cube = hl._ndarray(np_cube)
    rect_prism = hl._ndarray(np_rect_prism)
    broadcasted_mat = hl._ndarray(np_broadcasted_mat)
    six_dim_tensor = hl._ndarray(np_six_dim_tensor)
    five_dim_tensor = hl._ndarray(np_five_dim_tensor)

    assert_ndarrays_eq(
        (v @ v, np_v @ np_v),
        (m @ m, np_m @ np_m),
        (m @ m.T, np_m @ np_m.T),
        (r @ r.T, np_r @ np_r.T),
        (v @ m, np_v @ np_m),
        (m @ v, np_m @ np_v),
        (cube @ cube, np_cube @ np_cube),
        (cube @ v, np_cube @ np_v),
        (v @ cube, np_v @ np_cube),
        (cube @ m, np_cube @ np_m),
        (m @ cube, np_m @ np_cube),
        (rect_prism @ m, np_rect_prism @ np_m),
        (m @ rect_prism, np_m @ np_rect_prism),
        (m @ rect_prism.T, np_m @ np_rect_prism.T),
        (broadcasted_mat @ rect_prism, np_broadcasted_mat @ np_rect_prism),
        (six_dim_tensor @ five_dim_tensor, np_six_dim_tensor @ np_five_dim_tensor)
    )

    with pytest.raises(ValueError):
        m @ 5

    with pytest.raises(ValueError):
        m @ hl._ndarray(5)

    with pytest.raises(ValueError):
        cube @ hl._ndarray(5)

    with pytest.raises(FatalError) as exc:
        hl.eval(r @ r)
    assert "Matrix dimensions incompatible: 3 2" in str(exc)

    with pytest.raises(FatalError) as exc:
        hl.eval(hl._ndarray([1, 2]) @ hl._ndarray([1, 2, 3]))
    assert "Matrix dimensions incompatible" in str(exc)
Example #12
def test_ndarray_to_numpy():
    nd = np.array([[1, 2, 3], [4, 5, 6]])
    assert np.array_equal(hl._ndarray(nd).to_numpy(), nd)
Example #13
File: conftest.py Project: mpilo24a/hail
def generate_datasets(doctest_namespace, output_dir):
    doctest_namespace['hl'] = hl

    files = ["sample.vds", "sample.qc.vds", "sample.filtered.vds"]
    for f in files:
        if os.path.isdir(f):
            shutil.rmtree(f)

    ds = hl.import_vcf('data/sample.vcf.bgz')
    ds = ds.sample_rows(0.03)
    ds = ds.annotate_rows(use_as_marker=hl.rand_bool(0.5),
                          panel_maf=0.1,
                          anno1=5,
                          anno2=0,
                          consequence="LOF",
                          gene="A",
                          score=5.0)
    ds = ds.annotate_rows(a_index=1)
    ds = hl.sample_qc(hl.variant_qc(ds))
    ds = ds.annotate_cols(is_case=True,
                          pheno=hl.struct(is_case=hl.rand_bool(0.5),
                                          is_female=hl.rand_bool(0.5),
                                          age=hl.rand_norm(65, 10),
                                          height=hl.rand_norm(70, 10),
                                          blood_pressure=hl.rand_norm(120, 20),
                                          cohort_name="cohort1"),
                          cov=hl.struct(PC1=hl.rand_norm(0, 1)),
                          cov1=hl.rand_norm(0, 1),
                          cov2=hl.rand_norm(0, 1),
                          cohort="SIGMA")
    ds = ds.annotate_globals(
        global_field_1=5,
        global_field_2=10,
        pli={
            'SCN1A': 0.999,
            'SONIC': 0.014
        },
        populations=['AFR', 'EAS', 'EUR', 'SAS', 'AMR', 'HIS'])
    ds = ds.annotate_rows(gene=['TTN'])
    ds = ds.annotate_cols(cohorts=['1kg'], pop='EAS')
    ds = ds.checkpoint(f'{output_dir.name}/example.vds', overwrite=True)
    doctest_namespace['ds'] = ds
    doctest_namespace['dataset'] = ds
    doctest_namespace['dataset2'] = ds.annotate_globals(global_field=5)
    doctest_namespace['dataset_to_union_1'] = ds
    doctest_namespace['dataset_to_union_2'] = ds

    v_metadata = ds.rows().annotate_globals(global_field=5).annotate(
        consequence='SYN')
    doctest_namespace['v_metadata'] = v_metadata

    s_metadata = ds.cols().annotate(pop='AMR', is_case=False, sex='F')
    doctest_namespace['s_metadata'] = s_metadata
    doctest_namespace['cols_to_keep'] = s_metadata
    doctest_namespace['cols_to_remove'] = s_metadata
    doctest_namespace['rows_to_keep'] = v_metadata
    doctest_namespace['rows_to_remove'] = v_metadata

    # Table
    table1 = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    table1 = table1.annotate_globals(global_field_1=5, global_field_2=10)
    doctest_namespace['table1'] = table1
    doctest_namespace['other_table'] = table1

    table2 = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
    doctest_namespace['table2'] = table2

    table4 = hl.import_table('data/kt_example4.tsv',
                             impute=True,
                             types={
                                 'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
                                 'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
                                 'E': hl.tstruct(A=hl.tint32, B=hl.tint32)
                             })
    doctest_namespace['table4'] = table4

    people_table = hl.import_table('data/explode_example.tsv',
                                   delimiter='\\s+',
                                   types={
                                       'Age': hl.tint32,
                                       'Children': hl.tarray(hl.tstr)
                                   },
                                   key='Name')
    doctest_namespace['people_table'] = people_table

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    ds2 = hl.variant_qc(ds)
    doctest_namespace['ds2'] = ds2.select_rows(AF=ds2.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({
        'Alice': 43,
        'Bob': 33,
        'Charles': 44
    })
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval(
        "1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv",
                                              impute=True)
    doctest_namespace['mt'] = ds

    gnomad_data = ds.rows()
    doctest_namespace['gnomad_data'] = gnomad_data.select(gnomad_data.info.AF)

    # BGEN
    bgen = hl.import_bgen('data/example.8bits.bgen',
                          entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen.rows()

    burden_ds = hl.import_vcf('data/example_burden.vcf')
    burden_kt = hl.import_table('data/example_burden.tsv',
                                key='Sample',
                                impute=True)
    burden_ds = burden_ds.annotate_cols(burden=burden_kt[burden_ds.s])
    burden_ds = burden_ds.annotate_rows(
        weight=hl.float64(burden_ds.locus.position))
    burden_ds = hl.variant_qc(burden_ds)
    genekt = hl.import_locus_intervals('data/gene.interval_list')
    burden_ds = burden_ds.annotate_rows(gene=genekt[burden_ds.locus])
    burden_ds = burden_ds.checkpoint(f'{output_dir.name}/example_burden.vds',
                                     overwrite=True)
    doctest_namespace['burden_ds'] = burden_ds

    print("finished setting up doctest...")
Example #14
File: conftest.py Project: joonan30/hail
def generate_datasets(doctest_namespace):
    doctest_namespace['hl'] = hl

    if not os.path.isdir("output/"):
        try:
            os.mkdir("output/")
        except OSError:
            pass

    files = ["sample.vds", "sample.qc.vds", "sample.filtered.vds"]
    for f in files:
        if os.path.isdir(f):
            shutil.rmtree(f)

    ds = hl.read_matrix_table('data/example.vds')
    doctest_namespace['ds'] = ds
    doctest_namespace['dataset'] = ds
    doctest_namespace['dataset2'] = ds.annotate_globals(global_field=5)
    doctest_namespace['dataset_to_union_1'] = ds
    doctest_namespace['dataset_to_union_2'] = ds

    v_metadata = ds.rows().annotate_globals(global_field=5).annotate(
        consequence='SYN')
    doctest_namespace['v_metadata'] = v_metadata

    s_metadata = ds.cols().annotate(pop='AMR', is_case=False, sex='F')
    doctest_namespace['s_metadata'] = s_metadata
    doctest_namespace['cols_to_keep'] = s_metadata
    doctest_namespace['cols_to_remove'] = s_metadata
    doctest_namespace['rows_to_keep'] = v_metadata
    doctest_namespace['rows_to_remove'] = v_metadata

    # Table
    table1 = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    table1 = table1.annotate_globals(global_field_1=5, global_field_2=10)
    doctest_namespace['table1'] = table1
    doctest_namespace['other_table'] = table1

    table2 = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
    doctest_namespace['table2'] = table2

    table4 = hl.import_table('data/kt_example4.tsv',
                             impute=True,
                             types={
                                 'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
                                 'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
                                 'E': hl.tstruct(A=hl.tint32, B=hl.tint32)
                             })
    doctest_namespace['table4'] = table4

    people_table = hl.import_table('data/explode_example.tsv',
                                   delimiter='\\s+',
                                   types={
                                       'Age': hl.tint32,
                                       'Children': hl.tarray(hl.tstr)
                                   },
                                   key='Name')
    doctest_namespace['people_table'] = people_table

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    ds2 = hl.variant_qc(ds)
    doctest_namespace['ds2'] = ds2.select_rows(AF=ds2.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({
        'Alice': 43,
        'Bob': 33,
        'Charles': 44
    })
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval(
        "1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv",
                                              impute=True)
    doctest_namespace['mt'] = ds

    gnomad_data = ds.rows()
    doctest_namespace['gnomad_data'] = gnomad_data.select(gnomad_data.info.AF)

    # BGEN
    bgen = hl.import_bgen('data/example.8bits.bgen',
                          entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen.rows()

    print("finished setting up doctest...")
Example #15
File: conftest.py Project: zietzm/hail
def generate_datasets(doctest_namespace):
    doctest_namespace['hl'] = hl

    ds = hl.import_vcf('data/sample.vcf.bgz')
    ds = ds.sample_rows(0.03)
    ds = ds.annotate_rows(use_as_marker=hl.rand_bool(0.5),
                          panel_maf=0.1,
                          anno1=5,
                          anno2=0,
                          consequence="LOF",
                          gene="A",
                          score=5.0)
    ds = ds.annotate_rows(a_index=1)
    ds = hl.sample_qc(hl.variant_qc(ds))
    ds = ds.annotate_cols(is_case=True,
                          pheno=hl.struct(is_case=hl.rand_bool(0.5),
                                          is_female=hl.rand_bool(0.5),
                                          age=hl.rand_norm(65, 10),
                                          height=hl.rand_norm(70, 10),
                                          blood_pressure=hl.rand_norm(120, 20),
                                          cohort_name="cohort1"),
                          cov=hl.struct(PC1=hl.rand_norm(0, 1)),
                          cov1=hl.rand_norm(0, 1),
                          cov2=hl.rand_norm(0, 1),
                          cohort="SIGMA")
    ds = ds.annotate_globals(
        global_field_1=5,
        global_field_2=10,
        pli={
            'SCN1A': 0.999,
            'SONIC': 0.014
        },
        populations=['AFR', 'EAS', 'EUR', 'SAS', 'AMR', 'HIS'])
    ds = ds.annotate_rows(gene=['TTN'])
    ds = ds.annotate_cols(cohorts=['1kg'], pop='EAS')
    ds = ds.checkpoint('output/example.mt', overwrite=True)

    doctest_namespace['ds'] = ds
    doctest_namespace['dataset'] = ds
    doctest_namespace['dataset2'] = ds.annotate_globals(global_field=5)
    doctest_namespace['dataset_to_union_1'] = ds
    doctest_namespace['dataset_to_union_2'] = ds

    v_metadata = ds.rows().annotate_globals(global_field=5).annotate(
        consequence='SYN')
    doctest_namespace['v_metadata'] = v_metadata

    s_metadata = ds.cols().annotate(pop='AMR', is_case=False, sex='F')
    doctest_namespace['s_metadata'] = s_metadata
    doctest_namespace['cols_to_keep'] = s_metadata
    doctest_namespace['cols_to_remove'] = s_metadata
    doctest_namespace['rows_to_keep'] = v_metadata
    doctest_namespace['rows_to_remove'] = v_metadata

    small_mt = hl.balding_nichols_model(3, 4, 4)
    doctest_namespace['small_mt'] = small_mt.checkpoint('output/small.mt',
                                                        overwrite=True)

    # Table
    table1 = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    table1 = table1.annotate_globals(global_field_1=5, global_field_2=10)
    doctest_namespace['table1'] = table1
    doctest_namespace['other_table'] = table1

    table2 = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
    doctest_namespace['table2'] = table2

    table4 = hl.import_table('data/kt_example4.tsv',
                             impute=True,
                             types={
                                 'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
                                 'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
                                 'E': hl.tstruct(A=hl.tint32, B=hl.tint32)
                             })
    doctest_namespace['table4'] = table4

    people_table = hl.import_table('data/explode_example.tsv',
                                   delimiter='\\s+',
                                   types={
                                       'Age': hl.tint32,
                                       'Children': hl.tarray(hl.tstr)
                                   },
                                   key='Name')
    doctest_namespace['people_table'] = people_table

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    ds2 = hl.variant_qc(ds)
    doctest_namespace['ds2'] = ds2.select_rows(AF=ds2.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({
        'Alice': 43,
        'Bob': 33,
        'Charles': 44
    })
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval(
        "1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv",
                                              impute=True)
    doctest_namespace['mt'] = ds

    gnomad_data = ds.rows()
    doctest_namespace['gnomad_data'] = gnomad_data.select(gnomad_data.info.AF)

    # BGEN
    bgen = hl.import_bgen('data/example.8bits.bgen',
                          entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen.rows()

    burden_ds = hl.import_vcf('data/example_burden.vcf')
    burden_kt = hl.import_table('data/example_burden.tsv',
                                key='Sample',
                                impute=True)
    burden_ds = burden_ds.annotate_cols(burden=burden_kt[burden_ds.s])
    burden_ds = burden_ds.annotate_rows(
        weight=hl.float64(burden_ds.locus.position))
    burden_ds = hl.variant_qc(burden_ds)
    genekt = hl.import_locus_intervals('data/gene.interval_list')
    burden_ds = burden_ds.annotate_rows(gene=genekt[burden_ds.locus])
    burden_ds = burden_ds.checkpoint('output/example_burden.vds',
                                     overwrite=True)
    doctest_namespace['burden_ds'] = burden_ds

    ld_score_one_pheno_sumstats = hl.import_table(
        'data/ld_score_regression.one_pheno.sumstats.tsv',
        types={
            'locus': hl.tlocus('GRCh37'),
            'alleles': hl.tarray(hl.tstr),
            'chi_squared': hl.tfloat64,
            'n': hl.tint32,
            'ld_score': hl.tfloat64,
            'phenotype': hl.tstr,
            'chi_squared_50_irnt': hl.tfloat64,
            'n_50_irnt': hl.tint32,
            'chi_squared_20160': hl.tfloat64,
            'n_20160': hl.tint32
        },
        key=['locus', 'alleles'])
    doctest_namespace[
        'ld_score_one_pheno_sumstats'] = ld_score_one_pheno_sumstats

    mt = hl.import_matrix_table(
        'data/ld_score_regression.all_phenos.sumstats.tsv',
        row_fields={
            'locus': hl.tstr,
            'alleles': hl.tstr,
            'ld_score': hl.tfloat64
        },
        entry_type=hl.tstr)
    mt = mt.key_cols_by(phenotype=mt.col_id)
    mt = mt.key_rows_by(locus=hl.parse_locus(mt.locus),
                        alleles=mt.alleles.split(','))
    mt = mt.drop('row_id', 'col_id')
    mt = mt.annotate_entries(x=mt.x.split(","))
    mt = mt.transmute_entries(chi_squared=hl.float64(mt.x[0]),
                              n=hl.int32(mt.x[1]))
    mt = mt.annotate_rows(ld_score=hl.float64(mt.ld_score))
    doctest_namespace['ld_score_all_phenos_sumstats'] = mt

    print("finished setting up doctest...")
Example #16
def test_ndarray_ops():

    a = 2.0
    b = 3.0
    x = np.array([a, b])
    y = np.array([b, a])
    row_vec = np.array([[1, 2]])
    cube1 = np.array([[[1, 2],
                       [3, 4]],
                      [[5, 6],
                       [7, 8]]])
    cube2 = np.array([[[9, 10],
                       [11, 12]],
                      [[13, 14],
                       [15, 16]]])

    na = hl._ndarray(a)
    nx = hl._ndarray(x)
    ny = hl._ndarray(y)
    nrow_vec = hl._ndarray(row_vec)
    ncube1 = hl._ndarray(cube1)
    ncube2 = hl._ndarray(cube2)

    assert_ndarrays_eq(
        # with lists/numerics
        (na + b, np.array(a + b)),
        (b + na, np.array(a + b)),
        (nx + y, x + y),
        (ncube1 + cube2, cube1 + cube2),

        # Addition
        (na + na, np.array(a + a)),
        (nx + ny, x + y),
        (ncube1 + ncube2, cube1 + cube2),
        # Broadcasting
        (ncube1 + na, cube1 + a),
        (na + ncube1, a + cube1),
        (ncube1 + ny, cube1 + y),
        (ny + ncube1, y + cube1),
        (nrow_vec + ncube1, row_vec + cube1),
        (ncube1 + nrow_vec, cube1 + row_vec),

        # Subtraction
        (na - na, np.array(a - a)),
        (nx - nx, x - x),
        (ncube1 - ncube2, cube1 - cube2),
        # Broadcasting
        (ncube1 - na, cube1 - a),
        (na - ncube1, a - cube1),
        (ncube1 - ny, cube1 - y),
        (ny - ncube1, y - cube1),
        (ncube1 - nrow_vec, cube1 - row_vec),
        (nrow_vec - ncube1, row_vec - cube1),

        # Multiplication
        (na * na, np.array(a * a)),
        (nx * nx, x * x),
        (nx * na, x * a),
        (na * nx, a * x),
        (ncube1 * ncube2, cube1 * cube2),
        # Broadcasting
        (ncube1 * na, cube1 * a),
        (na * ncube1, a * cube1),
        (ncube1 * ny, cube1 * y),
        (ny * ncube1, y * cube1),
        (ncube1 * nrow_vec, cube1 * row_vec),
        (nrow_vec * ncube1, row_vec * cube1),

        # Floor div
        (na // na, np.array(a // a)),
        (nx // nx, x // x),
        (nx // na, x // a),
        (na // nx, a // x),
        (ncube1 // ncube2, cube1 // cube2),
        # Broadcasting
        (ncube1 // na, cube1 // a),
        (na // ncube1, a // cube1),
        (ncube1 // ny, cube1 // y),
        (ny // ncube1, y // cube1),
        (ncube1 // nrow_vec, cube1 // row_vec),
        (nrow_vec // ncube1, row_vec // cube1))

    # Division
    assert_ndarrays_almost_eq(
        (na / na, np.array(a / a)),
        (nx / nx, x / x),
        (nx / na, x / a),
        (na / nx, a / x),
        (ncube1 / ncube2, cube1 / cube2),
        # Broadcasting
        (ncube1 / na, cube1 / a),
        (na / ncube1, a / cube1),
        (ncube1 / ny, cube1 / y),
        (ny / ncube1, y / cube1),
        (ncube1 / nrow_vec, cube1 / row_vec),
        (nrow_vec / ncube1, row_vec / cube1))
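Floating-point division goes through assert_ndarrays_almost_eq rather than the exact comparison used elsewhere. A minimal sketch of such a helper, assuming it evaluates each expression and compares with a numeric tolerance via np.allclose (again an assumption, not the project's implementation):

import numpy as np
import hail as hl

def assert_ndarrays_almost_eq(*expr_and_expected):
    # Hypothetical helper: like assert_ndarrays_eq above, but tolerant of
    # floating-point rounding differences.
    for expr, expected in expr_and_expected:
        evaled = hl.eval(expr)
        assert np.allclose(evaled, np.asarray(expected)), \
            f'got {evaled!r}, expected {expected!r}'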
Example #17
File: conftest.py Project: jigold/hail
def init(doctest_namespace):
    # This gets run once per process -- must avoid race conditions
    print("setting up doctest...")

    olddir = os.getcwd()
    os.chdir("docs/")

    doctest_namespace['hl'] = hl

    if not os.path.isdir("output/"):
        try:
            os.mkdir("output/")
        except OSError:
            pass

    files = ["sample.vds", "sample.qc.vds", "sample.filtered.vds"]
    for f in files:
        if os.path.isdir(f):
            shutil.rmtree(f)

    ds = hl.read_matrix_table('data/example.vds')
    doctest_namespace['ds'] = ds
    doctest_namespace['dataset'] = ds
    doctest_namespace['dataset2'] = ds.annotate_globals(global_field=5)
    doctest_namespace['dataset_to_union_1'] = ds
    doctest_namespace['dataset_to_union_2'] = ds

    v_metadata = ds.rows().annotate_globals(global_field=5).annotate(consequence='SYN')
    doctest_namespace['v_metadata'] = v_metadata

    s_metadata = ds.cols().annotate(pop='AMR', is_case=False, sex='F')
    doctest_namespace['s_metadata'] = s_metadata
    doctest_namespace['cols_to_keep'] = s_metadata
    doctest_namespace['cols_to_remove'] = s_metadata
    doctest_namespace['rows_to_keep'] = v_metadata
    doctest_namespace['rows_to_remove'] = v_metadata

    # Table
    table1 = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
    table1 = table1.annotate_globals(global_field_1=5, global_field_2=10)
    doctest_namespace['table1'] = table1
    doctest_namespace['other_table'] = table1

    table2 = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
    doctest_namespace['table2'] = table2

    table4 = hl.import_table('data/kt_example4.tsv', impute=True,
                             types={'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
                                    'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
                                    'E': hl.tstruct(A=hl.tint32, B=hl.tint32)})
    doctest_namespace['table4'] = table4

    people_table = hl.import_table('data/explode_example.tsv', delimiter='\\s+',
                                   types={'Age': hl.tint32, 'Children': hl.tarray(hl.tstr)},
                                   key='Name')
    doctest_namespace['people_table'] = people_table

    # TDT
    doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

    ds2 = hl.variant_qc(ds)
    doctest_namespace['ds2'] = ds2.select_rows(AF=ds2.variant_qc.AF)

    # Expressions
    doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
    doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
    doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
    doctest_namespace['t'] = hl.literal(True)
    doctest_namespace['f'] = hl.literal(False)
    doctest_namespace['na'] = hl.null(hl.tbool)
    doctest_namespace['call'] = hl.call(0, 1, phased=False)
    doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
    doctest_namespace['d'] = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
    doctest_namespace['interval'] = hl.interval(3, 11)
    doctest_namespace['locus_interval'] = hl.parse_locus_interval("1:53242-90543")
    doctest_namespace['locus'] = hl.locus('1', 1034245)
    doctest_namespace['x'] = hl.literal(3)
    doctest_namespace['y'] = hl.literal(4.5)
    doctest_namespace['s1'] = hl.literal({1, 2, 3})
    doctest_namespace['s2'] = hl.literal({1, 3, 5})
    doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
    doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
    doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
    doctest_namespace['s'] = hl.literal('The quick brown fox')
    doctest_namespace['interval2'] = hl.Interval(3, 6)
    doctest_namespace['nd'] = hl._ndarray([[1, 2], [3, 4]])

    # Overview
    doctest_namespace['ht'] = hl.import_table("data/kt_example1.tsv", impute=True)
    doctest_namespace['mt'] = ds

    gnomad_data = ds.rows()
    doctest_namespace['gnomad_data'] = gnomad_data.select(gnomad_data.info.AF)

    # BGEN
    bgen = hl.import_bgen('data/example.8bits.bgen',
                          entry_fields=['GT', 'GP', 'dosage'])
    doctest_namespace['variants_table'] = bgen.rows()

    print("finished setting up doctest...")
    yield
    os.chdir(olddir)