def test_read_bgen_fancy_index(shared_datadir, chunks):
    """Index the first axis with ``INDEXES`` in one shot and compare results.

    Reads ``example.bgen`` with the supplied ``chunks`` and checks, for the
    entry at position 0 on the second axis, that the genotype probabilities
    and dosages selected by ``INDEXES`` match the expected constants to three
    decimal places.
    """
    dataset = read_bgen(shared_datadir / "example.bgen", chunks=chunks)

    selected_probabilities = dataset["call_genotype_probability"][INDEXES, 0]
    npt.assert_almost_equal(
        selected_probabilities, EXPECTED_PROBABILITIES, decimal=3
    )

    selected_dosages = dataset["call_dosage"][INDEXES, 0]
    npt.assert_almost_equal(selected_dosages, EXPECTED_DOSAGES, decimal=3)
def _rechunk_bgen(
    shared_datadir: Path, tmp_path: Path, **kwargs: Any
) -> Tuple[xr.Dataset, xr.Dataset, str]:
    """Read ``example.bgen`` and pass it through ``rechunk_bgen``.

    Parameters
    ----------
    shared_datadir
        Directory containing ``example.bgen``.
    tmp_path
        Directory under which the ``example.zarr`` target path is created.
    **kwargs
        Forwarded unchanged to ``rechunk_bgen``.

    Returns
    -------
    A tuple of the dataset as read, the value returned by ``rechunk_bgen``,
    and the target store path as a string.
    """
    source = read_bgen(shared_datadir / "example.bgen", chunks=(10, -1, -1))
    target = tmp_path / "example.zarr"
    rechunked = rechunk_bgen(source, target, **kwargs)
    return source, rechunked, str(target)
def test_read_bgen_with_sample_file(shared_datadir):
    """Sample IDs must be the ones listed in the separate ``.sample`` file."""
    # The example file was generated using
    #   qctool -g sgkit_bgen/tests/data/example.bgen \
    #     -og sgkit_bgen/tests/data/example-separate-samples.bgen \
    #     -os sgkit_bgen/tests/data/example-separate-samples.sample \
    #     -incl-samples sgkit_bgen/tests/data/samples
    # Then editing example-separate-samples.sample to change the sample IDs
    dataset = read_bgen(shared_datadir / "example-separate-samples.bgen")

    # Check the sample IDs are the ones from the .sample file
    sample_ids = dataset["sample_id"].values.tolist()
    expected_ids = ["s1", "s2", "s3", "s4", "s5"]
    assert sample_ids == expected_ids
def test_read_bgen_with_no_samples(shared_datadir):
    """Sample IDs are generated when the file has no sample identifiers."""
    # The example file was generated using
    #   qctool -g sgkit_bgen/tests/data/example.bgen \
    #     -og sgkit_bgen/tests/data/example-no-samples.bgen \
    #     -os sgkit_bgen/tests/data/example-no-samples.sample \
    #     -bgen-omit-sample-identifier-block \
    #     -incl-samples sgkit_bgen/tests/data/samples
    # Then deleting example-no-samples.sample
    dataset = read_bgen(shared_datadir / "example-no-samples.bgen")

    # Check the sample IDs are generated
    sample_ids = dataset["sample_id"].values.tolist()
    expected_ids = ["sample_0", "sample_1", "sample_2", "sample_3", "sample_4"]
    assert sample_ids == expected_ids
def test_read_bgen_scalar_index(shared_datadir, chunks):
    """Index one entry of ``INDEXES`` at a time and compare to expectations.

    For every index the test checks the full probability vector and the
    dosage at position 0 on the second axis, then each of the three
    probability components individually, all to three decimal places.
    """
    dataset = read_bgen(shared_datadir / "example.bgen", chunks=chunks)

    for position, variant_index in enumerate(INDEXES):
        # Whole probability vector for this index.
        probability_vector = dataset["call_genotype_probability"][variant_index, 0]
        npt.assert_almost_equal(
            probability_vector, EXPECTED_PROBABILITIES[position], decimal=3
        )

        dosage = dataset["call_dosage"][variant_index, 0]
        npt.assert_almost_equal(dosage, EXPECTED_DOSAGES[position], decimal=3)

        # Each probability component addressed with a fully scalar index.
        for component in range(3):
            single_probability = dataset["call_genotype_probability"][
                variant_index, 0, component
            ]
            npt.assert_almost_equal(
                single_probability,
                EXPECTED_PROBABILITIES[position, component],
                decimal=3,
            )
def test_read_bgen(shared_datadir, chunks):
    """Spot-check dosages, genotype probabilities and masks from example.bgen.

    Reads the file with the supplied ``chunks`` and verifies array shapes
    against ``_shape`` plus a handful of individual values to three decimal
    places.
    """
    dataset = read_bgen(shared_datadir / "example.bgen", chunks=chunks)

    # check some of the data (in different chunks)
    dosage = dataset["call_dosage"]
    assert dosage.shape == _shape("variants", "samples")
    npt.assert_almost_equal(dosage.values[1][0], 1.987, decimal=3)
    npt.assert_almost_equal(dosage.values[100][0], 0.160, decimal=3)

    # Mask is set for entry (0, 0) and unset for entry (0, 1).
    dosage_mask = dataset["call_dosage_mask"]
    npt.assert_array_equal(dosage_mask.values[0, 0], [True])
    npt.assert_array_equal(dosage_mask.values[0, 1], [False])

    probability = dataset["call_genotype_probability"]
    assert probability.shape == _shape("variants", "samples", "genotypes")
    npt.assert_almost_equal(
        probability.values[1][0], [0.005, 0.002, 0.992], decimal=3
    )
    npt.assert_almost_equal(
        probability.values[100][0], [0.916, 0.007, 0.076], decimal=3
    )

    # The probability mask carries one flag per genotype component.
    probability_mask = dataset["call_genotype_probability_mask"]
    npt.assert_array_equal(probability_mask.values[0, 0], [True, True, True])
    npt.assert_array_equal(probability_mask.values[0, 1], [False, False, False])