def test_identity_by_state__chunked_sample_dimension():
    """Check that chunking along the samples axis is rejected.

    ``call_genotype`` is rebuilt as a chunked array whose second axis (the
    10 simulated samples) is split into two chunks of 5. Calling
    ``identity_by_state`` on that dataset is expected to raise
    ``NotImplementedError`` with the message asserted below.
    """
    expected_message = (
        "identity_by_state does not support chunking in the samples dimension"
    )
    ds = simulate_genotype_call_dataset(n_variant=20, n_sample=10, n_ploidy=2)
    genotypes = ds.call_genotype
    # Single chunk over the 20 variants and the ploidy axis; only the
    # samples axis is split.
    rechunked = da.asarray(genotypes.data, chunks=((20,), (5, 5), (2,)))
    ds["call_genotype"] = (genotypes.dims, rechunked)
    with pytest.raises(NotImplementedError, match=expected_message):
        identity_by_state(ds)
def test_identity_by_state__tetraploid_multiallelic(chunks):
    """Check identity-by-state on a tetraploid, tri-allelic dataset with a null call.

    The simulated dataset has 2 variants, 3 samples, ploidy 4 and 3 alleles.
    Every allele of the call at variant 0 / sample 2 is set to ``-1`` (a null
    call), so the expected per-variant matrix for variant 0 has NaN in the
    row and column of sample 2. The expected result is the NaN-ignoring mean
    of the two per-variant matrices.

    Parameters
    ----------
    chunks
        Chunk layout passed to ``rechunk`` for ``call_allele_count``, or
        ``None`` to keep the layout produced by ``count_call_alleles``.
        Presumably supplied by test parametrization that is not part of this
        block.
    """
    ds = simulate_genotype_call_dataset(
        n_variant=2,
        n_sample=3,
        n_ploidy=4,
        n_allele=3,
        seed=0,
    )
    # Inject the null call *before* deriving allele counts. The counts are a
    # chunked array (they are rechunked below), so mutating the genotypes
    # afterwards would only be reflected if that already-built computation
    # happened to share the mutated buffer; doing it first makes the expected
    # NaN entries independent of such details.
    ds.call_genotype.data[0, 2] = -1  # null call
    ds = count_call_alleles(ds)
    if chunks is not None:
        allele_counts = ds.call_allele_count
        ds["call_allele_count"] = (
            allele_counts.dims,
            allele_counts.data.rechunk(chunks),
        )
    ds = identity_by_state(ds)
    actual = ds.stat_identity_by_state.values

    # Expected pairwise values per variant (samples x samples).
    variant_0 = [
        [0.5, 0.375, np.nan],
        [0.375, 0.375, np.nan],
        [np.nan, np.nan, np.nan],
    ]
    variant_1 = [
        [1.0, 0.25, 0.0],
        [0.25, 0.625, 0.1875],
        [0.0, 0.1875, 0.625],
    ]
    expect = np.nanmean(np.array([variant_0, variant_1]), axis=0)
    np.testing.assert_array_equal(expect, actual)
def test_identity_by_state__reference_implementation(ploidy, chunks, seed):
    """Compare ``identity_by_state`` with a direct computation from allele frequencies.

    The dataset dimensions are taken from the chunk layout (the sum of the
    chunk sizes along each axis), 20% of calls are simulated as missing, and
    ``call_allele_count`` is rechunked to ``chunks`` before the statistic is
    computed. The reference value is the NaN-ignoring mean over variants of
    the pairwise sum-over-alleles product of ``call_allele_frequency``.

    Parameters
    ----------
    ploidy
        Ploidy passed to the dataset simulation.
    chunks
        Three-axis chunk layout; indexed as ``chunks[0]`` (variants),
        ``chunks[1]`` (samples) and ``chunks[2]`` (alleles).
    seed
        Random seed passed to the dataset simulation.

    The arguments are presumably supplied by test parametrization that is
    not part of this block.
    """
    variant_chunks, sample_chunks, allele_chunks = chunks[0], chunks[1], chunks[2]
    ds = simulate_genotype_call_dataset(
        n_variant=sum(variant_chunks),
        n_sample=sum(sample_chunks),
        n_ploidy=ploidy,
        n_allele=sum(allele_chunks),
        missing_pct=0.2,
        seed=seed,
    )
    ds = count_call_alleles(ds)
    allele_counts = ds.call_allele_count
    ds["call_allele_count"] = (
        allele_counts.dims,
        allele_counts.data.rechunk(chunks),
    )
    ds = identity_by_state(ds)
    actual = ds.stat_identity_by_state.values

    # Reference implementation: broadcast the frequencies against themselves
    # over a pair of sample axes, sum over alleles, then average over
    # variants while ignoring NaN entries.
    frequencies = ds.call_allele_frequency.data
    lhs = frequencies[..., None, :, :]
    rhs = frequencies[..., :, None, :]
    per_variant = (lhs * rhs).sum(axis=-1)
    expect = np.nanmean(per_variant, axis=0).compute()
    np.testing.assert_array_almost_equal(expect, actual)
def test_identity_by_state__diploid_biallelic(chunks):
    """Check identity-by-state on a small diploid, biallelic dataset.

    The simulated dataset has 2 variants, 3 samples, ploidy 2 and 2 alleles
    (seed 2). The expected result is the NaN-ignoring mean of the two
    hard-coded per-variant matrices and must match exactly.

    Parameters
    ----------
    chunks
        Chunk layout passed to ``rechunk`` for ``call_allele_count``, or
        ``None`` to keep the layout produced by ``count_call_alleles``.
        Presumably supplied by test parametrization that is not part of this
        block.
    """
    ds = count_call_alleles(
        simulate_genotype_call_dataset(
            n_variant=2,
            n_sample=3,
            n_ploidy=2,
            n_allele=2,
            seed=2,
        )
    )
    if chunks is not None:
        allele_counts = ds.call_allele_count
        ds["call_allele_count"] = (
            allele_counts.dims,
            allele_counts.data.rechunk(chunks),
        )
    ds = identity_by_state(ds)
    actual = ds.stat_identity_by_state.values

    # Expected pairwise values per variant (samples x samples).
    variant_0 = [[1.0, 0.0, 0.5], [0.0, 1.0, 0.5], [0.5, 0.5, 0.5]]
    variant_1 = [[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [0.5, 0.5, 0.5]]
    expect = np.nanmean(np.array([variant_0, variant_1]), axis=0)
    np.testing.assert_array_equal(expect, actual)