def test_same_as_the_reference_implementation() -> None:
    """
    Compare PC-Relate kinship estimates with the reference R results.

    The principal components stored in ``pcs.csv`` are attached to the
    ``hapmap_JPT_CHB_r23a_filtered`` PLINK dataset, ``pc_relate`` is run,
    and the strict upper triangle of the resulting ``pc_relate_phi`` matrix
    is checked against the ``kin`` column of ``kinbtwe.csv`` using
    ``np.allclose``.
    """
    expected_n_samples = 90
    here = Path(__file__).parent

    # NOTE(review): unlike the CSV fixtures below, this path is not anchored
    # to this file's directory -- presumably it is resolved against the
    # current working directory; confirm this is intended.
    ds = read_plink(path="hapmap_JPT_CHB_r23a_filtered")

    # Only CSV columns 1 and 2 are used as the sample PCA projection.
    pcs_frame = pd.read_csv(here.joinpath("pcs.csv").as_posix(), usecols=[1, 2])
    ds[sample_pca_projection] = (
        ("samples", "components"),
        da.from_array(pcs_frame.to_numpy()),
    )

    phi = pc_relate(ds).pc_relate_phi.compute()
    assert isinstance(phi, xr.DataArray)
    assert phi.shape == (expected_n_samples, expected_n_samples)

    # Genesis/reference kinship values, kept as a single-column 2-D array.
    reference_kin = pd.read_csv(here.joinpath("kinbtwe.csv"))[["kin"]].to_numpy()

    # k=1 skips the diagonal, leaving one entry per distinct sample pair.
    upper_rows, upper_cols = np.triu_indices_from(phi.data, k=1)  # type: ignore[no-untyped-call]
    pairwise_phi = phi.data[upper_rows, upper_cols]

    assert pairwise_phi.size == reference_kin.size
    # The transpose turns the (n, 1) reference column into (1, n) so that it
    # broadcasts element-wise against the flat vector of estimates.
    assert np.allclose(pairwise_phi, reference_kin.T)
def test_pc_relate__genotype_inputs_checks() -> None:
    """
    Verify that ``pc_relate`` raises ``ValueError`` for unusable datasets.

    Covered cases: non-diploid genotypes, non-biallelic genotypes, and
    datasets missing ``sample_pca_projection``, ``call_genotype`` or
    ``call_genotype_mask``.
    """
    # Ploidy other than two is rejected.
    triploid = simulate_genotype_call_dataset(100, 10, n_ploidy=3)
    with pytest.raises(ValueError, match="PC Relate only works for diploid genotypes"):
        pc_relate(triploid)

    # More than two alleles is rejected.
    triallelic = simulate_genotype_call_dataset(100, 10, n_allele=3)
    with pytest.raises(
        ValueError, match="PC Relate only works for biallelic genotypes"
    ):
        pc_relate(triallelic)

    # A freshly simulated dataset carries no PCA projection.
    without_pcs = simulate_genotype_call_dataset(100, 10)
    with pytest.raises(ValueError, match="sample_pca_projection not present"):
        pc_relate(without_pcs)

    # Dropping either genotype variable must be reported by name.
    missing_variable_cases = (
        ("call_genotype", "call_genotype not present"),
        ("call_genotype_mask", "call_genotype_mask not present"),
    )
    for dropped, message in missing_variable_cases:
        with pytest.raises(ValueError, match=message):
            pc_relate(without_pcs.drop_vars(dropped))
def test_pc_relate__maf_inputs_checks() -> None:
    """
    Verify that ``pc_relate`` rejects ``maf`` values outside ``(0.0, 1.0)``.

    A negative value and both interval endpoints must each raise a
    ``ValueError`` carrying the MAF range message.
    """
    dataset = simulate_genotype_call_dataset(100, 10)
    expected_message = r"MAF must be between \(0.0, 1.0\)"

    # Below the range, then the upper and lower endpoints of the interval.
    for invalid_maf in (-1, 1.0, 0.0):
        with pytest.raises(ValueError, match=expected_message):
            pc_relate(dataset, maf=invalid_maf)