def test_gwas_linear_regression__scalar_vars(ds: xr.Dataset) -> None:
    """Scalar variable names must behave like one-element lists.

    ``covariates`` and ``traits`` are passed once as single-item lists and
    once as bare strings; the two resulting datasets are compared with
    ``xr.testing.assert_equal``.
    """
    # The two calls are independent, so the list form is evaluated first.
    res_list = gwas_linear_regression(
        ds,
        dosage="dosage",
        covariates=["covar_0"],
        traits=["trait_0"],
    )
    res_scalar = gwas_linear_regression(
        ds,
        dosage="dosage",
        covariates="covar_0",
        traits="trait_0",
    )
    xr.testing.assert_equal(res_scalar, res_list)  # type: ignore[no-untyped-call]
def run(traits: Sequence[str]) -> Dataset:
    """Run ``gwas_linear_regression`` for the given trait names.

    ``ds`` is not a parameter; it is resolved from the surrounding scope.
    The call always uses the ``"dosage"`` variable, the single covariate
    ``"covar_0"`` and ``add_intercept=True``.
    """
    result = gwas_linear_regression(
        ds,
        traits=traits,
        covariates=["covar_0"],
        dosage="dosage",
        add_intercept=True,
    )
    return result
def test_gwas_linear_regression__lazy_results(ds):
    """Every variable of the unmerged result must wrap a ``da.Array``.

    The regression is run with ``merge=False`` and each variable's ``.data``
    is checked to be a ``da.Array`` instance (``da`` is presumably
    ``dask.array`` -- i.e. results have not been computed yet).
    """
    res = gwas_linear_regression(
        ds,
        dosage="dosage",
        covariates="covar_0",
        traits="trait_0",
        merge=False,
    )
    for name in res:
        backing = res[name].data
        assert isinstance(backing, da.Array)
def test_gwas_linear_regression__variable_shapes(ds, chunks):
    """Result variables must have shape ``(n_variants, 1)`` lazily and computed.

    The dataset is re-chunked with ``chunks`` before a single-trait
    regression is run with ``merge=False``. Each result variable's shape is
    checked both on the uncomputed ``.data`` and after ``.compute()``.
    """
    ds = ds.chunk(chunks=chunks)
    res = gwas_linear_regression(
        ds,
        dosage="dosage",
        covariates="covar_0",
        traits="trait_0",
        merge=False,
    )
    # ``Dataset.sizes`` is the dimension-name -> length mapping. Item lookup
    # on ``Dataset.dims`` is deprecated in xarray, so it is avoided here.
    shape = (ds.sizes["variants"], 1)
    for v in res:
        assert res[v].data.shape == shape
        assert res[v].data.compute().shape == shape
def _get_statistics(
    ds: Dataset, add_intercept: bool, **kwargs: Any
) -> Tuple[DataFrame, DataFrame]:
    """Collect predicted and reference statistics, one row per variant index.

    For each index ``i`` along the ``variants`` dimension,
    ``gwas_linear_regression`` is run against ``trait_{i}`` and row ``i`` of
    its dataframe (with ``"variant_"`` removed from the column names) is
    recorded. The matching reference ``t_value``/``p_value`` come from
    ``_sm_statistics(ds, i, add_intercept)``.

    Parameters
    ----------
    ds
        Dataset passed to both ``gwas_linear_regression`` and
        ``_sm_statistics``.
    add_intercept
        Forwarded to both ``gwas_linear_regression`` and ``_sm_statistics``.
    **kwargs
        Extra keyword arguments forwarded to ``gwas_linear_regression``.

    Returns
    -------
    Tuple of ``(predicted, reference)`` dataframes built from the per-index
    records, in index order.
    """
    df_pred: List[Dict[str, Any]] = []
    df_true: List[Dict[str, Any]] = []
    # ``Dataset.sizes`` is the dimension-name -> length mapping. Item lookup
    # on ``Dataset.dims`` is deprecated in xarray, so it is avoided here.
    for i in range(ds.sizes["variants"]):
        dsr = gwas_linear_regression(
            ds,
            dosage="dosage",
            traits=[f"trait_{i}"],
            add_intercept=add_intercept,
            **kwargs,
        )
        res = _sm_statistics(ds, i, add_intercept)
        df_pred.append(
            dsr.to_dataframe()
            .rename(columns=lambda c: c.replace("variant_", ""))
            .iloc[i]
            .to_dict()
        )
        # First result in statsmodels RegressionResultsWrapper for
        # [t|p]values will correspond to variant (not covariate/intercept)
        df_true.append(dict(t_value=res.tvalues[0], p_value=res.pvalues[0]))
    return pd.DataFrame(df_pred), pd.DataFrame(df_true)