def test_normalize_df_no_normalize_by():
    """Check ``_normalize_df`` when no ``normalize_by`` measures are given.

    The expected result is the input frame plus a ``normalized`` column
    holding the same values as the ``i1.m1`` measure column.
    """
    records = [
        {"person_id": 112233, "i1.m1": 10},
        {"person_id": 445566, "i1.m1": 20},
    ]
    pheno_df = pd.DataFrame(records, columns=["person_id", "i1.m1"])

    # With nothing to normalize by, "normalized" simply repeats the measure.
    expected_records = [
        {**record, "normalized": record["i1.m1"]} for record in records
    ]
    expected = pd.DataFrame(
        expected_records,
        columns=["person_id", "i1.m1", "normalized"],
    )

    normalized = PhenoTool._normalize_df(pheno_df, "i1.m1")

    assert normalized.equals(expected)
def test_normalize_df():
    """Check ``_normalize_df`` when ``i1.m1`` is normalized by ``i1.m2``.

    Verifies that a ``normalized`` column is appended after the input
    columns, that the input columns come back with their original values,
    and that the ``normalized`` values fall within the stated tolerances.
    """
    input_columns = ["person_id", "i1.m1", "i1.m2"]
    pheno_df = pd.DataFrame(
        [
            {"person_id": 112233, "i1.m1": 1e6, "i1.m2": 1e3},
            {"person_id": 445566, "i1.m1": 2e12, "i1.m2": 1e-3},
        ],
        columns=input_columns,
    )

    normalized = PhenoTool._normalize_df(
        pheno_df, "i1.m1", normalize_by=["i1.m2"]
    )

    assert list(normalized) == [*input_columns, "normalized"]

    # The original columns must be passed through with unchanged values.
    passthrough = {
        "person_id": (112233, 445566),
        "i1.m1": (1e6, 2e12),
        "i1.m2": (1e3, 1e-3),
    }
    for column, values in passthrough.items():
        for row, value in enumerate(values):
            assert normalized[column][row] == pytest.approx(value)

    # FIXME: the absolute tolerance on the second check (1e-2) is larger than
    # the expected value itself, so that assertion is very permissive.
    assert normalized["normalized"][0] == pytest.approx(
        -0.000299, abs=1e-4
    )  # FIXME:
    assert normalized["normalized"][1] == pytest.approx(
        0.000488, abs=1e-2
    )  # FIXME:
def test_normalize_df_does_not_contain_normalize_measure_id():
    """Expect ``AssertionError`` when a ``normalize_by`` measure is not a column.

    The frame only has ``i1.m1`` and ``i1.m2``; normalizing by ``i1.m3``
    must be rejected.
    """
    records = [
        {"person_id": 112233, "i1.m1": 1e6, "i1.m2": 1e3},
        {"person_id": 445566, "i1.m1": 2e12, "i1.m2": 1e-3},
    ]
    pheno_df = pd.DataFrame(records, columns=["person_id", "i1.m1", "i1.m2"])

    # Deliberately absent from the frame's columns.
    missing_measure_id = "i1.m3"

    with pytest.raises(AssertionError):
        PhenoTool._normalize_df(
            pheno_df, "i1.m1", normalize_by=[missing_measure_id]
        )
def test_normalize_df_by_empty_df(fake_phenotype_data):
    """Expect ``AssertionError`` when normalizing a frame fetched for no persons.

    The frame is requested with ``person_ids=[]`` (presumably yielding an
    empty frame, as the test name suggests -- confirm against the fixture).
    """
    measure_ids = ["i1.m1", "i1.m2"]
    empty_df = fake_phenotype_data.get_persons_values_df(
        measure_ids, person_ids=[]
    )

    # NOTE(review): the third argument is a bare string passed positionally,
    # whereas the other tests in this file pass ``normalize_by`` as a list.
    # Confirm the AssertionError comes from the empty frame and not from the
    # argument's type.
    with pytest.raises(AssertionError):
        PhenoTool._normalize_df(empty_df, "i1.m1", "i1.m2")