def test_calc_split_by_sex(mocker, fake_phenotype_data):
    """Check that a sex-split ``calc`` joins once and runs the stats per sex.

    ``_calc_stats`` must be called twice with the joined frame: first with
    ``Sex.M``, then with ``Sex.F``.
    """
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    carriers = (
        "f4.p1", "f5.p1", "f7.p1", "f10.p1", "f12.p1",
        "f16.p1", "f24.p1", "f25.p1", "f30.p1", "f32.p1",
    )
    variants = Counter({person_id: 1 for person_id in carriers})

    # Build the expected frame BEFORE installing the spies, so this direct
    # call is not part of the call count asserted below.
    expected_df = PhenoTool.join_pheno_df_with_variants(
        tool.pheno_df, variants)

    mocker.spy(PhenoTool, "join_pheno_df_with_variants")
    mocker.spy(tool, "_calc_stats")

    tool.calc(variants, sex_split=True)

    assert tool._calc_stats.call_count == 2
    assert PhenoTool.join_pheno_df_with_variants.call_count == 1

    recorded_calls = tool._calc_stats.call_args_list
    for call_index, expected_sex in enumerate((Sex.M, Sex.F)):
        # Index 0 of a recorded call is its tuple of positional arguments.
        stats_df, stats_sex = recorded_calls[call_index][0]
        assert expected_df.equals(stats_df)
        assert stats_sex is expected_sex
def prepare_pheno_tool_adapter(self, data):
    """Build the pheno tool adapter for the dataset described by ``data``.

    ``data`` is read with the keys "datasetId", "measureId" and
    "normalizeBy" (required when the local path is reached) and the
    optional keys "familyFilters" and "familyIds".

    Returns:
        ``None`` when no study wrapper is found for ``data["datasetId"]`` or
        its phenotype data does not have ``data["measureId"]``; a
        ``RemotePhenoToolAdapter`` when the wrapper is remote; otherwise a
        ``PhenoToolAdapter`` wrapping a freshly built ``PhenoTool``.
    """
    wrapper = self.gpf_instance.get_wdae_wrapper(data["datasetId"])
    if not wrapper:
        return None
    if not wrapper.phenotype_data.has_measure(data["measureId"]):
        return None

    if wrapper.is_remote:
        return RemotePhenoToolAdapter(
            wrapper.rest_client, wrapper._remote_study_id)

    helper = PhenoToolHelper(wrapper)

    family_ids = helper.pheno_filter_persons(data.get("familyFilters"))
    person_ids = set(helper.genotype_data_persons(data.get("familyIds", [])))

    pheno_tool = PhenoTool(
        helper.genotype_data.phenotype_data,
        measure_id=data["measureId"],
        person_ids=person_ids,
        family_ids=family_ids,
        normalize_by=data["normalizeBy"],
    )
    return PhenoToolAdapter(pheno_tool, helper)
def test_calc_empty_variants(fake_phenotype_data):
    """Check ``calc`` results, overall and per sex, for an empty variants counter."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    no_variants = Counter()

    def check_only_negatives(result):
        # With no variants the positive group is empty, the negative group
        # has non-zero statistics and no p-value is reported.
        assert isinstance(result, PhenoResult)
        assert result.positive_count == 0
        assert result.positive_mean == 0
        assert result.positive_deviation == 0
        assert result.negative_mean
        assert result.negative_deviation
        assert result.pvalue == "NA"

    overall = tool.calc(no_variants)
    check_only_negatives(overall)
    assert overall.negative_count == len(tool.pheno_df)

    # NOTE(review): relies on the sex-split mapping yielding exactly two
    # results, presumably male first and female second - confirm.
    male, female = tool.calc(no_variants, sex_split=True).values()
    for per_sex in (male, female):
        check_only_negatives(per_sex)

    # The two per-sex negative groups must add up to the overall one.
    assert male.negative_count == (
        overall.negative_count - female.negative_count)
    assert female.negative_count == (
        overall.negative_count - male.negative_count)
def test_get_normalize_measure_id_same_measure(fake_phenotype_data):
    """Normalizing "i1.m1" by measure "m1" of instrument "i1" yields ``None``."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    same_measure = {"measure_name": "m1", "instrument_name": "i1"}

    assert tool._get_normalize_measure_id(same_measure) is None
def test_calc(mocker, fake_phenotype_data):
    """Check that a non-split ``calc`` joins once and runs the stats once.

    ``_calc_stats`` must receive the joined frame and ``None`` as the sex.
    """
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    carriers = (
        "f4.p1", "f5.p1", "f7.p1", "f10.p1", "f12.p1",
        "f16.p1", "f24.p1", "f25.p1", "f30.p1", "f32.p1",
    )
    variants = Counter({person_id: 1 for person_id in carriers})

    # Build the expected frame BEFORE installing the spies, so this direct
    # call does not break the "called once" assertion below.
    expected_df = PhenoTool.join_pheno_df_with_variants(
        tool.pheno_df, variants)

    mocker.spy(PhenoTool, "join_pheno_df_with_variants")
    mocker.spy(tool, "_calc_stats")

    tool.calc(variants, sex_split=False)

    tool._calc_stats.assert_called_once()
    PhenoTool.join_pheno_df_with_variants.assert_called_once()

    # Index 0 of a recorded call is its tuple of positional arguments.
    only_call = tool._calc_stats.call_args_list[0]
    stats_df, stats_sex = only_call[0]
    assert expected_df.equals(stats_df)
    assert stats_sex is None
def test_get_normalize_measure_id_no_instrument_name(fake_phenotype_data):
    """Measure "m3" with a ``None`` instrument name resolves to "i1.m3"."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    without_instrument = {"measure_name": "m3", "instrument_name": None}

    resolved = tool._get_normalize_measure_id(without_instrument)

    assert resolved == "i1.m3"
def test_get_normalize_measure_id_non_dict_measure(fake_phenotype_data):
    """A list, a string and ``None`` are each rejected with ``AssertionError``."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1")

    for not_a_dict in (["measure"], "measure", None):
        with pytest.raises(AssertionError):
            tool._get_normalize_measure_id(not_a_dict)
def test_init_with_person_ids(fake_phenotype_data):
    """The tool's frame holds exactly the persons passed as ``person_ids``."""
    selected = ("f1.p1", "f3.p1", "f5.p1", "f7.p1")

    # Hand over a fresh list so the expectation stays independent of it.
    tool = PhenoTool(
        fake_phenotype_data, "i1.m1", person_ids=list(selected))

    assert set(tool.pheno_df["person_id"]) == set(selected)
def test_get_normalize_measure_id_non_existent(fake_phenotype_data,
                                               measure_name,
                                               instrument_name):
    """The given measure/instrument name pair must resolve to ``None``.

    ``measure_name`` and ``instrument_name`` are supplied from outside this
    function (presumably by a parametrization that is not visible here).
    """
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    unknown_measure = {
        "measure_name": measure_name,
        "instrument_name": instrument_name,
    }

    assert tool._get_normalize_measure_id(unknown_measure) is None
def test_init_empty_person_ids_normalize(fake_phenotype_data):
    """Construction works with no persons and a normalization by "i1"/"m2"."""
    normalize_by = [{"instrument_name": "i1", "measure_name": "m2"}]

    tool = PhenoTool(
        fake_phenotype_data,
        "i1.m1",
        person_ids=[],
        normalize_by=normalize_by,
    )

    # Only checks that construction succeeded and the tool is truthy.
    assert tool
def test_calc_empty_pheno_df(fake_phenotype_data):
    """With no persons selected, every ``calc`` result is all zeros with "NA"."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1", person_ids=[])
    carriers = (
        "f4.p1", "f5.p1", "f7.p1", "f10.p1", "f12.p1",
        "f16.p1", "f24.p1", "f25.p1", "f30.p1", "f32.p1",
    )
    variants = Counter({person_id: 1 for person_id in carriers})

    def check_all_zero(result):
        # Both groups are empty: zero counts, means and deviations, and no
        # p-value is reported.
        assert isinstance(result, PhenoResult)
        assert result.positive_count == 0
        assert result.positive_mean == 0
        assert result.positive_deviation == 0
        assert result.negative_count == 0
        assert result.negative_mean == 0
        assert result.negative_deviation == 0
        assert result.pvalue == "NA"

    check_all_zero(tool.calc(variants))

    # The sex-split mapping must yield exactly two results.
    male, female = tool.calc(variants, sex_split=True).values()
    for per_sex in (male, female):
        check_all_zero(per_sex)
def test_init_normalize_measures_non_continuous(fake_phenotype_data,
                                                measure_name):
    """A normalize list containing measure "m5" raises ``AssertionError``."""
    # NOTE(review): ``measure_name`` is accepted but never used; the list
    # below hard-codes "m5". If a parametrization supplies it, it was
    # presumably meant to replace "m5" - confirm before changing.
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    requested = [
        {"measure_name": name, "instrument_name": instrument}
        for name, instrument in (("??", ""), ("m5", ""), ("m7", "i1"))
    ]

    with pytest.raises(AssertionError):
        tool._init_normalize_measures(requested)
def test_init_normalize_measures(fake_phenotype_data):
    """Of "??", "m3" and "i1"/"m7", only "i1.m3" and "i1.m7" are kept."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    requested = [
        {"measure_name": name, "instrument_name": instrument}
        for name, instrument in (("??", ""), ("m3", ""), ("m7", "i1"))
    ]

    resolved = tool._init_normalize_measures(requested)

    assert len(resolved) == 2
    assert set(resolved) == {"i1.m3", "i1.m7"}
    # Every resolved id must be a measure the phenotype data knows about.
    assert all(
        fake_phenotype_data.has_measure(measure_id)
        for measure_id in resolved
    )
def test_init_pheno_df(fake_phenotype_data):
    """The tool's frame is non-empty and has exactly the expected columns."""
    expected_columns = {
        "person_id",
        "family_id",
        "role",
        "status",
        "sex",
        "i1.m1",
        "i1.m2",
        "normalized",
    }

    tool = PhenoTool(
        fake_phenotype_data,
        "i1.m1",
        normalize_by=[{"instrument_name": "i1", "measure_name": "m2"}],
    )
    frame = tool.pheno_df

    assert frame is not None
    assert not frame.empty
    assert set(frame) == expected_columns
def test_init_non_continuous_or_ordinal_measure(fake_phenotype_data):
    """Constructing a tool for "i1.m5" or "i1.m9" raises ``AssertionError``."""
    rejected_measures = (
        "i1.m5",  # categorical
        "i1.m9",  # raw
    )

    for measure_id in rejected_measures:
        with pytest.raises(AssertionError):
            PhenoTool(fake_phenotype_data, measure_id)
def test_init_nonexistent_measure(fake_phenotype_data):
    """Constructing a tool for the measure id "i1.??" raises ``AssertionError``."""
    missing_measure_id = "i1.??"

    with pytest.raises(AssertionError):
        PhenoTool(fake_phenotype_data, missing_measure_id)
def test_init_empty_person_ids(fake_phenotype_data):
    """Construction works when an empty ``person_ids`` list is given."""
    tool = PhenoTool(
        fake_phenotype_data,
        "i1.m1",
        person_ids=[],
    )

    # Only checks that construction succeeded and the tool is truthy.
    assert tool
def test_get_normalize_measure_id_measure_dict_no_keys(fake_phenotype_data):
    """A dict with only one of the two name keys raises ``AssertionError``."""
    tool = PhenoTool(fake_phenotype_data, "i1.m1")
    incomplete_measures = (
        {"measure_name": "something"},
        {"instrument_name": "something"},
    )

    for incomplete in incomplete_measures:
        with pytest.raises(AssertionError):
            tool._get_normalize_measure_id(incomplete)