def test_compare_ui_score():
    """A set sharing more elements with the reference must get a higher score."""
    ## GIVEN a reference set, a close match and a more distant match
    reference = {"a", "b"}
    close_match = {"a", "b", "d"}
    distant_match = {"b", "c", "d"}

    ## WHEN scoring the reference against each of the other sets
    close_score = ui_score(reference, close_match)
    distant_score = ui_score(reference, distant_match)

    ## THEN assert the close match scores strictly higher than the distant one
    assert close_score > distant_score
def test_compare_ui_score():
    """Check that ui_score ranks a larger overlap above a smaller one."""
    ## GIVEN three sets where the second overlaps the first more than the third does
    first = {"a", "b"}
    second = {"a", "b", "d"}
    third = {"b", "c", "d"}

    ## WHEN finding the similarity of the first set to the other two
    score_first_second = ui_score(first, second)
    score_first_third = ui_score(first, third)

    ## THEN assert first/second are more similar than first/third
    assert score_first_second > score_first_third
def test_get_one_empty_ui_score():
    """Scoring a populated set against an empty set must give 0."""
    ## GIVEN one populated set and one empty set
    populated = {"a", "b"}
    empty = set()

    ## WHEN finding the similarity
    score = ui_score(populated, empty)

    ## THEN assert similarity is 0
    assert score == 0
def test_get_disjunct_ui_score():
    """Two sets without any common element must score 0."""
    ## GIVEN two disjunct sets
    left = {"a", "b"}
    right = {"c", "d"}

    ## WHEN finding the similarity
    score = ui_score(left, right)

    ## THEN assert similarity is 0
    assert score == 0
def test_get_identical_ui_score():
    """Two sets holding exactly the same elements must score 1."""
    ## GIVEN two identical sets (built with different element order)
    left = {"a", "b"}
    right = {"b", "a"}

    ## WHEN finding the similarity
    score = ui_score(left, right)

    ## THEN assert similarity is 1
    assert score == 1
def test_get_disjunct_ui_score():
    """Check that sets sharing no elements get a similarity of 0."""
    ## GIVEN two sets with nothing in common
    first = {"a", "b"}
    second = {"c", "d"}

    ## WHEN finding the similarity
    result = ui_score(first, second)

    ## THEN assert similarity is 0
    assert result == 0
def test_get_two_empty_ui_score():
    """Scoring two empty sets against each other must give 0."""
    ## GIVEN two empty sets
    first, second = set(), set()

    ## WHEN finding the similarity
    score = ui_score(first, second)

    ## THEN assert similarity is 0
    assert score == 0
def test_get_almost_identical_ui_score():
    """Partially overlapping sets must score strictly between 0 and 1."""
    ## GIVEN two overlapping sets where one has a single extra element
    smaller = {"a", "b"}
    larger = {"a", "b", "d"}

    ## WHEN finding the similarity
    score = ui_score(smaller, larger)

    ## THEN assert 0 < similarity < 1
    assert 0 < score < 1
def cases_by_phenotype(self, phenotype_terms, owner, case_id=None):
    """Rank cases by phenotypic similarity to a list of HPO terms.

    Both the query terms and the terms of each candidate case are expanded
    to the union of the "all_ancestors" entries of the documents returned by
    self.hpo_term. Each pair of expanded sets is then scored with ui_score.
    Terms for which self.hpo_term returns nothing are ignored.

    Args:
        phenotype_terms(list): HPO term ids, e.g. ["HP:0001250", "HP:0000707", ..]
        owner(str): institute id, passed on to self.cases to select candidates
        case_id(str): Id of a case to leave out of the result, used when
            searching for cases similar to another case

    Returns:
        scores(list(tuple)): Tuples like (case_id, score) with the most
            similar case first. None when no phenotype terms were provided.
    """
    # Truthiness also covers None, which len() would reject with a TypeError
    if not phenotype_terms:
        LOG.warning("No phenotype terms provided, please provide at least one HPO term")
        return None

    # Collect all ancestors of all query terms
    query_ancestors = set()
    for term in phenotype_terms:
        hpo_term = self.hpo_term(term)
        if not hpo_term:
            continue
        query_ancestors.update(hpo_term.get("all_ancestors", []))

    scores = {}
    # Fetch all cases with phenotypes for this owner
    for case in self.cases(phenotype_terms=True, owner=owner):
        # Never compare the case that is being searched from with itself
        if case["_id"] == case_id:
            continue

        # Collect all ancestors of all terms of the candidate case
        case_ancestors = set()
        for term in case["phenotype_terms"]:
            hpo_term = self.hpo_term(term["phenotype_id"])
            if not hpo_term:
                continue
            case_ancestors.update(hpo_term.get("all_ancestors", []))

        # Lazy %-style arguments: the message is only built if DEBUG is enabled
        LOG.debug(
            "Check phenotypic similarity between terms:%s and case %s",
            phenotype_terms,
            case["_id"],
        )
        scores[case["_id"]] = ui_score(query_ancestors, case_ancestors)

    # Returns a list of tuples with highest score first
    return sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
def get_similar_cases(self, case_obj):
    """Rank the other cases of the same owner by phenotypic similarity to a case.

    The phenotype terms of the query case and of every candidate case are
    expanded to the union of the "all_ancestors" entries of the documents
    returned by self.hpo_term, and the two expanded sets are scored with
    ui_score. Terms for which self.hpo_term returns nothing are ignored.

    Args:
        case_obj(models.Case)

    Returns:
        scores(list(tuple)): Tuples like (case_id, score) with the most
            similar case first. None when the case has no phenotype terms.
    """
    if not case_obj.get("phenotype_terms"):
        LOG.warning("No phenotypes could be found for case %s", case_obj["_id"])
        return None

    # Ancestors of every term annotated on the query case
    query_ancestors = set()
    for query_term in case_obj["phenotype_terms"]:
        term_obj = self.hpo_term(query_term["phenotype_id"])
        if term_obj:
            query_ancestors.update(term_obj.get("all_ancestors", []))

    similarity_by_case = {}
    # Candidates are all cases with phenotypes that belong to the same owner
    for candidate in self.cases(phenotype_terms=True, owner=case_obj["owner"]):
        candidate_id = candidate["_id"]
        # The query case itself is not a candidate
        if candidate_id == case_obj["_id"]:
            continue

        # Ancestors of every term annotated on the candidate case
        candidate_ancestors = set()
        for candidate_term in candidate["phenotype_terms"]:
            term_obj = self.hpo_term(candidate_term["phenotype_id"])
            if term_obj:
                candidate_ancestors.update(term_obj.get("all_ancestors", []))

        LOG.debug("Check phenotypic similarity of %s and %s", case_obj["_id"], candidate_id)
        similarity_by_case[candidate_id] = ui_score(query_ancestors, candidate_ancestors)

    # Highest score first
    ranked = list(similarity_by_case.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    return ranked