Ejemplo n.º 1
0
def test_compare_ui_score():
    """A set sharing more elements with the reference must get the higher score."""
    ## GIVEN a reference set, a superset of it, and a set sharing a single element with it
    reference = {"a", "b"}
    near = {"b", "a", "d"}
    far = {"b", "c", "d"}

    ## WHEN scoring the reference against each of the other two sets
    near_score = ui_score(reference, near)
    far_score = ui_score(reference, far)

    ## THEN the set with the larger overlap scores strictly higher
    assert near_score > far_score
Ejemplo n.º 2
0
def test_compare_ui_score():
    """Scores must rank a closely overlapping set above a loosely overlapping one."""
    ## GIVEN a query set and two candidates with different amounts of overlap
    query = {"a", "b"}
    strong_overlap = {"b", "a", "d"}
    weak_overlap = {"b", "c", "d"}

    ## WHEN computing the score of the query against both candidates
    strong_score = ui_score(query, strong_overlap)
    weak_score = ui_score(query, weak_overlap)

    ## THEN the candidate containing the whole query is the more similar one
    assert strong_score > weak_score
Ejemplo n.º 3
0
def test_get_one_empty_ui_score():
    """Comparing a populated set with an empty set must give a score of 0."""
    ## GIVEN one populated set and one empty set
    populated = {"a", "b"}
    empty = set()

    ## WHEN scoring the populated set against the empty one
    score = ui_score(populated, empty)

    ## THEN there is no similarity at all
    assert score == 0
Ejemplo n.º 4
0
def test_get_disjunct_ui_score():
    """Two sets without any common element must give a score of 0."""
    ## GIVEN two sets that share no elements
    left = {"a", "b"}
    right = {"c", "d"}

    ## WHEN scoring one against the other
    score = ui_score(left, right)

    ## THEN there is no similarity at all
    assert score == 0
Ejemplo n.º 5
0
def test_get_identical_ui_score():
    """Two sets holding exactly the same elements must give a score of 1."""
    ## GIVEN two identical sets (same elements, written in a different order)
    left = {"a", "b"}
    right = {"b", "a"}

    ## WHEN scoring one against the other
    score = ui_score(left, right)

    ## THEN the similarity is the maximum, 1
    assert score == 1
Ejemplo n.º 6
0
def test_get_disjunct_ui_score():
    """Sets with an empty intersection must be scored as completely dissimilar."""
    ## GIVEN a pair of sets with nothing in common
    first = {"a", "b"}
    second = {"c", "d"}

    ## WHEN computing their score
    result = ui_score(first, second)

    ## THEN the score is 0
    assert result == 0
Ejemplo n.º 7
0
def test_get_two_empty_ui_score():
    """Comparing two empty sets must give a score of 0."""
    ## GIVEN two sets without any elements
    first, second = set(), set()

    ## WHEN scoring one against the other
    score = ui_score(first, second)

    ## THEN the score is 0
    assert score == 0
Ejemplo n.º 8
0
def test_get_almost_identical_ui_score():
    """A set and a strict superset of it must score strictly between 0 and 1."""
    ## GIVEN two overlapping sets where the second holds one extra element
    smaller = {"a", "b"}
    larger = {"b", "a", "d"}

    ## WHEN scoring one against the other
    score = ui_score(smaller, larger)

    ## THEN the score is neither 0 (disjunct) nor 1 (identical)
    assert 0 < score < 1
Ejemplo n.º 9
0
    def cases_by_phenotype(self, phenotype_terms, owner, case_id=None):
        """Rank the cases of an institute by phenotypic similarity to a list of HPO terms.

        Every HPO term is looked up with self.hpo_term and replaced by its
        "all_ancestors"; terms that cannot be looked up are skipped. The set built
        from phenotype_terms is then compared, using ui_score, with the set built
        the same way from the phenotype terms of each case yielded by self.cases.

        Args:
            phenotype_terms(list):[ "HP:0001250", "HP:0000707",.. ]
            owner(str): institute id
            case_id(str): Id of a case, when searching for cases similar to another.
                          The case with this id is left out of the result

        Returns:
            scores(list(tuple)): Returns a list of tuples like (case_id, score) with the most
                                 similar case first. None is returned when no
                                 phenotype terms were provided
        """
        # Truthiness also covers None, which len() would reject with a TypeError
        if not phenotype_terms:
            LOG.warning("No phenotype terms provided, please provide at least one HPO term")
            return None

        def _ancestors(term_ids):
            """Return the union of "all_ancestors" for every term id that can be looked up."""
            ancestors = set()
            for term_id in term_ids:
                hpo_term = self.hpo_term(term_id)
                if not hpo_term:
                    continue
                # Grow the set in place instead of building a new set for every term
                ancestors.update(hpo_term.get("all_ancestors", []))
            return ancestors

        # Add all ancestors of all terms
        set_1 = _ancestors(phenotype_terms)

        scores = {}
        # Need to control what cases to look for here
        # Fetch all cases with phenotypes
        for case in self.cases(phenotype_terms=True, owner=owner):
            # Do not compare a case with itself
            if case["_id"] == case_id:
                continue
            # Add all ancestors of all terms; case terms are dicts holding the id
            set_2 = _ancestors(term["phenotype_id"] for term in case["phenotype_terms"])

            # Lazy %-style arguments: the message is only built when debug logging is enabled
            LOG.debug(
                "Check phenotypic similarity between terms:%s and case %s",
                phenotype_terms,
                case["_id"],
            )

            scores[case["_id"]] = ui_score(set_1, set_2)
        # Returns a list of tuples with highest score first
        return sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
Ejemplo n.º 10
0
    def get_similar_cases(self, case_obj):
        """Rank the other cases of the same owner by phenotypic similarity to a case.

        The "all_ancestors" of every phenotype term on a case are merged into one
        set; terms that self.hpo_term does not return anything for are skipped. The
        set of case_obj is compared, using ui_score, with the set of each case
        yielded by self.cases.

        Args:
            case_obj(models.Case)

        Returns:
            scores(list(tuple)): Returns a list of tuples like (case_id, score) with the most
                                 similar case first, or None when case_obj has no
                                 phenotype terms
        """
        if not case_obj.get("phenotype_terms"):
            LOG.warning("No phenotypes could be found for case %s",
                        case_obj["_id"])
            return None

        def collect_ancestors(phenotypes):
            """Merge the ancestors of every phenotype that can be looked up into one set."""
            merged = set()
            for phenotype in phenotypes:
                hpo_obj = self.hpo_term(phenotype["phenotype_id"])
                if hpo_obj:
                    merged |= set(hpo_obj.get("all_ancestors", []))
            return merged

        # Ancestors of all terms on the case we are searching for
        query_ancestors = collect_ancestors(case_obj["phenotype_terms"])

        similarity_by_case = {}
        # Fetch all cases with phenotypes for the same owner
        for candidate in self.cases(phenotype_terms=True, owner=case_obj["owner"]):
            # The case itself is not a candidate
            if candidate["_id"] == case_obj["_id"]:
                continue
            candidate_ancestors = collect_ancestors(candidate["phenotype_terms"])
            LOG.debug("Check phenotypic similarity of %s and %s",
                      case_obj["_id"], candidate["_id"])
            similarity_by_case[candidate["_id"]] = ui_score(query_ancestors, candidate_ancestors)

        # Highest score first
        ranking = list(similarity_by_case.items())
        ranking.sort(key=operator.itemgetter(1), reverse=True)
        return ranking