예제 #1
0
파일: semsim.py 프로젝트: valearna/ontobio
def jaccard_similarity(aset: AssociationSet, s1: str, s2: str) -> float:
    r"""
    Calculate the Jaccard index of the inferred associations of two subjects.

    The index is the size of the intersection of the two inferred-type sets
    divided by the size of their union:

    |ancs(s1) /\ ancs(s2)|
    ---
    |ancs(s1) \/ ancs(s2)|

    Args:
        aset: Association set; its ``inferred_types`` method is called once
            for each subject and must return a set-like object.
        s1: Identifier of the first subject.
        s2: Identifier of the second subject.

    Returns:
        The Jaccard index as a float, or 0.0 when the union of the two
        inferred-type sets is empty.
    """
    # NOTE: the docstring is a raw string because the formula contains
    # backslashes that would otherwise be invalid escape sequences.
    types1 = aset.inferred_types(s1)
    types2 = aset.inferred_types(s2)

    union_size = len(types1.union(types2))
    if union_size == 0:
        # Neither subject has inferred types; avoid dividing by zero.
        return 0.0

    return len(types1.intersection(types2)) / union_size
    def jaccard_similarity(aset: AssociationSet, s1: str, s2: str) -> Tuple[float, list]:
        r"""
        Calculate the Jaccard index of the inferred associations of two subjects.

        The index is the size of the intersection of the two inferred-type
        sets divided by the size of their union:

        |ancs(s1) /\ ancs(s2)|
        ---
        |ancs(s1) \/ ancs(s2)|

        Args:
            aset: Association set; its ``inferred_types`` method is called
                once for each subject and must return a set-like object.
            s1: Identifier of the first subject.
            s2: Identifier of the second subject.

        Returns:
            A ``(score, shared_terms)`` tuple: the Jaccard index as a float
            and the list of terms present in both inferred-type sets. When
            the union is empty the result is ``(0.0, [])``.
        """
        # NOTE: the docstring is a raw string because the formula contains
        # backslashes that would otherwise be invalid escape sequences.
        types1 = aset.inferred_types(s1)
        types2 = aset.inferred_types(s2)

        union_size = len(types1.union(types2))
        if union_size == 0:
            # Neither subject has inferred types; avoid dividing by zero.
            return 0.0, []

        shared_terms = types1.intersection(types2)

        # The shared terms are returned as a list rather than a set to
        # avoid later JSON serialization problems.
        return len(shared_terms) / union_size, list(shared_terms)