コード例 #1
0
 def compute_jaccard(self,
                     input_genes: List[dict],
                     lower_bound: float = 0.7) -> List[dict]:
     """Score every input gene against every known subject with Jaccard similarity.

     Each entry of ``input_genes`` is read for its ``'sim_input_curie'`` key
     and, when a hit is recorded, its ``'input_symbol'`` key. The input CURIE
     is passed through ``GenericSimilarity.trim_mgi_prefix`` together with the
     subject CURIE before it is compared and scored.

     Args:
         input_genes: Gene records to look up similar subjects for.
         lower_bound: Exclusive minimum score a pair must exceed to be kept.

     Returns:
         One dict per retained pair with the keys ``input_id``,
         ``input_symbol``, ``hit_symbol``, ``hit_id`` and ``score``.
     """
     similarities = []
     for igene in input_genes:
         for subject_curie in self.associations.subject_label_map.keys():
             input_gene = GenericSimilarity.trim_mgi_prefix(
                 input_gene=igene['sim_input_curie'],
                 subject_curie=subject_curie)
             # Compare by value, not identity: two equal CURIE strings are
             # not guaranteed to be the same object, so `is not` could let a
             # gene be reported as a hit against itself.
             if input_gene == subject_curie:
                 continue
             score = jaccard_similarity(self.associations, input_gene,
                                        subject_curie)
             if float(score) > float(lower_bound):
                 subject_label = self.associations.label(subject_curie)
                 similarities.append({
                     'input_id': input_gene,
                     'input_symbol': igene['input_symbol'],
                     'hit_symbol': subject_label,
                     'hit_id': subject_curie,
                     'score': score,
                 })
     return similarities
コード例 #2
0
    def compute_jaccard(self, input_curies:List[str], lower_bound:float=0.7, upper_bound:float=1.0) -> List[dict]:
        """Collect subjects whose Jaccard score against an input lies strictly between the bounds.

        Every CURIE in ``input_curies`` is scored against every key of
        ``self.associations.subject_label_map``.

        Args:
            input_curies: CURIEs to find similar subjects for.
            lower_bound: Exclusive minimum score.
            upper_bound: Exclusive maximum score.

        Returns:
            One dict per retained pair with the keys ``input_curie``,
            ``sim_hit_name``, ``sim_hit_curie`` and ``sim_score``.
        """
        hits = []

        for query in input_curies:
            for candidate in self.associations.subject_label_map.keys():
                sim = jaccard_similarity(self.associations, query, candidate)

                # Both bounds are exclusive; skip anything on or outside them.
                if not (sim > lower_bound and sim < upper_bound):
                    continue

                record = {
                    'input_curie': query,
                    'sim_hit_name': self.associations.label(candidate),
                    'sim_hit_curie': candidate,
                    'sim_score': sim,
                }
                hits.append(record)

        return hits