def compute_jaccard(self, input_genes: List[dict], lower_bound: float = 0.7) -> List[dict]:
    """Collect subjects whose Jaccard score against each input gene exceeds a threshold.

    Every input gene is compared with every key of
    ``self.associations.subject_label_map``. A pair is reported when
    ``jaccard_similarity`` returns a score strictly greater than
    ``lower_bound``; a subject equal to the (trimmed) input identifier is
    never reported.

    Args:
        input_genes: Records to search with. Each record is read for the
            keys ``'sim_input_curie'`` (identifier used for the comparison)
            and ``'input_symbol'`` (copied into the result unchanged).
        lower_bound: Exclusive minimum score for a hit. Both the score and
            this bound are coerced with ``float()`` before comparing.

    Returns:
        A list of dicts with the keys ``'input_id'``, ``'input_symbol'``,
        ``'hit_symbol'``, ``'hit_id'`` and ``'score'``, in the order the
        pairs were visited. Empty when there are no inputs, no subjects, or
        no pair clears the threshold.
    """
    similarities = []
    for igene in input_genes:
        for subject_curie in self.associations.subject_label_map:
            # The trimmed identifier depends on the subject it is paired
            # with, so it has to be recomputed for every pair.
            # NOTE(review): presumably this normalises the MGI prefix so
            # both curies are comparable; confirm against trim_mgi_prefix.
            input_gene = GenericSimilarity.trim_mgi_prefix(
                input_gene=igene['sim_input_curie'],
                subject_curie=subject_curie)

            # Skip self-matches. This must be a value comparison: ``is not``
            # tests object identity, and two equal strings are frequently
            # distinct objects, which let self-hits through.
            if input_gene == subject_curie:
                continue

            score = jaccard_similarity(self.associations, input_gene, subject_curie)
            if float(score) > float(lower_bound):
                similarities.append({
                    'input_id': input_gene,
                    'input_symbol': igene['input_symbol'],
                    'hit_symbol': self.associations.label(subject_curie),
                    'hit_id': subject_curie,
                    'score': score,
                })
    return similarities
def compute_jaccard(self, input_curies:List[str], lower_bound:float=0.7, upper_bound:float=1.0) -> List[dict]:
    """Return subjects whose Jaccard score for an input curie lies inside an open interval.

    Each curie in ``input_curies`` is scored against every key of
    ``self.associations.subject_label_map`` using ``jaccard_similarity``.
    A pair is kept only when ``lower_bound < score < upper_bound``; both
    bounds are exclusive.

    Args:
        input_curies: Identifiers to score against the known subjects.
        lower_bound: Exclusive minimum score for a hit.
        upper_bound: Exclusive maximum score for a hit.

    Returns:
        A list of dicts with the keys ``'input_curie'``, ``'sim_hit_name'``,
        ``'sim_hit_curie'`` and ``'sim_score'``, in visiting order. Empty
        when nothing falls inside the interval.
    """
    hits = []
    for query_curie in input_curies:
        for candidate_curie in self.associations.subject_label_map.keys():
            similarity = jaccard_similarity(self.associations, query_curie, candidate_curie)
            # Both ends of the interval are exclusive.
            if not (lower_bound < similarity < upper_bound):
                continue
            hit = {
                'input_curie': query_curie,
                'sim_hit_name': self.associations.label(candidate_curie),
                'sim_hit_curie': candidate_curie,
                'sim_score': similarity,
            }
            hits.append(hit)
    return hits