def schema_alignment_by_headword_string_sim(self, entity_synset, dul_ontology_classes):
    '''Align an entity with a DUL ontology class via head-word string similarity.

    The intuition is that the head-word carries important information about a
    concept. DBpedia classification can sometimes enrich more meaningful types
    sharing words with the ontology that needs to be aligned. With the set of
    enriched "keywords" about the entity and the local schema types, we
    iteratively compare the maximum similarity. By applying a threshold, we
    choose an ontology class with maximum likelihood.

    params:
        entity_synset - set() containing representative labels about the entity
            and types mentioned in context
        dul_ontology_classes - dict() mapping DUL class URIs to lists of
            representative labels

    returns:
        the class URI with the highest head-word similarity if that similarity
        exceeds 0.9, otherwise None (also None when either input is empty)
    '''
    from oke.oak.util import levenshtein_similarity
    most_similiar_dul_class = dict()
    for entity_label in entity_synset:
        # The last whitespace-separated token is treated as the head-word.
        entity_label_headword = entity_label.split(' ')[-1]
        for classUri, classLabels in dul_ontology_classes.items():
            max_sim = max(levenshtein_similarity(entity_label_headword, class_label)
                          for class_label in classLabels)
            # Keep the best similarity observed so far for each class URI.
            if most_similiar_dul_class.get(classUri) is None or most_similiar_dul_class.get(classUri) < max_sim:
                most_similiar_dul_class[classUri] = max_sim
    # Guard empty inputs: max() over an empty dict would raise ValueError.
    if not most_similiar_dul_class:
        return None
    print(most_similiar_dul_class)
    suggested_class = max(most_similiar_dul_class, key=most_similiar_dul_class.get)
    print("suggested_class:", suggested_class)
    suggested_class_prob = most_similiar_dul_class.get(suggested_class)
    return suggested_class if suggested_class_prob > 0.9 else None
def string_similarity_comparision_with_dulOntologies(self, class_labels):
    '''Find DUL ontology classes whose labels closely match any given label.

    Compares every label in *class_labels* against the representative labels
    of each DUL ontology class (case-insensitively) using Levenshtein
    similarity, and collects the URIs of classes with at least one label
    above the similarity threshold.

    params:
        class_labels - iterable of candidate label strings

    returns:
        set() of matching DUL class URIs
    '''
    from oke.oak.util import levenshtein_similarity
    similarity_threshold = 0.9
    most_similiar_dul_classes = set()
    for _classlabel in class_labels:
        # Hoist the invariant lower-casing out of the inner loop.
        _classlabel_lower = _classlabel.lower()
        for classUri, classLabels in self.dul_ontology_classes.items():
            # any() short-circuits on the first label over the threshold,
            # instead of materialising the full list of matches.
            if any(levenshtein_similarity(_classlabel_lower, class_label.lower()) > similarity_threshold
                   for class_label in classLabels):
                most_similiar_dul_classes.add(classUri)
    return most_similiar_dul_classes
def string_similarity_comparision_with_dulOntologies(self, class_labels):
    '''Collect DUL class URIs whose labels are near-identical to an input label.

    Each input label is compared case-insensitively with every representative
    label of every DUL ontology class; a class URI is collected when at least
    one of its labels exceeds the Levenshtein-similarity threshold of 0.9.
    Returns the set of collected class URIs.
    '''
    from oke.oak.util import levenshtein_similarity
    threshold = 0.9
    matched_class_uris = set()
    for candidate in class_labels:
        candidate_lower = candidate.lower()
        for class_uri, labels in self.dul_ontology_classes.items():
            # Gather labels of this class that are similar enough to the candidate.
            close_labels = [
                label
                for label in labels
                if levenshtein_similarity(candidate_lower, label.lower()) > threshold
            ]
            if close_labels:
                matched_class_uris.add(class_uri)
    return matched_class_uris
def schema_alignment_by_headword_string_sim(self, entity_synset, dul_ontology_classes):
    '''Align an entity with a DUL ontology class via head-word string similarity.

    The intuition is that the head-word carries important information about a
    concept. DBpedia classification can sometimes enrich more meaningful types
    sharing words with the ontology that needs to be aligned. With the set of
    enriched "keywords" about the entity and the local schema types, we
    iteratively compare the maximum similarity. By applying a threshold, we
    choose an ontology class with maximum likelihood.

    params:
        entity_synset - set() containing representative labels about the entity
            and types mentioned in context
        dul_ontology_classes - dict() mapping DUL class URIs to lists of
            representative labels

    returns:
        the class URI with the highest head-word similarity if that similarity
        exceeds 0.9, otherwise None (also None when either input is empty)
    '''
    from oke.oak.util import levenshtein_similarity
    most_similiar_dul_class = dict()
    for entity_label in entity_synset:
        # The last whitespace-separated token is treated as the head-word.
        entity_label_headword = entity_label.split(' ')[-1]
        for classUri, classLabels in dul_ontology_classes.items():
            max_sim = max(levenshtein_similarity(entity_label_headword, class_label)
                          for class_label in classLabels)
            # Keep the best similarity observed so far for each class URI.
            if most_similiar_dul_class.get(classUri) is None or most_similiar_dul_class.get(classUri) < max_sim:
                most_similiar_dul_class[classUri] = max_sim
    # Guard empty inputs: max() over an empty dict would raise ValueError.
    if not most_similiar_dul_class:
        return None
    print(most_similiar_dul_class)
    suggested_class = max(most_similiar_dul_class, key=most_similiar_dul_class.get)
    print("suggested_class:", suggested_class)
    suggested_class_prob = most_similiar_dul_class.get(suggested_class)
    return suggested_class if suggested_class_prob > 0.9 else None