def __init__(self, result_limit=5000, expansion=True, show_query=False):
    """
    semantic search of entities and concepts

    NOTE(review): unlike the class variant of this constructor, concept
    expansion is enabled by default here — confirm which default is intended.

    :param result_limit: maximum number of retrieved entities
    :param expansion: if conduct concept expansion (enabled by default)
    :param show_query: if SPARQL query is shown
    """
    self._expansion = expansion
    self._show_query = show_query
    self._linker = NameSPARQL()
    self._extracter = Extraction()
    self._yago = YagoTypeSimilarity()
    self._query_graph = QueryGraph(result_limit)
def __init__(self, result_limit=5000, expansion=False, show_query=False):
    """
    semantic search of entities and concepts

    :param result_limit: maximum number of retrieved entities
    :param expansion: if conduct concept expansion
    :param show_query: if SPARQL query is shown
    """
    self._expansion = expansion
    self._show_query = show_query
    self._linker = NameSPARQL()
    self._extracter = Extraction()
    self._yago = YagoTypeSimilarity()
    self._query_graph = QueryGraph(result_limit)
class Matcher:

    """This class is used for concept based entity match in DBpedia"""

    # WordNet language codes accepted by match_type -> DBpedia label language tags
    LANG_MAP = {'eng': 'en', 'spa': 'es', 'cmn': 'zh'}
    # number of concept uris grouped into a single SPARQL query
    BATCH_SIZE = 5

    def __init__(self, result_limit=5000, expansion=False, show_query=False):
        """
        semantic search of entities and concepts

        :param result_limit: maximum number of retrieved entities
        :param expansion: if conduct concept expansion
        :param show_query: if SPARQL query is shown
        """
        self._expansion = expansion
        self._show_query = show_query
        self._linker = NameSPARQL()
        self._extracter = Extraction()
        self._yago = YagoTypeSimilarity()
        self._query_graph = QueryGraph(result_limit)

    @staticmethod
    def _dedup_by_uri(results):
        """Drop duplicate query results, keeping the first seen per 'uri' key.

        :param results: list of result dicts, each containing a 'uri' key
        :return: deduplicated list of result dicts
        """
        result_dic = {}
        for res in results:
            if res['uri'] not in result_dic:
                result_dic[res['uri']] = res
        return list(result_dic.values())

    def type_links(self, word, lang='eng'):
        """Link a concept word to its WordNet synsets and their LOD concept uris.

        :param word: a common noun denoting a concept
        :param lang: WordNet language code ('eng', 'spa', 'cmn')
        :return: list of dicts with 'name', 'gloss', 'lemma' and 'lod'
                 (YAGO/DBpedia uris); synsets with no resolvable link are skipped
        """
        synsets = self._yago.multilingual2synset(word, lang=lang)
        if self._expansion:
            # expand each synset and flatten to a unique set of synsets
            synsets = list(set(itertools.chain.from_iterable(
                [self._yago.synset_expand(s) for s in synsets])))
        links = []
        for s in synsets:
            # collect only the concept links that actually resolved
            concept_link = []
            yago_link = self._yago.synset2yago(s)
            if yago_link:
                concept_link.append(yago_link)
            dbpedia_link = self._yago.synset2dbpedia(s)
            if dbpedia_link:
                concept_link.append(dbpedia_link)
            if concept_link:
                links.append({
                    'name': s.name(),
                    'gloss': s._definition,
                    'lemma': ' '.join(s._lemma_names),
                    'lod': concept_link,
                })
        return links

    def query_process(self, query):
        """
        Process query into concept (common noun) and entity (proper noun).
        Link them to Knowledge Graph uri links respectively.

        :param query: short text query
        :return: tuple of concepts and entities in uris.
        """
        entities = self._extracter.extract_chunks_sent(query)
        # words that appear in entity chunks must not be treated as concepts
        entity_filter = set(itertools.chain.from_iterable(
            [e.lower().split() for e in entities]))
        concepts = [c for c in set(self._extracter.extract_nouns(query))
                    if c not in entity_filter]
        concept_uris = [list(itertools.chain.from_iterable(
            [s['lod'] for s in self.type_links(c)])) for c in concepts]
        concept_uris = list(itertools.chain.from_iterable(concept_uris))
        entity_uris = list(itertools.chain.from_iterable(
            map(self._linker.name2entities, entities)))
        return list(set(concept_uris)), list(set(entity_uris))

    def match_concepts(self, concepts, lang='en'):
        """Retrieve entities typed by any of the given concept uris.

        Queries are issued in batches of BATCH_SIZE uris to keep each SPARQL
        query small; results are deduplicated by entity uri.

        :param concepts: list of concept (type) uris
        :param lang: DBpedia label language tag for the results
        :return: deduplicated list of result dicts
        """
        results = []
        for i in xrange(0, len(concepts), self.BATCH_SIZE):
            results.extend(self._query_graph.type_query(
                concepts[i:i + self.BATCH_SIZE], lang, self._show_query))
        return self._dedup_by_uri(results)

    def match_type(self, query, lang='eng'):
        """Match entities whose type corresponds to any word in the query.

        :param query: whitespace-separated concept words
        :param lang: WordNet language code ('eng', 'spa', 'cmn');
                     raises KeyError for unsupported codes
        :return: deduplicated list of matched entities
        """
        result_lang = self.LANG_MAP[lang]
        concept_uris = []
        for w in query.split():
            concept_uris.extend(itertools.chain.from_iterable(
                [s['lod'] for s in self.type_links(w, lang)]))
        return self.match_concepts(list(set(concept_uris)), result_lang)

    def match_entity_type(self, query):
        """Match entities related to the query's named entities and typed by
        the query's concepts.

        :param query: short text query containing concepts and entities
        :return: deduplicated list of result dicts
        """
        results = []
        concepts, entities = self.query_process(query)
        for e in entities:
            for i in xrange(0, len(concepts), self.BATCH_SIZE):
                results.extend(self._query_graph.type_entity_query(
                    concepts[i:i + self.BATCH_SIZE], e, self._show_query))
        return self._dedup_by_uri(results)