def __init__(self, orig_phrase: str = None, orig_phrase_norm: str = None, wiki_title: str = None,
             wiki_title_norm: str = None, score: int = 0, pageid: int = 0, description: str = None,
             relations: WikipediaPageExtractedRelations = None) -> None:
    """
    Object represent a Wikipedia Page and extracted fields.

    Args:
        orig_phrase (str): original search phrase
        orig_phrase_norm (str, optional): normalized form of ``orig_phrase``;
            derived via ``StringUtils.normalize_str`` when not supplied
        wiki_title (str): page title; any disambiguation marker
            (``DISAMBIGUATION_TITLE``) is stripped from the stored value
        wiki_title_norm (str, optional): normalized form of ``wiki_title``;
            derived when not supplied
        score (int): score for getting wiki_title from orig_phrase
        pageid (int): the unique page identifier (coerced to ``int``)
        description (str, optional): the page description
        relations (WikipediaPageExtractedRelations): object that represents all
            extracted Wikipedia relations
    """
    self.orig_phrase = orig_phrase
    # Compute the normalized phrase only when the caller did not provide one.
    if orig_phrase_norm is None:
        self.orig_phrase_norm = StringUtils.normalize_str(orig_phrase)
    else:
        self.orig_phrase_norm = orig_phrase_norm

    # Bug fix: wiki_title defaults to None, but the original code called
    # .replace() unconditionally, raising AttributeError when constructed
    # with the documented default. Guard the strip for the None case.
    if wiki_title is not None:
        self.wiki_title = wiki_title.replace(DISAMBIGUATION_TITLE, '')
    else:
        self.wiki_title = None

    if wiki_title_norm is None:
        self.wiki_title_norm = StringUtils.normalize_str(wiki_title)
    else:
        self.wiki_title_norm = wiki_title_norm

    self.score = score
    self.pageid = int(pageid)
    self.description = description
    self.relations = relations
def get_pages(self, mention):
    """
    Return the WordnetPage built for *mention*, creating and caching it on first use.

    Args:
        mention: mention object exposing ``tokens_str``, ``mention_head`` and
            ``mention_head_lemma`` attributes.

    Returns:
        WordnetPage: the cached or freshly constructed page for this mention.
    """
    key = mention.tokens_str
    # Serve from the per-instance cache when this mention text was seen before.
    if key in self.cache:
        return self.cache[key]

    head = mention.mention_head
    head_lemma = mention.mention_head_lemma

    head_syns, head_deriv = self.extract_synonyms_and_derivation(head)
    lemma_syns, lemma_deriv = self.extract_synonyms_and_derivation(head_lemma)

    normalized = StringUtils.normalize_str(key)
    word_synonyms = self.all_clean_words_synonyms(normalized)

    page = WordnetPage(
        key,
        normalized,
        head,
        head_lemma,
        head_syns,
        lemma_syns,
        head_deriv,
        lemma_deriv,
        word_synonyms,
    )
    self.cache[key] = page
    return page