def __init__(self, orig_phrase: str = None, orig_phrase_norm: str = None,
                 wiki_title: str = None, wiki_title_norm: str = None,
                 score: int = 0, pageid: int = 0, description: str = None,
                 relations: WikipediaPageExtractedRelations = None) -> None:
        """
        Object representing a Wikipedia page and its extracted fields.

        Args:
            orig_phrase (str): original search phrase
            orig_phrase_norm (str): original search phrase normalized
            wiki_title (str): page title
            wiki_title_norm (str): page title normalized
            score (int): match score for resolving orig_phrase to wiki_title
            pageid (int): the unique page identifier
            description (str, optional): the page description
            relations (WikipediaPageExtractedRelations): Object that represents all
                                                         extracted Wikipedia relations
        """
        self.orig_phrase = orig_phrase
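        # compute the normalized form only when a pre-normalized phrase is not supplied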
        if orig_phrase_norm is None:
            self.orig_phrase_norm = StringUtils.normalize_str(orig_phrase)
        else:
            self.orig_phrase_norm = orig_phrase_norm

        # strip the disambiguation marker from the title; guard against a missing title
        self.wiki_title = wiki_title.replace(DISAMBIGUATION_TITLE, '') if wiki_title else wiki_title
        if wiki_title_norm is None:
            self.wiki_title_norm = StringUtils.normalize_str(wiki_title)
        else:
            self.wiki_title_norm = wiki_title_norm

        self.score = score
        self.pageid = int(pageid)
        self.description = description
        self.relations = relations
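
A minimal usage sketch for the constructor above. The enclosing class name (WikipediaPage here) and the field values are assumptions made for illustration; only WikipediaPageExtractedRelations appears in the original signature.

relations = WikipediaPageExtractedRelations()
page = WikipediaPage(
    orig_phrase="IBM",
    wiki_title="International Business Machines",
    score=95,
    pageid=12345,
    description="example description",
    relations=relations,
)
print(page.orig_phrase_norm, page.wiki_title_norm)  # normalized forms computed in __init__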
Example #2
    def get_pages(self, mention):
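        """
        Return the WordnetPage for the given mention, computing head/lemma synonyms
        and derivationally related forms on first access and caching the result by
        mention.tokens_str.
        """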
        if mention.tokens_str in self.cache:
            return self.cache[mention.tokens_str]

        head_synonyms, head_names_derivationally = self.extract_synonyms_and_derivation(
            mention.mention_head)
        head_lemma_synonyms, head_lemma_derivationally = self.extract_synonyms_and_derivation(
            mention.mention_head_lemma)
        clean_phrase = StringUtils.normalize_str(mention.tokens_str)
        all_clean_words_synonyms = self.all_clean_words_synonyms(clean_phrase)

        wordnet_page = WordnetPage(
            mention.tokens_str,
            clean_phrase,
            mention.mention_head,
            mention.mention_head_lemma,
            head_synonyms,
            head_lemma_synonyms,
            head_names_derivationally,
            head_lemma_derivationally,
            all_clean_words_synonyms,
        )

        self.cache[mention.tokens_str] = wordnet_page
        return wordnet_page
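
A minimal usage sketch for get_pages, assuming `extractor` is an instance of the (unshown) class that defines the method and holds a `cache` dict; SimpleNamespace stands in for the real mention class, which only needs the three attributes read above.

from types import SimpleNamespace

mention = SimpleNamespace(
    tokens_str="software companies",
    mention_head="companies",
    mention_head_lemma="company",
)
page_first = extractor.get_pages(mention)   # builds the WordnetPage and stores it in the cache
page_again = extractor.get_pages(mention)   # returned directly from the cache
assert page_first is page_again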