def get_article_representation(self, article_title):
    '''
    Returns a bag of words built from the text content of the given article.

    The page text is fetched with wikipedia_api_util.query_page_content_text
    and then passed through text_util.get_clean_BOW_doc.

    @param article_title: title of the article whose page text is used
    @return: a list of tokens
    '''
    # Fetch the raw page text, then clean it into tokens in one step.
    return text_util.get_clean_BOW_doc(
        wikipedia_api_util.query_page_content_text(article_title)
    )
def get_article_representation(self, article_title):
    '''
    Returns a bag of words built from the titles of the categories
    of the given article.

    The category titles are looked up with
    wikipedia_api_util.query_categories_of_res, joined with single spaces,
    run through self.__format_category__ and finally cleaned by
    text_util.get_clean_BOW_doc.

    @param article_title: title of the article whose categories are used
    @return: a list of tokens
    '''
    categories = wikipedia_api_util.query_categories_of_res(article_title)

    # All category titles are merged into one space-separated string
    # before formatting, so the formatter sees them as a single document.
    joined_titles = ' '.join(categories)
    formatted_titles = self.__format_category__(joined_titles)

    return text_util.get_clean_BOW_doc(formatted_titles)
def get_article_representation(self, article_title):
    '''
    Returns a bag of words built from the title of the given article.

    Underscores in the title are replaced by spaces before the text is
    cleaned by text_util.get_clean_BOW_doc.

    @param article_title: title of the article
    @return: a list of tokens
    '''
    # Turn the underscore-separated title into space-separated text, then clean it.
    return text_util.get_clean_BOW_doc(article_title.replace('_', ' '))