def get_article_representation(self, article_title):
    '''
    Returns a list of ids of the categories of this article.

    The category titles are fetched with
    wikipedia_api_util.query_categories_of_res and each one is resolved
    with wikipedia_api_util.query_page_id. The fetched titles are also
    printed to stdout as a debugging aid.

    @param article_title: title of the article whose categories are looked up
    @return: a list of numbers, one query_page_id result per category title,
             in the order the titles were returned
    '''
    category_titles = wikipedia_api_util.query_categories_of_res(article_title)

    # A single parenthesised argument parses on both Python 2 and Python 3
    # (the bare `print "..."` statement is a SyntaxError on Python 3).
    # %-formatting replaces the explicit str() calls, which raise
    # UnicodeEncodeError on Python 2 for non-ASCII unicode titles; the
    # emitted text is unchanged for every input the old code handled.
    print("categories of %s is %s" % (article_title, category_titles))

    return [wikipedia_api_util.query_page_id(cat_title)
            for cat_title in category_titles]
def get_article_representation(self, article_title):
    '''
    Returns a bag of words built from the titles of the categories
    of the given article.

    The category titles are joined with single spaces, passed through
    self.__format_category__, and then handed to
    text_util.get_clean_BOW_doc.

    @param article_title: title of the article whose categories are looked up
    @return: a list of tokens (the result of text_util.get_clean_BOW_doc)
    '''
    titles = wikipedia_api_util.query_categories_of_res(article_title)
    joined_titles = ' '.join(titles)
    formatted_titles = self.__format_category__(joined_titles)
    return text_util.get_clean_BOW_doc(formatted_titles)