Beispiel #1
0
 def get_article_representation(self, article_title):
     ''' Returns the ids of the categories of the given article.

     Queries wikipedia_api_util for the category titles of the article,
     prints them as a debug trace, and then looks up one page id per
     category title.

     @param article_title: title of the article whose categories are queried
     @return: a list holding one wikipedia_api_util.query_page_id result per
         category title, in the order the titles were returned
         (presumably numeric page ids -- confirm against query_page_id) '''
     category_titles = wikipedia_api_util.query_categories_of_res(article_title)
     # Single-argument print with %-formatting: valid syntax on both
     # Python 2 and Python 3, and it avoids str() on the title, which raises
     # UnicodeEncodeError for non-ASCII unicode titles on Python 2.
     print("categories of %s is %s" % (article_title, category_titles))
     category_ids = [wikipedia_api_util.query_page_id(cat_title) for cat_title in category_titles]
     return category_ids
Beispiel #2
0
 def get_article_representation(self, article_title):
     ''' Builds a bag of words from the category titles of the given article.

     The category titles are joined with single spaces, passed through
     self.__format_category__, and finally through
     text_util.get_clean_BOW_doc.

     @param article_title: title of the article whose categories are queried
     @return: the result of text_util.get_clean_BOW_doc for the formatted
         category titles (presumably a list of tokens -- confirm against
         text_util) '''
     titles = wikipedia_api_util.query_categories_of_res(article_title)
     joined_titles = ' '.join(titles)
     formatted_titles = self.__format_category__(joined_titles)
     return text_util.get_clean_BOW_doc(formatted_titles)