def get_perspectives(url):
    '''Get different perspectives on the topic covered by article.

    Args:
        url: A string.

    Returns:
        A JSON-encoded string representing other articles with different
        perspectives than the original article. Format: a list of
        Article.to_dict()s, each with an additional 'sentences' attribute.
        'sentences' contains a list of sentences with semantically different
        words that were extracted from the corresponding article's body.
    '''
    import json  # function-scope so this block is self-contained

    article = url_to_article(url)
    if article:
        article_topic = extract_keywords.extract_keywords(article.headline)
        related_articles = query_all_news_orgs(article_topic)
        return compare_articles.compare.to_all_articles(article, related_articles)
    # BUG FIX: previously fell through and implicitly returned None when the
    # URL was not recognized, breaking the documented contract that a
    # JSON-encoded string is always returned.
    return json.dumps({"Error": "Not a recognized article"})
def get_perspectives(url):
    '''Get different perspectives on the topic covered by article.

    Args:
        url: A string.

    Returns:
        A JSON-encoded string representing other articles with different
        perspectives than the original article. Format: a list of
        Article.to_dict()s, each with an additional 'sentences' attribute.
        'sentences' contains a list of sentences with semantically different
        words that were extracted from the corresponding article's body.
    '''
    article = url_to_article(url)
    if not article:
        return json.dumps("Not a recognized article")

    topic = extract_keywords.extract_keywords(article.headline)
    (NP_to_sentence, VP_to_sentence, NPs, VPs,
     NP_synsets, VP_synsets) = get_article_phrases(article.body,
                                                   article.news_org)

    num_orgs = len(NEWS_ORGS)
    # Fan the comparison out across one worker process per news org; every
    # worker receives the same phrase data extracted from the source article.
    with futures.ProcessPoolExecutor(max_workers=num_orgs) as executor:
        per_org_results = list(executor.map(
            get_comparison,
            NEWS_ORGS,
            [topic] * num_orgs,
            [NP_to_sentence] * num_orgs,
            [VP_to_sentence] * num_orgs,
            [NPs] * num_orgs,
            [VPs] * num_orgs,
            [NP_synsets] * num_orgs,
            [VP_synsets] * num_orgs,
            [1] * num_orgs))

    # Flatten the per-org lists of articles into one combined list.
    compared = [a
                for org_articles in per_org_results
                for a in org_articles]
    return json.dumps(compared)
def get_perspectives(url):
    '''Get different perspectives on the topic covered by article.

    Args:
        url: A URLencoded string.

    Returns:
        A JSON-encoded string representing other articles with different
        perspectives than the original article. Format: a list of
        Article.to_dict()s, each with an additional 'sentences' attribute.
        'sentences' contains a list of sentences with semantically different
        words that were extracted from the corresponding article's body.
    '''
    article = url_to_article(urldecode(url))
    if not article:
        return json.dumps({"Error": "Not a recognized article"})

    headline = article.headline
    org = article.news_org
    topic = extract_keywords.extract_keywords(headline)
    (NP_to_sentence, VP_to_sentence, NPs, VPs,
     NP_synsets, VP_synsets) = get_article_phrases(article.body, org)

    count = len(NEWS_ORGS)
    # One worker process per news org; each gets identical phrase data plus
    # the topic/headline/org of the source article.
    with futures.ProcessPoolExecutor(max_workers=count) as executor:
        per_org_results = list(executor.map(
            get_comparison,
            NEWS_ORGS,
            [NP_to_sentence] * count,
            [VP_to_sentence] * count,
            [NPs] * count,
            [VPs] * count,
            [NP_synsets] * count,
            [VP_synsets] * count,
            [topic] * count,
            [headline] * count,
            [org] * count))

    # Flatten per-org lists into one list, dropping falsy entries.
    compared = [a
                for org_articles in per_org_results
                for a in org_articles
                if a]
    return json.dumps(compared)
from save_full_words import save_full_words
from extract_keywords import extract_keywords


def _main():
    """Prompt for a Weibo account name, then segment its comments and extract keywords."""
    name = input("请输入需要对评论内容进行分词的微博名:")
    save_full_words(name)
    extract_keywords(name)


if __name__ == "__main__":
    _main()
def test_extract_keywords(self):
    """Regression test: keyword extraction on a sample headline yields the expected lowercase keyword string."""
    result = extract_keywords.extract_keywords(
        'What really happened with NBC and Ayman Mohyeldin?')
    self.assertEqual(result, 'nbc ayman mohyeldin')