Example #1
0
def get_perspectives(url):
  '''Get different perspectives on the topic covered by article.

  Args:
    url: A string.

  Returns:
    A JSON-encoded string representing other articles with different
    perspectives than the original article.

    Format: a list of Article.to_dict()s, each with an additional 'sentences'
    attribute. 'sentences' contains a list of sentences with semantically
    different words that were extracted from the corresponding article's body.
  '''
  import json  # local import: only needed for the error path

  article = url_to_article(url)
  if not article:
    # Previously this fell through and implicitly returned None, violating
    # the documented contract of always returning a JSON-encoded string.
    return json.dumps({"Error": "Not a recognized article"})

  # Derive search keywords from the headline, gather coverage of the same
  # topic from the other outlets, then diff them against the original.
  article_topic = extract_keywords.extract_keywords(article.headline)
  related_articles = query_all_news_orgs(article_topic)
  return compare_articles.compare.to_all_articles(article, related_articles)
Example #2
0
def get_perspectives(url):
    '''Get different perspectives on the topic covered by article.

    Args:
      url: A string.

    Returns:
      A JSON-encoded string representing other articles with different
      perspectives than the original article.

      Format: a list of Article.to_dict()s, each with an additional 'sentences'
      attribute. 'sentences' contains a list of sentences with semantically
      different words that were extracted from the corresponding article's body.
    '''
    article = url_to_article(url)
    if not article:
        # Return a JSON object (not a bare string) so clients can key on
        # "Error" — consistent with the other variants of this endpoint.
        return json.dumps({"Error": "Not a recognized article"})

    headline = article.headline
    body = article.body
    org = article.news_org

    article_topic = extract_keywords.extract_keywords(headline)

    (NP_to_sentence, VP_to_sentence, NPs, VPs, NP_synsets, VP_synsets) = \
        get_article_phrases(body, org)

    # Fan out one comparison task per news org; every worker receives the
    # same phrase data extracted from the original article.
    n = len(NEWS_ORGS)
    with futures.ProcessPoolExecutor(max_workers=n) as executor:
        comparisons = executor.map(get_comparison, NEWS_ORGS,
                                   [article_topic] * n,
                                   [NP_to_sentence] * n,
                                   [VP_to_sentence] * n, [NPs] * n,
                                   [VPs] * n, [NP_synsets] * n,
                                   [VP_synsets] * n, [1] * n)
        compared_articles_by_org = list(comparisons)
        # Flatten from list of lists of articles (grouped by news org) to a
        # flat list, skipping falsy entries. The loop variable is named
        # `compared` so it no longer shadows `article` above.
        compared_articles = [
            compared for org_articles in compared_articles_by_org
            for compared in org_articles if compared
        ]
        return json.dumps(compared_articles)
Example #3
0
def get_perspectives(url):
  '''Get different perspectives on the topic covered by article.

  Args:
    url: A URLencoded string.

  Returns:
    A JSON-encoded string representing other articles with different
    perspectives than the original article.

    Format: a list of Article.to_dict()s, each with an additional 'sentences'
    attribute. 'sentences' contains a list of sentences with semantically
    different words that were extracted from the corresponding article's body.
  '''
  article = url_to_article(urldecode(url))
  if not article:
    return json.dumps({"Error": "Not a recognized article"})

  headline = article.headline
  body = article.body
  org = article.news_org

  topic = extract_keywords.extract_keywords(headline)
  NP_to_sentence, VP_to_sentence, NPs, VPs, NP_synsets, VP_synsets = \
      get_article_phrases(body, org)

  # Arguments broadcast unchanged to every worker, in the positional order
  # expected by get_comparison (after the per-worker news org).
  shared_args = [NP_to_sentence, VP_to_sentence, NPs, VPs,
                 NP_synsets, VP_synsets, topic, headline, org]

  org_count = len(NEWS_ORGS)
  with futures.ProcessPoolExecutor(max_workers=org_count) as executor:
    per_org_results = executor.map(
        get_comparison, NEWS_ORGS,
        *([value] * org_count for value in shared_args))
    # Flatten the per-org article lists into a single list, dropping any
    # falsy entries along the way.
    flattened = []
    for org_articles in per_org_results:
      for compared in org_articles:
        if compared:
          flattened.append(compared)
    return json.dumps(flattened)
Example #4
0
from save_full_words import save_full_words
from extract_keywords import extract_keywords

if __name__ == "__main__":
    # Prompt for the Weibo account whose comments should be processed, then
    # run word segmentation followed by keyword extraction on that account.
    target = input("请输入需要对评论内容进行分词的微博名:")
    save_full_words(target)
    extract_keywords(target)
 def test_extract_keywords(self):
   # The extractor is expected to lowercase the headline and keep only the
   # keyword terms, joined by single spaces (question mark and filler words
   # such as "What really happened with" are dropped).
   headline = 'What really happened with NBC and Ayman Mohyeldin?'
   res = extract_keywords.extract_keywords(headline)
   self.assertEqual(res, 'nbc ayman mohyeldin')
 def test_extract_keywords(self):
     # A headline with mixed case, filler words, and punctuation should
     # reduce to a lowercase, space-joined keyword string.
     expected = 'nbc ayman mohyeldin'
     result = extract_keywords.extract_keywords(
         'What really happened with NBC and Ayman Mohyeldin?')
     self.assertEqual(result, expected)