Example #1
def g_news_item(self, article, start_url="", meta=None):
    # Skip video articles; only plain news is handled here.
    if article.get("has_video"):
        return None
    docid = article["source_url"]
    crawl_url = self._g_crawl_url(article)
    key = g_cache_key(crawl_url)
    # Deduplicate: bail out if this article was already crawled.
    if news_already_exists(key):
        return None
    news = get_default_news(
        crawl_url=crawl_url,
        key=key,
        title=article["title"],
        tags=article.get("keywords", "").split(","),
        summary=article.get("abstract", ""),
        publish_time=str_from_timestamp(article["publish_time"]),
        love=article.get("favorite_count", 0),
        up=article.get("digg_count", 0),
        down=article.get("bury_count", 0),
        original_url=article.get("url", ""),
        original_source=article.get("source", ""),
        crawl_source=CRAWL_SOURCE,
        start_url=start_url,
        start_meta_info=meta,
        comment_url=self._g_comment_url(docid),
        comment_queue=COMMENT_SPIDER_NAME + ":start_urls")
    # Use the comment URL as the document id.
    news["docid"] = news["comment_url"]
    return news
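This example and the variants below lean on shared helpers (g_cache_key, news_already_exists, get_default_news) whose definitions are not shown. A minimal sketch of what they might look like, assuming an MD5-based key and a Redis set for deduplication; the Redis connection, the set name, and the default fields are assumptions, not the project's actual code:

import hashlib

import redis

_redis = redis.StrictRedis()  # assumed connection; the real project presumably configures this elsewhere


def g_cache_key(url):
    # Hypothetical: derive a stable dedup key from the crawl URL (or title).
    if isinstance(url, str):
        url = url.encode("utf-8")
    return hashlib.md5(url).hexdigest()


def news_already_exists(key):
    # Hypothetical: membership test against a Redis set of already-seen keys.
    return _redis.sismember("news:seen_keys", key)


def get_default_news(**fields):
    # Hypothetical: an item pre-filled with defaults, overridden by the caller.
    news = {"title": "", "summary": "", "content": "", "tags": [],
            "love": 0, "up": 0, "down": 0, "docid": "", "comment_url": ""}
    news.update(fields)
    return news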
Example #2
def parse_item(self, response):
    body = response.body_as_unicode().encode("utf-8")
    extractor = GeneralExtractor(body)
    # Pull the structured fields out of the page with the configured rules.
    title, post_date, post_user, summary, content = extractor(
        self.title_param, self.post_date_param, self.post_user_param,
        self.summary_param, self.content_param)
    # Fall back to the crawl source when the page carries no author.
    if not post_user:
        post_user = self.crawl_source
    news = get_default_news(
        crawl_url=response.url,
        key=g_cache_key(response.url),
        title=title,
        publish_time=post_date,
        original_source=post_user,
        original_url=response.url,
        content=content,
        crawl_source=self.crawl_source,
    )
    # Debug dump of the extracted fields.
    print("*" * 50)
    print("url: %s" % response.url)
    print("title: %s" % title)
    print("post date: %s" % post_date)
    print("post user: %s" % post_user)
    print("summary: %s" % summary)
    show(content)
    print("\n\n")
    # Hand the built item back so the framework can route it onward.
    return news
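parse_item in Example #2 is a Scrapy callback (body_as_unicode is the older Scrapy response API). A minimal sketch of a spider that would route article pages into it; the spider name, start URL, CSS selector, and crawl_source value are placeholders, and the *_param extractor rules are assumed to be configured elsewhere:

import scrapy


class GeneralNewsSpider(scrapy.Spider):
    name = "general_news"                          # placeholder
    start_urls = ["http://example.com/news/list"]  # placeholder
    crawl_source = "example"                       # placeholder

    def parse(self, response):
        # Follow each article link and hand the page to parse_item.
        for href in response.css("a.article::attr(href)").extract():
            yield response.follow(href, callback=self.parse_item)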
Example #3
def g_news_item(self, article, start_url="", meta=None):
    if article["ctype"] not in ["news", "picture"]:
        return None  # FIXME: only news and picture types are supported for now
    docid = article["docid"]
    crawl_url = self._g_article_url(article.get("url"), docid)
    if not crawl_url:
        return None
    key = g_cache_key(crawl_url)
    # Deduplicate before building the item.
    if news_already_exists(key):
        return None
    news = get_default_news(crawl_url=crawl_url,
                            key=key,
                            title=article["title"],
                            summary=article.get("summary", ""),
                            publish_time=article["date"],
                            love=article.get("like", 0),
                            up=article.get("up", 0),
                            original_url=article.get("url", ""),
                            crawl_source=CRAWL_SOURCE,
                            original_source=article.get("source", ""),
                            start_url=start_url,
                            start_meta_info=meta,
                            comment_url=self._g_comment_url(docid),
                            comment_queue=COMMENT_SPIDER_NAME + ":start_urls")
    news["docid"] = news["comment_url"]
    return news
Example #4
def g_news_item(self, article, start_url="", meta=None):
    """
    Build a news item object.

    :param article: dict containing the news url and title fields
    :type article: dict
    :param start_url: start url from which the meta info is crawled
    :type start_url: str
    :param meta: additional configuration info
    :type meta: dict
    :return: the news item
    :rtype: News.items.NewsItem | None
    """
    crawl_url = article["url"]
    key = g_cache_key(crawl_url)
    if news_already_exists(key):
        return None
    news = get_default_news(title=article["title"],
                            crawl_url=crawl_url,
                            docid=crawl_url,
                            key=key,
                            crawl_source=self.crawl_source,
                            start_url=start_url,
                            summary=article.get("summary", ""),
                            start_meta_info=meta)
    return news
Example #5
def g_news_item(self, article, start_url="", meta=None):
    docid = article["url"]
    crawl_url = self._g_article_url(article["url"])
    if not crawl_url:
        return None
    key = g_cache_key(crawl_url)
    if news_already_exists(key):
        return None
    news = get_default_news(crawl_url=crawl_url,
                            key=key,
                            title=article["title"],
                            docid=docid,
                            start_meta_info=meta,
                            crawl_source=CRAWL_SOURCE)
    return news
Example #6
def g_news_item(self, article, start_url="", meta=None):
    crawl_url = article["crawl_url"]
    key = g_cache_key(crawl_url)
    # Note: unlike the other variants, this one performs no
    # news_already_exists check before building the item.
    news = get_default_news(
        crawl_url=crawl_url,
        key=key,
        title=article["title"],
        love=article.get("like", 0),
        up=article.get("up", 0),
        original_url=article.get("url", ""),
        crawl_source=CRAWL_SOURCE,
        start_url=start_url,
        start_meta_info=meta,
        docid=crawl_url,
    )
    return news
Example #7
def g_news_item(self, article, start_url="", meta=None):
    crawl_url = article["crawl_url"]
    comment_url = self._g_comment_url(crawl_url)
    news = get_default_news(
        crawl_url=crawl_url,
        docid=comment_url,
        # Dedup key is derived from the title here, not from the URL.
        key=g_cache_key(article["title"].encode("utf-8")),
        crawl_source=CRAWL_SOURCE,
        start_url=start_url,
        summary=article["summary"],
        publish_time=article["publish_time"],
        title=article["title"],
        start_meta_info=meta,
        comment_url=comment_url,
        comment_queue=COMMENT_SPIDER_NAME + ":start_urls"
    )
    return None if news_already_exists(news["key"]) else news
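Several of the variants set comment_queue to COMMENT_SPIDER_NAME + ":start_urls". That naming matches the scrapy-redis convention, where a RedisSpider polls the Redis key <spider name>:start_urls for URLs to crawl, so a downstream pipeline presumably pushes news["comment_url"] onto that key. A minimal sketch of the consuming side, assuming scrapy-redis is in use; the spider name and parse body are placeholders:

from scrapy_redis.spiders import RedisSpider


class CommentSpider(RedisSpider):
    # Pops URLs that a pipeline is assumed to LPUSH onto this Redis key.
    name = "comment_spider"                  # placeholder
    redis_key = "comment_spider:start_urls"  # i.e. COMMENT_SPIDER_NAME + ":start_urls"

    def parse(self, response):
        # Placeholder: extract the article's comments here.
        self.logger.info("fetched comments from %s", response.url)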