Exemple #1
0
def fetch_latest(params):
    """Download the latest news (images included), store it and index it.

    The latest listing is requested from ``daily.ZhiHu``. The ids that
    ``_not_exists_news_ids`` returns for that listing's date are fetched,
    their images are stored, the news are saved and finally handed to
    ``_index_news_list``.

    :param params: request parameters; not read by this function.
    :return: None
    """
    listing = daily.ZhiHu().get_latest_news()
    listed_ids = _extract_news_ids(listing)
    day = _extract_date_str(listing)

    # Only the ids reported as not yet stored are processed; their order is
    # flipped in place before the news are fetched.
    missing_ids = _not_exists_news_ids(day, listed_ids)
    missing_ids.reverse()

    # Fetch the news (with image data), then store the images.
    pending = _get_news_list(missing_ids)
    pending = _store_images(pending, day)

    # Save the news themselves.
    _store_news_list(pending)

    # Index just the news payload of every stored entry.
    _index_news_list([entry['news'] for entry in pending])
Exemple #2
0
def _get_news_list(news_ids):
    """Fetch every news item together with its image.

    Each id is looked up through ``daily.ZhiHu().get_news``. The item's image
    URL (``'image'`` when that key is present, otherwise ``'theme_image'``)
    is downloaded with ``_fetch_image``. When anything fails for an id, the
    error is logged and that id is skipped; the remaining ids are still
    processed.

    :param news_ids: iterable of news ids to fetch.
    :return: list of dicts with the keys ``news``, ``image_type``,
        ``image_data`` and ``image_url``, one per id that was fetched
        without error, in iteration order.
    """
    client = daily.ZhiHu()
    fetched = []

    for news_id in news_ids:
        try:
            item = client.get_news(news_id)

            # Prefer the item's own image; fall back to the theme image.
            if 'image' in item:
                picture_url = item['image']
            else:
                picture_url = item['theme_image']

            picture_type, picture_data = _fetch_image(item['share_url'],
                                                      picture_url)
            fetched.append({
                'news': item,
                'image_type': picture_type,
                'image_data': picture_data,
                'image_url': picture_url,
            })
        except Exception as err:
            # Best effort: record the failure with its traceback and move on
            # to the next id instead of aborting the whole batch.
            logging.error("fetch latest error %s\n%s"
                          % (err, traceback.format_exc()))

    return fetched
Exemple #3
0
def fetch(params):
    """Download news (images included) and store it.

    When ``params`` contains ``'date'``, the listing is requested with
    ``get_before_news`` using the first element of ``params['date']``;
    otherwise the latest listing is requested.

    :param params: mapping of request parameters; ``'date'`` is optional.
    :return: None
    """
    client = daily.ZhiHu()

    if 'date' in params:
        listing = client.get_before_news(params['date'][0])
    else:
        listing = client.get_latest_news()

    # Ids and date string both come from the listing that was returned.
    listed_ids = _extract_news_ids(listing)
    day = _extract_date_str(listing)

    # Only the ids reported as not yet stored are processed; their order is
    # flipped in place before the news are fetched.
    missing_ids = _not_exists_news_ids(day, listed_ids)
    missing_ids.reverse()

    # Fetch the news (with image data), then store the images.
    pending = _fetch_news_list(missing_ids)
    pending = _store_images(pending, day)

    # Save the news themselves.
    _store_news_list(pending)
Exemple #4
0
def fetch_before(params):
    """Download the news of a given day, store it and index it.

    The first element of ``params['date']`` is passed to
    ``get_before_news``. The date string used for the later steps is the
    one extracted from the returned listing, not the requested value.

    :param params: mapping of request parameters; must contain ``'date'``.
    :return: None
    :raises OperationException: when ``'date'`` is missing from ``params``.
    """
    if 'date' not in params:
        raise OperationException("lack of param date")

    requested_day = params['date'][0]
    client = daily.ZhiHu()

    listing = client.get_before_news(requested_day)
    listed_ids = _extract_news_ids(listing)
    day = _extract_date_str(listing)

    # Only the ids reported as not yet stored are processed; their order is
    # flipped in place before the news are fetched.
    missing_ids = _not_exists_news_ids(day, listed_ids)
    missing_ids.reverse()

    # Fetch the news (with image data), then store the images.
    pending = _get_news_list(missing_ids)
    pending = _store_images(pending, day)

    # Save the news themselves.
    _store_news_list(pending)

    # Index just the news payload of every stored entry.
    _index_news_list([entry['news'] for entry in pending])