Exemple #1
0
 # Process one listing page: fetch its article list, record each article's
 # metadata, download and save each article, then checkpoint and pause.
 # NOTE(review): this excerpt appears to be the body of an enclosing loop
 # whose header is not shown here; the `break` at this level exits that loop.
 articles = craw.get_page_articles(page_uri)
 if not articles:
     # Empty (falsy) listing: report the current page/article counters and stop.
     print("当前抓取到列表为空!当前处理页数: {} 当前处理文章总数: {}, 请检查原因后继续运行.".format(
         page_number, article_number))
     break
 # Listing fetched successfully; store each entry.
 for item in articles:
     # One ';'-separated record per article: title, summary, URI, account name.
     write_line = "{};{};{};{}\n".format(item['title'],
                                         item['summary'],
                                         item['article_uri'],
                                         item['account_name'])
     # The record is written before the URI check below, so an entry with an
     # empty URI is still recorded in fp.
     fp.write(write_line)
     if item['article_uri'] == '':
         # Empty article URI: stop processing the rest of this page's articles.
         print("文章uri 为空!")
         break
     article_detail = craw.get_article(item['article_uri'])
     if article_detail is None:
         # Article fetch returned None: stop processing the rest of this page.
         print("抓取文章失败!")
         break
     # Success: save the article and count it.
     save_article(article_detail)
     article_number += 1
     # Pause 2 seconds between article fetches.
     time.sleep(2)
 # NOTE(review): the two `break`s inside the `for` above exit only that inner
 # loop; execution still reaches the flush, page increment and break-point save
 # below, so a page that failed midway is checkpointed like a completed one.
 # Confirm this is intended.
 fp.flush()
 print("刷盘成功! 页数: {} 处理条数: {}".format(page_number, article_number))
 page_number += 1
 # Save the break point (the next page number and the running article count).
 save_break_point(break_point_filename, page_number, article_number)
 # Wait a random 1-5 seconds (inclusive) before continuing.
 wait_interval = random.randint(1, 5)
 print("Waiting for the interval: {}s".format(wait_interval))
 time.sleep(wait_interval)