def test_get_total_comment_page(self):
    """Check that the total comment page count is parsed from the saved fixture.

    Reads ``tests/comment.html`` as UTF-8, hands the markup to
    ``comment.get_total_page`` and expects the result to be 227.
    """
    from page_parse import comment

    with open('tests/comment.html', encoding='utf-8') as fixture:
        markup = fixture.read()

    self.assertEqual(comment.get_total_page(markup), 227)
def test_get_total_comment_page(self):
    """Check that the total comment page count is parsed from the served fixture.

    Downloads ``comment.html`` from ``TEST_SERVER``, decodes the body as
    UTF-8, hands the markup to ``comment.get_total_page`` and expects the
    result to be 227.
    """
    from page_parse import comment

    url = TEST_SERVER + 'comment.html'
    # Bound the request so an unreachable test server fails the test
    # instead of blocking the whole run indefinitely.
    resp = requests.get(url, timeout=10)
    # Surface HTTP errors directly rather than parsing an error page and
    # failing later with a misleading page-count mismatch.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    html = resp.text
    total_page = comment.get_total_page(html)
    self.assertEqual(total_page, 227)
def crawl_comment_page(mid):
    """Crawl the first comment page directly and dispatch the remaining pages as tasks.

    Page 1 is fetched by calling ``crawl_comment_by_page`` in-process, and its
    result is passed to ``comment.get_total_page``. Pages from 2 up to the
    smaller of that total and ``conf.get_max_comment_page()`` are then sent
    as ``tasks.comment.crawl_comment_by_page`` tasks.

    :param mid: identifier forwarded unchanged to ``crawl_comment_by_page``
    """
    page_cap = conf.get_max_comment_page() + 1
    # Called locally (not as a task) so the result is available right away.
    first_result = crawl_comment_by_page(mid, 1)
    page_total = comment.get_total_page(first_result)
    stop = page_total + 1 if page_total < page_cap else page_cap

    routing = {'queue': 'comment_page_crawler', 'routing_key': 'comment_page_info'}
    for page_num in range(2, stop):
        app.send_task(
            'tasks.comment.crawl_comment_by_page',
            args=(mid, page_num),
            **routing
        )
def crawl_comment_page(mid):
    """Crawl every comment page sequentially in-process.

    Page 1 is fetched first, and the first element of its result is passed to
    ``comment.get_total_page``. Pages from 2 up to the smaller of that total
    and ``conf.get_max_comment_page()`` are then crawled one by one, sleeping
    three seconds after each page.

    :param mid: identifier forwarded unchanged to ``crawl_comment_by_page``
    """
    page_cap = conf.get_max_comment_page() + 1
    # Called locally so the result is available right away.
    first_result = crawl_comment_by_page(mid, 1)
    page_total = comment.get_total_page(first_result[0])
    stop = page_total + 1 if page_total < page_cap else page_cap

    for page_num in range(2, stop):
        crawl_comment_by_page(mid, page_num)
        time.sleep(3)
def crawl_comment_page(mid):
    """Crawl the first comment page directly and push the remaining pages as tasks.

    Page 1 is fetched with ``crawl_comment_by_page(mid, 1, db_session)``, and
    the first element of its result is passed to ``comment.get_total_page``.
    Pages from 2 up to the smaller of that total and
    ``conf.get_max_comment_page()`` are then handed to ``Tasks.push_task``.

    :param mid: identifier forwarded unchanged to ``crawl_comment_by_page``
        and ``Tasks.push_task``
    """
    page_cap = conf.get_max_comment_page() + 1
    first_result = crawl_comment_by_page(mid, 1, db_session)
    page_total = comment.get_total_page(first_result[0])
    stop = page_total + 1 if page_total < page_cap else page_cap

    # Pages are queued through Tasks.push_task rather than app.send_task.
    # NOTE(review): the leading 1 is presumably a task-type code; confirm
    # against Tasks.push_task.
    for page_num in range(2, stop):
        Tasks.push_task(1, mid, page_num)
def crawl_dialogue(mid):
    """Crawl the first dialogue page directly and dispatch the remaining pages as tasks.

    Page 1 is fetched by calling ``crawl_dialogue_by_comment_page``
    in-process, and its result is passed to ``comment.get_total_page``. Pages
    from 2 up to the smaller of that total and
    ``conf.get_max_dialogue_page()`` are then sent as
    ``tasks.comment.crawl_dialogue_by_comment_page`` tasks.

    :param mid: identifier forwarded unchanged to
        ``crawl_dialogue_by_comment_page``
    """
    page_cap = conf.get_max_dialogue_page() + 1
    first_result = crawl_dialogue_by_comment_page(mid, 1)
    page_total = comment.get_total_page(first_result)
    stop = page_total + 1 if page_total < page_cap else page_cap

    routing = {'queue': 'comment_page_crawler', 'routing_key': 'comment_page_info'}
    for page_num in range(2, stop):
        app.send_task(
            'tasks.comment.crawl_dialogue_by_comment_page',
            args=(mid, page_num),
            **routing
        )