コード例 #1
0
 def test_get_total_comment_page(self):
     """
     Test that the total number of comment pages is parsed correctly.

     Reads the saved page ``tests/comment.html`` and expects
     ``comment.get_total_page`` to report 227 pages for it.
     """
     from page_parse import comment
     with open('tests/comment.html', encoding='utf-8') as fixture:
         page_source = fixture.read()
     self.assertEqual(comment.get_total_page(page_source), 227)
コード例 #2
0
ファイル: test_wbspider.py プロジェクト: zqy1/weibospider
 def test_get_total_comment_page(self):
     """
     Test that the total number of comment pages is parsed correctly.

     Fetches ``comment.html`` from the test server and expects
     ``comment.get_total_page`` to report 227 pages for it.
     """
     from page_parse import comment
     url = TEST_SERVER + 'comment.html'
     # Bound the request so an unresponsive test server fails the test
     # instead of hanging the whole run.
     resp = requests.get(url, timeout=10)
     # Surface HTTP errors directly rather than as a confusing page-count mismatch.
     resp.raise_for_status()
     # Force UTF-8 decoding of the body before reading the text.
     resp.encoding = 'utf-8'
     html = resp.text
     total_page = comment.get_total_page(html)
     self.assertEqual(total_page, 227)
コード例 #3
0
ファイル: comment.py プロジェクト: zjlx/WeiboSpider
def crawl_comment_page(mid):
    """
    Crawl the first comment page for ``mid`` and dispatch tasks for the rest.

    Page 1 is crawled with a direct local call so its result is available
    immediately; that result is passed to ``comment.get_total_page`` to learn
    how many pages exist. Pages from 2 up to the smaller of that total and the
    configured maximum are then sent as ``tasks.comment.crawl_comment_by_page``
    tasks.

    :param mid: identifier forwarded unchanged to the page crawler.
    """
    page_cap = conf.get_max_comment_page() + 1
    # Local call (not a task) so the result can be used right away.
    first_result = crawl_comment_by_page(mid, 1)
    page_count = comment.get_total_page(first_result)

    # Exclusive upper bound: never go past the pages that actually exist.
    stop = page_count + 1 if page_count < page_cap else page_cap

    for page_no in range(2, stop):
        app.send_task(
            'tasks.comment.crawl_comment_by_page',
            args=(mid, page_no),
            queue='comment_page_crawler',
            routing_key='comment_page_info',
        )
コード例 #4
0
ファイル: comment.py プロジェクト: 402730243/spider-weibo
def crawl_comment_page(mid):
    """
    Crawl the comment pages for ``mid`` one after another.

    Page 1 is crawled with a direct local call so its result is available
    immediately; the first element of that result is passed to
    ``comment.get_total_page`` to learn how many pages exist. Pages from 2 up
    to the smaller of that total and the configured maximum are then crawled
    sequentially, pausing three seconds after each one.

    :param mid: identifier forwarded unchanged to the page crawler.
    """
    page_cap = conf.get_max_comment_page() + 1
    # Local call so the result can be used right away; only element 0 is needed.
    first_html = crawl_comment_by_page(mid, 1)[0]
    page_count = comment.get_total_page(first_html)

    # Exclusive upper bound: never go past the pages that actually exist.
    stop = page_count + 1 if page_count < page_cap else page_cap

    page_no = 2
    while page_no < stop:
        crawl_comment_by_page(mid, page_no)
        time.sleep(3)
        page_no += 1
コード例 #5
0
ファイル: comment.py プロジェクト: cptBTptpwbct/weibo
def crawl_comment_page(mid):
    """
    Crawl the first comment page for ``mid`` and queue the remaining pages.

    Page 1 is crawled with a direct call (using ``db_session``); the first
    element of that result is passed to ``comment.get_total_page`` to learn
    how many pages exist. Pages from 2 up to the smaller of that total and the
    configured maximum are then pushed with ``Tasks.push_task``.

    :param mid: identifier forwarded unchanged to the crawler and the task queue.
    """
    page_cap = conf.get_max_comment_page() + 1
    first_html = crawl_comment_by_page(mid, 1, db_session)[0]
    page_count = comment.get_total_page(first_html)

    # Exclusive upper bound: never go past the pages that actually exist.
    stop = page_count + 1 if page_count < page_cap else page_cap

    # Pages are queued through Tasks.push_task; an earlier app.send_task
    # dispatch ('tasks.comment.crawl_comment_by_page') is disabled here.
    # NOTE(review): the meaning of the leading 1 is not visible in this block.
    for page_no in range(2, stop):
        Tasks.push_task(1, mid, page_no)
コード例 #6
0
ファイル: comment.py プロジェクト: ResolveWang/WeiboSpider
def crawl_comment_page(mid):
    """
    Crawl every comment page for ``mid`` sequentially, up to the configured cap.

    The first page is crawled with a direct local call so that its result is
    available immediately. Element 0 of that result goes to
    ``comment.get_total_page`` to find the total page count. The remaining
    pages (2 through the smaller of the total and the configured maximum) are
    crawled in order, with a three-second sleep after each.

    :param mid: identifier forwarded unchanged to the page crawler.
    """
    upper = conf.get_max_comment_page() + 1
    # Direct call rather than a dispatched task: the result is needed now.
    first_page_html = crawl_comment_by_page(mid, 1)[0]
    pages_available = comment.get_total_page(first_page_html)

    # Shrink the exclusive bound when fewer pages exist than the cap allows.
    if not pages_available < upper:
        remaining = range(2, upper)
    else:
        remaining = range(2, pages_available + 1)

    for current in remaining:
        crawl_comment_by_page(mid, current)
        time.sleep(3)
コード例 #7
0
ファイル: test_wbspider.py プロジェクト: dittoyi/weibospider
 def test_get_total_comment_page(self):
     """
     Test that the total number of comment pages is parsed correctly.

     Downloads ``comment.html`` from the test server and expects
     ``comment.get_total_page`` to report 227 pages for it.
     """
     from page_parse import comment
     url = TEST_SERVER + 'comment.html'
     # A timeout keeps an unresponsive test server from hanging the test run.
     resp = requests.get(url, timeout=10)
     # Fail on an HTTP error status instead of parsing an error page.
     resp.raise_for_status()
     # Force UTF-8 decoding of the body before reading the text.
     resp.encoding = 'utf-8'
     html = resp.text
     total_page = comment.get_total_page(html)
     self.assertEqual(total_page, 227)
コード例 #8
0
ファイル: comment.py プロジェクト: dittoyi/weibospider
def crawl_comment_page(mid):
    """
    Crawl comment page 1 for ``mid`` locally, then dispatch the other pages.

    The first page is crawled with a direct local call so that its result is
    available immediately, and that result is given to
    ``comment.get_total_page`` to find the total page count. Every page from 2
    through the smaller of the total and the configured maximum is sent as a
    ``tasks.comment.crawl_comment_by_page`` task.

    :param mid: identifier forwarded unchanged to the page crawler.
    """
    upper = conf.get_max_comment_page() + 1
    # Direct call rather than a dispatched task: the result is needed now.
    first_page_result = crawl_comment_by_page(mid, 1)
    pages_available = comment.get_total_page(first_page_result)

    # Shrink the exclusive bound when fewer pages exist than the cap allows.
    if not pages_available < upper:
        remaining = range(2, upper)
    else:
        remaining = range(2, pages_available + 1)

    for current in remaining:
        app.send_task(
            'tasks.comment.crawl_comment_by_page',
            args=(mid, current),
            queue='comment_page_crawler',
            routing_key='comment_page_info',
        )
コード例 #9
0
ファイル: dialogue.py プロジェクト: lan1tian/weibospider
def crawl_dialogue(mid):
    """
    Crawl dialogue data for ``mid`` across its comment pages.

    Page 1 is processed with a direct call; its result is passed to
    ``comment.get_total_page`` to learn how many pages exist. Pages from 2 up
    to the smaller of that total and the configured dialogue maximum are sent
    as ``tasks.comment.crawl_dialogue_by_comment_page`` tasks.

    :param mid: identifier forwarded unchanged to the page crawler.
    """
    page_cap = conf.get_max_dialogue_page() + 1

    first_result = crawl_dialogue_by_comment_page(mid, 1)
    page_count = comment.get_total_page(first_result)

    # Exclusive upper bound: never go past the pages that actually exist.
    stop = page_count + 1 if page_count < page_cap else page_cap

    # Remaining pages are dispatched as tasks instead of being crawled inline.
    routing = {'queue': 'comment_page_crawler', 'routing_key': 'comment_page_info'}
    for page_no in range(2, stop):
        app.send_task('tasks.comment.crawl_dialogue_by_comment_page',
                      args=(mid, page_no), **routing)
コード例 #10
0
ファイル: dialogue.py プロジェクト: ResolveWang/WeiboSpider
def crawl_dialogue(mid):
    """
    Crawl dialogue page 1 for ``mid`` directly, then dispatch the other pages.

    The result of the first-page call goes to ``comment.get_total_page`` to
    find the total page count. Every page from 2 through the smaller of the
    total and the configured dialogue maximum is sent as a
    ``tasks.comment.crawl_dialogue_by_comment_page`` task.

    :param mid: identifier forwarded unchanged to the page crawler.
    """
    upper = conf.get_max_dialogue_page() + 1

    first_page_result = crawl_dialogue_by_comment_page(mid, 1)
    pages_available = comment.get_total_page(first_page_result)

    # Shrink the exclusive bound when fewer pages exist than the cap allows.
    if not pages_available < upper:
        remaining = range(2, upper)
    else:
        remaining = range(2, pages_available + 1)

    # Pages after the first are sent as tasks rather than crawled inline.
    for current in remaining:
        app.send_task(
            'tasks.comment.crawl_dialogue_by_comment_page',
            args=(mid, current),
            queue='comment_page_crawler',
            routing_key='comment_page_info',
        )