from urllib import parse as url_parse

from logger import crawler
from .workers import app
from page_get import get_page
from config import get_max_search_page
from page_parse import search as parse_search
from db.dao import (KeywordsOper, KeywordsDataOper, WbDataOper)

# This url only searches original weibos.
# If you want another kind of search you can change the url below,
# but if you do, you may also have to rewrite part of the parse code.
URL = 'http://s.weibo.com/weibo/{}&scope=ori&suball=1&page={}'
# Use this one instead if the results are too few
# URL = 'http://s.weibo.com/weibo/{}&nodup=1&page={}'
LIMIT = get_max_search_page() + 1


@app.task(ignore_result=True)
def search_keyword(keyword, keyword_id):
    crawler.info('Searching for keyword "{}"'.format(keyword))
    cur_page = 1
    encode_keyword = url_parse.quote(keyword)
    while cur_page < LIMIT:
        cur_url = URL.format(encode_keyword, cur_page)
        # the first result page can be fetched at a lower auth level;
        # deeper result pages require a logged-in session
        if cur_page == 1:
            search_page = get_page(cur_url, auth_level=1)
        else:
            search_page = get_page(cur_url, auth_level=2)

        if not search_page:
            crawler.warning('No search result for keyword {}, the source page is {}'.format(
                keyword, search_page))
            return
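
# --- Hedged usage sketch (added for illustration, not part of the original source) ---
# search_keyword is a Celery task, so it is normally queued by a scheduler task
# rather than called directly. The sketch below assumes that KeywordsOper exposes
# a get_search_keywords() helper yielding (keyword, keyword_id) pairs, that this
# module lives at tasks/search.py, and that a worker listens on a queue named
# 'search_crawler'; verify these names against db/dao.py and the Celery config
# in .workers before relying on them.
@app.task(ignore_result=True)
def execute_search_task():
    for keyword, keyword_id in KeywordsOper.get_search_keywords():
        # send_task queues search_keyword by name, so the scheduler process does
        # not need to import this module's task object directly
        app.send_task('tasks.search.search_keyword',
                      args=(keyword, keyword_id),
                      queue='search_crawler',
                      routing_key='for_search_info')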