from urllib3 import *
import json
import re
from Spider_Def import Read_Headers
from bs4 import BeautifulSoup

# Loaded through the project helper Read_Headers; presumably the HTTP request
# headers stored in 'head_jd.txt' -- confirm against Spider_Def.
headers = Read_Headers('head_jd.txt')
# Ignoring warnings is an unsafe choice: certificate errors get skipped.
disable_warnings()
# Introduction to PoolManager:
# http://www.cnblogs.com/shadowwalker/p/5283372.html
http = PoolManager()


def getJSONDetail(url, productId, page):
    """Point a template URL at a different product id and page.

    Args:
        url: Template URL. It must contain ``callback=``, ``&productId=``,
            ``&score``, ``&page=`` and ``&pageSize`` in that order.
        productId: Product id to substitute; converted with ``str()``.
        page: Page value to substitute; converted with ``str()`` so both
            ``int`` and ``str`` arguments are accepted.

    Raises:
        AttributeError: If ``url`` does not match the expected layout
            (``re.match`` returns ``None``).
    """
    # Find the callback, productId and page values in the url.
    url_callback, url_productId, url_page = re.match(
        '.+callback=(.+)&productId=(.+)&score.+&page=(.+)&pageSize.+',
        url).groups()
    # Replace productId. str.replace swaps every occurrence of the old id
    # text, not only the one in the productId parameter.
    url = url.replace(url_productId, str(productId))
    # Replace page: keep everything before '&page=' and everything after the
    # first '&pageSize', then put the new page value in between.
    url1 = url.split('&page=')
    url2 = url.split('&pageSize')
    # str(page) mirrors the str(productId) conversion above; concatenating an
    # int page directly would raise TypeError.
    url = url1[0] + '&page=' + str(page) + '&pageSize' + url2[1]
    # NOTE(review): within the code visible here the rebuilt url is neither
    # returned nor requested -- confirm how the caller is meant to receive it.
from urllib3 import *
import json
import re
from Spider_Def import Read_Headers
from bs4 import BeautifulSoup

# Read the header information, mainly the Cookie value.
headers = Read_Headers('head_tm.txt')
# Ignoring warnings is an unsafe choice: certificate errors get skipped.
disable_warnings()
# Introduction to PoolManager (mainly used to control concurrency):
# http://www.cnblogs.com/shadowwalker/p/5283372.html
http = PoolManager()


def getJSONDetail(url, itemId, currentPage):
    """Point a template URL at a different item id.

    Args:
        url: Template URL. It must contain ``itemId=``, ``&spuId``,
            ``&currentPage=``, ``&append`` and ``callback=`` in that order,
            with the callback value running to the end of the string.
        itemId: Item id to substitute; converted with ``str()``.
        currentPage: Page value for the ``currentPage`` parameter. It is not
            yet consumed by the statements visible here.

    Raises:
        AttributeError: If ``url`` does not match the expected layout
            (``re.match`` returns ``None``).
    """
    # Find the itemId, currentPage and callback values in the url.
    # The parameter is spelled '&currentPage'; the previous text '¤tPage' was
    # '&curren' mis-decoded as an HTML entity and could never match.
    url_itemId, url_currentPage, url_callback = re.match(
        '.+itemId=(.+)&spuId.+&currentPage=(.+)&append.+callback=(.+)$',
        url).groups()
    # Replace itemId. str.replace swaps every occurrence of the old id text,
    # not only the one in the itemId parameter.
    url = url.replace(url_itemId, str(itemId))
    # Replace currentPage: split around the parameter so the part before
    # '&currentPage=' and the part after '&append=' can be reused.
    url1 = url.split('&currentPage=')
    url2 = url.split('&append=')
    # NOTE(review): within the code visible here the url is not yet
    # reassembled from url1/url2 with the new currentPage -- confirm the
    # remaining steps.