def main():
    """Log in to yaozh.com and save the member page to ``08.html``.

    A single cookie-aware opener is used for both requests, so any cookies
    the server sets in the login response are stored in the cookie jar and
    sent again with the follow-up request for the member page.

    Side effects:
        Prints the login response object and writes the member page HTML
        to ``08.html`` in the current working directory.

    Raises:
        urllib.error.URLError: if either HTTP request fails.
    """
    header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    }

    # Fixed host typo ("yaozhi.com"): cookies are scoped to a domain, so the
    # login must hit the same site as the member page requested below.
    login_url = "https://www.yaozh.com/login"

    # NOTE(review): credentials are hard-coded here; they should not live in
    # source control. "formhash" looks like a per-session token copied from
    # the login page -- confirm whether it must be fetched fresh each run.
    login_form_data = {
        "username": "******",
        "pwd": "zmw981111",
        "formhash": "99D101779B",
        # Plain URL: urlencode() percent-encodes it. Supplying an already
        # encoded value would be encoded a second time ("%" -> "%25").
        "backurl": "https://www.yaozh.com/",
    }

    # Build an opener that records cookies from responses and replays them.
    cook = cookiejar.CookieJar()
    handler = urllib.request.HTTPCookieProcessor(cook)
    opener = urllib.request.build_opener(handler)

    # Passing ``data`` makes this a POST request.
    login_str = parse.urlencode(login_form_data).encode("utf-8")
    login_request = urllib.request.Request(login_url, headers=header, data=login_str)
    with opener.open(login_request) as resp:
        print(resp)
        # The body itself is not needed; read it so the response is fully
        # consumed before the connection is released.
        resp.read()

    # Same opener, so the cookies captured above accompany this request.
    member_url = "https://www.yaozh.com/member/"
    member_request = urllib.request.Request(member_url, headers=header)
    with opener.open(member_request) as member_resp:
        data = member_resp.read().decode("utf-8")

    # Explicit encoding: the platform default may be unable to encode the page.
    with open("08.html", "w", encoding="utf-8") as f:
        f.write(data)
def __init__(self):
    """Prepare default request headers and a cookie-aware URL opener.

    Stores the headers on ``self.header``, the cookie jar on
    ``self.cookie_handle`` and the opener on ``self.opener``. The opener is
    also installed as the process-wide default for ``urllib.request``.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36'
    )
    self.header = {'User-Agent': user_agent}

    # Cookie support: the jar collects cookies from responses and the
    # processor attaches them to later requests made through the opener.
    self.cookie_handle = cookiejar.CookieJar()
    cookie_processor = urllib.request.HTTPCookieProcessor(self.cookie_handle)
    self.opener = urllib.request.build_opener(cookie_processor)

    # Make plain urllib.request.urlopen() calls use this opener as well.
    urllib.request.install_opener(self.opener)
delayload
    bool值,是否支持延迟加载(需要的时候才会去读取cookie文件)
MozillaCookieJar
    * FileCookieJar 子类,创建与 Mozilla内核浏览器兼容的 FileCookieJar 实例
LWPCookieJar
    * FileCookieJar 子类,创建与 libwww-perl 标准的 Set-Cookie3 兼容的 FileCookieJar 实例

* HTTPCookieProcessor 是 urllib.request 的对象(Handler系列),作用就是保存服务器响应的cookie

* cookie处理 demo
    from urllib import request
    from http import cookiejar

    # 创建 cookie 对象,用于保存cookie信息
    cookie = cookiejar.CookieJar()

    # 创建 HTTPCookieProcessor 对象(Cookie处理器对象)
    # 返回的就是一个处理器对象
    cookieHandler = request.HTTPCookieProcessor(cookie)

    # 创建opener
    opener = request.build_opener()

    # 添加cookie处理器对象到opener
    opener.add_handler(cookieHandler)

    with opener.open('http://javaweb.io') as response:
        print(cookie)    # <CookieJar[<Cookie JSESSIONID=121BA7EE1190B9358F80E219F5DD3EEB for javaweb.io/>]>

    # 此时,该 opener 已经有了该cookie,如果该 opener 再次发起请求,会携带cookie

* 登录 javaweb.io 实战
    from urllib import request,parse
'Connection': 'keep-alive' } hdr5 = { 'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', 'Accept-Encoding': 'none', 'Accept-Language': 'en-US,en;q=0.8', 'Connection': 'keep-alive' } hdr = random.choice([hdr1, hdr2, hdr3, hdr4, hdr5]) req = urllib.request.Request(site, headers=hdr) cj = cookiejar.CookieJar() opener = urllib.request.build_opener( urllib.request.HTTPCookieProcessor(cj) ) # Added cookie handling function while url opening due to new myntra weird redirect behaviour response = opener.open(req) content = response.read() response.close() page_soup = soup(content, "html.parser") xyz = page_soup.findAll("div", {"class": "productInfo"}) pqr = page_soup.findAll("div", {"class": "product"}) y = len(xyz) print(y)