import urllib2    # HTTP client library used for the crawler
import cookielib  # provides cookie-storage objects that plug into urllib2 for accessing Internet resources

# http proxy
# Some sites count the requests coming from one IP over a period of time and
# block you if you make too many. You can work around this with proxy servers,
# switching to a different proxy every so often to avoid getting banned.
# A proxy server is configured like this:
proxy_server = {"http": 'http://some-proxy.com:8080'}
proxy_handler = urllib2.ProxyHandler(proxy_server)
proxy_opener = urllib2.build_opener(proxy_handler)
urllib2.install_opener(proxy_opener)

# cookie setup
# CookieJar --derives--> FileCookieJar --derives--> MozillaCookieJar and LWPCookieJar
#cookie = cookielib.CookieJar()  # declare a CookieJar instance to hold cookies in memory
cookie = cookielib.MozillaCookieJar(
    'cookies/baidu_cookie.txt')  # declare a MozillaCookieJar instance to hold cookies and later write them to a file
#cookie.load('cookies/baidu_cookie.txt', ignore_discard=True, ignore_expires=True)  # read cookies back from the file into the jar
cookie_handler = urllib2.HTTPCookieProcessor(
    cookie)  # build a cookie handler with urllib2's HTTPCookieProcessor
cookie_opener = urllib2.build_opener(cookie_handler)  # build an opener from the handler

# request parameters
url = "https://www.baidu.com"
#url = "https://passport.baidu.com/v2/api/?login"
#url = "https://passport.csdn.net/account/login"
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    # 'Referer': ...  (value truncated in the original snippet)
}
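# The comments above describe rotating through proxy servers so no single IP
# exceeds a site's request limit. A minimal sketch of that idea (the proxy
# URLs and the per-request rotation below are illustrative assumptions, not
# part of the original snippet):
import random
import urllib2

proxy_pool = [
    'http://proxy-a.example.com:8080',  # hypothetical proxies
    'http://proxy-b.example.com:8080',
]

def open_with_random_proxy(url):
    # Build a fresh opener around a randomly chosen proxy so successive
    # requests go out through different IPs.
    proxy_handler = urllib2.ProxyHandler({'http': random.choice(proxy_pool)})
    opener = urllib2.build_opener(proxy_handler)
    return opener.open(url)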
def login(self):
    try:
        cookiejar = cookielib.MozillaCookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(cookiejar))
        # cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
        # self.opener = urllib2.build_opener(cookie_support)
        self.opener.addheaders = [('User-agent', 'Opera/9.23')]
        urllib2.install_opener(self.opener)

        url1 = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=' + \
               'sinaSSOController.preloginCallBack&su=c2h1aW11Xzg4JTQwMTYzLmNvbQ%3D%3D' + \
               '&client=ssologin.js(v1.3.17)&_=' + str(time.time()).split('.')[0]
        url2 = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.17)'

        header1 = {
            #'Host': 'login.sina.com.cn',
            'Host': 'weibo.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Accept': '*/*',
            'Accept-Language': 'zh-cn,zh;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
            'Connection': 'keep-alive',
            'Referer': 'http://weibo.com/',
            'Cookie': 'SUS=SID-1789744932-1325485823-JA-ab6v3-825d71755a572f8423b7abbd7a8674b4; SUE=es%3D0c9e0ff431d182656f826475eb0dfa43%26ev%3Dv1%26es2%3D152ef8d9ca7753718fc83db371ee72ff%26rs0%3DDgKaAcjk3lwVy5kPC5dnIiNo3YUrEtWRRtvJ2JPNAzFWIEw3u3hX%252FbvwWNCJnEeyPICk%252B9J0ZjSup9vVgqJCOL%252B%252FUFztxT69u7gTnqHx7pxkM7CypI5pQF7ah71N5GvK6F4lPsvD44JkS8p%252FcdyezraMt8yyU5MsB%252B397U2LRUo%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1325485823%26et%3D1325572223%26d%3Dc909%26i%3D13b0%26us%3D1%26uid%3D1789744932%26user%3Dshuimu_88%2540163.com%26ag%3D4%26name%3Dshuimu_88%2540163.com%26nick%3Dguge%26fmp%3D%26lcp%3D2011-08-04%252015%253A41%253A22%26vf%3D0%26ac%3D2; ALF=1326090618; SSOLoginState=1325485823; wvr=3.6; USRHAJAWB=usrmdins13121; [email protected]; USRHAWB=usrmdins212_542; ads_ck=1; _s_tentry=weibo.com; UOR=weibo.com,weibo.com,; Apache=8430538139278.456.1325485892311; SINAGLOBAL=8430538139278.456.1325485892311; ULV=1325485892540:1:1:1:8430538139278.456.1325485892311:'
        }
        header2 = {
            'Host': 'login.sina.com.cn',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-cn,zh;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
            'Connection': 'keep-alive',
            'Referer': 'http://weibo.com/',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Content-Length': '378'
        }
        header3 = {
            'Host': 'weibo.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Accept': '*/*',
            'Accept-Language': 'zh-cn,zh;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
            'Connection': 'keep-alive',
            'Referer': url2
        }

        req1 = urllib2.Request('http://weibo.com/whitefoxx', None, header1)
        #self.opener.open('http://weibo.com/')
        #req1 = urllib2.Request('http://')
        fs = self.opener.open(req1)
        # fs = urllib2.urlopen(req1)
        hdoc = fs.read()
        print hdoc
        return  # NOTE: this early return makes the login code below unreachable as written

        # buf = StringIO.StringIO(hdoc)
        # f = gzip.GzipFile(fileobj=buf)
        # hdoc = f.read()
        hdoc = hdoc.split('(')[1].split(')')[0]
        items = json.loads(hdoc)
        servertime = items['servertime']
        nonce = items['nonce']
        sp = self.hash_password('zcgyb0668', servertime, nonce)
        print sp
        postdata = {
            'entry': 'weibo',
            'gateway': '1',
            'from': '',
            'savestate': '7',
            'useticket': '1',
            'ssosimplelogin': '******',
            'vsnf': '1',
            'vsnval': '',
            'su': 'c2h1aW11Xzg4JTQwMTYzLmNvbQ==',
            'service': 'miniblog',
            'servertime': str(servertime),
            'nonce': nonce,
            'pwencode': 'wsse',
            'sp': sp,
            'encoding': 'UTF-8',
            'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype': 'META'
        }
        postdata = urllib.urlencode(postdata)
        req2 = urllib2.Request(url2, postdata)
        fs = self.opener.open(req2)
        # fs = urllib2.urlopen(req2)
        print fs.read()
        fs.close()
        print cookiejar

        req3 = urllib2.Request(
            'http://weibo.com/u/1789744932?wvr=3.6&lf=reg')
        f = open('foxxcyb.html', 'w')
        fs2 = self.opener.open(req3)
        # fs2 = urllib2.urlopen(req3)
        f.write(fs2.read())
        fs2.close()
        f.close()
        print cookiejar
    except urllib2.URLError, e:
        print e
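# hash_password() is called above but not shown. For ssologin.js v1.3.x the
# 'wsse' scheme was conventionally three nested SHA-1 digests; this sketch
# follows that convention and is an assumption, not code from the snippet:
import hashlib

def hash_password(password, servertime, nonce):
    # sp = sha1(sha1(sha1(password)) + servertime + nonce), hex-encoded
    s1 = hashlib.sha1(password).hexdigest()
    s2 = hashlib.sha1(s1).hexdigest()
    return hashlib.sha1(s2 + str(servertime) + str(nonce)).hexdigest()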
def getURLRequestData(self, params={}, post_data=None):
    def urlOpen(req, customOpeners):
        if len(customOpeners) > 0:
            opener = urllib2.build_opener(*customOpeners)
            response = opener.open(req)
        else:
            response = urllib2.urlopen(req)
        return response

    cj = cookielib.MozillaCookieJar()
    response = None
    req = None
    out_data = None
    opener = None

    if 'host' in params:
        host = params['host']
    else:
        host = self.HOST

    if 'header' in params:
        headers = params['header']
    elif None != self.HEADER:
        headers = self.HEADER
    else:
        headers = {'User-Agent': host}

    if dbg == 'true':
        log.info('pCommon - getURLRequestData() -> params: ' + str(params))
        log.info('pCommon - getURLRequestData() -> params: ' + str(headers))

    customOpeners = []
    # cookie support
    if 'use_cookie' not in params and 'cookiefile' in params and (
            'load_cookie' in params or 'save_cookie' in params):
        params['use_cookie'] = True
    if params.get('use_cookie', False):
        customOpeners.append(urllib2.HTTPCookieProcessor(cj))
        if params.get('load_cookie', True):
            try:
                cj.load(params['cookiefile'], ignore_discard=True)
            except:
                pass

    if None != post_data:
        if dbg == 'true':
            log.info('pCommon - getURLRequestData() -> post data: ' + str(post_data))
        if params.get('raw_post_data', False):
            dataPost = post_data
        else:
            dataPost = urllib.urlencode(post_data)
        req = urllib2.Request(params['url'], dataPost, headers)
    else:
        req = urllib2.Request(params['url'], None, headers)

    if not params.get('return_data', False):
        out_data = urlOpen(req, customOpeners)
    else:
        gzip_encoding = False
        try:
            response = urlOpen(req, customOpeners)
            if response.info().get('Content-Encoding') == 'gzip':
                gzip_encoding = True
            data = response.read()
            response.close()
        except urllib2.HTTPError, e:
            if e.code == 404:
                if dbg == 'true':
                    log.info(
                        'pCommon - getURLRequestData() -> !!!!!!!! 404 - page not found handled'
                    )
                if e.fp.info().get('Content-Encoding') == 'gzip':
                    gzip_encoding = True
                data = e.fp.read()
                #e.msg
                #e.headers
            else:
                #printExc()
                raise
        try:
            if gzip_encoding:
                if dbg == 'true':
                    log.info('pCommon - getURLRequestData() -> Content-Encoding == gzip')
                buf = StringIO(data)
                f = gzip.GzipFile(fileobj=buf)
                out_data = f.read()
            else:
                out_data = data
        except:
            out_data = data
# --* encoding:utf-8 *--
import urllib
import urllib2
import cookielib

# values = {"username": "******", "password": "******"}
# data = urllib.urlencode(values)
# url = "http://127.0.0.1:8080/cba/"
# request = urllib2.Request(url, data)
# response = urllib2.urlopen(request)
#
# f = open(r'F:\PythonLearning\resource\就分.html'.decode('utf-8'), 'w')
# f.write(response.read())

cookie = cookielib.MozillaCookieJar(r'F:\PythonLearning\resource\workfile')
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
postdata = urllib.urlencode({'stuid': '201200131012', 'pwd': '23342321'})
# URL for logging in to the academic affairs system
loginUrl = 'http://jwxt.sdu.edu.cn:7890/pls/wwwbks/bks_login2.login'
# Simulate the login; the cookies are captured by the jar
result = opener.open(loginUrl, postdata)
# Save the cookies to the cookie file
cookie.save(ignore_discard=True, ignore_expires=True)
f = open(r'F:\PythonLearning\resource\就分.html'.decode('utf-8'), 'w')
f.write(result.read())
# Reuse the cookies to request another URL -- the grade-query page
gradeUrl = 'http://jwxt.sdu.edu.cn:7890/pls/wwwbks/bkscjcx.curscopre'
result = opener.open(gradeUrl)
print result.read()
def __init__(self, host, auth_function, user_agent, source,
             host_override=None, extra_headers=None, save_cookies=False,
             auth_tries=3, account_type=None, debug_data=True, secure=True,
             ignore_certs=False, rpc_tries=3, options=None):
    """Creates a new HttpRpcServer.

    Args:
      host: The host to send requests to.
      auth_function: A function that takes no arguments and returns an
        (email, password) tuple when called. Will be called if authentication
        is required.
      user_agent: The user-agent string to send to the server. Specify None to
        omit the user-agent header.
      source: The source to specify in authentication requests.
      host_override: The host header to send to the server (defaults to host).
      extra_headers: A dict of extra headers to append to every request. Values
        supplied here will override other default headers that are supplied.
      save_cookies: If True, save the authentication cookies to local disk.
        If False, use an in-memory cookiejar instead. Subclasses must
        implement this functionality. Defaults to False.
      auth_tries: The number of times to attempt auth_function before failing.
      account_type: One of GOOGLE, HOSTED_OR_GOOGLE, or None for automatic.
      debug_data: Whether debugging output should include data contents.
      secure: If the requests sent using Send should be sent over HTTPS.
      ignore_certs: If the certificate mismatches should be ignored.
      rpc_tries: The number of rpc retries upon http server error (i.e.
        Response code >= 500 and < 600) before failing.
      options: the command line options (ignored in this implementation).
    """
    if secure:
        self.scheme = "https"
    else:
        self.scheme = "http"
    self.ignore_certs = ignore_certs
    self.host = host
    self.host_override = host_override
    self.auth_function = auth_function
    self.source = source
    self.authenticated = False
    self.auth_tries = auth_tries
    self.debug_data = debug_data
    self.rpc_tries = rpc_tries
    # TODO(user): Consider validating account_type?
    self.account_type = account_type
    self.extra_headers = {}
    if user_agent:
        self.extra_headers["User-Agent"] = user_agent
    if extra_headers:
        self.extra_headers.update(extra_headers)
    self.save_cookies = save_cookies
    # By default there are no cookies to use or save.
    self.cookie_jar = cookielib.MozillaCookieJar()
    self.opener = self._GetOpener()
    if self.host_override:
        logger.debug("Server: %s; Host: %s", self.host, self.host_override)
    else:
        logger.debug("Server: %s", self.host)
    # If we're being run against localhost, set the dev_appserver cookie.
    if ((self.host_override and self.host_override == "localhost")
            or self.host == "localhost" or self.host.startswith("localhost:")):
        self._DevAppServerAuthenticate()
def login():
    try:
        '''get login viewstate'''
        login_url = 'http://jwc1.wtc.edu.cn/default3.aspx'
        login_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0'
        }
        view = urllib2.urlopen(
            urllib2.Request(login_url, headers=login_headers)).read()
        soup = BeautifulSoup(view, "html.parser")
        tmp = soup.find('input', attrs={'name': '__VIEWSTATE'})
        viewstate = tmp['value']

        '''get login values'''
        StudentNo = raw_input("输入你的学号:")   # prompt: enter your student number
        PassWord = raw_input("请输入你的密码:")  # prompt: enter your password
        login_data = urllib.urlencode({
            "__VIEWSTATE": viewstate,
            "TextBox1": StudentNo,
            "TextBox2": PassWord,
            "ddl_js": u'学生',  # role: "student"
            "Button1": "+%B5%C7+%C2%BC+"
        })

        '''Alternative login approach:
            request = urllib2.Request(login_url, login_data, login_headers)
            result = opener.open(request)
        Of the two ways to log in, the one used below needs
        "opener.addheaders" to modify the HTTP headers.
        '''
        mycookie = cookielib.MozillaCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(mycookie))

        '''login and get cookie'''
        opener.addheaders = [(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0'
        )]
        result = opener.open(login_url, login_data)

        '''find error'''
        soup = BeautifulSoup(result.read(), "html.parser")
        error = soup.find_all('script')
        source = error[0].get_text().encode("utf-8")
        PassWord_error = "密码错误!!"  # server message: "wrong password!!"
        PassWord_tmp = source.find(PassWord_error)
        StudentNo_error = "用户名不存在或未按照要求参加教学活动!!"  # "user does not exist or has not taken part in teaching activities as required!!"
        StudentNo_tmp = source.find(StudentNo_error)
        try:
            if PassWord_tmp != -1:
                sys.exit(0)
        except:
            print PassWord_error
            print "请重新",  # "please retry"
            main()
        try:
            if StudentNo_tmp != -1:
                sys.exit(0)
        except:
            print StudentNo_error
            print "请重新",
            main()

        '''get StudentName'''
        xs_main_url = "http://jwc1.wtc.edu.cn/xs_main.aspx?xh=" + StudentNo
        xs_main = opener.open(xs_main_url)
        soup = BeautifulSoup(xs_main.read(), "html.parser")
        tmp = soup.find(id="xhxm")
        StudentName = str(tmp.string.decode('gbk')[:-2])
        result_url = ("http://jwc1.wtc.edu.cn/xscj_gc.aspx?xh=" + StudentNo +
                      "&xm=" + StudentName + "&gnmkdm=N121605")
        viewstate_headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0",
            'Referer': xs_main_url,
        }
        result_headers = {
            'Referer': result_url,
            'user-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"
        }

        '''get result viewstate'''
        request_gra1 = urllib2.Request(result_url, headers=viewstate_headers)
        result = opener.open(request_gra1)
        soup = BeautifulSoup(result.read(), "html.parser")
        tmp = soup.find('input', attrs={'name': '__VIEWSTATE'})
        viewstate = tmp['value']

        '''get Inquiry mode'''
        Inquiry_mode = raw_input("请选择按学年(1)还是学期(2)查询输入1或2:")  # query by year (1) or by term (2)?
        if Inquiry_mode == '1':
            Inquiry_mode = '1'
            Button = 'Button5'
            Value = '按学年查询'  # form value: "query by academic year"
            Semester = ''
        elif Inquiry_mode == '2':
            Inquiry_mode = '2'
            Semester = raw_input("请输入第几学期(1或2):")  # which term (1 or 2)?
            Button = 'Button1'
            Value = '按学期查询'  # form value: "query by term"
        else:
            print "请键入1或2"  # "please type 1 or 2"
            main()
        Interval = raw_input("请输入学年区间例(2015-2016):")  # year range, e.g. 2015-2016
        result_data = urllib.urlencode({
            '__VIEWSTATE': viewstate,
            'ddlXN': Interval,
            'ddlXQ': Semester,
            Button: Value
        })

        '''login and get result then return'''
        result = urllib2.Request(result_url, result_data, result_headers)
        result = opener.open(result)
        return result.read()
    except urllib2.URLError, e:
        if hasattr(e, "code"):
            return e.code
def get_response_and_text(url, headers=None, needupdate=False, update_info=None):
    if headers:
        this_headers = headers
    else:
        this_headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
        }
    num_reply = 5
    while True:  # keep retrying the request until it succeeds
        try:
            timea = time.time()
            cookies1 = cookielib.MozillaCookieJar()
            proxies1 = {'http': 'http://' + get_proxy_from_redis()}
            proxyhandler = urllib2.ProxyHandler(proxies1)
            cookiehandler = urllib2.HTTPCookieProcessor(cookies1)
            request1 = urllib2.Request(url=url, headers=this_headers)
            opener1 = urllib2.build_opener(proxyhandler, cookiehandler)
            response_in_function = opener1.open(request1, timeout=timeout_value)
            response_in_function_text = response_in_function.read()
            if response_in_function.code == 204:
                num_reply -= 1
                if num_reply < 0:
                    sys.exit()
                else:
                    raise Exception
            if needupdate:
                file1 = BASIC_FILE + '/chengdu/chengdu_sechdule.text'
                sechdule = 1700000
                sechdule = update_info['page_num']
                with open(file1, 'w') as fl:
                    fl.write(str(sechdule))  # cast added: write() needs a string
            break
        except Exception as e:
            if hasattr(e, 'code'):
                if e.code in [404, 400]:
                    opener1.close()
                    sys.exit()
                elif e.code in [204, 403]:  # fixed: original compared the int to a list with ==
                    # there may be data, but we were blocked
                    num_reply -= 1
                    opener1.close()
                    if num_reply < 1:
                        sys.exit()
    timeb = time.time()
    proxy_here = proxies1.values()[0].split('//')[1]
    opener1.close()
    if timeb - timea < 10:
        proxy_sendback(proxy_here)
    if response_in_function.code == 204:
        return {'response_in_function': None, 'response_in_function_text': {}}
    return {
        'response_in_function': response_in_function,
        'response_in_function_text': response_in_function_text
    }
def post_machine(t, j):
    # Netscape cookie-file fields are TAB-separated:
    # domain, domain_flag, path, secure, expiry, name, value.
    # (The original used spaces and omitted the expiry field, which
    # MozillaCookieJar.load() rejects; both are fixed here. The expiry
    # timestamp is an arbitrary far-future value.)
    pre = 'learn.tsinghua.edu.cn\tFALSE\t/\tFALSE\t2147483647\t'
    f = open('cookies.txt', 'wb')
    f.write('# Netscape HTTP Cookie File\n')  # newline added: the magic header must be on its own line
    f.write(''.join([pre, 'JSESSIONID', '\t', j, '\n']))
    f.write(''.join([pre, 'THNSV2COOKIE', '\t', t, '\n']))
    f.close()
    cjar = cookielib.MozillaCookieJar()
    cjar.load('cookies.txt', ignore_discard=True, ignore_expires=True)
    #print cjar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cjar))
    opener.addheaders = [
        ('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')
    ]
    urllib2.install_opener(opener)
    domain = 'http://learn.tsinghua.edu.cn'
    login_page = "".join(
        [domain, '/MultiLanguage/lesson/teacher/loginteacher.jsp'])
    userName = "******"
    password = "******"
    page = opener.open(login_page,
                       urllib.urlencode({
                           'userid': userName,
                           'userpass': password
                       }))  # POST
    cjar.save('cookies.txt', ignore_discard=True, ignore_expires=True)
    print cjar
    try:
        # get list of courses
        page = opener.open("".join(
            [domain, '/MultiLanguage/lesson/student/MyCourse.jsp?typepage=2']))
        soup = BeautifulSoup(page.read())
        course = [
            soup.findAll(attrs={'class': 'info_tr'}),
            soup.findAll(attrs={'class': 'info_tr2'})
        ]
        bbs_url = 'http://learn.tsinghua.edu.cn/MultiLanguage/public/bbs/bbs_talk_submit.jsp?post_par_id=0000&post_up_url=talk_list_student.jsp&post_cate_id=1'
        post_title = 'Americans%20attack%20Tsinghua%20network%3F'
        post_detail = 'Yes%2C%20we%20scan%21'
        bbs_url = "".join([
            bbs_url, '&post_title=', post_title, '&post_detail=', post_detail
        ])
        count = 0
        for c in course:
            course_id = c[0].td.a['href'][58:]
            # get post_bbs_id
            page = opener.open(''.join([
                domain,
                '/MultiLanguage/public/bbs/gettalkid_student.jsp?course_id=',
                course_id
            ]))
            soup = BeautifulSoup(page.read())
            new_url = soup.find(attrs={'id': 'new_url'})['href']
            post_bbs_id = new_url[52:new_url.find('&', 52)]
            POST_url = "".join([
                bbs_url, '&course_id=', course_id, '&post_bbs_id=', post_bbs_id
            ])
            # post to the bbs
            opener.open(POST_url)
            print course_id, "done!"
            count = count + 1
            if count > 2:
                break
    except Exception, e:
        print str(e)
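# A safer alternative to hand-writing the Netscape file format (a sketch: it
# builds the same two cookies as Cookie objects and lets MozillaCookieJar do
# the serialization; the expiry value is an arbitrary assumption):
import cookielib

def save_session_cookies(t, j, filename='cookies.txt'):
    jar = cookielib.MozillaCookieJar(filename)
    for name, value in [('JSESSIONID', j), ('THNSV2COOKIE', t)]:
        jar.set_cookie(cookielib.Cookie(
            version=0, name=name, value=value,
            port=None, port_specified=False,
            domain='learn.tsinghua.edu.cn', domain_specified=False,
            domain_initial_dot=False,
            path='/', path_specified=True,
            secure=False, expires=2147483647, discard=False,
            comment=None, comment_url=None, rest={}))
    jar.save(ignore_discard=True, ignore_expires=True)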
def load_cookies():
    '''Log in to Weibo the way a browser would and return the cookie string.'''
    mobile = WEIBO_USER
    password = WEIBO_PWD
    cookie_str = ''
    user_agent = '''Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10'''
    header = {'User-Agent': user_agent}
    cj = cookielib.MozillaCookieJar()
    if os.path.isfile(COOKIES_FILE):
        cj.load(COOKIES_FILE)
        #opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        cookie_list = []
        for cookie in cj:
            if cookie.domain == '.weibo.cn':
                cookie_list.append(str(cookie).split(' ')[1])
        cookie_str = ';'.join(cookie_list)
        return cookie_str

    login_url = '''http://3g.sina.com.cn/prog/wapsite/sso/login.php?ns=1&backURL=http%3A%2F%2Fweibo.cn%2Fdpool%2Fttt%2Fhome.php%3Fs2w%3Dlogin&backTitle=%D0%C2%C0%CB%CE%A2%B2%A9&vt=4&wm=ig_0001_index'''
    res = urllib2.urlopen(urllib2.Request(login_url, headers=header))
    login_html = res.read()
    res.close()
    login_soup = BeautifulSoup(login_html)
    login_form_action = login_soup.find('form')['action']
    vk = pwd = submit = backURL = backTitle = None
    for input_box in login_soup.findAll('input'):
        if input_box['type'] == 'password':
            pwd = input_box['name']
        elif input_box['type'] == 'submit':
            submit = input_box['value']
        elif input_box['type'] == 'hidden':
            if input_box['name'] == 'vk':
                vk = input_box['value']
            elif input_box['name'] == 'backURL':
                backURL = input_box['value']
            elif input_box['name'] == 'backTitle':
                backTitle = input_box['value']
    submit = '%E7%99%BB%E5%BD%95'  # URL-encoded '登录' ("log in")
    params = urllib.urlencode({
        'mobile': mobile,
        pwd: password,
        'remember': 'on',
        'backURL': backURL,
        'vk': vk,
        'submit': submit
    })
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    submit_url = 'http://3g.sina.com.cn/prog/wapsite/sso/' + login_form_action
    res = opener.open(urllib2.Request(submit_url, headers=header), params)
    redirect_html = res.read()
    res.close()
    redirect_soup = BeautifulSoup(redirect_html)
    redirect_url = redirect_soup.find('a')['href']
    res = opener.open(urllib2.Request(redirect_url, headers=header))
    res.close()
    cj.save(COOKIES_FILE, ignore_discard=True)
    cookie_list = []
    for cookie in cj:
        if cookie.domain == '.weibo.cn':
            cookie_list.append(str(cookie).split(' ')[1])
    cookie_str = ';'.join(cookie_list)
    return cookie_str
def __init__(self):
    self.cookie = cookielib.MozillaCookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(self.cookie))
#-*-coding:utf-8-*-
import urllib, urllib2, cookielib, os

url = "http://go.cgddgc.cn/auth/login"
url1 = "http://go.cgddgc.cn/user/node"
valu = {
    "email": "*****@*****.**",
    "passwd": "cgd1011",
    "remember_me": "week"
}
data = urllib.urlencode(valu)
ckjar = cookielib.MozillaCookieJar("cookie.txt")
ckproc = urllib2.HTTPCookieProcessor(ckjar)
opener = urllib2.build_opener(ckproc)
request = urllib2.Request(url, data)
respon = opener.open(request)
respon = opener.open(url1)
result = respon.read()
respon.close()
ckjar.save()
print result
def __init__(self, username=None, password=None, auth=None, code=None,
             datadir=None, configdir=None, cachedir=None, debug=False):
    self.username = username
    self.password = password
    self.datadir = datadir or DATADIR
    self.configdir = configdir or CONFIGDIR
    self.cachedir = cachedir or CACHEDIR

    if not os.path.isdir(self.configdir):
        # Create the config dir as xdg would. Let exceptions bubble up
        os.makedirs(self.configdir, 0700)

    self.cookiejar = cookielib.MozillaCookieJar(
        filename=osp.join(self.configdir, "cookies.txt"))
    try:
        self.cookiejar.load()
    except (IOError, cookielib.LoadError) as e:
        log.error('Error reading cookies: %s', e)

    if auth:
        log.info("Injecting authenticated cookie")
        expires = int(auth.split('|')[1]) + 730 * 24 * 60 * 60
        cookie = cookielib.Cookie(
            version=0,
            name='_simpleauth_sess',
            value=auth,
            port=None,
            port_specified=False,
            domain=urlsplit(self.url)[1],
            domain_specified=False,
            domain_initial_dot=False,
            path='/',
            path_specified=False,
            secure=True,
            expires=expires,
            discard=False,
            comment=None,
            comment_url=None,
            rest={},
        )
        self.cookiejar.set_cookie(cookie)

    super(HumbleBundle, self).__init__(self.url,
                                       tag=APPNAME,
                                       cookiejar=self.cookiejar,
                                       debug=debug)

    if code:
        log.info("Validating browser code at '%s/user/humbleguard'", self.url)
        try:
            self.get("/user/humbleguard", {
                'goto': "/home",
                'qs': "",
                'code': code.upper()
            })
        except httpbot.urllib2.HTTPError as e:
            raise HumbleBundleError("Incorrect browser verification code")

    # "purchases" in the website. May be non-bundle like Store Purchases
    self.bundles = {}
    # "subproducts" in json. May be not a game, like Soundtracks and eBooks
    self.games = {}

    # Load bundles and games
    try:
        with open(osp.join(self.configdir, "bundles.json")) as fp1:
            with open(osp.join(self.configdir, "games.json")) as fp2:
                self.bundles = json.load(fp1)
                self.games = json.load(fp2)
                log.info("Loaded %d games from %d bundles",
                         len(self.games), len(self.bundles))
                self._merge()
    except IOError:
        self.update()
def refresh_cookie():
    # create a MozillaCookieJar instance
    cookie = cookielib.MozillaCookieJar()
    # read the cookie contents from the file into the jar
    cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)
    return cookie
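# Usage sketch for refresh_cookie(): attach the loaded jar to an opener so
# requests replay the saved session. Assumes 'cookie.txt' was written earlier
# by MozillaCookieJar.save(); the URL is a placeholder.
import urllib2

cookie = refresh_cookie()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
response = opener.open('http://www.example.com')
print response.read()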
# -*- coding: utf-8 -*-
import urllib2
import cookielib

__author__ = 'Anliven'
# Fetch cookies from a file and use them for a request

readcookie = cookielib.MozillaCookieJar()  # create a MozillaCookieJar instance
readcookie.load('Spider_cookielib_2.txt',
                ignore_discard=True,
                ignore_expires=True)  # read the cookie contents from the file into the jar
req = urllib2.Request("http://www.cn.bing.com")  # build the request
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(
    readcookie))  # create an opener with urllib2's build_opener method
response = opener.open(req)
print response.read()

# Suppose the cookie file holds a login cookie: loading it as above lets you
# replay that account's session.
# Create a cookie-aware opener, save the cookies after visiting the login URL,
# then reuse those cookies to visit other pages.
#-*-coding=utf-8-*-
import urllib, urllib2, re, requests, cookielib
from bs4 import BeautifulSoup

login_url = "https://github.com/login"
'''
session=requests.session()
html=session.get(login_url,headers=header)
'''
html = urllib.urlopen(login_url)
soup = BeautifulSoup(html.read(), "lxml")
for input in soup.form.find_all("input"):
    if input.get("name") == "authenticity_token":
        token = input.get("value")
        #print token
values = {
    'login': '******',
    'password': '******',
    'commit': 'Sign+in',
    'authenticity_token': token,
    'utf8': '%E2%9C%93'
}
data = urllib.urlencode(values)
ckjar = cookielib.MozillaCookieJar('gitcookie.txt')
handler = urllib2.HTTPCookieProcessor(ckjar)
opener = urllib2.build_opener(handler)
opener.addheaders = [
    ('Host', 'https://github.com'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0'),
    ('Referer', 'https://github.com')
]
auth_url = "https://github.com/session"
respon = opener.open(auth_url, data)
ckjar.save(ignore_discard=True, ignore_expires=True)
#respon=opener.open("https://github.com/")
print respon.read()
def fetch_html_encoded_roles(
        adfs_host,
        adfs_cookie_location,
        ssl_verification_enabled,
        provider_id,
        adfs_ca_bundle=None,
        username=None,
        password=None,
        sspi=None,
        u2f_trigger_default=None,
):
    # Support for Kerberos SSO on Windows via requests_negotiate_sspi
    # also requires tricking the server into thinking we're using IE
    # so that it serves up a redirect to the IWA page.
    if sspi:
        _headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'

    # Initiate session handler
    session = requests.Session()

    # LWPCookieJar has an issue on Windows when cookies have an 'expires' date
    # too far in the future and they are converted from timestamp to datetime.
    # MozillaCookieJar works because it does not convert the timestamps.
    # Duo uses 253402300799 for its cookies which translates into 9999-12-31T23:59:59Z.
    # Windows 64bit maximum date is 3000-12-31T23:59:59Z, and 32bit is 2038-01-18T23:59:59Z.
    session.cookies = cookielib.MozillaCookieJar(filename=adfs_cookie_location)

    try:
        have_creds = (username and password) or _auth_provider
        session.cookies.load(ignore_discard=not (have_creds))
    except IOError as e:
        error_message = getattr(e, 'message', e)
        logging.debug(
            u'Attempt to load authentication cookies into session failed. '
            u'Re-authentication will be performed. '
            u'The error: {}'.format(error_message))

    if _auth_provider and sspi:
        domain = None
        if username:
            if '@' in username:  # User principal name (UPN) format
                username, domain = username.split('@', 1)
            elif '\\' in username:  # Down-level logon name format
                domain, username = username.split('\\', 1)
        if system() == 'Windows':
            auth = _auth_provider(username, password, domain)
        elif username and domain:
            auth = _auth_provider(principal="{}@{}".format(username, domain),
                                  mutual_authentication=OPTIONAL)
        else:
            auth = _auth_provider(mutual_authentication=OPTIONAL)
        data = None
    else:
        auth = None
        data = {
            'UserName': username,
            'Password': password,
            'AuthMethod': provider_id
        }

    if adfs_ca_bundle:
        ssl_verification = adfs_ca_bundle
    else:
        ssl_verification = ssl_verification_enabled

    # Opens the initial AD FS URL and follows all of the HTTP302 redirects
    authentication_url = _IDP_ENTRY_URL.format(adfs_host, provider_id)
    response = session.post(authentication_url,
                            verify=ssl_verification,
                            headers=_headers,
                            auth=auth,
                            data=data)

    logging.debug(u'''Request:
    * url: {}
    * headers: {}
Response:
    * status: {}
    * headers: {}
    * body: {}
'''.format(authentication_url, response.request.headers, response.status_code,
           response.headers, response.text))

    if response.status_code >= 400:
        session.cookies.clear()
    mask = os.umask(0o177)
    try:
        session.cookies.save(ignore_discard=True)
    finally:
        os.umask(mask)

    del auth
    del data
    del username
    password = '******'
    del password

    # Decode the response
    return response, session
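# The core pattern above -- persisting a requests.Session's cookies through a
# MozillaCookieJar file -- in isolation (a sketch; the file name and URL are
# assumptions, not taken from the snippet):
import cookielib
import requests

session = requests.Session()
session.cookies = cookielib.MozillaCookieJar(filename='session_cookies.txt')
try:
    session.cookies.load(ignore_discard=True)
except IOError:
    pass  # first run: no cookie file exists yet
session.get('https://example.com')
session.cookies.save(ignore_discard=True)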
import quizduell
import cookielib
import json
import os
import argparse

parser = argparse.ArgumentParser(description='Give me a gameID!')
parser.add_argument("--username")
parser.add_argument("--password")
args = parser.parse_args()
username = args.username
password = args.password

# Load authenticated session from file to prevent unnecessary logins:
cookie_jar = cookielib.MozillaCookieJar('cookie_file')
api = quizduell.QuizduellApi(cookie_jar)

if os.access(cookie_jar.filename, os.F_OK):
    cookie_jar.load()
else:
    api.login_user(username, password)
    api = quizduell.QuizduellApi(cookie_jar)

result = api.current_user_games()
if 'access' in result:
    # Session invalid, re-login:
    api.login_user(username, password)
    result = api.top_list_rating()
if len(sys.argv) < 3:  # (assumed guard; the original snippet begins mid-block)
    print('usage:python classlist.py [name] [student code]')
    sys.exit(0)
else:
    name = sys.argv[1]
    code = sys.argv[2]

baseurl = 'http://jxgl.hdu.edu.cn/xf_xsqxxxk.aspx?'
# init my opener
paramters = urllib.urlencode({
    'xh': code,
    'xm': name.decode('utf-8').encode('gbk'),
    'gnmkdm': 'N121113'
})
cookiename = 'cookie.dat'
cookie = cookielib.MozillaCookieJar(cookiename)
cookie.load(cookiename, ignore_discard=True, ignore_expires=True)
opener = getopener(cookie)
para_dct = {}
response = opener.open(baseurl + paramters)
temp_content = read(response)
viewstate = re.compile('id="__VIEWSTATE" value="(.*)"').search(
    temp_content).groups()[0]
eventvali = re.compile('id="__EVENTVALIDATION" value="(.*)"').search(
    temp_content).groups()[0]
para_dct['ddl_kcxz'] = ''
para_dct['ddl_ywyl'] = ''
if __name__ == '__main__':
    action = sys.argv[8]
    uri = urllib2.urlparse.ParseResult(
        scheme=sys.argv[9],
        netloc=sys.argv[10],
        path=sys.argv[11],
        params='',
        query='',
        fragment='').geturl()
    set_cookie = sys.argv[12] if len(sys.argv) > 12 else None

    if 'XDG_DATA_HOME' in os.environ.keys() and os.environ['XDG_DATA_HOME']:
        f = os.path.join(os.environ['XDG_DATA_HOME'], 'reuzbl/cookies.txt')
    else:
        f = os.path.join(os.environ['HOME'], '.local/share/reuzbl/cookies.txt')

    jar = cookielib.MozillaCookieJar(f)
    try:
        jar.load(ignore_discard=True)
    except:
        pass

    req = urllib2.Request(uri)
    if action == 'GET':
        jar.add_cookie_header(req)
        if req.has_header('Cookie'):
            print req.get_header('Cookie')
    elif action == 'PUT':
        hdr = urllib2.httplib.HTTPMessage(
            StringIO.StringIO('Set-Cookie: %s' % set_cookie))
        res = urllib2.addinfourl(StringIO.StringIO(), hdr, req.get_full_url())
        # (The snippet ends here in the original; presumably the new cookie is
        # then extracted from the fake response and persisted, e.g.:
        #     jar.extract_cookies(res, req)
        #     jar.save(ignore_discard=True)
        # )
def get_headers_from_response(
        url,
        post=None,
        headers=[[
            'User-Agent',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12'
        ]]):
    return_headers = []
    if (DEBUG == True):
        logger.info("[scrapertools.py] get_headers_from_response url=" + url)
    if post is not None:
        if (DEBUG == True):
            logger.info("[scrapertools.py] post=" + post)
    else:
        if (DEBUG == True):
            logger.info("[scrapertools.py] post=None")

    # Initialize the cookie machinery
    ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.dat')
    if (DEBUG == True):
        logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None
    import cookielib

    # importing cookielib worked
    urlopen = urllib2.urlopen
    Request = urllib2.Request
    cj = cookielib.MozillaCookieJar()
    # This is a subclass of FileCookieJar
    # that has useful load and save methods

    if os.path.isfile(ficherocookies):
        if (DEBUG == True):
            logger.info("[scrapertools.py] Reading cookie file")
        # if we have a cookie file already saved
        # then load the cookies into the Cookie Jar
        try:
            cj.load(ficherocookies)
        except:
            if (DEBUG == True):
                logger.info("[scrapertools.py] The cookie file exists but is unreadable; deleting it")
            os.remove(ficherocookies)

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                  NoRedirectHandler())
    urllib2.install_opener(opener)

    # Timer
    inicio = time.clock()

    # Dict for the request headers
    txheaders = {}

    # Log the request
    if post is None:
        if (DEBUG == True):
            logger.info("[scrapertools.py] GET request")
    else:
        if (DEBUG == True):
            logger.info("[scrapertools.py] POST request")

    # Header array
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        if (DEBUG == True):
            logger.info("[scrapertools.py] header=%s" % str(header[0]))
        txheaders[header[0]] = header[1]
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    # Build the request
    req = Request(url, post, txheaders)
    handle = urlopen(req)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    #data=handle.read()
    info = handle.info()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Response")
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    location_header = ""
    for header in info:
        if (DEBUG == True):
            logger.info("[scrapertools.py] " + header + "=" + info[header])
        return_headers.append([header, info[header]])
    handle.close()
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    # Elapsed time
    fin = time.clock()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Downloaded in %d seconds " % (fin - inicio + 1))

    return return_headers
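# NoRedirectHandler is used above but not defined in these snippets. A common
# implementation (a sketch following the usual urllib2 recipe) returns 30x
# responses unchanged so the caller can read the Location header itself:
import urllib2

class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        # Hand the redirect response back instead of following it.
        infourl = urllib2.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl

    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302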
import urllib2
import cookielib

filename = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(
    filename)  # Declare a MozillaCookieJar object instance to save cookie and write to file
handler = urllib2.HTTPCookieProcessor(
    cookie)  # Create cookie processor using urllib2.HTTPCookieProcessor object
opener = urllib2.build_opener(handler)  # Build opener through handler
response = opener.open(
    'http://www.zhihu.com')  # Equal to urllib2.urlopen method
cookie.save(ignore_discard=True, ignore_expires=True)
def downloadpage(
        url,
        post=None,
        headers=[[
            'User-Agent',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12'
        ]],
        follow_redirects=True,
        timeout=socket.getdefaulttimeout()):
    if (DEBUG == True):
        logger.info("[scrapertools.py] downloadpage")
    if (DEBUG == True):
        logger.info("[scrapertools.py] url=" + url)
    if post is not None:
        if (DEBUG == True):
            logger.info("[scrapertools.py] post=" + post)
    else:
        if (DEBUG == True):
            logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    # Initialize the cookie machinery
    ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.dat')
    if (DEBUG == True):
        logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        if (DEBUG == True):
            logger.info("[scrapertools.py] Importing cookielib")
        import cookielib
    except ImportError:
        if (DEBUG == True):
            logger.info("[scrapertools.py] cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            if (DEBUG == True):
                logger.info("[scrapertools.py] Importing ClientCookie")
            import ClientCookie
        except ImportError:
            if (DEBUG == True):
                logger.info("[scrapertools.py] ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            if (DEBUG == True):
                logger.info("[scrapertools.py] ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        if (DEBUG == True):
            logger.info("[scrapertools.py] cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if (DEBUG == True):
            logger.info("[scrapertools.py] Cookies enabled")

        if os.path.isfile(ficherocookies):
            if (DEBUG == True):
                logger.info("[scrapertools.py] Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                if (DEBUG == True):
                    logger.info("[scrapertools.py] The cookie file exists but is unreadable; deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            if (DEBUG == True):
                logger.info("[scrapertools.py] opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            if (DEBUG == True):
                logger.info("[scrapertools.py] opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed; fire off the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dict for the request headers
    txheaders = {}

    # Build the request
    if post is None:
        if (DEBUG == True):
            logger.info("[scrapertools.py] GET request")
    else:
        if (DEBUG == True):
            logger.info("[scrapertools.py] POST request")

    # Add the headers
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        if (DEBUG == True):
            logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from Python 2.6 on --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        deftimeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
        except:
            import sys
            for line in sys.exc_info():
                logger.error("%s" % line)
        socket.setdefaulttimeout(deftimeout)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    data = handle.read()
    info = handle.info()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Response")
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    for header in info:
        if (DEBUG == True):
            logger.info("[scrapertools.py] " + header + "=" + info[header])
    handle.close()
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    '''
    # Fire off the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ","%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Downloaded in %d seconds " % (fin - inicio + 1))

    return data
import cookielib
import urllib2

filename = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(filename)
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
response = opener.open("https://www.github.com")
cookie.save(ignore_discard=True, ignore_expires=True)
def downloadpageGzip(url):
    # Initialize the cookie machinery
    ficherocookies = os.path.join(config.get_data_path(), 'cookies.dat')
    if (DEBUG == True):
        logger.info("Cookiefile=" + ficherocookies)
    inicio = time.clock()

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                if (DEBUG == True):
                    logger.info("[scrapertools.py] The cookie file exists but is unreadable; deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    import httplib
    parsedurl = urlparse.urlparse(url)
    if (DEBUG == True):
        logger.info("parsedurl=" + str(parsedurl))

    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding': 'gzip,deflate',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Referer': parsedurl[0] + "://" + parsedurl[1]
    }
    if (DEBUG == True):
        logger.info(str(txheaders))

    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    fin = time.clock()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Downloaded 'Gzipped data' in %d seconds " % (fin - inicio + 1))

    # Decompress the Gzip data
    try:
        fin = inicio
        import StringIO
        compressedstream = StringIO.StringIO(data)
        import gzip
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        fin = time.clock()
        if (DEBUG == True):
            logger.info("[scrapertools.py] 'Gzipped data' decompressed in %d seconds " % (fin - inicio + 1))
        return data1
    except:
        return data
def read_body_and_headers(url, post=None, headers=[], follow_redirects=False, timeout=None):
    _log("read_body_and_headers " + url)

    if post is not None:
        _log("read_body_and_headers post=" + post)

    if len(headers) == 0:
        headers.append([
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"
        ])

    # Start cookie lib
    ficherocookies = os.path.join(get_data_path(), 'cookies.dat')
    _log("read_body_and_headers cookies_file=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            _log("read_body_and_headers ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            _log("read_body_and_headers ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        _log("read_body_and_headers cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")

        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),
                    urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed; fire off the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dict for the request headers
    txheaders = {}

    # Build the request
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")

    # Add the headers
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    _log("read_body_and_headers ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from Python 2.6 on --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info():
                _log("%s" % line)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = handle.read()

    info = handle.info()
    _log("read_body_and_headers Response")
    returnheaders = []
    _log("read_body_and_headers ---------------------------")
    for header in info:
        _log("read_body_and_headers " + header + "=" + info[header])
        returnheaders.append([header, info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")

    '''
    # Fire off the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ","%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    _log("read_body_and_headers Downloaded in %d seconds " % (fin - inicio + 1))
    _log("read_body_and_headers body=" + data)
    return data, returnheaders
def _set_cookie(self, fileName):
    cookie = cookielib.MozillaCookieJar()
    cookie.load(fileName, ignore_discard=True, ignore_expires=True)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    urllib2.install_opener(opener)
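# The same effect as _set_cookie() without a class (a sketch; the file name
# and URL are placeholders): load the jar, install a cookie-aware opener
# globally, then call urllib2.urlopen() directly.
import urllib2
import cookielib

jar = cookielib.MozillaCookieJar()
jar.load('cookie.txt', ignore_discard=True, ignore_expires=True)
urllib2.install_opener(
    urllib2.build_opener(urllib2.HTTPCookieProcessor(jar)))
html = urllib2.urlopen('http://www.example.com/protected').read()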
# (Fragment: only the tail of a User-Agent lookup table is shown.)
    110: 'Opera/9.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.01',
    111: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
    112: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    113: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
}

HOST = 'Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0'
#Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0
#HISTORYFILE = xbmc.translatePath(ptv.getAddonInfo('profile') + "history.xml")

#cj = cookielib.LWPCookieJar()
cj = cookielib.MozillaCookieJar()


class common:
    HOST = HOST
    HEADER = None

    def __init__(self):
        pass

    def html_special_chars(self, txt):
        txt = txt.replace('#038;', '')
        # NOTE: the entity names in the next two replaces were lost when the
        # source was HTML-decoded; '&quot;' and '&#039;' are the likely originals.
        txt = txt.replace('&quot;', '"')
        txt = txt.replace('&#039;', '\'')
        txt = txt.replace('”', '"')
        txt = txt.replace('„', '"')
def __init__(self, host, auth_function, user_agent, source,
             host_override=None, extra_headers=None, save_cookies=False,
             auth_tries=3, account_type=None, debug_data=True, secure=True,
             rpc_tries=3):
    """Creates a new HttpRpcServer.

    Args:
      host: The host to send requests to.
      auth_function: A function that takes no arguments and returns an
        (email, password) tuple when called. Will be called if authentication
        is required.
      user_agent: The user-agent string to send to the server. Specify None to
        omit the user-agent header.
      source: The source to specify in authentication requests.
      host_override: The host header to send to the server (defaults to host).
      extra_headers: A dict of extra headers to append to every request. Values
        supplied here will override other default headers that are supplied.
      save_cookies: If True, save the authentication cookies to local disk.
        If False, use an in-memory cookiejar instead. Subclasses must
        implement this functionality. Defaults to False.
      auth_tries: The number of times to attempt auth_function before failing.
      account_type: One of GOOGLE, HOSTED_OR_GOOGLE, or None for automatic.
      debug_data: Whether debugging output should include data contents.
      rpc_tries: The number of rpc retries upon http server error (i.e.
        Response code >= 500 and < 600) before failing.
    """
    if secure:
        self.scheme = "https"
    else:
        self.scheme = "http"
    self.host = host
    self.host_override = host_override
    self.auth_function = auth_function
    self.source = source
    self.authenticated = False
    self.auth_tries = auth_tries
    self.debug_data = debug_data
    self.rpc_tries = rpc_tries
    self.account_type = account_type
    self.read_credentials = False  # have user credentials been read?
    self.username = None
    self.password = None
    # URL of AppServer redirected to by AppLoad Balancer
    self.appserver_url = None
    # Last AppServer that was authenticated successfully
    self.last_appserver_ip = None
    self.extra_headers = {}
    if user_agent:
        self.extra_headers["User-Agent"] = user_agent
    if extra_headers:
        self.extra_headers.update(extra_headers)
    self.save_cookies = save_cookies
    self.cookie_jar = cookielib.MozillaCookieJar()
    self.opener = self._GetOpener()
    if self.host_override:
        logger.info("Server: %s; Host: %s", self.host, self.host_override)
    else:
        logger.info("Server: %s", self.host)
    if ((self.host_override and self.host_override == "localhost")
            or self.host == "localhost" or self.host.startswith("localhost:")):
        self._DevAppServerAuthenticate()
from requests import Session
from robobrowser import RoboBrowser
import cookielib
import pdb

session = Session()
session.verify = False  # Skip SSL verification

cj = cookielib.MozillaCookieJar('cookies.txt')
cj.load()
browser = RoboBrowser(session=session)

## Instant Queue
browser.open("http://www.netflix.com/MyList", cookies=cj)

# get the form
queue_form = browser.get_form(id='MainQueueForm')
# queue_submit = queue_form.submit_fields['evoSubmit']

predictions = []
skip_keys = ["queueHeader"]
for key in queue_form.keys():
    if key in skip_keys:
        continue
    if 'OP' in key:
        continue
    spans = browser.find_all("input", {"name": key})[0].findAllNext("span")
    for s in spans:
        if s is not None:
            for c in s['class']:
                if 'sbmf-' in c:
                    predicted_rating = c.strip("sbmf-")
                    if key not in (item[0] for item in predictions):
                        pass  # (snippet truncated here in the original)
# (Fragment: this code sits inside a helper that builds the cookie jar.)
    try:
        shutil.copyfile(post_review_cookies, cookie_file)
        os.chmod(cookie_file, 0600)
    except IOError, e:
        logging.warning("There was an error while copying "
                        "post-review's cookies: %s" % e)

    if not os.path.isfile(cookie_file):
        try:
            open(cookie_file, 'w').close()
            os.chmod(cookie_file, 0600)
        except IOError, e:
            logging.warning("There was an error while creating a "
                            "cookie file: %s" % e)

    return cookielib.MozillaCookieJar(cookie_file), cookie_file


class ReviewBoardServer(object):
    """Represents a Review Board server we are communicating with.

    Provides methods for executing HTTP requests on a Review Board
    server's Web API.

    The ``auth_callback`` parameter can be used to specify a callable
    which will be called when authentication fails. This callable will
    be passed the realm, and url of the Review Board server and should
    return a 2-tuple of username, password. The user can be prompted
    for their credentials using this mechanism.
    """

    def __init__(self,
        # (snippet truncated here in the original)