def edit_country():
    """Increment the ``population`` field of the country edit form by one.

    Obtains an opener from ``login.login_cookies()``, downloads the form at
    ``COUNTRY_URL``, pretty-prints the parsed fields, posts the form back
    with ``population`` increased by one, and finally downloads and parses
    the page again.

    Returns:
        None.
    """
    import pprint

    opener = login.login_cookies()
    country_html = opener.open(COUNTRY_URL).read()
    data = login.parse_form(country_html)
    pprint.pprint(data)
    data['population'] = int(data['population']) + 1
    # urlencode() yields text, while urllib.request only accepts bytes as a
    # POST body; without the encode step opening the request raises TypeError.
    # NOTE(review): assumes `request` is urllib.request and `urlencode` is
    # urllib.parse.urlencode -- confirm against the file's imports.
    encoded_data = urlencode(data).encode('utf-8')
    newrequest = request.Request(COUNTRY_URL, encoded_data)
    opener.open(newrequest)
    # Fetch the page again after the POST and re-parse its form.
    country_html = opener.open(COUNTRY_URL).read()
    data = login.parse_form(country_html)
def edit_country():
    """Add one to the country's population and print it before and after.

    Uses the opener returned by ``login.login_cookies()`` to download the
    form at ``COUNTRY_URL``, posts it back with ``population`` incremented,
    then downloads the page again to print the stored value.
    """
    import pprint

    session_opener = login.login_cookies()
    form = login.parse_form(session_opener.open(COUNTRY_URL).read())
    pprint.pprint(form)
    print('Population before: ' + form['population'])

    form['population'] = int(form['population']) + 1
    payload = urllib.urlencode(form)
    post = urllib2.Request(COUNTRY_URL, payload)
    response = session_opener.open(post)

    # Read the form back to show the value now held by the site.
    form = login.parse_form(session_opener.open(COUNTRY_URL).read())
    print('Population after: %s' % (form['population'],))
def edit_country():
    """Bump the population on the country edit form and report the change.

    The form at ``COUNTRY_URL`` is fetched through the opener returned by
    ``login.login_cookies()``, its ``population`` value is increased by one
    and posted back, and the page is fetched once more to print the result.
    """
    opener = login.login_cookies()

    page = opener.open(COUNTRY_URL).read()
    fields = login.parse_form(page)
    pprint.pprint(fields)
    print('Population before:' + fields['population'])

    fields['population'] = int(fields['population']) + 1
    body = urllib.urlencode(fields)
    response = opener.open(urllib2.Request(COUNTRY_URL, body))

    # Second download: parse the form again to print the updated value.
    page = opener.open(COUNTRY_URL).read()
    fields = login.parse_form(page)
    print('Population after: %s' % (fields['population'],))
def demo0():
    """Download the register page, dump its form and OCR the CAPTCHA.

    The CAPTCHA image is saved in three variants (as extracted, converted
    with mode ``'L'``, and thresholded to mode ``'1'``) and the
    ``pytesseract`` result for each variant is printed in that order.
    """
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    page = opener.open(REGISTER_URL).read()
    pprint.pprint(parse_form(page))
    # Registration form, sample output:
    # {'_formkey': 'b3fec06e-8882-45a8-82e8-47da1ff985e9',
    #  '_formname': 'register',
    #  '_next': '/',
    #  'email': '',
    #  'first_name': '',
    #  'last_name': '',
    #  'password': '',
    #  'password_two': '',
    #  'recaptcha_response_field': None}

    original = get_captcha(page)
    original.save('cap_ori.png')

    grayscale = original.convert('L')
    grayscale.save('cap_gray.png')

    # Threshold: values below 1 become 0, everything else becomes 255.
    thresholded = grayscale.point(lambda x: 0 if x < 1 else 255, '1')
    thresholded.save('cap_bw.png')

    for variant in (original, grayscale, thresholded):
        print(pytesseract.image_to_string(variant))
def change_currency():
    """Set the form's currency name to 'British pounds' and verify it.

    Logs in with a fresh ``requests`` session, reads the form at
    ``COUNTRY_OR_DISTRICT_URL``, posts it back with the new currency name and
    asserts that ``get_currency()`` returns the value that was submitted.
    """
    _, session = login(session=requests.Session())

    form_page = session.get(COUNTRY_OR_DISTRICT_URL)
    data = parse_form(form_page.content)
    print('currency is: ', data['currency_name'])

    data['currency_name'] = 'British pounds'
    session.post(COUNTRY_OR_DISTRICT_URL, data=data)

    # Compare the freshly read value with what was just posted.
    test_currency = get_currency()
    print('currency is now: ', test_currency)
    assert test_currency == data['currency_name']
def add_population():
    """Increase the form's population by one and verify the stored value.

    Logs in with a fresh ``requests`` session, reads the form at
    ``COUNTRY_OR_DISTRICT_URL``, posts it back with ``population``
    incremented and asserts that ``get_population()`` returns the new value.
    """
    _, session = login(session=requests.Session())

    form_page = session.get(COUNTRY_OR_DISTRICT_URL)
    data = parse_form(form_page.content)
    print('population is: ', data['population'])

    data['population'] = int(data['population']) + 1
    session.post(COUNTRY_OR_DISTRICT_URL, data=data)

    # Compare the freshly read value with the integer that was just posted.
    test_population = get_population()
    print('population is now:', test_population)
    assert test_population == data['population']
def register(first_name, last_name, email, password):
    """Submit the registration form, solving its CAPTCHA with OCR.

    Args:
        first_name: Value for the form's ``first_name`` field.
        last_name: Value for the form's ``last_name`` field.
        email: Value for the form's ``email`` field.
        password: Value used for both ``password`` and ``password_two``.

    Returns:
        True when the URL of the response to the POST does not contain
        ``'/user/register'``; False otherwise. On failure, the id and text
        of every ``div.error`` element in the response are printed.
    """
    session = requests.Session()
    html = session.get(REGISTER_URL)
    form = parse_form(html.content)
    form['first_name'] = first_name
    form['last_name'] = last_name
    form['email'] = email
    form['password'] = form['password_two'] = password

    img = get_captcha_img(html.content)
    captcha = ocr(img)
    form['recaptcha_response_field'] = captcha

    # Post to the URL the GET actually resolved to.
    resp = session.post(html.url, form)
    success = '/user/register' not in resp.url
    if not success:
        form_errors = fromstring(resp.content).cssselect('div.error')
        print('Form Errors:')
        # One error per line; joining on the letter 'n' ran them together.
        print('\n'.join(
            ' {}: {}'.format(f.get('id'), f.text) for f in form_errors))
    return success
def register(first_name, last_name, email, password):
    """Fill in and post the registration form, solving the CAPTCHA via OCR.

    Args:
        first_name: Value for the form's ``first_name`` field.
        last_name: Value for the form's ``last_name`` field.
        email: Value for the form's ``email`` field.
        password: Value used for both ``password`` and ``password_two``.

    Returns:
        True when the URL of the response to the POST does not contain
        ``'/user/register'``; False otherwise.
    """
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    page = opener.open(REGISTER_URL).read()
    form = parse_form(page)

    # Converting the image to black and white before handing it to the OCR
    # engine makes recognition easier.
    captcha_image = bw_image(get_captcha(page))
    captcha = pytesseract.image_to_string(captcha_image).lower()
    print(captcha)

    form['first_name'] = first_name
    form['last_name'] = last_name
    form['email'] = email
    form['password'] = form['password_two'] = password
    form['recaptcha_response_field'] = captcha

    post = urllib2.Request(REGISTER_URL, urllib.urlencode(form))
    reply = opener.open(post)
    return '/user/register' not in reply.geturl()
import requests

from login import login, parse_form

COUNTRY_URL = 'http://example.webscraping.com/places/default/edit/United-Kingdom-239'

# Log in on a fresh session; login() hands back the session to keep using.
session = requests.Session()
response, session = login(session=session)

# Download the edit form and read its current field values.
country_html = session.get(COUNTRY_URL)
data = parse_form(country_html.content)

# Post the form back with the population increased by one.
data['population'] = int(data['population']) + 1
response = session.post(COUNTRY_URL, data=data)
#-*- coding=utf-8 -*-
# Interact with a form after a successful login.
import urllib
import urllib2

import login

# After logging in, change the value of one specific form field on the page.
COUNTRY_URL = "http://example.webscraping.com/edit/Aland-Islands-2"

opener = login.login_cookies()
country_html = opener.open(COUNTRY_URL).read()
data = login.parse_form(country_html)

# Post the form back with the population increased by one.
data['population'] = int(data['population']) + 1
encoded_data = urllib.urlencode(data)
request = urllib2.Request(COUNTRY_URL, encoded_data)
response = opener.open(request)

# Show the URL the POST ended up at.
print(response.geturl())
import requests

from login import parse_form

LOGIN_URL = 'http://example.webscraping.com/places/default/user/login'
LOGIN_EMAIL = '*****@*****.**'
LOGIN_PASSWORD = '******'

# This initial dict is replaced below by the fields parsed from the page.
data = {'email': LOGIN_EMAIL, 'password': LOGIN_PASSWORD}

# Load the login page and start from the fields of its form.
html = requests.get(LOGIN_URL)
data = parse_form(html.content)
data['email'] = LOGIN_EMAIL
data['password'] = LOGIN_PASSWORD

# First attempt: post the form without the cookies of the GET response.
response = requests.post(LOGIN_URL, data)
response.url
print(response.url)

# Second attempt: same form, this time sending the GET response's cookies.
second_response = requests.post(LOGIN_URL, data, cookies=html.cookies)
print(second_response.url)
import cookielib
import pprint
import urllib2
from io import BytesIO

import lxml.html
import pytesseract
from PIL import Image

import login

REGISTER_URL = 'http://example.webscraping.com/places/default/user/register?_next=/places/default/index'

# Download the registration page with a cookie-aware opener.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
html = opener.open(REGISTER_URL).read()
form = login.parse_form(html)
#pprint.pprint(form)

# The CAPTCHA <img> src holds the image after a comma, base64-encoded.
tree = lxml.html.fromstring(html)
img_data = tree.cssselect('div#recaptcha img')[0].get('src')
img_data = img_data.partition(',')[-1]
binary_img_data = img_data.decode('base64')

# Open the decoded bytes as an image and keep a copy on disk.
file_like = BytesIO(binary_img_data)
img = Image.open(file_like)
img.save('captcha_original.png')

# Mode 'L' conversion, saved for inspection.
gray = img.convert('L')
gray.save('captcha_gray.png')

# Threshold: values below 1 become 0, everything else becomes 255.
bw = gray.point(lambda x: 0 if x < 1 else 255, '1')
bw.save('captcha_thresholded.png')

# The OCR result of the original image is computed but not stored.
pytesseract.image_to_string(img)