def test_dash_issue(self):
    # Checks what text ends up inside <strong> with the
    # `fix_special_entities` option left at its default and switched off.
    # NOTE(review): the fixture and the final expected substring both show
    # up here as a raw em dash, while the comments below talk about
    # `&#NNN;` references -- confirm the literals were not entity-decoded
    # somewhere on the way into this file.
    markup = '<strong>—</strong>'
    self.server.response['get.data'] = markup
    grab = build_grab()

    def strong_text():
        return grab.doc.select('//strong/text()').text()

    # By default &#[128-160]; are fixed
    grab.go(self.server.get_url())
    self.assertFalse(strong_text() == six.unichr(151))
    self.assertTrue(strong_text() == six.unichr(8212))

    # With the fix disabled the expectations are inverted
    grab.setup(fix_special_entities=False)
    grab.go(self.server.get_url())
    self.assertTrue(strong_text() == six.unichr(151))
    self.assertFalse(strong_text() == six.unichr(8212))

    # Explicitly use unicode_body func on a fresh instance
    grab = build_grab()
    grab.go(self.server.get_url())
    self.assertTrue('—' in grab.doc.unicode_body())
def test_unicode_post(self):
    # A unicode POST body must reach the server encoded as utf-8 by
    # default, or in the encoding named by the `charset` option.
    payload = u'фыва'

    # No charset option: utf-8 bytes are expected
    grab = build_grab()
    grab.setup(post=payload, url=self.server.get_url())
    grab.request()
    self.assertEqual(self.server.request['data'],
                     payload.encode('utf-8'))

    # Plain string body with charset=cp1251
    self.server.request['charset'] = 'cp1251'
    grab = build_grab()
    grab.setup(post=payload, url=self.server.get_url(),
               charset='cp1251', debug=True)
    grab.request()
    self.assertEqual(self.server.request['data'],
                     payload.encode('cp1251'))

    # Dict body with a unicode value and charset=cp1251
    self.server.request['charset'] = 'cp1251'
    grab = build_grab()
    grab.setup(post={'foo': payload}, url=self.server.get_url(),
               charset='cp1251', debug=True)
    grab.request()
    # The expected body is built as text, then converted to bytes
    # (py3 hack)
    expected = ('foo=%s' % quote(payload.encode('cp1251'))).encode('utf-8')
    self.assertEqual(self.server.request['data'], expected)
def test_session(self):
    # Covers the `reuse_cookies` option and clear_cookies().

    # reuse_cookies=True: a cookie set by the server is sent back on
    # every following request
    client = build_grab()
    client.setup(reuse_cookies=True)
    self.server.response['cookies'] = {'foo': 'bar'}.items()
    client.go(self.server.get_url())
    self.assertEqual(client.doc.cookies['foo'], 'bar')
    for _ in range(2):
        client.go(self.server.get_url())
        self.assertEqual(self.server.request['headers']['Cookie'],
                         'foo=bar')

    # reuse_cookies=False: the cookie is visible on the response but the
    # next request carries no cookies
    client = build_grab()
    client.setup(reuse_cookies=False)
    self.server.response['cookies'] = {'foo': 'baz'}.items()
    client.go(self.server.get_url())
    self.assertEqual(client.doc.cookies['foo'], 'baz')
    client.go(self.server.get_url())
    self.assertTrue(len(self.server.request['cookies']) == 0)

    # clear_cookies(): after the call the next request carries no cookies
    # even with reuse_cookies=True
    client = build_grab()
    client.setup(reuse_cookies=True)
    self.server.response['cookies'] = {'foo': 'bar'}.items()
    client.go(self.server.get_url())
    self.assertEqual(client.doc.cookies['foo'], 'bar')
    client.clear_cookies()
    client.go(self.server.get_url())
    self.assertTrue(len(self.server.request['cookies']) == 0)
def test_dash_issue(self):
    # Checks what text ends up inside <strong> with the
    # `fix_special_entities` option left at its default and switched off.
    # NOTE(review): the fixture and the final expected substring both show
    # up here as a raw em dash, while the comments below talk about
    # `&#NNN;` references -- confirm the literals were not entity-decoded
    # somewhere on the way into this file.
    markup = '<strong>—</strong>'
    self.server.response['get.data'] = markup
    grab = build_grab()

    def strong_text():
        return grab.doc.select('//strong/text()').text()

    # By default &#[128-160]; are fixed
    grab.go(self.server.get_url())
    self.assertFalse(strong_text() == six.unichr(151))
    self.assertTrue(strong_text() == six.unichr(8212))

    # With the fix disabled the expectations are inverted
    grab.setup(fix_special_entities=False)
    grab.go(self.server.get_url())
    self.assertTrue(strong_text() == six.unichr(151))
    self.assertFalse(strong_text() == six.unichr(8212))

    # Explicitly use unicode_body func on a fresh instance
    grab = build_grab()
    grab.go(self.server.get_url())
    self.assertTrue('—' in grab.doc.unicode_body())
def test_load_dump(self):
    # Round-trips cookies through cookies.save_to_file() and
    # cookies.load_from_file() using a JSON file.
    def name_value_pairs(records):
        return set((rec['name'], rec['value']) for rec in records)

    with temp_file() as tmp_file:
        # Dump: cookies configured on the Grab instance must appear in
        # the saved file
        for jar in ({'foo': 'bar', 'spam': 'ham'},
                    {'foo': 'bar', 'spam': u'begemot'}):
            grab = build_grab()
            grab.setup(cookies=jar)
            grab.go(self.server.get_url())
            grab.cookies.save_to_file(tmp_file)
            with open(tmp_file) as inp:
                self.assertEqual(set(jar.items()),
                                 name_value_pairs(json.load(inp)))

        # Load: records written to the file must appear in grab.cookies
        grab = build_grab()
        records = [
            {'name': 'foo', 'value': 'bar',
             'domain': self.server.address},
            {'name': 'spam', 'value': u'begemot',
             'domain': self.server.address},
        ]
        with open(tmp_file, 'w') as out:
            json.dump(records, out)
        grab.cookies.load_from_file(tmp_file)
        self.assertEqual(set(grab.cookies.items()),
                         name_value_pairs(records))
def test_load_dump(self):
    # Round-trips cookies through cookies.save_to_file() and
    # cookies.load_from_file() using a JSON file.
    def name_value_pairs(records):
        return set((rec['name'], rec['value']) for rec in records)

    with temp_file() as tmp_file:
        # Dump: cookies configured on the Grab instance must appear in
        # the saved file
        for jar in ({'foo': 'bar', 'spam': 'ham'},
                    {'foo': 'bar', 'spam': u'begemot'}):
            grab = build_grab()
            grab.setup(cookies=jar)
            grab.go(self.server.get_url())
            grab.cookies.save_to_file(tmp_file)
            with open(tmp_file) as inp:
                self.assertEqual(set(jar.items()),
                                 name_value_pairs(json.load(inp)))

        # Load: records written to the file must appear in grab.cookies
        grab = build_grab()
        records = [
            {'name': 'foo', 'value': 'bar',
             'domain': self.server.address},
            {'name': 'spam', 'value': u'begemot',
             'domain': self.server.address},
        ]
        with open(tmp_file, 'w') as out:
            json.dump(records, out)
        grab.cookies.load_from_file(tmp_file)
        self.assertEqual(set(grab.cookies.items()),
                         name_value_pairs(records))
def test_unicode_post(self):
    # A unicode POST body must reach the server encoded as utf-8 by
    # default, or in the encoding named by the `charset` option.
    payload = u'фыва'

    # No charset option: utf-8 bytes are expected
    grab = build_grab()
    grab.setup(post=payload, url=self.server.get_url())
    grab.request()
    self.assertEqual(self.server.request['data'],
                     payload.encode('utf-8'))

    # Plain string body with charset=cp1251
    self.server.request['charset'] = 'cp1251'
    grab = build_grab()
    grab.setup(post=payload, url=self.server.get_url(),
               charset='cp1251', debug=True)
    grab.request()
    self.assertEqual(self.server.request['data'],
                     payload.encode('cp1251'))

    # Dict body with a unicode value and charset=cp1251
    self.server.request['charset'] = 'cp1251'
    grab = build_grab()
    grab.setup(post={'foo': payload}, url=self.server.get_url(),
               charset='cp1251', debug=True)
    grab.request()
    # The expected body is built as text, then converted to bytes
    # (py3 hack)
    expected = ('foo=%s' % quote(payload.encode('cp1251'))).encode('utf-8')
    self.assertEqual(self.server.request['data'], expected)
def test_session(self):
    # Covers the `reuse_cookies` option and clear_cookies().

    # reuse_cookies=True: a cookie set by the server is sent back on
    # every following request
    client = build_grab()
    client.setup(reuse_cookies=True)
    self.server.response['cookies'] = {'foo': 'bar'}.items()
    client.go(self.server.get_url())
    self.assertEqual(client.doc.cookies['foo'], 'bar')
    for _ in range(2):
        client.go(self.server.get_url())
        self.assertEqual(self.server.request['headers']['Cookie'],
                         'foo=bar')

    # reuse_cookies=False: the cookie is visible on the response but the
    # next request carries no cookies
    client = build_grab()
    client.setup(reuse_cookies=False)
    self.server.response['cookies'] = {'foo': 'baz'}.items()
    client.go(self.server.get_url())
    self.assertEqual(client.doc.cookies['foo'], 'baz')
    client.go(self.server.get_url())
    self.assertTrue(len(self.server.request['cookies']) == 0)

    # clear_cookies(): after the call the next request carries no cookies
    # even with reuse_cookies=True
    client = build_grab()
    client.setup(reuse_cookies=True)
    self.server.response['cookies'] = {'foo': 'bar'}.items()
    client.go(self.server.get_url())
    self.assertEqual(client.doc.cookies['foo'], 'bar')
    client.clear_cookies()
    client.go(self.server.get_url())
    self.assertTrue(len(self.server.request['cookies']) == 0)
def test_adopt(self):
    # adopt() must carry over both the fetched document and the URL.
    source = build_grab()
    self.server.response['get.data'] = 'Moon'
    source.go(self.server.get_url())

    target = build_grab()
    # A fresh instance has no URL configured yet
    self.assertEqual(target.config['url'], None)

    target.adopt(source)
    self.assertTrue(b'Moon' in target.doc.body)
    self.assertEqual(target.config['url'], self.server.get_url())
def test_make_url_absolute(self):
    # After fetching a page with <base href>, resolve_base=True resolves
    # the path against that base
    fetched = build_grab()
    self.server.response['get.data'] = '<base href="http://foo/bar/">'
    fetched.go(self.server.get_url())
    resolved = fetched.make_url_absolute('/foobar', resolve_base=True)
    self.assertEqual(resolved, 'http://foo/foobar')

    # An instance that has made no request returns the path unchanged
    pristine = build_grab()
    untouched = pristine.make_url_absolute('/foobar')
    self.assertEqual(untouched, '/foobar')
def test_empty_document(self):
    # Smoke test: running an XPath query against these response bodies
    # must simply not raise (there is no assertion on the result).
    for body in ('oops', '<frameset></frameset>'):
        self.server.response['get.data'] = body
        grab = build_grab()
        grab.go(self.server.get_url())
        grab.xpath_exists('//anytag')
def test_empty_document(self):
    # Smoke test: running an XPath query against these response bodies
    # must simply not raise (there is no assertion on the result).
    for body in ('oops', '<frameset></frameset>'):
        self.server.response['get.data'] = body
        grab = build_grab()
        grab.go(self.server.get_url())
        grab.xpath_exists('//anytag')
def test_options_method(self):
    # OPTIONS request with a 3-byte body: Content-Length must be sent
    with_body = build_grab()
    with_body.setup(method='options', post=b'abc')
    with_body.go(self.server.get_url())
    self.assertEqual('OPTIONS', self.server.request['method'])
    self.assertEqual('3',
                     self.server.request['headers']['Content-Length'])

    # OPTIONS request without a body: no Content-Length header at all
    without_body = build_grab()
    without_body.setup(method='options')
    without_body.go(self.server.get_url())
    self.assertEqual('OPTIONS', self.server.request['method'])
    self.assertTrue(
        'Content-Length' not in self.server.request['headers'])
def test_useragent(self):
    # Exercises the `user_agent` and `user_agent_file` options and checks
    # the User-Agent header the server actually receives.
    def sent_agent():
        return self.server.request['headers']['user-agent']

    # Null value activates default random user-agent.
    # For some transports it just allows them to send their own default
    # user-agent, like in the Kit transport case.
    grab = build_grab()
    grab.setup(user_agent=None)
    grab.go(self.server.get_url())
    self.assertGreater(len(self.server.request['headers']), 0)
    self.assertNotIn('PycURL', sent_agent())

    # By default user_agent is None => random user agent is generated
    grab = build_grab()
    grab.go(self.server.get_url())
    self.assertGreater(len(self.server.request['headers']), 0)
    self.assertNotIn('PycURL', sent_agent())

    # Simple case: setup user agent manually
    grab.setup(user_agent='foo')
    grab.go(self.server.get_url())
    self.assertEqual(sent_agent(), 'foo')

    with temp_file() as ua_file:
        # user agent from file should be loaded
        with open(ua_file, 'w') as out:
            out.write('GOD')
        grab.setup(user_agent=None, user_agent_file=ua_file)
        grab.go(self.server.get_url())
        self.assertEqual(sent_agent(), 'GOD')

    with temp_file() as ua_file:
        # random user agent from file should be loaded
        with open(ua_file, 'w') as out:
            out.write('GOD1\nGOD2')
        grab.setup(user_agent=None, user_agent_file=ua_file)
        grab.go(self.server.get_url())
        self.assertIn(sent_agent(), ('GOD1', 'GOD2'))
        agent = grab.config['user_agent']

        # User-agent should not change on the next two requests.
        # These run while the file still exists so the check does not
        # depend on how a missing `user_agent_file` is handled.
        for _ in range(2):
            grab.go(self.server.get_url())
            self.assertEqual(sent_agent(), agent)
def test_useragent(self):
    # Exercises the `user_agent` and `user_agent_file` options and checks
    # the User-Agent header the server actually receives.
    def sent_agent():
        return self.server.request['headers']['user-agent']

    # Null value activates default random user-agent.
    # For some transports it just allows them to send their own default
    # user-agent, like in the Kit transport case.
    grab = build_grab()
    grab.setup(user_agent=None)
    grab.go(self.server.get_url())
    self.assertGreater(len(self.server.request['headers']), 0)
    self.assertNotIn('PycURL', sent_agent())

    # By default user_agent is None => random user agent is generated
    grab = build_grab()
    grab.go(self.server.get_url())
    self.assertGreater(len(self.server.request['headers']), 0)
    self.assertNotIn('PycURL', sent_agent())

    # Simple case: setup user agent manually
    grab.setup(user_agent='foo')
    grab.go(self.server.get_url())
    self.assertEqual(sent_agent(), 'foo')

    with temp_file() as ua_file:
        # user agent from file should be loaded
        with open(ua_file, 'w') as out:
            out.write('GOD')
        grab.setup(user_agent=None, user_agent_file=ua_file)
        grab.go(self.server.get_url())
        self.assertEqual(sent_agent(), 'GOD')

    with temp_file() as ua_file:
        # random user agent from file should be loaded
        with open(ua_file, 'w') as out:
            out.write('GOD1\nGOD2')
        grab.setup(user_agent=None, user_agent_file=ua_file)
        grab.go(self.server.get_url())
        self.assertIn(sent_agent(), ('GOD1', 'GOD2'))
        agent = grab.config['user_agent']

        # User-agent should not change on the next two requests.
        # These run while the file still exists so the check does not
        # depend on how a missing `user_agent_file` is handled.
        for _ in range(2):
            grab.go(self.server.get_url())
            self.assertEqual(sent_agent(), agent)
def test_cdata_issue(self):
    # Compares the text of <weight> as seen through the default DOM
    # builder and through the XML one.
    self.server.response['data'] = XML

    # By default HTML DOM builder is used.
    # It handles CDATA incorrectly: the node text comes back as None.
    html_grab = build_grab()
    html_grab.go(self.server.get_url())
    self.assertEqual(None, html_grab.xpath_one('//weight').text)
    self.assertEqual(None, html_grab.doc.tree.xpath('//weight')[0].text)

    # But XML DOM builder produces valid result
    xml_grab = build_grab(content_type='xml')
    xml_grab.go(self.server.get_url())
    self.assertEqual('30', xml_grab.doc.tree.xpath('//weight')[0].text)
def test_cdata_issue(self):
    # Compares the text of <weight> as seen through the default DOM
    # builder and through the XML one.
    self.server.response['data'] = XML

    # By default HTML DOM builder is used.
    # It handles CDATA incorrectly: the node text comes back as None.
    html_grab = build_grab()
    html_grab.go(self.server.get_url())
    self.assertEqual(None, html_grab.xpath_one('//weight').text)
    self.assertEqual(None, html_grab.doc.tree.xpath('//weight')[0].text)

    # But XML DOM builder produces valid result
    xml_grab = build_grab(content_type='xml')
    xml_grab.go(self.server.get_url())
    self.assertEqual('30', xml_grab.doc.tree.xpath('//weight')[0].text)
def test_useragent_simple(self):
    # Simple case: a manually configured user agent is what the server
    # receives in the User-Agent header.
    grab = build_grab()
    grab.setup(user_agent='foo')
    grab.go(self.server.get_url())
    received = self.server.request['headers']['user-agent']
    self.assertEqual(received, 'foo')
def test_submit(self):
    # Submits forms served by the test server and checks the query
    # string that arrives in the request body.
    grab = build_grab()

    # Single form with one edited input
    self.server.response['get.data'] = POST_FORM % self.server.get_url()
    grab.go(self.server.get_url())
    grab.doc.set_input('name', 'Alex')
    grab.submit()
    self.assert_equal_qs(self.server.request['data'],
                         b'name=Alex&secret=123')

    # Form with several submit controls: (submit() kwargs, expected body)
    submit_cases = (
        # Default submit control
        ({}, b'secret=123&submit1=submit1'),
        # Selected submit control
        ({'submit_name': 'submit2'}, b'secret=123&submit2=submit2'),
        # Default submit control if submit control name is invalid
        ({'submit_name': 'submit3'}, b'secret=123&submit1=submit1'),
    )
    for submit_kwargs, expected_body in submit_cases:
        self.server.response['get.data'] = MULTIPLE_SUBMIT_FORM
        grab.go(self.server.get_url())
        grab.submit(**submit_kwargs)
        self.assert_equal_qs(self.server.request['data'], expected_body)
def test_set_methods(self):
    # Each set_input* helper must select the form that owns the targeted
    # control; the selection is observed through the private
    # `_lxml_form` attribute of the document.
    # pylint: disable=no-member,protected-access
    grab = build_grab()
    self.server.response['get.data'] = FORMS_HTML
    grab.go(self.server.get_url())
    doc = grab.doc

    # Nothing is selected before the first set_* call
    self.assertEqual(doc._lxml_form, None)

    # set_input() by name
    doc.set_input('gender', '1')
    self.assertEqual('common_form', doc._lxml_form.get('id'))

    # 'query' cannot be set while this form is selected
    with self.assertRaises(KeyError):
        doc.set_input('query', 'asdf')

    # set_input_by_id(), starting from no selection
    doc._lxml_form = None
    doc.set_input_by_id('search_box', 'asdf')
    self.assertEqual('search_form', doc._lxml_form.get('id'))

    # set_input_by_number() on an explicitly chosen form
    doc.choose_form(xpath='//form[@id="common_form"]')
    doc.set_input_by_number(0, 'asdf')

    # set_input_by_xpath(), starting from no selection
    doc._lxml_form = None
    doc.set_input_by_xpath('//*[@name="gender"]', '2')
    self.assertEqual('common_form', doc._lxml_form.get('id'))
def test_assign_unicode_to_body(self):
    # doc.body accepts byte strings (also when re-assigned) ...
    grab = build_grab()
    for raw in (b'abc', b'def'):
        grab.doc.body = raw

    # ... but assigning a unicode string raises GrabMisuseError
    with self.assertRaises(GrabMisuseError):
        grab.doc.body = u'Спутник'
def test_multipart_post(self):
    # Every supported shape of `multipart_post` must produce a request
    # body that contains the `foo` field.
    grab = build_grab(url=self.server.get_url(), debug_post=True)

    payloads = (
        # Dict
        {'foo': 'bar'},
        # tuple with one pair
        (('foo', 'bar'),),
        # tuple with two pairs
        (('foo', 'bar'), ('foo', 'baz')),
    )
    for payload in payloads:
        grab.setup(multipart_post=payload)
        grab.request()
        self.assertTrue(b'name="foo"' in self.server.request['data'])

    # Few values with non-ascii data
    # TODO: understand and fix
    # AssertionError: 'foo=bar&gaz=%D0%94%D0%B5%D0%BB%'\
    #                 'D1%8C%D1%84%D0%B8%D0%BD&abc=' !=
    #                 'foo=bar&gaz=\xd0\x94\xd0\xb5\xd0'\
    #                 '\xbb\xd1\x8c\xd1\x84\xd0\xb8\xd0\xbd&abc='
    # grab.setup(post=({'foo': 'bar', 'gaz': u'Дельфин', 'abc': None}))
    # grab.request()
    # self.assertEqual(self.server.request['data'],
    #                  'foo=bar&gaz=Дельфин&abc=')
def test_xml_with_declaration(self):
    # A body that starts with an XML declaration must still be
    # selectable with doc.select().
    self.server.response['get.data'] = (
        b'<?xml version="1.0" encoding="UTF-8"?>'
        b'<root><foo>foo</foo></root>'
    )
    grab = build_grab()
    grab.go(self.server.get_url())
    foo_text = grab.doc.select('//foo').text()
    self.assertTrue(foo_text == 'foo')
def test_invalid_charset(self):
    # Smoke test: fetching a page whose <meta> declares
    # charset=windows-874 must not raise (nothing else is asserted).
    page = (
        '<head><meta http-equiv="Content-Type" '
        'content="text/html; charset=windows-874">\' '
        '</head><body>test</body>'
    )
    self.server.response['get.data'] = page
    grab = build_grab()
    grab.go(self.server.get_url())
def test_task_clone_grab_config_and_url(self):
    # clone(url=...) must update both the task URL and the URL stored in
    # the cloned grab config.
    grab = build_grab()
    grab.setup(url='http://foo.com/')
    original = Task('foo', grab=grab)

    clone = original.clone(url='http://bar.com/')

    self.assertEqual(clone.url, 'http://bar.com/')
    self.assertEqual(clone.grab_config['url'], 'http://bar.com/')
def test_cookiefile(self):
    # Cookies read from `cookiefile` are sent with the request, merged
    # with cookies set by the server, and written back to the file.
    with temp_file() as tmp_file:
        grab = build_grab()
        file_cookies = [{
            'name': 'spam',
            'value': 'ham',
            'domain': self.server.address,
        }]
        with open(tmp_file, 'w') as out:
            json.dump(file_cookies, out)

        # One cookie is sent in the server response,
        # another one is passed via the `cookiefile` option
        self.server.response['cookies'] = {'godzilla': 'monkey'}.items()
        grab.setup(cookiefile=tmp_file, debug=True)
        grab.go(self.server.get_url())
        sent_value = self.server.request['cookies']['spam']['value']
        self.assertEqual(sent_value, 'ham')

        # This is the correct result of combining the two cookies
        merged_cookies = [('godzilla', 'monkey'), ('spam', 'ham')]

        # grab.cookies should contain the merged cookies
        self.assertEqual(set(merged_cookies), set(grab.cookies.items()))

        # the `cookiefile` file should contain the merged cookies
        with open(tmp_file) as inp:
            stored = set((rec['name'], rec['value'])
                         for rec in json.load(inp))
            self.assertEqual(set(merged_cookies), stored)

        # Just ensure it works
        grab.go(self.server.get_url())
def test_useragent_simple(self):
    # Simple case: a manually configured user agent is what the server
    # receives in the User-Agent header.
    grab = build_grab()
    grab.setup(user_agent='foo')
    grab.go(self.server.get_url())
    received = self.server.request['headers']['user-agent']
    self.assertEqual(received, 'foo')
def test_grab_parse_defensedxml(self):
    # Parsing an XML document that declares an external entity must
    # raise EntitiesForbidden.
    with temp_dir() as tmp_dir:
        # File the external entity points at
        injection_path = os.path.join(tmp_dir, 'injection')
        with open(injection_path, 'w') as out:
            out.write('Hey there!')

        # Prepare file:// URL valid for both linux and windows
        injection_url = 'file:///%s' % (
            injection_path.lstrip('/').replace('\\', '/'))

        # The body references the declared entity as `&ee;`. The literal
        # previously held U+2147, which is what `&ee;` decodes to as an
        # HTML named reference, so the entity was declared but never
        # used and the payload was no longer pure ASCII.
        bad_xml = (
            '<!DOCTYPE external ['
            '<!ENTITY ee SYSTEM "' + injection_url + '">'
            ']>'
            '<root>&ee;</root>'
        ).encode()

        xml_file = os.path.join(tmp_dir, 'bad.xml')
        # On windows, use slashes instead of backslashes to avoid error:
        # Invalid file://hostname/, expected localhost or 127.0.0.1 or none
        if '\\' in xml_file:
            xml_file = xml_file.replace('\\', '/')
        with open(xml_file, 'wb') as out:
            out.write(bad_xml)

        grab = build_grab(content_type='xml')
        file_url = 'file://%s' % xml_file
        grab.go(file_url)
        # Calling grab.doc(...) is what is expected to raise
        self.assertRaises(EntitiesForbidden, grab.doc, '//title')
def test_nonascii_path(self):
    # A non-ascii URL path must arrive at the server percent-encoded
    # (utf-8), and the response body must still be readable.
    grab = build_grab()
    self.server.response['data'] = 'medved'
    target_url = self.server.get_url(u'/превед')
    grab.go(target_url)

    self.assertEqual(b'medved', grab.doc.body)
    self.assertEqual('/%D0%BF%D1%80%D0%B5%D0%B2%D0%B5%D0%B4',
                     self.server.request['path'])
def test_invalid_charset(self):
    # Smoke test: fetching a page whose <meta> declares
    # charset=windows-874 must not raise (nothing else is asserted).
    page = (
        '<head><meta http-equiv="Content-Type" '
        'content="text/html; charset=windows-874">\' '
        '</head><body>test</body>'
    )
    self.server.response['get.data'] = page
    grab = build_grab()
    grab.go(self.server.get_url())
def test_submit(self):
    # submit(make_request=False) must only fill config['post'] with the
    # form fields; the document is built from inline bytes.
    form_html = (
        b'<form method="post"> '
        b'<input type="text" name="foo" value="val"></form>'
    )
    grab = build_grab(form_html)
    grab.choose_form(0)
    grab.submit(make_request=False)

    post_fields = dict(grab.config['post'])
    self.assertTrue('foo' in post_fields)
def test_nobody(self):
    # With nobody=True the body must be empty while the head part of the
    # response is still populated.
    grab = build_grab()
    grab.setup(nobody=True)
    self.server.response['get.data'] = 'foo'
    grab.go(self.server.get_url())

    document = grab.doc
    self.assertEqual(b'', document.body)
    self.assertTrue(len(document.head) > 0)
def test_choose_form_by_element_noform(self):
    # Looking up a form through an element that does not exist in the
    # document must raise DataNotFound.
    markup = b' <div>test</div> '
    grab = build_grab(markup)
    with self.assertRaises(DataNotFound):
        grab.choose_form_by_element('//input[@name="bar"]')
def test_form_fields(self):
    # form_fields() must return exactly the name/value pairs listed in
    # `expected` for the form below.
    # NOTE: the `</option)` closers are part of the fixture as found;
    # they are kept verbatim.
    markup = (
        b' <form> '
        b'<input value="foo"> '
        b'<input name="dis" disabled="disabled" value="diz"> '
        b'<select name="sel"> '
        b'<option value="opt1">opt1</option) '
        b'<option value="opt2">opt2</option) '
        b'</select> '
        b'<input type="radio" name="rad1" value="rad1"> '
        b'<input type="checkbox" name="cb1" value="cb1"> '
        b'<input type="checkbox" name="cb2" value="cb2" checked="checked"> '
        b'<input type="text" name="text1" value="text1"> '
        b'<textarea name="area1">area1</textarea> '
        b'</form> '
    )
    grab = build_grab(markup)
    expected = {
        'sel': 'opt1',
        'rad1': 'rad1',
        'cb2': 'cb2',
        'text1': 'text1',
        'area1': 'area1',
    }
    self.assertEqual(expected, grab.form_fields())
def test_multipart_post(self):
    # Every supported shape of `multipart_post` must produce a request
    # body that contains the `foo` field.
    grab = build_grab(url=self.server.get_url(), debug_post=True)

    payloads = (
        # Dict
        {'foo': 'bar'},
        # tuple with one pair
        (('foo', 'bar'),),
        # tuple with two pairs
        (('foo', 'bar'), ('foo', 'baz')),
    )
    for payload in payloads:
        grab.setup(multipart_post=payload)
        grab.request()
        self.assertTrue(b'name="foo"' in self.server.request['data'])

    # Few values with non-ascii data
    # TODO: understand and fix
    # AssertionError: 'foo=bar&gaz=%D0%94%D0%B5%D0%BB%'\
    #                 'D1%8C%D1%84%D0%B8%D0%BD&abc=' !=
    #                 'foo=bar&gaz=\xd0\x94\xd0\xb5\xd0'\
    #                 '\xbb\xd1\x8c\xd1\x84\xd0\xb8\xd0\xbd&abc='
    # grab.setup(post=({'foo': 'bar', 'gaz': u'Дельфин', 'abc': None}))
    # grab.request()
    # self.assertEqual(self.server.request['data'],
    #                  'foo=bar&gaz=Дельфин&abc=')
def test_pyquery_handler(self):
    # doc.pyquery() must expose the fetched document to CSS-style
    # selection.
    page = '<body><h1>Hello world</h1><footer>2014</footer>'
    self.server.response['get.data'] = page
    grab = build_grab()
    grab.go(self.server.get_url())

    heading = grab.doc.pyquery('h1').text()
    self.assertEqual(heading, 'Hello world')
def test_submit(self):
    # Submits forms served by the test server and checks the query
    # string that arrives in the request body.
    grab = build_grab()

    # Single form with one edited input
    self.server.response['get.data'] = POST_FORM % self.server.get_url()
    grab.go(self.server.get_url())
    grab.doc.set_input('name', 'Alex')
    grab.submit()
    self.assert_equal_qs(self.server.request['data'],
                         b'name=Alex&secret=123')

    # Form with several submit controls: (submit() kwargs, expected body)
    submit_cases = (
        # Default submit control
        ({}, b'secret=123&submit1=submit1'),
        # Selected submit control
        ({'submit_name': 'submit2'}, b'secret=123&submit2=submit2'),
        # Default submit control if submit control name is invalid
        ({'submit_name': 'submit3'}, b'secret=123&submit1=submit1'),
    )
    for submit_kwargs, expected_body in submit_cases:
        self.server.response['get.data'] = MULTIPLE_SUBMIT_FORM
        grab.go(self.server.get_url())
        grab.submit(**submit_kwargs)
        self.assert_equal_qs(self.server.request['data'], expected_body)
def test_set_methods(self):
    # Each set_input* helper must select the form that owns the targeted
    # control; the selection is observed through the private
    # `_lxml_form` attribute of the document.
    # pylint: disable=no-member,protected-access
    grab = build_grab()
    self.server.response['get.data'] = FORMS_HTML
    grab.go(self.server.get_url())
    doc = grab.doc

    # Nothing is selected before the first set_* call
    self.assertEqual(doc._lxml_form, None)

    # set_input() by name
    doc.set_input('gender', '1')
    self.assertEqual('common_form', doc._lxml_form.get('id'))

    # 'query' cannot be set while this form is selected
    with self.assertRaises(KeyError):
        doc.set_input('query', 'asdf')

    # set_input_by_id(), starting from no selection
    doc._lxml_form = None
    doc.set_input_by_id('search_box', 'asdf')
    self.assertEqual('search_form', doc._lxml_form.get('id'))

    # set_input_by_number() on an explicitly chosen form
    doc.choose_form(xpath='//form[@id="common_form"]')
    doc.set_input_by_number(0, 'asdf')

    # set_input_by_xpath(), starting from no selection
    doc._lxml_form = None
    doc.set_input_by_xpath('//*[@name="gender"]', '2')
    self.assertEqual('common_form', doc._lxml_form.get('id'))
def test_assign_unicode_to_body(self):
    # doc.body accepts byte strings (also when re-assigned) ...
    grab = build_grab()
    for raw in (b'abc', b'def'):
        grab.doc.body = raw

    # ... but assigning a unicode string raises GrabMisuseError
    with self.assertRaises(GrabMisuseError):
        grab.doc.body = u'Спутник'
def test_body_maxsize(self):
    # With body_maxsize=100 a 1 MiB response must be cut far short.
    grab = build_grab()
    grab.setup(body_maxsize=100)
    one_mib = 1024 * 1024
    self.server.response['get.data'] = 'x' * one_mib
    grab.go(self.server.get_url())

    # Should be less than 50kb
    received = len(grab.doc.body)
    self.assertTrue(received < 50000)
def test_redirect_session(self):
    # A cookie set on a redirect response must be sent with the
    # follow-up request to the redirect target.

    # Sanity check: a plain response cookie is visible on the document
    grab = build_grab()
    self.server.response['cookies'] = {'foo': 'bar'}.items()
    grab.go(self.server.get_url())
    self.assertEqual(grab.doc.cookies['foo'], 'bar')

    # Setup one-time redirect that also sets the cookie
    grab = build_grab()
    self.server.response['cookies'] = {}
    redirect_headers = [
        ('Location', self.server.get_url()),
        ('Set-Cookie', 'foo=bar'),
    ]
    self.server.response_once['headers'] = redirect_headers
    self.server.response_once['code'] = 302
    grab.go(self.server.get_url())

    sent_value = self.server.request['cookies']['foo']['value']
    self.assertEqual(sent_value, 'bar')
def test_cookiefile(self):
    # Cookies read from `cookiefile` are sent with the request, merged
    # with cookies set by the server, and written back to the file.
    with temp_file() as tmp_file:
        grab = build_grab()
        file_cookies = [{
            'name': 'spam',
            'value': 'ham',
            'domain': self.server.address,
        }]
        with open(tmp_file, 'w') as out:
            json.dump(file_cookies, out)

        # One cookie is sent in the server response,
        # another one is passed via the `cookiefile` option
        self.server.response['cookies'] = {'godzilla': 'monkey'}.items()
        grab.setup(cookiefile=tmp_file, debug=True)
        grab.go(self.server.get_url())
        sent_value = self.server.request['cookies']['spam']['value']
        self.assertEqual(sent_value, 'ham')

        # This is the correct result of combining the two cookies
        merged_cookies = [('godzilla', 'monkey'), ('spam', 'ham')]

        # grab.cookies should contain the merged cookies
        self.assertEqual(set(merged_cookies), set(grab.cookies.items()))

        # the `cookiefile` file should contain the merged cookies
        with open(tmp_file) as inp:
            stored = set((rec['name'], rec['value'])
                         for rec in json.load(inp))
            self.assertEqual(set(merged_cookies), stored)

        # Just ensure it works
        grab.go(self.server.get_url())
def test_request_counter(self):
    # After a reset, `request_counter` on an instance reflects requests
    # made by every Grab instance, including ones in other threads.
    import threading

    reset_request_counter()
    grab = build_grab()
    for expected in (1, 2):
        grab.go(self.server.get_url())
        self.assertEqual(grab.request_counter, expected)

    def fetch_once():
        worker_grab = build_grab()
        worker_grab.go(self.server.get_url())

    # Make 10 requests in concurrent threads
    workers = []
    for _ in six.moves.range(10):
        worker = threading.Thread(target=fetch_once)
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()

    # 2 earlier requests + 10 threaded ones + this one
    grab.go(self.server.get_url())
    self.assertEqual(grab.request_counter, 13)
def test_find_link(self):
    # find_link() takes a bytes pattern: it returns the matching href,
    # None when nothing matches, and rejects unicode input.
    markup = b'<a href="http://ya.ru/">ya.ru</a>'
    grab = build_grab(markup)

    found = grab.find_link(b'ya.ru', make_absolute=True)
    self.assertEqual('http://ya.ru/', found)

    missing = grab.find_link(b'google.ru', make_absolute=True)
    self.assertEqual(None, missing)

    with self.assertRaises(GrabMisuseError):
        grab.find_link(u'asdf')
def test_original_exceptions_grab(self):
    # A request to a non-routable address must raise GrabNetworkError
    # whose `original_exc` is the underlying pycurl error.
    import pycurl

    grab = build_grab()
    # assertRaises makes the test fail when no exception is raised;
    # the former try/except form passed silently in that case.
    with self.assertRaises(GrabNetworkError) as caught:
        grab.go('http://%s' % NON_ROUTABLE_IP)
    self.assertIsInstance(caught.exception.original_exc, pycurl.error)
def test_original_exceptions_urllib2(self):
    # A request to a non-routable address must raise GrabNetworkError
    # whose `original_exc` is urllib3's ConnectTimeoutError.
    from urllib3.exceptions import ConnectTimeoutError

    grab = build_grab()
    # assertRaises makes the test fail when no exception is raised;
    # the former try/except form passed silently in that case.
    with self.assertRaises(GrabNetworkError) as caught:
        grab.go('http://%s' % NON_ROUTABLE_IP)
    self.assertIsInstance(caught.exception.original_exc,
                          ConnectTimeoutError)
def test_put(self):
    # A PUT with a 3-byte body must arrive as PUT with content-length 3.
    grab = build_grab()
    grab.setup(
        post=b'abc',
        url=self.server.get_url(),
        method='put',
        debug=True,
    )
    self.server.request['debug'] = True
    grab.request()

    self.assertEqual(self.server.request['method'], 'PUT')
    self.assertEqual(self.server.request['headers']['content-length'],
                     '3')
def test_choose_form_by_element(self):
    # Choosing a form through one of its inputs must select the form
    # that contains it (here: the second form of the document).
    markup = (
        b' <form><input name="foo"></form> '
        b'<form><input name="bar"></form> '
    )
    grab = build_grab(markup)
    grab.choose_form_by_element('//input[@name="bar"]')

    second_form = grab.doc('//form[2]').node()
    self.assertEqual(second_form, grab.doc.form)
def test_empty_useragent_pycurl(self):
    # Empty string disables the default pycurl user-agent: the header
    # is either absent or empty on the server side.
    grab = build_grab()
    grab.setup(user_agent='')
    grab.go(self.server.get_url())

    received = self.server.request['headers'].get('user-agent', '')
    self.assertEqual(received, '')
def test_pyquery_handler(self):
    # doc.pyquery() must expose the fetched document to CSS-style
    # selection.
    page = '<body><h1>Hello world</h1><footer>2014</footer>'
    self.server.response['get.data'] = page
    grab = build_grab()
    grab.go(self.server.get_url())

    heading = grab.doc.pyquery('h1').text()
    self.assertEqual(heading, 'Hello world')
def test_redirect_session(self):
    # A cookie set on a redirect response must be sent with the
    # follow-up request to the redirect target.

    # Sanity check: a plain response cookie is visible on the document
    grab = build_grab()
    self.server.response['cookies'] = {'foo': 'bar'}.items()
    grab.go(self.server.get_url())
    self.assertEqual(grab.doc.cookies['foo'], 'bar')

    # Setup one-time redirect that also sets the cookie
    grab = build_grab()
    self.server.response['cookies'] = {}
    redirect_headers = [
        ('Location', self.server.get_url()),
        ('Set-Cookie', 'foo=bar'),
    ]
    self.server.response_once['headers'] = redirect_headers
    self.server.response_once['code'] = 302
    grab.go(self.server.get_url())

    sent_value = self.server.request['cookies']['foo']['value']
    self.assertEqual(sent_value, 'bar')
def test_post_multivalue_key(self):
    # A list value in `post` must expand into one key=value pair per
    # item, in order.
    grab = build_grab()
    grab.setup(post=[('foo', [1, 2])])
    grab.go(self.server.get_url())

    sent_body = self.server.request['data']
    self.assertEqual(sent_body, b'foo=1&foo=2')
def test_attribute_exception(self):
    # `grab.exception` starts out as None and holds the network error
    # after a failed request.
    grab = build_grab()
    self.assertTrue(grab.exception is None)

    unreachable_url = 'http://%s' % NON_ROUTABLE_IP
    try:
        grab.go(unreachable_url)
    except GrabNetworkError:
        # Expected; the attribute is what gets checked below
        pass

    self.assertTrue(isinstance(grab.exception, GrabNetworkError))
def test_cookiefile_empty(self):
    # Smoke test: an empty `cookiefile` should not raise an exception.
    with temp_file() as tmp_file:
        grab = build_grab()

        # Create the file with no content at all
        with open(tmp_file, 'w') as out:
            out.write('')

        grab.setup(cookiefile=tmp_file)
        grab.go(self.server.get_url())