def test_requested_resources(context):
    "response object should contain the url of all subrequests"
    # Every sub-request (the page itself, its scripts and the image) must be
    # reported with status and URL; order is irrelevant, hence sorted compare.
    hollow = SleepyHollow()
    response = hollow.get(context.route_to('/fewresources'))

    response.status_code.should.equal(200)
    response.should.have.property('requested_resources').being.a(list)
    response.requested_resources.should.have.length_of(5)

    base = u'http://127.0.0.1:5000'
    expected = [
        {'status': 200, 'url': base + path}
        for path in (
            u'/fewresources',
            u'/media/js/jquery-1.8.3.min.js',
            u'/media/js/fewresources.js',
            u'/media/js/fewresources.js',
            u'/media/img/funny.gif',
        )
    ]
    sorted(response.requested_resources).should.equal(sorted(expected))
def test_js_confirms_doesnt_disrupt(context):
    "SleepyHollow will not block sleepy hollow"
    # A page that fires window.confirm() must not hang the engine: the
    # request completes and the page body is still rendered.
    page_url = context.route_to("/jsconfirm")
    response = SleepyHollow().get(page_url)
    response.status_code.should.equal(200)
    expect("Confirmation dialogs don't block").to.be.within(response.html)
def test_request_api_for_authentication(context):
    "SleepyHollow supports requests-based authentication"
    # HTTP basic-auth credentials are passed as a (user, password) tuple,
    # mirroring the `requests` library API.
    credentials = ('lincoln', 'gabriel')
    browser = SleepyHollow()
    response = browser.get(context.route_to("/auth/simple"), auth=credentials)
    response.status_code.should.equal(200)
    expect('Very Simple').to.be.within(response.text)
def test_config_stuff(context):
    "The config dictionary should be forwarded to the C layer"
    # With the screenshot flag enabled the response must carry the
    # captured image bytes back from the C layer.
    settings = {'screenshot': True}
    browser = SleepyHollow()
    response = browser.get(context.route_to('/simple'), config=settings)
    response.screenshot_bytes.shouldnt.be.empty
def test_save_screenshot(context):
    "The save_screenshot method should complain if screenshot is not enabled"
    # With screenshots disabled, save_screenshot must raise ValueError.
    # The message below matches the C layer verbatim (typo included).
    browser = SleepyHollow()
    response = browser.get(context.route_to('/simple'),
                           config={'screenshot': False})
    expected_message = "Screenshot should be enabled throught the config dict"
    response.save_screenshot.when.called_with('stuff.png').should.throw(
        ValueError, expected_message)
def test_follows_meta_redirect(context):
    "SleepyHollow will follow meta redirects"
    # The engine must transparently chase <meta http-equiv="refresh">
    # hops and expose the final URL on the response object.
    browser = SleepyHollow()
    response = browser.get(context.route_to("/metaredirect"))
    response.status_code.should.equal(200)
    expect("Successfully redirected!").to.be.within(response.html)
    response.url.should.equal('http://localhost:5000/postredirect')
def test_response_status_codes(context):
    "The request method should report the right http status codes"
    # Data-driven over the three status classes the test server exposes:
    # for each endpoint the numeric code, the reason phrase and the body
    # text must all agree.
    sl = SleepyHollow()
    cases = (
        (200, 'OK'),
        (404, 'Not Found'),
        (500, 'Internal Server Error'),
    )
    for code, reason in cases:
        response = sl.get(context.route_to('/status-%d' % code))
        response.status_code.should.equal(code)
        response.reason.should.equal(reason)
        expect('Status %d' % code).to.be.within(response.text)
def test_request_api(context):
    "the get method should return exactly the same thing of request(get)"
    # sl.get(url) is sugar for sl.request('get', url); the two responses
    # must therefore be indistinguishable field by field.
    browser = SleepyHollow()
    url = context.route_to("/simple")
    via_request = browser.request('get', url)
    via_get = browser.get(url)
    for attr in ('status_code', 'reason', 'text', 'content'):
        getattr(via_request, attr).should.equal(getattr(via_get, attr))
def test_can_authenticate_in_cookie_based_websites(context):
    "Sleepy Hollow can keep the session in cookie based websites"
    browser = SleepyHollow()

    # Anonymous access to /admin should bounce to the login page.
    first = browser.get(context.route_to('/admin'))
    first.url.should.equal(u'http://127.0.0.1:5000/login')
    first.status_code.should.equal(200)

    # Logging in sets a session cookie; the follow-up request lands on
    # /admin already authenticated.
    second = browser.post(context.route_to('/login'),
                          {'email': '*****@*****.**'})
    second.url.should.equal(u'http://127.0.0.1:5000/admin')
    second.status_code.should.equal(302)
    expect("Hello lincoln, welcome to the admin").to.be.within(second.text)
class Scraper(object):
    """Crawls the mobile saks.com catalog down to product detail pages."""

    base_url = 'http://m.saks.com'

    def __init__(self):
        self.http = SleepyHollow()

    def path(self, to):
        # Absolute URLs pass through untouched; relative paths are glued
        # onto base_url with exactly one separating slash.
        if to.startswith('http'):
            return to
        return '/'.join([self.base_url, to.lstrip('/')])

    def get(self, path):
        # All traffic goes through the one shared browser instance.
        return self.http.get(self.path(path))

    def get_root_links(self):
        # Top-level category links from the search landing page.
        print("Getting root links...")
        response = self.get('/eSearch.jsp?sid=127F38CAD8BC&N_Dim=0'
                            '&bmSingle=N_Dim&N=1553&Ns=P_0_sort')
        tree = lhtml.fromstring(response.html)
        anchors = tree.cssselect('#left-nav-content > div > a')
        return [anchor.attrib['href'] for anchor in anchors]

    def get_subcategory_links(self, link):
        # Subcategory links inside one category page.
        print("Getting category links on %r..." % link)
        tree = lhtml.fromstring(self.get(link).html)
        anchors = tree.cssselect('#left-nav-content > div > a')
        return [anchor.attrib['href'] for anchor in anchors]

    def get_product_links(self, parent_url):
        # Links to each product row on a listing page.
        print("Getting product links on %r..." % parent_url)
        tree = lhtml.fromstring(self.get(parent_url).html)
        return [anchor.attrib['href']
                for anchor in tree.cssselect('.productRow > a')]

    def get_product_details(self, product_url):
        # Name and main-image URL scraped from a product page.
        print("Getting product details on %r..." % product_url)
        tree = lhtml.fromstring(self.get(product_url).html)
        image = tree.cssselect("#productMainImg")[0]
        title = tree.cssselect('form h1')[0]
        return dict(name=title.text.strip(), img=image.attrib['src'])

    def scrape(self):
        # Depth-first walk: root categories -> subcategories -> products.
        for root in self.get_root_links():
            for subcat in self.get_subcategory_links(root):
                for prod in self.get_product_links(subcat):
                    print(self.get_product_details(prod))
def test_response_headers(context):
    "It should be possible to inspect the headers of a response object"
    browser = SleepyHollow()
    response = browser.get(context.route_to('/status-200'))
    response.should.have.property('headers').being.a(dict)
    # Pin the exact header values the test server is known to emit.
    expected_headers = {
        'Content-Type': u'text/html; charset=UTF-8',
        'Server': u'TornadoServer/2.4.1',
        'Content-Length': u'91',
        'Etag': u'"917c97d9437cbd1c1192f2f516e7155183b58232"',
    }
    for header, value in expected_headers.items():
        response.headers.should.have.key(header).being.equal(value)
def test_getting_js_errors(context):
    "response objects should contain js errors"
    browser = SleepyHollow()
    response = browser.get(context.route_to('/jserror'))

    response.status_code.should.equal(200)
    # js_errors is an immutable tuple of dicts, one per uncaught JS error.
    response.should.have.property('js_errors').being.a(tuple)
    response.js_errors.should.have.length_of(1)

    expected_error = {
        'line_number': 3,
        'message': u'TypeError: \'undefined\' is not a function (evaluating \'window.intentional_error("javascript errors")\')',
        'source_id': u'http://127.0.0.1:5000/media/js/jserror.js',
    }
    response.js_errors.should.equal((expected_error,))
    # The error must not prevent the rest of the page from rendering.
    expect("IT WORKS").to.be.within(response.html)
def test_json_response(context):
    "Retrieving a JSON response object using the get method"
    browser = SleepyHollow()
    response = browser.get(context.route_to('/status-200.json'))

    # Type checks first...
    response.should.be.a(Response)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)

    # ...then the decoded JSON payload.
    expected_payload = {
        u'success': True,
        u'status': 200,
        u'method': 'GET',
    }
    response.json.should.equal(expected_payload)
def test_response(context):
    "Retrieving the response object using the get method"
    browser = SleepyHollow()
    simple_url = context.route_to('/simple')
    response = browser.get(simple_url)

    # Type checks.
    response.should.be.a(Response)
    response.url.should.be.a(unicode)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)
    # A non-JSON body leaves the json attribute unset.
    response.json.should.be.none

    # Value checks.
    response.url.should.equal(simple_url)
    response.status_code.should.equal(200)
    expect('Very Simple').to.be.within(response.text)
class GetASaleProduct(object):
    """Walks bananarepublic.com sale categories down to product pages and
    sanity-checks that each product's main image is a JPG.
    """

    # Matches <meta http-equiv="refresh" content="N;URL=..."> redirects,
    # capturing the target address in the `url` group.  All fragments are
    # raw strings so \s and \d are explicit regex syntax rather than
    # (deprecated, eventually fatal) string escapes.
    meta_redirect_url = re.compile(r'meta\s+'
                                   r'http-equiv="refresh"\s+'
                                   r'content="\d+;URL=(?P<url>.*?)"',
                                   re.I)

    def __init__(self):
        self.http = SleepyHollow()

    def get_response_with_dom(self, url):
        """GET `url` (relative paths resolve against bananarepublic.com),
        follow meta-refresh redirects recursively, and attach the parsed
        lxml tree to the response as `response.dom`.
        """
        if not url.startswith('http'):
            url = 'http://www.bananarepublic.com/%s' % url.lstrip('/')
        response = self.http.get(url, config=dict(screenshot=True))
        meta_refresh = self.meta_redirect_url.search(response.html)
        if meta_refresh is not None:
            # NOTE(review): no depth limit here -- a redirect loop would
            # recurse forever; confirm the target site cannot produce one.
            return self.get_response_with_dom(meta_refresh.group('url'))
        response.dom = lhtml.fromstring(response.html)
        return response

    def find_sale_links(self):
        """Hrefs of left-nav categories labelled Sale/Clearance/Discount."""
        print("Getting sales links...")
        response = self.get_response_with_dom(
            'http://www.bananarepublic.com/products/index.jsp')
        return response.dom.xpath("//ul/li[contains(@class, 'idxBottomCat')]/a["
                                  "contains(text(), 'Sale') or "
                                  "contains(text(), 'Clearance') or "
                                  "contains(text(), 'Discount')]/@href")

    def find_product_links(self, category_link):
        """Hrefs of every product item inside a category page."""
        print("Getting product links...")
        response = self.get_response_with_dom(category_link)
        return response.dom.xpath("//a[contains(@class, 'productItemName')]/@href")

    def start(self):
        """Fetch the first product of the first sale category and assert
        its main image URL ends in 'jpg' (both `break`s are deliberate:
        one sample is enough for this smoke check).
        """
        for category_link in self.find_sale_links():
            for product_link in self.find_product_links(category_link):
                response = self.get_response_with_dom(product_link)
                img = response.dom.cssselect("#product_image")[0]
                src = img.attrib['src']
                assert src.lower().endswith('jpg'), 'Expected %r to be a JPG' % src
                break
            break
def test_get_sending_headers(context):
    "requesting with GET adding custom headers"
    browser = SleepyHollow()
    custom_headers = {'X-Name': 'Gabriel'}
    response = browser.get(context.route_to('/status-200.json'),
                           headers=custom_headers)

    # Type checks.
    response.should.be.a(Response)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)

    # The test server echoes our custom header back in the JSON payload.
    response.json.should.equal({
        u'success': True,
        u'method': 'GET',
        u'status': 200,
        u'X-Name': u'Gabriel',
    })
#!/usr/bin/env python # -*- coding: utf-8 -*- from sure import expect from sleepyhollow import SleepyHollow http = SleepyHollow() for x in range(101): r = http.get('http://localhost:5000/heavy', {'index': x}) expect(r.url).to.equal('http://localhost:5000/heavy?index=%d' % x) print r, r.url, id(r)
class Scraper(object):
    """Crawls the mobile saks.com catalog: root categories -> subcategories
    -> product listings -> product detail pages.
    """

    base_url = 'http://m.saks.com'

    def __init__(self):
        self.http = SleepyHollow()

    def path(self, to):
        """Return `to` untouched if absolute, else join it onto base_url
        with exactly one separating slash.
        """
        if to.startswith('http'):
            return to
        return '/'.join([self.base_url, to.lstrip('/')])

    def get(self, path):
        """GET `path` (absolute or relative) through the shared browser."""
        return self.http.get(self.path(path))

    def _hrefs(self, url, selector):
        # Shared fetch/parse/extract step: the three link-listing methods
        # below were identical except for URL and CSS selector.
        dom = lhtml.fromstring(self.get(url).html)
        return [link.attrib['href'] for link in dom.cssselect(selector)]

    def get_root_links(self):
        """Top-level category links from the search landing page."""
        print("Getting root links...")
        return self._hrefs(
            '/eSearch.jsp?sid=127F38CAD8BC&N_Dim=0&bmSingle=N_Dim'
            '&N=1553&Ns=P_0_sort',
            '#left-nav-content > div > a')

    def get_subcategory_links(self, link):
        """Subcategory links inside one category page."""
        print("Getting category links on %r..." % link)
        return self._hrefs(link, '#left-nav-content > div > a')

    def get_product_links(self, parent_url):
        """Links to each product row on a listing page."""
        print("Getting product links on %r..." % parent_url)
        return self._hrefs(parent_url, '.productRow > a')

    def get_product_details(self, product_url):
        """Scrape name and main-image URL from a product page."""
        print("Getting product details on %r..." % product_url)
        dom = lhtml.fromstring(self.get(product_url).html)
        img = dom.cssselect("#productMainImg")[0]
        name = dom.cssselect('form h1')[0]
        return dict(
            name=name.text.strip(),
            img=img.attrib['src'],
        )

    def scrape(self):
        """Depth-first walk over the whole catalog, printing each product."""
        for root in self.get_root_links():
            for subcat in self.get_subcategory_links(root):
                for prod in self.get_product_links(subcat):
                    print(self.get_product_details(prod))
# -*- coding: utf-8 -*-
# Capture a screenshot of the local test server's landing page and store it
# under spec/screenshots/<name>.png, where <name> is the first CLI argument.
from __future__ import unicode_literals
import sys
from sleepyhollow import SleepyHollow

render_settings = {
    'screenshot': True,
    'width': 1300,
    'height': 600,
}

browser = SleepyHollow()
response = browser.get("http://localhost:5000", config=render_settings)
response.save_screenshot("../spec/screenshots/{0}.png".format(sys.argv[1]))