def process_request(self, request, spider):
    """Render ``request.url`` in a Ghost session for opted-in spiders.

    Only spiders whose ``name`` is listed in ``WEBKIT_DOWNLOADER`` are
    handled; for every other spider the method returns ``None``.

    Args:
        request: The request whose ``url`` is opened in the Ghost session.
        spider: The spider issuing the request. The live session is stored
            on it as ``spider.webkit_se``.

    Returns:
        An ``HtmlResponse`` whose body is the UTF-8 encoded ``innerHTML`` of
        the document element, or ``None`` when the spider is not opted in.
    """
    if spider.name not in WEBKIT_DOWNLOADER:
        return None

    ghost = Ghost()
    session = Session(ghost, download_images=False)
    session.open(request.url)

    # evaluate() returns (result, resources): the first item is the value of
    # the JavaScript expression, the second the resources that were loaded.
    # The page markup is therefore the *first* element, not the second.
    result, _resources = session.evaluate(
        'document.documentElement.innerHTML')

    # Expose the session on the spider, as the original code did.
    spider.webkit_se = session

    rendered_body = str(result).encode('utf8')
    return HtmlResponse(request.url, body=rendered_body)
class YoukuGhostDriver(object): def __init__(self, host, port, timeout): #url = 'http://111.161.35.198:12210/youku_ghost.html' url = 'http://%s:%s/youku_ghost.html' % (host, port) self.ghost = Ghost() self.session = Session(self.ghost, wait_timeout=timeout, plugins_enabled=True) self.session.open(url) def parse(self, vid): try: res = [] self.session.evaluate('window.getPlayUrl("%s")' % vid) success, resources = self.session.wait_for_selector('div[id="ck"]') if success: ck = self.session.evaluate( 'document.getElementById("ck").innerHTML') res = ck[0] except Exception, e: log.app_log.error(traceback.format_exc()) finally:
# Pages visited by the script: the item page first, then the checkout page.
item_url = 'http://www.supremenewyork.com/shop/accessories/oi6nqp83m/hsyw4g52m'
checkout_url = 'https://www.supremenewyork.com/checkout'

# Browser session setup.
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
header = {'User-Agent': ua}
gh = Ghost()
se = Session(
    gh,
    user_agent=ua,
    wait_timeout=20,
    wait_callback=None,
    display=True,
    viewport_size=(1080, 1680),
    download_images=True,
)

# Open the item page, click its "commit" input, then load the checkout page.
se.open(item_url)
se.evaluate("""document.querySelector('input[name="commit"]').click();""")
se.sleep(0.5)
se.open(checkout_url)

# Save the page content to supreme/<YYYYMMDD>.html.
ISOFORMAT = '%Y%m%d'
today = datetime.today()
filename = today.strftime(ISOFORMAT)
# The context manager closes the file even if write() raises; the previous
# open()/write()/close() sequence leaked the handle in that case.
with open('supreme' + '/' + filename + '.html', 'w') as f:
    f.write(se.content)

# Drop into an interactive console with the session still alive.
import code
code.interact(banner="", local=locals())
from ghost import Ghost, Session

ghost = Ghost()
USERAGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"

# Log in to the mobile Facebook site and save a screenshot of the result.
with ghost.start():
    session = Session(ghost, download_images=False, display=True,
                      user_agent=USERAGENT, viewport_size=(800, 600))
    try:
        page, rs = session.open("https://m.facebook.com/login.php",
                                timeout=120)
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently skip this validation.
        if page.http_status != 200:
            raise RuntimeError(
                "login page returned HTTP status %s" % page.http_status)

        # NOTE(review): the credentials below are hard-coded in source.
        # They should be rotated and loaded from the environment or a secret
        # store; left unchanged here so the script's behavior is preserved.
        session.evaluate("""
        document.querySelector('input[name="email"]').value = '*****@*****.**';
        document.querySelector('input[name="pass"]').value = 'wikipedia150101facebook';
        """)
        # Clicking "login" triggers a page load, so wait for it to finish.
        session.evaluate(
            """document.querySelector('input[name="login"]').click();""",
            expect_loading=True)

        # To inspect the resulting HTML while debugging, write
        # ``session.content`` to a UTF-8 file, e.g. with
        # ``codecs.open('fb.html', encoding='utf-8', mode='w')``.
        # session.save_cookies('fbookie')
        session.capture_to(path='fbookie.png')
    finally:
        # Gracefully clean off to avoid errors; done in ``finally`` so the
        # session is torn down even when one of the steps above fails.
        session.webview.setHtml('')
        session.exit()
# Follow the "next" link from page to page, accumulating each page's text in
# ``story_buffer`` and handing the buffer to ``save_story`` whenever
# ``lower_bound`` passes ``upper_bound``.
# NOTE(review): block structure was reconstructed from whitespace-collapsed
# source; confirm that all four statements under the ``if`` belong there.
searching = True
story_buffer = ''
next_url = initial_url
while searching:
    try:
        session.open(next_url, timeout=300)
        lower_bound += 1
        session.wait_for_selector(next_button, 60)
    except Exception:
        # Any failure to load the page or find the next button ends the
        # crawl. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate instead of being
        # silently treated as "no more pages".
        break

    # evaluate() returns a tuple; element 0 is the JavaScript result.
    story_data = session.evaluate(
        'document.querySelector("{0}").innerText;'.format(wrapping_div))
    text = story_data[0]
    story_buffer += str(text)

    if lower_bound > upper_bound:
        # Flush the accumulated text and advance the bounds by ``chunks``.
        save_story(series_name, lower_bound - chunks, upper_bound,
                   story_buffer)
        story_buffer = ''
        lower_bound = upper_bound
        upper_bound += chunks

    # Read the next page's URL from the next button's href.
    link_data = session.evaluate(
        'document.querySelector("{0}").href'.format(next_button))
    print(link_data[0])
    next_url = link_data[0]