class YoukuGhostDriver(object):
    """Drive a Ghost browser session against a ``youku_ghost.html`` page.

    The page is opened once in ``__init__``; ``parse`` then calls the page's
    ``window.getPlayUrl`` JavaScript function for a given video id.
    """

    def __init__(self, host, port, timeout):
        """Create the Ghost session and open the helper page.

        Args:
            host: Host name or address serving ``youku_ghost.html``.
            port: Port of that server.
            timeout: Passed to ``Session`` as ``wait_timeout``.
        """
        #url = 'http://111.161.35.198:12210/youku_ghost.html'
        url = 'http://%s:%s/youku_ghost.html' % (host, port)
        self.ghost = Ghost()
        self.session = Session(self.ghost, wait_timeout=timeout, plugins_enabled=True)
        self.session.open(url)

    def parse(self, vid):
        """Run ``window.getPlayUrl`` for ``vid`` and read the ``ck`` element.

        Evaluates ``window.getPlayUrl("<vid>")`` in the page, waits for the
        ``div[id="ck"]`` selector and, when the wait reports success, stores
        the element's ``innerHTML`` in ``res``. Any exception raised along
        the way is logged with its traceback.

        Args:
            vid: Video id interpolated into the JavaScript call.
        """
        try:
            # Default result when the selector wait does not succeed.
            res = []
            # NOTE(review): vid is formatted straight into the JavaScript
            # source; presumably it is a trusted id - confirm with callers.
            self.session.evaluate('window.getPlayUrl("%s")' % vid)
            success, resources = self.session.wait_for_selector('div[id="ck"]')
            if success:
                ck = self.session.evaluate(
                    'document.getElementById("ck").innerHTML')
                # Only the first item of the evaluate() result is kept.
                res = ck[0]
        except Exception, e:
            # Failures are logged rather than handled in this clause.
            log.app_log.error(traceback.format_exc())
        finally:
# NOTE(review): the next statements look like the tail of a helper whose
# `def` line is not part of this section - reproduced unchanged; confirm the
# enclosing scope before relying on this layout.
# Replace every character with a code point >= 128 by a space.
content = ''.join([i if ord(i) < 128 else ' ' for i in content])
print(file_name)
with open(file_name, 'wt', encoding='utf-8') as file:
    file.write(content)

# Crawl pages one at a time, accumulating their text in story_buffer and
# handing the buffer to save_story() each time lower_bound passes upper_bound.
searching = True
story_buffer = ''
next_url = initial_url
while searching:
    try:
        session.open(next_url, timeout=300)
        lower_bound += 1
        session.wait_for_selector(next_button, 60)
    except Exception:
        # A failed page load or selector wait ends the crawl (best effort).
        # Catching Exception instead of using a bare `except:` lets
        # KeyboardInterrupt and SystemExit propagate, so the script can still
        # be interrupted deliberately.
        break

    # Pull the visible text of the wrapping element out of the page.
    story_data = session.evaluate(
        'document.querySelector("{0}").innerText;'.format(wrapping_div))
    text = story_data[0]  # only the first item of the evaluate() result is used
    story_buffer += str(text)

    if lower_bound > upper_bound:
        # Flush the accumulated text, then advance the window by `chunks`.
        save_story(series_name, lower_bound - chunks, upper_bound, story_buffer)
        story_buffer = ''
        lower_bound = upper_bound
        upper_bound += chunks
    # NOTE(review): next_url is not reassigned in the statements shown here -
    # confirm it is advanced later in the loop, otherwise the same page is
    # opened on every iteration.