Ejemplo n.º 1
0
class YoukuGhostDriver(object):
    def __init__(self, host, port, timeout):
        """Create a Ghost session and load the ``youku_ghost.html`` page.

        :param host: host part of the page address.
        :param port: port part of the page address.
        :param timeout: forwarded to the session as ``wait_timeout``.
        """
        # The address has the form http://<host>:<port>/youku_ghost.html
        page_address = 'http://%s:%s/youku_ghost.html' % (host, port)
        self.ghost = Ghost()
        session_options = dict(wait_timeout=timeout, plugins_enabled=True)
        self.session = Session(self.ghost, **session_options)
        # Load the page right away; parse() evaluates JavaScript in it.
        self.session.open(page_address)

    def parse(self, vid):
        try:
            res = []
            self.session.evaluate('window.getPlayUrl("%s")' % vid)
            success, resources = self.session.wait_for_selector('div[id="ck"]')
            if success:
                ck = self.session.evaluate(
                    'document.getElementById("ck").innerHTML')
                res = ck[0]

        except Exception, e:
            log.app_log.error(traceback.format_exc())

        finally:
Ejemplo n.º 2
0
    content = ''.join([i if ord(i) < 128 else ' ' for i in content])
    print(file_name)
    with open(file_name, 'wt', encoding='utf-8') as file:
        file.write(content)


# Loop state: `searching` gates the loop below and `story_buffer`
# accumulates the text gathered since the last save_story() call.
searching = True
story_buffer = ''

# Address of the first page to load.
next_url = initial_url

# NOTE(review): neither `searching` nor `next_url` is reassigned in the lines
# visible here -- presumably that happens further down; confirm.
while searching:
    try:
        # Load the page, count it, then wait for the `next_button` selector.
        session.open(next_url, timeout=300)
        lower_bound += 1
        session.wait_for_selector(next_button, 60)
    except:
        # NOTE(review): a bare except stops the loop on any error, including
        # KeyboardInterrupt/SystemExit -- confirm this is intended.
        break

    # Read the innerText of the element matched by `wrapping_div`; the first
    # item of the evaluate() result is taken as the text.
    story_data = session.evaluate(
        'document.querySelector("{0}").innerText;'.format(wrapping_div))
    text = story_data[0]
    story_buffer += str(text)

    # Once the page counter passes `upper_bound`, hand the buffered text to
    # save_story(), reset the buffer and advance the window by `chunks`.
    if lower_bound > upper_bound:
        save_story(series_name, lower_bound - chunks, upper_bound,
                   story_buffer)
        story_buffer = ''
        lower_bound = upper_bound
        upper_bound += chunks