Beispiel #1
0
 def process_request(self, request, spider):
     """Render ``request.url`` in a Ghost.py session for selected spiders.

     When ``spider.name`` is listed in ``WEBKIT_DOWNLOADER`` the URL is
     opened in a fresh Ghost ``Session`` (image downloads disabled), the
     serialized DOM is read back through JavaScript and returned wrapped in
     an ``HtmlResponse``.  The session is also stored on the spider as
     ``spider.webkit_se``.

     For every other spider the method returns ``None`` (presumably so the
     framework's regular download path handles the request -- confirm
     against the middleware contract in use).
     """
     if spider.name not in WEBKIT_DOWNLOADER:
         return None

     gh = Ghost()
     se = Session(gh, download_images=False)
     se.open(request.url)
     # evaluate() hands back a pair: the value of the JavaScript expression
     # first, the resources loaded while evaluating second.  The rendered
     # markup is therefore the first element, not the resource list.
     result, _resources = se.evaluate(
         'document.documentElement.innerHTML')
     spider.webkit_se = se

     # Encode text explicitly instead of going through str(), which would
     # attempt an ASCII conversion of non-ASCII markup on Python 2.
     if isinstance(result, bytes):
         rendered_body = result
     else:
         rendered_body = (result or u'').encode('utf8')
     return HtmlResponse(request.url, body=rendered_body)
Beispiel #2
0
class YoukuGhostDriver(object):
    def __init__(self, host, port, timeout):
        """Create a Ghost session and load the helper page from host:port.

        ``host`` and ``port`` are interpolated into the address of
        ``youku_ghost.html``; ``timeout`` is passed to the session as its
        ``wait_timeout``.  Plugins are enabled for the session.
        """
        # Example of a resulting address:
        #   http://111.161.35.198:12210/youku_ghost.html
        page_url = 'http://%s:%s/youku_ghost.html' % (host, port)
        session_options = {
            'wait_timeout': timeout,
            'plugins_enabled': True,
        }
        self.ghost = Ghost()
        self.session = Session(self.ghost, **session_options)
        self.session.open(page_url)

    def parse(self, vid):
        # Calls window.getPlayUrl(vid) inside the loaded page, waits for the
        # element with id "ck" to appear and reads that element's innerHTML.
        try:
            # Default value; it is kept when the selector wait is unsuccessful.
            res = []
            self.session.evaluate('window.getPlayUrl("%s")' % vid)
            success, resources = self.session.wait_for_selector('div[id="ck"]')
            if success:
                ck = self.session.evaluate(
                    'document.getElementById("ck").innerHTML')
                # Keep only the first element of what evaluate() returned.
                res = ck[0]

        # Python 2 "except X, name" syntax; the bound exception `e` is not
        # used, the formatted traceback is written to the application log.
        except Exception, e:
            log.app_log.error(traceback.format_exc())

        # NOTE(review): the body of this `finally` clause is not part of the
        # visible excerpt -- confirm what it does (and what is returned).
        finally:
Beispiel #3
0
# Pages visited by the script: the product page first, then the checkout.
item_url = 'http://www.supremenewyork.com/shop/accessories/oi6nqp83m/hsyw4g52m'
checkout_url = 'https://www.supremenewyork.com/checkout'
##############################
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
header = {'User-Agent': ua}
gh = Ghost()
se = Session(gh,
             user_agent=ua,
             wait_timeout=20,
             wait_callback=None,
             display=True,
             viewport_size=(1080, 1680),
             download_images=True)
##############################

# Open the item page, click its "commit" input, pause briefly and move on to
# the checkout page (presumably the click adds the item to the cart -- the
# page's own behavior is not visible here).
se.open(item_url)
se.evaluate("""document.querySelector('input[name="commit"]').click();""")
se.sleep(0.5)
se.open(checkout_url)

# Local imports keep this script section self-contained.
import codecs
import os

# Dump the checkout page's HTML to supreme/<YYYYMMDD>.html.
ISOFORMAT = '%Y%m%d'
today = datetime.today()
filename = today.strftime(ISOFORMAT)
output_dir = 'supreme'
if not os.path.isdir(output_dir):
    # Create the target directory instead of failing after the browser work.
    os.makedirs(output_dir)
# The context manager closes the file even if the write raises; the explicit
# UTF-8 encoding avoids depending on the platform default for page markup.
with codecs.open(os.path.join(output_dir, filename + '.html'), 'w',
                 encoding='utf-8') as f:
    f.write(se.content)

# Drop into an interactive console with the script's names for inspection.
import code
code.interact(banner="", local=locals())
Beispiel #4
0
from ghost import Ghost, Session

ghost = Ghost()
USERAGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"

# JavaScript evaluated inside the loaded login page.
# NOTE(review): the credentials are hard-coded in the snippet below; consider
# supplying them from outside the source file.
FILL_CREDENTIALS_JS = """
    document.querySelector('input[name="email"]').value = '*****@*****.**'
    document.querySelector('input[name="pass"]').value = 'wikipedia150101facebook';
    """
SUBMIT_LOGIN_JS = """document.querySelector('input[name="login"]').click();"""

with ghost.start():
    session = Session(ghost, download_images=False, display=True, user_agent=USERAGENT, viewport_size=(800, 600))
    try:
        page, rs = session.open("https://m.facebook.com/login.php", timeout=120)
        # Explicit check rather than `assert`: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        if page is None or page.http_status != 200:
            raise RuntimeError("login page did not load with HTTP status 200")

        # Fill in the form fields, then click the login button and wait for
        # the page load that the click triggers.
        session.evaluate(FILL_CREDENTIALS_JS)
        session.evaluate(SUBMIT_LOGIN_JS, expect_loading=True)

        # Optional debugging aid (disabled): dump the resulting page.
        #   import codecs
        #   with codecs.open('fb.html', encoding='utf-8', mode='w') as f:
        #       f.write(session.content)

        # session.save_cookies('fbookie')
        session.capture_to(path='fbookie.png')
    finally:
        # gracefully clean off to avoid errors -- done in `finally` so the
        # session is also torn down when one of the steps above raises.
        session.webview.setHtml('')
        session.exit()
Beispiel #5
0

# Follow "next" links page by page, accumulating each page's text and
# flushing the accumulated text through save_story() in chunks.
searching = True
story_buffer = ''

next_url = initial_url

while searching:
    try:
        session.open(next_url, timeout=300)
        lower_bound += 1
        session.wait_for_selector(next_button, 60)
    except Exception:
        # A failed load or a missing "next" button ends the crawl.  Only
        # ordinary exceptions are treated that way, so KeyboardInterrupt and
        # SystemExit still propagate instead of being silently swallowed.
        break

    # Text of the element matched by `wrapping_div` on the current page;
    # the value is the first element of evaluate()'s return.
    story_data = session.evaluate(
        'document.querySelector("{0}").innerText;'.format(wrapping_div))
    text = story_data[0]
    story_buffer += str(text)

    # Once the page counter passes the current upper bound, write out the
    # buffered text and advance the window by `chunks`.
    if lower_bound > upper_bound:
        save_story(series_name, lower_bound - chunks, upper_bound,
                   story_buffer)
        story_buffer = ''
        lower_bound = upper_bound
        upper_bound += chunks

    # Address of the next page, read from the "next" button's href.
    link_data = session.evaluate(
        'document.querySelector("{0}").href'.format(next_button))
    print(link_data[0])
    next_url = link_data[0]