예제 #1
0
def _page_title(br):
    """Return the text of the current page's <title>, or '' if it has none."""
    title_tag = br.parsed.title
    return title_tag.get_text() if title_tag is not None else ''


def fetch():
    """Log in with RoboBrowser, run every search and save the result pages.

    Opens ``LOGIN_URL``, fills in and submits the form whose id is ``fm1``,
    then for each entry of ``VARIABLE_QUERY`` opens
    ``SEARCH_URL + FIXED_QUERY + entry`` and follows every link whose href
    contains both ``'good_path'`` and ``'credential'``.  Each page that is
    fetched successfully is written to ``result1``, ``result2``, ... in the
    current working directory.  Progress is reported on stderr.

    Relies on the module-level names ``LOGIN_URL``, ``SEARCH_URL``,
    ``FIXED_QUERY``, ``VARIABLE_QUERY``, ``RoboBrowser``, ``sys`` and
    ``time``.

    Raises:
        RuntimeError: if the login page has no form with id ``fm1``.
    """
    USERNAME = '******'
    PASSWORD = '******'
    result_no = 0
    br = RoboBrowser()
    br.open(LOGIN_URL)
    print(br)

    # RoboBrowser has no implicit "current form": the fields are set on the
    # Form object returned by get_form() and sent with submit_form().
    login_form = br.get_form(id="fm1")
    if login_form is None:
        raise RuntimeError('login form "fm1" not found at %s' % LOGIN_URL)
    login_form['username'].value = USERNAME
    login_form['password'].value = PASSWORD
    br.submit_form(login_form)

    # Automatic redirect sometimes fails, follow manually when needed
    if 'Redirecting' in _page_title(br):
        redirect_link = next(
            (
                a for a in br.get_links()
                if a.get('href') and 'click here' in a.get_text()
            ),
            None,
        )
        if redirect_link is not None:
            br.follow_link(redirect_link)
            print(br.response)

    # Loop through the searches, keeping fixed query parameters
    for actor in VARIABLE_QUERY:
        # I like to watch what's happening in the console
        print('***', actor, file=sys.stderr)
        # Let's do the actual query now
        br.open(SEARCH_URL + FIXED_QUERY + actor)
        # The query actually gives us links to the content pages we like,
        # but there are some other links on the page that we ignore
        nice_links = [
            a for a in br.get_links()
            if 'good_path' in a.get('href', '')
            and 'credential' in a.get('href', '')
        ]
        if not nice_links:  # Maybe the relevant results are empty
            # NOTE(review): this stops ALL remaining searches, as the
            # original did; use `continue` if only this one should be skipped.
            break
        for link in nice_links:
            # follow_link() returns nothing; the fetched page is exposed
            # through br.response / br.parsed afterwards.
            br.follow_link(link)
            if not br.response.ok:
                # Nothing ever goes perfectly, ignore if we do not get page
                print("Response error (probably 404)", file=sys.stderr)
            else:
                # More console reporting on title of followed link page
                print(_page_title(br), file=sys.stderr)
                # Increment output filenames, open and write the file
                result_no += 1
                with open('result%d' % result_no, 'wb') as out:
                    out.write(br.response.content)
            # Step back to the search page so that the remaining (possibly
            # relative) links are resolved against it, not the page just read.
            br.back()
            # Let's not hammer the site too much between fetches
            time.sleep(1)
예제 #2
0
# Log in to the Ingress intel site through the Google "ServiceLogin" page,
# then request the game score from the dashboard RPC endpoint.
s = requests.Session()
# LWPCookieJar keeps the cookies in a jar that can be iterated below.
s.cookies = cookielib.LWPCookieJar()

browser = RoboBrowser(user_agent='TestBot', history=True, session=s)

browser.open('http://ingress.com/intel')
# RoboBrowser links are parsed tags, so the login links are selected by
# looking at each tag's href attribute.
login_links = [
    a for a in browser.get_links()
    if 'ServiceLogin' in a.get('href', '')
]
for link in login_links:
    browser.follow_link(link)
    # The first form on the page is taken to be the login form; fields are
    # set on the Form object and sent with submit_form().
    login_form = browser.get_forms()[0]
    login_form['Email'].value = GOOGLE_USER
    login_form['Passwd'].value = GOOGLE_PASS
    browser.submit_form(login_form)

    # Second session for the RPC call: it carries the CSRF token as a header
    # plus a copy of every cookie collected during login.
    s2 = requests.Session()
    for cookie in s.cookies:
        if cookie.name == 'csrftoken':
            s2.headers['X-CSRFToken'] = cookie.value
    s2.cookies.update(s.cookies)
    browser = RoboBrowser(session=s2)
    # The original mechanize request sent the method name as the request
    # body (which makes it a POST), not as an HTTP header.
    browser.open(
        'http://www.ingress.com/rpc/dashboard.getGameScore',
        method='post',
        data='{"method": "dashboard.getGameScore"}',
    )

    # TODO: the JSON reply is left in browser.response and is not parsed or
    # printed yet.