def fetch():
    """Log in, run every search in VARIABLE_QUERY and save the result pages.

    Opens LOGIN_URL, fills in and submits the form whose id is ``fm1``, then
    for each entry of VARIABLE_QUERY opens
    ``SEARCH_URL + FIXED_QUERY + entry`` and follows every link whose
    ``href`` contains both ``good_path`` and ``credential``.  The raw body of
    each successfully fetched page is written to ``result<N>`` in the current
    working directory, where N counts up from 1 across all searches.
    Progress and error messages are written to stderr.

    Returns:
        None.

    Raises:
        RuntimeError: if the login page has no form with id ``fm1``.
    """
    # NOTE(review): credentials are hard-coded (masked here); consider
    # loading them from the environment or a config file instead.
    USERNAME = '******'
    PASSWORD = '******'
    result_no = 0

    br = RoboBrowser()
    br.open(LOGIN_URL)
    print(br)

    # RoboBrowser hands back a form object; fields are set on that object and
    # it is submitted explicitly through the browser.
    login_form = br.get_form(id="fm1")
    if login_form is None:
        raise RuntimeError('login form "fm1" not found at %s' % LOGIN_URL)
    login_form['username'].value = USERNAME
    login_form['password'].value = PASSWORD
    br.submit_form(login_form)

    # Automatic redirect sometimes fails, follow manually when needed
    if 'Redirecting' in _page_title(br):
        redirect_link = br.get_link(text='click here')
        if redirect_link is not None:
            br.follow_link(redirect_link)
    print(br.response)

    # Loop through the searches, keeping fixed query parameters
    for actor in VARIABLE_QUERY:
        # Report progress on the console
        sys.stderr.write('*** %s\n' % actor)

        # Run the actual query
        br.open(SEARCH_URL + FIXED_QUERY + actor)

        # The result page links to the content pages we want, but also
        # carries unrelated links, which are filtered out by their href.
        nice_links = [
            link for link in br.get_links()
            if 'good_path' in link.get('href', '')
            and 'credential' in link.get('href', '')
        ]
        if not nice_links:
            # Maybe the relevant results are empty
            # NOTE(review): this stops ALL remaining searches, not just the
            # current one -- confirm `break` (rather than `continue`) is
            # intended.
            break

        for link in nice_links:
            br.follow_link(link)
            response = br.response
            if response.ok:
                # More console reporting: title of the followed page
                sys.stderr.write('%s\n' % _page_title(br))

                # Increment the output filename counter and save the page.
                # Bytes are written untouched so no re-encoding can fail.
                result_no += 1
                with open('result%d' % result_no, 'wb') as out:
                    out.write(response.content)
            else:
                # Nothing ever goes perfectly; skip pages we could not get
                sys.stderr.write("Response error (probably 404)\n")

            # Let's not hammer the site too much between fetches
            time.sleep(1)


def _page_title(browser):
    """Return the text of the current page's <title>, or '' if it has none."""
    title_tag = browser.find('title')
    if title_tag is None:
        return ''
    return title_tag.get_text()
# Log in to the Ingress intel site through the Google login form, then call
# the game-score RPC with the cookies and CSRF token obtained during login.

# First session: performs the interactive login and collects cookies.
s = requests.Session()
s.cookies = cookielib.LWPCookieJar()
browser = RoboBrowser(user_agent='TestBot', history=True, session=s)
browser.open('http://ingress.com/intel')

# Pick out the login links by their href.  The list is built before any
# navigation because following a link replaces the current page.
login_links = [
    link for link in browser.get_links()
    if 'ServiceLogin' in link.get('href', '')
]
for link in login_links:
    browser.follow_link(link)

# Fill in and submit the first form on the login page.
login_form = browser.get_form()
if login_form is None:
    raise RuntimeError('no login form found at %s' % browser.url)
login_form['Email'].value = GOOGLE_USER
login_form['Passwd'].value = GOOGLE_PASS
browser.submit_form(login_form)

# Second session: reuses the login cookies and echoes the CSRF token back in
# the X-CSRFToken request header.
s2 = requests.Session()
s2.cookies = s.cookies
for cookie in s.cookies:
    if cookie.name == 'csrftoken':
        s2.headers['X-CSRFToken'] = cookie.value

# The RPC method name is sent as a JSON POST body (as the earlier
# mechanize-based version of this script did), not as an HTTP header.
# The reply is left in browser.response for the caller to inspect.
browser = RoboBrowser(session=s2)
browser.open(
    'http://www.ingress.com/rpc/dashboard.getGameScore',
    method='post',
    data='{"method": "dashboard.getGameScore"}',
)