def create_chrome_browser():
    """
    Creates a mechanize Chrome browser including the correct settings, headers and cookies

    The browser uses the cookie jar returned by ``browser_cookie.chrome()``,
    ignores robots.txt, and sends Chrome-like ``User-Agent``,
    ``Accept-Language`` and ``Accept`` headers.

    :return: Mechanize Browser object
    """
    browser = mechanize.Browser()
    cj = browser_cookie.chrome()
    browser.set_cookiejar(cj)

    # set browser settings
    browser.set_handle_equiv(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)

    # Follows refresh 0 but not hangs on refresh > 0
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    # Want debugging messages?
    # browser.set_debug_http(True)
    # browser.set_debug_redirects(True)
    # browser.set_debug_responses(True)

    # Drop any User-agent entry already present in addheaders (mechanize
    # pre-populates one) so the Chrome User-Agent below is the only one;
    # otherwise the earlier entry would take precedence over the appended one.
    browser.addheaders = [
        (name, value)
        for name, value in browser.addheaders
        if name.lower() != "user-agent"
    ]

    # append some headings to make the browser act like modern browsers
    browser.addheaders.append(
        (
            "User-Agent",
            # NOTE: the trailing space matters -- these two literals are
            # concatenated, and without it the UA reads "...WOW64)AppleWebKit".
            "Mozilla/5.0 (Windows NT 6.1; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36",
        )
    )
    browser.addheaders.append(("Accept-Language", "en-GB,en-US;q=0.8,en;q=0.6"))
    browser.addheaders.append(
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
    )
    return browser
def request(url):
    """input a url, return its text

    The GET request is sent with the cookie jar returned by
    ``browser_cookie.chrome()``.

    :param url: address to fetch
    :return: the response body as text, or ``None`` if the request failed
    """
    cj = browser_cookie.chrome()
    try:
        r = requests.get(url, cookies=cj)
    except requests.exceptions.RequestException:
        # Previously a bare ``except`` fell through to ``return r.text`` with
        # ``r`` unbound, turning every failed request into an
        # UnboundLocalError. Report the failure and return None instead.
        print("Error Response for URL: %s" % url)
        return None
    return r.text
from .search import SERVICES, dump_all_matches

# Parse the command line against the usage text in the module docstring.
args = docopt.docopt(__doc__)

service = args["SERVICE"]
if service not in SERVICES:
    sys.exit("service must be one of: {}".format(", ".join(SERVICES)))

session = requests.Session()

cookie_source = args["--cookies"]
if cookie_source:
    try:
        import browser_cookie
    except ImportError:
        sys.exit("pip install browser_cookie if you want browser cookies")

    # browser_cookie writes to stdout while loading; point stdout at stderr
    # for the duration and always put the real stream back afterwards.
    real_stdout = sys.stdout
    try:
        sys.stdout = sys.stderr
        load_jar = {
            "firefox": browser_cookie.firefox,
            "chrome": browser_cookie.chrome,
        }.get(cookie_source)
        if load_jar is None:
            sys.exit("BROWSER should be 'firefox' or 'chrome'")
        cookie_jar = load_jar()
    finally:
        sys.stdout = real_stdout

    session.cookies = cookie_jar

# Results are written to stdout using the (optionally cookie-laden) session.
dump_all_matches(service, args["QUERY"], sys.stdout, session=session)