def setup_profile(self, firebug=True, netexport=True):
    """
    Build the Firefox profile used by the browser.

    Application auto-update is always disabled. The extension ``.xpi``
    files are looked up in the ``extensions`` folder under the current
    working directory.

    :param firebug: whether to install and configure the firebug extension
    :param netexport: whether to install and configure the netexport
        extension (also creates the ``har`` folder under the current
        working directory and sets it as netexport's default log dir)
    :return: the configured ``webdriver.FirefoxProfile``
    """
    base_dir = os.getcwd()
    ff_profile = webdriver.FirefoxProfile()
    ff_profile.set_preference("app.update.enabled", False)

    if firebug:
        ff_profile.add_extension(base_dir + '/extensions/firebug-2.0.8.xpi')
        # (preference name, value) pairs, applied in this order.
        firebug_prefs = (
            ("extensions.firebug.currentVersion", "2.0.8"),
            ("extensions.firebug.allPagesActivation", "on"),
            ("extensions.firebug.defaultPanelName", "net"),
            ("extensions.firebug.net.enableSites", True),
            ("extensions.firebug.delayLoad", False),
            ("extensions.firebug.onByDefault", True),
            ("extensions.firebug.showFirstRunPage", False),
            ("extensions.firebug.net.defaultPersist", True),
        )
        for pref_name, pref_value in firebug_prefs:
            ff_profile.set_preference(pref_name, pref_value)

    if netexport:
        har_dir = base_dir + "/har/"
        fc.make_folder(har_dir)
        ff_profile.add_extension(base_dir + '/extensions/netExport-0.9b7.xpi')
        # (preference name, value) pairs, applied in this order.
        netexport_prefs = (
            ("extensions.firebug.DBG_NETEXPORT", True),
            ("extensions.firebug.netexport.alwaysEnableAutoExport", True),
            ("extensions.firebug.netexport.defaultLogDir", har_dir),
            ("extensions.firebug.netexport.includeResponseBodies", True),
        )
        for pref_name, pref_value in netexport_prefs:
            ff_profile.set_preference(pref_name, pref_value)

    return ff_profile
def run(self, input_file, url=None, input_list=None, verbose=0):
    """
    Run the headless browser with the given input.

    Only one input source is used, in this priority order: ``url``,
    then ``input_list``, then ``input_file``. The collected results are
    also written to ``./hb_results.json``.

    :param input_file: the name of the file in "index url" format.
        i.e.
            1, www.facebook.com
            1, www.google.com
            ...
    :param url: a single url to load; when given, input_list and
        input_file are ignored
    :param input_list: input handed to ``self.get_batch``; used only
        when no url is given
    :param verbose: when greater than 0 the Firefox binary logs to stdout
    :return: dict of results, or ``{"error": "no inputs"}`` when no
        input source was supplied
    """
    if not (url or input_list or input_file):
        print('no inputs')
        return {"error": "no inputs"}

    results = {}
    # The file-based crawl quits the driver itself (see
    # foctor_get_requests), so this method must not quit it again.
    crawl_from_file = not url and not input_list

    self.open_virtual_display()
    try:
        log_file = sys.stdout if verbose > 0 else None
        self.binary = FirefoxBinary(log_file=log_file)
        self.profile = self.setup_profile()
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        firefox_binary=self.binary,
                                        timeout=60)
        try:
            self.driver.set_page_load_timeout(60)
            fc.make_folder(os.getcwd() + "/har/")

            if url:
                host, path = self.divide_url(url)
                results[url] = self.get(host, path)
            elif input_list:
                results.update(self.get_batch(input_list))
            else:
                self.foctor_get_requests(input_file, results)
        finally:
            # Release the browser even when a page load or batch run
            # raised; previously the Firefox process was leaked.
            if not crawl_from_file:
                print("quit driver")
                self.quit()
    finally:
        # Always stop the virtual display, including when browser
        # start-up itself failed.
        self.close_virtual_display()

    with open("./hb_results.json", "w") as f:
        json.dump(results, f, indent=4)
    print("Headless browser test finished")
    return results
def foctor_get_requests(self, site_list_file, results):
    """
    Crawl the sites listed in a file through the foctor_core library.

    The site list is read with ``fc.read_site_list`` and crawled with
    ``fc.do_crawl`` using this instance's driver and display. Afterwards
    the driver is torn down and quit here, so the caller must not quit
    it again.

    :param site_list_file: name of the file holding the site list
    :param results: object passed to ``fc.do_crawl`` as ``external``;
        presumably filled in through the ``wrap_results`` callback
        (confirm against foctor_core)
    :return: None
    """
    base_dir = os.getcwd() + "/"
    fc.make_folder(base_dir)
    display_mode = 0
    har_dir = base_dir + "har/"

    sites = fc.read_site_list(site_list_file=site_list_file,
                              start_index=0,
                              end_index=2 ** 31)

    # files_count is the number of entries already in the har folder
    # when the crawl starts.
    crawl_driver, crawl_display = fc.do_crawl(
        sites=sites,
        driver=self.driver,
        display=self.display,
        capture_path=base_dir,
        callback=self.wrap_results,
        external=results,
        fd=har_dir,
        files_count=len(os.listdir(har_dir)))

    fc.teardown_driver(crawl_driver, crawl_display, display_mode)
    crawl_driver.quit()
def get(self, host, path="", ssl=False, external=None):
    """
    Send a get request to a url and wrap the results.

    The page is loaded with the shared driver, its html and a screenshot
    are saved under ``html/<url>/`` and ``screenshots/<url>/`` in the
    current working directory, and the ``har`` folder is handed to
    ``self.wrap_results``.

    :param host: host part of the url, without the scheme
    :param path: path part of the url, appended directly to host
    :param ssl: use "https" instead of "http" when True
    :param external: optional dict; on success the result is also stored
        in it under the full url (scheme included)
    :return: whatever ``self.wrap_results`` returns on success, otherwise
        a dict whose ``error`` entry holds the failure text
    """
    scheme = "https" if ssl else "http"
    url = host + path
    http_url = scheme + "://" + url
    result = {}
    try:
        capture_path = os.getcwd()
        fc.load_page(self.driver, http_url)

        # Name the saved artifacts after the last url segment, skipping
        # the empty segment produced by a trailing slash.
        if url[-1] == "/":
            f_name = url.split('/')[-2]
        else:
            f_name = url.split('/')[-1]

        fc.make_folder(capture_path + "/har/")
        fc.save_html(self.driver, f_name, capture_path + "/html/" + url + "/")
        fc.save_screenshot(self.driver, f_name,
                           capture_path + "/screenshots/" + url + "/")

        har_file_path = capture_path + "/har/"
        print(har_file_path)
        result = self.wrap_results(har_file_path)

        if external is not None:
            external[http_url] = result
    except Exception as e:
        # str(e) instead of e.message: the attribute is deprecated since
        # Python 2.6, missing in Python 3, and empty for exception types
        # that do not forward their text to BaseException.__init__.
        result['error'] = str(e)
        print(e)
    return result