def load_localstorage(self, session_id):
    """Open a Firefox window and restore a saved localStorage session.

    The entry for ``session_id`` is read from the JSON file at
    ``self.sessions_file``.  The pickled key/value pairs found at the
    entry's ``session_path`` are replayed into ``window.localStorage`` of
    the page at the entry's ``web_url``, using the user agent the session
    was saved with.  On success the browser is appended to
    ``self.browsers``.

    Args:
        session_id: Session identifier; converted with ``str`` for lookup.

    Returns:
        None.  If the browser cannot be started, the failure is reported
        through ``error`` and the method returns without appending anything.

    Raises:
        KeyError: If the session or one of its expected fields is missing.
        OSError: If the sessions file or the storage file cannot be opened.
    """
    # Context managers make sure both file handles are closed promptly.
    with open(self.sessions_file) as sessions_fh:
        sessions = json.load(sessions_fh)

    session = sessions[str(session_id)]
    storage_path = session["session_path"]
    url = session["web_url"]
    # Setting useragent to the same one the session saved with
    useragent = session["useragent"]

    profile = FirefoxProfile()
    profile.set_preference("general.useragent.override", useragent)

    # NOTE(review): pickle.load can execute arbitrary code; this is only
    # safe if the session files are trusted -- confirm where they come from.
    with open(storage_path, "rb") as storage_fh:
        local_storage = pickle.load(storage_fh)

    try:
        browser = Firefox(profile)
    except Exception:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still
        # propagate instead of being reported as a browser failure.
        error("Couldn't open browser to view session!")
        return

    # The page has to be loaded first so the script runs against its origin.
    browser.get(url)
    browser.delete_all_cookies()
    browser.execute_script(
        "window.localStorage.clear()")  # clear the current localStorage
    for key, value in local_storage.items():
        browser.execute_script(
            "window.localStorage.setItem(arguments[0], arguments[1]);",
            key, value)

    status(f"Session {session_id} loaded")
    browser.refresh()
    self.browsers.append(browser)
class BrowserEngine:
    """Convenience wrapper around a Selenium Firefox driver.

    ``options`` and ``profile`` are class attributes, so they are shared by
    every instance; ``__init__`` writes the ``headless`` flag onto the shared
    ``options`` object before starting the driver.
    """

    options = Options()
    profile = FirefoxProfile()

    # Set certain preferences at a class level as they are static
    profile.accept_untrusted_certs = True
    # Supposed to help with memory issues
    for _pref_name, _pref_value in (
        ('permissions.default.image', 2),
        ('dom.ipc.plugins.enabled.libflashplayer.so', False),
        ("browser.cache.disk.enable", False),
        ("browser.cache.memory.enable", False),
        ("browser.cache.offline.enable", False),
        ("network.http.use-cache", False),
    ):
        profile.set_preference(_pref_name, _pref_value)
    # Keep the loop variables from lingering as class attributes.
    del _pref_name, _pref_value

    def __init__(self, wait, proxy=None, headless=False):
        """Start Firefox and prepare an explicit wait.

        Args:
            wait: Timeout handed to ``WebDriverWait``.
            proxy: Proxy address used for HTTP, FTP and SSL; a falsy value
                means no proxy.
            headless: Value assigned to ``options.headless``.
        """
        if proxy:
            self.proxy = self.set_proxy(proxy)
        else:
            self.proxy = None
        self.options.headless = headless
        self.driver = browser = Firefox(
            options=self.options,
            firefox_profile=self.profile,
            proxy=self.proxy,
        )
        browser.set_window_position(0, 0)
        browser.set_window_size(1024, 768)
        self.wait = WebDriverWait(browser, wait)

    def set_proxy(self, proxy):
        """Return a manual ``Proxy`` that routes HTTP, FTP and SSL via ``proxy``."""
        settings = {
            "proxyType": ProxyType.MANUAL,
            "httpProxy": proxy,
            "ftpProxy": proxy,
            "sslProxy": proxy,
            "noProxy": "",
        }
        return Proxy(settings)

    def quit_driver(self):
        """Quit the underlying driver."""
        self.driver.quit()

    def refresh(self):
        """Reload the current page."""
        self.driver.refresh()

    def clear_cookies(self):
        """Delete all cookies through the driver."""
        self.driver.delete_all_cookies()

    def get_request(self, url):
        """Navigate the driver to ``url``."""
        self.driver.get(url)

    def get_element(self, type_, value):
        """Wait for an element and return it, or ``False`` on timeout.

        Args:
            type_: Name of a ``By`` attribute (looked up with ``getattr``).
            value: Locator value passed to ``find_element``.
        """
        def _locate(driver):
            return driver.find_element(getattr(By, type_), value)

        try:
            found = self.wait.until(_locate)
        except TimeoutException:
            return False
        return found

    def click_button(self, button):
        """Click the given element."""
        button.click()

    def select_dropdown(self, element, value):
        """Select the option with the given ``value`` in a dropdown element."""
        Select(element).select_by_value(value)

    def switch_context(self, element):
        """Switch the driver's context to the frame ``element``."""
        self.driver.switch_to.frame(element)
def get_cookies(num):
    """Log in through Firefox and return the session cookies as a header dict.

    The function opens ``start_url``, clicks the login button, types the
    selected account's user name and password, submits the form, waits until
    the logged-in user's name is present in the navigation bar, and then
    collects the browser's cookies.  The browser is always shut down, even
    when one of the waits times out.

    Args:
        num: Integer used to pick one of the built-in accounts
            (``num`` modulo the number of accounts).

    Returns:
        A dict with the single key ``'Cookie'`` whose value is the cookies
        rendered as ``name=value`` pairs joined with ``';'``.

    Raises:
        IndexError: If the selected account entry has no ``'----'``
            separator between user name and password.
    """
    # NOTE(review): credentials are hard-coded and the password is echoed to
    # stdout below -- move them to configuration / a secret store.
    # NOTE(review): the first entry contains no '----' separator, so an even
    # ``num`` raises IndexError before a browser is opened -- confirm the
    # intended value.
    users = ['[email protected]', '14785107068----lxq69688']
    one_user = users[num % len(users)].split('----')
    user, pwd = one_user[0], one_user[1]

    # The login button is located twice: once to open the form and once to
    # submit it.
    login_button_xpath = (
        '//div[@class="W_login_form"]/div[@class="info_list login_btn"]/a')

    option = FirefoxProfile()
    option.set_preference("dom.webnotifications.enabled", False)
    browser = Firefox(option)
    try:
        wait = WebDriverWait(browser, 120)
        browser.delete_all_cookies()
        browser.get(start_url)

        submit = wait.until(
            EC.presence_of_element_located((By.XPATH, login_button_xpath)))
        sleep(1)
        submit.click()

        user_element = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//input[@id="loginname"]')))
        sleep(1)
        user_element.send_keys(user)
        print('账号:', user)

        pwd_element = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//input[@type="password"]')))
        sleep(1)
        pwd_element.send_keys(pwd)
        print('密码:', pwd)

        submit = wait.until(
            EC.presence_of_element_located((By.XPATH, login_button_xpath)))
        submit.click()
        print('点击登陆!')

        # Condition: the user name appearing in the navigation bar confirms
        # that the login succeeded.
        user_btn = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//ul[@class="gn_nav_list"]/li[5]//em[2]')))
        print('登陆用户:', user_btn.text)

        cookies = browser.get_cookies()
    finally:
        # quit() ends the whole driver session (close() only closes the
        # current window), and running it in ``finally`` prevents a leaked
        # browser when a wait times out.
        browser.quit()

    cookie = ';'.join(i['name'] + '=' + i['value'] for i in cookies)
    return {
        'Cookie': cookie,
    }
def main_fun():
    """Run ``hh_worker`` in a fresh, maximized Firefox and always quit it.

    Exceptions raised by ``hh_worker`` are printed (message followed by the
    traceback) and not re-raised.  Cookies are deleted before and after the
    worker runs.  The driver is shut down on every exit path, including
    failures during window setup or cookie cleanup.
    """
    browser = Firefox()
    try:
        browser.maximize_window()
        browser.delete_all_cookies()
        try:
            hh_worker(browser)
        except Exception as ex:
            print(ex)
            traceback.print_exc()
        finally:
            browser.delete_all_cookies()
    finally:
        # quit() closes every window and ends the driver session, so a
        # separate close() beforehand is unnecessary.  Keeping it in the
        # outermost ``finally`` guarantees it runs even if cleanup raises.
        browser.quit()
class Scraper(object):
    """Fetch a page's source with Firefox running inside a hidden display.

    Typical use is ``open()``, ``scrape()``, ``close()``; the instance can
    also be used as a context manager, which calls ``open()`` on entry and
    ``close()`` on exit.
    """

    def __init__(self, url):
        """Remember ``url`` and prepare a non-visible 800x600 display.

        Args:
            url: Address loaded by ``scrape()``.
        """
        self.url = url
        self.browser = None
        self.display = Display(
            visible=0,
            size=(800, 600)
        )

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.close()

    def open(self):
        """Start the display, then launch Firefox."""
        self.display.start()
        try:
            self.browser = Firefox()
        except Exception:
            # Do not leave the display running when the browser cannot start.
            self.display.stop()
            raise

    def close(self):
        """Quit the browser (if one was opened) and stop the display.

        Safe to call when ``open()`` was never called or failed; the display
        is stopped even if quitting the browser raises.
        """
        browser, self.browser = self.browser, None
        try:
            if browser is not None:
                browser.quit()
        finally:
            self.display.stop()

    def scrape(self):
        """Load ``self.url``, delete all cookies and return the page source.

        Requires ``open()`` to have been called first.
        """
        self.browser.get(self.url)
        self.browser.delete_all_cookies()
        return self.browser.page_source
def load_cookie(self, session_id):
    """Open a Firefox window and restore a saved cookie session.

    The entry for ``session_id`` is read from the JSON file at
    ``self.sessions_file``.  The pickled cookies found at the entry's
    ``session_path`` are added to a browser that has loaded the entry's
    ``web_url`` with the user agent the session was saved with.  On success
    the browser is appended to ``self.browsers``.

    Args:
        session_id: Session identifier; converted with ``str`` for lookup.

    Returns:
        None.  If the browser cannot be started, the failure is reported
        through ``error`` and the method returns without appending anything.

    Raises:
        KeyError: If the session or one of its expected fields is missing.
        OSError: If the sessions file or the cookie file cannot be opened.
    """
    # Context managers make sure both file handles are closed promptly.
    with open(self.sessions_file) as sessions_fh:
        sessions = json.load(sessions_fh)

    session = sessions[str(session_id)]
    cookie_path = session["session_path"]
    url = session["web_url"]
    # Setting useragent to the same one the session saved with
    useragent = session["useragent"]

    profile = FirefoxProfile()
    profile.set_preference("general.useragent.override", useragent)

    # NOTE(review): pickle.load can execute arbitrary code; this is only
    # safe if the session files are trusted -- confirm where they come from.
    with open(cookie_path, "rb") as cookie_fh:
        cookies = pickle.load(cookie_fh)

    try:
        browser = Firefox(profile)
    except Exception:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still
        # propagate instead of being reported as a browser failure.
        error("Couldn't open browser to view session!")
        return

    # The page is loaded first so the cookies are added for its domain.
    browser.get(url)
    browser.delete_all_cookies()
    browser.execute_script(
        "window.localStorage.clear()")  # clear the current localStorage
    for cookie in cookies:
        browser.add_cookie(cookie)

    status(f"Session {session_id} loaded")
    browser.refresh()
    self.browsers.append(browser)
# t_s_photo_link sub_elements = [ elem.find_element_by_class_name('t_s_photo_link') for elem in elements ] # href links from t_s_photo_link links = [elem.get_attribute('href') for elem in sub_elements] for link in links: try: browser.get(link) sleep(1) name = browser.find_element_by_class_name( 'shop-header-shop-header-title-1VQXz') phone = browser.find_element_by_class_name( 'shop-header-shop-header-phone-3Ivio') browser.delete_all_cookies() excel.addRow(name.text, phone.text) data[name.text] = phone.text print(f'Company name: {name.text} \nPhone: {phone.text}\n') except Exception as e: print(f"EXCEPTION! -----------------------\n{e}") continue except Exception as e: print(f"EXCEPTION FROM SWITCHING PAGES! \n{e}") excel.save() browser.quit()
class BrowserEngine:
    """Convenience wrapper around a Selenium Firefox driver.

    ``options`` and ``profile`` are class attributes shared by every
    instance; ``__init__`` writes the ``headless`` flag onto the shared
    ``options`` object before starting the driver.
    """

    options = Options()
    profile = FirefoxProfile()

    # Set preferences at the class level
    # Supposed to help with memory issues
    profile.set_preference("permissions.default.image", 2)
    profile.set_preference("dom.ipc.plugins.enabled.libflashplayer.so", False)
    profile.set_preference("browser.cache.disk.enable", False)
    profile.set_preference("browser.cache.memory.enable", False)
    profile.set_preference("browser.cache.offline.enable", False)
    profile.set_preference("network.http.use-cache", False)
    profile.accept_untrusted_certs = True

    def __init__(self, wait=5, proxy=None, headless=True):
        """Start Firefox and prepare an explicit wait.

        Args:
            wait: Timeout handed to ``WebDriverWait``.
            proxy: Proxy address used for HTTP, FTP and SSL; a falsy value
                means no proxy.
            headless: Value assigned to ``options.headless``.
        """
        # The ``proxy`` method is resolved on the class here; after this
        # assignment the instance attribute ``proxy`` (capabilities dict or
        # None) shadows the method on this instance.
        self.proxy = None if not proxy else self.proxy(proxy)
        self.options.headless = headless
        self.driver = Firefox(options=self.options,
                              firefox_profile=self.profile,
                              desired_capabilities=self.proxy)
        # TODO: Not sure if these help or not with optimization
        self.driver.set_window_position(0, 0)
        self.driver.set_window_size(1024, 768)
        self.wait = WebDriverWait(self.driver, wait)

    def proxy(self, proxy):
        """Return Firefox capabilities configured with a manual proxy.

        Args:
            proxy: Proxy address applied to HTTP, FTP and SSL traffic.

        Returns:
            A new capabilities dict based on ``DesiredCapabilities.FIREFOX``
            with the proxy settings added.
        """
        proxy = Proxy({
            "proxyType": ProxyType.MANUAL,
            "httpProxy": proxy,
            "ftpProxy": proxy,
            "sslProxy": proxy,
            "noProxy": ""
        })
        # Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level
        # dict, and mutating it would leak this proxy into every driver
        # created afterwards, including ones requested without a proxy.
        capabilities = DesiredCapabilities.FIREFOX.copy()
        proxy.add_to_capabilities(capabilities)
        return capabilities

    def quit(self):
        """Quit the underlying driver."""
        self.driver.quit()

    def close(self):
        """Close the driver's current window."""
        self.driver.close()

    def refresh(self):
        """Reload the current page."""
        self.driver.refresh()

    def back(self):
        """Go back one entry in the browser history via JavaScript."""
        self.driver.execute_script("window.history.go(-1)")

    def clear_cookies(self):
        """Delete all cookies through the driver."""
        self.driver.delete_all_cookies()

    def get(self, url):
        """Navigate the driver to ``url``."""
        self.driver.get(url)

    def find_element(self, type_, value):
        """Wait for an element and return it, or ``False`` on timeout.

        Args:
            type_: Name of a ``By`` attribute (looked up with ``getattr``).
            value: Locator value passed to ``find_element``.
        """
        try:
            return self.wait.until(
                lambda driver: driver.find_element(getattr(By, type_), value))
        except TimeoutException:
            return False

    def populate_element(self, element, value):
        """Type ``value`` into ``element``."""
        element.send_keys(value)

    def is_clickable(self, type_, value):
        """Wait until the located element is clickable and return the result.

        Unlike ``find_element``, a ``TimeoutException`` is not caught here.
        """
        return self.wait.until(
            EC.element_to_be_clickable((getattr(By, type_), value)))

    def click(self, button):
        """Click the given element."""
        button.click()

    def select_dropdown(self, element, value):
        """Select the option with the given ``value`` in a dropdown element."""
        select = Select(element)
        select.select_by_value(value)

    def submit(self, form):
        """Submit the given form element."""
        form.submit()

    def execute_script(self, code):
        """Run JavaScript ``code`` in the page and return the script's result."""
        return self.driver.execute_script(code)

    def screenshot(self, filename):
        """Save a screenshot of the current window to ``filename``."""
        self.driver.get_screenshot_as_file(filename)
def copy_cookies(fromd: webdriver.Firefox, tod: webdriver.Firefox, clear=False):
    """Copy every cookie from one Firefox driver into another.

    Args:
        fromd: Driver whose cookies are read with ``get_cookies()``.
        tod: Driver that receives each cookie through ``add_cookie()``.
        clear: When truthy, all cookies of ``tod`` are deleted before the
            copy starts.
    """
    if clear:
        tod.delete_all_cookies()

    # Read the source cookies only after the optional clear, then replay them.
    source_cookies = fromd.get_cookies()
    for entry in source_cookies:
        tod.add_cookie(entry)