def build(cfg, fetch_driver=True):
    """
    Create a selenium-wire webdriver from a configuration object.

    :param cfg: Configuration object; this function reads ``proxy``,
        ``driver``, ``profile``, ``user_agent``, ``user_data_dir``,
        ``executable_path``, ``debug``, ``binary`` and ``headless``
    :param fetch_driver: bool (default=True) fetches driver binaries
    :return: selenium-wire Webdriver object
    :raises NotImplementedError: if ``cfg.driver`` is neither a Firefox nor
        a Chrome driver name
    """
    use_proxy = cfg.proxy is not None
    wire_options = cfg.proxy.create_options() if use_proxy else {}

    is_firefox = cfg.driver in WebDriver.FIREFOX_DRIVER_NAMES
    if is_firefox:
        driver_cls = webdriver.Firefox
        browser_options = webdriver.FirefoxOptions()
        if cfg.profile is None:
            profile = webdriver.FirefoxProfile()
        else:
            profile = webdriver.FirefoxProfile(cfg.profile)
        profile.set_preference("general.useragent.override", cfg.user_agent)
        # Mute media playback.
        profile.set_preference("media.volume_scale", "0.0")
    elif cfg.driver in WebDriver.CHROME_DRIVER_NAMES:
        driver_cls = webdriver.Chrome
        browser_options = webdriver.ChromeOptions()
        browser_options.add_argument(f"user-agent={cfg.user_agent}")
        if cfg.user_data_dir:
            browser_options.add_argument(
                f"user-data-dir={cfg.user_data_dir}")
        profile = None
    else:
        raise NotImplementedError

    if fetch_driver:
        Loader.fetch(cfg.executable_path, cfg.debug, cfg.driver)

    # NOTE(review): binary_location receives cfg.executable_path, the same
    # path handed to Loader.fetch -- confirm this is the intended binary.
    browser_options.binary_location = cfg.executable_path
    browser_options.headless = cfg.headless

    # seleniumwire_options is only forwarded when a proxy is configured.
    driver_kwargs = {"options": browser_options}
    if use_proxy:
        driver_kwargs["seleniumwire_options"] = wire_options

    if is_firefox:
        return driver_cls(profile, cfg.binary, **driver_kwargs)
    return driver_cls(**driver_kwargs)
def __init_browser(self):
    """
    Configure and (re)start the web driver.

    Builds headless Firefox options that accept insecure certificates,
    prepares a fresh profile via ``firefox_profile``, shuts down any
    previously created browser and starts a new 1920x1080 Firefox session.
    The geckodriver log goes to ``os.devnull`` when ``args.console`` is
    set, otherwise to ``args.log``. Finally a ``WebDriverWait`` bound to
    the new browser is created with ``args.timeout``.
    """
    self.__options = webdriver.FirefoxOptions()
    self.__options.headless = True
    self.__options.accept_insecure_certs = True
    self.__geckodriver_binary = self.__args.geckodriver
    self.__firefox_binary = FirefoxBinary(self.__args.firefox)

    # Set firefox profile
    self.__profile = webdriver.FirefoxProfile()
    firefox_profile(self.__profile)

    if self.__browser is not None:
        # quit() ends the whole driver session (and its driver process);
        # close() would only close the current window.
        self.__browser.quit()

    # The two start-up modes differ only in where the driver log is written.
    log_path = os.path.devnull if self.__args.console else self.__args.log
    self.__browser = webdriver.Firefox(
        options=self.__options,
        firefox_binary=self.__firefox_binary,
        firefox_profile=self.__profile,
        executable_path=self.__geckodriver_binary,
        log_path=log_path)
    self.__browser.set_window_size(1920, 1080)

    self.__wait = WebDriverWait(self.__browser, self.__args.timeout)
def get_firefox(self, proxy, useragent):
    """
    Start a headless selenium-wire Firefox driver.

    :param proxy: object exposing ``get_init_string()``, or None for a
        direct connection
    :param useragent: user agent string, or None to leave the browser's
        own user agent untouched
    :return: the started Firefox driver
    """
    profile = webdriver.FirefoxProfile()
    if useragent is not None:
        profile.set_preference("general.useragent.override", useragent)

    # Work on a copy: DesiredCapabilities.FIREFOX is a class-level dict and
    # mutating it in place would leak into every other user of it.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True

    options_sel = {
        'connection_timeout': 5,
        'suppress_connection_errors': True
    }
    if proxy is not None:
        http_proxy = proxy.get_init_string()
        # Only swap the leading scheme; a blanket str.replace would also
        # rewrite "http" inside credentials or host names and would turn
        # an existing "https://" into "httpss://".
        if http_proxy.startswith('http://'):
            https_proxy = 'https://' + http_proxy[len('http://'):]
        else:
            https_proxy = http_proxy
        options_sel['proxy'] = {
            'http': http_proxy,
            'https': https_proxy
        }

    options = Options()
    options.headless = True
    options.add_argument("--window-size=1920,1080")
    if useragent is not None:
        options.add_argument('--user-agent={}'.format(useragent))

    binary = FirefoxBinary(self.browser_path)
    driver_ = webdriver.Firefox(capabilities=firefox_capabilities,
                                options=options,
                                firefox_binary=binary,
                                firefox_profile=profile,
                                timeout=5,
                                seleniumwire_options=options_sel)

    # Without a user agent there is nothing to override, consistent with
    # the profile preference above.
    if useragent is not None:
        driver_.header_overrides = {'User-Agent': useragent}
    return driver_
def _create_driver(self) -> webdriver.Firefox:
    """
    Build a selenium-wire Firefox driver from this object's settings.

    Reads ``profile_settings``, ``user_agent``, ``proxy``, ``is_headless``
    and ``request_scope`` from ``self``. When no request scope is set, the
    module-level ``ignore_http_methods`` is passed to selenium-wire;
    otherwise the scope is assigned to the driver's ``scopes``.
    """
    firefox_profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in (self.profile_settings or {}).items():
        firefox_profile.set_preference(pref_name, pref_value)
    if self.user_agent:
        firefox_profile.set_preference("general.useragent.override",
                                       self.user_agent)
    firefox_profile.update_preferences()

    wire_options = {}
    if self.proxy:
        wire_options["proxy"] = {"http": self.proxy, "https": self.proxy}
        wire_options["connection_keep_alive"] = True
        wire_options["connection_timeout"] = 180

    firefox_options = Options()
    firefox_options.headless = self.is_headless

    if not self.request_scope:
        wire_options["ignore_http_methods"] = ignore_http_methods

    driver = webdriver.Firefox(
        firefox_profile,
        options=firefox_options,
        seleniumwire_options=wire_options,
    )
    if self.request_scope:
        driver.scopes = self.request_scope
    return driver
def __init__(self, start_url):
    """
    Start a Firefox driver with image loading blocked, remember
    ``start_url`` and store the result of ``self._get_seasons()`` in
    ``self.seasons_urls``.
    """
    # Values of permissions.default.image:
    #   1 - Allow all images
    #   2 - Block all images
    #   3 - Block 3rd party images
    no_images_profile = webdriver.FirefoxProfile()
    no_images_profile.set_preference("permissions.default.image", 2)

    self.driver = webdriver.Firefox(firefox_profile=no_images_profile)
    self.start_url = start_url
    self.seasons_urls = self._get_seasons()
def launch_browser(headers=None,
                   user_agent=None,
                   proxy=None,
                   browser_type="Firefox"):
    """
    Start a selenium-wire browser whose driver binary sits next to this file.

    :param headers: optional header overrides applied to the driver's
        selenium-wire client
    :param user_agent: Firefox user agent; when falsy one is produced by
        ``generate_user_agent()``
    :param proxy: optional upstream proxy URL, used for http and https
    :param browser_type: ``"Firefox"`` for Firefox, anything else starts
        Chrome
    :return: the started driver, resized to 1920x1080

    When no driver could be started the user is prompted and the process
    exits with status 0.
    """
    wire_options = {}
    if proxy:
        wire_options["proxy"] = {"http": proxy, "https": proxy}

    # Frozen (PyInstaller-style) builds unpack bundled files into _MEIPASS.
    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
        directory = sys._MEIPASS
    else:
        directory = os.path.dirname(__file__)

    driver = None
    if browser_type == "Firefox":
        candidates = (os.path.join(directory, name)
                      for name in ("geckodriver.exe", "geckodriver"))
        driver_path = next(
            (path for path in candidates if os.path.exists(path)), None)
        if driver_path is not None:
            opts = webdriver.FirefoxOptions()
            # opts.add_argument("--headless")
            profile = webdriver.FirefoxProfile()
            if not user_agent:
                user_agent = generate_user_agent()
            profile.set_preference("general.useragent.override", user_agent)
            driver = webdriver.Firefox(
                firefox_profile=profile,
                executable_path=driver_path,
                options=opts,
                seleniumwire_options=wire_options,
            )
        else:
            message = f"Download geckodriver from https://github.com/mozilla/geckodriver/releases/tag/v0.27.0 and paste it in {directory}"
            input(message)
    else:
        driver_path = os.path.join(directory, "chromedriver.exe")
        opts = webdriver.ChromeOptions()
        # No --proxy-server argument is added: the upstream proxy is handed
        # to selenium-wire through seleniumwire_options.
        driver = webdriver.Chrome(executable_path=driver_path,
                                  options=opts,
                                  seleniumwire_options=wire_options)

    if driver is None:
        input("DRIVER NOT FOUND")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist in frozen builds.
        sys.exit(0)

    driver.set_window_size(1920, 1080)
    if headers:
        driver._client.set_header_overrides(headers=headers)
    return driver
def _firefox_config(self):
    """
    Return a selenium-wire Firefox driver routed through the module-level
    ``proxies`` and using ``self._user_agent``, with WebRTC and Flash
    turned off in the profile.
    """
    wire_options = {'proxy': proxies}

    profile = webdriver.FirefoxProfile()
    preferences = (
        ("general.useragent.override", self._user_agent),  # chosen user agent
        ("media.peerconnection.enabled", False),  # disable webrtc
        ("plugin.state.flash", 0),  # disable flash
        ("general.useragent.locale", "en"),
    )
    for pref_name, pref_value in preferences:
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()  # save settings

    return webdriver.Firefox(firefox_profile=profile,
                             seleniumwire_options=wire_options,
                             executable_path='geckodriver.exe')
def build_driver():
    """
    Start a headless selenium-wire Firefox driver behind the scraper proxy.

    Loads two ad-blocking extensions into the profile, writes the driver's
    command-executor URL and session id to ``SELENIUM_SESSION_FILE`` (one
    per line), waits five seconds and closes the other tabs via
    ``ulties.closeOtherTabs`` before returning the driver.

    :return: the started Firefox driver
    """
    software_names = [SoftwareName.FIREFOX.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent = user_agent_rotator.get_random_user_agent()
    # test for prevent block: pin a fixed user agent instead of the random
    # one. The value is the bare UA string; the "user-agent=" prefix belongs
    # only to the command-line argument below, not to the profile preference.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    # options.add_argument("--window-size=1420,1080")
    options.add_argument("--disable-gpu")
    options.add_argument(f'user-agent={user_agent}')

    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    profile.add_extension(extension='./extensions/adblock_plus-3.11-an+fx.xpi')
    profile.add_extension(
        extension='./extensions/adblock_for_firefox-4.33.0-fx.xpi')

    # NOTE(review): API_KEY is not referenced by the proxy URLs as written
    # below; credentials should come from configuration, not source code.
    API_KEY = '7f3282dc1e35451c7037fa93818b0cef'
    proxy_options = {
        'proxy': {
            'http': f'http://*****:*****@proxy-server.scraperapi.com:8001',
            'https': f'http://*****:*****@proxy-server.scraperapi.com:8001',
            'no_proxy': 'localhost,127.0.0.1'
        }
    }

    driver = webdriver.Firefox(firefox_profile=profile,
                               firefox_binary=None,
                               options=options,
                               seleniumwire_options=proxy_options)
    print("Agent: {}".format(user_agent))

    # The context manager closes the file even if reading the driver
    # attributes raises.
    with open(SELENIUM_SESSION_FILE, 'w') as session_file:
        session_file.writelines([
            driver.command_executor._url,
            "\n",
            driver.session_id,
            "\n",
        ])

    time.sleep(5)
    # close other tabs
    ulties.closeOtherTabs(driver)
    return driver
def __init__(self, start_url, season):
    """
    Object instance created in the context of the driver process.

    Starts a Firefox driver with image loading blocked, stores ``season``
    and ``start_url``, fills ``streams_links`` from ``_get_episodes()`` and
    then runs ``_get_chunks()``.
    """
    # Values of permissions.default.image:
    #   1 - Allow all images
    #   2 - Block all images
    #   3 - Block 3rd party images
    no_images_profile = webdriver.FirefoxProfile()
    no_images_profile.set_preference("permissions.default.image", 2)
    self.driver = webdriver.Firefox(firefox_profile=no_images_profile)

    self.season = season
    self.start_url = start_url
    self.streams_links = self._get_episodes()

    self.streams_list = None  # delivered by _get_chunks()
    self._get_chunks()
def get_firefox_profile():
    """
    Return a Firefox profile with the bundled extensions installed.

    Each extension is loaded from ``<dir_path>/firefox_extensions`` and its
    ``currentVersion`` preference is set right after it is added.
    """
    # (xpi file name, version preference, version value)
    bundled_extensions = (
        # AdBlockPlus extension.
        ("d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d.xpi",
         "extensions.adblockplus.currentVersion", "3.8"),
        # uBlock Origin extension (disabled).
        # ("[email protected]",
        #  "extensions.ublock0.currentVersion", "1.25.2"),
        # I don't care about cookies extension.
        ("[email protected]",
         "extensions.idontcareaboutcookies.currentVersion", "3.1.3"),
    )

    profile = webdriver.FirefoxProfile()
    for xpi_name, version_pref, version in bundled_extensions:
        profile.add_extension(f"{dir_path}/firefox_extensions/{xpi_name}")
        profile.set_preference(version_pref, version)
    return profile
def initialize_browser():
    """
    Create the module-level ``driver`` once and establish a Patreon login.

    Does nothing when ``driver`` already exists. Otherwise starts a
    selenium-wire Firefox with download preferences set (target folder
    ``<cwd>\\scraped``, no download prompts for the listed MIME types),
    opens patreon.com and, if ``patreon_cookie.pkl`` exists, restores its
    cookies and opens the login page. When that does not land on the home
    page (or no cookie file exists) the user is asked to log in manually
    and confirm with "y". The current cookies are then written back to
    ``patreon_cookie.pkl``.
    """
    global driver
    if driver is not None:
        return

    options = {
        'connection_timeout': None  # Never timeout
    }

    fp = webdriver.FirefoxProfile()
    fp.set_preference("browser.download.folderList", 2)
    fp.set_preference("general.useragent.override",
                      "K-Barber's Patreon Scraper")
    fp.set_preference("browser.download.dir", os.getcwd() + "\\scraped")
    fp.set_preference("browser.download.manager.showWhenStarting", False)
    fp.set_preference("browser.download.manager.showAlertOnComplete", False)
    download_types = (
        " image/jpg, image/jpeg, image/png, application/zip, "
        "application/x-photoshop, image/vnd.adobe.photoshop, "
        "application/photoshop, application/psd, image/psd "
    )
    fp.set_preference("browser.helperApps.neverAsk.saveToDisk",
                      download_types)

    driver = webdriver.Firefox(firefox_profile=fp,
                               seleniumwire_options=options)

    cookie_file = "patreon_cookie.pkl"
    get_url("https://www.patreon.com")

    logged_in = False
    if os.path.isfile(cookie_file):
        # NOTE: pickle can execute arbitrary code on load; this file must
        # only ever be one written by this function.
        with open(cookie_file, "rb") as handle:
            saved_cookies = pickle.load(handle)
        for cookie in saved_cookies:
            driver.add_cookie(cookie)
        get_url("https://www.patreon.com/login")
        # A redirect to /home means the restored cookies are still valid.
        logged_in = driver.current_url == "https://www.patreon.com/home"

    if not logged_in:
        answer = ""
        while answer.lower() != "y":
            answer = input("Done logging in? (Y/N)")

    with open(cookie_file, "wb") as handle:
        pickle.dump(driver.get_cookies(), handle)
def emulator(self):
    """
    Load the oikotie.fi rental listing page in headless Firefox and
    return the ``href`` of every ``<a class="ot-card">`` element.

    The location filter comes from ``self.settings.get("location")``; when
    ``self.use_useragent`` is set, the User-Agent header is overridden with
    ``get_useragent()``.

    :return: list of ``href`` values (an entry is None when a card has no
        ``href`` attribute)
    """
    # Built before the browser starts so a settings error cannot leave a
    # running driver behind.
    url = 'https://asunnot.oikotie.fi/vuokrattavat-asunnot?pagination=1&cardType=101&' \
          f'locations={self.settings.get("location")}'

    firefox_profile = webdriver.FirefoxProfile()
    # firefox_profile.set_preference('permissions.default.image', 2)
    # firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
    options = Options()
    options.headless = True
    driver = webdriver.Firefox(options=options,
                               firefox_profile=firefox_profile)
    try:
        if self.use_useragent:
            driver.header_overrides = {"User-Agent": get_useragent()}
        driver.get(url)
        source = driver.find_element_by_xpath("//body").get_attribute(
            'outerHTML')
    finally:
        # quit() ends the driver session even when loading the page fails;
        # close() would only close the window.
        driver.quit()

    soup = BeautifulSoup(source, "lxml")
    cards = soup.find_all('a', {'class': 'ot-card'})
    return [card.get('href') for card in cards]
# Script fragment: opens a page in Firefox with a Referer header override and
# submits the login form. `url` and `username` are not assigned in this part
# of the script -- presumably defined earlier; verify.
password = "******"
page_limit = 3

# NOTE(review): a filesystem path is passed as the only positional argument
# of webdriver.Firefox -- confirm which parameter this is meant to fill.
driver = webdriver.Firefox(
    r'C:\Users\Gabriel\anaconda3\Lib\site-packages\seleniumwire\webdriver'
)  # change to your path
sleep(2)

# Header override applied through the driver before the page is requested.
driver.header_overrides = {
    'Referer': 'referer_string',
}
driver.get(url)
sleep(3)

# Profile carrying the "modify headers" extension and its preferences.
# NOTE(review): within this fragment `fp` is never passed to a driver, and it
# is created after `driver` was already started -- as written it has no
# visible effect on the running browser.
fp = webdriver.FirefoxProfile()
path_modify_header = r'C:\Users\Gabriel\Desktop\helloWorld\modify_headers-0.7.1.1-fx.xpi'  # change to your path
fp.add_extension(path_modify_header)
fp.set_preference("modifyheaders.headers.count", 1)
fp.set_preference("modifyheaders.headers.action0", "Will")
fp.set_preference("modifyheaders.headers.name0", "FFOX")
fp.set_preference("modifyheaders.headers.value0", "20.2")
fp.set_preference("modifyheaders.headers.enabled0", True)
fp.set_preference("modifyheaders.config.active", True)
fp.set_preference("modifyheaders.config.alwaysOn", True)

# Fill in the login form and submit it, pausing briefly between steps.
driver.find_element_by_name("email").send_keys(username)
sleep(0.5)
driver.find_element_by_name("password").send_keys(password)
sleep(0.5)
driver.find_element_by_css_selector("button.btn").click()
""" config_folder = os.path.dirname(os.path.abspath(__file__)) monitoring_folder = os.path.join(config_folder, "files") profile_folder = os.path.join(config_folder, "profile") log_folder = os.path.join(config_folder, "logs") geckodriver = os.path.join(config_folder, "geckodriver") geckodriver_log = os.path.join(log_folder, "geckodriver.log") """ Different path variables. """ Path(profile_folder).mkdir(parents=True, exist_ok=True) options = Options() options.headless = True driver_profile = webdriver.FirefoxProfile(profile_folder) driver_profile.set_preference('intl.accept_languages', 'de') #headless driver = webdriver.Firefox(firefox_profile=driver_profile, executable_path=geckodriver, log_path=geckodriver_log, options=options) """ Seleniumwire driver and it's options. """ LOGGING_CONFIG = { 'version': 1, # required 'disable_existing_loggers': True, # this config overrides all other loggers 'formatters': {