Exemple #1
0
    def build(cfg, fetch_driver=True):
        """
        Build a webdriver object for the browser named in the configuration.

        :param cfg: Configuration object; depending on the browser, ``proxy``,
            ``driver``, ``profile``, ``user_agent``, ``user_data_dir``,
            ``executable_path``, ``debug``, ``headless`` and ``binary`` are
            read from it
        :param fetch_driver: bool (default=True); when true ``Loader.fetch``
            is called before the browser is started
        :return: the object created by ``webdriver.Firefox`` or
            ``webdriver.Chrome``
        :raises NotImplementedError: when ``cfg.driver`` is listed in neither
            ``WebDriver.FIREFOX_DRIVER_NAMES`` nor
            ``WebDriver.CHROME_DRIVER_NAMES``
        """
        use_proxy = cfg.proxy is not None
        # Proxy settings are only handed over via seleniumwire_options; nothing
        # proxy-related is written to the profile or the browser arguments.
        wire_options = cfg.proxy.create_options() if use_proxy else {}

        is_firefox = cfg.driver in WebDriver.FIREFOX_DRIVER_NAMES
        profile = None
        if is_firefox:
            driver_cls = webdriver.Firefox
            browser_options = webdriver.FirefoxOptions()
            if cfg.profile is None:
                profile = webdriver.FirefoxProfile()
            else:
                profile = webdriver.FirefoxProfile(cfg.profile)
            profile.set_preference("general.useragent.override",
                                   cfg.user_agent)
            profile.set_preference("media.volume_scale", "0.0")
        elif cfg.driver in WebDriver.CHROME_DRIVER_NAMES:
            driver_cls = webdriver.Chrome
            browser_options = webdriver.ChromeOptions()
            browser_options.add_argument(
                "user-agent={0}".format(cfg.user_agent))
            if cfg.user_data_dir:
                browser_options.add_argument(
                    "user-data-dir={0}".format(cfg.user_data_dir))
        else:
            raise NotImplementedError

        if fetch_driver:
            Loader.fetch(cfg.executable_path, cfg.debug, cfg.driver)

        browser_options.binary_location = cfg.executable_path
        browser_options.headless = cfg.headless

        if is_firefox:
            if not use_proxy:
                return driver_cls(profile, cfg.binary, options=browser_options)
            return driver_cls(profile,
                              cfg.binary,
                              options=browser_options,
                              seleniumwire_options=wire_options)

        # Only the Chrome family can reach this point.
        if not use_proxy:
            return driver_cls(options=browser_options)
        return driver_cls(options=browser_options,
                          seleniumwire_options=wire_options)
Exemple #2
0
    def __init_browser(self):
        """ (re)create the headless Firefox driver and its WebDriverWait """
        self.__options = browser_options = webdriver.FirefoxOptions()
        browser_options.headless = True
        browser_options.accept_insecure_certs = True
        self.__geckodriver_binary = self.__args.geckodriver
        self.__firefox_binary = FirefoxBinary(self.__args.firefox)

        # Fresh profile, customised by the firefox_profile() helper
        self.__profile = webdriver.FirefoxProfile()
        firefox_profile(self.__profile)

        # Close the window of a previously created browser, if there is one
        if self.__browser is not None:
            self.__browser.close()

        # Console mode sends the geckodriver log to the null device;
        # otherwise it is written to the configured log file
        driver_log = os.path.devnull if self.__args.console else self.__args.log
        self.__browser = webdriver.Firefox(
            options=self.__options,
            firefox_binary=self.__firefox_binary,
            firefox_profile=self.__profile,
            executable_path=self.__geckodriver_binary,
            log_path=driver_log)
        self.__browser.set_window_size(1920, 1080)
        self.__wait = WebDriverWait(self.__browser, self.__args.timeout)
Exemple #3
0
    def get_firefox(self, proxy, useragent):
        """
        Start a headless Firefox driver with an optional proxy and user agent.

        :param proxy: object exposing ``get_init_string()``, or ``None`` to
            run without a proxy entry in the Selenium Wire options
        :param useragent: User-Agent string, or ``None`` to skip the profile
            preference and the ``--user-agent`` argument
        :return: the started ``webdriver.Firefox`` instance
        """
        profile = webdriver.FirefoxProfile()
        if useragent is not None:
            profile.set_preference("general.useragent.override", useragent)

        # Work on a copy: DesiredCapabilities.FIREFOX is a class-level dict,
        # so changing it in place would affect every other user of it.
        firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
        firefox_capabilities['marionette'] = True

        options_sel = {
            'connection_timeout': 5,
            'suppress_connection_errors': True
        }

        if proxy is not None:
            http_proxy = proxy.get_init_string()
            # Swap the scheme only; a blanket replace('http', 'https') would
            # also rewrite "http" inside the host name or credentials.
            if http_proxy.startswith('http://'):
                https_proxy = 'https://' + http_proxy[len('http://'):]
            else:
                https_proxy = http_proxy
            options_sel['proxy'] = {
                'http': http_proxy,
                'https': https_proxy
            }

        options = Options()
        options.headless = True
        options.add_argument("--window-size=1920,1080")
        if useragent is not None:
            # Without this guard the literal text "None" would be passed on.
            options.add_argument('--user-agent={}'.format(useragent))

        binary = FirefoxBinary(self.browser_path)
        driver_ = webdriver.Firefox(capabilities=firefox_capabilities,
                                    options=options,
                                    firefox_binary=binary,
                                    firefox_profile=profile,
                                    timeout=5,
                                    seleniumwire_options=options_sel)

        # NOTE(review): with useragent=None this stores a None override for
        # the User-Agent header — confirm that is the intended behaviour.
        driver_.header_overrides = {'User-Agent': useragent}

        return driver_
    def _create_driver(self) -> webdriver.Firefox:
        """Create a Firefox driver from this object's settings.

        Reads ``profile_settings``, ``user_agent``, ``proxy``, ``is_headless``
        and ``request_scope`` from the instance and returns the new driver.
        """
        ff_profile = webdriver.FirefoxProfile()
        for pref_name, pref_value in (self.profile_settings or {}).items():
            ff_profile.set_preference(pref_name, pref_value)
        if self.user_agent:
            ff_profile.set_preference("general.useragent.override",
                                      self.user_agent)
        ff_profile.update_preferences()

        # Selenium Wire options stay empty unless a proxy is configured.
        wire_options = {}
        if self.proxy:
            wire_options.update({
                "proxy": {
                    "http": self.proxy,
                    "https": self.proxy
                },
                "connection_keep_alive": True,
                "connection_timeout": 180,
            })

        browser_options = Options()
        browser_options.headless = self.is_headless

        # Without an explicit request scope, fall back to the module-level
        # ignore_http_methods setting.
        if not self.request_scope:
            wire_options["ignore_http_methods"] = ignore_http_methods

        driver = webdriver.Firefox(
            ff_profile,
            options=browser_options,
            seleniumwire_options=wire_options,
        )
        if self.request_scope:
            driver.scopes = self.request_scope
        return driver
Exemple #5
0
 def __init__(self, start_url):
     """Start Firefox with image loading blocked and collect the season URLs."""
     # Values accepted by "permissions.default.image":
     #   1 - Allow all images
     #   2 - Block all images
     #   3 - Block 3rd party images
     block_all_images = 2
     image_free_profile = webdriver.FirefoxProfile()
     image_free_profile.set_preference("permissions.default.image",
                                       block_all_images)
     self.driver = webdriver.Firefox(firefox_profile=image_free_profile)
     self.start_url = start_url
     self.seasons_urls = self._get_seasons()
Exemple #6
0
def launch_browser(headers=None,
                   user_agent=None,
                   proxy=None,
                   browser_type="Firefox"):
    """Start a browser driver located next to this file.

    Args:
        headers: optional header overrides, applied through the driver's
            ``_client.set_header_overrides``.
        user_agent: User-Agent for Firefox; when falsy, one is produced by
            ``generate_user_agent()``. Not used for other browser types.
        proxy: optional proxy address, used for both the ``http`` and
            ``https`` entries of the Selenium Wire ``proxy`` option.
        browser_type: ``"Firefox"`` selects geckodriver; any other value
            selects ``chromedriver.exe``.

    Returns:
        The started driver, with its window resized to 1920x1080.

    Raises:
        SystemExit: with code 0 when no driver could be created (after the
            user acknowledges the prompt).
    """
    wire_options = {}
    if proxy:
        wire_options["proxy"] = {
            "http": proxy,
            "https": proxy,
        }

    # When frozen with a _MEIPASS attribute, look in that directory;
    # otherwise look next to this source file.
    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
        directory = sys._MEIPASS
    else:
        directory = os.path.dirname(__file__)

    driver = None
    if browser_type == "Firefox":
        candidates = (os.path.join(directory, name)
                      for name in ("geckodriver.exe", "geckodriver"))
        driver_path = next(
            (path for path in candidates if os.path.exists(path)), None)
        if driver_path:
            opts = webdriver.FirefoxOptions()
            # opts.add_argument("--headless")
            profile = webdriver.FirefoxProfile()
            if not user_agent:
                user_agent = generate_user_agent()
            profile.set_preference("general.useragent.override", user_agent)
            driver = webdriver.Firefox(
                firefox_profile=profile,
                executable_path=driver_path,
                options=opts,
                seleniumwire_options=wire_options,
            )
        else:
            message = f"Download geckodriver from https://github.com/mozilla/geckodriver/releases/tag/v0.27.0 and paste it in {directory}"
            input(message)
    else:
        driver_path = os.path.join(directory, "chromedriver.exe")
        opts = webdriver.ChromeOptions()
        # The proxy is supplied through seleniumwire_options only; no
        # "--proxy-server" argument is added to the browser itself.
        driver = webdriver.Chrome(executable_path=driver_path,
                                  options=opts,
                                  seleniumwire_options=wire_options)

    if not driver:
        input("DRIVER NOT FOUND")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist in every runtime (e.g. frozen builds).
        sys.exit(0)

    driver.set_window_size(1920, 1080)
    if headers:
        driver._client.set_header_overrides(headers=headers)
    return driver
Exemple #7
0
    def _firefox_config(self):
        """Return a Firefox driver using the module-level ``proxies`` and a
        profile with a custom user agent, WebRTC and Flash disabled."""
        wire_options = {'proxy': proxies}

        profile = webdriver.FirefoxProfile()
        preferences = (
            ("general.useragent.override", self._user_agent),  # chosen user agent
            ("media.peerconnection.enabled", False),  # disable WebRTC
            ("plugin.state.flash", 0),  # disable Flash
            ("general.useragent.locale", "en"),
        )
        for pref_name, pref_value in preferences:
            profile.set_preference(pref_name, pref_value)
        profile.update_preferences()  # save settings

        return webdriver.Firefox(firefox_profile=profile,
                                 seleniumwire_options=wire_options,
                                 executable_path='geckodriver.exe')
def build_driver():
    """Start a headless Firefox behind a proxy and record its session.

    After the driver is created, its command executor URL and session id are
    written (one per line) to ``SELENIUM_SESSION_FILE``, the function sleeps
    for five seconds and ``ulties.closeOtherTabs`` is called on the driver.

    Returns:
        The started ``webdriver.Firefox`` instance.
    """
    software_names = [SoftwareName.FIREFOX.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent = user_agent_rotator.get_random_user_agent()
    # test for prevent block: the random agent is replaced by a fixed one.
    # This must be the bare User-Agent value; a "user-agent=" prefix here
    # would end up inside the profile preference and be doubled in the
    # argument built below.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    # options.add_argument("--window-size=1420,1080")
    options.add_argument("--disable-gpu")
    options.add_argument(f'user-agent={user_agent}')

    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    profile.add_extension(extension='./extensions/adblock_plus-3.11-an+fx.xpi')
    profile.add_extension(
        extension='./extensions/adblock_for_firefox-4.33.0-fx.xpi')

    # NOTE(review): hard-coded credential that the proxy URLs below do not
    # reference as written — consider loading it from the environment.
    API_KEY = '7f3282dc1e35451c7037fa93818b0cef'
    proxy_options = {
        'proxy': {
            'http':
            f'http://*****:*****@proxy-server.scraperapi.com:8001',
            'https':
            f'http://*****:*****@proxy-server.scraperapi.com:8001',
            'no_proxy': 'localhost,127.0.0.1'
        }
    }

    driver = webdriver.Firefox(firefox_profile=profile,
                               firefox_binary=None,
                               options=options,
                               seleniumwire_options=proxy_options)

    print("Agent: {}".format(user_agent))
    # Context manager so the file is closed even if a write fails.
    with open(SELENIUM_SESSION_FILE, 'w') as session_file:
        session_file.writelines([
            driver.command_executor._url,
            "\n",
            driver.session_id,
            "\n",
        ])
    time.sleep(5)
    # close other tabs
    ulties.closeOtherTabs(driver)
    return driver
Exemple #9
0
    def __init__(self, start_url, season):
        """ obj instance created in context of driver process """
        # Values accepted by "permissions.default.image":
        #   1 - Allow all images
        #   2 - Block all images
        #   3 - Block 3rd party images
        block_all_images = 2
        image_free_profile = webdriver.FirefoxProfile()
        image_free_profile.set_preference("permissions.default.image",
                                          block_all_images)

        self.driver = webdriver.Firefox(firefox_profile=image_free_profile)
        self.season, self.start_url = season, start_url
        self.streams_links = self._get_episodes()
        # streams_list starts empty; it is delivered by _get_chunks()
        self.streams_list = None
        self._get_chunks()
Exemple #10
0
def get_firefox_profile():
    """Return a Firefox profile with the bundled extensions installed.

    Each extension is added from ``{dir_path}/firefox_extensions`` and its
    ``currentVersion`` preference is set right afterwards.
    """
    ff_profile = webdriver.FirefoxProfile()
    # uBlock Origin is intentionally left out:
    # profile.add_extension("firefox_extensions/[email protected]")
    # profile.set_preference("extensions.ublock0.currentVersion", "1.25.2")
    # NOTE(review): the second file name below looks mangled by e-mail
    # obfuscation — confirm the real .xpi name before relying on it.
    addons = (
        # AdBlockPlus extension.
        (f"{dir_path}/firefox_extensions/d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d.xpi",
         "extensions.adblockplus.currentVersion", "3.8"),
        # I don't care about cookies extension.
        (f"{dir_path}/firefox_extensions/[email protected]",
         "extensions.idontcareaboutcookies.currentVersion", "3.1.3"),
    )
    for xpi_path, version_pref, version in addons:
        ff_profile.add_extension(xpi_path)
        ff_profile.set_preference(version_pref, version)
    return ff_profile
Exemple #11
0
def initialize_browser():
    """Create the global Firefox ``driver`` and get it logged in to Patreon.

    Does nothing when ``driver`` is already set. Otherwise a Firefox profile
    with download preferences is created, the browser is started and
    https://www.patreon.com is opened. If ``patreon_cookie.pkl`` exists its
    cookies are added and the login page is requested; unless that lands on
    the home page, the user is prompted until they answer "y". The current
    cookies are then written back to ``patreon_cookie.pkl``.
    """
    global driver

    if driver is not None:
        return

    cookie_path = "patreon_cookie.pkl"
    options = {
        'connection_timeout': None  # Never timeout
    }

    fp = webdriver.FirefoxProfile()
    fp.set_preference("browser.download.folderList", 2)
    fp.set_preference("general.useragent.override",
                      "K-Barber's Patreon Scraper")
    # os.path.join avoids a doubled separator when the cwd already ends in one.
    fp.set_preference("browser.download.dir",
                      os.path.join(os.getcwd(), "scraped"))
    fp.set_preference("browser.download.manager.showWhenStarting", False)
    fp.set_preference("browser.download.manager.showAlertOnComplete", False)
    download_types = """
        image/jpg, image/jpeg, image/png, application/zip, application/x-photoshop, image/vnd.adobe.photoshop,
        application/photoshop, application/psd, image/psd
    """
    fp.set_preference("browser.helperApps.neverAsk.saveToDisk", download_types)

    driver = webdriver.Firefox(firefox_profile=fp,
                               seleniumwire_options=options)
    # NOTE(review): the WebDriverWait object is created and discarded; no
    # until()/until_not() is called on it — confirm whether a wait was meant.
    WebDriverWait(driver, 5)
    get_url("https://www.patreon.com")

    logged_in = False
    if os.path.isfile(cookie_path):
        # NOTE: pickle can execute code while loading; only use a cookie
        # file that was written by this program.
        with open(cookie_path, "rb") as cookie_file:
            saved_cookies = pickle.load(cookie_file)
        for cookie in saved_cookies:
            driver.add_cookie(cookie)
        get_url("https://www.patreon.com/login")
        # Landing on the home page means the saved cookies are still valid.
        logged_in = driver.current_url == "https://www.patreon.com/home"

    if not logged_in:
        answer = ""
        while answer.lower() != "y":
            answer = input("Done logging in? (Y/N)")

    # Fetch the cookies before opening the file so a driver error cannot
    # leave a truncated cookie file behind.
    current_cookies = driver.get_cookies()
    with open(cookie_path, "wb") as cookie_file:
        pickle.dump(current_cookies, cookie_file)
Exemple #12
0
    def emulator(self):
        """Load the rental listing page in headless Firefox and return links.

        The location is taken from ``self.settings.get("location")``; when
        ``self.use_useragent`` is truthy a User-Agent header override from
        ``get_useragent()`` is set before the page is requested.

        :return: list with the ``href`` of every ``<a class="ot-card">``
            element found in the page body
        """
        firefox_profile = webdriver.FirefoxProfile()
        # firefox_profile.set_preference('permissions.default.image', 2)
        # firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
        options = Options()
        options.headless = True
        driver = webdriver.Firefox(options=options,
                                   firefox_profile=firefox_profile)
        try:
            url = 'https://asunnot.oikotie.fi/vuokrattavat-asunnot?pagination=1&cardType=101&' \
                  f'locations={self.settings.get("location")}'

            if self.use_useragent:
                driver.header_overrides = {"User-Agent": get_useragent()}

            driver.get(url)
            source = driver.find_element_by_xpath("//body").get_attribute(
                'outerHTML')
        finally:
            # Always end the session, even when loading or reading the page
            # fails; quit() shuts the driver down instead of only closing
            # the current window.
            driver.quit()

        soup = BeautifulSoup(source, "lxml")
        cards = soup.find_all('a', {'class': 'ot-card'})
        return [card.get('href') for card in cards]
# Script fragment: starts a Firefox driver, opens `url` and fills in the
# "email" / "password" fields. `username` and `url` are not defined in the
# visible part of the script — presumably set earlier; verify.
password = "******"
page_limit = 3

# NOTE(review): the only positional argument is the path of the seleniumwire
# package directory — confirm which webdriver.Firefox parameter it is meant for.
driver = webdriver.Firefox(
    r'C:\Users\Gabriel\anaconda3\Lib\site-packages\seleniumwire\webdriver'
)  ##change to your path
sleep(2)

# Header overrides set on the driver; 'referer_string' looks like a
# placeholder value — confirm.
driver.header_overrides = {
    'Referer': 'referer_string',
}

driver.get(url)
sleep(3)

# Profile that loads the extension at path_modify_header and sets its
# "modifyheaders.*" preferences for one header entry.
# NOTE(review): this profile is created after the driver was started and is
# not passed to any driver in the visible code.
fp = webdriver.FirefoxProfile()
path_modify_header = r'C:\Users\Gabriel\Desktop\helloWorld\modify_headers-0.7.1.1-fx.xpi'  ##change to your path
fp.add_extension(path_modify_header)
fp.set_preference("modifyheaders.headers.count", 1)
fp.set_preference("modifyheaders.headers.action0", "Will")
fp.set_preference("modifyheaders.headers.name0", "FFOX")
fp.set_preference("modifyheaders.headers.value0", "20.2")
fp.set_preference("modifyheaders.headers.enabled0", True)
fp.set_preference("modifyheaders.config.active", True)
fp.set_preference("modifyheaders.config.alwaysOn", True)

# Type the credentials with short pauses in between, then click the element
# matching "button.btn" (presumably the login button — verify).
driver.find_element_by_name("email").send_keys(username)
sleep(0.5)
driver.find_element_by_name("password").send_keys(password)
sleep(0.5)
driver.find_element_by_css_selector("button.btn").click()
Exemple #14
0
"""

# Different path variables; everything is derived from the folder that
# contains this file.
config_folder = os.path.dirname(os.path.abspath(__file__))
monitoring_folder, profile_folder, log_folder = (
    os.path.join(config_folder, sub_folder)
    for sub_folder in ("files", "profile", "logs"))
geckodriver = os.path.join(config_folder, "geckodriver")
geckodriver_log = os.path.join(log_folder, "geckodriver.log")

# Seleniumwire driver and its options.
# The profile folder has to exist before FirefoxProfile is pointed at it.
Path(profile_folder).mkdir(parents=True, exist_ok=True)
options = Options()
options.headless = True  # run the browser headless
driver_profile = webdriver.FirefoxProfile(profile_folder)
driver_profile.set_preference('intl.accept_languages', 'de')
driver = webdriver.Firefox(
    firefox_profile=driver_profile,
    executable_path=geckodriver,
    log_path=geckodriver_log,
    options=options,
)

LOGGING_CONFIG = {
    'version': 1,  # required
    'disable_existing_loggers':
    True,  # this config overrides all other loggers
    'formatters': {