コード例 #1
0
    def __init__(self, driver_location, url):
        """Start a headless Chrome session and open the target page.

        Args:
            driver_location: Passed as the first positional argument to
                ``webdriver.Chrome`` (presumably the chromedriver
                executable path -- confirm against the Selenium version
                in use).
            url: Stored on ``self._url``; presumably the page that
                ``_open_page`` loads -- verify against that method.

        Raises:
            Exception: Whatever ``webdriver.Chrome`` or ``_open_page``
                raises. If ``_open_page`` fails, the browser is shut
                down before the exception propagates.
        """
        chrom_options = Options()

        # Every content setting below is set to 2, presumably Chrome's
        # "block" value -- TODO confirm against the Chrome preferences docs.
        prefs = {
            'profile.default_content_setting_values': {
                'cookies': 2,
                'images': 2,
                'javascript': 2,
                'plugins': 2,
                'popups': 2,
                'geolocation': 2,
                'notifications': 2,
                'auto_select_certificate': 2,
                'fullscreen': 2,
                # NOTE(review): this looks like the `--disk-cache-size`
                # command-line switch rather than a content setting, so it
                # may have no effect here -- verify before relying on it.
                'disk-cache-size': 4096,
                'mouselock': 2,
                'mixed_script': 2,
                'media_stream': 2,
                'media_stream_mic': 2,
                'media_stream_camera': 2,
                'protocol_handlers': 2,
                'ppapi_broker': 2,
                'automatic_downloads': 2,
                'midi_sysex': 2,
                'push_messaging': 2,
                'ssl_cert_decisions': 2,
                'metro_switch_to_desktop': 2,
                'protected_media_identifier': 2,
                'app_banner': 2,
                'site_engagement': 2,
                'durable_storage': 2
            }
        }
        chrom_options.add_experimental_option("prefs", prefs)
        chrom_options.add_argument('--headless')
        chrom_options.add_argument('--no-sandbox')
        # NOTE(review): `Proxy` is set as a plain attribute on the options
        # object; it is unclear whether Selenium reads it -- confirm.
        chrom_options.Proxy = None

        # DesiredCapabilities.CHROME is a shared class-level dict; work on a
        # copy so the page-load strategy does not leak into other drivers
        # created in the same process.
        capa = DesiredCapabilities.CHROME.copy()
        capa["pageLoadStrategy"] = "normal"
        self._driver = webdriver.Chrome(driver_location,
                                        desired_capabilities=capa,
                                        chrome_options=chrom_options)
        self._url = url

        self._scraper_functions = [
            self.get_title,
            self.get_url,
            self.get_description,
            self.get_keywords,
            # self.get_category,
            self.get_links
        ]

        try:
            self._open_page()
        except Exception:
            # The caller never receives this instance when construction
            # fails, so nobody else could close the browser process.
            self._driver.quit()
            raise
コード例 #2
0
def proxy_driver():
    """Create a Chrome WebDriver configured with the next proxy in the pool.

    Pops one entry from the module-level ``ALL_PROXIES`` pool, records it in
    the module-level ``my_ip``, and configures it as the manual HTTP/SSL
    proxy of a new Chrome session. The session also gets a random
    user-agent and the two ``.crx`` extensions. When the pool is empty it is
    refilled through ``get_proxies()`` first, and the fresh pool is used for
    this very call.

    Returns:
        The ``webdriver.Chrome`` instance. If the pool is still empty after
        refilling, a Chrome session with default options and no proxy is
        returned (the behaviour the original code had for an empty pool).
    """
    global ALL_PROXIES, my_ip

    co = Options()
    capabilities = None

    if not ALL_PROXIES:
        print("--- Proxies used up (%s)" % len(ALL_PROXIES))
        ALL_PROXIES = get_proxies()

    # Deliberately not an `else`: right after a refill the new pool must be
    # used, otherwise this call would hand back an unproxied browser.
    if ALL_PROXIES:
        # Take (and remove) the last proxy of the pool.
        pxy = ALL_PROXIES.pop()
        my_ip = pxy
        print('Proxy Actual:', pxy)

        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.autodetect = False
        prox.httpProxy = prox.sslProxy = pxy

        # Write the proxy into a private copy of the capabilities and pass
        # that copy to the driver below. Mutating the shared
        # DesiredCapabilities.CHROME dict would leak this proxy into every
        # later driver and would not reliably reach this one.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)

        # NOTE(review): kept from the original; `Proxy` is set as a plain
        # attribute and it is unclear whether Selenium reads it -- confirm.
        co.Proxy = prox
        co.add_argument("ignore-certificate-errors")

        co.add_argument("start-maximized")
        co.add_experimental_option("excludeSwitches", ["enable-automation"])
        co.add_experimental_option('useAutomationExtension', False)
        user_agent = UserAgent().random
        co.add_argument(f'user-agent={user_agent}')
        co.add_argument('--disable-notifications')

        # Add the Buster add-on, used to deal with captchas.
        co.add_extension('./buster_extension.crx')
        co.add_extension('./vpn.crx')

    driver = webdriver.Chrome(ChromeDriverManager().install(),
                              desired_capabilities=capabilities,
                              chrome_options=co)

    return driver