def __init__(self, driver_location, url):
    """Start a locked-down headless Chrome session and open the target page.

    Args:
        driver_location: Passed as the first positional argument to
            ``webdriver.Chrome`` (presumably the chromedriver executable
            path -- confirm against the installed Selenium version).
        url: Stored on ``self._url``; presumably the page loaded by
            ``self._open_page()`` -- confirm against that method.

    Raises:
        Anything raised by ``webdriver.Chrome`` or ``self._open_page()``.
        When ``_open_page()`` fails, the browser is shut down before the
        exception propagates.
    """
    # Content-setting categories that are all set to 2. In Chrome's
    # content-settings scheme 2 is understood to mean "block".
    blocked_settings = (
        'cookies',
        'images',
        'javascript',
        'plugins',
        'popups',
        'geolocation',
        'notifications',
        'auto_select_certificate',
        'fullscreen',
        'mouselock',
        'mixed_script',
        'media_stream',
        'media_stream_mic',
        'media_stream_camera',
        'protocol_handlers',
        'ppapi_broker',
        'automatic_downloads',
        'midi_sysex',
        'push_messaging',
        'ssl_cert_decisions',
        'metro_switch_to_desktop',
        'protected_media_identifier',
        'app_banner',
        'site_engagement',
        'durable_storage',
    )
    content_settings = dict.fromkeys(blocked_settings, 2)
    # NOTE(review): 'disk-cache-size' does not look like a content-setting
    # key (it is normally a command-line switch). Kept unchanged so the
    # preferences sent to Chrome stay the same -- confirm it has an effect.
    content_settings['disk-cache-size'] = 4096
    prefs = {'profile.default_content_setting_values': content_settings}

    chrom_options = Options()
    chrom_options.add_experimental_option("prefs", prefs)
    chrom_options.add_argument('--headless')
    chrom_options.add_argument('--no-sandbox')

    # Work on a copy: DesiredCapabilities.CHROME is a dict shared by the whole
    # process, so editing it in place would leak this setting into every
    # other driver created afterwards.
    capa = DesiredCapabilities.CHROME.copy()
    capa["pageLoadStrategy"] = "normal"

    self._driver = webdriver.Chrome(
        driver_location,
        desired_capabilities=capa,
        chrome_options=chrom_options,
    )
    self._url = url
    self._scraper_functions = [
        self.get_title,
        self.get_url,
        self.get_description,
        self.get_keywords,
        # self.get_category,
        self.get_links,
    ]

    try:
        self._open_page()
    except Exception:
        # The caller never receives this instance when __init__ raises, so
        # nobody else could shut the browser down; do it here.
        self._driver.quit()
        raise
def proxy_driver():
    """Create a Chrome driver routed through the next proxy in the pool.

    Takes one proxy off the module-level ``ALL_PROXIES`` collection, refilling
    it via ``get_proxies()`` when it is empty, and records the chosen proxy in
    the module-level ``my_ip``. The browser is started with a random
    user agent and the Buster and VPN extensions loaded.

    Returns:
        The ``webdriver.Chrome`` instance that was started. If no proxy is
        available even after refilling, the driver is started without one
        and ``my_ip`` is left unchanged.
    """
    global ALL_PROXIES, my_ip

    if not ALL_PROXIES:
        print("--- Proxies used up (%s)" % len(ALL_PROXIES))
        ALL_PROXIES = get_proxies()

    # Copy the shared capabilities dict so the proxy chosen for this driver
    # does not leak into drivers created later.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()

    # Checked again after the refill: a fresh pool must still yield a proxy
    # for this call, and an empty refill falls back to a direct connection.
    if ALL_PROXIES:
        # Take (and remove) the last proxy in the pool.
        pxy = ALL_PROXIES.pop()
        my_ip = pxy
        print('Proxy Actual:', pxy)

        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.autodetect = False
        prox.httpProxy = prox.sslProxy = pxy
        prox.add_to_capabilities(capabilities)

    co = Options()
    co.add_argument("ignore-certificate-errors")
    co.add_argument("start-maximized")
    co.add_experimental_option("excludeSwitches", ["enable-automation"])
    co.add_experimental_option('useAutomationExtension', False)

    ua = UserAgent()
    userAgent = ua.random
    co.add_argument(f'user-agent={userAgent}')
    co.add_argument('--disable-notifications')

    # Load the Buster add-on, used to get past captchas, and the VPN add-on.
    co.add_extension('./buster_extension.crx')
    co.add_extension('./vpn.crx')

    # The capabilities must be handed to the driver explicitly; otherwise the
    # proxy configured above is never applied to this session.
    driver = webdriver.Chrome(
        ChromeDriverManager().install(),
        desired_capabilities=capabilities,
        chrome_options=co,
    )
    return driver