def start_browser(link, cookies):
    """Start Chrome, open ``link`` and reload it with ``cookies`` applied.

    The page is loaded once so that cookies can be attached to its domain,
    each cookie is added, and the page is loaded a second time so the
    request is sent with those cookies.

    Args:
        link: URL to open.
        cookies: Iterable of mappings providing at least the ``"name"``,
            ``"value"`` and ``"domain"`` keys.

    Returns:
        The running ``Chrome`` driver. The caller owns it and is responsible
        for calling ``quit()``.
    """
    # DesiredCapabilities.CHROME is a class-level dict; work on a copy so the
    # page-load strategy does not leak into every other driver in the process.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "eager"

    chrome_options = ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option("useAutomationExtension", False)

    # ``driver_path`` is expected to be defined at module level.
    driver = Chrome(
        desired_capabilities=caps,
        executable_path=driver_path,
        options=chrome_options,
    )
    # Wrap ``navigator`` in a proxy that hides the ``webdriver`` property
    # before any page script runs.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": """
                Object.defineProperty(window, 'navigator', {
                    value: new Proxy(navigator, {
                        has: (target, key) => (key === 'webdriver' ? false : key in target),
                        get: (target, key) =>
                            key === 'webdriver'
                                ? undefined
                                : typeof target[key] === 'function'
                                    ? target[key].bind(target)
                                    : target[key]
                    })
                })
            """
        },
    )

    # Cookies can only be added for the domain of the current document.
    driver.get(link)
    for cookie in cookies:
        driver.add_cookie({
            "name": cookie["name"],
            "value": cookie["value"],
            "domain": cookie["domain"],
        })
    driver.get(link)
    return driver
def build_driver(path_driver):
    """Create a headless Chrome driver with automation hints reduced.

    Args:
        path_driver: Filesystem path of the chromedriver executable.

    Returns:
        A started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    chrome_options = ChromeOptions()
    for argument in (
        "--headless",
        "--ignore-certificate-errors",
        # The switch takes a feature list; passed bare it disables nothing.
        "--disable-blink-features=AutomationControlled",
        "--disable-dev-shm-usage",
        "--no-sandbox",
        "--allow-running-insecure-content",
        "--window-size=1920,1080",
    ):
        chrome_options.add_argument(argument)
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option("useAutomationExtension", False)

    user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 " \
                 "(KHTML, like Gecko) Chrome/89.0.4280.66 Safari/537.36"
    chrome_options.add_argument(f'user-agent={user_agent}')

    driver = Chrome(executable_path=path_driver, options=chrome_options)
    # Remove ``webdriver`` from the navigator prototype before page scripts run.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": """
                const newProto = navigator.__proto__
                delete newProto.webdriver
                navigator.__proto__ = newProto
            """
        },
    )
    return driver
def user_agent_override(driver: Driver, user_agent: str = None, language: str = None, platform: str = None, **kwargs) -> None:
    """Override the browser's user agent through the DevTools protocol.

    Args:
        driver: Object exposing ``execute_cdp_cmd(command, params)``.
        user_agent: User-agent string to report. When ``None`` the current
            value is read with ``Browser.getVersion``.
        language: Optional value sent as ``acceptLanguage``.
        platform: Optional value sent as ``platform``.
        **kwargs: Accepted and ignored.

    Only the fields that were actually supplied are sent; previously a
    missing ``language`` or ``platform`` was forwarded to CDP as ``None``.
    """
    if user_agent is None:
        ua = driver.execute_cdp_cmd("Browser.getVersion", {})['userAgent']
    else:
        ua = user_agent
    ua = ua.replace("HeadlessChrome", "Chrome")  # hide headless nature

    override = {"userAgent": ua}
    if language:
        override["acceptLanguage"] = language
    if platform:
        override["platform"] = platform

    driver.execute_cdp_cmd('Network.setUserAgentOverride', override)
def gain_driver():
    """Create a headless Chrome driver with ``stealth.min.js`` pre-injected.

    ``stealth.min.js`` is read from the current working directory and
    registered to run in every new document.

    :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument("--headless")
    # The feature list is attached with "="; the previous hyphenated spelling
    # was an unknown switch.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')

    driver = Chrome('chromedriver', options=chrome_options)
    driver.set_window_size(1366, 768)

    with open('stealth.min.js', encoding='utf-8') as f:
        js = f.read()
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": js
    })
    return driver
def set_spider_option(self, header=None) -> Chrome:
    """Build and start a Chrome driver configured for crawling.

    :param header: user-agent string to use; when falsy, ``get_header()``
        supplies one.
    :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    options = ChromeOptions()
    # Run without the sandbox.
    options.add_argument('--no-sandbox')
    # Incognito mode.
    options.add_argument('-incognito')
    # NOTE(review): '--disk-cache-' looks truncated (no option name/value);
    # kept unchanged, confirm the intended switch.
    options.add_argument('--disk-cache-')
    # Chinese locale.
    options.add_argument('lang=zh_CN.UTF-8')
    options.add_argument('--log-level=3')

    # Replace the user agent.
    options.add_argument(f"user-agent={header if header else get_header()}")

    # Silent (headless) start.
    if self.silence is True:
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument("--disable-software-rasterizer")

    # Suppress automation fingerprints.
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option('useAutomationExtension', False)
    # add_experimental_option() stores one value per name, so both switches
    # must be passed together; two separate calls kept only the last list.
    options.add_experimental_option('excludeSwitches', ['enable-logging', 'enable-automation'])

    if self.assault:
        # Fast mode: block images/JavaScript and do not wait for page load.
        chrome_pref = {"profile.default_content_settings": {"Images": 2, 'javascript': 2},
                       "profile.managed_default_content_settings": {"Images": 2}}
        options.experimental_options['prefs'] = chrome_pref
        # Copy: DesiredCapabilities.CHROME is shared by every driver created
        # in this process.
        d_c = DesiredCapabilities.CHROME.copy()
        d_c['pageLoadStrategy'] = 'none'
        _api = Chrome(
            options=options,
            executable_path=CHROMEDRIVER_PATH,
            desired_capabilities=d_c
        )
    else:
        _api = Chrome(options=options, executable_path=CHROMEDRIVER_PATH)

    # Hide navigator.webdriver before any page script runs.
    _api.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            })
        """
    })
    return _api
def get_driver():
    """Return a Chrome driver whose pages see ``navigator.webdriver`` as undefined."""
    from selenium.webdriver import Chrome
    from selenium.webdriver import ChromeOptions

    hide_webdriver_js = """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"""

    browser_options = ChromeOptions()
    browser_options.add_experimental_option('excludeSwitches', ['enable-automation'])

    browser = Chrome(options=browser_options)
    # Registered once; Chrome evaluates it at the start of every new document.
    browser.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": hide_webdriver_js},
    )
    return browser
def set_spider_options(self) -> Chrome:
    """Build and start a Chrome driver configured for crawling.

    Returns:
        The started ``Chrome`` driver.

    Raises:
        SystemExit: when the ``WebDriverException`` message mentions
            ``chromedriver`` (the executable at ``self.CHROMEDRIVER_PATH``
            could not be used).
        WebDriverException: any other driver start-up failure; it is no
            longer swallowed into an implicit ``None`` return.
    """
    options = ChromeOptions()
    # Run without the sandbox.
    options.add_argument('--no-sandbox')
    # Incognito mode.
    options.add_argument('-incognito')
    # NOTE(review): '--disk-cache-' looks truncated; kept unchanged.
    options.add_argument('--disk-cache-')
    # Chinese locale.
    options.add_argument('lang=zh_CN.UTF-8')
    options.add_argument('--log-level=3')

    # The value is handed to Chrome without shell parsing, so the quotes that
    # used to wrap it ended up inside the user-agent string itself.
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Edg/92.0.902.78"
    )

    # Silent (headless) start.
    if self.silence is True:
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument("--disable-software-rasterizer")

    # Suppress automation fingerprints.
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option('useAutomationExtension', False)
    # One call with both switches: a second call for the same option name
    # replaces the first list instead of extending it.
    options.add_experimental_option('excludeSwitches', ['enable-logging', 'enable-automation'])

    try:
        _api = Chrome(options=options, executable_path=self.CHROMEDRIVER_PATH)
        _api.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    })
                """
            })
        return _api
    except WebDriverException as e:
        if "chromedriver" in str(e):
            print(f">>> 指定目录下缺少chromedriver {self.CHROMEDRIVER_PATH}")
            sys.exit()
        raise
def get_response_body_list(browser: Chrome, target_list: list) -> list:
    """Collect response bodies for logged requests that match ``target_list``.

    Every entry of the browser's ``performance`` log is decoded; entries whose
    params carry no ``request`` are ignored. For each target whose ``url`` is
    a substring of the request URL and whose ``method`` equals the request
    method, the body is fetched with ``Network.getResponseBody``.

    Returns:
        A list of dicts with keys ``response_body``, ``url`` and ``method``
        (the last two taken from the matching target).
    """
    collected = []
    for entry in browser.get_log('performance'):
        params = json.loads(entry['message'])['message']['params']
        if 'request' not in params:
            continue
        logged_request = params['request']
        logged_url = logged_request['url']
        logged_method = logged_request['method']
        for wanted in target_list:
            if wanted['url'] not in logged_url:
                continue
            if wanted['method'] != logged_method:
                continue
            # The request id ties the log entry to its stored response.
            body_reply = browser.execute_cdp_cmd(
                'Network.getResponseBody',
                {'requestId': params['requestId']},
            )
            collected.append({
                'response_body': body_reply['body'],
                'url': wanted['url'],
                'method': wanted['method']
            })
    return collected
def set_spider_options(self) -> Chrome:
    """Build and start a Chrome driver configured for crawling.

    Returns:
        The started ``Chrome`` driver.

    Raises:
        SystemExit: when the ``WebDriverException`` message mentions
            ``chromedriver``.
        WebDriverException: any other driver start-up failure; it is no
            longer swallowed into an implicit ``None`` return.
    """
    options = ChromeOptions()
    # Run without the sandbox.
    options.add_argument('--no-sandbox')
    # Incognito mode.
    options.add_argument('-incognito')
    # NOTE(review): '--disk-cache-' looks truncated; kept unchanged.
    options.add_argument('--disk-cache-')
    # Chinese locale.
    options.add_argument('lang=zh_CN.UTF-8')
    # The value is handed to Chrome without shell parsing, so the quotes that
    # used to wrap it ended up inside the user-agent string itself.
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    )
    options.add_argument('--disable-blink-features=AutomationControlled')

    # Silent (headless) start.
    if self.silence is True:
        options.add_argument('--headless')

    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option('excludeSwitches', ['enable-automation'])

    try:
        # Use the configured chromedriver when one is set, otherwise let
        # Selenium locate it.
        if self.CHROMEDRIVER_PATH:
            _api = Chrome(options=options, executable_path=self.CHROMEDRIVER_PATH)
        else:
            _api = Chrome(options=options)
        _api.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })
        return _api
    except WebDriverException as e:
        if "chromedriver" in str(e):
            print(f">>> 指定目录下缺少chromedriver {self.CHROMEDRIVER_PATH}")
            # The ``exit`` builtin is injected by ``site`` and may be absent;
            # raising SystemExit has the same effect everywhere.
            raise SystemExit
        raise
def takeScreenshot(self):
    """Open ``self.url`` in headless Chrome at a spoofed location and save a screenshot.

    The geolocation is overridden with ``self.lat`` / ``self.long`` and the
    geolocation permission is granted before the page is loaded. The image is
    written to ``screenshot.png`` inside ``self.path``. When
    ``self.timing_load`` is set the duration of the first page load is
    printed; when ``self.showing_ip`` is set the resolved IP address of the
    URL's host is printed.
    """
    # Set Chrome browser parameters and location
    chromeOpt = ChromeOptions()
    chromeOpt.add_argument("--headless")
    driver = Chrome(options=chromeOpt)
    try:
        driver.maximize_window()
        driver.execute_cdp_cmd(
            "Emulation.setGeolocationOverride",
            {
                "latitude": self.lat,
                "longitude": self.long,
                "accuracy": 100,
            },
        )
        driver.execute_cdp_cmd(
            "Browser.grantPermissions",
            {
                "permissions": ["geolocation"]
            },
        )

        # Load browser page
        print("Loading", self.url + "...")
        start_load_time = time.time()
        driver.get(self.url)
        end_load_time = time.time()
        driver.refresh()
        time.sleep(3)

        # Save screenshot
        img_dir = Path(self.path)
        img_path = os.fspath(img_dir / "screenshot.png")
        driver.get_screenshot_as_file(img_path)
    finally:
        # Always shut the browser down, even when loading or saving fails.
        driver.quit()

    print("Location: ("+str(self.lat)+", "+str(self.long)+')')
    print("Saved view to", img_path)

    # Print out optional output if -t or -i flags are given
    if self.timing_load:
        exec_time = round(end_load_time - start_load_time, 2)
        print("Load time:", str(exec_time) + 's')
    if self.showing_ip:
        hostname = urlparse(self.url).hostname
        ip = socket.gethostbyname(hostname)
        print("IP Address:", ip)
def print_pdf(driver: webdriver.Chrome, path_pdf="file.pdf"):
    """Render the current page to PDF via the DevTools protocol and save it.

    ``Page.printToPDF`` is invoked in portrait orientation; the base64
    payload found under ``"data"`` in the reply is decoded and written to
    ``path_pdf`` in binary mode.

    Intended for drivers started in headless mode.
    """
    # TODO: customise with headerTemplate / footerTemplate, see
    # https://chromedevtools.github.io/devtools-protocol/1-3/Page/#method-printToPDF
    reply = driver.execute_cdp_cmd("Page.printToPDF", {"portrait": True})
    with open(file=path_pdf, mode="wb") as pdf_file:
        pdf_bytes = base64.b64decode(reply["data"])
        pdf_file.write(pdf_bytes)
def _config_browser(self):
    """Start a detached Chrome driver with ``navigator.webdriver`` hidden.

    The chromedriver location comes from ``self._get_driver_dir()`` and is
    logged through ``self._logger``.

    Returns:
        The started ``Chrome`` driver.
    """
    opts = ChromeOptions()
    opts.add_experimental_option("detach", True)
    # Image loading could be disabled to speed up browsing:
    # opts.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
    # Drop the automation switch so sites are less likely to detect Selenium.
    opts.add_experimental_option('excludeSwitches', ['enable-automation'])

    driver_dir = self._get_driver_dir()
    self._logger.info(f"using {driver_dir}")

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = Chrome(driver_dir, options=opts)
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })
    return driver
def get_driver():
    """Start Chrome, open Baidu in a second window and return the driver.

    The driver is returned focused on the window it started with; the title
    of that window is printed before the second window is opened.
    """
    chrome_options = ChromeOptions()
    # chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = Chrome(options=chrome_options)
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})""",
    })
    print(driver.title)

    # Remember the current window so focus can be restored after window.open().
    current = driver.current_window_handle
    driver.execute_script('window.open("http://www.baidu.com")')
    driver.switch_to.window(current)
    return driver
def gain_driver():
    """Create a headless Chrome driver with ``stealth.min.js`` pre-injected.

    ``stealth.min.js`` is read from the current working directory and
    registered to run in every new document.

    :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument("--headless")
    # The feature list is attached with "="; the previous hyphenated spelling
    # was an unknown switch.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36'
    )

    driver = Chrome('chromedriver', options=chrome_options)
    driver.set_window_size(1366, 768)

    with open('stealth.min.js', encoding='utf-8') as f:
        js = f.read()
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
    return driver
def get_data(urls):
    """Scrape one job posting per URL and store it in MySQL and a JSON-lines file.

    For every URL a new headless Chrome is started, a slider element is
    dragged if present, the job title / company / location / salary / release
    date elements are read, the salary text is split into minimum and maximum
    values, and the record is inserted into the ``jobs`` table and appended to
    ``/root/Python/zhilian_data_0714.json``.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened copy of the source; verify the indentation of the salary
    branches against the original file.

    :param urls: iterable of URL strings (newlines are stripped from each).
    """
    chromeOptions = Options()
    # The options below are meant to keep sites from blocking Selenium;
    # headless mode is enabled.
    chromeOptions.add_argument('--disable-dev-shm-usage')
    chromeOptions.add_argument('--no-sandbox')
    chromeOptions.add_argument('--headless')
    chromeOptions.add_experimental_option("excludeSwitches", ["enable-automation"])
    chromeOptions.add_experimental_option('useAutomationExtension', False)
    count = 0
    # Open the database connection (host / user / password / database name).
    # NOTE(review): credentials are hard-coded here.
    db = pymysql.connect("119.3.184.238", "guest", "guest", "jobs")
    for i in urls:
        i = i.replace('\n', '')
        count += 1
        print("====正在处理第%d条数据====" % count)
        print(i)
        try:
            web = Chrome(options=chromeOptions)
            web.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument", {
                    "source":
                    """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    })
                    """
                })
            wait = WebDriverWait(web, 3)  # explicit wait timeout (seconds)
            web.get(i)
            time.sleep(0.5)
            # Best-effort slider drag; any failure (including the slider not
            # being present) is ignored.
            try:
                action = ActionChains(web)
                source = web.find_element_by_xpath(
                    "//*[@id='nc_1_n1z']")  # element that has to be dragged
                action.click_and_hold(source).perform()
                tracks = get_track()
                for x in tracks:
                    action.move_by_offset(xoffset=x, yoffset=0).perform()
                time.sleep(0.5)
                action.release().perform()
                time.sleep(0.1)
            except:
                pass
            # Read the data elements.
            job_title = wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//h3[@class="summary-plane__title"]')))
            job_company_name = wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//div[@class="company"]/a')))
            job_company_url = wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//div[@class="company"]/a')))
            job_location = wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//ul[@class="summary-plane__info"]/li/a')))
            job_salary = wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//span[@class="summary-plane__salary"]')))
            job_release_data = wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//span[@class="summary-plane__time"]')))
            # for a,b,c,d,e,f,g in zip(job_title,job_url,job_company_name,job_company_url,job_location,job_salary,job_release_data):
            # Salary text; the code below requires it to contain a '-'
            # (f.index('-') raises ValueError otherwise, which the outer
            # handler reports as a failed page).
            f = job_salary[0].text
            max_salary = 0
            min_salary = 0
            if '万' in f[:f.index('-')]:  # lower bound is expressed in 万
                f = f.replace('万', '0千')
                max_salary = re.findall(
                    r"\d+",
                    f,
                )[1] + '000'
                min_salary = re.findall(
                    r"\d+",
                    f,
                )[0] + '000'
                if '.' in f[:f.index('-')]:  # lower bound has a decimal part
                    f = f.replace('.', '', 1)
                    f = f.replace('0千', '千', 1)
                    min_salary = re.findall(
                        r"\d+",
                        f,
                    )[0] + '000'
                if '.' in f[f.index('-'):]:  # upper bound has a decimal part
                    f = f.replace('.', '', 1)
                    f = f.replace('0千', '千', 1)
                    # NOTE(review): this takes match [0] for the maximum while
                    # the other branches use [1] — confirm this is intended.
                    max_salary = re.findall(
                        r"\d+",
                        f,
                    )[0] + '000'
            elif '万' in f[f.index('-'):]:  # only the upper bound is in 万
                f = f.replace('万', '0千')
                max_salary = re.findall(
                    r"\d+",
                    f,
                )[1] + '000'
                min_salary = re.findall(
                    r"\d+",
                    f,
                )[0] + '000'
                if '.' in f[:f.index('-')]:
                    f = f.replace('.', '', 1)  # lower bound has a decimal part
                    min_salary = re.findall(
                        r"\d+",
                        f,
                    )[0] + '00'
                if '.' in f[f.index('-'):]:
                    f = f.replace('.', '', 1)
                    f = f.replace('0千', '千', 1)
                    max_salary = re.findall(
                        r"\d+",
                        f,
                    )[1] + '000'
            else:  # both bounds are expressed in 千
                max_salary = re.findall(
                    r"\d+",
                    f,
                )[1] + '000'
                min_salary = re.findall(
                    r"\d+",
                    f,
                )[0] + '000'
                if '.' in f[:f.index('-')]:
                    a = f.replace('.', '', 1)
                    min_salary = re.findall(
                        r"\d+",
                        a,
                    )[0] + '00'
                if '.' in f[f.index('-'):]:
                    a = f[f.index('-'):].replace('.', '', 1)
                    max_salary = re.findall(
                        r"\d+",
                        a,
                    )[0] + '00'
            # Release date: built from the "N月" and "N日" parts with a fixed
            # year prefix; falls back to a fixed date when parsing fails.
            g = job_release_data[0].text
            try:
                text = re.findall(
                    r"\d+月\d+日",
                    g,
                )[0]
                g_1 = re.findall(
                    r"\d+月",
                    g,
                )[0]
                g_2 = re.findall(
                    r"\d+日",
                    g,
                )[0]
                g = '2020' + '-' + g_1 + '-' + g_2
            except Exception as e:
                print(e)
                g = '2020-7-14'
            # NOTE(review): this local name shadows the ``dict`` builtin.
            dict = {
                "job_sourse": "4",
                "job_title": job_title[0].text,
                "job_url": i,
                "job_company_name": job_company_name[0].text,
                "job_company_url": job_company_url[0].get_attribute('href'),
                "job_location": job_location[0].text,
                "job_salary": f,
                "job_max_salary": max_salary,
                "job_min_salary": min_salary,
                "job_release_data": g,
                "job_collect_data": "2020-7-15"
            }
            cursor = db.cursor()  # save to MySQL
            table = 'jobs'
            keys = ','.join(dict.keys())
            values = ','.join(['%s'] * len(dict))
            # Column names come from the literal keys above; the values are
            # passed as query parameters.
            sql = 'insert into {table}({keys}) VALUES({values})'.format(
                table=table, keys=keys, values=values)
            try:
                if cursor.execute(sql, tuple(dict.values())):
                    print('insert successful')
                    db.commit()
            except Exception as e:
                print("insert failed!", e)
                db.rollback()
            # Keep a local copy as one JSON object per line. Note that ``f``
            # (the salary text) and ``dict`` are both rebound here.
            with open('/root/Python/zhilian_data_0714.json',
                      'a+',
                      encoding='utf-8') as f:
                dict = json.dumps(dict, ensure_ascii=False)
                f.write(dict + '\n')
                f.close()
            web.close()
        except:
            print(i + '哎呀,这个页面获取失败了!')
            # NOTE(review): ``web`` is unbound here if Chrome() itself failed
            # on the first iteration.
            web.close()
    db.close()
['enable-automation']) # 隐藏"Chrome正在受到自动软件的控制" chrome_options.add_argument('disable-infobars') driver = Chrome(chrome_options=chrome_options) #窗口最大化 driver.maximize_window() #隐式等待 #driver.set_page_load_timeout (15) # CDP执行JavaScript 代码 重定义windows.navigator.webdriver的值 driver.execute_cdp_cmd( "Page.addScriptToEvaluateOnNewDocument", { "source": """ Object.defineProperty(navigator, 'webdriver', { get: () => undefined }) """ }) headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" } #爬虫1 def spider1(startDate, endDate): # 起始页面 start_url = """https://www.adb.org/search?page=1&facet_query=ola_collection_name%3Anews%7CNews%20Release%2Bphoto_essay%7CPhoto%20Essay%2Bfeature%7CMultimedia&facet_query=ds_field_date_content%3A{}T00%3A00%3A00.000Z%2B{}T16%3A00%3A00.000Z
import time

from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

# Opens https://bot.sannysoft.com with stealth.min.js injected, then saves a
# screenshot (walkaround.png) and the page source (result.html).
chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56"
)
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

driver = Chrome('./chromedriver.exe', options=chrome_options)
try:
    with open('stealth.min.js', encoding='utf-8') as f:
        js = f.read()
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})

    driver.get('https://bot.sannysoft.com')
    time.sleep(5)
    driver.save_screenshot('walkaround.png')

    # Save the page source. An explicit encoding is required: the platform
    # default may be unable to encode characters present in the page.
    source = driver.page_source
    with open('result.html', 'w', encoding='utf-8') as f:
        f.write(source)
finally:
    # Close the browser and stop chromedriver even when a step above fails.
    driver.quit()
class jdLogin(object):
    """Drive the JD password login page and its slider captcha with Chrome.

    Relies on module-level ``USERNAME``, ``PASSWORD``, ``get_grap`` and
    ``get_track7``.
    """

    def __init__(self):
        """Start Chrome with notifications blocked and ``navigator.webdriver`` hidden."""
        self.path = './chromedriver'
        option = ChromeOptions()
        # option.add_argument('--headless')
        prefs = {
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        option.add_experimental_option('prefs', prefs)
        option.add_argument('--no-sandbox')
        option.add_argument(
            '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
        )
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        # option.add_argument('--proxy-server=http://127.0.0.1:8080')
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.driver = Chrome(options=option, executable_path=self.path)
        # Anti-detection: hide navigator.webdriver before page scripts run.
        self.driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {
                "source":
                """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
                """
            })
        self.driver.set_page_load_timeout(30)
        self.timeout = WebDriverWait(self.driver, 30)
        # self.driver.set_window_size(1920, 1080)

    def get_login(self):
        """Open the login page, switch to password login and submit the credentials."""
        self.driver.get('http://passport.jd.com/new/login.aspx')
        self.driver.find_element_by_xpath(
            '//a[@clstag="pageclick|keycount|login_pc_201804112|10"]').click()
        sleep(0.5)
        self.driver.find_element_by_id('loginname').send_keys(USERNAME)
        sleep(0.5)
        self.driver.find_element_by_id('nloginpwd').send_keys(PASSWORD)
        sleep(0.5)
        self.driver.find_element_by_id('loginsubmit').click()

    def download_imgs(self):
        """Save the captcha background and puzzle piece as ``bg.png`` / ``patch.png``.

        Both images are read from base64 data URIs in the ``src`` attributes.
        """
        bgData = self.driver.find_element_by_xpath(
            '//div[@class="JDJRV-bigimg"]/img').get_attribute('src')
        bg = bgData.split('base64,')[1]
        patchData = self.driver.find_element_by_xpath(
            '//div[@class="JDJRV-smallimg"]/img').get_attribute('src')
        patch = patchData.split('base64,')[1]

        with open('bg.png', 'wb') as fw1:
            fw1.write(base64.b64decode(bg))
        with open('patch.png', 'wb') as fw2:
            fw2.write(base64.b64decode(patch))

    def dragging(self, tracks):
        """Drag the slider button along ``tracks`` (x offsets), wiggle, then release."""
        button = self.driver.find_element_by_class_name('JDJRV-slide-btn')
        ActionChains(self.driver).click_and_hold(button).perform()
        for track in tracks:
            ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()
        # Small back-and-forth movement before letting go.
        ActionChains(self.driver).move_by_offset(xoffset=-3, yoffset=0).perform()
        ActionChains(self.driver).move_by_offset(xoffset=3, yoffset=0).perform()
        sleep(0.7)
        ActionChains(self.driver).release().perform()
        print('stop...')

    def main(self):
        """Log in and, when the slider captcha is shown, solve it."""
        self.get_login()  # open the login page and choose password login
        sleep(1)
        # find_elements_* returns an empty list when nothing matches, which
        # makes the check below meaningful; the singular form raises instead
        # of returning a falsy value.
        slides = self.driver.find_elements_by_class_name("JDJRV-suspend-slide")
        if slides:
            print("进入滑块验证码流程")
            self.download_imgs()
            sleep(1)
            move = get_grap()
            track = get_track7(move - 2.5)
            self.dragging(track)
        # Keep the browser open so the outcome can be inspected.
        sleep(100)
def post(self):
    """Open ``url`` in Chrome, drag the slider captcha and return the ``x5sec`` cookie.

    The required ``url`` argument is read with a ``reqparse.RequestParser``.
    The slider element ``nc_1_n1z`` is dragged repeatedly (at most 11 rounds)
    until the refresh link can no longer be found, then the cookies are
    inspected.

    NOTE(review): the nesting of the retry loops was reconstructed from a
    flattened copy of the source; verify against the original file.

    :return: ``{'x5sec': <value or ''>}`` on the normal path, or
        ``{'url': url, 'x5sec': ''}`` when ``url`` is empty or any step fails.
    """
    req_parser = reqparse.RequestParser()
    req_parser.add_argument('url', type=str, required=True)
    args = req_parser.parse_args()
    url = args['url']
    if not url:
        return {
            'url': url,
            'x5sec': '',
        }
    option = ChromeOptions()
    # option.add_argument('--headless')
    option.add_argument('--no-sandbox')
    # option.add_argument('--proxy-server=...')  (proxy credentials removed from this comment)
    # NOTE(review): the double quotes are part of the argument value and are
    # presumably sent as part of the user-agent string — confirm.
    option.add_argument(
        'user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"')
    option.add_argument('--disable-dev-shm-usage')
    option.add_argument('--disable-extensions')
    option.add_argument('--disable-gpu')
    option.add_argument("--disable-features=VizDisplayCompositor")
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_experimental_option("useAutomationExtension", False)
    # option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'
    wd = Chrome(options=option, executable_path='chromedriver')
    # wd = Chrome(ChromeDriverManager().install(), options=option)
    # Override several navigator properties before any page script runs.
    wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
            Object.defineProperty(navigator, 'language', { get: () => "zh-CN" });
            Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
            Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
            Object.defineProperty(navigator, 'platform', { get: () => 'MacIntel' });
            Object.defineProperty(navigator, 'userAgent', { get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' });
            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
        """
    })
    wd.set_page_load_timeout(20)
    _timeout = WebDriverWait(wd, 20)  # created but not used below
    try:
        x5sec = ''
        wd.get(url)
        wd.implicitly_wait(10)
        wd.delete_all_cookies()
        # TODO: handle multiple pages in parallel
        cnt = 0
        while True:
            time.sleep(0.2)
            wd.find_element_by_id("nc_1_n1z").click()
            slid_ing = wd.find_element_by_id("nc_1_n1z")
            ActionChains(wd).click_and_hold(on_element=slid_ing).perform()
            time.sleep(0.2)
            lgh = 0
            # Drag right in random increments; on any error just release.
            try:
                while lgh <= 510:
                    lgh += random.randint(30, 50)
                    ActionChains(wd).move_by_offset(xoffset=lgh, yoffset=0).perform()
                    time.sleep(0.2)
                ActionChains(wd).release().perform()
            except:
                time.sleep(0.2)
                ActionChains(wd).release().perform()
            # If the refresh link exists, click it and go another round;
            # otherwise stop retrying.
            try:
                slide_refresh = wd.find_element_by_xpath("//div[@id='nocaptcha']/div/span/a")
                slide_refresh.click()
            except:
                break
            cnt += 1
            if cnt > 10:
                break
        cookies = wd.get_cookies()
        wd.close()
        # A cookie matches when any of its field values equals 'x5sec'.
        for x5sec_data in cookies:
            if 'x5sec' in x5sec_data.values():
                x5sec = x5sec_data['value']
        return {
            'x5sec': x5sec,
        }
    except:
        wd.close()
        return {
            'url': url,
            'x5sec': '',
        }
class S(object):
    """Headless-Chrome login flow for the QQ OAuth page stored in ``self.url``.

    NOTE(review): statement nesting was reconstructed from a flattened copy of
    the source; verify the loop/branch indentation against the original file.
    """

    def __init__(self):
        """Start headless Chrome and store the login URL and credentials."""
        self.path = '/root/.wdm/drivers/chromedriver/80.0.3987.106/linux64/chromedriver'
        option = ChromeOptions()
        option.add_argument('--headless')
        prefs = {
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        option.add_experimental_option('prefs', prefs)
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.wd = Chrome(options=option, executable_path=self.path)
        # Hide navigator.webdriver before page scripts run.
        self.wd.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {
                "source":
                """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
                """
            })
        self.wd.set_page_load_timeout(20)
        self.timeout = WebDriverWait(self.wd, 20)
        self.url = 'https://graph.qq.com/oauth2.0/show?which=Login&display=pc&response_type=code&client_id=101477621&redirect_uri=https%3A%2F%2Fsso.e.qq.com%2Fpassport%3Fsso_redirect_uri%3Dhttps%253A%252F%252Fe.qq.com%252Fads%252F%26service_tag%3D1&scope=get_user_info'
        self.users = 'xxx'
        self.passwd = 'xxx'

    def run(self):
        """Log in, handle the QR-code and slider challenges, and report the cookies.

        Progress and results are reported through ``dd_notice``; the browser
        window is closed at the end of every path of the final step.
        """
        self.wd.get(self.url)
        self.wd.implicitly_wait(10)
        self.wd.delete_all_cookies()
        time.sleep(2)
        # The login form lives inside an iframe.
        iframe = self.wd.find_element_by_xpath('//iframe')
        self.wd.switch_to.frame(iframe)
        self.wd.find_element_by_id('switcher_plogin').click()
        time.sleep(1)
        self.wd.find_element_by_id('u').clear()
        time.sleep(1)
        self.wd.find_element_by_id('u').send_keys(self.users)
        time.sleep(2)
        self.wd.find_element_by_id('p').clear()
        time.sleep(1)
        self.wd.find_element_by_id('p').send_keys(self.passwd)
        time.sleep(2)
        self.wd.find_element_by_id('login_button').click()
        time.sleep(5)
        # Step 1: QR-code verification. Any exception (including the tips
        # element not being found) is reported as "no QR code needed".
        try:
            tips = self.wd.find_element_by_id('qlogin_tips_2').text
            if '由于你的帐号存在异常,需要进行手机验证,' in tips:
                while True:
                    dd_notice('需要扫描二维码...', dd_token_url)
                    time.sleep(2)
                    self.wd.save_screenshot('qrImg.png')
                    im = Image.open('qrImg.png')
                    im.save('qrImg.png')
                    time.sleep(30)
                    # NOTE(review): this request is made outside the browser
                    # session and its response is discarded — confirm intent.
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        dd_notice('二维码验证成功!!!', dd_token_url)
                        break
                    else:
                        dd_notice('二维码验证失败!重试中...', dd_token_url)
        except Exception as e:
            dd_notice('不需要二维码验证!', dd_token_url)
        # Step 2: slider captcha. The loop ends on success (break) or when any
        # call raises, which is reported as "no slider needed".
        try:
            while True:
                time.sleep(3)
                iframe = self.wd.find_element_by_xpath('//iframe')
                self.wd.switch_to.frame(iframe)
                time.sleep(1)
                flags = self.wd.find_element_by_xpath(
                    '//*[@id="guideText"]').text
                if '拖动下方滑块完成拼图' == flags:
                    dd_notice('需要滑块!!!', dd_token_url)
                    src_url = self.wd.find_element_by_xpath(
                        '//*[@id="slideBg"]').get_attribute('src')
                    res = requests.get(url=src_url, verify=False)
                    with open('crack.jpeg', 'wb') as f:
                        f.write(res.content)
                    time.sleep(3)
                    slid_ing = self.wd.find_element_by_id(
                        'tcaptcha_drag_button')
                    ActionChains(
                        self.wd).click_and_hold(on_element=slid_ing).perform()
                    time.sleep(0.2)
                    position = qq_mark_detect('crack.jpeg').x.values[0]
                    # presumably rescales from the downloaded image width (680)
                    # to the on-page width (280) with a fixed offset — confirm
                    real_position = position * (280 / 680) - 23
                    track_list = self.get_track(int(real_position))
                    for track in track_list:
                        ActionChains(self.wd).move_by_offset(
                            xoffset=track, yoffset=0).perform()
                        time.sleep(0.002)
                    ActionChains(self.wd).release().perform()
                    time.sleep(2)
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    print(self.wd.get_cookies())
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        dd_notice('滑块验证成功!!!', dd_token_url)
                        break
                    else:
                        dd_notice('滑块验证验证失败!重试中...', dd_token_url)
                else:
                    dd_notice('不需要滑块!!!', dd_token_url)
        except Exception as e:
            dd_notice('不需要滑块!', dd_token_url)
        # Step 3: extract the two cookies of interest and report them.
        cookies_data = self.wd.get_cookies()
        try:
            if 'gdt_token' in json.dumps(
                    cookies_data) and 'gdt_protect' in json.dumps(
                        cookies_data):
                cookies = {}
                for data in cookies_data:
                    # A cookie matches when any of its field values equals the name.
                    if 'gdt_protect' in data.values():
                        gdt_protect = data.get('value')
                        if gdt_protect:
                            cookies['gdt_protect'] = gdt_protect
                    if 'gdt_token' in data.values():
                        gdt_token = data.get('value')
                        if gdt_token:
                            cookies['gdt_token'] = gdt_token
                dd_notice(f'获取的cookies: {cookies}', dd_token_url)
                time.sleep(2)
                self.close()
            else:
                dd_notice('未成功获取cookies, 需手动重试!!!!!', dd_token_url)
                self.close()
        except Exception as e:
            dd_notice('广点通自动化登陆失败!!!需手动重试!!!!!', dd_token_url)
            self.close()

    @staticmethod
    def get_track(distance):
        """
        Build a list of per-step x offsets that imitates a human drag.

        The step size follows a constant-acceleration model: a random
        positive acceleration until 7/8 of ``distance`` has been covered and
        a random negative one afterwards. The target is overshot by 10 and
        eight small negative steps are appended at the end.

        :param distance: total horizontal distance to cover
        :return: list of integer offsets
        """
        v = 0
        t = 0.2
        tracks = []
        current = 0
        mid = distance * 7 / 8
        distance += 10
        while current < distance:
            if current < mid:
                a = random.randint(2, 4)
            else:
                a = -random.randint(3, 5)
            v0 = v
            s = v0 * t + 0.5 * a * (t**2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t
        for i in range(4):
            tracks.append(-random.randint(2, 3))
        for i in range(4):
            tracks.append(-random.randint(1, 3))
        return tracks

    def close(self):
        """Close the current browser window."""
        self.wd.close()
def test_execute_cdp_cmd():
    """``Browser.getVersion`` over CDP returns a dict containing ``userAgent``."""
    driver = Chrome()
    try:
        version_info = driver.execute_cdp_cmd('Browser.getVersion', {})
    finally:
        # Do not leave a browser running after the test, pass or fail.
        driver.quit()
    assert isinstance(version_info, dict)
    assert 'userAgent' in version_info
def post(self):
    """Open ``url`` in mobile-emulated Chrome, flick the slider and return the ``x5sec`` cookie.

    The required ``url`` argument is read with a ``reqparse.RequestParser``.
    The slider element ``nc_1_n1t`` is flicked repeatedly (at most 11 rounds)
    until the refresh element can no longer be found, then the cookies are
    inspected.

    NOTE(review): the nesting of the retry loop was reconstructed from a
    flattened copy of the source; verify against the original file.

    :return: ``{'x5sec': <value or ''>}`` on the normal path, or
        ``{'url': url, 'x5sec': ''}`` when ``url`` is empty or any step fails.
    """
    req_parser = reqparse.RequestParser()
    req_parser.add_argument('url', type=str, required=True)
    args = req_parser.parse_args()
    url = args['url']
    if not url:
        return {
            'url': url,
            'x5sec': '',
        }
    option = ChromeOptions()
    # option.add_argument('--headless')
    option.add_argument('--no-sandbox')
    option.add_argument('--disable-dev-shm-usage')
    # Emulate a 375x667 device with an iPhone user agent.
    mobile_emulation = {"deviceMetrics": {
        "width": 375, "height": 667, "pixelRatio": 3},
        "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372"}
    option.add_experimental_option("mobileEmulation", mobile_emulation)
    # NOTE(review): presumably w3c mode is disabled because TouchActions is
    # used below — confirm.
    option.add_experimental_option('w3c', False)
    option.add_argument('--disable-extensions')
    option.add_argument('--disable-gpu')
    option.add_argument("--disable-features=VizDisplayCompositor")
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_experimental_option("useAutomationExtension", False)
    option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'
    wd = Chrome(options=option, executable_path='/root/Downloads/slider_servers/chromedriver')
    # Hide navigator.webdriver before page scripts run.
    wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            })
        """
    })
    # The string literal below is a disabled variant of the injection above;
    # it is evaluated as a bare expression and has no effect.
    '''
    wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
            Object.defineProperty(navigator, 'language', { get: () => "zh-CN" });
            Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
            Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
            Object.defineProperty(navigator, 'platform', { get: () => 'MacIntel' });
            Object.defineProperty(navigator, 'userAgent', { get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' });
            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
        """
    })
    '''
    wd.set_page_load_timeout(20)
    _timeout = WebDriverWait(wd, 20)  # created but not used below
    try:
        x5sec = ''
        wd.get(url)
        wd.implicitly_wait(10)
        wd.delete_all_cookies()
        cnt = 0
        while True:
            time.sleep(0.4)
            wd.find_element_by_id("nc_1_n1t").click()
            slid_ing = wd.find_element_by_id("nc_1_n1t")
            time.sleep(0.2)
            # Flick the slider to the right at a random speed; failures are
            # printed and the round continues.
            try:
                TouchActions(wd).flick_element(slid_ing, 258, 0, random.randint(200, 300)).perform()
                time.sleep(0.2)
            except Exception as e:
                import traceback
                print(traceback.format_exc())
                print(e)
            time.sleep(0.4)
            # If the refresh element exists, click it and go another round;
            # otherwise stop retrying.
            try:
                slide_refresh = wd.find_element_by_xpath('//*[@id="nc_1-stage-3"]/span[1]/span[1]')
                slide_refresh.click()
            except:
                break
            cnt += 1
            if cnt > 10:
                break
        cookies = wd.get_cookies()
        wd.close()
        # A cookie matches when any of its field values equals 'x5sec'.
        for x5sec_data in cookies:
            if 'x5sec' in x5sec_data.values():
                x5sec = x5sec_data['value']
        return {
            'x5sec': x5sec,
        }
    except:
        wd.close()
        return {
            'url': url,
            'x5sec': '',
        }
def test_execute_cdp_cmd():
    """``Browser.getVersion`` over CDP returns a dict containing ``userAgent``."""
    driver = Chrome()
    try:
        version_info = driver.execute_cdp_cmd('Browser.getVersion', {})
    finally:
        # Do not leave a browser running after the test, pass or fail.
        driver.quit()
    assert isinstance(version_info, dict)
    assert 'userAgent' in version_info