예제 #1
0
def start_browser(link, cookies):
    """Start Chrome, open ``link``, install ``cookies`` and reload the page.

    The page is requested once before the cookies are added and once more
    afterwards, so the second request is made with the cookies in place.

    :param link: URL to open.
    :param cookies: iterable of mappings providing ``name``, ``value`` and
        ``domain`` keys; any other keys are ignored.
    :return: the running ``Chrome`` driver; the caller is responsible for
        calling ``quit()`` on it.
    """
    # Work on a copy: DesiredCapabilities.CHROME is a class-level dict, so
    # mutating it in place would change the defaults of every later driver.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "eager"

    chrome_options = ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option("useAutomationExtension", False)

    # `driver_path` is a module-level name defined elsewhere in the file.
    driver = Chrome(desired_capabilities=caps, executable_path=driver_path, options=chrome_options)

    # Wrap `navigator` in a Proxy on every new document so that
    # `navigator.webdriver` reads as undefined and `'webdriver' in navigator`
    # is false; every other property is forwarded to the real navigator.
    driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": """
        Object.defineProperty(window, 'navigator', {
            value: new Proxy(navigator, {
              has: (target, key) => (key === 'webdriver' ? false : key in target),
              get: (target, key) =>
                key === 'webdriver'
                  ? undefined
                  : typeof target[key] === 'function'
                  ? target[key].bind(target)
                  : target[key]
            })
        })
                  """
            },
    )

    driver.get(link)
    for cookie in cookies:
        driver.add_cookie({
            "name": cookie["name"],
            "value": cookie["value"],
            "domain": cookie["domain"],
        })
    # Reload so the request is made with the cookies just installed.
    driver.get(link)
    return driver
예제 #2
0
def build_driver(path_driver):
    """Build a headless Chrome driver configured to look less like automation.

    :param path_driver: path to the chromedriver executable.
    :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    chrome_options = ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument('--ignore-certificate-errors')
    # The switch needs a value naming the Blink feature to turn off; passed
    # bare (as before) it disables nothing.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_experimental_option("excludeSwitches",
                                           ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('--allow-running-insecure-content')
    chrome_options.add_argument("--window-size=1920,1080")
    user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 " \
                 "(KHTML, like Gecko) Chrome/89.0.4280.66 Safari/537.36"
    chrome_options.add_argument(f'user-agent={user_agent}')

    driver = Chrome(executable_path=path_driver, options=chrome_options)
    # On every new document, delete `webdriver` from navigator's prototype
    # before page scripts run.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument", {
            "source":
            """
              const newProto = navigator.__proto__
              delete newProto.webdriver
              navigator.__proto__ = newProto
              """
        })
    return driver
def user_agent_override(driver: Driver,
                        user_agent: str = None,
                        language: str = None,
                        platform: str = None,
                        **kwargs) -> None:
    """Override the browser's user agent through the DevTools protocol.

    :param driver: object exposing ``execute_cdp_cmd(command, params)``.
    :param user_agent: user-agent string to report; when ``None`` the
        browser's own value is read via ``Browser.getVersion``.
    :param language: value for ``acceptLanguage``; omitted when falsy.
    :param platform: value for ``platform``; omitted when falsy.
    :param kwargs: accepted for call compatibility and ignored.
    """
    if user_agent is None:
        ua = driver.execute_cdp_cmd("Browser.getVersion", {})['userAgent']
    else:
        ua = user_agent
    ua = ua.replace("HeadlessChrome", "Chrome")  # hide headless nature

    # Only send the optional fields that were actually supplied. Previously
    # the "only language" / "only platform" cases sent the missing field as
    # None instead of leaving it out of the command.
    override = {"userAgent": ua}
    if language:
        override["acceptLanguage"] = language
    if platform:
        override["platform"] = platform

    driver.execute_cdp_cmd('Network.setUserAgentOverride', override)
예제 #4
0
def gain_driver():
    """Start a headless Chrome that injects ``stealth.min.js`` into every page.

    ``stealth.min.js`` is read from the current working directory and
    ``chromedriver`` is passed as the driver executable name.

    :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument("--headless")
    # The feature name is the switch's *value*; the previous hyphenated
    # spelling formed a single unknown switch name.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')

    driver = Chrome('chromedriver', options=chrome_options)
    driver.set_window_size(1366, 768)

    # Read explicitly as UTF-8 rather than the platform's locale encoding.
    with open('stealth.min.js', encoding='utf-8') as f:
        js = f.read()

    # Run the script at the start of every new document.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": js
    })
    return driver
예제 #5
0
    def set_spider_option(self, header=None) -> Chrome:
        """Build and start a Chrome driver configured for scraping.

        Reads ``self.silence`` (headless when it is ``True``) and
        ``self.assault`` (when truthy, sets content prefs that block images
        and uses the ``none`` page-load strategy).

        :param header: user-agent string to use; when falsy the value
            returned by ``get_header()`` is used instead.
        :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
        """
        options = ChromeOptions()
        # Run without the sandbox.
        options.add_argument('--no-sandbox')
        # Incognito mode.
        options.add_argument('-incognito')
        # Intended as "load without cache".
        # NOTE(review): '--disk-cache-' looks truncated - confirm the switch.
        options.add_argument('--disk-cache-')
        # Chinese locale.
        options.add_argument('lang=zh_CN.UTF-8')
        options.add_argument('--log-level=3')
        # Replace the user agent.
        if header:
            options.add_argument(f"user-agent={header}")
        else:
            options.add_argument(f'user-agent={get_header()}')
        # Silent (headless) start.
        if self.silence is True:
            options.add_argument('--headless')
            options.add_argument('--disable-gpu')
            options.add_argument("--disable-software-rasterizer")
        # Suppress automation-control fingerprints.
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_experimental_option('useAutomationExtension', False)
        # add_experimental_option stores one value per name, so both excluded
        # switches must go in a single list; setting 'excludeSwitches' twice
        # silently dropped 'enable-logging' (the DevTools listening message).
        options.add_experimental_option('excludeSwitches',
                                        ['enable-logging', 'enable-automation'])
        # Speed mode.
        if self.assault:
            chrome_pref = {"profile.default_content_settings": {"Images": 2, 'javascript': 2},
                           "profile.managed_default_content_settings": {"Images": 2}}
            options.experimental_options['prefs'] = chrome_pref
            # Copy before editing: DesiredCapabilities.CHROME is a shared
            # class-level dict and must not be changed for later drivers.
            d_c = DesiredCapabilities.CHROME.copy()
            d_c['pageLoadStrategy'] = 'none'
            _api = Chrome(
                options=options,
                executable_path=CHROMEDRIVER_PATH,
                desired_capabilities=d_c
            )
        else:
            _api = Chrome(options=options, executable_path=CHROMEDRIVER_PATH)
        # Make navigator.webdriver read as undefined on every new document.
        _api.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
          """
        })
        return _api
예제 #6
0
def get_driver():
    """Return a Chrome driver that reports ``navigator.webdriver`` as undefined.

    The ``enable-automation`` switch is excluded and a script redefining
    ``navigator.webdriver`` is registered for every new document.
    """
    from selenium.webdriver import Chrome
    from selenium.webdriver import ChromeOptions

    browser_options = ChromeOptions()
    browser_options.add_experimental_option('excludeSwitches', ['enable-automation'])

    browser = Chrome(options=browser_options)
    script = {
        "source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})""",
    }
    browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", script)
    return browser
예제 #7
0
    def set_spider_options(self) -> Chrome:
        """Build and start a Chrome driver configured for scraping.

        Reads ``self.silence`` (headless when it is ``True``) and
        ``self.CHROMEDRIVER_PATH`` (path of the chromedriver executable).

        :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
        :raises SystemExit: when starting fails with an error message that
            mentions chromedriver.
        :raises WebDriverException: for any other start-up failure.
        """
        options = ChromeOptions()
        # Run without the sandbox.
        options.add_argument('--no-sandbox')
        # Incognito mode.
        options.add_argument('-incognito')
        # Intended as "load without cache".
        # NOTE(review): '--disk-cache-' looks truncated - confirm the switch.
        options.add_argument('--disk-cache-')
        # Chinese locale.
        options.add_argument('lang=zh_CN.UTF-8')
        options.add_argument('--log-level=3')
        # Replace the user agent. Arguments are not parsed by a shell, so
        # wrapping quotes would become part of the reported user agent.
        options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Edg/92.0.902.78"
        )
        # Silent (headless) start.
        if self.silence is True:
            options.add_argument('--headless')
            options.add_argument('--disable-gpu')
            options.add_argument("--disable-software-rasterizer")

        # Suppress automation-control fingerprints.
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_experimental_option('useAutomationExtension', False)
        # One value is stored per option name, so both excluded switches go
        # in a single list; setting 'excludeSwitches' twice silently dropped
        # 'enable-logging' (the DevTools listening message).
        options.add_experimental_option('excludeSwitches',
                                        ['enable-logging', 'enable-automation'])

        try:
            _api = Chrome(options=options,
                          executable_path=self.CHROMEDRIVER_PATH)
            # Make navigator.webdriver read as undefined on every new document.
            _api.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument", {
                    "source":
                    """
                           Object.defineProperty(navigator, 'webdriver', {
                             get: () => undefined
                           })
                         """
                })
            return _api
        except WebDriverException as e:
            if "chromedriver" in str(e):
                print(f">>> 指定目录下缺少chromedriver {self.CHROMEDRIVER_PATH}")
                sys.exit()
            # Any other failure used to be swallowed, returning None to a
            # caller that expects a driver; surface it instead.
            raise
예제 #8
0
def get_response_body_list(browser: Chrome, target_list: list) -> list:
    """Collect response bodies for logged requests that match ``target_list``.

    Every entry of the browser's ``performance`` log is decoded; entries whose
    params carry no ``request`` are skipped. For each target whose ``url`` is a
    substring of the request URL and whose ``method`` equals the request
    method, the body is fetched with ``Network.getResponseBody``.

    :param browser: driver exposing ``get_log`` and ``execute_cdp_cmd``.
    :param target_list: list of dicts with ``url`` and ``method`` keys.
    :return: list of dicts with ``response_body``, ``url`` and ``method``
        (the latter two copied from the matching target).
    """
    collected = []
    for entry in browser.get_log('performance'):
        params = json.loads(entry['message'])['message']['params']
        try:
            logged_request = params['request']
        except KeyError:
            # Not a request event.
            continue
        seen_url = logged_request['url']
        seen_method = logged_request['method']
        for wanted in target_list:
            if wanted['url'] not in seen_url or wanted['method'] != seen_method:
                continue
            # The requestId from the log entry identifies the response to fetch.
            reply = browser.execute_cdp_cmd(
                'Network.getResponseBody',
                {'requestId': params['requestId']},
            )
            collected.append({
                'response_body': reply['body'],
                'url': wanted['url'],
                'method': wanted['method'],
            })

    return collected
예제 #9
0
    def set_spider_options(self) -> Chrome:
        """Build and start a Chrome driver configured for scraping.

        Reads ``self.silence`` (headless when it is ``True``) and
        ``self.CHROMEDRIVER_PATH`` (used as the driver executable when
        truthy; otherwise Selenium's default lookup is used).

        :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
        :raises SystemExit: when starting fails with an error message that
            mentions chromedriver.
        :raises WebDriverException: for any other start-up failure.
        """
        options = ChromeOptions()

        # Run without the sandbox.
        options.add_argument('--no-sandbox')

        # Incognito mode.
        options.add_argument('-incognito')

        # Intended as "load without cache".
        # NOTE(review): '--disk-cache-' looks truncated - confirm the switch.
        options.add_argument('--disk-cache-')

        # Chinese locale.
        options.add_argument('lang=zh_CN.UTF-8')

        # Replace the user agent. Arguments are not parsed by a shell, so
        # wrapping quotes would become part of the reported user agent.
        options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                             " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36")

        options.add_argument('--disable-blink-features=AutomationControlled')

        # Silent (headless) start.
        if self.silence is True:
            options.add_argument('--headless')

        options.add_experimental_option('useAutomationExtension', False)
        options.add_experimental_option('excludeSwitches', ['enable-automation'])

        try:
            if self.CHROMEDRIVER_PATH:
                _api = Chrome(options=options, executable_path=self.CHROMEDRIVER_PATH)
            else:
                _api = Chrome(options=options)
            # Make navigator.webdriver read as undefined on every new document.
            _api.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                           Object.defineProperty(navigator, 'webdriver', {
                             get: () => undefined
                           })
                         """
            })
            return _api
        except WebDriverException as e:
            if "chromedriver" in str(e):
                print(f">>> 指定目录下缺少chromedriver {self.CHROMEDRIVER_PATH}")
                # Same effect as the interactive exit() helper, without
                # depending on the site module having installed it.
                raise SystemExit
            # Any other failure used to be swallowed, returning None to a
            # caller that expects a driver; surface it instead.
            raise
예제 #10
0
    def takeScreenshot(self):
        """Load ``self.url`` in headless Chrome at a spoofed location and save a screenshot.

        Reads ``self.lat`` / ``self.long`` (geolocation override),
        ``self.url`` (page to load), ``self.path`` (directory that receives
        ``screenshot.png``), ``self.timing_load`` (print the load time) and
        ``self.showing_ip`` (print the host's resolved IP address).
        """
        # Set Chrome browser parameters and location
        chromeOpt = ChromeOptions()
        chromeOpt.add_argument("--headless")
        driver = Chrome(options=chromeOpt)
        # Guarantee the browser is shut down even when loading the page or
        # taking the screenshot fails; previously it leaked on any error.
        try:
            driver.maximize_window()
            driver.execute_cdp_cmd(
                "Emulation.setGeolocationOverride",
                {
                    "latitude": self.lat,
                    "longitude": self.long,
                    "accuracy": 100,
                },
            )
            driver.execute_cdp_cmd(
                "Browser.grantPermissions",
                {
                    "permissions": ["geolocation"]
                },
            )

            # Load browser page; only the first load is timed.
            print("Loading", self.url + "...")
            start_load_time = time.time()
            driver.get(self.url)
            end_load_time = time.time()
            driver.refresh()
            time.sleep(3)

            # Save screenshot
            img_dir = Path(self.path)
            img_path = os.fspath(img_dir / "screenshot.png")
            driver.get_screenshot_as_file(img_path)
        finally:
            driver.quit()

        print("Location: ("+str(self.lat)+", "+str(self.long)+')')
        print("Saved view to", img_path)

        # Print out optional output if -t or -i flags are given
        if self.timing_load:
            exec_time = round(end_load_time - start_load_time, 2)
            print("Load time:", str(exec_time) + 's')
        if self.showing_ip:
            hostname = urlparse(self.url).hostname
            ip = socket.gethostbyname(hostname)
            print("IP Address:", ip)
예제 #11
0
def print_pdf(driver: webdriver.Chrome, path_pdf="file.pdf"):
    """
    Save the current page as a PDF file.

    Issues the Chrome DevTools Protocol command ``Page.printToPDF`` with
    ``portrait`` set to true, base64-decodes the ``data`` field of the reply
    and writes the bytes to ``path_pdf``.
    The original notes say this is for use with headless webdriver options.
    """
    # TODO: Customize with headerTemplate/footerTemplate
    # https://chromedevtools.github.io/devtools-protocol/1-3/Page/#method-printToPDF
    cdp_reply = driver.execute_cdp_cmd("Page.printToPDF", {"portrait": True})

    with open(file=path_pdf, mode="wb") as pdf_out:
        encoded_pdf = cdp_reply["data"]
        pdf_out.write(base64.b64decode(encoded_pdf))
예제 #12
0
    def _config_browser(self):
        """Start a detached Chrome with the ``enable-automation`` switch excluded.

        The driver location comes from ``self._get_driver_dir()`` and is
        logged through ``self._logger``. A script redefining
        ``navigator.webdriver`` is registered for every new document.

        :return: the started ``Chrome`` driver.
        """
        browser_opts = ChromeOptions()
        browser_opts.add_experimental_option("detach", True)
        # Disabled: skip image loading to speed up page access.
        #browser_opts.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
        # Important step: excluding this switch is what keeps sites from
        # recognising that Selenium is in use.
        browser_opts.add_experimental_option('excludeSwitches', ['enable-automation'])

        driver_location = self._get_driver_dir()
        self._logger.info(f"using {driver_location}")

        browser = Chrome(driver_location, chrome_options=browser_opts)
        hide_webdriver = {
                "source":
                """
                                    Object.defineProperty(navigator, 'webdriver', {
                                      get: () => undefined
                                    })
                                  """
        }
        browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", hide_webdriver)
        return browser
예제 #13
0
def get_driver():
    """Start Chrome, open Baidu in a second window and return the driver.

    A script redefining ``navigator.webdriver`` is registered for every new
    document, the current title is printed, a new window is opened via
    ``window.open`` and focus is switched back to the original window.
    """
    browser_opts = ChromeOptions()
    # browser_opts.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser_opts.add_experimental_option('excludeSwitches', ['enable-automation'])

    browser = Chrome(chrome_options=browser_opts)
    browser.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})""",
        },
    )
    print(browser.title)

    # Remember the handle of the window we started in.
    first_window = browser.current_window_handle

    browser.execute_script('window.open("http://www.baidu.com")')

    # All handles; the last one is looked up but not used further.
    every_window = browser.window_handles
    latest_window = every_window[-1]

    # Switch back to the first window.
    browser.switch_to.window(first_window)
    return browser
예제 #14
0
def gain_driver():
    """Start a headless Chrome that injects ``stealth.min.js`` into every page.

    ``stealth.min.js`` is read from the current working directory and
    ``chromedriver`` is passed as the driver executable name.

    :return: the started ``Chrome`` driver; the caller must ``quit()`` it.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option('excludeSwitches',
                                           ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument("--headless")
    # The feature name is the switch's *value*; the previous hyphenated
    # spelling formed a single unknown switch name.
    chrome_options.add_argument(
        "--disable-blink-features=AutomationControlled")
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36'
    )
    driver = Chrome('chromedriver', options=chrome_options)
    driver.set_window_size(1366, 768)

    # Read explicitly as UTF-8 rather than the platform's locale encoding.
    with open('stealth.min.js', encoding='utf-8') as f:
        js = f.read()

    # Run the script at the start of every new document.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument",
                           {"source": js})
    return driver
예제 #15
0
def get_data(urls):
    """Scrape each job-posting URL and store the parsed record.

    For every URL a fresh headless Chrome is started, an optional slider
    check is attempted, and the title, company, location, salary and release
    date are read from the page. The record is inserted into the ``jobs``
    MySQL table and appended as one JSON line to
    ``/root/Python/zhilian_data_0714.json``. A failure on one URL is reported
    and the loop moves on to the next one.

    :param urls: iterable of URL strings (newlines are stripped from each).
    """
    chromeOptions = Options()
    # Headless mode, with switches meant to stop sites from blocking Selenium.
    chromeOptions.add_argument('--disable-dev-shm-usage')
    chromeOptions.add_argument('--no-sandbox')
    chromeOptions.add_argument('--headless')
    chromeOptions.add_experimental_option("excludeSwitches",
                                          ["enable-automation"])
    chromeOptions.add_experimental_option('useAutomationExtension', False)
    count = 0
    # Keyword arguments: recent PyMySQL releases reject positional ones.
    # NOTE(review): host and credentials are hard-coded here.
    db = pymysql.connect(host="119.3.184.238", user="guest", password="guest",
                         database="jobs")
    try:
        for i in urls:
            i = i.replace('\n', '')
            count += 1
            print("====正在处理第%d条数据====" % count)
            print(i)
            # Bound before the try so the cleanup below never hits an
            # unbound name when Chrome itself fails to start.
            web = None
            try:
                web = Chrome(options=chromeOptions)
                web.execute_cdp_cmd(
                    "Page.addScriptToEvaluateOnNewDocument", {
                        "source":
                        """
                 Object.defineProperty(navigator, 'webdriver', {
                   get: () => undefined
                   })
                    """
                    })
                wait = WebDriverWait(web, 3)  # explicit-wait timeout
                web.get(i)
                time.sleep(0.5)
                try:
                    # Best effort: drag the slider element when it is there.
                    action = ActionChains(web)
                    source = web.find_element_by_xpath(
                        "//*[@id='nc_1_n1z']")  # element to slide
                    action.click_and_hold(source).perform()
                    tracks = get_track()
                    for x in tracks:
                        action.move_by_offset(xoffset=x, yoffset=0).perform()
                    time.sleep(0.5)
                    action.release().perform()
                    time.sleep(0.1)
                except Exception:
                    # Deliberately ignored: a failure here (e.g. the slider
                    # is missing) must not stop the scrape of this page.
                    pass

                # Collect the page elements.
                job_title = wait.until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, '//h3[@class="summary-plane__title"]')))
                job_company_name = wait.until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, '//div[@class="company"]/a')))
                job_company_url = wait.until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, '//div[@class="company"]/a')))
                job_location = wait.until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, '//ul[@class="summary-plane__info"]/li/a')))
                job_salary = wait.until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, '//span[@class="summary-plane__salary"]')))
                job_release_data = wait.until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, '//span[@class="summary-plane__time"]')))

                # Salary parsing, kept as originally written. The text is
                # split at '-' into a lower and an upper part; '万' is
                # rewritten to '0千' and digits are pulled out with a regex.
                # `salary` is rewritten along the way and the rewritten text
                # is what gets stored as job_salary.
                salary = job_salary[0].text
                max_salary = 0
                min_salary = 0
                if '万' in salary[:salary.index('-')]:  # lower bound in 万
                    salary = salary.replace('万', '0千')
                    max_salary = re.findall(r"\d+", salary)[1] + '000'
                    min_salary = re.findall(r"\d+", salary)[0] + '000'
                    if '.' in salary[:salary.index('-')]:  # decimal lower bound
                        salary = salary.replace('.', '', 1)
                        salary = salary.replace('0千', '千', 1)
                        min_salary = re.findall(r"\d+", salary)[0] + '000'
                    if '.' in salary[salary.index('-'):]:  # decimal upper bound
                        salary = salary.replace('.', '', 1)
                        salary = salary.replace('0千', '千', 1)
                        max_salary = re.findall(r"\d+", salary)[0] + '000'
                elif '万' in salary[salary.index('-'):]:  # only upper bound in 万
                    salary = salary.replace('万', '0千')
                    max_salary = re.findall(r"\d+", salary)[1] + '000'
                    min_salary = re.findall(r"\d+", salary)[0] + '000'
                    if '.' in salary[:salary.index('-')]:  # decimal lower bound
                        salary = salary.replace('.', '', 1)
                        min_salary = re.findall(r"\d+", salary)[0] + '00'
                    if '.' in salary[salary.index('-'):]:  # decimal upper bound
                        salary = salary.replace('.', '', 1)
                        salary = salary.replace('0千', '千', 1)
                        max_salary = re.findall(r"\d+", salary)[1] + '000'
                else:  # neither bound uses 万
                    max_salary = re.findall(r"\d+", salary)[1] + '000'
                    min_salary = re.findall(r"\d+", salary)[0] + '000'
                    if '.' in salary[:salary.index('-')]:
                        stripped = salary.replace('.', '', 1)
                        min_salary = re.findall(r"\d+", stripped)[0] + '00'
                    if '.' in salary[salary.index('-'):]:
                        stripped = salary[salary.index('-'):].replace('.', '', 1)
                        max_salary = re.findall(r"\d+", stripped)[0] + '00'

                release_date = job_release_data[0].text
                try:
                    # Evaluated only as a guard: raises when the text is not
                    # of the "<n>月<n>日" form, selecting the fallback below.
                    re.findall(r"\d+月\d+日", release_date)[0]
                    month_part = re.findall(r"\d+月", release_date)[0]
                    day_part = re.findall(r"\d+日", release_date)[0]
                    # NOTE(review): the parts keep their 月/日 suffix - confirm
                    # this is the stored format that is wanted.
                    release_date = '2020' + '-' + month_part + '-' + day_part
                except Exception as e:
                    print(e)
                    release_date = '2020-7-14'

                record = {
                    "job_sourse": "4",
                    "job_title": job_title[0].text,
                    "job_url": i,
                    "job_company_name": job_company_name[0].text,
                    "job_company_url": job_company_url[0].get_attribute('href'),
                    "job_location": job_location[0].text,
                    "job_salary": salary,
                    "job_max_salary": max_salary,
                    "job_min_salary": min_salary,
                    "job_release_data": release_date,
                    "job_collect_data": "2020-7-15"
                }

                # Save to MySQL; the values are passed as query parameters.
                cursor = db.cursor()
                table = 'jobs'
                keys = ','.join(record.keys())
                values = ','.join(['%s'] * len(record))
                sql = 'insert into {table}({keys}) VALUES({values})'.format(
                    table=table, keys=keys, values=values)
                try:
                    if cursor.execute(sql, tuple(record.values())):
                        print('insert successful')
                        db.commit()
                except Exception as e:
                    print("insert failed!", e)
                    db.rollback()

                # Keep a local copy as well, one JSON document per line.
                with open('/root/Python/zhilian_data_0714.json',
                          'a+',
                          encoding='utf-8') as out_file:
                    out_file.write(json.dumps(record, ensure_ascii=False) + '\n')
            except Exception:
                # One bad page must not abort the whole run.
                print(i + '哎呀,这个页面获取失败了!')
            finally:
                # quit() ends the whole driver session, not just the window.
                if web is not None:
                    web.quit()
    finally:
        db.close()
예제 #16
0
                                       ['enable-automation'])
# Hide the "Chrome is being controlled by automated test software" infobar
chrome_options.add_argument('disable-infobars')

driver = Chrome(chrome_options=chrome_options)
# Maximize the window
driver.maximize_window()
# Page-load timeout (currently disabled)
#driver.set_page_load_timeout (15)

# Use CDP to register JavaScript that runs on every new document and
# redefines navigator.webdriver so that it reads as undefined
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument", {
        "source":
        """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined
        })
      """
    })

# Request headers carrying a desktop Chrome user agent
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
}


#爬虫1
def spider1(startDate, endDate):
    # 起始页面
    start_url = """https://www.adb.org/search?page=1&facet_query=ola_collection_name%3Anews%7CNews%20Release%2Bphoto_essay%7CPhoto%20Essay%2Bfeature%7CMultimedia&facet_query=ds_field_date_content%3A{}T00%3A00%3A00.000Z%2B{}T16%3A00%3A00.000Z
예제 #17
0
import time
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

# Script: open bot.sannysoft.com with stealth.min.js injected, then save a
# screenshot and the page source for inspection.
chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56"
)
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

driver = Chrome('./chromedriver.exe', options=chrome_options)

# Read explicitly as UTF-8 rather than the platform's locale encoding.
with open('stealth.min.js', encoding='utf-8') as f:
    js = f.read()

# Run the script at the start of every new document.
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})

driver.get('https://bot.sannysoft.com')
time.sleep(5)
driver.save_screenshot('walkaround.png')

# Save the page source. Written as UTF-8: with the locale default the write
# can fail with UnicodeEncodeError when the page has characters outside it.
souce = driver.page_source
with open('result.html', 'w', encoding='utf-8') as f:
    f.write(souce)
예제 #18
0
class jdLogin(object):
    """Drive a JD login page in Chrome, including its slider check.

    ``USERNAME``, ``PASSWORD``, ``get_grap`` and ``get_track7`` are names
    defined elsewhere in the file.
    """

    def __init__(self):
        """Start Chrome with ``./chromedriver`` and hide ``navigator.webdriver``."""
        self.path = './chromedriver'

        option = ChromeOptions()
        # option.add_argument('--headless')
        prefs = {
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        option.add_experimental_option('prefs', prefs)
        option.add_argument('--no-sandbox')
        option.add_argument(
            '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
        )
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        # option.add_argument('--proxy-server=http://127.0.0.1:8080')
        option.add_experimental_option('excludeSwitches',
                                       ['enable-automation'])
        self.driver = Chrome(options=option, executable_path=self.path)

        # Anti-detection: make navigator.webdriver read as undefined on
        # every new document.
        self.driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {
                "source":
                """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
          """
            })

        self.driver.set_page_load_timeout(30)
        self.timeout = WebDriverWait(self.driver, 30)

    def get_login(self):
        """Open the login page, switch to password login and submit the form."""
        self.driver.get('http://passport.jd.com/new/login.aspx')
        self.driver.find_element_by_xpath(
            '//a[@clstag="pageclick|keycount|login_pc_201804112|10"]').click()
        sleep(0.5)
        self.driver.find_element_by_id('loginname').send_keys(USERNAME)
        sleep(0.5)
        self.driver.find_element_by_id('nloginpwd').send_keys(PASSWORD)
        sleep(0.5)
        self.driver.find_element_by_id('loginsubmit').click()

    def download_imgs(self):
        """Save the two slider images to ``bg.png`` and ``patch.png``.

        Both images are read from the ``src`` attribute of their ``img``
        elements; the part after ``base64,`` is decoded and written out.
        """
        bgData = self.driver.find_element_by_xpath(
            '//div[@class="JDJRV-bigimg"]/img').get_attribute('src')
        bg = bgData.split('base64,')[1]
        patchData = self.driver.find_element_by_xpath(
            '//div[@class="JDJRV-smallimg"]/img').get_attribute('src')
        patch = patchData.split('base64,')[1]
        bgData = base64.b64decode(bg)
        with open('bg.png', 'wb') as fw1:
            fw1.write(bgData)
        patchData = base64.b64decode(patch)
        with open('patch.png', 'wb') as fw2:
            fw2.write(patchData)

    def dragging(self, tracks):
        """Drag the slider button along ``tracks`` and release it.

        :param tracks: iterable of horizontal offsets, applied one by one.
        """
        button = self.driver.find_element_by_class_name('JDJRV-slide-btn')
        ActionChains(self.driver).click_and_hold(button).perform()

        for track in tracks:
            ActionChains(self.driver).move_by_offset(xoffset=track,
                                                     yoffset=0).perform()

        # Small back-and-forth movement before releasing.
        ActionChains(self.driver).move_by_offset(xoffset=-3,
                                                 yoffset=0).perform()
        ActionChains(self.driver).move_by_offset(xoffset=3,
                                                 yoffset=0).perform()

        sleep(0.7)
        ActionChains(self.driver).release().perform()
        print('stop...')

    def main(self):
        """Log in and, when the slider check is shown, attempt to pass it."""
        self.get_login()  # open the login page and choose password login
        sleep(1)
        # find_elements_* returns an empty list when nothing matches, so the
        # check below can actually be false; find_element_* raises instead,
        # which made the original `if` always true.
        slides = self.driver.find_elements_by_class_name("JDJRV-suspend-slide")
        if slides:
            print("进入滑块验证码流程")
            self.download_imgs()
            sleep(1)
            move = get_grap()
            track = get_track7(move - 2.5)
            self.dragging(track)
            sleep(100)
예제 #19
0
    def post(self):
        """Open the requested URL in Chrome, drag its slider and return the ``x5sec`` cookie.

        The ``url`` request argument is required. The slider with id
        ``nc_1_n1z`` is dragged repeatedly (at most 11 attempts) for as long
        as a refresh link can be found and clicked afterwards.

        :return: ``{'x5sec': value}`` on completion (empty string when the
            cookie is absent), or ``{'url': url, 'x5sec': ''}`` when the URL
            is empty or the browser interaction fails.
        """
        req_parser = reqparse.RequestParser()
        req_parser.add_argument('url', type=str, required=True)
        args = req_parser.parse_args()

        url = args['url']
        if not url:
            return {
                'url': url,
                'x5sec': '',
            }

        option = ChromeOptions()
        # option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        # Arguments are not parsed by a shell, so wrapping quotes would
        # become part of the reported user agent.
        option.add_argument(
            'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_experimental_option("useAutomationExtension", False)
        wd = Chrome(options=option, executable_path='chromedriver')
        # From here on the driver is always shut down with quit(), which ends
        # the whole session; close() only closes the window and was skipped
        # entirely when the set-up calls failed.
        try:
            # Override navigator properties on every new document.
            # NOTE(review): the userAgent set here differs from the
            # user-agent switch above - confirm which one is intended.
            wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                               Object.defineProperty(navigator, 'webdriver', {
                                 get: () => undefined
                               });
                               Object.defineProperty(navigator, 'language', {
                                 get: () => "zh-CN"
                               });
                               Object.defineProperty(navigator, 'deviceMemory', {
                                 get: () => 8
                               });
                               Object.defineProperty(navigator, 'hardwareConcurrency', {
                                 get: () => 8
                               });
                               Object.defineProperty(navigator, 'platform', {
                                 get: () => 'MacIntel'
                               });
                               Object.defineProperty(navigator, 'userAgent', {
                                 get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
                               });
                               Object.defineProperty(navigator, 'plugins', {
                                 get: () => [1, 2, 3, 4, 5]
                               });
                             """
            })
            wd.set_page_load_timeout(20)
            try:
                wd.get(url)
                wd.implicitly_wait(10)
                wd.delete_all_cookies()

                # TODO: handle several pages in parallel
                cnt = 0
                while True:
                    time.sleep(0.2)
                    wd.find_element_by_id("nc_1_n1z").click()
                    slid_ing = wd.find_element_by_id("nc_1_n1z")
                    ActionChains(wd).click_and_hold(on_element=slid_ing).perform()
                    time.sleep(0.2)
                    lgh = 0
                    try:
                        # The offset passed to each move keeps growing, since
                        # lgh accumulates across iterations.
                        while lgh <= 510:
                            lgh += random.randint(30, 50)
                            ActionChains(wd).move_by_offset(xoffset=lgh, yoffset=0).perform()
                        time.sleep(0.2)
                        ActionChains(wd).release().perform()
                    except Exception:
                        # A move failed: still release the mouse button.
                        time.sleep(0.2)
                        ActionChains(wd).release().perform()

                    try:
                        slide_refresh = wd.find_element_by_xpath("//div[@id='nocaptcha']/div/span/a")
                        slide_refresh.click()
                    except Exception:
                        # No refresh link could be clicked: stop retrying.
                        break
                    cnt += 1
                    if cnt > 10:
                        break
                cookies = wd.get_cookies()
            except Exception:
                return {
                    'url': url,
                    'x5sec': '',
                }

            x5sec = ''
            for x5sec_data in cookies:
                if 'x5sec' in x5sec_data.values():
                    x5sec = x5sec_data['value']
            return {
                'x5sec': x5sec,
            }
        finally:
            wd.quit()
예제 #20
0
class S(object):
    """Drive headless Chrome through the QQ OAuth login page and report cookies.

    ``run()`` submits the account/password form, works through the optional
    QR-code and slider challenges, and finally sends the ``gdt_token`` /
    ``gdt_protect`` cookie values through ``dd_notice``.
    """

    def __init__(self):
        # Hard-coded location of the chromedriver binary.
        self.path = '/root/.wdm/drivers/chromedriver/80.0.3987.106/linux64/chromedriver'
        option = ChromeOptions()
        option.add_argument('--headless')
        prefs = {
            'profile.default_content_setting_values': {
                # presumably 2 == "block" in Chrome's content settings -- TODO confirm
                'notifications': 2
            }
        }
        option.add_experimental_option('prefs', prefs)
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches',
                                       ['enable-automation'])
        self.wd = Chrome(options=option, executable_path=self.path)
        # Make navigator.webdriver read as undefined on every new document.
        self.wd.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {
                "source":
                """
                           Object.defineProperty(navigator, 'webdriver', {
                             get: () => undefined
                           })
                         """
            })
        self.wd.set_page_load_timeout(20)
        self.timeout = WebDriverWait(self.wd, 20)
        self.url = 'https://graph.qq.com/oauth2.0/show?which=Login&display=pc&response_type=code&client_id=101477621&redirect_uri=https%3A%2F%2Fsso.e.qq.com%2Fpassport%3Fsso_redirect_uri%3Dhttps%253A%252F%252Fe.qq.com%252Fads%252F%26service_tag%3D1&scope=get_user_info'
        # Placeholder credentials; replace before use.
        self.users = 'xxx'
        self.passwd = 'xxx'

    def run(self):
        """Log in, work through any challenges, and report the session cookies.

        The browser session is always shut down when this method finishes,
        including when an unexpected error escapes one of the steps.
        """
        try:
            self._submit_credentials()
            self._handle_qr_verification()
            self._handle_slider_captcha()
            self._report_cookies()
        finally:
            self.close()

    def _submit_credentials(self):
        """Open the login page and submit the account/password form."""
        self.wd.get(self.url)
        self.wd.implicitly_wait(10)
        self.wd.delete_all_cookies()
        time.sleep(2)

        # The login form lives inside the first iframe of the page.
        iframe = self.wd.find_element_by_xpath('//iframe')
        self.wd.switch_to.frame(iframe)

        self.wd.find_element_by_id('switcher_plogin').click()
        time.sleep(1)
        self.wd.find_element_by_id('u').clear()
        time.sleep(1)
        self.wd.find_element_by_id('u').send_keys(self.users)
        time.sleep(2)
        self.wd.find_element_by_id('p').clear()
        time.sleep(1)
        self.wd.find_element_by_id('p').send_keys(self.passwd)
        time.sleep(2)
        self.wd.find_element_by_id('login_button').click()
        time.sleep(5)

    def _handle_qr_verification(self):
        """Wait for a manual QR-code scan when the page asks for phone verification.

        Any error (including the tips element being absent) is treated as
        "no QR verification needed" and only reported through ``dd_notice``.
        """
        try:
            tips = self.wd.find_element_by_id('qlogin_tips_2').text
            if '由于你的帐号存在异常,需要进行手机验证,' in tips:
                # Loops until the gdt_token cookie shows up; there is no retry cap.
                while True:
                    dd_notice('需要扫描二维码...', dd_token_url)
                    time.sleep(2)
                    self.wd.save_screenshot('qrImg.png')
                    im = Image.open('qrImg.png')
                    im.save('qrImg.png')
                    time.sleep(30)
                    # NOTE(review): this request is made outside the browser
                    # session, so it cannot change self.wd's cookies -- confirm
                    # whether self.wd.get(...) was intended.
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        dd_notice('二维码验证成功!!!', dd_token_url)
                        break
                    else:
                        dd_notice('二维码验证失败!重试中...', dd_token_url)
        except Exception:
            dd_notice('不需要二维码验证!', dd_token_url)

    def _handle_slider_captcha(self):
        """Drag the slider captcha until the gdt_token cookie appears.

        Any error (including the captcha iframe being absent) ends the loop
        and is reported through ``dd_notice`` as "no slider needed".
        """
        try:
            while True:
                time.sleep(3)
                # NOTE(review): the frame is re-entered on every iteration
                # without switching back out first, so a retry looks for an
                # iframe nested inside the captcha frame -- confirm this is
                # what the page structure requires.
                iframe = self.wd.find_element_by_xpath('//iframe')
                self.wd.switch_to.frame(iframe)
                time.sleep(1)
                flags = self.wd.find_element_by_xpath(
                    '//*[@id="guideText"]').text
                if '拖动下方滑块完成拼图' == flags:
                    dd_notice('需要滑块!!!', dd_token_url)
                    # Download the puzzle background so the gap can be located.
                    src_url = self.wd.find_element_by_xpath(
                        '//*[@id="slideBg"]').get_attribute('src')
                    res = requests.get(url=src_url, verify=False)
                    with open('crack.jpeg', 'wb') as f:
                        f.write(res.content)
                    time.sleep(3)
                    drag_button = self.wd.find_element_by_id(
                        'tcaptcha_drag_button')
                    ActionChains(
                        self.wd).click_and_hold(on_element=drag_button).perform()
                    time.sleep(0.2)
                    position = qq_mark_detect('crack.jpeg').x.values[0]
                    # presumably 280/680 rescales image pixels to on-page
                    # pixels and 23 is the slider's start offset -- TODO confirm
                    real_position = position * (280 / 680) - 23
                    track_list = self.get_track(int(real_position))
                    for track in track_list:
                        ActionChains(self.wd).move_by_offset(
                            xoffset=track, yoffset=0).perform()
                        time.sleep(0.002)
                    ActionChains(self.wd).release().perform()
                    time.sleep(2)
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    print(self.wd.get_cookies())
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        dd_notice('滑块验证成功!!!', dd_token_url)
                        break
                    else:
                        dd_notice('滑块验证验证失败!重试中...', dd_token_url)
                else:
                    dd_notice('不需要滑块!!!', dd_token_url)
        except Exception:
            dd_notice('不需要滑块!', dd_token_url)

    def _report_cookies(self):
        """Send the gdt_token / gdt_protect cookie values through ``dd_notice``."""
        cookies_data = self.wd.get_cookies()
        try:
            serialized = json.dumps(cookies_data)
            if 'gdt_token' in serialized and 'gdt_protect' in serialized:
                cookies = {}
                for data in cookies_data:
                    # Match on the cookie's name rather than on any field value.
                    name = data.get('name')
                    value = data.get('value')
                    if name in ('gdt_protect', 'gdt_token') and value:
                        cookies[name] = value
                dd_notice(f'获取的cookies: {cookies}', dd_token_url)
                time.sleep(2)
            else:
                dd_notice('未成功获取cookies, 需手动重试!!!!!', dd_token_url)
        except Exception:
            dd_notice('广点通自动化登陆失败!!!需手动重试!!!!!', dd_token_url)

    @staticmethod
    def get_track(distance):
        """Build a list of per-step x offsets that imitates a human drag.

        The simulated pointer accelerates until it has covered 7/8 of
        ``distance``, then decelerates. The target is deliberately overshot
        by 10, after which eight small negative steps pull the pointer back.

        :param distance: distance to drag, in the same unit as the offsets
        :return: list of integer offsets, one per mouse move
        """
        v = 0
        t = 0.2
        tracks = []
        current = 0
        mid = distance * 7 / 8

        # Overshoot the target; the negative steps appended below correct it.
        distance += 10
        while current < distance:
            if current < mid:
                a = random.randint(2, 4)
            else:
                a = -random.randint(3, 5)

            v0 = v
            # Constant-acceleration displacement over one time step.
            s = v0 * t + 0.5 * a * (t**2)
            current += s
            tracks.append(round(s))

            v = v0 + a * t

        for _ in range(4):
            tracks.append(-random.randint(2, 3))
        for _ in range(4):
            tracks.append(-random.randint(1, 3))
        return tracks

    def close(self):
        """End the WebDriver session.

        ``quit()`` is used instead of ``close()`` so that the chromedriver
        process is stopped as well, not just the browser window.
        """
        self.wd.quit()
예제 #21
0
def test_execute_cdp_cmd():
    """Check that ``Browser.getVersion`` over CDP returns a dict with ``userAgent``."""
    driver = Chrome()
    try:
        version_info = driver.execute_cdp_cmd('Browser.getVersion', {})
        assert isinstance(version_info, dict)
        assert 'userAgent' in version_info
    finally:
        # End the session even when an assertion fails, so no browser is left running.
        driver.quit()
예제 #22
0
    def post(self):
        """Work the slider on the posted ``url`` and return its ``x5sec`` cookie.

        Request arguments:
            url (str, required): page that presents the slider.

        Returns:
            dict: ``{'x5sec': <value>}`` once the slider loop has finished
            (``''`` when no cookie named ``x5sec`` was set), or
            ``{'url': url, 'x5sec': ''}`` when ``url`` is empty or the
            browser interaction raised.
        """
        import traceback

        req_parser = reqparse.RequestParser()
        req_parser.add_argument('url', type=str, required=True)
        args = req_parser.parse_args()

        url = args['url']
        if not url:
            return {
                'url': url,
                'x5sec': '',
            }

        option = ChromeOptions()
        # '--headless' is not passed, so the browser runs with a window.
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        mobile_emulation = {"deviceMetrics": { "width": 375, "height": 667, "pixelRatio": 3}, "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372"}
        option.add_experimental_option("mobileEmulation", mobile_emulation)
        option.add_experimental_option('w3c', False)
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_experimental_option("useAutomationExtension", False)
        option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'

        # Script injected into every new document so navigator.webdriver reads as undefined.
        hide_webdriver_js = """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
          """

        wd = Chrome(options=option, executable_path='/root/Downloads/slider_servers/chromedriver')
        try:
            wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": hide_webdriver_js
            })
            wd.set_page_load_timeout(20)
            try:
                x5sec = ''
                wd.get(url)
                wd.implicitly_wait(10)
                wd.delete_all_cookies()
                cnt = 0
                while True:
                    time.sleep(0.4)
                    wd.find_element_by_id("nc_1_n1t").click()
                    slider = wd.find_element_by_id("nc_1_n1t")
                    time.sleep(0.2)
                    try:
                        # Flick the slider 258px to the right at a randomised speed.
                        TouchActions(wd).flick_element(slider, 258, 0, random.randint(200, 300)).perform()
                        time.sleep(0.2)
                    except Exception as e:
                        # A failed flick is logged and the attempt carries on.
                        print(traceback.format_exc())
                        print(e)
                        time.sleep(0.4)
                    try:
                        # If this element can be found and clicked, another
                        # attempt follows; otherwise the loop ends.
                        slide_refresh = wd.find_element_by_xpath('//*[@id="nc_1-stage-3"]/span[1]/span[1]')
                        slide_refresh.click()
                    except Exception:
                        break
                    cnt += 1
                    if cnt > 10:
                        break
                cookies = wd.get_cookies()
                for cookie in cookies:
                    # Match on the cookie's name rather than on any field value.
                    if cookie.get('name') == 'x5sec':
                        x5sec = cookie['value']
                return {
                    'x5sec': x5sec,
                }
            except Exception:
                return {
                    'url': url,
                    'x5sec': '',
                }
        finally:
            # quit() stops the chromedriver process too; close() would only
            # close the window and leave the driver running after each request.
            wd.quit()
def test_execute_cdp_cmd():
    """Check that ``Browser.getVersion`` over CDP returns a dict with ``userAgent``."""
    driver = Chrome()
    try:
        version_info = driver.execute_cdp_cmd('Browser.getVersion', {})
        assert isinstance(version_info, dict)
        assert 'userAgent' in version_info
    finally:
        # End the session even when an assertion fails, so no browser is left running.
        driver.quit()