Ejemplo n.º 1
0
    def Chrome(self):
        """Build and return a Chrome WebDriver configured from ``self._params``.

        Keys read from ``self._params``:
            headless: truthy -> start Chrome with ``--headless``.
            proxy: ``None`` for no proxy. Otherwise either a complete
                command-line switch (e.g.
                ``--proxy-server=http://127.0.0.1:10152``), which is passed
                through unchanged, or a bare proxy address (no leading ``-``
                and no ``=``), which is wrapped as
                ``--proxy-server=<address>``.
            local_user_dir: truthy -> reuse the local Chrome user data
                (cookies etc.) from the default Windows profile directory
                of the currently logged-in user.
            images: falsy -> block image loading.
            js: falsy -> block JavaScript.
            ua: truthy -> send the User-Agent returned by ``random_ua()``.

        Returns:
            The ``webdriver.Chrome`` instance, started with
            ``self._executable_path`` as the driver binary.

        Raises:
            SystemExit: with status 1 when ``local_user_dir`` is requested on
                a system other than Windows (the profile path is only known
                for Windows).
        """
        params = self._params
        chrome_options = webdriver.ChromeOptions()

        if params['headless']:  # headless mode
            chrome_options.add_argument('--headless')

        proxy = params['proxy']
        if proxy is not None:
            # A bare address such as "http://127.0.0.1:10152" is not a Chrome
            # switch by itself; wrap it so the proxy is actually applied.
            # Values that already look like a switch are left untouched.
            is_bare_address = (isinstance(proxy, str) and proxy
                               and not proxy.startswith('-')
                               and '=' not in proxy)
            if is_bare_address:
                proxy = '--proxy-server=' + proxy
            chrome_options.add_argument(proxy)

        if params['local_user_dir']:  # reuse local Chrome user data (cookies, ...)
            sys_type = platform.system()
            if sys_type != "Windows":
                print('监测到当前系统不为windows,请自行填写profile_dir')
                # Same effect as exit(1), without relying on the ``site``
                # module having injected the ``exit`` builtin.
                raise SystemExit(1)
            login_user = os.getlogin()  # name of the logged-in system user
            profile_dir = r'C:\Users\{}\AppData\Local\Google\Chrome\User Data'.format(
                login_user)
            print("预计本地Chrome浏览器的用户数据文件夹为:", profile_dir)
            chrome_options.add_argument("user-data-dir=" +
                                        os.path.abspath(profile_dir))

        if params['ua']:
            # The User-Agent is a command-line switch, not a content setting:
            # putting it into ``profile.default_content_setting_values`` has
            # no effect on the requests Chrome sends.
            chrome_options.add_argument(
                '--user-agent={}'.format(random_ua()))

        # Content settings: the value 2 means "block".
        config_prefs = {}
        if not params['images']:  # do not load images
            config_prefs['images'] = 2
        if not params['js']:  # do not run JavaScript
            config_prefs['javascript'] = 2
        if config_prefs:
            prefs = {'profile.default_content_setting_values': config_prefs}
            chrome_options.add_experimental_option("prefs", prefs)
        # Further options: https://blog.csdn.net/zwq912318834/article/details/78933910
        # See also: https://www.zhihu.com/question/35547395

        # NOTE(review): ``chrome_options=`` / ``executable_path=`` are the
        # older Selenium keyword arguments; newer Selenium releases expect
        # ``options=`` / ``service=`` -- confirm against the installed version.
        driver = webdriver.Chrome(chrome_options=chrome_options,
                                  executable_path=self._executable_path)
        return driver
Ejemplo n.º 2
0
    def PhantomJS(self):
        """Create and return a PhantomJS WebDriver.

        The driver is started from ``self._executable_path`` with the stock
        PhantomJS capabilities, overridden so that it sends the User-Agent
        returned by ``random_ua()`` and does not load images. No extra
        service arguments (e.g. a proxy) are passed.
        """
        # Capability template shipped with Selenium.
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

        # Work on a copy so the shared PHANTOMJS template is not modified.
        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities.update({
            # random User-Agent
            "phantomjs.page.settings.userAgent": random_ua(),
            # do not load images
            "phantomjs.page.settings.loadImages": False,
        })

        # A proxy would be configured through the service arguments, e.g.
        # ['--proxy=127.0.0.1:4860', '--proxy-type=socks5'] (socks5 or http?);
        # none is configured here.
        no_service_args = []

        return webdriver.PhantomJS(
            desired_capabilities=capabilities,
            executable_path=self._executable_path,
            service_args=no_service_args,
        )
Ejemplo n.º 3
0
    def Chrome(self):
        """Build and return a Chrome WebDriver configured from ``self._params``.

        Keys read from ``self._params``:
            headless: truthy -> start Chrome with ``--headless``.
            proxy: ``None`` for no proxy. Otherwise either a complete
                command-line switch (e.g.
                ``--proxy-server=http://127.0.0.1:10152``), which is passed
                through unchanged, or a bare proxy address (no leading ``-``
                and no ``=``), which is wrapped as
                ``--proxy-server=<address>``.
            local_config: truthy -> reuse the Chrome user data stored in
                ``profile_dir``.
            profile_dir: path of the Chrome user data directory; only read
                when ``local_config`` is truthy.
            images: falsy -> block image loading.
            js: falsy -> block JavaScript.
            ua: truthy -> send the User-Agent returned by ``random_ua()``.

        Returns:
            The ``webdriver.Chrome`` instance, started with
            ``self._executable_path`` as the driver binary.
        """
        params = self._params
        chrome_options = webdriver.ChromeOptions()

        if params['headless']:  # headless mode
            chrome_options.add_argument('--headless')

        proxy = params['proxy']
        if proxy is not None:
            # A bare address such as "http://127.0.0.1:10152" is not a Chrome
            # switch by itself; wrap it so the proxy is actually applied.
            # Values that already look like a switch are left untouched.
            is_bare_address = (isinstance(proxy, str) and proxy
                               and not proxy.startswith('-')
                               and '=' not in proxy)
            if is_bare_address:
                proxy = '--proxy-server=' + proxy
            chrome_options.add_argument(proxy)

        if params['local_config']:
            # Resolve the profile directory once and reuse it for both the
            # diagnostic print and the Chrome switch.
            profile_dir = os.path.abspath(params['profile_dir'])
            print("os.path.abspath(params['profile_dir']) = ", profile_dir)
            chrome_options.add_argument("user-data-dir=" + profile_dir)

        if params['ua']:
            # The User-Agent is a command-line switch, not a content setting:
            # putting it into ``profile.default_content_setting_values`` has
            # no effect on the requests Chrome sends.
            chrome_options.add_argument(
                '--user-agent={}'.format(random_ua()))

        # Content settings: the value 2 means "block".
        config_prefs = {}
        if not params['images']:  # do not load images
            config_prefs['images'] = 2
        if not params['js']:  # do not run JavaScript
            config_prefs['javascript'] = 2
        if config_prefs:
            prefs = {'profile.default_content_setting_values': config_prefs}
            chrome_options.add_experimental_option("prefs", prefs)
        # Further options: https://blog.csdn.net/zwq912318834/article/details/78933910
        # See also: https://www.zhihu.com/question/35547395

        # NOTE(review): ``chrome_options=`` / ``executable_path=`` are the
        # older Selenium keyword arguments; newer Selenium releases expect
        # ``options=`` / ``service=`` -- confirm against the installed version.
        driver = webdriver.Chrome(chrome_options=chrome_options,
                                  executable_path=self._executable_path)
        return driver
Ejemplo n.º 4
0
def main():
    """Fetch the Baidu home page and pretty-print its decoded HTML source.

    The request is sent through ``atools_crawler.requests`` with a
    keep-alive header and the User-Agent returned by ``random_ua()``.
    """
    # 1. Request settings. ``proxies`` and ``params`` are only consumed by
    #    the commented-out alternative call below; the live request sends
    #    the headers alone.
    proxies = dict(
        http='http://127.0.0.1:54422',
        https='https://127.0.0.1:54422',
    )
    headers = {}
    headers['Connection'] = 'Keep-Alive'
    # Optional extras that are currently disabled:
    #   'host': 'zhannei.baidu.com'
    #   'ref??': ''
    headers['User-Agent'] = random_ua()
    params = dict()

    # 2. Send the request.
    target = "http://www.baidu.com"
    # Alternative: requests.get(target, headers=headers, proxies=proxies, params=params)
    reply = atools_crawler.requests.get(target, headers=headers)

    # 3. Decode the reply body.
    raw_body = reply.content
    html_text = raw_body.decode('utf8')

    # The page source could instead be written to a temporary HTML file:
    # with open('temp.html', 'r', encoding='utf8') as fout:
    #     fout.write(html_text)
    # Here it is printed directly.
    pprint(html_text)