예제 #1
0
 def validate_single_proxy(self, single_proxy, url, final_result):
     """Check one proxy record against ``url`` and collect it when it works.

     :param single_proxy: dict describing a single proxy; the ``ip`` and
         ``port`` keys are read here (per the original notes the record
         comes from gen_proxy and also carries ``type`` and ``protocol``).
     :param url: the URL the proxy is tested against.
     :param final_result: list that receives ``single_proxy`` when the
         proxy is usable. It is passed in so that callers running this in
         a coroutine, where the return value is not available, can still
         pick up the valid proxies.
     :return: True if the proxy was usable (and appended), otherwise False.
     """
     host = single_proxy['ip']
     port_number = single_proxy['port']
     endpoint = '%s:%s' % (host, port_number)
     # The same endpoint is used for both schemes.
     proxy_map = {scheme: endpoint for scheme in ('http', 'https')}

     print('开始检测代理%s:%s对网站%s是否有效' % (host, port_number, url))

     usable = gbh_helper.detect_if_proxy_usable(proxies=proxy_map, url=url)
     if not usable:
         print('代理 %s 无效' % endpoint)
         return False

     print('代理 %s 有效' % endpoint)
     final_result.append(single_proxy)
     return True
예제 #2
0
def generate_chrome_ua(setting, num=None):
    """Build Chrome user-agent strings from the versions listed upstream.

    :param setting: settings instance. This function reads its ``proxies``,
        ``os_type`` and ``WIN_VER`` attributes and forwards the instance to
        the URL/version helpers.
    :param num: desired number of user agents. ``None`` returns every
        generated UA; ``1`` returns a single built-in UA immediately,
        before any helper is called; any other value returns a random
        sample of that size when more than ``num`` UAs were generated.
    :return: list of UA strings, or ``None`` when
        ``generate_chrome_url_base_on_type`` raises ``ValueError`` (the
        error is printed).
    :raises Exception: when a proxy is required but ``setting.proxies`` is
        ``None``, or when none of the configured proxies is usable.
    """
    # A single UA does not need the version list: return a fixed one.
    if num == 1:
        return [
            'Mozilla/5.0 (Windows NT 6.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
        ]

    try:
        version_url = generate_chrome_url_base_on_type(setting)
    except ValueError as e:
        print(e)
        # Kept as None (not an empty list) so existing callers that test
        # for None keep working.
        return None

    # Find out whether the version site must be reached through a proxy
    # and, if so, pick the first configured proxy that works.
    if_need_proxy = helper.detect_if_need_proxy(self_constant.CHROME_BASE_URL)
    valid_proxies = None
    if if_need_proxy:
        if setting.proxies is None:
            # Implicit concatenation instead of a backslash continuation:
            # the continuation used to inject indentation spaces into the
            # URL shown in this message.
            raise Exception(
                'setting没有设置任何代理,无法连接到'
                'https://www.chromedownloads.net获得chrome版本')

        for single_proxies in setting.proxies:
            if helper.detect_if_proxy_usable(
                    proxies=single_proxies,
                    url=self_constant.CHROME_BASE_URL):
                valid_proxies = single_proxies
                break

        if valid_proxies is None:
            raise Exception('尝试了所有代理,都无法连接https://www.chromedownloads.net')

    # Union of the versions reported by every version page.
    chrome_ver = set()
    for single_url in version_url:
        chrome_ver |= get_chrome_ver(url=single_url,
                                     setting=setting,
                                     if_need_proxy=if_need_proxy,
                                     proxies=valid_proxies)

    # Translate the requested OS types into the UA platform tokens.
    if self_enum.OsType.All in setting.os_type:
        os_bit = {'Win32; x32', 'Win64; x64'}
    else:
        os_bit = set()
        if self_enum.OsType.Win32 in setting.os_type:
            os_bit.add('Win32; x32')
        if self_enum.OsType.Win64 in setting.os_type:
            os_bit.add('Win64; x64')

    ua_template = ('Mozilla/5.0 (%s; %s) AppleWebKit/537.36 (KHTML, '
                   'like Gecko) Chrome/%s Safari/537.36')
    chrome_ua = [
        ua_template % (winver, osbit, chromever)
        for osbit in os_bit
        for winver in setting.WIN_VER
        for chromever in chrome_ver
    ]

    # Only sample when there are more candidates than requested;
    # otherwise hand back everything that was generated.
    if num is not None and len(chrome_ua) > num:
        return random.sample(chrome_ua, num)

    return chrome_ua