def validate_single_proxy(self, single_proxy, url, final_result):
    """Check whether a single proxy record can be used to reach ``url``.

    :param single_proxy: dict describing one proxy. Only the ``ip`` and
        ``port`` keys are read here (the original note describes it as one
        record of the ``gen_proxy`` result: ip, port, type, protocol).
    :param url: the URL the proxy is tested against.
    :param final_result: list that collects usable proxies. ``single_proxy``
        is appended to it when the check succeeds, so callers that cannot
        observe the return value (the original note mentions coroutines)
        can still gather the results.
    :return: ``True`` if the proxy is usable for ``url``, otherwise ``False``.
    """
    host = single_proxy['ip']
    port_number = single_proxy['port']
    endpoint = '%s:%s' % (host, port_number)
    # The same endpoint is used for both plain and TLS traffic.
    proxy = dict.fromkeys(('http', 'https'), endpoint)

    print('开始检测代理%s:%s对网站%s是否有效' % (host, port_number, url))

    usable = gbh_helper.detect_if_proxy_usable(proxies=proxy, url=url)
    if not usable:
        print('代理 %s 无效' % proxy['http'])
        return False

    print('代理 %s 有效' % proxy['http'])
    final_result.append(single_proxy)
    return True
def generate_chrome_ua(setting, num=None):
    """Build Chrome-on-Windows User-Agent strings.

    :param setting: settings instance. This function reads
        ``setting.proxies``, ``setting.os_type`` and ``setting.WIN_VER`` and
        forwards ``setting`` to ``generate_chrome_url_base_on_type`` and
        ``get_chrome_ver``.
    :param num: desired number of User-Agent strings. ``1`` returns a single
        hard-coded User-Agent without any lookup. ``None`` returns every
        generated combination. Any other value returns a random sample of
        ``num`` entries when more than ``num`` were generated, otherwise all
        of them.
    :return: list of User-Agent strings, or ``None`` when
        ``generate_chrome_url_base_on_type`` raises ``ValueError`` (the error
        is printed).
    :raises Exception: when a proxy is required but ``setting.proxies`` is
        ``None``, or when none of the configured proxies is usable.
    """
    # Fast path: a single User-Agent needs no version lookup.
    if num == 1:
        # Adjacent literals are joined by the parser. A backslash continuation
        # inside the quotes would leak the next line's indentation into the
        # User-Agent string.
        return [
            'Mozilla/5.0 (Windows NT 6.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
        ]

    try:
        version_url = generate_chrome_url_base_on_type(setting)
    except ValueError as e:
        print(e)
        return None

    # Decide whether a proxy is needed, then pick the first usable one.
    if_need_proxy = helper.detect_if_need_proxy(self_constant.CHROME_BASE_URL)
    valid_proxies = None
    if if_need_proxy:
        if setting.proxies is None:
            raise Exception(
                'setting没有设置任何代理,无法连接到'
                'https://www.chromedownloads.net获得chrome版本'
            )
        for single_proxies in setting.proxies:
            if helper.detect_if_proxy_usable(
                    proxies=single_proxies,
                    url=self_constant.CHROME_BASE_URL):
                valid_proxies = single_proxies
                break
        if valid_proxies is None:
            raise Exception('尝试了所有代理,都无法连接https://www.chromedownloads.net')

    # Merge the version sets returned for each URL.
    chrome_ver = set()
    for single_url in version_url:
        chrome_ver |= get_chrome_ver(
            url=single_url,
            setting=setting,
            if_need_proxy=if_need_proxy,
            proxies=valid_proxies,
        )

    # Translate the requested OS types into User-Agent platform tokens.
    if self_enum.OsType.All in setting.os_type:
        os_bit = {'Win32; x32', 'Win64; x64'}
    else:
        os_bit = set()
        if self_enum.OsType.Win32 in setting.os_type:
            os_bit.add('Win32; x32')
        if self_enum.OsType.Win64 in setting.os_type:
            os_bit.add('Win64; x64')

    ua_template = (
        'Mozilla/5.0 (%s; %s) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/%s Safari/537.36'
    )
    chrome_ua = [
        ua_template % (winver, osbit, chromever)
        for osbit in os_bit
        for winver in setting.WIN_VER
        for chromever in chrome_ver
    ]

    # Down-sample only when more were generated than requested.
    if num is not None and len(chrome_ua) > num:
        return random.sample(chrome_ua, num)
    return chrome_ua