def download(url): try: r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT) # print("应答码:{}".format(r)) # 将获取内容的编码更改;通过chardet.detect检查内容,并拿到编码方式 r.encoding = chardet.detect(r.content)['encoding'] # 检测是否获取成功 if (not r.ok) or len(r.content) < 500: raise ConnectionError else: return r.text except Exception as e: print(e) count = 0 # 重试次数 # 获取代理IP再进行下载 proxylist = sql.select(10) if not proxylist: return None while count < config.RETRY_TIME: try: proxy = random.choice(proxylist) ip = proxy[0] port = proxy[1] proxies = {'http': 'http://%s%s' % (ip, port), 'https': 'http://%s:%s' % (ip, port)} r = requests.get(url=url, headres=config.get_header(), timeout=config.TIMEOUT, proxies=proxies) r.encoding = chardet.detect(r.content)['encoding'] if (not r.ok) or len(r.content) < 500: raise ConnectionError else: return r.text except Exception: count += 1 return None
def download(url):
    try:
        r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT)
        r.encoding = chardet.detect(r.content)['encoding']
        if (not r.ok) or len(r.content) < 500:
            raise ConnectionError
        else:
            return r.text
    except Exception:
        count = 0  # retry counter
        proxylist = sqlhelper.select(10)
        if not proxylist:
            return None
        while count < config.RETRY_TIME:
            try:
                proxy = random.choice(proxylist)
                ip = proxy[0]
                port = proxy[1]
                proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
                r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
                r.encoding = chardet.detect(r.content)['encoding']
                if (not r.ok) or len(r.content) < 500:
                    raise ConnectionError
                else:
                    return r.text
            except Exception:
                count += 1
        return None
def download_image(image_title, image_detail_websites):
    num = 1
    amount = len(image_detail_websites)
    path = 'F:/Images/temp/' + image_title
    for i in image_detail_websites:
        proxies = config.get_ips()
        for ip in proxies:
            proxy = {'http': ip.strip()}
            print(proxy)
            try:
                r = requests.get(url=i, headers=config.get_header(), proxies=proxy, timeout=3)
                if r.status_code == 200:
                    response = requests.get(url=i, headers=config.get_header(), proxies=proxy, timeout=3)
                    if response.status_code == 200:
                        if not os.path.exists(path):
                            os.makedirs(path)
                        os.chdir('F:/Images/temp/' + image_title)
                        filename = '%s%s.jpg' % (image_title, num)
                        print('Downloading image %s, %s of %s' % (image_title, num, amount))
                        with open(filename, 'wb') as f:
                            f.write(response.content)
                        num += 1
                        break
                else:
                    continue
            except Exception:
                print('Proxy {} is no longer working!'.format(proxy))
def download(url):
    try:
        r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT)
        r.encoding = chardet.detect(r.content)['encoding']
        if (not r.ok) or len(r.content) < 500:
            raise ConnectionError
        else:
            return r.text
    except Exception:
        count = 0  # retry counter
        while count < config.RETRY_TIME:
            try:
                r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT)
                r.encoding = chardet.detect(r.content)['encoding']
                if (not r.ok) or len(r.content) < 500:
                    raise ConnectionError
                else:
                    return r.text
            except Exception:
                count += 1
        return None
def download(url): """ 获取网页 :param url: 请求的网页地址 :return: 返回网页内容 """ try: # 网页请求成功 r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=spyder.HtmlHandler.proxy_list()) # r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT) # 获取网页编码格式,并修改为request.text的解码类型 r.encoding = chardet.detect(r.content)['encoding'] if r.encoding == "GB2312": r.encoding = "GBK" # 网页请求OK或者请求得到的内容过少,判断为连接失败 if (not r.ok) or len(r.content) < 500: raise ConnectionError else: return r.text except Exception: count = 0 # 重试次数 # proxylist = sqlhelper.select(10) proxylist = json.loads(requests.get(config.PROXYURL).text) if not proxylist: return None while count < config.RETRY_TIME: try: proxy = random.choice(proxylist) ip = proxy[0] port = proxy[1] proxies = { "http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port) } # r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=P) r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies) r.encoding = chardet.detect(r.content)['encoding'] if (not r.ok) or len(r.content) < 500: raise ConnectionError else: return r.text except Exception: count += 1 return None
def check_server(proxy):
    ip = proxy['ip']
    port = proxy['port']
    proxies = {"http": "http://%s:%s" % (ip, port), "https": "https://%s:%s" % (ip, port)}
    url1 = config.GOAL_HTTPS_LIST[0]
    url2 = config.GOAL_HTTP_LIST[0]
    c_https = requests.get(url=url1, proxies=proxies, headers=config.get_header(), timeout=config.TIMEOUT)
    c_http = requests.get(url=url2, proxies=proxies, headers=config.get_header(), timeout=config.TIMEOUT)
    if c_http and c_https:
        return 2
    elif c_https:
        return 1
    elif c_http:
        return 0
def NovelCrawl_Main(count):
    url = r'http://www.yousuu.com/booklist'
    proxies = fetchproxies()
    for i in range(count):
        try:
            header = get_header()
            proxy = random.choice(proxies)
            url = CrawlNovelList(url, header, proxy)
        except Exception as e:
            print 'error'
            print e.message
            pass
    sql = 'select distinct novellisturl from pagenovel'
    a = MySQLHelper()
    NovelListUrl = a.SqlFecthAll(sql)
    a.CloseCon()
    print len(NovelListUrl)
    # proxies = fetchproxies()
    for i in NovelListUrl:
        try:
            header = get_header()
            proxy = random.choice(proxies)
            print i[0]
            CrawlNovel(i[0], header, proxy)
        except Exception as e:
            print 'error'
            print e.message
            pass
    a = MySQLHelper()
    sql = 'select distinct novelurl from novelurl'
    NovelUrl = a.SqlFecthAll(sql)
    a.CloseCon()
    print len(NovelUrl)
    # proxies = fetchproxies()
    for i in NovelUrl:
        try:
            header = get_header()
            proxy = random.choice(proxies)
            print i[0]
            CrawlNovelData(i[0], header, proxy)
        except Exception as e:
            print 'error'
            print e.message
            pass
def getMyIP():
    try:
        r = requests.get(url=config.TEST_IP, headers=config.get_header(), timeout=config.TIMEOUT)
        ip = json.loads(r.text)
        return ip['origin']
    except Exception as e:
        raise Test_URL_Fail
def getHtml(self):
    for i in range(0, 1):
        print i
        self.redis_pipline.smembers(i)
        resSet = self.redis_pipline.execute()[0]
        with open("data/" + str(i) + ".csv", "wb+") as f:
            csvHeader = ["id", "html"]
            f_csv = csv.writer(f)
            f_csv.writerow(csvHeader)
            for j, productID in enumerate(resSet):
                if j == 500:
                    break
                url = self.baseUrl + productID
                flag = True
                while flag:
                    try:
                        proxy = self.getProxy()
                        print "current ip" + proxy
                        res = requests.get(url, headers=get_header(), proxies={"http": proxy})
                        print res.status_code
                        if res.status_code != 200 or self.checkRobot(res.content):
                            deleteIP(proxy)
                            print proxy + "has been deleted"
                            continue
                        row = []
                        row.append(productID)
                        row.append(res.content)
                        f_csv.writerow(row)
                        flag = False
                    except Exception as e:
                        print "here" + str(e.message)
                        break
def get_kdlspider():
    pattern_ip = '//*[@id="list"]/table/tbody/tr/td[1]/text()'
    pattern_port = '//*[@id="list"]/table/tbody/tr/td[2]/text()'
    start_url = []
    path = setting.save_ip + 'a2.txt'
    ip_port_list = []
    for i in range(1, 42):
        time.sleep(2)
        url = 'http://www.kuaidaili.com/free/inha/' + str(i) + '/'
        start_url.append(url)
    for i in start_url:
        print(i)
        time.sleep(2)
        response = requests.get(url=i, headers=config.get_header())
        content = response.content
        selector = html.fromstring(content)
        ip = selector.xpath(pattern_ip)
        port = selector.xpath(pattern_port)
        for i in zip(ip, port):
            ip_port = i[0] + ':' + i[1]
            ip_port_list.append(ip_port)
    with open(path, 'a') as f:
        f.write('\n')
        for i in ip_port_list:
            f.write(i + '\n')
def get_xicidailispinder():
    url = 'http://www.xicidaili.com/'
    ip_port_list = []
    path = setting.save_ip + 'a3.txt'
    for i in range(4):
        k = 0
        for j in range(19):
            time.sleep(2)
            num = j + 3 + k
            # Interpolate the computed row index into the XPath expressions
            pattern_ip = '//*[@id="ip_list"]/tbody/tr[%d]/td[2]/text()' % num
            pattern_port = '//*[@id="ip_list"]/tbody/tr[%d]/td[3]/text()' % num
            response = requests.get(url=url, headers=config.get_header())
            content = response.content
            selector = html.fromstring(content)
            ip = selector.xpath(pattern_ip)
            port = selector.xpath(pattern_port)
            for pair in zip(ip, port):
                ip_port = pair[0] + ':' + pair[1]
                ip_port_list.append(ip_port)
        k = i + 22
    with open(path, 'a') as f:
        f.write('\n')
        for line in ip_port_list:
            f.write(line + '\n')
def download(url):
    try:
        r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT)
        r.encoding = chardet.detect(r.content)['encoding']
        if (not r.ok) or len(r.content) < 500:
            raise ConnectionError
        else:
            return r.text
    except Exception:
        count = 0  # retry counter
        # while count < config.RETRY_TIME:
        #     try:
        #         # proxy = random.choice(proxy_list)
        #         ip = proxy[0]
        #         port = proxy[1]
        #         proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
        #
        #         r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
        #         r.encoding = chardet.detect(r.content)['encoding']
        #         if (not r.ok) or len(r.content) < 500:
        #             raise ConnectionError
        #         else:
        #             return r.text
        #     except Exception:
        #         count += 1
        return None
def get_basics(stock_info):
    base_url = 'https://xueqiu.com/stock/f10/finmainindex.json?symbol={0}&page=1&size=199'
    code_mk = stock_info['market'] + stock_info['code']
    req = requests.get(base_url.format(code_mk), headers=get_header())
    json_data = json.loads(req.text)['list']
    for i in json_data:
        stock_code = stock_info['code']
        report_date = i['reportdate']
        mainbusiincome = i['mainbusiincome']
        mainbusiprofit = i['mainbusiprofit']
        totprofit = i['totprofit']
        netprofit = i['netprofit']
        totalassets = i['totalassets']
        totalliab = i['totalliab']
        totsharequi = i['totsharequi']
        basiceps = i['basiceps']
        naps = i['naps']
        opercashpershare = i['opercashpershare']
        peropecashpershare = i['peropecashpershare']
        operrevenue = i['operrevenue']
        invnetcashflow = i['invnetcashflow']
        finnetcflow = i['finnetcflow']
        chgexchgchgs = i['chgexchgchgs']
        cashnetr = i['cashnetr']
        cashequfinbal = i['cashequfinbal']
        createtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(stock_code, report_date)
        insert_stock_basics(stock_code, report_date, mainbusiincome, mainbusiprofit, totprofit, netprofit,
                            totalassets, totalliab, totsharequi, basiceps, naps, opercashpershare,
                            peropecashpershare, operrevenue, invnetcashflow, finnetcflow, chgexchgchgs,
                            cashnetr, cashequfinbal, createtime)
def weather(self, addr):
    result = ''
    headers = config.get_header()
    format_data = {'addr': addr}
    res = requests.get(url=self.weather_url, params=format_data, headers=headers)
    # print(res.text)
    soup = etree.HTML(res.content)
    selector = soup.xpath('//ul[@class="query-hd"]')
    # print(selector)
    if len(selector) == 0:
        result = u'No weather forecast found for this city!'
    else:
        ul = soup.xpath('//ul[@class="query-hd"]/li')
        date = ul[0].xpath('./div[@class="date"]/text()')[0]
        phrase = ul[0].xpath('./div[@class="phrase"]/text()')[0]
        temperature = ul[0].xpath('./div[@class="temperature"]/text()')[0]
        result = result + date + u' (today)' + " " + phrase + ' ' + temperature + '\n'
        for li in ul[1:]:
            date = li.xpath('./div[@class="date"]/text()')[0]
            phrase = li.xpath('./div[@class="phrase"]/text()')[0]
            temperature = li.xpath('./div[@class="temperature"]/text()')[0]
            result = result + date + " " + phrase + ' ' + temperature + '\n'
    return result
def kuaidi(self, kuaidi_no):
    result = ''
    headers = config.get_header()
    headers['Referer'] = 'http://m.ip138.com/kuaidi/search.asp'
    format_data = {'no': kuaidi_no}
    try:
        res = requests.post(url=self.kuaidi_url, data=format_data, headers=headers, timeout=config.TIMEOUT)
        res.encoding = 'utf-8'
        # print(res.text)
    except requests.exceptions.Timeout:
        print('Request timed out. (timeout=%s)' % config.TIMEOUT)
        return 'Request timed out, please try again!'
    soup = etree.HTML(res.content)
    selector = soup.xpath('//ul[@class="query-hd"]/li')
    title = soup.xpath('//ul[@class="query-hd"]/li[@class="title"]')
    comany = title[0].xpath('./span[@class="comany"]/text()')
    # print(comany)
    if len(comany) == 0:
        result = selector[-1].xpath('./text()')[0]
        # return result
    else:
        comany = title[0].xpath('./span/text()')[0]
        status = title[0].xpath('./span/text()')[1]
        # print(comany, status)
        result = result + comany + "," + status + '\n'
        for li in selector[1:-1]:
            time = li.xpath('./div[@class="time"]/text()')[0]
            detail = li.xpath('./div[@class="detail"]/text()')[0]
            result = result + time + " - " + detail + '\n'
    # print(result)
    return result
def getMyIP():
    try:
        r = requests.get(url=TEST_IP, headers=get_header(), timeout=TIMEOUT)
        ip = json.loads(r.text)
        return ip['origin']
    except Exception as e:
        print e.message
def __sub_reply_req(self, url, reply_sub_main_id):
    try:
        header = config.get_header(lv=2)
        res_pones = requests.get(url, headers=header)
        if 200 == res_pones.status_code:
            try:
                data = json.loads(str(res_pones.text))
                if "code" in data and "100000" == data['code']:
                    self.__sub_reply_parse(data['data']['html'], reply_sub_main_id=reply_sub_main_id)
                else:
                    print("sub interface error", data, self.count, self.sub_count)
            except Exception as err:
                print("JSON parse failed", err)
        elif 404 == res_pones.status_code:
            self.__sub_reply_req(url=url, reply_sub_main_id=reply_sub_main_id)
        else:
            print("sub request error", res_pones.status_code, self.count, self.sub_count)
    except Exception as err:
        print("err3---->", err, self.count, self.sub_count)
        self.can_continue = False
def checkSped(selfip, proxy):
    try:
        speeds = []
        test_url = config.GOAL_HTTPS_LIST
        proxies = {
            "http": "http://%s:%s" % (proxy['ip'], proxy['port']),
            "https": "https://%s:%s" % (proxy['ip'], proxy['port'])
        }
        for i in test_url:
            try:
                start = time.time()
                r = requests.get(url=i, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
                if r.ok:
                    speed = round(time.time() - start, 2)
                    speeds.append(speed)
                else:
                    speeds.append(1000000)
            except Exception as e:
                speeds.append(1000000)
        return speeds
    except Exception as e:
        return None
def _checkHttpProxy(selfip, proxies, isHttp=False):
    types = -1
    if isHttp:
        test_url = config.GOAL_HTTP_LIST
    else:
        test_url = config.GOAL_HTTPS_LIST
    try:
        r = requests.get(url=test_url[0], headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
        if r.ok:
            content = json.loads(r.text)
            headers = content['headers']
            ip = content['origin']
            proxy_connection = headers.get('Proxy-Connection', None)
            if ',' in ip:
                types = 2
            elif proxy_connection:
                types = 1
            else:
                types = 0
            return True, types
        else:
            return False, types
    except Exception as e:
        return False, types
def get_free():
    r_http = requests.get(free_url, params={'proxy': free_params[0]}, headers=get_header())
    # r_socks1 = requests.get(free_url, params={'proxy': free_params[1]}, headers=get_header())
    # r_socks2 = requests.get(free_url, params={'proxy': free_params[2]}, headers=get_header())
    parse_free(r_http.text)
def download(self, url):
    count = 0  # retry counter
    r = ''
    logger.info("downloading url: %s", url)
    ls_p = sqlHelper.select(count=10, conditions={'protocol': 1})
    while count < config.RETRY_TIME:
        if r == '' or (not r.ok) or len(r.content) < 500:
            if len(ls_p) > 5:
                choose = random.choice(ls_p)
                proxies = {
                    "https": "http://%s:%s" % (choose.ip, choose.port)
                }
            else:
                proxies = {}
            try:
                r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
                r.encoding = 'gbk'
                count += 1
            except Exception as e:
                count += 1
        else:
            return r.text
def _checkHttpProxy(selfip, proxies, isHttp=True):
    types = -1
    speed = -1
    if isHttp:
        test_url = config.TEST_HTTP_HEADER
    else:
        test_url = config.TEST_HTTPS_HEADER
    try:
        start = time.time()
        r = requests.get(url=test_url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
        sys.stdout.write("check result:" + str(r.text) + " \n")
        if r.ok:
            speed = round(time.time() - start, 2)
            content = json.loads(r.text)
            headers = content['headers']
            ip = content['origin']
            proxy_connection = headers.get('Proxy-Connection', None)
            if ',' in ip:
                types = 2
            elif proxy_connection:
                types = 1
            else:
                types = 0
            return True, types, speed
        else:
            return False, types, speed
    except Exception as e:
        return False, types, speed
def html_Download(proxy, page):
    u"""Crawl one page of listings and write the rows to MySQL; sleep 1s between crawls."""
    url = "https://www.zhipin.com/c101210100/h_101210100/?query=%%E6%%95%%B0%%E6%%8D%%AE%%E5%%88%%86%%E6%%9E%%90&page=%s&ka=page-%s" \
          % (page, page)  # %% escapes a literal percent sign in the format string
    p = re.compile(r'\n+| +', re.S)
    Findjob_detail_url = re.compile(r'href="/job_detail/(.*?)" ka', re.S)
    try:
        c = TABLEzhipinHelper()
        c.CreateTablezhipin()
        headers = get_header()
        source_code = requests.get(url, headers=headers, proxies=proxy)
        soup = BeautifulSoup(source_code.text)
        job_list = soup.findAll('div', 'job-primary')
        for job in job_list:
            detail = re.split(',', re.sub(p, ',', job.text.strip()))
            detail_url = 'https://www.zhipin.com/job_detail/' + re.findall(Findjob_detail_url, str(job))[0]
            detail.append(detail_url)
            c.Insertzhipin(detail)
        print 'page %s is done at %s' % (page, time.ctime())
        c.CloseCon()
        # time.sleep(1)
    except Exception as e:
        # traceback.print_exc(file=open(r'./html_Download_Error.log', 'a+'))
        print e
        pass
def _checkHttpProxy(selfip, proxies, isHttp=True):
    types = -1
    speed = -1
    if isHttp:
        test_url = config.TEST_HTTP_HEADER
    else:
        test_url = config.TEST_HTTPS_HEADER
    try:
        start = time.time()
        r = requests.get(url=test_url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
        if r.ok:
            speed = round(time.time() - start, 2)
            content = json.loads(r.text)
            headers = content['headers']
            ip = content['origin']
            x_forwarded_for = headers.get('X-Forwarded-For', None)
            x_real_ip = headers.get('X-Real-Ip', None)
            if selfip in ip or ',' in ip:
                return False, types, speed
            elif x_forwarded_for is None and x_real_ip is None:
                types = 0
            elif selfip not in x_forwarded_for and selfip not in x_real_ip:
                types = 1
            else:
                types = 2
            return True, types, speed
        else:
            return False, types, speed
    except Exception as e:
        return False, types, speed
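# A minimal usage sketch for the checker above, assuming config.TEST_HTTP_HEADER
# points at an httpbin-style endpoint (e.g. http://httpbin.org/get) that echoes
# back the request headers and the caller's origin IP, which is what the
# content['headers'] / content['origin'] parsing expects. The candidate proxy
# address here is a hypothetical placeholder, not a value from this project.
if __name__ == '__main__':
    selfip = getMyIP()  # this machine's public IP, used to judge anonymity
    candidate = {'ip': '1.2.3.4', 'port': 8080}  # hypothetical proxy to verify
    proxies = {
        'http': 'http://%s:%s' % (candidate['ip'], candidate['port']),
        'https': 'http://%s:%s' % (candidate['ip'], candidate['port']),
    }
    ok, types, speed = _checkHttpProxy(selfip, proxies, isHttp=True)
    # types: 0 = elite (no forwarding headers seen), 1 = anonymous (forwarding
    # headers present but without our IP), 2 = transparent (our IP leaks through).
    print(ok, types, speed)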
def p_get(url, data=None, num_retries=3):
    try:
        request_data = b''
        headers_default = config.get_header()
        if data:
            request_data = urllib.parse.urlencode(data).encode('utf-8')
        print("Downloading ...", url)
        req = urllib.request.Request(url, headers=headers_default, method="GET")
        response = urllib.request.urlopen(req, data=request_data)
        html = response.read()
        try:
            html = gzip.decompress(html)
        except Exception:
            pass
        chardit1 = chardet.detect(html)
        html = html.decode(chardit1['encoding'])
    except urllib.error.URLError as e:
        print(e)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                return p_get(url, data=data, num_retries=num_retries - 1)
    except Exception as e:
        html = None
        print("Error ----->", e)
    return html
def mobile(self, mobile_num):
    result = ''
    headers = config.get_header()
    headers['Referer'] = 'http://m.ip138.com/mobile.html'
    format_data = {'mobile': mobile_num}
    try:
        res = requests.get(url=self.mobile_url, params=format_data, headers=headers, timeout=config.TIMEOUT)
        # print(res.text)
    except requests.exceptions.Timeout:
        print('Request timed out. (timeout=%s)' % config.TIMEOUT)
        return 'Request timed out, please try again!'
    if res.status_code == 200:
        soup = etree.HTML(res.content)
        selector = soup.xpath('//table[@class="table"]/tr')
        er = soup.xpath('//table[@class="table"]/tr/td[@colspan="2"]/text()')
        if len(er):
            result = er[0]
            # return result
        else:
            for table in selector:
                td_key = table.xpath('./td/text()')[0]
                td_var = table.xpath('./td/span/text()')[0]
                # result[td_key] = td_var
                result = result + td_key + ":" + td_var + '\n'
            # print(result)
    else:
        return 'Query failed, please try again!'
    return result
def crawlData(self, url):
    # Configure PhantomJS
    desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    desired_capabilities["phantomjs.page.settings.userAgent"] = config.get_header()
    # Skip loading images; pages render much faster
    desired_capabilities["phantomjs.page.settings.loadImages"] = False
    # Reopen a new sessionId with the DesiredCapabilities (proxy settings); roughly equivalent
    # to clearing the browser cache and revisiting the URL through the proxy
    proxy = webdriver.Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    # proxy.http_proxy = random.choice(ips)
    # proxy.add_to_capabilities(desired_capabilities)
    # Launch a PhantomJS browser with the configuration above
    # driver = webdriver.PhantomJS(executable_path=phantomjs_driver, desired_capabilities=desired_capabilities)
    driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities)
    driver.start_session(desired_capabilities)
    # Implicit wait of 5 seconds; tune as needed
    driver.implicitly_wait(5)
    # Page-load timeout, similar to the timeout option of requests.get(); driver.get() has no timeout of
    # its own and has been seen to hang forever without raising, so set this to avoid a stuck program.
    driver.set_page_load_timeout(20)
    # Script timeout
    driver.set_script_timeout(20)
    # browser = webdriver.Chrome('/home/caidong/developProgram/selenium/chromedriver')
    driver.get(url)
    driver.implicitly_wait(1)
    driver.find_element_by_xpath('//div[@class="house-chat-phone"]').click()
    html = driver.page_source
    return html
def getType(self, proxies, url):
    types = -1
    speed = -1
    try:
        start = time.time()
        r = requests.get(url=url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
        if r.ok:
            speed = round(time.time() - start, 2)
            content = json.loads(r.text)
            headers = content['headers']
            ip = content['origin']
            proxy_connection = headers.get('Connection', None)
            # print 'proxy_connection', proxy_connection
            if ',' in ip:
                types = 2
            elif proxy_connection:
                types = 1
            else:
                types = 0
            return True, types, speed
        else:
            return False, types, speed
    except Exception as e:
        print 'error'
        return False, types, speed
def baidu_check(selfip, proxies):
    '''
    :param
    :return:
    '''
    protocol = -1
    types = -1
    speed = -1
    try:
        start = time.time()
        r = requests.get(url='https://www.baidu.com', headers=config.get_header(), timeout=config.TIMEOUT,
                         proxies=proxies)
        r.encoding = chardet.detect(r.content)['encoding']
        if r.ok:
            speed = round(time.time() - start, 2)
            protocol = 0
            types = 0
        else:
            speed = -1
            protocol = -1
            types = -1
    except Exception as e:
        speed = -1
        protocol = -1
        types = -1
    return protocol, types, speed
def _checkHttpProxy(selfip, proxies, isHttp=True):
    types = -1
    speed = -1
    if isHttp:
        test_url = config.TEST_HTTP_HEADER
    else:
        test_url = config.TEST_HTTPS_HEADER
    try:
        start = time.time()
        r = requests.get(url=test_url, headers=config.get_header(), timeout=config.TIMEOUT, proxies=proxies)
        if r.ok:
            speed = round(time.time() - start, 2)
            content = json.loads(r.text)
            headers = content['headers']
            ip = content['origin']
            proxy_connection = headers.get('Proxy-Connection', None)
            if ',' in ip:
                types = 2
            elif proxy_connection:
                types = 1
            else:
                types = 0
            return True, types, speed
        else:
            return False, types, speed
    except Exception as e:
        return False, types, speed
def baidu_check(selfip, proxies):
    '''
    Detect the proxy's anonymity type. It turns out the information published by the
    free proxy sites is unreliable, so the type has to be checked by ourselves.
    :param
    :return:
    '''
    protocol = -1
    types = -1
    speed = -1
    # try:
    #     # http://ip.chinaz.com/getip.aspx is fairly stable and can be used to check the IP
    #     r = requests.get(url=config.TEST_URL, headers=config.get_header(), timeout=config.TIMEOUT,
    #                      proxies=proxies)
    #     r.encoding = chardet.detect(r.content)['encoding']
    #     if r.ok:
    #         if r.text.find(selfip) > 0:
    #             return protocol, types, speed
    #         else:
    #             return protocol, types, speed
    # except Exception as e:
    #     return protocol, types, speed
    try:
        start = time.time()
        r = requests.get(url='https://www.baidu.com', headers=config.get_header(), timeout=config.TIMEOUT,
                         proxies=proxies)
        r.encoding = chardet.detect(r.content)['encoding']
        if r.ok:
            speed = round(time.time() - start, 2)
            protocol = 0
            types = 0
        else:
            speed = -1
            protocol = -1
            types = -1
    except Exception as e:
        speed = -1
        protocol = -1
        types = -1
    return protocol, types, speed