def get_proxyIp(self):
    """Fetch one proxy record from the proxy API and wrap it in a ProxyModel.

    Returns:
        The ``ProxyModel`` built from the first entry of the response's
        ``RESULT`` field.

    Raises:
        KeyError, IndexError: when the payload has no usable first
            ``RESULT`` entry.
        Any exception raised by ``requests.get`` (including a timeout after
        10 seconds) or by ``json.loads`` is propagated unchanged.
    """
    # NOTE(review): the spiderId/orderno values are hard-coded in this URL;
    # consider moving them to configuration instead of source code.
    proxy_api = "http://api.xdaili.cn/xdaili-api//greatRecharge/getGreatIp?spiderId=4d9d6f7844ac4ff0ba9741196055ffb0&orderno=YZ20193105820IhjYI2&returnType=2&count=1"
    # Bound the wait: without a timeout a stalled API would block the caller
    # indefinitely.
    response = requests.get(proxy_api, timeout=10)
    data = json.loads(response.text)['RESULT'][0]
    print(data)
    return ProxyModel(data)
def update_proxy(self):
    """Refresh ``self.current_proxy`` when it is missing, expiring or blocked.

    The check and the refresh run while ``self.lock`` is held. The lock is
    released in a ``finally`` clause so that a failing HTTP request or an
    invalid JSON body cannot leave it acquired forever.

    A payload that cannot be turned into a ``ProxyModel`` is reported on
    stdout and the previous proxy is kept (best effort).

    Raises:
        Any exception raised by ``requests.get(...)`` or ``.json()`` is
        propagated unchanged (after the lock has been released).
    """
    # Explicit acquire/release (rather than ``with``) because the lock's type
    # is not visible here and may not implement the context-manager protocol.
    self.lock.acquire()
    try:
        if self.current_proxy is None or self.current_proxy.is_expiring or self.current_proxy.is_block:
            # Bounded wait: the lock is held for the duration of this request.
            response_json = requests.get(self.PROXY_URL, timeout=10).json()
            try:
                print(response_json)
                self.current_proxy = ProxyModel(response_json['data'][0])
            except Exception:
                # Deliberately best-effort, but no longer swallows
                # KeyboardInterrupt / SystemExit like a bare ``except`` did.
                print('出错了!')
                print(response_json)
    finally:
        self.lock.release()
def update_proxy(self):
    """Refresh ``self.current_proxy`` when it is missing, expiring or blacklisted.

    The check and the refresh run while ``self.lock`` is held. The lock is
    released in a ``finally`` clause so that a failing HTTP request or an
    invalid JSON body cannot leave it acquired forever.

    When the API answers with an empty ``data`` list the current proxy is
    left untouched.

    Raises:
        Any exception raised by ``requests.get``, ``json.loads``, the
        ``'data'`` lookup or ``ProxyModel(...)`` is propagated unchanged
        (after the lock has been released).
    """
    # Explicit acquire/release (rather than ``with``) because the lock's type
    # is not visible here and may not implement the context-manager protocol.
    self.lock.acquire()
    try:
        if not self.current_proxy or self.current_proxy.is_expiring or self.current_proxy.blacked:
            # Bounded wait: the lock is held for the duration of this request.
            response = requests.get(self.PROXY_URL, timeout=10)
            entries = json.loads(response.text)['data']
            if entries:
                self.current_proxy = ProxyModel(entries[0])
    finally:
        self.lock.release()
def update_proxy(self):
    """Refresh ``self.current_proxy`` when it is missing, expiring or blacklisted.

    The check and the refresh run while ``self.lock`` is held. The lock is
    released in a ``finally`` clause so that a failing HTTP request or an
    invalid JSON body cannot leave it acquired forever.

    When a new proxy is installed its ``ip`` is printed; when the API
    answers with an empty ``data`` list the current proxy is left untouched.

    Raises:
        Any exception raised by ``requests.get``, ``json.loads``, the
        ``'data'`` lookup or ``ProxyModel(...)`` is propagated unchanged
        (after the lock has been released).
    """
    # Explicit acquire/release (rather than ``with``) because the lock's type
    # is not visible here and may not implement the context-manager protocol.
    self.lock.acquire()
    try:
        if not self.current_proxy or self.current_proxy.is_expiring or self.current_proxy.blacked:
            # Bounded wait: the lock is held for the duration of this request.
            resp = requests.get(self.PROXY_URL, timeout=10)
            entries = json.loads(resp.text)['data']
            if entries:
                self.current_proxy = ProxyModel(entries[0])
                print('重新获取了一个IP:%s' % self.current_proxy.ip)
    finally:
        self.lock.release()
def update_proxy(self):
    """Refresh ``self.current_proxy`` when it is missing, expiring or blacklisted.

    The check and the refresh run while ``self.lock`` is held. The lock is
    released in a ``finally`` clause so that a failing HTTP request or an
    invalid JSON body cannot leave it acquired forever.

    The raw response text is printed; when the API answers with an empty
    ``data`` list the current proxy is left untouched.

    Raises:
        Any exception raised by ``requests.get``, ``json.loads``, the
        ``'data'`` lookup or ``ProxyModel(...)`` is propagated unchanged
        (after the lock has been released).
    """
    # Explicit acquire/release (rather than ``with``) because the lock's type
    # is not visible here and may not implement the context-manager protocol.
    self.lock.acquire()
    try:
        # Fetch a new proxy if there is none, or the current one is about to
        # expire or has been blacklisted. (The attribute was previously
        # misspelled ``is_expirin``, which raised AttributeError.)
        if not self.current_proxy or self.current_proxy.is_expiring or self.current_proxy.blacked:
            # Bounded wait: the lock is held for the duration of this request.
            response = requests.get(self.PROXY_URL, timeout=10)
            # The body is a JSON string and must be parsed into a dict.
            text = response.text
            print("重新获取了一个代理:", text)
            # Sample payload returned by the proxy-pool API:
            # {"code":0,"success":true,"msg":"0","data":[{"ip":"223.242.123.50","port":3212,"expire_time":"2019-01-15 10:15:20"}]}
            entries = json.loads(text)['data']
            if entries:
                self.current_proxy = ProxyModel(entries[0])
    finally:
        self.lock.release()
def update_proxy(self):
    """Refresh ``self.current_proxy`` when it is missing, expiring or blacklisted.

    Scrapy runs on Twisted, i.e. asynchronously; if every concurrent caller
    requested a new proxy, IPs would be wasted, so the refresh is performed
    while ``self.lock`` is held. The lock is released in a ``finally``
    clause so that a failing HTTP request or an invalid JSON body cannot
    leave it acquired forever.

    The raw response text is printed; when the API answers with an empty
    ``data`` list the current proxy is left untouched.

    Raises:
        Any exception raised by ``requests.get``, ``json.loads``, the
        ``'data'`` lookup or ``ProxyModel(...)`` is propagated unchanged
        (after the lock has been released).
    """
    # Explicit acquire/release (rather than ``with``) because the lock's type
    # is not visible here and may not implement the context-manager protocol.
    self.lock.acquire()
    try:
        if not self.current_proxy or self.current_proxy.is_expiring or self.current_proxy.blacked:
            # Bounded wait: the lock is held for the duration of this request.
            response = requests.get(self.PROXY_URL, timeout=10)
            text = response.text
            print("重新获取了一个代理", text)
            # The provider throttles frequent requests, so ``data`` may come
            # back empty.
            entries = json.loads(text)['data']
            if entries:
                # Example entry:
                # {'ip': '106.46.136.7', 'port': 4225, 'expire_time': '2019-04-12 09:46:28'}
                # Wrapped in ProxyModel because several operations are needed
                # on it (joining ip and port, parsing the expiry time to
                # decide whether the proxy has expired).
                self.current_proxy = ProxyModel(entries[0])
    finally:
        self.lock.release()