def _thread_check_ip(self, proxy):
    """Check one proxy from a worker thread: probe an external and an internal site."""
    with lock:
        ip = proxy.ip
        start_time = time.time()
        proxy.last_use_time = utils.get_utc_date()
    # Route both schemes through the HTTP proxy under test.
    proxies = {
        "http": "http://" + ip,
        "https": "http://" + ip,
    }
    # External reachability check (google.com).
    try:
        response = utils.http_request('https://google.com', timeout=5, proxies=proxies)
        with lock:
            proxy.external_validity = response.status_code == 200
            proxy.used_count += 1
            proxy.external_response_speed = round(time.time() - start_time, 4) * 1000
        response.close()
    except KeyboardInterrupt:
        exit()
    except Exception:
        with lock:
            proxy.external_validity = False
            proxy.external_response_speed = -1
    # Internal reachability check (baidu.com).
    start_time = time.time()
    try:
        response = utils.http_request('https://www.baidu.com', timeout=5, proxies=proxies)
        with lock:
            proxy.internal_validity = response.status_code == 200
            proxy.used_count += 1
            proxy.internal_response_speed = round(time.time() - start_time, 4) * 1000
        response.close()
    except KeyboardInterrupt:
        exit()
    except Exception:
        with lock:
            proxy.internal_validity = False
            proxy.internal_response_speed = -1
    with lock:
        utils.log('Check IP:' + ip + ' finished i:' + str(proxy.internal_validity)
                  + ' e:' + str(proxy.external_validity))
        self.calc_proxy_weight(proxy)
        self.session.commit()
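# Usage sketch, not part of the original code: _thread_check_ip expects a shared
# module-level `lock` and is meant to run on worker threads. The ThreadPoolExecutor
# driver, the `lock` initialisation and the `run_checks` name below are assumptions
# used only to illustrate how the checks could be fanned out.
import threading
from concurrent.futures import ThreadPoolExecutor

lock = threading.Lock()  # guards concurrent writes to proxy objects and the shared session

def run_checks(checker, proxy_list, workers=10):
    # Dispatch one check per proxy; the pool blocks until every check has finished.
    with ThreadPoolExecutor(max_workers=workers) as pool:
        list(pool.map(checker._thread_check_ip, proxy_list))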
def check_ip_availability_task(self):
    # Throttle via Redis: skip this run if the last self-check happened less than
    # TASK_INTERVAL minutes ago.
    last_check_time = self.redis_client.get(REDIS_KEY_LAST_CHECK_IP_TIME)
    now_time = datetime.utcnow().timestamp()
    if last_check_time is not None and (now_time - float(last_check_time)) < (TASK_INTERVAL * 60):
        return
    self.redis_client.set(REDIS_KEY_LAST_CHECK_IP_TIME, now_time)

    proxy_list = self.collection.find()
    for proxy in proxy_list:
        ip = proxy['ip']
        start_time = time.time()
        # Probe a lightweight page through the proxy under test.
        try:
            response = utils.http_request('http://lwons.com/wx', timeout=10,
                                          proxies={"http": "http://" + ip, "https": "http://" + ip})
            is_success = response.status_code == 200
            response.close()
        except Exception:
            is_success = False
        if not is_success:
            # Dead proxy: drop it from the collection.
            try:
                self.collection.delete_one({'ip': ip})
            except Exception:
                pass
            utils.log('Check ip %s FAILED' % ip)
        else:
            # Live proxy: record the latest response speed and mark it valid.
            elapsed = round(time.time() - start_time, 4)
            try:
                self.collection.update_one({'ip': ip}, {
                    "$set": {
                        'update_time': utils.get_utc_time(),
                        'response_speed': elapsed,
                        'validity': True
                    }
                })
            except Exception:
                pass
            utils.log('Check ip %s SUCCESS' % ip)
def check_ip_availability_task(self):
    # Read the time of the last self-check from Redis; if the configured interval
    # has not elapsed yet, skip this run.
    last_check_time = self.redis_client.get(REDIS_KEY_LAST_CHECK_IP_TIME)
    now_time = datetime.utcnow().timestamp()
    if last_check_time is not None and (now_time - float(last_check_time)) < (TASK_INTERVAL * 60):
        return
    self.redis_client.set(REDIS_KEY_LAST_CHECK_IP_TIME, now_time)

    proxy_list = self.db.find_all()
    for proxy in proxy_list:
        ip = proxy.ip
        start_time = time.time()
        # The self-check pings a small, lightweight page through the proxy IP;
        # if the ping fails, the IP is deleted outright.
        try:
            response = utils.http_request('http://www.baidu.com', timeout=10,
                                          proxies={"http": "http://" + ip, "https": "http://" + ip})
            is_success = response.status_code == 200
            response.close()
        except Exception:
            is_success = False
        if not is_success:
            # Request failed: delete the IP immediately.
            try:
                self.db.delete_one(ip)
            except Exception:
                pass
            utils.log('Check ip %s FAILED' % ip)
        else:
            # Request succeeded: record this IP's latest response time in the database
            # so it is preferred the next time an IP is fetched.
            elapsed = round(time.time() - start_time, 4)
            try:
                proxy.update_time = utils.get_utc_time()
                proxy.response_speed = elapsed
                proxy.validity = 1
                self.db.insert_one(proxy)
            except Exception:
                pass
            utils.log('Check ip %s SUCCESS' % ip)
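# Scheduling sketch, an assumption rather than the project's actual entry point:
# the Redis timestamp guard above makes check_ip_availability_task safe to call
# repeatedly, so a minimal driver can invoke it on a short timer and let the guard
# enforce TASK_INTERVAL. The run_check_loop name and poll_seconds value are illustrative.
import time

def run_check_loop(pool, poll_seconds=60):
    while True:
        pool.check_ip_availability_task()  # no-op while TASK_INTERVAL has not elapsed
        time.sleep(poll_seconds)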
def execute(self):
    # Crawl pages 1-9 of the listing and collect every proxy they expose.
    ips = []
    for num in range(1, 10):
        url = self.URL % num
        content = utils.http_request(url).text
        ips += self.parse(content)
    return ips
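# Minimal sketch of the parse() step referenced above; the real parser is site
# specific, so the regex, the helper name and the "ip:port" output format here are
# assumptions chosen to match the "http://" + ip strings built by the checkers.
import re

IP_PORT_RE = re.compile(r'\b(\d{1,3}(?:\.\d{1,3}){3}):(\d{2,5})\b')

def parse(self, content):
    # Return proxies as "ip:port" strings extracted from the fetched page text.
    return ['%s:%s' % (host, port) for host, port in IP_PORT_RE.findall(content)]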
def execute(self) -> list:
    # Fetch the page and cache the raw body on the crawler instance.
    self.content = utils.http_request(self.URL).text
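# Sketch of the utils.http_request helper used throughout this module. Only the call
# signature observed above (url, timeout, proxies) and the use of .status_code /
# .text / .close() are taken from the original code; building it on requests.get is
# an assumption, and the real helper may add headers, retries or user-agent rotation.
import requests

def http_request(url, timeout=10, proxies=None):
    # Returns a requests.Response so callers can read .status_code / .text and close it.
    return requests.get(url, timeout=timeout, proxies=proxies)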