def process_request(self, request, spider):
    def get_ua():
        return getattr(self.ua, self.ua_type)
    request.headers.setdefault('User-Agent', get_ua())
    # Previously hardcoded proxies, kept for reference:
    # request.meta["proxy"] = "http://180.175.0.123:8060"
    # request.meta["proxy"] = "http://115.199.124.93:8060"
    # request.meta["proxy"] = "http://177.137.20.55:80"
    # request.meta["proxy"] = "http://120.52.32.46:80"
    # request.meta["proxy"] = "http://118.123.113.4:80"
    # request.meta["proxy"] = "http://218.85.133.62:80"
    # request.meta["proxy"] = "http://52.67.126.170:3129"
    # request.meta["proxy"] = "http://111.92.227.139:8080"
    # request.meta["proxy"] = "http://192.168.137.200:8080"
    # request.meta["proxy"] = "http://163.204.245.117:9999"
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()

# class RandomProxyMiddleware(object):
#     # dynamically set the proxy IP
#     def process_request(self, request, spider):
#         get_ip = GetIP()
#         request.meta["proxy"] = get_ip.get_random_ip()
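# The process_request above reads self.ua and self.ua_type, which must be set
# up in the middleware's constructor. A minimal sketch of that constructor
# using fake_useragent (the RANDOM_UA_TYPE setting name and its default are
# assumptions, not taken from the source):
from fake_useragent import UserAgent


class RandomUserAgentMiddleware(object):
    def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        # e.g. "random", "chrome" or "firefox"; getattr(self.ua, self.ua_type)
        # then resolves to a matching User-Agent string
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)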
class RandomProxyMiddleware(object):
    def __init__(self):
        super(RandomProxyMiddleware, self).__init__()
        self.proxy = GetIP()

    # dynamically set the proxy IP
    def process_request(self, request, spider):
        request.meta["proxy"] = self.proxy.get_random_ip()
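# For the middleware to take effect, it has to be registered in the project's
# settings.py; a minimal sketch (the module path 'ArticleSpider.middlewares'
# and the priority value 605 are assumptions, adjust to your project):
DOWNLOADER_MIDDLEWARES = {
    'ArticleSpider.middlewares.RandomProxyMiddleware': 605,
}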
import requests


def draw():
    post_url = 'http://yqhh.werner.wiki/set/'
    get_ip = GetIP()
    for i in range(0, 20):
        post_data = {
            'color': 6,
            'number': i,
            'csrfmiddlewaretoken': 'SsecrzNZg9V3nF3fl7IIIWOS00xuK8So'
        }
        ip = get_ip.get_random_ip()
        proxy_dict = {"http": ip}
        print(ip)
        # `header` is assumed to be a dict of request headers defined elsewhere
        response = requests.post(post_url, data=post_data, headers=header,
                                 proxies=proxy_dict)
        print(response.text)
    return
def process_request(self, request, spider):
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
def process_request(self, request, spider):
    # get_random_ip is an instance method, so GetIP must be instantiated first
    request.meta['proxy'] = GetIP().get_random_ip()
""" result = cursor.execute(random_sql) for ip_info in cursor.fetchall(): # cursor.fetchall()返回tuple ip = ip_info[0] port = ip_info[1] judge_re = self.judge_ip(ip, port) if judge_re: return "http://{0}:{1}".format(ip, port) #返回有效的代理ip else: return self.get_random_ip() #取到无效的ip,就重新取 if __name__ == "__main__": get_ip = GetIP() print(get_ip.get_random_ip()) # 在数据库article_spider的tables下,新建表 proxy_ip Field Name Datatype Len Default PK? Not Null ip varchar 20 √ √ port varchar 10 √ speed float proxy_type varchar 5 # sql语句 SELECT + 表字段名 + FROM +数据表名+ WHERE + 筛选条件
def process_request(self, request, spider):
    get_ip = GetIP().get_random_ip()
    if get_ip:
        request.meta['proxy'] = get_ip
def process_request(self, request, spider):
    # proxy IP setup
    # request.meta["proxy"] = "http://106.75.9.39:8080"
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
def process_request(self, request, spider):
    # Other options that could be used here:
    # 1. scrapy-proxies, open-sourced on GitHub
    # 2. scrapy crawlera, simple and easy to use but a paid proxy service
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
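# For option 1 above, scrapy-proxies is configured entirely in settings.py;
# a sketch based on that project's README (the proxy list path is a
# placeholder, and the retry values are illustrative):
RETRY_TIMES = 10
RETRY_HTTP_CODES = [500, 503, 504, 400, 403, 404, 408]
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.retry.RetryMiddleware': 90,
    'scrapy_proxies.RandomProxy': 100,
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 110,
}
PROXY_LIST = '/path/to/proxy/list.txt'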
def __init__(self):
    super(RandomProxyMiddleware, self).__init__()
    self.proxy = GetIP()