# Downloader middleware: set a random proxy IP on each outgoing request
def process_request(self, request, spider):
    # IP proxy setup; a single fixed proxy could be hard-coded instead:
    # request.meta["proxy"] = "http://106.75.9.39:8080"
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
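For Scrapy to actually invoke this middleware, it must be registered in the project's settings. A minimal sketch, assuming the method above lives in a class named RandomProxyMiddleware inside the project's middlewares.py (the class name and package path are assumptions, not shown in this excerpt):

    # settings.py
    DOWNLOADER_MIDDLEWARES = {
        # package path and class name are placeholders; match your project
        "article_spider.middlewares.RandomProxyMiddleware": 543,
    }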
# GetIP.get_random_ip(): pick a usable proxy from the proxy_ip table
class GetIP(object):
    def get_random_ip(self):
        # random_sql and cursor are defined elsewhere in this module:
        # random_sql selects candidate rows from proxy_ip, and cursor is
        # an open MySQL cursor
        result = cursor.execute(random_sql)
        for ip_info in cursor.fetchall():  # cursor.fetchall() returns tuples
            ip = ip_info[0]
            port = ip_info[1]
            judge_re = self.judge_ip(ip, port)
            if judge_re:
                return "http://{0}:{1}".format(ip, port)  # return a working proxy IP
            else:
                return self.get_random_ip()  # got a dead IP, so draw again

if __name__ == "__main__":
    get_ip = GetIP()
    print(get_ip.get_random_ip())

# In the article_spider database, create a new table named proxy_ip:
#
#   Field Name   Datatype   Len   Default   PK?   Not Null
#   ip           varchar    20              √     √
#   port         varchar    10                    √
#   speed        float
#   proxy_type   varchar    5
#
# SQL statement form: SELECT <column names> FROM <table name> WHERE <filter condition>
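The excerpt above leaves two names undefined: random_sql and self.judge_ip(). Here is a minimal sketch of both, assuming the proxy_ip table described above and using the requests library; the test URL and 5-second timeout are arbitrary choices, not from the original:

    import requests

    # One plausible random_sql: draw a single random candidate row,
    # a concrete instance of the SELECT ... FROM ... WHERE pattern above
    random_sql = """
        SELECT ip, port FROM proxy_ip
        ORDER BY RAND()
        LIMIT 1
    """

    def judge_ip(self, ip, port):
        # Would sit on GetIP: treat the proxy as alive if a test request
        # routed through it comes back with a 2xx status code
        proxy_url = "http://{0}:{1}".format(ip, port)
        try:
            response = requests.get("http://httpbin.org/ip",
                                    proxies={"http": proxy_url},
                                    timeout=5)
        except Exception:
            return False  # could not connect through this proxy
        return 200 <= response.status_code < 300

A production version might also DELETE dead rows from proxy_ip so they are never drawn again.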
# test_scrapy_spider\test_scrapy_spider\middlewares.py
def process_request(self, request, spider):
    # Ready-made alternatives to rolling your own proxy pool:
    # 1. scrapy-proxies, an open-source project on GitHub
    # 2. Scrapy Crawlera, a paid but simple-to-use proxy service
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
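If you take option 1, scrapy-proxies replaces this custom middleware entirely. A rough sketch of its configuration, based on the project's README; verify the keys against https://github.com/aivarsk/scrapy-proxies before relying on them:

    # settings.py (sketch for scrapy-proxies; check against its README)
    DOWNLOADER_MIDDLEWARES = {
        "scrapy.downloadermiddlewares.retry.RetryMiddleware": 90,
        "scrapy_proxies.RandomProxy": 100,
        "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": 110,
    }
    PROXY_LIST = "/path/to/proxy/list.txt"  # one proxy URL per line
    PROXY_MODE = 0  # 0 = pick a random proxy from the list for each request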