Code example #1
def process_request(self, request, spider):
    # fetch a random proxy from the pool and attach it to the outgoing request
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
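
process_request is a hook of a Scrapy downloader middleware, so in middlewares.py it sits inside a class. A minimal sketch of the surrounding class, assuming it is called RandomProxyMiddleware and that GetIP lives in a tools module (both names are hypothetical):

from tools.crawl_ip import GetIP  # hypothetical module path for the GetIP helper


class RandomProxyMiddleware(object):
    # assign a fresh random proxy to every outgoing request
    def process_request(self, request, spider):
        get_ip = GetIP()
        request.meta["proxy"] = get_ip.get_random_ip()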
Code example #2
def process_request(self, request, spider):
    # get_random_ip() is an instance method, so GetIP has to be instantiated first
    get_ip = GetIP()
    request.meta['proxy'] = get_ip.get_random_ip()
Code example #3
def process_request(self, request, spider):
    # IP proxy settings
    # request.meta["proxy"] = "http://106.75.9.39:8080"  # a single hard-coded proxy
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
Code example #4
        result = cursor.execute(random_sql)
        for ip_info in cursor.fetchall():  # cursor.fetchall() returns tuples
            ip = ip_info[0]
            port = ip_info[1]

            judge_re = self.judge_ip(ip, port)
            if judge_re:
                return "http://{0}:{1}".format(ip, port)  # return a usable proxy IP
            else:
                return self.get_random_ip()  # the IP failed the check, so fetch another one



if __name__ == "__main__":
    get_ip = GetIP()
    print(get_ip.get_random_ip())
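
Code example #4 only shows the core of get_random_ip. A self-contained sketch of the whole helper module might look as follows, assuming a local MySQL connection via pymysql and a requests-based liveness check; the connection parameters, the test URL, and the judge_ip/delete_ip details are illustrative assumptions, not the original code:

import pymysql
import requests

conn = pymysql.connect(host="127.0.0.1", user="root", password="root",
                       db="article_spider", charset="utf8")
cursor = conn.cursor()


class GetIP(object):
    def judge_ip(self, ip, port):
        # verify the proxy by fetching a known page through it
        http_url = "http://www.baidu.com"
        proxy_url = "http://{0}:{1}".format(ip, port)
        try:
            proxy_dict = {"http": proxy_url}
            response = requests.get(http_url, proxies=proxy_dict, timeout=5)
        except Exception:
            print("invalid ip and port")
            self.delete_ip(ip)
            return False
        if 200 <= response.status_code < 300:
            print("effective ip")
            return True
        print("invalid ip and port")
        self.delete_ip(ip)
        return False

    def delete_ip(self, ip):
        # drop a dead proxy from the pool so it is not picked again
        delete_sql = "DELETE FROM proxy_ip WHERE ip='{0}'".format(ip)
        cursor.execute(delete_sql)
        conn.commit()
        return True

    def get_random_ip(self):
        # pick one row at random, then validate it (body as in code example #4)
        random_sql = "SELECT ip, port FROM proxy_ip ORDER BY RAND() LIMIT 1"
        cursor.execute(random_sql)
        for ip_info in cursor.fetchall():
            ip = ip_info[0]
            port = ip_info[1]
            judge_re = self.judge_ip(ip, port)
            if judge_re:
                return "http://{0}:{1}".format(ip, port)
            else:
                return self.get_random_ip()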



# In the article_spider database, create a new table proxy_ip with the following columns
Field Name      Datatype    Len    Default    PK?    Not Null
ip              varchar     20                √      √
port            varchar     10                       √
speed           float
proxy_type      varchar     5

# SQL statement: SELECT + field names + FROM + table name + WHERE + filter condition
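
Spelled out as concrete SQL, the schema above and the random-row query might look like this sketch (the column flags follow the table spec; using ORDER BY RAND() for the random pick is an assumption):

CREATE TABLE proxy_ip (
    ip         VARCHAR(20) NOT NULL,
    port       VARCHAR(10) NOT NULL,
    speed      FLOAT,
    proxy_type VARCHAR(5),
    PRIMARY KEY (ip)
);

-- SELECT + field names + FROM + table name; a random pick takes the place of a WHERE filter here
SELECT ip, port FROM proxy_ip ORDER BY RAND() LIMIT 1;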



# test_scrapy_spider\test_scrapy_spider\middlewares.py
Code example #5
def process_request(self, request, spider):
    # Other options for proxying:
    # 1. scrapy-proxies, open source on GitHub
    # 2. Scrapy Crawlera, simple and easy to use, but a paid proxy service
    get_ip = GetIP()
    request.meta["proxy"] = get_ip.get_random_ip()
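
For any of these variants to take effect, the middleware also has to be enabled in the project's settings.py. A minimal sketch, assuming the RandomProxyMiddleware class name from above (the name and the priority value are illustrative):

# test_scrapy_spider/test_scrapy_spider/settings.py
DOWNLOADER_MIDDLEWARES = {
    # hypothetical class name; use whatever the middleware class is actually called
    "test_scrapy_spider.middlewares.RandomProxyMiddleware": 543,
}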