Example 1
 def parse(self, response: Response):
     selects_detail_urls = response.xpath(
         '//*[@class="list"]//li//a/@href').getall()
     if selects_detail_urls:
         for detail_url in selects_detail_urls:
             yield Request(url=detail_url, callback=self.parse_detail)
     # "下一页" is the "next page" link text; follow it to keep paginating
     next_url = response.xpath(
         '//*[@id="div_currpage"]//a[text()="下一页"]/@href').get()
     if next_url:
         yield Request(url=next_url, callback=self.parse)
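Example 1 hands every detail URL to a parse_detail callback that is not shown on this page. A minimal hypothetical sketch of such a callback, reusing only the response.xpath(...).get() call seen in the other examples; the //h1 XPath and the printed field are placeholders, not taken from the source project:

 def parse_detail(self, response: Response):
     # placeholder extraction; replace the XPath with the fields of the target page
     title = response.xpath('//h1/text()').get()
     print(response.url, title)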
Example 2
 def test_response(self):
     request = Request(
         "http://www.ocpe.com.cn/nengyuanjingji/zf/2020-08-29/3785.html")
     request.encoding = "utf-8"
     # parse a locally saved copy of the page so the test runs without network access
     with open("test.html", "rb") as f:
         data = f.read()
         # data='{"code":200,"msg":"success","data":{"SYS_NAME":"职业院校综合管理与内部质量诊断与改进平台","LOGO":"/dfs/2020/11/05/20201105143318-47f7d6e3ca5637b23468330cab0ec0f3.jpg","BACKGROUND":"/dfs/2020/10/19/20201019194723-cdec006aef30e1e8313ac25eb2b71e38.png"}}'
         response = Response(data, 200, request)
         res = response.xpath("//div[@class='xwt_a']//a/text()").getall()
         print(res)
         print(response.links())
Example 3
 def parse(self, response: Response):
     # print('#######2')
     print(f'#######{self.name}')
     # print(threading.current_thread().name, "running...", self.name)
     print(response.status)
     print("222222222222222")
     yield Request(url=response.url, callback=self.parse2, dont_filter=True)
Example 4
 def start_requests(self):
     for i in range(1):
         yield Request(
             url="http://search.51job.com",
             callback=self.parse,
             dont_filter=True,
         )
Example 5
 def start_requests(self):
     for page in range(100):
         url = f'http://exercise.kingname.info/exercise_middleware_ip/{page}'
         yield Request(url,
                       callback=self.parse,
                       dont_filter=False,
                       timeout=3)
Example 6
 def start_requests(self):
     """
     初始请求 用户可以重写此方法
     :return:
     """
     for url in self.start_urls:
         yield Request(url=url, callback=self.parse)
Example 7
async def fetch(url):
    try:
        req = Request(url=url)
        res = await AioHttpDown().fetch(req)
        return res
    except Exception as e:
        print(e)
        return None
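Example 7 defines fetch as a coroutine, so it has to be driven by an event loop. A minimal sketch of doing that with the standard asyncio module, assuming only the fetch function above; the URL is a placeholder, and printing res.status mirrors the response.status attribute used in Examples 3 and 9:

import asyncio

async def main():
    # fetch returns None if the download raised an exception
    res = await fetch("https://example.com")
    if res is not None:
        print(res.status)

if __name__ == "__main__":
    asyncio.run(main())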
Example 8
 def start_requests(self):
     for page in range(1000):
         url = f'http://exercise.kingname.info/exercise_middleware_ip/{page}'
         # url = f'http://exercise.kingname.info/exercise_middleware_ip/{page}'
         # url = 'http://fzggw.zj.gov.cn/art/2020/8/26/art_1621004_55344873.html'
         # the hard-coded URL below overrides the templated one on every iteration
         url = 'https://s.bdstatic.com/common/openjs/amd/eslx.js'
         yield Request(url,
                       callback=self.parse,
                       dont_filter=True,
                       timeout=3)
Example 9
    def parse(self, response: Response):
        print("run parse...........................................")
        print(response.status)
        res = response.xpath("/html/body//div/ul[@class='list']/li/a/@href")
        getall = res.getall()

        for _url in getall:
            yield Request(url=_url,
                          callback=self.parse_detail,
                          dont_filter=True)
            print(22222222222222222222222222222222222222)
        print(3333)
Example 10
 def start_requests(self):
     for page in range(1122):
         # url = f'http://exercise.kingname.info/exercise_middleware_ip/{page}'
         url = 'https://s.bdstatic.com/common/openjs/amd/eslx.js'
         yield Request(url, callback=self.parse, dont_filter=True)
Example 11
 def start_requests(self):
     for url in self.start_urls:
         yield Request(url=url, callback=self.parse)
Example 12
 def parse(self, response: Response):
     for i in range(300):
         yield Request(url=response.url, dont_filter=True)
     yield from BidItem.get_items(response.text)
Example 13
 def start_requests(self):
     start_urls = [
         "https://www.jianshu.com/p/e8f7f6c82be6" for i in range(30)
     ]
     for url in start_urls:
         yield Request(url=url, callback=self.parse, dont_filter=True)
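The offline trick from Example 2 also works for exercising a parse callback without any network traffic. A minimal sketch, assuming the three-argument Response(body, status, request) constructor shown in Example 2; the saved_list_page.html filename and the URL are placeholders:

 def test_parse_offline(self):
     request = Request("http://example.com/list")  # placeholder URL
     with open("saved_list_page.html", "rb") as f:
         response = Response(f.read(), 200, request)
     # parse is a generator, so drain it and inspect the Requests it yields
     for result in self.parse(response):
         print(result)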