Exemple #1
0
    def parse(self, response):
        """Yield one ``ImgsproItem`` for every ``<li>`` of the image list.

        The list is located with an absolute XPath from the document root.
        Each item's ``src`` field receives the first ``img/@src`` value found
        under the entry; ``extract_first()`` yields ``None`` when nothing
        matches, and that value is stored unchanged.
        """
        entries = response.xpath('/html/body/div/div[2]/div[4]/ul/li')

        for entry in entries:
            image_url = entry.xpath('./div/a/div/img/@src').extract_first()
            record = ImgsproItem()
            record['src'] = image_url
            yield record
Exemple #2
0
 def parse(self, response):
     """Yield an ``ImgsproItem`` for each child ``<div>`` of ``#container``.

     Every item is printed to stdout before it is yielded.
     """
     for block in response.xpath('//div[@id="container"]/div'):
         # Note: the URL is read from the ``src2`` pseudo-attribute rather
         # than from ``src``.
         image_url = block.xpath('./div/a/img/@src2').extract_first()
         record = ImgsproItem()
         record['src'] = image_url
         print(record)
         yield record
Exemple #3
0
    def parse(self, response):
        """Yield an ``ImgsproItem`` for each image under ``#container``.

        The ``src2`` attribute value is prefixed with ``'https:'`` and stored
        in the item's ``src`` field.

        Entries that carry no ``src2`` attribute are skipped: ``get()``
        returns ``None`` for them, and concatenating ``None`` to a string
        would raise ``TypeError`` and abort the whole callback.
        """
        div_list = response.xpath('//div[@id="container"]/div')
        for div in div_list:
            raw_src = div.xpath('./div/a/img/@src2').get()
            if raw_src is None:
                # No image URL in this entry; nothing to emit for it.
                continue

            item = ImgsproItem()
            item['src'] = 'https:' + raw_src

            yield item
Exemple #4
0
    def parse(self, response):
        """Yield an ``ImgsproItem`` for each image under ``#container``.

        The URL is built from the ``src2`` pseudo-attribute: the part before
        the first underscore is kept, ``'https:'`` is prepended and ``'.jpg'``
        is appended. Each resulting URL is printed before the item is
        yielded.

        Entries without a ``src2`` attribute are skipped: ``extract_first()``
        returns ``None`` for them, and calling ``.split`` on ``None`` would
        raise ``AttributeError`` and abort the whole callback.
        """
        div_list = response.xpath('//*[@id="container"]/div')
        for div in div_list:
            # src2 is a pseudo-attribute that holds the image URL.
            raw_src = div.xpath('./div/a/img/@src2').extract_first()
            if raw_src is None:
                # No image URL in this entry; nothing to emit for it.
                continue

            src = 'https:' + raw_src.split('_')[0] + '.jpg'
            print(src)

            item = ImgsproItem()
            item['src'] = src

            yield item
Exemple #5
0
    def parse(self, response):
        """Yield an ``ImgsproItem`` for each child ``<div>`` of ``#container``.

        Anti-scraping note: the page lazy-loads its images. The address
        attribute only changes from ``src2`` to ``src`` once an image has
        been scrolled into the visible area, so the ``src2``
        pseudo-attribute is the one read here.
        """
        for block in response.xpath('//div[@id="container"]/div'):
            image_url = block.xpath('./div/a/img/@src2').extract_first()

            record = ImgsproItem()
            # Hand the extracted URL over to the item.
            record['src'] = image_url

            yield record
Exemple #6
0
    def parse(self, response):
        """Yield an ``ImgsproItem`` with ``src`` and ``img_name`` per image.

        ``img_name`` comes from the ``alt`` attribute of the entry's ``<a>``
        element and ``src`` from the ``src2`` attribute of its ``<img>``.
        """
        for block in response.xpath('//div[@id="container"]/div'):
            title = block.xpath('./div/a/@alt').extract_first()
            # The page lazy-loads images: only the image currently being
            # viewed has a ``src`` attribute, the rest have ``src2``. There is
            # no rendered viewport here, so every image exposes ``src2``.
            image_url = block.xpath('./div/a/img/@src2').extract_first()

            record = ImgsproItem()
            record['src'] = image_url
            record['img_name'] = title

            yield record
    def parse(self, response):
        """Yield an ``ImgsproItem`` for each child ``<div>`` of ``#container``.

        Each extracted URL is printed to stdout before its item is yielded.
        """
        containers = response.xpath('//div[@id="container"]/div')

        for block in containers:
            # Note: the page lazy-loads its images, so the URL is taken from
            # the ``src2`` pseudo-attribute.
            image_url = block.xpath('./div/a/img/@src2').extract_first()

            print(image_url)

            record = ImgsproItem()
            record['src'] = image_url

            # Yielding submits the item to the pipeline.
            yield record
Exemple #8
0
    def parse(self, response):
        """Print the request headers, then yield one ``ImgsproItem`` per image.

        Each item carries ``src`` (the ``<img>`` element's ``src2`` attribute)
        and ``name`` (the same element's ``alt`` attribute).
        """
        # Show the headers of the request that produced this response.
        print(response.request.headers)

        for block in response.xpath('//div[@id="container"]/div'):
            # Note: the URL is read from the ``src2`` pseudo-attribute.
            image_url = block.xpath('./div/a/img/@src2').extract_first()
            title = block.xpath('./div/a/img/@alt').extract_first()

            record = ImgsproItem()
            record['src'] = image_url
            record['name'] = title

            yield record
Exemple #9
0
    def parse(self, response):
        """Yield ``ImgsproItem`` objects for at most the first three images.

        Only the first three child ``<div>`` elements of ``#container`` are
        processed. Each item carries ``src`` (the ``src2`` attribute value
        prefixed with ``'https:'``) and ``imgName`` (the ``alt`` attribute of
        the entry's ``<a>`` element). A progress line is printed per item.

        Entries without a ``src2`` attribute are skipped instead of raising
        ``IndexError`` from indexing an empty selector list. ``imgName`` is
        ``None`` when the ``alt`` attribute is absent.
        """
        div_list = response.xpath('//*[@id="container"]/div')

        # Slicing caps the crawl at the first three entries, replacing the
        # manual counter and ``break``.
        for div in div_list[:3]:
            # Note: lazy-loaded images must be read via their pseudo-attribute.
            raw_src = div.xpath('./div/a/img/@src2').extract_first()
            if raw_src is None:
                # No image URL in this entry; nothing to emit for it.
                continue

            src = 'https:' + raw_src
            imgName = div.xpath('./div/a/@alt').extract_first()

            print('正在爬取%s: %s' % (imgName, src))

            item = ImgsproItem()
            item['src'] = src
            item['imgName'] = imgName
            yield item