Exemple #1
0
    def parse_item(self, response):
        """Load an ``MzituItem`` from the page's main image element.

        Args:
            response: The response whose body is queried by the loader.

        Returns:
            The item returned by ``ItemLoader.load_item()``, with
            ``image_urls`` taken from the image's ``src`` attribute and
            ``name`` taken from its ``alt`` attribute.
        """
        src_xpath = "//div[@class='main-image']/p/a/img/@src"
        alt_xpath = "//div[@class='main-image']/p/a/img/@alt"

        loader = ItemLoader(item=MzituItem(), response=response)
        # Identity() is passed as the processor for both fields.
        loader.add_xpath('image_urls', src_xpath, Identity())
        loader.add_xpath('name', alt_xpath, Identity())
        return loader.load_item()
Exemple #2
0
 def parse_item(self, response):
     """Load a ``MeizituItem`` holding the page's image URLs and its URL.

     The original note gives http://www.meizitu.com/a/5336.html as an
     example of the kind of page this callback parses.

     Args:
         response: The response whose body is queried by the loader.

     Returns:
         The item returned by ``ItemLoader.load_item()``.
     """
     picture_src = "//div[@id='picture']/p/img/@src"

     loader = ItemLoader(item=MeizituItem(), response=response)
     loader.add_xpath('image_urls', picture_src, Identity())
     # Record the address of the page the images came from.
     loader.add_value('url', response.url)
     return loader.load_item()
Exemple #3
0
    def parse_item(self, response):
        """Load a ``MeizituItem`` with name, tags, image URLs and page URL.

        Args:
            response: The response whose body is queried by the loader.

        Returns:
            The item returned by ``ItemLoader.load_item()``.
        """
        # (field, XPath, extra processors) in the order they are added.
        xpath_fields = (
            ('name', '//h2/a/text()', ()),
            ('tags',
             "//div[@id='maincontent']/div[@class='postmeta  clearfix']/div[@class='metaRight']/p",
             ()),
            ('image_urls', "//div[@id='picture']/p/img/@src", (Identity(),)),
        )

        loader = ItemLoader(item=MeizituItem(), response=response)
        for field, expression, processors in xpath_fields:
            loader.add_xpath(field, expression, *processors)
        loader.add_value('url', response.url)
        return loader.load_item()
 def parse_item(self, response):
     """Load a ``CrawlpictureItem`` using a mix of XPath and CSS selectors.

     Args:
         response: The response whose body is queried by the loader.

     Returns:
         The item returned by ``ItemLoader.load_item()``, with ``name``,
         ``tags``, ``image_urls`` and ``url`` added.
     """
     loader = ItemLoader(item=CrawlpictureItem(), response=response)

     loader.add_xpath('name', '//h2/a/text()')

     tags_css = 'div.metaRight p::text'
     loader.add_css('tags', tags_css)

     # An earlier XPath-based attempt at image_urls was replaced by this
     # CSS selector over every image inside the post content.
     images_css = 'div.postContent img::attr(src)'
     loader.add_css('image_urls', images_css, Identity())

     loader.add_value('url', response.url)
     return loader.load_item()
Exemple #5
0
    def parse_item(self, response):
        """Load a ``MeizituItem`` with the image URLs and the page URL.

        Args:
            response: The response whose body is queried by the loader.

        Returns:
            The item returned by ``ItemLoader.load_item()``, with
            ``img_url`` and ``url`` added.
        """
        # NOTE(review): this path descends through <br>, which normally has
        # no children in HTML -- confirm it matches anything on the target
        # pages.
        image_src = '//div[@class="text"]/p/br/img/@src'

        loader = ItemLoader(item=MeizituItem(), response=response)
        loader.add_xpath('img_url', image_src, Identity())
        loader.add_value('url', response.url)
        return loader.load_item()
Exemple #6
0
    def parse_item(self, response):
        """Load a ``MeiziItem`` with the post title, image links and post link.

        Args:
            response: The response whose body is queried by the loader.

        Returns:
            The item returned by ``ItemLoader.load_item()``, with ``title``,
            ``image`` and ``link`` added.
        """
        item_loader = ItemLoader(item=MeiziItem(), response=response)

        # Title
        item_loader.add_xpath('title', '//h2/a/text()')
        # Image links
        item_loader.add_xpath('image', "//div[@id='picture']/p/img/@src",
                              Identity())
        # Post link: response.url is a literal value, not an XPath
        # expression, so it must go through add_value rather than add_xpath.
        item_loader.add_value('link', response.url)

        return item_loader.load_item()
Exemple #7
0
    def parse_item(self, response):
        """Load a ``TutorialItem`` with name, tags, image URLs and page URL.

        Prints a banner line to standard output before loading.

        Args:
            response: The response whose body is queried by the loader.

        Returns:
            The item returned by ``ItemLoader.load_item()``.
        """
        # Call form of print: a single parenthesised string prints the same
        # text on Python 2 and is valid syntax on Python 3.
        print("---------------------------parse_item start--------------------------------------------")
        # Load a TutorialItem through an ItemLoader bound to this response.
        loader = ItemLoader(item=TutorialItem(), response=response)
        # Name
        loader.add_xpath('name', '//h2/a/text()')
        # Tags
        loader.add_xpath(
            'tags',
            "//div[@id='maincontent']/div[@class='postmeta  clearfix']/div[@class='metaRight']/p"
        )
        # Image links
        loader.add_xpath('image_urls', "//div[@id='picture']/p/img/@src",
                         Identity())
        # Page URL
        loader.add_value('url', response.url)

        return loader.load_item()
    def parse_item(self, response):
        """Load an ``ItemloaderprojectItem`` with name, tags, images and URL.

        Args:
            response: The response whose body is queried by the loader.

        Returns:
            The item returned by ``ItemLoader.load_item()``.
        """
        # Original author's note: once an Item has been collected in the
        # Spider it is handed to the Item Pipeline, where components process
        # it in a defined order.
        tags_xpath = (
            "//div[@id='maincontent']/div[@class='postmeta  clearfix']/div[@class='metaRight']/p"
        )
        images_xpath = "//div[@id='picture']/p/img/@src"

        loader = ItemLoader(item=ItemloaderprojectItem(), response=response)
        loader.add_xpath('name', '//h2/a/text()')
        loader.add_xpath('tags', tags_xpath)
        loader.add_xpath('image_urls', images_xpath, Identity())
        loader.add_value('url', response.url)

        # Other loader calls the original author listed for reference:
        #   replace_value('url', ...), replace_xpath(), get_xpath(),
        #   add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
        #
        # Nested loaders avoid repeating a common parent selector, e.g.:
        #   footer_loader = loader.nested_xpath('//footer')
        #   footer_loader.add_xpath('social', 'a[@class = "social"]/@href')
        #   footer_loader.add_xpath('email', 'a[@class = "email"]/@href')
        #   loader.load_item()  # no need to call footer_loader.load_item()

        return loader.load_item()


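# Built-in processors

# Identity: does nothing; returns the input values unchanged.
# TakeFirst: returns the first non-null, non-empty value; typically used as an output processor.
# Join: joins the values into a single string, using one space ' ' as the default separator.
# Compose: chains functions into a pipeline, each receiving the previous function's result, to produce the final output.
# MapCompose: similar to Compose, but differs in how intermediate results are passed between functions. Its input is an iterable: the first function is applied to each value in turn, producing a new iterable that becomes the input of the second function, and so on; the collected results form the final value. Typically used as an input processor.
# SelectJmes: queries the value with a JSON path (JMESPath) expression and returns the result.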