def parse_item(self, response):
    """Populate an ``MzituItem`` from the image inside ``div.main-image``.

    The image's ``src`` attribute is loaded into ``image_urls`` and its
    ``alt`` attribute into ``name``; both use the ``Identity`` processor.

    :param response: the response whose body the XPath queries run against.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    src_xpath = "//div[@class='main-image']/p/a/img/@src"
    alt_xpath = "//div[@class='main-image']/p/a/img/@alt"

    loader = ItemLoader(item=MzituItem(), response=response)
    loader.add_xpath('image_urls', src_xpath, Identity())
    loader.add_xpath('name', alt_xpath, Identity())
    return loader.load_item()
def parse_item(self, response):
    """Parse a page such as http://www.meizitu.com/a/5336.html for image URLs.

    Loads the ``src`` of every ``img`` under ``div#picture/p`` into
    ``image_urls`` and records the page address in ``url``.

    :param response: the response to extract from.
    :return: the ``MeizituItem`` produced by ``ItemLoader.load_item()``.
    """
    picture_src_xpath = "//div[@id='picture']/p/img/@src"

    loader = ItemLoader(item=MeizituItem(), response=response)
    loader.add_xpath('image_urls', picture_src_xpath, Identity())
    loader.add_value('url', response.url)
    return loader.load_item()
def parse_item(self, response):
    """Populate a ``MeizituItem`` with name, tags, image URLs and page URL.

    :param response: the response whose body the XPath queries run against.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    name_xpath = '//h2/a/text()'
    tags_xpath = "//div[@id='maincontent']/div[@class='postmeta clearfix']/div[@class='metaRight']/p"
    picture_src_xpath = "//div[@id='picture']/p/img/@src"

    loader = ItemLoader(item=MeizituItem(), response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_xpath('tags', tags_xpath)
    # Only the image URLs are given an explicit Identity processor.
    loader.add_xpath('image_urls', picture_src_xpath, Identity())
    loader.add_value('url', response.url)
    return loader.load_item()
def parse_item(self, response):
    """Populate a ``CrawlpictureItem`` using a mix of XPath and CSS selectors.

    ``name`` comes from the ``h2 > a`` text, ``tags`` from the text of
    ``div.metaRight p``, and ``image_urls`` from the ``src`` of every ``img``
    under ``div.postContent``. The page address is stored in ``url``.

    :param response: the response to extract from.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    tags_css = 'div.metaRight p::text'
    image_src_css = 'div.postContent img::attr(src)'

    loader = ItemLoader(item=CrawlpictureItem(), response=response)
    loader.add_xpath('name', '//h2/a/text()')
    loader.add_css('tags', tags_css)
    loader.add_css('image_urls', image_src_css, Identity())
    loader.add_value('url', response.url)
    return loader.load_item()
def parse_item(self, response):
    """Populate a ``MeizituItem`` with image sources and the page URL.

    Only ``img_url`` and ``url`` are filled in by this callback.

    :param response: the response to extract from.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    # NOTE(review): ``br`` is a void element in HTML, so ``p/br/img`` may
    # never match anything in a parsed page -- confirm against the target site.
    image_src_xpath = '//div[@class="text"]/p/br/img/@src'

    loader = ItemLoader(item=MeizituItem(), response=response)
    loader.add_xpath('img_url', image_src_xpath, Identity())
    loader.add_value('url', response.url)
    return loader.load_item()
def parse_item(self, response):
    """Populate a ``MeiziItem`` with the post title, image links and post URL.

    :param response: the response to extract from.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    item_loader = ItemLoader(item=MeiziItem(), response=response)
    # Title
    item_loader.add_xpath('title', '//h2/a/text()')
    # Image links
    item_loader.add_xpath('image', "//div[@id='picture']/p/img/@src", Identity())
    # Post link: ``response.url`` is a literal value, not an XPath expression,
    # so it has to be added with ``add_value`` rather than ``add_xpath``.
    item_loader.add_value('link', response.url)
    return item_loader.load_item()
def parse_item(self, response):
    """Populate a ``TutorialItem`` with name, tags, image URLs and page URL.

    A banner line is printed to stdout when the callback starts.

    :param response: the response to extract from.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    # Parenthesised single-argument form: prints the same text on Python 2
    # and Python 3, whereas the bare ``print "..."`` statement is a
    # SyntaxError on Python 3.
    print("---------------------------parse_item start--------------------------------------------")
    # Load a TutorialItem through an ItemLoader.
    l = ItemLoader(item=TutorialItem(), response=response)
    # Name
    l.add_xpath('name', '//h2/a/text()')
    # Tags
    l.add_xpath(
        'tags',
        "//div[@id='maincontent']/div[@class='postmeta clearfix']/div[@class='metaRight']/p"
    )
    # Image links
    l.add_xpath('image_urls', "//div[@id='picture']/p/img/@src", Identity())
    # Page URL
    l.add_value('url', response.url)
    return l.load_item()
def parse_item(self, response):
    """Populate an ``ItemloaderprojectItem`` with name, tags, image URLs and URL.

    :param response: the response to extract from.
    :return: the item produced by ``ItemLoader.load_item()``.
    """
    # After an Item has been collected in the Spider it is passed to the
    # Item Pipeline, where several components process it in a defined order.
    name_xpath = '//h2/a/text()'
    tags_xpath = "//div[@id='maincontent']/div[@class='postmeta clearfix']/div[@class='metaRight']/p"
    picture_src_xpath = "//div[@id='picture']/p/img/@src"

    loader = ItemLoader(item=ItemloaderprojectItem(), response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_xpath('tags', tags_xpath)
    loader.add_xpath('image_urls', picture_src_xpath, Identity())
    loader.add_value('url', response.url)

    # Other loader calls noted for reference (not used here):
    #   loader.replace_value('url', 'www.baidu.com')
    #   loader.replace_xpath()
    #   loader.get_xpath()
    #   loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
    #
    # Instead of repeating a common selector prefix, you can create a nested
    # loader with the footer selector and add values relative to the footer.
    # The functionality is the same but you avoid repeating the footer selector.
    # Example:
    #   loader = ItemLoader(item=Item())
    #   # load stuff not in the footer
    #   footer_loader = loader.nested_xpath('//footer')
    #   footer_loader.add_xpath('social', 'a[@class = "social"]/@href')
    #   footer_loader.add_xpath('email', 'a[@class = "email"]/@href')
    #   # no need to call footer_loader.load_item()
    #   loader.load_item()
    return loader.load_item()

    # Built-in processors:
    #   Identity   - does nothing; returns the values unchanged.
    #   TakeFirst  - returns the first non-empty value; usually used as an
    #                output processor.
    #   Join       - joins the results together, using a space ' ' by default.
    #   Compose    - chains functions into a pipeline that produces the final
    #                output.
    #   MapCompose - like Compose, but differs in how intermediate results are
    #                passed between functions: its input is an iterable, the
    #                first function is applied to every value in turn to
    #                produce a new iterable that feeds the second function,
    #                and the final results are concatenated and returned.
    #                Generally used in input processors.
    #   SelectJmes - queries a value using a JSON path and returns the result.