def text_parse_content(self, response):
    """Yield one text ``GifItem`` per ``<div class="text">`` node in *response*.

    Every item gets ``item_type`` set to ``'3'``.  ``name`` is read from the
    ``h3/a/b`` text found under the node's parent, and ``content`` from the
    node's own ``p/a`` text.  A field whose XPath matches nothing keeps the
    single-space placeholder ``' '``.

    Args:
        response: Object exposing ``xpath()`` (presumably a Scrapy
            response -- confirm against the caller).

    Yields:
        GifItem: The populated item; its name and content are also printed.
    """
    for site in response.xpath('//div[@class="text"]'):
        item = GifItem()
        item['item_type'] = '3'
        # Pre-fill both fields so the print below (and any consumer of the
        # item) never reads an unset field when an extraction finds nothing.
        item['name'] = ' '
        item['content'] = ' '
        try:
            item['name'] = site.xpath('./..//h3/a/b/text()').extract()[0]
            item['content'] = site.xpath('./p/a/text()').extract()[0]
        except IndexError:
            # extract() returned an empty list; keep the placeholder(s).
            pass
        # Single-argument form behaves the same on Python 2 and Python 3.
        print('%s %s' % (item['name'], item['content']))
        yield item
def image_parse_content(self, response):
    """Yield one image ``GifItem`` per ``<div class="text">`` node in *response*.

    Every item gets ``item_type`` set to ``'2'`` and single-space
    placeholders for ``content`` and ``path``.  ``link_url`` and ``name``
    are read from the ``h3/a`` element found under the node's parent, and
    ``src_url`` from the node's own ``p/img`` element.

    If any of the three extractions matches nothing, ``name`` and
    ``src_url`` are both reset to ``' '``.  ``link_url`` keeps the value
    that was extracted, or ``' '`` when its own extraction was the one
    that failed.

    Args:
        response: Object exposing ``xpath()`` (presumably a Scrapy
            response -- confirm against the caller).

    Yields:
        GifItem: The populated item.
    """
    for site in response.xpath('//div[@class="text"]'):
        item = GifItem()
        item['item_type'] = '2'
        item['content'] = ' '
        item['path'] = ' '
        # Placeholder so the field is always present, like the other fields.
        item['link_url'] = ' '
        try:
            item['link_url'] = site.xpath('./..//h3/a/@href').extract()[0]
            item['name'] = site.xpath('./..//h3/a/b/text()').extract()[0]
            item['src_url'] = site.xpath('./p/img/@src').extract()[0]
        except IndexError:
            # extract() returned an empty list for one of the lookups.
            # Blank out both fields, even if name was already extracted.
            item['name'] = ' '
            item['src_url'] = ' '
        yield item