def parse(self, response):
    """Build one item per entry of the ``result-set`` ordered list.

    Logs a marker message, selects every ``li`` under the ``ol`` whose id
    is ``result-set``, and for each one records the text and ``href`` of
    the ``h2/a`` link(s) found beneath it.

    Returns:
        A list of ``myspiderBotItem`` objects, one per matched ``li``, in
        document order.
    """
    self.log("\n\n\n We got data! \n\n\n")

    selector = HtmlXPathSelector(response)
    result_nodes = selector.select("//ol[@id='result-set']/li")

    def build_item(node):
        # These XPaths have no leading slash, so they are evaluated
        # relative to the current ``li`` node, not the whole document.
        entry = myspiderBotItem()
        entry['title'] = node.select('h2/a/text()').extract()
        entry['link'] = node.select('h2/a/@href').extract()
        return entry

    return [build_item(node) for node in result_nodes]
def parse_item(self, response):
    """Extract the title and link of the heading anchors on an item page.

    Logs the visited URL, selects the ``h3/a`` anchors at a fixed
    positional path in the page body, and stores their text and ``href``
    values on a single ``myspiderBotItem``.

    Returns:
        One ``myspiderBotItem`` with ``title`` and ``link`` populated from
        the matched anchors.
    """
    # Debug markers kept from the original; the single-argument
    # parenthesised form prints the same text under Python 2 and 3.
    print("####################")
    self.log("\n\n\n We got data! \n\n\n")
    self.log('Hi, this is an item page! %s' % response.url)

    # NOTE(review): this positional path is tied to the exact page layout
    # and will silently match nothing if the markup shifts.
    anchors = response.xpath(
        '//html/body/div[3]/div/div[2]/div[5]/div/div[1]/div[2]'
        '/section/div/div/h3/a'
    )

    print("@@@@@@@@@@@@@@@@")
    item = myspiderBotItem()
    print("$$$$$$$$$$$$$$$$$$")

    # Nested queries must be relative: a leading '/' makes the XPath
    # absolute to the document root, so '/text()' and '/@href' never
    # matched anything under the selected anchors.
    item['title'] = anchors.xpath('text()').extract()
    item['link'] = anchors.xpath('@href').extract()
    return item