Esempio n. 1
0
 def parse(self, response):
     """Collect poem titles, bodies and poets from the page and save them.

     Every ``<div>`` in the response is inspected for a child
     ``h2[@itemprop="name"]`` (title), ``div[@class="KonaBody"]/p`` (poem
     text) and ``div[@itemprop="author"]`` (poet). Non-empty results are
     gathered into three separate lists, wrapped in a single dict and
     handed to ``dataPoem.savePoems`` together with ``self.start_urls``
     and ``self.allowed_domains``.

     Args:
         response: The downloaded page; it is wrapped in a ``Selector``.

     Returns:
         Always ``0``.
     """
     titles = []
     poems = []
     poets = []
     for div in Selector(response).xpath('//div'):
         # Title and poem text nodes are concatenated into one string each;
         # the poet is kept as the raw list of extracted text nodes.
         title = "".join(
             div.xpath('h2[@itemprop="name"]/text()').extract())
         poem = "".join(
             div.xpath('div[@class="KonaBody"]/p/text()').extract())
         poet = div.xpath('div[@itemprop="author"]/text()').extract()
         # Each field is filtered independently, so the three lists are not
         # guaranteed to line up index-by-index.
         if poet:
             poets.append(poet)
         if poem:
             poems.append(poem)
         if title:
             titles.append(title)
     final = {'poet': poets, 'poem': poems, 'title': titles}
     # savePoems receives a one-element list holding the aggregated dict.
     dataPoem.savePoems(self.start_urls, self.allowed_domains, [final])
     # NOTE(review): if this is a Scrapy spider callback, the framework
     # expects an iterable of requests/items or None; 0 is kept so existing
     # callers see the same return value -- confirm before changing.
     return 0
Esempio n. 2
0
 def parse(self, response):
     """Collect poets, titles and poem bodies from the page and save them.

     Every ``<div>`` in the response is inspected for a child
     ``div[@id="poemwrapper"]/span[@class="author"]/a`` (poet),
     ``div[@id="poem-top"]/h1`` (title) and ``div[@class="poem"]/div``
     (poem text). Non-empty results are gathered into three separate
     lists, wrapped in a single dict and handed to ``dataPoem.savePoems``
     together with ``self.start_urls`` and ``self.allowed_domains``.

     Args:
         response: The downloaded page; it is wrapped in a ``Selector``.

     Returns:
         Always ``0``.
     """
     poets = []
     titles = []
     poems = []
     for div in Selector(response).xpath('//div'):
         # Each extract() yields a list of text nodes; the lists are stored
         # as-is (not joined), so every collected entry is itself a list.
         poet = div.xpath(
             'div[@id="poemwrapper"]/span[@class="author"]/a/text()'
         ).extract()
         title = div.xpath('div[@id="poem-top"]/h1/text()').extract()
         poem = div.xpath('div[@class="poem"]/div/text()').extract()
         # Each field is filtered independently, so the three lists are not
         # guaranteed to line up index-by-index.
         if poet:
             poets.append(poet)
         if title:
             titles.append(title)
         if poem:
             poems.append(poem)
     final = {'poem': poems, 'poet': poets, 'title': titles}
     # savePoems receives a one-element list holding the aggregated dict.
     dataPoem.savePoems(self.start_urls, self.allowed_domains, [final])
     # NOTE(review): if this is a Scrapy spider callback, the framework
     # expects an iterable of requests/items or None; 0 is kept so existing
     # callers see the same return value -- confirm before changing.
     return 0
Esempio n. 3
0
 def parse(self, response):
     """Scrape title, poem text and poet from the page, then persist them.

     Walks all ``<div>`` elements of the response. For each one it reads
     the text of a child ``h2[@itemprop="name"]`` (title), of
     ``div[@class="KonaBody"]/p`` (poem) and of ``div[@itemprop="author"]``
     (poet). Whatever is non-empty is accumulated, and the accumulated
     lists are passed in a single dict to ``dataPoem.savePoems`` along
     with ``self.start_urls`` and ``self.allowed_domains``.

     Args:
         response: The downloaded page; it is wrapped in a ``Selector``.

     Returns:
         Always ``0``.
     """
     collected_titles = []
     collected_poems = []
     collected_poets = []
     for node in Selector(response).xpath('//div'):
         # Title and poem are flattened to a single string; the poet stays
         # a list of text nodes, exactly as extract() returns it.
         title_text = "".join(
             node.xpath('h2[@itemprop="name"]/text()').extract())
         poem_text = "".join(
             node.xpath('div[@class="KonaBody"]/p/text()').extract())
         poet_nodes = node.xpath(
             'div[@itemprop="author"]/text()').extract()
         # Fields are kept or dropped independently of each other, so the
         # resulting lists may have different lengths.
         if poet_nodes:
             collected_poets.append(poet_nodes)
         if poem_text:
             collected_poems.append(poem_text)
         if title_text:
             collected_titles.append(title_text)
     final = {
         'poet': collected_poets,
         'poem': collected_poems,
         'title': collected_titles,
     }
     # The aggregated dict is delivered wrapped in a one-element list.
     dataPoem.savePoems(self.start_urls, self.allowed_domains, [final])
     # NOTE(review): if this is a Scrapy spider callback, the framework
     # expects an iterable of requests/items or None; 0 is kept so existing
     # callers see the same return value -- confirm before changing.
     return 0
Esempio n. 4
0
 def parse(self, response):
     """Scrape poet, title and poem text from the page, then persist them.

     Walks all ``<div>`` elements of the response. For each one it reads
     the text of ``div[@id="poemwrapper"]/span[@class="author"]/a``
     (poet), ``div[@id="poem-top"]/h1`` (title) and
     ``div[@class="poem"]/div`` (poem). Whatever is non-empty is
     accumulated, and the accumulated lists are passed in a single dict to
     ``dataPoem.savePoems`` along with ``self.start_urls`` and
     ``self.allowed_domains``.

     Args:
         response: The downloaded page; it is wrapped in a ``Selector``.

     Returns:
         Always ``0``.
     """
     collected_poets = []
     collected_titles = []
     collected_poems = []
     for node in Selector(response).xpath('//div'):
         # extract() returns a list of text nodes; these lists are stored
         # unjoined, so each collected entry is itself a list.
         poet_nodes = node.xpath(
             'div[@id="poemwrapper"]/span[@class="author"]/a/text()'
         ).extract()
         title_nodes = node.xpath(
             'div[@id="poem-top"]/h1/text()').extract()
         poem_nodes = node.xpath(
             'div[@class="poem"]/div/text()').extract()
         # Fields are kept or dropped independently of each other, so the
         # resulting lists may have different lengths.
         if poet_nodes:
             collected_poets.append(poet_nodes)
         if title_nodes:
             collected_titles.append(title_nodes)
         if poem_nodes:
             collected_poems.append(poem_nodes)
     final = {
         'poem': collected_poems,
         'poet': collected_poets,
         'title': collected_titles,
     }
     # The aggregated dict is delivered wrapped in a one-element list.
     dataPoem.savePoems(self.start_urls, self.allowed_domains, [final])
     # NOTE(review): if this is a Scrapy spider callback, the framework
     # expects an iterable of requests/items or None; 0 is kept so existing
     # callers see the same return value -- confirm before changing.
     return 0