Example #1
0
 def parse(self, response):
     """Handle either a detail page or a listing page.

     When ``response.meta`` contains ``detail_page`` the response is handed
     to ``self.process_detail_page`` and its result is yielded.  Otherwise
     the response is treated as a listing: for every ``tr`` element with an
     ``offerid`` attribute that has no ``vip_an`` block, each offer link
     accepted by ``self.process_url_value`` is requested as a detail page.
     Finally, if a pager block exists, the pager link whose text equals the
     next page number is followed (page 2 when ``current_page`` is absent
     from ``response.meta``).
     """
     print("%s" % response.url)
     selector = Selector(response)
     meta = response.meta

     if 'detail_page' in meta:
         yield self.process_detail_page(selector, response)
         return

     for row in selector.xpath('//tr[@offerid]'):
         if row.xpath('td/div/div[@class="vip_an"]'):
             # Rows that contain a "vip_an" block are not followed.
             continue
         hrefs = row.xpath('td[@class="txt"]/a[@offerid]/@href').extract()
         for href in hrefs:
             if not self.process_url_value(get_url_path(href)):
                 continue
             # TODO: a check is needed here
             yield Request(href,
                           callback=self.parse,
                           meta={'detail_page': True})

     pager_block = selector.xpath('//div[@class="pager"]')
     if not pager_block:
         return

     if 'current_page' in meta:
         page_no = int(meta['current_page']) + 1
     else:
         page_no = 2
     print(page_no)

     # The pager link is located by its visible text (the page number).
     targets = pager_block.xpath('a[text()="%s"]/@href' % page_no).extract()
     if targets:
         yield Request(targets[0],
                       callback=self.parse,
                       meta={'current_page': page_no})
Example #2
0
 def _create_spyder_meta(self, spider, item, status, e=None):
     """Create or update the ``SpiderMeta`` row for a scraped item.

     The row is fetched or created via ``SpiderMeta.objects.get_or_create``
     keyed on ``spider`` and ``get_url_path(item['link'])``; its status,
     phone fields, estate and full URL are then overwritten and saved.

     Args:
         spider: value used for the ``spider`` lookup field.
         item: scraped item supporting ``[]`` and ``.get``.  ``'link'`` is
             required; ``'phone'``, ``'phone_filename'`` and
             ``'phone_guess'`` are optional.
         status: value assigned to ``spider_meta.status``.
         e: value assigned to ``spider_meta.estate`` (``None`` by default).

     Returns:
         The saved ``SpiderMeta`` instance.

     Raises:
         KeyError: if ``item`` has no ``'link'`` entry.
     """
     link = item['link']
     # NOTE(review): the lookup key is built from the raw ``link`` value even
     # though ``full_url`` below allows it to be a non-string sequence --
     # confirm that get_url_path copes with that shape.
     spider_meta, _ = SpiderMeta.objects.get_or_create(
         spider=spider, url=get_url_path(link))
     spider_meta.status = status

     # Read ``phone`` with .get like the other optional fields so an item
     # without a phone no longer raises KeyError before the row is saved.
     phones = item.get('phone')
     if phones:
         spider_meta.phone = phones[0]
     spider_meta.phone_filename = item.get('phone_filename')
     spider_meta.phone_guess = item.get('phone_guess')
     spider_meta.estate = e

     # ``link`` may be a plain string or an indexable whose first element is
     # stored as the full URL.
     spider_meta.full_url = link if isinstance(link, basestring) else link[0]
     spider_meta.save()
     return spider_meta
         
Example #3
0
 def parse(self, response):
     """Dispatch a response to listing crawling or detail-page processing.

     A response without ``detail_page`` in its ``meta`` is a listing: every
     ``tr`` element carrying an ``offerid`` attribute and lacking a
     ``vip_an`` block is scanned for offer links, and each link whose URL
     path passes ``self.process_url_value`` is requested with
     ``detail_page`` set.  If the page has a pager block, the link whose
     text equals the next page number (``current_page`` + 1, or 2 when
     ``current_page`` is not in ``meta``) is requested as well.  A response
     flagged ``detail_page`` is passed to ``self.process_detail_page`` and
     the result is yielded.
     """
     print("%s" % response.url)
     page = Selector(response)
     if 'detail_page' not in response.meta:
         for offer_row in page.xpath('//tr[@offerid]'):
             vip_marker = offer_row.xpath('td/div/div[@class="vip_an"]')
             if vip_marker:
                 continue
             link_nodes = offer_row.xpath('td[@class="txt"]/a[@offerid]/@href')
             for link in link_nodes.extract():
                 path = get_url_path(link)
                 if self.process_url_value(path):
                     # TODO: a check is needed here
                     detail_meta = {'detail_page': True}
                     yield Request(link, callback=self.parse, meta=detail_meta)

         pagination = page.xpath('//div[@class="pager"]')
         if pagination:
             if 'current_page' in response.meta:
                 page_number = int(response.meta['current_page']) + 1
             else:
                 page_number = 2
             print(page_number)
             # Find the pager anchor whose text is the next page number.
             href_query = 'a[text()="%s"]/@href' % page_number
             matches = pagination.xpath(href_query).extract()
             if matches:
                 paging_meta = {'current_page': page_number}
                 yield Request(matches[0], callback=self.parse, meta=paging_meta)
     else:
         yield self.process_detail_page(page, response)