Beispiel #1
0
 def parse(self, response):
     """Build one CrawlCjmallItem per product entry found in ``response``.

     Every ``li`` matched by the gallery CSS path is turned into an item
     with ``id``, ``title``, ``price``, ``image_url`` and ``link`` fields.
     An entry whose extraction raises is logged at WARNING level and left
     out of the result; the remaining entries are still processed.

     Returns the list of successfully populated items.
     """
     items = []
     css_path = ('#cjo_content > div '
                 '> div.searchArea > div.galleryType.domainTwo '
                 '> ul > li')
     for sel in response.css(css_path):
         item = CrawlCjmallItem()
         try:
             # NOTE(review): the last argument of extract/extract_re looks
             # like a fallback value for a missing node -- confirm against
             # the helper definitions.
             path = 'div.domainTxt > div.name > a::attr(href)'
             item['id'] = extract_re(sel, path, r'item_cd=(\d*)', '')
             path = 'div.domainTxt > div.name > a::text'
             item['title'] = extract(sel, path, '')
             path = ('div.domainTxt > div.price '
                     '> span > span.blue > strong::text')
             price = extract(sel, path, 0)
             path = ('div.domainTxt > div.price '
                     '> span > span > strong::text')
             origin_price = extract(sel, path, 0)
             # Prefer the value from the span.blue selector when it is
             # truthy, otherwise fall back to the broader selector.
             item['price'] = price or origin_price
             path = 'a > img::attr(src)'
             item['image_url'] = extract(sel, path, '')
             path = 'div.domainTxt > div.name > a::attr(href)'
             # Named link_pattern so the standard ``re`` module name is
             # not shadowed inside this method.
             link_pattern = r'viewDetailItem\(\'\s*(.*)'
             item['link'] = self.BASE_URL + extract_re(
                 sel, path, link_pattern, '')
             items.append(item)
         except IndexError:
             # Builtin IndexError: no dependency on the Python 2-only
             # ``exceptions`` module.
             msg = 'Out of index, Keyword: {0}'.format(self.keyword)
             log.msg(msg, level=log.WARNING, spider=self)
             log.msg(str(item), level=log.WARNING, spider=self)
         except Exception as err:
             # ``Exception`` rather than a bare ``except`` so that
             # KeyboardInterrupt / SystemExit still propagate.
             msg = 'Unknown Error, Keyword: {0}'.format(self.keyword)
             log.msg(msg, level=log.WARNING, spider=self)
             # Record what actually failed instead of discarding it.
             log.msg(repr(err), level=log.WARNING, spider=self)
     return items
Beispiel #2
0
 def parse(self, response):
     """Build one CrawlCoupangItem per ``#productList > li`` entry.

     Each matched entry is turned into an item with ``id``, ``title``,
     ``price``, ``condition``, ``image_url`` and ``link`` fields. An entry
     whose extraction raises is logged at WARNING level and left out of
     the result; the remaining entries are still processed.

     Returns the list of successfully populated items.
     """
     items = []
     for sel in response.css('#productList > li'):
         item = CrawlCoupangItem()
         try:
             # NOTE(review): the last argument of extract looks like a
             # fallback value for a missing node -- confirm against the
             # helper definition.
             path = '*::attr(id)'
             item['id'] = extract(sel, path, '')
             path = 'a.detail-link > strong.title > em::text'
             item['title'] = extract(sel, path, '')
             path = ('a.detail-link > em.prod-price '
                     '> span.price-detail > strong.price '
                     '> em::text')
             item['price'] = extract(sel, path, 0)
             path = 'a.detail-link > span.condition > em::text'
             item['condition'] = extract(sel, path, 0)
             path = 'a.detail-link > img::attr(src)'
             item['image_url'] = extract(sel, path, '')
             path = 'a.detail-link::attr(href)'
             item['link'] = self.BASE_URL + extract(sel, path, '')
             items.append(item)
         except IndexError:
             # Builtin IndexError: no dependency on the Python 2-only
             # ``exceptions`` module.
             msg = 'Out of index, Keyword: {0}'.format(self.keyword)
             log.msg(msg, level=log.WARNING, spider=self)
             log.msg(str(item), level=log.WARNING, spider=self)
         except Exception as err:
             # ``Exception`` rather than a bare ``except`` so that
             # KeyboardInterrupt / SystemExit still propagate.
             msg = 'Unknown Error, Keyword: {0}'.format(self.keyword)
             log.msg(msg, level=log.WARNING, spider=self)
             # Record what actually failed instead of discarding it.
             log.msg(repr(err), level=log.WARNING, spider=self)
     return items