Ejemplo n.º 1
0
    def parse(self, response):
        """Parse one JSON listing page and yield its entries as items.

        A response whose body is not valid JSON, or whose payload has no
        ``items`` entry, is re-requested at the same URL. When the page
        number at the end of the URL is 1, requests for the remaining pages
        are scheduled from the payload's ``total`` count.

        Args:
            response: Response whose ``meta`` carries a ``'key'`` entry and
                whose URL ends in ``=<page number>``.

        Yields:
            ``Request`` objects (the retry, or the follow-up pages) and one
            ``CrawlingItem`` per entry in ``items``.

        Raises:
            ValueError: If the text after the last ``=`` in the URL is not
                an integer.
        """
        page_size = 20  # entries per page assumed by the pagination maths

        content = response.text
        try:
            data = json.loads(content)
            items = data['items']
        except (ValueError, KeyError, TypeError):
            # Invalid JSON (ValueError) or a payload without a usable
            # 'items' entry (KeyError/TypeError): fetch the same URL again.
            # dont_filter=True stops the duplicate filter from dropping the
            # repeat. NOTE: there is no cap on the number of retries.
            yield Request(response.url,
                          meta={'key': response.meta['key']},
                          callback=self.parse,
                          dont_filter=True)
            return

        # The page number is whatever follows the last '=' in the URL.
        url_prefix, _, page_text = response.url.rpartition('=')
        if int(page_text) == 1:
            # Ceiling division: a total that is an exact multiple of the
            # page size must not trigger a request for an extra, empty page.
            last_page = (int(data['total']) + page_size - 1) // page_size
            for page in range(2, last_page + 1):
                yield Request(url_prefix + '=' + str(page),
                              meta={'key': response.meta['key']},
                              callback=self.parse,
                              dont_filter=True)

        for entry in items:
            item = CrawlingItem()
            # The raw entry is installed directly as the item's _values
            # rather than assigned field by field.
            # NOTE(review): presumably to accept arbitrary keys; confirm
            # against CrawlingItem.
            item._values = entry
            item._values['key'] = response.meta['key']
            item._values['collection'] = 'temp_pdd'
            yield item
Ejemplo n.º 2
0
 def parse(self, response):
     """Parse a JSON response and yield its payload as a single item.

     A response whose body is not valid JSON is re-requested at the same
     URL.

     Args:
         response: Response whose ``meta`` carries a ``'key'`` entry.

     Yields:
         A retry ``Request`` when the body cannot be decoded, otherwise one
         ``CrawlingItem`` holding the decoded payload plus ``'key'``.
     """
     content = response.text
     try:
         data = json.loads(content)
     except ValueError:
         # json.JSONDecodeError is a ValueError subclass. The retry targets
         # a URL that has already been requested, so dont_filter=True is
         # needed to keep the duplicate filter from silently dropping it.
         # NOTE: there is no cap on the number of retries.
         yield Request(response.url,
                       meta={'key': response.meta['key']},
                       callback=self.parse,
                       dont_filter=True)
         return
     item = CrawlingItem()
     # The decoded payload is installed directly as the item's _values.
     item._values = data
     item._values['key'] = response.meta['key']
     yield item
Ejemplo n.º 3
0
 def parse_detail(self, response):
     """Parse a JSON detail response and yield its payload as one item.

     A response whose body is not valid JSON is re-requested at the same
     URL.

     Args:
         response: Response whose ``meta`` carries a ``'key'`` entry.

     Yields:
         A retry ``Request`` when the body cannot be decoded, otherwise one
         ``CrawlingItem`` holding the decoded payload plus ``'key'`` and a
         ``'collection'`` of ``'temp_pdd_sku'``.
     """
     content = response.text
     try:
         data = json.loads(content)
     except ValueError:
         # json.JSONDecodeError is a ValueError subclass. dont_filter=True
         # keeps the duplicate filter from dropping the repeated URL.
         # NOTE: there is no cap on the number of retries.
         yield Request(response.url,
                       meta={'key': response.meta['key']},
                       callback=self.parse_detail,
                       dont_filter=True)
         return
     data['key'] = response.meta['key']
     item = CrawlingItem()
     # The decoded payload is installed directly as the item's _values.
     item._values = data
     item._values['collection'] = 'temp_pdd_sku'
     yield item