def parse_item(self, response):
        """Emit the payload of one result page and, from page 1, queue the rest.

        The response body is decoded as JSON.  When the decoded object has a
        non-empty ``data`` member, that member is yielded inside a
        ``QianniuItem`` together with the response meta and the current local
        date (``YYYY-MM-DD``).  If the text following ``&page=`` in the
        response URL is exactly ``1`` and ``data`` carries a ``recordCount``,
        one follow-up ``Request`` per remaining page is yielded, assuming 20
        records per page, with this method as the callback.

        Args:
            response: object exposing ``body`` (JSON text), ``url`` and
                ``meta`` (dict-like).

        Yields:
            One ``QianniuItem``, then zero or more ``Request`` objects.

        Raises:
            ValueError: if the body is not valid JSON or ``recordCount``
                cannot be converted with ``int()``.
        """
        meta = response.meta
        payload = json.loads(response.body)
        # A body that decodes to something other than an object (null, a
        # list, ...) carries no 'data' member; treat it as an empty page
        # instead of failing on ``.get``.
        data = payload.get('data') if isinstance(payload, dict) else None
        if not data:
            return

        item = QianniuItem()
        item['content'] = data
        item['meta'] = meta
        item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        yield item

        # Only the first page schedules the remaining ones.  A URL without
        # the '&page=' marker simply schedules nothing.
        url_parts = response.url.split('&page=')
        if len(url_parts) < 2 or url_parts[1] != '1':
            return
        # 'recordCount' can only be looked up on an object-shaped payload.
        if not isinstance(data, dict):
            return
        record_count = data.get('recordCount')
        if not record_count:
            return

        page_size = 20
        # Ceiling division: a partially filled last page still needs a request.
        page_count = (int(record_count) + page_size - 1) // page_size
        for page_no in xrange(2, page_count + 1):
            yield Request(url_parts[0] + '&page=%s' % page_no,
                          callback=self.parse_item,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta={
                              'month': meta.get('month'),
                              'brand': meta.get('brand'),
                              'catename': meta.get('catename'),
                              'cateid': meta.get('cateid')
                          },
                          dont_filter=True)
Ejemplo n.º 2
0
    def parse_data(self, response):
        """Emit the ``data`` payload of a response and request pages 2 and 3.

        The response body is decoded as JSON and ``response.meta['cate']`` is
        set to ``'parse_data'``.  Nothing is yielded unless the payload's
        ``code`` stringifies to ``'0'`` and its ``data`` member is non-empty.
        Otherwise a ``QianniuItem`` holding the data, the meta and the current
        local date is yielded, followed by one ``Request`` for each of pages
        2 and 3 of the item list, handled by ``self.parse_act_item``.
        """
        payload = json.loads(response.body)
        status = payload.get('code')
        meta = response.meta
        meta['cate'] = 'parse_data'

        if str(status) != '0':
            return
        data = payload.get('data')
        if not data:
            return

        record = QianniuItem()
        record['content'] = data
        record['meta'] = meta
        record['dt'] = time.strftime('%Y-%m-%d', time.localtime())
        yield record

        # The page range is fixed: only pages 2 and 3 are requested,
        # regardless of how many records the payload reports.
        url_template = 'https://sycm.taobao.com/datawar/v3/activity/itemCoreIndex/getItemListLive.json?activityId=%s&itemType=0&device=1&keyword=&pageSize=20&page=%s&order=desc&orderBy=%s'
        # Meta entries carried over to every follow-up request.
        forwarded_keys = ('brand', 'cookie_brand', 'd', 'activityId',
                          'orderby')
        for page_no in xrange(2, 4):
            page_url = url_template % (str(meta.get('activityId')),
                                       str(page_no),
                                       meta.get('orderby'))
            # A fresh meta dict is built for each request.
            next_meta = dict((key, meta.get(key)) for key in forwarded_keys)
            yield Request(page_url,
                          callback=self.parse_act_item,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta=next_meta,
                          dont_filter=True)
Ejemplo n.º 3
0
    def parse(self, response):
        # Response callback written as a generator: decodes the JSON body,
        # yields a QianniuItem when the payload's 'code' is 0 and, for page 1
        # of one particular category, yields a Request per remaining page with
        # this same method as the callback.
        try:
            content = response.body
            # print content
            content_json = json.loads(content)
            code = content_json.get('code')
            # print code
            meta = response.meta
            # Compared as a string, so both 0 and '0' are accepted.
            if str(code) == '0':
                item = QianniuItem()
                # The item stores the raw response body, not the decoded JSON.
                item['content'] = response.body
                item['meta'] = meta
                item['dt'] = time.strftime('%Y-%m-%d',
                                           time.localtime(time.time()))
                yield item

                # Pagination applies only to this category; the literal is
                # roughly "product performance" -- TODO confirm translation.
                if meta.get('cate') == '商品效果':
                    url = response.url
                    url_list = url.split('&page=')
                    # Only the first page schedules the others.  The text after
                    # '&page=' must be exactly '1', so the check only matches
                    # when 'page' is the last query parameter.
                    if url_list[1] == '1':
                        item_json = json.loads(response.body)
                        data = item_json.get('data')
                        if data:
                            recordCount = data.get('recordCount')
                            # recordCount = 6033
                            if recordCount:
                                # Number of pages at 2000 records per page,
                                # rounded up for a partial last page.
                                # NOTE(review): 2000 presumably mirrors the
                                # page size in the request URL -- confirm.
                                page_num, page_mod = divmod(
                                    int(recordCount), 2000)
                                # print page_num, page_mod
                                if page_mod > 0:
                                    page_num = page_num + 1
                                # print page_num

                                # Pages 2..page_num; only 'cate', 'month' and
                                # 'brand' are forwarded in the new meta.
                                for i in xrange(2, page_num + 1):
                                    url_xg = url_list[0] + '&page=%s' % i
                                    # print url_xg
                                    yield Request(
                                        url_xg,
                                        callback=self.parse,
                                        headers=header,
                                        cookies=meta.get('cookie_brand'),
                                        meta={
                                            'cate': meta.get('cate'),
                                            'month': meta.get('month'),
                                            'brand': meta.get('brand')
                                        },
                                        dont_filter=True)

            else:
                # Non-zero code: the 'msg' field is inspected for 'login', but
                # no action is currently taken (the handler below is disabled).
                # print content
                try:
                    msg = content_json.get('msg')
                    # If 'msg' is missing, msg is None and the 'in' test raises
                    # TypeError, which the except clause below prints.
                    if 'login' in msg:
                        #r.hset('cookie_logou',meta.get('brand'),time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
                        pass

                except Exception, e:
                    print e
                    # print response.body
                    pass
        except:
            # Catch-all for any failure above (invalid JSON, missing '&page='
            # in the URL, ...): prints a banner line.
            # NOTE(review): 'meta' and 'url' are assigned but not used in the
            # lines visible here; the handler may continue beyond this excerpt.
            print '*******' * 10
            meta = response.meta
            url = response.url