def parse_item(self, response):
    """Yield the page's ``data`` as an item and, from page 1, queue the rest.

    The response body is decoded as JSON. When its ``data`` entry is
    non-empty, it is wrapped in a ``QianniuItem`` together with the
    response meta and today's date. If the response URL ends with
    ``&page=1``, one follow-up request per remaining page is yielded; the
    page count is ``recordCount`` divided by 20, rounded up.
    """
    meta = response.meta
    # Decode once; the same payload feeds both the item and the paging.
    data = json.loads(response.body).get('data')
    if not data:
        return

    item = QianniuItem()
    item['content'] = data
    item['meta'] = meta
    item['dt'] = time.strftime('%Y-%m-%d')
    yield item

    # Only the first page fans out. A URL without '&page=' is skipped
    # instead of raising IndexError on the missing second part.
    url_parts = response.url.split('&page=')
    if len(url_parts) < 2 or url_parts[1] != '1':
        return

    record_count = data.get('recordCount')
    if not record_count:
        return

    page_num, remainder = divmod(int(record_count), 20)
    if remainder:
        page_num += 1

    for i in xrange(2, page_num + 1):
        yield Request(
            url_parts[0] + '&page=%s' % i,
            callback=self.parse_item,
            headers=header,
            cookies=meta.get('cookie_brand'),
            meta={
                'month': meta.get('month'),
                'brand': meta.get('brand'),
                'catename': meta.get('catename'),
                'cateid': meta.get('cateid'),
            },
            dont_filter=True)
def parse_data(self, response):
    """Emit the response's ``data`` as an item, then queue list pages 2 and 3.

    The body is decoded as JSON and ``response.meta`` is tagged in place
    with ``cate = 'parse_data'``. Nothing is yielded unless the payload's
    ``code`` is 0 and its ``data`` entry is non-empty.
    """
    payload = json.loads(response.body)
    status = payload.get('code')
    meta = response.meta
    meta['cate'] = 'parse_data'

    if str(status) != '0':
        return
    data = payload.get('data')
    if not data:
        return

    item = QianniuItem()
    item['content'] = data
    item['meta'] = meta
    item['dt'] = time.strftime('%Y-%m-%d')
    yield item

    # NOTE(review): the original indentation was lost; the follow-up
    # requests are assumed to be issued only when ``data`` was present.
    # TODO confirm against the original layout.
    url_template = (
        'https://sycm.taobao.com/datawar/v3/activity/itemCoreIndex/'
        'getItemListLive.json?activityId=%s&itemType=0&device=1&keyword='
        '&pageSize=20&page=%s&order=desc&orderBy=%s')
    forwarded_keys = ('brand', 'cookie_brand', 'd', 'activityId', 'orderby')

    for page in (2, 3):
        page_url = url_template % (
            str(meta.get('activityId')), str(page), meta.get('orderby'))
        yield Request(
            page_url,
            callback=self.parse_act_item,
            headers=header,
            cookies=meta.get('cookie_brand'),
            # A fresh dict per request so no two requests share one meta.
            meta={key: meta.get(key) for key in forwarded_keys},
            dont_filter=True)
def parse(self, response):
    """Yield the raw body as an item and, for page 1 of a paged report,
    queue the remaining pages.

    The body is decoded as JSON. When its ``code`` is 0, the raw body is
    wrapped in a ``QianniuItem`` with the response meta and today's date.
    If the meta ``cate`` matches the paged category and the URL ends with
    ``&page=1``, one follow-up request per remaining page is yielded; the
    page count is ``recordCount`` divided by 2000, rounded up.

    Any ``Exception`` raised while processing is reported on stdout and
    swallowed, so a single bad response does not abort the crawl.
    """
    try:
        content_json = json.loads(response.body)
        code = content_json.get('code')
        meta = response.meta

        if str(code) != '0':
            # Guard against a missing msg so the membership test cannot
            # raise TypeError on None.
            msg = content_json.get('msg') or ''
            if 'login' in msg:
                # NOTE(review): presumably signals an expired login
                # cookie; nothing is recorded for it at present.
                pass
            return

        item = QianniuItem()
        item['content'] = response.body
        item['meta'] = meta
        item['dt'] = time.strftime('%Y-%m-%d')
        yield item

        if meta.get('cate') != '商品效果':
            return

        # Only the first page fans out; a URL without '&page=' is skipped.
        url_parts = response.url.split('&page=')
        if len(url_parts) < 2 or url_parts[1] != '1':
            return

        # Reuse the already-decoded payload instead of parsing again.
        data = content_json.get('data')
        if not data:
            return
        record_count = data.get('recordCount')
        if not record_count:
            return

        page_num, remainder = divmod(int(record_count), 2000)
        if remainder:
            page_num += 1

        for i in xrange(2, page_num + 1):
            yield Request(
                url_parts[0] + '&page=%s' % i,
                callback=self.parse,
                headers=header,
                cookies=meta.get('cookie_brand'),
                meta={
                    'cate': meta.get('cate'),
                    'month': meta.get('month'),
                    'brand': meta.get('brand'),
                },
                dont_filter=True)
    except Exception as e:
        # Best-effort: report and move on. Catching Exception rather than
        # using a bare except lets GeneratorExit, KeyboardInterrupt and
        # SystemExit propagate.
        print('*******' * 10)
        print('parse failed for %s: %r' % (response.url, e))