class SkuSpiderPipeline(object):
    """Item pipeline that stores ``SkuinfoItem`` records through ``Mysql``.

    A ``Mysql`` helper is created when the spider opens and closed when the
    spider closes. Items of any other type pass through untouched.
    """

    def open_spider(self, Spider):
        """Create the ``Mysql`` helper used for the lifetime of the spider."""
        self.mysql = Mysql()

    def close_spider(self, Spider):
        """Close the ``Mysql`` helper created in ``open_spider``."""
        self.mysql.close_db()

    def process_item(self, item, Spider):
        """Insert one row for a ``SkuinfoItem`` and return the item.

        The target table name is suffixed with ``item['account']``; the row
        holds the item's ``itemId``, ``itemprice``, ``quantity`` and ``time``
        (stored in the ``deposittime`` column).
        """
        # Only SKU items are persisted by this pipeline.
        if not isinstance(item, SkuinfoItem):
            return item

        # NOTE(review): values are interpolated directly into the SQL text, so
        # a value containing a single quote would break the statement.
        # Consider bound parameters if Mysql.insert_one supports them.
        template = (
            "insert into source_taobao_goods_change_0622_{id}"
            "(itemId,itemprice,quantity,deposittime)"
            "values('{itemId}','{itemprice}','{quantity}','{deposittime}')"
        )
        statement = template.format(
            id=item['account'],
            itemId=item['itemId'],
            itemprice=item['itemprice'],
            quantity=item['quantity'],
            deposittime=item['time'],
        )
        self.mysql.insert_one(statement)
        return item
def parse(self, response):
    """Parse a goods detail page and yield the populated ``TbGoodsItem``.

    Three page shapes are handled:

    * The page embeds a ``TShop.Setup(...)`` JSON payload: ``tb_state`` is
      taken from ``itemDO.auctionStatus``. For state ``'0'`` the detail,
      SKU info and properties are filled in; for state ``'-2'`` only the
      detail and SKU info are. Any other state yields nothing.
    * The page contains a ``.error-notice-hd`` element: the item id from
      the request URL is written to ``source_taobao_live_itemId_drop`` and
      nothing is yielded.
    * Otherwise ``taobao_detail`` fills the item; for state ``'0'`` SKU
      info and properties are added, else only ``price`` is read from the
      ``current_price`` form field.

    Any exception is treated as best-effort: the URL and the error text
    are appended to ``error_0612.txt`` and the page is skipped.

    Args:
        response: The downloaded page; ``response.text`` and
            ``response.url`` are read.

    Yields:
        The populated ``TbGoodsItem`` (at most one per page).
    """
    try:
        item = TbGoodsItem()
        item['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # Raw string so ``\(`` is a regex escape rather than an invalid
        # string escape; the dot is escaped to match a literal ".".
        setup_match = re.search(
            r'TShop\.Setup\((.*?)\);', response.text, re.S)
        doc = pq(response.text)

        if setup_match is not None:
            content = json.loads(setup_match.group(1).strip())
            item['tb_state'] = content['itemDO']['auctionStatus']
            # Shop extraction (tmall_shop) is currently disabled.
            if item['tb_state'] == '0':
                self.tmall_detail(content, item, doc)
                self.get_skuinfo(content, item, type='B')
                self.get_prop(doc, item)
                yield item
            elif item['tb_state'] == '-2':
                self.tmall_detail(content, item, doc)
                self.get_skuinfo(content, item, type='B')
                yield item
        elif doc('.error-notice-hd'):
            print('商品下架不存在')
            # The captured id is digits only (``\d+``), so formatting it
            # into the statement cannot inject SQL.
            item_id = re.search(r'&itemid=(\d+)&', response.url).group(1)
            sql = 'replace into `source_taobao_live_itemId_drop` (itemId) values ({id})'.format(
                id=item_id)
            mysql = Mysql()
            try:
                mysql.insert_one(sql)
            finally:
                # Always release the connection, even if the insert fails.
                mysql.close_db()
        else:
            self.taobao_detail(response, item)
            # Shop extraction (get_shop) is currently disabled.
            if item['tb_state'] == '0':
                self.get_skuinfo(response, item)
                self.get_prop(doc, item)
                yield item
            else:
                item['price'] = re.search(
                    'name="current_price" value= "(.*?)"/>',
                    response.text).group(1)
                yield item
    except Exception as e:
        # Best-effort crawl: record the failing URL and carry on.
        with open('error_0612.txt', 'a+') as f:
            f.write(response.url + '\n')
            f.write('error: %s \n' % e)