예제 #1
0
class SkuSpiderPipeline(object):
    """Item pipeline that writes ``SkuinfoItem`` rows through a ``Mysql`` helper.

    The class exposes the ``open_spider`` / ``close_spider`` /
    ``process_item`` hooks. One ``Mysql`` helper is created when the spider
    opens and closed when it closes. Items of any other type are passed
    through untouched.
    """

    def open_spider(self, Spider):
        """Create the ``Mysql`` helper that ``process_item`` writes through."""
        self.mysql = Mysql()

    def close_spider(self, Spider):
        """Close the ``Mysql`` helper created in ``open_spider``."""
        self.mysql.close_db()

    def process_item(self, item, Spider):
        """Insert a ``SkuinfoItem`` into its per-account table; return the item.

        The target table name is ``source_taobao_goods_change_0622_`` followed
        by ``item['account']``. The item is returned for every input type so
        that pipelines running after this one still receive it; previously
        anything that was not a ``SkuinfoItem`` came back as ``None``.
        """
        if isinstance(item, SkuinfoItem):
            # NOTE(review): the statement is assembled with str.format from
            # item values, so a quote in any field breaks (or injects into)
            # the SQL. Switching to bound parameters needs a look at
            # Mysql.insert_one, which is not visible from here; the table-name
            # suffix cannot be a bound parameter and should be validated.
            sql_detail = "insert into source_taobao_goods_change_0622_{id}(itemId,itemprice,quantity,deposittime)values('{itemId}','{itemprice}','{quantity}','{deposittime}')".format(
                id=item['account'],
                itemId=item['itemId'],
                itemprice=item['itemprice'],
                quantity=item['quantity'],
                deposittime=item['time'],
            )
            self.mysql.insert_one(sql_detail)
        # Always hand the item on, whether or not it was stored here.
        return item
예제 #2
0
 def parse(self, response):
     """Parse a product detail page and yield the populated ``TbGoodsItem``.

     Three page shapes are handled:

     * The page embeds a ``TShop.Setup(...)`` JSON payload. The item is
       filled from it and yielded when ``tb_state`` is ``'0'`` or ``'-2'``;
       ``get_prop`` is only called for ``'0'``.
     * The page contains an ``.error-notice-hd`` element. The item id taken
       from the URL is written to ``source_taobao_live_itemId_drop`` and
       nothing is yielded.
     * Anything else. ``taobao_detail`` fills the item (it is expected to
       set ``item['tb_state']``, which is read right after). State ``'0'``
       also gets SKU info and properties; any other state only gets the
       ``current_price`` value scraped from the page.

     Any exception raised while parsing is appended, together with the
     URL, to ``error_0612.txt`` instead of being propagated.
     """
     try:
         item = TbGoodsItem()
         item['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         # Raw string: '\(' in a plain literal is an invalid escape sequence.
         setup_match = re.search(
             r'TShop.Setup\((.*?)\);', response.text, re.S)
         doc = pq(response.text)
         if setup_match is not None:
             content = json.loads(setup_match.group(1).strip())
             item['tb_state'] = content['itemDO']['auctionStatus']
             if item['tb_state'] == '0':
                 self.tmall_detail(content, item, doc)
                 self.get_skuinfo(content, item, type='B')
                 self.get_prop(doc, item)
                 # self.tmall_shop(doc, item)
                 yield item
             # Kept as a separate `if` (not `elif`) as in the original: the
             # helpers above receive `item`, and whether they can change
             # `tb_state` is not visible from here.
             if item['tb_state'] == '-2':
                 self.tmall_detail(content, item, doc)
                 self.get_skuinfo(content, item, type='B')
                 # self.tmall_shop(doc, item)
                 yield item
         elif doc('.error-notice-hd'):
             # Message text: "item delisted / does not exist".
             print('商品下架不存在')
             item_id = re.search(r'&itemid=(\d+)&', response.url).group(1)
             sql = 'replace into `source_taobao_live_itemId_drop` (itemId) values ({id})'.format(
                 id=item_id)
             mysql = Mysql()
             try:
                 mysql.insert_one(sql)
             finally:
                 # Release the connection even when the insert fails.
                 mysql.close_db()
         else:
             self.taobao_detail(response, item)
             # self.get_shop(response, item)
             if item['tb_state'] == '0':
                 self.get_skuinfo(response, item)
                 self.get_prop(doc, item)
                 yield item
             else:
                 item['price'] = re.search(
                     'name="current_price" value= "(.*?)"/>',
                     response.text).group(1)
                 yield item
     except Exception as e:
         # Best-effort crawl: record the failing URL and keep going.
         with open('error_0612.txt', 'a+') as f:
             f.write(response.url + '\n')
             f.write('error: %s \n' % e)