def dodetail(url, categorie):
    """Scrape one product detail page and store the result in the database.

    Fetches the page at ``url``, extracts the shop name, title and parameter
    list from its HTML, then queries the price endpoint and the first page of
    the comment endpoint for the same product.  The collected fields are
    printed and saved as a ``ma_myitems1`` row through ``sessionDb``.

    Args:
        url: Address of the product detail page to fetch.
        categorie: Category value stored with the item; passed through
            unchanged.

    Returns:
        None.  Database errors are rolled back and printed, not raised.

    Raises:
        IndexError: If the page contains neither the vendor/shop ids nor a
            product title (same exception type the unguarded indexing
            raised before, now with a message naming the page).
    """
    print(url)
    page = getHtml(url, get_proxy())

    # The shop id appears either quoted or bare in the page script, so try
    # the quoted form first and fall back to the unquoted one.
    ids = re.findall(r"venderId:(.*?),\s.*?shopId:'(.*?)'", page.text)
    if not ids:
        ids = re.findall(r"venderId:(.*?),\s.*?shopId:(.*?),", page.text)
    if not ids:
        raise IndexError("venderId/shopId not found in page: %s" % page.url)
    shop_id = ids[0][1]

    selector = Selector(page)
    itemurl = page.url

    if shop_id == '0':
        name = '京东自营'
    else:
        # Candidate locations of the shop name, tried in order.  The flag
        # says whether the match is stripped; the first layout is kept
        # unstripped to preserve the values stored so far.
        shop_name_xpaths = (
            ('//ul[@class="parameter2 p-parameter-list"]/li/a//text()', False),
            ('//div[@class="name"]/a//text()', True),
            ('//div[@class="shopName"]/strong/span/a//text()', True),
            ('//div[@class="seller-infor"]/a//text()', True),
        )
        for xpath, strip in shop_name_xpaths:
            matches = selector.xpath(xpath).extract()
            if matches:
                name = matches[0].strip() if strip else matches[0]
                break
        else:
            # No layout matched: fall back to the default shop name.
            name = u'京东自营'

    sku_titles = selector.xpath('//div[@class="sku-name"]/text()').extract()
    if sku_titles:
        title = sku_titles[0].replace(u"\xa0", "").strip()
    else:
        # Alternative page layout keeps the title in an <h1>.
        h1_titles = selector.xpath('//div[@id="name"]/h1/text()').extract()
        if not h1_titles:
            raise IndexError("product title not found in page: %s" % page.url)
        title = h1_titles[0]

    # Last path segment of the final URL minus its trailing five characters
    # (presumably the ".html" suffix -- confirm against real URLs).
    product_id = page.url.split('/')[-1][:-5]

    desc = selector.xpath(
        '//ul[@class="parameter2 p-parameter-list"]//text()').extract()
    desc2 = ';'.join(part.strip() for part in desc)

    price_url = 'https://p.3.cn/prices/mgets?skuIds=J_'
    price_response = getHtml(price_url + product_id, get_proxy())
    price_json = price_response.json()
    reallyPrice = price_json[0]['p']
    originalPrice = price_json[0]['m']

    comment_url = 'https://club.jd.com/comment/productPageComments.action?productId=%s&score=0&sortType=5&page=%s&pageSize=10'
    # Only the first comment page is requested; it carries the summary.
    comments_page_url = comment_url % (product_id, 1)
    comment_response = getHtml(comments_page_url, get_proxy())
    data = json.loads(comment_response.text)
    commentcount = data["productCommentSummary"]["commentCount"]

    print((name, title, product_id, itemurl, reallyPrice, originalPrice,
           comments_page_url, commentcount, desc2))
    # Persist to the database.
    print(desc2)
    print(len(desc2))
    try:
        # NOTE: the ``url`` column receives the comment-page URL, exactly as
        # before; the product page address is stored in ``itemurl``.
        its = ma_myitems1(shopname=name, title=title, product_id=product_id,
                          itemurl=itemurl, reallyPrice=reallyPrice,
                          originalPrice=originalPrice, url=comments_page_url,
                          commentcount=commentcount, desc2=desc2,
                          shop_id=shop_id, categorie=categorie)
        sessionDb.add(its)
        sessionDb.commit()
    except Exception as e:
        # Best-effort insert: undo the failed transaction and keep going.
        sessionDb.rollback()
        print(e)