예제 #1
0
def _first_text(selector, xpath):
    """Return the first string ``xpath`` extracts from ``selector``, or None if nothing matches."""
    matches = selector.xpath(xpath).extract()
    return matches[0] if matches else None


def _shop_name(page, shop_id):
    """Return the shop name shown on a product page.

    A ``shop_id`` of ``'0'`` is labelled '京东自营' ("JD self-operated")
    without looking at the page. Otherwise a series of page layouts is tried
    in order, and the same label is used when none of them matches.
    """
    if shop_id == '0':
        return '京东自营'

    # The first layout's text is used as-is; the remaining ones are stripped.
    name = _first_text(page, '//ul[@class="parameter2 p-parameter-list"]/li/a//text()')
    if name is not None:
        return name

    fallback_xpaths = (
        '//div[@class="name"]/a//text()',
        '//div[@class="shopName"]/strong/span/a//text()',
        '//div[@class="seller-infor"]/a//text()',
    )
    for xpath in fallback_xpaths:
        name = _first_text(page, xpath)
        if name is not None:
            return name.strip()
    return u'京东自营'


def _product_id_from_url(item_url):
    """Return the last path segment of ``item_url`` minus its final five characters.

    The five characters are presumably the ".html" suffix of URLs shaped like
    ``.../<id>.html`` -- TODO confirm against the URLs the caller passes in.
    """
    # Drop any fragment / query string first so the fixed-width slice below
    # is applied to the path itself rather than to trailing parameters.
    path = item_url.split('#', 1)[0].split('?', 1)[0]
    return path.split('/')[-1][:-5]


def dodetail(url, categorie):
    """Scrape one product detail page and store the result through ``sessionDb``.

    Three requests are made with ``getHtml`` (each with a fresh
    ``get_proxy()``): the detail page itself, the price endpoint on p.3.cn and
    the first page of the comment endpoint on club.jd.com. The collected
    fields are printed and then inserted as a ``ma_myitems1`` row.

    NOTE(review): ``getHtml`` is assumed to return a requests-style response;
    only ``.text``, ``.url`` and ``.json()`` are used here -- confirm.

    Args:
        url: Address of the product detail page.
        categorie: Category value stored unchanged on the row.

    Returns:
        None.

    Raises:
        IndexError: If the page contains no venderId/shopId pair or no title
            node. Errors from the price/comment requests and their JSON
            payloads propagate unchanged. Database errors do not propagate:
            the session is rolled back and the error is printed.
    """
    print(url)
    r3 = getHtml(url, get_proxy())

    # shopId appears quoted on some pages and unquoted on others.
    ids = re.findall(r"venderId:(.*?),\s.*?shopId:'(.*?)'", r3.text)
    if not ids:
        ids = re.findall(r"venderId:(.*?),\s.*?shopId:(.*?),", r3.text)
    if not ids:
        # Same exception type the bare ids[0] lookup produced, but with context.
        raise IndexError('venderId/shopId not found in page: %s' % url)
    shop_id = ids[0][1]

    page = Selector(r3)
    itemurl = r3.url

    name = _shop_name(page, shop_id)

    title_nodes = page.xpath('//div[@class="sku-name"]/text()').extract()
    if title_nodes:
        title = title_nodes[0].replace(u"\xa0", "").strip()
    else:
        # Alternative layout: the title sits in <div id="name"><h1>.
        title_nodes = page.xpath('//div[@id="name"]/h1/text()').extract()
        if not title_nodes:
            raise IndexError('product title not found in page: %s' % url)
        title = title_nodes[0]

    product_id = _product_id_from_url(r3.url)

    desc = page.xpath('//ul[@class="parameter2 p-parameter-list"]//text()').extract()
    desc2 = ';'.join([i.strip() for i in desc])

    price_url = 'https://p.3.cn/prices/mgets?skuIds=J_'
    price_json = getHtml(price_url + product_id, get_proxy()).json()
    # Fields 'p' and 'm' of the first entry are stored as the real and
    # original price respectively.
    reallyPrice = price_json[0]['p']
    originalPrice = price_json[0]['m']

    comment_url = 'https://club.jd.com/comment/productPageComments.action?productId=%s&score=0&sortType=5&page=%s&pageSize=10'
    # The row's ``url`` column holds this comment-API URL, not the page URL
    # (the page URL goes into ``itemurl``).
    comment_page_url = comment_url % (product_id, 1)
    comment_response = getHtml(comment_page_url, get_proxy())
    data = json.loads(comment_response.text)
    commentcount = data["productCommentSummary"]["commentCount"]

    print((name, title, product_id, itemurl, reallyPrice, originalPrice,
           comment_page_url, commentcount, desc2))

    # Persist the record.
    print(desc2)
    print(len(desc2))

    try:
        its = ma_myitems1(shopname=name,
                          title=title,
                          product_id=product_id,
                          itemurl=itemurl,
                          reallyPrice=reallyPrice,
                          originalPrice=originalPrice,
                          url=comment_page_url,
                          commentcount=commentcount,
                          desc2=desc2,
                          shop_id=shop_id,
                          categorie=categorie)

        sessionDb.add(its)
        sessionDb.commit()
    except Exception as e:
        # Best-effort insert: undo the failed transaction and report it.
        sessionDb.rollback()
        print(e)