예제 #1
0
def parseProductPage(product, need_img_urls=False):
    """Fetch a 1688 product detail page and copy scraped fields into ``product``.

    The page URL is built from ``product['sku_id']``.  When that value is
    falsy nothing is fetched and ``product`` is returned untouched.

    Fields written on a successful fetch:
        reviews      text of ``p.satisfaction-number > a > em.value``
        star         ``class`` attribute of ``p.star-level > i``
        total_sales  text of ``p.bargain-number > a > em.value``
        img_urls     ``', '``-joined result of ``get_img_urls(content)`` when
                     ``need_img_urls`` is true, otherwise ``''``
        color, size  text of the cell following the matching ``de-feature``
                     label cell, or ``''`` when no such label is found

    Args:
        product: dict-like record; must contain the key ``'sku_id'``.
        need_img_urls: whether to also collect the image URLs of the page.

    Returns:
        The same ``product`` object, updated in place.
    """
    if not product['sku_id']:
        return product

    content = fetchContent("https://detail.1688.com/offer/" +
                           product['sku_id'] + ".html")
    doc = PyQuery(content)

    # The delivery fee ("div.cost-entries-type > p > em.value") is rendered
    # dynamically by JavaScript, so it cannot be scraped from the static HTML.
    product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
    product['star'] = doc('p.star-level > i').attr("class")
    product['total_sales'] = doc('p.bargain-number > a > em.value').text()

    if need_img_urls:
        product['img_urls'] = ', '.join(get_img_urls(content))
    else:
        product['img_urls'] = ''

    product['color'], product['size'] = '', ''

    # Run the selector once; the attribute table is laid out as label cell
    # followed by value cell, so the value is read from the next <td>.
    cells = doc('div.obj-content > table > tbody > tr > td')
    last_index = len(cells) - 1
    for index, td in enumerate(cells):
        if index >= last_index:
            # A label in the very last cell has no value cell after it;
            # indexing index + 1 here would raise IndexError.
            break
        label_cell = PyQuery(td)
        if label_cell.attr('class') != 'de-feature':
            continue
        label = label_cell.text().strip()
        if label == u'颜色':  # "colour" label
            product['color'] = PyQuery(cells[index + 1]).text()
        elif label == u'尺寸':  # "size" label
            product['size'] = PyQuery(cells[index + 1]).text()
    return product
예제 #2
0
def parseProductPage(product, need_img_urls=False):
    """进入商品详情页, 抓取四个新字段
       delivery reviews star total_sales
    """
    if product['product_url']:
       content = fetchContent(product['product_url'], False)
       doc=PyQuery(content)
       #product['delivery'] = doc("div.cost-entries-type > p > em.value").text() 运费JS动态 解决不了
       product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
       product['star'] = doc('p.star-level > i').attr("class")
       product['total_sales'] = doc('p.bargain-number > a > em.value').text()
       if need_img_urls:
           url_list = get_img_urls(content)
           product['img_urls'] = ', '.join(url_list)
       else:
           product['img_urls'] = ''
       product['color'], product['size'] = '', ''
       for index, td in enumerate(doc('div.obj-content > table > tbody > tr > td')):
            tdQ = PyQuery(td)
            if tdQ.attr('class') =='de-feature' and tdQ.text().strip() == u'颜色':
                product['color'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
            if tdQ.attr('class') =='de-feature' and tdQ.text().strip() == u'尺寸':
                product['size'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
       product['MOQ'] = extractNum(doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
       if not product['MOQ'] or product['MOQ'] == 0:
           product['MOQ'] = extractNum(PyQuery(doc('tr.amount').remove('td.amount-title').children('td').eq(0))('span.value').text())
       if product['MOQ'] == 1:
           #print product['product_url']
           product['sku_size'] = PyQuery(doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
           product['sku_color'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.name').text()
           product['sku_price'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.price').text()
           product['sku_amount'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.count > span > em.value').text()
           print product['sku_id'], '\t', product['sku_size'], "\t", product['sku_color'], "\t", product['sku_price'], "\t", product['sku_amount']
    return product
예제 #3
0
def parseProductPage(product, need_img_urls=False):
    """Fetch a 1688 product detail page and copy scraped fields into ``product``.

    The page URL is built from ``product["sku_id"]``.  When that value is
    falsy nothing is fetched and ``product`` is returned untouched.

    Fields written on a successful fetch:
        reviews      text of ``p.satisfaction-number > a > em.value``
        star         ``class`` attribute of ``p.star-level > i``
        total_sales  text of ``p.bargain-number > a > em.value``
        img_urls     ``", "``-joined result of ``get_img_urls(content)`` when
                     ``need_img_urls`` is true, otherwise ``""``
        color, size  text of the cell following the matching ``de-feature``
                     label cell, or ``""`` when no such label is found

    Args:
        product: dict-like record; must contain the key ``"sku_id"``.
        need_img_urls: whether to also collect the image URLs of the page.

    Returns:
        The same ``product`` object, updated in place.
    """
    if not product["sku_id"]:
        return product

    content = fetchContent("https://detail.1688.com/offer/" + product["sku_id"] + ".html")
    doc = PyQuery(content)

    # The delivery fee ("div.cost-entries-type > p > em.value") is rendered
    # dynamically by JavaScript, so it cannot be scraped from the static HTML.
    product["reviews"] = doc("p.satisfaction-number > a > em.value").text()
    product["star"] = doc("p.star-level > i").attr("class")
    product["total_sales"] = doc("p.bargain-number > a > em.value").text()

    if need_img_urls:
        product["img_urls"] = ", ".join(get_img_urls(content))
    else:
        product["img_urls"] = ""

    product["color"], product["size"] = "", ""

    # Run the selector once; the attribute table is laid out as label cell
    # followed by value cell, so the value is read from the next <td>.
    cells = doc("div.obj-content > table > tbody > tr > td")
    last_index = len(cells) - 1
    for index, td in enumerate(cells):
        if index >= last_index:
            # A label in the very last cell has no value cell after it;
            # indexing index + 1 here would raise IndexError.
            break
        label_cell = PyQuery(td)
        if label_cell.attr("class") != "de-feature":
            continue
        label = label_cell.text().strip()
        if label == u"颜色":  # "colour" label
            product["color"] = PyQuery(cells[index + 1]).text()
        elif label == u"尺寸":  # "size" label
            product["size"] = PyQuery(cells[index + 1]).text()
    return product