Example #1
0
def id_page_one(pre_cate, i):
    """Fetch one result page from ``s.taobao.com/search`` and print its body.

    ``pre_cate`` and ``i`` are passed straight through to ``get_params2``,
    which builds the query parameters. Nothing is returned; the response
    text is only written to stdout.
    """
    # The request below is made with verify=False, so silence the warning
    # urllib3 would otherwise emit for an unverified HTTPS request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    request_headers = get_headers2()
    query = get_params2(pre_cate, i)
    response = requests.get(
        "https://s.taobao.com/search",
        headers=request_headers,
        params=query,
        verify=False,
    )
    print(response.text)
Example #2
0
def one_detail_info(goods_id):
    """Scrape one ``item.jd.com`` product page into a dict.

    The item page is fetched once and parsed once; the video and stock
    endpoints are then queried for data the page only references by id.

    Args:
        goods_id: Product id, interpolated into the item URL and passed to
            ``get_params3`` for the headline price lookup.

    Returns:
        dict with the keys ``source``, ``goods_id``, ``title``, ``imgsSrc``,
        ``shop_name`` and ``spcification``. ``videoUrl`` is present only when
        the page declares a ``mainVideoId``; ``price`` is present only when
        the stock response contains a ``"p"`` field.

    Raises:
        AttributeError: if ``venderId``, ``cat`` or ``colorSize`` cannot be
            found in the page, or a per-SKU stock response has no ``"p"``.
        ValueError, SyntaxError: if the ``colorSize`` payload is not a
            plain Python literal.
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import ast

    url = "https://item.jd.com/{}.html".format(goods_id)
    res = requests.get(url, headers=get_headers3()).text
    page = etree.HTML(res)  # parse once, reuse for every xpath below

    product_info = {}
    product_info['source'] = url
    product_info['goods_id'] = goods_id
    product_info['title'] = ''.join(
        page.xpath("//html[@lang='zh-CN']/head/title/text()"))
    product_info['imgsSrc'] = [
        "https:" + ''.join(li.xpath("./img/@src"))
        for li in page.xpath("//ul[@class='lh']/li")
    ]
    product_info['shop_name'] = ''.join(
        page.xpath("//div[@class='J-hove-wrap EDropdown fr']/div/div/a/@title"))

    # Video id: resolve it to a playable URL through the video endpoint.
    video_match = re.search(r'mainVideoId"\:.*?"(\d+)"', res, re.S)
    if video_match is not None:
        url1 = "https://c.3.cn/tencent/video_v3"
        video_res = requests.get(url1,
                                 params=get_params2(video_match.group(1)),
                                 headers=get_headers4(url)).text
        product_info['videoUrl'] = re.search(r'playUrl"\:.*?"(.*?)"',
                                             video_res, re.S).group(1)

    # venderId and cat are request parameters of the price (stock) endpoint.
    venderId = re.search(r'venderId\:.*?(\d+)', res, re.S).group(1)
    url2 = "https://c0.3.cn/stock"
    cat = re.search(r'cat\:.*?\[(.*?)\]', res, re.S).group(1)
    price_pattern = re.compile(r'"p"\:.*?"(.*?)"', re.S)

    res2 = requests.get(url2,
                        headers=get_headers4(url),
                        params=get_params3(goods_id, venderId, cat)).text
    price_match = price_pattern.search(res2)
    # Same guard get_detail_info applies: leave 'price' out when the stock
    # response has no price instead of failing on a None match.
    if price_match is not None:
        product_info['price'] = price_match.group(1)

    # Colour name -> URL of the thumbnail shown for that colour.
    color_dict = {
        ''.join(color.xpath("./@data-value")):
        'https:' + ''.join(color.xpath("./a/img/@src"))
        for color in page.xpath(
            "//div[@id='choose-attr-1']/div[@class='dd']/div")
    }

    # SECURITY: the SKU table comes from a remote page. ast.literal_eval only
    # accepts literals, so the response can no longer execute code here the
    # way it could with eval().
    sku_list = ast.literal_eval(
        '[' + re.search(r'colorSize.*?\[(.*?)\]', res, re.S).group(1) + ']')

    # str(attribute values) -> (attribute values, price). The values are kept
    # next to their string key so the key never has to be evaluated back
    # into a list.
    priced_specs = {}
    for sku in sku_list:
        sku_id = sku.pop('skuId')
        res3 = requests.get(url2,
                            headers=get_headers4(url),
                            params=get_params3(sku_id, venderId, cat)).text
        price = price_pattern.search(res3).group(1)
        values = list(sku.values())
        priced_specs[str(values)] = (values, price)

    # Keep only the specs that have a value matching a known colour, and
    # attach that colour's image URL.
    another_dict = {}
    for spec_key, (values, price) in priced_specs.items():
        for value in values:
            if value in color_dict:
                another_dict[spec_key] = {
                    "price": price,
                    'url': color_dict[value]
                }
    product_info['spcification'] = another_dict
    pprint.pprint(another_dict)
    return product_info
Example #3
0
def one_page(cate, page):
    """Fetch one Suning search result page and request each listed product.

    ``cate`` and ``page`` are handed to ``get_params1`` to build the search
    query. The JSON object is cut out of the JSONP response, pretty-printed
    as a string, decoded, and every entry of its ``goods`` list is requested
    from the mobile product URL. The product responses are not used and
    nothing is returned.
    """
    search_params = get_params1(cate, page)
    search_headers = get_headers2()
    listing = requests.get(
        "http://search.suning.com/emall/mobile/wap/clientSearch.jsonp",
        params=search_params,
        headers=search_headers,
    ).text

    # Take everything between the first '{' and the 'jlf_fold_onoff' key,
    # drop the dangling quote and comma, then re-wrap it in braces.
    fragment = re.search('\{(.*?)jlf_fold_onoff', listing, re.S).group(1)
    fragment = fragment.rstrip('"').rstrip(',')
    payload = '{' + fragment + '}'
    pprint.pprint(payload)

    for item in json.loads(payload)['goods']:
        detail_url = "https://m.suning.com/product/0000000000/{}.html".format(
            item['partnumber'])
        requests.get(detail_url, params=get_params2(), headers=get_headers3())
Example #4
0
def one_id_plus(pre_cate, i):
    """Return the product ids listed on one ``s.taobao.com/search`` page.

    Args:
        pre_cate: Forwarded to ``get_params2`` to build the search query.
        i: Forwarded to ``get_params2`` to build the search query.

    Returns:
        list of the values found in the page's ``allNids`` array.

    Raises:
        AttributeError: if the response contains no ``allNids`` array.
        ValueError, SyntaxError: if the array content is not made of plain
            Python literals.
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import ast

    url = "https://s.taobao.com/search"
    # The request is made with verify=False; silence the resulting warning.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    res = requests.get(url,
                       headers=get_headers2(),
                       params=get_params2(pre_cate, i),
                       verify=False).text
    nids = re.search(r'allNids.*?\[(.*?)\]', res, re.S).group(1)
    # SECURITY: the array text comes from a remote response. literal_eval
    # parses literals only, where eval() would run whatever the page sent.
    return ast.literal_eval('[' + nids + ']')
Example #5
0
def one_detail(goods_id):
    """Scrape one Suning product (mobile page plus desktop page) into a dict.

    The mobile page supplies the title, shop name, gallery images and the
    desktop URL; the desktop page supplies the variant cluster, colour and
    version lists and the optional video; a price endpoint supplies the
    price of every variant.

    Args:
        goods_id: Product id interpolated into the mobile product URL.

    Returns:
        dict with the keys ``title``, ``goods_id``, ``shop_name``,
        ``source``, ``spcification`` and ``imgsSrc``. ``price`` (the lowest
        variant price, as a float) is present only when at least one
        variant has a price; ``videoUrl`` only when the desktop page
        declares one.

    Raises:
        IndexError: if the mobile page has fewer than two ``shopName``
            matches.
        AttributeError: if ``toPcUrl``, ``clusterMap`` or the JSONP payload
            cannot be found.
        ValueError, SyntaxError: if the ``clusterMap`` payload is not a
            plain Python literal.
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import ast

    url2 = "https://m.suning.com/product/0000000000/{}.html".format(goods_id)
    # Every request below uses verify=False; silence the resulting warning.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    res2 = requests.get(url2,
                        params=get_params2(),
                        headers=get_headers3(),
                        verify=False).text
    mobile_page = etree.HTML(res2)  # parsed once, reused for the gallery

    product_info = {}
    product_info['title'] = ''.join(
        mobile_page.xpath("//span[@id='product-name']/text()"))
    product_info['goods_id'] = goods_id
    shop_names = re.findall(r'shopName\:.*?"(.*?)"', res2, re.S)
    # The second match is the one used as the shop name.
    # NOTE(review): presumably the first match is not the display name.
    product_info['shop_name'] = shop_names[1]
    # The desktop URL is embedded with '/' written as the text '\u002F'.
    product_info['source'] = "https:" + re.search(
        r'toPcUrl.*?"(.*?)"', res2, re.S).group(1).replace('\\u002F', '/')
    res3 = requests.get(product_info['source'],
                        headers=get_headers4(),
                        verify=False).text

    # Cut the clusterMap array out of the page: everything after its '[' up
    # to the following 'colorList' key, minus the trailing quote and comma.
    cluster_text = re.search(r'clusterMap"\:.*?\[(.*?)colorList', res3,
                             re.S).group(1).rstrip('"').strip().rstrip(',')
    # SECURITY: this text comes from a remote page. literal_eval parses
    # literals only, where eval() would run whatever the page sent.
    cluster = ast.literal_eval('[' + cluster_text)

    cluster_list = []
    for clu in cluster:
        for cl in clu['itemCuPartNumber']:
            cluster_list.append(cl['partNumber'])
    # Each part number is paired with its first five characters doubled.
    zero_list = [part_number[0:5] * 2 for part_number in cluster_list]

    # Build the variant price URL from two comma-separated id lists.
    url4_one = ','.join(cluster_list)
    url4_two = ','.join(zero_list)
    url4 = 'https://icps.suning.com/icps-web/getVarnishAllPriceNoCache/' + url4_one + '_028_0280199_' + url4_two + '_1_getClusterPrice.jsonp?callback=getClusterPrice'
    res4_temp = requests.get(url4, headers=get_headers5(), verify=False).text
    # Strip the JSONP wrapper: keep what is between the parentheses.
    price_rows = json.loads(
        re.search(r'\((.*?)\)', res4_temp, re.S).group(1))
    price_dict = {str(row['cmmdtyCode']): row['price'] for row in price_rows}

    desktop_page = etree.HTML(res3)

    # Colour title -> first image of that colour's own product page.
    img_dict = {}
    for color in desktop_page.xpath("//dl[@id='colorItemList']/dd/ul/li"):
        url5 = 'https:' + ''.join(color.xpath("./a/@href"))
        res5 = requests.get(url5, headers=get_headers4(), verify=False).text
        imgs = json.loads(''.join(
            etree.HTML(res5).xpath(
                "//script[@type='application/ld+json']/text()")))
        img_dict[''.join(color.xpath("./@title"))] = imgs['images'][0]

    # Version sku -> version title.
    version_dict = {
        ''.join(version.xpath("./@sku")): ''.join(version.xpath("./@title"))
        for version in desktop_page.xpath(
            "//dl[@id='versionItemList']/dd/ul/li")
    }

    # Every version x colour combination, keyed "<version>&<colour>".
    merge_dict = {}
    for version_sku, version_title in version_dict.items():
        for color_title, img_url in img_dict.items():
            merge_dict[version_title + '&' + color_title] = {
                'url': str(img_url),
                'id': version_sku
            }

    # Keep only the combinations whose sku has a known price.
    merge_price = {}
    for spec_name, spec in merge_dict.items():
        if spec['id'] in price_dict:
            merge_price[spec_name] = {
                'price': str(price_dict[spec['id']]),
                'url': spec['url']
            }
    product_info['spcification'] = merge_price

    price_list = [float(spec['price']) for spec in merge_price.values()]
    # min() raises on an empty sequence; a product with no priced variant
    # simply gets no 'price' key.
    if price_list:
        product_info['price'] = min(price_list)

    video = re.search(r'videoUrl"\:.*?"(.*?)"', res3, re.S)
    if video is not None:
        product_info['videoUrl'] = video.group(1)

    product_info['imgsSrc'] = [
        "https:" + ''.join(img.xpath("./@src"))
        for img in mobile_page.xpath("//div[@class='swiper-slide']/img")
    ]
    return product_info
Example #6
0
def get_detail_info(cate):
    """Scrape every product listed on the JD list page of one category.

    The category tree is downloaded from ``dc.3.cn`` and turned into a
    name -> list-URL mapping. The list page for ``cate`` is fetched, the
    product ids in its ``wids`` field are extracted, and each item page is
    scraped in turn. Each product URL and each finished product dict is
    printed as it is processed.

    Args:
        cate: Category name; must be a key of the scraped category mapping.

    Returns:
        list of dicts, one per product, with the keys ``source``,
        ``goods_id``, ``title``, ``imgsSrc``, ``shop_name`` and
        ``spcification``. ``videoUrl`` is present only when the item page
        declares a ``mainVideoId``; ``price`` only when the stock response
        contains a ``"p"`` field.

    Raises:
        KeyError: if ``cate`` is not among the scraped category names.
        AttributeError: if an expected pattern (category payload, ``wids``,
            ``venderId``, ``cat``, ``colorSize``, per-SKU price) is missing.
        ValueError, SyntaxError: if the ``wids`` or ``colorSize`` payload
            is not a plain Python literal.
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import ast

    url = 'https://dc.3.cn/category/get?&callback=getCategoryCallback'
    res = requests.get(url, headers=get_headers1()).text
    # Strip the JSONP wrapper: keep what is between the parentheses.
    cate_temp = json.loads(re.search(r'\((.*?)\)', res, re.S).group(1))

    # Category name -> list page URL. Each leaf's 'n' field is read as
    # "<url or id path>|<name>||...".
    cate_dict = {}
    for data_temp in cate_temp['data']:
        for data_eve in data_temp['s']:
            for temp in data_eve['s']:
                for data in temp['s']:
                    cate_name = re.search(r'\|(.*?)\|\|', data['n'],
                                          re.S).group(1)
                    cate_url = re.search(r'(.*?)\|', data['n'],
                                         re.S).group(1)
                    if '-' in cate_url:
                        # A dash-separated id path becomes a cat= query.
                        cate_dict[cate_name] = (
                            'https://list.jd.com/list.html?cat=' +
                            cate_url.replace('-', ','))
                    else:
                        cate_dict[cate_name] = 'https://' + cate_url

    list_page = requests.get(cate_dict['%s' % cate],
                             headers=get_headers3()).text
    # SECURITY: the id list comes from a remote page. literal_eval parses
    # literals only, where eval() would run whatever the page sent.
    ids = ast.literal_eval(
        '[' + re.search(r"wids.*?'(.*?)'", list_page, re.S).group(1) + ']')

    url2 = "https://c0.3.cn/stock"
    price_pattern = re.compile(r'"p"\:.*?"(.*?)"', re.S)

    product_info_list = []
    for ids_eve in ids:
        url4 = "https://item.jd.com/{}.html".format(ids_eve)
        print(url4)
        res4 = requests.get(url4, headers=get_headers3()).text
        page = etree.HTML(res4)  # parse once, reuse for every xpath below

        product_info = {}
        product_info['source'] = url4
        product_info['goods_id'] = ids_eve
        product_info['title'] = ''.join(
            page.xpath("//html[@lang='zh-CN']/head/title/text()"))
        product_info['imgsSrc'] = [
            "https:" + ''.join(li.xpath("./img/@src"))
            for li in page.xpath("//ul[@class='lh']/li")
        ]
        product_info['shop_name'] = ''.join(
            page.xpath(
                "//div[@class='J-hove-wrap EDropdown fr']/div/div/a/@title"))

        # Video id: resolve it to a playable URL through the video endpoint.
        video_match = re.search(r'mainVideoId"\:.*?"(\d+)"', res4, re.S)
        if video_match is not None:
            url1 = "https://c.3.cn/tencent/video_v3"
            video_res = requests.get(url1,
                                     params=get_params2(video_match.group(1)),
                                     headers=get_headers4(url4)).text
            product_info['videoUrl'] = re.search(r'playUrl"\:.*?"(.*?)"',
                                                 video_res, re.S).group(1)

        # venderId and cat are request parameters of the price endpoint.
        venderId = re.search(r'venderId\:.*?(\d+)', res4, re.S).group(1)
        cat = re.search(r'cat\:.*?\[(.*?)\]', res4, re.S).group(1)
        res2 = requests.get(url2,
                            headers=get_headers4(url4),
                            params=get_params3(ids_eve, venderId, cat)).text
        price_match = price_pattern.search(res2)
        if price_match is not None:
            product_info['price'] = price_match.group(1)

        # Colour name -> URL of the thumbnail shown for that colour.
        color_dict = {
            ''.join(color.xpath("./@data-value")):
            'https:' + ''.join(color.xpath("./a/img/@src"))
            for color in page.xpath(
                "//div[@id='choose-attr-1']/div[@class='dd']/div")
        }

        # SECURITY: parsed with literal_eval for the same reason as the id
        # list above.
        sku_list = ast.literal_eval(
            '[' + re.search(r'colorSize.*?\[(.*?)\]', res4, re.S).group(1) +
            ']')

        # str(attribute values) -> (attribute values, price). The values
        # stay next to their string key so the key never has to be
        # evaluated back into a list.
        priced_specs = {}
        for sku in sku_list:
            sku_id = sku.pop('skuId')
            res3 = requests.get(url2,
                                headers=get_headers4(url4),
                                params=get_params3(sku_id, venderId,
                                                   cat)).text
            price = price_pattern.search(res3).group(1)
            values = list(sku.values())
            priced_specs[str(values)] = (values, price)

        # Keep only the specs that have a value matching a known colour,
        # and attach that colour's image URL.
        another_dict = {}
        for spec_key, (values, price) in priced_specs.items():
            for value in values:
                if value in color_dict:
                    another_dict[spec_key] = {
                        "price": price,
                        'url': color_dict[value]
                    }
        product_info['spcification'] = another_dict
        pprint.pprint(product_info)
        product_info_list.append(product_info)
    return product_info_list
Example #7
0
def one_detail(goods_id):
    """Fetch the mobile Suning page of ``goods_id`` and pretty-print its body.

    Nothing is returned; the response text is only written to stdout.
    """
    page_url = "https://m.suning.com/product/0000000000/{}.html".format(goods_id)
    # The request is made with verify=False; silence the resulting warning.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    query = get_params2()
    request_headers = get_headers3()
    response = requests.get(
        page_url,
        params=query,
        headers=request_headers,
        verify=False,
    )
    pprint.pprint(response.text)
Example #8
0
    etree.HTML(res).xpath("//html[@lang='zh-CN']/head/title/text()"))
lis = etree.HTML(res).xpath("//ul[@class='lh']/li")
imgsSrc_list = []
for li in lis:
    imgsSrc = "https:" + ''.join(li.xpath("./img/@src"))
    imgsSrc_list.append(imgsSrc)
product_info['imgsSrc'] = imgsSrc_list
product_info['shop_name'] = ''.join(
    etree.HTML(res).xpath(
        "//div[@class='J-hove-wrap EDropdown fr']/div/div/a/@title"))
# 获取视频id
if re.search('mainVideoId\"\:.*?\"(\d+)\"', res, re.S) != None:
    mainVideoId = re.search('mainVideoId\"\:.*?\"(\d+)\"', res, re.S).group(1)
    url1 = "https://c.3.cn/tencent/video_v3"
    res1 = requests.get(url1,
                        params=get_params2(mainVideoId),
                        headers=get_headers4(url)).text
    product_info['videoUrl'] = re.search('playUrl\"\:.*?\"(.*?)\"', res1,
                                         re.S).group(1)
# 获取价格请求参数中的venderId
venderId = re.search('venderId\:.*?(\d+)', res, re.S).group(1)
url2 = "https://c0.3.cn/stock"
goods_id = '70108536727'
cat = re.search('cat\:.*?\[(.*?)\]', res, re.S).group(1)
res2 = requests.get(url2,
                    headers=get_headers4(url),
                    params=get_params3(goods_id, venderId, cat)).text
product_info['price'] = re.search('\"p\"\:.*?\"(.*?)\"', res2, re.S).group(1)
# 获取商品的所有规格id
color_dict = {}
for color in etree.HTML(res).xpath(