Example #1
0
    def _get_true_sku_info(self, goods_id, tmp_data):
        '''
        得到每个规格对应的库存, 价格, 图片等详细信息
        :param tmp_data:
        :return:
        '''
        multiColor = tmp_data[5].get('result', {})
        # sku_price = tmp_data[2].get('result', {}).get('sku_price', [])
        ## ** 研究发现multiColor以及productSku中的type为1时,表示该商品规格库存为0
        productSku = tmp_data[6].get('result', {}).get('productSku', {})
        # tmp = {
        #     'multiColor': multiColor,
        #     # 'sku_price': sku_price,
        #     'productSku': productSku,
        # }
        # pprint(tmp)

        true_sku_info = []
        if multiColor == {} or productSku == {}:
            return []
        else:
            if multiColor.get('items') is None:
                color_ = None
            else:
                tmp_color_items = multiColor.get('items', [])
                color_ = []
                for item in tmp_color_items:
                    if item.get('type', 0) == 1:  # 该颜色无库存
                        continue
                    else:  # 为0,表示有库存
                        # 先获取到有库存的对应规格, 是否有颜色属性后面再判断
                        color_.append({
                            'goods_id':
                            item.get('product_id', ''),
                            'name':
                            item.get('name', ''),
                            'img_url':
                            'https:' +
                            item.get('icon', {}).get('imageUrl', '')
                        })

            if color_ == []:  # 没有规格 也可能是 # 表示没有库存, 买完或者下架
                print('获取到的color_为空[], 请检查!')
                return []
            else:
                if productSku.get('items') is None:
                    print('获取到的others_items为None')
                    return []

                else:
                    other_items = productSku.get('items', [])
                    other_ = []
                    for item in other_items:
                        if item.get('type', 0) == 1:  # 该规格无库存
                            continue
                        else:  # 该规格有库存
                            detail_price = item.get('promotion_price', '')
                            # 还是选择所有商品都拿最优惠的价格
                            # if detail_price == '' or goods_id[0] == 1:      # 为空就改为获取vipshop_price字段
                            if detail_price == '':  # 为空就改为获取vipshop_price字段
                                detail_price = item.get('vipshop_price', '')
                            else:
                                pass
                            normal_price = item.get('market_price', '')
                            if normal_price == '':
                                normal_price = detail_price
                            other_.append({
                                'spec_value':
                                item.get('sku_name', ''),
                                'detail_price':
                                detail_price,
                                'normal_price':
                                normal_price,
                                'img_url':
                                '',  # 设置默认为空值
                                'rest_number':
                                item.get('leavings', 0),  # 该规格的剩余库存量
                            })

                if color_ is None:
                    for item_2 in other_:
                        spec_value = item_2.get('spec_value', '')
                        item_2['spec_value'] = spec_value
                        item_2['img_url'] = ''
                        true_sku_info.append(item_2)

                elif len(
                        color_
                ) == 1:  # 颜色长度为1时,表示唯品会默认选择的属性,不需要将color_相关的值添加到spec_value里面
                    true_sku_info = other_

                else:
                    for item in color_:
                        if item.get(
                                'goods_id') == goods_id[1]:  # 表示为原先的那个goods_id
                            if item.get('name', '') == '无':  # 表示无颜色属性
                                pass
                            else:
                                for item_2 in other_:
                                    spec_value = item.get(
                                        'name', '') + '|' + item_2.get(
                                            'spec_value', '')
                                    item_2['spec_value'] = spec_value
                                    item_2['img_url'] = item.get('img_url', '')
                                    true_sku_info.append(item_2)

                        else:  # 表示是其他颜色对应的goods_id
                            '''
                            下面是获取该颜色对应goods_id的所有可售的规格价格信息
                            '''
                            url = 'https://m.vip.com/server.html'
                            params = self._set_params()

                            page = 'product-0-' + str(goods_id[1]) + '.html'
                            post_data = self._set_post_data(page=page)

                            tmp_data_2 = MyRequests.post_url_body(
                                url=url,
                                headers=self.headers,
                                params=params,
                                data=post_data)
                            # print(tmp_data_2)

                            # 先处理得到dict数据
                            if tmp_data_2 == '':
                                print('获取其他颜色规格的url的body时为空值')
                                return []
                            else:
                                try:
                                    tmp_data_2 = json.loads(tmp_data_2)
                                    # pprint(tmp_data_2)
                                except Exception:
                                    print('json.loads转换tmp_data_2时出错, 请检查!')
                                    return []

                                other_items_2 = tmp_data_2[6].get(
                                    'result', {}).get('productSku',
                                                      {}).get('items', [])
                                other_2 = []
                                for item_3 in other_items_2:
                                    if item_3.get('type', 0) == 1:  # 该规格无库存
                                        continue
                                    else:  # 该规格有库存
                                        detail_price = item_3.get(
                                            'promotion_price', '')
                                        # 还是都拿最优惠的价格 不管限时2小时时间问题的折扣
                                        # if detail_price == '' or goods_id[0] == 1:  # 为空就改为获取vipshop_price字段
                                        if detail_price == '':  # 为空就改为获取vipshop_price字段
                                            detail_price = item_3.get(
                                                'vipshop_price', '')
                                        normal_price = item_3.get(
                                            'market_price', '')
                                        if normal_price == '':
                                            normal_price = detail_price
                                        other_2.append({
                                            'spec_value':
                                            item_3.get('sku_name', ''),
                                            'detail_price':
                                            detail_price,
                                            'normal_price':
                                            normal_price,
                                            'rest_number':
                                            item_3.get('leavings',
                                                       0),  # 设置默认的值
                                            'img_url':
                                            '',  # 设置默认为空值
                                        })

                                for item_4 in other_2:
                                    spec_value = item.get(
                                        'name', '') + '|' + item_4.get(
                                            'spec_value', '')
                                    item_4['spec_value'] = spec_value
                                    item_4['img_url'] = item.get('img_url', '')
                                    true_sku_info.append(item_4)

        return true_sku_info
Example #2
0
    def get_goods_data(self, goods_id):
        '''
        Request the product page data for ``goods_id`` and assemble it.

        The result is also stored on ``self.result_data``; on every failure
        path ``self.result_data`` is reset to ``{}`` so a stale result is not
        picked up by a later save.

        :param goods_id: list; ``goods_id[1]`` is the product id used to build
            the requested page name
        :return: dict with the keys ``title``, ``sub_title``, ``shop_name``,
            ``all_img_url``, ``p_info``, ``div_desc``, ``sell_time``,
            ``detail_name_list`` and ``price_info_list``; ``{}`` when
            ``goods_id`` is empty, the response body is empty or not valid
            JSON, or any required field is missing
        '''
        if not goods_id:
            self.result_data = {}
            return {}

        # Endpoint captured from the Vipshop WeChat mini-program.
        url = 'https://m.vip.com/server.html'
        params = self._set_params()

        page = 'product-0-' + str(goods_id[1]) + '.html'
        post_data = self._set_post_data(page=page)

        body = MyRequests.post_url_body(url=url,
                                        headers=self.headers,
                                        params=params,
                                        data=post_data)

        if body == '':
            self.result_data = {}
            return {}

        try:
            tmp_data = json.loads(body)
        except (TypeError, ValueError):
            print('json.loads转换body时出错, 请检查!')
            tmp_data = {}

        if tmp_data == {}:
            self.result_data = {}
            return {}

        data = {}
        try:
            detail = tmp_data[2].get('result', {})

            # title, sub_title
            # Explicit raises instead of ``assert`` so the checks still run
            # under ``python -O``; the handler below prints the same message.
            data['title'] = detail.get('product_name', '')
            if data['title'] == '':
                raise ValueError('获取到的title为空值, 请检查!')
            data['sub_title'] = ''

            # shop_name
            data['shop_name'] = detail.get('brand_info', {}).get(
                'brand_name', '')

            # all sample images
            all_img_url = detail.get('img_pre', [])
            if all_img_url == []:
                raise ValueError('获取到的all_img_url为空[], 请检查!')
            data['all_img_url'] = [{
                'img_url': 'https:' + item.get('b_img', '')
            } for item in all_img_url]

            # p_info
            p_info = self._get_p_info(tmp_data=tmp_data)
            if p_info == []:
                raise ValueError('p_info为空list, 请检查!')
            data['p_info'] = p_info

            # div_desc of the product
            div_desc = self.get_goods_div_desc(
                tmp_data=detail.get('detailImages', []))
            if div_desc == '':
                raise ValueError('获取到的div_desc为空值! 请检查')
            data['div_desc'] = div_desc

            # on-shelf / off-shelf times
            data['sell_time'] = {
                'begin_time': detail.get('sell_time_from', {}),
                'end_time': detail.get('sell_time_to', {}),
            }
            if int(data['sell_time'].get('begin_time')) > int(time.time()):
                # Sale has not started yet: switch goods_id to the pre-sale
                # format before resolving the per-spec prices.
                goods_id = [1, goods_id[1]]

            data['detail_name_list'] = self._get_detail_name_list(
                tmp_data=tmp_data)

            # price, spec and stock for every SKU
            true_sku_info = self._get_true_sku_info(goods_id=goods_id,
                                                    tmp_data=tmp_data)
            if true_sku_info == []:  # may simply mean sold out / delisted
                print('获取到的sku_info为空值, 请检查!')
                print('*** 注意可能是卖完了,库存为0 导致!! ***')
            data['price_info_list'] = true_sku_info

        except Exception as e:
            print('遇到错误如下: ', e)
            self.result_data = {}  # reset so a later save is not polluted
            return {}

        self.result_data = data
        return data
Example #3
0
def test():
    """Send one batched request for a fixed product and pretty-print the reply.

    The request bundles nine method calls for the same product page, with
    ids numbered from 1. The decoded response is printed with ``pprint``;
    a body that cannot be decoded as JSON is reported instead of being
    silently ignored.
    """
    # Endpoint and headers captured from the Vipshop WeChat mini-program.
    url = 'https://m.vip.com/server.html'
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding':
        'gzip',
        'Accept-Language':
        'zh-cn',
        'Cache-Control':
        'max-age=0',
        'Connection':
        'keep-alive',
        'Host':
        'm.vip.com',
        'Referer':
        'https://servicewechat.com/wxe9714e742209d35f/284/page-frame.html',
        'User-Agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Mobile/15A5341f MicroMessenger/6.6.5 NetType/WIFI Language/zh_CN',
    }

    # Unix time in seconds followed by three random digits.
    t = str(round(time.time())) + str(randint(100, 999))
    params = {
        'serv': 'getGoodsActiveMsg',
        '_xcxid': t,
    }

    goods_id = '460143743'
    page = 'product-0-' + str(goods_id) + '.html'

    # Every call in the batch shares the same params; only method and id vary.
    methods = (
        'getGoodsActiveMsg',
        'getCoupon',
        'getProductDetail',
        'getProductMeta',
        'getProductSlide',
        'getProductMultiColor',
        'getProductSize',
        'getProductCountdown',
        'ProductRpc.getProductLicense',
    )
    data = dumps([
        {
            "method": method,
            "params": {
                "page": page,
                "query": ""
            },
            'id': request_id,
            "jsonrpc": "2.0"
        }
        for request_id, method in enumerate(methods, start=1)
    ])

    body = MyRequests.post_url_body(url=url,
                                    headers=headers,
                                    params=params,
                                    data=data)
    try:
        parsed = json.loads(body)
    except (TypeError, ValueError) as e:
        # Report the failure rather than swallowing it with a bare except.
        print('json.loads failed for the response body:', e)
        return
    pprint(parsed)
Example #4
0
def test():
    """Replay a captured batched request and pretty-print the reply.

    The request bundles nine method calls for one hard-coded product page,
    using consecutive ids starting at 4884390025335. The decoded response is
    printed with ``pprint``; a body that cannot be decoded as JSON is reported
    instead of being silently ignored.
    """
    # Endpoint and headers captured from the Vipshop WeChat mini-program.
    url = 'https://m.vip.com/server.html'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-cn',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'm.vip.com',
        'Referer': 'https://servicewechat.com/wxe9714e742209d35f/284/page-frame.html',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Mobile/15A5341f MicroMessenger/6.6.5 NetType/WIFI Language/zh_CN',
    }

    # NOTE(review): these cookies are built but never passed to the request
    # below; kept as reference data - confirm whether they should be sent.
    cookies = {
        'mars_cid': '1522488378117_dc1dd95b12eabf2810ceccbe1d7b5f05',
        'userId': '246736848',
        'warehouse': 'VIP_SH',
        'vip_wh': 'VIP_SH',
        'WAP[p_wh]': 'VIP_SH',
        'saturn': 'v494a41983b12ac4be82124030c99f71f',
        'wap_consumer': 'C1-2',
        'client_from': 'wxsmall',
        'm_vip_province': '103103',
        'WAP[p_area]': '%E6%B5%99%E6%B1%9F',
    }

    # Unix time in whole seconds followed by a fixed '001' suffix.
    t = str(int(time.time()))
    params = {
        'serv': 'getGoodsActiveMsg',
        '_xcxid': t + '001',
    }

    # Every call in the batch shares the same params; only method and id vary.
    page = "product-2558393-460143743.html"
    first_id = 4884390025335
    methods = (
        "getGoodsActiveMsg",
        "getCoupon",
        "getProductDetail",
        "getProductMeta",
        "getProductSlide",
        "getProductMultiColor",
        "getProductSize",
        "getProductCountdown",
        "ProductRpc.getProductLicense",
    )
    data = dumps([
        {
            "method": method,
            "params": {
                "page": page,
                "query": ""
            },
            "id": request_id,
            "jsonrpc": "2.0"
        }
        for request_id, method in enumerate(methods, start=first_id)
    ])

    body = MyRequests.post_url_body(url=url, headers=headers, params=params, data=data)

    try:
        parsed = json.loads(body)
    except (TypeError, ValueError) as e:
        # Report the failure rather than swallowing it with a bare except.
        print('json.loads failed for the response body:', e)
        return
    pprint(parsed)