def get_sku_info_and_summary_and_first_page_comments(browser: Chrome) -> int:
    """Save the SKU info, the comment summary and the first page of comments.

    Three GET responses are looked up through ``get_response_body_list``
    (``commodity_page``, ``user_comment/get_summary`` and
    ``user_comment/get_list``).  Each body is parsed as JSON/JSONP and, when
    its ``msg`` field equals ``'success'``, handed to the matching
    ``insert_mishop_*`` helper.

    A response that was not captured, or whose body is empty, is reported as
    a failed query instead of raising ``KeyError``.

    Args:
        browser: Browser instance passed through to
            ``get_response_body_list``.

    Returns:
        ``data.page_total`` from the comment-list response, or ``0`` when
        the first page of comments could not be obtained.
    """
    target_urls = [
        {'url': 'commodity_page', 'method': 'GET'},
        {'url': 'user_comment/get_summary', 'method': 'GET'},
        {'url': 'user_comment/get_list', 'method': 'GET'},
    ]

    # Empty dicts mean "response not captured"; the ``.get('msg')`` checks
    # below then take the failure branch.
    sku_info = {}
    summary = {}
    comments = {}
    for data in get_response_body_list(browser, target_urls):
        if data['method'] != 'GET':
            continue
        if data['url'] == 'commodity_page':
            sku_info = _parse_mishop_jsonp(data['response_body'])
        elif data['url'] == 'user_comment/get_summary':
            summary = _parse_mishop_jsonp(data['response_body'])
        elif data['url'] == 'user_comment/get_list':
            comments = _parse_mishop_jsonp(data['response_body'])

    # Save the SKUs and their product specification info
    if sku_info.get('msg') == 'success':
        insert_mishop_sku_info(sku_info)
        print('------保存SKU信息成功------')
    else:
        print('---查询SKU信息失败---')

    # Save the comment statistics
    if summary.get('msg') == 'success':
        insert_mishop_comment_summary(summary)
        print('------保存评论统计数据成功------')
    else:
        print('---查询评论统计数据失败---')

    # Save the first page of comments
    if comments.get('msg') != 'success':
        print('---获取第1页评论数据异常---')
        return 0

    total_page = comments['data']['page_total']
    print(f'有 {total_page} 页评论, 共 {comments["data"]["total_count"]} 条')
    insert_mishop_comments(comments['data']['comments'])
    return total_page


def _parse_mishop_jsonp(body):
    """Parse a JSON or JSONP (``callback(<json>);``) response body.

    The callback wrapper is removed by position rather than by name, so the
    result does not depend on which callback name the page generated.
    Returns an empty dict for a missing or empty body.
    """
    if not body:
        return {}
    text = body.strip()
    if not text.startswith(('{', '[')):
        # Not bare JSON: keep only what sits between the first "(" and the
        # last ")".
        open_idx = text.find('(')
        close_idx = text.rfind(')')
        if open_idx != -1 and close_idx > open_idx:
            text = text[open_idx + 1:close_idx]
    return json.loads(text)
# Example #2
# 0
def get_sn_sku_and_comment_summary_from_api(browser: Chrome, shop: Shop):
    """Store every SKU of the product and its comment statistics.

    The ``getClusterPrice`` and ``cluster_review_satisfy`` GET responses are
    looked up through ``get_response_body_list``.  One ``Sku`` row is created
    per entry of every ``getClusterPrice`` payload, and one
    ``CommentSummary`` row is created from the first element of
    ``reviewCounts`` when the summary reports success.

    A summary response that was not captured is reported as a failed query
    instead of raising ``AttributeError``.

    Args:
        browser: Browser instance passed through to
            ``get_response_body_list``.
        shop: Shop the created rows are attached to; its ``source`` and
            ``is_official`` attributes are copied onto each row.
    """
    skus_list = []
    summary = None  # raw body of the summary response, if one was captured
    sn_sku_target = [
        {'url': 'getClusterPrice', 'method': 'GET'},
        {'url': 'cluster_review_satisfy', 'method': 'GET'},
    ]
    for data in get_response_body_list(browser, sn_sku_target):
        if data['method'] != 'GET':
            continue
        if data['url'] == 'getClusterPrice':
            skus_list.append(data['response_body'])
        elif data['url'] == 'cluster_review_satisfy':
            summary = data['response_body']

    for skus in skus_list:
        # lstrip/rstrip remove *character sets*, not a prefix/suffix; that is
        # enough here because the payload is a JSON array, so stripping stops
        # at "[" and "]".
        skus = json.loads(skus.lstrip('getClusterPrice(').rstrip(');'))
        for sku in skus:
            Sku.create(source=shop.source,
                       is_official=shop.is_official,
                       # Drop only the leading zeros; codes without a leading
                       # zero are kept unchanged.
                       sku=sku['cmmdtyCode'].lstrip('0'),
                       url_prefix='https://product.suning.com/',
                       shop_code=sku['vendorCode'],
                       shop=shop)
    print('------获取所有SKU完成------')

    if not summary:
        # The summary response was not captured: nothing to parse.
        print('---查询商品评论统计数量失败---')
        return

    # The payload is a JSON object, so stripping stops at "{" and "}".
    summary = json.loads(summary.lstrip('satisfy(').rstrip(')'))
    if summary['returnMsg'] == '查询数量成功':
        review_count = summary['reviewCounts'][0]
        CommentSummary.create(
            source=shop.source,
            is_official=shop.is_official,
            total=review_count['totalCount'],
            good_rate=str(review_count['goodRate']),
            default_good=review_count['defaultCount'],
            star_one=review_count['oneStarCount'],
            star_two=review_count['twoStarCount'],
            star_three=review_count['threeStarCount'],
            star_four=review_count['fourStarCount'],
            star_five=review_count['fiveStarCount'],
        )
        print('------保存商品评论统计数量完成------')
    else:
        print('---查询商品评论统计数量失败---')
# Example #3
# 0
def get_sn_sku_and_total_from_api(browser: Chrome, shop_code: str, sku: str):
    """Record every SKU of the current product and return its comment total.

    The ``getClusterPrice`` and ``cluster_review_satisfy`` GET responses are
    looked up through ``get_response_body_list``.  Every SKU found in the
    ``getClusterPrice`` payloads is registered with
    ``SNExistedSku.get_or_create``; when no such payload was captured the
    product is treated as a single-SKU product and the ``shop_code`` / ``sku``
    arguments are registered instead.

    Args:
        browser: Browser instance passed through to
            ``get_response_body_list``.
        shop_code: Shop code registered for a single-SKU product.
        sku: SKU registered for a single-SKU product.

    Returns:
        ``reviewCounts[0].totalCount`` from the summary response, or ``-1``
        when the summary was not captured or does not report success.
    """
    skus_list = []
    summary = None  # raw body of the summary response, if one was captured
    sn_sku_target = [
        {'url': 'getClusterPrice', 'method': 'GET'},
        {'url': 'cluster_review_satisfy', 'method': 'GET'},
    ]
    for data in get_response_body_list(browser, sn_sku_target):
        if data['method'] != 'GET':
            continue
        if data['url'] == 'getClusterPrice':
            skus_list.append(data['response_body'])
        elif data['url'] == 'cluster_review_satisfy':
            summary = data['response_body']

    if not skus_list:
        SNExistedSku.get_or_create(shop_code=shop_code, sku=sku)
        print('------当前商品为单SKU商品------')

    for skus in skus_list:
        # lstrip/rstrip remove *character sets*, not a prefix/suffix; that is
        # enough here because the payload is a JSON array, so stripping stops
        # at "[" and "]".
        skus = json.loads(skus.lstrip('getClusterPrice(').rstrip(');'))
        # ``item`` rather than ``sku`` so the ``sku`` parameter is not
        # shadowed.
        for item in skus:
            SNExistedSku.get_or_create(
                shop_code=item['vendorCode'],
                # Drop only the leading zeros; codes without a leading zero
                # are kept unchanged.
                sku=item['cmmdtyCode'].lstrip('0'),
            )
    print('------获取当前商品所有SKU完成------')

    if not summary:
        # The summary response was not captured: report failure the same way
        # as an unsuccessful query.
        return -1

    # The payload is a JSON object, so stripping stops at "{" and "}".
    summary = json.loads(summary.lstrip('satisfy(').rstrip(')'))
    if summary['returnMsg'] == '查询数量成功':
        return summary['reviewCounts'][0]['totalCount']
    return -1
def get_sn_comments(browser: Chrome, sn_ss: Union[Shop, SNSku], sku_mode: bool = False):
    """Page through the comment list and store the comments of every page.

    For each page the comment response is read, parsed and passed to
    ``insert_sn_comments``; then the "next" button is clicked.  The loop ends
    when a response does not report success or when reading/parsing a page
    fails.  Afterwards ``back_to_first_window`` is called and the function
    sleeps for 10 seconds.

    In SKU mode the first page additionally reads the ``review_count/general``
    response and stores it through ``insert_sn_model_summary``.

    Args:
        browser: Browser instance the responses are read from and the page is
            driven with.
        sn_ss: Shop or SKU object forwarded to the ``insert_sn_*`` helpers.
        sku_mode: When ``True`` the ``cluster_review_lists/general`` endpoint
            is used instead of ``cluster_review_lists/cluster``.
    """
    page = 1
    while True:
        try:
            # Fetch the comments of the current page
            if sku_mode is True and page == 1:
                # Empty dicts mean "response not captured"; the key lookups
                # below then raise KeyError, which is handled as a failed
                # page by the except clause.
                sn_comments = {}
                sn_model_summary = {}
                target_urls = [
                    {'url': 'cluster_review_lists/general', 'method': 'GET'},
                    {'url': 'review_count/general', 'method': 'GET'}
                ]
                all_data = get_response_body_list(browser, target_urls)
                for data in all_data:
                    if data['url'] == 'cluster_review_lists/general' and data['method'] == 'GET':
                        sn_comments = data['response_body']
                        sn_comments = sn_comments.lstrip('reviewList(').rstrip(')')
                        sn_comments = json.loads(sn_comments)
                    if data['url'] == 'review_count/general' and data['method'] == 'GET':
                        sn_model_summary = data['response_body']
                        sn_model_summary = sn_model_summary.lstrip('satisfy(').rstrip(')')
                        sn_model_summary = json.loads(sn_model_summary)
                if sn_comments['returnMsg'] == '无评价数据':
                    print('---无评价数据, 跳过此SKU---')
                    break
                else:
                    if sn_model_summary['returnMsg'] == '查询数量成功':
                        insert_sn_model_summary(sn_model_summary['reviewCounts'][0],
                                                sn_comments['commodityReviews'][0]['commodityInfo'], sn_ss)
                    else:
                        print('---查询当前SKU评论统计数量失败---')
            else:
                if sku_mode is False:
                    sn_comments_url = 'cluster_review_lists/cluster'
                else:
                    sn_comments_url = 'cluster_review_lists/general'
                sn_comments = get_response_body(browser, sn_comments_url, 'GET')
                sn_comments = sn_comments.lstrip('reviewList(').rstrip(')')
                sn_comments = json.loads(sn_comments)

            # Save the comments
            if sn_comments['returnMsg'] == '成功取得评价列表':
                comment_list = sn_comments['commodityReviews']
                insert_sn_comments(comment_list, sn_ss)
            else:
                # The maximum page count is 50, so a failure at or below page
                # 50 is reported as an anomaly.
                if page <= 50:
                    print(f'---获取第{page}页评论数据异常---')
                break
        except (WebDriverException, AttributeError, TypeError,
                KeyError, IndexError, ValueError):
            # KeyError/IndexError: response missing or without the expected
            # fields; ValueError: body is not valid JSON (JSONDecodeError is a
            # ValueError).  Handling them here keeps the cleanup after the
            # loop from being skipped by an unhandled exception.
            print(f'---获取第{page}页评论数据异常, 跳过此轮---')
            break

        print(f'当前页数: {page}')
        # Scroll down and click "next page".  The wait is retried after each
        # scroll until the button is clickable; there is no retry limit.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CSS_SELECTOR, '.next.rv-maidian'))
                )
                browser.execute_script('document.getElementsByClassName("next rv-maidian")[0].click()')
                waiting_content_loading(browser, 'rv-target-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 500)

        page += 1

    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def get_youpin_comments(browser: Chrome, shop: Shop):
    """Page through the comments and store them together with the summary.

    On the first page both the ``comment/product/index`` and
    ``comment/product/content`` POST responses are read: the former is saved
    as a ``CommentSummary`` row, the latter provides the total comment count
    from which the number of pages (10 comments per page, rounded up) is
    derived.  Later pages only read ``comment/product/content``.  Every page
    of comments is passed to ``insert_youpin_comments``.

    A response that was not captured, has no body, or does not report
    ``message == 'ok'`` is treated as a failed query instead of raising
    ``KeyError`` / ``TypeError``.

    Args:
        browser: Browser instance the responses are read from and the page is
            driven with.
        shop: Shop the comments belong to; its ``source`` and ``is_official``
            attributes are copied onto the summary row.
    """
    page = 1
    # Upper bound used until the real page count is known from page 1.
    max_page = 141
    while page <= max_page:
        try:
            # Fetch the comments of the current page
            if page == 1:
                # First page: comments plus the comment statistics.  Empty
                # dicts mean "response not captured".
                comment_index = {}
                comment_content = {}
                target_urls = [
                    {'url': 'comment/product/index', 'method': 'POST'},
                    {'url': 'comment/product/content', 'method': 'POST'},
                ]
                for data in get_response_body_list(browser, target_urls):
                    body = data['response_body']
                    if data['method'] != 'POST' or body is None:
                        continue
                    if data['url'] == 'comment/product/index':
                        comment_index = json.loads(body)
                    elif data['url'] == 'comment/product/content':
                        comment_content = json.loads(body)
                        # Only trust the total of a successful response.
                        if comment_content.get('message') == 'ok':
                            total = comment_content['data']['page']['total']
                            max_page = (total + 9) // 10
                            print(f'---评论总页数: {max_page} 页')
                # Save the comment statistics
                if comment_index.get('message') == 'ok':
                    summary = comment_index['data']
                    CommentSummary.create(source=shop.source,
                                          is_official=shop.is_official,
                                          total=summary['total_count'],
                                          good_rate=summary['positive_rate'])
                    print('------保存评论统计数量成功------')
                else:
                    print('---查询评论统计数量失败---')
            else:
                content_url = 'comment/product/content'
                comment_content = get_response_body(browser, content_url,
                                                    'POST')
                if comment_content is None:
                    print('---未找到评论接口数据---')
                    break
                comment_content = json.loads(comment_content)

            # Save the comments
            if comment_content.get('message') == 'ok':
                comment_list = comment_content['data']['list']
                insert_youpin_comments(comment_list, shop)
            else:
                print(f'---获取第{page}页评论数据异常---')
                break
        except WebDriverException:
            # Best effort: skip this page and move on to the next one.
            print(f'---获取第{page}页评论数据异常(WebDriverException), 尝试翻到下一页---')

        print(f'当前页数: {page}')
        # Scroll down and click "next page"
        turn_to_the_next_page(browser)
        page += 1

    print('------评论获取阶段结束------')