def get_sku_info_and_summary_and_first_page_comments(browser: Chrome) -> int:
    """Capture and persist SKU info, the comment summary and the first comment page.

    Three GET responses are collected through ``get_response_body_list``:
    ``commodity_page``, ``user_comment/get_summary`` and
    ``user_comment/get_list``. Each body is unwrapped from its JSONP callback,
    decoded, and handed to the matching ``insert_mishop_*`` helper when its
    ``msg`` field equals ``'success'``.

    Args:
        browser: Driver passed through to ``get_response_body_list``.

    Returns:
        The ``page_total`` value of the comment list response, or ``0`` when
        that response was not captured or does not report success.

    Raises:
        ValueError: If a captured body cannot be decoded as JSON.
    """

    def _parse_jsonp(body: str):
        # Decode ``callback(<json>);`` without depending on the callback name.
        # ``str.lstrip``/``str.rstrip`` remove character sets, not a
        # prefix/suffix, so they break as soon as the callback index changes.
        text = body.strip()
        if not text.startswith(('{', '[')):
            start = text.find('(')
            end = text.rfind(')')
            if start != -1 and end > start:
                text = text[start + 1:end]
        return json.loads(text)

    target_urls = [
        {'url': 'commodity_page', 'method': 'GET'},
        {'url': 'user_comment/get_summary', 'method': 'GET'},
        {'url': 'user_comment/get_list', 'method': 'GET'},
    ]
    wanted = {target['url'] for target in target_urls}

    # Keep the last decoded response per endpoint; an entry without a text
    # body is treated as "not captured".
    captured = {}
    for data in get_response_body_list(browser, target_urls):
        body = data['response_body']
        if (data['method'] == 'GET' and data['url'] in wanted
                and isinstance(body, str)):
            captured[data['url']] = _parse_jsonp(body)

    sku_info = captured.get('commodity_page', {})
    summary = captured.get('user_comment/get_summary', {})
    comments = captured.get('user_comment/get_list', {})

    # Save the SKUs and their product specification info.
    if sku_info.get('msg') == 'success':
        insert_mishop_sku_info(sku_info)
        print('------保存SKU信息成功------')
    else:
        print('---查询SKU信息失败---')

    # Save the comment statistics.
    if summary.get('msg') == 'success':
        insert_mishop_comment_summary(summary)
        print('------保存评论统计数据成功------')
    else:
        print('---查询评论统计数据失败---')

    # Save the first page of comments.
    if comments.get('msg') == 'success':
        total_page = comments['data']['page_total']
        print(f'有 {total_page} 页评论, 共 {comments["data"]["total_count"]} 条')
        insert_mishop_comments(comments['data']['comments'])
        return total_page

    print('---获取第1页评论数据异常---')
    return 0
def get_sn_sku_and_comment_summary_from_api(browser: Chrome, shop: Shop):
    """Store every SKU and the review statistics captured for a Suning product.

    ``getClusterPrice`` responses yield the SKU list and the
    ``cluster_review_satisfy`` response yields the review counters. Both are
    JSONP documents collected through ``get_response_body_list``.

    Args:
        browser: Driver passed through to ``get_response_body_list``.
        shop: Shop record whose ``source`` and ``is_official`` are copied onto
            the created rows; it is also linked to each ``Sku``.

    Returns:
        None. Rows are created through ``Sku.create`` and
        ``CommentSummary.create``.

    Raises:
        ValueError: If a captured body cannot be decoded as JSON.
    """

    def _parse_jsonp(body: str):
        # Decode ``callback(<json>);`` without depending on the callback name.
        # ``str.lstrip``/``str.rstrip`` remove character sets, not a
        # prefix/suffix.
        text = body.strip()
        if not text.startswith(('{', '[')):
            start = text.find('(')
            end = text.rfind(')')
            if start != -1 and end > start:
                text = text[start + 1:end]
        return json.loads(text)

    sn_sku_target = [
        {'url': 'getClusterPrice', 'method': 'GET'},
        {'url': 'cluster_review_satisfy', 'method': 'GET'},
    ]

    sku_bodies = []
    summary_body = None
    for data in get_response_body_list(browser, sn_sku_target):
        body = data['response_body']
        if data['method'] != 'GET' or not isinstance(body, str):
            continue
        if data['url'] == 'getClusterPrice':
            sku_bodies.append(body)
        elif data['url'] == 'cluster_review_satisfy':
            summary_body = body

    for body in sku_bodies:
        for item in _parse_jsonp(body):
            Sku.create(
                source=shop.source,
                is_official=shop.is_official,
                # Drop only the leading zero padding. The former
                # ``replace(<leading zeros>, '')`` also removed identical zero
                # runs inside the code and failed on codes without padding.
                sku=item['cmmdtyCode'].lstrip('0'),
                url_prefix='https://product.suning.com/',
                shop_code=item['vendorCode'],
                shop=shop,
            )
    print('------获取所有SKU完成------')

    # The statistics response may not have been captured at all.
    if summary_body is None:
        print('---查询商品评论统计数量失败---')
        return

    summary = _parse_jsonp(summary_body)
    if summary.get('returnMsg') == '查询数量成功':
        review_count = summary['reviewCounts'][0]
        CommentSummary.create(
            source=shop.source,
            is_official=shop.is_official,
            total=review_count['totalCount'],
            good_rate=str(review_count['goodRate']),
            default_good=review_count['defaultCount'],
            star_one=review_count['oneStarCount'],
            star_two=review_count['twoStarCount'],
            star_three=review_count['threeStarCount'],
            star_four=review_count['fourStarCount'],
            star_five=review_count['fiveStarCount'],
        )
        print('------保存商品评论统计数量完成------')
    else:
        print('---查询商品评论统计数量失败---')
def get_sn_sku_and_total_from_api(browser: Chrome, shop_code: str, sku: str):
    """Record the SKUs of the current Suning product and return its review total.

    ``getClusterPrice`` responses list the sibling SKUs; when none is captured
    the product is recorded as a single-SKU product using the given
    ``shop_code``/``sku``. The ``cluster_review_satisfy`` response provides
    the review counters.

    Args:
        browser: Driver passed through to ``get_response_body_list``.
        shop_code: Shop code stored when no SKU list was captured.
        sku: SKU stored when no SKU list was captured.

    Returns:
        ``totalCount`` of the first ``reviewCounts`` entry, or ``-1`` when the
        statistics response is missing or does not report success.

    Raises:
        ValueError: If a captured body cannot be decoded as JSON.
    """

    def _parse_jsonp(body: str):
        # Decode ``callback(<json>);`` without depending on the callback name.
        # ``str.lstrip``/``str.rstrip`` remove character sets, not a
        # prefix/suffix.
        text = body.strip()
        if not text.startswith(('{', '[')):
            start = text.find('(')
            end = text.rfind(')')
            if start != -1 and end > start:
                text = text[start + 1:end]
        return json.loads(text)

    sn_sku_target = [
        {'url': 'getClusterPrice', 'method': 'GET'},
        {'url': 'cluster_review_satisfy', 'method': 'GET'},
    ]

    sku_bodies = []
    summary_body = None
    for data in get_response_body_list(browser, sn_sku_target):
        body = data['response_body']
        if data['method'] != 'GET' or not isinstance(body, str):
            continue
        if data['url'] == 'getClusterPrice':
            sku_bodies.append(body)
        elif data['url'] == 'cluster_review_satisfy':
            summary_body = body

    if not sku_bodies:
        SNExistedSku.get_or_create(shop_code=shop_code, sku=sku)
        print('------当前商品为单SKU商品------')

    for body in sku_bodies:
        # ``item`` instead of ``sku`` so the ``sku`` parameter is not shadowed.
        for item in _parse_jsonp(body):
            SNExistedSku.get_or_create(
                shop_code=item['vendorCode'],
                # Drop only the leading zero padding. The former
                # ``replace(<leading zeros>, '')`` also removed identical zero
                # runs inside the code and failed on codes without padding.
                sku=item['cmmdtyCode'].lstrip('0'),
            )
    print('------获取当前商品所有SKU完成------')

    # The statistics response may not have been captured at all.
    if summary_body is None:
        return -1

    summary = _parse_jsonp(summary_body)
    if summary.get('returnMsg') == '查询数量成功':
        return summary['reviewCounts'][0]['totalCount']
    return -1
def get_sn_comments(browser: Chrome, sn_ss: Union[Shop, SNSku], sku_mode: bool = False):
    """Walk the Suning review pages and store the comments of every page.

    On each page the review list response is decoded and passed to
    ``insert_sn_comments``. In SKU mode the first page additionally captures
    the ``review_count/general`` response and stores it through
    ``insert_sn_model_summary``. The walk stops when a page does not report a
    successful review list or when reading a page fails; afterwards
    ``back_to_first_window`` is called and the function sleeps for 10 seconds.

    Args:
        browser: Driver used to capture responses and to click "next page".
        sn_ss: Shop or SKU record forwarded to the ``insert_sn_*`` helpers.
        sku_mode: When ``True`` the ``.../general`` endpoints are read and the
            per-SKU summary is stored on the first page; when ``False`` the
            ``cluster_review_lists/cluster`` endpoint is read.
    """

    def _parse_jsonp(body: str):
        # Decode ``callback(<json>)`` without depending on the callback name.
        # ``str.lstrip``/``str.rstrip`` remove character sets, not a
        # prefix/suffix. A ``None`` body raises AttributeError here, which the
        # caller treats as "page could not be read".
        text = body.strip()
        if not text.startswith(('{', '[')):
            start = text.find('(')
            end = text.rfind(')')
            if start != -1 and end > start:
                text = text[start + 1:end]
        return json.loads(text)

    page = 1
    while True:
        try:
            # Fetch the comments of the current page.
            if sku_mode is True and page == 1:
                sn_comments = {}
                sn_model_summary = {}
                target_urls = [
                    {'url': 'cluster_review_lists/general', 'method': 'GET'},
                    {'url': 'review_count/general', 'method': 'GET'},
                ]
                for data in get_response_body_list(browser, target_urls):
                    if data['url'] == 'cluster_review_lists/general' and data['method'] == 'GET':
                        sn_comments = _parse_jsonp(data['response_body'])
                    if data['url'] == 'review_count/general' and data['method'] == 'GET':
                        sn_model_summary = _parse_jsonp(data['response_body'])

                if sn_comments.get('returnMsg') == '无评价数据':
                    print('---无评价数据, 跳过此SKU---')
                    break
                if sn_model_summary.get('returnMsg') == '查询数量成功':
                    insert_sn_model_summary(
                        sn_model_summary['reviewCounts'][0],
                        sn_comments['commodityReviews'][0]['commodityInfo'],
                        sn_ss,
                    )
                else:
                    print('---查询当前SKU评论统计数量失败---')
            else:
                if sku_mode is False:
                    sn_comments_url = 'cluster_review_lists/cluster'
                else:
                    sn_comments_url = 'cluster_review_lists/general'
                sn_comments = _parse_jsonp(
                    get_response_body(browser, sn_comments_url, 'GET'))

            # Save the comments.
            if sn_comments.get('returnMsg') == '成功取得评价列表':
                insert_sn_comments(sn_comments['commodityReviews'], sn_ss)
            else:
                # The walk is expected to end after page 50; report the
                # failure only when it happens on page 50 or earlier.
                if page <= 50:
                    print(f'---获取第{page}页评论数据异常---')
                break
        except (WebDriverException, AttributeError, TypeError,
                KeyError, IndexError, ValueError):
            # KeyError/IndexError/ValueError cover missing fields and
            # undecodable bodies, so they end the walk like the other read
            # failures instead of skipping the window cleanup below.
            print(f'---获取第{page}页评论数据异常, 跳过此轮---')
            break

        print(f'当前页数: {page}')

        # Scroll down until the "next page" button is clickable, then click it.
        # NOTE(review): this loop has no retry limit; it only ends once the
        # button becomes clickable - confirm the page always exposes it.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CSS_SELECTOR, '.next.rv-maidian'))
                )
                browser.execute_script('document.getElementsByClassName("next rv-maidian")[0].click()')
                waiting_content_loading(browser, 'rv-target-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 500)
        page += 1

    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def get_youpin_comments(browser: Chrome, shop: Shop):
    """Walk the Youpin comment pages and store the comments of every page.

    The first page captures both ``comment/product/index`` (statistics, saved
    through ``CommentSummary.create``) and ``comment/product/content`` (the
    comment list, which also provides the real page count). Later pages read
    only ``comment/product/content``. Each successful list is passed to
    ``insert_youpin_comments``.

    Args:
        browser: Driver used to capture responses and to turn the page.
        shop: Shop record whose ``source`` and ``is_official`` are copied onto
            the summary row; it is also forwarded to ``insert_youpin_comments``.
    """
    page = 1
    # Upper bound used until the first page reports the real total.
    max_page = 141
    while page <= max_page:
        try:
            # Fetch the comments of the current page.
            if page == 1:
                # The first page carries both the comments and the statistics.
                comment_index = {}
                comment_content = {}
                target_urls = [
                    {'url': 'comment/product/index', 'method': 'POST'},
                    {'url': 'comment/product/content', 'method': 'POST'},
                ]
                for data in get_response_body_list(browser, target_urls):
                    body = data['response_body']
                    # An entry without a body is treated as "not captured".
                    if data['method'] != 'POST' or body is None:
                        continue
                    if data['url'] == 'comment/product/index':
                        comment_index = json.loads(body)
                    elif data['url'] == 'comment/product/content':
                        comment_content = json.loads(body)
                        # Only a successful response is relied on for paging
                        # data; a failed one ends the walk below.
                        if comment_content.get('message') == 'ok':
                            # 10 comments per page, rounded up.
                            max_page = (comment_content['data']['page']['total'] + 9) // 10
                            print(f'---评论总页数: {max_page} 页')

                # Save the comment statistics.
                if comment_index.get('message') == 'ok':
                    summary = comment_index['data']
                    CommentSummary.create(
                        source=shop.source,
                        is_official=shop.is_official,
                        total=summary['total_count'],
                        good_rate=summary['positive_rate'],
                    )
                    print('------保存评论统计数量成功------')
                else:
                    print('---查询评论统计数量失败---')
            else:
                content_url = 'comment/product/content'
                comment_content = get_response_body(browser, content_url, 'POST')
                if comment_content is None:
                    print('---未找到评论接口数据---')
                    break
                comment_content = json.loads(comment_content)

            # Save the comments. ``.get`` keeps a response that was never
            # captured (still ``{}``) on the regular failure path.
            if comment_content.get('message') == 'ok':
                insert_youpin_comments(comment_content['data']['list'], shop)
            else:
                print(f'---获取第{page}页评论数据异常---')
                break
        except WebDriverException:
            # Best effort: report the failure and still try the next page.
            print(f'---获取第{page}页评论数据异常(WebDriverException), 尝试翻到下一页---')

        print(f'当前页数: {page}')
        # Scroll down and click "next page".
        turn_to_the_next_page(browser)
        page += 1
    print('------评论获取阶段结束------')