def insert_mishop_comment_summary(summary: dict):
    """Store the Xiaomi Mall comment statistics as a ``CommentSummary`` row.

    Args:
        summary: Decoded API response. The statistics are read from
            ``summary['data']['detail']``.

    Raises:
        KeyError: If ``data``, ``detail`` or one of the statistic keys is
            missing from the response.
    """
    detail = summary['data']['detail']

    # CommentSummary field name -> key of the matching value in the payload.
    field_to_key = {
        'total': 'comments_total',
        'good_rate': 'satisfy_per',
        'default_good': 'default_good',
        'star_one': 'one_star',
        'star_two': 'two_star',
        'star_three': 'three_star',
        'star_four': 'four_star',
        'star_five': 'five_star',
    }
    statistics = {field: detail[key] for field, key in field_to_key.items()}

    CommentSummary.create(source='小米商城', is_official=True, **statistics)
def _unwrap_jsonp(payload: str, callback: str) -> str:
    """Return the JSON text wrapped by a ``callback(...)`` JSONP envelope.

    The text is returned (whitespace-trimmed) unchanged when it does not start
    with ``callback(``, so a plain JSON body still reaches ``json.loads``.
    """
    text = payload.strip()
    opening = callback + '('
    if not text.startswith(opening):
        return text
    # Slice off the exact wrapper. str.lstrip/rstrip take a *set of
    # characters*, not a prefix/suffix, so they cannot do this reliably.
    text = text[len(opening):].rstrip().rstrip(';').rstrip()
    if text.endswith(')'):
        text = text[:-1]
    return text


def get_sn_sku_and_comment_summary_from_api(browser: Chrome, shop: Shop):
    """Save the Suning SKUs and comment statistics captured from the page.

    The captured ``getClusterPrice`` responses are decoded into ``Sku`` rows
    and the ``cluster_review_satisfy`` response into one ``CommentSummary``
    row. Both responses are JSONP (``callback(<json>)``).

    Args:
        browser: Browser whose captured responses are read through
            ``get_response_body_list``.
        shop: Shop the SKUs and statistics belong to.
    """
    sku_payloads = []
    summary_payload = None
    sn_sku_target = [
        {'url': 'getClusterPrice', 'method': 'GET'},
        {'url': 'cluster_review_satisfy', 'method': 'GET'},
    ]

    for data in get_response_body_list(browser, sn_sku_target):
        if data['method'] != 'GET':
            continue
        if data['url'] == 'getClusterPrice':
            sku_payloads.append(data['response_body'])
        elif data['url'] == 'cluster_review_satisfy':
            summary_payload = data['response_body']

    for payload in sku_payloads:
        for sku in json.loads(_unwrap_jsonp(payload, 'getClusterPrice')):
            Sku.create(
                source=shop.source,
                is_official=shop.is_official,
                # Drop only the leading zero padding; codes without padding
                # are kept as they are.
                sku=sku['cmmdtyCode'].lstrip('0'),
                url_prefix='https://product.suning.com/',
                shop_code=sku['vendorCode'],
                shop=shop,
            )
    print('------获取所有SKU完成------')

    # The statistics response may not have been captured at all.
    if not summary_payload:
        print('---查询商品评论统计数量失败---')
        return

    summary = json.loads(_unwrap_jsonp(summary_payload, 'satisfy'))
    if summary['returnMsg'] != '查询数量成功':
        print('---查询商品评论统计数量失败---')
        return

    review_count = summary['reviewCounts'][0]
    CommentSummary.create(
        source=shop.source,
        is_official=shop.is_official,
        total=review_count['totalCount'],
        good_rate=str(review_count['goodRate']),
        default_good=review_count['defaultCount'],
        star_one=review_count['oneStarCount'],
        star_two=review_count['twoStarCount'],
        star_three=review_count['threeStarCount'],
        star_four=review_count['fourStarCount'],
        star_five=review_count['fiveStarCount'],
    )
    print('------保存商品评论统计数量完成------')
def get_jd_total():
    """Aggregate the stored JD comment statistics into one ``Total`` row.

    Four- and five-star comments count as good, two- and three-star as
    general, and one-star as bad. The good rate comes from
    ``calculate_percentage`` applied to the star-rated total and the good
    count.
    """
    summary = CommentSummary.get(CommentSummary.source == '京东')
    stars = (
        summary.star_one,
        summary.star_two,
        summary.star_three,
        summary.star_four,
        summary.star_five,
    )

    # Default-good comments are included in the overall total but not in
    # the base used for the good rate.
    overall_total = sum(stars, summary.default_good)
    good = summary.star_four + summary.star_five
    general = summary.star_two + summary.star_three
    bad = summary.star_one
    rated_total = sum(stars)
    good_rate = calculate_percentage(rated_total, good)

    models_total = sum(
        ms.default_good + ms.star_one + ms.star_two + ms.star_three
        + ms.star_four + ms.star_five
        for ms in ModelSummary.select().where(ModelSummary.source == '京东')
    )

    Total.create(
        source='京东',
        total=overall_total,
        all_models_total=models_total,
        good_rate=good_rate,
        default_good=summary.default_good,
        good_count=good,
        general_count=general,
        bad_count=bad,
        star_one=summary.star_one,
        star_two=summary.star_two,
        star_three=summary.star_three,
        star_four=summary.star_four,
        star_five=summary.star_five,
    )
def get_mi10_data_from_sn(browser: Chrome):
    """Collect SKUs, comment statistics and comments for every Suning shop.

    For each shop whose source is Suning, the product page is opened, its
    SKUs and statistics are saved, the default-sorted comments are fetched,
    and then every SKU page is visited for its own comments. Afterwards the
    good rates of the Suning summaries are recalculated.

    Args:
        browser: Selenium Chrome driver used to open the pages.
    """
    suning_shops = Shop.select().where(Shop.source == '苏宁')
    for shop in suning_shops:
        print(f'------打开当前苏宁商品链接: {shop.url}------')
        browser.get(shop.url)

        # Save every SKU and the comment statistics.
        get_sn_sku_and_comment_summary_from_api(browser, shop)

        # Fetch the default-sorted comments for the shop page.
        print('------开始获取默认排序评论------')
        switch_to_sn_default_comments_page(browser, shop.url)
        get_sn_comments(browser, shop)

        # Visit the product page of each SKU in turn.
        print('------SKU轮询开始------')
        for sku_row in shop.sku:
            print(f'------本轮SKU: {sku_row.sku}------')
            sku_url = ''.join(
                (sku_row.url_prefix, sku_row.shop_code, '/', sku_row.sku, '.html')
            )
            print(f'------正在打开当前SKU链接: {sku_url}------')
            browser.get(sku_url)

            print('------开始获取当前SKU默认排序评论------')
            switch_to_sn_sku_comments_page(browser, sku_url)
            get_sn_comments(browser, shop, sku_mode=True)

    # With all data collected, compute the final good rates.
    for model in (CommentSummary, ModelSummary):
        calculate_jd_and_sn_good_rate(
            model.select().where(model.source == '苏宁'))
    print('------苏宁平台数据获取完成------')
def insert_jd_comment_summary(comment_summary: dict, shop: Shop):
    """Update the shop's JD ``CommentSummary`` row, creating it if absent.

    Args:
        comment_summary: JD comment statistics payload. The count strings
            are converted with ``parse_jd_count_str`` and ``goodRate`` is
            multiplied by 100 before being stored as a string.
        shop: Shop whose ``source`` and ``is_official`` identify the row.
    """
    try:
        existing = CommentSummary.get(source=shop.source,
                                      is_official=shop.is_official)
        update_jd_summary_data(existing, comment_summary)
    except CommentSummary.DoesNotExist:
        # No row yet for this source / official flag: insert a fresh one.
        new_row = {
            'source': shop.source,
            'is_official': shop.is_official,
            'total': parse_jd_count_str(comment_summary['commentCountStr']),
            'good_rate': str(comment_summary['goodRate'] * 100),
            'default_good': parse_jd_count_str(
                comment_summary['defaultGoodCountStr']),
            'star_one': comment_summary['score1Count'],
            'star_two': comment_summary['score2Count'],
            'star_three': comment_summary['score3Count'],
            'star_four': comment_summary['score4Count'],
            'star_five': comment_summary['score5Count'],
        }
        CommentSummary.create(**new_row)
def get_mi10_data_from_jd(browser: Chrome):
    """Collect SKUs, comment statistics and comments for every JD shop.

    For each shop whose source is JD, the product page is opened and its
    listed SKUs are saved. Comments are then fetched in both the default
    recommended order and the time order, first for the shop page and then
    for every SKU page. Afterwards the good rates of the JD summaries are
    recalculated.

    Args:
        browser: Selenium Chrome driver used to open the pages.
    """
    jd_shops = Shop.select().where(Shop.source == '京东')
    for shop in jd_shops:
        print(f'------打开当前京东商品链接: {shop.url}------')
        browser.get(shop.url)  # open the product page

        # Save the SKUs that are currently listed.
        get_jd_sku_from_api(browser, shop)

        # Recommended order: comments plus statistics from the "all" tab.
        print('------开始获取默认推荐排序评论------')
        switch_to_jd_default_comments_page(browser, shop.url)
        get_jd_comments(browser, shop, get_sku=True, summary=True)

        # Time order: comments only.
        print('------开始获取默认时间排序评论------')
        switch_to_jd_default_comments_page(browser, shop.url)
        switch_to_jd_time_sort(browser)
        get_jd_comments(browser, shop, get_sku=True)

        # Visit the product page of each SKU in turn.
        print('------SKU轮询开始------')
        for sku_row in shop.sku:
            print(f'------本轮SKU: {sku_row.sku}------')
            sku_url = ''.join((sku_row.url_prefix, sku_row.sku, '.html'))
            print(f'------正在打开当前SKU链接: {sku_url}------')
            browser.get(sku_url)

            # Recommended order: comments plus statistics for this SKU.
            print('------开始获取当前SKU推荐排序评论------')
            switch_to_jd_sku_comments_page(browser, sku_url)
            get_jd_comments(browser, shop, sku_mode=True, summary=True)

            # Time order: comments only.
            print('------开始获取当前SKU时间排序评论------')
            switch_to_jd_sku_comments_page(browser, sku_url)
            switch_to_jd_time_sort(browser)
            get_jd_comments(browser, shop, sku_mode=True)

    # With all data collected, compute the final good rates.
    for model in (CommentSummary, ModelSummary):
        calculate_jd_and_sn_good_rate(
            model.select().where(model.source == '京东'))
    print('------京东平台数据获取完成------')
def get_mishop_total():
    """Copy the Xiaomi Mall comment summary into a ``Total`` row.

    Three-, four- and five-star comments count as good, two-star as general
    and one-star as bad. The summary's own total is used both as the overall
    total and as the all-models total.
    """
    summary = CommentSummary.get(CommentSummary.source == '小米商城')

    good = summary.star_three + summary.star_four + summary.star_five
    general = summary.star_two
    bad = summary.star_one

    star_columns = ('star_one', 'star_two', 'star_three', 'star_four',
                    'star_five')
    stars = {column: getattr(summary, column) for column in star_columns}

    Total.create(
        source='小米商城',
        total=summary.total,
        all_models_total=summary.total,
        good_rate=summary.good_rate,
        default_good=summary.default_good,
        good_count=good,
        general_count=general,
        bad_count=bad,
        **stars,
    )
def get_sn_total():
    """Aggregate the stored Suning comment statistics into a ``Total`` row.

    Four- and five-star comments count as good, two- and three-star as
    general and one-star as bad. The all-models total is the sum of
    ``total`` over the Suning ``ModelSummary`` rows.
    """
    summary = CommentSummary.get(CommentSummary.source == '苏宁')

    good = summary.star_four + summary.star_five
    general = summary.star_two + summary.star_three
    bad = summary.star_one

    models_total = sum(
        ms.total
        for ms in ModelSummary.select().where(ModelSummary.source == '苏宁')
    )

    star_columns = ('star_one', 'star_two', 'star_three', 'star_four',
                    'star_five')
    stars = {column: getattr(summary, column) for column in star_columns}

    Total.create(
        source='苏宁',
        total=summary.total,
        all_models_total=models_total,
        good_rate=summary.good_rate,
        default_good=summary.default_good,
        good_count=good,
        general_count=general,
        bad_count=bad,
        **stars,
    )
def get_youpin_comments(browser: Chrome, shop: Shop):
    """Page through the Youpin comment list and save what each page returns.

    On the first page both the statistics (``comment/product/index``) and the
    comment list (``comment/product/content``) responses are read: the
    statistics are saved as a ``CommentSummary`` row and the reported total
    replaces the initial page limit. Every later page only reads the comment
    list. Comments are saved through ``insert_youpin_comments``.

    The loop stops when the page limit is reached, when no comment response
    is found, or when a response's ``message`` is not ``'ok'``. A
    ``WebDriverException`` on one page is reported and the loop moves on to
    the next page.

    Args:
        browser: Selenium Chrome driver showing the comment page.
        shop: Shop the comments and statistics belong to.
    """
    page = 1
    # Provisional limit; replaced by the real page count from page one.
    max_page = 141
    while page <= max_page:
        try:
            if page == 1:
                # First page: read the statistics and the first comment list.
                comment_index = None
                comment_content = None
                target_urls = [
                    {'url': 'comment/product/index', 'method': 'POST'},
                    {'url': 'comment/product/content', 'method': 'POST'},
                ]
                for data in get_response_body_list(browser, target_urls):
                    if data['method'] != 'POST':
                        continue
                    if data['url'] == 'comment/product/index':
                        comment_index = json.loads(data['response_body'])
                    elif data['url'] == 'comment/product/content':
                        comment_content = json.loads(data['response_body'])

                # Only a successful response is read for the page count
                # (10 comments per page, rounded up).
                if (comment_content is not None
                        and comment_content['message'] == 'ok'):
                    total = comment_content['data']['page']['total']
                    max_page = (total + 9) // 10
                    print(f'---评论总页数: {max_page} 页')

                # Save the comment statistics when they were captured.
                if comment_index is not None and comment_index['message'] == 'ok':
                    summary = comment_index['data']
                    CommentSummary.create(source=shop.source,
                                          is_official=shop.is_official,
                                          total=summary['total_count'],
                                          good_rate=summary['positive_rate'])
                    print('------保存评论统计数量成功------')
                else:
                    print('---查询评论统计数量失败---')

                # Same handling as later pages: without a comment response
                # there is nothing to save, so stop instead of raising.
                if comment_content is None:
                    print('---未找到评论接口数据---')
                    break
            else:
                content_url = 'comment/product/content'
                comment_content = get_response_body(browser, content_url,
                                                    'POST')
                if comment_content is None:
                    print('---未找到评论接口数据---')
                    break
                comment_content = json.loads(comment_content)

            # Save the comments of the current page.
            if comment_content['message'] == 'ok':
                comment_list = comment_content['data']['list']
                insert_youpin_comments(comment_list, shop)
            else:
                print(f'---获取第{page}页评论数据异常---')
                break
        except WebDriverException:
            print(f'---获取第{page}页评论数据异常(WebDriverException), 尝试翻到下一页---')

        print(f'当前页数: {page}')
        # Scroll down and click through to the next page.
        turn_to_the_next_page(browser)
        page += 1
    print('------评论获取阶段结束------')