def get_jd_comments(browser: Chrome, jd_ss: Union[Shop, JDSku], get_sku: bool = False,
                    sku_mode: bool = False, summary: bool = False):
    """Page through JD comments and persist every page.

    Each iteration reads the comment API response through
    ``get_response_body``, unwraps its JSONP envelope, parses it and hands
    the comment list to ``insert_jd_comments``.  The page count is taken
    from the ``maxPage`` field of the first parsed response.  Paging stops
    when no response is found, a page has no comments, a
    ``WebDriverException`` is raised, or the page budget is used up.
    Afterwards ``back_to_first_window`` is called and the function sleeps
    for 10 seconds.

    Args:
        browser: Chrome driver passed to the response/paging helpers.
        jd_ss: Shop or JDSku record forwarded to the insert helpers.
        get_sku: When true, also call ``get_sku_from_jd_comments`` per page.
        sku_mode: When true, read ``skuProductPageComments`` instead of
            ``productPageComments``.
        summary: When true, store ``productCommentSummary`` from the first
            page (per-model summary if ``sku_mode`` is also set).
    """
    # None means "first page not parsed yet"; afterwards it holds the number
    # of pages still to visit.  A dedicated marker cannot collide with a real
    # ``maxPage`` value the way a magic number can.
    remaining_pages = None
    while True:
        try:
            # Fetch the comments of the current page.
            if sku_mode is True:
                jd_comments_url = 'skuProductPageComments'
            else:
                jd_comments_url = 'productPageComments'
            jd_comments = get_response_body(browser, jd_comments_url, 'GET')
            if jd_comments is None:
                print('---未找到评论接口数据---')
                break
            # Unwrap the JSONP envelope ("callback(" ... ");").  Anchored
            # patterns remove only the wrapper; str.lstrip/rstrip would treat
            # their argument as a character set and could eat payload chars.
            jd_comments = re.sub(r'^\s*\w+\(', '', jd_comments)
            jd_comments = re.sub(r'\)\s*;?\s*$', '', jd_comments)
            jd_comments = json.loads(jd_comments)
            # Persist the comments.
            comment_list = jd_comments['comments']
            insert_jd_comments(comment_list, jd_ss)
            if not comment_list:
                print('该页评论数据0条')
                break
            # Collect every SKU mentioned in the comments.
            if get_sku is True:
                get_sku_from_jd_comments(comment_list, jd_ss)
        except WebDriverException:
            print('---此页评论数据获取异常(WebDriverException), 跳过此分类---')
            break
        # First page only: learn the page count and store the summary.
        if remaining_pages is None:
            remaining_pages = jd_comments['maxPage']
            if sku_mode and summary:
                sku_summary = jd_comments['productCommentSummary']
                first_comment = comment_list[0]
                insert_jd_model_summary(sku_summary, first_comment, jd_ss)
            elif summary is True:
                total_summary = jd_comments['productCommentSummary']
                insert_jd_comment_summary(total_summary, jd_ss)
        # No need to scroll/click after the last page.
        remaining_pages -= 1
        print(f'本轮剩余页数: {remaining_pages}')
        if remaining_pages <= 0:
            break
        # Scroll down until the "next page" button is clickable, then click.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CLASS_NAME, 'ui-pager-next'))
                )
                browser.execute_script('document.getElementsByClassName("ui-pager-next")[0].click()')
                waiting_content_loading(browser, 'comment-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 200)
    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def get_jd_sku_from_api(browser: Chrome):
    """Store every SKU key of the ``type=getstocks`` response as a JDSku row.

    The response body is obtained through ``get_response_body``, unwrapped
    from its JSONP envelope and parsed as JSON; each top-level key is passed
    to ``JDSku.get_or_create``.
    """
    raw_body = get_response_body(browser, 'type=getstocks', 'GET')
    # Drop the trailing ")" first, then the leading "callback(" prefix.
    without_tail = raw_body.rstrip(')')
    stock_map = json.loads(re.sub(r'^\w+?\(', '', without_tail))
    for sku_id in stock_map.keys():
        JDSku.get_or_create(sku=sku_id)
    print('------获取已上架SKU完成------')
def get_comment_score(browser: Chrome):
    """Dump the Vmall comment-score response to ``hwsc_comment_score.json``.

    Opens the hard-coded Vmall product page, reads the body of the
    ``getCommentScore.json`` POST response through ``get_response_body`` and
    writes it verbatim to a local file.  If no response body is available
    the file is left untouched.

    Args:
        browser: Chrome driver used to load the page.
    """
    # Huawei Mate 30 Pro 5G on the Huawei online store (Vmall).
    browser.get('https://www.vmall.com/product/10086775311605.html')
    # Fetch the comment statistics response.
    comment_score_url = 'https://openapi.vmall.com/rms/comment/getCommentScore.json'
    comment_score = get_response_body(browser, comment_score_url, 'POST')
    if comment_score is None:
        # Check before opening: mode 'w' truncates the file immediately, and
        # writing None would then raise TypeError and leave it empty.
        print('---未找到评论接口数据---')
        return
    with open('hwsc_comment_score.json', 'w', encoding='UTF-8') as file:
        file.write(comment_score)
def get_detail(browser: Chrome):
    """Dump the Xiaomi Youpin product-detail response to ``xmyp_detail.json``.

    Opens the hard-coded Youpin product page, waits one second, reads the
    body of the ``api/gateway/detail`` POST response through
    ``get_response_body`` and writes it verbatim to a local file.  If no
    response body is available the file is left untouched.

    Args:
        browser: Chrome driver used to load the page.
    """
    # Xiaomi 10 Ultra, Xiaomi self-operated listing.
    browser.get(
        'https://www.xiaomiyoupin.com/detail?gid=134230&spmref=YouPinPC.$SearchFilter$1.search_list.1.4643522'
    )
    sleep(1)
    # Fetch the product information response.
    detail_url = 'https://www.xiaomiyoupin.com/api/gateway/detail'
    detail = get_response_body(browser, detail_url, 'POST')
    if detail is None:
        # Check before opening: mode 'w' truncates the file immediately, and
        # writing None would then raise TypeError and leave it empty.
        print('---获取商品SKU和规格信息失败---')
        return
    with open('xmyp_detail.json', 'w', encoding='UTF-8') as file:
        file.write(detail)
def get_jd_sku_from_api(browser: Chrome, shop: Shop):
    """Create one Sku row per key of the ``type=getstocks`` response.

    The response body is obtained through ``get_response_body``, unwrapped
    from its JSONP envelope and parsed as JSON.  Every top-level key becomes
    a ``Sku`` row that copies ``source`` and ``is_official`` from ``shop``.
    """
    raw_body = get_response_body(browser, 'type=getstocks', 'GET')
    # Drop the trailing ")" first, then the leading "callback(" prefix.
    without_tail = raw_body.rstrip(')')
    stock_map = json.loads(re.sub(r'^\w+?\(', '', without_tail))
    for sku_id in stock_map.keys():
        Sku.create(
            source=shop.source,
            is_official=shop.is_official,
            sku=sku_id,
            url_prefix='https://item.jd.com/',
            shop=shop,
        )
    print('------获取已上架SKU完成------')
def get_jd_sku_from_api(browser: Chrome, sku: str):
    """Record the SKUs of the current JD product as ExistedSku rows.

    When the ``type=getstocks`` response is available, each of its top-level
    keys is stored with ``get_or_create``.  When it is missing, only the
    given ``sku`` is stored (the original message notes this may be a
    single-SKU product).
    """
    raw_body = get_response_body(browser, 'type=getstocks', 'GET')
    if raw_body is not None:
        # Drop the trailing ")" first, then the leading "callback(" prefix.
        without_tail = raw_body.rstrip(')')
        stock_map = json.loads(re.sub(r'^\w+?\(', '', without_tail))
        for sku_id in stock_map.keys():
            ExistedSku.get_or_create(source='京东', sku=sku_id)
        print('------保存已上架SKU完成------')
    else:
        ExistedSku.create(source='京东', sku=sku)
        print('---当前商品所有SKU编号获取失败, 可能是单SKU商品---')
def get_jd_sku_from_api(browser: Chrome, sku: str):
    """Record the SKUs of the current JD product as JDExistedSku rows.

    Each top-level key of the ``type=getstocks`` response is stored with
    ``get_or_create``.  A missing response, a ``WebDriverException`` or a
    JSON decoding failure falls back to storing only the given ``sku``.
    """
    try:
        raw_body = get_response_body(browser, 'type=getstocks', 'GET')
        if raw_body is None:
            # Route "no response" through the same fallback as driver errors.
            raise WebDriverException()
        # Drop the trailing ")" first, then the leading "callback(" prefix.
        without_tail = raw_body.rstrip(')')
        stock_map = json.loads(re.sub(r'^\w+?\(', '', without_tail))
        for sku_id in stock_map.keys():
            JDExistedSku.get_or_create(sku=sku_id)
        print('------保存已上架SKU完成------')
    except (WebDriverException, JSONDecodeError):
        JDExistedSku.get_or_create(sku=sku)
        print('------当前商品是单SKU商品------')
def get_jd_comments(browser: Chrome):
    """Dump the first JD comment-page response to ``jd_comments.json``.

    Opens the hard-coded JD product page, scrolls down, clicks the element
    at a fixed XPath (the comments tab, per the original comment), then
    reads the ``productPageComments`` response through
    ``get_response_body``, removes the JSONP wrapper and writes the JSON
    text to a local file.  If no response body is available the file is
    left untouched.

    Args:
        browser: Chrome driver used to load and interact with the page.
    """
    # Xiaomi 10 Ultra, JD self-operated flagship store.
    browser.get('https://item.jd.com/100014565800.html')
    # Smooth-scroll the window down by 1000px.
    js = 'window.scrollBy({top:1000, left:0, behavior: "smooth"})'
    browser.execute_script(js)
    sleep(1)
    # Select the product-comments tab and click it.  find_element(By.XPATH,
    # ...) is used because find_element_by_xpath was removed in Selenium 4.
    browser.find_element(
        By.XPATH, '/html/body/div[10]/div[2]/div[1]/div[1]/ul/li[5]').click()
    sleep(1)
    # Fetch the JD comment API response.
    page_comment_url = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98' \
                       '&productId=100014565800&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1'
    comments = get_response_body(browser, page_comment_url, 'GET')
    if comments is None:
        # Check before opening: mode 'w' truncates the file immediately.
        print('---未找到评论接口数据---')
        return
    # Unwrap the JSONP envelope with anchored patterns; str.lstrip/rstrip
    # treat their argument as a character set, not as a prefix/suffix.
    comments = re.sub(r'^\s*\w+\(', '', comments)
    comments = re.sub(r'\)\s*;?\s*$', '', comments)
    with open('jd_comments.json', 'w', encoding='UTF-8') as file:
        file.write(comments)
def get_mi_sku_and_product_info_from_api(browser: Chrome, shop: Shop):
    """Store SKU and spec info from the ``api/gateway/detail`` response.

    For each entry under ``data.goods.productInfo`` a ``MiSku`` row is
    fetched or created.  The first attribute value is split on ``+`` into
    RAM and ROM, and the second attribute value is stored as the colour.
    Nothing is stored unless the response ``message`` is ``ok``.
    """
    payload = json.loads(get_response_body(browser, 'api/gateway/detail', 'POST'))
    if payload['message'] != 'ok':
        print('---获取商品SKU和规格信息失败---')
        return

    for info in payload['data']['goods']['productInfo'].values():
        # attributeValues[0] is split on "+" into RAM and ROM.
        memory_parts = info['attributeValues'][0].split('+')
        ram = memory_parts[0]
        rom = memory_parts[1]
        sku_id = str(info['mapId'])
        _, created = MiSku.get_or_create(
            source=shop.source,
            is_official=shop.is_official,
            sku=sku_id,
            product_color=info['attributeValues'][1],
            product_ram=ram,
            product_rom=rom,
        )
        if created is False:
            print(f'SKU: {sku_id} 已存在')
    print('-----保存商品SKU和规格信息成功------')
def get_comment_content(browser: Chrome):
    """Dump the Youpin comment-content response to ``xmyp_comment_content.json``.

    Opens the hard-coded Youpin product page, scrolls down, clicks the
    element at a fixed XPath (the comments tab, per the original comment),
    then reads the ``comment/product/content`` POST response through
    ``get_response_body`` and writes it verbatim to a local file.  If no
    response body is available the file is left untouched.

    Args:
        browser: Chrome driver used to load and interact with the page.
    """
    # Xiaomi 10 Ultra, Xiaomi self-operated listing.
    browser.get(
        'https://www.xiaomiyoupin.com/detail?gid=134230&spmref=YouPinPC.$SearchFilter$1.search_list.1.4643522'
    )
    # Smooth-scroll the window down by 800px.
    js = 'window.scrollBy({top:800, left:0, behavior: "smooth"})'
    browser.execute_script(js)
    sleep(1)
    # Select the product-comments tab and click it.  find_element(By.XPATH,
    # ...) is used because find_element_by_xpath was removed in Selenium 4.
    browser.find_element(
        By.XPATH,
        '/html/body/div[1]/div/div[3]/div/div[1]/div[2]/div[1]/div[1]/ul/li[2]'
    ).click()
    sleep(1)
    # Fetch the comment detail response.
    index_url = 'https://www.xiaomiyoupin.com/mtop/market/comment/product/content'
    index = get_response_body(browser, index_url, 'POST')
    if index is None:
        # Check before opening: mode 'w' truncates the file immediately, and
        # writing None would then raise TypeError and leave it empty.
        print('---未找到评论接口数据---')
        return
    with open('xmyp_comment_content.json', 'w', encoding='UTF-8') as file:
        file.write(index)
def get_mi10_data_from_mishop(browser: Chrome):
    """Collect comments for every Shop row whose source is '小米商城'.

    For each shop the URL is opened and pages are processed while
    ``current_page <= max_page``.  Page 0 is delegated to
    ``get_sku_info_and_summary_and_first_page_comments``, whose return value
    becomes ``max_page``.  Later pages read the ``user_comment/get_list``
    response, unwrap its JSONP envelope and store the comments.  After each
    iteration ``turn_to_the_next_page`` is called.
    """
    mi_shops = Shop.select().where(Shop.source == '小米商城')
    for mishop in mi_shops:
        print(f'------打开当前小米商城商品评论链接: {mishop.url}------')
        browser.get(mishop.url)
        current_page, max_page = 0, 141
        while current_page <= max_page:
            try:
                if current_page != 0:
                    # Read and unwrap this page's comment data.
                    raw_body = get_response_body(
                        browser, 'user_comment/get_list', 'GET')
                    without_tail = raw_body.rstrip(');')
                    payload = json.loads(re.sub(r'^\w+\(', '', without_tail))
                    if payload['msg'] != 'success':
                        print(f'---获取第{current_page + 1}页评论数据异常---')
                        break
                    # The response reports which page it belongs to.
                    current_page = payload['data']['page_current']
                    insert_mishop_comments(payload['data']['comments'])
                else:
                    max_page = get_sku_info_and_summary_and_first_page_comments(
                        browser)
                    current_page += 1
            except WebDriverException:
                print(
                    f'---获取第{current_page}页评论异常(WebDriverException), 尝试翻到下一页---'
                )
            print(f'当前页数: {current_page}')
            turn_to_the_next_page(browser)
        print('------评论获取阶段结束------')
    print('------小米商城数据获取完成------')
def get_sn_comments(browser: Chrome, sn_ss: Union[Shop, SNSku], sku_mode: bool = False):
    """Page through Suning comments and persist every page.

    On the first page in SKU mode both the review list and the review-count
    responses are read through ``get_response_body_list`` and the model
    summary is stored.  Otherwise a single review-list response is read
    through ``get_response_body``.  Paging stops when the response does not
    report a successful review list or when a ``WebDriverException``,
    ``AttributeError`` or ``TypeError`` is raised (a missing response body
    surfaces as ``TypeError`` from the JSONP helper).  Afterwards
    ``back_to_first_window`` is called and the function sleeps 10 seconds.

    Args:
        browser: Chrome driver passed to the response/paging helpers.
        sn_ss: Shop or SNSku record forwarded to the insert helpers.
        sku_mode: When true, use the ``general`` review endpoint and store
            the per-model summary on the first page.
    """
    page = 1
    while True:
        try:
            # Fetch the comments of the current page.
            if sku_mode is True and page == 1:
                sn_comments = {}
                sn_model_summary = {}
                target_urls = [
                    {'url': 'cluster_review_lists/general', 'method': 'GET'},
                    {'url': 'review_count/general', 'method': 'GET'}
                ]
                all_data = get_response_body_list(browser, target_urls)
                for data in all_data:
                    if data['url'] == 'cluster_review_lists/general' and data['method'] == 'GET':
                        sn_comments = _load_sn_jsonp(data['response_body'])
                    if data['url'] == 'review_count/general' and data['method'] == 'GET':
                        sn_model_summary = _load_sn_jsonp(data['response_body'])
                if sn_comments['returnMsg'] == '无评价数据':
                    print('---无评价数据, 跳过此SKU---')
                    break
                else:
                    if sn_model_summary['returnMsg'] == '查询数量成功':
                        insert_sn_model_summary(sn_model_summary['reviewCounts'][0],
                                                sn_comments['commodityReviews'][0]['commodityInfo'],
                                                sn_ss)
                    else:
                        print('---查询当前SKU评论统计数量失败---')
            else:
                if sku_mode is False:
                    sn_comments_url = 'cluster_review_lists/cluster'
                else:
                    sn_comments_url = 'cluster_review_lists/general'
                sn_comments = _load_sn_jsonp(get_response_body(browser, sn_comments_url, 'GET'))
            # Persist the comments.
            if sn_comments['returnMsg'] == '成功取得评价列表':
                comment_list = sn_comments['commodityReviews']
                insert_sn_comments(comment_list, sn_ss)
            else:
                # The pager tops out at 50 pages; a failure at or before
                # page 50 is unexpected, so report it.
                if page <= 50:
                    print(f'---获取第{page}页评论数据异常---')
                break
        except (WebDriverException, AttributeError, TypeError):
            print(f'---获取第{page}页评论数据异常, 跳过此轮---')
            break
        print(f'当前页数: {page}')
        # Scroll down until the "next page" button is clickable, then click.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CSS_SELECTOR, '.next.rv-maidian'))
                )
                browser.execute_script('document.getElementsByClassName("next rv-maidian")[0].click()')
                waiting_content_loading(browser, 'rv-target-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 500)
        page += 1
    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)


def _load_sn_jsonp(raw):
    """Strip a ``callback( ... )`` JSONP wrapper from *raw* and parse the JSON.

    Anchored patterns remove only the wrapper itself; ``str.lstrip`` with the
    callback name would treat it as a character set.  Passing ``None`` raises
    ``TypeError`` from ``re.sub``.
    """
    unwrapped = re.sub(r'^\s*\w+\(', '', raw)
    unwrapped = re.sub(r'\)\s*;?\s*$', '', unwrapped)
    return json.loads(unwrapped)
def get_youpin_comments(browser: Chrome, shop: Shop):
    """Page through Xiaomi Youpin comments and persist every page.

    The first page reads both the comment index and comment content
    responses through ``get_response_body_list``.  The total page count is
    derived from ``data.page.total`` as ``(total + 9) // 10``, and a
    ``CommentSummary`` row is created when the index ``message`` is ``ok``.
    Later pages read only the content response.  After each iteration
    ``turn_to_the_next_page`` is called and the page counter advances.
    """
    index_url = 'comment/product/index'
    content_url = 'comment/product/content'
    page, max_page = 1, 141
    while page <= max_page:
        try:
            if page != 1:
                body = get_response_body(browser, content_url, 'POST')
                if body is None:
                    print('---未找到评论接口数据---')
                    break
                comment_content = json.loads(body)
            else:
                # First page: comments plus the comment statistics.
                comment_index = {}
                comment_content = {}
                target_urls = [
                    {'url': index_url, 'method': 'POST'},
                    {'url': content_url, 'method': 'POST'},
                ]
                for entry in get_response_body_list(browser, target_urls):
                    if entry['url'] == index_url and entry['method'] == 'POST':
                        comment_index = json.loads(entry['response_body'])
                    elif entry['url'] == content_url and entry['method'] == 'POST':
                        comment_content = json.loads(entry['response_body'])
                # Page count from the reported total: (total + 9) // 10.
                max_page = (comment_content['data']['page']['total'] + 9) // 10
                print(f'---评论总页数: {max_page} 页')
                # Store the comment statistics.
                if comment_index['message'] != 'ok':
                    print('---查询评论统计数量失败---')
                else:
                    stats = comment_index['data']
                    CommentSummary.create(
                        source=shop.source,
                        is_official=shop.is_official,
                        total=stats['total_count'],
                        good_rate=stats['positive_rate'],
                    )
                    print('------保存评论统计数量成功------')
            # Persist the comments of this page.
            if comment_content['message'] != 'ok':
                print(f'---获取第{page}页评论数据异常---')
                break
            insert_youpin_comments(comment_content['data']['list'], shop)
        except WebDriverException:
            print(f'---获取第{page}页评论数据异常(WebDriverException), 尝试翻到下一页---')
        print(f'当前页数: {page}')
        # Scroll down and click through to the next page.
        turn_to_the_next_page(browser)
        page += 1
    print('------评论获取阶段结束------')