def switch_to_sn_default_comments_page(browser: Chrome, shop_url: str):
    """Load a product's comment section in a second window and open its first tab.

    Steps, in order: call ``open_second_window``, navigate to ``shop_url``
    with the ``#productCommTitle`` fragment appended, click the first link
    inside ``#productCommTitle`` through JavaScript, then call
    ``waiting_content_loading`` with ``'rv-target-item'``.

    Args:
        browser: Chrome driver instance to operate on.
        shop_url: Page URL; the fragment is appended to it verbatim.
    """
    click_first_comment_tab = (
        'document.querySelector("#productCommTitle > a:nth-child(1)").click()'
    )

    open_second_window(browser)
    print('------打开新窗口并正在加载默认评论页面------')

    comments_url = shop_url + '#productCommTitle'
    browser.get(comments_url)
    browser.execute_script(click_first_comment_tab)
    print('------默认评论页面加载完成------')

    # NOTE(review): presumably blocks until 'rv-target-item' elements are
    # present - confirm against waiting_content_loading.
    waiting_content_loading(browser, 'rv-target-item')
def switch_to_jd_sku_comments_page(browser: Chrome, sku_url: str):
    """Load a SKU page's comment section in a second window, filtered to that SKU.

    Steps, in order: call ``open_second_window``, navigate to ``sku_url``
    with the ``#comment`` fragment appended, click the element whose id is
    ``comm-curr-sku`` through JavaScript, then call
    ``waiting_content_loading`` with ``'comment-item'``.

    Args:
        browser: Chrome driver instance to operate on.
        sku_url: SKU page URL; the fragment is appended to it verbatim.
    """
    click_current_sku_filter = 'document.getElementById("comm-curr-sku").click()'

    open_second_window(browser)
    print('------打开新窗口并正在加载当前SKU默认评论页面------')

    comments_url = sku_url + '#comment'
    browser.get(comments_url)
    browser.execute_script(click_current_sku_filter)
    print('------当前SKU默认评论页面加载完成------')

    # NOTE(review): presumably blocks until 'comment-item' elements are
    # present - confirm against waiting_content_loading.
    waiting_content_loading(browser, 'comment-item')
def _unwrap_jd_jsonp(payload: str) -> str:
    """Return the JSON text carried by a (possibly JSONP-wrapped) body.

    A body that already starts with ``{`` or ``[`` is returned stripped of
    surrounding whitespace.  Otherwise the text between the first ``(`` and
    the last ``)`` is returned, which removes a ``callback(...);`` wrapper
    regardless of the callback name.  If no such pair exists the stripped
    text is returned unchanged so that ``json.loads`` reports the problem.

    ``str.lstrip``/``str.rstrip`` are deliberately not used here: their
    argument is a *set of characters*, not a prefix or suffix.
    """
    text = payload.strip()
    if text[:1] in ('{', '['):
        return text
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        return text
    return text[start + 1:end]


def get_jd_comments(browser: Chrome, jd_ss: Union[Shop, JDSku], get_sku: bool = False,
                    sku_mode: bool = False, summary: bool = False):
    """Walk the comment pages currently open in ``browser`` and store each page.

    For every page the captured comment-API response is fetched with
    ``get_response_body``, unwrapped from its JSONP callback, parsed, and
    its ``comments`` list is passed to ``insert_jd_comments``.  The number
    of pages to visit is taken from ``maxPage`` in the first response.
    After the loop ends (for any reason other than an uncaught exception)
    ``back_to_first_window`` is called and the function sleeps 10 seconds.

    Args:
        browser: Chrome driver instance showing the comment list.
        jd_ss: Shop or SKU object forwarded to the ``insert_*`` helpers.
        get_sku: When ``True``, also call ``get_sku_from_jd_comments`` on
            every non-empty page.
        sku_mode: When ``True``, read the ``skuProductPageComments``
            response instead of ``productPageComments``.
        summary: When truthy, store ``productCommentSummary`` from the
            first page: via ``insert_jd_model_summary`` if ``sku_mode`` is
            also truthy, otherwise (only when ``summary is True``) via
            ``insert_jd_comment_summary``.
    """
    # Placeholder; replaced by the API's 'maxPage' after the first page.
    max_page = 1
    first_page = True

    while max_page > 0:
        try:
            # Fetch the comments of the current page.
            if sku_mode is True:
                jd_comments_url = 'skuProductPageComments'
            else:
                jd_comments_url = 'productPageComments'
            raw_body = get_response_body(browser, jd_comments_url, 'GET')
            if raw_body is None:
                print('---未找到评论接口数据---')
                break
            jd_comments = json.loads(_unwrap_jd_jsonp(raw_body))

            # Persist the comments (done before the emptiness check, as the
            # insert helper has always been called with the raw list).
            comment_list = jd_comments['comments']
            insert_jd_comments(comment_list, jd_ss)
            if not comment_list:
                print('该页评论数据0条')
                break

            # Collect every SKU mentioned in the comments.
            if get_sku is True:
                get_sku_from_jd_comments(comment_list, jd_ss)
        except WebDriverException:
            print('---此页评论数据获取异常(WebDriverException), 跳过此分类---')
            break

        # Only the first page sets the page budget and stores the summary.
        if first_page:
            first_page = False
            max_page = jd_comments['maxPage']
            if sku_mode and summary:
                sku_summary = jd_comments['productCommentSummary']
                first_comment = comment_list[0]
                insert_jd_model_summary(sku_summary, first_comment, jd_ss)
            elif summary is True:
                total_summary = jd_comments['productCommentSummary']
                insert_jd_comment_summary(total_summary, jd_ss)

        # Do not try to page forward from the last page.  '<=' also covers
        # a reported maxPage of 0, which would otherwise go negative and
        # attempt one more "next page" click.
        max_page -= 1
        print(f'本轮剩余页数: {max_page}')
        if max_page <= 0:
            break

        # Scroll down until the "next page" button can be clicked.  A
        # TimeoutException from any step of the attempt scrolls by 200 and
        # retries; the number of retries is not bounded.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CLASS_NAME, 'ui-pager-next'))
                )
                browser.execute_script(
                    'document.getElementsByClassName("ui-pager-next")[0].click()')
                waiting_content_loading(browser, 'comment-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 200)

    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def turn_to_the_next_page(browser: Chrome):
    """Click the ``.more`` element, scrolling down until an attempt succeeds.

    One attempt waits up to 0.5 s for an element with class ``more`` to
    become clickable, clicks it through JavaScript and then calls
    ``waiting_content_loading`` with ``'common'``.  A ``TimeoutException``
    raised by any step of the attempt leads to
    ``window_scroll_by(browser, 500)`` followed by a new attempt; the
    number of attempts is not bounded.

    Args:
        browser: Chrome driver instance showing the page.
    """
    more_button = (By.CLASS_NAME, 'more')
    advanced = False
    while not advanced:
        try:
            WebDriverWait(browser, 0.5).until(
                ec.element_to_be_clickable(more_button))
            browser.execute_script('document.querySelector(".more").click()')
            waiting_content_loading(browser, 'common')
        except TimeoutException:
            # Button not reachable yet: scroll further down and retry.
            window_scroll_by(browser, 500)
        else:
            advanced = True
def turn_to_the_next_page(browser: Chrome):
    """Click the pagination link in the 8th ``li.m-pagination-item``.

    One attempt waits up to 0.5 s for the link matched by
    ``li.m-pagination-item:nth-child(8) > a:nth-child(1)`` to become
    clickable, clicks it through JavaScript and then calls
    ``waiting_content_loading`` with ``'commentItem'``.  A
    ``TimeoutException`` raised by any step of the attempt leads to
    ``window_scroll_by(browser, 500)`` followed by a new attempt; the
    number of attempts is not bounded.

    Args:
        browser: Chrome driver instance showing the page.
    """
    next_link = (
        By.CSS_SELECTOR,
        'li.m-pagination-item:nth-child(8) > a:nth-child(1)',
    )
    click_next_link = (
        'document.querySelector("li.m-pagination-item:nth-child(8) > a:nth-child(1)").click()'
    )

    advanced = False
    while not advanced:
        try:
            WebDriverWait(browser, 0.5).until(
                ec.element_to_be_clickable(next_link))
            browser.execute_script(click_next_link)
            waiting_content_loading(browser, 'commentItem')
        except TimeoutException:
            # Link not reachable yet: scroll further down and retry.
            window_scroll_by(browser, 500)
        else:
            advanced = True
def switch_to_jd_default_comments_page(browser: Chrome, shop_url: str):
    """Load a product page's comment section in a second window.

    Steps, in order: call ``open_second_window``, navigate to ``shop_url``
    with the ``#comment`` fragment appended, then call
    ``waiting_content_loading`` with ``'comment-item'``.  Unlike the SKU
    variant, no filter element is clicked.

    Args:
        browser: Chrome driver instance to operate on.
        shop_url: Page URL; the fragment is appended to it verbatim.
    """
    open_second_window(browser)
    print('------打开新窗口并正在加载默认评论页面------')

    comments_url = shop_url + '#comment'
    browser.get(comments_url)
    print('------默认评论页面加载完成------')

    # NOTE(review): presumably blocks until 'comment-item' elements are
    # present - confirm against waiting_content_loading.
    waiting_content_loading(browser, 'comment-item')
def switch_to_jd_time_sort(browser: Chrome):
    """Click the second sort option of the comment list and wait for a reload.

    The element matched by ``li.J-sortType-item:nth-child(2)`` is clicked
    through JavaScript (the log message identifies it as the time sort),
    after which ``waiting_content_loading`` is called with
    ``'comment-item'``.

    Args:
        browser: Chrome driver instance showing the comment list.
    """
    click_second_sort_option = (
        'document.querySelector("li.J-sortType-item:nth-child(2)").click()'
    )
    browser.execute_script(click_second_sort_option)
    print('------切换到时间排序------')
    waiting_content_loading(browser, 'comment-item')
def _unwrap_sn_jsonp(payload: str) -> str:
    """Return the JSON text carried by a (possibly JSONP-wrapped) body.

    A body that already starts with ``{`` or ``[`` is returned stripped of
    surrounding whitespace.  Otherwise the text between the first ``(`` and
    the last ``)`` is returned, which removes a ``callback(...)`` wrapper
    regardless of the callback name.  If no such pair exists the stripped
    text is returned unchanged so that ``json.loads`` reports the problem.

    ``str.lstrip``/``str.rstrip`` are deliberately not used here: their
    argument is a *set of characters*, not a prefix or suffix.  A
    non-string ``payload`` (e.g. ``None``) raises ``AttributeError``.
    """
    text = payload.strip()
    if text[:1] in ('{', '['):
        return text
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        return text
    return text[start + 1:end]


def get_sn_comments(browser: Chrome, sn_ss: Union[Shop, SNSku], sku_mode: bool = False):
    """Walk the comment pages currently open in ``browser`` and store each page.

    Every page's captured review-list response is unwrapped from its JSONP
    callback, parsed, and - when ``returnMsg`` reports success - its
    ``commodityReviews`` list is passed to ``insert_sn_comments``.  Paging
    stops at the first response that does not report success or when data
    retrieval raises one of the handled exceptions.  After the loop ends
    ``back_to_first_window`` is called and the function sleeps 10 seconds.

    Args:
        browser: Chrome driver instance showing the comment list.
        sn_ss: Shop or SKU object forwarded to the ``insert_*`` helpers.
        sku_mode: When ``True``, the ``cluster_review_lists/general``
            response is read instead of ``cluster_review_lists/cluster``,
            and on the first page the ``review_count/general`` response is
            read as well and stored with ``insert_sn_model_summary``.
    """
    page = 1
    while True:
        try:
            # Fetch the comments of the current page.
            if sku_mode is True and page == 1:
                # First SKU page: the review list and the review counts are
                # taken from the same batch of captured responses.
                sn_comments = {}
                sn_model_summary = {}
                target_urls = [
                    {'url': 'cluster_review_lists/general', 'method': 'GET'},
                    {'url': 'review_count/general', 'method': 'GET'}
                ]
                all_data = get_response_body_list(browser, target_urls)
                for data in all_data:
                    if data['url'] == 'cluster_review_lists/general' and data['method'] == 'GET':
                        sn_comments = json.loads(_unwrap_sn_jsonp(data['response_body']))
                    if data['url'] == 'review_count/general' and data['method'] == 'GET':
                        sn_model_summary = json.loads(_unwrap_sn_jsonp(data['response_body']))

                # If either response was not captured the dict is still
                # empty and the lookups below raise KeyError, which is
                # handled together with the other data errors.
                if sn_comments['returnMsg'] == '无评价数据':
                    print('---无评价数据, 跳过此SKU---')
                    break
                elif sn_model_summary['returnMsg'] == '查询数量成功':
                    insert_sn_model_summary(
                        sn_model_summary['reviewCounts'][0],
                        sn_comments['commodityReviews'][0]['commodityInfo'],
                        sn_ss)
                else:
                    print('---查询当前SKU评论统计数量失败---')
            else:
                if sku_mode is False:
                    sn_comments_url = 'cluster_review_lists/cluster'
                else:
                    sn_comments_url = 'cluster_review_lists/general'
                raw_body = get_response_body(browser, sn_comments_url, 'GET')
                sn_comments = json.loads(_unwrap_sn_jsonp(raw_body))

            # Persist the comments.
            if sn_comments['returnMsg'] == '成功取得评价列表':
                comment_list = sn_comments['commodityReviews']
                insert_sn_comments(comment_list, sn_ss)
            else:
                # The list has at most 50 pages; a failure at or below page
                # 50 is unexpected and therefore reported.
                if page <= 50:
                    print(f'---获取第{page}页评论数据异常---')
                break
        except (WebDriverException, AttributeError, TypeError, KeyError, IndexError):
            # KeyError/IndexError cover missing or empty response data,
            # e.g. the '{}' fallbacks above when nothing was captured.
            print(f'---获取第{page}页评论数据异常, 跳过此轮---')
            break

        print(f'当前页数: {page}')

        # Scroll down until the "next page" button can be clicked.  A
        # TimeoutException from any step of the attempt scrolls by 500 and
        # retries; the number of retries is not bounded.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CSS_SELECTOR, '.next.rv-maidian'))
                )
                browser.execute_script(
                    'document.getElementsByClassName("next rv-maidian")[0].click()')
                waiting_content_loading(browser, 'rv-target-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 500)
        page += 1

    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def switch_to_youpin_default_comments_page(browser: Chrome):
    """Scroll down and open the second entry of the page's info navigation.

    Calls ``window_scroll_by(browser, 800)``, clicks the element matched by
    ``li.info-nav-item:nth-child(2)`` through JavaScript, then calls
    ``waiting_content_loading`` with ``'commentItem'``.

    Args:
        browser: Chrome driver instance showing the product page.
    """
    click_second_nav_item = (
        'document.querySelector("li.info-nav-item:nth-child(2)").click()'
    )
    window_scroll_by(browser, 800)
    browser.execute_script(click_second_nav_item)
    # NOTE(review): presumably blocks until 'commentItem' elements are
    # present - confirm against waiting_content_loading.
    waiting_content_loading(browser, 'commentItem')