Ejemplo n.º 1
0
def get_jd_comments(browser: Chrome, jd_ss: Union[Shop, JDSku], get_sku: bool = False,
                    sku_mode: bool = False, summary: bool = False):
    """Collect JD comments page by page and pass each page to the insert helpers.

    For every page the captured comment response is read with
    ``get_response_body``, unwrapped from its JSONP callback, parsed as JSON
    and its ``comments`` list is passed to ``insert_jd_comments``. The number
    of pages to visit is taken from ``maxPage`` in the first response. Paging
    stops early when no response is found, when a page has no comments, or
    when a ``WebDriverException`` is raised while handling a page.

    After the loop ``back_to_first_window(browser)`` is called and the
    function sleeps for 10 seconds.

    Args:
        browser: Chrome driver positioned on the product's comment section.
        jd_ss: Shop or SKU record forwarded unchanged to the insert helpers.
        get_sku: When ``True``, each page's comments are also passed to
            ``get_sku_from_jd_comments``.
        sku_mode: When ``True``, read the ``skuProductPageComments`` response
            instead of ``productPageComments``.
        summary: When set, store ``productCommentSummary`` from the first
            page (via ``insert_jd_model_summary`` in SKU mode, otherwise via
            ``insert_jd_comment_summary``).
    """
    # The endpoint fragment never changes between pages, so choose it once.
    if sku_mode is True:
        jd_comments_url = 'skuProductPageComments'
    else:
        jd_comments_url = 'productPageComments'

    # Placeholder budget, replaced by the response's ``maxPage`` after page one.
    max_page = 141
    # Explicit flag instead of comparing against the placeholder value: a real
    # ``maxPage`` above 141 would otherwise count down to 141 and be mistaken
    # for "first page" again, resetting the counter and re-saving the summary.
    first_page = True
    while max_page > 0:
        try:
            # Fetch the comments of the current page.
            jd_comments = get_response_body(browser, jd_comments_url, 'GET')
            if jd_comments is None:
                print('---未找到评论接口数据---')
                break
            # Unwrap the JSONP envelope ``callback(...);``. ``str.lstrip`` takes
            # a set of characters, not a prefix, so cut at the first "("
            # instead; this works for any callback name.
            jd_comments = jd_comments.strip()
            if not jd_comments.startswith(('{', '[')):
                jd_comments = jd_comments[jd_comments.find('(') + 1:]
            jd_comments = json.loads(jd_comments.rstrip(');'))
            # Save the comments.
            comment_list = jd_comments['comments']
            insert_jd_comments(comment_list, jd_ss)
            if not comment_list:
                print('该页评论数据0条')
                break
            # Walk through all SKUs mentioned in the comments.
            if get_sku is True:
                get_sku_from_jd_comments(comment_list, jd_ss)
        except WebDriverException:
            print('---此页评论数据获取异常(WebDriverException), 跳过此分类---')
            break

        # The first response carries the real page count and the summary.
        if first_page:
            first_page = False
            max_page = jd_comments['maxPage']
            if sku_mode and summary:
                sku_summary = jd_comments['productCommentSummary']
                # comment_list is non-empty here (the empty case broke out above).
                first_comment = comment_list[0]
                insert_jd_model_summary(sku_summary, first_comment, jd_ss)
            elif summary is True:
                total_summary = jd_comments['productCommentSummary']
                insert_jd_comment_summary(total_summary, jd_ss)

        # No need to scroll/click after the last page.
        max_page -= 1
        print(f'本轮剩余页数: {max_page}')
        if max_page == 0:
            break

        # Scroll down until the "next page" button is clickable, then click it.
        # NOTE(review): this retry loop has no upper bound; it spins forever if
        # the pager never becomes clickable.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CLASS_NAME, 'ui-pager-next'))
                )
                browser.execute_script('document.getElementsByClassName("ui-pager-next")[0].click()')
                waiting_content_loading(browser, 'comment-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 200)

    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def get_jd_sku_from_api(browser: Chrome):
    """Store every SKU id listed in the captured ``type=getstocks`` response.

    The response is a JSONP payload; its callback wrapper is removed, the
    remainder is parsed as JSON and each top-level key is saved with
    ``JDSku.get_or_create``.

    Args:
        browser: Chrome driver whose captured traffic is searched.

    Returns:
        None. When no matching response was captured, a message is printed
        and nothing is saved.
    """
    jd_sku_url = 'type=getstocks'
    skus = get_response_body(browser, jd_sku_url, 'GET')
    if skus is None:
        # get_response_body yields None when the request was not captured;
        # calling .rstrip on it would raise AttributeError.
        print('---当前商品所有SKU编号获取失败, 可能是单SKU商品---')
        return
    # Strip the JSONP wrapper: trailing ")" first, then the leading "callback(".
    skus = skus.rstrip(')')
    skus = re.sub(r'^\w+?\(', '', skus)
    skus = json.loads(skus)
    for key in skus.keys():
        JDSku.get_or_create(sku=key)
    print('------获取已上架SKU完成------')
def get_comment_score(browser: Chrome):
    """Open a Vmall product page and dump its comment-score response to disk.

    The body of the captured ``getCommentScore.json`` POST response is written
    unchanged to ``hwsc_comment_score.json`` in the working directory.

    Args:
        browser: Chrome driver used to load the page and read captured traffic.

    Returns:
        None. When no response was captured, a message is printed and the
        output file is left untouched.
    """
    # Huawei Mate 30 Pro 5G on the Huawei Vmall store.
    browser.get('https://www.vmall.com/product/10086775311605.html')
    # Fetch the comment statistics and save them to a local JSON file.
    comment_score_url = 'https://openapi.vmall.com/rms/comment/getCommentScore.json'
    comment_score = get_response_body(browser, comment_score_url, 'POST')
    if comment_score is None:
        # Check before opening the file: opening with "w" truncates it, and
        # write(None) would then raise TypeError and leave an empty file.
        print('---未找到评论接口数据---')
        return
    with open('hwsc_comment_score.json', 'w', encoding='UTF-8') as file:
        file.write(comment_score)
Ejemplo n.º 4
0
def get_detail(browser: Chrome):
    """Open a Xiaomi Youpin product page and dump its detail response to disk.

    The body of the captured ``api/gateway/detail`` POST response is written
    unchanged to ``xmyp_detail.json`` in the working directory.

    Args:
        browser: Chrome driver used to load the page and read captured traffic.

    Returns:
        None. When no response was captured, a message is printed and the
        output file is left untouched.
    """
    # Xiaomi 10 Ultra, Xiaomi self-operated store.
    browser.get(
        'https://www.xiaomiyoupin.com/detail?gid=134230&spmref=YouPinPC.$SearchFilter$1.search_list.1.4643522'
    )
    sleep(1)
    # Fetch the Youpin product info and save it to a local JSON file.
    detail_url = 'https://www.xiaomiyoupin.com/api/gateway/detail'
    detail = get_response_body(browser, detail_url, 'POST')
    if detail is None:
        # Check before opening the file: opening with "w" truncates it, and
        # write(None) would then raise TypeError and leave an empty file.
        print('---获取商品SKU和规格信息失败---')
        return
    with open('xmyp_detail.json', 'w', encoding='UTF-8') as file:
        file.write(detail)
Ejemplo n.º 5
0
def get_jd_sku_from_api(browser: Chrome, shop: Shop):
    """Create a ``Sku`` row for every SKU id in the ``type=getstocks`` response.

    The response is a JSONP payload; its callback wrapper is removed, the
    remainder is parsed as JSON and each top-level key becomes one ``Sku``
    row that copies ``source`` and ``is_official`` from ``shop``.

    Args:
        browser: Chrome driver whose captured traffic is searched.
        shop: Shop record the new SKU rows are linked to.

    Returns:
        None. When no matching response was captured, a message is printed
        and nothing is created.
    """
    jd_sku_url = 'type=getstocks'
    skus = get_response_body(browser, jd_sku_url, 'GET')
    if skus is None:
        # get_response_body yields None when the request was not captured;
        # calling .rstrip on it would raise AttributeError.
        print('---当前商品所有SKU编号获取失败, 可能是单SKU商品---')
        return
    # Strip the JSONP wrapper: trailing ")" first, then the leading "callback(".
    skus = skus.rstrip(')')
    skus = re.sub(r'^\w+?\(', '', skus)
    skus = json.loads(skus)
    for key in skus.keys():
        Sku.create(source=shop.source,
                   is_official=shop.is_official,
                   sku=key,
                   url_prefix='https://item.jd.com/',
                   shop=shop)
    print('------获取已上架SKU完成------')
def get_jd_sku_from_api(browser: Chrome, sku: str):
    """Save the SKU ids of the current JD product as ``ExistedSku`` rows.

    Reads the captured ``type=getstocks`` response. When it is available,
    every top-level key of the unwrapped JSON payload is saved with
    ``ExistedSku.get_or_create``. When it is missing, only ``sku`` itself is
    saved with ``ExistedSku.create``.

    Args:
        browser: Chrome driver whose captured traffic is searched.
        sku: SKU id of the product currently open, used as the fallback.
    """
    raw_stocks = get_response_body(browser, 'type=getstocks', 'GET')

    if raw_stocks is not None:
        # Drop the JSONP wrapper: trailing ")" first, then the "callback(" head.
        unwrapped = re.sub(r'^\w+?\(', '', raw_stocks.rstrip(')'))
        stock_map = json.loads(unwrapped)
        for listed_sku in stock_map.keys():
            ExistedSku.get_or_create(source='京东', sku=listed_sku)
        print('------保存已上架SKU完成------')
        return

    # No stock response captured: record just the SKU we were given.
    ExistedSku.create(source='京东', sku=sku)
    print('---当前商品所有SKU编号获取失败, 可能是单SKU商品---')
def get_jd_sku_from_api(browser: Chrome, sku: str):
    """Save the SKU ids of the current JD product as ``JDExistedSku`` rows.

    Reads the captured ``type=getstocks`` response and saves every top-level
    key of the unwrapped JSON payload. If the response is missing, or a
    ``WebDriverException`` / ``JSONDecodeError`` is raised along the way, only
    ``sku`` itself is saved.

    Args:
        browser: Chrome driver whose captured traffic is searched.
        sku: SKU id of the product currently open, used as the fallback.
    """
    try:
        response = get_response_body(browser, 'type=getstocks', 'GET')
        if response is None:
            # Route the "nothing captured" case through the same fallback
            # as a driver failure.
            raise WebDriverException()

        # Drop the JSONP wrapper: trailing ")" first, then the "callback(" head.
        stock_map = json.loads(re.sub(r'^\w+?\(', '', response.rstrip(')')))
        for listed_sku in stock_map.keys():
            JDExistedSku.get_or_create(sku=listed_sku)
    except (WebDriverException, JSONDecodeError):
        JDExistedSku.get_or_create(sku=sku)
        print('------当前商品是单SKU商品------')
    else:
        print('------保存已上架SKU完成------')
def get_jd_comments(browser: Chrome):
    """Open a JD product page and dump its first comment page to disk.

    Loads the product page, scrolls down, clicks the comment tab, then writes
    the captured ``productPageComments`` response (with its JSONP wrapper
    removed) to ``jd_comments.json`` in the working directory.

    Args:
        browser: Chrome driver used to load the page and read captured traffic.

    Returns:
        None. When no response was captured, a message is printed and the
        output file is left untouched.
    """
    # Xiaomi 10 Ultra, JD self-operated flagship store.
    browser.get('https://item.jd.com/100014565800.html')
    # JS snippet that simulates scrolling.
    js = 'window.scrollBy({top:1000, left:0, behavior: "smooth"})'
    browser.execute_script(js)
    sleep(1)
    # Select the product-comment tab and click it.
    browser.find_element_by_xpath(
        '/html/body/div[10]/div[2]/div[1]/div[1]/ul/li[5]').click()
    sleep(1)
    # Fetch the JD comment API data and save it to a local JSON file.
    page_comment_url = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98' \
                       '&productId=100014565800&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1'
    comments = get_response_body(browser, page_comment_url, 'GET')
    if comments is None:
        # Check before touching the string or the file: None has no string
        # methods, and opening with "w" would truncate an existing dump.
        print('---未找到评论接口数据---')
        return
    # Unwrap the JSONP envelope ``callback(...);``. ``str.lstrip`` takes a set
    # of characters, not a prefix, so cut at the first "(" instead.
    comments = comments.strip()
    if not comments.startswith(('{', '[')):
        comments = comments[comments.find('(') + 1:]
    comments = comments.rstrip(');')
    with open('jd_comments.json', 'w', encoding='UTF-8') as file:
        file.write(comments)
def get_mi_sku_and_product_info_from_api(browser: Chrome, shop: Shop):
    """Save SKU, colour, RAM and ROM for every variant in the detail response.

    Reads the captured ``api/gateway/detail`` POST response. When its
    ``message`` is ``'ok'``, each entry under ``data.goods.productInfo`` is
    saved with ``MiSku.get_or_create``. The first attribute value is split on
    ``+`` into RAM and ROM; the second is used as the colour.

    Args:
        browser: Chrome driver whose captured traffic is searched.
        shop: Shop record supplying ``source`` and ``is_official``.

    Returns:
        None. A failure message is printed when the response is missing or
        its ``message`` is not ``'ok'``.
    """
    detail_url = 'api/gateway/detail'
    detail = get_response_body(browser, detail_url, 'POST')
    if detail is None:
        # json.loads(None) would raise TypeError; report it as a normal failure.
        print('---获取商品SKU和规格信息失败---')
        return

    detail = json.loads(detail)
    if detail['message'] != 'ok':
        print('---获取商品SKU和规格信息失败---')
        return

    for info in detail['data']['goods']['productInfo'].values():
        # The first attribute value has the form "<ram>+<rom>".
        ram_and_rom = info['attributeValues'][0].split('+')
        ram = ram_and_rom[0]
        rom = ram_and_rom[1]
        sku_id = str(info['mapId'])
        _, created = MiSku.get_or_create(
            source=shop.source,
            is_official=shop.is_official,
            sku=sku_id,
            product_color=info['attributeValues'][1],
            product_ram=ram,
            product_rom=rom)
        if not created:
            print(f'SKU: {sku_id} 已存在')
    print('-----保存商品SKU和规格信息成功------')
Ejemplo n.º 10
0
def get_comment_content(browser: Chrome):
    """Open a Xiaomi Youpin product page and dump its comment response to disk.

    Loads the product page, scrolls down, clicks the comment tab, then writes
    the body of the captured ``comment/product/content`` POST response
    unchanged to ``xmyp_comment_content.json`` in the working directory.

    Args:
        browser: Chrome driver used to load the page and read captured traffic.

    Returns:
        None. When no response was captured, a message is printed and the
        output file is left untouched.
    """
    # Xiaomi 10 Ultra, Xiaomi self-operated store.
    browser.get(
        'https://www.xiaomiyoupin.com/detail?gid=134230&spmref=YouPinPC.$SearchFilter$1.search_list.1.4643522'
    )
    # JS snippet that simulates scrolling.
    js = 'window.scrollBy({top:800, left:0, behavior: "smooth"})'
    browser.execute_script(js)
    sleep(1)
    # Select the product-comment tab and click it.
    browser.find_element_by_xpath(
        '/html/body/div[1]/div/div[3]/div/div[1]/div[2]/div[1]/div[1]/ul/li[2]'
    ).click()
    sleep(1)
    # Fetch the comment details and save them to a local JSON file.
    index_url = 'https://www.xiaomiyoupin.com/mtop/market/comment/product/content'
    index = get_response_body(browser, index_url, 'POST')
    if index is None:
        # Check before opening the file: opening with "w" truncates it, and
        # write(None) would then raise TypeError and leave an empty file.
        print('---未找到评论接口数据---')
        return
    with open('xmyp_comment_content.json', 'w', encoding='UTF-8') as file:
        file.write(index)
def get_mi10_data_from_mishop(browser: Chrome):
    """Collect comments for every ``Shop`` row whose source is ``'小米商城'``.

    For each shop the comment URL is opened. The first pass delegates to
    ``get_sku_info_and_summary_and_first_page_comments``, whose return value
    becomes the page limit. Later passes read the captured
    ``user_comment/get_list`` response, unwrap its JSONP callback and pass the
    comments to ``insert_mishop_comments``. After every pass the browser is
    moved on with ``turn_to_the_next_page``.

    Args:
        browser: Chrome driver used to load pages and read captured traffic.
    """
    mishop_rows = Shop.select().where(Shop.source == '小米商城')
    for mishop in mishop_rows:
        print(f'------打开当前小米商城商品评论链接: {mishop.url}------')
        browser.get(mishop.url)

        page_no, last_page = 0, 141
        while page_no <= last_page:
            try:
                if page_no != 0:
                    # Read this page's comment data.
                    payload = get_response_body(
                        browser, 'user_comment/get_list', 'GET')
                    # Drop the JSONP wrapper: tail first, then the "callback(" head.
                    payload = re.sub(r'^\w+\(', '', payload.rstrip(');'))
                    parsed = json.loads(payload)
                    if parsed['msg'] != 'success':
                        print(f'---获取第{page_no + 1}页评论数据异常---')
                        break
                    # Save this page's comment data; the page counter follows
                    # whatever page the response says it is.
                    page_no = parsed['data']['page_current']
                    insert_mishop_comments(parsed['data']['comments'])
                else:
                    # The first pass also yields the total page count.
                    last_page = get_sku_info_and_summary_and_first_page_comments(
                        browser)
                    page_no += 1
            except WebDriverException:
                print(
                    f'---获取第{page_no}页评论异常(WebDriverException), 尝试翻到下一页---'
                )

            print(f'当前页数: {page_no}')
            turn_to_the_next_page(browser)

        print('------评论获取阶段结束------')
    print('------小米商城数据获取完成------')
def get_sn_comments(browser: Chrome, sn_ss: Union[Shop, SNSku], sku_mode: bool = False):
    """Collect comments page by page and pass each page to ``insert_sn_comments``.

    On the first page in SKU mode two captured responses are read together:
    the review list (``cluster_review_lists/general``) and the review counts
    (``review_count/general``); the counts are stored with
    ``insert_sn_model_summary``. On every other page only the review list is
    read, from ``cluster_review_lists/cluster`` when ``sku_mode`` is false and
    ``cluster_review_lists/general`` otherwise.

    Paging continues until the review list's ``returnMsg`` is not the success
    message, or a ``WebDriverException`` / ``AttributeError`` / ``TypeError``
    is raised while handling a page. Afterwards ``back_to_first_window`` is
    called and the function sleeps for 10 seconds.

    Args:
        browser: Chrome driver positioned on the product's comment section.
        sn_ss: Shop or SKU record forwarded unchanged to the insert helpers.
        sku_mode: Selects the per-SKU endpoints and enables the first-page
            summary handling.
    """
    page = 1
    while True:
        try:
            # Fetch the comments for the current page
            if sku_mode is True and page == 1:
                # Defaults used when a response is missing from all_data.
                # NOTE(review): indexing these empty dicts below raises
                # KeyError, which the except clause does not catch.
                sn_comments = {}
                sn_model_summary = {}
                target_urls = [
                    {'url': 'cluster_review_lists/general', 'method': 'GET'},
                    {'url': 'review_count/general', 'method': 'GET'}
                ]
                all_data = get_response_body_list(browser, target_urls)
                for data in all_data:
                    if data['url'] == 'cluster_review_lists/general' and data['method'] == 'GET':
                        sn_comments = data['response_body']
                        # NOTE: lstrip/rstrip remove a set of characters, not a
                        # literal prefix/suffix; this unwraps "reviewList(...)".
                        sn_comments = sn_comments.lstrip('reviewList(').rstrip(')')
                        sn_comments = json.loads(sn_comments)
                    if data['url'] == 'review_count/general' and data['method'] == 'GET':
                        sn_model_summary = data['response_body']
                        # Same character-set stripping, for the "satisfy(...)" wrapper.
                        sn_model_summary = sn_model_summary.lstrip('satisfy(').rstrip(')')
                        sn_model_summary = json.loads(sn_model_summary)
                if sn_comments['returnMsg'] == '无评价数据':
                    print('---无评价数据, 跳过此SKU---')
                    break
                else:
                    if sn_model_summary['returnMsg'] == '查询数量成功':
                        insert_sn_model_summary(sn_model_summary['reviewCounts'][0],
                                                sn_comments['commodityReviews'][0]['commodityInfo'], sn_ss)
                    else:
                        print('---查询当前SKU评论统计数量失败---')
            else:
                if sku_mode is False:
                    sn_comments_url = 'cluster_review_lists/cluster'
                else:
                    sn_comments_url = 'cluster_review_lists/general'
                # A None result raises AttributeError on .lstrip, which the
                # except clause below turns into "skip this round".
                sn_comments = get_response_body(browser, sn_comments_url, 'GET')
                sn_comments = sn_comments.lstrip('reviewList(').rstrip(')')
                sn_comments = json.loads(sn_comments)

            # Save the comments
            if sn_comments['returnMsg'] == '成功取得评价列表':
                comment_list = sn_comments['commodityReviews']
                insert_sn_comments(comment_list, sn_ss)
            else:
                # The maximum page count is 50; a failure at or before page 50 is reported as abnormal
                if page <= 50:
                    print(f'---获取第{page}页评论数据异常---')
                break
        except (WebDriverException, AttributeError, TypeError):
            print(f'---获取第{page}页评论数据异常, 跳过此轮---')
            break

        print(f'当前页数: {page}')
        # Scroll down and click "next page"; on timeout, scroll further and retry
        # NOTE(review): this retry loop has no upper bound.
        while True:
            try:
                WebDriverWait(browser, 0.5).until(
                    ec.element_to_be_clickable((By.CSS_SELECTOR, '.next.rv-maidian'))
                )
                browser.execute_script('document.getElementsByClassName("next rv-maidian")[0].click()')
                waiting_content_loading(browser, 'rv-target-item')
                break
            except TimeoutException:
                window_scroll_by(browser, 500)

        page += 1

    back_to_first_window(browser)
    print('------当前浏览器窗口已关闭, 暂停10秒------')
    sleep(10)
def get_youpin_comments(browser: Chrome, shop: Shop):
    """Collect Youpin comments page by page and pass them to the insert helper.

    On page 1 two captured POST responses are read together: the comment
    index (``comment/product/index``), whose totals are stored in a
    ``CommentSummary`` row, and the comment content
    (``comment/product/content``), whose ``total`` sets the number of pages at
    10 comments per page. On later pages only the content response is read.
    Each page's ``data.list`` is passed to ``insert_youpin_comments`` and the
    browser is moved on with ``turn_to_the_next_page``.

    Paging stops early when a later page has no captured response or a
    content response's ``message`` is not ``'ok'``. A ``WebDriverException``
    on a page is reported and paging continues with the next page.

    Args:
        browser: Chrome driver positioned on the product's comment section.
        shop: Shop record supplying ``source`` / ``is_official`` and forwarded
            to ``insert_youpin_comments``.
    """
    index_api = 'comment/product/index'
    content_api = 'comment/product/content'

    page = 1
    max_page = 141
    while page <= max_page:
        try:
            # Fetch the comments of the current page.
            if page != 1:
                comment_content = get_response_body(browser, content_api, 'POST')
                if comment_content is None:
                    print('---未找到评论接口数据---')
                    break
                comment_content = json.loads(comment_content)
            else:
                # The first page yields both the comments and the statistics.
                comment_index = {}
                comment_content = {}
                wanted = [
                    {'url': index_api, 'method': 'POST'},
                    {'url': content_api, 'method': 'POST'},
                ]
                for captured in get_response_body_list(browser, wanted):
                    if captured['url'] == index_api and captured['method'] == 'POST':
                        comment_index = json.loads(captured['response_body'])
                    if captured['url'] == content_api and captured['method'] == 'POST':
                        comment_content = json.loads(captured['response_body'])
                        # Round up: 10 comments per page.
                        total_comments = comment_content['data']['page']['total']
                        max_page = (total_comments + 9) // 10
                        print(f'---评论总页数: {max_page} 页')

                # Save the comment statistics.
                if comment_index['message'] != 'ok':
                    print('---查询评论统计数量失败---')
                else:
                    stats = comment_index['data']
                    CommentSummary.create(source=shop.source,
                                          is_official=shop.is_official,
                                          total=stats['total_count'],
                                          good_rate=stats['positive_rate'])
                    print('------保存评论统计数量成功------')

            # Save the comments.
            if comment_content['message'] != 'ok':
                print(f'---获取第{page}页评论数据异常---')
                break
            insert_youpin_comments(comment_content['data']['list'], shop)
        except WebDriverException:
            print(f'---获取第{page}页评论数据异常(WebDriverException), 尝试翻到下一页---')

        print(f'当前页数: {page}')
        # Scroll down and click "next page".
        turn_to_the_next_page(browser)
        page += 1

    print('------评论获取阶段结束------')