Esempio n. 1
0
def insert_jd_model_summary(model_summary: dict, comment: dict, shop: Shop):
    """Create or accumulate the ModelSummary row for one product variant.

    The variant is identified by ``shop.source``, ``shop.is_official`` and the
    colour/RAM/ROM triple that ``parse_mi10_product_info`` derives from the
    comment's ``productColor`` and ``productSize`` values.  If a matching row
    exists, its counters are incremented via ``update_jd_summary_data``;
    otherwise a new row is created from ``model_summary``.

    Args:
        model_summary: Summary dict.  Keys read here: ``commentCountStr``,
            ``goodRate``, ``defaultGoodCountStr`` and ``score1Count`` through
            ``score5Count``.
        comment: Comment dict providing ``productColor`` and ``productSize``.
        shop: Shop record supplying ``source`` and ``is_official``.
    """
    color, ram, rom = parse_mi10_product_info(comment['productColor'],
                                              comment['productSize'])
    # Fields that identify the row; shared by the lookup and the insert.
    variant = dict(source=shop.source,
                   is_official=shop.is_official,
                   product_color=color,
                   product_ram=ram,
                   product_rom=rom)
    try:
        # Only the lookup is guarded, so a DoesNotExist raised anywhere else
        # cannot be mistaken for "row missing" and trigger a duplicate insert.
        ms = ModelSummary.get(**variant)
    except ModelSummary.DoesNotExist:
        ModelSummary.create(
            total=parse_jd_count_str(model_summary['commentCountStr']),
            # goodRate is multiplied by 100 and stored as a string.
            good_rate=str(model_summary['goodRate'] * 100),
            default_good=parse_jd_count_str(
                model_summary['defaultGoodCountStr']),
            star_one=model_summary['score1Count'],
            star_two=model_summary['score2Count'],
            star_three=model_summary['score3Count'],
            star_four=model_summary['score4Count'],
            star_five=model_summary['score5Count'],
            **variant)
    else:
        update_jd_summary_data(ms, model_summary)
Esempio n. 2
0
def insert_jd_model_summary(model_summary: dict, comment: dict, jd_ss: Union[Shop, JDSku]):
    """Create or accumulate the ModelSummary row for one product variant.

    The row is keyed by the literal source '京东', ``jd_ss.is_self`` and the
    colour/ROM pair that ``parse_iPhone11_product_info`` derives from the
    comment's ``productColor`` and ``productSize`` values.  If a matching row
    exists, its counters are incremented via ``update_jd_summary_data``;
    otherwise a new row is created from ``model_summary``.

    Args:
        model_summary: Summary dict.  Keys read here: ``commentCountStr``,
            ``goodRate``, ``defaultGoodCountStr`` and ``score1Count`` through
            ``score5Count``.
        comment: Comment dict providing ``productColor`` and ``productSize``.
        jd_ss: Shop or JDSku record; only its ``is_self`` attribute is read.
    """
    color, rom = parse_iPhone11_product_info(comment['productColor'], comment['productSize'])
    # Fields that identify the row; shared by the lookup and the insert.
    variant = dict(
        source='京东',
        is_self=jd_ss.is_self,
        color=color,
        rom=rom,
    )
    try:
        # Only the lookup is guarded, so a DoesNotExist raised anywhere else
        # cannot be mistaken for "row missing" and trigger a duplicate insert.
        ms = ModelSummary.get(**variant)
    except ModelSummary.DoesNotExist:
        ModelSummary.create(
            total=parse_jd_count_str(model_summary['commentCountStr']),
            # goodRate is multiplied by 100 and stored as a string.
            good_rate=str(model_summary['goodRate'] * 100),
            default_good=parse_jd_count_str(model_summary['defaultGoodCountStr']),
            star_one=model_summary['score1Count'],
            star_two=model_summary['score2Count'],
            star_three=model_summary['score3Count'],
            star_four=model_summary['score4Count'],
            star_five=model_summary['score5Count'],
            **variant
        )
    else:
        update_jd_summary_data(ms, model_summary)
def update_jd_summary_data(s, summary):
    """Add the counters found in ``summary`` onto record ``s`` and save it.

    ``total`` and ``default_good`` are incremented by the values that
    ``parse_jd_count_str`` returns for ``commentCountStr`` and
    ``defaultGoodCountStr``; the five star counters are incremented by
    ``score1Count`` through ``score5Count`` directly.  ``good_rate`` is not
    modified by this function.

    Args:
        s: Record exposing ``total``, ``default_good``, ``star_one`` ..
            ``star_five`` and a ``save()`` method.
        summary: Dict carrying the keys listed above.
    """
    # Counters delivered as display strings need parsing first.
    s.total = s.total + parse_jd_count_str(summary['commentCountStr'])
    s.default_good = s.default_good + parse_jd_count_str(
        summary['defaultGoodCountStr'])

    # Per-star counters are added as-is, in one-to-five order.
    star_fields = (
        ('star_one', 'score1Count'),
        ('star_two', 'score2Count'),
        ('star_three', 'score3Count'),
        ('star_four', 'score4Count'),
        ('star_five', 'score5Count'),
    )
    for field_name, summary_key in star_fields:
        setattr(s, field_name, getattr(s, field_name) + summary[summary_key])

    s.save()
def insert_jd_comment_summary(comment_summary: dict, self: bool):
    """Create or accumulate the CommentSummary row for source '京东'.

    If a row with the same ``is_official`` flag exists, its counters are
    incremented via ``update_jd_summary_data``; otherwise a new row is created
    from ``comment_summary``.

    Args:
        comment_summary: Summary dict.  Keys read here: ``commentCountStr``,
            ``goodRate``, ``defaultGoodCountStr`` and ``score1Count`` through
            ``score5Count``.
        self: Plain boolean matched against / stored as ``is_official``.
            Despite the name it is not an instance reference; the name is
            kept so keyword callers keep working.
    """
    try:
        # Only the lookup is guarded, so a DoesNotExist raised anywhere else
        # cannot be mistaken for "row missing" and trigger a duplicate insert.
        cs = CommentSummary.get(source='京东', is_official=self)
    except CommentSummary.DoesNotExist:
        CommentSummary.create(source='京东',
                              is_official=self,
                              total=parse_jd_count_str(
                                  comment_summary['commentCountStr']),
                              # goodRate is multiplied by 100 and stored as a string.
                              good_rate=str(comment_summary['goodRate'] * 100),
                              default_good=parse_jd_count_str(
                                  comment_summary['defaultGoodCountStr']),
                              star_one=comment_summary['score1Count'],
                              star_two=comment_summary['score2Count'],
                              star_three=comment_summary['score3Count'],
                              star_four=comment_summary['score4Count'],
                              star_five=comment_summary['score5Count'])
    else:
        update_jd_summary_data(cs, comment_summary)
def insert_jd_model_summary(model_summary: dict, comment: dict, self: bool):
    """Create or accumulate the ModelSummary row for one product spec.

    The row is keyed by source '京东', the ``is_official`` flag and a ``spec``
    string made of the comment's ``productColor``, ``productSize`` and
    ``productSales`` values joined by single spaces.  If a matching row
    exists, its counters are incremented via ``update_jd_summary_data``;
    otherwise a new row is created from ``model_summary``.

    Args:
        model_summary: Summary dict.  Keys read here: ``commentCountStr``,
            ``goodRate``, ``defaultGoodCountStr`` and ``score1Count`` through
            ``score5Count``.
        comment: Comment dict providing ``productColor``, ``productSize``
            and ``productSales``.
        self: Plain boolean matched against / stored as ``is_official``.
            Despite the name it is not an instance reference; the name is
            kept so keyword callers keep working.
    """
    # Build the spec key once so the lookup and the insert cannot drift apart.
    spec = (comment['productColor'] + ' ' + comment['productSize'] + ' ' +
            comment['productSales'])
    try:
        # Only the lookup is guarded, so a DoesNotExist raised anywhere else
        # cannot be mistaken for "row missing" and trigger a duplicate insert.
        ms = ModelSummary.get(source='京东', is_official=self, spec=spec)
    except ModelSummary.DoesNotExist:
        ModelSummary.create(
            source='京东',
            is_official=self,
            spec=spec,
            total=parse_jd_count_str(model_summary['commentCountStr']),
            # goodRate is multiplied by 100 and stored as a string.
            good_rate=str(model_summary['goodRate'] * 100),
            default_good=parse_jd_count_str(
                model_summary['defaultGoodCountStr']),
            star_one=model_summary['score1Count'],
            star_two=model_summary['score2Count'],
            star_three=model_summary['score3Count'],
            star_four=model_summary['score4Count'],
            star_five=model_summary['score5Count'])
    else:
        update_jd_summary_data(ms, model_summary)
def insert_jd_all_commodity(browser: Chrome):
    """Scrape every pending '京东' (JD) SKU and save it as a Commodity row.

    For each ``TargetSku`` with source '京东': if the SKU is already present
    in ``ExistedSku`` it is removed via ``delete_saved_commodity_sku`` and
    skipped.  Otherwise the product page is opened and price, title, comment
    count, self-operated flag, shop name, brand, introduction fields and the
    specification table are read into a ``Commodity``, which is saved before
    the SKU is removed and the browser returns to the first window.

    Fallbacks when a page element is missing or unparsable: ``price`` and
    ``total`` become -1, text fields get a placeholder string, and body
    dimensions that cannot be parsed are left unset.

    Args:
        browser: Selenium Chrome driver used for all page access.
    """
    # Unit suffixes and "approximately" markers stripped from body-dimension
    # values before float() parsing.  '大约' must precede '约': stripping '约'
    # first would leave a stray '大' and make float() fail.
    noise_tokens = ('mm', 'MM', 'mM', 'Mm', 'g', 'G',
                    '大约', '约', '左右', '大概')

    for target_sku in TargetSku.select().where(TargetSku.source == '京东'):
        # SKU number of the current product.
        sku: str = target_sku.sku
        # Skip SKUs already stored so sales are not counted twice.
        result = ExistedSku.get_or_none(ExistedSku.source == '京东',
                                        ExistedSku.sku == sku)
        if result is not None:
            # Remove the already-saved SKU from the target list.
            delete_saved_commodity_sku(sku)
            print(f'---SKU编号为 {sku} 的商品信息已保存过---')
            continue

        # Start collecting the product information.
        commodity = Commodity()
        commodity.source = '京东'
        commodity.url = 'https://item.jd.com/' + sku + '.html'

        # Open and switch to the current product page.
        switch_to_current_sku_page(browser, commodity.url)
        # Fetch and store the listed SKUs from the backend API.
        get_jd_sku_from_api(browser, sku)

        try:
            commodity.price = float(
                browser.find_element_by_css_selector(
                    'span.price:nth-child(2)').text)
        except (ValueError, NoSuchElementException):
            # Raised when the price is shown as "to be released" or the
            # product has been delisted.
            commodity.price = -1

        try:
            commodity.title = browser.find_element_by_class_name(
                'sku-name').text.strip()
        except NoSuchElementException:
            commodity.title = '无商品标题'

        try:
            total_str = browser.find_element_by_css_selector(
                '#comment-count > a').text
            commodity.total = parse_jd_count_str(total_str)
        except NoSuchElementException:
            # For pre-order products the count is not shown next to the price.
            commodity.total = -1

        # Self-operated only when the badge exists and reads '自营'.
        try:
            is_self = browser.find_element_by_class_name('u-jd').text == '自营'
        except NoSuchElementException:
            is_self = False
        commodity.is_self = is_self

        try:
            commodity.shop_name = browser.find_element_by_css_selector(
                '#crumb-wrap > div > div.contact.fr.clearfix > div.J-hove-wrap.EDropdown.fr > div:nth-child(1) > div '
                '> a').text
        except NoSuchElementException:
            commodity.shop_name = '店铺名称为空'

        # Read product information from the introduction section.
        try:
            commodity.brand = browser.find_element_by_css_selector(
                '#parameter-brand > li > a').text
        except NoSuchElementException:
            commodity.brand = '品牌未注明'

        intro = browser.find_elements_by_css_selector('.parameter2 > li')
        intro_list = [i.text for i in intro]
        # Pre-assign defaults so empty values do not cause errors on save.
        commodity.os = '页面未注明'
        commodity.model = '页面未注明'
        for intro_item in intro_list:
            if '操作系统' in intro_item:
                commodity.os = intro_item.replace('操作系统:', '')
            if 'CPU型号' in intro_item:
                commodity.soc_model = intro_item.replace('CPU型号:', '')
            if '商品名称' in intro_item:
                commodity.model = intro_item.replace('商品名称:', '')

        # Scroll down and click the "specifications & packaging" tab.
        window_scroll_by(browser, 1200)
        js_script = 'document.querySelector("#detail > div.tab-main.large > ul > li:nth-child(2)").click()'
        browser.execute_script(js_script)
        sleep(1)

        # Read product information from the specification table.
        spec_list = browser.find_elements_by_class_name('Ptable-item')
        for spec_item in spec_list:
            spec_item_title = spec_item.find_element_by_tag_name('h3').text
            item_list = spec_item.find_elements_by_class_name('clearfix')
            if '主体' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value = item.find_element_by_tag_name('dd').text
                    if '产品名称' == item_name:
                        commodity.model = item_value
            if '基本信息' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value = item.find_element_by_tag_name('dd').text
                    for token in noise_tokens:
                        item_value = item_value.replace(token, '')
                    item_value = item_value.strip()
                    try:
                        if '机身宽度' in item_name:
                            commodity.width = float(item_value)
                        if '机身厚度' in item_name:
                            commodity.thickness = float(item_value)
                        if '机身长度' in item_name:
                            commodity.length = float(item_value)
                        if '机身重量' in item_name:
                            commodity.weight = float(item_value)
                    except ValueError:
                        # Best effort: a value that is not a plain number
                        # leaves the dimension unset.
                        pass
            if '主芯片' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value = item.find_element_by_tag_name('dd').text
                    if 'CPU品牌' == item_name:
                        commodity.soc_mfrs = item_value
            if '屏幕' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value_str = item.find_element_by_tag_name('dd').text
                    if '主屏幕尺寸' in item_name:
                        try:
                            item_value = float(
                                item_value_str.replace('英寸', '').strip())
                            commodity.screen_size = item_value
                        except ValueError:
                            # Best effort: unparsable size is left unset.
                            pass
        # Save the product information.
        commodity.save()
        # Remove the saved SKU from the target list.
        delete_saved_commodity_sku(sku)
        print(f'------SKU编号为 {sku} 的商品信息保存完毕------')
        # Return to the phone category page.
        back_to_first_window(browser)
def insert_jd_all_commodity(browser: Chrome):
    """Scrape every pending JD SKU and save it as a Commodity row.

    For each ``JDTargetSku``: if the SKU is already present in
    ``JDExistedSku`` it is removed via ``delete_saved_commodity_sku`` and
    skipped.  Otherwise the product page is opened and price, title, review
    count, self-operated flag, shop name, brand and model are read into a
    ``Commodity``, which is saved before the SKU is removed and the browser
    returns to the first window.

    Fallbacks when a page element is missing or unparsable: ``price`` becomes
    -2, ``total`` stays -1, and text fields get a placeholder string.

    Args:
        browser: Selenium Chrome driver used for all page access.
    """
    for pending in JDTargetSku.select():
        # SKU number of the current product.
        sku: str = pending.sku

        # Skip SKUs already stored so sales are not counted twice.
        if JDExistedSku.get_or_none(JDExistedSku.sku == sku) is not None:
            # Remove the already-saved SKU from the target list.
            delete_saved_commodity_sku(sku)
            print(f'---SKU编号为 {sku} 的商品信息已保存过---')
            continue

        # Start collecting the product information.
        commodity = Commodity()
        commodity.source = '京东'
        commodity.url = 'https://item.jd.com/' + sku + '.html'

        # Open and switch to the current product page.
        switch_to_current_sku_page(browser, commodity.url)
        # Fetch and store the listed SKUs from the backend API.
        get_jd_sku_from_api(browser, sku)

        try:
            price_text = browser.find_element_by_css_selector(
                'span.price:nth-child(2)').text
            commodity.price = float(price_text)
        except (ValueError, NoSuchElementException):
            # Raised when the price is shown as "to be released" or the
            # product has been delisted.
            commodity.price = -2

        try:
            title_element = browser.find_element_by_class_name('sku-name')
            commodity.title = title_element.text.strip()
        except NoSuchElementException:
            commodity.title = '无商品标题'

        # Default review count; overwritten by every tab whose text
        # contains '商品评价'.
        commodity.total = -1
        detail_tabs = browser.find_elements_by_css_selector(
            '#detail > div.tab-main.large > ul > li')
        for tab in detail_tabs:
            if '商品评价' not in tab.text:
                continue
            count_text = tab.find_element_by_tag_name('s').text
            commodity.total = parse_jd_count_str(
                count_text.lstrip('(').rstrip(')'))

        # Self-operated only when the badge exists and reads '自营'.
        try:
            is_self = browser.find_element_by_class_name('u-jd').text == '自营'
        except NoSuchElementException:
            is_self = False
        commodity.is_self = is_self

        try:
            shop_link = browser.find_element_by_css_selector(
                '#crumb-wrap > div > div.contact.fr.clearfix > div.J-hove-wrap.EDropdown.fr > div:nth-child(1) > div '
                '> a')
            commodity.shop_name = shop_link.text
        except NoSuchElementException:
            commodity.shop_name = '店铺名称为空'

        # Read product information from the introduction section.
        try:
            brand_link = browser.find_element_by_css_selector(
                '#parameter-brand > li > a')
            commodity.brand = brand_link.text
        except NoSuchElementException:
            commodity.brand = '未知'

        intro_texts = [
            li.text
            for li in browser.find_elements_by_css_selector('.parameter2 > li')
        ]
        # Pre-assign a default so an empty value does not cause errors on save.
        commodity.model = '未知'
        for text in intro_texts:
            if '商品名称' in text:
                commodity.model = text.replace('商品名称:', '')

        # Save the product information.
        commodity.save()
        # Remove the saved SKU from the target list.
        delete_saved_commodity_sku(sku)
        print(f'------SKU编号为 {sku} 的商品信息保存完毕------')
        # Return to the wireless-earphone category page.
        back_to_first_window(browser)