def insert_jd_model_summary(model_summary: dict, comment: dict, shop: Shop):
    """Create or accumulate the ``ModelSummary`` row for one product variant.

    The variant is identified by ``shop.source``, ``shop.is_official`` and the
    colour / RAM / ROM triple that ``parse_mi10_product_info`` extracts from
    the comment's ``productColor`` and ``productSize`` entries.

    When a matching row already exists, the counters from ``model_summary``
    are added onto it through ``update_jd_summary_data``; otherwise a new row
    is created from the same payload.

    :param model_summary: summary payload; the keys read are
        ``commentCountStr``, ``goodRate``, ``defaultGoodCountStr`` and
        ``score1Count`` .. ``score5Count``.
    :param comment: a single comment record providing ``productColor`` and
        ``productSize``.
    :param shop: shop record supplying ``source`` and ``is_official``.
    """
    color, ram, rom = parse_mi10_product_info(
        comment['productColor'], comment['productSize'])

    # Fields that uniquely identify the variant row; shared by lookup and insert.
    variant_key = {
        'source': shop.source,
        'is_official': shop.is_official,
        'product_color': color,
        'product_ram': ram,
        'product_rom': rom,
    }

    try:
        existing_row = ModelSummary.get(**variant_key)
        update_jd_summary_data(existing_row, model_summary)
    except ModelSummary.DoesNotExist:
        # First time this variant is seen: seed the row from the payload.
        initial_counts = {
            'total': parse_jd_count_str(model_summary['commentCountStr']),
            'good_rate': str(model_summary['goodRate'] * 100),
            'default_good': parse_jd_count_str(
                model_summary['defaultGoodCountStr']),
            'star_one': model_summary['score1Count'],
            'star_two': model_summary['score2Count'],
            'star_three': model_summary['score3Count'],
            'star_four': model_summary['score4Count'],
            'star_five': model_summary['score5Count'],
        }
        ModelSummary.create(**variant_key, **initial_counts)
def insert_jd_model_summary(model_summary: dict, comment: dict, jd_ss: Union[Shop, JDSku]):
    """Create or accumulate the JD ``ModelSummary`` row for one colour/ROM variant.

    The row is keyed by the fixed source ``'京东'``, ``jd_ss.is_self`` and the
    colour / ROM pair that ``parse_iPhone11_product_info`` extracts from the
    comment's ``productColor`` and ``productSize`` entries.

    An existing row has the payload's counters added to it via
    ``update_jd_summary_data``; if no row exists, one is created.

    :param model_summary: summary payload; the keys read are
        ``commentCountStr``, ``goodRate``, ``defaultGoodCountStr`` and
        ``score1Count`` .. ``score5Count``.
    :param comment: a single comment record providing ``productColor`` and
        ``productSize``.
    :param jd_ss: a ``Shop`` or ``JDSku`` record; only ``is_self`` is read.
    """
    color, rom = parse_iPhone11_product_info(
        comment['productColor'], comment['productSize'])

    # Identity of the variant row, used for both the lookup and the insert.
    variant_key = {
        'source': '京东',
        'is_self': jd_ss.is_self,
        'color': color,
        'rom': rom,
    }

    try:
        existing_row = ModelSummary.get(**variant_key)
        update_jd_summary_data(existing_row, model_summary)
    except ModelSummary.DoesNotExist:
        # No row yet for this variant: create it from the payload.
        initial_counts = {
            'total': parse_jd_count_str(model_summary['commentCountStr']),
            'good_rate': str(model_summary['goodRate'] * 100),
            'default_good': parse_jd_count_str(
                model_summary['defaultGoodCountStr']),
            'star_one': model_summary['score1Count'],
            'star_two': model_summary['score2Count'],
            'star_three': model_summary['score3Count'],
            'star_four': model_summary['score4Count'],
            'star_five': model_summary['score5Count'],
        }
        ModelSummary.create(**variant_key, **initial_counts)
def update_jd_summary_data(s, summary):
    """Add the counters of a JD summary payload onto an existing row and save it.

    ``total`` and ``default_good`` are increased by the parsed
    ``commentCountStr`` / ``defaultGoodCountStr`` values, and the five star
    counters by ``score1Count`` .. ``score5Count``.  ``good_rate`` is not
    modified by this function.

    :param s: the summary row to update in place; ``s.save()`` is called at
        the end.
    :param summary: payload dict holding the keys listed above.
    """
    s.total += parse_jd_count_str(summary['commentCountStr'])
    s.default_good += parse_jd_count_str(summary['defaultGoodCountStr'])

    # Row attribute -> payload key, in the order the counters are applied.
    star_columns = (
        ('star_one', 'score1Count'),
        ('star_two', 'score2Count'),
        ('star_three', 'score3Count'),
        ('star_four', 'score4Count'),
        ('star_five', 'score5Count'),
    )
    for column, payload_key in star_columns:
        setattr(s, column, getattr(s, column) + summary[payload_key])

    s.save()
def insert_jd_comment_summary(comment_summary: dict, self: bool):
    """Create or accumulate the JD ``CommentSummary`` row.

    The row is keyed by the fixed source ``'京东'`` and the ``is_official``
    flag.  An existing row has the payload's counters added through
    ``update_jd_summary_data``; otherwise a new row is created.

    :param comment_summary: summary payload; the keys read are
        ``commentCountStr``, ``goodRate``, ``defaultGoodCountStr`` and
        ``score1Count`` .. ``score5Count``.
    :param self: boolean stored in / matched against ``is_official``
        (a plain flag, not an instance reference).
    """
    row_key = {'source': '京东', 'is_official': self}

    try:
        existing_row = CommentSummary.get(**row_key)
        update_jd_summary_data(existing_row, comment_summary)
    except CommentSummary.DoesNotExist:
        # Nothing stored yet for this source/flag pair: seed a new row.
        initial_counts = {
            'total': parse_jd_count_str(comment_summary['commentCountStr']),
            'good_rate': str(comment_summary['goodRate'] * 100),
            'default_good': parse_jd_count_str(
                comment_summary['defaultGoodCountStr']),
            'star_one': comment_summary['score1Count'],
            'star_two': comment_summary['score2Count'],
            'star_three': comment_summary['score3Count'],
            'star_four': comment_summary['score4Count'],
            'star_five': comment_summary['score5Count'],
        }
        CommentSummary.create(**row_key, **initial_counts)
def insert_jd_model_summary(model_summary: dict, comment: dict, self: bool):
    """Create or accumulate the JD ``ModelSummary`` row for one spec string.

    The spec string is the comment's ``productColor``, ``productSize`` and
    ``productSales`` values joined by single spaces.  Together with the fixed
    source ``'京东'`` and the ``is_official`` flag it identifies the row.

    An existing row has the payload's counters added through
    ``update_jd_summary_data``; otherwise a new row is created.

    :param model_summary: summary payload; the keys read are
        ``commentCountStr``, ``goodRate``, ``defaultGoodCountStr`` and
        ``score1Count`` .. ``score5Count``.
    :param comment: a single comment record providing ``productColor``,
        ``productSize`` and ``productSales``.
    :param self: boolean stored in / matched against ``is_official``
        (a plain flag, not an instance reference).
    """
    spec = (comment['productColor'] + ' ' + comment['productSize']
            + ' ' + comment['productSales'])
    row_key = {'source': '京东', 'is_official': self, 'spec': spec}

    try:
        existing_row = ModelSummary.get(**row_key)
        update_jd_summary_data(existing_row, model_summary)
    except ModelSummary.DoesNotExist:
        # First occurrence of this spec: create the row from the payload.
        initial_counts = {
            'total': parse_jd_count_str(model_summary['commentCountStr']),
            'good_rate': str(model_summary['goodRate'] * 100),
            'default_good': parse_jd_count_str(
                model_summary['defaultGoodCountStr']),
            'star_one': model_summary['score1Count'],
            'star_two': model_summary['score2Count'],
            'star_three': model_summary['score3Count'],
            'star_four': model_summary['score4Count'],
            'star_five': model_summary['score5Count'],
        }
        ModelSummary.create(**row_key, **initial_counts)
def insert_jd_all_commodity(browser: Chrome):
    """Scrape every pending JD target SKU and store it as a ``Commodity`` row.

    For each ``TargetSku`` whose source is ``'京东'``:

    * if the SKU is already recorded in ``ExistedSku`` the target entry is
      removed and the SKU is skipped (avoids counting sales twice);
    * otherwise the product page is opened, price / title / comment count /
      self-operated flag / shop name / brand are read from the page, further
      attributes are read from the introduction list and from the
      "规格与包装" (specification) tab, the row is saved and the target entry
      is removed.

    Fields that cannot be read from the page fall back to the placeholder
    values assigned below.

    :param browser: the Selenium Chrome driver used for all page access.
    """
    for target_sku in TargetSku.select().where(TargetSku.source == '京东'):
        # SKU number of the current product.
        sku: str = target_sku.sku

        # Skip SKUs that are already stored, so sales are not counted twice.
        result = ExistedSku.get_or_none(ExistedSku.source == '京东', ExistedSku.sku == sku)
        if result is not None:
            # Drop the already-saved product from the target list.
            delete_saved_commodity_sku(sku)
            print(f'---SKU编号为 {sku} 的商品信息已保存过---')
            continue

        # Start collecting the product information.
        commodity = Commodity()
        commodity.source = '京东'
        commodity.url = 'https://item.jd.com/' + sku + '.html'

        # Open the product page and switch to it.
        switch_to_current_sku_page(browser, commodity.url)
        # Fetch and store the currently listed SKUs from the backend API.
        get_jd_sku_from_api(browser, sku)

        try:
            commodity.price = float(
                browser.find_element_by_css_selector(
                    'span.price:nth-child(2)').text)
        except (ValueError, NoSuchElementException):
            # Raised when the price shows "pending release" or the product
            # has been delisted.
            commodity.price = -1

        try:
            commodity.title = browser.find_element_by_class_name(
                'sku-name').text.strip()
        except NoSuchElementException:
            commodity.title = '无商品标题'

        try:
            total_str = browser.find_element_by_css_selector(
                '#comment-count > a').text
            commodity.total = parse_jd_count_str(total_str)
        except NoSuchElementException:
            # For pre-order products the count is not shown next to the price.
            commodity.total = -1

        # Determine whether the product is JD self-operated.
        try:
            is_self_operated = browser.find_element_by_class_name('u-jd').text == '自营'
        except NoSuchElementException:
            is_self_operated = False
        commodity.is_self = is_self_operated

        try:
            commodity.shop_name = browser.find_element_by_css_selector(
                '#crumb-wrap > div > div.contact.fr.clearfix > div.J-hove-wrap.EDropdown.fr > div:nth-child(1) > div '
                '> a').text
        except NoSuchElementException:
            commodity.shop_name = '店铺名称为空'

        # Read product information from the introduction section.
        try:
            commodity.brand = browser.find_element_by_css_selector(
                '#parameter-brand > li > a').text
        except NoSuchElementException:
            commodity.brand = '品牌未注明'

        intro_list = [
            element.text
            for element in browser.find_elements_by_css_selector('.parameter2 > li')
        ]

        # Pre-assign placeholders so that saving does not fail on empty values.
        commodity.os = '页面未注明'
        commodity.model = '页面未注明'
        for intro_item in intro_list:
            if '操作系统' in intro_item:
                commodity.os = intro_item.replace('操作系统:', '')
            if 'CPU型号' in intro_item:
                commodity.soc_model = intro_item.replace('CPU型号:', '')
            if '商品名称' in intro_item:
                commodity.model = intro_item.replace('商品名称:', '')

        # Scroll down and click the "规格与包装" (specification) tab.
        window_scroll_by(browser, 1200)
        js_script = 'document.querySelector("#detail > div.tab-main.large > ul > li:nth-child(2)").click()'
        browser.execute_script(js_script)
        sleep(1)

        # Read product information from the specification tab.
        spec_list = browser.find_elements_by_class_name('Ptable-item')
        for spec_item in spec_list:
            spec_item_title = spec_item.find_element_by_tag_name('h3').text
            item_list = spec_item.find_elements_by_class_name('clearfix')

            if '主体' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value = item.find_element_by_tag_name('dd').text
                    if '产品名称' == item_name:
                        commodity.model = item_value

            if '基本信息' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value = _strip_jd_measurement_noise(
                        item.find_element_by_tag_name('dd').text)
                    try:
                        if '机身宽度' in item_name:
                            commodity.width = float(item_value)
                        if '机身厚度' in item_name:
                            commodity.thickness = float(item_value)
                        if '机身长度' in item_name:
                            commodity.length = float(item_value)
                        if '机身重量' in item_name:
                            commodity.weight = float(item_value)
                    except ValueError:
                        # Best effort: a value that is still not numeric after
                        # cleaning leaves the field unset.
                        pass

            if '主芯片' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value = item.find_element_by_tag_name('dd').text
                    if 'CPU品牌' == item_name:
                        commodity.soc_mfrs = item_value

            if '屏幕' == spec_item_title:
                for item in item_list:
                    item_name = item.find_element_by_tag_name('dt').text
                    item_value_str = item.find_element_by_tag_name('dd').text
                    if '主屏幕尺寸' in item_name:
                        try:
                            item_value = float(
                                item_value_str.replace('英寸', '').strip())
                            commodity.screen_size = item_value
                        except ValueError:
                            # Best effort: non-numeric screen size is skipped.
                            pass

        # Persist the product.
        commodity.save()
        # Drop the now-saved product from the target list.
        delete_saved_commodity_sku(sku)
        print(f'------SKU编号为 {sku} 的商品信息保存完毕------')
        # Return to the phone category page.
        # NOTE(review): the original indentation was lost; this call is assumed
        # to run once per scraped SKU (pairing with switch_to_current_sku_page)
        # - confirm against the original file.
        back_to_first_window(browser)


def _strip_jd_measurement_noise(text: str) -> str:
    """Remove unit suffixes and approximation words from a JD spec value."""
    # '大约' has to be removed before '约': stripping '约' first would turn
    # '大约8.5' into '大8.5', which float() rejects, and the measurement
    # would be silently lost.
    for token in ('mm', 'MM', 'mM', 'Mm', 'g', 'G',
                  '大约', '约', '左右', '大概'):
        text = text.replace(token, '')
    return text.strip()
def insert_jd_all_commodity(browser: Chrome):
    """Scrape every pending JD target SKU and store it as a ``Commodity`` row.

    For each ``JDTargetSku``:

    * if the SKU is already recorded in ``JDExistedSku`` the target entry is
      removed and the SKU is skipped (avoids counting sales twice);
    * otherwise the product page is opened, price / title / comment count /
      self-operated flag / shop name / brand / model are read from the page,
      the row is saved and the target entry is removed.

    Fields that cannot be read from the page keep the placeholder values
    assigned below.

    :param browser: the Selenium Chrome driver used for all page access.
    """
    for pending in JDTargetSku.select():
        # SKU number of the current product.
        sku: str = pending.sku

        # Skip SKUs that are already stored, so sales are not counted twice.
        if JDExistedSku.get_or_none(JDExistedSku.sku == sku) is not None:
            # Drop the already-saved product from the target list.
            delete_saved_commodity_sku(sku)
            print(f'---SKU编号为 {sku} 的商品信息已保存过---')
            continue

        # Start collecting the product information.
        commodity = Commodity()
        commodity.source = '京东'
        commodity.url = 'https://item.jd.com/' + sku + '.html'

        # Open the product page and switch to it.
        switch_to_current_sku_page(browser, commodity.url)
        # Fetch and store the currently listed SKUs from the backend API.
        get_jd_sku_from_api(browser, sku)

        try:
            price_text = browser.find_element_by_css_selector(
                'span.price:nth-child(2)').text
            commodity.price = float(price_text)
        except (ValueError, NoSuchElementException):
            # Raised when the price shows "pending release" or the product
            # has been delisted.
            commodity.price = -2

        try:
            title_element = browser.find_element_by_class_name('sku-name')
            commodity.title = title_element.text.strip()
        except NoSuchElementException:
            commodity.title = '无商品标题'

        # Placeholder for the sales/comment count until the tab is found.
        commodity.total = -1
        detail_tabs = browser.find_elements_by_css_selector(
            '#detail > div.tab-main.large > ul > li')
        for tab in detail_tabs:
            if '商品评价' in tab.text:
                count_text = tab.find_element_by_tag_name('s').text
                count_text = count_text.lstrip('(').rstrip(')')
                commodity.total = parse_jd_count_str(count_text)

        # Determine whether the product is JD self-operated.
        try:
            badge_text = browser.find_element_by_class_name('u-jd').text
            self_operated = badge_text == '自营'
        except NoSuchElementException:
            self_operated = False
        commodity.is_self = self_operated

        try:
            commodity.shop_name = browser.find_element_by_css_selector(
                '#crumb-wrap > div > div.contact.fr.clearfix > div.J-hove-wrap.EDropdown.fr > div:nth-child(1) > div '
                '> a').text
        except NoSuchElementException:
            commodity.shop_name = '店铺名称为空'

        # Read product information from the introduction section.
        try:
            commodity.brand = browser.find_element_by_css_selector(
                '#parameter-brand > li > a').text
        except NoSuchElementException:
            commodity.brand = '未知'

        intro_texts = [
            entry.text
            for entry in browser.find_elements_by_css_selector('.parameter2 > li')
        ]

        # Pre-assign a placeholder so that saving does not fail on an empty value.
        commodity.model = '未知'
        for line in intro_texts:
            if '商品名称' in line:
                commodity.model = line.replace('商品名称:', '')

        # Persist the product.
        commodity.save()
        # Drop the now-saved product from the target list.
        delete_saved_commodity_sku(sku)
        print(f'------SKU编号为 {sku} 的商品信息保存完毕------')
        # Return to the wireless-earphone category page.
        # NOTE(review): the original indentation was lost; this call is assumed
        # to run once per scraped SKU (pairing with switch_to_current_sku_page)
        # - confirm against the original file.
        back_to_first_window(browser)