# Filler words and unit suffixes removed from JD "基本信息" values before they
# are parsed as numbers.  Order matters: a multi-character token must come
# before any token that is a substring of it ('大约' before '约'), otherwise
# the shorter replacement destroys the longer match and leaves a stray
# character behind that makes float() fail.
_JD_MEASUREMENT_NOISE = (
    '大约', '大概', '左右', '约',
    'mm', 'MM', 'mM', 'Mm', 'g', 'G',
)


def _parse_jd_measurement(raw_text):
    """Strip units/filler words from a JD spec value and parse it as a float.

    :param raw_text: text of a spec ``<dd>`` cell, e.g. ``'大约8.1mm'``.
    :return: the numeric value, or ``None`` when the remaining text is not a
        valid float literal.
    """
    cleaned = raw_text
    for token in _JD_MEASUREMENT_NOISE:
        cleaned = cleaned.replace(token, '')
    try:
        return float(cleaned.strip())
    except ValueError:
        return None


def insert_jd_all_commodity(browser: Chrome):
    """Scrape every pending JD ('京东') target SKU and save it as a Commodity.

    For each ``TargetSku`` row whose source is '京东' the product page is
    opened, price/title/sales/shop/brand and hardware specs are read from the
    page, the populated ``Commodity`` is saved, and
    ``delete_saved_commodity_sku`` is called for the SKU.  SKUs that already
    have an ``ExistedSku`` row are not scraped again.

    Missing page elements never abort the run; they fall back to sentinel
    values (price/total ``-1``, placeholder strings for text fields).

    :param browser: Selenium Chrome driver used for all page interaction.
    """
    for target_sku in TargetSku.select().where(TargetSku.source == '京东'):
        # SKU number of the current product.
        sku: str = target_sku.sku

        # Skip SKUs that were already stored so sales are not counted twice.
        already_saved = ExistedSku.get_or_none(
            ExistedSku.source == '京东', ExistedSku.sku == sku)
        if already_saved is not None:
            # Drop the target entry of the already-saved product.
            delete_saved_commodity_sku(sku)
            print(f'---SKU编号为 {sku} 的商品信息已保存过---')
            continue

        # Start collecting the product information.
        commodity = Commodity()
        commodity.source = '京东'
        commodity.url = 'https://item.jd.com/' + sku + '.html'

        # Open the product page and switch to it.
        switch_to_current_sku_page(browser, commodity.url)

        # Fetch and store the listed SKUs from the backend API.
        get_jd_sku_from_api(browser, sku)

        try:
            commodity.price = float(
                browser.find_element_by_css_selector(
                    'span.price:nth-child(2)').text)
        except (ValueError, NoSuchElementException):
            # Raised when the price reads "to be announced" or the product
            # has been delisted.
            commodity.price = -1

        try:
            commodity.title = browser.find_element_by_class_name(
                'sku-name').text.strip()
        except NoSuchElementException:
            commodity.title = '无商品标题'

        try:
            total_str = browser.find_element_by_css_selector(
                '#comment-count > a').text
            commodity.total = parse_jd_count_str(total_str)
        except NoSuchElementException:
            # Pre-order products do not show the count next to the price.
            commodity.total = -1

        # Determine whether the product is sold by JD itself ("自营").
        try:
            badge_text = browser.find_element_by_class_name('u-jd').text
        except NoSuchElementException:
            commodity.is_self = False
        else:
            commodity.is_self = badge_text == '自营'

        try:
            commodity.shop_name = browser.find_element_by_css_selector(
                '#crumb-wrap > div > div.contact.fr.clearfix > div.J-hove-wrap.EDropdown.fr > div:nth-child(1) > div '
                '> a').text
        except NoSuchElementException:
            commodity.shop_name = '店铺名称为空'

        # --- Product introduction list -------------------------------------
        try:
            commodity.brand = browser.find_element_by_css_selector(
                '#parameter-brand > li > a').text
        except NoSuchElementException:
            commodity.brand = '品牌未注明'

        intro_texts = [
            entry.text
            for entry in browser.find_elements_by_css_selector('.parameter2 > li')
        ]
        # Pre-assign defaults so empty values are never written on save.
        commodity.os = '页面未注明'
        commodity.model = '页面未注明'
        for intro_text in intro_texts:
            if '操作系统' in intro_text:
                commodity.os = intro_text.replace('操作系统:', '')
            if 'CPU型号' in intro_text:
                commodity.soc_model = intro_text.replace('CPU型号:', '')
            if '商品名称' in intro_text:
                commodity.model = intro_text.replace('商品名称:', '')

        # Scroll down and click the "规格与包装" (specs & packaging) tab.
        window_scroll_by(browser, 1200)
        js_script = 'document.querySelector("#detail > div.tab-main.large > ul > li:nth-child(2)").click()'
        browser.execute_script(js_script)
        sleep(1)

        # --- Specs & packaging table ---------------------------------------
        for section in browser.find_elements_by_class_name('Ptable-item'):
            section_title = section.find_element_by_tag_name('h3').text
            if section_title not in ('主体', '基本信息', '主芯片', '屏幕'):
                continue

            for row in section.find_elements_by_class_name('clearfix'):
                field_name = row.find_element_by_tag_name('dt').text
                field_value = row.find_element_by_tag_name('dd').text

                if section_title == '主体':
                    if field_name == '产品名称':
                        commodity.model = field_value

                elif section_title == '基本信息':
                    measured = _parse_jd_measurement(field_value)
                    if measured is None:
                        # Value is not numeric; leave the field unset.
                        continue
                    if '机身宽度' in field_name:
                        commodity.width = measured
                    if '机身厚度' in field_name:
                        commodity.thickness = measured
                    if '机身长度' in field_name:
                        commodity.length = measured
                    if '机身重量' in field_name:
                        commodity.weight = measured

                elif section_title == '主芯片':
                    if field_name == 'CPU品牌':
                        commodity.soc_mfrs = field_value

                else:  # '屏幕'
                    if '主屏幕尺寸' in field_name:
                        try:
                            commodity.screen_size = float(
                                field_value.replace('英寸', '').strip())
                        except ValueError:
                            # Non-numeric size text; leave the field unset.
                            pass

        # Persist the product record.
        commodity.save()
        # Drop the target entry of the product that was just saved.
        delete_saved_commodity_sku(sku)
        print(f'------SKU编号为 {sku} 的商品信息保存完毕------')

        # Return to the phone category page.
        back_to_first_window(browser)
# Filler words and unit suffixes removed from Suning spec values before they
# are parsed as numbers.  Order matters: a multi-character token must come
# before any token that is a substring of it ('大约' before '约', '英寸'
# before '寸'), otherwise the shorter replacement destroys the longer match
# and leaves a stray character behind that makes float() fail.
_SN_MEASUREMENT_NOISE = (
    '大约', '左右', '英寸', '毫米', '约', '克', '寸',
    'mm', 'MM', 'mM', 'Mm', 'g', 'G',
)

# Spec-table row name -> Commodity attribute that receives the raw text.
_SN_TEXT_SPECS = {
    '品牌': 'brand',
    '型号': 'model',
    '手机操作系统': 'os',
    'CPU品牌': 'soc_mfrs',
    'CPU型号': 'soc_model',
}

# Spec-table row name -> Commodity attribute that receives the parsed float.
_SN_NUMERIC_SPECS = {
    '屏幕尺寸': 'screen_size',
    '机身长度': 'length',
    '机身宽度': 'width',
    '机身厚度': 'thickness',
    '重量': 'weight',
}


def _parse_sn_measurement(raw_text):
    """Strip units/filler words from a Suning spec value and parse a float.

    :param raw_text: text of a spec value cell, e.g. ``'大约8.1毫米'``.
    :return: the numeric value, or ``None`` when the remaining text is not a
        valid float literal.
    """
    cleaned = raw_text
    for token in _SN_MEASUREMENT_NOISE:
        cleaned = cleaned.replace(token, '')
    try:
        return float(cleaned.strip())
    except ValueError:
        return None


def insert_sn_all_commodity(browser: Chrome):
    """Scrape every pending Suning ('苏宁') target SKU and save it as a Commodity.

    For each ``SNTargetSku`` row the product page is opened, the sales total
    is taken from ``get_sn_sku_and_total_from_api``, title/price/shop and
    hardware specs are read from the page, the populated ``Commodity`` is
    saved, and ``delete_saved_commodity_sku`` is called for the shop/SKU pair.
    Pairs that already have an ``SNExistedSku`` row are not scraped again.

    Missing page elements never abort the run; they fall back to sentinel
    values (price ``-2``, placeholder strings for text fields).  Numeric spec
    values that cannot be parsed are left unset.

    :param browser: Selenium Chrome driver used for all page interaction.
    """
    for target_sku in SNTargetSku.select():
        # Shop code and SKU number of the current product.
        shop_code: str = target_sku.shop_code
        sku: str = target_sku.sku

        # Skip SKUs that were already stored so sales are not counted twice.
        already_saved = SNExistedSku.get_or_none(
            SNExistedSku.shop_code == shop_code, SNExistedSku.sku == sku)
        if already_saved is not None:
            # Drop the target entry of the already-saved product.
            delete_saved_commodity_sku(shop_code, sku)
            print(f'---SKU编号为 {sku} 的商品信息已保存过---')
            continue

        # Start collecting the product information.
        commodity = Commodity()
        commodity.source = '苏宁'
        commodity.url = 'https://product.suning.com/' + shop_code + '/' + sku + '.html'

        # Open the product page and switch to it.
        switch_to_current_sku_page(browser, commodity.url)

        # Fetch all SKUs and the sales total from the backend API.
        commodity.total = get_sn_sku_and_total_from_api(
            browser, shop_code, sku)

        # Suning's self-operated store uses shop code 0000000000.
        commodity.is_self = int(shop_code) == 0

        try:
            commodity.title = browser.find_element_by_id(
                'itemDisplayName').text
        except NoSuchElementException:
            commodity.title = '无商品标题'

        try:
            commodity.price = float(
                browser.find_element_by_class_name('mainprice').text.replace(
                    '¥', ''))
        except (ValueError, NoSuchElementException):
            commodity.price = -2

        try:
            commodity.shop_name = browser.find_element_by_class_name(
                'header-shop-name').text
        except NoSuchElementException:
            commodity.shop_name = '店铺名称为空'

        # --- Product introduction panel ------------------------------------
        # Pre-assign defaults so empty values are never written on save.
        commodity.brand = '页面未注明'
        commodity.model = '页面未注明'
        commodity.os = '页面未注明'

        for group in browser.find_elements_by_css_selector(
                '#phoneParameters > ul > li'):
            group_title = group.find_element_by_tag_name('p').text
            entries = group.find_elements_by_css_selector(
                'dl > dd > div > ul > li')

            if group_title == '屏幕':
                for entry in entries:
                    entry_text = entry.text
                    if '屏幕尺寸' not in entry_text:
                        continue
                    try:
                        commodity.screen_size = float(
                            entry_text.replace('屏幕尺寸:', '')
                            .replace('英寸', '').strip())
                    except ValueError:
                        # Malformed size text must not abort the whole crawl;
                        # the spec table below may still supply the value.
                        pass

            if group_title == 'CPU':
                for entry in entries:
                    entry_text = entry.text
                    if 'CPU型号' in entry_text:
                        commodity.soc_model = entry_text.replace('CPU型号:', '')

        # Scroll down and click the "包装及参数" (packaging & parameters) tab.
        window_scroll_by(browser, 1500)
        browser.execute_script(
            'document.querySelector("#productParTitle > a").click()')
        sleep(1)

        # --- Parameter table -----------------------------------------------
        for spec_row in browser.find_elements_by_css_selector(
                '#itemParameter > tbody > tr'):
            # Only rows carrying a parametercode attribute are read.
            if spec_row.get_attribute('parametercode') is None:
                continue

            spec_name = spec_row.find_element_by_tag_name('span').text
            spec_value = spec_row.find_element_by_class_name('val').text

            text_attr = _SN_TEXT_SPECS.get(spec_name)
            if text_attr is not None:
                setattr(commodity, text_attr, spec_value)

            numeric_attr = _SN_NUMERIC_SPECS.get(spec_name)
            if numeric_attr is not None:
                measured = _parse_sn_measurement(spec_value)
                if measured is not None:
                    setattr(commodity, numeric_attr, measured)

        # Persist the product record.
        commodity.save()
        # Drop the target entry of the product that was just saved.
        delete_saved_commodity_sku(shop_code, sku)
        print(f'------SKU编号为 {sku} 的商品信息保存完毕------')

        # Return to the phone category page.
        back_to_first_window(browser)
        sleep(2)