Esempio n. 1
0
 def read_single_goods_item(self, element: WebElement) \
         -> "Item | None":
     """
     Build an Item from one entry of the goods list page.

     Only the fields that can be read from a list entry are filled in
     (url, price, sales amount); the remaining fields are left at whatever
     Item() initialises them to.

     :param element:
         One entry of the list obtained from get_goods_list().
     :return:
         The populated Item, or None when get_price() reports -1.
     """
     item = Item()
     # Timestamp marking when reading of this entry started.
     item.data_begin_time = time.time()
     # Colour title / selected value: not present in the list view.
     # Edition title / selected value: not present in the list view.
     # Item URL.
     item.url = self.get_item_url(element)
     # Price; -1 is treated as "could not read the price".
     item.price = self.get_price(element)
     if item.price == -1:
         # The URL is passed as a lazy logging argument rather than
         # concatenated, so a missing (None) URL cannot raise TypeError here.
         logging.warning("JdListPageReader:Get price is -1\n%s", item.url)
         return None
     # PLUS member price: not present in the list view.
     # Coupons: not present in the list view.
     # Stock: JD shows only in-stock / out-of-stock, never a quantity
     #   (and not present in the list view).
     # Shipping fee: differs per province on JD, as does availability, so it
     #   is not recorded (and not present in the list view).
     # Sales amount.
     item.sales_amount = self.get_sales_amount(element)
     # Optional fields: not present in the list view.
     # Generate the combined specification fields.
     item.generate_all_specification()
     return item
Esempio n. 2
0
 def read_item(self, browser: selenium.webdriver.Chrome) -> "Item | None":
     """
     Read the JD item detail page currently shown in ``browser`` into an Item.

     :param browser:
         A browser driver instance (annotated as Chrome). Its current page
         must be a commodity detail page.
     :return:
         The populated Item, or None when the current page is not a detail
         page, the URL cannot be read (TimeoutException), or get_price()
         reports -1.
     """
     # TODO: the item may be out of stock / the link may be invalid.
     # Bail out early if this is not a detail page.
     if not self.is_jd_detail_page(browser):
         # The title is supplied through a %s placeholder; without one the
         # logging module fails to format the record and the message is lost.
         logging.info("[JdDetailPageReader.read_item]"
                      " Current page is't detail page: %s", browser.title)
         return None
     item = Item()
     # Colour: element 0 carries the title, element 1 the option container.
     # Both are required, so anything shorter than two elements is skipped
     # (some single-variant items have no colour choice at all).
     color_dom = self.get_color_dom(browser)
     if color_dom is not None and len(color_dom) >= 2:
         color_title_str = color_dom[0].text
         color_selected_str = color_dom[1].find_element(By.CLASS_NAME, "selected").text
         item.spec1 = color_title_str + '=' + color_selected_str
     # Edition: same layout as colour; absent when no edition needs to be
     # chosen or no edition information exists.
     edition_dom = self.get_edition_dom(browser)
     if edition_dom is not None and len(edition_dom) >= 2:
         edition_title_str = edition_dom[0].text
         edition_selected_str = edition_dom[1].find_element(By.CLASS_NAME, "selected").text
         item.spec2 = edition_title_str + '=' + edition_selected_str
     # Item URL; reading it may fail, e.g. while the page is still loading.
     try:
         item.url = browser.current_url
     except TimeoutException:
         logging.warning('Get url failed! at method:JdDetailReader.read_item()')
         return None
     # Price; -1 is treated as "could not read the price".
     item.price = self.get_price(browser)
     if item.price == -1:
         logging.warning("JdDetailPageReader:Get price is -1\n%s", item.url)
         return None
     # PLUS member price.
     item.plus_price = self.get_plus_price(browser)
     # Coupons: one line of text per coupon element, each newline-terminated.
     ticket_dom = self.get_ticket_dom(browser)
     if ticket_dom is not None and len(ticket_dom) != 0:
         item.ticket = ''.join(ti.text + '\n' for ti in ticket_dom)
     # Stock: JD shows only in-stock / out-of-stock, never a quantity.
     # Shipping fee: differs per province on JD, as does availability, so it
     #   is not recorded.
     # Sales amount is taken from get_remark().
     item.sales_amount = self.get_remark(browser)
     # Optional fields: none read here.
     # Generate the combined specification fields.
     item.generate_all_specification()
     return item