class Test(object):
    """Scenario: filter the laptop listing and persist the first product's details.

    The driver is taken from ``BrowserEngine.init_remote_driver_no_gui()`` when
    the object is constructed.

    NOTE(review): if this class is meant to be collected by pytest, be aware
    that pytest skips test classes that define ``__init__`` - confirm how this
    scenario is launched.
    """

    # Alternative kept from the original: run against a local driver instead.
    # driver = BrowserEngine.init_local_driver_no_gui()
    logger = logging_setting.get_logger()

    def __init__(self):
        # The engine returns a mapping of drivers; this scenario uses the
        # entry stored under the "linux" key.
        remote_drivers = BrowserEngine.init_remote_driver_no_gui()
        self.driver = remote_drivers["linux"]

    def test_save_info(self):
        """Open the listing, apply three filters, open the first item and save it."""
        self.logger.debug("目标保存电脑详细信息,出发啦@!!冲啊!!")

        list_page = GoodsListPage(self.driver)
        list_page.get_goods_list_driver("电脑", "笔记本")

        # Filters are clicked in list order: brand, price range, sort by comments.
        filters = [
            (By.ID, "brand-11518"),
            (By.LINK_TEXT, "7000以上"),
            (By.LINK_TEXT, "评论数"),
        ]
        list_page.get_selector_page(filters)

        self.logger.debug("开始获取具体的商品页面")
        first_item_image = (
            By.XPATH, "//*[@id=\"plist\"]/ul/li[1]/div/div[1]/a/img")
        detail_driver = list_page.get_goods_info_page(first_item_image)
        self.logger.info("当前的url地址是" + detail_driver.current_url)

        GoodsInfoPage(detail_driver).save_product_info()
        self.logger.info("保存商品信息成功")
class Test:
    """Scenario: filter the laptop listing and persist the first product's details.

    The driver and logger are class attributes, so
    ``BrowserEngine.init_local_driver()`` runs once, when the class body is
    executed.
    """

    driver = BrowserEngine.init_local_driver()
    logger = logging_setting.get_logger()

    def test_save_info(self):
        """Open the listing, apply three filters, open the first item and save it."""
        self.logger.debug("目标保存电脑详情信息")
        goods_list_page = GoodsListPage(self.driver)
        goods_list_page.get_goods_list_driver("电脑", "笔记本")

        # Fixed typo: the id was "brabd-11518"; the other variants of this
        # scenario locate the brand filter by "brand-11518".
        brand_locator = (By.ID, "brand-11518")
        price_locator = (By.LINK_TEXT, "7000以上")
        comment_locator = (By.LINK_TEXT, "评论数")
        # Filters are clicked in list order.
        goods_list_page.get_selector_page(
            [brand_locator, price_locator, comment_locator])

        self.logger.debug("开始获取具体的商品页面")
        goods = (
            By.XPATH,
            "//div[@id='J_searchWrap']//li[1]//div[1]//div[1]//a[1]//img[1]")
        driver = goods_list_page.get_goods_info_page(goods)

        # Fixed: the URL was passed as a logging argument without a "%s"
        # placeholder. With a standard-library logger (presumably what
        # get_logger() returns) that fails at format time and the URL is
        # never written to the log.
        self.logger.info("当前的url地址是:%s", driver.current_url)

        goods_info = GoodsInfoPage(driver)
        goods_info.save_product_info()
        self.logger.debug("保存商品信息成功")
class Test(object):
    """Scenario: filter the laptop listing and persist the first product's details.

    The driver is taken from ``BrowserEngine.init_remote_driver()`` when the
    object is constructed.

    NOTE(review): if this class is meant to be collected by pytest, be aware
    that pytest skips test classes that define ``__init__`` - confirm how this
    scenario is launched.
    """

    # Alternative kept from the original: run against a local driver instead.
    # driver = BrowserEngine.init_local_driver()
    logger = logging_setting.get_logger()

    def __init__(self):
        # Start the remote drivers; several of them may exist.
        remote_drivers = BrowserEngine.init_remote_driver()
        # Running all of them at once would need extra handling here; for now
        # only the entry stored under "docker_1" is used.
        self.driver = remote_drivers["docker_1"]

    def test_save_info(self):
        """Open the listing, apply three filters, open the first item and save it."""
        self.logger.debug("パソコン詳細保存")

        list_page = GoodsListPage(self.driver)
        list_page.get_goods_list_driver("电脑", "笔记本电脑")

        # Filters are clicked in list order: brand, price range, sort by comments.
        filters = [
            (By.ID, "brand-11518"),
            (By.LINK_TEXT, "7000以上"),
            (By.LINK_TEXT, "评论数"),
        ]
        list_page.get_select_page(filters)

        self.logger.debug("詳細データー取得開始")
        first_item_image = (
            By.XPATH, "//*[@id='plist']/ul/li[1]/div/div[1]/a/img")
        detail_driver = list_page.get_goods_info_page(first_item_image)
        self.logger.info("現在のurlは:" + detail_driver.current_url)

        GoodsInfoPage(detail_driver).save_product_info()
        self.logger.info("商品データ保存しました")
class GoodsInfoPage(BasePage):
    """Page object for a product detail page.

    Reads every ``Ptable-item`` section of the specification tab and stores
    the extracted key/value pairs through ``Goods``.
    """

    logger = get_logger()

    def __init__(self, driver):
        """Keep a reference to *driver* and initialise the base page with the start URL."""
        self._driver = driver
        super(GoodsInfoPage, self).__init__(driver, basic_config.START_URL)

    def save_product_info(self):
        """Open the specification tab, collect every section and persist the result.

        :return: None
        """
        # Scroll down before clicking the tab.
        js = "window.scrollTo(0,1000)"
        self._driver.execute_script(js)

        # Fixed: the locator was the placeholder "//*[@id=...]", which is not a
        # valid XPath expression. The concrete tab locator used by the sibling
        # variant of this page object is restored here.
        # NOTE(review): confirm this is the intended tab for this variant.
        product_element = (By.XPATH, "//*[@id=\"detail\"]/div[1]/ul/li[2]")
        self.find_element(*product_element).click()

        info_ele = (By.CLASS_NAME, 'Ptable-item')
        info_elements = self.find_elements(*info_ele)

        result_list = []
        for info_element in info_elements:
            info_element_dict = self.__get_info_element_dict(info_element)
            result_list.append(info_element_dict)
            self.logger.debug(str(info_element_dict))
        self.__save_info_to_mysql(result_list)

    def __save_info_to_mysql(self, info_list):
        """Insert one row per top-level entry of every dict in *info_list*.

        :param info_list: list of dicts as produced by ``__get_info_element_dict``
        """
        # Fixed: this used to bind the class itself (``Goods``) instead of an
        # instance, unlike the sibling variant which calls ``Goods()``.
        goods = Goods()
        for info in info_list:
            for key, value in info.items():
                goods.insert(["computer_part", "computer_info"],
                             [str(key), str(value)])

    def __get_info_element_dict(self, info_element):
        """Extract one specification section.

        :param info_element: the section element; all lookups below use it as
            the parent element
        :return: ``{<h3 text>: {<dt text>: <dd text>, ...}}``
        :raises IndexError: if fewer value cells than key cells are found
        """
        # Section title; info_element is used as the parent element.
        computer_part_element = (By.TAG_NAME, "h3")
        computer_part = self.find_element(*computer_part_element,
                                          element=info_element)

        # Keys of the section.
        computer_info_keys_element = (By.TAG_NAME, 'dt')
        computer_info_keys = self.find_elements(*computer_info_keys_element,
                                                element=info_element)

        # Values of the section, skipping <dd> cells whose class contains
        # 'Ptable-tips'.
        # Fixed: the closing parenthesis of not(...) was missing, which made
        # the XPath expression invalid.
        computer_info_values_element = (
            By.XPATH, "dl//dd[not(contains(@class,'Ptable-tips'))]")
        computer_info_values = self.find_elements(
            *computer_info_values_element, element=info_element)
        self.logger.debug("全ての規格と包装info")

        key_and_value_dict = {}
        parts_dict = {}
        # Values are looked up by index on purpose: a shorter value list
        # raises instead of silently dropping keys.
        for i, key_element in enumerate(computer_info_keys):
            key_and_value_dict[
                key_element.text] = computer_info_values[i].text
        parts_dict[computer_part.text] = key_and_value_dict
        return parts_dict
class GoodsInfoPage(BasePage):
    """Page object for a product detail page.

    Reads every ``Ptable-item`` section of the specification tab and stores
    the extracted key/value pairs through ``Goods``.
    """

    logger = get_logger()

    def __init__(self, driver):
        """Keep a reference to *driver* and initialise the base page with the start URL."""
        self._driver = driver
        super(GoodsInfoPage, self).__init__(driver, basic_config.START_URL)

    def save_product_info(self):
        """Open the specification tab, collect every section and persist the result.

        :return: None
        """
        self._driver.execute_script("window.scrollTo(0,1000)")

        # Switch to the "specification and packaging" tab.
        spec_tab = self.find_element(
            By.XPATH, "//*[@id=\"detail\"]/div[1]/ul/li[2]")
        spec_tab.click()

        sections = self.find_elements(By.CLASS_NAME, "Ptable-item")
        collected = [
            self.__get_info_element_dict(section) for section in sections
        ]
        self.__save_info_to_mysql(collected)

    def __save_info_to_mysql(self, info_list):
        """Insert one row per top-level entry of every dict in *info_list*.

        :param info_list: list of dicts as produced by ``__get_info_element_dict``
        """
        store = Goods()
        for section_info in info_list:
            for part_name, part_details in section_info.items():
                # A fresh column list is passed on every call, as before.
                store.insert(["computer_part", "computer_info"],
                             [str(part_name), str(part_details)])

    def __get_info_element_dict(self, info_element):
        """Extract one specification section.

        :param info_element: the section element; all lookups below use it as
            the parent element
        :return: ``{<h3 text>: {<dt text>: <dd text>, ...}}``
        :raises IndexError: if fewer value cells than key cells are found
        """
        # First column: the name of the computer part.
        part_heading = self.find_element(
            By.TAG_NAME, "h3", element=info_element)
        # Second column: the attribute names.
        key_cells = self.find_elements(
            By.TAG_NAME, "dt", element=info_element)
        # Third column: the attribute values, skipping <dd> cells whose class
        # contains 'Ptable-tips'.
        value_cells = self.find_elements(
            By.XPATH,
            "dl//dd[not(contains(@class,'Ptable-tips'))]",
            element=info_element)
        self.logger.debug("获取到了所有的规格与包装信息")

        attributes = {}
        # Values are addressed by position so a shorter value list still raises.
        for position, key_cell in enumerate(key_cells):
            attributes[key_cell.text] = value_cells[position].text
        return {part_heading.text: attributes}
class GoodsListPage(BasePage):
    """Page object for the goods listing reached through the two-level menu."""

    logger = get_logger()

    def __init__(self, driver):
        """Keep a reference to *driver* and initialise the base page with the start URL."""
        self._driver = driver
        super().__init__(driver, basic_config.START_URL)

    def get_goods_list_driver(self, first_list_name, second_list_name):
        """Navigate to a goods listing and return the driver positioned on it.

        :param first_list_name: link text of the first-level menu entry
        :param second_list_name: link text of the second-level menu entry
        :return: driver
        """
        browser = self.open()

        # Hover the first-level entry before looking up the second-level link.
        top_entry = self.find_element(By.LINK_TEXT, first_list_name)
        ActionChains(browser).move_to_element(top_entry).perform()
        self.find_element(By.LINK_TEXT, second_list_name).click()

        # Switch window handle: close the current window and move to the other one.
        known_handles = browser.window_handles
        origin_handle = browser.current_window_handle
        for candidate in known_handles:
            if candidate == origin_handle:
                continue
            browser.close()
            browser.switch_to.window(candidate)

        self.logger.info("获取到页面:" + second_list_name)
        self.logger.info("当前url是:" + browser.current_url)
        self._driver = browser
        return browser

    def get_selector_page(self, selector_condition_list):
        """Click every filter in order.

        :param selector_condition_list: list of locator tuples, for example
            [(By.ID, "id_value"), (By.NAME, "name_value")]
        :return: None
        """
        for locator in selector_condition_list:
            self.find_element(*locator).click()

    def get_goods_info_page(self, selector_condition):
        """Open the detail page of one product.

        :param selector_condition: locator tuple of the product to click, for
            example (By.ID, "id_value")
        :return: the browser driver
        """
        self.find_element(*selector_condition).click()

        browser = self._driver
        known_handles = browser.window_handles
        origin_handle = browser.current_window_handle
        for candidate in known_handles:
            if candidate == origin_handle:
                continue
            # Close the current window, then move to the other one.
            browser.close()
            browser.switch_to.window(candidate)
        return self._driver
class GoodsListPage(BasePage):
    """Page object for the goods listing reached through the two-level menu."""

    logger = get_logger()

    def __init__(self, driver):
        """Keep a reference to *driver* and initialise the base page with the start URL."""
        self._driver = driver
        super(GoodsListPage, self).__init__(driver, basic_config.START_URL)

    def get_goods_list_driver(self, first_list_name, second_list_name):
        """Navigate to a goods listing and return the driver positioned on it.

        :param first_list_name: link text of the first-level menu entry
        :param second_list_name: link text of the second-level menu entry
        :return: driver
        """
        browser = self.open()

        # Hover the first-level entry before looking up the second-level link.
        top_entry = self.find_element(By.LINK_TEXT, first_list_name)
        ActionChains(browser).move_to_element(top_entry).perform()
        self.find_element(By.LINK_TEXT, second_list_name).click()

        # Switch window handle: close the current window and move to the other one.
        known_handles = browser.window_handles
        origin_handle = browser.current_window_handle
        for candidate in known_handles:
            if candidate == origin_handle:
                continue
            browser.close()
            browser.switch_to.window(candidate)

        self.logger.info("ページ取得" + second_list_name)
        self.logger.info("現在urlは" + browser.current_url)
        self._driver = browser
        return browser

    def get_select_page(self, selector_condition_list):
        """Click several filter conditions in order.

        :param selector_condition_list: list of locator tuples, for example
            [(By.ID, "id_value"), (By.NAME, "name_value")]; a list is used so
            that the order of the filters does not change
        :return: None
        """
        # The argument could additionally be checked to be a list here.
        for locator in selector_condition_list:
            self.find_element(*locator).click()

    def get_goods_info_page(self, selector_condition):
        """Open the detail page of one product.

        :param selector_condition: locator tuple of the product to click, for
            example (By.ID, "id_value")
        :return: driver
        """
        self.find_element(*selector_condition).click()

        # Switch window handle: close the current window and move to the other one.
        browser = self._driver
        known_handles = browser.window_handles
        origin_handle = browser.current_window_handle
        for candidate in known_handles:
            if candidate == origin_handle:
                continue
            browser.close()
            browser.switch_to.window(candidate)

        self.logger.info("詳細ページを取得")
        self.logger.info("現在urlを取得" + self._driver.current_url)
        return self._driver