def get_into_recommend_good_manage(self, driver: BaseDriver):
    """
    Open the "recommend good management" page.

    Clicks the element matched by ``self.recommend_good_label_css_selector``,
    pauses briefly, then clicks the ``a.J_menuItem`` entry.

    :param driver: driver used to locate and click the two elements
    :return: None
    :raises EnterTargetPageFailException: a SeleniumTimeoutException was
        raised while locating or clicking the elements
    """
    try:
        label_ele = driver.find_element(
            value=self.recommend_good_label_css_selector)
        label_ele.click()

        # give the menu entries below the label time to show up
        sleep(.5)

        menu_item_ele = driver.find_element(value='a.J_menuItem')
        menu_item_ele.click()
    except SeleniumTimeoutException:
        # the target page could not be entered
        raise EnterTargetPageFailException
def wait_for_delete_img_appear(self, driver: BaseDriver, poll_interval: float = .3):
    """
    Block until the delete button of the collected image shows up.

    The element ``div.deletebut`` is polled until its text equals '删除'.
    Per the original author's note this also keeps an article without an
    image from being published.

    No timeout is enforced inside this body.
    NOTE(review): publish_one_article catches FZTimeoutError around this
    call, so a timeout is presumably applied from outside -- confirm.

    :param driver: driver whose current page/frame is searched for the button
    :param poll_interval: seconds to sleep after every unsuccessful poll
    :return: None
    """
    while True:
        try:
            delete_btn_text = driver.find_element(
                value='div.deletebut').text
        except NoSuchElementException:
            # The button is not there yet: treat it as "not ready" and keep
            # waiting (the original note says that letting this propagate
            # made publishing fail over and over).
            delete_btn_text = None

        if delete_btn_text == '删除':
            break

        # Pause after *every* unsuccessful poll, including the case where the
        # button exists with a different text; otherwise the loop spins
        # against the driver without any delay.
        sleep(poll_interval)

    self.lg.info('该url采集完毕!')
def login_bg(self, driver: BaseDriver):
    """
    Log in through the page at ``self.publish_url``.

    Loads the page, fills ``self.yx_username`` / ``self.yx_password`` into
    the login form, submits it and then waits for the label checked by
    ``wait_for_recommend_good_label_appear``.

    :param driver: driver used to load the page and operate the form
    :return: None
    :raises LoginFailException: the page body is empty, or locating/filling/
        submitting the form raised one of the handled driver errors
    :raises EnterTargetPageFailException: waiting for the label raised
        FZTimeoutError
    """
    self.lg.info('login ...')
    body = driver.get_url_body(
        url=self.publish_url,
        timeout=30,
    )
    # Explicit check instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently skip this validation.
    if body == '':
        raise LoginFailException

    try:
        driver.find_element(value='input#loginName').send_keys(
            self.yx_username)
        driver.find_element(value='input#loginPwd').send_keys(
            self.yx_password)
        driver.find_element(value='button#subbut').click()
    except (
            NoSuchElementException,
            SeleniumTimeoutException,
            AssertionError,
            WebDriverException,
            AttributeError,
    ) as e:
        # any failure while operating the form counts as a failed login
        raise LoginFailException from e

    try:
        self.wait_for_recommend_good_label_appear(driver=driver)
    except FZTimeoutError as e:
        # the page after login never showed the expected label
        raise EnterTargetPageFailException from e
def wait_for_delete_img_appear(self, driver: BaseDriver, poll_interval: float = .3):
    """
    Block until the delete button of the collected image shows up.

    The element ``div.deletebut`` is polled until its text equals '删除'.
    Per the original author's note this also keeps an article without an
    image from being published.

    Errors raised by ``driver.find_element`` are not handled here and
    propagate to the caller. No timeout is enforced inside this body.
    NOTE(review): publish_one_article catches FZTimeoutError around this
    call, so a timeout is presumably applied from outside -- confirm.

    :param driver: driver whose current page/frame is searched for the button
    :param poll_interval: seconds to sleep after every unsuccessful poll
    :return: None
    """
    while True:
        delete_btn_text = driver.find_element(value='div.deletebut').text
        if delete_btn_text == '删除':
            break

        # Pause between polls; without it the loop spins against the driver
        # with no delay for as long as the text does not match.
        sleep(poll_interval)

    self.lg.info('该url采集完毕!')
def wait_for_recommend_good_label_appear(self, driver: BaseDriver, poll_interval: float = .3):
    """
    Block until the "recommend good management" label is shown.

    The element matched by ``self.recommend_good_label_css_selector`` is
    polled until its text equals '荐好管理'; reaching that state is logged
    as a successful login.

    Errors raised by ``driver.find_element`` are not handled here and
    propagate to the caller. No timeout is enforced inside this body.
    NOTE(review): login_bg catches FZTimeoutError around this call, so a
    timeout is presumably applied from outside -- confirm.

    :param driver: driver whose current page is searched for the label
    :param poll_interval: seconds to sleep after every unsuccessful poll
    :return: None
    """
    while True:
        label_text = driver.find_element(
            value=self.recommend_good_label_css_selector).text
        if label_text == '荐好管理':
            break

        # Pause between polls; without it the loop spins against the driver
        # with no delay for as long as the text does not match.
        sleep(poll_interval)

    self.lg.info('login success!')
def publish_one_article(self, driver: BaseDriver, article_url: str):
    """
    Publish one article by letting the page collect it from ``article_url``.

    Steps: switch into the target iframe, type the url into the collect
    input, click the collect button, wait for the delete button of the
    collected image, validate the title, click the publish button, then
    switch back to the main page, fill a random phone number into the layer
    dialog and confirm.

    :param driver: driver used to operate the page
    :param article_url: url of the article to collect and publish
    :return: None
    :raises AssertionError: no iframe element was found on the page
    :raises PublishOneArticleFailException: waiting for the image raised
        FZTimeoutError/NoSuchElementException, or clicking the publish
        button raised WebDriverException
    :raises ArticleTitleContainSensitiveWordsException: the collected title
        matched ARTICLE_TITLE_SENSITIVE_STR_TUPLE
    :raises ArticleTitleOverLongException: the collected title is longer
        than 30 characters
    """
    # Switch to the target iframe by element; the original note says that
    # switching by index is sometimes inaccurate.
    iframe_ele_list = driver.find_elements(by=By.TAG_NAME, value='iframe')
    # Explicit check instead of `assert`: assert statements are removed under
    # -O, which would turn this failure into an IndexError below. The
    # exception type seen by callers stays AssertionError.
    if iframe_ele_list == []:
        raise AssertionError('no iframe element found on the page')
    target_iframe_ele = iframe_ele_list[1] \
        if len(iframe_ele_list) > 1 \
        else iframe_ele_list[0]
    # A NoSuchFrameException raised here propagates unchanged (the original
    # caught it only to re-raise it).
    driver.switch_to_frame(frame_reference=target_iframe_ele)

    # clear the input box, then type the url to collect
    input_box_ele = driver.find_element(value='input#SnatchUrl')
    input_box_ele.clear()
    input_box_ele.send_keys(article_url)
    # click the collect button (first button of the group)
    driver.find_elements(value='span.input-group-btn button')[0].click()

    try:
        self.wait_for_delete_img_appear(driver=driver)
    except (FZTimeoutError, NoSuchElementException) as e:
        # collecting timed out or the element never existed
        raise PublishOneArticleFailException from e

    # read the title the page filled in after collecting
    title = driver.find_element(
        value='input#RecommendName').get_attribute('value')
    self.lg.info('title: {}'.format(title))
    if target_str_contain_some_char_check(
            target_str=title,
            check_char_obj=ARTICLE_TITLE_SENSITIVE_STR_TUPLE):
        raise ArticleTitleContainSensitiveWordsException

    if isinstance(title, str) and len(title) > 30:
        self.lg.info('@@@ title 标题过长, 无法发布!! 跳过!')
        # Raise instead of returning: per the original note, following
        # articles cannot be processed after an over-long title.
        raise ArticleTitleOverLongException

    try:
        # click the publish button (second button of the group)
        driver.find_elements(
            value='span.input-group-btn button')[1].click()
    except WebDriverException as e:
        # e.g. "unknown error: Element <iframe class="J_iframe"
        # name="iframe0" ..." reported by the driver
        raise PublishOneArticleFailException from e

    # back to the main page, where the layer dialog lives
    driver.switch_to_default_content()
    # fill in the person the article is published for
    random_phone = self.get_random_phone()
    driver.find_element(
        value='input.layui-layer-input').send_keys(random_phone)
    # confirm the dialog
    driver.find_element(value='a.layui-layer-btn0').click()

    self.lg.info('url: {} 发布成功!'.format(article_url))
    # wait 5 seconds so the page elements are reset before the next article
    sleep(5.)
def get_stg_search_res2(self, k: str, default_sort_value: int=None) -> dict:
    """
    stg2 search (driver version): look up question/answer pairs for ``k``.

    Opens the search page of www.etkz.cn with a BaseDriver, submits ``k``
    through the search form and parses the question/answer pairs out of the
    resulting page. Only the first result page is read.

    :param k: keyword typed into the search input
    :param default_sort_value: returned unchanged as ``page_num``; not
        otherwise used in this method
    :return: ``{'k': k, 'page_num': default_sort_value, 'res': [...]}`` where
        every element of ``res`` is a dict with 'question_desc' and 'answer'
    :raises AssertionError: the search page or the result page body is
        empty, or no result item was found
    """
    # css selectors of the search input and of the submit button
    input_css_sel = 'input#scform_srchtxt'
    submit_btn_sel = 'button#scform_submit'

    driver = BaseDriver(
        executable_path=PHANTOMJS_DRIVER_PATH,
        load_images=False,
        logger=self.lg,
        user_agent_type=PHONE,
        ip_pool_type=self.ip_pool_type,
    )
    try:
        body = driver.get_url_body(
            url='http://www.etkz.cn/search.php?mod=forum',
            css_selector=submit_btn_sel,
            timeout=20,
        )
        # Explicit checks instead of `assert` so validation survives -O;
        # the exception type seen by callers stays AssertionError.
        if body == '':
            raise AssertionError('search page body is empty')

        driver.find_element(value=input_css_sel).send_keys(k)
        driver.find_element(value=submit_btn_sel).click()
        # give the result page time to load
        sleep(5.)
        body = Requests._wash_html(driver.page_source)
    finally:
        # Drop the local reference on success and on failure alike.
        # NOTE(review): presumably BaseDriver releases the browser when it
        # is garbage collected -- confirm.
        del driver

    if body == '':
        raise AssertionError('result page body is empty')
    self.lg.info(body)

    question_item_sel = {
        'method': 'css',
        'selector': 'div#threadlist ul li',
    }
    question_desc_div_sel = {
        'method': 're',
        'selector': '问题:(.*?)答案:',
    }
    answer_div_sel = {
        'method': 're',
        'selector': '答案:(.*?)更多相关问题',
    }
    question_item = parse_field(
        parser=question_item_sel,
        target_obj=body,
        is_first=False,
        logger=self.lg,
    )
    if question_item == []:
        raise AssertionError('no result item found')

    res = []
    # items are handled in page order
    for item in question_item:
        try:
            question_desc_div = parse_field(
                parser=question_desc_div_sel,
                target_obj=item,
                logger=self.lg,
            )
            if question_desc_div == '':
                continue
            answer_div = parse_field(
                parser=answer_div_sel,
                target_obj=item,
                logger=self.lg,
            )
            if answer_div == '':
                continue

            # strip markup tags from both fields
            question_desc = fix_text(wash_sensitive_info(
                data=question_desc_div,
                replace_str_list=[],
                add_sensitive_str_list=[
                    '<strong>',
                    '</strong>',
                    '<font .*?>',
                    '</font>',
                    '<span .*?>',
                    '</span>',
                ],
                is_default_filter=False,
                is_lower=False,
            ))
            answer = fix_text(wash_sensitive_info(
                data=answer_div,
                replace_str_list=[],
                add_sensitive_str_list=[
                    '<strong>',
                    '</strong>',
                    '<font .*?>',
                    '</font>',
                ],
                is_default_filter=False,
                is_lower=False,
            ))
        except Exception:
            # best effort: an item that cannot be parsed or cleaned is skipped
            continue

        ask_questions_result_item = AskQuestionsResultItem()
        ask_questions_result_item['question_desc'] = question_desc
        ask_questions_result_item['answer'] = answer
        res.append(dict(ask_questions_result_item))

    self.lg.info('[{}] stg2, k: {}'.format(
        '+' if res != [] else '-',
        k,
    ))

    return {
        'k': k,
        'page_num': default_sort_value,
        'res': res,
    }