예제 #1
0
 def get_into_recommend_good_manage(self, driver: BaseDriver):
     """
     Open the "荐好管理" (recommend-good management) page.

     Clicks the element matched by ``self.recommend_good_label_css_selector``,
     pauses for half a second, then clicks the element matched by
     ``a.J_menuItem``.

     :param driver: driver used to look up and click the elements
     :return: None
     :raises EnterTargetPageFailException: when a SeleniumTimeoutException
         is raised by any of the lookups/clicks
     """
     try:
         label_ele = driver.find_element(
             value=self.recommend_good_label_css_selector)
         label_ele.click()
         # Give the menu entries below the label time to show up.
         sleep(.5)
         menu_item_ele = driver.find_element(value='a.J_menuItem')
         menu_item_ele.click()
     except SeleniumTimeoutException:
         # Entering the target page failed: report it as a domain error.
         raise EnterTargetPageFailException
예제 #2
0
    def wait_for_delete_img_appear(self, driver: BaseDriver):
        """
        Wait until the delete button of the collected image is shown.

        Polls ``div.deletebut`` until its text equals '删除'. When the element
        does not exist yet (NoSuchElementException) the lookup is retried
        after a 0.3s pause instead of failing; an earlier version without
        this handling kept failing to publish.

        The original note says the wait ends on timeout (which also avoids
        publishing an article without images); no timeout is implemented
        inside this body, so it is presumably enforced elsewhere (decorator
        or caller) — TODO confirm.

        :param driver: driver used to look up the button
        :return: None
        """
        img_collected = False
        while not img_collected:
            try:
                btn_text = driver.find_element(value='div.deletebut').text
            except NoSuchElementException:
                # Button not rendered yet: back off briefly, then poll again.
                sleep(.3)
            else:
                img_collected = btn_text == '删除'

        self.lg.info('该url采集完毕!')
예제 #3
0
    def login_bg(self, driver: BaseDriver):
        """
        Log in to the publishing backend.

        Loads ``self.publish_url``, types ``self.yx_username`` and
        ``self.yx_password`` into the login form, submits it and then waits
        for the post-login label via ``wait_for_recommend_good_label_appear``.

        :param driver: driver used to load the page and fill in the form
        :return: None
        :raises LoginFailException: when the loaded page body is empty, or
            when locating/filling/submitting the form raises one of the
            handled exceptions (the original error is kept as ``__cause__``)
        :raises EnterTargetPageFailException: when waiting for the post-login
            label raises FZTimeoutError
        """
        self.lg.info('login ...')
        body = driver.get_url_body(
            url=self.publish_url,
            timeout=30,
        )
        if body == '':
            # Explicit check rather than ``assert``: asserts are stripped
            # under ``python -O`` and the guard would silently disappear.
            raise LoginFailException

        try:
            driver.find_element(value='input#loginName').send_keys(
                self.yx_username)
            driver.find_element(value='input#loginPwd').send_keys(
                self.yx_password)
            driver.find_element(value='button#subbut').click()
        except (
                NoSuchElementException,
                SeleniumTimeoutException,
                AssertionError,
                WebDriverException,
                AttributeError,
        ) as e:
            # Report any form failure as a login failure, keeping the cause.
            raise LoginFailException from e

        try:
            self.wait_for_recommend_good_label_appear(driver=driver)
        except FZTimeoutError as e:
            # Entering the target page failed: raise the dedicated error.
            raise EnterTargetPageFailException from e
예제 #4
0
    def wait_for_delete_img_appear(self, driver: BaseDriver):
        """
        Wait until the delete button of the collected image is shown.

        Re-queries ``div.deletebut`` without pausing until its text equals
        '删除'. Any exception raised by the lookup propagates to the caller.

        The original note says the wait ends on timeout (which also avoids
        publishing an article without images); no timeout is implemented
        inside this body, so it is presumably enforced elsewhere (decorator
        or caller) — TODO confirm.

        :param driver: driver used to look up the button
        :return: None
        """
        # Each pass performs a fresh lookup; the loop ends on the first match.
        while driver.find_element(value='div.deletebut').text != '删除':
            pass

        self.lg.info('该url采集完毕!')
예제 #5
0
    def wait_for_recommend_good_label_appear(self, driver: BaseDriver):
        """
        Wait until the "荐好管理" label is shown, then log the login success.

        Re-queries the element matched by
        ``self.recommend_good_label_css_selector`` without pausing until its
        text equals '荐好管理'. Any exception raised by the lookup propagates
        to the caller.

        :param driver: driver used to look up the label
        :return: None
        """
        expected_label = '荐好管理'
        # The selector is read on every pass and a fresh lookup is performed.
        while driver.find_element(
                value=self.recommend_good_label_css_selector
        ).text != expected_label:
            pass

        self.lg.info('login success!')
예제 #6
0
    def publish_one_article(self, driver: BaseDriver, article_url: str):
        """
        Publish one article collected from ``article_url``.

        Steps: switch into the target iframe, submit the url to the collect
        form, wait for the collected image's delete button, validate the
        collected title, click publish, then (back on the main page) fill a
        random phone into the popup and confirm.

        :param driver: driver holding the already opened publish page
        :param article_url: url of the article to collect and publish
        :return: None
        :raises AssertionError: when no iframe element is found
        :raises PublishOneArticleFailException: when waiting for the image
            raises FZTimeoutError/NoSuchElementException, or when clicking
            the publish button raises WebDriverException
        :raises ArticleTitleContainSensitiveWordsException: when the title
            matches ``ARTICLE_TITLE_SENSITIVE_STR_TUPLE``
        :raises ArticleTitleOverLongException: when the title is longer than
            30 characters
        """
        # Switch to the target iframe by element; switching by index was
        # found unreliable.
        # A frame-switch error (e.g. NoSuchFrameException) propagates
        # unchanged; the original note says it may occur when the driver is
        # already inside the target iframe, e.g. after an over-long title.
        iframe_ele_list = driver.find_elements(by=By.TAG_NAME,
                                               value='iframe')
        if not iframe_ele_list:
            # Explicit raise instead of ``assert`` so the guard survives
            # ``python -O``; the exception type seen by callers is unchanged.
            raise AssertionError('no iframe element found')
        # Prefer the second iframe when there is one, else the only one.
        target_iframe_ele = iframe_ele_list[1] if len(
            iframe_ele_list) > 1 else iframe_ele_list[0]
        driver.switch_to_frame(frame_reference=target_iframe_ele)

        # Clear the input box, then type the url to collect.
        input_box_ele = driver.find_element(value='input#SnatchUrl')
        input_box_ele.clear()
        input_box_ele.send_keys(article_url)
        # First button of the group: collect.
        driver.find_elements(value='span.input-group-btn button')[0].click()
        try:
            self.wait_for_delete_img_appear(driver=driver)
        except (FZTimeoutError, NoSuchElementException) as e:
            # Timed out or element missing while collecting this article.
            raise PublishOneArticleFailException from e

        # Read back the title filled in by the collector.
        title = driver.find_element(
            value='input#RecommendName').get_attribute('value')
        self.lg.info('title: {}'.format(title))
        if target_str_contain_some_char_check(
                target_str=title,
                check_char_obj=ARTICLE_TITLE_SENSITIVE_STR_TUPLE):
            raise ArticleTitleContainSensitiveWordsException
        if isinstance(title, str) and len(title) > 30:
            self.lg.info('@@@ title 标题过长, 无法发布!! 跳过!')
            # Raise instead of returning: after an over-long title the
            # following articles cannot be processed.
            raise ArticleTitleOverLongException

        try:
            # Second button of the group: publish.
            driver.find_elements(
                value='span.input-group-btn button')[1].click()
        except WebDriverException as e:
            # Seen in practice as "unknown error: Element <iframe
            # class="J_iframe" name="iframe0" ...".
            raise PublishOneArticleFailException from e

        # Back to the main page, where the confirmation popup lives.
        driver.switch_to_default_content()
        # Fill in the person to publish as (a random phone number).
        random_phone = self.get_random_phone()
        driver.find_element(
            value='input.layui-layer-input').send_keys(random_phone)
        # Confirm.
        driver.find_element(value='a.layui-layer-btn0').click()

        self.lg.info('url: {} 发布成功!'.format(article_url))
        # Published: wait 5 seconds for the page elements to be reset.
        sleep(5.)
예제 #7
0
    def get_stg_search_res2(self, k: str, default_sort_value: int=None) -> dict:
        """
        搜题狗 search, variant 2 (driver based).

        Opens http://www.etkz.cn/search.php?mod=forum with a BaseDriver,
        submits ``k`` through the search form and parses question/answer
        pairs out of the resulting page. Only the first result page is read.

        :param k: keyword typed into the search box
        :param default_sort_value: returned unchanged as ``page_num``; not
            otherwise used by this method
        :return: ``{'k': k, 'page_num': default_sort_value, 'res': [...]}``
            where each entry of ``res`` is the dict form of an
            AskQuestionsResultItem with 'question_desc' and 'answer' set
        :raises AssertionError: when the search page or the result page is
            empty, or when no result item is found
        """
        driver = BaseDriver(
            executable_path=PHANTOMJS_DRIVER_PATH,
            load_images=False,
            logger=self.lg,
            user_agent_type=PHONE,
            ip_pool_type=self.ip_pool_type,
        )
        # Selectors of the search input box and of the submit button.
        input_css_sel = 'input#scform_srchtxt'
        submit_btn_sel = 'button#scform_submit'
        try:
            body = driver.get_url_body(
                url='http://www.etkz.cn/search.php?mod=forum',
                css_selector=submit_btn_sel,
                timeout=20,)
            # Explicit raises instead of ``assert`` so the checks survive
            # ``python -O``; the exception type seen by callers is unchanged.
            if body == '':
                raise AssertionError('search page body is empty')
            driver.find_element(value=input_css_sel).send_keys(k)
            driver.find_element(value=submit_btn_sel).click()
            # Fixed pause to let the result page load.
            sleep(5.)
            body = Requests._wash_html(driver.page_source)
            if body == '':
                raise AssertionError('search result body is empty')
            self.lg.info(body)
        finally:
            # Drop the driver reference on failure as well as on success.
            del driver

        question_item_sel = {
            'method': 'css',
            'selector': 'div#threadlist ul li',
        }
        question_desc_div_sel = {
            'method': 're',
            'selector': '问题:(.*?)答案:',
        }
        answer_div_sel = {
            'method': 're',
            'selector': '答案:(.*?)更多相关问题',
        }
        question_item = parse_field(
            parser=question_item_sel,
            target_obj=body,
            is_first=False,
            logger=self.lg,
        )
        if question_item == []:
            raise AssertionError('no search result item found')

        res = []
        # Items are handled in page order.
        for item in question_item:
            try:
                question_desc_div = parse_field(
                    parser=question_desc_div_sel,
                    target_obj=item,
                    logger=self.lg,
                )
                if question_desc_div == '':
                    # No question text in this item: skip it.
                    continue
                answer_div = parse_field(
                    parser=answer_div_sel,
                    target_obj=item,
                    logger=self.lg,
                )
                if answer_div == '':
                    # No answer text in this item: skip it.
                    continue
                # Strip markup left in the extracted fragments.
                question_desc = fix_text(wash_sensitive_info(
                    data=question_desc_div,
                    replace_str_list=[],
                    add_sensitive_str_list=[
                        '<strong>',
                        '</strong>',
                        '<font .*?>',
                        '</font>',
                        '<span .*?>',
                        '</span>',
                    ],
                    is_default_filter=False,
                    is_lower=False,
                ))
                answer = fix_text(wash_sensitive_info(
                    data=answer_div,
                    replace_str_list=[],
                    add_sensitive_str_list=[
                        '<strong>',
                        '</strong>',
                        '<font .*?>',
                        '</font>',
                    ],
                    is_default_filter=False,
                    is_lower=False,
                ))
            except Exception:
                # Best effort: an item that fails to parse/clean is skipped.
                continue

            ask_questions_result_item = AskQuestionsResultItem()
            ask_questions_result_item['question_desc'] = question_desc
            ask_questions_result_item['answer'] = answer
            res.append(dict(ask_questions_result_item))

        # '+' marks a non-empty result, '-' an empty one.
        self.lg.info('[{}] stg2, k: {}'.format(
            '+' if res != [] else '-',
            k,
        ))

        return {
            'k': k,
            'page_num': default_sort_value,
            'res': res,
        }