Example #1
0
    def crawl_search_results(self):
        """Crawl Yicai (第一财经) search results.

        Waits for the result list, then repeatedly scrapes newly loaded
        articles, filters them by date range and keyword, and hands each
        matching item to a background ``download_and_save_item`` thread.
        Pagination is done by clicking the site's "load more" button via
        JavaScript.

        Returns:
            list: always empty -- items are processed asynchronously by
            worker threads rather than collected here.
        """
        search_results = []

        try:
            self.wait.until(ec.presence_of_element_located((By.ID, 'searchlist')))
        except TimeoutException:
            CustomLogging.log_to_file('第一财经网搜索结果页错误', LogType.ERROR)

        exit_flag = 0
        # Offset of the first not-yet-processed article; "load more" appends
        # to the same list, so earlier entries must be skipped each round.
        start_index = 0
        while True:
            try:
                self.wait.until(ec.presence_of_all_elements_located((By.CLASS_NAME, 'f-db')))
            except TimeoutException:
                CustomLogging.log_to_file('文章列表加载失败', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_class_name('f-db')

                for each_article in result_articles[start_index:]:
                    item = Entity()
                    # Publish date is the last <span> inside the author line.
                    item.publish_date = \
                        each_article.find_element_by_class_name('author').find_elements_by_tag_name('span')[
                            -1].text

                    # An out-of-range article ends the whole crawl
                    # (presumably results are date-ordered -- confirm).
                    if not in_date_range(conv_pub_date(item.publish_date, 'yicai'), self.year_range):
                        exit_flag = 1
                        # break out of the for loop
                        break
                    item.title = each_article.find_element_by_tag_name('h2').text
                    item.short_description = each_article.find_element_by_tag_name('p').text

                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.title and self.keyword not in item.short_description:
                        continue

                    item.url = each_article.get_attribute('href')
                    threading.Thread(target=self.download_and_save_item, args=(item,)).start()

                if exit_flag == 1:
                    break

            except NoSuchElementException:
                CustomLogging.log_to_file('没有搜索结果', LogType.ERROR)
                pass

            try:
                # next_page = self.wait.until(ec.visibility_of_element_located(
                #     (By.XPATH, '//button[@class="u-btn" and contains(text(), "加载更多内容")]')))
                # next_page = self.driver.find_element_by_xpath('//button[@class="u-btn" and contains(text(), "加载更多内容")]')
                # next_page.click()
                # Click "load more" via JS (the direct clicks above are
                # earlier attempts kept for reference).
                self.driver.execute_script('document.getElementsByClassName("u-btn")[0].click()')
                time.sleep(2)
                # Assumes each "load more" click appends 20 results -- TODO confirm.
                start_index += 20
            except TimeoutException:
                # NOTE(review): execute_script/time.sleep do not raise
                # TimeoutException, so this handler looks unreachable --
                # confirm how the loop terminates on the last page.
                CustomLogging.log_to_file('全部页面加载完成', LogType.INFO)
                break

        return search_results
Example #2
0
    def crawl_search_results(self):
        """Crawl People.cn (人民网) search results.

        Iterates over result pages, filters each article by date range and
        keyword, and hands matching items to background
        ``download_and_save_item`` threads.  Stops at the first article
        outside the configured year range or when no next-page link remains.
        """
        exit_flag = 0
        index = 0  # NOTE(review): never used after initialisation
        while True:
            try:
                self.wait.until(ec.presence_of_element_located((By.CLASS_NAME, 'fr')))
            except TimeoutException:
                CustomLogging.log_to_file('人民网搜索结果页面加载失败', LogType.ERROR)
                CustomLogging.log_to_file(traceback.format_exc(), LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_xpath('//div[@class="fr w800"]//ul')

                for each_article in result_articles:
                    item = Entity()
                    pub_date = each_article.find_elements_by_tag_name('li')[2].text

                    # Extract a "YYYY-MM-DD HH:MM:SS" timestamp from the third <li>.
                    item.publish_date = re.search(re.compile(
                        '[1-9]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])\s+(20|21|22|23|[0-1]\d):[0-5]\d:[0-5]\d'),
                        pub_date).group()

                    # An out-of-range article ends the whole crawl
                    # (presumably results are date-ordered -- confirm).
                    if not in_date_range(conv_pub_date(item.publish_date, 'peoplecn'), self.year_range):
                        exit_flag = 1
                        # break out of the for loop
                        break
                    item.title = each_article.find_element_by_tag_name('a').text
                    item.short_description = each_article.find_elements_by_tag_name('li')[1].text
                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    item.url = each_article.find_element_by_tag_name('a').get_attribute('href')
                    threading.Thread(target=self.download_and_save_item, args=(item,)).start()

            except NoSuchElementException:
                break

            if exit_flag == 1:
                break

            try:
                next_page = self.driver.find_element_by_xpath(
                    '//div[@class="show_nav_bar"]//a[contains(text(), "下一页")]')
                next_page.click()
                time.sleep(2)
            except NoSuchElementException:
                # No next-page link -- last page reached.
                break
Example #3
0
    def crawl_search_results(self):
        """Crawl Sina (新浪) search results.

        Filters each result by date range, keyword and title
        de-duplication, then hands matching items to background
        ``download_and_save_item`` threads.  Follows the next-page link
        until exhausted or an out-of-range article is found.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()
        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_all_elements_located(
                        (By.CLASS_NAME, 'r-info')))
            except TimeoutException:
                CustomLogging.log_to_file('搜索结果为空', LogType.ERROR)

            result_articles = self.driver.find_elements_by_class_name('r-info')

            for each_article in result_articles:
                item = Entity()
                try:
                    pub_date = each_article.find_element_by_class_name(
                        'fgray_time').text
                except NoSuchElementException:
                    # Entries without a timestamp cannot be date-filtered -- skip.
                    continue
                # Extract a "YYYY-MM-DD HH:MM:SS" timestamp.
                item.publish_date = re.search(
                    re.compile(
                        '[1-9]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])\s+(20|21|22|23|[0-1]\d):[0-5]\d:[0-5]\d'
                    ), pub_date).group()
                # Stop crawling when the article falls outside the configured
                # year range.
                if not in_date_range(conv_pub_date(item.publish_date, 'sina'),
                                     self.year_range):
                    exit_flag = 1
                    # break out of the for loop
                    break
                item.short_description = each_article.find_element_by_class_name(
                    'content').text
                item.title = each_article.find_element_by_tag_name('h2').text

                # Keyword filter: skip when the keyword appears in neither
                # the title nor the summary.
                if self.keyword not in item.short_description and self.keyword not in item.title:
                    continue

                # De-duplicate by title across pages.
                if item.title in self.titles:
                    continue
                else:
                    self.titles.append(item.title)

                item.url = each_article.find_element_by_xpath(
                    './/h2/a').get_attribute('href')
                threading.Thread(target=self.download_and_save_item,
                                 args=(item, )).start()

            # break out of the while loop
            if exit_flag == 1:
                break

            try:
                next_page = self.driver.find_element_by_xpath(
                    '//div[@class="pagebox"]/a[@title="下一页"]')
                # next_page.click()
                # Navigate directly to the next page's URL instead of clicking.
                self.driver.get(next_page.get_attribute('href'))
                # time.sleep(2)
            except NoSuchElementException:
                # already on the last page
                break

        return search_results
Example #4
0
    def crawl_search_results(self):
        """Crawl cnstock (中国证券网) search results.

        Filters each result by date range, keyword and title
        de-duplication, then hands matching items to background
        ``download_and_save_item`` threads.  Navigates page by page via
        the next-page link's href.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()

        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located(
                        (By.CLASS_NAME, 'result-cont')))
            except TimeoutException:
                CustomLogging.log_to_file('中国证券网搜索结果页错误', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_class_name(
                    'result-article')

                for each_article in result_articles:
                    item = Entity()

                    publish_date = each_article.find_element_by_class_name(
                        'g').text
                    # Extract a "YYYY-MM-DD HH:MM" timestamp (no seconds here).
                    item.publish_date = re.search(
                        re.compile(
                            '[1-9]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])\s+(20|21|22|23|[0-1]\d):[0-5]\d'
                        ), publish_date).group()

                    # An out-of-range article ends the whole crawl.
                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'cnstock'),
                            self.year_range):
                        exit_flag = 1
                        # break out of the for loop
                        break
                    item.short_description = each_article.find_element_by_class_name(
                        'des').text
                    item.title = each_article.find_element_by_tag_name(
                        'a').text
                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    # De-duplicate by title across pages.
                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    item.url = each_article.find_element_by_tag_name(
                        'a').get_attribute('href')
                    # NOTE(review): sibling crawlers call
                    # self.download_and_save_item; super() bypasses any
                    # override on this class -- confirm intentional.
                    threading.Thread(target=super().download_and_save_item,
                                     args=(item, )).start()

                if exit_flag == 1:
                    break
            except NoSuchElementException:
                CustomLogging.log_to_file('没有搜索结果', LogType.INFO)
                break

            try:
                next_page = self.driver.find_element_by_xpath(
                    '//div[@class="pagination pagination-centered"]//a[contains(text(), "下一页")]'
                )
                # Navigate directly to the next page's URL instead of clicking.
                self.driver.get(next_page.get_attribute('href'))
                # next_page.click()
            except NoSuchElementException:
                break

        return search_results
Example #5
0
    def crawl_search_results(self):
        """Crawl Hexun (和讯) search results across three tabs.

        Scrapes the "文章" (articles), "新闻" (news) and "博客" (blog)
        result tabs in turn.  Each result is filtered by date range and
        keyword, then handed to a background ``download_and_save_item``
        thread.  Pagination logic is shared between the three tabs via
        ``_hexun_next_page``.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()

        # Hexun articles ("文章") tab
        try:
            wz_btn = self.driver.find_element_by_xpath(
                '//div[@class="searchRe-top-b"]/a[contains(text(), "文章")]')
            wz_btn.click()

            while True:
                try:
                    result_articles = self.driver.find_elements_by_xpath(
                        '//table[@class="stocktab mt6"]//tr')

                    # The first <tr> is the table header -- skip it.
                    for each_article in result_articles[1:]:
                        item = Entity()
                        item.publish_date = each_article.find_elements_by_tag_name(
                            'td')[3].text

                        # Out-of-range rows are skipped (not a hard stop here,
                        # unlike the blog tab below).
                        if not in_date_range(
                                conv_pub_date(item.publish_date, 'hexun'),
                                self.year_range):
                            continue
                        item.title = each_article.find_elements_by_tag_name(
                            'td')[1].text
                        item.short_description = each_article.find_elements_by_tag_name(
                            'td')[2].text
                        # Keyword must appear in the title or the summary.
                        if self.keyword not in item.short_description and self.keyword not in item.title:
                            continue

                        item.url = each_article.find_elements_by_tag_name(
                            'td')[1].find_element_by_tag_name(
                                'a').get_attribute('href')
                        threading.Thread(target=self.download_and_save_item,
                                         args=(item, )).start()
                except NoSuchElementException:
                    break
                if not self._hexun_next_page():
                    break
        except NoSuchElementException:
            # No "文章" tab on this results page -- fall through to news.
            pass

        # Hexun news ("新闻") tab
        news_btn = self.driver.find_element_by_xpath(
            '//div[@id="headLayer"]/a[contains(text(), "新闻")]')
        news_btn.click()
        time.sleep(1)
        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located(
                        (By.CLASS_NAME, 'searchResult')))
            except TimeoutException:
                CustomLogging.log_to_file('和讯财经新闻搜索结果加载失败', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_class_name(
                    'newslist-a')

                for each_article in result_articles:
                    item = Entity()
                    # Publish date is the last <span> of the news-l-t row.
                    item.publish_date = \
                        each_article.find_element_by_class_name('news-l-t').find_elements_by_tag_name('span')[-1].text
                    # Out-of-range news entries are skipped.
                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'hexun_news'),
                            self.year_range):
                        continue

                    item.title = each_article.find_element_by_xpath(
                        './/span[@class="breakdiv"]/a').text
                    item.short_description = each_article.find_element_by_class_name(
                        'news-l-c').text
                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    item.url = each_article.find_element_by_xpath(
                        './/span[@class="breakdiv"]/a').get_attribute('href')
                    threading.Thread(target=self.download_and_save_item,
                                     args=(item, )).start()

            except NoSuchElementException:
                break

            if not self._hexun_next_page():
                break

        # Hexun blog ("博客") tab
        news_btn = self.driver.find_element_by_xpath(
            '//div[@class="search-rs-list-ty"]/a[contains(text(), "博客")]')
        news_btn.click()
        self.driver.find_element_by_id('s1_t').click()
        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located(
                        (By.CLASS_NAME, 'searchResult')))
            except TimeoutException:
                CustomLogging.log_to_file('和讯财经博客搜索结果加载失败', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_class_name(
                    'newslist-a')

                for each_article in result_articles:
                    item = Entity()
                    item.publish_date = \
                        each_article.find_element_by_class_name('news-l-t').find_elements_by_tag_name('span')[
                            -1].text
                    # An out-of-range blog entry ends the whole crawl.
                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'hexun_blog'),
                            self.year_range):
                        exit_flag = 1
                        break

                    item.title = each_article.find_element_by_xpath(
                        './/span[@class="breakdiv"]/a').text
                    item.short_description = each_article.find_element_by_class_name(
                        'news-l-c').text
                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    item.url = each_article.find_element_by_xpath(
                        './/span[@class="breakdiv"]/a').get_attribute('href')
                    threading.Thread(target=self.download_and_save_item,
                                     args=(item, )).start()

            except NoSuchElementException:
                break

            if exit_flag == 1:
                break

            if not self._hexun_next_page():
                break

        return search_results

    def _hexun_next_page(self):
        """Advance the Hexun paginator to the next result page.

        Returns:
            bool: True if the "下一页" link existed and was clicked,
            False when the last page is reached or clicking fails.
        """
        try:
            next_page = self.driver.find_element_by_xpath(
                '//div[@class="hx_paging"]//a[contains(text(), "下一页")]')
            parent_class = self.driver.find_element_by_xpath(
                '//div[@class="hx_paging"]//a[contains(text(), "下一页")]/..'
            ).get_attribute('class')

            # On the last page the paginator marks the link's parent
            # element with the "no_next" class.
            if parent_class == 'no_next':
                return False

            next_page.click()
            time.sleep(2)
            return True
        except Exception:
            # Was a bare ``except:`` -- narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return False
Example #6
0
    def crawl_search_results(self):
        """Crawl STCN (证券时报) search results.

        Sorts results by update time, then pages through them, filtering
        by date range, keyword and title de-duplication, and handing
        matching items to background ``download_and_save_item`` threads.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()
        try:
            self.wait.until(ec.presence_of_element_located(
                (By.XPATH, '//div[@id="search_result"]//a[contains(text(), "按更新时间排序")]')))
            self.driver.find_element_by_xpath('//div[@id="search_result"]//a[contains(text(), "按更新时间排序")]').click()
        except:
            CustomLogging.log_to_file('证券时报搜索结果页打开失败', LogType.ERROR)

        exit_flag = 0
        page_num = 1
        while True:
            # The site only serves 100 result pages.
            # NOTE(review): breaking when page_num == 100 stops BEFORE
            # processing the 100th page -- confirm whether that is intended.
            if page_num == 100:
                break

            try:
                self.wait.until(ec.presence_of_element_located((By.ID, 'search_list')))
            except TimeoutException:
                CustomLogging.log_to_file('中国证券网搜索结果页错误', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_xpath('//div[@id="search_list"]//dl')

                for each_article in result_articles:
                    item = Entity()
                    item.publish_date = each_article.find_elements_by_tag_name('dd')[1].find_element_by_tag_name(
                        'span').text
                    # Stop crawling when the article falls outside the
                    # configured year range.
                    if not in_date_range(conv_pub_date(item.publish_date, 'STCN'), self.year_range):
                        exit_flag = 1
                        # break out of the for loop
                        break
                    item.short_description = each_article.find_elements_by_tag_name('dd')[0].text
                    item.title = each_article.find_element_by_tag_name('a').text

                    # Keyword filter: skip when the keyword appears in
                    # neither the title nor the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    # De-duplicate by title across pages.
                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    item.url = each_article.find_element_by_tag_name('a').get_attribute('href')
                    threading.Thread(target=self.download_and_save_item, args=(item,)).start()

                # break out of the while loop
                if exit_flag == 1:
                    break
            except TimeoutException:
                CustomLogging.log_to_file('中国证券网搜索结果页加载错误', LogType.ERROR)
            try:
                next_page = self.driver.find_element_by_class_name('next')
                next_page.click()
                page_num += 1
            except NoSuchElementException:
                # already on the last page
                break

        return search_results
Example #7
0
    def crawl_search_results(self):
        """Crawl Baidu search results.

        Pages through the result list, filtering by keyword, date range
        and title de-duplication, and hands matching items to background
        ``download_and_save_item`` threads.  Out-of-range results are
        skipped (not a hard stop), since Baidu results are not
        date-ordered.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()

        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located((By.ID, 'container')))
            except TimeoutException:
                CustomLogging.log_to_file('百度搜索结果页错误', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_xpath(
                    '//div[@class="result c-container "]')

                for each_article in result_articles:
                    item = Entity()
                    try:
                        # Date badge text with '-' separators stripped out.
                        item.publish_date = each_article.find_element_by_xpath(
                            './/span[contains(@class,"newTimeFactor_before_abs")]'
                        ).text.replace('-', '')
                    except NoSuchElementException:
                        # No date badge -- cannot date-filter, skip.
                        continue
                    try:
                        article_cont = each_article.find_element_by_class_name(
                            'c-abstract')
                    except NoSuchElementException:
                        continue
                    short_description = article_cont.text
                    # Strip the leading "YYYY年MM月DD日 -" prefix Baidu
                    # prepends to abstracts.
                    item.short_description = re.sub(
                        re.compile(
                            '[1-9]\d{3}年(0?[1-9]|1[0-2])月(0?[1-9]|[1-2][0-9]|3[0-1])日\s+-'
                        ), '', short_description)
                    item.title = each_article.find_element_by_class_name(
                        't').text
                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'baidu'),
                            self.year_range):
                        continue

                    # De-duplicate by title across pages.
                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    item.url = each_article.find_element_by_xpath(
                        './/h3[@class="t"]//a').get_attribute('href')

                    # NOTE(review): super() bypasses any override of
                    # download_and_save_item -- confirm intentional.
                    threading.Thread(target=super().download_and_save_item,
                                     args=(item, )).start()

            except TimeoutException:
                CustomLogging.log_to_file('没有搜索结果', LogType.INFO)
                break

            try:
                next_page = self.driver.find_element_by_id(
                    'page').find_element_by_class_name('n')
                # self.driver.get(next_page.get_attribute('href'))
                next_page.click()
                time.sleep(2)
            except TimeoutException:
                # Page load hung -- force-stop it and retry the loop.
                self.driver.execute_script('window.stop();')
            except NoSuchElementException:
                break

        return search_results
Example #8
0
    def crawl_search_results(self):
        """Crawl Sogou (搜狗) search results.

        Handles the two result layouts ("rb" and "vrwrap") separately,
        filtering each by keyword, date range and title de-duplication,
        and hands matching items to background ``download_and_save_item``
        threads.  The real target URL is unquoted from Sogou's redirect
        link.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()

        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located((By.CLASS_NAME, 'results')))
            except TimeoutException:
                CustomLogging.log_to_file('每经网搜索结果页错误', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_xpath(
                    '//div[@class="results"]/div[@class="rb" or @class="vrwrap"]'
                )

                for each_article in result_articles[1:]:
                    item = Entity()

                    item.publish_date = format_sougou_date(
                        each_article.find_element_by_xpath(
                            './/cite[contains(@id,"cacheresult_info_")]').text)
                    if each_article.get_attribute('class') == 'rb':
                        # Classic "rb" result layout.
                        try:
                            article_cont = each_article.find_element_by_xpath(
                                './/div[contains(@id, "cacheresult_summary_")]'
                            )
                        except NoSuchElementException:
                            continue
                        short_description = article_cont.text
                        # Strip the leading "YYYY年MM月DD日 -" prefix.
                        item.short_description = re.sub(
                            re.compile(
                                '[1-9]\d{3}年(0?[1-9]|1[0-2])月(0?[1-9]|[1-2][0-9]|3[0-1])日\s+-'
                            ), '', short_description)
                        item.title = each_article.find_element_by_xpath(
                            './/a[contains(@id, "uigs_")]').text
                        if self.keyword not in item.short_description and self.keyword not in item.title:
                            continue

                        # Fall back to the summary's <span> when the cite
                        # line carried no date.
                        if item.publish_date == '':
                            try:
                                publish_date = each_article.find_element_by_xpath(
                                    './/div[contains(@id, "cacheresult_summary_")]/span'
                                ).text
                                # NOTE(review): the replace chain here ends
                                # with '-' removal, the vrwrap branch below
                                # starts with it -- the two branches produce
                                # differently formatted dates; confirm which
                                # format conv_pub_date('sougou') expects.
                                item.publish_date = publish_date.replace(
                                    '年', '-').replace('月', '-').replace(
                                        '日', '').replace('-', '')
                            except NoSuchElementException:
                                continue
                    else:
                        # Vertical "vrwrap" result layout.
                        item.title = each_article.find_element_by_class_name(
                            'vrTitle').text
                        try:
                            short_description = each_article.find_element_by_class_name(
                                'str_info').text
                        except NoSuchElementException:
                            continue
                        item.short_description = re.sub(
                            re.compile(
                                '[1-9]\d{3}年(0?[1-9]|1[0-2])月(0?[1-9]|[1-2][0-9]|3[0-1])日\s+-'
                            ), '', short_description)

                        if self.keyword not in item.short_description and self.keyword not in item.title:
                            continue

                        if item.publish_date == '':
                            try:
                                publish_date = each_article.find_element_by_class_name(
                                    'gray-color').text
                                item.publish_date = publish_date.replace(
                                    '-', '').replace('年', '-').replace(
                                        '月', '-').replace('日', '')
                            except NoSuchElementException:
                                continue

                    # An out-of-range article ends the whole crawl.
                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'sougou'),
                            self.year_range):
                        exit_flag = 1
                        break

                    # De-duplicate by title across pages.
                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    # Sogou links point at a redirect; extract and unquote
                    # the real URL from its "url=" query parameter.
                    url = each_article.find_element_by_xpath(
                        './/a[contains(@id, "sogou_snapshot_")]'
                    ).get_attribute('href')
                    item.url = urllib.parse.unquote(
                        url.split('&')[1].replace('url=', ''))

                    threading.Thread(target=super().download_and_save_item,
                                     args=(item, )).start()
                if exit_flag == 1:
                    break

            except TimeoutException:
                CustomLogging.log_to_file('没有搜索结果', LogType.INFO)
                break

            try:
                next_page = self.driver.find_element_by_id('sogou_next')
                # self.driver.get(next_page.get_attribute('href'))
                next_page.click()
                time.sleep(2)
            except TimeoutException:
                # Page load hung -- force-stop it and retry the loop.
                self.driver.execute_script('window.stop();')
            except NoSuchElementException:
                break

        return search_results
Example #9
0
    def crawl_search_results(self):
        """Crawl Beijing AIC (北京工商局) search results.

        Pages through the result list, filtering by date range, keyword
        and title de-duplication, and hands matching items to background
        ``download_and_save_item`` threads.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()

        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_all_elements_located(
                        (By.XPATH, '//div[@class="content"]//div')))
            except TimeoutException:
                CustomLogging.log_to_file('搜索结果出错', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_xpath(
                    '//div[@class="content"]//div[@class="news"]')

                for each_article in result_articles:
                    item = Entity()

                    # NOTE(review): the page repeats these ids per article;
                    # find_element_by_id is scoped to each_article here.
                    publish_date = each_article.find_element_by_id(
                        'essaypubtime').text
                    # Extract a "YYYY.MM.DD HH:MM:SS"-style timestamp
                    # (the '.' matches any separator character).
                    item.publish_date = re.search(
                        re.compile(
                            '[1-9]\d{3}.(0[1-9]|1[0-2]).(0[1-9]|[1-2][0-9]|3[0-1])\s+(20|21|22|23|[0-1]\d):[0-5]\d:[0-5]\d'
                        ), publish_date).group()

                    # An out-of-range article ends the whole crawl.
                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'bjgsj'),
                            self.year_range):
                        exit_flag = 1
                        # break out of the for loop
                        break
                    item.short_description = each_article.find_element_by_id(
                        'essaycontent').text
                    item.title = each_article.find_element_by_id(
                        'essaytitlelinks').text

                    # Keyword must appear in the title or the summary.
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    # De-duplicate by title across pages.
                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    item.url = each_article.find_element_by_xpath(
                        './/li[@id="essaytitlelinks"]/a').get_attribute("href")
                    threading.Thread(target=super().download_and_save_item,
                                     args=(item, )).start()

                if exit_flag == 1:
                    break
            except NoSuchElementException:
                CustomLogging.log_to_file('没有搜索结果', LogType.INFO)
                break

            try:
                next_page = self.driver.find_element_by_class_name('next-page')
                # Navigate directly to the next page's URL instead of clicking.
                self.driver.get(next_page.get_attribute('href'))
                # next_page.click()
            except NoSuchElementException:
                break

        return search_results
Example #10
0
    def crawl_search_results(self):
        """Crawl NBD (每经网) search results.

        Pages through the result list, filtering by date range and title
        de-duplication, and hands matching items to background
        ``download_and_save_item`` threads.

        Returns:
            list: always empty -- items are processed asynchronously.
        """
        search_results = []
        self.driver.maximize_window()

        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located(
                        (By.CLASS_NAME, 'search-text')))
            except TimeoutException:
                CustomLogging.log_to_file('每经网搜索结果页错误', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_xpath(
                    '//ul[@class="search-text mt15"]/li')

                for each_article in result_articles:
                    item = Entity()

                    item.publish_date = each_article.find_element_by_class_name(
                        'articleMaterial_meta').text

                    # An out-of-range article ends the whole crawl.
                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'mrjj'),
                            self.year_range):
                        exit_flag = 1
                        # break out of the for loop
                        break
                    try:
                        item.short_description = each_article.find_element_by_class_name(
                            'articleMaterial_depict').text
                    except NoSuchElementException:
                        # Some entries have no summary -- keep them anyway.
                        item.short_description = ''
                    item.title = each_article.find_element_by_class_name(
                        'articleMaterial_title').text

                    # NOTE(review): unlike the sibling crawlers, there is no
                    # self.keyword filter here -- confirm this is intentional.
                    # De-duplicate by title across pages.
                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    item.url = each_article.find_element_by_class_name(
                        'articleMaterial_title').find_element_by_tag_name(
                            'a').get_attribute('href')
                    threading.Thread(target=super().download_and_save_item,
                                     args=(item, )).start()

                if exit_flag == 1:
                    break
            except NoSuchElementException:
                CustomLogging.log_to_file('没有搜索结果', LogType.INFO)
                break

            try:
                next_page = self.driver.find_element_by_class_name(
                    'next').find_element_by_tag_name('a')
                # Navigate directly to the next page's URL instead of clicking.
                self.driver.get(next_page.get_attribute('href'))
                # next_page.click()
                # time.sleep(2)
            except TimeoutException:
                # Page load hung -- force-stop it and retry the loop.
                self.driver.execute_script('window.stop();')
            except NoSuchElementException:
                break

        return search_results