def export_news_info(output_path="result20030101-20061231.json"):
    """Append every article of the currently displayed result list to a file.

    Waits for an element with class ``list-group`` to be present, then
    polls until at least one ``.list-group-item`` is found under
    ``#article-tab-1-view-1``.  Each item is turned into a ``News`` record
    and written as ``<record.to_str()>,`` on its own line.

    Args:
        output_path: File the records are appended to (opened in ``a+``
            mode, UTF-8).  Defaults to the path this function has always
            written to.
    """
    print('export_news_info')
    locator = (By.CLASS_NAME, "list-group")
    WebDriverWait(BROWSER, 90, 0.5).until(
        expected_conditions.presence_of_element_located(locator))

    # The container can be present before its items are rendered, so keep
    # polling until the item list is non-empty.
    # NOTE(review): this loop has no upper bound; it spins forever if the
    # page never shows any item.
    news = []
    while not news:
        news = BROWSER.find_elements(
            By.CSS_SELECTOR,
            '#article-tab-1-view-1 .list-group .list-group-item')
        print('\n+++++\n')
        time.sleep(2)
    print('\n+++++\n')

    last_record_time = None
    # Open the output once for the whole page instead of once per record.
    with open(output_path, "a+", encoding='utf8') as f:
        for new in news:
            # Double quotes are swapped for single quotes in the text fields.
            # presumably so they cannot break the serialized record; confirm
            # against News.to_str().
            title = new.find_element(
                By.CSS_SELECTOR,
                '.list-group-item-heading span').text.replace("\"", "'")
            news_office = new.find_element(By.CSS_SELECTOR, 'small a').text
            # Keep the last whitespace-separated token of the span text and
            # drop its final character.
            word_number = str(new.find_element(
                By.CSS_SELECTOR, 'small span').text).split()[-1][:-1]
            record_time = new.find_element(
                By.CSS_SELECTOR, '.article-main .pull-right').text
            description = new.find_element(
                By.CSS_SELECTOR,
                '.media-body .list-group-item-text').text.replace(
                    "\n", "").replace("\"", "'")

            news_result = News(title=title,
                               news_office=news_office,
                               words_number=word_number,
                               news_time=record_time,
                               description=description)
            f.write(news_result.to_str() + ',\n')
            last_record_time = record_time

    # Time stamp of the last exported record, printed for progress tracking.
    print(last_record_time)
    print('all info exported, go to next windows')
def export_news_info_and_go_back_to_init_page(
        output_path="result20030101-20061231.json"):
    """Export the articles shown in the current window, then close it.

    Waits for an element with class ``app-article`` to be present, turns
    every such element into a ``News`` record and appends it to
    ``output_path`` as ``<record.to_str()>,`` on its own line.  Afterwards
    the current browser window is closed and ``switch_to_init_windows()``
    is called.

    Args:
        output_path: File the records are appended to (opened in ``a+``
            mode, UTF-8).  Defaults to the path this function has always
            written to.
    """
    print('export_news_info')
    locator = (By.CLASS_NAME, "app-article")
    WebDriverWait(BROWSER, 90, 0.5).until(
        expected_conditions.presence_of_element_located(locator))

    news = BROWSER.find_elements(By.CLASS_NAME, 'app-article')
    last_record_time = None
    # Open the output once for the whole page instead of once per record.
    with open(output_path, "a+", encoding='utf8') as f:
        for new in news:
            title = new.find_element(By.CSS_SELECTOR, '.col-xs-12 h3').text
            # The sub-heading spans are read by position: source, word
            # count, record time.  Fewer than three spans raises IndexError.
            other_information = new.find_elements(
                By.CSS_SELECTOR, '.col-xs-12 .article-subheading span')
            source = other_information[0].text
            word_number = other_information[1].text
            record_time = other_information[2].text
            description = new.find_element(
                By.CSS_SELECTOR,
                '.col-xs-12 .description').text.replace("\n", "")

            news_result = News(title=title,
                               news_office=source,
                               words_number=word_number,
                               news_time=record_time,
                               description=description)
            f.write(news_result.to_str() + ',\n')
            last_record_time = record_time

    # Time stamp of the last exported record, printed for progress tracking.
    print(last_record_time)
    print('all info exported, close current windows')
    BROWSER.close()
    switch_to_init_windows()
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Smoke test: build one News record from placeholder strings and append its
# serialized form to a scratch JSON file.
from news import News

# Placeholder values (title, time, author, description, word count), passed
# positionally.  NOTE(review): which News field each one lands in depends on
# the constructor's parameter order, which is not visible here.
a = News("标题", "时间", "作者", "描述", "字数")

with open("resul1t.json", mode="a+", encoding='utf8') as out:
    serialized = a.to_str()
    out.write(serialized + ',\n')