Example #1
0
def export_news_info(output_path="result20030101-20061231.json"):
    """Export every article in the current list view to ``output_path``.

    Waits up to 90 seconds (polling every 0.5 s) for an element with class
    ``list-group`` to be present, then polls until the selector
    ``#article-tab-1-view-1 .list-group .list-group-item`` matches at least
    one element.  For each matched item the title, news office, word count,
    record time and description are read, wrapped in a ``News`` object, and
    appended to the output file as ``to_str()`` followed by a comma and a
    newline.  The record time of the last item is printed at the end.

    Args:
        output_path: File the records are appended to, opened as UTF-8.
            Defaults to the file name that used to be hard-coded here.

    Note:
        The item poll has no timeout of its own: it repeats until the
        selector matches something.
    """
    print('export_news_info')
    locator = (By.CLASS_NAME, "list-group")
    WebDriverWait(BROWSER, 90, 0.5).until(
        expected_conditions.presence_of_element_located(locator))

    # Poll until the list items show up.  The 2-second sleep also runs on the
    # successful pass, exactly as before.
    news = []
    while not news:
        news = BROWSER.find_elements(
            By.CSS_SELECTOR,
            '#article-tab-1-view-1 .list-group .list-group-item')
        print('\n+++++\n')
        time.sleep(2)
        print('\n+++++\n')

    last_record_time = None
    # Open the output once for the whole batch instead of once per record.
    with open(output_path, "a+", encoding='utf8') as f:
        for new in news:
            # Double quotes are swapped for single quotes in title and
            # description before the values are handed to News.
            title = new.find_element(
                By.CSS_SELECTOR,
                '.list-group-item-heading span').text.replace("\"", "'")
            news_office = new.find_element(By.CSS_SELECTOR, 'small a').text
            # Keep the last whitespace-separated token and drop its final
            # character.
            word_number = new.find_element(
                By.CSS_SELECTOR, 'small span').text.split()[-1][:-1]
            record_time = new.find_element(
                By.CSS_SELECTOR, '.article-main .pull-right').text
            description = new.find_element(
                By.CSS_SELECTOR,
                '.media-body .list-group-item-text').text.replace(
                    "\n", "").replace("\"", "'")
            news_result = News(title=title,
                               news_office=news_office,
                               words_number=word_number,
                               news_time=record_time,
                               description=description)
            f.write(news_result.to_str() + ',\n')
            last_record_time = record_time

    print(last_record_time)
    print('all info exported, go to next windows')
def export_news_info_and_go_back_to_init_page(
        output_path="result20030101-20061231.json"):
    """Export the articles in the current window, then close that window.

    Waits up to 90 seconds (polling every 0.5 s) for an element with class
    ``app-article`` to be present.  For each ``app-article`` element the
    title, source, word count, record time and description are read, wrapped
    in a ``News`` object, and appended to the output file as ``to_str()``
    followed by a comma and a newline.  Afterwards the record time of the
    last article is printed, ``BROWSER.close()`` is called and
    ``switch_to_init_windows()`` is invoked.

    Args:
        output_path: File the records are appended to, opened as UTF-8.
            Defaults to the file name that used to be hard-coded here.

    Raises:
        IndexError: If an article has fewer than three
            ``.article-subheading span`` elements.
    """
    print('export_news_info')
    locator = (By.CLASS_NAME, "app-article")
    WebDriverWait(BROWSER, 90, 0.5).until(
        expected_conditions.presence_of_element_located(locator))

    news = BROWSER.find_elements(By.CLASS_NAME, 'app-article')
    last_record_time = None
    # Open the output once for the whole batch instead of once per record.
    with open(output_path, "a+", encoding='utf8') as f:
        for new in news:
            title = new.find_element(By.CSS_SELECTOR, '.col-xs-12 h3').text
            # The sub-heading spans are read positionally:
            # [0] source, [1] word count, [2] record time.
            other_information = new.find_elements(
                By.CSS_SELECTOR, '.col-xs-12 .article-subheading span')
            source = other_information[0].text
            word_number = other_information[1].text
            record_time = other_information[2].text
            description = new.find_element(
                By.CSS_SELECTOR,
                '.col-xs-12 .description').text.replace("\n", "")
            news_result = News(title=title,
                               news_office=source,
                               words_number=word_number,
                               news_time=record_time,
                               description=description)
            f.write(news_result.to_str() + ',\n')
            last_record_time = record_time

    print(last_record_time)
    print('all info exported, close current windows')
    BROWSER.close()
    switch_to_init_windows()
Example #3
0
#!/usr/bin/python
# -*- coding: UTF-8 -*-

from news import News

# Sample record; the five positional values are the Chinese words for
# "title", "time", "author", "description" and "word count".
a = News("标题", "时间", "作者", "描述", "字数")


# Append the serialized record, followed by a comma and a newline, to the
# output file (created if it does not exist yet).
with open("resul1t.json", mode="a+", encoding="utf8") as out_file:
    serialized = a.to_str()
    out_file.write(serialized + ',\n')