import json
import time

import requests
from fake_useragent import UserAgent


def toutiao_news_api(url):
    # Proxy server (Abuyun dynamic HTTP tunnel)
    proxyHost = "http-dyn.abuyun.com"
    proxyPort = "9020"
    # Proxy tunnel credentials
    proxyUser = "******"
    proxyPass = "******"
    proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": proxyHost,
        "port": proxyPort,
        "user": proxyUser,
        "pass": proxyPass,
    }
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }
    ua = UserAgent(verify_ssl=False)
    headers = {
        'cookie': 'tt_webid=6825236887406953998; s_v_web_id=verify_ka17kc91_J51hfIgB_1Ujy_4F87_AQ77_v44SCeaZdYbb; WEATHER_CITY=%E5%8C%97%E4%BA%AC; __tasessionId=ftj73c94a1589124278466; tt_webid=6825236887406953998; csrftoken=3bc73a541ff3c196706a5fa652baa10a; ttcid=93c87bb6d2c44204a824c060f2a0344b39; SLARDAR_WEB_ID=167cd898-158d-4682-84b7-515f808f9c49; tt_scid=nvrgh8BUDb5bfXypX.EbNgFcMiVjrSr7vdwnPAab2w2tEn2I8DLcdmqRb2aAGGvT6b9b',
        'user-agent': ua.random,
        'x-requested-with': 'XMLHttpRequest'
    }
    toutiao_data = requests.get(url, headers=headers, proxies=proxies).text
    # Keep the parsed response and the paging cursor visible to the caller,
    # which uses max_behot_time to request the next page of the feed
    global data
    data = json.loads(toutiao_data)
    global max_behot_time
    max_behot_time = data['next']['max_behot_time']
    items = data['data']
    news_list = []
    link_head = 'http://toutiao.com'
    for n in items:
        # Keep only titled entries; drop ads and media-account promotions
        if 'title' in n and n['tag'] != 'ad' and n['tag'] != 'news_media':
            news = News()
            news.title = n['title']
            print(news.title)
            news.tag = n['tag']
            news.source = n['source']
            # Convert the epoch timestamp to local time
            time_local = time.localtime(n['behot_time'])
            # Format it as e.g. 2016-05-05 20:28:54
            dt = time.strftime("%Y-%m-%d %H:%M:%S", time_local)
            news.news_date = dt
            print(news.news_date)
            news.source_url = link_head + n['source_url']
            news_list.append(news)
            # print(news.title, news.source_url, news.source, news.keyword, news.keywords)
    return news_list
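# Both functions assume a `News` container class that is not shown in this
# snippet. A minimal sketch under that assumption (field names inferred from
# the attribute assignments; the real project may well use an ORM model instead):
class News:
    def __init__(self):
        self.title = ''
        self.tag = ''
        self.source = ''
        self.source_url = ''
        self.news_date = ''
        self.keyword = ''   # the keyword we searched with (keyword_search only)
        self.keywords = ''  # keywords returned by Toutiao itself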
def keyword_search(keyword):
    source_url_list = select_source_url_returnset()
    url = ('http://www.toutiao.com/search_content/?offset=0&format=json&keyword='
           + keyword + '&autoload=true&count=200&cur_tab=1')
    toutiao_data = requests.get(url).text
    data = json.loads(toutiao_data)
    items = data['data']
    news_list = []
    link_head = 'http://toutiao.com'
    for n in items:
        if 'title' in n:
            news = News()
            news.title = n['title']
            news.tag = n['tag']
            news.source = n['source']
            news.source_url = link_head + n['source_url']
            # The "Two Sessions" keyword we searched with
            news.keyword = keyword
            # Keywords supplied by Toutiao itself
            news.keywords = n['keywords']
            # Skip entries whose source_url is already stored
            if news.source_url in source_url_list:
                print('Record already in the database!')
                continue
            print('New record added:', news.title)
            news_list.append(news)
            # print(news.title, news.source_url, news.source, news.keyword, news.keywords)
    return news_list
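# `select_source_url_returnset` is also assumed rather than shown: it should
# return the source_url values already persisted, so duplicates can be skipped.
# A minimal SQLite sketch under that assumption (the database file and table
# name here are hypothetical):
import sqlite3

def select_source_url_returnset():
    conn = sqlite3.connect('toutiao.db')  # hypothetical database file
    cur = conn.execute('SELECT source_url FROM news')  # hypothetical table
    urls = {row[0] for row in cur.fetchall()}
    conn.close()
    return urls


# Illustrative driver only: the hot-news feed URL below is an assumption,
# based on how toutiao_news_api reads data['next']['max_behot_time'] as a
# paging cursor.
if __name__ == '__main__':
    feed_url = 'https://www.toutiao.com/api/pc/feed/?category=news_hot&max_behot_time=0'
    for item in toutiao_news_api(feed_url):
        print(item.title, item.news_date, item.source_url)
    for item in keyword_search('两会'):  # "Two Sessions"
        print(item.title, item.keywords)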