Example #1
0
def select_url():
    arrList = []
    try:
        connect = get_connect()
        cursor = connect.cursor()
        print("connection")
        sql = "SELECT id,source_url FROM news_api"
        cursor.execute(sql)
        result = cursor.fetchall()
        for row in result:
            # print(row[0])
            news = News()
            news.id = row[0]
            news.source_url = row[1]
            arrList.append(news)
    except Exception as e:
        print(e)
    finally:
        return arrList
def keyword_search(keyword):

    source_url_list = select_source_url_returnset()

    url = 'http://www.toutiao.com/search_content/?offset=0&format=json&keyword= ' + keyword + '&autoload=true&count=200&cur_tab=1'

    toutiao_data = requests.get(url).text

    data = json.loads(toutiao_data)
    items = data['data']

    news_list = []
    link_head = 'http://toutiao.com'

    for n in items:
        if 'title' in n:
            news = News()
            news.title = n['title']
            news.tag = n['tag']
            news.source = n['source']
            news.source_url = link_head + n['source_url']
            # 两会关键词
            news.keyword = keyword
            # 今日头条自带关键词
            news.keywords = n['keywords']

            #如果已经存在source_url则跳过
            if news.source_url in source_url_list:
                print('数据库已有该记录!')
                continue

            print('新添加记录:', news.title)
            news_list.append(news)
            # print(news.title, news.source_url, news.source, news.keyword, news.keywords)

    return news_list