def select_url(): arrList = [] try: connect = get_connect() cursor = connect.cursor() print("connection") sql = "SELECT id,source_url FROM news_api" cursor.execute(sql) result = cursor.fetchall() for row in result: # print(row[0]) news = News() news.id = row[0] news.source_url = row[1] arrList.append(news) except Exception as e: print(e) finally: return arrList
def keyword_search(keyword): source_url_list = select_source_url_returnset() url = 'http://www.toutiao.com/search_content/?offset=0&format=json&keyword= ' + keyword + '&autoload=true&count=200&cur_tab=1' toutiao_data = requests.get(url).text data = json.loads(toutiao_data) items = data['data'] news_list = [] link_head = 'http://toutiao.com' for n in items: if 'title' in n: news = News() news.title = n['title'] news.tag = n['tag'] news.source = n['source'] news.source_url = link_head + n['source_url'] # 两会关键词 news.keyword = keyword # 今日头条自带关键词 news.keywords = n['keywords'] #如果已经存在source_url则跳过 if news.source_url in source_url_list: print('数据库已有该记录!') continue print('新添加记录:', news.title) news_list.append(news) # print(news.title, news.source_url, news.source, news.keyword, news.keywords) return news_list