def get_news(page_count, cb):
    """Crawl the chainnews article feed and hand every article to ``cb``.

    ``page_count`` is multiplied by ten, so feed pages
    ``1 .. page_count * 10`` are requested. Every entry of the response's
    ``results`` list is wrapped in ``news_base.article_info`` and passed to
    ``cb``.

    Args:
        page_count: Paging budget; ten feed pages are fetched per unit.
        cb: Callable invoked with each article. A falsy return value counts
            as a failure, a truthy one resets the failure counter.

    The crawl stops as soon as five calls to ``cb`` in a row have failed.
    """
    feed_url = "https://www.chainnews.com/api/articles/feeds/?page=%d&ts=%d"
    last_page = page_count * 10
    failures = 0
    # A single timestamp is taken up front and sent with every page request.
    requested_at = int(time.time())

    for page in range(1, last_page + 1):
        payload = json.loads(url_open(feed_url % (page, requested_at)))
        for entry in payload['results']:
            article = news_base.article_info(
                entry['author_name'],
                entry["pb_timestamp"],
                entry['title'],
                entry['digest'],
                entry['content'],
                entry['refer_link'],
                "链闻chainnews")
            failures = 0 if cb(article) else failures + 1
            if failures >= 5:
                # Five consecutive rejections: abandon the whole crawl.
                return
def get_news(page_count, cb):
    """Crawl the 55coin article listing and pass each article to ``cb``.

    Listing pages ``1 .. page_count`` are fetched in order. Every entry in
    the response's ``list`` becomes a ``news_base.article_info``; its content
    is then replaced with the HTML of the ``.article-content`` element
    scraped from the article page itself.

    Args:
        page_count: Number of listing pages to request.
        cb: Callable invoked with each article. A falsy return value counts
            as a failure, a truthy one resets the failure counter.

    The crawl stops as soon as five calls to ``cb`` in a row have failed.
    """
    list_url = "https://www.55coin.com/index/article/search.html?cat_id=4&page=%d&is_index=1"
    failures = 0

    for page in range(1, page_count + 1):
        listing = json.loads(url_open(list_url % page))
        for entry in listing['list']:
            article = news_base.article_info(
                entry['nickname'],
                int(entry["add_time"]),
                entry['title'],
                entry['brief'],
                'content',  # placeholder, overwritten below
                'https://www.55coin.com/article/%d.html' % entry['article_id'],
                "区势传媒")

            # The body is not taken from the listing entry; it is scraped
            # from the article page.
            article_html = url_open(article.source_addr)
            article.content = pq(article_html)(".article-content").html()

            if cb(article):
                failures = 0
            else:
                failures += 1
            if failures >= 5:
                # Five consecutive rejections: abandon the whole crawl.
                return
def get_news(page_count, cb):
    """Crawl the chainfor news list and pass each article to ``cb``.

    The endpoint is paged by a timestamp cursor (``lastItemTimeStamp``), not
    by page number. The cursor starts at the current time in milliseconds
    and is moved to the ``releaseDate.time`` of every item processed, so each
    request uses the timestamp of the last item seen. At most ``page_count``
    requests are made.

    Every entry in the response's ``list`` becomes a
    ``news_base.article_info``; its content is then replaced with the HTML of
    the ``.m-i-bd`` element scraped from the article page.

    Args:
        page_count: Maximum number of list requests to issue.
        cb: Callable invoked with each article. A falsy return value counts
            as a failure, a truthy one resets the failure counter.

    The crawl stops as soon as five calls to ``cb`` in a row have failed.
    """
    list_url = ("https://www.chainfor.com/home/list/news/data.do"
                "?categoryId=&lastItemTimeStamp=%d&device_type=0")
    failures = 0
    cursor_ms = int(time.time()) * 1000

    # The loop variable is only a request budget; paging is driven by cursor_ms.
    for _ in range(1, page_count + 1):
        listing = json.loads(url_open(list_url % cursor_ms))
        for entry in listing['list']:
            article = news_base.article_info(
                entry['nickName'],
                # Milliseconds -> whole seconds. Floor division keeps the
                # timestamp an int; true division would yield a float on
                # Python 3.
                int(entry["releaseDate"]['time']) // 1000,
                entry['title'],
                entry["introduction"],
                'content',  # placeholder, overwritten below
                "https://www.chainfor.com/news/show/%d.html" % entry["id"],
                "链向财经")

            article_html = url_open(article.source_addr)
            article.content = pq(article_html)(".m-i-bd").html()

            # Advance the cursor so the next request uses this item's time.
            cursor_ms = entry["releaseDate"]['time']

            if cb(article):
                failures = 0
            else:
                failures += 1
            if failures >= 5:
                # Five consecutive rejections: abandon the whole crawl.
                return
def get_news(page_count, cb):
    """Crawl the 8btc news list and pass each article to ``cb``.

    List pages ``1 .. page_count`` are fetched in order. Every entry in
    ``data.list`` of the response becomes a ``news_base.article_info``; its
    content is then replaced with the HTML of the ``.bbt-html`` element
    scraped from the article page.

    Args:
        page_count: Number of list pages to request.
        cb: Callable invoked with each article. A falsy return value counts
            as a failure, a truthy one resets the failure counter.

    The crawl stops as soon as five calls to ``cb`` in a row have failed.
    """
    consecutive_failures = 0

    for page_no in range(1, page_count + 1):
        raw = url_open("https://webapi.8btc.com/bbt_api/news/list?num=20&page=%d" % page_no)
        entries = json.loads(raw)['data']['list']

        for entry in entries:
            # Fields are read in the same order the constructor receives them.
            author = entry['author_info']['display_name']
            posted_at = int(entry["post_date"])
            title = entry['title']
            summary = entry['desc']
            link = 'https://www.8btc.com/article/' + str(entry['id'])
            article = news_base.article_info(
                author, posted_at, title, summary, 'content', link, "8比特")

            # Swap the placeholder content for the body scraped from the page.
            document = pq(url_open(article.source_addr))
            article.content = document(".bbt-html").html()

            accepted = cb(article)
            consecutive_failures = 0 if accepted else consecutive_failures + 1
            if consecutive_failures >= 5:
                # Five consecutive rejections: abandon the whole crawl.
                return
def get_news(page_count, cb):
    """Crawl the jinse information list and pass each article to ``cb``.

    The endpoint is paged by an id cursor (``information_id``), starting at
    0. The cursor is moved to the ``id`` of every item that is processed. At
    most ``page_count`` requests are made.

    Only items whose ``type`` is 1 or 2 are handled. Each becomes a
    ``news_base.article_info``; its content is then replaced with HTML
    scraped from the article page, taken from ``.js-article-detail`` or,
    when that yields nothing, from ``.js-article``.

    Args:
        page_count: Maximum number of list requests to issue.
        cb: Callable invoked with each article. A falsy return value counts
            as a failure, a truthy one resets the failure counter.

    The crawl stops as soon as five calls to ``cb`` in a row have failed.
    """
    list_url = ("https://api.jinse.com/v6/information/list"
                "?catelogue_key=www&limit=23&information_id=%d"
                "&flag=down&version=9.9.9&_source=www")
    failures = 0
    cursor_id = 0

    # The loop variable is only a request budget; paging is driven by cursor_id.
    for _ in range(1, page_count + 1):
        listing = json.loads(url_open(list_url % cursor_id))
        for entry in listing['list']:
            if entry["type"] not in (1, 2):
                # NOTE(review): skipped items do not advance the cursor, so a
                # response with no type 1/2 items repeats the same request on
                # the next iteration. Confirm whether that is intended.
                continue

            details = entry["extra"]
            article = news_base.article_info(
                details['author'],
                int(details["published_at"]),
                entry['title'],
                details['summary'],
                'content',  # placeholder, overwritten below
                details['topic_url'],
                "金色财金")

            document = pq(url_open(article.source_addr))
            # Query the primary container once; fall back to the alternative
            # container only when the primary one yields no HTML.
            article.content = (document(".js-article-detail").html()
                               or document(".js-article").html())

            cursor_id = entry['id']

            if cb(article):
                failures = 0
            else:
                failures += 1
            if failures >= 5:
                # Five consecutive rejections: abandon the whole crawl.
                return