async def text_filter(message: types.Message):
    """Search Google News (Russian) for the message text and reply with up to 5 links.

    :param message: incoming chat message whose text is used as the search query
    """
    googlenews = GoogleNews(lang='ru')
    googlenews.search(str(message.text))
    # The original used a manual counter that broke after the 5th answer
    # (count 0..4); a slice expresses the same limit directly.
    for link in googlenews.get_links()[:5]:
        await message.answer(link)
def googleLinks(topic):
    """Return the final (redirect-resolved) URLs of up to 5 news results for *topic*.

    Searches Google News (English, last day), then fetches each result link with
    requests so redirects are followed and the real article URL is returned.

    :param topic: search term passed to Google News
    :return: list of resolved article URLs (at most 5)
    """
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    # Original bound this call to an unused `article` variable; the call is
    # kept for its side effect of populating the result set.
    googlenews.get_news(topic)
    actual_links = []
    for link in googlenews.get_links()[:5]:
        # Bug fix: only prepend a scheme when one is missing. The original
        # unconditionally prefixed "http://", producing broken URLs such as
        # "http://https://..." whenever the link was already absolute.
        if not link.startswith(('http://', 'https://')):
            link = "http://" + link
        print(link)
        actual_links.append(requests.get(link).url)
    return actual_links
def crawl(coin):
    """Scrape Google News (Korean) for *coin* and store new matching articles in Firestore.

    Relies on module-level state: the parallel lists `search_keyword`,
    `news_pages`, `coin_index`, `coin_list`, `coin_eng`, and the Firestore
    client `db`.

    :param coin: Korean search keyword; must be present in `search_keyword`
    """
    # The coin's position in the parallel lists is loop-invariant; the
    # original recomputed search_keyword.index(coin) on nearly every line.
    pos = search_keyword.index(coin)
    page = news_pages[pos]
    news = GoogleNews(lang='ko', encode='utf-8')
    news.search(coin)
    time.sleep(30)  # presumably throttling to avoid being blocked — TODO confirm
    news.getpage(page)
    title = news.get_texts()
    url = news.get_links()
    desc = news.get_desc()
    collection_name = u'{}'.format(coin_eng[pos])
    for t, u, d in zip(title, url, desc):
        # Re-read each iteration: incremented below when a doc is stored.
        idx = coin_index[pos]
        if t != "" and u != "" and d != "":
            dic = {
                u"title": u'{}'.format(t),
                u"desc": u'{}'.format(d),
                u"link": u'{}'.format(u)
            }
            # Keep only articles whose title mentions the coin's Korean or
            # English name.
            if coin_list[pos] in t or coin_eng[pos] in t:
                if idx == 0:
                    # First article for this coin: store unconditionally.
                    ref = db.collection(collection_name)
                    ref.add(dic)
                    time.sleep(random.uniform(2, 4))
                    coin_index[pos] += 1
                else:
                    # Deduplicate by title against already-stored documents.
                    flag = True
                    ref = db.collection(collection_name).stream()
                    for doc in ref:
                        time.sleep(random.uniform(1, 3))
                        check_dic = doc.to_dict()
                        if dic['title'] == check_dic['title']:
                            flag = False
                            break
                    if flag:
                        print('[{}] ///// {} '.format(coin, dic))
                        ref = db.collection(collection_name)
                        ref.add(dic)
                        time.sleep(random.uniform(1, 5))
                        coin_index[pos] += 1
    # NOTE(review): placed after the loop so the next crawl fetches the next
    # page — indentation was lost in the pasted source; confirm against the
    # original file.
    news_pages[pos] += 1
def getLinks(query, num_links=5):
    """Search Google News (English) for *query* and return at most *num_links* URLs.

    :param query: search term
    :param num_links: maximum number of result links to return (default 5)
    :return: list of result URLs
    """
    client = GoogleNews(lang="en")
    client.search(query)
    links = client.get_links()
    return links[:num_links]
def news_scraper(curr):
    """Fetch first-page Google News results for *curr* as (title, link) pairs.

    :param curr: search term (e.g. a currency name)
    :return: list of (headline, url) tuples
    """
    engine = GoogleNews('en', 'd')
    engine.search(curr)
    engine.getpage(1)
    engine.result()
    titles = engine.get_texts()
    links = engine.get_links()
    return list(zip(titles, links))
### DATA INPUT
st.write("Please enter a news topic below. The default is 'president'.")
user_input = st.text_input("news topic", 'president')
st.write(f"Thanks! Give me a few minutes to run your analysis on the search term: {user_input}. You might want to grab a coffee...")

### Run analysis
# Fetch news articles for the chosen topic and cap the list at 25 entries.
googlenews.get_news(user_input)
articles = googlenews.get_links()
if len(articles) > 25:
    articles = articles[:25]


def clean_text(input_string):
    """Normalize raw article text: strip punctuation, lowercase, collapse whitespace.

    :param input_string: raw plain text from article
    :return: cleaned article text
    """
    no_punct = input_string.translate(str.maketrans('', '', string.punctuation))
    tokens = no_punct.lower().split()
    return ' '.join(tokens)


st.write("Good news! I found some news articles using Google News!")
from bs4 import BeautifulSoup
import requests

# Configure Google News: Chinese results, last-day period, UTF-8 encoding.
googlenews = GoogleNews()
googlenews.setlang('cn')
googlenews.setperiod('d')
googlenews.setencode('utf-8')
googlenews.clear()

# Prompt (zh): "Enter a keyword to search; related content will be collected."
x = input("請輸入要搜尋的關鍵字,將為你搜集相關字詞內容:")
googlenews.search(x)
alldata = googlenews.result()
result = googlenews.gettext()
links = googlenews.get_links()

print()
# Print each headline followed by its link (parallel lists).
for n, headline in enumerate(result):
    print(headline)
    print(links[n])

# Collect titles and links into a DataFrame, then pick the first article's
# URL for a trial analysis.
df = pd.DataFrame({'標題': result, '連結': links})
url = df['連結'][0]
print(url)
##############################################################
################ Search definition ###########################
##############################################################
googlenews.set_encode('utf-8')
# Language (e.g. 'de' = German; 'en' = English; ...)
googlenews.set_lang('de')
# Filter by period (e.g. news no older than 1 day)
googlenews.set_period('1d')
# Alternative: explicit date range
#googlenews.set_time_range('15/01/2021','17/01/2021')
# Run the search
googlenews.get_news('Wetter Hamburg')

##############################################################
######################## Output ##############################
##############################################################
# Full info per result (title, description, time, date, link, source):
#print(googlenews.results())
# Iterate over the news headlines:
#for i in googlenews.results():
#    print(i['title'])
#print('Anzahl Ergebnisse: ', len(googlenews.results()))
# List of all news headlines:
#print(googlenews.get_texts())
# Links to the sources
print(googlenews.get_links())