コード例 #1
0
async def text_filter(message: types.Message):
    """Search Google News (Russian) for the message text and reply with up to five links."""
    googlenews = GoogleNews(lang='ru')
    googlenews.search(str(message.text))
    # Reply with at most the first five result links.
    for link in googlenews.get_links()[:5]:
        await message.answer(link)
コード例 #2
0
ファイル: newsScrape.py プロジェクト: LAHacks21/WordCloud
def googleLinks(topic):
    """Return up to five resolved article URLs for *topic* from the last day of Google News.

    :param topic: search term passed to Google News
    :return: list of final (post-redirect) article URLs
    """
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    # get_news populates the client's results as a side effect; its return
    # value was previously bound to an unused local, which is dropped here.
    googlenews.get_news(topic)
    links = googlenews.get_links()[:5]
    actual_links = []
    for link in links:
        link = "http://" + link  # raw links lack a scheme; requests needs one
        print(link)
        # Follow redirects so we store the final article URL, not the wrapper URL.
        actual_links.append(requests.get(link).url)
    return actual_links
    
コード例 #3
0
def crawl(coin):
    """Search Korean Google News for *coin* and store new, relevant articles in Firestore.

    Relies on module-level state: ``search_keyword``, ``news_pages``,
    ``coin_index``, ``coin_list``, ``coin_eng``, and the Firestore client ``db``.

    :param coin: keyword to search for (must be present in ``search_keyword``)
    """
    # Hoist the invariant position lookup: `coin` never changes in this call,
    # so search_keyword.index(coin) is computed once instead of on every access.
    key = search_keyword.index(coin)
    page = news_pages[key]
    news = GoogleNews(lang='ko', encode='utf-8')
    news.search(coin)
    time.sleep(30)  # throttle to avoid rate limiting
    news.getpage(page)
    title = news.get_texts()
    url = news.get_links()
    desc = news.get_desc()
    for t, u, d in zip(title, url, desc):
        # Re-read each iteration: coin_index[key] is incremented below.
        idx = coin_index[key]
        if t != "" and u != "" and d != "":
            dic = {
                u"title": u'{}'.format(t),
                u"desc": u'{}'.format(d),
                u"link": u'{}'.format(u)
            }
            # Keep only articles whose title mentions the coin
            # (by its Korean or English name).
            if coin_list[key] in t or coin_eng[key] in t:
                if idx == 0:
                    # First article for this coin: store unconditionally.
                    ref = db.collection(u'{}'.format(coin_eng[key]))
                    ref.add(dic)
                    time.sleep(random.uniform(2, 4))
                    coin_index[key] += 1
                else:
                    # Deduplicate against titles already stored in Firestore.
                    flag = True
                    ref = db.collection(u'{}'.format(coin_eng[key])).stream()
                    for doc in ref:
                        time.sleep(random.uniform(1, 3))
                        check_dic = doc.to_dict()
                        if dic['title'] == check_dic['title']:
                            flag = False
                            break
                    if flag:
                        print('[{}] ///// {} '.format(coin, dic))
                        ref = db.collection(u'{}'.format(coin_eng[key]))
                        ref.add(dic)
                        time.sleep(random.uniform(1, 5))
                        coin_index[key] += 1
    # Advance to the next results page for the next crawl of this coin.
    news_pages[key] += 1
コード例 #4
0
ファイル: utils.py プロジェクト: aditeyabaral/newsnow
def getLinks(query, num_links=5):
    """Search Google News (English) for *query* and return at most *num_links* result links."""
    news_client = GoogleNews(lang="en")
    news_client.search(query)
    all_links = news_client.get_links()
    return all_links[:num_links]
コード例 #5
0
ファイル: main.py プロジェクト: saaalik/Cryptoconverter
def news_scraper(curr):
    """Return (headline, link) pairs from the first page of daily Google News for *curr*."""
    client = GoogleNews('en','d')
    client.search(curr)
    client.getpage(1)
    client.result()
    headlines = client.get_texts()
    links = client.get_links()
    return list(zip(headlines, links))
コード例 #6
0


### DATA INPUT

st.write("Please enter a news topic below. The default is 'president'.")

user_input = st.text_input("news topic", 'president')

st.write(f"Thanks! Give me a few minutes to run your analysis on the search term: {user_input}. You might want to grab a coffee...")

### Run analysis

# Fetch article links for the chosen topic, capped at the first 25.
googlenews.get_news(user_input)
articles = googlenews.get_links()[:25]

def clean_text(input_string):
    """Clean the text parsed from the news articles.

    :param input_string: raw plain text from article
    :return: cleaned text — punctuation stripped, lowercased, whitespace collapsed
    """
    no_punct = input_string.translate(str.maketrans('', '', string.punctuation))
    words = no_punct.lower().split()
    return ' '.join(words)

# Confirmation message shown once article links have been collected.
st.write("Good news! I found some news articles using Google News!")
コード例 #7
0
from bs4 import BeautifulSoup
import requests

# Configure the Google News client. NOTE(review): this snippet uses
# setlang/setperiod/setencode/gettext, unlike the set_lang/set_period/
# set_encode/get_texts names used elsewhere in this file — presumably an
# older GoogleNews release; confirm against the installed version.
googlenews = GoogleNews()

googlenews.setlang('cn')
googlenews.setperiod('d')
googlenews.setencode('utf-8')
googlenews.clear()

# Prompt (Chinese): "Enter a keyword to search; related content will be collected for you:"
x = input("請輸入要搜尋的關鍵字,將為你搜集相關字詞內容:")
googlenews.search(x)

alldata = googlenews.result()
result = googlenews.gettext()
links = googlenews.get_links()
# print(type(result))
# print(len(result))
# print(alldata)

print()

# Print each headline followed by its link.
for n in range(len(result)):
    print(result[n])
    print(links[n])

# DataFrame with Chinese column names: '標題' = title, '連結' = link.
df = pd.DataFrame({'標題': result, '連結': links})

url = df['連結'][0]
print(url)
# Take one of the articles for analysis testing
コード例 #8
0
################## Search definition #########################
##############################################################

googlenews.set_encode('utf-8')
# Set the language (e.g. 'de' = German; 'en' = English; ...)
googlenews.set_lang('de')
# Filter by period (e.g. news no older than 1 day)
googlenews.set_period('1d')
#googlenews.set_time_range('15/01/2021','17/01/2021')

# Run the search
googlenews.get_news('Wetter Hamburg')

##############################################################
######################## Output ##############################
##############################################################

# All info (title, description, time, date, link, source)
#print(googlenews.results())

# Iterate over the news headlines
#for i in googlenews.results():
#    print(i['title'])
#print('Anzahl Ergebnisse: ', len(googlenews.results()))

# List of all news headlines
#print(googlenews.get_texts())

# Links to the sources
print(googlenews.get_links())