コード例 #1
0
    def __init__(self, politician_name):
        """Collect recent news entries and image links for a politician.

        Runs a Google News search (language "uk", period "3d") for
        ``politician_name`` and fetches a Google image-search page built
        from the first two words of the name.  ``self.articles`` is filled
        with ``[title, link, image_src]`` lists, at most five of them.

        Args:
            politician_name: Name to search for.  Only its first two
                whitespace-separated words are used in the image query.
        """
        news = GoogleNews()
        news.setlang("uk")
        news.setencode("utf-8")
        news.setperiod("3d")
        news.search(politician_name)
        info = news.result()
        self.articles = []

        # Joining a slice instead of indexing [0] and [1] keeps the URL
        # identical for two-word names and avoids an IndexError when the
        # name has only one word.
        query = "+".join(politician_name.split()[:2])
        self.link = f"https://www.google.com/search?q=+{query}+новини&source=lnms&tbm=isch"

        html_data = requests.get(self.link).text
        soup = BeautifulSoup(html_data, "html.parser")

        # Keep the first six image sources; <img> tags without a src
        # attribute are skipped rather than raising KeyError.
        image_links = []
        for item in soup.find_all("img"):
            src = item.get("src")
            if src is None:
                continue
            image_links.append(src)
            if len(image_links) == 6:
                break

        # The first collected image is deliberately not used (the original
        # paired result i with image i + 1).  zip() stops at the shorter
        # sequence, so fewer than five results or images no longer crash.
        for text, image in zip(info[:5], image_links[1:6]):
            self.articles.append([text["title"], text["link"], image])
コード例 #2
0
ファイル: news.py プロジェクト: eriksonlb/TARS
def get_news(assunto):
    """Fetch Google News headline texts for the topic ``assunto``.

    Args:
        assunto: Search topic passed to ``GoogleNews.get_news``.

    Returns:
        The headline texts at positions 3..7 as a list, or the string
        "Sem notícias recentes" when that slice is empty.
    """
    news = GoogleNews(period='d')
    news.setlang('pt')
    news.set_encode('utf-8')
    # NOTE(review): the date window is hard-coded; confirm whether it
    # should be derived from the current date instead.
    news.set_time_range('12/02/2021', '13/02/2021')
    news.get_news(assunto)
    results = news.get_texts()
    # Test the slice itself: checking len(results) > 0 returned an empty
    # list (not the fallback message) whenever only 1-3 texts came back.
    selected = results[3:8]
    return selected if selected else "Sem notícias recentes"
コード例 #3
0
def initalize_google_news(start_date, end_date):
    """Create and configure a GoogleNews client for a date range.

    Args:
        start_date: Range start, forwarded to ``setTimeRange``.
        end_date: Range end, forwarded to ``setTimeRange``.

    Returns:
        The configured GoogleNews instance (language "en", period "d",
        utf-8 encoding).
    """
    print("initalize_google_news...")

    client = GoogleNews(encode="utf-8")
    client.setlang("en")
    client.setperiod("d")
    client.setencode("utf-8")
    # Restrict results to the caller-supplied date range.
    client.setTimeRange(start_date, end_date)
    return client
コード例 #4
0
def search(keyword=None, datestart=None, dateend=None, pgs=1):
    """Search Google News and append parsed articles to ``noticias``.

    For every result row the article is downloaded, parsed and summarised;
    a dict with the keys 'Date', 'Media', 'Title', 'Article', 'Summary'
    and 'Created' is appended to the module-level list ``noticias``.
    Articles that fail to parse are reported with 'Error' and skipped.

    Args:
        keyword: Search term.
        datestart: Start date passed to ``GoogleNews(start=...)``.
        dateend: End date passed to ``GoogleNews(end=...)``.
        pgs: Number of ``getpage`` calls to make after the initial search.
    """
    # Module-level state shared with the rest of the file.
    global noticias
    global cont
    global acabou

    # Search parameters
    print('Keyword: ', keyword)

    # Search configuration
    googlenews = GoogleNews(start=datestart, end=dateend)
    googlenews.setlang('pt')
    googlenews.search(keyword)
    result = googlenews.result()

    # Load the results into a DataFrame and show the first five rows
    df = pd.DataFrame(result)
    print(df.head())

    # Fetch the requested pages.
    # NOTE(review): paging starts at 0 here; confirm whether the library
    # expects 1-based page numbers.
    for i in range(0, pgs):
        googlenews.getpage(i)
    # Only the final result set is used, so build the DataFrame once
    # instead of on every iteration.
    df = pd.DataFrame(googlenews.result())

    # Turn each row into a dict describing the downloaded article
    for ind in df.index:
        print('Noticia numero: {}'.format(ind))
        article = Article(df['link'][ind], config=config)
        article.download()
        try:
            article.parse()
            article.nlp()
            entry = {
                'Date': df['date'][ind],
                'Media': df['media'][ind],
                'Title': article.title,
                'Article': article.text,
                'Summary': article.summary,
                'Created': False,
            }
        except Exception:
            # Best-effort: skip articles that cannot be processed, but let
            # KeyboardInterrupt/SystemExit propagate (a bare except did not).
            print('Error')
        else:
            noticias.append(entry)
        time.sleep(0)
コード例 #5
0
class Engine:
    """Paging wrapper around a GoogleNews client.

    Attributes are kept public as in the original:
    ``news`` is the GoogleNews client, ``pageNumber`` the current page
    (starting at 1) and ``searchTerm`` the last term passed to ``search``.
    """

    def __init__(self):
        self.news = GoogleNews()
        self.news.setlang('en')
        self.news.setencode('utf-8')
        self.pageNumber = 1
        self.searchTerm = ""
        # Set by search(); lets the paging methods detect "no search yet".
        self._hasSearched = False

    def _requireSearch(self):
        """Raise RuntimeError unless search() has been called."""
        # The original compared the bound method ``self.news.result`` with
        # None, which is never true, so this guard could not fire.
        if not self._hasSearched:
            raise RuntimeError("Engine has not searched yet")

    def nextPage(self):
        """Load the next page; return True if it has any results."""
        self._requireSearch()
        self.pageNumber += 1
        self.news.clear()
        self.news.getpage(self.pageNumber)
        return len(self.news.result()) > 0

    def previousPage(self):
        """Load the previous page; return True if it has any results.

        Returns False without changing state when already on page 1.
        """
        self._requireSearch()
        if self.pageNumber <= 1:
            # There is no page before the first one.
            return False
        self.pageNumber -= 1
        self.news.clear()
        self.news.getpage(self.pageNumber)
        return len(self.news.result()) > 0

    def search(self, term):
        """Search for ``term``; return the results, or False if none."""
        self.news.search(term)
        self.searchTerm = term
        self._hasSearched = True
        results = self.news.result()
        return results if len(results) > 0 else False

    def getPageNumber(self):
        """Return the current page number."""
        return self.pageNumber

    def getResults(self):
        """Return the client's current result list."""
        return self.news.result()

    def clear(self):
        """Clear the client's stored results."""
        self.news.clear()

    def resetPageNumber(self):
        """Reset the current page number to 1."""
        self.pageNumber = 1
コード例 #6
0
def extract_links(dir_c, dir_k, lang):
    """Write dated news links for every topic to ``<lang>/links/``.

    For each entry of the module-level ``topics`` the keywords and date
    range are looked up, Google News is searched within that range, and
    each result's date and link are written to
    ``<lang>/links/<topic>_links.txt``.

    Args:
        dir_c: Path prefix of the per-topic ``.txt`` input files.
        dir_k: Location passed to ``get_keywords``.
        lang: Language code for the search; also the output directory.
    """
    for t in topics:
        print('Current topic: ', t + '\n')

        kw = get_keywords(dir_k, t)
        print('Keywords: ', kw + '\n')

        # Context manager: the original never closed this file.
        with open(dir_c + t + '.txt', 'r') as f_clean:
            fp = f_clean.readlines()
        min_d, max_d, num_d = get_date_range(fp)
        print('Date range: ', min_d, max_d + '\n')

        key_enc = quote(kw.encode('utf8'))
        googlenews = GoogleNews()
        googlenews.setlang(lang)
        googlenews.setTimeRange(min_d, max_d)
        googlenews.search(key_enc)
        result = googlenews.result()

        page = 1
        curr_art = len(result)

        # Keep paging until 10 results per day are collected or a page
        # adds nothing new.
        while curr_art < 10*num_d:
            page += 1
            googlenews.getpage(page)
            result = googlenews.result()
            num_art = len(result)
            if curr_art < num_art:
                curr_art = num_art
            else:
                break

        # The file is closed even if a write or date parse fails.
        with open(lang + '/links/' + t + '_links.txt', 'w') as f_out:
            # Slicing (rather than indexing up to curr_art) cannot run past
            # the end of the last result list.
            for item in result[:curr_art]:
                date = str(dateparser.parse(item['date']).date())
                link = item['link']
                f_out.write(date + '\n' + link)
                f_out.write('\n--------------------------------\n')

        print('--------------------------------\n')
コード例 #7
0
def crawl(search: str, lang: str = "en", directory: str = None) -> list:
    """Download the articles found by a Google News search.

    Args:
        search: Search term.
        lang: Language code for the search.
        directory: If given, each downloaded article is also written there
            as ``<search>_<index>_<lang>.yml`` (lower-cased, spaces
            replaced by underscores).

    Returns:
        The list of downloaded articles.
    """
    client = GoogleNews()
    client.setlang(lang)
    client.search(search)

    articles = []
    for res in client.result():
        try:
            articles.append(Article.download(res['link']))
        except MaxRetryError:
            logger.error(f'MaxRetryError for {res["link"]}')

    # Nothing to persist: return early.
    if directory is None:
        return articles

    for idx, article in enumerate(articles):
        if article is None:
            continue
        filename = f'{search}_{idx}_{lang}.yml'.lower().replace(' ', '_')
        article.to_yaml(f'{directory}/{filename}')

    return articles
コード例 #8
0
'''
Example of GoogleNews usage.
'''

from GoogleNews import GoogleNews
from pprint import pprint

# Create the client and configure language, encoding and period
news = GoogleNews()

news.setlang('en')
news.setencode('utf-8')
news.setperiod('3d')

# Run the search for the term 'Programming'
news.search('Programming')

info = news.result()

# Show the reported total count and the number of results held so far
print(news.total_count())
print(len(info))

# Request page 2, then read the result list again
news.get_page(2)

info = news.result()

print(len(info))

# Pretty-print the result entries
pprint(info)
コード例 #9
0
ファイル: google_news.py プロジェクト: aleksl17/ibmmab
from GoogleNews import GoogleNews

# Module-level GoogleNews client, configured with language code 'no'
googlenews = GoogleNews()
googlenews.setlang('no')


def search_with_inmput(pages, query=None):
    """Search Google News for a user-supplied term and return the links.

    Args:
        pages: Number of result pages to collect; pages 2..``pages`` are
            fetched after the initial search.
        query: Search term.  When omitted, the term is read from standard
            input.

    Returns:
        The links reported by the client for the collected pages.
    """
    # Author's note: GoogleNews does not handle special characters such
    # as æ ø å.
    if query is None:
        # The original passed the builtin ``input`` function object itself
        # to search(); read the term from the user instead.
        query = input()
    googlenews.search(query)
    if pages >= 2:
        for x in range(2, pages + 1):
            googlenews.getpage(x)
    news_links = googlenews.get__links()
    # Clear the shared client's stored results before returning.
    googlenews.clear()
    return news_links


def norge_klima_search(pages):
    """Search Google News for 'norge klima' and return the links.

    Args:
        pages: Number of result pages to collect; pages 2..``pages`` are
            fetched after the initial search.

    Returns:
        The links reported by the client for the collected pages.
    """
    googlenews.search('norge klima')
    # Page 1 comes from the search itself; only later pages are requested.
    extra_pages = range(2, pages + 1) if pages >= 2 else ()
    for page_no in extra_pages:
        googlenews.getpage(page_no)
    found_links = googlenews.get__links()
    # Clear the shared client's stored results before returning.
    googlenews.clear()
    return found_links
コード例 #10
0
            continue
        if 0 < choice < 5:
            break
        else:
            print("That is not between 1 and 4! Try again:")
    print ("You entered: {} ") # Good to use format instead of string formatting with %
# Dispatch the menu choice to its handler.
# NOTE(review): options 3 and 4 both map to import_from_camcorder; confirm
# that this is intended.
mydict = {1:go_to_stackoverflow, 2:import_from_phone, 3:import_from_camcorder, 4:import_from_camcorder}
mydict[choice]()
print(askUser())

s_req = input("Enter the term you would like to search")
# The prompts previously asked for "MM-DD-YYY"; GoogleNews time ranges are
# given as MM/DD/YYYY.
st_date = input("Please enter your desired start date (MM/DD/YYYY): ")
en_date = input("Please enter your desired end date (MM/DD/YYYY): ")

googlenews = GoogleNews()
googlenews.setlang('en')
googlenews.setTimeRange(st_date,en_date)
googlenews.search(s_req)
# NOTE(review): the returned result list is discarded here.
googlenews.result()




# Create a least squares regression model using the variables
all_adj_close= all_data[['Adj Close']]
all_returns = np.log(all_adj_close / all_adj_close.shift(1))

# Isolate the returns you want to value for the OLS
print("As a reminder, you have selected the following: " + input_string)
sample_stocks = input("Please choose 2 of the stocks you have chosen to calculate a OLS regression: ")
コード例 #11
0
#!/usr/bin/env python
# coding: utf-8
import pandas as pd
import jieba
from GoogleNews import GoogleNews
from bs4 import BeautifulSoup
import requests

# Create the client and configure language, period and encoding
googlenews = GoogleNews()

# NOTE(review): confirm that 'cn' is a language code the library accepts.
googlenews.setlang('cn')
googlenews.setperiod('d')
googlenews.setencode('utf-8')
googlenews.clear()

# Read the search keyword from the user and run the search
x = input("請輸入要搜尋的關鍵字,將為你搜集相關字詞內容:")
googlenews.search(x)

alldata = googlenews.result()
result = googlenews.gettext()
links = googlenews.get_links()

print()

# Pair each text with its link.  zip() stops at the shorter list, so a
# length mismatch no longer raises IndexError as links[n] could.
for title, link in zip(result, links):
    print(title)
    print(link)