def __init__(self, politician_name):
    """Collect recent news articles about a politician, each with an image.

    Runs a GoogleNews search (language "uk", period "3d") for
    ``politician_name`` and downloads a Google image-search page built from
    the same name. Up to five ``[title, link, image_src]`` lists are stored
    in ``self.articles``; fewer are stored when either source yields fewer
    items, rather than failing with ``IndexError``.

    Args:
        politician_name: Name to search for. Only its first two
            whitespace-separated words are used in the image-search URL.
    """
    news = GoogleNews()
    news.setlang("uk")
    news.setencode("utf-8")
    news.setperiod("3d")
    news.search(politician_name)
    info = news.result()

    self.articles = []

    # Join whatever words are available: two-word names produce exactly the
    # same URL as before, and a single-word name no longer raises IndexError.
    query = "+".join(politician_name.split()[:2])
    self.link = (
        f"https://www.google.com/search?q=+{query}+новини&source=lnms&tbm=isch"
    )

    # A timeout keeps object construction from hanging on a stalled request.
    response = requests.get(self.link, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    # Keep the first six image sources, ignoring <img> tags without "src".
    image_links = []
    for item in soup.find_all("img"):
        src = item.get("src")
        if src is None:
            continue
        image_links.append(src)
        if len(image_links) == 6:
            break

    # The first image is deliberately skipped (presumably page chrome rather
    # than a result thumbnail -- TODO confirm). zip() stops at the shorter
    # sequence, so short result sets simply give fewer articles.
    for entry, image in zip(info[:5], image_links[1:]):
        self.articles.append([entry["title"], entry["link"], image])
def initalize_google_news(start_date, end_date):
    """Create and configure a GoogleNews client for a given date range.

    Prints a progress message, then builds a ``GoogleNews`` object with
    UTF-8 encoding, language "en" and period "d", and applies the
    caller-supplied time range.

    Args:
        start_date: Start of the range, passed unchanged to ``setTimeRange``
            (presumably an "mm/dd/yyyy" string -- confirm against caller).
        end_date: End of the range, passed unchanged to ``setTimeRange``.

    Returns:
        The configured ``GoogleNews`` instance.
    """
    print("initalize_google_news...")

    client = GoogleNews(encode="utf-8")
    client.setlang("en")
    client.setperiod("d")
    client.setencode("utf-8")
    # Restrict results to the user-specified date range.
    client.setTimeRange(start_date, end_date)

    return client
class Engine:
    """Paging wrapper around a ``GoogleNews`` client.

    Attributes:
        news: The underlying ``GoogleNews`` object (language "en", UTF-8).
        pageNumber: Page most recently requested through this engine.
        searchTerm: Term passed to the latest ``search`` call ("" before any).
    """

    def __init__(self):
        self.news = GoogleNews()
        self.news.setlang('en')
        # Optional date restriction, left disabled:
        # self.news.setTimeRange('01/01/2000','01/01/2015')
        self.news.setencode('utf-8')
        self.pageNumber = 1
        self.searchTerm = ""
        # The old guard compared the bound method ``self.news.result`` with
        # None, which is never true; an explicit flag makes the check real.
        self._has_searched = False

    def _require_search(self):
        """Raise RuntimeError unless ``search`` has been called."""
        if not self._has_searched:
            raise RuntimeError("Engine has not searched yet")

    def _load_page(self):
        """Replace current results with page ``pageNumber``; True if non-empty."""
        self.news.clear()
        self.news.getpage(self.pageNumber)
        return bool(self.news.result())

    def nextPage(self):
        """Advance one page and load it.

        Returns:
            True if the loaded page has results, otherwise False.

        Raises:
            RuntimeError: If no search has been performed yet.
        """
        self._require_search()
        self.pageNumber += 1
        return self._load_page()

    def previousPage(self):
        """Step back one page and load it.

        Returns:
            True if the loaded page has results, otherwise False.

        Raises:
            RuntimeError: If no search has been performed yet.
        """
        self._require_search()
        self.pageNumber -= 1
        return self._load_page()

    def search(self, term):
        """Search for ``term`` and remember it in ``searchTerm``.

        Returns:
            The client's result list, or False when it is empty.
        """
        self.news.search(term)
        self.searchTerm = term
        self._has_searched = True
        results = self.news.result()
        return results if len(results) != 0 else False

    def getPageNumber(self):
        """Return the current page number."""
        return self.pageNumber

    def getResults(self):
        """Return whatever results the client currently holds."""
        return self.news.result()

    def clear(self):
        """Discard the client's accumulated results."""
        self.news.clear()

    def resetPageNumber(self):
        """Reset the page counter to 1 without fetching anything."""
        self.pageNumber = 1
"""Example of GoogleNews usage: run a search, then request a second page."""
from GoogleNews import GoogleNews
from pprint import pprint

# Client setup: language "en", UTF-8 encoding, period "3d".
news = GoogleNews()
news.setlang("en")
news.setencode("utf-8")
news.setperiod("3d")

# Run the search and report the total count plus how many items came back.
news.search("Programming")
info = news.result()
print(news.total_count())
print(len(info))

# Request page 2, re-read the results, and show their count and contents.
news.get_page(2)
info = news.result()
print(len(info))
pprint(info)
#!/usr/bin/env python
# coding: utf-8
"""Prompt for a keyword, search GoogleNews, and print each headline with its link."""
import pandas as pd
import jieba
from GoogleNews import GoogleNews
from bs4 import BeautifulSoup
import requests

# Client setup: language "cn", period "d", UTF-8 encoding, cleared results.
googlenews = GoogleNews()
googlenews.setlang("cn")
googlenews.setperiod("d")
googlenews.setencode("utf-8")
googlenews.clear()

# The prompt asks the user for the keyword to search for.
x = input("請輸入要搜尋的關鍵字，將為你搜集相關字詞內容：")
googlenews.search(x)

alldata = googlenews.result()   # full result records (not printed)
result = googlenews.gettext()   # headline texts
links = googlenews.get_links()  # links, indexed in step with the texts

# Debug helpers: print(type(result)), print(len(result)), print(alldata)
print()
for n, headline in enumerate(result):
    print(headline)
    print(links[n])