def __init__(self, politician_name):
    """Initialize an object representing recent articles about a politician.

    Searches Ukrainian-language Google News for *politician_name* over the
    last 3 days, scrapes a Google image search for matching thumbnails, and
    stores up to five ``[title, link, image_url]`` triples in
    ``self.articles``.

    Args:
        politician_name: Full name; the first two whitespace-separated
            tokens are used as name and surname for the image query.
    """
    news = GoogleNews()
    news.setlang("uk")
    news.setencode("utf-8")
    news.setperiod("3d")
    news.search(politician_name)
    info = news.result()

    self.articles = []
    # BUG FIX: split once instead of twice; still raises on a single-token
    # name, matching the original behavior.
    parts = politician_name.split()
    name, surname = parts[0], parts[1]
    self.link = f"https://www.google.com/search?q=+{name}+{surname}+новини&source=lnms&tbm=isch"

    # BUG FIX: the original defined a nested ``def get_data(self)`` and
    # called it as a plain function — a straightforward request is
    # equivalent and clearer.
    html_data = requests.get(self.link).text
    soup = BeautifulSoup(html_data, "html.parser")

    # Collect at most 6 image URLs; index 0 (usually a logo) is skipped
    # when pairing images with articles below.
    image_links = [item["src"] for item in soup.find_all("img")[:6]]

    # BUG FIX: guard against fewer than 5 search results or fewer than 6
    # images instead of raising IndexError.
    count = min(5, len(info), max(len(image_links) - 1, 0))
    for i in range(count):
        entry = info[i]
        self.articles.append([entry["title"], entry["link"], image_links[i + 1]])
def job(self):
    """Run one collection cycle.

    Downloads the current database, fetches the last day's news for every
    configured tag, appends them to the raw file, removes duplicates, and
    backs the database up again.
    """
    # Download the current database before appending new rows.
    self.getDB()
    self.print_header(self.rawFileName)
    self.lineCounter(self.rawFileName)
    x = 0  # running total of collected headlines across all tags
    for tag in self.newsTags:
        self.logger.info(f"Collecting newses from tag: {tag}")
        googlenews = GoogleNews()
        googlenews.clear()
        googlenews.set_lang(self.newsLang)
        googlenews.setperiod('1d')  # only articles from the last day
        googlenews.get_news(tag)
        output = googlenews.results(sort=True)
        output = pd.DataFrame(output)
        x = x + len(output['title'])
        # Append this tag's rows to the raw (buffer) file.
        self.saveToFile(output, self.rawFileName)
    self.logger.info(f"Collected amount of news: {x}")
    self.removeDuplicates(self.rawFileName, self.finalFileName)
    # The deduplicated file replaces the buffer file in place (renamed
    # rather than deleting and recreating it).
    os.rename(self.finalFileName, self.rawFileName)
    self.logger.info(f"Renamed: {self.finalFileName} to: {self.rawFileName}")
    self.backupDB()
def initalize_google_news(start_date, end_date):
    """Initializes the googlenews object.

    Args:
        start_date: Lower bound of the search window (library date format).
        end_date: Upper bound of the search window (library date format).

    Returns:
        A ``GoogleNews`` instance configured for English, UTF-8 results
        restricted to the user-specified date range.
    """
    print("initalize_google_news...")
    # Encoding is fixed once via the constructor argument.
    googlenews = GoogleNews(encode="utf-8")  # create googlenews object
    googlenews.setlang("en")
    # BUG FIX: dropped the redundant setencode("utf-8") call — the encoding
    # was already set by the constructor argument above.
    # NOTE(review): setperiod("d") and setTimeRange both constrain dates;
    # confirm which one the library actually applies when both are set.
    googlenews.setperiod("d")
    googlenews.setTimeRange(start_date, end_date)  # using user specified date range
    return googlenews
class GoogleNewsMethods():
    """Thin wrapper around the GoogleNews client for stock headlines."""

    def __init__(self):
        # Creates a googlenews object configured for English results.
        self.googlenews = GoogleNews(lang="en")

    def newscollection(self, stock, date):
        """Return a list of news results for a particular stock.

        Args:
            stock: Search term (e.g. a ticker or company name).
            date: Currently unused; kept for caller compatibility.

        Returns:
            The list of result dicts from ``GoogleNews.result()``; also
            stored on ``self.newsList``.
        """
        # BUG FIX: the time range must be configured *before* search() runs,
        # otherwise it cannot affect the results that were already fetched.
        # The original also passed a calendar date to setperiod(), which
        # expects a period string such as '7d'; that invalid call is removed.
        self.googlenews.setTimeRange('05/01/2020', '05/28/2020')
        self.googlenews.search(stock)
        self.newsList = self.googlenews.result()
        return (self.newsList)
'''
Example of GoogleNews usage.

Configures the client (English, UTF-8, 3-day window), runs a search,
then fetches a second page of results.
'''
from GoogleNews import GoogleNews
from pprint import pprint

news = GoogleNews()
news.setlang('en')       # English results
news.setencode('utf-8')
news.setperiod('3d')     # restrict the search to the last 3 days

news.search('Programming')
info = news.result()
print(news.total_count())  # total hit count reported by Google News
print(len(info))           # number of results currently held

# Fetch page 2 and re-read the (accumulated) result list.
news.get_page(2)
info = news.result()
print(len(info))
pprint(info)
return dataset if __name__ == '__main__': import time import requests from bs4 import BeautifulSoup from GoogleNews import GoogleNews googlenews = GoogleNews() googlenews = GoogleNews(lang='en') googlenews = GoogleNews(period='d') googlenews = GoogleNews(start='02/01/2020',end='02/28/2020') googlenews.setlang('en') googlenews.setperiod('d') googlenews.setTimeRange('02/01/2020','02/28/2020') googlenews.search('APPL') googlenews.getpage(2) x = googlenews.result() for item in x: web_link = item['link'] start = time.time() page_source = requests.get(web_link) soup = BeautifulSoup(page_source.text, "lxml") print('s: ', time.time()-start) try: text = soup.find('article').text #print(text) except:
from tinydb import TinyDB, Query
import random

from GoogleNews import GoogleNews

# setting up database
links_db = TinyDB("links.json")
Topic = Query()

# setting up google news api
googlenews = GoogleNews(lang="en")
# NOTE(review): "3" carries no unit — the library period format is e.g.
# "3d"; confirm the intended window.
googlenews.setperiod("3")


def return_random_link(topic):
    """Return a link for *topic*.

    Prefers a random link stored in the local database; falls back to the
    top live Google News result when none are cached.

    Args:
        topic: Topic string to look up / search for.

    Returns:
        A URL string.
    """
    links = links_db.search(Topic.topic == topic)
    if not links:
        # No cached links for this topic: hit Google News directly.
        print("fetching google link")
        googlenews.search(topic)
        google_links = googlenews.result()
        return google_links[0]["link"]
    print("fetching link from database")
    # BUG FIX: randrange(0, len(links) - 1) could never pick the last stored
    # link and raised ValueError when exactly one link existed;
    # random.choice draws uniformly from the whole list.
    return random.choice(links)["link"]


print(return_random_link("latinx"))
self.i += 1 current_prog = self.i / self.length self.prog_bar.progress(current_prog) st.title('Public sentiments') st.sidebar.title('User Inputs') searchInput = st.sidebar.text_input('search query') val = len(searchInput) if val > 0: agree = st.sidebar.checkbox('frequency') if agree: option = st.sidebar.selectbox('How would you like to be contacted?', ('1h', '1d', '7d', '1y')) googlenews.setperiod(option) else: st.sidebar.markdown('Select the time range for the search') dt1 = st.sidebar.date_input('from date', datetime.date.today()) dt2 = st.sidebar.date_input('till date', datetime.date.today()) if dt1 > dt2: st.sidebar.error('SELECT A VALID "FROM" DATE') else: googlenews.setTimeRange(dt1, dt2) with st.spinner('Getting data...'): googlenews.search(searchInput) news_content = [] ## ''' Google News start ''' for i in range(1, 1 + 1): googlenews.getpage(i)