def get_search_results(keyword: str):
    """Return up to five Google News entries for *keyword* (last 7 days).

    NOTE(review): get_news(), search() and get_page(1) all append to the same
    internal result list, so entries may repeat — confirm the mixed-source
    behaviour is intended.
    """
    client = GoogleNews(lang="en", period="7d", encode="utf-8")
    client.get_news(keyword)
    client.search(keyword)
    client.get_page(1)
    return client.results()[:5]
def job(self):
    """Collect news for every configured tag, deduplicate, and back up the DB.

    Downloads the current database, appends fresh Google News results for each
    tag in ``self.newsTags`` to the raw buffer file, removes duplicates into
    the final file, renames it back to the buffer name, then backs up.
    """
    # Download current database
    self.getDB()
    self.print_header(self.rawFileName)
    self.lineCounter(self.rawFileName)
    collected = 0
    for tag in self.newsTags:
        self.logger.info(f"Collecting newses from tag: {tag}")
        googlenews = GoogleNews()
        googlenews.clear()
        googlenews.set_lang(self.newsLang)
        # NOTE(review): current GoogleNews releases document set_period();
        # confirm the setperiod alias exists in the pinned version.
        googlenews.setperiod('1d')
        googlenews.get_news(tag)
        output = pd.DataFrame(googlenews.results(sort=True))
        # BUG FIX: len(output['title']) raised KeyError when a tag returned
        # no results (empty DataFrame has no 'title' column); len(output)
        # yields the same count and is safe for the empty case.
        collected += len(output)
        self.saveToFile(output, self.rawFileName)
    self.logger.info(f"Collected amount of news: {collected}")
    self.removeDuplicates(self.rawFileName, self.finalFileName)
    # Rename the deduplicated file back to the buffer name for the next run.
    os.rename(self.finalFileName, self.rawFileName)
    self.logger.info(f"Renamed: {self.finalFileName} to: {self.rawFileName}")
    self.backupDB()
def get_training_data(self):
    """
    load training data from google news
    """
    cache_path = './data/sentiment_data/headlines.csv'
    # Serve the cached copy when a previous run already downloaded the data.
    if os.path.isfile(cache_path):
        return pd.read_csv(cache_path)

    googlenews = GoogleNews(lang='en', start='01/01/2015')  # mm/dd/yyyy
    keywords = [
        'Blockchain', 'Cryptocurrency', 'Bitcoin',
        # NOTE(review): 'Etherium' misspells 'Ethereum' — kept byte-for-byte
        # to preserve the original search query.
        'Etherium',
        'Stock Market', 'Finance',
    ]
    # fetch news headlines for every keyword in keywords list
    headlines = []
    for keyword in tqdm(keywords):
        googlenews.get_news(keyword)
        # append news headlines to list
        for item in googlenews.results():
            headlines.append([item['datetime'], item['title']])
    # create a pandas dataframe with the collected rows and cache it as csv
    df = pd.DataFrame(headlines, columns=['date', 'headline'])
    df.to_csv(cache_path, index=False)
    return df
def get_news(assunto):
    """Return a slice of recent Google News headlines for *assunto* (pt)."""
    client = GoogleNews(period='d')
    client.setlang('pt')
    client.set_encode('utf-8')
    client.set_time_range('12/02/2021', '13/02/2021')
    client.get_news(assunto)
    headlines = client.get_texts()
    # Items 3..7 are returned when anything was found; otherwise a fallback
    # message. (Fewer than four results still yields an empty slice.)
    if not headlines:
        return "Sem notícias recentes"
    return headlines[3:8]
def getnewsData(self):
    """Export today's top-10 'Market' and 'Business' Google News results.

    Writes ./template/df_styled_Market.jpeg and
    ./template/df_styled_Business.jpeg via dataframe_image.
    """
    # dd/mm/yyyy — same value the previous split/join produced, via strftime.
    today_str = date.today().strftime('%d/%m/%Y')

    def export_topic(topic, out_path):
        # One helper instead of the duplicated Market/Business code paths.
        client = GoogleNews(start=today_str, end=today_str)
        client.get_news(topic)
        frame = pd.DataFrame(client.results()).head(10)
        dfi.export(frame, out_path)

    export_topic('Market', './template/df_styled_Market.jpeg')
    export_topic('Business', './template/df_styled_Business.jpeg')
def news_sentiments(self):
    # Returns news articles curated via Finviz, Yahoo, and Google News, GET UNUSUAL OPTION ACTIVITY
    """Scrape ticker-related news from Finviz, Yahoo Finance and Google News.

    Returns:
        tuple: (df, news, press_releases, sector_news, stock_news) —
        ``df`` is a pandas DataFrame of Finviz headlines (Time/Headline/Link);
        ``news`` and ``press_releases`` are (text, url) pairs from Yahoo
        Finance; ``stock_news`` holds Google News results for the ticker and
        ``sector_news`` one result set per competing sector.
    """
    # --- Finviz: parse the headline table on the quote page ---
    BASE_URL = f'https://finviz.com/quote.ashx?t={self.ticker}'
    soup = self._get_soup(BASE_URL)
    table = soup.find('table', {'class': 'fullview-news-outer'})
    rows = table.find_all('tr')
    df_data = []
    for row in rows:
        date = row.find('td', {'align': 'right'})
        article = row.find('td', {'align': 'left'})
        link = article.find('a')['href']
        df_data.append((date.get_text(), article.get_text(), link))
    df = pd.DataFrame(df_data, columns=['Time', 'Headline', 'Link'])
    # --- Yahoo Finance: news links. NOTE(review): the CSS class below is a
    # generated atomic-CSS string and will silently match nothing when Yahoo
    # ships a redesign — confirm it still works.
    BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/news?p={self.ticker}'
    soup = self._get_soup(BASE_URL)
    links = soup.find_all('a', {'class': 'js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) C(#0078ff):h C(#000) LineClamp(2,46px) LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled'})
    news = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]
    # --- Yahoo Finance: press releases (same fragile selector) ---
    BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/press-releases?p={self.ticker}'
    soup = self._get_soup(BASE_URL)
    links = soup.find_all('a', {'class': 'js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) C(#0078ff):h C(#000) LineClamp(2,46px) LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled'})
    press_releases = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]
    # Look for keywords in the news? Any showcases, Investor/analyst days,
    # Analyst revisions, Management transitions
    # Product launches, Significant stock buyback changes
    # Getting news from google news search
    googlenews = GoogleNews(lang='en', period='14d')  # Specify period for news
    googlenews.get_news(f'${self.ticker} stock')
    stock_news = googlenews.results()
    # print([(i, j) for i, j in zip(googlenews.get_texts(), googlenews.get_links())])
    # To get other pages, do googlenews.get_page(2), etc.
    # Have whitelist of websites to search articles from. Maybe have key word to filter out stupid stuff.
    sectors = self.find_competition()
    sector_news = []
    if sectors:
        for sector in sectors:
            googlenews = GoogleNews(lang='en', period='14d')
            googlenews.get_news(f'{sector} sector stocks')
            # NOTE(review): other call sites use results(); confirm result()
            # exists in the pinned GoogleNews version.
            sector_news.append(googlenews.result())
    return df, news, press_releases, sector_news, stock_news
def googleNewsApi(request, word):
    """Django view: render homepage.html with Google News items and tweets for *word*."""
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    # Return value discarded — presumably called for a side effect; TODO confirm.
    googlenews.total_count()
    resultsGoogleNews = googlenews.results()
    #print(resultsGoogleNews)
    #print(googlenews.total_count())
    #TWITTER
    # SECURITY(review): API credentials are hardcoded in source control —
    # rotate these keys and load them from settings/environment instead.
    consumer_key = 'sz6x0nvL0ls9wacR64MZu23z4'
    consumer_secret = 'ofeGnzduikcHX6iaQMqBCIJ666m6nXAQACIAXMJaFhmC6rjRmT'
    access_token = '854004678127910913-PUPfQYxIjpBWjXOgE25kys8kmDJdY0G'
    access_token_secret = 'BC2TxbhKXkdkZ91DXofF7GX8p2JNfbpHqhshW1bwQkgxN'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    date_since = datetime.today().strftime('%Y-%m-%d')
    print(date_since)
    #tweets = api.search(str("bitcoin"), count=1)
    # NOTE(review): api.search was renamed search_tweets in tweepy v4 —
    # confirm the pinned tweepy version still exposes .search.
    tweets = tweepy.Cursor(api.search, q=str(word), lang="en", since=date_since).items(100)
    """print(tweets.__dict__['page_iterator'].__dict__) for tweet in tweets: print(tweet) print(tweet.id)"""
    #return googlenews
    """for result in resultsGoogleNews: title = result['title'] date = result['date'] link = result['link'] source = result['site'] news = {'title':title, 'date': date, 'link': link, 'site':site} """
    return render(request, 'homepage.html', {
        'news': resultsGoogleNews,
        'tweets': tweets
    })
def scrape_the_news():
    """Scrape last-7-days Google News for the top NLP topic and store rows in Postgres.

    Side effects: inserts one row per article into scrapenews_newslist and
    commits; prints the topic and any rows that failed to insert.
    """
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent
    topiclist = NLP_news()
    print(topiclist[0])
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])
    result = googlenews.results()
    googlenews.clear()
    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]
    # SECURITY(review): hardcoded DB credentials — load from env/config instead.
    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()
    try:
        for i, row in df.iterrows():
            try:
                row.Link = 'https://' + row.Link
                columns = row.keys()
                values = [row[column] for column in columns]
                insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass),%s, %s, %s, %s, %s, %s)"
                curr.execute(insert_statement, tuple(values))
            except Exception:
                # Narrowed from a bare except (which also swallowed
                # KeyboardInterrupt/SystemExit); best-effort per-row insert.
                # NOTE(review): a failed execute aborts the transaction in
                # psycopg2 — later inserts in this batch will also fail until
                # a rollback; confirm that is acceptable.
                print('could not add row', i)
        conn.commit()
    finally:
        # RESOURCE FIX: cursor/connection are now closed even when an
        # unexpected error escapes the loop or commit.
        curr.close()
        conn.close()
def googleLinks(topic):
    """Resolve the first five Google News links for *topic* to final URLs.

    Returns a list of publisher URLs after following each news.google.com
    redirect; prints every intermediate link as it goes.
    """
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    # get_news() returns None — the previous unused `article = ...` binding
    # has been dropped.
    googlenews.get_news(topic)
    actualLinks = []
    for l in googlenews.get_links()[:5]:
        l = "http://" + l
        print(l)
        # Follow redirects so the Google wrapper resolves to the real article.
        actualLinks.append(requests.get(l).url)
    return actualLinks
def query_google_news(query):
    """Wrap the first 50 Google News hits for *query* into AllNews objects."""
    client = GoogleNews(lang='en')
    client.get_news(query)
    hits = client.results()
    # with open('data_google.txt') as f:
    #     hits = json.load(f)
    print(hits)
    wrapped = []
    for rank, item in enumerate(hits, start=1):
        if rank >= 51:  # keep at most the first 50 entries
            break
        wrapped.append(AllNews(item["desc"], item["title"], category=None,
                               date_time=item["datetime"], rank=rank,
                               src='google'))
    return wrapped
def crawling_news(company_name_list, start_date, end_date):
    """Collect Google News titles for every company and save them to sp500news.csv.

    Args:
        company_name_list: iterable of company-name search terms.
        start_date, end_date: date-range strings understood by
            GoogleNews.set_time_range. (Previously the literal strings
            'start_date'/'end_date' were passed by mistake, so the arguments
            were silently ignored.)

    Returns:
        pandas.DataFrame with one title per row (also written to sp500news.csv).
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    # BUG FIX: pass the actual arguments, not quoted parameter names.
    googlenews.set_time_range(start_date, end_date)
    googlenews.set_encode('utf-8')
    #news.google.com search sample
    all_title = []
    # Consistently use the configured logger instead of the root logger.
    logger.info('loop start')
    for i, company in enumerate(company_name_list):
        # clear() so results from previous companies are not harvested again
        # (GoogleNews accumulates results across get_news calls).
        googlenews.clear()
        googlenews.get_news(company)
        logger.info('%s : %0.2f%s' % (company, ((i + 1) / len(company_name_list)) * 100, '%'))
        # Hoisted: results() was re-fetched on every inner iteration.
        for item in googlenews.results():
            all_title.append(item.get('title'))
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logger.info('saved to csv, done!!')
    return all_title
def home(request):
    """Django view: render home.html with NSE news, a 30-day NIFTY chart and,
    on POST, either login handling or a per-company price prediction."""
    #news data
    googlenews = GoogleNews(lang='en', period='12h', encode='utf-8')
    googlenews.get_news('National Stock Exchange')
    news = googlenews.result(sort=True)
    # NOTE(review): news_first is never used below — template presumably
    # slices 'news' itself; confirm before removing.
    news_first = news[:8]
    # Remaining headlines chunked into pages of 8 for the template.
    news_length = []
    for i in range(1, len(news) // 8):
        news_length.append(news[i * 8:(i * 8) + 8])
    # chart data
    companies = Companies.objects.all()
    data = get_history(
        symbol="NIFTY",
        start=date.today() - timedelta(days=30),
        end=date.today(),
        index=True,
    )
    # NOTE(review): data._data reaches into pandas private internals
    # (BlockManager); axes[1] looks like the date index — confirm and prefer
    # a public accessor.
    labels = data._data.axes[1].tolist()
    context = {
        'companies': companies,
        'labels': labels,
        'data': data['Close'].tolist(),
        'news': news,
        'news_length': news_length,
    }
    if request.POST.get('login'):
        user = authenticate(request, username=request.POST['username'],
                            password=request.POST.get('password'))
        if user is not None:
            login(request, user)
            return redirect('home')
        else:
            print(user)
            context['error'] = "*Username and Password doesn't Match.*"
        # context['results']=results
        return render(request, 'home.html', context=context)
    if request.POST.get('option'):
        pk = request.POST['option']
        company = Companies.get_company_by_id(pk)
        print(company.name)
        print(company.symbol)
        obj = RunModel(company)
        current_data = get_history(
            symbol=company.symbol,
            start=date.today() - timedelta(days=30),
            end=date.today(),
        )
        current_labels = current_data._data.axes[1].tolist()
        # NaN padding so the prediction series starts where history ends on the chart.
        nan_ = [float('nan') for i in range(len(current_labels) - 1)]
        nan_.append(current_data['Close'].tolist()[-1])
        priceObj = obj.getPrice()
        nextDays = obj.getNext30Days()
        # color flags whether the predicted price ends below the latest close.
        if current_data['Close'].tolist()[-1] > nextDays[-1]:
            color = True
        else:
            color = False
        context['priceObj'] = priceObj
        context['nextDays'] = nextDays
        context['nextDays_data'] = nan_ + nextDays
        context['nextDays_labels'] = current_labels + list(range(1, 21))
        context['selectedOption'] = company.name
        context['current_data'] = current_data['Close'].tolist()
        context['current_labels'] = current_labels
        context['color'] = color
    return render(request, 'home.html', context=context)
# Load the pre-trained XGBoost classifier. RESOURCE FIX: `with` closes the
# file handle (pickle.load(open(...)) previously leaked it).
with open('models/xgboost_trained.pickle', 'rb') as f:
    classifier = pickle.load(f)

### DATA INPUT
st.write("Please enter a news topic below. The default is 'president'.")
user_input = st.text_input("news topic", 'president')
st.write(f"Thanks! Give me a few minutes to run your analysis on the search term: {user_input}. You might want to grab a coffee...")

### Run analysis
# get news articles
googlenews.get_news(user_input)
articles = googlenews.get_links()
# Cap the workload at 25 articles.
if len(articles) > 25:
    articles = articles[:25]


def clean_text(input_string):
    """
    clean the text parsed from the news articles
    :param input_string: raw plain text from article
    :return: clean_string: cleaned article text
    """
    # Strip punctuation, lowercase, and collapse all whitespace runs.
    clean_string = (input_string.translate(str.maketrans('', '', string.punctuation))).lower()
    clean_string = ' '.join(clean_string.split())
    return clean_string


st.write("Good news! I found some news articles using Google News!")
""" months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021'] fin = [] seen = [] for first in primary_phrases: for second in secondary_phrases: full_phrase = first+" "+second print(full_phrase) for i in range(0, len(months)-1): googlenews.set_time_range(months[i],months[i+1]) googlenews.get_news(full_phrase) res = googlenews.results(sort=True) #It would be very easy to get more than the first page. Simply use: googlenews.get_page(2) or result = googlenews.page_at(2), in conjunction with googlenews.total_count() #(to see how many results show up on that page, if there are zero, then probably that'the last page, but I'm not sure if that's exactly how it works) for result in res: if result['title'] not in seen: result['start date'] = months[i] result['end date'] = months[i+1] result['primary phrase'] = first result['secondary phrase'] = second result['full phrase'] = full_phrase fin.append(result) seen.append(result['title'])
def _google(self):
    """Populate self._news with today's Google News headline texts for the ticker."""
    # @TODO how to also accomodate full name (case insensitive)
    feed = GoogleNews(period='1d')
    feed.get_news(self._ticker)
    self._news = feed.get_texts()
def run_alexa():
    """Listen for one voice command and dispatch it to the matching action."""
    command = take_command()
    print(command)
    if 'music' in command:
        song = command.replace('play song', '')
        talk('I am playing your favourite ' + song)
        # print('playing')
        print(song)
        # playing the first video that appears in yt search
        pywhatkit.playonyt(song)
    elif 'time' in command:
        now = datetime.now()
        time = now.strftime("%H:%M:%S")
        print("time:", time)
        talk("Current time is " + time)
    elif 'month' in command or 'year' in command:
        # BUG FIX: `('month' or 'year') in command` evaluated to
        # `'month' in command`, so asking for the year never matched.
        now = datetime.now()
        year = now.strftime("%Y")
        print("year:", year)
        talk("Current year is " + year)
        month = now.strftime("%m")
        print("month:", month)
        talk("Current month is " + month)
    elif 'date' in command:
        now = datetime.now()
        date_time = now.strftime("%m/%d/%Y, %H:%M:%S")
        print("date and time:", date_time)
        talk("Current date and time is " + date_time)
    # opens web.whatsapp at specified time i.e before 10 minutes and send the msg
    elif 'whatsapp' in command:
        talk("To which number do you have to whatsapp")
        talk("Please dont forget to enter 10 digits with country code")
        num = input()
        talk("Enter the message you have to send")
        msg = input()
        talk("Enter the time to send the message")
        time = int(input())
        pywhatkit.sendwhatmsg(num, msg, time, 00)
        pywhatkit.showHistory()
        pywhatkit.shutdown(3000000000)
        # pywhatkit.sendwhatmsg("+919876543210", "This is a message", 15, 00)
    # Convert text to handwritten format
    elif 'convert' in command:
        text = command.replace('convert', '')
        pywhatkit.text_to_handwriting(text, rgb=[0, 0, 0])
    # Perform google search
    elif 'search' in command:
        key = command.replace('search', '')
        # BUG FIX: previously searched the literal string "key" instead of
        # the spoken query.
        pywhatkit.search(key)
    elif 'wikipedia' in command:
        person = command.replace('wikipedia', '')
        talk("How many pages do you want to read")
        num_pages = int(input())
        # talk("In which language do you want to read")
        # l = input()
        # wikipedia.set_lang(l)
        info = wikipedia.summary(person, num_pages)
        print(info)
        talk(info)
    elif 'can you work for me' in command:
        talk("sorry, I have headache. Please do your work")
    elif 'are you single' in command:
        talk("I am in relationshhip with wifi")
    elif 'joke' in command:
        talk(pyjokes.get_joke())
        talk("sorry for the lamest joke")
    elif 'open google browser' in command:
        try:
            urL = 'https://www.google.com'
            chrome_path = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
            webbrowser.register('chrome', None, webbrowser.BackgroundBrowser(chrome_path))
            webbrowser.get('chrome').open_new_tab(urL)
            talk("Successfully opened chrome its upto you to search")
        except Exception:
            # BUG FIX: the old bare-except body was the no-op expression
            # `webbrowser.Error`; stay silent as before but stop swallowing
            # KeyboardInterrupt/SystemExit.
            pass
    elif 'google search' in command:
        word_to_search = command.replace('google search', '')
        response = GoogleSearch().search(word_to_search)
        print(response)
        for result in response.results:
            print("Title: " + result.title)
            talk("You can look for the following titles " + result.title)
    elif 'weather' in command:
        # base URL
        BASE_URL = "https://api.openweathermap.org/data/2.5/weather?"
        talk("Which city weather are you looking for")
        try:
            with sr.Microphone() as source:
                print('listening weather...')
                city_voice = listener.listen(source)
                city = listener.recognize_google(city_voice)
                print(city)
            # SECURITY(review): API key hardcoded — load from env/config.
            API_KEY = "b5a362ef1dc8e16c673dd5049aa98d8f"
            URL = BASE_URL + "q=" + city + "&appid=" + API_KEY
            response = requests.get(URL)
            # checking the status code of the request
            if response.status_code == 200:
                data = response.json()
                main = data['main']
                temperature = main['temp']
                humidity = main['humidity']
                pressure = main['pressure']
                report = data['weather']
                # BUG FIX: `CITY` was undefined (NameError), and concatenating
                # the numeric readings with '+' raised TypeError; format
                # everything through f-strings instead.
                print(f"{city:-^30}")
                print(f"Temperature: {temperature}")
                print(f"Humidity: {humidity}")
                print(f"Pressure: {pressure}")
                print(f"Weather Report: {report[0]['description']}")
                talk(f"Temperature in {city} is {temperature} humidity is "
                     f"{humidity} pressure is {pressure} and your final "
                     f"weather report {report[0]['description']}")
            else:
                # showing the error message
                print("Error in the HTTP request")
                talk("Error in the HTTP request")
        except:
            talk("Hmmmmm, it looks like there is something wrong")
    elif 'news' in command:
        try:
            googlenews = GoogleNews()
            googlenews.set_lang('en')
            # googlenews.set_period('7d')
            # googlenews.set_time_range('02/01/2020', '02/28/2020')
            googlenews.set_encode('utf-8')
            talk("What news are you looking for")
            try:
                with sr.Microphone() as source:
                    print('listening news ...')
                    news_voice = listener.listen(source)
                    news_input = listener.recognize_google(news_voice)
                news_input = news_input.lower()
                print(news_input)
                googlenews.get_news(news_input)
                googlenews.search(news_input)
                googlenews.get_page(2)
                result = googlenews.page_at(2)
                news = googlenews.get_texts()
                print(news)
                talk(news)
            except:
                print("Error")
                talk("Error in reading input")
        except:
            print("No news")
            talk(" I couldn't find any news on this day")
    elif 'play book' in command or 'read pdf' in command:
        # BUG FIX: `'play book' or 'read pdf' in command` was always truthy,
        # so this branch shadowed the final else for every unmatched command.
        talk("Which pdf do you want me to read")
        book_input = input()
        print(book_input)
        book = open(book_input, 'rb')
        # create pdfReader object
        pdfReader = PyPDF2.PdfFileReader(book)
        # BUG FIX: keep the page count as an int — it was stringified and
        # later used in range(), which raised TypeError.
        total_pages = pdfReader.numPages
        print("Total number of pages " + str(total_pages))
        talk("Total number of pages " + str(total_pages))
        talk(" here are the options for you, you can press 1 to Play a single page 2 to Play between start and end points and 3 to Play the entire book ")
        talk("Enter your choice")
        choice = int(input())
        if (choice == 1):
            talk("Enter index number")
            page = int(input())
            page = pdfReader.getPage(page)
            text = page.extractText()
            talk(text)
        elif (choice == 2):
            talk("Enter starting page number")
            start_page = int(input())
            talk("Enter ending page number")
            end_page = int(input())
            for page_num in range(start_page + 1, end_page):
                # BUG FIX: every iteration re-read getPage(start_page + 1).
                page = pdfReader.getPage(page_num)
                text = page.extractText()
                talk(text)
        elif (choice == 3):
            # BUG FIX: range(total_pages + 1) overran the last valid index.
            for page_num in range(total_pages):
                page = pdfReader.getPage(page_num)
                text = page.extractText()
                talk(text)
        else:
            talk("Haha!! Please enter valid choice")
        # RESOURCE FIX: close the PDF file handle.
        book.close()
    else:
        talk("Hiii Rashika, I am so bored can you please give me some proper commands")
from GoogleNews import GoogleNews

# Demo of the GoogleNews API surface.
# FIX: the previous no-arg GoogleNews() instance was immediately overwritten
# by the lang='en' one and has been removed as a dead store.
googlenews = GoogleNews(lang='en')
googlenews.get_news('APPLE')   # news.google.com results
googlenews.search('APPLE')     # google.com news-tab results (appended)
googlenews.get_page(1)
# Return values intentionally discarded, as in the original demo.
googlenews.result()
googlenews.get_texts()
def get(request):
    """Render news.html with de-amplified Google News results.

    NOTE(review): get_news() is called without a search keyword — confirm an
    empty query is intended.
    """
    feed = GoogleNews()
    feed.get_news(deamplify=True)
    context = {'data': feed.result()}
    return render(request, 'news.html', context)
### MODULES
from GoogleNews import GoogleNews


### METHODS
def show_routine(results):
    """Print one indexed 'date - title' line for every result entry."""
    for num, page in enumerate(results):
        print("{}. {} - {}".format(num, page['date'], page['title']))


### MAIN
# Search configuration
keywords = "covid cava de' tirreni"
period = '10d'
google_news = GoogleNews(lang='it', period=period)
google = GoogleNews(lang='it', period=period)

# Results from news.google.com
google_news.get_news(keywords)
show_routine(google_news.results(sort=True))

# Results from google.com
google.search(keywords)
show_routine(google.results(sort=True))
def googlenews_recovery(app_config):
    """Build a GoogleNews client configured from *app_config* and run the search.

    Expects the keys "lang", "period" and "keywords" in app_config; returns
    the client with results already fetched.
    """
    client = GoogleNews()
    client.set_lang(app_config["lang"])
    client.set_period(app_config["period"])
    client.get_news(app_config["keywords"])
    return client
def home(request):
    """Django view: render home.html with crypto news, BTC price history and,
    on POST, either login handling or an n-day price prediction."""
    # news data
    googlenews = GoogleNews(lang='en', period='12h', encode='utf-8')
    googlenews.get_news('Cryptocurrency')
    news = googlenews.result(sort=True)
    news_first = news[:8]
    # Remaining headlines chunked into pages of 8 for the template.
    news_length = []
    for i in range(1, len(news) // 8):
        news_length.append(news[i * 8:(i * 8) + 8])
    # chart data
    # NOTE(review): absolute user-specific path — move to settings/MEDIA_ROOT.
    bitstamp = pd.read_csv(
        r"C:\Users\afzal\Desktop\Bitcoin-Predictor\Bitcoin-Predictor\Bitcoin\trained_models\bitt.csv"
    )
    data = bitstamp['Close']
    labels = ' $ BITCOIN PRICE PREDICTOR $ '
    context = {
        'labels': labels,
        'data': data.tolist(),
        'news': news,
        'news_length': news_length,
    }
    if request.POST.get('login'):
        user = authenticate(request, username=request.POST['username'],
                            password=request.POST.get('password'))
        if user is not None:
            login(request, user)
            return redirect('home')
        else:
            # BUG FIX: `user` is None in this branch, so
            # print('Welcome ' + user) raised TypeError; just log the result
            # (matches the sibling stock view).
            print(user)
            context['error'] = "*Username and Password doesn't Match.*"
        return render(request, 'home.html', context=context)
    if request.POST.get('option'):
        pk = request.POST['option']
        print(pk, flush=True)
        obj = RunModel()
        current_data = data
        current_labels = bitstamp.reset_index().Timestamp.values.tolist()
        # NaN padding so the prediction series starts where history ends.
        nan_ = [float('nan') for i in range(348)]
        nextDays = obj.getNextQDays(pk)
        # color flags whether the predicted price ends below the latest close.
        color = []
        if current_data.tolist()[-1] > nextDays[-1]:
            color.append(True)
        else:
            color.append(False)
        context['nextDays'] = nextDays
        context['nextDays_data'] = nan_ + nextDays
        context['nextDays_labels'] = current_labels + list(range(1, int(pk) + 1))
        # BUG FIX: `company` was undefined here (copied from the stock view)
        # and raised NameError; this page always predicts Bitcoin.
        context['selectedOption'] = 'Bitcoin'
        context['current_data'] = data.tolist()
        context['current_labels'] = current_labels
        context['color'] = color
        context['pk'] = pk
        context['onepiece'] = current_data.tolist()[-1]
    return render(request, 'home.html', context=context)
def main():
    """Run vaccine-sentiment analysis of Google News headlines per company/country.

    Reads countries from sample.csv, searches each company+country phrase over
    consecutive month windows, scores headline sentiment with VADER, and
    writes per-company result/summary CSVs plus ./Output/meta_data.csv.
    """
    all_df = []
    sid_obj = SentimentIntensityAnalyzer()
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    # NOTE(review): other scripts in this codebase use utf-8 — confirm utf-16
    # is intentional here.
    googlenews.set_encode('utf-16')
    """
    Primary Phrases refer to the keywords we are interested in studying
    Secondary Phrases refer to the target countries
    """
    company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']
    # testing_countries = ['Egypt', 'Kenya', 'Nigeria']
    testing_countries = []  # empty list means: process every country
    """
    Months refer to the date range
    """
    months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
    for first in company_name:
        fin = []
        seen = []
        with open('sample.csv', mode='r') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            summary_data = []
            for row in csv_reader:
                # '\ufeffCountry': first CSV header carries a BOM.
                second = row['\ufeffCountry']
                if (second not in testing_countries and len(testing_countries) != 0):
                    continue
                full_phrase = first + " " + second
                print(full_phrase)
                counter = 0
                sum_sent = 0
                pos_count = 0
                neg_count = 0
                neg_article = {'title': 'N/A', '% Negative': 0}
                for i in range(0, len(months) - 1):
                    googlenews.set_time_range(months[i], months[i + 1])
                    googlenews.get_news(full_phrase)
                    res = googlenews.results()
                    #It would be very easy to get more than the first page. Simply use: googlenews.get_page(2) or result = googlenews.page_at(2), in conjunction with googlenews.total_count()
                    #(to see how many results show up on that page, if there are zero, then probably that'the last page, but I'm not sure if that's exactly how it works)
                    for result in res:
                        if result['title'] not in seen:
                            result['start date'] = months[i]
                            result['end date'] = months[i + 1]
                            result['company'] = first
                            result['country'] = second
                            result['latitude'] = row['Latitude']
                            result['longitude'] = row['Longitude']
                            # VADER headline sentiment, rescaled to percentages
                            # (Magnitude maps compound [-1,1] onto [0,100]).
                            sentiment_dict = sid_obj.polarity_scores(result['title'])
                            result['% Negative'] = sentiment_dict['neg'] * 100
                            result['% Neutral'] = sentiment_dict['neu'] * 100
                            result['% Positive'] = sentiment_dict['pos'] * 100
                            result['Magnitude'] = sentiment_dict['compound'] * 50 + 50
                            counter += 1
                            sum_sent += result['Magnitude']
                            if result['% Positive'] > result['% Negative']:
                                pos_count += 1
                            else:
                                neg_count += 1
                            # Track the single most negative headline.
                            if result['% Negative'] >= neg_article['% Negative']:
                                neg_article = result
                            fin.append(result)
                            seen.append(result['title'])
                posPercent = 50
                if pos_count + neg_count > 0:
                    posPercent = pos_count / (pos_count + neg_count)
                magni = 0
                if counter > 0:
                    magni = sum_sent / counter
                country_comp_score = {'country': second, 'latitude': row['Latitude'], 'longitude': row['Longitude'], 'magnitude': magni, 'positive': pos_count, 'negative': neg_count, 'pos/(pos+neg)': posPercent, 'Most negative title': neg_article['title']}
                summary_data.append(country_comp_score)
                all_df.append((country_comp_score, first))
        df = pd.DataFrame(fin)
        # BUG FIX: the drop() result was discarded, so the raw date/img/media
        # columns leaked into the CSV; assign it back. errors='ignore' keeps
        # an empty result set from raising KeyError.
        df = df.drop(columns=['date', 'datetime', 'img', 'media'], errors='ignore')
        df.to_csv("./Output/{}_output.csv".format(first), index=False)
        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv("./Output/{}_summary_output.csv".format(first), index=False)
    # Build meta_data: for each country, the company with the most positive hits.
    fields = ['Country', 'Company', 'Count']
    meta_data = []
    seen = []
    for result in all_df:
        if result[0]['country'] not in seen:
            seen.append(result[0]['country'])
            meta_data.append([result[0]['country'], result[1], result[0]['positive']])
        else:
            for candidate in meta_data:
                if candidate[0] == result[0]['country'] and candidate[2] < result[0]['positive']:
                    candidate[1] = result[1]
                    candidate[2] = result[0]['positive']
    # FIX: newline='' as the csv module requires (avoids blank rows on Windows).
    with open('./Output/meta_data.csv', 'w', newline='') as f:
        write = csv.writer(f)
        write.writerow(fields)
        write.writerows(meta_data)
# Map the model's argmax class index to a disaster label.
if (max_pos == 0):
    disaster = "Cyclone"
elif (max_pos == 1):
    disaster = "Earthquake"
elif (max_pos == 2):
    disaster = "Flood"
else:
    disaster = "Wildfire"
user_agent = 'Chrome/50.0.2661.102'
config = Config()
config.browser_user_agent = user_agent
# BUG FIX: three successive GoogleNews(...) constructions discarded the
# earlier end-date and period settings; configure a single client with all of
# them. NOTE(review): confirm how the library resolves period vs end when
# both are set.
news = GoogleNews(lang='en', period='15d', end='12/12/2020')
news.get_news(disaster)
# FIX: sort expects a boolean; the string 'true' only worked by truthiness.
result = news.result(sort=True)
"""
for i in result:
    print("Title:", i['title'])
    print("Source:", i['media'])
    print(i['date'])
    print("")
"""
pd.DataFrame(result).to_csv('C:/Users/user/Desktop/Minor Project/output.csv', header=False, index=False)
image1 = Image.open(r'C:\Users\user\Desktop\Minor Project\uploads\disaster.jpg')
im1 = image1.convert('RGB')
googlenews = GoogleNews()

##############################################################
################ Search configuration ########################
##############################################################
googlenews.set_encode('utf-8')
# Result language (e.g. 'de' = German; 'en' = English; ...)
googlenews.set_lang('de')
# Filter by period (e.g. news no older than one day)
googlenews.set_period('1d')
#googlenews.set_time_range('15/01/2021','17/01/2021')
# Run the search
googlenews.get_news('Wetter Hamburg')

##############################################################
######################## Output ##############################
##############################################################
# Full records (title, description, time, date, link, source):
#print(googlenews.results())
# Iterate over the headlines:
#for i in googlenews.results():
#    print(i['title'])
#print('Number of results: ', len(googlenews.results()))
# List of all headlines only:
#print(googlenews.get_texts())