def crawling_news(company_name_list, start_date, end_date, save_file_name):
    # set logger handler
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    # define GoogleNews client
    googlenews = GoogleNews(lang='en', start=start_date, end=end_date, encode='utf-8')

    # news.google.com search sample
    all_title = []
    logger.info('loop start')
    for i, comp_name in enumerate(company_name_list):
        googlenews.search(comp_name)
        logger.info('%s : %0.2f%%' % (comp_name, ((i + 1) / len(company_name_list)) * 100))
        results = googlenews.results()  # fetch once instead of re-querying per item
        for item in results:
            all_title.append([item.get('title'), comp_name, fixing_date(item.get('date'))])
        # clear result list before the next company
        googlenews.clear()

    all_title = pd.DataFrame(all_title)
    all_title.to_csv('%s.csv' % save_file_name)
    logger.info('saved as %s.csv, done!!' % save_file_name)
    return all_title
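# crawling_news() above calls a fixing_date() helper that is not shown in this
# collection. Below is a minimal hypothetical sketch of such a normalizer,
# assuming the goal is turning GoogleNews date strings (relative forms like
# '2 hours ago' or absolute forms like 'Mar 1, 2021') into ISO dates. The name,
# regex, and fallback behavior are assumptions, not the original helper.
from datetime import datetime, timedelta
import re

def fixing_date(raw):
    """Hypothetical sketch: normalize a GoogleNews date string to YYYY-MM-DD."""
    if raw is None:
        return None
    match = re.match(r'(\d+)\s+(hour|day|week|month)s?\s+ago', raw)
    if match:
        n, unit = int(match.group(1)), match.group(2)
        days = {'hour': 0, 'day': 1, 'week': 7, 'month': 30}[unit] * n
        return (datetime.utcnow() - timedelta(days=days)).strftime('%Y-%m-%d')
    try:
        return datetime.strptime(raw, '%b %d, %Y').strftime('%Y-%m-%d')
    except ValueError:
        return raw  # leave unparseable strings untouched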
def getnewsData(self):
    today = date.today()
    # GoogleNews expects mm/dd/yyyy for start/end (the original built dd/mm/yyyy
    # by splitting the ISO string)
    toDate = today.strftime('%m/%d/%Y')

    googlenewsMkt = GoogleNews(start=toDate, end=toDate)
    googlenewsMkt.get_news('Market')
    result = googlenewsMkt.results()
    df = pd.DataFrame(result).head(10)
    dfi.export(df, './template/df_styled_Market.jpeg')

    googlenewsBiz = GoogleNews(start=toDate, end=toDate)
    googlenewsBiz.get_news('Business')
    result = googlenewsBiz.results()
    df = pd.DataFrame(result).head(10)
    dfi.export(df, './template/df_styled_Business.jpeg')
def get_search_results(keyword: str):
    googlenews = GoogleNews(lang="en", period="7d", encode="utf-8")
    # Note: get_news() (news.google.com feed) and search() (google.com news
    # search) both append into the same internal result list, so the first
    # five results returned below are a mix of the two sources.
    googlenews.get_news(keyword)
    googlenews.search(keyword)
    googlenews.get_page(1)
    results = googlenews.results()
    return results[0:5]
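# A minimal sketch (not from the original) of keeping the two GoogleNews modes
# separate: clear() drops stored results between the news.google.com feed
# (get_news) and the google.com news search (search), so the two result sets
# are not mixed the way they are in get_search_results() above.
from GoogleNews import GoogleNews

def get_results_separately(keyword: str):
    googlenews = GoogleNews(lang="en", period="7d", encode="utf-8")
    googlenews.get_news(keyword)                 # news.google.com feed
    feed_results = list(googlenews.results())
    googlenews.clear()                           # reset before switching modes
    googlenews.search(keyword)                   # google.com news search
    search_results = list(googlenews.results())
    return feed_results[:5], search_results[:5]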
def get_training_data(self):
    """Load training data from Google News."""
    # check if data has already been downloaded
    if not os.path.isfile('./data/sentiment_data/headlines.csv'):
        googlenews = GoogleNews(lang='en', start='01/01/2015')  # mm/dd/yyyy
        news = []
        keywords = [
            'Blockchain', 'Cryptocurrency', 'Bitcoin', 'Ethereum',
            'Stock Market', 'Finance'
        ]
        # fetch news headlines for every keyword in the keywords list
        for keyword in tqdm(keywords):
            googlenews.get_news(keyword)
            results = googlenews.results()
            # append news headlines to the news list
            for result in results:
                news.append([result['datetime'], result['title']])
        # create a pandas dataframe from the news list and save it to csv
        df = pd.DataFrame(news, columns=['date', 'headline'])
        df.to_csv('./data/sentiment_data/headlines.csv', index=False)
        return df
    else:
        return pd.read_csv('./data/sentiment_data/headlines.csv')
def job(self):
    # download current database
    self.getDB()
    self.print_header(self.rawFileName)
    self.lineCounter(self.rawFileName)
    x = 0
    for tag in self.newsTags:
        self.logger.info(f"Collecting news items for tag: {tag}")
        googlenews = GoogleNews()
        googlenews.clear()
        googlenews.set_lang(self.newsLang)
        googlenews.set_period('1d')  # was setperiod(); the GoogleNews method is set_period()
        googlenews.get_news(tag)
        output = googlenews.results(sort=True)
        output = pd.DataFrame(output)
        x = x + len(output['title'])
        self.saveToFile(output, self.rawFileName)
    self.logger.info(f"Collected amount of news: {x}")
    self.removeDuplicates(self.rawFileName, self.finalFileName)
    # os.remove(rawFileName)  # delete the buffer file
    # logger.info(f"Removed file with duplicates: {rawFileName}")
    os.rename(self.finalFileName, self.rawFileName)  # rename final file back to the buffer name
    self.logger.info(f"Renamed: {self.finalFileName} to: {self.rawFileName}")
    self.backupDB()
async def create_item(item: Item):
    googlenews = GoogleNews()
    googlenews.set_lang('pt')
    googlenews.search(item.mensagem)
    result = googlenews.get_texts()[0]
    translations = translator.translate(result, dest='en')
    textTranslator = translations.text
    # sentiment polarity scores for the message
    score = analyser.polarity_scores(textTranslator)
    # compound score: overall sentiment of the message
    compound = score['compound']
    if compound > 0:
        mensagemSentimento = "noticia positiva"
    elif compound == 0:
        mensagemSentimento = "noticia neutra"
    else:
        mensagemSentimento = "noticia negativa"
    return {"mensagem": result, "sentimento": mensagemSentimento}
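# The FastAPI endpoint above assumes a Pydantic request model named Item with
# a `mensagem` field, plus a googletrans Translator and a VADER analyser
# created elsewhere. A minimal sketch of those assumed definitions (the exact
# originals are not shown in this collection):
from pydantic import BaseModel
from googletrans import Translator
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

class Item(BaseModel):
    mensagem: str  # search phrase sent by the client

translator = Translator()
analyser = SentimentIntensityAnalyzer()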
def news_sentiments(self):
    # Returns news articles curated via Finviz, Yahoo, and Google News.
    # TODO: get unusual option activity.
    BASE_URL = f'https://finviz.com/quote.ashx?t={self.ticker}'
    soup = self._get_soup(BASE_URL)
    table = soup.find('table', {'class': 'fullview-news-outer'})
    rows = table.find_all('tr')
    df_data = []
    for row in rows:
        date = row.find('td', {'align': 'right'})
        article = row.find('td', {'align': 'left'})
        link = article.find('a')['href']
        df_data.append((date.get_text(), article.get_text(), link))
    df = pd.DataFrame(df_data, columns=['Time', 'Headline', 'Link'])

    yf_link_class = ('js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) '
                     'LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 '
                     'LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) '
                     'C(#0078ff):h C(#000) LineClamp(2,46px) '
                     'LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled')

    BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/news?p={self.ticker}'
    soup = self._get_soup(BASE_URL)
    links = soup.find_all('a', {'class': yf_link_class})
    news = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]

    BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/press-releases?p={self.ticker}'
    soup = self._get_soup(BASE_URL)
    links = soup.find_all('a', {'class': yf_link_class})
    press_releases = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]
    # Look for keywords in the news? Showcases, investor/analyst days, analyst
    # revisions, management transitions, product launches, significant stock
    # buyback changes.

    # Getting news from a Google News search
    googlenews = GoogleNews(lang='en', period='14d')  # specify period for news
    googlenews.get_news(f'${self.ticker} stock')
    stock_news = googlenews.results()
    # print([(i, j) for i, j in zip(googlenews.get_texts(), googlenews.get_links())])
    # To get other pages, do googlenews.get_page(2), etc.
    # Keep a whitelist of websites to search articles from; maybe use keywords
    # to filter out noise.
    sectors = self.find_competition()
    sector_news = []
    if sectors:
        for sector in sectors:
            googlenews = GoogleNews(lang='en', period='14d')
            googlenews.get_news(f'{sector} sector stocks')
            sector_news.append(googlenews.results())  # was result(); the method is results()
    return df, news, press_releases, sector_news, stock_news
def crawling_news(company_name_list, start_date, end_date):
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_time_range(start_date, end_date)  # was the literal strings 'start_date', 'end_date'
    googlenews.set_encode('utf-8')

    # news.google.com search sample
    all_title = []
    logger.info('loop start')
    for i, comp_name in enumerate(company_name_list):
        googlenews.get_news(comp_name)
        logger.info('%s : %0.2f%%' % (comp_name, ((i + 1) / len(company_name_list)) * 100))
        for item in googlenews.results():
            all_title.append(item.get('title'))

    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logger.info('saved to csv, done!!')
    return all_title
def googleNewsApi(request, word):
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    googlenews.total_count()
    resultsGoogleNews = googlenews.results()

    # TWITTER (credentials redacted; load these from config/env in practice)
    consumer_key = '****'
    consumer_secret = '****'
    access_token = '****'
    access_token_secret = '****'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    date_since = datetime.today().strftime('%Y-%m-%d')
    tweets = tweepy.Cursor(api.search, q=str(word), lang="en",
                           since=date_since).items(100)
    return render(request, 'homepage.html', {
        'news': resultsGoogleNews,
        'tweets': tweets
    })
def GNews():
    gn = GoogleNews()
    gn.set_period('7d')
    countries = [
        "INDIA", "USA", "UK", "AUSTRALIA", "FRANCE", "UGANDA",
        "PAKISTAN", "MALDIVES", "CELEBRITY"
    ]
    gn.search(random.choice(countries))
    rs = gn.results()
    # accumulate across all results (the original reassigned data each iteration,
    # so only the last result survived)
    data = ""
    for i in rs:
        data += i['title']
        data += i['desc']
        data += i['link']
    return data
def callGoogle(state):
    try:
        googlenews = GoogleNews(lang='pt')
        googlenews.search('covid ' + state)
        newsData = googlenews.results(sort=True)

        def to_dict(row):
            # map a GoogleNews result row onto the response fields
            return {
                'titulo': row['title'],
                'desc': row['desc'],
                'link': row['link'],
                'fonte': row['media'],
                'data': row['date'],
            }

        if state == 'Brasil':
            return to_dict(newsData[0])
        for row in newsData:
            this_row = row['title']
            if state in this_row \
                    or initialStates[state] in this_row \
                    or state.split(' ')[0] in this_row:
                return to_dict(row)
            elif state == 'Minas Gerais' and ('BH' in this_row or 'bh' in this_row):
                # the original tested ('BH' or 'bh') in this_row, which only ever checks 'BH'
                return to_dict(row)
        # fall back to the top result if no title mentions the state
        return to_dict(newsData[0])
    except Exception:
        return 'Google News API is not working'
def scrape_the_news():
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent
    topiclist = NLP_news()
    print(topiclist[0])

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])
    result = googlenews.results()
    googlenews.clear()

    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]

    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()
    for i, row in df.iterrows():
        try:
            row.Link = 'https://' + row.Link
            columns = row.keys()
            values = [row[column] for column in columns]
            insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass), %s, %s, %s, %s, %s, %s)"
            curr.execute(insert_statement, tuple(values))
        except Exception as e:
            print('could not add row', i, e)
    conn.commit()
    curr.close()
    conn.close()
def get_news(query: str, pages: int = 35) -> List[Dict[str, Any]]:
    """
    Search news defined by query. Returns a list of search results.

    Parameters
    ----------
    query : str
        The news search query to use.
    pages : int, default 35
        Number of result pages to fetch.

    Returns
    -------
    news : list of dict
        News list; each element is a dictionary containing news details
        such as title, date, and URL.
    """
    googlenews = GoogleNews(start='01/01/2010', end='01/01/2015')
    googlenews.search(query)
    news = []
    for page in tqdm(range(pages), leave=False):
        googlenews.get_page(page)
        news += googlenews.results()
    return news
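# Illustrative call for get_news() above; the query string and page count are
# example values only.
articles = get_news("solar energy", pages=3)
if articles:
    print(len(articles), articles[0]["title"])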
def query_google_news(query):
    googlenews = GoogleNews(lang='en')
    googlenews.get_news(query)
    res = googlenews.results()
    # offline alternative:
    # with open('data_google.txt') as f:
    #     res = json.load(f)
    all_news_list = []
    # keep at most the top 50 results
    for rank, news in enumerate(res[:50], start=1):
        all_news = AllNews(news["desc"], news["title"], category=None,
                           date_time=news["datetime"], rank=rank, src='google')
        all_news_list.append(all_news)
    return all_news_list
from datetime import date
from GoogleNews import GoogleNews

news = GoogleNews()
news.set_lang('en')
date_today = date.today()
# set_time_range expects mm/dd/yyyy strings, so format today's date accordingly
# (the original passed the date object directly)
news.set_time_range('01/11/2020', date_today.strftime('%m/%d/%Y'))
news.set_encode('utf-8')

topic = input("Topic : ")
news.search(topic)
news.get_page(2)

# headlines with links (world news)
results = news.results()
for item in results[:6]:
    print(item["title"])
    print(item["link"])
months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021']
fin = []
seen = []
for first in primary_phrases:
    for second in secondary_phrases:
        full_phrase = first + " " + second
        print(full_phrase)
        for i in range(0, len(months) - 1):
            googlenews.set_time_range(months[i], months[i + 1])
            googlenews.get_news(full_phrase)
            res = googlenews.results(sort=True)
            # It would be very easy to get more than the first page. Simply use
            # googlenews.get_page(2) or result = googlenews.page_at(2), in
            # conjunction with googlenews.total_count() (to see how many results
            # show up on that page; if there are zero, that's probably the last
            # page, but I'm not sure if that's exactly how it works).
            for result in res:
                if result['title'] not in seen:
                    result['start date'] = months[i]
                    result['end date'] = months[i + 1]
                    result['primary phrase'] = first
                    result['secondary phrase'] = second
                    result['full phrase'] = full_phrase
                    fin.append(result)
                    seen.append(result['title'])
df = pd.DataFrame(fin)
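# A minimal pagination sketch building on the comment above, assuming
# page_at() returns the results of a single page and that an empty page
# signals the end of the result set (as the comment itself hedges).
def fetch_all_pages(googlenews, max_pages=10):
    collected = list(googlenews.results())   # page 1, already fetched
    for page in range(2, max_pages + 1):
        page_results = googlenews.page_at(page)
        if not page_results:                 # empty page: likely past the end
            break
        collected.extend(page_results)
    return collected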
def game():
    for i in range(1000):
        request = input('Auto-Bot at your service. Please state your request. ')
        if request == 'google':
            query = input('Search: ')
            print(search(query, num_results=3))
        elif request == 'stocks':
            ticker = input('Ticker Symbol: ')
            realticker = yf.Ticker(ticker)
            print(realticker.history(period='1mo'))  # yfinance uses '1mo' for one month; '1m' is an interval
        elif request == 'weather':
            place = input('City: ')
            weather = weather_forecast.forecast(place=place, time=current_time, date=d1)
        elif request == 'email':
            to = input('Email address: ')
            content = input('What do you want to say? ')
            address = '*****@*****.**'
            password = '******'
            s = bot.SMTP(host='smtp.gmail.com', port=587)
            s.starttls()
            s.login(address, password)
            s.ehlo()
            s.sendmail(address, to, content)
        elif request == 'song':
            song = input('Song name: ')
            results = YoutubeSearch(song, max_results=1).to_dict()
            url = list(results[0].values())[7]
            print(f'https://www.youtube.com{url}')
        elif request == 'news':
            news = input('Search news: ')
            gn = GoogleNews()
            gn.search(news)
            newnews = gn.results()
            # print title, source, caption, and url for the first five results
            # (the original unrolled this into five near-identical blocks)
            for entry in newnews[:5]:
                values = list(entry.values())
                print(f'Title: {values[0]}')
                print(f'Source: {values[1]}')
                print(f'Caption: {values[4]}')
                print(f'Url: {values[5]}')
        elif request == 'math':
            def add(x, y):
                return x + y

            # This function subtracts two numbers
            def subtract(x, y):
                return x - y

            # This function multiplies two numbers
            def multiply(x, y):
                return x * y

            # This function divides two numbers
            def divide(x, y):
                return x / y

            while True:
                # Take input from the user
                choice = input("Enter choice( + / - / * / / ): ")
                # Check if choice is one of the four options
                if choice in ('+', '-', '*', '/'):
                    num1 = float(input("Enter first number: "))
                    num2 = float(input("Enter second number: "))
                    if choice == '+':
                        print(num1, "+", num2, "=", add(num1, num2))
                    elif choice == '-':
                        print(num1, "-", num2, "=", subtract(num1, num2))
                    elif choice == '*':
                        print(num1, "*", num2, "=", multiply(num1, num2))
                    elif choice == '/':
                        print(num1, "/", num2, "=", divide(num1, num2))
                    break
                else:
                    print("Invalid Input")
        elif request == 'game':
            type = input('Which game? Press 1 for tic-tac-toe, press 2 for rock-paper-scissors ')
            if type == '1':
                unused_keys = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
                theBoard = {'7': ' ', '8': ' ', '9': ' ',
                            '4': ' ', '5': ' ', '6': ' ',
                            '1': ' ', '2': ' ', '3': ' '}
                board_keys = []
                for key in theBoard:
                    board_keys.append(key)

                # We have to print the updated board after every move in the
                # game, so we define printBoard to print the board any time it
                # is called.
                def printBoard(board):
                    print(board['7'] + '|' + board['8'] + '|' + board['9'])
                    print('-+-+-')
                    print(board['4'] + '|' + board['5'] + '|' + board['6'])
                    print('-+-+-')
                    print(board['1'] + '|' + board['2'] + '|' + board['3'])

                # The main function with all the gameplay functionality.
                def tictactoe():
                    turn = 'X'
                    count = 0
                    for i in range(10):
                        printBoard(theBoard)
                        print("It's your turn," + turn + ".Move to which place?")
                        if turn == 'O':
                            # pick a random free square (the original indexed
                            # unused_keys with randint(1, 9), which can go out
                            # of range as the list shrinks)
                            choice = random.choice(unused_keys)
                            theBoard[choice] = turn
                            unused_keys.remove(choice)
                            count += 1
                        elif turn == 'X':
                            move = input()
                            if theBoard[move] == ' ':
                                theBoard[move] = turn
                                unused_keys.remove(move)
                                count += 1
                            else:
                                print("That place is already filled.\nMove to which place?")
                                continue
                        # Check whether X or O has won, for every move after 5 moves.
                        if count >= 5:
                            winning_lines = [
                                ('7', '8', '9'),  # across the top
                                ('4', '5', '6'),  # across the middle
                                ('1', '2', '3'),  # across the bottom
                                ('1', '4', '7'),  # down the left side
                                ('2', '5', '8'),  # down the middle
                                ('3', '6', '9'),  # down the right side
                                ('7', '5', '3'),  # diagonal
                                ('1', '5', '9'),  # diagonal
                            ]
                            won = False
                            for a, b, c in winning_lines:
                                if theBoard[a] == theBoard[b] == theBoard[c] != ' ':
                                    printBoard(theBoard)
                                    print("\nGame Over.\n")
                                    print(" **** " + turn + " won. ****")
                                    won = True
                                    break
                            if won:
                                break
                        # If neither X nor O wins and the board is full, it's a tie.
                        if count == 9:
                            print("\nGame Over.\n")
                            print("It's a Tie!!")
                        # Change the player after every move.
                        if turn == 'X':
                            turn = 'O'
                        else:
                            turn = 'X'

                tictactoe()
            elif type == '2':
                print("Winning Rules of the Rock paper scissor game as follows: \n"
                      + "Rock vs paper->paper wins \n"
                      + "Rock vs scissor->Rock wins \n"
                      + "paper vs scissor->scissor wins \n")
                print("Enter choice \n 1. Rock \n 2. paper \n 3. scissor \n")
                choice = int(input("User turn: "))
                # loop until the user enters a valid input
                while choice > 3 or choice < 1:
                    choice = int(input("enter valid input: "))
                # set choice_name corresponding to the choice value
                if choice == 1:
                    choice_name = 'Rock'
                elif choice == 2:
                    choice_name = 'paper'
                else:
                    choice_name = 'scissor'
                print("user choice is: " + choice_name)
                print("\nNow its computer turn.......")
                # The computer chooses randomly among 1, 2 and 3 using the
                # randint method of the random module.
                comp_choice = random.randint(1, 3)
                # loop until comp_choice differs from the user's choice
                while comp_choice == choice:
                    comp_choice = random.randint(1, 3)
                # set comp_choice_name corresponding to the choice value
                if comp_choice == 1:
                    comp_choice_name = 'Rock'
                elif comp_choice == 2:
                    comp_choice_name = 'paper'
                else:
                    comp_choice_name = 'scissor'
                print("Computer choice is: " + comp_choice_name)
                print(choice_name + " V/s " + comp_choice_name)
                # condition for winning
                if ((choice == 1 and comp_choice == 2) or
                        (choice == 2 and comp_choice == 1)):
                    print("paper wins => ", end="")
                    result = "paper"
                elif ((choice == 1 and comp_choice == 3) or
                        (choice == 3 and comp_choice == 1)):
                    print("Rock wins =>", end="")
                    result = "Rock"
                else:
                    print("scissor wins =>", end="")
                    result = "scissor"
                # print whether the user or the computer wins
                if result == choice_name:
                    print("<== User wins ==>")
                else:
                    print("<== Computer wins ==>")
from GoogleNews import GoogleNews
from newspaper import Article
import pandas as pd
from datetime import date
from afinn import Afinn

af = Afinn()

PATH = r"C:\Program Files (x86)\chromedriver_win32\chromedriver.exe"  # raw string so backslashes are not escapes
# driver = webdriver.Chrome(PATH)

print(date.today().strftime('%m/%d/%Y'))
googlenews = GoogleNews(start=str(date.today().strftime('%m/%d/%Y')),
                        end=str(date.today().strftime('%m/%d/%Y')))
# topic = input('what topic would u like to know about')
googlenews.search(' ')
# googlenews.get_page(2)
a = googlenews.results()

newList = [i['title'] for i in a]
for x in newList:
    print(x)
    score = af.score(x)
    if score > 0:
        print('positive')
    elif score == 0:
        print('neutral')
    else:
        print('negative')

k = input("press close to exit")
def main():
    all_df = []
    sid_obj = SentimentIntensityAnalyzer()
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-16')

    # Primary phrases are the keywords we are interested in studying;
    # secondary phrases are the target countries.
    company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']
    # testing_countries = ['Egypt', 'Kenya', 'Nigeria']
    testing_countries = []

    # Months define the date range.
    # months = ['08/01/2020', '09/01/2020', '10/01/2020']
    # months = ['01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
    months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']

    for first in company_name:
        fin = []
        seen = []
        with open('sample.csv', mode='r') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            summary_data = []
            for row in csv_reader:
                second = row['\ufeffCountry']
                if second not in testing_countries and len(testing_countries) != 0:
                    continue
                full_phrase = first + " " + second
                print(full_phrase)
                counter = 0
                sum_sent = 0
                pos_count = 0
                neg_count = 0
                neg_article = {'title': 'N/A', '% Negative': 0}
                for i in range(0, len(months) - 1):
                    googlenews.set_time_range(months[i], months[i + 1])
                    googlenews.get_news(full_phrase)
                    res = googlenews.results()
                    # It would be very easy to get more than the first page.
                    # Simply use googlenews.get_page(2) or result =
                    # googlenews.page_at(2), in conjunction with
                    # googlenews.total_count() (if a page shows zero results it
                    # is probably the last one, but I'm not sure if that's
                    # exactly how it works).
                    for result in res:
                        if result['title'] not in seen:
                            result['start date'] = months[i]
                            result['end date'] = months[i + 1]
                            result['company'] = first
                            result['country'] = second
                            result['latitude'] = row['Latitude']
                            result['longitude'] = row['Longitude']
                            sentiment_dict = sid_obj.polarity_scores(result['title'])
                            result['% Negative'] = sentiment_dict['neg'] * 100
                            result['% Neutral'] = sentiment_dict['neu'] * 100
                            result['% Positive'] = sentiment_dict['pos'] * 100
                            result['Magnitude'] = sentiment_dict['compound'] * 50 + 50
                            counter += 1
                            sum_sent += result['Magnitude']
                            if result['% Positive'] > result['% Negative']:
                                pos_count += 1
                            else:
                                neg_count += 1
                            if result['% Negative'] >= neg_article['% Negative']:
                                neg_article = result
                            fin.append(result)
                            seen.append(result['title'])
                posPercent = 50
                if pos_count + neg_count > 0:
                    posPercent = pos_count / (pos_count + neg_count)
                magni = 0
                if counter > 0:
                    magni = sum_sent / counter
                country_comp_score = {'country': second,
                                      'latitude': row['Latitude'],
                                      'longitude': row['Longitude'],
                                      'magnitude': magni,
                                      'positive': pos_count,
                                      'negative': neg_count,
                                      'pos/(pos+neg)': posPercent,
                                      'Most negative title': neg_article['title']}
                summary_data.append(country_comp_score)
                all_df.append((country_comp_score, first))
        df = pd.DataFrame(fin)
        # the original called df.drop(...) without assigning the result back
        df = df.drop(columns=['date', 'datetime', 'img', 'media'])
        df.to_csv("./Output/{}_output.csv".format(first), index=False)
        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv("./Output/{}_summary_output.csv".format(first), index=False)
        # all_df.append(summary_df)

    # meta_data = []
    # with open('sample.csv', mode='r') as csv_file:
    #     dic_len = sum(1 for line in open('sample.csv'))
    # with open('sample.csv', mode='r') as csv_file:
    #     csv_reader = csv.DictReader(csv_file)
    #     for j in range(0, dic_len):
    #         most_pos = 0
    #         for i in range(0, len(company_name)):
    #             if all_df[most_pos][j]['positive'] < all_df[i][j]['positive']:
    #                 most_pos = i
    #         meta_data.append({all_df[0][j]['\ufeffCountry']: company_name[most_pos]})

    fields = ['Country', 'Company', 'Count']
    meta_data = []
    seen = []
    for result in all_df:
        if result[0]['country'] not in seen:
            seen.append(result[0]['country'])
            meta_data.append([result[0]['country'], result[1], result[0]['positive']])
        else:
            for candidate in meta_data:
                if candidate[0] == result[0]['country'] and candidate[2] < result[0]['positive']:
                    candidate[1] = result[1]
                    candidate[2] = result[0]['positive']
    with open('./Output/meta_data.csv', 'w') as f:
        write = csv.writer(f)
        write.writerow(fields)
        write.writerows(meta_data)
import tensorflow as tf
from tensorflow import keras
import numpy as np
from GoogleNews import GoogleNews

reconstructed_model = keras.models.load_model("model")
googlenews = GoogleNews()

print('Sentiment Analysis (-1 to 1, Negative to Positive Sentiment)')
ticker = input('Enter in Stock Ticker (Blank to Quit): ')
sort = True
while ticker != '':
    titles = []
    googlenews.search(ticker + ' Stock')
    news = googlenews.results(sort=sort)
    googlenews.clear()
    for articles in news:
        titles.append(articles['title'])
    predictions = []
    for title in titles:
        predictions.append(reconstructed_model.predict(np.array([title]))[0][0])
    sentiment = 0
    for prediction in predictions:
        if prediction > 0:
            # The source snippet is truncated at this point; tallying positive
            # predictions and re-prompting below is an assumed reconstruction.
            sentiment += 1
    print(f'{ticker}: {sentiment}/{len(predictions)} headlines scored positive')
    ticker = input('Enter in Stock Ticker (Blank to Quit): ')
def GET(self):
    if session.get("user"):
        logged_in = True
    else:
        logged_in = False
    i = web.input(q="", sort="table", typ="text")
    if i.q == "":
        if logged_in:
            stin = db[session.get("user")]
        else:
            stin = {
                "engines": {
                    "Google": "checked",
                    "Bing": "checked",
                    "DuckDuckGo": "checked",
                    "Yahoo": "checked"
                },
                "default_typ": {
                    "text": "checked",
                    "image": "",
                    "video": "",
                    "news": "",
                    "maps": "",
                    "shopping": ""
                }
            }
        return render.home(logged_in, stin)
    else:
        r = requests.get("http://httpbin.org/ip")
    global cache
    # clear cache if cache is too big
    if len(cache) > 25:
        cache = {}
    engines = []
    sort = i.sort
    typ = i.typ
    if "Google" in i:
        engines.append("Google")
    if "Bing" in i:
        engines.append("Bing")
    if "DuckDuckGo" in i:
        engines.append("DuckDuckGo")
    if "Yahoo" in i:
        engines.append("Yahoo")
    if "Google" not in i and "Bing" not in i and "DuckDuckGo" not in i and "Yahoo" not in i:
        if logged_in:
            engines = db[session.get("user")]['engines']
        else:
            engines = ['Google', 'Bing', 'DuckDuckGo', 'Yahoo']
    dictionary = []
    info = []
    ans = []
    if i.q != "" and typ == "text":
        start_time = time.time()
        goog = []
        b = []
        duckduckgo = []
        yhoo = []
        use_cache = False
        try:
            # if within 2 days of the last cache, use the cache (cache per user)
            if cache[session.get("user")][i.q]["last_updated"] + 172800 > time.time() \
                    and random.randint(1, 10) == 5:
                use_cache = True
        except Exception:
            pass
        if use_cache:
            goog = cache[session.get("user")][i.q]["google"]
            b = cache[session.get("user")][i.q]["bing"]
            duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
            yhoo = cache[session.get("user")][i.q]["yahoo"]
        else:
            if "Google" in engines:
                queue1 = Queue()
                p = Process(target=google, args=(i.q, queue1))
                p.start()
            if "Bing" in engines:
                queue2 = Queue()
                p2 = Process(target=bing, args=(i.q, queue2))
                p2.start()
            if "DuckDuckGo" in engines:
                queue3 = Queue()
                p3 = Process(target=ddg, args=(i.q, queue3))
                p3.start()
            if "Yahoo" in engines:
                queue4 = Queue()
                p4 = Process(target=yahoo, args=(i.q, queue4))
                p4.start()
            if "Google" in engines:
                goog = queue1.get()
                p.join()
            if "Bing" in engines:
                b = queue2.get()
                p2.join()
            if "DuckDuckGo" in engines:
                duckduckgo = queue3.get()
                p3.join()
            if "Yahoo" in engines:
                yhoo = queue4.get()
                p4.join()
            dictionary = word_dictionary(i.q)
            info = infobox(i.q)
            ans = ansbox(i.q)
            if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines \
                    and "Bing" in engines and logged_in:
                try:
                    cache[session.get("user")][i.q] = {
                        "google": goog,
                        "bing": b,
                        "yahoo": yhoo,
                        "duckduckgo": duckduckgo,
                        "last_updated": time.time()
                    }
                except Exception:
                    pass
        data = []
        e = []
        f = []
        for g in goog:
            g['engine'] = "Google"
            e.append(g)
            f.append(g['title'])
        for bingresult in b:
            bingresult['engine'] = "Bing"
            e.append(bingresult)
            f.append(bingresult['title'])
        for d in duckduckgo:
            d['engine'] = "DuckDuckGo"
            e.append(d)
            f.append(d['title'])
        for y in yhoo:
            y['engine'] = 'Yahoo'
            e.append(y)
            f.append(y['title'])

        def getnum(s0, s1):
            # percentage of words in s0 that also appear in s1
            s0 = s0.lower()
            s1 = s1.lower()
            s0List = s0.split(" ")
            s1List = s1.split(" ")
            num = len(list(set(s0List) & set(s1List)))
            return round(num / len(s0List) * 100)

        g = set(f)
        counter = 0
        so = []
        for item in e:
            if "stackoverflow.com" in item['link']:
                thing = ""
                for x in so:
                    if getnum(x[0]['title'], item['title']) >= 90:
                        thing = x
                        break
                if thing:
                    so.remove(thing)
                    engines = x[1]
                    engines.append(item['engine'])
                    x = [x[0], engines]
                    so.append(x)
                else:
                    engines = [item['engine']]
                    x = [item, engines]
                    so.append(x)
            else:
                thing = ""
                for x in data:
                    if getnum(x[0]['title'], item['title']) >= 90:
                        thing = x
                        break
                if thing:
                    data.remove(thing)
                    engines = x[1]
                    engines.append(item['engine'])
                    x = [x[0], engines, x[2]]
                    data.append(x)
                else:
                    engines = [item['engine']]
                    x = [item, engines, counter]
                    data.append(x)
                    counter += 1
        done = 0
        data2 = []
        for item in data:
            if done == len(data):
                break
            if data.index(item) != item[2]:
                data.insert(item[2], data.pop(data.index(item)))
            done += 1
        data2, data = data, data2
        for item in so:
            data.append(item)
        for item in data2:
            data.append(item)
        print("--- %s seconds ---" % (time.time() - start_time))
        return render.text(data, i.q, dictionary, info, ans, logged_in)
    elif i.q != "" and typ == "image":
        query = i.q.replace(" ", "+")
        ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
        goog = requests.get(f"https://google.com/search?q={query}&tbm=isch",
                            headers={'User-Agent': ua}).content
        soup = BeautifulSoup(goog, "html.parser")
        images = soup.findAll('img')
        imgs = []
        for image in images:
            image = str(image)
            link = image.split('src="')[-1].split('"')[0]
            imgs.append(link)
        goog = imgs
        b = requests.get(f"https://bing.com/images/search?q={query}&form=HDRSC2",
                         headers={'User-Agent': ua}).content
        soup = BeautifulSoup(b, "html.parser")
        images = soup.findAll('img')
        imgs = []
        for image in images:
            image = str(image)
            link = image.split('src="')[-1].split('"')[0]
            if link.startswith("/rp"):
                link = f"https://bing.com/images/search?q={query}&form=HDRSC2" + link
            if link != "<img alt=":
                imgs.append(link)
        b = imgs
        duckduckgo = requests.get(f"https://duckduckgo.com/?q={query}&ia=images",
                                  headers={'User-Agent': ua})
        soup = BeautifulSoup(duckduckgo.content, "html.parser")
        images = soup.findAll('img')
        imgs = []
        for image in images:
            image = str(image)
            link = image.split('src="')[-1].split('"')[0]
            imgs.append(link)
        duckduckgo = imgs
        yhoo = requests.get(
            f"https://images.search.yahoo.com/search/images;_ylt=A0geJaQetm1gPx0AGURXNyoA;_ylu=Y29sbwNiZjEEcG9zAzEEdnRpZAMEc2VjA3BpdnM-?p={query}&fr2=piv-web&fr=opensearch",
            headers={'User-Agent': ua}).content
        soup = BeautifulSoup(yhoo, "html.parser")
        images = soup.findAll('img')
        imgs = []
        for image in images:
            image = str(image)
            link = image.split('src="')[-1].split('"')[0]
            imgs.append(link)
        yhoo = imgs
    elif i.q != "" and typ == "video":
        query = i.q.replace(" ", "+")
        goog = YoutubeSearch(query, max_results=100).to_dict()
        b, duckduckgo, yhoo = [], [], []
    elif i.q != "" and typ == "news":
        query = i.q.replace(" ", "+")
        news = GoogleNews()
        news.set_lang('en')
        news.set_encode('utf-8')
        news.search(query)
        goog = news.results()
        b, duckduckgo, yhoo = [], [], []
    elif i.q != "" and typ == "maps":
        goog, b, duckduckgo, yhoo = [], [], [], []
    elif i.q != "" and typ == "shopping":
        goog = []
        b = []
        duckduckgo = []
        yhoo = []
        use_cache = False
        try:
            # if within 2 days of the last cache, use the cache (cache per user)
            if cache[session.get("user")][i.q]["last_updated"] + 172800 > time.time() \
                    and random.randint(1, 10) == 5:
                use_cache = True
        except Exception:
            pass
        print(use_cache)
        if use_cache:
            goog = cache[session.get("user")][i.q]["google"]
            b = cache[session.get("user")][i.q]["bing"]
            duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
            yhoo = cache[session.get("user")][i.q]["yahoo"]
        else:
            if "Google" in engines:
                queue1 = Queue()
                p = Process(target=gshop, args=(i.q, queue1))
                p.start()
            if "Bing" in engines:
                queue2 = Queue()
                p2 = Process(target=bing_shopping, args=(i.q, queue2))
                p2.start()
            if "Yahoo" in engines:
                queue3 = Queue()
                p3 = Process(target=yahoo_shopping, args=(i.q, queue3))
                p3.start()
            if "Google" in engines:
                goog = queue1.get()
                p.join()
            if "Bing" in engines:
                b = queue2.get()
                p2.join()
            if "Yahoo" in engines:
                yhoo = queue3.get()
                p3.join()
            if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines \
                    and "Bing" in engines and logged_in:
                try:
                    cache[session.get("user")][i.q] = {
                        "google": goog,
                        "bing": b,
                        "yahoo": yhoo,
                        "duckduckgo": duckduckgo,
                        "last_updated": time.time()
                    }
                except Exception:
                    pass
    return render.search(goog, b, duckduckgo, yhoo, i.q, sort, typ, engines,
                         logged_in, dictionary, info, ans)
### MODULES
from GoogleNews import GoogleNews


### METHODS
def show_routine(results):
    for num, page in enumerate(results):
        print(f"{num}. {page['date']} - {page['title']}")


### MAIN
# Set up the search
keywords = "covid cava de' tirreni"
period = '10d'
google_news = GoogleNews(lang='it', period=period)
google = GoogleNews(lang='it', period=period)

# Results from news.google.com
google_news.get_news(keywords)
results_gnews = google_news.results(sort=True)
show_routine(results_gnews)

# Results from google.com
google.search(keywords)
results_google = google.results(sort=True)
show_routine(results_google)
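# For reference, a quick sanity check of what each result dict carries; the
# key set (title, date, datetime, desc, link, media, img, ...) is an
# assumption based on typical GoogleNews output, and the fields differ
# between get_news() and search().
if results_gnews:
    print(sorted(results_gnews[0].keys()))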
def nextpage():
    from GoogleNews import GoogleNews
    googlenews = GoogleNews(lang='en')
    columns_of_languages = session['columns_of_languages']
    improve_link = session['improve_columns']
    art_link = session['art_columns']
    user = session['user']
    print(art_link)

    def fetch_links(column):
        # Pull the stored links for a column from the December table and map
        # each link onto a CSS top offset (40, 580, 1120, ...). This factors
        # out the identical DB block the original repeated three times.
        db = pymysql.connect(host="achintya.heliohost.us",
                             user="******",
                             password="******",
                             autocommit=True)
        try:
            cur = db.cursor()
            cur.execute("use achintya_maxo_scl")
            cur.execute(f"select {column} from December")
            link_tuple = cur.fetchall()
            linklist = [row[0] for row in link_tuple if row[0] is not None]
            link_with_topvalue = {}
            top = 40
            for link in linklist:
                link_with_topvalue[link] = top
                top += 540
            print(linklist)
            return link_with_topvalue
        finally:
            if db.open:
                db.close()

    for i in columns_of_languages:
        print(i)
        if i in request.form:
            button_name = request.form[i]
            print(button_name)
            if button_name != "":
                try:
                    link_with_topvalue = fetch_links(button_name)
                    googlenews.search('Learn' + button_name)
                    news = googlenews.results()
                    return render_template('language1.html',
                                           link_with_topvalue=link_with_topvalue,
                                           button_name=button_name,
                                           news=news,
                                           leng=5,
                                           user=user)
                except pymysql.err.OperationalError:
                    print('network error')
                    return redirect(url_for('nextpage', user=user))
                except Exception as e:
                    print(e)
                    return redirect(url_for('nextpage', user=user))

    for i in improve_link:
        print(i)
        if i in request.form:
            button_name = request.form[i]
            print(button_name)
            if button_name != "":
                try:
                    link_with_topvalue = fetch_links(button_name)
                    googlenews.search('improve english')
                    news = googlenews.results()
                    return render_template('language1.html',
                                           link_with_topvalue=link_with_topvalue,
                                           button_name=button_name,
                                           news=news,
                                           leng=5,
                                           user=user)
                except pymysql.err.OperationalError:
                    return redirect(url_for('nextpage'))
                except Exception as e:
                    print(e)
                    return redirect(url_for('nextpage'))

    for i in art_link:
        print(i)
        if i in request.form:
            print("hi")
            button_name = request.form.to_dict()[i]
            print(button_name)
            if button_name != "":
                try:
                    link_with_topvalue = fetch_links(button_name)
                    if button_name == 'Drawing' or button_name == 'Drawings':
                        button_name = 'life drawing'
                    googlenews.search('Learn' + button_name)
                    news = googlenews.results()
                    return render_template('language1.html',
                                           link_with_topvalue=link_with_topvalue,
                                           button_name=button_name,
                                           news=news,
                                           leng=5,
                                           user=user)
                except pymysql.err.OperationalError:
                    return redirect(url_for('nextpage'))
                except Exception as e:
                    print(e)
                    return redirect(url_for('nextpage'))
            else:
                try:
                    link_with_topvalue = fetch_links('Paintings')
                    googlenews.search('Paintings')
                    news = googlenews.results()
                    j = 'bg.png'
                    return render_template('language1.html',
                                           link_with_topvalue=link_with_topvalue,
                                           button_name="Paintings",
                                           news=news,
                                           leng=5,
                                           user=user,
                                           j=j)
                except pymysql.err.OperationalError:
                    return redirect(url_for('nextpage'))
                except Exception as e:
                    print(e)
                    return redirect(url_for('nextpage'))

    return render_template('mainpage_nextpage.html',
                           columns_of_languages=columns_of_languages,
                           user=user)
class GoogleNewsClient(object):
    """Retrieves weblinks from GoogleNews and retrieves web content using
    Article from Newspaper; runs sentiment analysis on text using TextBlob."""

    def __init__(self, start, end):
        self.googlenews = GoogleNews(start=start, end=end, lang='en')

    def get_news(self, query, count):
        """Creates a dataframe of weblinks from GoogleNews using user-input
        parameters of some query and number of pages to scan."""
        self.googlenews.search(query)
        for page in range(1, count):
            self.googlenews.get_page(page)  # was getpage(); the current GoogleNews method is get_page()
        result = self.googlenews.results()
        df = pd.DataFrame(result)
        return df

    def get_articles(self, news):
        """With the weblinks from the get_news dataframe, retrieves web
        content from each webpage."""
        records = []
        local_time = pytz.timezone("US/Eastern")
        for ind in news.index:
            record = {}
            try:
                article = Article(news['link'][ind], config=config)
                article.download()
                article.parse()
                article.nlp()
                if article.publish_date is None:
                    try:
                        date_format = datetime.datetime.strptime(news['date'][ind], "%b %d, %Y")
                        local_date = local_time.localize(date_format, is_dst=None)
                        utc_date = local_date.astimezone(pytz.UTC)  # was astimzone (typo)
                        record["Date"] = utc_date
                    except Exception:
                        current_utc = datetime.datetime.utcnow()
                        current_utc = current_utc.replace(tzinfo=pytz.utc)
                        # parse the leading number out of relative dates like
                        # "3 days ago" (the original called split() on the str builtin)
                        number = [int(s) for s in news['date'][ind].split() if s.isdigit()][0]
                        if "year" in news['date'][ind] or "years" in news['date'][ind]:
                            delta = dateutil.relativedelta.relativedelta(years=number)
                        elif "month" in news['date'][ind] or "months" in news['date'][ind]:
                            delta = dateutil.relativedelta.relativedelta(months=number)
                        elif "week" in news['date'][ind] or "weeks" in news['date'][ind]:
                            delta = dateutil.relativedelta.relativedelta(weeks=number)
                        elif "day" in news['date'][ind] or "days" in news['date'][ind]:
                            delta = dateutil.relativedelta.relativedelta(days=number)
                        elif "hour" in news['date'][ind] or "hours" in news['date'][ind]:
                            delta = dateutil.relativedelta.relativedelta(hours=number)
                        elif "min" in news['date'][ind] or "mins" in news['date'][ind]:
                            delta = dateutil.relativedelta.relativedelta(minutes=number)  # relativedelta uses 'minutes', not 'mins'
                        date = current_utc - delta
                        record["Date"] = date.astimezone(pytz.UTC)
                else:
                    date = article.publish_date
                    date = date.replace(tzinfo=pytz.utc)
                    record['Date'] = date
                record['Media'] = news['media'][ind]
                record['Title'] = article.title
                record['Article'] = article.text
                record['Summary'] = article.summary
                record['Keywords'] = article.keywords
                record['Link'] = news['link'][ind]
                records.append(record)
            except Exception:
                continue
        news_df = pd.DataFrame(records)
        news_df["Keywords"] = news_df["Keywords"].apply(lambda x: ','.join(map(str, x)))
        return news_df

    def get_sentiment(self, news_df):
        """Calculates polarity and sentiment of the web page's text content."""
        sentiment = []
        polarity = []
        for ind in news_df.index:
            analysis = TextBlob(news_df['Article'][ind])
            if analysis.sentiment.polarity > 0:
                sentiment.append('positive')
            elif analysis.sentiment.polarity == 0:
                sentiment.append('neutral')
            else:
                sentiment.append('negative')
            polarity.append(analysis.sentiment.polarity)
        news_df["Sentiment"] = sentiment
        news_df["Polarity"] = polarity
        return news_df
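# A hedged usage sketch for GoogleNewsClient; the dates, query, and page count
# are illustrative values, not from the original. Assumes the `config` object
# used by get_articles() is defined at module level.
client = GoogleNewsClient(start='01/01/2021', end='02/01/2021')
news = client.get_news('electric vehicles', count=2)   # scans page 1 only
articles = client.get_articles(news)
scored = client.get_sentiment(articles)
print(scored[['Title', 'Sentiment', 'Polarity']].head())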