def get_news(assunto):
    # Fetch Portuguese-language news for a topic (assunto) in a fixed date range.
    # Note: the library docs advise using either a period or a time range, not both.
    news = GoogleNews(period='d')
    news.set_lang('pt')  # was news.setlang(), which is not a GoogleNews method
    news.set_encode('utf-8')
    news.set_time_range('12/02/2021', '13/02/2021')
    news.get_news(assunto)
    results = news.get_texts()
    # "Sem notícias recentes" = "No recent news"
    result = results[3:8] if len(results) > 0 else "Sem notícias recentes"
    return result
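
# A hedged usage sketch for get_news() above. The topic string is an arbitrary
# example; note the function returns either a list slice or a fallback string,
# so the caller should check which one it got:
noticias = get_news('economia')
if isinstance(noticias, list):
    for texto in noticias:
        print(texto)
else:
    print(noticias)  # "Sem notícias recentes" -- no recent news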
def googleLinks(topic):
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(topic)  # populates internal results; the return value is not used
    links = googlenews.get_links()[:5]
    actualLinks = list()
    for l in links:
        l = "http://" + l
        print(l)
        # follow the Google redirect to get the article's real URL
        actualLinks.append(requests.get(l).url)
    return actualLinks
def search_google_news(query, google_date):
    # -- Retrieve news articles
    # Init googlenews
    googlenews = GoogleNews()
    # googlenews.set_period('7d')  # Cannot use set_period with set_time_range; use one or the other.
    # googlenews.set_time_range(str(google_date), '2020-10-12')
    googlenews.set_encode('utf-8')
    googlenews.search(query)
    googlenews.get_page(50)  # was getpage(); get_page() is the current method name
    result = googlenews.result()
    # Clear before searching again
    googlenews.clear()
    return result
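
# The comment above notes that set_period() and set_time_range() are mutually
# exclusive. A minimal sketch of the two configurations (query and dates are
# arbitrary examples):
from GoogleNews import GoogleNews

recent = GoogleNews()
recent.set_period('7d')  # relative window: the last seven days
recent.search('python')

windowed = GoogleNews()
windowed.set_time_range('10/01/2020', '10/12/2020')  # absolute MM/DD/YYYY range
windowed.search('python')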
def googleNewsApi(request, word):
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    googlenews.total_count()
    resultsGoogleNews = googlenews.results()
    # print(resultsGoogleNews)
    # print(googlenews.total_count())

    # TWITTER -- the hard-coded credentials have been redacted; supply your own keys
    consumer_key = 'YOUR_CONSUMER_KEY'
    consumer_secret = 'YOUR_CONSUMER_SECRET'
    access_token = 'YOUR_ACCESS_TOKEN'
    access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    date_since = datetime.today().strftime('%Y-%m-%d')
    print(date_since)
    # tweets = api.search(str("bitcoin"), count=1)
    tweets = tweepy.Cursor(api.search, q=str(word), lang="en", since=date_since).items(100)
    """print(tweets.__dict__['page_iterator'].__dict__)
    for tweet in tweets:
        print(tweet)
        print(tweet.id)"""
    # return googlenews
    """for result in resultsGoogleNews:
        title = result['title']
        date = result['date']
        link = result['link']
        source = result['site']
        news = {'title': title, 'date': date, 'link': link, 'site': source}
    """
    return render(request, 'homepage.html', {'news': resultsGoogleNews, 'tweets': tweets})
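
# A hedged alternative to hard-coding the (redacted) Twitter credentials above:
# read them from environment variables. The variable names are arbitrary examples.
import os

consumer_key = os.environ['TWITTER_CONSUMER_KEY']
consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']
access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']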
def scrape_the_news():
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent
    topiclist = NLP_news()
    print(topiclist[0])
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])
    result = googlenews.results()
    googlenews.clear()
    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]
    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()
    for i, row in df.iterrows():
        try:
            row.Link = 'https://' + row.Link
            columns = row.keys()
            values = [row[column] for column in columns]
            insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass), %s, %s, %s, %s, %s, %s)"
            curr.execute(insert_statement, tuple(values))
        except Exception:  # was a bare except, which also swallows KeyboardInterrupt
            print('could not add row', i)
    conn.commit()
    curr.close()
    conn.close()
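
# The INSERT above assumes the scrapenews_newslist table and its id sequence
# already exist. A hypothetical setup sketch -- the column names and types are
# guesses inferred from the DataFrame, not taken from the original project:
import psycopg2

conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
curr = conn.cursor()
curr.execute("""
    CREATE SEQUENCE IF NOT EXISTS scrapenews_newslist_id_seq;
    CREATE TABLE IF NOT EXISTS scrapenews_newslist (
        id integer PRIMARY KEY DEFAULT nextval('scrapenews_newslist_id_seq'),
        title text, summary text, image text, link text, date text, site text
    );
""")
conn.commit()
curr.close()
conn.close()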
def crawling_news(company_name_list, start_date, end_date):
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    # pass the actual arguments, not the literal strings 'start_date'/'end_date'
    googlenews.set_time_range(start_date, end_date)
    googlenews.set_encode('utf-8')
    # news.google.com search sample
    all_title = []
    logging.info('loop start')
    for i in range(len(company_name_list)):
        googlenews.get_news(company_name_list[i])
        logging.info('%s : %0.2f%s' % (company_name_list[i], ((i + 1) / len(company_name_list)) * 100, '%'))
        results = googlenews.results()  # fetch once per company instead of on every inner pass
        for j in range(len(results)):
            all_title.append(results[j].get('title'))
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logging.info('saved to csv, done!!')
    return all_title
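
# Hedged usage sketch for crawling_news(); the company names and MM/DD/YYYY
# dates are arbitrary examples (GoogleNews expects date strings in that format):
titles = crawling_news(['Apple', 'Microsoft', 'Amazon'], '01/01/2021', '02/01/2021')
print(titles.head())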
def googlenews_function(keyword='台積電', language='cn', start_date='2020/12/01', end_date='2020/12/28'):
    '''
    Parameters: date range, keyword, language, and how many pages to crawl.
    (The default keyword '台積電' is TSMC. Dates are given as YYYY/MM/DD and
    converted below to the MM/DD/YYYY format GoogleNews expects.)
    '''
    googlenews = GoogleNews()
    googlenews.clear()
    googlenews.set_encode('utf-8')
    googlenews.set_lang(language)
    # convert YYYY/MM/DD to MM/DD/YYYY
    start_year, start_month, start_day = start_date.split('/')
    all_date_start = '{}/{}/{}'.format(start_month, start_day, start_year)
    end_year, end_month, end_day = end_date.split('/')
    all_date_end = '{}/{}/{}'.format(end_month, end_day, end_year)
    googlenews.set_time_range(start=all_date_start, end=all_date_end)
    googlenews.search(keyword)
    data = googlenews.result()
    print("Total number of records:", len(data))
    news = pd.DataFrame(data)
    # news.to_csv("GoogleNews_" + keyword + "_" + start_date.replace('/', '-') + '_to_' + end_date.replace('/', '-') + ".csv", index=False)
    return news
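
# Hedged usage sketch for googlenews_function(); note the YYYY/MM/DD inputs,
# which the function converts to the MM/DD/YYYY format GoogleNews expects:
news_df = googlenews_function(keyword='TSMC', language='en',
                              start_date='2020/12/01', end_date='2020/12/28')
print(news_df.head())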
from datetime import date
from GoogleNews import GoogleNews

news = GoogleNews()
news.set_lang('en')
date_today = date.today()
# GoogleNews expects MM/DD/YYYY strings, not a date object
news.set_time_range('01/11/2020', date_today.strftime('%m/%d/%Y'))
news.set_encode('utf-8')
topic = input("Topic : ")
news.search(topic)
news.get_page(2)
# headlines with links -- WORLD NEWS
results = news.results()
for i in range(6):
    print(results[i]["title"])
    print(results[i]["link"])
def run_alexa():
    command = take_command()
    print(command)
    if 'music' in command:
        song = command.replace('play song', '')
        talk('I am playing your favourite ' + song)
        # print('playing')
        print(song)
        # playing the first video that appears in yt search
        pywhatkit.playonyt(song)
    elif 'time' in command:
        now = datetime.now()
        time = now.strftime("%H:%M:%S")
        print("time:", time)
        talk("Current time is " + time)
    elif 'month' in command or 'year' in command:
        # ('month' or 'year') in command only ever tested 'month'; check both words
        now = datetime.now()
        year = now.strftime("%Y")
        print("year:", year)
        talk("Current year is " + year)
        month = now.strftime("%m")
        print("month:", month)
        talk("Current month is " + month)
    elif 'date' in command:
        now = datetime.now()
        date_time = now.strftime("%m/%d/%Y, %H:%M:%S")
        print("date and time:", date_time)
        talk("Current date and time is " + date_time)
    # opens web.whatsapp at the specified time (about 10 minutes beforehand) and sends the msg
    elif 'whatsapp' in command:
        talk("To which number do you have to whatsapp")
        talk("Please dont forget to enter 10 digits with country code")
        num = input()
        talk("Enter the message you have to send")
        msg = input()
        talk("Enter the time to send the message")
        time = int(input())
        pywhatkit.sendwhatmsg(num, msg, time, 00)
        pywhatkit.showHistory()
        pywhatkit.shutdown(3000000000)
        # pywhatkit.sendwhatmsg("+919876543210", "This is a message", 15, 00)
    # Convert text to handwritten format
    elif 'convert' in command:
        text = command.replace('convert', '')
        pywhatkit.text_to_handwriting(text, rgb=[0, 0, 0])
    # Perform google search
    elif 'search' in command:
        key = command.replace('search', '')
        pywhatkit.search(key)  # was pywhatkit.search("key"), which searched the literal word "key"
    elif 'wikipedia' in command:
        person = command.replace('wikipedia', '')
        talk("How many pages do you want to read")
        num_pages = int(input())  # passed to wikipedia.summary(), where it is actually a sentence count
        # talk("In which language do you want to read")
        # l = input()
        # wikipedia.set_lang(l)
        info = wikipedia.summary(person, num_pages)
        print(info)
        talk(info)
    elif 'can you work for me' in command:
        talk("sorry, I have a headache. Please do your work")
    elif 'are you single' in command:
        talk("I am in a relationship with wifi")
    elif 'joke' in command:
        talk(pyjokes.get_joke())
        talk("sorry for the lamest joke")
    elif 'open google browser' in command:
        try:
            urL = 'https://www.google.com'
            chrome_path = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
            webbrowser.register('chrome', None, webbrowser.BackgroundBrowser(chrome_path))
            webbrowser.get('chrome').open_new_tab(urL)
            talk("Successfully opened chrome its upto you to search")
        except webbrowser.Error:  # was `except: webbrowser.Error`, a no-op expression
            pass
    elif 'google search' in command:
        word_to_search = command.replace('google search', '')
        response = GoogleSearch().search(word_to_search)
        print(response)
        for result in response.results:
            print("Title: " + result.title)
            talk("You can look for the following titles " + result.title)
    elif 'weather' in command:
        # base URL
        BASE_URL = "https://api.openweathermap.org/data/2.5/weather?"
        talk("Which city weather are you looking for")
        try:
            with sr.Microphone() as source:
                print('listening weather...')
                city_voice = listener.listen(source)
                city = listener.recognize_google(city_voice)
                # city = '"' + city.lower() + '"'
                print(city)
                # city = "bangalore"
                # API key (redacted -- supply your own OpenWeatherMap key)
                API_KEY = "Your API Key"
                # updating the URL
                URL = BASE_URL + "q=" + city + "&appid=" + API_KEY
                # HTTP request
                response = requests.get(URL)
                # checking the status code of the request
                if response.status_code == 200:
                    # getting data in the json format
                    data = response.json()
                    # getting the main dict block
                    main = data['main']
                    # getting temperature
                    temperature = main['temp']
                    # getting the humidity
                    humidity = main['humidity']
                    # getting the pressure
                    pressure = main['pressure']
                    # weather report
                    report = data['weather']
                    print(f"{city:-^30}")  # was CITY, which is undefined
                    print(f"Temperature: {temperature}")
                    print(f"Humidity: {humidity}")
                    print(f"Pressure: {pressure}")
                    print(f"Weather Report: {report[0]['description']}")
                    # str() the numeric fields before concatenating; report is a list
                    talk("Temperature in " + city + " is " + str(temperature) +
                         " humidity is " + str(humidity) + " pressure is " + str(pressure) +
                         " and your final weather report " + report[0]['description'])
                else:
                    # showing the error message
                    print("Error in the HTTP request")
                    talk("Error in the HTTP request")
        except Exception:
            talk("Hmmmmm, it looks like there is something wrong")
    elif 'news' in command:
        try:
            googlenews = GoogleNews()
            googlenews.set_lang('en')
            # googlenews.set_period('7d')
            # googlenews.set_time_range('02/01/2020', '02/28/2020')
            googlenews.set_encode('utf-8')
            talk("What news are you looking for")
            try:
                with sr.Microphone() as source:
                    print('listening news ...')
                    news_voice = listener.listen(source)
                    news_input = listener.recognize_google(news_voice)
                    news_input = news_input.lower()
                    print(news_input)
                    googlenews.get_news(news_input)
                    googlenews.search(news_input)
                    googlenews.get_page(2)
                    result = googlenews.page_at(2)
                    news = googlenews.get_texts()
                    print(news)
                    talk(news)
            except Exception:
                print("Error")
                talk("Error in reading input")
        except Exception:
            print("No news")
            talk(" I couldn't find any news on this day")
    elif 'play book' in command or 'read pdf' in command:
        # `'play book' or 'read pdf' in command` was always true; test each phrase
        talk("Which pdf do you want me to read")
        book_input = input()
        print(book_input)
        book = open(book_input, 'rb')
        # create pdfReader object
        pdfReader = PyPDF2.PdfFileReader(book)
        # count the total pages (keep it as an int; convert only for output)
        total_pages = pdfReader.numPages
        print("Total number of pages " + str(total_pages))
        talk("Total number of pages " + str(total_pages))
        # initialise speaker object
        # speaker = pyttsx3.init()
        # talk("Enter your starting page")
        # start_page = int(input())
        talk("here are the options for you, you can press 1 to Play a single page, "
             "2 to Play between start and end points, and 3 to Play the entire book")
        talk("Enter your choice")
        choice = int(input())
        if choice == 1:
            talk("Enter index number")
            page = int(input())
            page = pdfReader.getPage(page)
            text = page.extractText()
            talk(text)
            # speaker.say(text)
            # speaker.runAndWait()
        elif choice == 2:
            talk("Enter starting page number")
            start_page = int(input())
            talk("Enter ending page number")
            end_page = int(input())
            for page_num in range(start_page + 1, end_page):
                # was getPage(start_page + 1), which re-read the same page on every pass
                page = pdfReader.getPage(page_num)
                text = page.extractText()
                talk(text)
                # speaker.say(text)
                # speaker.runAndWait()
        elif choice == 3:
            for page_num in range(total_pages):
                # was range(total_pages + 1) on a str, which both crashes and overruns
                page = pdfReader.getPage(page_num)
                text = page.extractText()
                talk(text)
                # speaker.say(text)
                # speaker.runAndWait()
        else:
            talk("Haha!! Please enter valid choice")
    else:
        talk("Hiii Rashika, I am so bored can you please give me some proper commands")
def GET(self):
    if session.get("user"):
        logged_in = True
    else:
        logged_in = False
    i = web.input(q="", sort="table", typ="text")
    if i.q == "":
        if logged_in:
            stin = db[session.get("user")]
        else:
            stin = {
                "engines": {
                    "Google": "checked",
                    "Bing": "checked",
                    "DuckDuckGo": "checked",
                    "Yahoo": "checked"
                },
                "default_typ": {
                    "text": "checked",
                    "image": "",
                    "video": "",
                    "news": "",
                    "maps": "",
                    "shopping": ""
                }
            }
        return render.home(logged_in, stin)
    else:
        r = requests.get("http://httpbin.org/ip")
        global cache
        # clear cache if cache is too big
        if len(cache) > 25:
            cache = {}
        engines = []
        sort = i.sort
        typ = i.typ
        if "Google" in i:
            engines.append("Google")
        if "Bing" in i:
            engines.append("Bing")
        if "DuckDuckGo" in i:
            engines.append("DuckDuckGo")
        if "Yahoo" in i:
            engines.append("Yahoo")
        if "Google" not in i and "Bing" not in i and "DuckDuckGo" not in i and "Yahoo" not in i:
            if logged_in:
                engines = db[session.get("user")]['engines']
            else:
                engines = ['Google', 'Bing', 'DuckDuckGo', 'Yahoo']
        dictionary = []
        info = []
        ans = []
        if i.q != "" and typ == "text":
            start_time = time.time()
            goog = []
            b = []
            duckduckgo = []
            yhoo = []
            use_cache = False
            try:
                # if within 2 days of last cache, use cache
                # cache per user
                if cache[session.get("user")][i.q]["last_updated"] + 172800 > time.time() and random.randint(1, 10) == 5:
                    use_cache = True
            except Exception:
                pass
            if use_cache:
                goog = cache[session.get("user")][i.q]["google"]
                b = cache[session.get("user")][i.q]["bing"]
                duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                yhoo = cache[session.get("user")][i.q]["yahoo"]
            else:
                # run each enabled engine scraper in its own process
                if "Google" in engines:
                    queue1 = Queue()
                    p = Process(target=google, args=(i.q, queue1))
                    p.start()
                if "Bing" in engines:
                    queue2 = Queue()
                    p2 = Process(target=bing, args=(i.q, queue2))
                    p2.start()
                if "DuckDuckGo" in engines:
                    queue3 = Queue()
                    p3 = Process(target=ddg, args=(i.q, queue3))
                    p3.start()
                if "Yahoo" in engines:
                    queue4 = Queue()
                    p4 = Process(target=yahoo, args=(i.q, queue4))
                    p4.start()
                if "Google" in engines:
                    goog = queue1.get()
                    p.join()
                if "Bing" in engines:
                    b = queue2.get()
                    p2.join()
                if "DuckDuckGo" in engines:
                    duckduckgo = queue3.get()
                    p3.join()
                if "Yahoo" in engines:
                    yhoo = queue4.get()
                    p4.join()
                dictionary = word_dictionary(i.q)
                info = infobox(i.q)
                ans = ansbox(i.q)
                if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                    try:
                        cache[session.get("user")][i.q] = {
                            "google": goog,
                            "bing": b,
                            "yahoo": yhoo,
                            "duckduckgo": duckduckgo,
                            "last_updated": time.time()
                        }
                    except Exception:
                        pass
            data = []
            e = []
            f = []
            for g in goog:
                g['engine'] = "Google"
                e.append(g)
                f.append(g['title'])
            for bingresult in b:
                bingresult['engine'] = "Bing"
                e.append(bingresult)
                f.append(bingresult['title'])
            for d in duckduckgo:
                d['engine'] = "DuckDuckGo"
                e.append(d)
                f.append(d['title'])
            for y in yhoo:
                y['engine'] = 'Yahoo'
                e.append(y)
                f.append(y['title'])

            def getnum(s0, s1):
                # percentage of words in s0 that also appear in s1
                s0 = s0.lower()
                s1 = s1.lower()
                s0List = s0.split(" ")
                s1List = s1.split(" ")
                num = len(list(set(s0List) & set(s1List)))
                return round(num / len(s0List) * 100)

            g = set(f)
            counter = 0
            so = []
            for item in e:
                if "stackoverflow.com" in item['link']:
                    thing = ""
                    for x in so:
                        if getnum(x[0]['title'], item['title']) >= 90:
                            thing = x
                            break
                    if thing:
                        so.remove(thing)
                        engines = x[1]
                        engines.append(item['engine'])
                        x = [x[0], engines]
                        so.append(x)
                    else:
                        engines = [item['engine']]
                        x = [item, engines]
                        so.append(x)
                else:
                    thing = ""
                    for x in data:
                        if getnum(x[0]['title'], item['title']) >= 90:
                            thing = x
                            break
                    if thing:
                        data.remove(thing)
                        engines = x[1]
                        engines.append(item['engine'])
                        x = [x[0], engines, x[2]]
                        data.append(x)
                    else:
                        engines = [item['engine']]
                        x = [item, engines, counter]
                        data.append(x)
                        counter += 1
            done = 0
            data2 = []
            for item in data:
                if done == len(data):
                    break
                # restore each merged result to its original position
                if data.index(item) != item[2]:
                    data.insert(item[2], data.pop(data.index(item)))
                done += 1
            data2, data = data, data2
            for item in so:
                data.append(item)
            for item in data2:
                data.append(item)
            print("--- %s seconds ---" % (time.time() - start_time))
            return render.text(data, i.q, dictionary, info, ans, logged_in)
        elif i.q != "" and typ == "image":
            query = i.q.replace(" ", "+")
            goog = requests.get(
                f"https://google.com/search?q={query}&tbm=isch",
                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'}).content
            soup = BeautifulSoup(goog, "html.parser")
            images = soup.findAll('img')
            imgs = []
            for image in images:
                image = str(image)
                link = image.split('src="')[-1].split('"')[0]
                imgs.append(link)
            goog = imgs
            b = requests.get(
                f"https://bing.com/images/search?q={query}&form=HDRSC2",
                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'}).content
            soup = BeautifulSoup(b, "html.parser")
            images = soup.findAll('img')
            imgs = []
            for image in images:
                image = str(image)
                link = image.split('src="')[-1].split('"')[0]
                if link.startswith("/rp"):
                    link = f"https://bing.com/images/search?q={query}&form=HDRSC2" + link
                if link != "<img alt=":
                    imgs.append(link)
            b = imgs
            duckduckgo = requests.get(
                f"https://duckduckgo.com/?q={query}&ia=images",
                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'})
            soup = BeautifulSoup(duckduckgo.content, "html.parser")
            images = soup.findAll('img')
            imgs = []
            for image in images:
                image = str(image)
                link = image.split('src="')[-1].split('"')[0]
                imgs.append(link)
            duckduckgo = imgs
            yhoo = requests.get(
                f"https://images.search.yahoo.com/search/images;_ylt=A0geJaQetm1gPx0AGURXNyoA;_ylu=Y29sbwNiZjEEcG9zAzEEdnRpZAMEc2VjA3BpdnM-?p={query}&fr2=piv-web&fr=opensearch",
                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'}).content
            soup = BeautifulSoup(yhoo, "html.parser")
            images = soup.findAll('img')
            imgs = []
            for image in images:
                image = str(image)
                link = image.split('src="')[-1].split('"')[0]
                imgs.append(link)
            yhoo = imgs
        elif i.q != "" and typ == "video":
            query = i.q.replace(" ", "+")
            goog = YoutubeSearch(query, max_results=100).to_dict()
            b, duckduckgo, yhoo = [], [], []
        elif i.q != "" and typ == "news":
            query = i.q.replace(" ", "+")
            news = GoogleNews()
            news.set_lang('en')
            news.set_encode('utf-8')
            news.search(query)
            goog = news.results()
            b, duckduckgo, yhoo = [], [], []
        elif i.q != "" and typ == "maps":
            goog, b, duckduckgo, yhoo = [], [], [], []
        elif i.q != "" and typ == "shopping":
            goog = []
            b = []
            duckduckgo = []
            yhoo = []
            use_cache = False
            try:
                # if within 2 days of last cache, use cache
                # cache per user
                if cache[session.get("user")][i.q]["last_updated"] + 172800 > time.time() and random.randint(1, 10) == 5:
                    use_cache = True
            except Exception:
                pass
            print(use_cache)
            if use_cache:
                goog = cache[session.get("user")][i.q]["google"]
                b = cache[session.get("user")][i.q]["bing"]
                duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                yhoo = cache[session.get("user")][i.q]["yahoo"]
            else:
                if "Google" in engines:
                    queue1 = Queue()
                    p = Process(target=gshop, args=(i.q, queue1))
                    p.start()
                if "Bing" in engines:
                    queue2 = Queue()
                    p2 = Process(target=bing_shopping, args=(i.q, queue2))
                    p2.start()
                if "Yahoo" in engines:
                    queue3 = Queue()
                    p3 = Process(target=yahoo_shopping, args=(i.q, queue3))
                    p3.start()
                if "Google" in engines:
                    goog = queue1.get()
                    p.join()
                if "Bing" in engines:
                    b = queue2.get()
                    p2.join()
                if "Yahoo" in engines:
                    yhoo = queue3.get()
                    p3.join()
                if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                    try:
                        cache[session.get("user")][i.q] = {
                            "google": goog,
                            "bing": b,
                            "yahoo": yhoo,
                            "duckduckgo": duckduckgo,
                            "last_updated": time.time()
                        }
                    except Exception:
                        pass
        return render.search(goog, b, duckduckgo, yhoo, i.q, sort, typ, engines, logged_in, dictionary, info, ans)
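
# A standalone sketch of the title-overlap merge used in the text branch above:
# getnum() scores the percentage of words in the first title that also appear in
# the second, and scores >= 90 are treated as duplicates. The sample titles here
# are made up for illustration.
def getnum(s0, s1):
    s0List = s0.lower().split(" ")
    s1List = s1.lower().split(" ")
    num = len(set(s0List) & set(s1List))
    return round(num / len(s0List) * 100)

a = "Python 3.10 released with pattern matching"
b = "Python 3.10 released with pattern matching today"
c = "Rust 1.50 ships const generics"
print(getnum(a, b))  # high overlap -> would be merged as one result
print(getnum(a, c))  # low overlap -> kept as separate results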
def main():
    all_df = []
    sid_obj = SentimentIntensityAnalyzer()
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-16')
    """
    Primary Phrases refer to the keywords we are interested in studying
    Secondary Phrases refer to the target countries
    """
    company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']
    # testing_countries = ['Egypt', 'Kenya', 'Nigeria']
    testing_countries = []
    """
    Months refer to the date range
    """
    # months = ['08/01/2020', '09/01/2020', '10/01/2020']
    # months = ['01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
    months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
    for first in company_name:
        fin = []
        seen = []
        with open('sample.csv', mode='r') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            summary_data = []
            for row in csv_reader:
                # print(row)
                second = row['\ufeffCountry']
                if second not in testing_countries and len(testing_countries) != 0:
                    continue
                full_phrase = first + " " + second
                print(full_phrase)
                counter = 0
                sum_sent = 0
                pos_count = 0
                # neu_count = 0
                neg_count = 0
                neg_article = {'title': 'N/A', '% Negative': 0}
                for i in range(0, len(months) - 1):
                    googlenews.set_time_range(months[i], months[i + 1])
                    googlenews.get_news(full_phrase)
                    res = googlenews.results()
                    # It would be very easy to get more than the first page: use
                    # googlenews.get_page(2) or result = googlenews.page_at(2), in
                    # conjunction with googlenews.total_count() (to see how many
                    # results show up on that page; if there are zero, that's
                    # probably the last page, but I'm not sure that's exactly how
                    # it works). See the pagination sketch after this function.
                    for result in res:
                        if result['title'] not in seen:
                            # print(result)
                            result['start date'] = months[i]
                            result['end date'] = months[i + 1]
                            result['company'] = first
                            result['country'] = second
                            result['latitude'] = row['Latitude']
                            result['longitude'] = row['Longitude']
                            sentiment_dict = sid_obj.polarity_scores(result['title'])
                            result['% Negative'] = sentiment_dict['neg'] * 100
                            result['% Neutral'] = sentiment_dict['neu'] * 100
                            result['% Positive'] = sentiment_dict['pos'] * 100
                            result['Magnitude'] = sentiment_dict['compound'] * 50 + 50
                            counter += 1
                            sum_sent += result['Magnitude']
                            # result.pop('date')
                            # result.pop('datetime')
                            # result.pop('img')
                            # result.pop('media')
                            # if result['% Negative'] > result['% Neutral'] and result['% Negative'] > result['% Positive']: neg_count += 1
                            # elif result['% Neutral'] > result['% Positive']: neu_count += 1
                            # else: pos_count += 1
                            if result['% Positive'] > result['% Negative']:
                                pos_count += 1
                            else:
                                neg_count += 1
                            if result['% Negative'] >= neg_article['% Negative']:
                                neg_article = result
                            fin.append(result)
                            seen.append(result['title'])
                posPercent = 50
                if pos_count + neg_count > 0:
                    # expressed as a percentage to match the default of 50 above
                    posPercent = pos_count / (pos_count + neg_count) * 100
                magni = 0
                if counter > 0:
                    magni = sum_sent / counter
                country_comp_score = {'country': second, 'latitude': row['Latitude'],
                                      'longitude': row['Longitude'], 'magnitude': magni,
                                      'positive': pos_count, 'negative': neg_count,
                                      'pos/(pos+neg)': posPercent,
                                      'Most negative title': neg_article['title']}
                summary_data.append(country_comp_score)
                all_df.append((country_comp_score, first))
        df = pd.DataFrame(fin)
        df = df.drop(columns=['date', 'datetime', 'img', 'media'])  # drop() returns a copy; assign it back
        df.to_csv("./Output/{}_output.csv".format(first), index=False)
        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv("./Output/{}_summary_output.csv".format(first), index=False)
        # all_df.append(summary_df)

    # meta_data = []
    # with open('sample.csv', mode='r') as csv_file:
    #     dic_len = sum(1 for line in open('sample.csv'))
    # with open('sample.csv', mode='r') as csv_file:
    #     csv_reader = csv.DictReader(csv_file)
    #     for j in range(0, dic_len):
    #         most_pos = 0
    #         for i in range(0, len(company_name)):
    #             if all_df[most_pos][j]['positive'] < all_df[i][j]['positive']:
    #                 most_pos = i
    #         meta_data.append({all_df[0][j]['\ufeffCountry']: company_name[most_pos]})
    fields = ['Country', 'Company', 'Count']
    meta_data = []
    seen = []
    for result in all_df:
        if result[0]['country'] not in seen:
            seen.append(result[0]['country'])
            meta_data.append([result[0]['country'], result[1], result[0]['positive']])
        else:
            for candidate in meta_data:
                if candidate[0] == result[0]['country'] and candidate[2] < result[0]['positive']:
                    candidate[1] = result[1]
                    candidate[2] = result[0]['positive']
    with open('./Output/meta_data.csv', 'w') as f:
        write = csv.writer(f)
        write.writerow(fields)
        write.writerows(meta_data)
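
# The pagination comment inside main() suggests walking further result pages.
# A hedged sketch (the query and page limit are arbitrary), assuming page_at(n)
# returns an empty list once past the last available page:
from GoogleNews import GoogleNews

gn = GoogleNews(lang='en')
gn.search('Pfizer vaccine')
all_results = list(gn.results())
for page in range(2, 5):
    rows = gn.page_at(page)
    if not rows:  # an empty page is probably the last one
        break
    all_results.extend(rows)
print(len(all_results))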
""" Before you start, install the library using: pip install GoogleNews """ from GoogleNews import GoogleNews import pandas as pd googlenews = GoogleNews() googlenews.set_lang('en') googlenews.set_encode('utf-8') """ Primary Phrases refer to the keywords we are interested in studying Secondary Phrases refer to the target countries """ primary_phrases = ['Pfizer Vaccine', 'AstraZeneca Vaccine', 'Sputnik V Vaccine', 'Sinovac Vaccine'] secondary_phrases = ['Namibia', 'France', 'South Africa'] # months = ['01/01/2019', '02/01/2019', '03/01/2019', '04/01/2019', '05/01/2019', '06/01/2019', '07/01/2019', '08/01/2019', '09/01/2019', '10/01/2019', '11/01/2019', '12/01/2019', '01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021'] """ Months refer to the date range """ months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021'] fin = []