def testResultNumberWithTwoPages(self):
    # Run the search, then additionally load page 2 into the same client.
    client = GoogleNews()
    client.search(keyword)
    client.get_page(2)

    # The accumulated result list is expected to hold exactly 20 entries.
    collected = client.result()
    self.assertEqual(len(collected), 20)
    print('Result length with two pages is correct')
def get_search_results(keyword: str):
    """Query Google News for ``keyword`` and return at most five results.

    The client is configured for English, a seven-day period and UTF-8
    encoding. ``get_news`` and ``search`` are both invoked for the keyword,
    page 1 is requested, and the first five entries of ``results()`` are
    returned.
    """
    client = GoogleNews(lang="en", period="7d", encode="utf-8")

    # Both lookups are issued for the same keyword, in this order.
    for lookup in (client.get_news, client.search):
        lookup(keyword)
    client.get_page(1)

    return client.results()[:5]
def extract_history(self, from_date, to_date, filters, max_pages=10, max_itens=100, country="BR", language="PT", extract_full=False):
    """Collect Google News results for ``filters`` between two dates.

    The ``from_date``..``to_date`` range is cut into windows of at most 30
    days and each window is searched separately, paging through the results
    until enough titles were gathered or the page limit is reached.

    Args:
        from_date: Start of the range; a date-like object, or a string that
            is parsed with ``self.__convert_str_to_date``.
        to_date: End of the range; same accepted types as ``from_date``.
        filters: Search expression handed to ``GoogleNews.search``.
        max_pages: Paging stops once the page counter reaches this value
            (pages ``1 .. max_pages - 1`` are requested per window).
        max_itens: Paging stops for a window once at least this many titles
            were collected.
        country: Currently unused.
        language: Currently unused (the client is created with ``lang='pt'``).
        extract_full: Currently unused.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``published date``,
        ``description``, ``url`` and ``publisher``; an empty frame when
        nothing was collected (including when ``from_date >= to_date``).
    """
    if isinstance(from_date, str):
        from_date = self.__convert_str_to_date(from_date)
    if isinstance(to_date, str):
        to_date = self.__convert_str_to_date(to_date)

    # Build consecutive windows: each spans up to 30 days and the next one
    # starts 31 days after the previous start, so windows never overlap.
    date_intervals = []
    current = from_date
    while current < to_date:
        final = current + relativedelta(days=30)
        if final > to_date:
            final = to_date
        date_intervals.append((current, final))
        current = current + relativedelta(days=31)

    source_columns = ['title', 'datetime', 'desc', 'link', 'media']
    output_columns = ['title', 'published date', 'description', 'url', 'publisher']

    data = []
    for window_start, window_end in date_intervals:
        googlenews = GoogleNews(
            lang='pt',
            start=window_start.strftime("%d/%m/%Y"),
            end=window_end.strftime("%d/%m/%Y"),
        )
        googlenews.search(filters)

        num_page = 1
        titles = []
        while len(titles) < max_itens and num_page < max_pages:
            googlenews.get_page(num_page)
            results = googlenews.result()
            if not results:
                # Nothing found for this window; an empty frame has no
                # 'title' column, so indexing it would raise KeyError.
                break

            df = pd.DataFrame(results)
            # result() is cumulative: keep only rows not collected yet and
            # drop titles repeated inside the same result list.
            df = df[~df['title'].isin(titles)]
            df = df.drop_duplicates(subset='title')
            df = df[source_columns]
            df.columns = output_columns

            titles.extend(df['title'])
            num_page += 1
            data.append(df)

    if not data:
        return pd.DataFrame([])
    return pd.concat(data, ignore_index=True)
def get_news(query: str, pages: int = 35) -> List[Dict[str, Any]]:
    """
    Search news defined by query. Returns a list of search results.

    The search is restricted to the period 01/01/2010 - 01/01/2015.

    Parameters
    ----------
    query: str
        The news search query to use.
    pages: int
        Number of result pages to request (pages ``1 .. pages``).

    Returns
    -------
    news: list of news items.
        News list, each element in the list is a dictionary containing
        news details like title, date, URL etc. Each article appears once.
    """
    googlenews = GoogleNews(start='01/01/2010', end='01/01/2015')
    googlenews.search(query)

    # Result pages are numbered from 1.
    for page in tqdm(range(1, pages + 1), leave=False):
        googlenews.get_page(page)

    # The client accumulates every fetched page internally, so the results
    # are read once after paging instead of being re-added on every page.
    news = []
    seen = set()
    for item in googlenews.results():
        identity = (item.get('link'), item.get('title'))
        if identity in seen:
            continue
        seen.add(identity)
        news.append(item)
    return news
from datetime import date
from GoogleNews import GoogleNews

news = GoogleNews()
news.set_lang('en')
date_today = date.today()
# Both bounds are passed as strings in the same mm/dd/YYYY layout; handing
# over the raw date object would render the end bound as YYYY-MM-DD instead.
news.set_time_range('01/11/2020', date_today.strftime('%m/%d/%Y'))
news.set_encode('utf-8')

topic = input("Topic : ")
news.search(topic)
news.get_page(2)

# headlines with links WORLD NEWS
# Read the result list once and show at most six entries, so fewer than six
# hits no longer raises IndexError.
for article in news.results()[:6]:
    print(article["title"])
    print(article["link"])
def _safe_mean(total, count):
    """Return ``total / count``, or 0.0 when there is nothing to average."""
    return total / count if count else 0.0


def createSentimentList(topic, startMonth, startYear, endMonth, endYear, dirName=None, inputSentimentList=None, pages=5):
    """Build per-month sentiment summaries of Google News headlines.

    For every month from the start month up to (but not including) the end
    month, the topic is searched on Google News, each headline is scored with
    TextBlob and VADER, the scored headlines are written to
    ``outputs/<dirName>/batches/<month>_<year>_<topic>.csv`` and one summary
    dict is appended to the output list.

    Args:
        topic: Search term.
        startMonth: First month to process (1-12).
        startYear: Year of the first month.
        endMonth: Month at which processing stops (exclusive).
        endYear: Year of ``endMonth``.
        dirName: Sub-directory of ``outputs`` that receives the batch CSVs.
        inputSentimentList: Existing non-empty list to append the summaries
            to; when omitted or empty a new list is created.
        pages: Number of result pages to load per month.

    Returns:
        The list of summary dicts (the passed-in list when one was given).
        "Adjusted" means are taken over headlines with a non-zero score only;
        any mean without samples is reported as 0.0.
    """
    print("Running createSentimentList function")
    if (not inputSentimentList):
        print("Initialising empty output list")
        output = []
    else:
        print("Initialising exisiting list as output list")
        output = inputSentimentList

    monthsInYear = 12
    monthCount = (endYear - startYear)*monthsInYear + (endMonth - startMonth)
    print(f"Number of months (batches):{monthCount}")

    for monthOffset in range(monthCount):
        batch = []

        # Prepare start date and end date. Working zero-based lets divmod
        # split the running month counter into a year offset and a month.
        yearOffset, zeroBasedMonth = divmod(startMonth + monthOffset - 1, monthsInYear)
        inputMonth = zeroBasedMonth + 1
        inputYear = startYear + yearOffset
        print(f"Loop:{monthOffset+1}, Month: {inputMonth}, Year:{inputYear}")

        startDate = datetime.datetime(inputYear, inputMonth, 1)
        endDate = last_day_of_month(startDate)
        startDateStr = startDate.strftime("%m/%d/%Y")
        endDateStr = endDate.strftime("%m/%d/%Y")
        print(f"Date range: {startDateStr} - {endDateStr}")

        # Create GoogleNews Object
        googlenews = GoogleNews(lang='en', start=startDateStr, end=endDateStr, encode='utf-8')
        print(f"searching the topic '{topic}' on google from {startDateStr} - {endDateStr}...")
        googlenews.search(topic)  # get_page(1) is called by default.
        print("retrieved page 1")

        # Load the remaining pages (2 .. pages).
        for pageNumber in range(2, pages + 1):
            googlenews.get_page(pageNumber)
            print(f"retrieved page {pageNumber}")

        totalPolarity = 0
        totalSubjectivity = 0
        adjustedPolarity = 0
        adjustedSubjectivity = 0
        vaderPolarity = 0
        adjustedVaderPolarity = 0
        textBlobAdjustedLength = 0
        vaderAdjustedLength = 0

        for newsDict in googlenews.result(sort=True):
            headline = newsDict["title"]
            opinion = TextBlob(headline).sentiment
            vaderOpinion = analyzer.polarity_scores(headline)['compound']

            newsDict["textBlob polarity"] = opinion.polarity
            newsDict["textBlob subjectivity"] = opinion.subjectivity
            newsDict["vader polarity"] = vaderOpinion

            vaderPolarity += vaderOpinion
            totalPolarity += opinion.polarity
            totalSubjectivity += opinion.subjectivity

            # The "adjusted" figures ignore headlines scored exactly 0.
            if opinion.polarity != 0:
                textBlobAdjustedLength += 1
                adjustedPolarity += opinion.polarity
                adjustedSubjectivity += opinion.subjectivity
            if vaderOpinion != 0:
                vaderAdjustedLength += 1
                adjustedVaderPolarity += vaderOpinion

            batch.append(newsDict)

        batchName = f"{inputMonth}_{inputYear}_{topic}"
        batchPath = f"outputs/{dirName}/batches/{batchName}.csv"
        pd.DataFrame(batch).to_csv(batchPath, index=False)
        # Report the path that was actually written.
        print(f"Created {batchPath}")

        batchLength = len(batch)
        # A month without articles (or without any non-neutral score) would
        # otherwise divide by zero; such means are reported as 0.0.
        batchData = {
            'month': inputMonth,
            'year': inputYear,
            'topic': topic,
            'batchLength': batchLength,
            'textBlobAdjustedLength': textBlobAdjustedLength,
            'vaderAdjustedLength': vaderAdjustedLength,
            'meanPolarity': _safe_mean(totalPolarity, batchLength),
            'meanSubjectivity': _safe_mean(totalSubjectivity, batchLength),
            'meanAdjustedPolarity': _safe_mean(adjustedPolarity, textBlobAdjustedLength),
            'meanAdjustedSubjectivity': _safe_mean(adjustedSubjectivity, textBlobAdjustedLength),
            'meanVaderPolarity': _safe_mean(vaderPolarity, batchLength),
            'meanAdjustedVaderPolarity': _safe_mean(adjustedVaderPolarity, vaderAdjustedLength),
        }
        output.append(batchData)

    return output
def _announce_weather():
    """Ask for a city by voice and speak its current OpenWeatherMap report."""
    base_url = "https://api.openweathermap.org/data/2.5/weather"
    talk("Which city weather are you looking for")
    try:
        with sr.Microphone() as source:
            print('listening weather...')
            city_voice = listener.listen(source)
            city = listener.recognize_google(city_voice)
        print(city)

        # SECURITY NOTE(review): the API key is hard-coded in source; it
        # should be loaded from configuration or the environment instead.
        api_key = "b5a362ef1dc8e16c673dd5049aa98d8f"

        # Let requests build the query string so the spoken city name is
        # URL-encoded, and bound the wait with a timeout.
        response = requests.get(
            base_url, params={"q": city, "appid": api_key}, timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            main = data['main']
            temperature = main['temp']
            humidity = main['humidity']
            pressure = main['pressure']
            description = data['weather'][0]['description']

            print(f"{city:-^30}")
            print(f"Temperature: {temperature}")
            print(f"Humidity: {humidity}")
            print(f"Pressure: {pressure}")
            print(f"Weather Report: {description}")
            # The readings are numbers, so format them instead of using
            # string concatenation (which raised TypeError).
            talk(
                f"Temperature in {city} is {temperature} humidity is {humidity}"
                f" pressure is {pressure} and your final weather report {description}"
            )
        else:
            print("Error in the HTTP request")
            talk("Error in the HTTP request")
    except Exception:
        talk("Hmmmmm, it looks like there is something wrong")


def _read_news():
    """Ask for a news topic by voice and read the matching headlines aloud."""
    try:
        googlenews = GoogleNews()
        googlenews.set_lang('en')
        googlenews.set_encode('utf-8')
        talk("What news are you looking for")
        try:
            with sr.Microphone() as source:
                print('listening news ...')
                news_voice = listener.listen(source)
                news_input = listener.recognize_google(news_voice)
            news_input = news_input.lower()
            print(news_input)

            googlenews.get_news(news_input)
            googlenews.search(news_input)
            googlenews.get_page(2)
            news = googlenews.get_texts()
            print(news)
            # get_texts() yields a list of headlines; speak them as one text.
            talk(". ".join(news))
        except Exception:
            print("Error")
            talk("Error in reading input")
    except Exception:
        print("No news")
        talk(" I couldn't find any news on this day")


def _read_pdf_aloud():
    """Prompt for a PDF path and a reading mode, then speak the chosen pages."""
    talk("Which pdf do you want me to read")
    book_input = input()
    print(book_input)

    # The context manager guarantees the file is closed when reading ends.
    with open(book_input, 'rb') as book:
        pdfReader = PyPDF2.PdfFileReader(book)
        total_pages = pdfReader.numPages  # kept as int; needed for range()
        print("Total number of pages " + str(total_pages))
        talk("Total number of pages " + str(total_pages))

        talk(
            " here are the options for you, you can press 1 to Play a single page 2 to Play between start and end points and 3 to Play the entire book "
        )
        talk("Enter your choice")
        choice = int(input())

        if choice == 1:
            talk("Enter index number")
            page_indexes = [int(input())]
        elif choice == 2:
            talk("Enter starting page number")
            start_page = int(input())
            talk("Enter ending page number")
            end_page = int(input())
            # NOTE(review): the original bounds are kept (both end points
            # are excluded); confirm whether an inclusive range was meant.
            page_indexes = range(start_page + 1, end_page)
        elif choice == 3:
            page_indexes = range(total_pages)
        else:
            talk("Haha!! Please enter valid choice")
            return

        for page_index in page_indexes:
            talk(pdfReader.getPage(page_index).extractText())


def run_alexa():
    """Take one voice command and dispatch it to the matching action.

    The command text returned by ``take_command()`` is matched against a
    list of keywords in order; the first match wins. Supported actions:
    playing music on YouTube, telling time/month/year/date, sending a
    WhatsApp message, text-to-handwriting, web search, Wikipedia summary,
    small talk, jokes, opening Chrome, weather, news and reading a PDF.
    Unrecognised commands get a spoken fallback message.
    """
    command = take_command()
    print(command)

    if 'music' in command:
        song = command.replace('play song', '')
        talk('I am playing your favourite ' + song)
        print(song)
        # playing the first video that appears in yt search
        pywhatkit.playonyt(song)

    elif 'time' in command:
        current_time = datetime.now().strftime("%H:%M:%S")
        print("time:", current_time)
        talk("Current time is " + current_time)

    # `('month' or 'year') in command` only ever tested for 'month'.
    elif 'month' in command or 'year' in command:
        now = datetime.now()
        year = now.strftime("%Y")
        print("year:", year)
        talk("Current year is " + year)
        month = now.strftime("%m")
        print("month:", month)
        talk("Current month is " + month)

    elif 'date' in command:
        date_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        print("date and time:", date_time)
        talk("Current date and time is " + date_time)

    # opens web.whatsapp at specified time i.e before 10 minutes and send the msg
    elif 'whatsapp' in command:
        talk("To which number do you have to whatsapp")
        talk("Please dont forget to enter 10 digits with country code")
        num = input()
        talk("Enter the message you have to send")
        msg = input()
        talk("Enter the time to send the message")
        send_hour = int(input())
        pywhatkit.sendwhatmsg(num, msg, send_hour, 0)
        pywhatkit.showHistory()
        pywhatkit.shutdown(3000000000)

    # Convert text to handwritten format
    elif 'convert' in command:
        text = command.replace('convert', '')
        pywhatkit.text_to_handwriting(text, rgb=[0, 0, 0])

    # Must be tested before the plain 'search' branch, which would otherwise
    # swallow every "google search ..." command.
    elif 'google search' in command:
        word_to_search = command.replace('google search', '')
        response = GoogleSearch().search(word_to_search)
        print(response)
        for result in response.results:
            print("Title: " + result.title)
            talk("You can look for the following titles " + result.title)

    # Perform google search
    elif 'search' in command:
        key = command.replace('search', '')
        # Search for the spoken phrase, not the literal string "key".
        pywhatkit.search(key)

    elif 'wikipedia' in command:
        person = command.replace('wikipedia', '')
        talk("How many pages do you want to read")
        num_pages = int(input())
        # The number entered is used as the summary's sentence count.
        info = wikipedia.summary(person, sentences=num_pages)
        print(info)
        talk(info)

    elif 'can you work for me' in command:
        talk("sorry, I have headache. Please do your work")

    elif 'are you single' in command:
        talk("I am in relationshhip with wifi")

    elif 'joke' in command:
        talk(pyjokes.get_joke())
        talk("sorry for the lamest joke")

    elif 'open google browser' in command:
        url = 'https://www.google.com'
        chrome_path = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
        try:
            webbrowser.register('chrome', None, webbrowser.BackgroundBrowser(chrome_path))
            webbrowser.get('chrome').open_new_tab(url)
            talk("Successfully opened chrome its upto you to search")
        except webbrowser.Error as error:
            # Previously swallowed silently; at least report the failure.
            print("Could not open chrome:", error)

    elif 'weather' in command:
        _announce_weather()

    elif 'news' in command:
        _read_news()

    # `'play book' or 'read pdf' in command` was always true, which made the
    # fallback below unreachable.
    elif 'play book' in command or 'read pdf' in command:
        _read_pdf_aloud()

    else:
        talk(
            "Hiii Rashika, I am so bored can you please give me some proper commands"
        )
from GoogleNews import GoogleNews

# A default client is created first and then replaced by an English one.
googlenews = GoogleNews()
googlenews = GoogleNews(lang='en')

# Issue the news lookup and the regular search for the same term, in order.
for lookup in (googlenews.get_news, googlenews.search):
    lookup('APPLE')
googlenews.get_page(1)

# Both accessors are called; their return values are not stored.
googlenews.result()
googlenews.get_texts()