コード例 #1
0
ファイル: temp.py プロジェクト: manishmj9431/ISO-25-E1
def getNews(query):
    """Search Google News for *query* and return up to 6 articles.

    Each article is a dict with 'title', 'description' and 'link';
    the first article additionally carries an 'image' key.
    """
    googleNews = GoogleNews()
    googleNews.search(query)

    news = []

    # Cap at 6 results.  The original loop used `i > number` with
    # number = min(len, 6), which let a 7th item slip through.
    for i, result in enumerate(googleNews.result()[:6]):
        n = {
            "title": result['title'],
            "description": result['desc'],
            "link": result['link'],
        }
        if i == 0:
            # Only the lead story gets a thumbnail.
            n["image"] = result['img']
        news.append(n)

    googleNews.clear()

    return news
コード例 #2
0
def news(str):
    """Ask the user (via speech) which news topic to hear, then read it aloud.

    Relies on module globals: ``i`` (retry flag), ``spacek`` (text-to-speech
    output), ``takecommend`` (speech input) and ``GoogleNews``.  On any
    failure it sets ``i = 1`` and retries recursively.

    NOTE(review): the parameter shadows the built-in ``str``; presumably it
    carries the user's name for the spoken prompts — confirm with callers.
    """
    global i
    if i == 0:
        spacek(f"ofcures {str} which news  you want to listen")
    else:
        spacek(f"which news you want to listen{str}")

    try:
        s = takecommend().lower()
        # Drop the filler word "about" from the recognised topic.
        s = s.replace('about', "")
        spacek("which page you want ot listen")

        # Result page number to read, taken from a second voice command.
        s2 = int(takecommend())
        googlenews = GoogleNews()
        # NOTE(review): the first instance above is discarded immediately.
        googlenews = GoogleNews('en', "2")
        # 'd' could be passed here instead to control how many lines to listen to
        googlenews.search(s)
        googlenews.getpage(s2)
        googlenews.result()
        spacek(f" {str} here is news about ")
        spacek(s)
        print(googlenews.gettext())
        spacek(googlenews.gettext())
    except Exception as s:
        # Any failure (speech recognition, int() parse, network) triggers
        # a spoken apology and a recursive retry with the flag set.
        spacek(f"could not understand {str} what did  you say  say it again")
        i = 1
        news(str)
コード例 #3
0
ファイル: getNews.py プロジェクト: Steven-Chang1114/UniChoice
def getPolarity(uniName):
    """Return the average TextBlob sentiment polarity of news summaries
    about '<uniName> Coronavirus'.

    Scrapes Google News (08/01/2020 - 09/26/2020, 5 pages), downloads each
    linked article, and averages the polarity of the NLP summaries.
    Returns 0.0 when no article could be processed.
    """
    from GoogleNews import GoogleNews
    from newspaper import Article
    from newspaper import Config
    import pandas as pd
    from textblob import TextBlob

    uniName = uniName + ' Coronavirus'
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent
    googlenews = GoogleNews(start='08/01/2020', end='09/26/2020')
    googlenews.search(uniName)

    # result() accumulates across getpage() calls, so reading it once after
    # the loop yields everything fetched.
    for i in range(0, 5):
        googlenews.getpage(i)
    result = googlenews.result()

    df = pd.DataFrame(result)
    total = 0.0
    counter = 0
    for ind in df.index:
        try:
            article = Article(df['link'][ind], config=config)
            article.download()
            article.parse()
            article.nlp()
            testimonial = TextBlob(article.summary)
            counter += 1
            total += testimonial.sentiment.polarity
        except Exception:
            # Individual download/parse failures are skipped (best effort).
            pass

    # The original initialised the counter to 1, diluting the average and
    # masking the no-articles case; return a true mean (0.0 if empty).
    return total / counter if counter else 0.0
コード例 #4
0
ファイル: news.py プロジェクト: KCStinger/Covid-19_status
def get_news():
    """Collect Coronavirus headlines from the last 5 days.

    Fetches two result pages from Google News and returns a list of dicts
    with 'title', 'source', 'date&time', 'desc' and 'link' keys, keeping
    only entries whose description is non-empty.
    """
    today = datetime.today().strftime('%m/%d/%Y')
    five_days_ago = (datetime.today() - timedelta(days=5)).strftime('%m/%d/%Y')

    engine = GoogleNews(start=five_days_ago, end=today)
    engine.search('Coronavirus')
    engine.getpage(1)
    page_one = engine.result()
    engine.getpage(2)
    page_two = engine.result()

    return [
        {
            'title': item['title'],
            'source': item['media'],
            'date&time': item['date'],
            'desc': item['desc'],
            'link': item['link'],
        }
        for item in page_one + page_two
        if item['desc'] != ''
    ]
コード例 #5
0
ファイル: googlenews.py プロジェクト: Siddharthbadal/Python
def todaysNews(str):
    """Return headline text for today's ('d' period) news about the topic.

    NOTE(review): the parameter shadows the built-in ``str``; the name is
    kept for backward compatibility with existing callers.
    """
    # The original built a throwaway default GoogleNews() before the
    # configured one; only the English/daily instance is needed.
    googlenews = GoogleNews('en', 'd')

    googlenews.search(str)
    googlenews.getpage(1)

    googlenews.result()
    return googlenews.gettext()
コード例 #6
0
ファイル: news.py プロジェクト: Anwesha-dash811/hacktober-1
def news():
    """Search Google News for the topic typed in `entry`, append the
    headlines to the `output` widget and read them aloud via SAPI."""
    topic = entry.get()
    # Only the configured instance ('en' language, 'd' = last day) is used;
    # the original also built and discarded an unconfigured GoogleNews().
    googlenews = GoogleNews('en', 'd')
    googlenews.search(topic)
    googlenews.getpage()
    googlenews.result()
    a = googlenews.gettext()
    output.insert(END, a)
    # Dispatch interacts with the Microsoft Speech SDK to speak the text.
    speak = Dispatch("SAPI.SpVoice")
    speak.Speak(a)
コード例 #7
0
def news():
    """Ask the user for a news topic and speak one random matching headline."""
    speak("What kind of news would you like to hear ?")
    # Renamed from `type`/`list`, which shadowed builtins.
    topic = takeCommand()
    # A single configured instance suffices; the original created and
    # immediately discarded an unconfigured GoogleNews() first.
    googleNews = GoogleNews(lang='en')
    googleNews.search(topic)  # will search the kind we want to hear
    googleNews.getpage(1)  # page number of news
    googleNews.result()
    headlines = googleNews.gettext()
    if len(headlines) > 0:
        speak(random.choice(headlines))
    else:
        speak("No news related to this topic.")
コード例 #8
0
    def fetch_articles(self):
        """Scrape Google News day-by-day since ``self.history_start`` and
        return downloaded articles for every unique link found."""

        # how many result pages to scrape per day
        pages_per_day = 1
        raw_results = []
        # how many days from last update
        # TODO: look for the last update datetime in the DB
        span = (datetime.datetime.today() - self.history_start).days

        # for each day between start date and today:
        for offset in range(span + 1):
            day = self.history_start + datetime.timedelta(days=offset)
            stamp = day.strftime("%m/%d/%Y")
            engine = GoogleNews(start=stamp, end=stamp)
            engine.search(self.ticker)
            # iterate the requested number of Google News pages
            for page in range(pages_per_day):
                engine.getpage(page)
                raw_results.extend(engine.result())

        # de-duplicate the links before downloading
        unique_links = list({entry['link'] for entry in raw_results})

        # for each link get the article and its metadata; failures are
        # printed and skipped rather than aborting the run
        articles = []
        for url in unique_links:
            try:
                articles.append(self.download_and_parse_article(url))
            except Exception as err:
                print(err)

        return articles
コード例 #9
0
    def googleNewsCrawler(self):
        """Crawl Google News day-by-day for ``self.__keyWords``.

        For each of ``self.__numDays`` days starting at ``self.__dateTime``,
        sets a [day, day + __daysSpan] time range, searches, and fetches
        ``self.__pagsEveryDay - 1`` additional pages.  Accumulated results
        are flushed to JSON via ``self.toJson`` every 10 days and once at
        the end.
        """
        result_list = []
        googlenews = GoogleNews()

        for i in range(self.__numDays):
            startDateTime = self.__dateTime + timedelta(days=i)
            endDateTime = self.__dateTime + timedelta(days=i + self.__daysSpan)

            # GoogleNews expects M/D/YYYY date strings.
            googlenews.setTimeRange(
                start=str(startDateTime.month) + '/' + str(startDateTime.day) +
                '/' + str(startDateTime.year),
                end=str(endDateTime.month) + '/' + str(endDateTime.day) + '/' +
                str(endDateTime.year))
            googlenews.search(self.__keyWords)
            # Extra pages start at 2 — search() already fetched page 1.
            for j in range(self.__pagsEveryDay - 1):
                googlenews.getpage(j + 2)
            logging.info(
                str(self.__keyWords + '__' + str(startDateTime.date()) +
                    " append " + str(int(self.__pagsEveryDay * 10)) +
                    " items"))
            result_list = result_list + googlenews.result()
            googlenews.clear()

            # Flush to disk every 10 days to bound memory use.
            if (i + 1) % 10 == 0:
                self.toJson(result_list)
                result_list = []
                continue
        self.toJson(result_list)
コード例 #10
0
ファイル: news_api.py プロジェクト: CelineZhou3105/Stonk-Ponk
def get_news(ticker):
    """Return recent (2-day) news articles for a stock *ticker*.

    Each article is a dict with title/media/date/description/link/datetime.

    Raises:
        Exception("Stock Not Found"): the ticker cannot be resolved.
        Exception("News Error"): the news lookup itself fails.
    """
    try:
        # The return value is unused; the call validates the ticker.
        stock_api.get_stock_data(ticker)
    except Exception as exc:
        # Chain the cause instead of the original bare `except:`.
        raise Exception("Stock Not Found") from exc

    try:
        googlenews = GoogleNews(period='2d')
        googlenews.search(ticker)
        result = googlenews.result()

        # Re-shape each raw result into the API's article schema.
        return [
            {
                'title': item['title'],
                'media': item['media'],
                'date': item['date'],
                'description': item['desc'],
                'link': item['link'],
                'datetime': item['datetime'],
            }
            for item in result
        ]
    except Exception as exc:
        raise Exception("News Error") from exc
コード例 #11
0
def googlenews_extract(date_range, num_pages, search_text):
    '''Use googlenews package to extract top 30 stories per day based on search string.

    Args:
        date_range: iterable of date strings, one search per date.
        num_pages: number of result pages to request per date.
        search_text: the query string.

    Returns:
        DataFrame of all results with a 'date_calendar' column, with the
        scraped 'date' column dropped.
    '''
    df_days = []

    # loop through date range to ensure equal sample size from each day
    #TODO: if we want to pull multiple years of data, perhaps add multi-threading...not necessary for < ~20 calls
    for date in date_range:
        googlenews = GoogleNews(start=date, end=date)
        googlenews.search(search_text)
        print("Search Date = ", date)

        # Guard: with num_pages == 0 the original raised NameError on `df`.
        df = pd.DataFrame()
        for i in range(0, num_pages):
            print('Executing GoogleNews call #', i + 1)

            googlenews.getpage(i)
            # result() accumulates, so the last snapshot holds every page
            # fetched for this date.
            result_next = googlenews.result()
            print("Total records returned: ", len(result_next))

            df = pd.DataFrame(result_next)
            df['date_calendar'] = date

        df_days.append(df)

    # Concatenate once after the loop — the original re-concatenated the
    # growing list on every iteration.
    appended_data = pd.concat(df_days)
    df_news = appended_data.reset_index(drop=True).drop(['date'], axis=1)

    return df_news
コード例 #12
0
    def scrapeTitles(self, num=0):
        '''
            Inputs: 

                num --> finds at least num titles

            Outputs: A list of raw titles (stored on self.titleList)

            How: Makes API calls to Google News, widening the start date by
                one day per attempt (at most 7 widenings).
                Cleans the title list down to just words.
                Strips out any titles that do not contain the ticker.
        '''
        found = 0
        titles = []
        end = self.end
        start = self.start

        # max start can be reduced by is 7 days
        tries = 0

        while found <= num:

            # Stop when the date window is invalid or we have widened 7 times.
            if not self.validDates() or tries > 7:
                break
        

            googlenews = GoogleNews(start=start, end=end)
            
            googlenews.search(self.ticker)

            result = googlenews.result()
            if len(result) == 0:
                break
            df = pd.DataFrame(result)

            # Candidate titles come from both the title and description columns.
            if len(df) > 0:
                self.titleList = df['title'].tolist() + df['desc'].tolist()

            # In-place filtering: clean() and stripTitleList() mutate
            # self.titleList before it is merged into `titles`.
            self.clean()
            self.stripTitleList()
            
            if self.titleList is not None:
                print(self.start, self.end)
                print("after stripTitleList: Not None")
            else:
                print(self.start, self.end)
                print("after stripTitleList: None")


            # Accumulate unique titles across attempts.
            for t in self.titleList:
                if t not in titles:
                    titles.append((t))
                    found += 1
            
            # Widen the window: move the start date back one day and retry.
            start = self.reduceDate(start, 1)
            
            tries += 1
           
        self.start = start
        # Keep at most `num` titles for downstream use.
        self.titleList = titles[:num]
コード例 #13
0
    def __init__(self, politician_name):
        """Initialize an object representing articles about a politician.

        Searches Ukrainian-language Google News for the last 3 days and
        scrapes a Google Images search page for thumbnails.  Populates
        ``self.articles`` with [title, link, image_url] triples for the
        first five news results.
        """
        news = GoogleNews()
        news.setlang("uk")
        news.setencode("utf-8")
        news.setperiod("3d")
        news.search(politician_name)
        info = news.result()
        self.articles = []

        # Assumes politician_name has at least two whitespace-separated
        # parts (first name and surname) — TODO confirm with callers.
        name, surname = politician_name.split()[0], politician_name.split()[1]
        self.link = f"https://www.google.com/search?q=+{name}+{surname}+новини&source=lnms&tbm=isch"

        # NOTE(review): a nested function taking `self` explicitly is
        # unusual style; it simply fetches the image-search HTML.
        def get_data(self):
            r = requests.get(self.link)
            return r.text

        html_data = get_data(self)
        soup = BeautifulSoup(html_data, "html.parser")
        # Collect the first six <img> srcs from the search page.
        image_links, num = [], 0
        for item in soup.find_all("img"):
            image_links.append(item["src"])
            num += 1
            if num == 6:
                break

        # Pair the first five articles with images; image_links[0] is
        # skipped (presumably the Google logo — verify).
        for i in range(5):
            text = info[i]
            info_list = [text["title"], text["link"], image_links[i + 1]]
            self.articles.append(info_list)
コード例 #14
0
def google_new_scrape(keyword=0, earliest_date="2000-01-01", end_date=""):
    """Scrape Google News for 'trump' articles between the given ISO dates
    and write Date/Media/Title/Article/Summary rows to googlenews.csv.

    NOTE(review): *keyword* is accepted but unused — the query is
    hard-coded to 'trump'; parameter kept for interface compatibility.
    """
    # Convert ISO dates to the MM/DD/YYYY form GoogleNews expects.  The
    # original computed this (into a misspelled `ealiest_date`) but then
    # passed the raw ISO string to GoogleNews anyway.
    start_fmt = dt.strptime(earliest_date, "20%y-%m-%d").strftime("%m/%d/20%y")
    if end_date != "":
        end_fmt = dt.strptime(end_date, "20%y-%m-%d").strftime("%m/%d/20%y")
        googlenews = GoogleNews(start=start_fmt, end=end_fmt)
    else:
        googlenews = GoogleNews(start=start_fmt)
    googlenews.search('trump')
    for i in range(1, 1000):
        googlenews.getpage(i)
        result = googlenews.result()  # accumulates across pages
        print(len(result), result)
    # Build the frame once from the fully-accumulated result list.
    df = pd.DataFrame(result)

    # Renamed from `list`/`dict`, which shadowed builtins.
    rows = []
    for ind in df.index:
        article = Article(df['link'][ind])
        article.download()
        article.parse()
        #article.nlp()
        rows.append({
            'Date': df['date'][ind],
            'Media': df['media'][ind],
            'Title': article.title,
            'Article': article.text,
            'Summary': article.summary,
        })
    news_df = pd.DataFrame(rows)
    print(news_df)
    file_name = 'googlenews.csv'
    news_df.to_csv(file_name)
コード例 #15
0
def get_company_news_link(company='NaN', news_num=5, time_range='today'):
    """Return up to *news_num* Google News links about *company*.

    *time_range* is either 'today' or a packed string whose character
    positions encode start and end dates (sliced exactly as before).
    Returns an error message string when no company is given or no
    results are found.
    """
    if company == 'NaN':
        return 'please input company name'

    googlenews = GoogleNews()
    googlenews.clear()

    if time_range != 'today':
        # Slice the packed range string into MM/DD/YYYY start/end dates.
        start_date = "{1}/{2}/{0}".format(time_range[0:4], time_range[5:7],
                                          time_range[8:10])
        end_date = "{1}/{2}/{0}".format(time_range[11:15], time_range[16:18],
                                        time_range[19:21])
        googlenews.set_time_range(start_date, end_date)

    googlenews.search(company)
    result = googlenews.result()

    # Take at most news_num links; a short result list is fine.
    news_link = [item['link'] for item in result[:news_num]]
    if not news_link:
        return '此時段無' + company + '新聞 OR 網路不穩'
    return news_link
コード例 #16
0
ファイル: test_search.py プロジェクト: manishhedau/GoogleNews
 def testResultHasDate(self):
   """The first search result must carry a non-empty 'date' field."""
   googlenews = GoogleNews()
   googlenews.search(keyword)
   result = googlenews.result()[0]
   print(result.get('date').lower())
   # assertIsNot is an identity check, so `'' is not <result str>` passed
   # even for an empty date; assertNotEqual compares values as intended.
   self.assertNotEqual('', result.get('date').lower())
   print('Result date is not empty')
コード例 #17
0
ファイル: testSearch.py プロジェクト: wastu01/GoogleNews
 def testResultNumberWithTwoPages(self):
     """After fetching a second page, 20 results should be accumulated."""
     engine = GoogleNews()
     engine.search(keyword)
     engine.getpage(2)
     self.assertEqual(len(engine.result()), 20)
     print('Result length with two pages is correct')
コード例 #18
0
    def getTitles(self, ticker, start, end):
        """Return a pandas Series of Google News titles for *ticker*
        within the [start, end] date range."""
        engine = GoogleNews(start=start, end=end)
        engine.search(ticker)
        frame = pd.DataFrame(engine.result())
        return frame['title']
コード例 #19
0
 def testResultHasLink(self):
     """The first result's link must be an http(s) URL."""
     engine = GoogleNews()
     engine.search(keyword)
     first = engine.result()[0]
     link = first.get('link').lower()
     print(link)
     self.assertIn('http', link)
     print('Result contains http link')
コード例 #20
0
 def testResultHasImage(self):
     """The first result's 'img' field must be a base64 data URI."""
     engine = GoogleNews()
     engine.search(keyword)
     first = engine.result()[0]
     image = first.get('img').lower()
     print(image)
     self.assertIn('base64', image)
     print('Result contains image')
コード例 #21
0
ファイル: views.py プロジェクト: PROuserR/My-website
def index(request):
    """Render index.html with Google News results about Shailene Woodley."""
    engine = GoogleNews()
    engine.search('Shailene Woodley')

    # Results are exposed to the template under the 'news' key.
    return render(request, 'index.html', {'news': engine.result()})
コード例 #22
0
 def testResultContainsKeyword(self):
     """The first result's description must mention the search keyword."""
     engine = GoogleNews()
     engine.search(keyword)
     first = engine.result()[0]
     desc = first.get('desc').lower()
     print(desc)
     self.assertIn(keyword.lower(), desc)
     print('Result contains keyword')
コード例 #23
0
def google_scrape(entity, start_date, end_date, days_per_period=7):
    '''
    Scrape (using GoogleNews API) the top 10 headlines of Google News on a
    particular entity, per period (default weekly), over a given time range.

    Args:
        entity: search term; only headlines mentioning it are kept.
        start_date / end_date: datetime bounds of the scrape (inclusive).
        days_per_period: length of each sub-period in days.

    Output: Pandas DataFrame with date_time, title, excerpt, domain
    (news origin), article_url and an empty source_id column.

    Depends on a module-level ``date_convert`` helper for datetime parsing.
    '''
    time.sleep(60) # set timer to wait 60s before scraping (google scraper has limit to scraping)
    
    # calculate the number of weeks between start and end date (inclusive)
    n_periods = (end_date - start_date).days // days_per_period + 2

    # divide the dates into date_periods (query top 10 for each week)
    date_range = pd.date_range(start_date, end_date, periods=n_periods)

    # create result df with columns
    result_df = pd.DataFrame(columns=['date_time', 'title', 'excerpt', 'domain', 'article_url'])

    # go through the date ranges and retrieve top 10
    for i in range(len(date_range)-1):
        start_temp = date_range[i]
        end_temp = date_range[i+1]

        news = GoogleNews(start=start_temp.strftime("%m/%d/%Y"),end=end_temp.strftime("%m/%d/%Y"), lang='en', encode='utf-8')
        news.search(f"{entity}")   # Main bulk of time, taking ~2 seconds to search
    
        if pd.DataFrame(news.result()).empty:
            # No relevant articles 
            continue
    
        # retrieve relevant news results
        temp_df = pd.DataFrame(news.result())[['date', 'title', 'desc', 'media', 'link']]
        
        # rename columns
        temp_df.columns = ['date_time', 'title', 'excerpt', 'domain', 'article_url']
        
        # only get headlines which mention the entity of interest
        temp_df = temp_df[temp_df['title'].str.contains(entity,flags=re.IGNORECASE)].reset_index(drop=True)
        temp_df['date_time'] = temp_df.date_time.apply(date_convert)

        # remove rows without datetime
        temp_df = temp_df.dropna(axis=0, subset=["date_time"])

        # combine result df
        result_df = pd.concat([result_df, temp_df])

    # placeholder column filled in downstream — TODO confirm consumer
    result_df["source_id"] = ""

    return result_df
コード例 #24
0
ファイル: autonews_api.py プロジェクト: riyakwl28/auto_news
def extract_google(query_terms, startDate, endDate):
    """Crawl Google News for 'India Technology <query>' articles.

    Args:
        query_terms: iterable of search terms.
        startDate / endDate: 'YYYY-MM-DD' strings; empty strings default
            to a 7-day window ending today.

    Returns:
        list of dicts with source/url/date/title/content/img keys.
    """
    # Defaults: the original built dd/mm/YYYY strings here and then
    # crashed re-parsing them with '%Y-%m-%d'; it also assigned today to
    # the *start* and a week ago to the *end*.  Use an ISO-formatted
    # 7-day window instead so the strptime below always succeeds.
    today = datetime.datetime.today().date()
    if len(startDate) == 0:
        startDate = (today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
    if len(endDate) == 0:
        endDate = today.strftime('%Y-%m-%d')
    # Convert ISO input into the dd/mm/yy form used for setTimeRange.
    startDate = datetime.datetime.strptime(startDate,
                                           '%Y-%m-%d').strftime('%d/%m/%y')
    endDate = datetime.datetime.strptime(endDate,
                                         '%Y-%m-%d').strftime('%d/%m/%y')
    final_articles = []
    print(startDate)
    print(endDate)
    print("Crawling Starting")
    # here extracting news from google news
    googlenews = GoogleNews()
    googlenews.setTimeRange(startDate, endDate)
    for query in query_terms:
        googlenews.clear()

        # forming the search term
        googlenews.search("India Technology " + query)

        result = googlenews.result()

        for n in range(len(result)):
            source = result[n]['media']
            url = result[n]['link']
            try:
                article = Article(url)
                article.download()
                article.parse()
            except Exception as e:
                print("Trouble downloading so skipping")
                continue
            content = article.text

            # summarize: strip a leading "(...)"/"[...]" dateline, then
            # keep the first two sentences.
            temp_content = re.sub(r'^\s*[\(\[].*?[\)\]]\s*', '', content)
            sentences = sent_detector.tokenize(temp_content)
            summary = (" ".join(sentences[:2]).strip())

            date = result[n]['date']
            # Relative dates like '2 hours ago' become today's date
            # (`current` is a module-level datetime — TODO confirm).
            if (date.find('ago') != -1):
                date = current.date()
            title = result[n]['title']
            img = result[n]['img']
            # adding the extracted info in final_articles list
            final_articles.append({
                'source': source,
                'url': url,
                'date': date,
                'title': title,
                'content': content,
                'img': img
            })
    return final_articles
コード例 #25
0
def get_news(text):
    """Search Google News for *text* and return page-2 results only.

    NOTE(review): clear() is called AFTER search(), so the page-1 results
    fetched by the search are discarded and only page 2 (via getpage(2))
    remains in the returned list.  Confirm this is intentional.
    """
    googlenews = GoogleNews()
    googlenews.search(text)
    googlenews.clear()
    googlenews.getpage(2)
    result = googlenews.result()

    return result
コード例 #26
0
def search(keyword=None, datestart=None, dateend=None, pgs=1):
    """Search Google News (Portuguese) for *keyword* in a date range,
    download each article via newspaper.Article and append parsed entries
    to the module-level ``noticias`` list.  Per-article failures are
    printed and skipped.
    """
    # Global state shared with the rest of the module.
    global noticias
    global cont
    global acabou

    # Search parameters
    print('Keyword: ', keyword)

    # Configure the search (Portuguese-language results).
    googlenews = GoogleNews(start=datestart, end=dateend)
    googlenews.setlang('pt')
    googlenews.search(keyword)
    result = googlenews.result()

    # Load the results into a DataFrame.
    df = pd.DataFrame(result)

    # Show the first 5 headlines.
    print(df.head())

    # Fetch the requested number of pages; result() accumulates, so the
    # final DataFrame holds everything retrieved.
    for i in range(0, pgs):
        googlenews.getpage(i)
        result = googlenews.result()
        df = pd.DataFrame(result)

    # Convert each row into a dict and download the full article.
    for ind in df.index:
        print('Noticia numero: {}'.format(ind))
        dict = {}
        article = Article(df['link'][ind], config=config)
        article.download()
        try:
            article.parse()
            article.nlp()
            dict['Date'] = df['date'][ind]
            dict['Media'] = df['media'][ind]
            dict['Title'] = article.title
            dict['Article'] = article.text
            dict['Summary'] = article.summary
            dict['Created'] = False
            noticias.append(dict)
        except:
            print('Error')
        time.sleep(0)
コード例 #27
0
class Engine:
    """Thin stateful wrapper around GoogleNews with page navigation."""

    def __init__(self):
        self.news = GoogleNews()
        self.news.setlang('en')
        #self.news.setTimeRange('01/01/2000','01/01/2015')
        self.news.setencode('utf-8')
        self.pageNumber = 1
        self.searchTerm = ""

    def _assert_searched(self):
        # The original compared the bound method `self.news.result` to
        # None, which is never true, so the guard was dead; track the
        # search term instead.
        if not self.searchTerm:
            raise RuntimeError("Engine has not searched yet")

    def nextPage(self):
        """Advance one page; return True if the new page has results."""
        self._assert_searched()
        self.pageNumber += 1
        self.news.clear()
        self.news.getpage(self.pageNumber)
        return len(self.news.result()) != 0

    def previousPage(self):
        """Go back one page; return True if that page has results."""
        self._assert_searched()
        self.pageNumber -= 1
        self.news.clear()
        self.news.getpage(self.pageNumber)
        return len(self.news.result()) != 0

    def search(self, term):
        """Run a search; return the results list, or False if empty."""
        self.searchTerm = term  # record the term so paging knows a search ran
        self.news.search(term)
        if len(self.news.result()) == 0:
            return False
        else:
            return self.news.result()

    def getPageNumber(self):
        return self.pageNumber

    def getResults(self):
        return self.news.result()

    def clear(self):
        self.news.clear()

    def resetPageNumber(self):
        self.pageNumber = 1
コード例 #28
0
ファイル: scrape_news.py プロジェクト: kafkoders/oclean
def scrape():
	"""Search Google News for ocean-trash stories and store each result
	via sql_insert.  (Removed the unused `link_list` local.)"""

	# Instance of class GoogleNews
	googlenews = GoogleNews()
	googlenews.search("oceans"+"+trash")

	for news_item in googlenews.result():
		sql_insert(news_item)
コード例 #29
0
def run(start_date, end_date, keyword, file, mail, importance):
    """Fetch news for *keyword* within [start_date, end_date] and return a
    DataFrame of at most *importance* processed articles (also stored on
    the function attribute ``run.df`` for existing callers).

    NOTE(review): *file* and *mail* are accepted but unused here; kept for
    interface compatibility.
    """
    # The original created three GoogleNews instances in a row, so the
    # lang='en' setting was silently discarded; configure one instance
    # with both the language and the date range.
    googlenews = GoogleNews(lang='en', start=start_date, end=end_date)
    googlenews.search(keyword)
    headlines = googlenews.gettext()
    links = googlenews.get__links(
    )  #note that documentation has this as googlenews.getlinks() so it might change
    #get page url
    results = articleReader(links, headlines, keyword)
    run.df = pd.DataFrame(results)
    # Keep only the `importance` most relevant rows.
    if run.df.shape[0] > importance:
        run.df = run.df.iloc[0:importance]

    return run.df
コード例 #30
0
ファイル: sentiment.py プロジェクト: theoportlock/system
 def run(self):
     """Accumulate the average polarity and subjectivity of headlines
     about ``self.term`` into self.sentiment / self.subjectivity."""
     engine = GoogleNews('en', 'd')
     engine.search(self.term)
     headlines = engine.result()
     total = len(headlines)
     for item in headlines:
         print(item["desc"])
         analysis = TextBlob(item["desc"])
         # Each headline contributes an equal 1/total share to the sums.
         self.sentiment += analysis.sentiment.polarity / total
         self.subjectivity += analysis.sentiment.subjectivity / total