コード例 #1
0
def google_new_scrape(keyword=0, earliest_date="2000-01-01", end_date=""):
    """Scrape Google News results between two dates and write them to
    googlenews.csv.

    keyword       -- search query; falls back to the original hard-coded
                     'trump' when falsy (default 0), keeping old calls working
    earliest_date -- inclusive start date, ISO format YYYY-MM-DD
    end_date      -- inclusive end date, ISO format YYYY-MM-DD; open-ended
                     when empty
    """
    # BUG FIX: the original parsed into a typo'd variable `ealiest_date` and
    # then passed the raw ISO string to GoogleNews, which expects mm/dd/yyyy.
    start = dt.strptime(earliest_date, "%Y-%m-%d").strftime("%m/%d/%Y")
    if end_date != "":
        end = dt.strptime(end_date, "%Y-%m-%d").strftime("%m/%d/%Y")
        googlenews = GoogleNews(start=start, end=end)
    else:
        googlenews = GoogleNews(start=start)
    googlenews.search(keyword if keyword else 'trump')
    for page in range(1, 1000):
        googlenews.getpage(page)
        result = googlenews.result()
        print(len(result), result)
    # Build the DataFrame once after all pages are fetched; the original
    # rebuilt it on every loop iteration.
    df = pd.DataFrame(googlenews.result())
    rows = []  # renamed from `list`, which shadowed the builtin
    for ind in df.index:
        article = Article(df['link'][ind])
        article.download()
        article.parse()
        #article.nlp()
        rows.append({
            'Date': df['date'][ind],
            'Media': df['media'][ind],
            'Title': article.title,
            'Article': article.text,
            'Summary': article.summary,
        })
    news_df = pd.DataFrame(rows)
    print(news_df)
    file_name = 'googlenews.csv'
    news_df.to_csv(file_name)
コード例 #2
0
def news(str):
    """Voice-driven news reader: ask for a topic and a page number, then
    speak the matching Google News headlines.

    str -- the user's name, interpolated into the spoken prompts (parameter
           name kept for backward compatibility even though it shadows the
           builtin)

    Relies on the global retry flag `i`; on a recognition failure it sets
    i = 1 and recurses to re-prompt.
    """
    global i
    if i == 0:
        spacek(f"ofcures {str} which news  you want to listen")
    else:
        spacek(f"which news you want to listen{str}")

    try:
        topic = takecommend().lower()
        topic = topic.replace('about', "")
        spacek("which page you want ot listen")

        page = int(takecommend())
        # BUG FIX: the original built a bare GoogleNews() instance and then
        # immediately replaced it; construct the configured client once.
        googlenews = GoogleNews('en', "2")
        # here you can use d which is denoted for how much linw you want to lesiten
        googlenews.search(topic)
        googlenews.getpage(page)
        googlenews.result()
        spacek(f" {str} here is news about ")
        spacek(topic)
        print(googlenews.gettext())
        spacek(googlenews.gettext())
    except Exception:
        # note: the original bound the exception to `s`, clobbering the query
        spacek(f"could not understand {str} what did  you say  say it again")
        i = 1
        news(str)
コード例 #3
0
def get_admin_data(user_headline, user_img, user_keywords):
    """Find a news article matching the user's headline (or keywords) and
    return its link, title, summary and image.

    user_headline -- headline to search Google News for
    user_img      -- fallback image used when the article has none
                     (BUG FIX: this parameter was previously ignored; the
                     sibling get_admin_data implementation uses it this way)
    user_keywords -- fallback search terms when the headline yields no links

    Returns a dict with keys link/headline/content/image; all values are
    None when no links are found at all.
    """
    admin_data = {'link': None, 'headline': None,
                  'content': None, 'image': None}
    google_news = GoogleNews(lang='en')
    google_news.search(user_headline)
    links = google_news.get__links()
    print('No. of links found: ', len(links))
    if len(links) == 0:
        # Retry with the keyword list when the full headline finds nothing.
        google_news = GoogleNews(lang='en')
        google_news.search(' '.join(user_keywords))
        links2 = google_news.get__links()
        if len(links2) == 0:
            return admin_data
        else:
            links = links2
    # Prefer the second hit when available (first is often the source itself).
    if len(links) == 1:
        link_used = links[0]
    else:
        link_used = links[1]

    admin_data['link'] = link_used
    # print(link_used)
    article = Article(link_used)
    article.download()
    article.parse()
    article.nlp()
    admin_data['headline'] = article.title
    admin_data['content'] = article.summary
    # newspaper returns '' (not None) when no image is found, so test
    # truthiness and fall back to the user-supplied image.
    if article.top_image:
        admin_data['image'] = article.top_image
    else:
        admin_data['image'] = user_img
    print('admin link: ', admin_data['link'])
    print('admin headline: ', admin_data['headline'])
    return admin_data
コード例 #4
0
ファイル: googlenews.py プロジェクト: Siddharthbadal/Python
def todaysNews(str):
    """Return today's Google News headline texts for the given query.

    str -- search query (parameter name kept for backward compatibility
           even though it shadows the builtin)
    """
    # BUG FIX: the original created a bare GoogleNews() instance and
    # immediately discarded it; build the configured client directly.
    googlenews = GoogleNews('en', 'd')

    googlenews.search(str)
    googlenews.getpage(1)

    googlenews.result()
    return googlenews.gettext()
コード例 #5
0
def getArticles(searchQuery, dateRange = False, startDate = '', endDate = ''):
    """Return the first page of Google News results for *searchQuery*.

    When dateRange is True the search is bounded by startDate/endDate,
    otherwise it covers the last day (period='d'). The result is a list of
    dicts with keys: title, media, date, desc, link, img.
    """
    client = (GoogleNews(lang='en', start=startDate, end=endDate)
              if dateRange
              else GoogleNews(lang='en', period='d'))
    client.search(searchQuery)
    return client.result(sort=True)
コード例 #6
0
ファイル: news.py プロジェクト: Anwesha-dash811/hacktober-1
def news():
    """Search Google News for the topic typed into the entry widget, show
    the headlines in the output widget, and read them aloud via SAPI."""
    topic = entry.get()
    # BUG FIX: the original created a bare GoogleNews() instance and
    # immediately discarded it; build the configured client directly.
    googlenews = GoogleNews('en', 'd')
    googlenews.search(topic)
    googlenews.getpage()
    googlenews.result()
    a = googlenews.gettext()
    output.insert(END, a)
    speak = Dispatch(
        "SAPI.SpVoice"
    )  #calling this dispatch method helps to interact with Microsoft Speech SDK to speak
    speak.Speak(a)
コード例 #7
0
def news():
    """Ask the user what news they want and speak one random matching
    Google News headline."""
    speak("What kind of news would you like to hear ?")
    topic = takeCommand()  # renamed from `type`, which shadowed the builtin
    # BUG FIX: the original created a bare GoogleNews() instance and
    # immediately discarded it; build the configured client directly.
    googleNews = GoogleNews(lang = 'en')
    googleNews.search(topic) # will search the kind we want to hear
    googleNews.getpage(1) # page number of news 
    googleNews.result()
    headlines = googleNews.gettext()  # renamed from `list` (shadowed builtin)
    if len(headlines) > 0:
       speak(random.choice(headlines))
    else:
       speak("No news related to this topic.") 
コード例 #8
0
ファイル: News_Data.py プロジェクト: sakshiseth/CitiHack
 def getnewsData(self):
     """Fetch today's top-10 'Market' and 'Business' Google News items and
     export each as a styled JPEG table under ./template/.

     The two scrapes were previously copy-pasted; they are identical except
     for the topic, so loop over the topics instead.
     """
     today = date.today()
     T_split = str(today).split('-')
     # GoogleNews expects dd/mm/yyyy here; today is ISO yyyy-mm-dd.
     toDate = T_split[2] + '/' + T_split[1] + '/' + T_split[0]
     for topic in ('Market', 'Business'):
         client = GoogleNews(start=toDate, end=toDate)
         client.get_news(topic)
         df = pd.DataFrame(client.results()).head(10)
         dfi.export(df, './template/df_styled_%s.jpeg' % topic)
コード例 #9
0
    def news_sentiments(self): # Returns news articles curated via Finviz, Yahoo, and Google News, GET UNUSUAL OPTION ACTIVITY
        """Aggregate news for self.ticker from Finviz, Yahoo Finance and
        Google News.

        Returns a 5-tuple:
            df             -- DataFrame of Finviz headlines (Time, Headline, Link)
            news           -- list of (text, url) from Yahoo Finance news
            press_releases -- list of (text, url) from Yahoo press releases
            sector_news    -- list of Google News result lists, one per sector
            stock_news     -- Google News results for '$<ticker> stock'
        """
        # --- Finviz: scrape the news table on the quote page ---
        BASE_URL = f'https://finviz.com/quote.ashx?t={self.ticker}'
        soup = self._get_soup(BASE_URL)

        table = soup.find('table', {'class': 'fullview-news-outer'})
        rows = table.find_all('tr')
        df_data = []
        for row in rows:
            # each row holds a right-aligned timestamp cell and a left-aligned
            # headline cell containing the anchor
            date = row.find('td', {'align': 'right'})
            article = row.find('td', {'align': 'left'})
            link = article.find('a')['href']
            df_data.append((date.get_text(), article.get_text(), link))
        df = pd.DataFrame(df_data, columns=['Time', 'Headline', 'Link'])


        # --- Yahoo Finance: news tab (class string matches Yahoo's generated CSS) ---
        BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/news?p={self.ticker}'
        soup = self._get_soup(BASE_URL)

        links = soup.find_all('a', {'class': 'js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) C(#0078ff):h C(#000) LineClamp(2,46px) LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled'})
        news = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]

        # --- Yahoo Finance: press-releases tab (same selector) ---
        BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/press-releases?p={self.ticker}'
        soup = self._get_soup(BASE_URL)

        links = soup.find_all('a', {'class': 'js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) C(#0078ff):h C(#000) LineClamp(2,46px) LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled'})
        press_releases = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]
        # Look for keywords in the news? Any showcases, Investor/analyst days, Analyst revisions, Management transitions
        # Product launches, Significant stock buyback changes


          # Getting news from google news search
        googlenews = GoogleNews(lang='en', period='14d') # Specify period for news
        googlenews.get_news(f'${self.ticker} stock')
        stock_news = googlenews.results()

        # print([(i, j) for i, j in zip(googlenews.get_texts(), googlenews.get_links())])
        # To get other pages, do googlenews.get_page(2), etc.

        # Have whitelist of websites to search articles from. Maybe have key word to filter out stupid stuff.

        # --- Google News per sector returned by find_competition() ---
        sectors = self.find_competition()
        sector_news = []
        if sectors:
            for sector in sectors:
                googlenews = GoogleNews(lang='en', period='14d')
                googlenews.get_news(f'{sector} sector stocks')
                sector_news.append(googlenews.result())

        return df, news, press_releases, sector_news, stock_news
コード例 #10
0
def search():
    """Collect Google News links about covid for every configured city into
    state['url'][city], scraping config['pagesPerCity'] result pages each.

    Requires initiateConfig() to have populated the global `config`;
    initialises the global `state` lazily.
    """
    global state, config
    if config is None:
        raise Exception('Call initiateConfig first')
    if state is None:
        state = {}
    state['url'] = {}
    # BUG FIX: the original created a bare GoogleNews() instance and
    # immediately discarded it; build the configured client directly.
    googlenews = GoogleNews('en', 'd')
    for city in config['cities']:
        googlenews.search('covid in ' + city)
        state['url'][city] = []
        # NOTE(review): getpage() is called with 0-based page numbers here;
        # confirm the library does not expect 1-based pages.
        for i in range(config['pagesPerCity']):
            googlenews.getpage(i)
            state['url'][city].extend(googlenews.get__links())
コード例 #11
0
ファイル: temp.py プロジェクト: manishmj9431/ISO-25-E1
def getNews(query):
    """Return up to six Google News results for *query* as dicts with
    title/description/link; the first item additionally carries its image.

    Fixes two defects in the original:
    - googleNews.result() was called three times (len check + loop), each a
      redundant traversal of the client's state; fetch the list once.
    - the loop broke only when i > number, yielding number + 1 items (7 when
      six or more results exist) instead of the intended cap.
    """
    googleNews = GoogleNews()
    googleNews.search(query)

    results = googleNews.result()
    number = min(len(results), 6)

    news = []
    for i, result in enumerate(results):
        if i >= number:
            break
        n = {
            "title": result['title'],
            "description": result['desc'],
            "link": result['link'],
        }
        # only the first card gets an image
        if i == 0:
            n["image"] = result['img']
        news.append(n)

    googleNews.clear()

    return news
コード例 #12
0
    def get_training_data(self):
        """ load training data from google news """
        path = './data/sentiment_data/headlines.csv'
        # serve the cached CSV when a previous run already downloaded it
        if os.path.isfile(path):
            return pd.read_csv(path)

        client = GoogleNews(lang='en',
                            start='01/01/2015')  # mm/dd/yyyy
        keywords = [
            'Blockchain', 'Cryptocurrency', 'Bitcoin', 'Etherium',
            'Stock Market', 'Finance'
        ]

        # one [datetime, title] row per headline, across every keyword
        rows = []
        for keyword in tqdm(keywords):
            client.get_news(keyword)
            for item in client.results():
                rows.append([item['datetime'], item['title']])

        # persist the scrape so subsequent calls hit the cache branch above
        frame = pd.DataFrame(rows, columns=['date', 'headline'])
        frame.to_csv(path, index=False)
        return frame
コード例 #13
0
    def getTitles(self, ticker, start, end):
        """Return the 'title' column of Google News results for *ticker*
        between *start* and *end*."""
        client = GoogleNews(start=start, end=end)
        client.search(ticker)
        frame = pd.DataFrame(client.result())
        return frame['title']
コード例 #14
0
 def testResultHasImage(self):
     """The first search result should carry a base64-encoded image."""
     client = GoogleNews()
     client.search(keyword)
     img = client.result()[0].get('img').lower()
     print(img)
     self.assertIn('base64', img)
     print('Result contains image')
コード例 #15
0
 def testResultHasLink(self):
     """The first search result should carry an http(s) link."""
     client = GoogleNews()
     client.search(keyword)
     url = client.result()[0].get('link').lower()
     print(url)
     self.assertIn('http', url)
     print('Result contains http link')
コード例 #16
0
    def googleNewsCrawler(self):
        """Crawl Google News for self.__keyWords over self.__numDays daily
        windows, flushing accumulated results to JSON (self.toJson) every
        10 days to bound memory use."""
        result_list = []
        googlenews = GoogleNews()

        for i in range(self.__numDays):
            # window of __daysSpan days starting at day offset i
            startDateTime = self.__dateTime + timedelta(days=i)
            endDateTime = self.__dateTime + timedelta(days=i + self.__daysSpan)

            # GoogleNews expects m/d/yyyy strings for the time range
            googlenews.setTimeRange(
                start=str(startDateTime.month) + '/' + str(startDateTime.day) +
                '/' + str(startDateTime.year),
                end=str(endDateTime.month) + '/' + str(endDateTime.day) + '/' +
                str(endDateTime.year))
            googlenews.search(self.__keyWords)
            # search() fetches page 1; fetch pages 2..__pagsEveryDay here
            for j in range(self.__pagsEveryDay - 1):
                googlenews.getpage(j + 2)
            # assumes ~10 items per page — TODO confirm against the library
            logging.info(
                str(self.__keyWords + '__' + str(startDateTime.date()) +
                    " append " + str(int(self.__pagsEveryDay * 10)) +
                    " items"))
            result_list = result_list + googlenews.result()
            googlenews.clear()

            # periodic flush: dump and reset the accumulator every 10 days
            if (i + 1) % 10 == 0:
                self.toJson(result_list)
                result_list = []
                continue
        self.toJson(result_list)
def crawling_news(company_name_list, start_date, end_date, save_file_name):
    """Collect Google News titles for each company and save them to CSV.

    company_name_list -- iterable of company names to search for
    start_date/end_date -- date bounds passed straight to GoogleNews
    save_file_name -- CSV written as '<save_file_name>.csv'

    Returns the DataFrame of [title, company, fixed date] rows.

    Fixes in this revision: log through the configured module logger (the
    original set up `logger` with a handler but then called root
    `logging.info`), and snapshot googlenews.results() once per company
    instead of re-calling it for every field of every row.
    """
    #set logger Handler
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    #define googlenews
    googlenews = GoogleNews(lang='en',
                            start=start_date,
                            end=end_date,
                            encode='utf-8')
    #news.google.com search sample
    all_title = []
    logger.info('loop start')
    for i, comp_name in enumerate(company_name_list):
        googlenews.search(comp_name)
        logger.info('%s : %d%s' %
                    (comp_name,
                     ((i + 1) / len(company_name_list)) * 100, '%'))
        for item in googlenews.results():
            all_title.append([item.get('title'),
                              comp_name,
                              fixing_date(item.get('date'))])
        #clear result list
        googlenews.clear()
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('%s.csv' % (save_file_name))
    logger.info('saved as %s.csv, done!!' % (save_file_name))
    return all_title
コード例 #18
0
def googlenews_extract(date_range, num_pages, search_text):
    ''' Use googlenews package to extract top 30 stories per day based on search string '''
    # Fixes in this revision: drop the unused `result` list, build the
    # per-day DataFrame once after paging (the original rebuilt it per page,
    # keeping only the last build), and concatenate once after the loop
    # instead of re-concatenating every day.

    df_days = []

    # loop through date range to ensure equal sample size from each day
    #TODO: if we want to pull multiple years of data, perhaps add multi-threading...not necessary for < ~20 calls
    for date in date_range:

        googlenews = GoogleNews(start=date, end=date)
        googlenews.search(search_text)
        print("Search Date = ", date)

        day_results = []
        for i in range(0, num_pages):

            print('Executing GoogleNews call #', i + 1)

            googlenews.getpage(i)
            # result() returns the client's accumulated list across pages
            day_results = googlenews.result()
            print("Total records returned: ", len(day_results))

        df = pd.DataFrame(day_results)
        df['date_calendar'] = date
        df_days.append(df)

    appended_data = pd.concat(df_days)
    df_news = appended_data.reset_index(drop=True).drop(['date'], axis=1)

    return df_news
コード例 #19
0
ファイル: getNews.py プロジェクト: Steven-Chang1114/UniChoice
def getPolarity(uniName):
    """Return the average TextBlob sentiment polarity of Google News
    coverage about '<uniName> Coronavirus' (Aug 1 - Sep 26, 2020).

    Articles that fail to download or parse are skipped (best effort).
    Returns 0.0 when no article could be processed.

    BUG FIX: `counter` previously started at 1, so the mean divided by
    (successes + 1) and was systematically biased toward zero.
    """
    from GoogleNews import GoogleNews
    from newspaper import Article
    from newspaper import Config
    import pandas as pd
    from textblob import TextBlob

    uniName = uniName + ' Coronavirus'
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent
    googlenews = GoogleNews(start='08/01/2020', end='09/26/2020')
    googlenews.search(uniName)

    # fetch five pages; result() returns the accumulated list
    for i in range(0, 5):
        googlenews.getpage(i)
    result = googlenews.result()
    df = pd.DataFrame(result)
    total = 0.0  # renamed from `sum`, which shadowed the builtin
    counter = 0
    for ind in df.index:
        try:
            article = Article(df['link'][ind], config=config)
            article.download()
            article.parse()
            article.nlp()
            testimonial = TextBlob(article.summary)
            counter += 1
            total += testimonial.sentiment.polarity
        except Exception:
            # best effort: skip articles that fail to download/parse
            pass

    return total / counter if counter else 0.0
コード例 #20
0
def get_admin_data(user_headline, user_img):
    """Search Google News for *user_headline* and return the matched
    article's link, title, summary and image.

    Falls back to *user_img* when the article exposes no top image.
    Returns a dict of all-None values when no links are found.
    """
    admin_data = dict.fromkeys(('link', 'headline', 'content', 'image'))
    google_news = GoogleNews(lang='en')
    google_news.search(user_headline)
    links = google_news.get__links()
    print('No. of links found: ', len(links))
    if len(links) == 0:
        return admin_data
    # prefer the second hit when more than one link is available
    link_used = links[0] if len(links) == 1 else links[1]

    admin_data['link'] = link_used
    print(link_used)
    article = Article(link_used)
    article.download()
    article.parse()
    article.nlp()
    admin_data['headline'] = article.title
    admin_data['content'] = article.summary
    # user-supplied image is the fallback when the article has none
    admin_data['image'] = user_img if article.top_image is None else article.top_image

    return admin_data
コード例 #21
0
ファイル: news.py プロジェクト: KCStinger/Covid-19_status
def get_news():
    """Collect 'Coronavirus' headlines from the last five days of Google
    News (two result pages) and return them as a list of dicts with keys
    title/source/date&time/desc/link, skipping items with no description."""
    dt_today = str(datetime.today().strftime('%m/%d/%Y'))
    dt_previous = str((datetime.today() - timedelta(days=5)).strftime('%m/%d/%Y'))

    googlenews = GoogleNews(start=dt_previous, end=dt_today)
    googlenews.search('Coronavirus')
    combined = []
    for page in (1, 2):
        googlenews.getpage(page)
        combined = combined + googlenews.result()

    news_list = [
        {
            'title': item['title'],
            'source': item['media'],
            'date&time': item['date'],
            'desc': item['desc'],
            'link': item['link'],
        }
        for item in combined
        if item['desc'] != ''
    ]
    return news_list
コード例 #22
0
    def __init__(self, politician_name):
        """Initialize an object representing an article.

        Fetches the last 3 days of Ukrainian-language Google News for
        *politician_name* and pairs the first five results with images
        scraped from a Google Images search, storing
        [title, article link, image src] triples in self.articles.
        """
        news = GoogleNews()
        news.setlang("uk")
        news.setencode("utf-8")
        news.setperiod("3d")
        news.search(politician_name)
        info = news.result()
        self.articles = []

        # Google Images search URL; 'новини' is Ukrainian for 'news'.
        name, surname = politician_name.split()[0], politician_name.split()[1]
        self.link = f"https://www.google.com/search?q=+{name}+{surname}+новини&source=lnms&tbm=isch"

        def get_data(self):
            # Raw HTML of the image-search page.
            r = requests.get(self.link)
            return r.text

        html_data = get_data(self)
        soup = BeautifulSoup(html_data, "html.parser")
        # Collect the first six <img> src values; index 0 is skipped below
        # (presumably the Google logo — verify against the page markup).
        image_links, num = [], 0
        for item in soup.find_all("img"):
            image_links.append(item["src"])
            num += 1
            if num == 6:
                break

        # Pair the first five news results with one image each.
        for i in range(5):
            text = info[i]
            info_list = [text["title"], text["link"], image_links[i + 1]]
            self.articles.append(info_list)
コード例 #23
0
    def fetch_articles(self):
        """Download and parse every article about self.ticker published
        between self.history_start and today.

        Searches Google News one calendar day at a time, de-duplicates the
        links, and returns the successfully parsed articles; failures are
        printed and skipped.
        """
        # how many pages to scrape
        pages_to_scrape = 1
        collected = []
        # how many days from last update
        # TODO: look for the last update datetime in the DB
        elapsed_days = (datetime.datetime.today() -
                        self.history_start).days
        # one GoogleNews query per calendar day in the window
        for offset in range(elapsed_days + 1):
            day = self.history_start + datetime.timedelta(days=offset)
            stamp = day.strftime("%m/%d/%Y")
            engine = GoogleNews(start=stamp, end=stamp)
            engine.search(self.ticker)
            for page in range(pages_to_scrape):
                engine.getpage(page)
                collected = collected + engine.result()

        # de-duplicate by link
        unique_links = list({entry['link'] for entry in collected})

        # for each link (without dups) get the article and its metadata
        articles = []
        for link in unique_links:
            try:
                articles.append(self.download_and_parse_article(link))
            except Exception as e:
                print(e)

        return articles
コード例 #24
0
ファイル: test_search.py プロジェクト: manishhedau/GoogleNews
 def testResultHasDate(self):
     """The first search result should have a non-empty date."""
     client = GoogleNews()
     client.search(keyword)
     stamp = client.result()[0].get('date').lower()
     print(stamp)
     self.assertIsNot('', stamp)
     print('Result date is not empty')
コード例 #25
0
ファイル: testSearch.py プロジェクト: wastu01/GoogleNews
 def testResultNumberWithTwoPages(self):
     """Accumulated results across two pages should total 20 items."""
     client = GoogleNews()
     client.search(keyword)
     client.getpage(2)
     self.assertEqual(len(client.result()), 20)
     print('Result length with two pages is correct')
コード例 #26
0
 def testResultNumberAtTwoPages(self):
     """page_at(2) should return exactly the 10 items of that page."""
     client = GoogleNews()
     client.search(keyword)
     page_items = client.page_at(2)
     self.assertEqual(len(page_items), 10)
     print('Result length at two pages is correct')
コード例 #27
0
 def testResultContainsKeyword(self):
     """The first result's description should mention the search keyword."""
     client = GoogleNews()
     client.search(keyword)
     desc = client.result()[0].get('desc').lower()
     print(desc)
     self.assertIn(keyword.lower(), desc)
     print('Result contains keyword')
コード例 #28
0
ファイル: news_api.py プロジェクト: CelineZhou3105/Stonk-Ponk
def get_news(ticker):
    """Return recent (2-day) Google News articles for *ticker* as a list of
    dicts with title/media/date/description/link/datetime.

    Raises Exception("Stock Not Found") when the ticker cannot be resolved
    and Exception("News Error") when the news lookup fails.
    """
    try:
        # side-effecting validation: raises when the ticker does not exist
        stock_data = stock_api.get_stock_data(ticker)
    except Exception:
        # was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
        raise Exception("Stock Not Found")

    try:
        googlenews = GoogleNews(period='2d')
        googlenews.search(ticker)
        result = googlenews.result()

        news_articles = []
        for item in result:
            news_articles.append({
                'title': item['title'],
                'media': item['media'],
                'date': item['date'],
                'description': item['desc'],
                'link': item['link'],
                'datetime': item['datetime'],
            })

        return news_articles
    except Exception:
        raise Exception("News Error")
コード例 #29
0
def get_search_results(keyword: str):
    """Return the first five Google News results for *keyword* over the
    last seven days.

    Both the topic feed (get_news) and the search results are fetched;
    results() returns the client's combined accumulated list.
    """
    engine = GoogleNews(lang="en", period="7d", encode="utf-8")
    engine.get_news(keyword)
    engine.search(keyword)
    engine.get_page(1)
    return engine.results()[:5]
コード例 #30
0
ファイル: views.py プロジェクト: PROuserR/My-website
def index(request):
    """Render index.html with Google News results for 'Shailene Woodley'."""
    engine = GoogleNews()
    engine.search('Shailene Woodley')
    context = {'news': engine.result()}
    return render(request, 'index.html', context)