Code example #1
# assumes: from GoogleNews import GoogleNews; from newspaper import Article
def get_admin_data(user_headline, user_img, user_keywords):
    admin_data = {'link': None, 'headline': None,
                  'content': None, 'image': None}
    google_news = GoogleNews(lang='en')
    google_news.search(user_headline)
    # get__links (double underscore) matches the method name in some versions
    # of the GoogleNews package; newer releases also expose get_links()
    links = google_news.get__links()
    print('No. of links found: ', len(links))
    if len(links) == 0:
        google_news = GoogleNews(lang='en')
        google_news.search(' '.join(user_keywords))
        links2 = google_news.get__links()
        if len(links2) == 0:
            return admin_data
        else:
            links = links2
    if len(links) == 1:
        link_used = links[0]
    else:
        link_used = links[1]

    admin_data['link'] = link_used
    # print(link_used)
    article = Article(link_used)
    article.download()
    article.parse()
    article.nlp()
    admin_data['headline'] = article.title
    admin_data['content'] = article.summary
    if article.top_image:  # newspaper sets top_image to '' (not None) when no image is found
        admin_data['image'] = article.top_image
    print('admin link: ', admin_data['link'])
    print('admin headline: ', admin_data['headline'])
    return admin_data
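A minimal, hypothetical driver for get_admin_data above (placeholder headline, fallback image URL, and keyword list; assumes the imports noted at the top of the snippet):

data = get_admin_data('NASA launches new Mars rover',
                      'https://example.com/fallback.jpg',
                      ['NASA', 'Mars', 'rover'])
print(data['headline'], data['link'])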
Code example #2
 def testResultHasLink(self):
     googlenews = GoogleNews()
     googlenews.search(keyword)
     result = googlenews.result()[0]
     print(result.get('link').lower())
     self.assertIn('http', result.get('link').lower())
     print('Result contains http link')
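This and the similar test methods below come from the package's test suite, where keyword is a module-level constant and the methods sit in a unittest.TestCase subclass. A minimal scaffold, reconstructed here as an assumption, would look like:

import unittest
from GoogleNews import GoogleNews

keyword = 'Apple'  # assumed: a shared module-level search term

class TestSearch(unittest.TestCase):
    pass  # test methods such as testResultHasLink() go here

if __name__ == '__main__':
    unittest.main()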
Code example #3
    def scrapeTitles(self, num=0):
        '''
            Inputs:

                num --> finds at least num titles

            Outputs: A list of raw titles

            How: Makes an API call to Google News,
                cleans the title list down to just words,
                strips out any titles that do not contain the ticker
        '''
        found = 0
        titles = []
        end = self.end
        start = self.start

        # start can be moved back by at most 7 days
        tries = 0

        while found <= num:

            if not self.validDates() or tries > 7:
                break

            googlenews = GoogleNews(start=start, end=end)
            googlenews.search(self.ticker)

            result = googlenews.result()
            if len(result) == 0:
                break
            df = pd.DataFrame(result)

            if len(df) > 0:
                self.titleList = df['title'].tolist() + df['desc'].tolist()

            self.clean()
            self.stripTitleList()

            if self.titleList is not None:
                print(self.start, self.end)
                print("after stripTitleList: Not None")
            else:
                print(self.start, self.end)
                print("after stripTitleList: None")

            # guard against stripTitleList() leaving titleList as None
            for t in (self.titleList or []):
                if t not in titles:
                    titles.append(t)
                    found += 1

            start = self.reduceDate(start, 1)

            tries += 1

        self.start = start
        self.titleList = titles[:num]
Code example #4
 def __init__(self):
     self.news = GoogleNews()
     self.news.setlang('en')
     #self.news.setTimeRange('01/01/2000','01/01/2015')
     self.news.setencode('utf-8')
     self.pageNumber = 1
     self.searchTerm = ""
Code example #5
File: test_search.py Project: manishhedau/GoogleNews
 def testResultHasDate(self):
   googlenews = GoogleNews()
   googlenews.search(keyword)
   result = googlenews.result()[0]
   print(result.get('date').lower())
   # assertNotEqual checks value inequality; assertIsNot only compares object identity
   self.assertNotEqual('', result.get('date').lower())
   print('Result date is not empty')
Code example #6
File: news_api.py Project: CelineZhou3105/Stonk-Ponk
def get_news(ticker):
    try:
        stock_data = stock_api.get_stock_data(ticker)  # validates that the ticker exists
    except Exception:
        raise Exception("Stock Not Found")

    try:
        googlenews = GoogleNews(period='2d')
        googlenews.search(ticker)
        result = googlenews.result()

        news_articles = []
        for item in result:
            news_article = {}
            news_article['title'] = item['title']
            news_article['media'] = item['media']
            news_article['date'] = item['date']
            news_article['description'] = item['desc']
            news_article['link'] = item['link']
            news_article['datetime'] = item['datetime']
            news_articles.append(news_article)

        return news_articles
    except Exception:
        raise Exception("News Error")
Code example #7
File: views.py Project: PROuserR/My-website
def index(request):
    googlenews = GoogleNews()
    googlenews.search('Shailene Woodley')
    news = googlenews.result()

    context = {'news': news}
    return render(request, 'index.html', context)
Code example #8
 def testResultContainsKeyword(self):
     googlenews = GoogleNews()
     googlenews.search(keyword)
     result = googlenews.result()[0]
     print(result.get('desc').lower())
     self.assertIn(keyword.lower(), result.get('desc').lower())
     print('Result contains keyword')
Code example #9
    def getTitles(self, ticker, start, end):
        googlenews = GoogleNews(start=start, end=end)
        googlenews.search(ticker)
        result = googlenews.result()
        df = pd.DataFrame(result)

        # guard: an empty result produces a DataFrame without a 'title' column
        if df.empty:
            return pd.Series(dtype=str)
        return df['title']
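A sketch of how getTitles might be driven; the instance name, ticker, and mm/dd/yyyy date strings are placeholders:

titles = scraper.getTitles('AAPL', '01/01/2021', '01/31/2021')
print(titles.head())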
Code example #10
 def testResultNumberAtTwoPages(self):
     googlenews = GoogleNews()
     googlenews.search(keyword)
     result = googlenews.page_at(2)
     length = len(result)
     self.assertEqual(length, 10)
     print('Result length at two pages is correct')
Code example #11
File: quicktest.py Project: JinSun2014/Sourcefinder
def getSources(topic):
	#urls = ['http://www.postcrescent.com/article/20140517/APC03/305170255/Integrys-Energy-growing-shareholders-told',
	#		'http://www.postcrescent.com/article/20140517/APC03/305170255/Integrys-Energy-growing-shareholders-told']
	# GoogleNews and YahooFinance are this project's own helpers returning URL lists
	urls = GoogleNews(topic)
	urls.extend(YahooFinance(topic))
	print(urls)
	print(len(urls))

	# spawn one worker thread per URL; each worker appends into names
	numUrls = len(urls)
	threads = []
	names = []

	for i in range(0, numUrls):
		t = threading.Thread(target=worker, args=(i, urls[i], names))
		threads.append(t)
		t.start()

	for p in threads:
		p.join()

	return names
Code example #12
    def get_training_data(self):
        """ load training data from google news """

        # check if data has been downloaded
        if not os.path.isfile('./data/sentiment_data/headlines.csv'):
            googlenews = GoogleNews(lang='en',
                                    start='01/01/2015')  # mm/dd/yyyy

            news = []

            keywords = [
                'Blockchain', 'Cryptocurrency', 'Bitcoin', 'Ethereum',
                'Stock Market', 'Finance'
            ]

            # fetch news headlines for every keyword in keywords list
            for keyword in tqdm(keywords):
                googlenews.get_news(keyword)
                results = googlenews.results()

                # append news headlines to list news
                for result in results:
                    news.append([result['datetime'], result['title']])

            # create a pandas dataframe with the news list and save it to csv
            os.makedirs('./data/sentiment_data', exist_ok=True)  # make sure the target dir exists
            df = pd.DataFrame(news, columns=['date', 'headline'])
            df.to_csv('./data/sentiment_data/headlines.csv', index=False)
            return df
        else:
            return pd.read_csv('./data/sentiment_data/headlines.csv')
Code example #13
File: news.py Project: KCStinger/Covid-19_status
def get_news():
    dt_today = str(datetime.today().strftime('%m/%d/%Y'))
    dt_previous = datetime.today() - timedelta(days=5)
    dt_previous = str(dt_previous.strftime('%m/%d/%Y'))
    #print(dt_today)
    #print(dt_previous)

    googlenews = GoogleNews(start=dt_previous, end=dt_today)
    googlenews.search('Coronavirus')  # fetches page 1
    # result() returns everything accumulated so far, so fetching page 2 and
    # reading once avoids the duplicate page-1 entries that the original
    # result1 + result2 concatenation produced
    googlenews.getpage(2)
    result = googlenews.result()
    news_list = list()
    for i in result:
        if i['desc'] != '':
            dic = dict()
            dic['title'] = i['title']
            dic['source'] = i['media']
            dic['date&time'] = i['date']
            dic['desc'] = i['desc']
            dic['link'] = i['link']
            news_list.append(dic)
    return news_list
Code example #14
 def testResultHasImage(self):
     googlenews = GoogleNews()
     googlenews.search(keyword)
     result = googlenews.result()[0]
     print(result.get('img').lower())
     self.assertIn('base64', result.get('img').lower())
     print('Result contains image')
Code example #15
def get_admin_data(user_headline, user_img):
    admin_data = {
        'link': None,
        'headline': None,
        'content': None,
        'image': None
    }
    google_news = GoogleNews(lang='en')
    google_news.search(user_headline)
    links = google_news.get__links()
    print('No. of links found: ', len(links))
    if len(links) == 0:
        return admin_data
    elif len(links) == 1:
        link_used = links[0]
    else:
        link_used = links[1]

    admin_data['link'] = link_used
    print(link_used)
    article = Article(link_used)
    article.download()
    article.parse()
    article.nlp()
    admin_data['headline'] = article.title
    admin_data['content'] = article.summary
    # newspaper sets top_image to '' (not None) when no image is found
    if not article.top_image:
        admin_data['image'] = user_img
    else:
        admin_data['image'] = article.top_image

    return admin_data
Code example #16
async def text_filter(message: types.Message):
    googlenews = GoogleNews(lang='ru')
    googlenews.search(str(message.text))
    result = googlenews.get_links()
    # reply with at most the first five links
    for link in result[:5]:
        await message.answer(link)
Code example #17
File: sentiment.py Project: theoportlock/system
 def run(self):
     googlenews = GoogleNews('en', 'd')
     googlenews.search(self.term)
     headline_results = googlenews.result()
     if not headline_results:  # avoid ZeroDivisionError when nothing is found
         return
     for i in headline_results:
         print(i["desc"])
         blob = TextBlob(i["desc"])
         self.sentiment += blob.sentiment.polarity / len(headline_results)
         self.subjectivity += blob.sentiment.subjectivity / len(
             headline_results)
Code example #18
File: scrape_news.py Project: kafkoders/oclean
def scrape():

	# Instance of class GoogleNews
	googlenews = GoogleNews()
	googlenews.search("oceans+trash")

	# sql_insert is presumably this project's own persistence helper
	for news_item in googlenews.result():
		sql_insert(news_item)
Code example #19
File: news_source_v1.py Project: dray89/news_updates
 def run_search(self):
     stocks = self.get_df()
     googlenews = GoogleNews()
     links = []
     # itertuples() yields (index, value) pairs for a single-column frame
     for i, j in stocks.itertuples():
         googlenews.search(j)
         results = googlenews.getlinks()  # older API name; newer releases expose get_links()
         for link in results:
             links.append(link)
     return links
Code example #20
def get_search_results(keyword: str):
    googlenews = GoogleNews(lang="en", period="7d", encode="utf-8")
    # results accumulate on the instance across calls: get_news() pulls from the
    # news.google.com feed, search() adds classic search results, and get_page()
    # appends another page of the same query
    googlenews.get_news(keyword)
    googlenews.search(keyword)
    googlenews.get_page(1)
    results = googlenews.results()
    return results[0:5]
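Because results accumulate on a GoogleNews instance, reusing one object across queries normally calls clear() in between; a small sketch with hypothetical keywords:

googlenews = GoogleNews(lang="en", period="7d")
for kw in ("bitcoin", "ethereum"):
    googlenews.clear()  # drop the previous query's results
    googlenews.search(kw)
    print(kw, len(googlenews.results()))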
Code example #21
def google_new_scrape(keyword=0, earliest_date="2000-01-01", end_date=""):
    # convert ISO dates to the mm/dd/yyyy format GoogleNews expects
    # (the original built this value but never passed it on)
    earliest_date = dt.strptime(earliest_date, "%Y-%m-%d").strftime("%m/%d/%Y")
    if end_date != "":
        end_date = dt.strptime(end_date, "%Y-%m-%d").strftime("%m/%d/%Y")
        googlenews = GoogleNews(start=earliest_date, end=end_date)
    else:
        googlenews = GoogleNews(start=earliest_date)
    googlenews.search('trump')  # note: the keyword parameter is unused in the original
    for i in range(1, 1000):
        googlenews.getpage(i)
        result = googlenews.result()
        print(len(result), result)
    df = pd.DataFrame(result)  # result() accumulates, so one frame after the loop suffices
    rows = []  # avoid shadowing the built-ins list and dict
    for ind in df.index:
        row = {}
        article = Article(df['link'][ind])
        article.download()
        article.parse()
        #article.nlp()
        row['Date'] = df['date'][ind]
        row['Media'] = df['media'][ind]
        row['Title'] = article.title
        row['Article'] = article.text
        row['Summary'] = article.summary  # stays empty while article.nlp() is commented out
        rows.append(row)
    news_df = pd.DataFrame(rows)
    print(news_df)
    file_name = 'googlenews.csv'
    news_df.to_csv(file_name)
Code example #22
def crawling_news(company_name_list, start_date, end_date, save_file_name):
    #set logger Handler
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    #define googlenews
    googlenews = GoogleNews(lang='en',
                            start=start_date,
                            end=end_date,
                            encode='utf-8')
    #news.google.com search sample
    all_title = []
    logger.info('loop start')  # use the configured logger, not the root logging module
    for i in range(len(company_name_list)):
        comp_name = company_name_list[i]
        googlenews.search(comp_name)
        logger.info('%s : %d%s' %
                    (comp_name,
                     ((i + 1) / len(company_name_list)) * 100, '%'))
        results = googlenews.results()  # fetch once instead of on every loop pass
        for j in range(len(results)):
            temp = []
            temp.append(results[j].get('title'))
            temp.append(comp_name)
            temp.append(fixing_date(results[j].get('date')))
            all_title.append(temp)
        #clear result list
        googlenews.clear()
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('%s.csv' % (save_file_name))
    logger.info('saved as %s.csv, done!!' % (save_file_name))
    return all_title
Code example #23
def get_company_news_link(company='NaN', news_num=5, time_range='today'):

    if company == 'NaN':
        return 'please input company name'

    news_link = []
    googlenews = GoogleNews()
    googlenews.clear()

    if time_range != 'today':
        # time_range is assumed to look like 'YYYY-MM-DD~YYYY-MM-DD'; each half
        # is reordered into the mm/dd/yyyy form GoogleNews expects
        start_date = "{1}/{2}/{0}".format(time_range[0:4], time_range[5:7],
                                          time_range[8:10])
        end_date = "{1}/{2}/{0}".format(time_range[11:15], time_range[16:18],
                                        time_range[19:21])
        googlenews.set_time_range(start_date, end_date)

    googlenews.search(company)
    result = googlenews.result()

    try:
        for num in range(news_num):
            news_link.append(result[num]['link'])
    except IndexError:
        if len(news_link) == 0:
            return 'No news for ' + company + ' in this period, or the network is unstable'
        return news_link
    else:
        return news_link
Code example #24
class GoogleNewsMethods():

    # Creates a googlenews object
    def __init__(self):
        self.googlenews = GoogleNews(lang="en")

    # Returns a list of news items for a particular stock (the date
    # parameter is currently unused by the underlying search)
    def newscollection(self, stock, date):
        self.googlenews.search(stock)
        self.newsList = self.googlenews.result()
        return self.newsList
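A hypothetical driver for the wrapper class above; the ticker and date are placeholders (the date is unused, as noted):

gm = GoogleNewsMethods()
for item in gm.newscollection('AAPL', '2021-01-01'):
    print(item['title'])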
Code example #25
File: autonews_api.py Project: riyakwl28/auto_news
def extract_google(query_terms, startDate, endDate):
    # default to the last seven days, in the same YYYY-MM-DD form the explicit
    # arguments arrive in (the original produced dd/mm/YYYY defaults, crashed
    # re-parsing them below, and had the start/end defaults swapped)
    if len(startDate) == 0:
        startDate = datetime.datetime.strftime(
            datetime.datetime.today().date() - datetime.timedelta(days=7),
            '%Y-%m-%d')
    if len(endDate) == 0:
        endDate = datetime.datetime.today().strftime('%Y-%m-%d')
    # note: Google's date filter generally expects mm/dd/yyyy; the original
    # passed dd/mm/yy, kept unchanged here
    startDate = datetime.datetime.strptime(startDate,
                                           '%Y-%m-%d').strftime('%d/%m/%y')
    endDate = datetime.datetime.strptime(endDate,
                                         '%Y-%m-%d').strftime('%d/%m/%y')
    final_articles = []
    print(startDate)
    print(endDate)
    print("Crawling Starting")
    # here extracting news from google news
    googlenews = GoogleNews()
    googlenews.setTimeRange(startDate, endDate)
    for query in query_terms:
        googlenews.clear()

        #forming the search term
        googlenews.search("India Technology " + query)

        result = googlenews.result()

        for n in range(len(result)):
            source = result[n]['media']
            url = result[n]['link']
            try:
                article = Article(url)
                article.download()
                article.parse()
            except Exception as e:
                print("Trouble downloading so skipping")
                continue
            content = article.text

            # summarize the content
            temp_content = re.sub(r'^\s*[\(\[].*?[\)\]]\s*', '', content)
            # sent_detector is assumed to be a module-level NLTK punkt sentence tokenizer
            sentences = sent_detector.tokenize(temp_content)
            summary = (" ".join(sentences[:2]).strip())

            date = result[n]['date']
            if (date.find('ago') != -1):
                # relative dates like '2 hours ago' collapse to today;
                # 'current' was undefined in the original
                date = datetime.datetime.now().date()
            title = result[n]['title']
            #         content=result[n]['desc']
            img = result[n]['img']
            #adding the extracted info in final_articles list
            final_articles.append({
                'source': source,
                'url': url,
                'date': date,
                'title': title,
                'content': content,
                'img': img
            })
    return final_articles
Code example #26
def get_top_news(term, limit=5):
    googlenews = GoogleNews()
    googlenews.search(term)
    result = googlenews.result()

    # apply the otherwise-unused limit parameter; the original wrapped a
    # self-assignment in a no-op try/except
    return json.dumps(result[:limit])
Code example #27
def get_news(text):
    googlenews = GoogleNews()
    googlenews.search(text)
    googlenews.clear()     # drops the page-1 results fetched by search()
    googlenews.getpage(2)  # so result() below holds page 2 only
    result = googlenews.result()

    return result
Code example #28
def initalize_google_news(start_date, end_date):
    """Initializes the googlenews object."""

    print("initalize_google_news...")

    googlenews = GoogleNews(encode="utf-8")  # create googlenews object
    googlenews.setlang("en")
    # note: setperiod("d") and setTimeRange() are competing filters; depending
    # on the library version one may override the other
    googlenews.setperiod("d")
    googlenews.setencode("utf-8")  # redundant with the constructor argument
    googlenews.setTimeRange(start_date, end_date)  # using user specified date range

    return googlenews
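A hypothetical call of the initializer above, with placeholder mm/dd/yyyy dates:

gn = initalize_google_news("01/01/2021", "01/31/2021")
gn.search("bitcoin")
print(len(gn.result()))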
Code example #29
File: getNews.py Project: Steven-Chang1114/UniChoice
def getPolarity(uniName):
    from GoogleNews import GoogleNews
    from newspaper import Article
    from newspaper import Config
    import pandas as pd
    from textblob import TextBlob

    uniName = uniName + ' Coronavirus'
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent
    googlenews = GoogleNews(start='08/01/2020', end='09/26/2020')
    googlenews.search(uniName)
    result = googlenews.result()

    # search() already fetched page 1; pull a few more pages
    # (result() accumulates across getpage() calls)
    for i in range(2, 6):
        googlenews.getpage(i)
    result = googlenews.result()
    df = pd.DataFrame(result)
    total = 0  # avoid shadowing the built-in sum
    counter = 0
    for ind in df.index:
        try:
            article = Article(df['link'][ind], config=config)
            article.download()
            article.parse()
            article.nlp()
            testimonial = TextBlob(article.summary)
            counter += 1
            total += testimonial.sentiment.polarity
        except Exception:
            pass

    # average over successfully processed articles; the original seeded
    # counter at 1, which skewed the mean
    return total / counter if counter else 0.0
Code example #30
File: newsScrape.py Project: LAHacks21/WordCloud
def googleLinks(topic):
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(topic)  # get_news() populates the instance; its return value is not useful
    links = googlenews.get_links()[:5]
    actualLinks = list()
    for l in links:
        l = "http://" + l  # the links come back without a scheme here, hence the prefix
        print(l)
        actualLinks.append(requests.get(l).url)  # follow redirects to the final article URL
    return actualLinks
Code example #31
File: temp.py Project: manishmj9431/ISO-25-E1
def getNews(query):
    googleNews = GoogleNews()
    googleNews.search(query)

    news = []

    # keep at most the first six results (the original i/number bookkeeping
    # was an off-by-one version of the same idea)
    for i, result in enumerate(googleNews.result()[:6]):
        n = {}
        n["title"] = result['title']
        n["description"] = result['desc']
        n["link"] = result['link']

        if i == 0:  # only the first item carries an image
            n["image"] = result['img']
        news.append(n)

    googleNews.clear()

    return news
Code example #32
from GoogleNews import GoogleNews
from readability import Document
from TextRank import Summary
from fetch_url import fetch_url
import sys
import re

number_of_links = int(sys.argv[1])
query = '+'.join(sys.argv[2:])
regex = re.compile(r"<(.*?)>|&#13;")  # raw string avoids the invalid \& escape

article_list = []
summary_list = []

# note: this call targets an older/project-local GoogleNews API in which
# search() takes a link count and returns a list of links; the current
# pypi package requires an instance and its search() returns None
links = GoogleNews.search(query, number_of_links)


if not links:
	print("No links found")

else:
	result = fetch_url.fetch_parallel(links)

	while not result.empty():
		article = Document(result.get()).summary()
		article = re.sub(regex, "", article)
		# encode/decode round-trip strips non-ASCII while keeping str
		article = article.encode('ascii', 'ignore').decode('ascii')
		summary = Summary.textrank(article)
		summary = summary.encode('ascii', 'ignore').decode('ascii')
		article_list.append(article)
		summary_list.append(summary)