def crawling_news(company_name_list, start_date, end_date, save_file_name):
    """Crawl Google News headlines for each company and save them to CSV.

    Parameters
    ----------
    company_name_list : list of str
        Company names to search for.
    start_date, end_date : str
        Search window in the mm/dd/yyyy format GoogleNews expects.
    save_file_name : str
        Output file name, without the '.csv' extension.

    Returns
    -------
    pandas.DataFrame
        One row per headline: [title, company name, fixed date].
    """
    #set logger Handler
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    #define googlenews
    googlenews = GoogleNews(lang='en',
                            start=start_date,
                            end=end_date,
                            encode='utf-8')
    #news.google.com search sample
    all_title = []
    # Bug fix: use the configured logger instead of the root 'logging' module,
    # otherwise the handler set up above is never exercised.
    logger.info('loop start')
    for i, comp_name in enumerate(company_name_list):
        googlenews.search(comp_name)
        # Progress report: percentage of companies processed so far.
        logger.info('%s : %d%s',
                    comp_name,
                    ((i + 1) / len(company_name_list)) * 100, '%')
        # Fetch the result list once instead of re-querying it per field.
        for item in googlenews.results():
            all_title.append([item.get('title'),
                              comp_name,
                              fixing_date(item.get('date'))])
        #clear result list before the next company's search
        googlenews.clear()
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('%s.csv' % (save_file_name))
    logger.info('saved as %s.csv, done!!' % (save_file_name))
    return all_title
コード例 #2
0
ファイル: News_Data.py プロジェクト: sakshiseth/CitiHack
 def getnewsData(self):
     """Fetch today's top 10 'Market' and 'Business' Google News items and
     export each batch as a styled JPEG table under ./template/.
     """
     today = date.today()
     # GoogleNews expects dd/mm/yyyy; date.today() prints as yyyy-mm-dd.
     year, month, day = str(today).split('-')
     toDate = day + '/' + month + '/' + year
     # Identical fetch/export pipeline for both topics — loop out the
     # duplication instead of repeating it per topic.
     for topic in ('Market', 'Business'):
         client = GoogleNews(start=toDate, end=toDate)
         client.get_news(topic)
         df = pd.DataFrame(client.results()).head(10)
         dfi.export(df, './template/df_styled_%s.jpeg' % topic)
コード例 #3
0
def get_search_results(keyword: str):
    """Return at most the first five Google News results for *keyword*.

    NOTE(review): get_news(), search() and get_page(1) each trigger a
    fetch on the same client, so the combined result list may contain
    overlapping entries — confirm all three calls are intentional.
    """
    client = GoogleNews(lang="en", period="7d", encode="utf-8")
    client.get_news(keyword)
    client.search(keyword)
    client.get_page(1)
    return client.results()[:5]
コード例 #4
0
    def get_training_data(self):
        """Load news-headline training data, downloading from Google News
        on first use and caching the result as a CSV.

        Returns
        -------
        pandas.DataFrame
            Columns ['date', 'headline'].
        """
        # Serve the cached copy when it already exists (guard clause).
        if os.path.isfile('./data/sentiment_data/headlines.csv'):
            return pd.read_csv('./data/sentiment_data/headlines.csv')

        googlenews = GoogleNews(lang='en',
                                start='01/01/2015')  # mm/dd/yyyy

        news = []

        # NOTE(review): 'Etherium' is likely a typo for 'Ethereum', but
        # fixing it changes what is searched, so it is left as-is.
        keywords = [
            'Blockchain', 'Cryptocurrency', 'Bitcoin', 'Etherium',
            'Stock Market', 'Finance'
        ]

        # fetch news headlines for every keyword in keywords list
        for keyword in tqdm(keywords):
            googlenews.get_news(keyword)

            # append news headlines to list news
            for result in googlenews.results():
                news.append([result['datetime'], result['title']])

            # Bug fix: GoogleNews accumulates results across calls, so
            # without clear() every keyword re-appends all headlines seen
            # so far, producing duplicates in the training set.
            googlenews.clear()

        # create a pandas dataframe with news list and save it to csv
        os.makedirs('./data/sentiment_data', exist_ok=True)
        df = pd.DataFrame(news, columns=['date', 'headline'])
        df.to_csv('./data/sentiment_data/headlines.csv', index=False)
        return df
コード例 #5
0
  def job(self):
      """One scraping cycle: download the DB, collect news for every
      configured tag, de-duplicate, and back the database up again."""
      #Download current database
      self.getDB()
      self.print_header(self.rawFileName)
      self.lineCounter(self.rawFileName)
      collected = 0
      # One client is enough for all tags; clear() resets results between
      # searches while the lang/period settings persist.
      googlenews = GoogleNews()
      googlenews.set_lang(self.newsLang)
      # Bug fix: the GoogleNews API method is set_period(), not
      # setperiod() — the original raised AttributeError at runtime.
      googlenews.set_period('1d')
      for tag in self.newsTags:
        self.logger.info(f"Collecting newses from tag: {tag}")
        googlenews.clear()
        googlenews.get_news(tag)
        output = pd.DataFrame(googlenews.results(sort=True))
        collected += len(output['title'])
        self.saveToFile(output, self.rawFileName)
      self.logger.info(f"Collected amount of news:  {collected}")
      self.removeDuplicates(self.rawFileName, self.finalFileName)

      # Rename the de-duplicated file back to the buffer name, then back up.
      os.rename(self.finalFileName, self.rawFileName)
      self.logger.info(f"Renamed: {self.finalFileName} to: {self.rawFileName}")
      self.backupDB()
コード例 #6
0
async def create_item(item: Item):
    """Classify the sentiment of the first Google News result for the
    request message and return the headline with its sentiment label.
    """
    googlenews = GoogleNews()
    googlenews.set_lang('pt')
    googlenews.search(item.mensagem)
    googlenews.results()
    result = googlenews.get_texts()[0]
    # Translate to English so the English-trained analyser can score it.
    translations = translator.translate(result, dest='en')
    textTranslator = translations.text
    # avaliação de polaridade de sentimento da mensagem — compute the
    # polarity once (the original scored the text twice).
    compound = analyser.polarity_scores(textTranslator)['compound']
    if compound > 0:
        mensagemSentimento = "noticia positiva"
    elif compound == 0:  # only exactly zero reaches here after the > 0 branch
        mensagemSentimento = "noticia neutra"
    else:
        mensagemSentimento = "noticia negativa"
    return {"mensagem": googlenews.get_texts()[0], "sentimento": mensagemSentimento}
コード例 #7
0
    def news_sentiments(self):
        """Collect news about the ticker from Finviz, Yahoo Finance and
        Google News.

        Returns
        -------
        tuple
            (finviz_df, yahoo_news, yahoo_press_releases, sector_news,
             stock_news).
        """
        # --- Finviz news table -------------------------------------------
        BASE_URL = f'https://finviz.com/quote.ashx?t={self.ticker}'
        soup = self._get_soup(BASE_URL)

        table = soup.find('table', {'class': 'fullview-news-outer'})
        rows = table.find_all('tr')
        df_data = []
        for row in rows:
            date = row.find('td', {'align': 'right'})
            article = row.find('td', {'align': 'left'})
            link = article.find('a')['href']
            df_data.append((date.get_text(), article.get_text(), link))
        df = pd.DataFrame(df_data, columns=['Time', 'Headline', 'Link'])

        # --- Yahoo Finance news and press releases -----------------------
        # Yahoo tags headline anchors with this (brittle) CSS class list;
        # hoisted so the two queries below cannot drift apart.
        yahoo_link_class = 'js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) C(#0078ff):h C(#000) LineClamp(2,46px) LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled'

        BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/news?p={self.ticker}'
        soup = self._get_soup(BASE_URL)
        links = soup.find_all('a', {'class': yahoo_link_class})
        news = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]

        BASE_URL = f'https://finance.yahoo.com/quote/{self.ticker}/press-releases?p={self.ticker}'
        soup = self._get_soup(BASE_URL)
        links = soup.find_all('a', {'class': yahoo_link_class})
        press_releases = [(link.get_text(), str('yahoo.com' + link['href'])) for link in links]
        # Look for keywords in the news? Any showcases, Investor/analyst days, Analyst revisions, Management transitions
        # Product launches, Significant stock buyback changes

        # --- Google News for the stock itself ----------------------------
        googlenews = GoogleNews(lang='en', period='14d') # Specify period for news
        googlenews.get_news(f'${self.ticker} stock')
        stock_news = googlenews.results()

        # To get other pages, do googlenews.get_page(2), etc.

        # --- Google News for each sector the company competes in ---------
        sectors = self.find_competition()
        sector_news = []
        if sectors:
            for sector in sectors:
                googlenews = GoogleNews(lang='en', period='14d')
                googlenews.get_news(f'{sector} sector stocks')
                # Bug fix: the API method is results(), not result() —
                # the original raised AttributeError here.
                sector_news.append(googlenews.results())

        return df, news, press_releases, sector_news, stock_news
コード例 #8
0
def crawling_news(company_name_list, start_date, end_date):
    """Crawl Google News titles for each company within the given date
    range and save them to 'sp500news.csv'.

    Parameters
    ----------
    company_name_list : list of str
        Company names to search for.
    start_date, end_date : str
        Search window in the mm/dd/yyyy format GoogleNews expects.

    Returns
    -------
    pandas.DataFrame
        One title per row.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    # Bug fix: the original passed the literal strings 'start_date' and
    # 'end_date' instead of the function arguments.
    googlenews.set_time_range(start_date, end_date)
    googlenews.set_encode('utf-8')
    #news.google.com search sample
    all_title = []
    # Use the configured logger rather than the root 'logging' module.
    logger.info('loop start')
    for i, comp_name in enumerate(company_name_list):
        googlenews.get_news(comp_name)
        logger.info('%s : %0.2f%s',
                    comp_name,
                    ((i + 1) / len(company_name_list)) * 100, '%')
        # Fetch the result list once per company instead of per item.
        for item in googlenews.results():
            all_title.append(item.get('title'))
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logger.info('saved to csv, done!!')
    return all_title
コード例 #9
0
def googleNewsApi(request, word):
    """Django view: render homepage.html with Google News results and
    recent tweets for *word*.

    SECURITY NOTE(review): the Twitter API credentials below are
    hard-coded secrets committed to source — they should be rotated
    immediately and loaded from settings/environment instead.
    """

    # Fetch the last week of Google News items for the search word.
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    googlenews.total_count()
    resultsGoogleNews = googlenews.results()
    #print(resultsGoogleNews)
    #print(googlenews.total_count())

    #TWITTER
    # SECURITY: hard-coded credentials — see note in the docstring.
    consumer_key = 'sz6x0nvL0ls9wacR64MZu23z4'
    consumer_secret = 'ofeGnzduikcHX6iaQMqBCIJ666m6nXAQACIAXMJaFhmC6rjRmT'
    access_token = '854004678127910913-PUPfQYxIjpBWjXOgE25kys8kmDJdY0G'
    access_token_secret = 'BC2TxbhKXkdkZ91DXofF7GX8p2JNfbpHqhshW1bwQkgxN'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    date_since = datetime.today().strftime('%Y-%m-%d')
    print(date_since)
    #tweets = api.search(str("bitcoin"), count=1)
    # Lazy cursor over up to 100 English tweets matching the word; it is
    # consumed by the template when rendered below.
    tweets = tweepy.Cursor(api.search,
                           q=str(word),
                           lang="en",
                           since=date_since).items(100)
    """print(tweets.__dict__['page_iterator'].__dict__)
    for tweet in tweets:
        print(tweet)
        print(tweet.id)"""
    #return googlenews
    """for result in resultsGoogleNews:

        title = result['title']
        date = result['date']
        link = result['link']
        source = result['site']

        news = {'title':title, 'date': date, 'link': link, 'site':site}
    """
    return render(request, 'homepage.html', {
        'news': resultsGoogleNews,
        'tweets': tweets
    })
コード例 #10
0
def GNews():
    """Search Google News for a random country/topic keyword and return
    the title + description + link of the LAST result concatenated.

    NOTE(review): the loop overwrites `data` on every iteration, so only
    the final result survives — preserved here, but confirm whether all
    results were meant to be accumulated instead.
    """
    gn = GoogleNews()
    gn.set_period('7d')
    # Renamed from 'list' to avoid shadowing the builtin.
    topics = [
        "INDIA", "USA", "UK", "AUSTRALIA", "FRANC", "UGANDA", "PAKISTAN",
        "MALDIVES", "CELEBRITY"
    ]

    gn.search(random.choice(topics))
    rs = gn.results()

    # Bug fix: without this default, an empty result set raised
    # NameError on the return statement.
    data = ""
    for entry in rs:
        data = entry['title']
        data += entry['desc']
        data += entry['link']
    return data
コード例 #11
0
def callGoogle(state):
    """Return the most relevant 'covid <state>' Google News item as a
    dict with keys titulo/desc/link/fonte/data, or an error string when
    the scrape fails.
    """
    def _to_dict(row):
        # Map a GoogleNews result row onto the Portuguese response keys.
        return {
            'titulo': row['title'],
            'desc': row['desc'],
            'link': row['link'],
            'fonte': row['media'],
            'data': row['date'],
        }

    try:
        googlenews = GoogleNews(lang='pt')
        googlenews.search('covid ' + state)
        newsData = googlenews.results(sort=True)

        # For the whole country the top result is always used.
        if state == 'Brasil':
            return _to_dict(newsData[0])

        for row in newsData:
            this_row = row['title']
            if state in this_row \
                or initialStates[state] in this_row \
                or state.split(' ')[0] in this_row:
                return _to_dict(row)
            # Bug fix: ('BH' or 'bh') evaluates to just 'BH', so the
            # lowercase form was never matched; test both explicitly.
            elif state == 'Minas Gerais' and ('BH' in this_row or 'bh' in this_row):
                return _to_dict(row)

        # Nothing mentioned the state: fall back to the top result.
        return _to_dict(newsData[0])
    except Exception:
        # Deliberate best-effort: any scraping failure degrades to a
        # message rather than crashing the caller.
        return 'Google News API is not working'
コード例 #12
0
def scrape_the_news():
    """Fetch last week's Google News for the top NLP topic and insert the
    rows into the scrapenews_newslist Postgres table."""
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    # NOTE(review): config is built but never passed anywhere — confirm
    # whether newspaper Article downloads were meant to receive it.
    config = Config()
    config.browser_user_agent = user_agent

    topiclist = NLP_news()
    print(topiclist[0])

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])

    result = googlenews.results()

    googlenews.clear()

    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]

    # NOTE(review): hard-coded DB credentials — move to env/config.
    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    try:
        curr = conn.cursor()
        try:
            # Loop-invariant statement hoisted out of the row loop.
            insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass),%s, %s, %s, %s, %s, %s)"
            for i, row in df.iterrows():
                try:
                    row.Link = 'https://' + row.Link
                    values = [row[column] for column in row.keys()]
                    curr.execute(insert_statement, tuple(values))
                except Exception:
                    # Best-effort: one bad row must not abort the batch.
                    print('could not add row', i)
            conn.commit()
        finally:
            # Resource fix: cursor/connection were leaked when any
            # statement above raised before the close() calls.
            curr.close()
    finally:
        conn.close()
コード例 #13
0
def get_news(query: str, pages: int = 35) -> List[Dict[str, Any]]:
    """
    Search news defined by query.
    Returns a list of search results.
    
    Parameters
    ----------
    query: str
        The news search query to use.
    pages: int, optional
        Number of result pages to fetch (default 35).
        NOTE(review): get_page() is called with 0-based values here —
        confirm whether GoogleNews expects 1-based page numbers.
        
    Returns
    -------
    news: list of news items.
        News list, each element in the list is a dictionary containing news details like title, date, URL etc.
    """

    # Results are restricted to a fixed 2010–2015 window (mm/dd/yyyy).
    googlenews = GoogleNews(start='01/01/2010', end='01/01/2015')
    googlenews.search(query)
    news = []
    for page in tqdm(range(pages), leave=False):
        googlenews.get_page(page)
        news += googlenews.results()

    return news
コード例 #14
0
def query_google_news(query):
    """Query Google News and wrap up to the first 50 results as AllNews
    objects ranked from 1.

    Parameters
    ----------
    query : str
        Search phrase.

    Returns
    -------
    list of AllNews
        At most 50 items, ranked in result order starting at 1.
    """
    googlenews = GoogleNews(lang='en')
    googlenews.get_news(query)
    res = googlenews.results()

    all_news_list = []
    # Slice + enumerate replaces the manual counter/'< 51'/break logic
    # (and drops the leftover debug print of the raw result list).
    for rank, news in enumerate(res[:50], start=1):
        all_news_list.append(AllNews(news["desc"],
                                     news["title"],
                                     category=None,
                                     date_time=news["datetime"],
                                     rank=rank,
                                     src='google'))
    return all_news_list
コード例 #15
0
from datetime import date
from GoogleNews import GoogleNews

# Search a user-supplied topic from 01/11/2020 up to today and print the
# first few headlines with their links.
news = GoogleNews()
news.set_lang('en')
date_today = date.today()
news.set_time_range('01/11/2020', date_today)
news.set_encode('utf-8')
topic = input("Topic : ")
news.search(topic)
news.get_page(2)
#headlines with links WORLD NEWS
# Fetch the result list once (the original re-queried it twice per
# iteration) and stop early instead of raising IndexError when fewer
# than six results came back.
results = news.results()
for item in results[:6]:
    print(item["title"])
    print(item["link"])
コード例 #16
0
months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021']

# Collected (de-duplicated) result rows.
fin = []

# Titles already collected; a set gives O(1) membership tests instead of
# the original list's linear scan per result.
seen = set()

for first in primary_phrases:
    for second in secondary_phrases:
        full_phrase = first + " " + second

        print(full_phrase)

        # Slide over consecutive month boundaries as [start, end) windows.
        for i in range(0, len(months) - 1):
            googlenews.set_time_range(months[i], months[i + 1])
            googlenews.get_news(full_phrase)
            res = googlenews.results(sort=True)

            #It would be very easy to get more than the first page. Simply use: googlenews.get_page(2) or result = googlenews.page_at(2), in conjunction with googlenews.total_count()
            #(to see how many results show up on that page, if there are zero, then probably that'the last page, but I'm not sure if that's exactly how it works)

            for result in res:
                if result['title'] not in seen:
                    result['start date'] = months[i]
                    result['end date'] = months[i + 1]
                    result['primary phrase'] = first
                    result['secondary phrase'] = second
                    result['full phrase'] = full_phrase
                    fin.append(result)
                    seen.add(result['title'])

df = pd.DataFrame(fin)
コード例 #17
0
ファイル: main.py プロジェクト: kleong12/Auto-Bot-BEI
def game():

    for i in range(1000):

        request = input('Auto-Bot at your service. Please state your request. ')

        if request == 'google':
            query = input('Search: ')
            print(search(query, num_results = 3))


        elif request == 'stocks':
            ticker = input('Ticker Symbol: ')
            realticker = yf.Ticker(ticker)
            print(realticker.history(period= '1m'))

        elif request == 'weather':
            place = input('City: ')
            weather = weather_forecast.forecast(place=place, time=current_time, date=d1)
            


        elif request == 'email':
            to = input('Email address: ')
            content = input('What do you want to say? ')






            address = '*****@*****.**'
            password = '******'
            server = 'imap.gmail.com'



            s = bot.SMTP(host= 'smtp.gmail.com', port= 587)
            s.starttls()
            s.login(address, password)
            s.ehlo()


            s.sendmail(address, to ,content)
            {}
        elif request == 'song':
            song = input('Song name: ')
            results = YoutubeSearch(song, max_results=1).to_dict()
            dict = results[0].values()
            newdict = list(dict)

            url = newdict[7]



            print(f'https://www.youtube.com{url}')

        elif request == 'news':

            news = input('Search news: ')
            gn = GoogleNews()
            top = gn.search(news)
            newnews = gn.results()

            dict = list(newnews[0].values())
            dicttwo = list(newnews[1].values())
            dictthree = list(newnews[2].values())
            dictfour = list(newnews[3].values())
            dictfive = list(newnews[4].values())



            title1 = dict[0]
            title2 = dicttwo[0]
            title3 = dictthree[0]
            title4 = dictfour[0]
            title5 = dictfive[0]

            src1 = dict[1]
            src2 = dicttwo[1]
            src3 = dictthree[1]
            src4 = dictfour[1]
            src5 = dictfive[1]

            cap1 = dict[4]
            cap2 = dicttwo[4]
            cap3 = dictthree[4]
            cap4 = dictfour[4]
            cap5 = dictfive[4]

            url1 = dict[5]
            url2 = dicttwo[5]
            url3 = dictthree[5]
            url4 = dictfour[5]
            url5 = dictfive[5]

            print(f'Title: {title1}')
            print(f'Source: {src1}')
            print(f'Caption: {cap1}')
            print(f'Url: {url1}')

            print(f'Title: {title2}')
            print(f'Source: {src2}')
            print(f'Caption: {cap2}')
            print(f'Url: {url2}')

            print(f'Title: {title3}')
            print(f'Source: {src3}')
            print(f'Caption: {cap3}')
            print(f'Url: {url3}')

            print(f'Title: {title4}')
            print(f'Source: {src4}')
            print(f'Caption: {cap4}')
            print(f'Url: {url4}')

            print(f'Title: {title5}')
            print(f'Source: {src5}')
            print(f'Caption: {cap5}')
            print(f'Url: {url5}')








        elif request == 'math':

            def add(x, y):
                return x + y

            # This function subtracts two numbers
            def subtract(x, y):
                return x - y

            # This function multiplies two numbers
            def multiply(x, y):
                return x * y

            # This function divides two numbers
            def divide(x, y):
                return x / y



            while True:
                # Take input from the user
                choice = input("Enter choice( + / - / * / / ): ")

                # Check if choice is one of the four options
                if choice in ('+', '-', '*', '/'):
                    num1 = float(input("Enter first number: "))
                    num2 = float(input("Enter second number: "))

                    if choice == '+':
                        print(num1, "+", num2, "=", add(num1, num2))

                    elif choice == '-':
                        print(num1, "-", num2, "=", subtract(num1, num2))

                    elif choice == '*':
                        print(num1, "*", num2, "=", multiply(num1, num2))

                    elif choice == '/':
                        print(num1, "/", num2, "=", divide(num1, num2))
                    break
                else:
                    print("Invalid Input")

        elif request == 'game':

            type = input('Which game? Press 1 for tic-tac-toe, press 2 for rock-paper-scissors ')

            if type == '1':
                unused_keys = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
                theBoard = {'7': ' ', '8': ' ', '9': ' ',
                            '4': ' ', '5': ' ', '6': ' ',
                            '1': ' ', '2': ' ', '3': ' '}

                board_keys = []

                for key in theBoard:
                    board_keys.append(key)

                ''' We will have to print the updated board after every move in the game and 
                    thus we will make a function in which we'll define the printBoard function
                    so that we can easily print the board everytime by calling this function. '''

                def printBoard(board):
                    print(board['7'] + '|' + board['8'] + '|' + board['9'])
                    print('-+-+-')
                    print(board['4'] + '|' + board['5'] + '|' + board['6'])
                    print('-+-+-')
                    print(board['1'] + '|' + board['2'] + '|' + board['3'])

                # Now we'll write the main function which has all the gameplay functionality.
                def tictactoe():

                    turn = 'X'
                    count = 0

                    for i in range(10):
                        printBoard(theBoard)
                        print("It's your turn," + turn + ".Move to which place?")

                        if turn == 'O':
                            choice = random.randint(1,9)
                            choice = unused_keys[choice]



                            if theBoard[f'{choice}'] == ' ':
                                theBoard[choice] = turn
                                unused_keys.remove(choice)
                                count += 1






                        elif turn == 'X':
                            move = input()

                            if theBoard[move] == ' ':
                                theBoard[move] = turn
                                unused_keys.remove(move)
                                count += 1
                            else:
                                print("That place is already filled.\nMove to which place?")
                                continue

                        # Now we will check if player X or O has won,for every move after 5 moves.
                        if count >= 5:
                            if theBoard['7'] == theBoard['8'] == theBoard['9'] != ' ':  # across the top
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['4'] == theBoard['5'] == theBoard['6'] != ' ':  # across the middle
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['1'] == theBoard['2'] == theBoard['3'] != ' ':  # across the bottom
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['1'] == theBoard['4'] == theBoard['7'] != ' ':  # down the left side
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['2'] == theBoard['5'] == theBoard['8'] != ' ':  # down the middle
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['3'] == theBoard['6'] == theBoard['9'] != ' ':  # down the right side
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['7'] == theBoard['5'] == theBoard['3'] != ' ':  # diagonal
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break
                            elif theBoard['1'] == theBoard['5'] == theBoard['9'] != ' ':  # diagonal
                                printBoard(theBoard)
                                print("\nGame Over.\n")
                                print(" **** " + turn + " won. ****")
                                break

                                # If neither X nor O wins and the board is full, we'll declare the result as 'tie'.
                        if count == 9:
                            print("\nGame Over.\n")
                            print("It's a Tie!!")

                        # Now we have to change the player after every move.
                        if turn == 'X':
                            turn = 'O'
                        else:
                            turn = 'X'

                tictactoe()




            elif type == '2':
                print("Winning Rules of the Rock paper scissor game as follows: \n"
                  + "Rock vs paper->paper wins \n"
                  + "Rock vs scissor->Rock wins \n"
                  + "paper vs scissor->scissor wins \n")


                print("Enter choice \n 1. Rock \n 2. paper \n 3. scissor \n")


                choice = int(input("User turn: "))

                # OR is the short-circuit operator
                # if any one of the condition is true
                # then it return True value

                # looping until user enter invalid input
                while choice > 3 or choice < 1:
                    choice = int(input("enter valid input: "))

                    # initialize value of choice_name variable
                # corresponding to the choice value
                if choice == 1:
                    choice_name = 'Rock'
                elif choice == 2:
                    choice_name = 'paper'
                else:
                    choice_name = 'scissor'

                # print user choice
                print("user choice is: " + choice_name)
                print("\nNow its computer turn.......")

                # Computer chooses randomly any number
                # among 1 , 2 and 3. Using randint method
                # of random module
                comp_choice = random.randint(1, 3)

                # looping until comp_choice value
                # is equal to the choice value
                while comp_choice == choice:
                    comp_choice = random.randint(1, 3)

                    # initialize value of comp_choice_name
                # variable corresponding to the choice value
                if comp_choice == 1:
                    comp_choice_name = 'Rock'
                elif comp_choice == 2:
                    comp_choice_name = 'paper'
                else:
                    comp_choice_name = 'scissor'

                print("Computer choice is: " + comp_choice_name)

                print(choice_name + " V/s " + comp_choice_name)

                # condition for winning
                if ((choice == 1 and comp_choice == 2) or
                            (choice == 2 and comp_choice == 1)):
                    print("paper wins => ", end="")
                    result = "paper"

                elif ((choice == 1 and comp_choice == 3) or
                      (choice == 3 and comp_choice == 1)):
                    print("Rock wins =>", end="")
                    result = "Rock"
                else:
                    print("scissor wins =>", end="")
                    result = "scissor"

                # Printing either user or computer wins
                if result == choice_name:
                    print("<== User wins ==>")
                else:
                    print("<== Computer wins ==>")
コード例 #18
0
from GoogleNews import GoogleNews
from newspaper import Article
import pandas as pd
from datetime import date

from afinn import Afinn
af = Afinn()

# Raw string: plain quotes would treat the backslashes as (invalid)
# escape sequences, which is a DeprecationWarning today and a syntax
# error in future Python versions.
PATH = r"C:\Program Files (x86)\chromedriver_win32\chromedriver.exe"
#driver = webdriver.Chrome(PATH)

# Restrict the search window to today's news only; format the date once.
today_str = date.today().strftime('%m/%d/%Y')
print(today_str)
googlenews = GoogleNews(start=today_str, end=today_str)
#topic = input('what topic would u like to know about')
googlenews.search(' ')
#googlenews.get_page(2)
a = googlenews.results()

# Score each headline with AFINN: >0 positive, 0 neutral, <0 negative.
newList = [item['title'] for item in a]
for x in newList:
    print(x)
    score = af.score(x)
    if score > 0:
        print('positive')
    elif score == 0:
        print('neutral')
    else:
        print('negative')

k = input("press close to exit")
def main():
    """Scrape Google News for every (vaccine company, country) phrase pair,
    score each headline with VADER, and write per-company article CSVs,
    per-company summary CSVs, and a cross-company meta CSV under ./Output/.

    Reads country rows (Country, Latitude, Longitude) from 'sample.csv'.
    Relies on module-level `csv`, `pd` (pandas), `GoogleNews` and
    `SentimentIntensityAnalyzer` (vaderSentiment) being importable.
    """

    # One (summary-dict, company) tuple per country per company; used at the
    # end to pick the most-positively-covered company for each country.
    all_df = []

    sid_obj = SentimentIntensityAnalyzer()

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-16')

    # Primary phrases: the vaccine keywords under study.
    # Secondary phrases: the target countries read from sample.csv.
    company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']

    # Empty list means "use every country in sample.csv".
    # testing_countries = ['Egypt', 'Kenya', 'Nigeria']
    testing_countries = []

    # Consecutive month boundaries; each adjacent pair forms one search window.
    months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']

    for first in company_name:

        fin = []
        # Titles already collected for this company, to dedupe across
        # overlapping date windows (set: O(1) membership tests).
        seen = set()

        with open('sample.csv', mode='r') as csv_file:
            csv_reader = csv.DictReader(csv_file)

            summary_data = []

            for row in csv_reader:
                # '\ufeff' prefix: sample.csv is UTF-8 with a BOM, so the
                # first header cell comes through as '\ufeffCountry'.
                second = row['\ufeffCountry']
                if (second not in testing_countries and len(testing_countries) != 0):
                    continue

                full_phrase = first + " " + second
                print(full_phrase)

                counter = 0    # articles scored for this (company, country)
                sum_sent = 0   # running Magnitude total, for averaging

                pos_count = 0
                neg_count = 0

                # Tracks the most negative headline seen for this pair.
                neg_article = {'title': 'N/A', '% Negative': 0}

                for i in range(0, len(months) - 1):
                    googlenews.set_time_range(months[i], months[i + 1])
                    googlenews.get_news(full_phrase)
                    res = googlenews.results()

                    # Only the first result page is fetched here; use
                    # googlenews.get_page(2) / page_at(2) with total_count()
                    # to walk further pages if needed.

                    for result in res:
                        if result['title'] not in seen:
                            result['start date'] = months[i]
                            result['end date'] = months[i + 1]
                            result['company'] = first
                            result['country'] = second
                            result['latitude'] = row['Latitude']
                            result['longitude'] = row['Longitude']

                            # VADER: neg/neu/pos are fractions in [0, 1];
                            # compound is [-1, 1], rescaled to a 0-100
                            # "Magnitude" (50 = neutral).
                            sentiment_dict = sid_obj.polarity_scores(result['title'])
                            result['% Negative'] = sentiment_dict['neg'] * 100
                            result['% Neutral'] = sentiment_dict['neu'] * 100
                            result['% Positive'] = sentiment_dict['pos'] * 100
                            result['Magnitude'] = sentiment_dict['compound'] * 50 + 50

                            counter += 1
                            sum_sent += result['Magnitude']

                            if result['% Positive'] > result['% Negative']:
                                pos_count += 1
                            else:
                                neg_count += 1

                            if result['% Negative'] >= neg_article['% Negative']:
                                neg_article = result

                            fin.append(result)
                            seen.add(result['title'])

                # Guard the ratios against divide-by-zero when no articles
                # were found; 50 / 0 are the neutral defaults.
                posPercent = 50
                if pos_count + neg_count > 0:
                    posPercent = pos_count / (pos_count + neg_count)

                magni = 0
                if counter > 0:
                    magni = sum_sent / counter

                country_comp_score = {'country': second, 'latitude': row['Latitude'],
                'longitude': row['Longitude'], 'magnitude': magni, 'positive': pos_count,
                'negative': neg_count, 'pos/(pos+neg)': posPercent, 'Most negative title': neg_article['title']}

                summary_data.append(country_comp_score)
                all_df.append((country_comp_score, first))

            df = pd.DataFrame(fin)
            # BUG FIX: DataFrame.drop is not in-place -- the original discarded
            # the returned frame, so these columns leaked into the CSV.
            # errors='ignore' tolerates scrapes that lack some of the columns.
            df = df.drop(columns=['date', 'datetime', 'img', 'media'], errors='ignore')
            df.to_csv("./Output/{}_output.csv".format(first), index=False)

            summary_df = pd.DataFrame(summary_data)
            summary_df.to_csv("./Output/{}_summary_output.csv".format(first), index=False)

    # Meta pass: for each country keep the company with the highest count of
    # positive articles.
    fields = ['Country', 'Company', 'Count']

    meta_data = []
    seen = []
    for result in all_df:
        if result[0]['country'] not in seen:
            seen.append(result[0]['country'])
            meta_data.append([result[0]['country'], result[1], result[0]['positive']])
        else:
            for candidate in meta_data:
                if candidate[0] == result[0]['country'] and candidate[2] < result[0]['positive']:
                    candidate[1] = result[1]
                    candidate[2] = result[0]['positive']

    with open('./Output/meta_data.csv', 'w') as f:
        write = csv.writer(f)
        write.writerow(fields)
        write.writerows(meta_data)
コード例 #20
0
import tensorflow as tf
from tensorflow import keras

reconstructed_model = keras.models.load_model("model")

googlenews = GoogleNews()
print('Sentiment Analysis (-1 to 1, Negative to Positive Sentiment)')

ticker = input('Enter in Stock Ticker (Blank to Quit): ')
sort = True

while ticker != '':
    titles = []

    googlenews.search(ticker + ' Stock')
    news = googlenews.results(sort=sort)

    googlenews.clear()

    for articles in news:
        titles.append(articles['title'])

    predictions = []
    for title in titles:
        # print(title)
        predictions.append(
            reconstructed_model.predict(np.array([title]))[0][0])

    sentiment = 0
    for prediction in predictions:
        if prediction > 0:
コード例 #21
0
ファイル: main.py プロジェクト: CoolCoderSJ/SearchDeck
    def GET(self):
        """Handle the search page of this web.py app.

        Reads the query (``q``), result ordering (``sort``) and search type
        (``typ``) from ``web.input``.  An empty query renders the home page
        with the logged-in user's saved settings (or defaults).  Otherwise the
        query fans out to the selected engines -- in separate processes for
        text and shopping search -- results are deduplicated/merged, and the
        matching template is rendered.

        NOTE(review): depends on module-level state defined outside this
        class: ``session``, ``db``, ``cache``, ``render``, the per-engine
        scraper functions (``google``, ``bing``, ``ddg``, ``yahoo``, ...) and
        helpers (``word_dictionary``, ``infobox``, ``ansbox``).
        """
        # Logged-in users get saved preferences and per-user result caching.
        if session.get("user"):
            logged_in = True
        else:
            logged_in = False
        i = web.input(q="", sort="table", typ="text")
        if i.q == "":
            # No query: render the home page with engine/type preferences.
            if logged_in:
                stin = db[session.get("user")]
            else:
                # Defaults for anonymous visitors: all engines on, text search.
                stin = {
                    "engines": {
                        "Google": "checked",
                        "Bing": "checked",
                        "DuckDuckGo": "checked",
                        "Yahoo": "checked"
                    },
                    "default_typ": {
                        "text": "checked",
                        "image": "",
                        "video": "",
                        "news": "",
                        "maps": "",
                        "shopping": ""
                    }
                }
            return render.home(logged_in, stin)

        else:
            # NOTE(review): response is never used -- presumably a
            # connectivity check; confirm before removing.
            r = requests.get("http://httpbin.org/ip")
            global cache
            #clear cache if cache is too big
            if len(cache) > 25:
                cache = {}
            # Engines are chosen by the presence of checkbox fields in the
            # request; absence of all four falls back to saved/default set.
            engines = []
            sort = i.sort
            typ = i.typ
            if "Google" in i:
                engines.append("Google")
            if "Bing" in i:
                engines.append("Bing")
            if "DuckDuckGo" in i:
                engines.append("DuckDuckGo")
            if "Yahoo" in i:
                engines.append("Yahoo")

            if "Google" not in i and "Bing" not in i and "DuckDuckGo" not in i and "Yahoo" not in i:
                if logged_in:
                    engines = db[session.get("user")]['engines']
                else:
                    engines = ['Google', 'Bing', 'DuckDuckGo', 'Yahoo']

            # Sidebar extras (dictionary / info box / answer box); only
            # populated on the text-search path below.
            dictionary = []
            info = []
            ans = []

            if i.q != "" and typ == "text":
                start_time = time.time()
                goog = []
                b = []
                duckduckgo = []
                yhoo = []
                use_cache = False
                try:
                    #if within 2 days of last cache, use cache
                    #cache per user
                    # NOTE(review): randint(1,10) == 5 means the cache is only
                    # used ~10% of the time even when fresh -- confirm intent.
                    if cache[session.get(
                            "user")][i.q]["last_updated"] + 172800 > time.time(
                            ) and random.randint(1, 10) == 5:
                        use_cache = True
                except:
                    # Missing user/query key in the cache: fall through to a
                    # live scrape.
                    pass
                if use_cache:
                    goog = cache[session.get("user")][i.q]["google"]
                    b = cache[session.get("user")][i.q]["bing"]
                    duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                    yhoo = cache[session.get("user")][i.q]["yahoo"]
                else:
                    # Scrape each selected engine in its own process, results
                    # handed back through a per-engine Queue.
                    if "Google" in engines:
                        queue1 = Queue()
                        p = Process(target=google, args=(i.q, queue1))
                        p.start()
                    if "Bing" in engines:
                        queue2 = Queue()
                        p2 = Process(target=bing, args=(i.q, queue2))
                        p2.start()
                    if "DuckDuckGo" in engines:
                        queue3 = Queue()
                        p3 = Process(target=ddg, args=(i.q, queue3))
                        p3.start()
                    if "Yahoo" in engines:
                        queue4 = Queue()
                        p4 = Process(target=yahoo, args=(i.q, queue4))
                        p4.start()
                    # Drain the queues before joining so the child processes
                    # cannot block on a full pipe.
                    if "Google" in engines:
                        goog = queue1.get()
                        p.join()
                    if "Bing" in engines:
                        b = queue2.get()
                        p2.join()
                    if "DuckDuckGo" in engines:
                        duckduckgo = queue3.get()
                        p3.join()
                    if "Yahoo" in engines:
                        yhoo = queue4.get()
                        p4.join()
                    dictionary = word_dictionary(i.q)
                    info = infobox(i.q)
                    ans = ansbox(i.q)
                    # Cache only full four-engine result sets for known users.
                    if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                        try:
                            cache[session.get("user")][i.q] = {
                                "google": goog,
                                "bing": b,
                                "yahoo": yhoo,
                                "duckduckgo": duckduckgo,
                                "last_updated": time.time()
                            }
                        except:
                            pass
                # Flatten all engine results into e, tagging each with its
                # source engine; f collects titles.
                data = []
                e = []
                f = []
                for g in goog:
                    g['engine'] = "Google"
                    e.append(g)
                    f.append(g['title'])

                for bingresult in b:
                    bingresult['engine'] = "Bing"
                    e.append(bingresult)
                    f.append(bingresult['title'])

                for d in duckduckgo:
                    d['engine'] = "DuckDuckGo"
                    e.append(d)
                    f.append(d['title'])

                for y in yhoo:
                    y['engine'] = 'Yahoo'
                    e.append(y)
                    f.append(y['title'])

                def getnum(s0, s1):
                    """Percentage of s0's words that also appear in s1 (case-insensitive)."""
                    s0 = s0.lower()
                    s1 = s1.lower()
                    s0List = s0.split(" ")
                    s1List = s1.split(" ")
                    num = len(list(set(s0List) & set(s1List)))
                    return round(num / len(s0List) * 100)

                g = set(f)
                counter = 0
                # Merge near-duplicate results (>= 90% title word overlap),
                # keeping Stack Overflow hits separate in `so`.
                # NOTE(review): the `engines = x[1]` lines rely on `x` leaking
                # out of the inner for loop (x == thing when a match broke the
                # loop), and they rebind the outer `engines` list -- fragile
                # but works because the text path returns before `engines` is
                # used again.
                so = []
                for item in e:
                    if "stackoverflow.com" in item['link']:
                        thing = ""
                        for x in so:
                            if getnum(x[0]['title'], item['title']) >= 90:
                                thing = x
                                break
                        if thing:
                            so.remove(thing)
                            engines = x[1]
                            engines.append(item['engine'])
                            x = [x[0], engines]
                            so.append(x)
                        else:
                            engines = [item['engine']]
                            x = [item, engines]
                            so.append(x)
                    else:
                        thing = ""
                        for x in data:
                            if getnum(x[0]['title'], item['title']) >= 90:
                                thing = x
                                break
                        if thing:
                            data.remove(thing)
                            engines = x[1]
                            engines.append(item['engine'])
                            x = [x[0], engines, x[2]]
                            data.append(x)
                        else:
                            engines = [item['engine']]
                            x = [item, engines, counter]
                            data.append(x)
                        counter += 1

                # Restore each merged entry to its original discovery order
                # (item[2] is the position it was first seen at), then emit
                # Stack Overflow results first.
                done = 0
                data2 = []
                for item in data:
                    if done == len(data):
                        break
                    if data.index(item) != item[2]:
                        data.insert(item[2], data.pop(data.index(item)))
                        done += 1
                data2, data = data, data2

                for item in so:
                    data.append(item)

                for item in data2:
                    data.append(item)

                print("--- %s seconds ---" % (time.time() - start_time))
                return render.text(data, i.q, dictionary, info, ans, logged_in)
            elif i.q != "" and typ == "image":
                # Image search: scrape each engine's image page directly and
                # collect the <img src="..."> links.
                query = i.q.replace(" ", "+")
                goog = requests.get(
                    f"https://google.com/search?q={query}&tbm=isch",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(goog, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                goog = imgs
                b = requests.get(
                    f"https://bing.com/images/search?q={query}&form=HDRSC2",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(b, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    # Bing relative thumbnails start with /rp; prefix them
                    # with the search URL to make them resolvable.
                    if link.startswith("/rp"):
                        link = f"https://bing.com/images/search?q={query}&form=HDRSC2" + link
                    if link != "<img alt=":
                        imgs.append(link)
                b = imgs
                duckduckgo = requests.get(
                    f"https://duckduckgo.com/?q={query}&ia=images",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    })
                soup = BeautifulSoup(duckduckgo.content, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                duckduckgo = imgs
                yhoo = requests.get(
                    f"https://images.search.yahoo.com/search/images;_ylt=A0geJaQetm1gPx0AGURXNyoA;_ylu=Y29sbwNiZjEEcG9zAzEEdnRpZAMEc2VjA3BpdnM-?p={query}&fr2=piv-web&fr=opensearch",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(yhoo, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                yhoo = imgs
            elif i.q != "" and typ == "video":
                # Video search: YouTube only.
                query = i.q.replace(" ", "+")
                goog = YoutubeSearch(query, max_results=100).to_dict()
                b, duckduckgo, yhoo = [], [], []
            elif i.q != "" and typ == "news":
                # News search: Google News only.
                query = i.q.replace(" ", "+")
                news = GoogleNews()

                news.set_lang('en')
                news.set_encode('utf-8')

                news.search(query)

                goog = news.results()
                b, duckduckgo, yhoo = [], [], []
            elif i.q != "" and typ == "maps":
                goog, b, duckduckgo, yhoo = [], [], [], []
            elif i.q != "" and typ == "shopping":
                # Shopping search: same process/cache pattern as text search,
                # but DuckDuckGo has no shopping scraper.
                goog = []
                b = []
                duckduckgo = []
                yhoo = []
                use_cache = False
                try:
                    #if within 2 days of last cache, use cache
                    #cache per user
                    if cache[session.get(
                            "user")][i.q]["last_updated"] + 172800 > time.time(
                            ) and random.randint(1, 10) == 5:
                        use_cache = True
                except:
                    pass
                print(use_cache)
                if use_cache:
                    goog = cache[session.get("user")][i.q]["google"]
                    b = cache[session.get("user")][i.q]["bing"]
                    duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                    yhoo = cache[session.get("user")][i.q]["yahoo"]
                else:
                    if "Google" in engines:
                        queue1 = Queue()
                        p = Process(target=gshop, args=(i.q, queue1))
                        p.start()
                    if "Bing" in engines:
                        queue2 = Queue()
                        p2 = Process(target=bing_shopping, args=(i.q, queue2))
                        p2.start()
                    if "Yahoo" in engines:
                        queue3 = Queue()
                        p3 = Process(target=yahoo_shopping, args=(i.q, queue3))
                        p3.start()
                    if "Google" in engines:
                        goog = queue1.get()
                        p.join()
                    if "Bing" in engines:
                        b = queue2.get()
                        p2.join()
                    if "Yahoo" in engines:
                        yhoo = queue3.get()
                        p3.join()
                    if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                        try:
                            cache[session.get("user")][i.q] = {
                                "google": goog,
                                "bing": b,
                                "yahoo": yhoo,
                                "duckduckgo": duckduckgo,
                                "last_updated": time.time()
                            }
                        except:
                            pass
            # NOTE(review): if typ matched none of the branches above, goog/b/
            # duckduckgo/yhoo are unbound here and this raises NameError.
            return render.search(goog, b, duckduckgo, yhoo, i.q, sort, typ,
                                 engines, logged_in, dictionary, info, ans)
コード例 #22
0
### MODULES

from GoogleNews import GoogleNews

### METHODS


def show_routine(results):
    """Print one numbered line per result: '<n>. <date> - <title>'."""
    rendered = [
        f"{position}. {item['date']} - {item['title']}"
        for position, item in enumerate(results)
    ]
    for line in rendered:
        print(line)


### MAIN

# Query configuration shared by both clients.
keywords = "covid cava de' tirreni"
period = '10d'

# One client per endpoint: news.google.com vs. the regular google.com search.
news_client = GoogleNews(lang='it', period=period)
web_client = GoogleNews(lang='it', period=period)

# Headlines from news.google.com, sorted newest first.
news_client.get_news(keywords)
show_routine(news_client.results(sort=True))

# Headlines scraped from the google.com results page, sorted newest first.
web_client.search(keywords)
show_routine(web_client.results(sort=True))
コード例 #23
0
ファイル: app.py プロジェクト: AchintyaSushiksha/SCL-Maxo
def nextpage():
    """Flask view: render the learning page for whichever course button was
    pressed on the previous page.

    The pressed button is looked up in three session-stored button groups
    (programming languages, 'improve' links, art links).  For a match, the
    course's video links are fetched from the MySQL ``December`` table, laid
    out vertically (first row at 40px, then one 540px-tall row per link), and
    related Google News headlines are attached.  If no button matched, the
    'Paintings' page is rendered as a fallback.

    NOTE(review): the three branches are near-identical copies differing only
    in the news query and a drawing special-case -- candidates for a shared
    helper.  Relies on module-level ``session``, ``pymysql``, ``request``,
    ``render_template``, ``redirect`` and ``url_for``.
    """
    from GoogleNews import GoogleNews
    googlenews = GoogleNews(lang='en')
    columns_of_languages = session['columns_of_languages']
    improve_link = session['improve_columns']
    art_link = session['art_columns']
    user = session['user']
    print(art_link)

    # --- Branch 1: programming-language buttons -------------------------
    # NOTE(review): the loop variable `i` is reused inside the body for DB
    # rows and range counters, clobbering the button key.
    #     global columns_of_languages
    for i in columns_of_languages:
        print(i)
        if i in request.form:
            button_name = request.form[i]
            print(button_name)
            if button_name != "":
                try:
                    db = pymysql.connect(host="achintya.heliohost.us",
                                         user="******",
                                         password="******",
                                         autocommit=True)
                    cur = db.cursor()
                    ab = "use achintya_maxo_scl"
                    cur.execute(ab)
                    # NOTE(review): column name interpolated straight from
                    # form input -- SQL-injection risk; whitelist it.
                    a = f"select {button_name} from December"
                    cur.execute(a)
                    link_tuple = cur.fetchall()
                    linklist = []
                    for i in link_tuple:
                        if i[0] == None:
                            continue
                        else:
                            linklist.append(i[0])

                    # Vertical page offsets: first link at 40px, then +540px
                    # per link.
                    top_property = []
                    first_value = 40
                    for i in range(0, len(linklist)):
                        top_property.append(first_value)
                        first_value = first_value + 540
                    link_with_topvalue = {}
                    print(linklist)
                    print(top_property)
                    for i in range(0, len(linklist)):
                        link_with_topvalue[linklist[i]] = top_property[i]

                    db.close()
                    # News sidebar: search "Learn<course>".
                    button_name_info = 'Learn' + button_name
                    googlenews.search(button_name_info)
                    news = googlenews.results()

                    return render_template(
                        'language1.html',
                        link_with_topvalue=link_with_topvalue,
                        button_name=button_name,
                        news=news,
                        leng=5,
                        user=user)

                except pymysql.err.OperationalError:
                    print('ntrwk error')
                    return redirect(url_for('nextpage', user=user))
                except Exception as e:
                    print(e)
                    if db.open:
                        db.close()
                    return redirect(url_for('nextpage', user=user))

    # --- Branch 2: 'improve' buttons (fixed news query) ------------------
    for i in improve_link:
        print(i)
        if i in request.form:
            button_name = request.form[i]
            print(button_name)
            if button_name != "":
                try:
                    db = pymysql.connect(host="achintya.heliohost.us",
                                         user="******",
                                         password="******",
                                         autocommit=True)
                    cur = db.cursor()
                    ab = "use achintya_maxo_scl"
                    cur.execute(ab)
                    a = f"select {button_name} from December"
                    cur.execute(a)
                    link_tuple = cur.fetchall()
                    linklist = []
                    for i in link_tuple:
                        if i[0] == None:
                            continue
                        else:
                            linklist.append(i[0])

                    top_property = []
                    first_value = 40
                    for i in range(0, len(linklist)):
                        top_property.append(first_value)
                        first_value = first_value + 540
                    link_with_topvalue = {}
                    print(linklist)
                    print(top_property)
                    for i in range(0, len(linklist)):
                        link_with_topvalue[linklist[i]] = top_property[i]
                    db.close()

                    # News sidebar uses a fixed query for this group.
                    button_name_info = 'improve english'
                    googlenews.search(button_name_info)
                    news = googlenews.results()
                    return render_template(
                        'language1.html',
                        link_with_topvalue=link_with_topvalue,
                        button_name=button_name,
                        news=news,
                        leng=5,
                        user=user)

                except pymysql.err.OperationalError:
                    return redirect(url_for('nextpage'))
                except Exception as e:
                    print(e)
                    if db.open:
                        db.close()
                    return redirect(url_for('nextpage'))

    # --- Branch 3: art buttons -------------------------------------------
    for i in art_link:
        print(i)
        #         print(f'error is {request.form.to_dict()[1]}')

        if i in request.form:
            print("hi")
            button_name = request.form.to_dict()[i]
            print(button_name)
            if button_name != "":
                try:
                    db = pymysql.connect(host="achintya.heliohost.us",
                                         user="******",
                                         password="******",
                                         autocommit=True)
                    cur = db.cursor()
                    ab = "use achintya_maxo_scl"
                    cur.execute(ab)
                    a = f"select {button_name} from December"
                    cur.execute(a)
                    link_tuple = cur.fetchall()
                    linklist = []
                    for i in link_tuple:
                        if i[0] == None:
                            continue
                        else:
                            linklist.append(i[0])

                    top_property = []
                    first_value = 40
                    for i in range(0, len(linklist)):
                        top_property.append(first_value)
                        first_value = first_value + 540
                    link_with_topvalue = {}
                    print(linklist)
                    print(top_property)
                    for i in range(0, len(linklist)):
                        link_with_topvalue[linklist[i]] = top_property[i]

                    db.close()
                    # Drawing buttons get a more searchable news phrase.
                    if button_name == 'Drawing' or button_name == 'Drawings':
                        button_name = 'life drawing'
                    button_name_info = 'Learn' + button_name
                    googlenews.search(button_name_info)
                    news = googlenews.results()
                    return render_template(
                        'language1.html',
                        link_with_topvalue=link_with_topvalue,
                        button_name=button_name,
                        news=news,
                        leng=5,
                        user=user)

                except pymysql.err.OperationalError:
                    return redirect(url_for('nextpage'))
                except Exception as e:
                    print(e)
                    if db.open:
                        db.close()
                    return redirect(url_for('nextpage'))

    # NOTE(review): this is a for/else bound to the art_link loop -- it runs
    # whenever that loop completes without break (i.e. no art button
    # returned), serving the 'Paintings' page as the default.
    else:
        try:
            db = pymysql.connect(host="achintya.heliohost.us",
                                 user="******",
                                 password="******",
                                 autocommit=True)
            cur = db.cursor()
            ab = "use achintya_maxo_scl"
            cur.execute(ab)
            a = f"select Paintings from December"
            cur.execute(a)
            link_tuple = cur.fetchall()
            linklist = []
            for i in link_tuple:
                if i[0] == None:
                    continue
                else:
                    linklist.append(i[0])

            top_property = []
            first_value = 40
            for i in range(0, len(linklist)):
                top_property.append(first_value)
                first_value = first_value + 540
            link_with_topvalue = {}
            print(linklist)
            print(top_property)
            for i in range(0, len(linklist)):
                link_with_topvalue[linklist[i]] = top_property[i]

            db.close()

            button_name_info = 'Paintings'
            googlenews.search(button_name_info)
            news = googlenews.results()
            j = 'bg.png'
            return render_template('language1.html',
                                   link_with_topvalue=link_with_topvalue,
                                   button_name="Paintings",
                                   news=news,
                                   leng=5,
                                   user=user,
                                   j=j)

        except pymysql.err.OperationalError:
            return redirect(url_for('nextpage'))
        except Exception as e:
            print(e)
            if db.open:
                db.close()
            return redirect(url_for('nextpage'))

        # NOTE(review): unreachable -- every path through the try/except
        # above returns first.
        return render_template('mainpage_nextpage.html',
                               columns_of_languages=columns_of_languages,
                               user=user)
コード例 #24
0
ファイル: googlenews-api.py プロジェクト: lyanita/newsparser
class GoogleNewsClient(object):
    """Retrieve weblinks from GoogleNews, fetch each page's content with
    newspaper's Article, and score the article text with TextBlob sentiment.
    """

    def __init__(self, start, end):
        """Create a GoogleNews session restricted to the [start, end] date range."""
        self.googlenews = GoogleNews(start=start, end=end, lang='en')

    def get_news(self, query, count):
        """Search GoogleNews for *query* and return the results as a DataFrame.

        Fetches pages 1..count-1 via getpage(); GoogleNews accumulates
        results internally, so the frame is built once after the loop.
        (The original rebuilt the frame on every iteration and raised
        UnboundLocalError when count <= 1, because `df` was only assigned
        inside the loop.)
        """
        self.googlenews.search(query)
        for page in range(1, count):
            self.googlenews.getpage(page)
        return pd.DataFrame(self.googlenews.results())

    def _utc_date(self, raw_date, publish_date):
        """Best-effort UTC timestamp for one article.

        Prefers the article's own publish_date; otherwise parses the
        GoogleNews date string, which is either an absolute date
        ("Jan 02, 2021", assumed US/Eastern) or a relative phrase
        ("3 days ago"). Falls back to "now" when the phrase has no
        recognizable unit (the original raised NameError there).
        """
        if publish_date is not None:
            # Article dates arrive naive; tag them as UTC as before.
            return publish_date.replace(tzinfo=pytz.utc)

        local_time = pytz.timezone("US/Eastern")
        try:
            parsed = datetime.datetime.strptime(raw_date, "%b %d, %Y")
            # Fixed: the original called the misspelled `.astimzone()`,
            # which always raised and fell through to the relative path.
            return local_time.localize(parsed, is_dst=None).astimezone(pytz.UTC)
        except ValueError:
            pass  # not an absolute date; treat as a relative phrase

        now_utc = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        # Fixed: the original split the `str` *type* (TypeError) and then
        # passed the resulting list to relativedelta; extract the first
        # integer token from the phrase instead.
        numbers = [int(tok) for tok in raw_date.split() if tok.isdigit()]
        amount = numbers[0] if numbers else 0
        # Substring match covers both singular and plural ("day"/"days").
        unit_kwargs = (
            ("year", "years"),
            ("month", "months"),
            ("week", "weeks"),
            ("day", "days"),
            ("hour", "hours"),
            ("min", "minutes"),  # fixed: relativedelta has no 'mins' kwarg
        )
        delta = dateutil.relativedelta.relativedelta()
        for token, kwarg in unit_kwargs:
            if token in raw_date:
                delta = dateutil.relativedelta.relativedelta(**{kwarg: amount})
                break
        return (now_utc - delta).astimezone(pytz.UTC)

    def get_articles(self, news):
        """Download and parse every weblink in a get_news() frame.

        Returns a DataFrame with one row per successfully fetched article:
        Date, Media, Title, Article, Summary, Keywords (comma-joined), Link.
        Pages that fail to download or parse are skipped (best-effort scrape).
        """
        rows = []  # renamed from `list`/`dict` — don't shadow builtins
        for ind in news.index:
            try:
                # NOTE(review): `config` is a module-level newspaper config
                # defined elsewhere in this file.
                article = Article(news['link'][ind], config=config)
                article.download()
                article.parse()
                article.nlp()
                rows.append({
                    'Date': self._utc_date(news['date'][ind],
                                           article.publish_date),
                    'Media': news['media'][ind],
                    'Title': article.title,
                    'Article': article.text,
                    'Summary': article.summary,
                    'Keywords': article.keywords,
                    'Link': news['link'][ind],
                })
            except Exception:
                # Network/parse failures are expected; skip the article.
                continue
        news_df = pd.DataFrame(rows)
        if not news_df.empty:
            # Guard: an all-failed batch yields an empty frame with no
            # 'Keywords' column (the original raised KeyError here).
            news_df["Keywords"] = news_df["Keywords"].apply(
                lambda kws: ','.join(map(str, kws)))
        return news_df

    def get_sentiment(self, news_df):
        """Label each article by TextBlob polarity of its full text.

        Adds 'Sentiment' (positive/neutral/negative) and 'Polarity'
        (float in [-1, 1]) columns in place and returns news_df.
        """
        sentiments = []
        polarities = []
        for ind in news_df.index:
            polarity = TextBlob(news_df['Article'][ind]).sentiment.polarity
            if polarity > 0:
                sentiments.append('positive')
            elif polarity == 0:
                sentiments.append('neutral')
            else:
                sentiments.append('negative')
            polarities.append(polarity)
        news_df["Sentiment"] = sentiments
        news_df["Polarity"] = polarities
        return news_df