Code Example #1
File: news.py Project: eriksonlb/TARS
from GoogleNews import GoogleNews

def get_news(assunto):
    news = GoogleNews(period='d')
    news.set_lang('pt')  # fixed: the library method is set_lang, not setlang
    news.set_encode('utf-8')
    # set_time_range expects mm/dd/yyyy; the original dd/mm strings are assumed to mean 12-13 Feb 2021
    news.set_time_range('02/12/2021', '02/13/2021')
    news.get_news(assunto)
    results = news.get_texts()
    return results[3:8] if len(results) > 0 else "Sem notícias recentes"
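One wrinkle worth noting: this snippet sets both period='d' (in the constructor) and an explicit time range, while example #3 below points out that the two filters are meant as alternatives. A minimal sketch of picking one path, using the constructor keywords the library documents (make_client is our name, not part of the project):

from GoogleNews import GoogleNews

def make_client(lang='pt', start=None, end=None):
    # Build a client with either an explicit mm/dd/yyyy range or a rolling period
    if start and end:
        news = GoogleNews(lang=lang, encode='utf-8')
        news.set_time_range(start, end)
    else:
        news = GoogleNews(lang=lang, encode='utf-8', period='d')
    return news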
Code Example #2
File: newsScrape.py Project: LAHacks21/WordCloud
import requests
from GoogleNews import GoogleNews

def googleLinks(topic):
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(topic)  # results are read back via get_links(), not the return value
    links = googlenews.get_links()[:5]
    actualLinks = list()
    for l in links:
        l = "http://" + l
        print(l)
        # follow the news.google.com redirect to the publisher's URL
        actualLinks.append(requests.get(l).url)
    return actualLinks
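Following each link with a bare requests.get(l).url can hang or raise on a slow host and abort the whole loop; a slightly hardened sketch (the timeout value and helper name are our choices):

import requests

def resolve_link(link, timeout=5):
    # Follow the news.google.com redirect and return the publisher URL,
    # falling back to the original link on any network error
    try:
        return requests.get("http://" + link, timeout=timeout).url
    except requests.RequestException:
        return link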
    
Code Example #3
def search_google_news(query, google_date):
    #-- Retrieve news articles
    # Init googlenews
    googlenews = GoogleNews()
    # set_period and set_time_range are alternatives; use one or the other, not both.
    #googlenews.set_period('7d')
    #googlenews.set_time_range(str(google_date), '2020-10-12')
    googlenews.set_encode('utf-8')
    googlenews.search(query)
    googlenews.get_page(50)  # fixed: the method is get_page, not getpage
    result = googlenews.result()
    # Clear stored results before the next search
    googlenews.clear()

    return result
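To collect more than one page, the same client exposes get_page, page_at and total_count (example #11 below leans on the same trio); a sketch that keeps paging until a page comes back empty, on the assumption, also voiced in example #11, that an empty page marks the end:

def search_all_pages(query, max_pages=5):
    googlenews = GoogleNews()
    googlenews.set_encode('utf-8')
    googlenews.search(query)
    collected = list(googlenews.results())
    for page in range(2, max_pages + 1):
        rows = googlenews.page_at(page)
        if not rows:  # an empty page probably means no more results
            break
        collected.extend(rows)
    googlenews.clear()
    return collected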
Code Example #4
def googleNewsApi(request, word):

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    resultsGoogleNews = googlenews.results()
    #print(resultsGoogleNews)
    #print(googlenews.total_count())

    #TWITTER
    # credentials redacted: load real values from configuration, never commit them
    consumer_key = 'YOUR_CONSUMER_KEY'
    consumer_secret = 'YOUR_CONSUMER_SECRET'
    access_token = 'YOUR_ACCESS_TOKEN'
    access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    date_since = datetime.today().strftime('%Y-%m-%d')
    print(date_since)
    #tweets = api.search(str("bitcoin"), count=1)
    tweets = tweepy.Cursor(api.search,
                           q=str(word),
                           lang="en",
                           since=date_since).items(100)
    """print(tweets.__dict__['page_iterator'].__dict__)
    for tweet in tweets:
        print(tweet)
        print(tweet.id)"""
    #return googlenews
    """for result in resultsGoogleNews:

        title = result['title']
        date = result['date']
        link = result['link']
        source = result['site']

        news = {'title':title, 'date': date, 'link': link, 'site':site}
    """
    return render(request, 'homepage.html', {
        'news': resultsGoogleNews,
        'tweets': tweets
    })
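The credentials above had to be redacted because they were committed verbatim; a minimal sketch of loading them from the environment instead (the variable names are our convention):

import os
import tweepy

auth = tweepy.OAuthHandler(os.environ['TWITTER_CONSUMER_KEY'],
                           os.environ['TWITTER_CONSUMER_SECRET'])
auth.set_access_token(os.environ['TWITTER_ACCESS_TOKEN'],
                      os.environ['TWITTER_ACCESS_TOKEN_SECRET'])
api = tweepy.API(auth)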
Code Example #5
def scrape_the_news():
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent

    topiclist = NLP_news()
    print(topiclist[0])

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])

    result = googlenews.results()

    googlenews.clear()

    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    # positional rename: this assumes results() keeps its columns in a fixed order
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]

    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()

    for i, row in df.iterrows():
        try:
            row.Link = 'https://' + row.Link
            columns = row.keys()
            values = [row[column] for column in columns]

            insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass),%s, %s, %s, %s, %s, %s)"
            curr.execute(insert_statement, tuple(values))
        except Exception as e:
            print('could not add row', i, e)  # fixed: bare except hid the failure reason

    conn.commit()

    curr.close()
    conn.close()
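psycopg2 connections and cursors work as context managers, which commits the transaction on a clean exit and closes the cursor even if a row insert raises; a sketch of the same loop in that style (same table, sequence and DataFrame as above):

with psycopg2.connect("dbname=EdTech user=postgres password=edtech123") as conn:
    with conn.cursor() as curr:
        for i, row in df.iterrows():
            row.Link = 'https://' + row.Link
            curr.execute(
                "INSERT INTO scrapenews_newslist VALUES "
                "(nextval('scrapenews_newslist_id_seq'::regclass), %s, %s, %s, %s, %s, %s)",
                tuple(row),
            )

Note this variant is all-or-nothing: one bad row rolls back the batch, unlike the per-row try/except above.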
Code Example #6
def crawling_news(company_name_list, start_date, end_date):
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    # fixed: pass the date variables, not the literal strings 'start_date'/'end_date'
    googlenews.set_time_range(start_date, end_date)
    googlenews.set_encode('utf-8')
    #news.google.com search sample
    all_title = []
    logger.info('loop start')  # fixed: use the configured logger, not the root logging module
    for i in range(len(company_name_list)):
        googlenews.get_news(company_name_list[i])
        logger.info('%s : %0.2f%%' %
                    (company_name_list[i],
                     ((i + 1) / len(company_name_list)) * 100))
        results = googlenews.results()  # read once instead of re-querying for every title
        for item in results:
            all_title.append(item.get('title'))
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logger.info('saved to csv, done!!')
    return all_title
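One caveat with per-company loops: the library appends each new query's hits to its stored results (example #3 above calls clear() "before searching again" for exactly this reason), so without a reset every company inherits its predecessors' titles. A sketch of the loop body with the reset in place:

    for name in company_name_list:
        googlenews.clear()  # drop the previous company's results
        googlenews.get_news(name)
        for item in googlenews.results():
            all_title.append(item.get('title'))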
Code Example #7
def googlenews_function(keyword='台積電',
                        language='cn',
                        start_date='2020/12/01',
                        end_date='2020/12/28'):
    '''
    Parameters:
    - date range
    - keyword
    - language
    - number of pages to crawl

    '''
    googlenews = GoogleNews()
    googlenews.clear()
    googlenews.set_encode('utf-8')
    googlenews.set_lang(language)

    all_date_start = start_date.split('/')
    start_year = all_date_start[0]
    start_month = all_date_start[1]
    start_day = all_date_start[2]
    all_date_start = '{}/{}/{}'.format(start_month, start_day, start_year)

    all_date_end = end_date.split('/')
    end_year = all_date_end[0]
    end_month = all_date_end[1]
    end_day = all_date_end[2]
    all_date_end = '{}/{}/{}'.format(end_month, end_day, end_year)

    googlenews.set_time_range(start=all_date_start, end=all_date_end)

    googlenews.search(keyword)
    data = googlenews.result()
    print("資料總筆數:", len(data))
    news = pd.DataFrame(data)
    # news.to_csv("GoogleNews_" + keyword + "_" + start_date.replace('/', '-') + "_to_" + end_date.replace('/', '-') + ".csv", index=False)
    return news
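The split-and-reassemble above only converts yyyy/mm/dd into the mm/dd/yyyy form that set_time_range wants; datetime can do the same in one step, a sketch under the same input format (to_us_date is our helper, not part of the project):

from datetime import datetime

def to_us_date(d):
    # 'yyyy/mm/dd' -> 'mm/dd/yyyy'
    return datetime.strptime(d, '%Y/%m/%d').strftime('%m/%d/%Y')

googlenews.set_time_range(start=to_us_date(start_date), end=to_us_date(end_date))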
Code Example #8
from datetime import date
from GoogleNews import GoogleNews

news = GoogleNews()
news.set_lang('en')
date_today = date.today()
# set_time_range expects mm/dd/yyyy strings, so format today's date accordingly
news.set_time_range('01/11/2020', date_today.strftime('%m/%d/%Y'))
news.set_encode('utf-8')
topic = input("Topic : ")
news.search(topic)
news.get_page(2)
# headlines with links (WORLD NEWS)
results = news.results()
for item in results[:6]:  # slicing guards against fewer than six results
    print(item["title"])
    print(item["link"])
Code Example #9
File: alexa.py Project: vimal0312/alexa
def run_alexa():
    command = take_command()
    print(command)
    if 'music' in command:
        song = command.replace('play song', '')
        talk('I am playing your favourite ' + song)
        # print('playing')
        print(song)
        # playing the first video that appears in yt search
        pywhatkit.playonyt(song)

    elif 'time' in command:
        now = datetime.now()
        time = now.strftime("%H:%M:%S")
        print("time:", time)
        talk("Current time is " + time)

    elif 'month' in command or 'year' in command:  # fixed: ('month' or 'year') only ever tested 'month'
        now = datetime.now()
        year = now.strftime("%Y")
        print("year:", year)
        talk("Current year is  " + year)
        month = now.strftime("%m")
        print("month:", month)
        talk("Current month is  " + month)

    elif 'date' in command:
        now = datetime.now()
        date_time = now.strftime("%m/%d/%Y, %H:%M:%S")
        print("date and time:", date_time)
        talk("Current date and time is " + date_time)

    # opens web.whatsapp shortly before the specified time and sends the msg
    elif 'whatsapp' in command:
        talk("To which number do you have to whatsapp")
        talk("Please don't forget to enter 10 digits with country code")
        num = input()
        talk("Enter the message you have to send")
        msg = input()
        talk("Enter the time to send the message")
        time = int(input())
        pywhatkit.sendwhatmsg(num, msg, time, 00)
        pywhatkit.showHistory()
        pywhatkit.shutdown(3000000000)
        # pywhatkit.sendwhatmsg("+919876543210", "This is a message", 15, 00)

    # Convert text to handwritten format
    elif 'convert' in command:
        text = command.replace('convert', '')
        pywhatkit.text_to_handwriting(text, rgb=[0, 0, 0])

    # Perform google search
    elif 'search' in command:
        key = command.replace('search', '')
        pywhatkit.search("key")

    elif 'wikipedia' in command:
        person = command.replace('wikipedia', '')
        talk("How many pages do you want to read")
        num_pages = int(input())
        # talk("In which language do you want to read")
        # l = input()
        # wikipedia.set_lang(l)
        info = wikipedia.summary(person, num_pages)  # note: summary's second argument counts sentences, not pages
        print(info)
        talk(info)

    elif 'can you work for me' in command:
        talk("sorry, I have a headache. Please do your work yourself")

    elif 'are you single' in command:
        talk("I am in a relationship with wifi")

    elif 'joke' in command:
        talk(pyjokes.get_joke())
        talk("sorry for the lamest joke")

    elif 'open google browser' in command:
        try:
            urL = 'https://www.google.com'
            chrome_path = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
            webbrowser.register('chrome', None,
                                webbrowser.BackgroundBrowser(chrome_path))
            webbrowser.get('chrome').open_new_tab(urL)
            talk("Successfully opened chrome its upto you to search")
        except:
            webbrowser.Error

    elif 'google search' in command:
        word_to_search = command.replace('google search', '')
        response = GoogleSearch().search(word_to_search)
        print(response)
        for result in response.results:
            print("Title: " + result.title)
            talk("You can look for the following titles  " + result.title)

    elif 'weather' in command:
        # base URL
        BASE_URL = "https://api.openweathermap.org/data/2.5/weather?"
        talk("Which city weather are you looking for")
        try:
            with sr.Microphone() as source:
                print('listening weather...')
                city_voice = listener.listen(source)
                city = listener.recognize_google(city_voice)
                # city = '\"'+city.lower()+'\"'

                print(city)
                # city="bangalore"
                # OpenWeatherMap API key (redacted; substitute your own)
                API_KEY = "YOUR_API_KEY"
                # updating the URL
                URL = BASE_URL + "q=" + city + "&appid=" + API_KEY
                # HTTP request
                response = requests.get(URL)
                # checking the status code of the request
                if response.status_code == 200:
                    # getting data in the json format
                    data = response.json()
                    # getting the main dict block
                    main = data['main']
                    # getting temperature
                    temperature = main['temp']
                    # getting the humidity
                    humidity = main['humidity']
                    # getting the pressure
                    pressure = main['pressure']
                    # weather report
                    report = data['weather']
                    print(f"{CITY:-^30}")
                    print(f"Temperature: {temperature}")
                    print(f"Humidity: {humidity}")
                    print(f"Pressure: {pressure}")
                    print(f"Weather Report: {report[0]['description']}")
                    talk("Temperature in " + city + " is " + temperature +
                         " humidity is " + humidity + " pressure is " +
                         pressure + " and your final weather report" + report)
                else:
                    # showing the error message
                    print("Error in the HTTP request")
                    talk("Error in the HTTP request")
        except:
            talk("Hmmmmm, it looks like there is something wrong")

    elif 'news' in command:
        try:
            googlenews = GoogleNews()
            googlenews.set_lang('en')
            # googlenews.set_period('7d')
            # googlenews.set_time_range('02/01/2020', '02/28/2020')
            googlenews.set_encode('utf-8')

            talk("What news are you looking for")
            try:
                with sr.Microphone() as source:
                    print('listening news ...')
                    news_voice = listener.listen(source)
                    news_input = listener.recognize_google(news_voice)
                    news_input = news_input.lower()
                    print(news_input)
                    # fixed: one query path suffices (get_news and search both fetched),
                    # and the unused page_at(2) result is gone
                    googlenews.search(news_input)
                    googlenews.get_page(2)
                    news = googlenews.get_texts()
                    print(news)
                    talk(news)
            except:
                print("Error")
                talk("Error in reading input")

        except:
            print("No news")
            talk(" I couldn't find any news on this day")

    elif 'play book' in command or 'read pdf' in command:  # fixed: "'play book' or ..." was always truthy
        talk("Which pdf do you want me to read")
        book_input = input()
        print(book_input)
        book = open(book_input, 'rb')
        # create pdfReader object
        pdfReader = PyPDF2.PdfFileReader(book)
        # count the total pages (kept as an int; the ranges below need it)
        total_pages = pdfReader.numPages
        print("Total number of pages", total_pages)
        talk("Total number of pages " + str(total_pages))
        # initialise speaker object
        # speaker = pyttsx3.init()
        # talk("Enter your starting page")
        # start_page = int(input())
        talk(
            " here are the options for you, you can press 1 to  Play a single page     2 to   Play between start and end points  and  3 to  Play the entire book "
        )
        talk("Enter your choice")
        choice = int(input())
        if (choice == 1):
            talk("Enter index number")
            page = int(input())
            page = pdfReader.getPage(page)
            text = page.extractText()
            talk(text)
            # speaker.say(text)
            # speaker.runAndWait()
        elif (choice == 2):
            talk("Enter starting page number")
            start_page = int(input())
            talk("Enter ending page number")
            end_page = int(input())
            for page_num in range(start_page + 1, end_page):
                page = pdfReader.getPage(page_num)  # fixed: was re-reading the same page every pass
                text = page.extractText()
                talk(text)
                # speaker.say(text)
                # speaker.runAndWait()
        elif (choice == 3):
            for page_num in range(total_pages):  # fixed: pages run 0..numPages-1, and total_pages is an int now
                page = pdfReader.getPage(page_num)
                text = page.extractText()
                talk(text)
                # speaker.say(text)
                # speaker.runAndWait()
        else:
            talk("Haha!! Please enter valid choice")
    else:
        talk(
            "Hiii Rashika, I am so bored can you please give me some proper commands"
        )
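Long elif chains like this are easy to break with a stray or (two of the branches above had exactly that bug); a keyword-to-handler dispatch table scales better. A sketch reusing the script's own talk and take_command helpers (the handler split is ours):

def handle_time(command):
    talk("Current time is " + datetime.now().strftime("%H:%M:%S"))

def handle_joke(command):
    talk(pyjokes.get_joke())

# keyword -> handler; first match wins, mirroring the elif order above
HANDLERS = [
    ('time', handle_time),
    ('joke', handle_joke),
]

def run_alexa():
    command = take_command()
    for keyword, handler in HANDLERS:
        if keyword in command:
            handler(command)
            return
    talk("I did not catch that, please give me a proper command")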
Code Example #10
File: main.py Project: CoolCoderSJ/SearchDeck
    def GET(self):
        if session.get("user"):
            logged_in = True
        else:
            logged_in = False
        i = web.input(q="", sort="table", typ="text")
        if i.q == "":
            if logged_in:
                stin = db[session.get("user")]
            else:
                stin = {
                    "engines": {
                        "Google": "checked",
                        "Bing": "checked",
                        "DuckDuckGo": "checked",
                        "Yahoo": "checked"
                    },
                    "default_typ": {
                        "text": "checked",
                        "image": "",
                        "video": "",
                        "news": "",
                        "maps": "",
                        "shopping": ""
                    }
                }
            return render.home(logged_in, stin)

        else:
            r = requests.get("http://httpbin.org/ip")  # response unused; presumably a connectivity check
            global cache
            #clear cache if cache is too big
            if len(cache) > 25:
                cache = {}
            engines = []
            sort = i.sort
            typ = i.typ
            if "Google" in i:
                engines.append("Google")
            if "Bing" in i:
                engines.append("Bing")
            if "DuckDuckGo" in i:
                engines.append("DuckDuckGo")
            if "Yahoo" in i:
                engines.append("Yahoo")

            if "Google" not in i and "Bing" not in i and "DuckDuckGo" not in i and "Yahoo" not in i:
                if logged_in:
                    engines = db[session.get("user")]['engines']
                else:
                    engines = ['Google', 'Bing', 'DuckDuckGo', 'Yahoo']

            dictionary = []
            info = []
            ans = []

            if i.q != "" and typ == "text":
                start_time = time.time()
                goog = []
                b = []
                duckduckgo = []
                yhoo = []
                use_cache = False
                try:
                    # per-user cache: reuse only if under 2 days old,
                    # and even then only on a 1-in-10 random hit
                    if cache[session.get("user")][i.q]["last_updated"] + 172800 > time.time() \
                            and random.randint(1, 10) == 5:
                        use_cache = True
                except KeyError:
                    pass
                if use_cache:
                    goog = cache[session.get("user")][i.q]["google"]
                    b = cache[session.get("user")][i.q]["bing"]
                    duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                    yhoo = cache[session.get("user")][i.q]["yahoo"]
                else:
                    if "Google" in engines:
                        queue1 = Queue()
                        p = Process(target=google, args=(i.q, queue1))
                        p.start()
                    if "Bing" in engines:
                        queue2 = Queue()
                        p2 = Process(target=bing, args=(i.q, queue2))
                        p2.start()
                    if "DuckDuckGo" in engines:
                        queue3 = Queue()
                        p3 = Process(target=ddg, args=(i.q, queue3))
                        p3.start()
                    if "Yahoo" in engines:
                        queue4 = Queue()
                        p4 = Process(target=yahoo, args=(i.q, queue4))
                        p4.start()
                    if "Google" in engines:
                        goog = queue1.get()
                        p.join()
                    if "Bing" in engines:
                        b = queue2.get()
                        p2.join()
                    if "DuckDuckGo" in engines:
                        duckduckgo = queue3.get()
                        p3.join()
                    if "Yahoo" in engines:
                        yhoo = queue4.get()
                        p4.join()
                    dictionary = word_dictionary(i.q)
                    info = infobox(i.q)
                    ans = ansbox(i.q)
                    if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                        try:
                            cache[session.get("user")][i.q] = {
                                "google": goog,
                                "bing": b,
                                "yahoo": yhoo,
                                "duckduckgo": duckduckgo,
                                "last_updated": time.time()
                            }
                        except KeyError:
                            pass  # this user has no cache bucket yet
                data = []
                e = []
                f = []
                for g in goog:
                    g['engine'] = "Google"
                    e.append(g)
                    f.append(g['title'])

                for bingresult in b:
                    bingresult['engine'] = "Bing"
                    e.append(bingresult)
                    f.append(bingresult['title'])

                for d in duckduckgo:
                    d['engine'] = "DuckDuckGo"
                    e.append(d)
                    f.append(d['title'])

                for y in yhoo:
                    y['engine'] = 'Yahoo'
                    e.append(y)
                    f.append(y['title'])

                def getnum(s0, s1):
                    """Percentage of s0's words that also appear in s1."""
                    s0 = s0.lower()
                    s1 = s1.lower()
                    s0List = s0.split(" ")
                    s1List = s1.split(" ")
                    num = len(list(set(s0List) & set(s1List)))
                    return round(num / len(s0List) * 100)

                g = set(f)
                counter = 0
                so = []
                for item in e:
                    if "stackoverflow.com" in item['link']:
                        thing = ""
                        for x in so:
                            if getnum(x[0]['title'], item['title']) >= 90:
                                thing = x
                                break
                        if thing:
                            so.remove(thing)
                            engines = x[1]
                            engines.append(item['engine'])
                            x = [x[0], engines]
                            so.append(x)
                        else:
                            engines = [item['engine']]
                            x = [item, engines]
                            so.append(x)
                    else:
                        thing = ""
                        for x in data:
                            if getnum(x[0]['title'], item['title']) >= 90:
                                thing = x
                                break
                        if thing:
                            data.remove(thing)
                            engines = x[1]
                            engines.append(item['engine'])
                            x = [x[0], engines, x[2]]
                            data.append(x)
                        else:
                            engines = [item['engine']]
                            x = [item, engines, counter]
                            data.append(x)
                        counter += 1

                done = 0
                data2 = []
                for item in data:
                    if done == len(data):
                        break
                    if data.index(item) != item[2]:
                        data.insert(item[2], data.pop(data.index(item)))
                        done += 1
                data2, data = data, data2

                for item in so:
                    data.append(item)

                for item in data2:
                    data.append(item)

                print("--- %s seconds ---" % (time.time() - start_time))
                return render.text(data, i.q, dictionary, info, ans, logged_in)
            elif i.q != "" and typ == "image":
                query = i.q.replace(" ", "+")
                goog = requests.get(
                    f"https://google.com/search?q={query}&tbm=isch",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(goog, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                goog = imgs
                b = requests.get(
                    f"https://bing.com/images/search?q={query}&form=HDRSC2",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(b, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    if link.startswith("/rp"):
                        link = f"https://bing.com/images/search?q={query}&form=HDRSC2" + link
                    if link != "<img alt=":
                        imgs.append(link)
                b = imgs
                duckduckgo = requests.get(
                    f"https://duckduckgo.com/?q={query}&ia=images",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    })
                soup = BeautifulSoup(duckduckgo.content, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                duckduckgo = imgs
                yhoo = requests.get(
                    f"https://images.search.yahoo.com/search/images;_ylt=A0geJaQetm1gPx0AGURXNyoA;_ylu=Y29sbwNiZjEEcG9zAzEEdnRpZAMEc2VjA3BpdnM-?p={query}&fr2=piv-web&fr=opensearch",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(yhoo, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                yhoo = imgs
            elif i.q != "" and typ == "video":
                query = i.q.replace(" ", "+")
                goog = YoutubeSearch(query, max_results=100).to_dict()
                b, duckduckgo, yhoo = [], [], []
            elif i.q != "" and typ == "news":
                query = i.q.replace(" ", "+")
                news = GoogleNews()

                news.set_lang('en')
                news.set_encode('utf-8')

                news.search(query)

                goog = news.results()
                b, duckduckgo, yhoo = [], [], []
            elif i.q != "" and typ == "maps":
                goog, b, duckduckgo, yhoo = [], [], [], []
            elif i.q != "" and typ == "shopping":
                goog = []
                b = []
                duckduckgo = []
                yhoo = []
                use_cache = False
                try:
                    # per-user cache: reuse only if under 2 days old,
                    # and even then only on a 1-in-10 random hit
                    if cache[session.get("user")][i.q]["last_updated"] + 172800 > time.time() \
                            and random.randint(1, 10) == 5:
                        use_cache = True
                except KeyError:
                    pass
                print(use_cache)
                if use_cache:
                    goog = cache[session.get("user")][i.q]["google"]
                    b = cache[session.get("user")][i.q]["bing"]
                    duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                    yhoo = cache[session.get("user")][i.q]["yahoo"]
                else:
                    if "Google" in engines:
                        queue1 = Queue()
                        p = Process(target=gshop, args=(i.q, queue1))
                        p.start()
                    if "Bing" in engines:
                        queue2 = Queue()
                        p2 = Process(target=bing_shopping, args=(i.q, queue2))
                        p2.start()
                    if "Yahoo" in engines:
                        queue3 = Queue()
                        p3 = Process(target=yahoo_shopping, args=(i.q, queue3))
                        p3.start()
                    if "Google" in engines:
                        goog = queue1.get()
                        p.join()
                    if "Bing" in engines:
                        b = queue2.get()
                        p2.join()
                    if "Yahoo" in engines:
                        yhoo = queue3.get()
                        p3.join()
                    if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                        try:
                            cache[session.get("user")][i.q] = {
                                "google": goog,
                                "bing": b,
                                "yahoo": yhoo,
                                "duckduckgo": duckduckgo,
                                "last_updated": time.time()
                            }
                        except KeyError:
                            pass  # this user has no cache bucket yet
            return render.search(goog, b, duckduckgo, yhoo, i.q, sort, typ,
                                 engines, logged_in, dictionary, info, ans)
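The getnum helper above is a hand-rolled word-overlap percentage used to merge near-duplicate titles across engines; the standard library's difflib computes a comparable ratio with less code, a sketch with the threshold mirroring the 90 used above:

from difflib import SequenceMatcher

def similar_titles(a, b, threshold=0.9):
    # True when two titles are near-duplicates, case-insensitively
    return SequenceMatcher(None, a.lower(), b.lower()).ratio() >= threshold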
Code Example #11
def main():

	all_df = []

	sid_obj = SentimentIntensityAnalyzer() 	

	googlenews = GoogleNews()
	googlenews.set_lang('en')
	googlenews.set_encode('utf-16')

	"""
	Primary Phrases refer to the keywords we are interested in studying
	Secondary Phrases refer to the target countries
	"""
	company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']

	# testing_countries = ['Egypt', 'Kenya', 'Nigeria']
	testing_countries = []

	"""
	Months refer to the date range 
	"""
	# months = ['08/01/2020', '09/01/2020', '10/01/2020']
	# months = ['01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
	months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']

	for first in company_name:

		fin = []
		seen = []
		
		with open('sample.csv', mode='r') as csv_file:
			csv_reader = csv.DictReader(csv_file)
			
			summary_data = []

			for row in csv_reader:
				# print(row)
				second = row['\ufeffCountry']  # the BOM clings to the first header; opening with encoding='utf-8-sig' avoids this
				if (second not in testing_countries and len(testing_countries)!=0): 
					continue

				full_phrase = first+" "+second

				print(full_phrase)

				counter = 0
				sum_sent = 0
				
				pos_count = 0
				# neu_count = 0
				neg_count = 0

				neg_article = {'title': 'N/A', '% Negative': 0}

				for i in range(0, len(months)-1):
					googlenews.set_time_range(months[i],months[i+1])
					googlenews.get_news(full_phrase)
					res = googlenews.results()

					# It would be very easy to get more than the first page: use googlenews.get_page(2) or
					# result = googlenews.page_at(2), in conjunction with googlenews.total_count() to see how
					# many results show up on a page; if there are zero, that is probably the last page,
					# though I'm not sure that's exactly how it works.

					for result in res:
						if result['title'] not in seen:
							# print(result)
							result['start date'] = months[i]
							result['end date'] = months[i+1]
							result['company'] = first
							result['country'] = second
							result['latitude'] = row['Latitude']
							result['longitude'] = row['Longitude']

							sentiment_dict = sid_obj.polarity_scores(result['title'])
							result['% Negative'] = sentiment_dict['neg']*100
							result['% Neutral'] = sentiment_dict['neu']*100
							result['% Positive'] = sentiment_dict['pos']*100
							result['Magnitude'] = sentiment_dict['compound']*50 + 50

							counter += 1
							sum_sent += result['Magnitude']
							
							# result.pop('date')
							# result.pop('datetime')
							# result.pop('img')
							# result.pop('media')

							# if result['% Negative'] > result['% Neutral'] and result['% Negative']>result['% Positive']: neg_count += 1
							# elif result['% Neutral'] > result['% Positive']: neu_count += 1
							# else: pos_count += 1
							if result['% Positive'] > result['% Negative']: pos_count += 1
							else: neg_count += 1

							if result['% Negative'] >= neg_article['% Negative']: neg_article = result

							fin.append(result)
							seen.append(result['title'])

				# scale to 0-100 so the ratio agrees with the default of 50
				posPercent = 50
				if pos_count+neg_count>0: posPercent = 100*pos_count/(pos_count + neg_count)

				magni = 0
				if counter>0: magni = sum_sent/counter

				country_comp_score = {'country': second, 'latitude': row['Latitude'], 
				'longitude': row['Longitude'], 'magnitude': magni, 'positive': pos_count, 
				'negative': neg_count, 'pos/(pos+neg)': posPercent, 'Most negative title': neg_article['title']}

				summary_data.append(country_comp_score)
				all_df.append((country_comp_score, first))

			df = pd.DataFrame(fin)
			df = df.drop(columns=['date', 'datetime', 'img', 'media'])  # fixed: drop returns a copy, so assign it back
			df.to_csv("./Output/{}_output.csv".format(first),index=False)

			summary_df = pd.DataFrame(summary_data)
			summary_df.to_csv("./Output/{}_summary_output.csv".format(first),index=False)
			# all_df.append(summary_df)
	
	# meta_data = []
	# # with open('sample.csv', mode='r') as csv_file:
	# dic_len = sum(1 for line in open('sample.csv'))

	# with open('sample.csv', mode='r') as csv_file:
	# 	csv_reader = csv.DictReader(csv_file)
	# 	for j in range(0, dic_len):
	# 		most_pos = 0
	# 		for i in range(0, len(company_name)):
	# 			if all_df[most_pos][j]['positive']<all_df[i][j]['positive']: 
	# 				most_pos = i
	# 		meta_data.append({all_df[0][j]['\ufeffCountry']: company_name[most_positive]})

	fields = ['Country', 'Company', 'Count']  

	meta_data = []
	seen = []
	for result in all_df:
		if result[0]['country'] not in seen:
			seen.append(result[0]['country'])
			meta_data.append([result[0]['country'], result[1], result[0]['positive']])
		else:
			for candidate in meta_data:
				if candidate[0]==result[0]['country'] and candidate[2]<result[0]['positive']:
					candidate[1] = result[1]
					candidate[2] = result[0]['positive']

	with open('./Output/meta_data.csv', 'w') as f:
		write = csv.writer(f)      
		write.writerow(fields)
		write.writerows(meta_data)
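For reference, the sentiment columns above come from VADER's polarity_scores, which returns neg/neu/pos fractions plus a compound score in [-1, 1]; the Magnitude column is just that compound rescaled to [0, 100]. A minimal sketch (assuming the vaderSentiment package; nltk ships the same class):

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()
scores = sid.polarity_scores("Pfizer vaccine shows strong results")
# scores is a dict with keys 'neg', 'neu', 'pos', 'compound'
magnitude = scores['compound'] * 50 + 50  # rescale [-1, 1] -> [0, 100]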
Code Example #12
"""
Before you start, install the library using: 

pip install GoogleNews

"""

from GoogleNews import GoogleNews
import pandas as pd

googlenews = GoogleNews()

googlenews.set_lang('en')
googlenews.set_encode('utf-8')

"""
Primary Phrases refer to the keywords we are interested in studying
Secondary Phrases refer to the target countries
"""
primary_phrases = ['Pfizer Vaccine', 'AstraZeneca Vaccine', 'Sputnik V Vaccine', 'Sinovac Vaccine']
secondary_phrases = ['Namibia', 'France', 'South Africa']


# months = ['01/01/2019', '02/01/2019', '03/01/2019', '04/01/2019', '05/01/2019', '06/01/2019', '07/01/2019', '08/01/2019', '09/01/2019', '10/01/2019', '11/01/2019', '12/01/2019', '01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021']
"""
Months refer to the date range 
"""
months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021']

fin = []
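The snippet is cut off here, but example #11 above shows the shape it is heading toward: a sliding window over consecutive month boundaries for each phrase pair. A sketch of that loop, continuing from the variables defined above (the zip pairing and the per-query clear() are our additions, not the original continuation):

for phrase in primary_phrases:
    for country in secondary_phrases:
        for start, end in zip(months, months[1:]):
            googlenews.clear()  # results accumulate across queries otherwise
            googlenews.set_time_range(start, end)
            googlenews.get_news(phrase + " " + country)
            fin.extend(googlenews.results())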