Code example #1
    def job(self):
        # download the current database
        self.getDB()
        self.print_header(self.rawFileName)
        self.lineCounter(self.rawFileName)
        collected = 0
        for tag in self.newsTags:
            self.logger.info(f"Collecting news from tag: {tag}")
            googlenews = GoogleNews()
            googlenews.clear()
            googlenews.set_lang(self.newsLang)
            googlenews.set_period('1d')  # was setperiod(), which is not a GoogleNews method
            googlenews.get_news(tag)
            output = pd.DataFrame(googlenews.results(sort=True))
            collected += len(output)
            self.saveToFile(output, self.rawFileName)
        self.logger.info(f"Collected amount of news: {collected}")
        self.removeDuplicates(self.rawFileName, self.finalFileName)

        # os.remove(rawFileName)  # delete the buffer file
        # logger.info(f"Removed file with duplicates: {rawFileName}")
        os.rename(self.finalFileName, self.rawFileName)  # rename the final file back to the buffer name
        self.logger.info(f"Renamed: {self.finalFileName} to: {self.rawFileName}")
        self.backupDB()
Code example #2
def search():
    # Flask view; requires `from flask import request, render_template`,
    # `import json`, and `from GoogleNews import GoogleNews`.
    google_news = GoogleNews()
    google_news.set_lang(lang="cn")
    google_news.search(request.args.get("key"))
    result = google_news.result()

    news_items = [{"title": news["title"], "link": news["link"]} for news in result]

    return render_template("index.html", data=json.dumps(news_items))
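
For reference, a hypothetical client call, assuming the view above is registered under a /search route on a locally running Flask app (the route path is not shown in the snippet):

import requests

resp = requests.get("http://localhost:5000/search", params={"key": "semiconductors"})
print(resp.text[:200])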
Code example #3
File: newsScrape.py  Project: LAHacks21/WordCloud
def googleLinks(topic):
    # Requires `from GoogleNews import GoogleNews` and `import requests`.
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(topic)  # get_news() returns None; results are stored on the object
    links = googlenews.get_links()[:5]
    actualLinks = []
    for l in links:
        l = "http://" + l
        print(l)
        actualLinks.append(requests.get(l).url)  # follow redirects to the final article URL
    return actualLinks
Code example #4
def googleNewsApi(request, word):
    # Django view; requires `from GoogleNews import GoogleNews`, `import tweepy`
    # (< 4.0, where `api.search` still exists), `from datetime import datetime`,
    # and `from django.shortcuts import render`.
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    resultsGoogleNews = googlenews.results()
    # print(resultsGoogleNews)
    # print(googlenews.total_count())

    # TWITTER
    # credentials redacted; never commit real API keys, load them from the
    # environment or a secrets store instead
    consumer_key = 'YOUR_CONSUMER_KEY'
    consumer_secret = 'YOUR_CONSUMER_SECRET'
    access_token = 'YOUR_ACCESS_TOKEN'
    access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    date_since = datetime.today().strftime('%Y-%m-%d')
    # tweets = api.search(str("bitcoin"), count=1)
    tweets = tweepy.Cursor(api.search,
                           q=str(word),
                           lang="en",
                           since=date_since).items(100)
    """for result in resultsGoogleNews:
        title = result['title']
        date = result['date']
        link = result['link']
        source = result['site']
        news = {'title': title, 'date': date, 'link': link, 'site': source}
    """
    return render(request, 'homepage.html', {
        'news': resultsGoogleNews,
        'tweets': tweets
    })
Code example #5
async def create_item(item: Item):
    # FastAPI-style endpoint; assumes module-level `translator` (googletrans)
    # and `analyser` (vaderSentiment SentimentIntensityAnalyzer) objects.
    googlenews = GoogleNews()
    googlenews.set_lang('pt')
    googlenews.search(item.mensagem)
    result = googlenews.get_texts()[0]
    translation = translator.translate(result, dest='en')
    score = analyser.polarity_scores(translation.text)  # sentiment polarity of the headline
    compound = score['compound']  # overall (compound) sentiment score
    if compound > 0:
        mensagemSentimento = "noticia positiva"
    elif compound == 0:  # was `>= 0`, which made the negative branch unreachable
        mensagemSentimento = "noticia neutra"
    else:
        mensagemSentimento = "noticia negativa"
    return {"mensagem": result, "sentimento": mensagemSentimento}
Code example #6
def scrape_the_news():
    # Requires GoogleNews, pandas as pd, psycopg2, newspaper's Config,
    # and the project-local NLP_news() helper.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()  # newspaper config, prepared for any follow-up article downloads
    config.browser_user_agent = user_agent

    topiclist = NLP_news()
    print(topiclist[0])

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])

    result = googlenews.results()

    googlenews.clear()

    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]

    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()

    for i, row in df.iterrows():
        try:
            row.Link = 'https://' + row.Link
            values = [row[column] for column in row.keys()]

            insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass), %s, %s, %s, %s, %s, %s)"
            curr.execute(insert_statement, tuple(values))
        except Exception as exc:  # a bare except would also swallow KeyboardInterrupt
            print('could not add row', i, exc)

    conn.commit()

    curr.close()
    conn.close()
Code example #7
def crawling_news(company_name_list, start_date, end_date):
    # Requires GoogleNews, pandas as pd, and the logging module.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_time_range(start_date, end_date)  # was the string literals 'start_date'/'end_date'
    googlenews.set_encode('utf-8')
    # news.google.com search sample
    all_title = []
    logger.info('loop start')
    for i, company_name in enumerate(company_name_list):
        googlenews.get_news(company_name)
        logger.info('%s : %0.2f%%' %
                    (company_name,
                     ((i + 1) / len(company_name_list)) * 100))
        results = googlenews.results()  # cache instead of re-querying inside the loop
        for item in results:
            all_title.append(item.get('title'))
    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logger.info('saved to csv, done!!')
    return all_title
Code example #8
def googlenews_function(keyword='台積電',
                        language='cn',
                        start_date='2020/12/01',
                        end_date='2020/12/28'):
    '''
    Parameters:
    - date range (start_date / end_date, 'YYYY/MM/DD')
    - keyword
    - language
    - number of pages to crawl (listed in the original docstring but not used below)
    '''
    googlenews = GoogleNews()
    googlenews.clear()
    googlenews.set_encode('utf-8')
    googlenews.set_lang(language)

    # GoogleNews expects MM/DD/YYYY, so reorder the YYYY/MM/DD inputs
    start_year, start_month, start_day = start_date.split('/')
    all_date_start = '{}/{}/{}'.format(start_month, start_day, start_year)

    end_year, end_month, end_day = end_date.split('/')
    all_date_end = '{}/{}/{}'.format(end_month, end_day, end_year)

    googlenews.set_time_range(start=all_date_start, end=all_date_end)

    googlenews.search(keyword)
    data = googlenews.result()
    print("Total number of results:", len(data))
    news = pd.DataFrame(data)
    # news.to_csv("GoogleNews_" + keyword + "_" + start_date.replace('/', '-') + "_to_" + end_date.replace('/', '-') + ".csv", index=False)
    return news
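
Called with its defaults, the helper returns a pandas DataFrame of matching articles:

news_df = googlenews_function()  # keyword '台積電', 2020/12/01 through 2020/12/28
print(news_df.head())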
Code example #9
from datetime import date
from GoogleNews import GoogleNews

news = GoogleNews()
news.set_lang('en')
date_today = date.today().strftime('%m/%d/%Y')  # set_time_range() expects MM/DD/YYYY strings, not date objects
news.set_time_range('01/11/2020', date_today)
news.set_encode('utf-8')
topic = input("Topic : ")
news.search(topic)
news.get_page(2)
# headlines with links WORLD NEWS
results = news.results()  # cache instead of re-querying on every loop iteration
for item in results[:6]:
    print(item["title"])
    print(item["link"])
Code example #10
def googlenews_recovery(app_config):
    googlenews = GoogleNews()
    googlenews.set_lang(app_config["lang"])
    googlenews.set_period(app_config["period"])
    googlenews.get_news(app_config["keywords"])
    return googlenews
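
The helper reads the lang, period, and keywords keys from its config mapping; the values below are illustrative:

app_config = {"lang": "en", "period": "7d", "keywords": "electric vehicles"}
googlenews = googlenews_recovery(app_config)
for item in googlenews.results()[:3]:
    print(item["title"])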
Code example #11
File: alexa.py  Project: vimal0312/alexa
def run_alexa():
    command = take_command()
    print(command)
    if 'music' in command:
        song = command.replace('play song', '')
        talk('I am playing your favourite ' + song)
        # print('playing')
        print(song)
        # playing the first video that appears in yt search
        pywhatkit.playonyt(song)

    elif 'time' in command:
        now = datetime.now()
        time = now.strftime("%H:%M:%S")
        print("time:", time)
        talk("Current time is " + time)

    elif 'month' in command or 'year' in command:  # `('month' or 'year') in command` only ever checked 'month'
        now = datetime.now()
        year = now.strftime("%Y")
        print("year:", year)
        talk("Current year is  " + year)
        month = now.strftime("%m")
        print("month:", month)
        talk("Current month is  " + month)

    elif 'date' in command:
        now = datetime.now()
        date_time = now.strftime("%m/%d/%Y, %H:%M:%S")
        print("date and time:", date_time)
        talk("Current date and time is " + date_time)

    # opens web.whatsapp at the specified time (about 10 minutes ahead) and sends the message
    elif 'whatsapp' in command:
        talk("To which number do you have to whatsapp")
        talk("Please don't forget to enter 10 digits with country code")
        num = input()
        talk("Enter the message you have to send")
        msg = input()
        talk("Enter the time to send the message")
        time = int(input())
        pywhatkit.sendwhatmsg(num, msg, time, 00)
        pywhatkit.showHistory()
        pywhatkit.shutdown(3000000000)
        # pywhatkit.sendwhatmsg("+919876543210", "This is a message", 15, 00)

    # Convert text to handwritten format
    elif 'convert' in command:
        text = command.replace('convert', '')
        pywhatkit.text_to_handwriting(text, rgb=[0, 0, 0])

    # Perform google search
    elif 'search' in command:
        key = command.replace('search', '')
        pywhatkit.search(key)  # was the literal string "key"

    elif 'wikipedia' in command:
        person = command.replace('wikipedia', '')
        talk("How many pages do you want to read")
        num_pages = int(input())
        # talk("In which language do you want to read")
        # l = input()
        # wikipedia.set_lang(l)
        info = wikipedia.summary(person, num_pages)
        print(info)
        talk(info)

    elif 'can you work for me' in command:
        talk("sorry, I have headache. Please do your work")

    elif 'are you single' in command:
        talk("I am in relationshhip with wifi")

    elif 'joke' in command:
        talk(pyjokes.get_joke())
        talk("sorry for the lamest joke")

    elif 'open google browser' in command:
        try:
            urL = 'https://www.google.com'
            chrome_path = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
            webbrowser.register('chrome', None,
                                webbrowser.BackgroundBrowser(chrome_path))
            webbrowser.get('chrome').open_new_tab(urL)
            talk("Successfully opened chrome, it's up to you to search")
        except webbrowser.Error:  # was a bare except that merely named the error class
            talk("Could not open chrome")

    elif 'google search' in command:
        word_to_search = command.replace('google search', '')
        response = GoogleSearch().search(word_to_search)
        print(response)
        for result in response.results:
            print("Title: " + result.title)
            talk("You can look for the following titles  " + result.title)

    elif 'weather' in command:
        # base URL
        BASE_URL = "https://api.openweathermap.org/data/2.5/weather?"
        talk("Which city weather are you looking for")
        try:
            with sr.Microphone() as source:
                print('listening weather...')
                city_voice = listener.listen(source)
                city = listener.recognize_google(city_voice)
                # city = '\"'+city.lower()+'\"'

                print(city)
                # city="bangalore"
                # API key (redacted; use your own OpenWeatherMap key)
                API_KEY = "YOUR_OPENWEATHERMAP_API_KEY"
                # updating the URL
                URL = BASE_URL + "q=" + city + "&appid=" + API_KEY
                # HTTP request
                response = requests.get(URL)
                # checking the status code of the request
                if response.status_code == 200:
                    # getting data in the json format
                    data = response.json()
                    # getting the main dict block
                    main = data['main']
                    # getting temperature
                    temperature = main['temp']
                    # getting the humidity
                    humidity = main['humidity']
                    # getting the pressure
                    pressure = main['pressure']
                    # weather report
                    report = data['weather']
                    print(f"{city:-^30}")  # was the undefined name CITY
                    print(f"Temperature: {temperature}")
                    print(f"Humidity: {humidity}")
                    print(f"Pressure: {pressure}")
                    print(f"Weather Report: {report[0]['description']}")
                    talk("Temperature in " + city + " is " + str(temperature) +
                         " humidity is " + str(humidity) + " pressure is " +
                         str(pressure) + " and your final weather report is " +
                         report[0]['description'])
                else:
                    # showing the error message
                    print("Error in the HTTP request")
                    talk("Error in the HTTP request")
        except:
            talk("Hmmmmm, it looks like there is something wrong")

    elif 'news' in command:
        try:
            googlenews = GoogleNews()
            googlenews.set_lang('en')
            # googlenews.set_period('7d')
            # googlenews.set_time_range('02/01/2020', '02/28/2020')
            googlenews.set_encode('utf-8')

            talk("What news are you looking for")
            try:
                with sr.Microphone() as source:
                    print('listening news ...')
                    news_voice = listener.listen(source)
                    news_input = listener.recognize_google(news_voice)
                    news_input = news_input.lower()
                    print(news_input)
                    googlenews.search(news_input)
                    googlenews.get_page(2)  # also pull in the second results page
                    news = googlenews.get_texts()
                    print(news)
                    for headline in news:  # talk() expects a string, not a list
                        talk(headline)
            except:
                print("Error")
                talk("Error in reading input")

        except:
            print("No news")
            talk(" I couldn't find any news on this day")

    elif 'play book' in command or 'read pdf' in command:  # the original `'play book' or ...` was always truthy
        talk("Which pdf do you want me to read")
        book_input = input()
        print(book_input)
        book = open(book_input, 'rb')
        # create pdfReader object
        pdfReader = PyPDF2.PdfFileReader(book)
        # count the total pages
        total_pages = pdfReader.numPages  # keep as int so the page loops below work
        print("Total number of pages", total_pages)
        talk("Total number of pages " + str(total_pages))
        # initialise speaker object
        # speaker = pyttsx3.init()
        # talk("Enter your starting page")
        # start_page = int(input())
        talk(
            " here are the options for you, you can press 1 to  Play a single page     2 to   Play between start and end points  and  3 to  Play the entire book "
        )
        talk("Enter your choice")
        choice = int(input())
        if (choice == 1):
            talk("Enter index number")
            page = int(input())
            page = pdfReader.getPage(page)
            text = page.extractText()
            talk(text)
            # speaker.say(text)
            # speaker.runAndWait()
        elif (choice == 2):
            talk("Enter starting page number")
            start_page = int(input())
            talk("Enter ending page number")
            end_page = int(input())
            for page_num in range(start_page, end_page + 1):
                page = pdfReader.getPage(page_num)  # was stuck re-reading start_page + 1
                text = page.extractText()
                talk(text)
                # speaker.say(text)
                # speaker.runAndWait()
        elif (choice == 3):
            for page_num in range(total_pages):  # pages are 0-indexed; total_pages + 1 overran the document
                page = pdfReader.getPage(page_num)
                text = page.extractText()
                talk(text)
                # speaker.say(text)
                # speaker.runAndWait()
        else:
            talk("Haha!! Please enter valid choice")
    else:
        talk(
            "Hiii Rashika, I am so bored can you please give me some proper commands"
        )
Code example #12
File: main.py  Project: CoolCoderSJ/SearchDeck
    def GET(self):
        if session.get("user"):
            logged_in = True
        else:
            logged_in = False
        i = web.input(q="", sort="table", typ="text")
        if i.q == "":
            if logged_in:
                stin = db[session.get("user")]
            else:
                stin = {
                    "engines": {
                        "Google": "checked",
                        "Bing": "checked",
                        "DuckDuckGo": "checked",
                        "Yahoo": "checked"
                    },
                    "default_typ": {
                        "text": "checked",
                        "image": "",
                        "video": "",
                        "news": "",
                        "maps": "",
                        "shopping": ""
                    }
                }
            return render.home(logged_in, stin)

        else:
            r = requests.get("http://httpbin.org/ip")
            global cache
            #clear cache if cache is too big
            if len(cache) > 25:
                cache = {}
            engines = []
            sort = i.sort
            typ = i.typ
            if "Google" in i:
                engines.append("Google")
            if "Bing" in i:
                engines.append("Bing")
            if "DuckDuckGo" in i:
                engines.append("DuckDuckGo")
            if "Yahoo" in i:
                engines.append("Yahoo")

            if "Google" not in i and "Bing" not in i and "DuckDuckGo" not in i and "Yahoo" not in i:
                if logged_in:
                    engines = db[session.get("user")]['engines']
                else:
                    engines = ['Google', 'Bing', 'DuckDuckGo', 'Yahoo']

            dictionary = []
            info = []
            ans = []

            if i.q != "" and typ == "text":
                start_time = time.time()
                goog = []
                b = []
                duckduckgo = []
                yhoo = []
                use_cache = False
                try:
                    # serve the per-user cache only when the entry is under two
                    # days old and a 1-in-10 roll hits (most requests refresh)
                    entry = cache[session.get("user")][i.q]
                    if entry["last_updated"] + 172800 > time.time() \
                            and random.randint(1, 10) == 5:
                        use_cache = True
                except KeyError:
                    pass
                if use_cache:
                    goog = cache[session.get("user")][i.q]["google"]
                    b = cache[session.get("user")][i.q]["bing"]
                    duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                    yhoo = cache[session.get("user")][i.q]["yahoo"]
                else:
                    if "Google" in engines:
                        queue1 = Queue()
                        p = Process(target=google, args=(i.q, queue1))
                        p.start()
                    if "Bing" in engines:
                        queue2 = Queue()
                        p2 = Process(target=bing, args=(i.q, queue2))
                        p2.start()
                    if "DuckDuckGo" in engines:
                        queue3 = Queue()
                        p3 = Process(target=ddg, args=(i.q, queue3))
                        p3.start()
                    if "Yahoo" in engines:
                        queue4 = Queue()
                        p4 = Process(target=yahoo, args=(i.q, queue4))
                        p4.start()
                    if "Google" in engines:
                        goog = queue1.get()
                        p.join()
                    if "Bing" in engines:
                        b = queue2.get()
                        p2.join()
                    if "DuckDuckGo" in engines:
                        duckduckgo = queue3.get()
                        p3.join()
                    if "Yahoo" in engines:
                        yhoo = queue4.get()
                        p4.join()
                    dictionary = word_dictionary(i.q)
                    info = infobox(i.q)
                    ans = ansbox(i.q)
                    if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                        try:
                            cache[session.get("user")][i.q] = {
                                "google": goog,
                                "bing": b,
                                "yahoo": yhoo,
                                "duckduckgo": duckduckgo,
                                "last_updated": time.time()
                            }
                        except KeyError:
                            pass
                data = []
                e = []
                f = []
                for g in goog:
                    g['engine'] = "Google"
                    e.append(g)
                    f.append(g['title'])

                for bingresult in b:
                    bingresult['engine'] = "Bing"
                    e.append(bingresult)
                    f.append(bingresult['title'])

                for d in duckduckgo:
                    d['engine'] = "DuckDuckGo"
                    e.append(d)
                    f.append(d['title'])

                for y in yhoo:
                    y['engine'] = 'Yahoo'
                    e.append(y)
                    f.append(y['title'])

                def getnum(s0, s1):
                    s0 = s0.lower()
                    s1 = s1.lower()
                    s0List = s0.split(" ")
                    s1List = s1.split(" ")
                    num = len(list(set(s0List) & set(s1List)))
                    return round(num / len(s0List) * 100)

                g = set(f)
                counter = 0
                so = []
                # merge near-duplicate results (>= 90% shared title words) and
                # record which engines returned each one; the original reused the
                # name `engines` here, clobbering the list of enabled engines
                for item in e:
                    if "stackoverflow.com" in item['link']:
                        match = None
                        for x in so:
                            if getnum(x[0]['title'], item['title']) >= 90:
                                match = x
                                break
                        if match:
                            so.remove(match)
                            match[1].append(item['engine'])
                            so.append([match[0], match[1]])
                        else:
                            so.append([item, [item['engine']]])
                    else:
                        match = None
                        for x in data:
                            if getnum(x[0]['title'], item['title']) >= 90:
                                match = x
                                break
                        if match:
                            data.remove(match)
                            match[1].append(item['engine'])
                            data.append([match[0], match[1], match[2]])
                        else:
                            data.append([item, [item['engine']], counter])
                        counter += 1

                done = 0
                data2 = []
                for item in data:
                    if done == len(data):
                        break
                    if data.index(item) != item[2]:
                        data.insert(item[2], data.pop(data.index(item)))
                        done += 1
                data2, data = data, data2

                for item in so:
                    data.append(item)

                for item in data2:
                    data.append(item)

                print("--- %s seconds ---" % (time.time() - start_time))
                return render.text(data, i.q, dictionary, info, ans, logged_in)
            elif i.q != "" and typ == "image":
                query = i.q.replace(" ", "+")
                goog = requests.get(
                    f"https://google.com/search?q={query}&tbm=isch",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(goog, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                goog = imgs
                b = requests.get(
                    f"https://bing.com/images/search?q={query}&form=HDRSC2",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(b, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    if link.startswith("/rp"):
                        link = f"https://bing.com/images/search?q={query}&form=HDRSC2" + link
                    if link != "<img alt=":
                        imgs.append(link)
                b = imgs
                duckduckgo = requests.get(
                    f"https://duckduckgo.com/?q={query}&ia=images",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    })
                soup = BeautifulSoup(duckduckgo.content, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                duckduckgo = imgs
                yhoo = requests.get(
                    f"https://images.search.yahoo.com/search/images;_ylt=A0geJaQetm1gPx0AGURXNyoA;_ylu=Y29sbwNiZjEEcG9zAzEEdnRpZAMEc2VjA3BpdnM-?p={query}&fr2=piv-web&fr=opensearch",
                    headers={
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
                    }).content
                soup = BeautifulSoup(yhoo, "html.parser")
                images = soup.findAll('img')
                imgs = []
                for image in images:
                    image = str(image)
                    link = image.split('src="')[-1].split('"')[0]
                    imgs.append(link)
                yhoo = imgs
            elif i.q != "" and typ == "video":
                query = i.q.replace(" ", "+")
                goog = YoutubeSearch(query, max_results=100).to_dict()
                b, duckduckgo, yhoo = [], [], []
            elif i.q != "" and typ == "news":
                query = i.q.replace(" ", "+")
                news = GoogleNews()

                news.set_lang('en')
                news.set_encode('utf-8')

                news.search(query)

                goog = news.results()
                b, duckduckgo, yhoo = [], [], []
            elif i.q != "" and typ == "maps":
                goog, b, duckduckgo, yhoo = [], [], [], []
            elif i.q != "" and typ == "shopping":
                goog = []
                b = []
                duckduckgo = []
                yhoo = []
                use_cache = False
                try:
                    # serve the per-user cache only when the entry is under two
                    # days old and a 1-in-10 roll hits (most requests refresh)
                    entry = cache[session.get("user")][i.q]
                    if entry["last_updated"] + 172800 > time.time() \
                            and random.randint(1, 10) == 5:
                        use_cache = True
                except KeyError:
                    pass
                print(use_cache)
                if use_cache:
                    goog = cache[session.get("user")][i.q]["google"]
                    b = cache[session.get("user")][i.q]["bing"]
                    duckduckgo = cache[session.get("user")][i.q]["duckduckgo"]
                    yhoo = cache[session.get("user")][i.q]["yahoo"]
                else:
                    if "Google" in engines:
                        queue1 = Queue()
                        p = Process(target=gshop, args=(i.q, queue1))
                        p.start()
                    if "Bing" in engines:
                        queue2 = Queue()
                        p2 = Process(target=bing_shopping, args=(i.q, queue2))
                        p2.start()
                    if "Yahoo" in engines:
                        queue3 = Queue()
                        p3 = Process(target=yahoo_shopping, args=(i.q, queue3))
                        p3.start()
                    if "Google" in engines:
                        goog = queue1.get()
                        p.join()
                    if "Bing" in engines:
                        b = queue2.get()
                        p2.join()
                    if "Yahoo" in engines:
                        yhoo = queue3.get()
                        p3.join()
                    if "Yahoo" in engines and "Google" in engines and "DuckDuckGo" in engines and "Bing" in engines and logged_in:
                        try:
                            cache[session.get("user")][i.q] = {
                                "google": goog,
                                "bing": b,
                                "yahoo": yhoo,
                                "duckduckgo": duckduckgo,
                                "last_updated": time.time()
                            }
                        except:
                            pass
            return render.search(goog, b, duckduckgo, yhoo, i.q, sort, typ,
                                 engines, logged_in, dictionary, info, ans)
Code example #13
def enableGoogleNews():
    googleNews = GoogleNews()
    googleNews.set_lang('en') # choose language: English
    # No date range or period set because many movies and actors have news that is not actually that recent
    return googleNews
Code example #14
def main():

	all_df = []

	sid_obj = SentimentIntensityAnalyzer() 	

	googlenews = GoogleNews()
	googlenews.set_lang('en')
	googlenews.set_encode('utf-16')

	"""
	Primary Phrases refer to the keywords we are interested in studying
	Secondary Phrases refer to the target countries
	"""
	company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']

	# testing_countries = ['Egypt', 'Kenya', 'Nigeria']
	testing_countries = []

	"""
	Months refer to the date range 
	"""
	# months = ['08/01/2020', '09/01/2020', '10/01/2020']
	# months = ['01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
	months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']

	for first in company_name:

		fin = []
		seen = []
		
		with open('sample.csv', mode='r') as csv_file:
			csv_reader = csv.DictReader(csv_file)
			
			summary_data = []

			for row in csv_reader:
				# print(row)
				second = row['\ufeffCountry']
				if (second not in testing_countries and len(testing_countries)!=0): 
					continue

				full_phrase = first+" "+second

				print(full_phrase)

				counter = 0
				sum_sent = 0
				
				pos_count = 0
				# neu_count = 0
				neg_count = 0

				neg_article = {'title': 'N/A', '% Negative': 0}

				for i in range(0, len(months)-1):
					googlenews.set_time_range(months[i],months[i+1])
					googlenews.get_news(full_phrase)
					res = googlenews.results()

					# It would be easy to fetch more than the first page: use googlenews.get_page(2)
					# or result = googlenews.page_at(2), together with googlenews.total_count(). If a
					# page returns zero results, it is probably the last one. See the pagination
					# sketch after this listing.

					for result in res:
						if result['title'] not in seen:
							# print(result)
							result['start date'] = months[i]
							result['end date'] = months[i+1]
							result['company'] = first
							result['country'] = second
							result['latitude'] = row['Latitude']
							result['longitude'] = row['Longitude']

							sentiment_dict = sid_obj.polarity_scores(result['title'])
							result['% Negative'] = sentiment_dict['neg']*100
							result['% Neutral'] = sentiment_dict['neu']*100
							result['% Positive'] = sentiment_dict['pos']*100
							result['Magnitude'] = sentiment_dict['compound']*50 + 50

							counter += 1
							sum_sent += result['Magnitude']
							
							# result.pop('date')
							# result.pop('datetime')
							# result.pop('img')
							# result.pop('media')

							# if result['% Negative'] > result['% Neutral'] and result['% Negative']>result['% Positive']: neg_count += 1
							# elif result['% Neutral'] > result['% Positive']: neu_count += 1
							# else: pos_count += 1
							if result['% Positive'] > result['% Negative']: pos_count += 1
							else: neg_count += 1

							if result['% Negative'] >= neg_article['% Negative']: neg_article = result

							fin.append(result)
							seen.append(result['title'])

				posPercent = 50
				if pos_count + neg_count > 0:
					posPercent = pos_count / (pos_count + neg_count) * 100  # keep the 0-100 scale implied by the default of 50

				magni = 0
				if counter>0: magni = sum_sent/counter

				country_comp_score = {'country': second, 'latitude': row['Latitude'], 
				'longitude': row['Longitude'], 'magnitude': magni, 'positive': pos_count, 
				'negative': neg_count, 'pos/(pos+neg)': posPercent, 'Most negative title': neg_article['title']}

				summary_data.append(country_comp_score)
				all_df.append((country_comp_score, first))

			df = pd.DataFrame(fin)
			df = df.drop(columns=['date', 'datetime', 'img', 'media'])  # drop() returns a copy; reassign it
			df.to_csv("./Output/{}_output.csv".format(first), index=False)

			summary_df = pd.DataFrame(summary_data)
			summary_df.to_csv("./Output/{}_summary_output.csv".format(first),index=False)
			# all_df.append(summary_df)
	
	# meta_data = []
	# # with open('sample.csv', mode='r') as csv_file:
	# dic_len = sum(1 for line in open('sample.csv'))

	# with open('sample.csv', mode='r') as csv_file:
	# 	csv_reader = csv.DictReader(csv_file)
	# 	for j in range(0, dic_len):
	# 		most_pos = 0
	# 		for i in range(0, len(company_name)):
	# 			if all_df[most_pos][j]['positive']<all_df[i][j]['positive']: 
	# 				most_pos = i
	# 		meta_data.append({all_df[0][j]['\ufeffCountry']: company_name[most_positive]})

	fields = ['Country', 'Company', 'Count']  

	meta_data = []
	seen = []
	for result in all_df:
		if result[0]['country'] not in seen:
			seen.append(result[0]['country'])
			meta_data.append([result[0]['country'], result[1], result[0]['positive']])
		else:
			for candidate in meta_data:
				if candidate[0]==result[0]['country'] and candidate[2]<result[0]['positive']:
					candidate[1] = result[1]
					candidate[2] = result[0]['positive']

	with open('./Output/meta_data.csv', 'w') as f:
		write = csv.writer(f)      
		write.writerow(fields)
		write.writerows(meta_data)
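
The pagination note in the listing above can be made concrete. A minimal sketch using only calls that appear in these examples (search, results, page_at); the stop-on-empty-page heuristic is the comment's own guess, not documented library behavior:

from GoogleNews import GoogleNews

googlenews = GoogleNews(lang='en')
googlenews.search('Pfizer Namibia')

all_results = list(googlenews.results())
page = 2
while True:
    extra = googlenews.page_at(page)  # fetch one specific results page
    if not extra:                     # an empty page probably means no more results
        break
    all_results.extend(extra)
    page += 1

print(len(all_results), "results collected")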
Code example #15
"""
Before you start, install the library using: 

pip install GoogleNews

"""

from GoogleNews import GoogleNews
import pandas as pd

googlenews = GoogleNews()

googlenews.set_lang('en')
googlenews.set_encode('utf-8')

"""
Primary Phrases refer to the keywords we are interested in studying
Secondary Phrases refer to the target countries
"""
primary_phrases = ['Pfizer Vaccine', 'AstraZeneca Vaccine', 'Sputnik V Vaccine', 'Sinovac Vaccine']
secondary_phrases = ['Namibia', 'France', 'South Africa']


# months = ['01/01/2019', '02/01/2019', '03/01/2019', '04/01/2019', '05/01/2019', '06/01/2019', '07/01/2019', '08/01/2019', '09/01/2019', '10/01/2019', '11/01/2019', '12/01/2019', '01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021']
"""
Months refer to the date range 
"""
months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021']

fin = []
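
The listing breaks off here; the original collection loop is not shown. A plausible continuation, modeled on the main() function above (the loop body and output filename are assumptions, not the original file's code):

# Hypothetical continuation: query each phrase pair over consecutive month windows.
for primary in primary_phrases:
    for secondary in secondary_phrases:
        for i in range(len(months) - 1):
            googlenews.clear()  # reset stored results between queries
            googlenews.set_time_range(months[i], months[i + 1])
            googlenews.get_news(primary + " " + secondary)
            fin.extend(googlenews.results())

df = pd.DataFrame(fin)
df.to_csv("vaccine_news.csv", index=False)  # assumed output path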
Code example #16
'''
Created on 17 Jan 2021

@author: Daten Master
'''
from GoogleNews import GoogleNews

googlenews = GoogleNews()

##############################################################
#################### Search definition #######################
##############################################################

googlenews.set_encode('utf-8')
# set the language (e.g. 'de' = German, 'en' = English, ...)
googlenews.set_lang('de')
# filter by period (e.g. news no older than 1 day)
googlenews.set_period('1d')
#googlenews.set_time_range('15/01/2021','17/01/2021')

# run the search
googlenews.get_news('Wetter Hamburg')

##############################################################
######################### Output #############################
##############################################################

# all info (title, description, time, date, link, source)
#print(googlenews.results())

# iterate over the news headlines
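
The listing is cut off after that comment; a minimal loop matching it might be (get_texts() returns just the headlines):

for headline in googlenews.get_texts():
    print(headline)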