Code example #1 (score: 0)
File: newsScrape.py — Project: LAHacks21/WordCloud
def googleLinks(topic):
    """Search Google News for *topic* (last 24h) and resolve the top links.

    Fetches the first five result links, prefixes them with "http://",
    follows redirects via ``requests`` and returns the final article URLs.
    """
    news_client = GoogleNews()
    news_client.set_lang('en')
    news_client.set_period('1d')
    news_client.set_encode('utf-8')
    news_client.get_news(topic)

    resolved_urls = []
    for raw_link in news_client.get_links()[:5]:
        full_url = "http://" + raw_link
        print(full_url)
        # requests.get follows redirects; .url is the final landing URL.
        resolved_urls.append(requests.get(full_url).url)
    return resolved_urls
    
Code example #2 (score: 0)
def googleNewsApi(request, word):
    """Django view: render 'homepage.html' with Google News results and
    a Twitter search cursor for *word*.

    Parameters:
        request: the incoming Django HttpRequest.
        word: search term; coerced to str for both APIs.

    Returns:
        An HttpResponse rendering 'homepage.html' with context keys
        'news' (list of GoogleNews result dicts) and 'tweets' (a lazy
        tweepy Cursor iterator — tweets are fetched when iterated).
    """

    # Google News: English results from the past 7 days.
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    googlenews.total_count()
    resultsGoogleNews = googlenews.results()
    #print(resultsGoogleNews)
    #print(googlenews.total_count())

    #TWITTER
    # SECURITY NOTE(review): live Twitter API credentials are hard-coded
    # here; they should be rotated and moved to environment variables or
    # a secrets store — never committed to source control.
    consumer_key = 'sz6x0nvL0ls9wacR64MZu23z4'
    consumer_secret = 'ofeGnzduikcHX6iaQMqBCIJ666m6nXAQACIAXMJaFhmC6rjRmT'
    access_token = '854004678127910913-PUPfQYxIjpBWjXOgE25kys8kmDJdY0G'
    access_token_secret = 'BC2TxbhKXkdkZ91DXofF7GX8p2JNfbpHqhshW1bwQkgxN'
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
    # Today's date in YYYY-MM-DD, used as the lower bound of the search.
    date_since = datetime.today().strftime('%Y-%m-%d')
    print(date_since)
    #tweets = api.search(str("bitcoin"), count=1)
    # Cursor is lazy: no requests are made until 'tweets' is iterated
    # (presumably by the template) — capped at 100 items.
    tweets = tweepy.Cursor(api.search,
                           q=str(word),
                           lang="en",
                           since=date_since).items(100)
    """print(tweets.__dict__['page_iterator'].__dict__)
    for tweet in tweets:
        print(tweet)
        print(tweet.id)"""
    #return googlenews
    """for result in resultsGoogleNews:

        title = result['title']
        date = result['date']
        link = result['link']
        source = result['site']

        news = {'title':title, 'date': date, 'link': link, 'site':site}
    """
    return render(request, 'homepage.html', {
        'news': resultsGoogleNews,
        'tweets': tweets
    })
Code example #3 (score: 0)
def GNews():
    """Fetch 7-day Google News results for a randomly chosen topic.

    Returns a single string concatenating title, description and link of
    every result (empty string when the search yields no results).
    """
    gn = GoogleNews()
    gn.set_period('7d')
    # Renamed from `list`, which shadowed the builtin.
    topics = [
        "INDIA", "USA", "UK", "AUSTRALIA", "FRANC", "UGANDA", "PAKISTAN",
        "MALDIVES", "CELEBRITY"
    ]

    gn.search(random.choice(topics))
    rs = gn.results()

    # Bug fix: the original reassigned `data` on every iteration, so only
    # the LAST result survived — and an empty result set raised NameError
    # at the return. Accumulate across all results instead.
    data = ""
    for item in rs:
        data += item['title'] + item['desc'] + item['link']
    return data
Code example #4 (score: 0)
def scrape_the_news():
    """Scrape 7-day Google News results for the top NLP topic and insert
    them into the `scrapenews_newslist` Postgres table.

    Pulls the first topic from NLP_news(), fetches English news for it,
    reshapes the results into a DataFrame, then inserts one row per
    article. Rows that fail to insert are reported and skipped.
    """
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent

    topiclist = NLP_news()
    print(topiclist[0])

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])

    result = googlenews.results()
    googlenews.clear()

    # Normalize: drop the raw date/media columns, rename, and reorder to
    # match the scrapenews_newslist column order.
    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]

    # SECURITY NOTE(review): DB credentials are hard-coded; move them to
    # environment variables or a settings file.
    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    try:
        curr = conn.cursor()
        try:
            for i, row in df.iterrows():
                try:
                    row.Link = 'https://' + row.Link
                    values = [row[column] for column in row.keys()]

                    # id comes from the table's sequence; the remaining
                    # placeholders are bound safely by psycopg2.
                    insert_statement = "INSERT INTO scrapenews_newslist VALUES (nextval('scrapenews_newslist_id_seq'::regclass),%s, %s, %s, %s, %s, %s)"
                    curr.execute(insert_statement, tuple(values))
                except Exception as exc:
                    # Narrowed from a bare `except:` (which also swallowed
                    # KeyboardInterrupt/SystemExit) and now reports why.
                    print('could not add row', i, exc)
            conn.commit()
        finally:
            # Fix: cursor/connection were leaked if anything above raised.
            curr.close()
    finally:
        conn.close()
Code example #5 (score: 0)
def googlenews_recovery(app_config):
    """Build a GoogleNews client configured from *app_config*.

    Reads the 'lang', 'period' and 'keywords' keys, runs the news query,
    and returns the configured client with results loaded.
    """
    client = GoogleNews()
    client.set_lang(app_config["lang"])
    client.set_period(app_config["period"])
    client.get_news(app_config["keywords"])
    return client
Code example #6 (score: 0)
@author: Daten Master
'''
from GoogleNews import GoogleNews

googlenews = GoogleNews()

##############################################################
################ Search definition ###########################
##############################################################

googlenews.set_encode('utf-8')
# Set the language (e.g. 'de' = German; 'en' = English; ...)
googlenews.set_lang('de')
# Filter by period (e.g. news no older than 1 day)
googlenews.set_period('1d')
#googlenews.set_time_range('15/01/2021','17/01/2021')

# Run the search (query string stays German: "Wetter Hamburg" = Hamburg weather)
googlenews.get_news('Wetter Hamburg')

##############################################################
######################## Output ##############################
##############################################################

# All info (title, description, time, date, link, source)
#print(googlenews.results())

# Iterate over the news headlines
#for i in googlenews.results():
#    print(i['title'])