def get_company_news_link(company='NaN', news_num=5, time_range='today'):
    """Return up to ``news_num`` Google News article links for ``company``.

    ``time_range`` is either the default ``'today'`` (no explicit range) or a
    packed string ``'YYYY/MM/DD YYYY/MM/DD'`` giving start and end dates.
    Returns a list of links, or an error message string when the company name
    is missing or no articles were found.
    """
    if company == 'NaN':
        return 'please input company name'

    engine = GoogleNews()
    engine.clear()

    if time_range != 'today':
        # Repack the caller's YYYY/MM/DD dates into the MM/DD/YYYY form
        # that GoogleNews.set_time_range expects.
        range_start = "{1}/{2}/{0}".format(time_range[0:4], time_range[5:7], time_range[8:10])
        range_end = "{1}/{2}/{0}".format(time_range[11:15], time_range[16:18], time_range[19:21])
        engine.set_time_range(range_start, range_end)

    engine.search(company)
    hits = engine.result()

    links = []
    try:
        for idx in range(news_num):
            links.append(hits[idx]['link'])
    except IndexError:
        # Fewer results than requested: report failure only when we got none.
        if not links:
            return '此時段無' + company + '新聞 OR 網路不穩'
    return links
def get_news(assunto):
    """Fetch recent Portuguese-language Google News entries about *assunto*.

    Queries the last day with a fixed extra date window and returns the
    headline texts at positions 3..7, or a fallback message when the feed
    came back empty.
    """
    fonte = GoogleNews(period='d')
    fonte.setlang('pt')
    fonte.set_encode('utf-8')
    fonte.set_time_range('12/02/2021', '13/02/2021')
    fonte.get_news(assunto)

    textos = fonte.get_texts()
    if not textos:
        return "Sem notícias recentes"
    return textos[3:8]
def crawling_news(company_name_list, start_date, end_date):
    """Crawl Google News titles for every company in ``company_name_list``.

    Args:
        company_name_list: iterable of company name strings to query.
        start_date: range start, in the 'MM/DD/YYYY' form GoogleNews expects.
        end_date: range end, same format.

    Side effects: writes all collected titles to 'sp500news.csv'.
    Returns: a single-column pandas DataFrame of the titles.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    # BUG FIX: the original passed the literal strings 'start_date'/'end_date'
    # instead of the parameters, so the requested range was never applied.
    googlenews.set_time_range(start_date, end_date)
    googlenews.set_encode('utf-8')  # news.google.com search sample

    all_title = []
    # Use the handler-configured module logger (the original logged through
    # the unconfigured root logger here, hiding the progress messages).
    logger.info('loop start')
    for i, company in enumerate(company_name_list):
        googlenews.get_news(company)
        logger.info('%s : %0.2f%s', company,
                    ((i + 1) / len(company_name_list)) * 100, '%')
        # Fetch the result list once instead of re-querying per index.
        for item in googlenews.results():
            all_title.append(item.get('title'))

    all_title = pd.DataFrame(all_title)
    all_title.to_csv('sp500news.csv')
    logger.info('saved to csv, done!!')
    return all_title
def googlenews_function(keyword='台積電', language='cn', start_date='2020/12/01', end_date='2020/12/28'):
    """Run a Google News keyword search over a date window.

    Args:
        keyword: search phrase.
        language: result language code passed to GoogleNews.
        start_date / end_date: window bounds given as 'YYYY/MM/DD'.

    Prints the number of hits and returns them as a pandas DataFrame.
    """
    engine = GoogleNews()
    engine.clear()
    engine.set_encode('utf-8')
    engine.set_lang(language)

    # Callers supply YYYY/MM/DD; the library wants MM/DD/YYYY.
    s_year, s_month, s_day = start_date.split('/')
    e_year, e_month, e_day = end_date.split('/')
    engine.set_time_range(
        start='{}/{}/{}'.format(s_month, s_day, s_year),
        end='{}/{}/{}'.format(e_month, e_day, e_year),
    )

    engine.search(keyword)
    data = engine.result()
    print("資料總筆數:", len(data))
    news = pd.DataFrame(data)
    # news.to_csv("GoogleNews_" + keyword +"_日期" + start_date.replace('/', '-') + '到' +end_date.replace('/', '-')+ ".csv", index= False)
    return news
from datetime import date
from GoogleNews import GoogleNews

# Interactive script: ask for a topic and print world-news headlines + links.
news = GoogleNews()
news.set_lang('en')

date_today = date.today()
# BUG FIX: set_time_range expects 'MM/DD/YYYY' strings; the original passed
# the raw datetime.date object as the end of the range.
news.set_time_range('01/11/2020', date_today.strftime('%m/%d/%Y'))
news.set_encode('utf-8')

topic = input("Topic : ")
news.search(topic)
news.get_page(2)

# headlines with links WORLD NEWS
results = news.results()  # fetch once instead of re-querying every iteration
for entry in results[:6]:  # slice guards against fewer than 6 results
    print(entry["title"])
    print(entry["link"])
def main():
    """Crawl Google News for company/country vaccine phrases, score the
    headline sentiment with VADER, and write per-company result and summary
    CSVs plus a cross-company meta file under ./Output/.
    """
    all_df = []  # (country_comp_score, company) pairs, accumulated across companies
    sid_obj = SentimentIntensityAnalyzer()
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-16')
    """
    Primary Phrases refer to the keywords we are interested in studying
    Secondary Phrases refer to the target countries
    """
    company_name = ['Pfizer', 'AstraZeneca', 'Sputnik', 'Sinovac']
    # testing_countries = ['Egypt', 'Kenya', 'Nigeria']
    testing_countries = []  # empty list => no filter, keep every country in sample.csv
    """
    Months refer to the date range
    """
    # months = ['08/01/2020', '09/01/2020', '10/01/2020']
    # months = ['01/01/2020', '02/01/2020', '03/01/2020', '04/01/2020', '05/01/2020', '06/01/2020', '07/01/2020', '08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
    months = ['09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021', '02/01/2021']
    for first in company_name:
        fin = []   # all de-duplicated article dicts for this company
        seen = []  # titles already collected (dedup across months/countries)
        with open('sample.csv', mode='r') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            summary_data = []
            for row in csv_reader:
                # print(row)
                # NOTE(review): '\ufeffCountry' — the CSV header carries a UTF-8 BOM.
                second = row['\ufeffCountry']
                if (second not in testing_countries and len(testing_countries)!=0):
                    continue
                full_phrase = first+" "+second
                print(full_phrase)
                counter = 0    # number of fresh articles for this country
                sum_sent = 0   # running sum of Magnitude scores
                pos_count = 0
                # neu_count = 0
                neg_count = 0
                neg_article = {'title': 'N/A', '% Negative': 0}  # most negative headline so far
                # Query each consecutive month window.
                for i in range(0, len(months)-1):
                    googlenews.set_time_range(months[i],months[i+1])
                    googlenews.get_news(full_phrase)
                    res = googlenews.results()
                    # It would be very easy to get more than the first page. Simply use:
                    # googlenews.get_page(2) or result = googlenews.page_at(2), in
                    # conjunction with googlenews.total_count()
                    # (to see how many results show up on that page, if there are zero,
                    # then probably that'the last page, but I'm not sure if that's
                    # exactly how it works)
                    for result in res:
                        if result['title'] not in seen:
                            # print(result)
                            # Annotate the raw result dict with query metadata.
                            result['start date'] = months[i]
                            result['end date'] = months[i+1]
                            result['company'] = first
                            result['country'] = second
                            result['latitude'] = row['Latitude']
                            result['longitude'] = row['Longitude']
                            sentiment_dict = sid_obj.polarity_scores(result['title'])
                            result['% Negative'] = sentiment_dict['neg']*100
                            result['% Neutral'] = sentiment_dict['neu']*100
                            result['% Positive'] = sentiment_dict['pos']*100
                            # VADER compound is in [-1, 1]; rescale to a 0-100 magnitude.
                            result['Magnitude'] = sentiment_dict['compound']*50 + 50
                            counter += 1
                            sum_sent += result['Magnitude']
                            # result.pop('date')
                            # result.pop('datetime')
                            # result.pop('img')
                            # result.pop('media')
                            # if result['% Negative'] > result['% Neutral'] and result['% Negative']>result['% Positive']: neg_count += 1
                            # elif result['% Neutral'] > result['% Positive']: neu_count += 1
                            # else: pos_count += 1
                            if result['% Positive'] > result['% Negative']:
                                pos_count += 1
                            else:
                                neg_count += 1
                            # Track the single most-negative headline.
                            if result['% Negative'] >= neg_article['% Negative']:
                                neg_article = result
                            fin.append(result)
                            seen.append(result['title'])
                # Defaults when no articles were found at all.
                posPercent = 50
                if pos_count+neg_count>0:
                    posPercent = pos_count/(pos_count + neg_count)
                magni = 0
                if counter>0:
                    magni = sum_sent/counter
                country_comp_score = {'country': second, 'latitude': row['Latitude'], 'longitude': row['Longitude'], 'magnitude': magni, 'positive': pos_count, 'negative': neg_count, 'pos/(pos+neg)': posPercent, 'Most negative title': neg_article['title']}
                summary_data.append(country_comp_score)
                all_df.append((country_comp_score, first))
        df = pd.DataFrame(fin)
        # NOTE(review): drop() is not in-place and its result is discarded, so
        # the exported CSV still contains these columns — confirm intent.
        df.drop(columns=['date', 'datetime', 'img', 'media'])
        df.to_csv("./Output/{}_output.csv".format(first),index=False)
        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv("./Output/{}_summary_output.csv".format(first),index=False)
    # all_df.append(summary_df)
    # meta_data = []
    #
    # with open('sample.csv', mode='r') as csv_file:
    # dic_len = sum(1 for line in open('sample.csv'))
    # with open('sample.csv', mode='r') as csv_file:
    #     csv_reader = csv.DictReader(csv_file)
    #     for j in range(0, dic_len):
    #         most_pos = 0
    #         for i in range(0, len(company_name)):
    #             if all_df[most_pos][j]['positive']<all_df[i][j]['positive']:
    #                 most_pos = i
    #         meta_data.append({all_df[0][j]['\ufeffCountry']: company_name[most_positive]})
    # For each country, record the company with the highest positive count.
    fields = ['Country', 'Company', 'Count']
    meta_data = []
    seen = []
    for result in all_df:
        if result[0]['country'] not in seen:
            seen.append(result[0]['country'])
            meta_data.append([result[0]['country'], result[1], result[0]['positive']])
        else:
            for candidate in meta_data:
                if candidate[0]==result[0]['country'] and candidate[2]<result[0]['positive']:
                    candidate[1] = result[1]
                    candidate[2] = result[0]['positive']
    with open('./Output/meta_data.csv', 'w') as f:
        write = csv.writer(f)
        write.writerow(fields)
        write.writerows(meta_data)
def wait(secs: int): for _ in tqdm(range(secs), desc='Waiting'): sleep(1) last_crawled_day = read_backup(BACKUP_FILE) next_day = last_crawled_day - timedelta( days=1) if last_crawled_day else date.today() try: while True: client = GoogleNews(lang='en', encode='utf-8') date_str = next_day.strftime(DATE_FORMAT) client.set_time_range(date_str, date_str) client.search('bitcoin btc') for i in tqdm(range(1, PAGES + 1), desc=f"{date_str}'s pages"): client.getpage(i) results = client.result() wait(randint(1, 30)) print(f'{len(results)} results from {PAGES} pages.') print('Saving results') parsed_results = [] for result in results: try: article = Article(result['link'], config=config) article.download() article.parse() except newspaper.article.ArticleException: continue
Months refer to the date range """ months = ['08/01/2020', '09/01/2020', '10/01/2020', '11/01/2020', '12/01/2020', '01/01/2021'] fin = [] seen = [] for first in primary_phrases: for second in secondary_phrases: full_phrase = first+" "+second print(full_phrase) for i in range(0, len(months)-1): googlenews.set_time_range(months[i],months[i+1]) googlenews.get_news(full_phrase) res = googlenews.results(sort=True) #It would be very easy to get more than the first page. Simply use: googlenews.get_page(2) or result = googlenews.page_at(2), in conjunction with googlenews.total_count() #(to see how many results show up on that page, if there are zero, then probably that'the last page, but I'm not sure if that's exactly how it works) for result in res: if result['title'] not in seen: result['start date'] = months[i] result['end date'] = months[i+1] result['primary phrase'] = first result['secondary phrase'] = second result['full phrase'] = full_phrase fin.append(result) seen.append(result['title'])
) welcome_response = [ "hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me" ] data = open('HR.txt', 'r', errors='ignore') raw = data.read() raw = raw.lower() raw[:1000] sent_tokens = nltk.sent_tokenize(raw) 'Connect Google News to project' googlenews = GoogleNews() googlenews = GoogleNews(lang='en') googlenews = GoogleNews(period='7d') googlenews.set_time_range('10/14/2020', '12/14/2020') googlenews.set_encode('utf-8') def Normalize(text): remove_punct_dict = dict( (ord(punct), None) for punct in string.punctuation) # word tokenization word_token = nltk.word_tokenize(text.lower().translate(remove_punct_dict)) # remove ascii new_words = [] for word in word_token: new_word = unicodedata.normalize('NFKD', word).encode( 'ascii', 'ignore').decode('utf-8', 'ignore') new_words.append(new_word)