def test_get_historical_interest(self):
    """Smoke test: fetch hourly interest per keyword and merge the columns."""
    from pytrends.request import TrendReq

    # One session is created here; every later request reuses it.
    pytrend = TrendReq()
    kw = ['zuckerberg', 'facebook stock']
    window = dict(year_start=2018, month_start=5, day_start=18,
                  year_end=2019, month_end=1, day_end=5, sleep=1)
    df = pytrend.get_historical_interest([kw[0]], **window)
    # Fetch each remaining term on its own so popularity values are not
    # relative to each other.
    for term in kw[1:]:
        extra = pytrend.get_historical_interest([term], **window)
        df[term] = extra[term]
    self.assertIsNotNone(df)
class Trends():
    """Decide whether a keyword is more popular on the web or on YouTube, then
    fetch matching content (a YouTube link or a Wikipedia summary)."""

    def __init__(self, keyword):
        # `keyword` is used as a list throughout (keyword[0] below) —
        # presumably a list with one search term; TODO confirm with callers.
        self.keyword = keyword
        self.pyt = TrendReq(hl='en-US', tz=360, timeout=(10, 25), retries=2,
                            backoff_factor=0.1)
        # Hourly interest since 2018-01-01: default (web) property …
        self.web = self.pyt.get_historical_interest(
            self.keyword, year_start=2018, month_start=1, day_start=1,
            hour_start=0, cat=0, geo='', gprop='', sleep=0)
        # … and the same window restricted to YouTube searches.
        self.you = self.pyt.get_historical_interest(
            self.keyword, year_start=2018, month_start=1, day_start=1,
            hour_start=0, cat=0, geo='', gprop='youtube', sleep=0)
        self.pref()

    def pref(self):
        """Set and return `preference_video`: True when the keyword's mean
        YouTube interest is at least its mean web interest."""
        if self.web.mean()[self.keyword[0]] > self.you.mean()[self.keyword[0]]:
            self.preference_video = False
        else:
            self.preference_video = True
        return self.preference_video

    def get_content(self):
        """Return a YouTube URL or a Wikipedia summary depending on `pref()`."""
        if self.preference_video:
            print("Youtube Video")
            self.youtube_link = ys(self.keyword[0], max_results=5).to_json()
            # NOTE(review): eval() on a JSON string is unsafe on untrusted
            # input — json.loads() would be the safe equivalent here.
            self.link = eval(self.youtube_link)['videos'][0]['link']
            return "http://youtube.com" + self.link
        else:
            print("Text Content")
            self.wiki = wikipediaapi.Wikipedia('en')
            if self.wiki.page(self.keyword[0]).exists():
                self.summary = self.wiki.page(self.keyword[0]).summary
            else:
                self.summary = "No info found !"
            return self.summary
def get_means(next_five):
    """Fetch hourly Google Trends interest (Oct–Nov 2018, US) for the given
    keywords and return the mean interest of each keyword.

    Bug fixes vs. the original: it appended ``y["Chicago"].mean()`` where both
    ``y`` and ``means`` were undefined names (guaranteed NameError), and it
    returned a constant 0 instead of the computed statistics.

    Parameters:
        next_five (list[str]): keywords to query (pytrends allows up to 5).

    Returns:
        list[float]: mean hourly interest per keyword, in input order.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = next_five
    pytrends.build_payload(kw_list, cat=0, timeframe='now 1-H', geo='US',
                           gprop='')
    historic_trends = pytrends.get_historical_interest(
        kw_list,
        year_start=2018, month_start=10, day_start=1, hour_start=0,
        year_end=2018, month_end=11, day_end=1, hour_end=0,
        cat=0, geo='US', gprop='', sleep=0)
    # One mean per requested keyword column.
    return [historic_trends[kw].mean() for kw in kw_list]
def trendhelper():
    """Interactively collect keywords, download their historical interest,
    dump it to d.csv, and print each keyword's average interest."""
    pt = TrendReq()
    print('When you have entered all your words, please enter : ! ')
    # Read keywords one per line until the '!' sentinel is typed.
    collected = [input('Keywords : ')]
    while collected[-1] != '!':
        collected.append(input(''))
    # Drop the sentinel itself.
    del collected[-1]
    print('Your words list is ready , Please wait ... ')
    # Full available history (date bounds could be passed here, e.g.
    # year_start=2020, month_start=1, year_end=2021, month_end=1).
    frame = pt.get_historical_interest(keywords=collected)
    frame.to_csv('d.csv')
    # Report the mean interest for every collected keyword.
    for word in collected:
        values = pd.read_csv('d.csv', header=0, usecols=[word]).values
        print('Average search for %s is %f' % (word, values.mean()))
def main():
    """Download month-by-month hourly 'bitcoin' interest and write one CSV
    per month (June–November 2019)."""
    start_year = 2019
    start_month = 6
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["bitcoin"]
    pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='',
                           gprop='')
    for year in range(start_year, 2020):
        for month in range(start_month, 12):
            # One calendar month per request; sleep=60 throttles between
            # pytrends' internal weekly chunks.
            frame = pytrends.get_historical_interest(
                kw_list,
                year_start=year, month_start=month, day_start=1, hour_start=1,
                year_end=year, month_end=month + 1, day_end=1, hour_end=0,
                cat=0, geo='', gprop='', sleep=60)
            print(frame)
            frame.to_csv(kw_list[0] + ' ' + str(year) + '-' + str(month) + '.csv')
def get_training_data(self):
    """Load hourly crypto-related search interest from Google Trends,
    caching the result in ./data/financial_data/google_trends.csv.

    Bug fix: the original passed ``datetime.now().strftime(...)`` strings to
    ``year_end``/``month_end``/``day_end``/``hour_end``; pytrends forwards
    those to ``datetime()``, which requires ints, so every cold-cache call
    raised TypeError. Integer attributes of a single ``now`` snapshot are
    used instead (one snapshot also avoids an hour-boundary race).

    Returns:
        pandas.DataFrame: hourly interest for the four keywords.
    """
    # Check whether the data has already been downloaded.
    if not os.path.isfile('./data/financial_data/google_trends.csv'):
        pytrends = TrendReq()
        now = datetime.now()
        # Searches per hour from 2015 up to the current hour.
        searches = pytrends.get_historical_interest(
            keywords=[
                'Cryptocurrency', 'Blockchain', 'Bitcoin', 'Ethereum'
            ],
            year_start=2015,
            month_start=1,
            day_start=1,
            hour_start=0,
            year_end=now.year,
            month_end=now.month,
            day_end=now.day,
            hour_end=now.hour,
        )
        # Persist so subsequent calls skip the (slow) network download.
        searches = pd.DataFrame(searches)
        searches.to_csv('./data/financial_data/google_trends.csv', index=False)
        return searches
    else:
        return pd.read_csv('./data/financial_data/google_trends.csv')
def hourly_test(keywords):
    """Fetch one month of hourly interest for each region in the module-level
    REGIONS list (YouTube property) and write it to hourly_test.csv.

    Note: every iteration overwrites the same CSV, so only the last region's
    data survives — matching the original behavior.
    """
    client = TrendReq(backoff_factor=0.2)
    for region in REGIONS:
        print(region)
        frame = client.get_historical_interest(
            keywords,
            year_start=2018, month_start=1, day_start=1, hour_start=0,
            year_end=2018, month_end=2, day_end=1, hour_end=0,
            cat=0, geo=region, gprop='youtube', sleep=60)
        frame.to_csv('hourly_test.csv')
def test_tz_param():
    """Issue the same historical-interest query under two timezone offsets
    (tz=0 and tz=360) and export each result to its own workbook."""
    print('testing tz...')

    def _fetch_and_export(tz, workbook):
        # Identical query, differing only in the session's tz offset.
        client = TrendReq(hl='en-US', tz=tz, geo='')
        frame = client.get_historical_interest(
            ['Bitcoin'],
            year_start=2018, month_start=1, day_start=1, hour_start=0,
            year_end=2018, month_end=2, day_end=15, hour_end=0,
            cat=0, geo='', gprop='', sleep=0)
        writer = pd.ExcelWriter(workbook, engine='xlsxwriter')
        frame.to_excel(writer)
        writer.save()

    _fetch_and_export(0, "test.xlsx")
    _fetch_and_export(360, "test2.xlsx")
#test_tz_param()
def google_values(self):
    """Download daily-window interest for 'dock coin' (Oct 1–20, 2018) and
    save it under the google_trends dataset directory."""
    from pytrends.request import TrendReq

    keywords = ["dock coin"]
    client = TrendReq(hl='en-US', tz=360)
    frame = client.get_historical_interest(
        keywords,
        year_start=2018, month_start=10, day_start=1,
        year_end=2018, month_end=10, day_end=20, sleep=0)
    frame.to_csv(f'../dataset_files/google_trends/{keywords[0]}.csv')
def get_trends(base_date, end_date):
    """Pull hourly Google Trends interest for 'crypto' plus every ticker in
    the module-level ``all_tickers`` between two unix timestamps, and persist
    each (timestamp, keyword, value) row through the ``GoogleTrends`` model.

    Returns 'Success', or 'Failed' as soon as any pytrends batch is empty.
    """
    # Keywords: 'crypto' plus each ticker with its first 5 chars stripped —
    # presumably a market prefix such as 'USDT-'; TODO confirm.
    kw_list = ['crypto'] + [coin[5:] for coin in all_tickers]
    pytrends = TrendReq(hl='en-US', tz=360)
    # ISO strings are sliced below to extract year/month/day/hour fields.
    date_window = datetime.fromtimestamp(base_date).isoformat()
    date_end = datetime.fromtimestamp(end_date).isoformat()
    i = 0
    # Query keywords three at a time (Trends compares at most 5 per request).
    while i <= len(all_tickers) + 1:
        trends = pytrends.get_historical_interest(
            kw_list[i:i + 3],
            year_start=int(date_window[:4]),
            month_start=int(date_window[5:7]),
            day_start=int(date_window[8:10]),
            hour_start=int(date_window[11:13]),
            year_end=int(date_end[:4]),
            month_end=int(date_end[5:7]),
            day_end=int(date_end[8:10]),
            hour_end=int(date_end[11:13]),
            cat=0, geo='', gprop='', sleep=60)
        if trends.empty:
            return 'Failed'
        # Drop pytrends' isPartial flag; pytrends' weekly chunks overlap, so
        # keep only the last row seen for each date.
        trends = trends.drop(['isPartial'], axis=1).reset_index().drop_duplicates(
            subset=['date'], keep="last").copy()
        columns_trends = trends.columns
        for index, row in trends.iterrows():
            for col in columns_trends:
                if col == 'date':
                    continue
                try:
                    # One DB row per (timestamp, keyword, value).
                    google_entity = GoogleTrends(row['date'].timestamp(), col, row[col])
                    db.session.add(google_entity)
                    db.session.commit()
                except Exception:
                    # Presumably a unique-constraint hit on re-runs: roll
                    # back and keep going.
                    db.session.rollback()
                    print('Already has value ' + col + ' ' + str(row['date']))
        i += 3
    return 'Success'
def main():
    """Incrementally extend data/test.csv with month-sized chunks of hourly
    interest for a fixed set of finance-related topics, then plot the result.

    Bug fixes vs. the original: the final plot called ``df2.plot()`` on an
    undefined name (NameError), and the concat appended ``df_new`` (with its
    ``isPartial`` column) even though ``df_sub`` had been computed to drop it.
    """
    filepath = "data/test.csv"
    trending_topics = ["economy", "energy", "bonds", "crisis", "finance"]
    start_date = pd.Timestamp("2011-01-01")
    end_date = pd.Timestamp.now()
    if os.path.isfile(filepath):
        # Resume one hour after the last row already on disk.
        df = pd.read_csv(filepath, index_col=0)
        df.index = pd.to_datetime(df.index)
        start_date = max(max(df.index), start_date)
        start_date = start_date + pd.Timedelta("1h")
    else:
        df = pd.DataFrame([])
    print("starting with {0} to {1}".format(start_date, end_date))
    ptr = TrendReq(hl='en-US', tz=1)
    # Month-end boundaries, paired into consecutive (start, end) windows.
    months = list(pd.date_range(start_date, end_date, freq="1m"))
    ranges = list(zip(months[0:-1], months[1:]))
    for start_date, end_date in ranges:
        print(start_date, end_date)
        df_new = ptr.get_historical_interest(trending_topics,
                                             year_start=start_date.year,
                                             month_start=start_date.month,
                                             day_start=start_date.day,
                                             hour_start=start_date.hour,
                                             year_end=end_date.year,
                                             month_end=end_date.month,
                                             day_end=end_date.day,
                                             hour_end=end_date.hour,
                                             cat=0, geo='', gprop='',
                                             sleep=60)
        # Drop pytrends' isPartial flag before persisting.
        df_sub = df_new.drop('isPartial', axis=1)
        if len(df_sub):
            df = pd.concat([df, df_sub])
            # Checkpoint after every successful chunk.
            df.to_csv(filepath)
    df.plot()
def get_most_popular(keyword_list):
    """Fetch one month of hourly interest (Jan 2018) for the keywords and
    return both the data and the keyword with the highest single peak.

    Returns:
        (DataFrame, str): the interest frame and the peak keyword.
    """
    client = TrendReq()
    frame = client.get_historical_interest(
        keyword_list,
        year_start=2018, month_start=1, day_start=1, hour_start=0,
        year_end=2018, month_end=2, day_end=1, hour_end=0,
        cat=0, geo='', gprop='', sleep=0)
    # Column whose maximum hourly value is largest overall.
    winner = frame.max().idxmax()
    return frame, winner
def google_trend(kw_list=["ETH", "Ethereum"], year_start=2018, month_start=9,
                 day_start=1, hour_start=0, year_end=2019, month_end=8,
                 day_end=13, hour_end=0, cat=0, geo="", gprop="", sleep=60,
                 save=True):
    """Download hourly Google Trends interest for ``kw_list`` over the given
    window, add a unix-seconds ``time`` column, and optionally save a CSV
    under tmp/.

    Bug fix: the original built ``[t.timestamp for t in dates]`` — a list of
    *bound methods*, not timestamps — because the call parentheses were
    missing. The column now holds float unix timestamps.

    Returns:
        pandas.DataFrame: interest values with an added 'time' column.
    """
    pytrends = TrendReq()
    print(
        "Sending requests to Google Trends. It may take some time as requests are beeing splitted for each week, please be patient..."
    )
    trends = pytrends.get_historical_interest(kw_list,
                                              year_start=year_start,
                                              month_start=month_start,
                                              day_start=day_start,
                                              hour_start=hour_start,
                                              year_end=year_end,
                                              month_end=month_end,
                                              day_end=day_end,
                                              hour_end=hour_end,
                                              cat=cat, geo=geo, gprop=gprop,
                                              sleep=sleep)
    dates = trends.index
    # Unix timestamps (seconds) of each hourly sample.
    time = [t.timestamp() for t in dates]
    trends = trends.reset_index(drop=True)
    trends["time"] = time
    if save:
        if not os.path.exists("tmp/"):
            os.mkdir("tmp/")
        trends.to_csv("tmp/trends_{}.csv".format(kw_list[0]), index=False)
    return trends
def get_historical_data():
    """Fetch roughly the last month of interest for the module-level
    KEYWORDS_LIST in the alcohol category.

    Bug fix: the original hard-coded ``day_end=31``; pytrends builds a
    ``datetime`` from that, which raises ValueError whenever the current
    month has fewer than 31 days. The month's real last day is used instead.

    NOTE(review): in January ``month_start`` becomes 12 while ``year_start``
    stays the current year, so the window start is in the future — presumably
    a latent bug in the original; TODO confirm intended behavior.
    """
    import calendar

    current_date = datetime.datetime.now().date()
    # Month that the 30-days-ago date falls in.
    month_start = (current_date - datetime.timedelta(days=30)).month
    pytrends = TrendReq(hl='en-US', tz=360, timeout=(10, 25), retries=2,
                        backoff_factor=0.1)
    # Last valid day of the current month (handles Feb and 30-day months).
    last_day = calendar.monthrange(current_date.year, current_date.month)[1]
    data = pytrends.get_historical_interest(keywords=KEYWORDS_LIST,
                                            cat=ALCOHOL_CATEGORY,
                                            geo=GEO,
                                            year_start=current_date.year,
                                            year_end=current_date.year,
                                            day_end=last_day,
                                            month_start=month_start,
                                            month_end=current_date.month)
    return data
def getTrendsDataRaw(keyword, startDate, endDate):
    """Return the raw hourly interest Series for one keyword between two
    datetimes (year/month/day/hour attributes are read from each)."""
    # tz is the timezone offset from UTC in minutes.
    client = TrendReq(hl='en-US', tz=0)
    frame = client.get_historical_interest(
        [keyword],
        year_start=startDate.year, month_start=startDate.month,
        day_start=startDate.day, hour_start=startDate.hour,
        year_end=endDate.year, month_end=endDate.month,
        day_end=endDate.day, hour_end=endDate.hour,
        cat=0, geo='', gprop='', sleep=0)
    return frame[keyword]
def trends(topic):
    """Return the average hourly Google Trends interest for ``topic`` over
    the past week (168 hours), using the Russian locale.

    Bug fixes vs. the original:
    * it summed ``smth[-i]`` for i in 0..166 — ``smth[-0]`` is ``smth[0]``
      (the *oldest* sample), and only 167 values were summed before dividing
      by 168;
    * it computed the window start as ``day - 7`` on the raw day-of-month,
      which is invalid (<= 0) during the first week of any month. Proper
      ``timedelta`` arithmetic is used instead.
    """
    now = datetime.datetime.now()
    week_ago = now - datetime.timedelta(days=7)
    pytrends = TrendReq(hl='ru-RU', tz=360)
    series = pytrends.get_historical_interest(
        [topic],
        year_start=week_ago.year, month_start=week_ago.month,
        day_start=week_ago.day, hour_start=week_ago.hour,
        year_end=now.year, month_end=now.month,
        day_end=now.day, hour_end=now.hour,
        cat=0, geo='', gprop='', sleep=0)[topic]
    # Mean of the most recent week of hourly samples.
    recent = series[-168:]
    return float(recent.sum() / 168)
def get_google_trend_v2():
    """Download hourly 'Bitcoin' interest from Jan 2015 to mid-May 2018 and
    export it to an Excel workbook."""
    client = TrendReq(hl='en-US', tz=0, geo='')
    frame = client.get_historical_interest(
        ['Bitcoin'],
        year_start=2015, month_start=1, day_start=1, hour_start=0,
        year_end=2018, month_end=5, day_end=15, hour_end=0,
        cat=0, geo='', gprop='', sleep=0)
    workbook = pd.ExcelWriter("CryptoGoogleTrends_with_overlap.xlsx",
                              engine='xlsxwriter')
    frame.to_excel(workbook)
    workbook.save()
def getWeekTrend(word):
    """Return the summed hourly interest for ``word`` over the window from a
    week ago through tomorrow."""
    today = date.today()
    tomorrow = today + datetime.timedelta(days=1)
    week_ago = today - datetime.timedelta(days=7)
    client = TrendReq(hl='en-US', tz=360)
    frame = client.get_historical_interest(
        [word],
        year_start=week_ago.year, month_start=week_ago.month,
        day_start=week_ago.day, hour_start=0,
        year_end=tomorrow.year, month_end=tomorrow.month,
        day_end=tomorrow.day, hour_end=0,
        sleep=120)
    # Total interest across the whole window.
    return frame[word].sum()
def pytrends_pull(self, query: list, query_loc: str, start_yr: int,
                  start_mo: int, end_yr: int, end_mo: int, type: str):
    """Pull Google Trends interest for ``query`` within a US state at the
    requested granularity.

    Parameters:
        query: keyword list to query.
        query_loc: US state name; resolved to its abbreviation via
            ``self.us_states``.
        start_yr/start_mo, end_yr/end_mo: inclusive month window.
        type: 'hour' (hourly historical interest), 'day' (interest over a
            day-resolution timeframe) or 'week' (default timeframe).
            NOTE: parameter name shadows the builtin but is kept for
            backward compatibility with keyword callers.

    Returns:
        pandas.DataFrame with the interest data.

    Raises:
        ValueError: for an unrecognized ``type`` (the original fell through
        and raised UnboundLocalError on the return instead).
    """
    geocode = self.us_states[query_loc.lower()]['abbr']
    # Connection to trends.google.com.
    pytrend = TrendReq(timeout=(10, 25))
    startdate = datetime.datetime(year=start_yr, month=start_mo, day=1)
    # End on the last day of the end month.
    enddate = datetime.datetime(year=end_yr, month=end_mo,
                                day=calendar.monthrange(end_yr, end_mo)[1],
                                hour=23)
    if type == 'hour':
        historical_interest = pytrend.get_historical_interest(
            keywords=query,
            cat=0,
            geo=geocode,
            year_start=start_yr,
            month_start=start_mo,
            day_start=1,
            hour_start=0,
            year_end=end_yr,
            month_end=end_mo,
            day_end=calendar.monthrange(end_yr, end_mo)[1],
            hour_end=0)
    elif type == 'day':
        timeframe = '{} {}'.format(startdate.strftime("%Y-%m-%d"),
                                   enddate.strftime("%Y-%m-%d"))
        pytrend.build_payload(kw_list=query, timeframe=timeframe)
        historical_interest = pytrend.interest_over_time()
    elif type == 'week':
        pytrend.build_payload(kw_list=query)
        historical_interest = pytrend.interest_over_time()
    else:
        raise ValueError(
            "type must be 'hour', 'day' or 'week', got %r" % (type,))
    return historical_interest
def start(search_words, start_date):
    """Fetch hourly interest for one search term from ``start_date``
    (a 'YYYY-MM-DD...' string) until now, save it to google_results.csv and
    print the first rows."""
    print("... google module started")
    now = datetime.datetime.now()
    # Parameters for the Google search.
    kw_list = [search_words]
    year_start = int(start_date[:4])
    month_start = int(start_date[5:7])
    day_start = int(start_date[8:10])
    hour_start = 0
    print(kw_list, " ", year_start, " ", month_start, " ", day_start)
    # The window ends at the current date (midnight).
    year_end, month_end, day_end, hour_end = now.year, now.month, now.day, 0
    print(year_end, " ", month_end, " ", day_end)
    pytrend = TrendReq()
    pytrend.build_payload(kw_list)
    search_results = pytrend.get_historical_interest(
        kw_list, year_start, month_start, day_start, hour_start,
        year_end, month_end, day_end, hour_end,
        cat=0, geo='', gprop='', sleep=0)
    # Persist, then show the first 10 datapoints.
    search_results.to_csv('google_results.csv')
    print(search_results.head(10))
class PyTrendApiServiceWorker(object):
    """Wrapper around pytrends that pulls hourly interest for a stored
    keyword list and records the exported filename in TinyDB."""

    # Current working keyword batch (replaced via SetKwArray).
    kw_list = []
    # Canned keyword groups available to callers.
    kw_list_dictionary = {
        'bodystyles': ['coupe', 'pickup', 'sedan', 'suv', 'crossover'],
        'makes': ["honda", "chevy", "ford", "subaru"],
        'models': ['honda civic', 'ford f-150', 'ford fusion', 'toyota sienna']
    }
    # NOTE(review): class-level handle — the DB file is opened at class
    # definition time and shared by all instances.
    trend_db = TinyDB("./db/tinytrenddb.json")

    def __init__(self):
        self.pytrends = TrendReq(hl='en-US', tz=360)

    def GetTheKeyWords(self):
        # Placeholder — not implemented.
        return

    def SetKwArray(self, kwList):
        """Replace the working keyword list with ``kwList``."""
        self.kw_list = []
        self.kw_list = kwList
        return

    def GetKwTrendData(self):
        """Download hourly interest for kw_list (Jan 2018 – Sep 2019), write
        it to bodystyle_trends.txt (CSV format) and log that filename in
        TinyDB."""
        print(self.kw_list)
        df = self.pytrends.get_historical_interest(
            self.kw_list,
            year_start=2018, month_start=1, day_start=1, hour_start=0,
            year_end=2019, month_end=9, day_end=1, hour_end=0,
            cat=0, geo='', gprop='', sleep=0)
        df.to_csv("bodystyle_trends.txt")
        self.trend_db.insert({'body_styles_file_name': "bodystyle_trends.txt"})
def get_trends_dates(start_date, end_date, currencies_list):
    '''
    ' Retrieves the Google Trends values (mapped into a range between 0 and 1) between two dates,
    ' inclusive. The values represent search interest relative to the peak over the provided time interval,
    ' where a higher value suggests more popularity. The outer column of the returned data frame represents the
    ' retrieved cryptocurrencies while the inner columns represents the retrieved metric (only "Trends", in this case).
    '
    ' start_date (datetime) - the start date, with month, day, and year provided
    ' end_date (datetime) - the end date, with month, day, and year provided
    ' currencies_list (list) - the list of currencies to associate with the given fear and greed values
    '''
    trends = TrendReq(hl='en-US', tz=0)  # tz is timezone offset from UTC in minutes
    trends_data_frame = pd.DataFrame()
    for currency in currencies_list:
        # currency is presumably an enum whose .value.name is the search
        # term — TODO confirm against the currency type's definition.
        trend = trends.get_historical_interest([currency.value.name],
                                               year_start=start_date.year,
                                               month_start=start_date.month,
                                               day_start=start_date.day,
                                               hour_start=start_date.hour,
                                               year_end=end_date.year,
                                               month_end=end_date.month,
                                               day_end=end_date.day,
                                               hour_end=end_date.hour,
                                               cat=0, geo='', gprop='',
                                               sleep=0)
        # Collapse hourly timestamps to midnight so rows group per day.
        trend.index = trend.index.floor('d')
        # Drop pytrends' isPartial flag.
        trend = trend.drop('isPartial', axis=1)
        # Outer column level: the currency; inner level: the metric name.
        trend.columns = pd.MultiIndex.from_product([[currency], ["Trends"]])
        # Daily mean of the hourly samples.
        trend = trend.groupby("date").mean()
        trends_data_frame = pd.concat([trend, trends_data_frame], axis=1)
    # Map raw interest into [0, 1] using the project's mathutil.map helper
    # and the module-level MIN/MAX_TRENDS_VAL bounds.
    trends_data_frame = trends_data_frame.apply(
        lambda val: mathutil.map(val, MIN_TRENDS_VAL, MAX_TRENDS_VAL, 0, 1))
    return trends_data_frame
def get_data(keyword, trend_type):
    """Dispatch on ``trend_type``: return historical interest as a dict for a
    single keyword, or per-region interest dicts for an iterable of keywords.

    Returns None for any unrecognized trend_type.
    """
    client = TrendReq(hl='en-US', tz=360)
    if trend_type == 'get_historical_interest':
        client.build_payload(kw_list=[keyword], cat=0, timeframe='today 3-m',
                             geo='', gprop='')
        frame = client.get_historical_interest([keyword])
        return frame.to_dict()[keyword]
    # NOTE: the misspelled 'intereset_by_region' value is preserved —
    # callers pass this exact string.
    if trend_type == 'intereset_by_region':
        all_data = []
        for term in keyword:
            client.build_payload([term])
            regional = client.interest_by_region()
            print('########3', regional.to_dict()[term])
            all_data.append(regional.to_dict()[term])
        return all_data
    return None
def pytrends_pull(self, query: str, query_loc: str, start_yr: int,
                  start_mo: int, end_yr: int, end_mo: int):
    """Pull historical interest, interest-over-time and related queries for
    one keyword and export each to a CSV under ../exported_files.

    Bug fix: the original hard-coded ``day_end=30``; pytrends builds a
    ``datetime`` from it, which raises ValueError whenever the end month is
    February (and silently drops the 31st of longer months). The end month's
    real last day is used instead.

    Parameters:
        query: the keyword to query.
        query_loc: US state name (currently unused — geocode is forced to '').
        start_yr/start_mo, end_yr/end_mo: inclusive month window.
    """
    import calendar

    # geocode = "US-{}".format(self.us_states[query_loc.lower()]['abbr'])
    geocode = ""
    # Connection to trends.google.com.
    pytrend = TrendReq()
    # Last valid day of the end month (handles February and 30-day months).
    last_day = calendar.monthrange(end_yr, end_mo)[1]
    # Historical-interest query on the given params.
    historical_interest = pytrend.get_historical_interest(
        keywords=[query],
        cat=0,
        geo=geocode,
        year_start=start_yr,
        month_start=start_mo,
        day_start=1,
        hour_start=0,
        year_end=end_yr,
        month_end=end_mo,
        day_end=last_day,
        hour_end=0)
    # Payload for interest_over_time and related_queries.
    pytrend.build_payload(kw_list=[query])
    time_interest = pytrend.interest_over_time()
    related_queries = pytrend.related_queries()
    no_spaces = query.replace(" ", "_")
    dir = os.path.dirname(__file__)
    filename = os.path.join(
        dir, "..", "exported_files",
        "trend_history_interest_{}.csv".format(no_spaces))
    # NOTE(review): backslash rewriting assumes Windows paths — TODO confirm.
    filename = filename.replace("/", "\\")
    historical_interest.to_csv(filename)
    filename = os.path.join(dir, "..", "exported_files",
                            "trend_time_interest_{}.csv".format(no_spaces))
    filename = filename.replace("/", "\\")
    time_interest.to_csv(filename)
    filename = os.path.join(
        dir, "..", "exported_files",
        "trend_related_queries_{}.csv".format(no_spaces))
    filename = filename.replace("/", "\\")
    try:
        with open(filename, 'w') as csvf:
            writer = csv.DictWriter(
                csvf, fieldnames=related_queries[query].keys())
            writer.writeheader()
            for data in related_queries.values():
                writer.writerow(data)
    except IOError:
        print("Error writing related queries file.")
# pytrends returns pandas DataFrames, so pandas is required.
import pandas as pd
# pytrends must be pip-installed first.
from pytrends.request import TrendReq

pytrends = TrendReq(hl='en-US', tz=360)
kw_list = ['Bitcoin', 'BTC']
# Hourly interest for a single day (Jan 1, 2020); sleep=60 throttles the
# per-week requests pytrends issues internally.
search_df = pytrends.get_historical_interest(
    kw_list,
    year_start=2020, month_start=1, day_start=1, hour_start=0,
    year_end=2020, month_end=1, day_end=1, hour_end=0,
    cat=0, geo='', gprop='', sleep=60)
search_df.to_csv("../data/Trends/BTC_trend_complete.csv")
class APICaller:
    """Aggregates stock quotes (via stockScrape), weather, Google Trends and
    time-of-day into a flat feature dict, and keeps a rolling window of past
    prices per symbol."""

    def __init__(self):
        self.feature_dict = {}
        self.weather_cities = ["new york city", "boston", "los angeles"]
        self.company_names = ["apple", "microsoft", "amazon", "facebook"]
        self.pytrends = TrendReq(hl="en-US", tz=360)
        self.symbols = [
            "AAPL", "MSFT", "AMZN", "FB", "NFLX", "MCD", "WEN", "SHAK", "TSLA"
        ]
        # used to store price, percentChange, volume
        self.data = {}
        # remember a certain number of past prices (a list) for each symbol
        self.prices_to_remember = 30
        self.past_prices = {}
        for symbol in self.symbols:
            self.past_prices[symbol] = []

    # findData will use the stockScraper class to pull data, unless there's an issue
    def findData(self, symbol):
        """Merge scraped data for `symbol` into self.data; 1 on success, 0 on
        any failure (the bare except deliberately swallows all errors)."""
        try:
            dictionary = stockScrape.stockScraper(symbol)
            self.data.update(dictionary)
            return 1
        except:
            return 0

    # Gets the current price of a company under a given symbol
    def getPrice(self, symbol):
        return float(self.data[symbol][0])

    # Gets the current percent change of a company under a given symbol for that day
    def getPercentChange(self, symbol):
        return float(self.data[symbol][1])

    # Gets the amount of shares sold for a company for that day
    def getVolume(self, symbol):
        return float(self.data[symbol][2])

    # we need to call this EVERY time that we are updating i.e. once a minute
    def update_values(self):
        """Refresh every feature. Returns True when any HTTP fetch failed
        (caller should skip this tick), False on success."""
        # there is a chance that the http stuff craps out, so have an error message
        the_val = 1
        for symbol in self.symbols:
            the_val *= self.findData(symbol)
        if the_val == 0:
            # indicate that there was an error, and stop all of this
            return True
        # update temperature and humidity features (in F and %, respectively)
        for city in self.weather_cities:
            weather = Weather(unit=Unit.FAHRENHEIT)
            location = weather.lookup_by_location(city)
            self.feature_dict[city + " temperature"] = float(
                location.condition.temp)
            self.feature_dict[city + " humidity"] = float(
                location.atmosphere.humidity)
        # update google trending info for each word
        # NOTE(review): the date window is hard-coded to Dec 1-25, 2018.
        historical_info = self.pytrends.get_historical_interest(
            self.company_names,
            year_start=2018, month_start=12, day_start=1, hour_start=0,
            year_end=2018, month_end=12, day_end=25, hour_end=0,
            cat=0, geo='US', gprop='', sleep=0)
        for name in self.company_names:
            # don't worry about the 3 Google errors here... it's fine
            self.feature_dict[name + " trend"] = float(
                historical_info[name][-1])
        # update the number of minutes in the day
        date_string = str(datetime.now())
        time_string = (date_string.split())[1]
        time_list = time_string.split(':')
        self.feature_dict["minutes"] = int(time_list[0]) * 60 + int(
            time_list[1])
        # update features based on the symbols
        for symbol in self.symbols:
            price = self.getPrice(symbol)
            self.feature_dict[symbol + " price"] = price
            # put price at the beginning of past prices
            self.past_prices[symbol] = [price] + self.past_prices[symbol]
            # only remember at most fixed number
            if len(self.past_prices[symbol]) > self.prices_to_remember:
                self.past_prices[symbol].pop()
            self.feature_dict[symbol + " percent"] = self.getPercentChange(symbol)
            self.feature_dict[symbol + " volume"] = self.getVolume(symbol)
        return False

    def get_dict(self):
        # Deep copy so callers cannot mutate internal state.
        return copy.deepcopy(self.feature_dict)

    def print_features(self):
        the_dict = self.get_dict()
        for key in the_dict:
            print(key + " : " + str(the_dict[key]))

    # return features based on the keys given in
    def return_features(self, keys):
        the_dict = self.get_dict()
        return_list = []
        for key in keys:
            return_list.append(the_dict[key])
        return return_list

    def return_last_prices(self):
        return copy.deepcopy(self.past_prices)

    def take_in_array(self, arr, key_arr):
        """Rebuild feature_dict from parallel value/key arrays and push each
        symbol's price into its rolling past-price window."""
        # update that dictionary
        self.feature_dict = {}
        for i, key in enumerate(key_arr):
            self.feature_dict[key] = float(arr[i])
        for symbol in self.symbols:
            price = self.feature_dict[symbol + " price"]
            # put price at the beginning of past prices
            self.past_prices[symbol] = [price] + self.past_prices[symbol]
            # only remember at most fixed number
            if len(self.past_prices[symbol]) > self.prices_to_remember:
                self.past_prices[symbol].pop()
pt = TrendReq(hl="en-US", tz=360) # set the keyword & timeframe pt.build_payload(["Python", "Java"], timeframe="all") # get the interest over time iot = pt.interest_over_time() iot # plot it iot.plot(figsize=(10, 6)) # get hourly historical interest data = pt.get_historical_interest( ["data science"], cat=396, year_start=2022, month_start=1, day_start=1, hour_start=0, year_end=2022, month_end=2, day_end=10, hour_end=23, ) data # the keyword to extract data kw = "python" pt.build_payload([kw], timeframe="all") # get the interest by country ibr = pt.interest_by_region("COUNTRY", inc_low_vol=True, inc_geo_code=True) # sort the countries by interest ibr[kw].sort_values(ascending=False) # get related topics of the keyword rt = pt.related_topics()
from pytrends.request import TrendReq

# Single session reused by both calls below.
pytrends = TrendReq(hl='en-US', tz=360)
kw_list = ["Trump"]
# Hourly interest for one day (Aug 7-8, 2019).
test = pytrends.get_historical_interest(kw_list, year_start=2019, month_start=8, day_start=7, hour_start=0, year_end=2019, month_end=8, day_end=8, hour_end=0, cat=0, geo='', gprop='', sleep=0)
# interest_over_time() reuses whatever payload get_historical_interest()
# built internally, so this returns the last chunk fetched above —
# presumably intentional; TODO confirm.
test2 = pytrends.interest_over_time()
print(test2)
import numpy as np
import datetime as dt
import time as time
import os
from pytrends.request import TrendReq
import ML_functions as mlfcn
import Signals_Testing as st

# Session with tz=300 (minutes offset from UTC).
pytrends = TrendReq(hl='en-US', tz=300)
kw_list = ['Bitcoin', 'ethereum', 'cryptocurrency']
# Restrict interest to Google News searches.
GPROP = 'news'
#pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='', gprop='')
# Hourly interest from Feb 1 to Nov 1, 2018.
results = pytrends.get_historical_interest(kw_list, year_start=2018, month_start=2, day_start=1, hour_start=0, year_end=2018, month_end=11, day_end=1, hour_end=0, cat=0, geo='', gprop=GPROP, sleep=0)
print(results)
# Persist via the project's Signals_Testing helper.
st.write_new(results, 'google_trends_btc.xlsx', 'sheet1')
class GoogleTrend(DataCollector):
    # Data collector backed by Google Trends.
    def __init__(self, keyword=['youtube'], hl='ko', tz='82',
                 timeframe='today 5-y', cat=0, geo='KR', gprop=''):
        # Constructor defaults target Korea (hl='ko', geo='KR', tz=82).
        # NOTE(review): the mutable list default for `keyword` is shared
        # across instances that rely on it.
        self.hl = hl
        self.tz = tz
        self.keyword = keyword
        self.timeframe = timeframe
        self.cat = cat
        self.geo = geo
        self.gprop = gprop
        self.update_pytrend()
        self.update_payload()

    # Login to Google. Only need to run this once, the rest of requests will use the same session.
    def update_pytrend(self):
        self.pytrend = TrendReq(hl=self.hl, tz=self.tz)

    # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
    def update_payload(self):
        self.pytrend.build_payload(kw_list=self.keyword, cat=self.cat,
                                   timeframe=self.timeframe, geo=self.geo,
                                   gprop=self.gprop)

    def set_pytrend(self, hl='None', tz='None'):
        # hl is the host language, tz the time zone; the literal string
        # 'None' means "leave unchanged".
        if hl != 'None':  # e.g. 'ko', 'en_US'
            self.hl = hl
        if tz != 'None':  # e.g. 82: Korea, 360: US
            self.tz = tz
        self.update_pytrend()
        self.update_payload()

    def set_payload(self, keyword=None, timeframe='None', cat=-1, geo='None',
                    gprop='None'):
        # Update keyword list / timeframe / category / region / property;
        # sentinel values ('None', -1) mean "leave unchanged".
        if keyword != None:
            self.keyword = keyword
        if timeframe != 'None':  # e.g. 'all', 'today 5-y', 'today 1,2,3-m', 'now 1,7-d', 'now 1,4-H', '2018-05-20 2019-01-20'
            self.timeframe = timeframe
        if cat != -1:
            self.cat = cat
        if geo != 'None':  # e.g. 'KR', 'US', ''
            self.geo = geo
        if gprop != 'None':  # e.g. 'images', 'news', 'youtube', 'froogle'
            self.gprop = gprop
        self.update_payload()

    def load_data(self, keyword=None):
        # Dispatch: 'region' → per-region list, 'gender' → gender split.
        if keyword == 'region':
            self.interest_by_region()
            return self.interest_by_region_df_to_list()
        elif keyword == 'gender':
            return self.search_rate_by_gender()

    # Interest Over Time
    def interest_over_time(self):
        self.interest_over_time_df = self.pytrend.interest_over_time(
        )  # Returns pandas.Dataframe
        # Drop the unused isPartial column — keep only the keyword columns.
        self.interest_over_time_df = self.interest_over_time_df.iloc[:, :self.
                                                                     keyword.
                                                                     __len__(
                                                                     )]
        self.interest_over_time_list = self.interest_over_time_df_to_list()
        return self.interest_over_time_list

    # Interest Over Time hourly
    def historical_hourly_interest(self):
        # NOTE(review): the April 2019 / geo='KR' window is hard-coded here.
        self.historical_hourly_interest_df = self.pytrend.get_historical_interest(
            keywords=self.keyword,
            year_start=2019,
            month_start=4,
            day_start=1,
            hour_start=0,
            year_end=2019,
            month_end=5,
            day_end=1,
            hour_end=0,
            cat=0,
            geo='KR',
            gprop='',
            sleep=0)  # Returns pandas.Dataframe
        # Drop the unused isPartial column.
        self.historical_hourly_interest_df = self.historical_hourly_interest_df.iloc[:, :
                                                                                     self
                                                                                     .
                                                                                     keyword
                                                                                     .
                                                                                     __len__(
                                                                                     )]
        self.historical_hourly_interest_list = self.historical_hourly_interest_df_to_list(
        )
        return self.historical_hourly_interest_list

    # Interest by Region
    def interest_by_region(self):
        # Relative search share broken down by region.
        self.interest_by_region_df = self.pytrend.interest_by_region()
        self.interest_by_region_list = self.interest_by_region_df_to_list()
        return self.interest_by_region_list

    # Related Topics, Returns dictionary of pandas.DataFrames
    def related_topics(self):
        # Ranked topics related to the keyword.
        self.related_topics_dict = self.pytrend.related_topics()
        return self.related_topics_dict

    # Related Queries, returns a dictionary of dataframes
    def related_queries(self):
        # Ranked queries related to the keyword.
        self.related_queries_dict = self.pytrend.related_queries()
        return self.related_queries_dict

    # trending searches in real time
    def trending_searches(self):
        # Top 20 trending searches for South Korea right now.
        self.trending_searches_df = self.pytrend.trending_searches(
            pn='south_korea')
        return self.trending_searches_df

    # Daily top searches.
    def today_searches(self):
        self.today_searches_df = self.pytrend.today_searches()
        return self.today_searches_df

    # Get Google Top Charts
    def top_charts(self):
        # Top keywords for a given year.
        self.top_charts_df = self.pytrend.top_charts(
            date=2015, hl='ko', tz='82', geo='KR'
        )  # date = YYYY integer, tz='82', geo='KR', geo='GLOBAL', geo='US'
        return self.top_charts_df

    # Get Google Category
    def categories(self):
        # Google category names and their ids.
        self.categories_df = self.pytrend.categories()
        return self.categories_df

    def show_interest_over_time(self):
        # Plot search share over time, one colored line per keyword.
        num = 0.0
        plt.figure(figsize=(14, 4))
        plt.style.use('ggplot')  # nicer default styling
        for key in self.keyword:
            num += 0.1
            plt.plot(self.interest_over_time_df[key],
                     c=plt.cm.rainbow(num),
                     label=key)
        plt.legend(bbox_to_anchor=(1, 1), loc=2)  # legend placement
        plt.show()

    def interest_over_time_df_to_list(
            self):  # Convert interest_over_time_df (DataFrame) to a list-of-lists
        # Output shape: [['x', d1, d2, ...], [kw1, v1, v2, ...], ...] —
        # presumably the c3.js column format; TODO confirm with the consumer.
        date = self.interest_over_time_df.index.tolist()
        for i in range(len(date)):
            date[i] = date[i].date().strftime("%Y-%m-%d")
        date.insert(0, 'x')
        data = []
        data.append(date)
        for key in self.keyword:
            y = self.interest_over_time_df[key].tolist()
            y.insert(0, key)
            data.append(y)
        return data

    def historical_hourly_interest_df_to_list(
            self):  # Convert historical_hourly_interest_df to a list-of-lists
        date = self.historical_hourly_interest_df.index.tolist()
        for i in range(len(date)):
            date[i] = date[i].date().strftime("%Y-%m-%d")
        date.insert(0, 'x')
        data = []
        data.append(date)
        for key in self.keyword:
            y = self.historical_hourly_interest_df[key].tolist()
            y.insert(0, key)
            data.append(y)
        return data

    def interest_by_region_df_to_list(
            self):  # Convert interest_by_region_df to [name, value] pairs
        # Merges Korean provinces into coarse buckets (Gangwon, Seoul/Gyeonggi,
        # Gyeongsang, Jeolla, Jeju, Chungcheong) using hard-coded row indices —
        # assumes pytrends returns regions in a fixed order; TODO confirm.
        region = self.interest_by_region_df.index.tolist()
        data = []
        for key in self.keyword:
            y = self.interest_by_region_df[key].tolist()
            ratio = 0
            for i in [0, 1, 2, 3, 8, 11, 12, 13, 14, 15]:
                ratio += y[i]
            ratio /= 100
            tmp_val = 0
            reg_name = ''
            if ratio > 0:
                for i in range(len(region)):
                    if i in [1, 2, 14, 11, 0, 13]:
                        if i == 0:
                            tmp_val = round(y[i] / ratio)
                            reg_name = '강원도'
                        elif i == 1:
                            tmp_val = round((y[i] + y[i + 1]) / ratio)
                            reg_name = '서울/경기'
                        elif i == 2:
                            tmp_val = round((y[i] + y[i + 1]) / ratio)
                            reg_name = '경상도'
                        elif i == 11:
                            tmp_val = round((y[i] + y[i + 1]) / ratio)
                            reg_name = '전라도'
                        elif i == 13:
                            tmp_val = round(y[i] / ratio)
                            reg_name = '제주도'
                        elif i == 14:
                            tmp_val = round((y[i] + y[i + 1]) / ratio)
                            reg_name = '충청도'
                        data.append([reg_name, tmp_val])
        return data

    def search_rate_by_gender(self):
        # Placeholder: the gender split is randomly generated, not real data.
        gender_data = []
        gender_data.append(['male', random.randint(50, 100)])
        gender_data.append(['female', random.randint(50, 100)])
        return gender_data