class GoogleTrend(DataCollector):
    """Fetch search-interest data from Google Trends through pytrends.

    The instance keeps the request settings (language, time zone, keywords,
    time frame, category, region, Google property) as attributes, rebuilds
    the pytrends session/payload whenever they change, and caches the last
    result of every query on the instance (``*_df``, ``*_list``, ``*_dict``).
    """

    # Rows of the region frame that make up each reported region, in output
    # order.
    # NOTE(review): the indices presumably refer to the alphabetically sorted
    # Korean region names returned for geo='KR' with hl='ko' (0 Gangwon,
    # 1 Gyeonggi, 2/3 Gyeongsang, 8 Seoul, 11/12 Jeolla, 13 Jeju,
    # 14/15 Chungcheong) -- confirm against a live response.
    _REGION_GROUPS = (
        ('강원도', (0,)),
        ('서울/경기', (1, 8)),
        ('경상도', (2, 3)),
        ('전라도', (11, 12)),
        ('제주도', (13,)),
        ('충청도', (14, 15)),
    )

    def __init__(self,
                 keyword=None,
                 hl='ko',
                 tz='82',
                 timeframe='today 5-y',
                 cat=0,
                 geo='KR',
                 gprop=''):
        """Store the default settings and build the first payload.

        Args:
            keyword: List of search keywords. ``None`` means ``['youtube']``;
                a fresh list is created per instance so instances never
                share (and accidentally mutate) one default list.
            hl: Host language, e.g. 'ko' or 'en_US'.
            tz: Time zone value handed to ``TrendReq``.
                NOTE(review): pytrends documents ``tz`` as an offset in
                minutes; '82' looks like a country calling code -- confirm.
            timeframe: Time frame string, e.g. 'today 5-y'.
            cat: Category id (0 means all categories).
            geo: Region code, e.g. 'KR', 'US' or ''.
            gprop: Google property, e.g. 'images', 'news', 'youtube'.
        """
        self.hl = hl
        self.tz = tz
        self.keyword = ['youtube'] if keyword is None else keyword
        self.timeframe = timeframe
        self.cat = cat
        self.geo = geo
        self.gprop = gprop
        self.update_pytrend()
        self.update_payload()

    def update_pytrend(self):
        """Create the pytrends session from the current ``hl``/``tz``.

        Only needs to run once per language/time-zone change; later requests
        reuse the same session.
        """
        self.pytrend = TrendReq(hl=self.hl, tz=self.tz)

    def update_payload(self):
        """Build the payload and capture the API tokens.

        Only needed for interest_over_time(), interest_by_region() and
        related_queries().
        """
        self.pytrend.build_payload(kw_list=self.keyword,
                                   cat=self.cat,
                                   timeframe=self.timeframe,
                                   geo=self.geo,
                                   gprop=self.gprop)

    def set_pytrend(self, hl='None', tz='None'):
        """Change host language and/or time zone, then rebuild everything.

        The string 'None' is the "leave unchanged" sentinel for both
        arguments.

        Args:
            hl: Host language, e.g. 'ko', 'en_US'.
            tz: Time zone value, e.g. 82 (Korea) or 360 (US) according to the
                original author's notes.
        """
        if hl != 'None':
            self.hl = hl
        if tz != 'None':
            self.tz = tz
        self.update_pytrend()
        self.update_payload()

    def set_payload(self,
                    keyword=None,
                    timeframe='None',
                    cat=-1,
                    geo='None',
                    gprop='None'):
        """Change any subset of the payload settings and rebuild the payload.

        Each argument has its own "leave unchanged" sentinel: ``None`` for
        ``keyword``, ``-1`` for ``cat`` and the string 'None' for the rest.

        Args:
            keyword: New keyword list.
            timeframe: e.g. 'all', 'today 5-y', 'today 1-m', 'now 7-d',
                'now 4-H', '2018-05-20 2019-01-20'.
            cat: Category id.
            geo: e.g. 'KR', 'US', ''.
            gprop: e.g. 'images', 'news', 'youtube', 'froogle'.
        """
        if keyword is not None:
            self.keyword = keyword
        if timeframe != 'None':
            self.timeframe = timeframe
        if cat != -1:
            self.cat = cat
        if geo != 'None':
            self.geo = geo
        if gprop != 'None':
            self.gprop = gprop
        self.update_payload()

    def load_data(self, keyword=None):
        """Return the data set selected by ``keyword``.

        Args:
            keyword: 'region' for the per-region shares, 'gender' for the
                per-gender rates.

        Returns:
            The selected list, or ``None`` for any other ``keyword``.
        """
        if keyword == 'region':
            # interest_by_region() already converts the frame to a list, so
            # there is no need to convert it a second time.
            return self.interest_by_region()
        if keyword == 'gender':
            return self.search_rate_by_gender()
        return None

    def interest_over_time(self):
        """Query interest over time and return it in column-list form."""
        frame = self.pytrend.interest_over_time()  # pandas.DataFrame
        # Keep only the keyword columns; this drops the unused trailing
        # isPartial column.
        self.interest_over_time_df = frame.iloc[:, :len(self.keyword)]
        self.interest_over_time_list = self.interest_over_time_df_to_list()
        return self.interest_over_time_list

    def historical_hourly_interest(self,
                                   year_start=2019,
                                   month_start=4,
                                   day_start=1,
                                   hour_start=0,
                                   year_end=2019,
                                   month_end=5,
                                   day_end=1,
                                   hour_end=0,
                                   cat=0,
                                   geo='KR',
                                   gprop='',
                                   sleep=0):
        """Query hourly interest for a time window.

        All parameters default to the values that used to be hard-coded
        (2019-04-01 00h to 2019-05-01 00h, all categories, 'KR', web search),
        so calling without arguments behaves as before.

        Returns:
            The result in column-list form (see
            historical_hourly_interest_df_to_list).
        """
        frame = self.pytrend.get_historical_interest(
            keywords=self.keyword,
            year_start=year_start,
            month_start=month_start,
            day_start=day_start,
            hour_start=hour_start,
            year_end=year_end,
            month_end=month_end,
            day_end=day_end,
            hour_end=hour_end,
            cat=cat,
            geo=geo,
            gprop=gprop,
            sleep=sleep)  # pandas.DataFrame
        # Keep only the keyword columns (drops the unused isPartial column).
        self.historical_hourly_interest_df = frame.iloc[:, :len(self.keyword)]
        self.historical_hourly_interest_list = (
            self.historical_hourly_interest_df_to_list())
        return self.historical_hourly_interest_list

    def interest_by_region(self):
        """Query the search share per region and return it as a list."""
        self.interest_by_region_df = self.pytrend.interest_by_region()
        self.interest_by_region_list = self.interest_by_region_df_to_list()
        return self.interest_by_region_list

    def related_topics(self):
        """Return the ranked topics related to the keywords.

        Returns:
            Dictionary of pandas.DataFrames.
        """
        self.related_topics_dict = self.pytrend.related_topics()
        return self.related_topics_dict

    def related_queries(self):
        """Return the ranked queries related to the keywords.

        Returns:
            Dictionary of pandas.DataFrames.
        """
        self.related_queries_dict = self.pytrend.related_queries()
        return self.related_queries_dict

    def trending_searches(self, pn='south_korea'):
        """Return the currently trending searches for country ``pn``."""
        self.trending_searches_df = self.pytrend.trending_searches(pn=pn)
        return self.trending_searches_df

    def today_searches(self):
        """Return today's searches using the pytrends defaults."""
        self.today_searches_df = self.pytrend.today_searches()
        return self.today_searches_df

    def top_charts(self, date=2015, hl='ko', tz='82', geo='KR'):
        """Return the top keywords of a year.

        Args:
            date: Year as a YYYY integer.
            hl: Host language.
            tz: Time zone value passed through to pytrends.
            geo: Region, e.g. 'KR', 'US' or 'GLOBAL'.
        """
        self.top_charts_df = self.pytrend.top_charts(date=date,
                                                     hl=hl,
                                                     tz=tz,
                                                     geo=geo)
        return self.top_charts_df

    def categories(self):
        """Return the Google category names and ids."""
        self.categories_df = self.pytrend.categories()
        return self.categories_df

    def show_interest_over_time(self):
        """Plot the interest over time, one line per keyword.

        Reads ``self.interest_over_time_df``, so interest_over_time() must
        have been called first.
        """
        num = 0.0
        plt.figure(figsize=(14, 4))
        plt.style.use('ggplot')  # nicer default look
        for key in self.keyword:
            num += 0.1  # step through the rainbow colour map per keyword
            plt.plot(self.interest_over_time_df[key],
                     c=plt.cm.rainbow(num),
                     label=key)
        plt.legend(bbox_to_anchor=(1, 1), loc=2)  # legend placement
        plt.show()

    @staticmethod
    def _frame_to_columns(frame, keywords):
        """Turn a time-indexed frame into ``[['x', dates...], [kw, values...]]``."""
        x_axis = ['x']
        x_axis.extend(
            stamp.date().strftime("%Y-%m-%d") for stamp in frame.index.tolist())
        columns = [x_axis]
        for key in keywords:
            columns.append([key] + frame[key].tolist())
        return columns

    def interest_over_time_df_to_list(self):
        """Convert ``interest_over_time_df`` from a DataFrame to lists.

        Returns:
            A list whose first element is ``['x', 'YYYY-MM-DD', ...]`` and
            whose following elements are ``[keyword, value, ...]``.
        """
        return self._frame_to_columns(self.interest_over_time_df, self.keyword)

    def historical_hourly_interest_df_to_list(self):
        """Convert ``historical_hourly_interest_df`` from a DataFrame to lists.

        NOTE(review): the x labels are formatted as dates only, so all hourly
        rows of one day share the same label -- confirm this is what the
        consumer expects before changing the format.
        """
        return self._frame_to_columns(self.historical_hourly_interest_df,
                                      self.keyword)

    def interest_by_region_df_to_list(self):
        """Convert ``interest_by_region_df`` into percentage shares per region.

        For every keyword the rows listed in ``_REGION_GROUPS`` are summed and
        each group is expressed as a rounded percentage of that total. A
        keyword whose total is zero contributes nothing. The frame must have
        at least 16 rows, otherwise an ``IndexError`` is raised.

        The "서울/경기" group uses rows 1 and 8. It previously used rows 1
        and 2, which counted row 2 twice (it also belongs to "경상도") and
        never used row 8 although row 8 is part of the total, so the shares
        did not add up to 100.

        Returns:
            Flat list of ``[region_name, percentage]`` pairs, six per keyword
            with a non-zero total, in keyword order.
        """
        total_rows = [row for _, rows in self._REGION_GROUPS for row in rows]
        data = []
        for key in self.keyword:
            y = self.interest_by_region_df[key].tolist()
            ratio = sum(y[row] for row in total_rows) / 100
            if ratio <= 0:
                continue
            for reg_name, rows in self._REGION_GROUPS:
                share = round(sum(y[row] for row in rows) / ratio)
                data.append([reg_name, share])
        return data

    def search_rate_by_gender(self):
        """Return placeholder per-gender rates.

        The values are random integers between 50 and 100, not data fetched
        from Google Trends.
        """
        return [
            ['male', random.randint(50, 100)],
            ['female', random.randint(50, 100)],
        ]
class Trendsetter():
    """Convenience wrapper around pytrends with optional English translation."""

    def __init__(self, timezone=1, language='en-US'):
        """
        Args:
            timezone: timezone in hours; stored as ``-60 * timezone`` and
                passed to ``TrendReq`` as ``tz``
            language: language of interface, not important
        """
        self.tz = -60 * timezone
        # country name -> [country code, language code used for translation]
        self.countries = {
            'united_states': ['US', 'en'],
            'united_kingdom': ['GB', 'en'],
            'australia': ['AU', 'en'],
            'germany': ['DE', 'de'],
            'france': ['FR', 'fr'],
            'italy': ['IT', 'it'],
            'japan': ['JP', 'ja'],
            'saudi_arabia': ['SA', 'ar'],
            'egypt': ['EG', 'ar'],
            # 'china': ['CN', 'zh-cn'],
            # 'iran': ['IR', 'ar'],
            'brazil': ['BR', 'pt'],
            'india': ['IN', 'hi'],
            'israel': ['IL', 'iw'],
            # 'spain': ['ES', 'es'],
            'mexico': ['MX', 'es'],
            'russia': ['RU', 'ru'],
            'south_korea': ['KR', 'ko'],
            'taiwan': ['TW', 'zh-tw'],
            'hong_kong': ['HK', 'zh-tw'],
            'thailand': ['TH', 'th'],
            'turkey': ['TR', 'tr'],
            'vietnam': ['VN', 'vi'],
        }
        # Reverse lookup: country code -> country name.
        self.countrycodes = {v[0]: k for k, v in self.countries.items()}
        self.trends = TrendReq(hl=language, tz=self.tz)
        self.translator = gt.Translator(service_urls=[
            "translate.google.com", "translate.google.co.kr",
            "translate.google.at", "translate.google.de",
            "translate.google.ru", "translate.google.ch",
            "translate.google.fr", "translate.google.es"
        ])

    def browse_categories(self, levels=()):
        """browse categories by list of index

        Walks down the category tree following ``levels`` and prints the name
        and id of the category that was reached.

        Args:
            levels: sequence of child indices, eg. [4,2]; empty selects the
                root category

        Returns:
            dataframe with child categories, or None when the selected
            category has no children (this case used to raise
            UnboundLocalError)
        """
        cat = self.trends.categories()
        for i in levels:
            cat = cat['children'][i]
        print(cat['name'], ", id =", cat['id'])
        if 'children' not in cat:
            return None
        return pd.DataFrame.from_dict(cat['children'])

    def get_trending(self, country='united_states'):
        """ get currently and daily trends for implemented countries

        For countries whose language is not English the lists are also
        translated to English and added under the keys 'trending_en' and
        'today_en'. Translation is best effort: a failure only emits a
        warning and the untranslated result is returned.

        Args:
            country: country name or country code

        Returns:
            {'trending': list, 'today': list}

        Raises:
            ValueError if country not supported
        """
        if country not in self.countries:
            if country not in self.countrycodes:
                raise ValueError("Country not supported.")
            country = self.countrycodes[country]
        code, lang = self.countries[country]

        self.trending = {
            'trending': list(self.trends.trending_searches(pn=country)[0]),
            'today': list(self.trends.today_searches(pn=code))
        }

        if lang != 'en':
            try:
                self.trending_en = {
                    k + '_en': [
                        t.text for t in self.translator.translate(
                            v, dest='en', src=lang)
                    ]
                    for k, v in self.trending.items()
                }
                self.trending.update(self.trending_en)
            except JSONDecodeError:
                warnings.warn("google translate API limit reached")
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt and SystemExit.
                warnings.warn("google translate API not working")
        return self.trending

    def get_related(self,
                    kw,
                    timeframe='now 7-d',
                    category=0,
                    location='',
                    gtype=''):
        """Return the top and rising related topics of one keyword.

        Args:
            kw: single keyword
            timeframe: supported google format, or [t_start, t_end]
            category: category id
            location: supported google location or country code
            gtype: google property

        Returns:
            DataFrame with the 'top' rows followed by the 'rising' rows and a
            fresh index; an empty DataFrame when neither table is available
        """
        if isinstance(timeframe, (list, tuple)):
            tf_str = ' '.join(timeframe)
        else:
            tf_str = timeframe

        self.trends.build_payload([kw],
                                  cat=category,
                                  timeframe=tf_str,
                                  geo=location,
                                  gprop=gtype)
        related = self.trends.related_topics()[kw]
        # Skip tables that are reported as None instead of a DataFrame.
        frames = [
            frame for frame in (related['top'], related['rising'])
            if frame is not None
        ]
        if not frames:
            return pd.DataFrame()
        # DataFrame.append was removed in pandas 2.0; concat is the
        # replacement and behaves the same for this use.
        return pd.concat(frames, ignore_index=True, sort=False)

    def get_interest(self,
                     kwds,
                     timeframe='now 7-d',
                     category=0,
                     location='',
                     gtype=''):
        """Return the interest over time for up to five keywords.

        Relative time frames ('now ...', 'today ...', 'all') and short
        explicit ranges go through a regular payload request. An explicit
        hourly range of 8 days or more is fetched with
        ``get_historical_interest``.

        Args:
            kwds: list of up to 5 keywords (a single string is accepted too)
            timeframe: supported google format.
                or [t_start, t_end];
                for daily output: 'YYYY-mm-dd',
                for hourly output: 'YYYY-mm-ddThh'
            category: category id
            location: supported google location or country code
            gtype: google property

        Returns:
            DataFrame (also stored in ``self.interest``)
        """
        if isinstance(kwds, str):
            kwds = [kwds]

        if isinstance(timeframe, (list, tuple)):
            bounds = list(timeframe)
            tf_str = ' '.join(bounds)
        else:
            tf_str = timeframe
            bounds = timeframe.split(' ')

        hourly = 'T' in tf_str
        relative = any(s in tf_str for s in ('now', 'today', 'all'))

        if not relative:
            format_str = '%Y-%m-%dT%H' if hourly else '%Y-%m-%d'
            t_start = datetime.datetime.strptime(bounds[0], format_str)
            t_end = datetime.datetime.strptime(bounds[1], format_str)
            if hourly and t_end - t_start >= datetime.timedelta(days=8):
                self.interest = self.trends.get_historical_interest(
                    kwds,
                    year_start=t_start.year,
                    year_end=t_end.year,
                    month_start=t_start.month,
                    month_end=t_end.month,
                    day_start=t_start.day,
                    day_end=t_end.day,
                    hour_start=t_start.hour,
                    hour_end=t_end.hour,
                    cat=category,
                    geo=location,
                    gprop=gtype,
                    sleep=60)
                return self.interest

        self.trends.build_payload(kwds,
                                  cat=category,
                                  timeframe=tf_str,
                                  geo=location,
                                  gprop=gtype)
        self.interest = self.trends.interest_over_time()
        return self.interest
class DesignerTrendsCollector(BuilderTrendsCollector):
    """Concrete `BuilderTrendsCollector` built on the external library `pytrends`.

    Every ``get_*`` method runs one pytrends request and stores the result in
    the current product, using the bound ``get_*`` method itself as the key.
    The collected product is handed out through the `trends` property.

    Args:
        BuilderTrendsCollector (class): Abstract class that provides the
            implementations of the properties and methods.
    """

    def __init__(
        self,
        keyword_list: list,
        timeframe: str = "today 5-y",
        language: str = "en-US",
        category: int = 0,
        timezone: int = 360,
        country: str = "",
        property_filter="",
        **kwargs,
    ) -> None:
        """Create the pytrends session, build the payload and start an empty product.

        Args:
            keyword_list (list): Keywords to search for.
            timeframe (str, optional): Period to search in. Defaults to
                "today 5-y".
            language (str, optional): Search language. Defaults to "en-US".
            category (int, optional): A specific
                [search category](https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories).
                Defaults to 0.
            timezone (int, optional):
                [Search timezone](https://developers.google.com/maps/documentation/timezone/overview).
                Defaults to 360.
            country (str, optional): Country to search in. Defaults to "".
            property_filter (str, optional): Property filter of the search;
                only in news, images, YouTube, shopping. Defaults to "".
            **kwargs: Extra keyword arguments forwarded to `TrendReq`.
        """
        self.keyword_list = keyword_list
        self.timeframe = timeframe
        self.language = language
        self.category = category
        self.timezone = timezone
        self.country = country
        self.property_filter = property_filter

        session = TrendReq(hl=self.language, tz=self.timezone, **kwargs)
        self.pytrends = session
        payload = {
            "kw_list": self.keyword_list,
            "cat": self.category,
            "timeframe": self.timeframe,
            "geo": self.country,
            "gprop": self.property_filter,
        }
        session.build_payload(**payload)
        self.reset()

    def reset(self) -> None:
        """Replace the current product with a new, empty `TrendProduct`."""
        self._product = TrendProduct()

    @property
    def trends(self) -> TrendProduct:
        """Hand out the collected product and start a fresh one.

        Returns:
            TrendProduct: The product holding everything collected since the
                last reset.
        """
        collected = self._product
        self.reset()
        return collected

    def get_interest_over_time(self) -> None:
        """Collect the result of an interest-over-time search."""
        result = self.pytrends.interest_over_time()
        self._product.add_product(key=self.get_interest_over_time, value=result)

    def get_interest_by_region(self, resolution: str, **kwargs) -> None:
        """Collect the result of an interest-by-region search.

        Args:
            resolution (str): The resolution of the subregion.
            **kwargs: Extra keyword arguments forwarded to pytrends.
        """
        result = self.pytrends.interest_by_region(resolution=resolution, **kwargs)
        self._product.add_product(key=self.get_interest_by_region, value=result)

    def get_trending_searches(self, trend_country: str) -> None:
        """Collect the trending searches of a country.

        Args:
            trend_country (str): Name of the country of interest.
        """
        result = self.pytrends.trending_searches(pn=trend_country)
        self._product.add_product(key=self.get_trending_searches, value=result)

    def get_today_searches(self, today_country: str) -> None:
        """Collect the daily search trends of a country.

        Args:
            today_country (str): Name of the country of interest.
        """
        result = self.pytrends.today_searches(pn=today_country)
        self._product.add_product(key=self.get_today_searches, value=result)

    def get_top_charts(self, date: int, top_country: str) -> None:
        """Collect the top charts of a year.

        Args:
            date (int): Year
            top_country (str): Name of the country of interest.
        """
        result = self.pytrends.top_charts(
            date,
            hl=self.language,
            tz=self.timezone,
            geo=top_country,
        )
        self._product.add_product(key=self.get_top_charts, value=result)

    def get_related_topics(self) -> None:
        """Collect the topics related to the keywords."""
        result = self.pytrends.related_topics()
        self._product.add_product(key=self.get_related_topics, value=result)

    def get_related_queries(self) -> None:
        """Collect the queries related to the keywords."""
        result = self.pytrends.related_queries()
        self._product.add_product(key=self.get_related_queries, value=result)

    def get_suggestions(self) -> None:
        """Collect the dropdown suggestions, one entry per keyword."""
        per_keyword = {}
        for keyword in self.keyword_list:
            per_keyword[keyword] = self.pytrends.suggestions(keyword=keyword)
        self._product.add_product(key=self.get_suggestions, value=per_keyword)

    def get_categories(self) -> None:
        """Collect the available categories."""
        result = self.pytrends.categories()
        self._product.add_product(key=self.get_categories, value=result)

    def get_historical_interest(
        self,
        year_start: int,
        month_start: int,
        day_start: int,
        hour_start: int,
        year_end: int,
        month_end: int,
        day_end: int,
        hour_end: int,
        **kwargs,
    ) -> None:
        """Collect hourly interest data between two points in time.

        The stored keyword list, category, country and property filter are
        used for the request.

        Args:
            year_start (int): Starting year
            month_start (int): Starting month
            day_start (int): Starting day
            hour_start (int): Starting hour
            year_end (int): Final year
            month_end (int): Final month
            day_end (int): Final day
            hour_end (int): Final hour
            **kwargs: Extra keyword arguments forwarded to pytrends.
        """
        window = {
            "year_start": year_start,
            "month_start": month_start,
            "day_start": day_start,
            "hour_start": hour_start,
            "year_end": year_end,
            "month_end": month_end,
            "day_end": day_end,
            "hour_end": hour_end,
        }
        result = self.pytrends.get_historical_interest(
            keywords=self.keyword_list,
            cat=self.category,
            geo=self.country,
            gprop=self.property_filter,
            **window,
            **kwargs,
        )
        self._product.add_product(key=self.get_historical_interest, value=result)
# Related Topics, returns a dictionary of dataframes related_topics_dict = pytrend.related_topics() print(related_topics_dict) # Related Queries, returns a dictionary of dataframes related_queries_dict = pytrend.related_queries() print(related_queries_dict) # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries() pytrend = TrendReq() keys = ["the top"] pytrend.build_payload(kw_list=keys, cat=0, geo='', timeframe='now 7-d') if category: categories = pytrend.categories() with open('categories.json', 'w') as outfile: json.dump(categories, outfile, indent=2) interest_by_region_df = pytrend.interest_by_region(resolution="COUNTRY") print(interest_by_region_df.sort_values(keys[0], ascending=False).head(5)) # Related Topics, returns a dictionary of dataframes related_topics_dict = pytrend.related_topics() print(related_topics_dict[keys[0]]["top"].to_string()) print(related_topics_dict[keys[0]]["rising"].to_string())