Example #1
0
class GoogleTrend(DataCollector):  # 구글 트렌드를 통해 정보를 가져오는 클래스
    def __init__(self,
                 keyword=None,
                 hl='ko',
                 tz='82',
                 timeframe='today 5-y',
                 cat=0,
                 geo='KR',
                 gprop=''):
        """Store the query settings, then create the session and the payload.

        Args:
            keyword: List of search terms. ``None`` (the default) stands for
                ``['youtube']``; a fresh list is created for every instance
                so that instances never share one default list object.
            hl: Host language handed to ``TrendReq``.
            tz: Time zone value handed to ``TrendReq``.
            timeframe: Time range handed to ``build_payload``.
            cat: Category id handed to ``build_payload``.
            geo: Region code handed to ``build_payload``.
            gprop: Google property filter handed to ``build_payload``.
        """
        self.hl = hl
        self.tz = tz
        # A list literal in the signature would be created once and shared by
        # every instance that relies on the default.
        self.keyword = ['youtube'] if keyword is None else keyword
        self.timeframe = timeframe
        self.cat = cat
        self.geo = geo
        self.gprop = gprop
        self.update_pytrend()
        self.update_payload()

    # Login to Google. Only need to run this once, the rest of requests will use the same session.
    def update_pytrend(self):
        """Recreate ``self.pytrend`` from the current ``hl`` and ``tz`` values."""
        session_options = {'hl': self.hl, 'tz': self.tz}
        self.pytrend = TrendReq(**session_options)

    # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
    def update_payload(self):
        """Rebuild the request payload from the currently stored query settings."""
        payload_options = {
            'kw_list': self.keyword,
            'cat': self.cat,
            'timeframe': self.timeframe,
            'geo': self.geo,
            'gprop': self.gprop,
        }
        self.pytrend.build_payload(**payload_options)

    def set_pytrend(self, hl='None', tz='None'):
        """Change session settings, then rebuild the session and the payload.

        The string ``'None'`` (not the ``None`` object) marks an argument as
        "leave the stored value unchanged".

        Args:
            hl: Host language, e.g. ``'ko'`` or ``'en_US'``.
            tz: Time zone value; the original comment gives 82 for Korea and
                360 for the US as examples.
        """
        for attribute, value in (('hl', hl), ('tz', tz)):
            if value != 'None':
                setattr(self, attribute, value)
        self.update_pytrend()
        self.update_payload()

    def set_payload(self,
                    keyword=None,
                    timeframe='None',
                    cat=-1,
                    geo='None',
                    gprop='None'):
        """Update any subset of the payload settings and rebuild the payload.

        Each argument has a sentinel default meaning "keep the stored value":
        ``None`` for ``keyword``, ``-1`` for ``cat`` and the string ``'None'``
        for the remaining ones (an empty string is a meaningful value for
        ``geo`` and ``gprop``, so it cannot serve as the sentinel).

        Args:
            keyword: New keyword list.
            timeframe: e.g. ``'all'``, ``'today 5-y'``, ``'today 1-m'``,
                ``'now 7-d'``, ``'now 4-H'``, ``'2018-05-20 2019-01-20'``.
            cat: Category id.
            geo: e.g. ``'KR'``, ``'US'``, ``''``.
            gprop: e.g. ``'images'``, ``'news'``, ``'youtube'``, ``'froogle'``.
        """
        # Identity test: "!= None" would invoke the argument's own __ne__,
        # which is wrong for array-like keyword containers.
        if keyword is not None:
            self.keyword = keyword
        if timeframe != 'None':
            self.timeframe = timeframe
        if cat != -1:
            self.cat = cat
        if geo != 'None':
            self.geo = geo
        if gprop != 'None':
            self.gprop = gprop
        self.update_payload()

    def load_data(self, keyword=None):
        """Return the data set selected by ``keyword``.

        Args:
            keyword: ``'region'`` for the aggregated interest-by-region list,
                ``'gender'`` for the gender search-rate list.

        Returns:
            The selected list, or ``None`` for any other ``keyword``.
        """
        if keyword == 'region':
            # interest_by_region() already converts the frame and returns the
            # list, so converting a second time here is unnecessary.
            return self.interest_by_region()
        if keyword == 'gender':
            return self.search_rate_by_gender()
        return None

    # Interest Over Time
    def interest_over_time(self):
        """Fetch interest over time and return it converted to list form.

        The trimmed DataFrame is kept on ``interest_over_time_df`` and the
        converted result on ``interest_over_time_list``.
        """
        self.interest_over_time_df = self.pytrend.interest_over_time()
        # Keep only the first len(keyword) columns; this drops the unused
        # isPartial column.
        keyword_count = len(self.keyword)
        self.interest_over_time_df = self.interest_over_time_df.iloc[:, :keyword_count]
        converted = self.interest_over_time_df_to_list()
        self.interest_over_time_list = converted
        return converted

    # Interest Over Time hourly
    def historical_hourly_interest(self,
                                   year_start=2019,
                                   month_start=4,
                                   day_start=1,
                                   hour_start=0,
                                   year_end=2019,
                                   month_end=5,
                                   day_end=1,
                                   hour_end=0,
                                   cat=0,
                                   geo='KR',
                                   gprop='',
                                   sleep=0):
        """Fetch hourly interest for ``self.keyword`` and return it as a list.

        All arguments are forwarded to ``get_historical_interest``. Their
        defaults are the values that used to be hard-coded here, so a call
        without arguments behaves as before.

        NOTE(review): ``cat``, ``geo`` and ``gprop`` default to fixed values,
        not to the instance's ``self.cat`` / ``self.geo`` / ``self.gprop``;
        pass them explicitly if the instance settings should apply.

        Returns:
            The result of ``historical_hourly_interest_df_to_list()``; it is
            also stored on ``historical_hourly_interest_list``.
        """
        self.historical_hourly_interest_df = self.pytrend.get_historical_interest(
            keywords=self.keyword,
            year_start=year_start,
            month_start=month_start,
            day_start=day_start,
            hour_start=hour_start,
            year_end=year_end,
            month_end=month_end,
            day_end=day_end,
            hour_end=hour_end,
            cat=cat,
            geo=geo,
            gprop=gprop,
            sleep=sleep)
        # Keep only the first len(keyword) columns; this drops the unused
        # isPartial column.
        self.historical_hourly_interest_df = (
            self.historical_hourly_interest_df.iloc[:, :len(self.keyword)])
        self.historical_hourly_interest_list = (
            self.historical_hourly_interest_df_to_list())
        return self.historical_hourly_interest_list

    # Interest by Region
    def interest_by_region(self):
        """Fetch the search ratio per region and return it in list form.

        The raw result is kept on ``interest_by_region_df`` and the converted
        list on ``interest_by_region_list``.
        """
        self.interest_by_region_df = self.pytrend.interest_by_region()
        converted = self.interest_by_region_df_to_list()
        self.interest_by_region_list = converted
        return converted

    # Related Topics, Returns dictionary of pandas.DataFrames
    def related_topics(self):
        """Fetch the ranked topics related to the keywords.

        The result is also stored on ``related_topics_dict``.
        """
        topics = self.pytrend.related_topics()
        self.related_topics_dict = topics
        return topics

    # Related Queries, returns a dictionary of dataframes
    def related_queries(self):
        """Fetch the ranked search queries related to the keywords.

        The result is also stored on ``related_queries_dict``.
        """
        queries = self.pytrend.related_queries()
        self.related_queries_dict = queries
        return queries

    # trending searches in real time
    def trending_searches(self, pn='south_korea'):
        """Fetch the currently trending searches for one country.

        Args:
            pn: Country name forwarded to ``pytrend.trending_searches``.
                Defaults to ``'south_korea'``, the value that used to be
                hard-coded here.

        Returns:
            The result of ``pytrend.trending_searches``; it is also stored on
            ``trending_searches_df``.
        """
        self.trending_searches_df = self.pytrend.trending_searches(pn=pn)
        return self.trending_searches_df

    #
    def today_searches(self):
        """Fetch today's searches with the library's default arguments.

        The result is also stored on ``today_searches_df``.
        """
        todays = self.pytrend.today_searches()
        self.today_searches_df = todays
        return todays

    # Get Google Top Charts
    def top_charts(self, date=2015, hl='ko', tz='82', geo='KR'):
        """Fetch the yearly top-chart keywords.

        The defaults are the values that used to be hard-coded here, so a
        call without arguments behaves as before.

        Args:
            date: Year as a ``YYYY`` integer.
            hl: Host language forwarded to ``pytrend.top_charts``.
            tz: Time zone value forwarded to ``pytrend.top_charts``.
            geo: Region, e.g. ``'KR'``, ``'US'`` or ``'GLOBAL'``.

        Returns:
            The result of ``pytrend.top_charts``; it is also stored on
            ``top_charts_df``.
        """
        self.top_charts_df = self.pytrend.top_charts(
            date=date, hl=hl, tz=tz, geo=geo)
        return self.top_charts_df

    # Get Google Category
    def categories(self):
        """Fetch the Google category names and ids.

        The result is also stored on ``categories_df``.
        """
        listing = self.pytrend.categories()
        self.categories_df = listing
        return listing

    def show_interest_over_time(self):
        """Plot the search ratio over time, one line per keyword.

        Reads ``interest_over_time_df``, which ``interest_over_time()``
        assigns.
        """
        plt.figure(figsize=(14, 4))
        plt.style.use('ggplot')  # nicer-looking plot style
        shade = 0.0
        for term in self.keyword:
            # each keyword takes a colour 0.1 further along the rainbow map
            shade += 0.1
            series = self.interest_over_time_df[term]
            plt.plot(series, c=plt.cm.rainbow(shade), label=term)
        # place the legend outside the upper-right corner of the axes
        plt.legend(bbox_to_anchor=(1, 1), loc=2)
        plt.show()

    def interest_over_time_df_to_list(self):
        """Convert ``interest_over_time_df`` into a list of labelled rows.

        Returns:
            A list whose first row is ``['x', <date strings>...]`` (dates
            formatted ``YYYY-MM-DD``) followed by one row per keyword of the
            form ``[keyword, <values>...]``.
        """
        stamps = self.interest_over_time_df.index.tolist()
        axis_row = ['x'] + [
            stamp.date().strftime("%Y-%m-%d") for stamp in stamps
        ]
        rows = [axis_row]
        for term in self.keyword:
            rows.append([term] + self.interest_over_time_df[term].tolist())
        return rows

    def historical_hourly_interest_df_to_list(self):
        """Convert ``historical_hourly_interest_df`` into a list of labelled rows.

        Returns:
            A list whose first row is ``['x', <date strings>...]`` followed by
            one row per keyword of the form ``[keyword, <values>...]``.
        """
        stamps = self.historical_hourly_interest_df.index.tolist()
        # NOTE(review): only the date part is kept, so rows from different
        # hours of one day share the same label. Confirm this is intended.
        axis_row = ['x'] + [
            stamp.date().strftime("%Y-%m-%d") for stamp in stamps
        ]
        rows = [axis_row]
        for term in self.keyword:
            rows.append(
                [term] + self.historical_hourly_interest_df[term].tolist())
        return rows

    def interest_by_region_df_to_list(self):
        """Aggregate ``interest_by_region_df`` into six regional groups.

        Only the column of the last entry in ``self.keyword`` is used. Rows
        are addressed by position, and each group's share is expressed as a
        rounded percentage of the total over all grouped rows.

        NOTE(review): the positions presumably correspond to the Korean
        provinces sorted by their Korean names (0 Gangwon, 1 Gyeonggi,
        2-3 Gyeongsang, 8 Seoul, 11-12 Jeolla, 13 Jeju, 14-15 Chungcheong).
        Verify against the actual ``interest_by_region`` output.

        Returns:
            A list of ``[group_name, percent]`` pairs, or an empty list when
            there is no keyword, the frame has too few rows for the position
            table, or the grouped rows sum to zero.
        """
        if not self.keyword:
            return []
        values = self.interest_by_region_df[self.keyword[-1]].tolist()

        # Group name -> row positions. The total is taken over exactly the
        # rows used by the groups, so the percentages add up to ~100.
        # Previously '서울/경기' added rows 1 and 2, which counted row 2
        # twice and left row 8 (part of the total) out of every group.
        groups = (
            ('강원도', (0,)),
            ('서울/경기', (1, 8)),
            ('경상도', (2, 3)),
            ('전라도', (11, 12)),
            ('제주도', (13,)),
            ('충청도', (14, 15)),
        )
        positions = [pos for _, members in groups for pos in members]
        if len(values) <= max(positions):
            return []

        ratio = sum(values[pos] for pos in positions) / 100
        if ratio <= 0:
            return []
        return [[name, round(sum(values[pos] for pos in members) / ratio)]
                for name, members in groups]

    def search_rate_by_gender(self):
        """Return placeholder search rates for ``'male'`` and ``'female'``.

        Each rate is a random integer between 50 and 100 inclusive; no
        request is made.
        """
        return [
            ['male', random.randint(50, 100)],
            ['female', random.randint(50, 100)],
        ]
Example #2
0
class Trendsetter():
    def __init__(self, timezone=1, language='en-US'):
        """Set up the country tables, the trends session and the translator.

        Args:
            timezone: timezone in hours
            language: language of interface, not important
        """
        # (country name, country code, language code)
        supported = (
            ('united_states', 'US', 'en'),
            ('united_kingdom', 'GB', 'en'),
            ('australia', 'AU', 'en'),
            ('germany', 'DE', 'de'),
            ('france', 'FR', 'fr'),
            ('italy', 'IT', 'it'),
            ('japan', 'JP', 'ja'),
            ('saudi_arabia', 'SA', 'ar'),
            ('egypt', 'EG', 'ar'),
            # ('china', 'CN', 'zh-cn'),
            # ('iran', 'IR', 'ar'),
            ('brazil', 'BR', 'pt'),
            ('india', 'IN', 'hi'),
            ('israel', 'IL', 'iw'),
            # ('spain', 'ES', 'es'),
            ('mexico', 'MX', 'es'),
            ('russia', 'RU', 'ru'),
            ('south_korea', 'KR', 'ko'),
            ('taiwan', 'TW', 'zh-tw'),
            ('hong_kong', 'HK', 'zh-tw'),
            ('thailand', 'TH', 'th'),
            ('turkey', 'TR', 'tr'),
            ('vietnam', 'VN', 'vi'),
        )
        translate_hosts = [
            "translate.google.com", "translate.google.co.kr",
            "translate.google.at", "translate.google.de",
            "translate.google.ru", "translate.google.ch",
            "translate.google.fr", "translate.google.es"
        ]

        self.tz = -60 * timezone
        # name -> [country code, language code]
        self.countries = {name: [code, lang] for name, code, lang in supported}
        # reverse lookup: country code -> name
        self.countrycodes = {
            entry[0]: name for name, entry in self.countries.items()
        }
        self.trends = TrendReq(hl=language, tz=self.tz)
        self.translator = gt.Translator(service_urls=translate_hosts)

    def browse_categories(self, levels=()):
        """Browse the category tree by a path of child indices.

        Prints the name and id of the category that was reached.

        Args:
            levels: sequence of child indices to follow from the root,
                eg. [4,2]. Defaults to an empty path (the root category).

        Returns:
            DataFrame with the child categories, or ``None`` when the
            category that was reached has no children.
        """
        cat = self.trends.categories()
        for i in levels:
            cat = cat['children'][i]

        print(cat['name'], ", id =", cat['id'])
        if 'children' not in cat:
            return None
        return pd.DataFrame.from_dict(cat['children'])

    def get_trending(self, country='united_states'):
        """
        get currently and daily trends for implemented countries

        For countries whose language is not English, translated copies of
        both lists are added under ``'trending_en'`` and ``'today_en'`` when
        the translation succeeds. Translation is best-effort: on failure a
        warning is issued and the untranslated result is returned.

        Args:
            country: country name or country code

        Returns:
            {'trending': list, 'today': list}, plus the ``*_en`` keys
            described above; the dict is also stored on ``self.trending``.

        Raises:
            ValueError if country not supported
        """
        if country not in self.countries:
            if country not in self.countrycodes:
                raise ValueError("Country not supported.")
            # a country code was given; map it back to the country name
            country = self.countrycodes[country]

        code, lang = self.countries[country]

        self.trending = {
            'trending': list(self.trends.trending_searches(pn=country)[0]),
            'today': list(self.trends.today_searches(pn=code)),
        }

        if lang != 'en':
            try:
                self.trending_en = {
                    key + '_en': [
                        item.text for item in self.translator.translate(
                            terms, dest='en', src=lang)
                    ]
                    for key, terms in self.trending.items()
                }
                self.trending.update(self.trending_en)
            except JSONDecodeError:
                warnings.warn("google translate API limit reached")
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                warnings.warn("google translate API not working")

        return self.trending

    def get_related(self,
                    kw,
                    timeframe='now 7-d',
                    category=0,
                    location='',
                    gtype=''):
        """Return the top and rising related topics of one keyword.

        Args:
            kw: the keyword to look up
            timeframe: timeframe string, or a list of parts that is joined
                with spaces, e.g. [t_start, t_end]
            category: category id forwarded as ``cat``
            location: location forwarded as ``geo``
            gtype: property filter forwarded as ``gprop``

        Returns:
            DataFrame with the 'top' rows followed by the 'rising' rows,
            re-indexed from 0. A part that is ``None`` is skipped; when both
            are ``None`` an empty DataFrame is returned.
        """
        tf_str = ' '.join(timeframe) if isinstance(timeframe, list) else timeframe

        self.trends.build_payload([kw],
                                  cat=category,
                                  timeframe=tf_str,
                                  geo=location,
                                  gprop=gtype)
        related = self.trends.related_topics()[kw]
        frames = [
            frame for frame in (related['top'], related['rising'])
            if frame is not None
        ]
        if not frames:
            return pd.DataFrame()
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent.
        return pd.concat(frames, ignore_index=True, sort=False)

    def get_interest(self,
                     kwds,
                     timeframe='now 7-d',
                     category=0,
                     location='',
                     gtype=''):
        """Fetch interest over time for up to five keywords.

        Explicit hourly ranges of eight days or more go through
        ``get_historical_interest``; every other request builds a payload
        and calls ``interest_over_time``.

        Args:
            kwds: list of up to 5 keywords, or a single keyword string
            timeframe: supported google format. or [t_start, t_end]; for daily output: 'YYYY-mm-dd',
                       for hourly output: 'YYYY-mm-ddThh'
            category: category id forwarded as ``cat``
            location: supported google location or country code
            gtype: property filter forwarded as ``gprop``

        Returns:
            DataFrame (also stored on ``self.interest``)
        """
        if isinstance(kwds, str):
            kwds = [kwds]

        if isinstance(timeframe, list):
            bounds = timeframe
            tf_str = ' '.join(bounds)
        else:
            tf_str = timeframe
            bounds = tf_str.split(' ')

        hourly = 'T' in tf_str
        relative = any(token in tf_str for token in ('now', 'today', 'all'))

        long_hourly_range = False
        if not relative:
            # explicit start/end stamps: parse them to measure the span
            stamp_format = '%Y-%m-%dT%H' if hourly else '%Y-%m-%d'
            t_start = datetime.datetime.strptime(bounds[0], stamp_format)
            t_end = datetime.datetime.strptime(bounds[1], stamp_format)
            long_hourly_range = (
                hourly and t_end - t_start >= datetime.timedelta(days=8))

        if long_hourly_range:
            self.interest = self.trends.get_historical_interest(
                kwds,
                year_start=t_start.year,
                year_end=t_end.year,
                month_start=t_start.month,
                month_end=t_end.month,
                day_start=t_start.day,
                day_end=t_end.day,
                hour_start=t_start.hour,
                hour_end=t_end.hour,
                cat=category,
                geo=location,
                gprop=gtype,
                sleep=60)
        else:
            self.trends.build_payload(kwds,
                                      cat=category,
                                      timeframe=tf_str,
                                      geo=location,
                                      gprop=gtype)
            self.interest = self.trends.interest_over_time()
        return self.interest
class DesignerTrendsCollector(BuilderTrendsCollector):
    """DesignerTrendsCollector contains the specific implementation of
    `BuilderTrendsCollector`.

    `DesignerTrendsCollector` contains the specific implementation of
    `BuilderTrendsCollector` based on the external library `pytrends`.

    Args:
        BuilderTrendsCollector (class): Abstract class that provides the implementations of the properties and methods.
    """
    def __init__(
        self,
        keyword_list: list,
        timeframe: str = "today 5-y",
        language: str = "en-US",
        category: int = 0,
        timezone: int = 360,
        country: str = "",
        property_filter="",
        **kwargs,
    ) -> None:
        """Store the search settings, open a session and build the payload.

        Args:
            keyword_list (list): Keywords to search for.
            timeframe (str, optional): Period to search in. Defaults to "today 5-y".
            language (str, optional): Search language. Defaults to "en-US".
            category (int, optional): A specific [search category](https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories). Defaults to 0.
            timezone (int, optional): [Search timezone](https://developers.google.com/maps/documentation/timezone/overview). Defaults to 360.
            country (str, optional): The country to search in. Defaults to "".
            property_filter (str, optional): Property filter of the search; only in news, images, YouTube, shopping. Defaults to "".
            **kwargs: Extra keyword arguments forwarded to `TrendReq`.
        """
        self.keyword_list = keyword_list
        self.timeframe = timeframe
        self.language = language
        self.category = category
        self.timezone = timezone
        self.country = country
        self.property_filter = property_filter

        self.pytrends = TrendReq(hl=self.language, tz=self.timezone, **kwargs)
        payload_options = {
            "kw_list": self.keyword_list,
            "cat": self.category,
            "timeframe": self.timeframe,
            "geo": self.country,
            "gprop": self.property_filter,
        }
        self.pytrends.build_payload(**payload_options)
        # start with an empty product
        self.reset()

    def reset(self) -> None:
        """Replace the current product with a new, empty `TrendProduct`."""
        empty_product = TrendProduct()
        self._product = empty_product

    @property
    def trends(self) -> TrendProduct:
        """Hand out the collected product and start a new, empty one.

        Returns:
            TrendProduct: The product that was filled up to this point.
        """
        try:
            return self._product
        finally:
            # runs after the return value has been taken, so the caller gets
            # the filled product and the collector starts over empty
            self.reset()

    def get_interest_over_time(self) -> None:
        """Run an interest-over-time search and add it to the product.

        The entry is keyed by this bound method object itself.
        """
        result = self.pytrends.interest_over_time()
        self._product.add_product(key=self.get_interest_over_time, value=result)

    def get_interest_by_region(self, resolution: str, **kwargs) -> None:
        """Run an interest-by-region search and add it to the product.

        The entry is keyed by this bound method object itself.

        Args:
            resolution (str): The resolution of the subregion.
            **kwargs: Extra keyword arguments forwarded to
                `pytrends.interest_by_region`.
        """
        result = self.pytrends.interest_by_region(resolution=resolution, **kwargs)
        self._product.add_product(key=self.get_interest_by_region, value=result)

    def get_trending_searches(self, trend_country: str) -> None:
        """Run a trending-searches request for one country and add it to the product.

        The entry is keyed by this bound method object itself.

        Args:
            trend_country (str): Name of the country of interest (required;
                forwarded as `pn`).
        """
        result = self.pytrends.trending_searches(pn=trend_country)
        self._product.add_product(key=self.get_trending_searches, value=result)

    def get_today_searches(self, today_country: str) -> None:
        """Request the daily search trends for one country and add them to the product.

        The entry is keyed by this bound method object itself.

        Args:
            today_country (str): Name of the country of interest (forwarded
                as `pn`).
        """
        result = self.pytrends.today_searches(pn=today_country)
        self._product.add_product(key=self.get_today_searches, value=result)

    def get_top_charts(self, date: int, top_country: str) -> None:
        """Run a top-charts search and add it to the product.

        The stored language and timezone are sent along with the request.
        The entry is keyed by this bound method object itself.

        Args:
            date (int): Year
            top_country (str): Name of the country of interest (forwarded as
                `geo`).
        """
        chart_options = {
            "hl": self.language,
            "tz": self.timezone,
            "geo": top_country,
        }
        result = self.pytrends.top_charts(date, **chart_options)
        self._product.add_product(key=self.get_top_charts, value=result)

    def get_related_topics(self) -> None:
        """Request the related topics of the keywords and add them to the product.

        The entry is keyed by this bound method object itself.
        """
        result = self.pytrends.related_topics()
        self._product.add_product(key=self.get_related_topics, value=result)

    def get_related_queries(self) -> None:
        """Request the related queries of the keywords and add them to the product.

        The entry is keyed by this bound method object itself.
        """
        result = self.pytrends.related_queries()
        self._product.add_product(key=self.get_related_queries, value=result)

    def get_suggestions(self) -> None:
        """Request dropdown suggestions for every keyword and add them to the product.

        The stored value is a dict mapping each keyword to its suggestions;
        the entry is keyed by this bound method object itself.
        """
        suggestions = {}
        for term in self.keyword_list:
            suggestions[term] = self.pytrends.suggestions(keyword=term)
        self._product.add_product(key=self.get_suggestions, value=suggestions)

    def get_categories(self) -> None:
        """Request the available categories and add them to the product.

        The entry is keyed by this bound method object itself.
        """
        result = self.pytrends.categories()
        self._product.add_product(key=self.get_categories, value=result)

    def get_historical_interest(
        self,
        year_start: int,
        month_start: int,
        day_start: int,
        hour_start: int,
        year_end: int,
        month_end: int,
        day_end: int,
        hour_end: int,
        **kwargs,
    ) -> None:
        """Run an hourly historical-interest search and add it to the product.

        The stored keyword list, category, country and property filter are
        sent along with the request. The entry is keyed by this bound method
        object itself.

        Args:
            year_start (int): Starting year
            month_start (int): Starting month
            day_start (int): Starting day
            hour_start (int): Starting hour
            year_end (int): Final year
            month_end (int): Final month
            day_end (int): Final day
            hour_end (int): Final hour
            **kwargs: Extra keyword arguments forwarded to
                `pytrends.get_historical_interest`.
        """
        time_window = {
            "year_start": year_start,
            "month_start": month_start,
            "day_start": day_start,
            "hour_start": hour_start,
            "year_end": year_end,
            "month_end": month_end,
            "day_end": day_end,
            "hour_end": hour_end,
        }
        result = self.pytrends.get_historical_interest(
            keywords=self.keyword_list,
            **time_window,
            cat=self.category,
            geo=self.country,
            gprop=self.property_filter,
            **kwargs,
        )
        self._product.add_product(key=self.get_historical_interest, value=result)
Example #4
0
# NOTE(review): `pytrend` is first assigned further down in this script.
# Unless it is already defined outside this view, the two requests below
# raise NameError. Confirm the intended order.
# Related Topics, returns a dictionary of dataframes
related_topics_dict = pytrend.related_topics()
print(related_topics_dict)

# Related Queries, returns a dictionary of dataframes
related_queries_dict = pytrend.related_queries()
print(related_queries_dict)



# Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
pytrend = TrendReq()
keys = ["the top"]
pytrend.build_payload(kw_list=keys, cat=0, geo='', timeframe='now 7-d')
# NOTE(review): neither `category` nor the `json` module is defined or
# imported in the visible part of the file. Confirm they exist upstream.
# When `category` is truthy, the category listing is dumped to categories.json.
if category:
    categories = pytrend.categories()
    with open('categories.json', 'w') as outfile:
        json.dump(categories, outfile, indent=2)





# Interest by country for the first keyword; print the five highest rows.
interest_by_region_df = pytrend.interest_by_region(resolution="COUNTRY")
print(interest_by_region_df.sort_values(keys[0], ascending=False).head(5))

# Related Topics, returns a dictionary of dataframes
related_topics_dict = pytrend.related_topics()
# Print the full "top" and "rising" tables for the first keyword.
print(related_topics_dict[keys[0]]["top"].to_string())
print(related_topics_dict[keys[0]]["rising"].to_string())