    async def test_blacklist_proxy_on_failure(self, create_proxy):
        proxies = ['http://127.0.0.1:2391']
        pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        assert pytrend._get_proxy() is None
        assert len(pytrend.proxies) == 0
        assert len(pytrend.blacklisted_proxies) == len(proxies)
    async def test_receive_error_no_retries_configured(self, client_mock):
        client_mock.return_value.__aenter__.return_value = client_mock
        client_mock.get = CoroutineMock(return_value=Response(status_code=429))

        pytrends = TrendReq(retries=0, backoff_factor=0)
        pytrends.cookies = {'NID': '12eqf98hnf8032r54'}

        with pytest.raises(ResponseError):
            await pytrends.top_charts(date=2018)
        assert client_mock.get.call_count == 1
    async def test_proxy_cycle_on_429_no_blacklist(self, client_mock):
        client_mock.return_value.__aenter__.return_value = client_mock
        proxies = ['http://127.0.0.1:8899', 'http://127.0.0.1:8900']
        retry_count = 0

        async def _get_request_side_effect(url, *args, **kwargs):
            nonlocal retry_count
            retry_count += 1
            if retry_count <= len(proxies):
                raise ProxyError(response=Response(status_code=429))
            else:
                return Response(status_code=429)

        client_mock.get = CoroutineMock(side_effect=_get_request_side_effect)
        pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
        with pytest.raises(ResponseError):
            await pytrend.build_payload(kw_list=['pizza', 'bagel'])

        # Ensure requests went through each proxy, then without a proxy once
        # the proxies were exhausted
        for proxy in proxies:
            client_mock.assert_any_call(proxies={'all': proxy})
        client_mock.assert_called_with(proxies=None)

        # Proxies that returned 429 should still be available in the proxy list
        assert sorted(pytrend.proxies) == sorted(proxies)
        assert len(pytrend.blacklisted_proxies) == 0
    async def test_fallback_to_local_requests_on_last_proxy_failure(self):
        proxies = ['http://127.0.0.1:2391', 'http://127.0.0.1:2390']
        pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        resp = await pytrend.interest_over_time()
        assert len(pytrend.proxies) == 0
        assert len(pytrend.blacklisted_proxies) == len(proxies)
        assert resp is not None
    async def test_retry_initially_fail_then_succeed(
            self, client_mock, trending_searches_200_response):
        client_mock.return_value.__aenter__.return_value = client_mock
        pytrend = TrendReq(retries=3, backoff_factor=0.1)
        pytrend.cookies = {'NID': '12eqf98hnf8032r54'}
        retry_count = 0

        async def _get_request_side_effect(url, *args, **kwargs):
            nonlocal retry_count
            retry_count += 1
            # Fail in a few different ways; on the final attempt, return a
            # valid response
            if retry_count == pytrend.retries - 1:
                raise ConnectionRefusedError()
            elif retry_count != pytrend.retries:
                return Response(status_code=429)
            else:
                return trending_searches_200_response

        client_mock.get = CoroutineMock(side_effect=_get_request_side_effect)
        trending_searches = await pytrend.trending_searches(pn='united_states')
        assert trending_searches is not None
    async def test_proxy_cycling(self, create_proxy):
        create_proxy('127.0.0.1', 8899)
        create_proxy('127.0.0.1', 8900)
        create_proxy('127.0.0.1', 8901)
        proxies = [
            'http://127.0.0.1:8899', 'http://127.0.0.1:8900',
            'http://127.0.0.1:8901'
        ]

        pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
        last_proxy = pytrend._get_proxy()

        await pytrend.suggestions(keyword='pizza')
        curr_proxy = pytrend._get_proxy()
        assert curr_proxy != last_proxy
        last_proxy = curr_proxy

        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        curr_proxy = pytrend._get_proxy()
        assert curr_proxy != last_proxy
        last_proxy = curr_proxy

        await pytrend.interest_over_time()
        curr_proxy = pytrend._get_proxy()
        assert curr_proxy != last_proxy
    async def test_all_retries_fail(self, client_mock):
        client_mock.return_value.__aenter__.return_value = client_mock
        client_mock.get = CoroutineMock(return_value=Response(status_code=429))

        pytrend = TrendReq(timeout=TIMEOUT, retries=3, backoff_factor=0.1)
        with pytest.raises(ResponseError):
            await pytrend.build_payload(kw_list=['pizza', 'bagel'])

        calls = [
            call('https://trends.google.com/?geo=US', timeout=ANY)
            for _ in range(pytrend.retries)
        ]
        client_mock.get.assert_has_calls(calls)
    async def test_trending_searches(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        trending_searches = await pytrend.trending_searches(pn='united_states')
        assert trending_searches is not None
    async def test_related_queries(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        related_queries = await pytrend.related_queries()
        assert related_queries is not None
    async def test_interest_by_region(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        interest = await pytrend.interest_by_region()
        assert interest is not None
    async def test_interest_over_time(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        resp = await pytrend.interest_over_time()
        assert resp is not None
    async def test_tokens(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        assert pytrend.related_queries_widget_list is not None
    async def test_get_data(self):
        """Should use the same default values as in the documentation"""
        pytrend = TrendReq(timeout=TIMEOUT)
        assert pytrend.hl == 'en-US'
        assert pytrend.tz == 360
        assert pytrend.geo == ''
    async def test_send_req_through_proxy(self, create_proxy):
        create_proxy('127.0.0.1', 8899)
        pytrend = TrendReq(timeout=TIMEOUT, proxies=['http://127.0.0.1:8899'])
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        resp = await pytrend.interest_over_time()
        assert resp is not None
    async def test_top_charts(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        top_charts = await pytrend.top_charts(date=2016)
        assert top_charts is not None
    async def test_get_cookie_on_request(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        await pytrend.interest_over_time()
        assert pytrend.cookies['NID']
    async def test_suggestions(self):
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        suggestions = await pytrend.suggestions(keyword='pizza')
        assert suggestions is not None
    async def test_build_payload(self):
        """Should build the token payload used to request the widgets"""
        pytrend = TrendReq(timeout=TIMEOUT)
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
        await pytrend.interest_over_time()
        assert pytrend.token_payload is not None
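# The tests above depend on pytest fixtures defined elsewhere (typically in a
# conftest.py). A minimal sketch of what client_mock might look like; the
# patch target and TIMEOUT value are assumptions, not the project's actual
# code, and create_proxy (which starts local HTTP proxy servers) is omitted:
import pytest
from unittest.mock import patch

TIMEOUT = 30  # assumed request timeout shared by the tests above


@pytest.fixture
def client_mock():
    # Replace the HTTP client class used by TrendReq so that instantiating it
    # yields a mock and no real network traffic is generated. Adjust the patch
    # target to wherever TrendReq actually imports its client.
    with patch('pytrendsasync.request.AsyncClient') as mock:
        yield mock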
Example #19
from asyncio import run

from pytrendsasync.request import TrendReq


async def main():
    # Log in to Google. This only needs to happen once; the remaining
    # requests reuse the same session.
    pytrend = TrendReq()

    # Create payload and capture API tokens. Only needed for
    # interest_over_time(), interest_by_region() and related_queries().
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])

    # Interest Over Time
    interest_over_time_df = await pytrend.interest_over_time()
    print(interest_over_time_df.head())

    # Interest by Region
    interest_by_region_df = await pytrend.interest_by_region()
    print(interest_by_region_df.head())

    # Related Queries: returns a dictionary of DataFrames
    related_queries_dict = await pytrend.related_queries()
    print(related_queries_dict)

    # Get Google Hot Trends data
    trending_searches_df = await pytrend.trending_searches()
    print(trending_searches_df.head())

    # Get Google Today's Searches data
    today_searches_df = await pytrend.today_searches()
    print(today_searches_df.head())

    # Get Google Top Charts
    top_charts_df = await pytrend.top_charts(2018, hl='en-US', tz=300, geo='GLOBAL')
    print(top_charts_df.head())


run(main())
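# Since every pytrendsasync call is a coroutine, independent requests can also
# be run concurrently. A minimal sketch, assuming separate TrendReq instances
# are safe to use side by side (fetch_interest is a hypothetical helper, not
# part of the library):
from asyncio import gather, run

from pytrendsasync.request import TrendReq


async def fetch_interest(keyword):
    pytrend = TrendReq()
    await pytrend.build_payload(kw_list=[keyword])
    return await pytrend.interest_over_time()


async def main():
    # Fire both requests at once and wait for both DataFrames
    frames = await gather(*(fetch_interest(kw) for kw in ['pizza', 'bagel']))
    for df in frames:
        print(df.head())


run(main())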
Example #20
from asyncio import sleep
from datetime import date, timedelta
from functools import partial

import pandas as pd

from pytrendsasync.request import TrendReq


async def get_daily_data(word: str,
                         start_year: int,
                         start_mon: int,
                         stop_year: int,
                         stop_mon: int,
                         geo: str = 'US',
                         verbose: bool = True,
                         wait_time: float = 5.0) -> pd.DataFrame:
    """Given a word, fetches daily search volume data from Google Trends and
    returns results in a pandas DataFrame.

    Details: Due to the way Google Trends scales and returns data, special
    care needs to be taken to make the daily data comparable over different
    months. To do that, we download daily data on a month by month basis,
    and also monthly data. The monthly data is downloaded in one go, so that
    the monthly values are comparable amongst themselves and can be used to
    scale the daily data. The daily data is scaled by multiplying the daily
    value by the monthly search volume divided by 100.
    For a more detailed explanation see http://bit.ly/trendsscaling

    Args:
        word (str): Word to fetch daily data for.
        start_year (int): the start year
        start_mon (int): the start month (data starts on the 1st day of
            this month)
        stop_year (int): the end year
        stop_mon (int): the end month (data ends on the last day of this
            month)
        geo (str): geolocation
        verbose (bool): If True, prints the word and the current time frame
            the data is being fetched for.
        wait_time (float): Number of seconds to sleep between requests, to
            avoid being rate limited by Google.

    Returns:
        complete (pd.DataFrame): Contains 4 columns.
            The column named after the word argument contains the daily search
            volume already scaled and comparable through time.
            The column f'{word}_unscaled' is the original daily data fetched
            month by month, and it is not comparable across different months
            (but is comparable within a month).
            The column f'{word}_monthly' contains the original monthly data
            fetched at once. The values in this column have been forward-filled
            so that no NaN values are present.
            The column 'scale' contains the scale used to obtain the scaled
            daily data.
    """

    # Set up start and stop dates
    start_date = date(start_year, start_mon, 1)
    stop_date = get_last_date_of_month(stop_year, stop_mon)

    # Start pytrends for US region
    pytrends = TrendReq(hl='en-US', tz=360)
    # Initialize build_payload with the word we need data for
    build_payload = partial(pytrends.build_payload,
                            kw_list=[word],
                            cat=0,
                            geo=geo,
                            gprop='')

    # Obtain monthly data for all months in years [start_year, stop_year]
    monthly = await _fetch_data(
        pytrends, build_payload,
        convert_dates_to_timeframe(start_date, stop_date))

    # Get daily data, month by month
    results = {}
    # If a timeout or a too-many-requests error occurs, the wait time needs
    # to be adjusted upward
    current = start_date
    while current < stop_date:
        last_date_of_month = get_last_date_of_month(current.year,
                                                    current.month)
        timeframe = convert_dates_to_timeframe(current, last_date_of_month)
        if verbose:
            print(f'{word}:{timeframe}')
        results[current] = await _fetch_data(pytrends, build_payload,
                                             timeframe)
        current = last_date_of_month + timedelta(days=1)
        await sleep(wait_time)  # don't go too fast or Google will send 429s

    daily = pd.concat(results.values()).drop(columns=['isPartial'])
    complete = daily.join(monthly, lsuffix='_unscaled', rsuffix='_monthly')

    # Scale daily data by monthly weights so the data is comparable
    complete[f'{word}_monthly'] = complete[f'{word}_monthly'].ffill()  # fill NaN values
    complete['scale'] = complete[f'{word}_monthly'] / 100
    complete[word] = complete[f'{word}_unscaled'] * complete.scale

    return complete
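# get_daily_data relies on three helpers that are not shown above. A minimal
# sketch of what they could look like; the bodies below are assumptions
# inferred from the call sites, not the original implementations:
from calendar import monthrange
from datetime import date

import pandas as pd


def get_last_date_of_month(year: int, month: int) -> date:
    # monthrange returns (weekday of the 1st, number of days in the month)
    return date(year, month, monthrange(year, month)[1])


def convert_dates_to_timeframe(start: date, stop: date) -> str:
    # pytrends expects timeframes formatted as 'YYYY-MM-DD YYYY-MM-DD'
    return f"{start.strftime('%Y-%m-%d')} {stop.strftime('%Y-%m-%d')}"


async def _fetch_data(pytrends, build_payload, timeframe: str) -> pd.DataFrame:
    # Build the payload for a single timeframe and fetch the interest-over-time
    # data; the original presumably also retries on transient 429 errors.
    await build_payload(timeframe=timeframe)
    return await pytrends.interest_over_time()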