async def test_blacklist_proxy_on_failure(self, create_proxy):
    # The create_proxy fixture is requested but never called, so nothing
    # listens on port 2391; requests through the proxy fail and the proxy
    # gets blacklisted.
    proxies = ['http://127.0.0.1:2391']
    pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    assert pytrend._get_proxy() is None
    assert len(pytrend.proxies) == 0
    assert len(pytrend.blacklisted_proxies) == len(proxies)
async def test_receive_error_no_retries_configured(self, client_mock):
    client_mock.return_value.__aenter__.return_value = client_mock
    client_mock.get = CoroutineMock(return_value=Response(status_code=429))
    pytrends = TrendReq(retries=0, backoff_factor=0)
    pytrends.cookies = {'NID': '12eqf98hnf8032r54'}
    with pytest.raises(ResponseError):
        await pytrends.top_charts(date=2018)
    assert client_mock.get.call_count == 1
async def test_proxy_cycle_on_429_no_blacklist(self, client_mock):
    client_mock.return_value.__aenter__.return_value = client_mock
    proxies = ['http://127.0.0.1:8899', 'http://127.0.0.1:8900']
    retry_count = 0

    async def _get_request_side_effect(url, *args, **kwargs):
        nonlocal retry_count
        retry_count += 1
        if retry_count <= len(proxies):
            raise ProxyError(response=Response(status_code=429))
        else:
            return Response(status_code=429)

    client_mock.get = CoroutineMock(side_effect=_get_request_side_effect)
    pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
    with pytest.raises(ResponseError):
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])

    # Ensure we sent a request through each proxy, then once without a
    # proxy after all proxies were exhausted.
    for proxy in proxies:
        client_mock.assert_any_call(proxies={'all': proxy})
    client_mock.assert_called_with(proxies=None)

    # Proxies that returned 429 should still be available in the proxy
    # list. (Note: list.sort() returns None, so comparing its result
    # would always pass; compare sorted copies instead.)
    assert sorted(pytrend.proxies) == sorted(proxies)
    assert len(pytrend.blacklisted_proxies) == 0
async def test_fallback_to_local_requests_on_last_proxy_failure(self):
    proxies = ['http://127.0.0.1:2391', 'http://127.0.0.1:2390']
    pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    resp = await pytrend.interest_over_time()
    assert len(pytrend.proxies) == 0
    assert len(pytrend.blacklisted_proxies) == len(proxies)
    assert resp is not None
async def test_retry_initially_fail_then_succeed(
        self, client_mock, trending_searches_200_response):
    client_mock.return_value.__aenter__.return_value = client_mock
    pytrend = TrendReq(retries=3, backoff_factor=0.1)
    pytrend.cookies = {'NID': '12eqf98hnf8032r54'}
    retry_count = 0

    async def _get_request_side_effect(url, *args, **kwargs):
        nonlocal retry_count
        retry_count += 1
        # Fail in a few different ways; on the last attempt, return a
        # successful response.
        if retry_count == pytrend.retries - 1:
            raise ConnectionRefusedError()
        elif retry_count != pytrend.retries:
            return Response(status_code=429)
        else:
            return trending_searches_200_response

    client_mock.get = CoroutineMock(side_effect=_get_request_side_effect)
    trending_searches = await pytrend.trending_searches(pn='united_states')
    assert trending_searches is not None
async def test_proxy_cycling(self, create_proxy):
    create_proxy('127.0.0.1', 8899)
    create_proxy('127.0.0.1', 8900)
    create_proxy('127.0.0.1', 8901)
    proxies = [
        'http://127.0.0.1:8899',
        'http://127.0.0.1:8900',
        'http://127.0.0.1:8901'
    ]
    pytrend = TrendReq(timeout=TIMEOUT, proxies=proxies)

    last_proxy = pytrend._get_proxy()
    await pytrend.suggestions(keyword='pizza')
    curr_proxy = pytrend._get_proxy()
    assert curr_proxy != last_proxy

    last_proxy = curr_proxy
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    curr_proxy = pytrend._get_proxy()
    assert curr_proxy != last_proxy

    last_proxy = curr_proxy
    await pytrend.interest_over_time()
    curr_proxy = pytrend._get_proxy()
    assert curr_proxy != last_proxy
async def test_all_retries_fail(self, client_mock):
    client_mock.return_value.__aenter__.return_value = client_mock
    client_mock.get = CoroutineMock(return_value=Response(status_code=429))
    pytrend = TrendReq(timeout=TIMEOUT, retries=3, backoff_factor=0.1)
    with pytest.raises(ResponseError):
        await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    calls = [
        call('https://trends.google.com/?geo=US', timeout=ANY)
        for _ in range(pytrend.retries)
    ]
    client_mock.get.assert_has_calls(calls)
async def test_trending_searches(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    trending_searches = await pytrend.trending_searches(pn='united_states')
    assert trending_searches is not None
async def test_related_queries(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    related_queries = await pytrend.related_queries()
    assert related_queries is not None
async def test_interest_by_region(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    interest = await pytrend.interest_by_region()
    assert interest is not None
async def test_interest_over_time(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    resp = await pytrend.interest_over_time()
    assert resp is not None
async def test_tokens(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    assert pytrend.related_queries_widget_list is not None
async def test_get_data(self):
    """Should use the same default values as in the documentation."""
    pytrend = TrendReq(timeout=TIMEOUT)
    assert pytrend.hl == 'en-US'
    assert pytrend.tz == 360
    assert pytrend.geo == ''
async def test_send_req_through_proxy(self, create_proxy):
    create_proxy('127.0.0.1', 8899)
    pytrend = TrendReq(timeout=TIMEOUT, proxies=['http://127.0.0.1:8899'])
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    resp = await pytrend.interest_over_time()
    assert resp is not None
async def test_top_charts(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    top_charts = await pytrend.top_charts(date=2016)
    assert top_charts is not None
async def test_get_cookie_on_request(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    await pytrend.interest_over_time()
    assert pytrend.cookies['NID']
async def test_suggestions(self):
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    suggestions = await pytrend.suggestions(keyword='pizza')
    assert suggestions is not None
async def test_build_payload(self):
    """Should return the widgets to get data."""
    pytrend = TrendReq(timeout=TIMEOUT)
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    await pytrend.interest_over_time()
    assert pytrend.token_payload is not None
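# Taken together, the tests above pin down TrendReq's proxy and retry
# behaviour: proxies are cycled between requests, persistently failing
# proxies are blacklisted, and requests fall back to a direct connection
# once every proxy has been exhausted. The sketch below shows that
# configuration in one place; the keyword arguments mirror the tests
# above, but the concrete values (timeout, proxy addresses) are
# illustrative assumptions only.

import asyncio

from pytrendsasync.request import TrendReq


async def fetch_through_proxies():
    pytrend = TrendReq(
        timeout=30,          # per-request timeout in seconds (illustrative)
        proxies=['http://127.0.0.1:8899', 'http://127.0.0.1:8900'],
        retries=3,           # attempts before a request is given up on
        backoff_factor=0.1,  # grows the delay between retry attempts
    )
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])
    return await pytrend.interest_over_time()


if __name__ == '__main__':
    print(asyncio.run(fetch_through_proxies()).head())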
import asyncio

from pytrendsasync.request import TrendReq


async def main():
    # Login to Google. Only needs to run once; subsequent requests
    # reuse the same session.
    pytrend = TrendReq()

    # Create payload and capture API tokens. Only needed for
    # interest_over_time(), interest_by_region() and related_queries().
    await pytrend.build_payload(kw_list=['pizza', 'bagel'])

    # Interest Over Time
    interest_over_time_df = await pytrend.interest_over_time()
    print(interest_over_time_df.head())

    # Interest by Region
    interest_by_region_df = await pytrend.interest_by_region()
    print(interest_by_region_df.head())

    # Related Queries, returns a dictionary of dataframes
    related_queries_dict = await pytrend.related_queries()
    print(related_queries_dict)

    # Get Google Hot Trends data
    trending_searches_df = await pytrend.trending_searches()
    print(trending_searches_df.head())

    # Get today's trending searches
    today_searches_df = await pytrend.today_searches()
    print(today_searches_df.head())

    # Get Google Top Charts
    top_charts_df = await pytrend.top_charts(2018, hl='en-US', tz=300,
                                             geo='GLOBAL')
    print(top_charts_df.head())


asyncio.run(main())
from asyncio import sleep
from datetime import date, timedelta
from functools import partial

import pandas as pd

from pytrendsasync.request import TrendReq


async def get_daily_data(word: str,
                         start_year: int,
                         start_mon: int,
                         stop_year: int,
                         stop_mon: int,
                         geo: str = 'US',
                         verbose: bool = True,
                         wait_time: float = 5.0) -> pd.DataFrame:
    """Given a word, fetches daily search volume data from Google Trends
    and returns results in a pandas DataFrame.

    Details: Due to the way Google Trends scales and returns data, special
    care needs to be taken to make the daily data comparable over different
    months. To do that, we download daily data on a month by month basis,
    and also monthly data. The monthly data is downloaded in one go, so
    that the monthly values are comparable amongst themselves and can be
    used to scale the daily data. The daily data is scaled by multiplying
    the daily value by the monthly search volume divided by 100.
    For a more detailed explanation see http://bit.ly/trendsscaling

    Args:
        word (str): Word to fetch daily data for.
        start_year (int): The start year.
        start_mon (int): Start at the first day of this month.
        stop_year (int): The end year.
        stop_mon (int): End at the last day of this month.
        geo (str): Geolocation.
        verbose (bool): If True, print the word and the current time frame
            we are fetching the data for.
        wait_time (float): Seconds to sleep between requests, to avoid
            rate limiting.

    Returns:
        complete (pd.DataFrame): Contains 4 columns.
            The column named after the word argument contains the daily
            search volume, already scaled and comparable through time.
            The column f'{word}_unscaled' is the original daily data
            fetched month by month, and it is not comparable across
            different months (but is comparable within a month).
            The column f'{word}_monthly' contains the original monthly
            data fetched at once. The values in this column have been
            backfilled so that there are no NaN present.
            The column 'scale' contains the scale used to obtain the
            scaled daily data.
    """
    # Set up start and stop dates
    start_date = date(start_year, start_mon, 1)
    stop_date = get_last_date_of_month(stop_year, stop_mon)

    # Start pytrends for US region
    pytrends = TrendReq(hl='en-US', tz=360)
    # Initialize build_payload with the word we need data for
    build_payload = partial(pytrends.build_payload,
                            kw_list=[word], cat=0, geo=geo, gprop='')

    # Obtain monthly data for all months in years [start_year, stop_year]
    monthly = await _fetch_data(
        pytrends, build_payload,
        convert_dates_to_timeframe(start_date, stop_date))

    # Get daily data, month by month
    results = {}
    # If a timeout or a too-many-requests error occurs, adjust wait_time.
    current = start_date
    while current < stop_date:
        last_date_of_month = get_last_date_of_month(current.year,
                                                    current.month)
        timeframe = convert_dates_to_timeframe(current, last_date_of_month)
        if verbose:
            print(f'{word}:{timeframe}')
        results[current] = await _fetch_data(pytrends, build_payload,
                                             timeframe)
        current = last_date_of_month + timedelta(days=1)
        await sleep(wait_time)  # don't go too fast or Google will send 429s

    daily = pd.concat(results.values()).drop(columns=['isPartial'])
    complete = daily.join(monthly, lsuffix='_unscaled', rsuffix='_monthly')

    # Scale daily data by monthly weights so the data is comparable
    complete[f'{word}_monthly'].ffill(inplace=True)  # fill NaN values
    complete['scale'] = complete[f'{word}_monthly'] / 100
    complete[word] = complete[f'{word}_unscaled'] * complete.scale

    return complete
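# get_daily_data() relies on three helpers that are not part of this
# excerpt: get_last_date_of_month(), convert_dates_to_timeframe() and
# _fetch_data(). The sketches below are assumptions reconstructed from
# how the helpers are called above, not the original implementations.

from calendar import monthrange


def get_last_date_of_month(year: int, month: int) -> date:
    """Return the last date of the given month, e.g. date(2019, 2, 28)."""
    return date(year, month, monthrange(year, month)[1])


def convert_dates_to_timeframe(start: date, stop: date) -> str:
    """Format two dates as the 'YYYY-MM-DD YYYY-MM-DD' timeframe string
    that build_payload() accepts."""
    return f'{start.strftime("%Y-%m-%d")} {stop.strftime("%Y-%m-%d")}'


async def _fetch_data(pytrends, build_payload, timeframe: str) -> pd.DataFrame:
    """Attach a timeframe to the prepared payload and download the
    interest-over-time data for it. Assumes build_payload is the partial
    created inside get_daily_data()."""
    await build_payload(timeframe=timeframe)
    return await pytrends.interest_over_time()


# A hypothetical invocation (the word and date range are arbitrary
# examples):
#
#   import asyncio
#   df = asyncio.run(get_daily_data('pizza', 2018, 1, 2018, 6))
#   print(df.head())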