def _plot_map(results, figsize=(15, 8), cmap=['#ff0000'], showfig=True, verbose=3): out = None if (results['method'] == 'geo') and (not results['df'].empty): # get map name map_name = worldmap.code2county(results['geo'])[1] # Normalize data, colnames, idx_names = _normalize_data(results['df']) # Plot per searchword for i in range(0, data.shape[1]): # Color only those with value>0 idx = np.where(data[:, i] > 0)[0] # county_names = results['df'].iloc[idx,i].index.values county_names = idx_names[idx] opacity = data[idx, i] filename = map_name + '_' + colnames[i] + '.svg' # If no data, set all on black if len(county_names) == 0: county_names = idx_names opacity = 1 cmap = ['#D3D3D3'] # Plot on map out = worldmap.plot(county_names, map_name=map_name, opacity=opacity, cmap=cmap, filename=filename, showfig=showfig) return out
def _spatio_per_searchword(searchwords, geo='', date_start=None, date_stop=None, method='news', include_suggestions=False, verbose=3):
    """Collect the geographical results of every searchword for a single geo.

    Parameters
    ----------
    searchwords : str or list
        One or more searchwords; a single string is wrapped in a list.
    geo : str
        Single geographical location. An empty string or a list is rejected.
    date_start, date_stop, method, include_suggestions
        Forwarded to _set_dates and _country_per_searchword.
    verbose : int, optional
        Print message to screen. The default is 3.

    Raises
    ------
    Exception
        When geo is an empty string or a list.

    Returns
    -------
    dict with keys 'method', 'df', 'geo', 'geo_names', 'date_range' and
    'searchwords'.

    """
    # Validate input
    if geo == '':
        raise Exception('geo should be a string')
    if isinstance(geo, list):
        raise Exception('geo should be a string and can not be of type list.')
    if isinstance(searchwords, str):
        searchwords = [searchwords]

    # Only the date range (third item) is needed here
    _start, _stop, date_range = _set_dates(date_start, date_stop, verbose=verbose)

    # One dataframe per searchword
    frames = []
    for word in searchwords:
        if verbose >= 3:
            print('[googletrends] [%s] Working on %s..' % (geo, word))
        per_city = _country_per_searchword(
            [word],
            geo=geo,
            date_start=date_start,
            date_stop=date_stop,
            method=method,
            include_suggestions=include_suggestions,
            verbose=0,
        )
        frames.append(pd.DataFrame(per_city))

    # Searchwords side by side in one dataframe
    combined = pd.concat(frames, axis=1)

    return {
        'method': 'geo',
        'df': combined,
        'geo': geo,
        'geo_names': worldmap.code2county(geo)[1],
        'date_range': date_range,
        'searchwords': searchwords,
    }
# %%
import worldmap
print(worldmap.__version__)

# %% Retrieve the available maps that can be used to superimpose data on top of
map_names, filenames = worldmap.map_names()

# %% Retrieve the names in each map
for map_name in ('world', 'netherlands', 'belgium', 'new zealand'):
    county_names = worldmap.county_names(map_name=map_name)

# %% Retrieve names for abbreviations
for code in ('NL', 'GB'):
    out = worldmap.code2county(code)

# %% Retrieve abbreviations for names
for name in ('Netherlands', 'Germany'):
    out = worldmap.county2code(name)

# %% Coloring of maps
county_names = ['zeeland', 'Overijssel', 'flevoland']
opacity = [0.4, 0.6, 0.9]
out = worldmap.plot(
    county_names,
    map_name='netherlands',
    opacity=opacity,
    cmap='Set1',
    filename='Netherlands_map.svg',
)

county_names = ['Norway', 'Nederland', 'belgium']
def plot_worldmap(results, cmap=['#ff0000'], showfig=True, verbose=3):
    """Plot results on the worldmap derived from googletrends.spatio().

    The dataframes of all countries are stacked, summed per country and
    plotted on the 'world' map, one map per searchword.

    Parameters
    ----------
    results : dict
        Results with a 'geo' entry listing the country codes and, for every
        code, results[code]['df'] holding the dataframe of that country.
    cmap : list or str, optional
        Colors forwarded to worldmap.plot for countries that have data.
        The default is ['#ff0000'].
    showfig : bool, optional
        When True, auto open the map figures in your browser. The default is True.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    Output of the last worldmap.plot call, or None when results['geo'] is empty.

    Examples
    --------
    >>> results = googletrends.spatio(['corona','covid-19'], geo=['nl','italy'], date_start='01-12-2019')
    >>> googletrends.plot_worldmap(results)

    """
    out = None
    if verbose >= 3:
        print('[googletrends] Superimposing geographical results on worldmap.')

    frames = []
    for geo_name in results['geo']:
        # Work on a copy: adding the 'geo' column to the caller's dataframe
        # would change the results that are handed to other functions later.
        dftmp = results[geo_name]['df'].copy()
        dftmp['geo'] = worldmap.code2county(geo_name)[1]
        frames.append(dftmp)

    # pd.concat raises on an empty list; there is simply nothing to plot.
    if not frames:
        return out

    # Combine data in 1 dataframe and sum per country
    df = pd.concat(frames, axis=0)
    df = df.groupby(by='geo').sum()
    data, colnames, idx_names = _normalize_data(df)

    for i in range(data.shape[1]):
        # Color only those with value>0
        idx = np.where(data[:, i] > 0)[0]
        county_names = idx_names[idx]
        opacity = data[idx, i]
        filename = colnames[i] + '_worldmap.svg'
        # Per-iteration color so the grey fallback does not leak into the
        # maps of the following searchwords.
        plot_cmap = cmap

        # If no data, color all countries grey
        if len(county_names) == 0:
            county_names = idx_names
            opacity = 1
            plot_cmap = ['#D3D3D3']

        # Plot on map
        out = worldmap.plot(county_names, map_name='world', opacity=opacity,
                            cmap=plot_cmap, filename=filename, showfig=showfig)

    return out
def spatio(searchwords, geo='', date_start=None, date_stop=None, method='news', include_suggestions=False, verbose=3):
    """Gather data for google searches over geographical locations and time.

    Parameters
    ----------
    searchwords : str or list
        Lookup each input word and return the (normalized) frequency of google searches.
        Example: ['Corona','earth quake']
    geo : str or list
        Filter on geographical locations.
        'NL' (only netherlands), ['NL','germany','IT'], 'world' to examine all countries.
        Entries longer than 3 characters are treated as names and converted to a code.
    date_start : str [dd-mm-yyyy]
        String: Start date for counting.
    date_stop : str [dd-mm-yyyy], optional
        String: Stop date for counting. If nothing is filled in, date of today is used.
    method : str, optional
        Type of google search. The default is 'news'.
        Choose on of those: 'images','news','youtube','froogle'
    include_suggestions : bool, optional
        Include suggestions. The default is False.
    verbose : int, optional
        Print message to screen. The default is 3.

    Raises
    ------
    Exception
        When there is no internet connection or when geo is an empty string.
        Code 429: Too Many google requests in a given amount of time ("rate limiting").

    Returns
    -------
    dict with one entry per country code (the results of that country) plus
    the keys 'method', 'date_range', 'searchwords', 'geo' and 'geo_names'.

    Examples
    --------
    >>> result = googletrends.spatio(['corona','covid-19'], geo=['nl','italy'], date_start='01-12-2019')
    >>> googletrends.plot(result)

    """
    if not check_connection.internet():
        raise Exception('No internet connection')
    if isinstance(searchwords, str):
        searchwords = [searchwords]

    # Bring geo into list form. A new list is always created so that the
    # list passed in by the caller is never overwritten with country codes.
    if isinstance(geo, str):
        if geo == '':
            raise Exception('geo should be a string')
        if geo == 'world':
            geo = list(get_geo_names()['code'].values)
        else:
            geo = [geo]
    else:
        geo = list(geo)

    # Get data range and message
    _, _, date_range = _set_dates(date_start, date_stop, verbose=verbose)

    # Convert country names to codes
    codes = []
    for name in geo:
        code = name.upper()
        if len(code) > 3:
            code = worldmap.county2code(code)[0][0].upper()
        codes.append(code)
    geo = codes

    # Search for searchwords
    out = {}
    for geo_name in geo:
        out[geo_name] = _spatio_per_searchword(
            searchwords,
            geo=geo_name,
            date_start=date_start,
            date_stop=date_stop,
            method=method,
            include_suggestions=include_suggestions,
            verbose=verbose)

    # Fin
    out['method'] = 'geo'
    out['date_range'] = date_range
    out['searchwords'] = searchwords
    out['geo'] = geo
    out['geo_names'] = [worldmap.code2county(code)[1] for code in geo]
    return out
def trending(searchwords, geo=None, date_start=None, date_stop=None, method='news', verbose=3):
    """Gather data for trending google searches.

    Description
    -----------
    If you choose a time period that is 3 months or shorter you get daily data, otherwise you get weekly data.
    If the time period is 3 years or longer, the monthly data is plotted, otherwise it is weekly data.

    Parameters
    ----------
    searchwords : str or list
        Lookup each input word and return the (normalized) frequency of google searches.
        Example: ['Corona','earth quake']
    geo : str or list
        Filter on geographical locations.
        'NL' (only netherlands), ['NL','germany','IT'], 'world' to examine all countries.
        Entries longer than 3 characters are treated as names and converted to a code.
    date_start : str [dd-mm-yyyy]
        String: Start date for counting.
    date_stop : str [dd-mm-yyyy], optional
        String: Stop date for counting. If nothing is filled in, date of today is used.
    method : str, optional
        Type of google search. The default is 'news'.
        Choose on of those: 'images','news','youtube','froogle'
    verbose : int, optional
        Print message to screen. The default is 3.

    Raises
    ------
    Exception
        When there is no internet connection or when geo is not provided.
        Code 429: Too Many google requests in a given amount of time ("rate limiting").

    Returns
    -------
    dict with the keys 'method', 'trending', 'rising', 'top', 'geo',
    'geo_names', 'searchwords' and 'date_range'. 'trending', 'rising' and
    'top' are dicts keyed by country code; a country without data holds an
    empty dict.

    Examples
    --------
    >>> result = googletrends.trending(['corona','covid-19'], geo=['nl','italy'], date_start='01-12-2019')
    >>> googletrends.plot(result)

    """
    if not check_connection.internet():
        raise Exception('No internet connection')
    if isinstance(searchwords, str):
        searchwords = [searchwords]
    if geo is None:
        raise Exception('parameter [geo] must be provided.')

    # 'world' has to be expanded before a string is wrapped in a list,
    # otherwise the comparison with 'world' can never be true. A new list is
    # always created so the caller's list is not overwritten with codes.
    if isinstance(geo, str):
        if geo == 'world':
            geo = list(get_geo_names()['code'].values)
        else:
            geo = [geo]
    else:
        geo = list(geo)

    # Convert country names to codes
    codes = []
    for name in geo:
        code = name.upper()
        if len(code) > 3:
            code = worldmap.county2code(code)[0][0].upper()
        codes.append(code)
    geo = codes

    # Get data range and message
    _, _, date_range = _set_dates(date_start, date_stop, verbose=verbose)

    # Collect data per searchword
    df, df_rising, df_top = {}, {}, {}
    if verbose >= 3:
        print('[googletrends]')

    for geo_name in geo:
        df[geo_name], df_rising[geo_name], df_top[geo_name] = {}, {}, {}
        dftmp, dftmp1, dftmp2 = [], [], []
        if verbose >= 3:
            print('--------[%s]--------' % geo_name)

        for searchword in searchwords:
            try:
                # Initialize
                pytrends, _ = _initialize([searchword], date_start, date_stop, geo_name, method, verbose=0)

                # Trending
                trending_searches = pytrends.trending_searches(
                    pn=worldmap.code2county(geo_name)[1].lower())
                trending_searches['searchword'] = searchword
                dftmp.append(trending_searches)

                # Top
                related_queries = pytrends.related_queries()
                tmptop = pd.DataFrame(related_queries[searchword]['top'])
                if not tmptop.empty:
                    tmptop['searchword'] = searchword
                    dftmp1.append(tmptop)

                # Rising
                tmprising = pd.DataFrame(related_queries[searchword]['rising'])
                if not tmprising.empty:
                    tmprising['searchword'] = searchword
                    dftmp2.append(tmprising)

                if verbose >= 3:
                    print('[%s]\n Top: %.0f\n Rising: %.0f\n Trending: %.0f'
                          % (searchword, tmptop.shape[0], tmprising.shape[0], trending_searches.shape[0]))
            except Exception:
                # Best effort: a failing searchword/geo must not abort the
                # other lookups. KeyboardInterrupt/SystemExit are not caught.
                if verbose >= 2:
                    print('[googletrends] [%s][%s]: Warning: Could not retrieve informatie. Maybe wrong geo or searchword?'
                          % (geo_name, searchword))

        # Combine data in 1 dataframe. sort_values returns a new frame, so
        # the sorted result has to be assigned instead of reset in place.
        if len(dftmp1) > 0:
            dftmp1 = pd.concat(dftmp1, axis=0)
            dftmp1 = dftmp1.sort_values(by='value', ascending=False).reset_index(drop=True)
            df_top[geo_name] = dftmp1
        if len(dftmp2) > 0:
            dftmp2 = pd.concat(dftmp2, axis=0)
            dftmp2 = dftmp2.sort_values(by='value', ascending=False).reset_index(drop=True)
            df_rising[geo_name] = dftmp2
        if len(dftmp) > 0:
            dftmp = pd.concat(dftmp, axis=0)
            dftmp.reset_index(drop=True, inplace=True)
            df[geo_name] = dftmp

    out = {}
    out['method'] = 'trending'
    out['trending'] = df
    out['rising'] = df_rising
    out['top'] = df_top
    out['geo'] = geo
    out['geo_names'] = [worldmap.code2county(code)[1] for code in geo]
    out['searchwords'] = searchwords
    out['date_range'] = date_range
    return out
def temporal(searchwords, geo=None, date_start=None, date_stop=None, method='news', verbose=3):
    """Gather data for google searches over time.

    Parameters
    ----------
    searchwords : str or list
        Lookup each input word and return the (normalized) frequency of google searches.
        Example: ['Corona','earth quake']
    geo : str or list
        Filter on geographical locations.
        'NL' (only netherlands), ['NL','germany','IT'], 'world' to examine all countries.
        Entries longer than 3 characters are treated as names and converted to a code.
    date_start : str [dd-mm-yyyy]
        String: Start date for counting.
    date_stop : str [dd-mm-yyyy], optional
        String: Stop date for counting. If nothing is filled in, date of today is used.
    method : str, optional
        Type of google search. The default is 'news'.
        Choose on of those: 'images','news','youtube','froogle'
    verbose : int, optional
        Print message to screen. The default is 3.

    Raises
    ------
    Exception
        When there is no internet connection or when geo is not provided.
        Code 429: Too Many google requests in a given amount of time ("rate limiting").

    Returns
    -------
    dict with the keys 'method', 'df', 'geo', 'geo_names', 'searchwords' and
    'date_range'. 'df' is a dict keyed by country; a country for which no
    data could be collected holds an empty dataframe.

    Examples
    --------
    >>> result = googletrends.temporal(['corona','covid-19'], geo=['nl','italy'], date_start='01-12-2019')
    >>> googletrends.plot(result)

    """
    if not check_connection.internet():
        raise Exception('No internet connection')
    if isinstance(searchwords, str):
        searchwords = [searchwords]
    # Fail with a clear message instead of a TypeError on len(None) below.
    if geo is None:
        raise Exception('parameter [geo] must be provided.')

    # A new list is always created so that the list passed in by the caller
    # is not overwritten with country codes.
    if isinstance(geo, str):
        if geo == 'world':
            geo = list(get_geo_names()['code'].values)
        else:
            geo = [geo]
    else:
        geo = list(geo)

    if verbose >= 3:
        print('[googletrends] Collecting trends over time for geographically: %s' % (geo))

    # Get data range and message
    _, _, date_range = _set_dates(date_start, date_stop, verbose=verbose)

    # Convert country names to codes
    codes = []
    for name in geo:
        code = name.upper()
        if len(code) > 3:
            code = worldmap.county2code(code)[0][0].upper()
        codes.append(code)
    geo = codes

    # Collect data per searchword
    df_geo = {}
    for geo_name in geo:
        dftmp = []
        for searchword in searchwords:
            try:
                if verbose >= 3:
                    print('[googletrends] [%s] Working on %s..' % (geo_name, searchword))
                # NOTE(review): _initialize hands back a geo name that replaces
                # the loop value and is used as key below - confirm that it
                # always matches the entries of out['geo'].
                pytrends, geo_name = _initialize([searchword], date_start, date_stop, geo_name, method, verbose=0)
                data_time = pytrends.interest_over_time()
                if not data_time.empty:
                    data_time.sort_values('date', inplace=True)
                    dftmp.append(data_time[[searchword]])
            except Exception:
                # Best effort: a failing searchword must not abort the others.
                if verbose >= 2:
                    print('[googletrends] [%s] Failed %s..' % (geo_name, searchword))

        # Combine data in 1 dataframe
        if len(dftmp) > 0:
            dftmp = pd.concat(dftmp, axis=1)
            dftmp.reset_index(inplace=True, drop=False)
            df_geo[geo_name] = dftmp
        else:
            # Only mark this country as empty; the results that were already
            # collected for other countries must be kept.
            df_geo[geo_name] = pd.DataFrame()

    out = {}
    out['method'] = 'temporal'
    out['df'] = df_geo
    out['geo'] = geo
    out['geo_names'] = [worldmap.code2county(code)[1] for code in geo]
    out['searchwords'] = searchwords
    out['date_range'] = date_range
    return out