def retrieve_google_trends(self, search, date_range):
    """Fetch Google Trends data for a single search term.

    Builds a news-property payload for the first timeframe in *date_range*,
    then pulls interest-over-time and related queries.

    Returns:
        (trends, related_queries) on success, or None when retrieval fails.
    """
    # Trend-fetching client: US-English locale, timezone offset 360.
    client = TrendReq(hl='en-US', tz=360)
    keywords = [search]
    try:
        # Build the request payload against the 'news' property.
        client.build_payload(keywords, cat=0, timeframe=date_range[0],
                             geo='', gprop='news')
        trends = client.interest_over_time()
        related_queries = client.related_queries()
    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return
    return trends, related_queries
def retrieve_google_trends(self, search, date_range):
    """Query Google Trends for one term over ``date_range[0]``.

    Returns a ``(trends, related_queries)`` tuple, or ``None`` if the
    request raises for any reason.
    """
    # Set up the trend-fetching object.
    trend_client = TrendReq(hl='en-US', tz=360)
    try:
        # Create the search payload (news property, no geo restriction).
        trend_client.build_payload([search], cat=0,
                                   timeframe=date_range[0],
                                   geo='', gprop='news')
        # Retrieve interest over time plus the related-query tables.
        interest = trend_client.interest_over_time()
        related = trend_client.related_queries()
    except Exception as err:
        print('\nGoogle Search Trend retrieval failed.')
        print(err)
        return
    return interest, related
def wordcloud_queries(key, date):
    """Render a word cloud of Google Trends queries related to *key*.

    Uses the module-level ``pytrends`` client to fetch related queries for
    the given timeframe *date*, then saves a PNG to
    ``image/{key}_google_wordcloud.png``.

    BUG FIX: the original only bound ``input_top`` when the 'top' table was
    present, but called ``plot_wordcloud(input_top)`` unconditionally — a
    NameError whenever Google returned no data. Now we return early instead.
    The unused ``input_rising`` local (which could also raise when 'rising'
    is None) has been removed.
    """
    pytrends.build_payload([key], cat=0, timeframe=date, geo='', gprop='')
    query = pytrends.related_queries()
    if query[key]['top'] is None:
        # No related-query data for this keyword; nothing to plot.
        return
    input_top = list(query[key]['top']['query'])

    def plot_wordcloud(output):
        # Join the query strings into one blob for WordCloud to tokenize.
        text = " ".join(output)
        wordcloud = WordCloud(max_font_size=70, max_words=1000,
                              colormap="Blues",
                              background_color="white").generate(text)
        plt.figure(figsize=(8, 6))
        plt.title("WordCloud of Top 25 Related Queries", fontsize=20,
                  weight='bold', fontfamily='sans-serif')
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.savefig("image/{}_google_wordcloud.png".format(key),
                    bbox_inches='tight', dpi=100)
        plt.close()

    plot_wordcloud(input_top)
def pop2(lists):
    """Return the keyword from *lists* with the highest normalized Google
    Trends interest at the most recent date.

    Each keyword's interest-over-time column is normalized by its own max so
    the columns are comparable, then the winner at the latest row is picked.

    FIX: the original also fetched related queries for the winner and built
    a 5-element hashtag list, but discarded all of it and returned only the
    keyword — that dead code (and its extra network round-trip, which could
    crash when 'top' was None) has been removed. The return value is the
    same: the most popular keyword string.
    """
    from pytrends.request import TrendReq

    # Create pytrends object and request data from Google Trends.
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload(lists, cat=0, timeframe='today 5-y',
                           geo='', gprop='')

    # Fetch and normalize the interest data (drop the bookkeeping column).
    data = pytrends.interest_over_time()
    data.drop('isPartial', axis=1, inplace=True)
    norm_data = data.apply(lambda x: x / x.max(), axis=0)

    # Most recent row decides the winner.
    recent = norm_data.values[-1].tolist()
    max_index = recent.index(max(recent))
    return lists[max_index]
def retrieve_google_trends(self, search, date_range):
    """Pull interest-over-time and related queries for *search*.

    The first entry of *date_range* is used as the pytrends timeframe.
    On any failure the error is printed and ``None`` is returned;
    otherwise a ``(trends, related_queries)`` tuple comes back.
    """
    fetcher = TrendReq(hl='en-US', tz=360)  # trend fetching object
    terms = [search]
    try:
        fetcher.build_payload(
            terms,
            cat=0,
            timeframe=date_range[0],
            geo='',
            gprop='news',
        )
        over_time = fetcher.interest_over_time()
        related = fetcher.related_queries()
    except Exception as exc:
        print('\nGoogle Search Trend retrieval failed.')
        print(exc)
        return
    return over_time, related
def google_trends(query):
    """Fetch Google Trends interest-over-time and related queries for *query*.

    Prints a preview of both result sets and returns them as a tuple
    ``(interest, related_queries)``.
    """
    # Set up the trend fetching object.
    pytrends = TrendReq(hl='en-US', tz=360)
    # BUG FIX: the original read the undefined name `search` here, which
    # raised NameError on every call; use the `query` parameter instead.
    kw_list = [query]
    # Create the search payload (news property, default timeframe).
    pytrends.build_payload(kw_list, cat=0, geo='', gprop='news')
    # Get the interest over time.
    interest = pytrends.interest_over_time()
    print(interest.head())
    # Get related searches.
    related_queries = pytrends.related_queries()
    print(related_queries)
    return interest, related_queries
def pop(lists, num_hashtags=5):
    """Return the top related-query hashtags for the most popular keyword in
    *lists*, and save a 12-month interest plot to ``static/output.png``.

    Args:
        lists: keywords to compare on Google Trends.
        num_hashtags: how many related queries to return (default 5, matching
            the original hard-coded count — now a backward-compatible
            parameter).

    Returns:
        List of up to *num_hashtags* related-query strings for the keyword
        with the highest normalized interest at the most recent date.

    FIXES: the original re-imported pandas/pytrends/matplotlib/datetime in
    the middle of the function, shadowed the ``time`` and ``datetime``
    modules via ``from datetime import datetime, date, time``, created the
    ``TrendReq`` plotting client twice, and returned through an unnecessary
    ``poplist`` alias. All of that is removed; behavior is unchanged.
    """
    from pytrends.request import TrendReq
    import seaborn as sns

    # Client for the popularity comparison over 5 years.
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload(lists, cat=0, timeframe='today 5-y',
                           geo='', gprop='')

    # Normalize each keyword's column by its own max so they are comparable.
    data = pytrends.interest_over_time()
    data.drop('isPartial', axis=1, inplace=True)
    norm_data = data.apply(lambda x: x / x.max(), axis=0)

    # Winner = highest normalized value at the most recent date.
    recent = norm_data.values[-1].tolist()
    out = lists[recent.index(max(recent))]

    # The winner's top related queries become the hashtags.
    pytrends.build_payload(kw_list=[out], cat=0, timeframe='today 5-y',
                           geo='', gprop='')
    trend_list = pytrends.related_queries()[out]['top']['query']
    top_list = [trend_list[i] for i in range(num_hashtags)]

    # Plot 12-month GB interest (category 71) for the original keyword list.
    pytrend = TrendReq()
    pytrend.build_payload(lists, timeframe='today 12-m', geo='GB', cat=71)
    interest_over_time_df = pytrend.interest_over_time()
    print(interest_over_time_df.head())
    sns.set(color_codes=True)
    dx = interest_over_time_df.plot.line(figsize=(9, 6),
                                         title="Interest Over Time")
    dx.set_xlabel('Date')
    dx.set_ylabel('Trends Index')
    dx.tick_params(axis='both', which='major', labelsize=13)
    dx.figure.savefig("static/output.png")

    return top_list
# First 500 keywords from the split frame (rows 0-301 already done per the
# original note).
df_split_test = df_split["keywords"].values.tolist()[0:500]  # 0-301 DONE

# Echo each keyword being processed (kept from the original logging loop).
for kw in df_split_test:
    print([kw])

# Collect the top 25 related queries for every keyword over H1 2020 (US).
# FIXES vs. original: `related_queries != None` -> `is not None`; the unused
# `startTime` stopwatch is gone; per-iteration DataFrame.append (deprecated
# and quadratic) is replaced by collecting frames and a single pd.concat;
# a None 'top' table no longer crashes the loop.
frames = []
for kw in df_split_test:
    pytrends.build_payload(kw_list=[kw], cat=0,
                           timeframe='2020-01-01 2020-07-30', geo='US')
    related_queries = pytrends.related_queries()
    if related_queries is not None and related_queries[kw]['top'] is not None:
        frames.append(related_queries[kw]['top'].head(25))

keyword_data = pd.concat(frames) if frames else pd.DataFrame()
print(keyword_data)
keyword_data.to_csv('allkeywords100_200.csv')