def tracking_in_time_keywords(kw_list):
    """Fetch Google Trends interest-over-time for keyword/category pairs.

    ``kw_list`` alternates keyword lists (even indices) and category ids
    (odd indices).  Relies on module-level names ``dates`` (timeframe
    string), ``pd``, ``random`` and ``timer`` (the time module).

    Returns a long-format DataFrame with columns
    ``[keyword, date, trend_index]``.
    """
    # NOTE(review): the original read TrendReq(hl='country',
    # tz=<timezone you want>) — a syntax-error placeholder.  hl/geo
    # below are still placeholders; set them to the desired locale.
    pytrends = TrendReq(hl='country', tz=0)
    future_dataframe = {}
    result = None  # stays None if every request fails
    c = 1
    for i in range(len(kw_list)):
        if i % 2 != 0:
            continue  # odd indices carry the category for the previous entry
        try:
            print("Requesting ", str(kw_list[i]))
            pytrends.build_payload(kw_list[i], cat=kw_list[i + 1],
                                   timeframe=dates, geo='country', gprop='')
            future_dataframe[c] = pytrends.interest_over_time()
            # isPartial marks incomplete trailing periods; drop it.
            future_dataframe[c].drop(['isPartial'], axis=1, inplace=True)
            c += 1
            # Re-concatenate after every success so a later failure
            # still leaves a usable ``result``.
            result = pd.concat(future_dataframe, axis=1)
            # this is for intense use of the script, remove it to avoid
            # Cloud Function timeout (and save money)
            secs = int(random.randrange(10, 50))
            print("Sleeping {} seconds before requesting ".format(secs), str(kw_list[i]))
            timer.sleep(secs)
            print("Done")
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt escape.
            print("***", "\n", "Error with ", kw_list[i],
                  "or not enough trending percentaje", "\n", "***")
    # Flatten the (request, keyword) MultiIndex into long format.
    result.columns = result.columns.droplevel(0)
    df1 = result.unstack(level=-1)
    df2 = pd.DataFrame(df1)
    df2.reset_index(inplace=True)
    df2.columns = ["keyword", "date", "trend_index"]
    return df2
def queryGTrends(self, kw_list):
    """Return the all-time interest-over-time DataFrame for ``kw_list``.

    Uses the anonymous pytrends API: the hard-coded placeholder
    username/password locals were dead code (modern pytrends does not
    take credentials) and have been removed.
    """
    pytrends = TrendReq(hl='en-US')
    pytrends.build_payload(kw_list, timeframe='all')
    # Renamed from ``time`` so the result no longer shadows the stdlib module.
    interest = pytrends.interest_over_time()
    # Other pytrends endpoints (related_queries, related_topics,
    # trending_searches, suggestions) are available on the same client
    # if ever needed.
    return interest
def retrieve_google_trends(self, search, date_range):
    """Fetch interest-over-time and related queries for one search term.

    Returns (trends, related_queries) on success, None on failure.
    """
    # Trend-fetching client; tz=360 is UTC-6 (US CST).
    client = TrendReq(hl='en-US', tz=360)
    keywords = [search]
    try:
        # Register the query against Google News, then pull both result sets.
        client.build_payload(keywords, cat=0, timeframe=date_range[0],
                             geo='', gprop='news')
        trends = client.interest_over_time()
        related_queries = client.related_queries()
    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return
    return trends, related_queries
def get_requirements():
    """Flask endpoint: 5-year Google Trends interest for ``?skill=...``.

    Responds 400 when the ``skill`` query parameter is missing; otherwise
    returns the interest-over-time frame as JSON with CORS enabled.
    """
    skill = request.args.get('skill')
    if skill is None:
        abort(400)
    # The hard-coded placeholder username/password were removed: modern
    # pytrends takes no credentials, and secrets never belong in source.
    pytrends = TrendReq(hl='en-US', tz=360, custom_useragent=None)
    pytrends.build_payload([skill], cat=0, timeframe='today 5-y', geo='', gprop='')
    interest_over_time_df = pytrends.interest_over_time()
    # Every to_json argument other than these two was its default value.
    resp = make_response(
        interest_over_time_df.to_json(date_format='epoch', date_unit='ms'))
    resp.headers['Access-Control-Allow-Origin'] = "*"
    return resp
def interest_related_topics(key, date):
    """Plot interest over time for up to five topics related to ``key``
    and save the figure to image/google_topics.png."""
    pytrends.build_payload([key], cat=0, timeframe=date, geo='', gprop='')
    related = pytrends.related_topics()[key]['top']
    # Collect up to five distinct related topic titles.
    topics = []
    for title in related['topic_title']:
        if title not in topics:
            topics.append(title)
        if len(topics) == 5:
            break
    # Second request: interest over time for the related topics themselves.
    pytrends.build_payload(topics, cat=0, timeframe=date, geo='', gprop='')
    interest = pytrends.interest_over_time().reset_index()
    fig, ax = plt.subplots(figsize=(8, 6))
    for topic in topics:
        sns.lineplot(x="date", y=topic, label=topic, data=interest, ax=ax)
    plt.axhline(y=0, color='#414141', linewidth=1.5, alpha=.5)
    ax.set_yticks([0, 25, 50, 75, 100])
    plt.title('Interest Over Time of Related Topics', fontsize=25, weight='bold', fontfamily='sans-serif')
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Interest', fontsize=15)
    plt.xlabel("")
    plt.legend(frameon=False, bbox_to_anchor=(1, 0.8))
    plt.savefig("image/google_topics.png", bbox_inches='tight', dpi=100)
    plt.close()
def scrape_word(text, initial=False):
    """Append the 5-year GB trend series for ``text`` as a row of trends.csv.

    Row layout: the keyword first, then one interest value per period.
    When ``initial`` is True the file is (re)created instead of appended to.
    """
    kw_list = [text]
    pytrends.build_payload(kw_list=kw_list, cat=0, timeframe='today 5-y', geo='GB')
    # Interest Over Time
    interest_over_time_df = pytrends.interest_over_time()
    print(interest_over_time_df)
    flattened = interest_over_time_df[[text]].values.tolist()
    interest_flattened = [text]
    for row in flattened:
        interest_flattened.append(row[0])
    print(interest_flattened)
    # Single write path — only the file mode differed between the two
    # duplicated branches.  newline='' is the documented way to open a
    # file for the csv module (avoids blank lines on Windows).
    mode = 'w' if initial else 'a'
    with open('trends.csv', mode, newline='') as result_file:
        wr = csv.writer(result_file, dialect='excel')
        wr.writerow(interest_flattened)
def create_google_trends_df(data_frame, search):
    """Create a daily Google Trends DataFrame for ``search`` spanning the
    date range of ``data_frame``.

    Returns None when trend retrieval fails.
    """
    # Build the 'start end' timeframe string from the frame's date span.
    date_range = \
        [f'{get_df_start_date(data_frame)} {get_df_end_date(data_frame)}']
    # Set up the trend fetching object
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [search]
    try:
        # Create the search object (Google News property)
        pytrends.build_payload(kw_list, cat=0, timeframe=date_range[0],
                               geo='', gprop='news')
        # Retrieve the interest over time
        trends = pytrends.interest_over_time()
    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return
    # Upsample weekly/monthly data to daily resolution.
    trends = trends.resample('D').mean()
    # add column indicating how long since trend updated
    trends = create_days_since_valid_value(trends, search, 'Days since updated')
    # Forward-fill the NaNs introduced by the upsample.  Instance
    # ``ffill()`` replaces the deprecated fillna(method='ffill') call
    # that was invoked as an unbound method.
    trends = trends.ffill()
    return trends
def retrieve_google_trends(self, search, date_range):
    """Return (interest_over_time, related_queries) for ``search`` over
    ``date_range[0]``, restricted to Google News; None on failure."""
    trend_client = TrendReq(hl='en-US', tz=360)  # tz 360 = US CST
    try:
        # One keyword per request; news property only.
        trend_client.build_payload(
            [search], cat=0, timeframe=date_range[0], geo='', gprop='news')
        trends = trend_client.interest_over_time()
        related_queries = trend_client.related_queries()
    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return
    return trends, related_queries
def get_topk_related(keyword, k=3):
    """Return (and memoize in module-level ``dict_relate``) the top-k
    related-topic titles for ``keyword`` over the last month, as a set,
    or None when Google returns nothing."""
    if keyword in dict_relate:
        return dict_relate[keyword]
    pytrends.build_payload([keyword], cat=0, timeframe='today 1-m', geo='', gprop='')
    # Bug fix: the original tested ``ret is not None`` only AFTER
    # accessing ret.title, so the guard could never take effect.
    frames = list(pytrends.related_topics().values())
    frame = frames[0] if frames else None
    ret = set(frame.title[0:k].values) if frame is not None else None
    dict_relate[keyword] = ret
    return ret
def hello_world():
    """Return the 5-year worldwide interest-over-time for 'Blockchain'
    rendered as a string."""
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload(["Blockchain"], cat=0, timeframe='today 5-y',
                         geo='', gprop='')
    frame = client.interest_over_time()
    return str(frame)
def getPayload(searchterm, startdate, enddate):
    """Return interest-over-time for ``searchterm`` between the two
    dates, rendered with DataFrame.to_string()."""
    print(f"Finding data for {searchterm} between {startdate}, and {enddate}...")
    # setting up connection to google (tz 360 = US CST);
    # trailing semicolons and string concatenation replaced with idiomatic Python
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [searchterm]
    time_frame = f'{startdate} {enddate}'
    # building payload, cat always 0, geo and gprop empty for now
    pytrends.build_payload(kw_list, cat=0, timeframe=time_frame, geo='', gprop='')
    return pytrends.interest_over_time().to_string()
def pull_trends(kw_list):
    """Append each keyword and its mean hourly US interest (2018-08-01
    through 2018-10-10) to the module-level ``city_mean_list``
    (interleaved as [keyword, mean, keyword, mean, ...])."""
    pytrends = TrendReq(hl='en-US', tz=360)
    print(kw_list)
    # Note: get_historical_interest builds its own payloads internally,
    # so the original's extra build_payload(..., 'now 1-H', ...) call was
    # redundant network traffic and has been removed.
    y = pytrends.get_historical_interest(
        kw_list, year_start=2018, month_start=8, day_start=1, hour_start=0,
        year_end=2018, month_end=10, day_end=10, hour_end=0,
        cat=0, geo='US', gprop='', sleep=0)
    print(y)
    for each in kw_list:
        mu = y[str(each)].mean()
        city_mean_list.append(each)
        city_mean_list.append(mu)
def interest_by_country(key, date):
    """Bar-plot the top-20 countries by search interest for ``key`` and
    save the figure to image/google_country.png."""
    kw_list = [key]
    pytrends.build_payload(kw_list, cat=0, timeframe=date, geo='', gprop='')
    interest_country = pytrends.interest_by_region(
        resolution='COUNTRY', inc_low_vol=False, inc_geo_code=True)
    interest_country = interest_country.sort_values(key, ascending=False)
    # Off-by-one fix: .iloc[:19] kept only 19 rows although the chart is
    # titled "Top 20".
    interest_country = interest_country.iloc[:20, :].reset_index()
    fig, ax = plt.subplots(figsize=(10, 6))
    plt.axhline(y=0, color='#414141', linewidth=1.5, alpha=.5)
    ax.set_yticks([0, 25, 50, 75, 100])
    ax = sns.barplot(x="geoName", y=kw_list[0], data=interest_country, palette='mako')
    plt.xticks(rotation=45, ha='right')
    plt.title('Search Interest By Country (Top 20)', fontsize=22, weight='bold', fontfamily='sans-serif')
    plt.xlabel("")
    plt.savefig("image/google_country.png", bbox_inches='tight', dpi=100)
    plt.close()
def wordcloud_queries(key, date):
    """Save a word cloud of the top related queries for ``key``.

    Does nothing when Google returns no related queries — the original
    raised NameError in that case, because ``input_top`` was assigned
    inside the guard but plot_wordcloud was called unconditionally.
    """
    pytrends.build_payload([key], cat=0, timeframe=date, geo='', gprop='')
    query = pytrends.related_queries()

    def plot_wordcloud(output):
        # Join the query strings into one text blob for WordCloud.
        text = " ".join(output)
        wordcloud = WordCloud(max_font_size=70, max_words=1000,
                              colormap="Blues", background_color="white").generate(text)
        plt.figure(figsize=(8, 6))
        plt.title("WordCloud of Top 25 Related Queries", fontsize=20, weight='bold', fontfamily='sans-serif')
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.savefig("image/{}_google_wordcloud.png".format(key), bbox_inches='tight', dpi=100)
        plt.close()

    if query[key]['top'] is not None:
        input_top = list(query[key]['top']['query'])
        # The unused ``input_rising`` extraction was dropped: it could
        # itself raise when the 'rising' frame is None.
        plot_wordcloud(input_top)
def GenerateTrends():
    """Fetch and print Google Trends interest for every rank-1 coin
    found in the CoinTracker market database."""
    print("Fetching Google Trends...", end="", flush=True)
    # Raw string: backslash escapes in a Windows path are fragile in a
    # normal literal.
    dbConfig = loadConfig(r'C:\AppCredentials\CoinTrackerPython\database.config')
    con = pyodbc.connect(dbConfig[0]["sql_conn"])
    cursor = con.cursor()
    cursor.execute(
        "select name, id, symbol from CoinTracker.dbo.Market where id in (select coin_fk from CoinTracker.dbo.MarketHistory) and rank = 1 order by id"
    )
    rows = cursor.fetchall()
    # pyodbc's cursor.rowcount is typically -1 after a SELECT, so the
    # original emptiness test never fired; test the fetched rows instead.
    if not rows:
        print("No Price sources found")
        return
    pytrends = TrendReq(hl='en-US', tz=360)
    for row in rows:
        # Compare the coin's name and id as two keywords.
        kw_list = [str(row[0]), str(row[1])]
        pytrends.build_payload(kw_list)
        interest_over_time_df = pytrends.interest_over_time()
        print(interest_over_time_df)
    print("Done")
def google_trends_five_years(inventory='strawberries'):
    """
    Google trends API script to collect daily google trends
    for the past five years on INVENTORY in the Bay Area (geo='US-CA-807')
    Result is saved to a csv file.
    """
    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        csv_path = os.path.join(os.getcwd(), 'backend', 'data')
        keyword = inventory
        start = '12/01/2012'
        # fixed: was the accidental double assignment `end = end = ...`
        end = '01/01/2018'
        past_five_years = pd.date_range(start=start, end=end, freq='M')
        # Query one month-long window at a time so Google returns daily
        # resolution, then stitch the pieces together.
        frames = []
        for i in range(len(past_five_years) - 1):
            start_time = past_five_years[i].date() + timedelta(days=1)
            end_time = past_five_years[i + 1].date()
            time_frame = str(start_time) + " " + str(end_time)
            # call Google Trends API
            pytrends.build_payload([inventory], cat=0, timeframe=time_frame,
                                   geo='US-CA-807', gprop='')
            monthly = pytrends.interest_over_time()
            frames.append(monthly[keyword].to_frame().reset_index(level=['date']))
        # pd.concat replaces the deprecated DataFrame.append loop and
        # removes the risk of ``five_year_trend`` being unbound.
        five_year_trend = pd.concat(frames)
        # save it to csv file under "../appetite/backend/data" directory
        file_name = inventory + "_google_trends_five_years.csv"
        five_year_trend.to_csv(os.path.join(csv_path, file_name))
        return "Finished collecting five-year Google Trends data"
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt still escapes.
        return "Exception raised"
def pop2(lists):
    """Return the keyword in ``lists`` with the highest max-normalized
    Google Trends interest at the most recent date."""
    from pytrends.request import TrendReq
    # Create pytrends object, request data from Google Trends
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload(lists, cat=0, timeframe='today 5-y', geo='', gprop='')
    # Specify, get, and normalize data (each column by its own maximum).
    data = pytrends.interest_over_time()
    data.drop('isPartial', axis=1, inplace=True)
    normData = data.apply(lambda x: x / x.max(), axis=0)
    # Max normalized value from most recent date + index in list
    recent = normData.values[-1].tolist()
    out = lists[recent.index(max(recent))]
    # Related top queries for the winner (collected but, as in the
    # original, not returned).
    pytrends.build_payload(kw_list=[out], cat=0, timeframe='today 5-y', geo='', gprop='')
    trend = pytrends.related_queries()
    trend_list = trend[out]['top']['query']
    # Guard: Google may return fewer than 5 related queries; the
    # original raised IndexError in that case.
    numHashtags = min(5, len(trend_list))
    topList = [trend_list[i] for i in range(numHashtags)]
    return out
def interest_over_time(key, date):
    """Line-plot Google interest over time for ``key`` and save the
    figure to image/google_freq.png."""
    pytrends.build_payload([key], cat=0, timeframe=date, geo='', gprop='')
    series = pytrends.interest_over_time().reset_index()
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.lineplot(x="date", y=key, data=series, ax=ax)
    plt.axhline(y=0, color='#414141', linewidth=1.5, alpha=.5)
    ax.set_yticks([0, 25, 50, 75, 100])
    plt.title('"{}" Google Interest Over Time '.format(key), fontsize=25, weight='bold', fontfamily='sans-serif')
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Interest', fontsize=15)
    plt.xlabel("")
    # Watermark the keyword in the middle of the axes.
    ax.text(0.5, 0.6, key, horizontalalignment='center',
            verticalalignment='center', transform=ax.transAxes)
    plt.savefig("image/google_freq.png", bbox_inches='tight', dpi=100)
    plt.close()
def get_trend_data(input):
    """Return the 5-year worldwide interest-over-time frame for ``input``.

    Note: the parameter name shadows the builtin ``input``; kept for
    backward compatibility with keyword callers.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [input]
    # build_payload returns None; the original pointlessly stored it.
    pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='', gprop='')
    return pytrends.interest_over_time()
def player_trends(list_of_players):
    """Return (player, mean 5-year interest) tuples for the first 100
    players from get_player_list(), queried in sports-category batches
    of five (Google Trends compares at most five terms per request).

    Note: ``list_of_players`` is unused (kept for interface compatibility).
    """
    client = TrendReq(hl="en-US", tz=360)
    means = []
    players = get_player_list()
    # Fixed 20 batches of 5, covering players[0:100] as in the original.
    batches = [players[start:start + 5] for start in range(0, 100, 5)]
    for batch in batches:
        client.build_payload(batch, cat=1077, timeframe='today 5-y',
                             geo='', gprop='')
        frame = client.interest_over_time()
        means.extend(frame[name].mean() for name in batch)
    return list(zip(players, means))
def pull_trend_avg(kw_list):
    """Return [[keyword, ratio], ...] over the past hour of US interest.

    On any retrieval error, falls back to a ratio of 0 for every keyword
    (best-effort behavior preserved from the original).
    """
    #print("pulling data")
    mean_list = []
    try:
        pytrends.build_payload(kw_list, cat=0, timeframe='now 1-H', geo='US', gprop='')
        z = pytrends.interest_over_time()
        for each in kw_list:
            mu = z[str(each)].mean()
            ratio = get_ratio(each, mu)
            mean_list.append([each, ratio])
        return mean_list
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt escape.
        print("You got yourself an error")
        for each in kw_list:
            mean_list.append([each, 0])
        return mean_list
def retrieve_google_trends(self, term, date_range):
    """Return a daily DataFrame with columns ``ds``/``freq`` of
    interpolated Google Trends interest for ``term``, or None on failure."""
    # Set up the trend fetching object
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [term]
    self.term = term
    try:
        # Create the search object
        pytrends.build_payload(kw_list, cat=0, timeframe=date_range, geo='', gprop='')
        # Retrieve the interest over time
        trends = pytrends.interest_over_time()
    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return
    # Upsample to daily.  Modern pandas requires an aggregation here:
    # a bare .resample('D') returns a Resampler object, not a frame,
    # and the subsequent reset_index/rename calls would fail.
    trends = trends.resample('D').mean()
    trends = trends.reset_index(level=0)
    trends = trends.rename(columns={'date': 'ds', term: 'freq'})
    # Fill the NaNs introduced by the upsample.
    trends['freq'] = trends['freq'].interpolate()
    return trends
def google_trends(query):
    """Return (interest_over_time, related_queries) for ``query``
    restricted to Google News searches."""
    # Set up the trend fetching object
    pytrends = TrendReq(hl='en-US', tz=360)
    # Bug fix: the original referenced the undefined name ``search``
    # (NameError) — the parameter is ``query``.
    kw_list = [query]
    # Create the search object (no timeframe → pytrends' default)
    pytrends.build_payload(kw_list, cat=0, geo='', gprop='news')
    # Get the interest over time
    interest = pytrends.interest_over_time()
    print(interest.head())
    # Get related searches
    related_queries = pytrends.related_queries()
    print(related_queries)
    return interest, related_queries
def retrieve_google_trends(self, search, date_range):
    """Fetch Google-News interest-over-time and related queries for
    ``search`` over ``date_range[0]``; returns None when retrieval fails."""
    fetcher = TrendReq(hl='en-US', tz=360)
    try:
        fetcher.build_payload([search],
                              cat=0,
                              timeframe=date_range[0],
                              geo='',
                              gprop='news')
        trends = fetcher.interest_over_time()
        related_queries = fetcher.related_queries()
    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return
    return trends, related_queries
def fetch():
    """Flask endpoint: render a 12-month US trends table for ``?name=...``."""
    if request.method == 'GET':
        data = request.args.get('name')
        pytrends = TrendReq(hl='en-US', tz=360)
        # Single-keyword payload over the last 12 months, US only.
        pytrends.build_payload([data], cat=0, timeframe='today 12-m',
                               geo='US', gprop='')
        df = pytrends.interest_over_time()
        # Persist the frame, then echo it back from disk (debug aid,
        # preserved from the original).
        df.to_json(r'data.json')
        with open('data.json') as f:
            d = json.load(f)
        print(d)
        return render_template('simple.html',
                               tables=[df.to_html(classes='data')],
                               titles=df.columns.values)
def GetG_trends_data():
    """Print BTC/ETH Google-Trends 'hype zone' indicators.

    Zones for the latest BTC datapoint: <75 safe, 75-84 caution,
    85-94 high caution, >=95 danger.  Two bugs fixed: the original
    printed SAFE ZONE only at exactly 100 (and nothing below 75), and
    "orange" is not a termcolor color (colored() raised KeyError).
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["bitcoin", "ethereum"]
    pytrends.build_payload(kw_list, timeframe='today 5-y', geo='US')
    # Last row only: the most recent datapoint for each keyword.
    df = pytrends.interest_over_time().tail(1)
    current_google_trends_datapoint = int(df.iat[0, 0])
    ethereum_datapoint = str(int(df.iat[0, 1]))
    print("Gtrends datapoint (ETH): " + str(ethereum_datapoint) + " (previous ATH was 9)")
    prefix = ("Gtrends datapoint (BTC): " + str(current_google_trends_datapoint)
              + ", indicating that we are in the ")
    if current_google_trends_datapoint < 75:
        print(prefix + colored("SAFE ZONE", "green"))
    elif current_google_trends_datapoint < 85:
        print(prefix + colored("CAUTION ZONE", "yellow"))
    elif current_google_trends_datapoint < 95:
        # "magenta" replaces the invalid "orange" termcolor name.
        print(prefix + colored("HIGH CAUTION ZONE", "magenta", attrs=["blink"]))
    else:
        print(prefix + colored("DANGER ZONE", "red", attrs=["blink"]))
def get_trend_result(result):
    """For each keyword group in ``result``, fetch last-month interest
    and return a list of {keyword: mean-interest-string} dicts.

    Groups longer than 5 are split into two overlapping halves (Google
    Trends compares at most 5 terms per request); the halves share a
    pivot keyword used to rescale the second half onto the first.
    Groups with fewer than 2 keywords are skipped.
    """
    result_list = []
    for r in result:
        # Need at least two keywords for a meaningful comparison.
        if len(r)<2:
            continue
        length = len(r)
        if length <= 5:
            pytrends.build_payload(r, cat=0, timeframe='today 1-m')
            interest_over_time_df = pytrends.interest_over_time()
            # Column means; [:-1] drops the trailing isPartial column.
            interest_over_time_df = interest_over_time_df.mean()[:-1]
            interest_over_time_df = interest_over_time_df.map(lambda x: '%d' % round(x))
            interest_over_time_df = interest_over_time_df.to_dict()
            result_list.append(interest_over_time_df)
        else:
            # Overlapping halves: r[length//2 - 1] appears in both so the
            # two requests share a common reference keyword.
            l_r = r[0:length//2][:5]
            r_r = r[length//2-1:][:5]
            pytrends.build_payload(l_r, cat=0, timeframe='today 1-m')
            df_1 = pytrends.interest_over_time()
            df_1 = df_1.mean()[:-1]
            pytrends.build_payload(r_r, cat=0, timeframe='today 1-m')
            df_2 = pytrends.interest_over_time()
            df_2 = df_2.mean()[:-1]
            # Rescale each half by the pivot keyword's mean (last of the
            # first half, first of the second), unless either pivot mean
            # is zero — then use the raw means.
            if(df_1[-1] > 0 and df_2[0] >0):
                r1 = df_1/df_1[-1]
                r2 = df_2/df_2[0]
            else:
                r1=df_1
                r2=df_2
            # Concatenate halves; the pivot keyword appears twice.
            interest_over_time_df = pd.concat([r1, r2])
            interest_over_time_df = interest_over_time_df.map(lambda x: '%d' % round(x))
            interest_over_time_df = interest_over_time_df.to_dict()
            result_list.append(interest_over_time_df)
    return result_list
# Platform-specific path separator (module-level ``sl`` is used by other
# code in this file to build file paths).
if os.name == 'posix':
    sl = '/'
elif os.name == 'nt':
    sl = '\\'
# timezone 360 = US CST
pytrends = TrendReq(hl='en-US', tz=360)
kw_list = ["Bitcoin"]
# pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='', gprop='')
# Specific time is UTC
# pytrends.build_payload(kw_list, cat=0, timeframe='2017-02-06T10 2017-02-12T07', geo='', gprop='')
# date format YYYY-DD-MM
# NOTE(review): the pickle name suggests this multi-year request yields
# monthly-resolution data — confirm against pytrends behavior.
pytrends.build_payload(kw_list, cat=0, timeframe='2009-01-01 2017-12-31', geo='', gprop='')
dt_pd_google = pytrends.interest_over_time()
dt_pd_google.rename(columns={'Bitcoin': 'google_tr'}, inplace=True)
# First difference and rounded 30-period moving average of the index.
dt_pd_google['google_tr_fd'] = dt_pd_google['google_tr'].diff(periods=1)
dt_pd_google['google_tr_MAVG30'] = round(
    dt_pd_google['google_tr'].rolling(window=30).mean(), 0)
dt_pd_google.to_pickle('dt_pd_google_monthly.pickle')
print('Google Trend Download Done')

if __name__ == '__main__':
    main()
print(dates)
# Spanish-market keyword list: video-conferencing apps, messaging,
# news/politics topics, streaming services, delivery platforms, and
# home-fitness terms.
keywords = [
    "zoom", "teams", "skype", "hangouts", "teletrabajo", "videollamada",
    "videoconferencia", "whatsapp", "telegram", "viber", "tiktok",
    "refugiados", "inmigracion", "nacionalismo", "corrupcion", "juicio",
    "guerra comercial", "coronavirus", "pandemia", "infeccion", "medico",
    "disney", "amazon", "netflix", "hbo", "rakuten", "steam", "cabify",
    "taxi", "glovo", "just eat", "deliveroo", "uber eats",
    "comida a domicilio", "hacer deporte", "yoga", "meditacion",
    "cursos online"
]
pytrends = TrendReq(hl='ES', tz=0)
future_dataframe = {}
c = 1
for k in keywords:
    try:
        print("Requesting ", [k])
        # ``dates`` is the timeframe string defined earlier in the file.
        pytrends.build_payload([k], timeframe=dates, geo='ES', gprop='')
        future_dataframe[c] = pytrends.interest_over_time()
        # isPartial flags incomplete trailing periods; not needed here.
        future_dataframe[c].drop(['isPartial'], axis=1, inplace=True)
        c += 1
        # Re-concatenate after every success so ``result`` survives
        # later per-keyword failures.
        result = pd.concat(future_dataframe, axis=1)
    except:
        print("***", "\n", "Error with ", k,
              "or not enough trending percentaje", "\n", "***")
# Flatten the (request, keyword) MultiIndex into long format and export
# to the path named by the PROJECT_TMP environment variable.
result.columns = result.columns.droplevel(0)
df1 = result.unstack(level=-1)
df2 = pd.DataFrame(df1)
df2.to_csv(os.getenv("PROJECT_TMP"))
12: '2014-10-25 2015-07-12', 13: '2015-03-04 2015-11-19', 14: '2015-07-12 2016-03-28', 15: '2015-11-19 2016-08-05', 16: '2016-03-28 2016-12-13', 17: '2016-08-05 2017-04-22', 18: '2016-12-13 2017-08-30', 19: '2017-04-22 2018-01-07', } z = 1 dt_pd_google_segments = pd.DataFrame(columns=['Bitcoin', 'segment']) for x in single_frames: pytrends.build_payload(kw_list, cat=0, timeframe=single_frames[x], geo='', gprop='') dt_pd_google_tmp = pytrends.interest_over_time() if x > 0: print('x>0') dt_pd_google_tmp['segment'] = x lda = dt_pd_google_tmp['Bitcoin'] - dt_pd_google_segments['Bitcoin'] dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp['Bitcoin'] - lda.mean( skipna=True) dt_pd_google_tmp.subtract(lda.mean(skipna=True), fill_value=0)['Bitcoin'] dt_pd_google_segments = dt_pd_google_segments.append(dt_pd_google_tmp) else: print('x = 0') dt_pd_google_tmp['segment'] = x
def main():
    """Stitch per-window Bitcoin Google-Trends CSVs into one continuous
    series and pickle the result.

    Each entry of ``single_frames`` is an overlapping ~8.5-month window;
    the CSVs under D_Data/G_Google_Trends are read, concatenated,
    de-duplicated on the overlap, and saved as
    dt_pd_google_segments_rconx.pickle.
    """
    print("First Module's Name: {}".format(__name__))
    print('OS:', os.name)
    # Work relative to the project root, one level up.
    os.chdir('..')
    # Platform-specific path separator.
    if os.name == 'posix':
        sl = '/'
    elif os.name == 'nt':
        sl = '\\'
    # timezone 360 = US CST
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["Bitcoin"]
    # Overlapping date windows; consecutive windows share roughly half
    # their span so the segments can be aligned on common dates.
    single_frames = {
        0: '2010-07-16 2011-04-03',
        1: '2010-11-25 2011-08-12',
        2: '2011-04-04 2011-12-20',
        3: '2011-08-12 2012-04-28',
        4: '2011-12-20 2012-09-05',
        5: '2012-04-28 2013-01-13',
        6: '2012-09-05 2013-05-23',
        7: '2013-01-13 2013-09-30',
        8: '2013-05-23 2014-02-07',
        9: '2013-09-30 2014-06-17',
        10: '2014-02-07 2014-10-25',
        11: '2014-06-17 2015-03-04',
        12: '2014-10-25 2015-07-12',
        13: '2015-03-04 2015-11-19',
        14: '2015-07-12 2016-03-28',
        15: '2015-11-19 2016-08-05',
        16: '2016-03-28 2016-12-13',
        17: '2016-08-05 2017-04-22',
        18: '2016-12-13 2017-08-30',
        19: '2017-04-22 2017-11-30',
    }
    z = 1  # progress counter for the percentage printout
    # documentation:
    # dt_pd_google_segments: final pd containing trend data
    # dt_pd_google_tmp: temporary set contatining one single frame, that is appended to pd_google_segments
    # lda = mean absolute deviation between
    dt_pd_google_segments = pd.DataFrame(
        columns=['Bitcoin', 'segment', 'google_tr_rtn'])
    for x in single_frames:
        # NOTE(review): this payload is built but the live API result is
        # commented out below — data actually comes from local CSVs.
        pytrends.build_payload(kw_list, cat=0, timeframe=single_frames[x],
                               geo='', gprop='')
        #dt_pd_google_tmp.iloc[0:0]
        #dt_pd_google_tmp = pytrends.interest_over_time()
        # Read the pre-downloaded window CSV named after its timeframe.
        dt_pd_google_tmp = pd.read_csv(
            os.path.abspath(os.curdir) + sl + "D_Data" + sl +
            "G_Google_Trends" + sl + single_frames[x] + ".csv")
        # Drop the first row (presumably a header/partial row in the
        # exported CSV — TODO confirm against the CSV layout).
        dt_pd_google_tmp.drop(dt_pd_google_tmp.index[[0]], inplace=True)
        dt_pd_google_tmp.columns = ['Bitcoin']
        # NOTE(review): set_index is not assigned back, so this call has
        # no lasting effect on dt_pd_google_tmp.
        dt_pd_google_tmp.set_index(
            pd.to_datetime(dt_pd_google_tmp.index, errors='raise',
                           format='%Y-%m-%d', exact='True'))
        #mask = dt_pd_google_tmp.Bitcoin == 0
        #column_name = 'Bitcoin'
        #dt_pd_google_tmp.loc[mask, column_name] = 1
        # dt_pd_google_tmp['google_tr_rtn'] = dt_pd_google_tmp['Bitcoin'] / dt_pd_google_tmp['Bitcoin'].shift(1) - 1
        if x > 0:
            print('x>0')
            # Tag each row with the window it came from.
            dt_pd_google_tmp['segment'] = x
            # lda = dt_pd_google_tmp['google_tr_rtn'] - dt_pd_google_segments['google_tr_rtn']
            # dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp['Bitcoin'] - lda.mean(skipna=True)
            # dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp['Bitcoin'] - 10 * lda.mean(skipna=True)
            # dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp.subtract(lda.mean(skipna=True), fill_value=0)['Bitcoin']
            dt_pd_google_segments = dt_pd_google_segments.append(
                dt_pd_google_tmp)
        else:
            print('x = 0')
            dt_pd_google_tmp['segment'] = x
            dt_pd_google_segments = dt_pd_google_segments.append(
                dt_pd_google_tmp)
        print('retrieve frame', x, 'done - ', z / len(single_frames) * 100, '%')
        z = z + 1
    # drop overlapping points (windows overlap, so dates repeat; keep
    # the first occurrence of each)
    # dt_pd_google_segments.sort_index(ascending=True, inplace=True)
    dt_pd_google_segments['dd'] = dt_pd_google_segments.index
    dt_pd_google_segments.drop_duplicates(subset='dd', keep='first',
                                          inplace=True)
    dt_pd_google_segments.drop(['dd'], axis=1, inplace=True)
    # dt_pd_google_segments = dt_pd_google_segments[dt_pd_google_segments.index.duplicated(keep='first')]
    # dt_pd_google_segments.index.drop_duplicates(keep='last')
    # rename column
    dt_pd_google_segments.rename(columns={'Bitcoin': 'google_tr'},
                                 inplace=True)
    # 'normalize' index to 100
    # dt_pd_google_segments['google_tr'] = dt_pd_google_segments / \
    # dt_pd_google_segments.loc[dt_pd_google_segments['google_tr'].idxmax()][
    # 'google_tr'] * 100
    # store to pickle
    dt_pd_google_segments.to_pickle('dt_pd_google_segments_rconx.pickle')
    print(os.path.basename(__file__), 'executed')