def getDemand(self):
    """Fetch daily Google Trends data for ``self.keyword``.

    Queries pytrends' dailydata helper over the instance's configured
    date range (self.year_start/month_start .. self.year_end/month_end).

    Returns:
        pandas.DataFrame: daily search-interest data for the keyword.
    """
    try:
        # The original also built an unused TrendReq session here;
        # get_daily_data creates its own session internally, so it was dead.
        res = dailydata.get_daily_data(
            self.keyword,
            start_year=self.year_start,
            start_mon=self.month_start,
            stop_year=self.year_end,
            stop_mon=self.month_end,
            geo='',
            wait_time=10,
        )
        return res
    finally:
        # Cleanup hook kept from the original (temp-dir removal disabled).
        # os.remove(demand.temp_dir)
        pass
def get_google_trends(kw, start_unix, end_unix, file_name):
    """Download daily Google Trends search volume for *kw* and append it to a CSV.

    Parameters
    ----------
    kw : str
        Search keyword.
    start_unix : int
        Epoch (Unix) time in seconds.
    end_unix : int
        Epoch (Unix) time in seconds.
    file_name : str
        Path of the CSV file to append to.

    Returns
    -------
    NoneType
        None.
    """
    # Resume from the last date already stored in the CSV, if it has content.
    resume_unix = None
    if os.stat(file_name).st_size != 0:
        stored = pd.read_csv(file_name, index_col='Date')
        resume_unix = ts_to_unix(stored.index[-1])

    # Never re-fetch data older than what is already on disk.
    if resume_unix is not None and start_unix < resume_unix:
        start_unix = resume_unix

    start = unix_to_ts(start_unix)
    end = unix_to_ts(end_unix)

    volume = dailydata.get_daily_data(
        kw, start.year, start.month, end.year, end.month, geo=''
    )
    # Drop the four helper columns, keeping only the rescaled daily series.
    volume.drop(volume.columns[0:4], axis=1, inplace=True)
    volume['Date'] = [date_to_str(ts) for ts in volume.index]
    volume.set_index('Date', inplace=True)
    volume.rename({kw: 'Search Volume'}, axis=1, inplace=True)

    # Append to the CSV; emit the header only when the file is still empty.
    with open(file_name, 'a') as f:
        volume.to_csv(f, header=(f.tell() == 0))

    print(f'Finished! Updated from {start} to {end}')
def update_csv_data(ticker):
    """Create or incrementally update the daily Google Trends CSV for *ticker*.

    Relies on the module-level ``csv`` variable for the target file path.
    Downloads the full 2010-to-today history when the file is missing,
    otherwise fetches and appends only the rows after the last stored date.
    """
    start = datetime.date(2010, 1, 1)
    end = datetime.date.today()
    try:
        pd.read_csv(csv, index_col=0)
    except FileNotFoundError:
        # No cache yet: download the full history once.
        get_daily_data(ticker, start.year, start.month, end.year, end.month,
                       geo="US", verbose=True).to_csv(csv)
    finally:
        df = pd.read_csv(csv, index_col=0)
        last = df.index[len(df) - 1]
        last = datetime.datetime.strptime(last, '%Y-%m-%d').date()
        if last != end:
            # Download only the missing tail, through a temp file.
            get_daily_data(ticker, last.year, last.month, end.year, end.month,
                           geo="US", verbose=True).to_csv("temp")
            new_df = pd.read_csv("temp", index_col=0)
            os.remove("temp")
            new = last + datetime.timedelta(days=1)
            new = new.strftime('%Y-%m-%d')
            if new in new_df.index:
                # Only add the parts of new_df that do not overlap df's index.
                # Bug fix: DataFrame.append was removed in pandas 2.0;
                # pd.concat is the supported equivalent.
                df = pd.concat([df, new_df.loc[new:]])
            df.to_csv(csv)
def add_trends(self):
    # Pull daily Google Trends data for the keyword 'recession'
    # (Jan 2004 – Oct 2019).
    # NOTE(review): pytrends geo codes are two-letter ISO-3166 codes —
    # 'USA' looks wrong; confirm it should be 'US'.
    df = dailydata.get_daily_data('recession', 2004, 1, 2019, 10, geo='USA')
    # Inner-join the trends columns onto the training data by 'Date'.
    # NOTE(review): suffixes=(False, False) makes pandas raise if any
    # non-key column names overlap — confirm that fail-fast is intended.
    self.train_data = pd.merge(left=self.train_data, right=df, how='inner',
                               on='Date', suffixes=(False, False))
    # Record the added feature name.
    # NOTE(review): appends 'kurt' although the merged keyword is
    # 'recession' — verify this is intentional.
    self.the_list.append('kurt')
def fetch():
    """Download daily Google Trends data (Aug 2004 – Aug 2020, GB) for every
    keyword in ./data/raw/keywords_list.txt and save the combined table as
    ./data/raw/search_trends.csv.
    """
    # The original also built an unused TrendReq session here; get_daily_data
    # creates its own session internally, so it was dead code.
    keywords_list = load_keywords("./data/raw/keywords_list.txt")
    data = defaultdict(list)
    df = None
    # Iterate the keywords directly instead of indexing via range(len(...)).
    for keyword in keywords_list:
        df = dailydata.get_daily_data(keyword, 2004, 8, 2020, 8, geo='GB')
        if not df.empty:
            col_name = f'{keyword}_unscaled'
            data[keyword] = list(df[col_name].values)
    data_by_days = pd.DataFrame.from_dict(data)
    # NOTE: the index comes from the *last* downloaded frame; all keywords
    # are assumed to share the same date range.
    data_by_days = data_by_days.set_index(df.index)
    data_by_days.to_csv('./data/raw/search_trends.csv')
    print('Data is fetched!')
def main():
    """Streamlit entry point: let the user pick stock symbols, then show,
    chart, and save daily Google Trends data for each selection."""
    stock_symbols = st.sidebar.multiselect(
        'Select stocks:', (MSCI_WORLD_SYMBOLS + DAX_SYMBOLS))
    for stock in stock_symbols:
        # TODO: also search for "{stock} portfolio" or "{stock} stock"
        # (f"{stock}" was redundant — the symbol is already a string)
        df = get_daily_data(stock, START_YEAR, START_MONTH,
                            END_YEAR, END_MONTH, geo="", wait_time=1.0)
        st.write(df)
        st.line_chart(df)
        df.to_csv(f"data/trends/{stock}.csv")
def download_daily_google_trends(keyword, start_year, start_month, end_year, end_month):
    """
    Query for and aggregate daily google search trends data for 'keyword'
    and download it as a CSV named 'google_trends_{keyword}_{timestamp}.csv'

    Args:
        keyword: (str) word to search for
        start_year: (int) returning trends starting from this year (and month)
        start_month: (int) returning trends starting from this (year and) month
        end_year: (int) returning trends ending at this year (and month)
        end_month: (int) returning trends ending at this (year and) month

    Returns:
        None

    Examples:
        download_daily_google_trends(keyword='ethereum', start_year=2015,
                                     start_month=7, end_year=2019, end_month=11)
    """
    # API doc and math explained:
    # https://github.com/GeneralMills/pytrends/blob/master/pytrends/dailydata.py
    df_daily = dd.get_daily_data(keyword, start_year, start_month,
                                 end_year, end_month)
    print(df_daily.tail(31))

    # Plot the monthly data as obtained from Google.
    _plot_trend(df_daily, f"{keyword}_monthly",
                f"Google trends (monthly data): {keyword}")

    # Plot the daily data rescaled from the monthly data.
    _plot_trend(df_daily, f"{keyword}",
                f"Google trends(rescaled to make the daily data comparable): {keyword}")

    # Download a CSV of the data.
    timestamp = int(datetime.timestamp(datetime.now()))
    filename = f"google_trends_{keyword}_{timestamp}.csv"
    df_daily.to_csv(filename)
    return


def _plot_trend(df, column, title):
    """Plot one column of the trends frame against its date index."""
    plt.plot(df.index, df[column])
    plt.autoscale(enable=True, axis='x', tight=True)
    plt.title(title)
    plt.grid(True)
    plt.show()
def get_daily_google_data(kw_list, from_year, from_month):
    """Return daily Google Trends data for every keyword in *kw_list*,
    cached on disk as googledata/{ticker}.csv.

    kw_list: [0] ticker, [1] full name, [2] name without corporation
    classification.
    """
    if not path.exists('googledata'):
        os.mkdir('googledata')
    cache_file = f'googledata/{kw_list[0]}.csv'
    if path.exists(cache_file):
        # Cache hit: load and return the stored frame.
        df = pd.read_csv(cache_file).set_index('Date')
        df.index = pd.to_datetime(df.index)
        return df
    # Hoist "now" so year and month are consistent even if this runs
    # across a month boundary (the original called dt.now() twice).
    now = dt.now()
    df = pd.DataFrame()
    for kw in kw_list:
        df[kw] = dailydata.get_daily_data(kw, from_year, from_month,
                                          now.year, now.month)[kw]
    df.index.name = "Date"
    df.index = pd.to_datetime(df.index)
    df.to_csv(cache_file)
    return df
count += 1 else: CASH[ CCC ] = temp_cash count += 1 self.CASH = CASH def smotetomek( self ): smt = SMOTETomek() self.X_train, self.y_train = smt.fit_sample( self.X_train, self.y_train ) pytrend = TrendReq() help(pytrend) df = dailydata.get_daily_data('recession', 2019, 10, 2020, 5, geo = 'USA') set_date_inputs = { 'start_date': "2000-01-01", 'end_date': "2020-06-19" } st_ret_inputs = { 'change_days': [ 1, 3, 5, 14, 21 ] } volatility_inputs = { 'windows' : [ 5, 15, 30, 60, 90, 180 ] } lt_ret_inputs = { 'change_days' : [ 60, 90, 180, 250 ] } ma_inputs = { 'sma_list' : [ 15, 30, 60, 90, 180 ], 'ema_list' : [ 90, 180 ] } create_labels_inputs = { 'target_return_period' : 14, 'tail_probs' : [ 0.25, 0.70 ] } data = Data() data.execute_all( set_date_inputs, st_ret_inputs, volatility_inputs, lt_ret_inputs, ma_inputs, create_labels_inputs, regime = True, high = True, low = False, kurt = False ) ext_spike_hp = {'criterion': 'entropy', 'max_depth': 212,
# Apply CLI defaults and parse arguments (parser is defined earlier in the file).
parser.set_defaults(plot=False)
parser.set_defaults(verbose=False)
args = parser.parse_args()
input_path = args.input
logging.basicConfig(filename='./google_trends_crawler.log', level=logging.INFO)

# Bail out early if the query file is missing.
if not os.path.exists(input_path):
    print('>>> Input file does not exist!')
    print('>>> Exit...')
    sys.exit(1)

# == == == == == == Part 3: Start Google trends crawler == == == == == == #
# read queries from the input file (one JSON object per line)
with open(input_path, 'r') as input_data:
    for line in input_data:
        query_json = json.loads(line.rstrip())
        keyword = query_json['keyword']
        mid = query_json['mid']
        start_date_str = query_json['start_date']
        end_date_str = query_json['end_date']
        # NOTE(review): start_date_obj is computed but never used below.
        start_date_obj = datetime.strptime(start_date_str, '%Y-%m-%d')
        logging.info('>>> Query for topic {0}'.format(keyword))
        # result dict
        # NOTE(review): google_trends is built but never populated or written.
        google_trends = {'start_date': start_date_str,
                         'end_date': end_date_str,
                         'daily_search': []}
        # NOTE(review): the per-query dates are ignored — the crawl range is
        # hard-coded to 2017-01 .. 2018-04; confirm this is intentional.
        res_df = dailydata.get_daily_data(word=mid, start_year=2017,
                                          start_mon=1, stop_year=2018,
                                          stop_mon=4)
        res_df.to_csv('data/{0}.csv'.format(keyword))
from pytrends.request import TrendReq
from pytrends import dailydata
import pandas as pd

# Ran keywords separately, as giving a multi-keyword payload gave a 400 error.
KEYWORDS = ["Vaccine 5G", "plandemic", "anti mask", "Great Reset",
            "Bill Gates Vaccine"]

# One daily-trends frame per keyword (Mar 2020 – May 2021, worldwide);
# a comprehension replaces the original five copy-pasted calls.
frames = [dailydata.get_daily_data(kw, 2020, 3, 2021, 5, geo="")
          for kw in KEYWORDS]

# Saved all of them under a single CSV file.
ao = pd.concat(frames, axis=1)
# NOTE(review): index=False drops the date index from the CSV — confirm.
ao.to_csv('jay.csv', sep=',', index=False)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
import os

# Silence TensorFlow info/warning log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

company = 'FB'    # ticker for price data
key = 'facebook'  # keyword for Google Trends
start = dt.datetime(2012, 1, 1)
end = dt.datetime(2020, 1, 1)

# Daily OHLC price data from Yahoo Finance.
data = web.DataReader(company, 'yahoo', start, end)
# NOTE(review): `dailydata` is not imported above — this raises NameError as
# written; confirm the missing `from pytrends import dailydata`. Also, df is
# never used in the visible code below.
df = dailydata.get_daily_data(key, 2019, 1, 2019, 2, geo='US')

# NOTE(review): MinMaxScaler is not imported above — confirm the missing
# `from sklearn.preprocessing import MinMaxScaler`.
scaler = MinMaxScaler(feature_range=(0, 1))
# Scale each price series into [0, 1]; note each fit_transform refits the
# scaler to that series.
scaled_data_c = scaler.fit_transform(data['Close'].values.reshape(-1, 1))
scaled_data_h = scaler.fit_transform(data['High'].values.reshape(-1, 1))
scaled_data_l = scaler.fit_transform(data['Low'].values.reshape(-1, 1))
scaled_data_o = scaler.fit_transform(data['Open'].values.reshape(-1, 1))

x_train = []
y_train = []

# Build per-day features after a 21-day warm-up window
# (loop body continues beyond this view).
for x in range(21, len(scaled_data_c)):
    h_l = float(scaled_data_h[x] - scaled_data_l[x])    # daily high-low spread
    o_c = float(scaled_data_o[x] - scaled_data_c[x])    # open-close change
    seven_a = np.average(scaled_data_c[x - 7:x, 0])     # 7-day moving average
    fourteen_a = np.average(scaled_data_c[x - 14:x, 0]) # 14-day moving average
def get_gsvi(w):
    """Return daily Google search-volume data for keyword *w* (Jan 2012 –
    Mar 2017) with a lagged rolling-median column and the log abnormal
    search volume index (ASVI).

    Relies on the module-level ``win`` for the rolling window length.
    """
    gsvi = get_daily_data(w, 2012, 1, 2017, 3)
    # Median volume over the previous `win` days, lagged by one day.
    gsvi['lag_media'] = gsvi.loc[:, w].rolling(win).median().shift(1)
    # ASVI = log(today's volume) - log(lagged rolling median).
    gsvi['asvi'] = np.log(gsvi.loc[:, w]) - np.log(gsvi.lag_media)
    # Bug fix: `x.date` referenced the bound method object without calling
    # it, leaving an index of method objects; call it to convert each
    # timestamp to a date.
    gsvi.index = gsvi.index.map(lambda x: x.date())
    return gsvi
import pandas as pd
from pytrends.request import TrendReq
from pytrends import dailydata

# Session kept for the commented-out pytrends experiments below; the live
# dailydata call at the bottom does not use it.
pytrends = TrendReq(hl='en-US', tz=360)

# --- earlier experiments, kept disabled ---
#keywords = ['Lana', 'Mercado Pago']
#pytrends.build_payload(
#    kw_list=keywords,
#    cat=0,
#    timeframe='today 3-m',
#    geo='TW',
#    gprop='')
#data = pytrends.interest_over_time()
#data.to_csv('Py_VS_R.csv', encoding='utf_8_sig')
#data = data.drop(labels=['isPartial'],axis='columns')
#data.to_csv('Py_VS_R.csv', encoding='utf_8_sig')
#image = data.plot(title = 'Python V.S. R in last 3 months on Google Trends')
#fig = image.get_figure()
#fig.savefig('figure.png')
#df = pytrends.trending_searches(pn='argentina')
#print(df)
#t = pytrends.get_historical_interest('Corona', year_start=2020, month_start=1, day_start=1, hour_start=0, year_end=2020, month_end=5, day_end=1, hour_end=0, cat=0, geo='', gprop='', sleep=20)
#print(t)

# Daily search-volume data for 'cinema' in Brazil, Jan–Oct 2019.
df = dailydata.get_daily_data('cinema', 2019, 1, 2019, 10, geo='BR')
print(df)