def main(currency, num_days_to_lookback=30, resample="15Min"):
    """Plot one chart per day of price data and stitch the charts into a GIF.

    Data is fetched with ``get_data``, the ``volume`` column is rescaled with
    ``MinMaxScaler``, and the frame is then walked in one-day windows.  Every
    window holding more than one row is rendered with ``plot_with_price`` to
    ``time_volatility_analysis/daily_hour/``; the rendered files are finally
    combined into ``_ANIMATED_<currency>-<resample>.gif`` in that directory.

    Args:
        currency: Instrument identifier forwarded to ``get_data`` and used in
            the output file names.
        num_days_to_lookback: Forwarded to ``get_data``.
        resample: Resampling rule forwarded to ``get_data`` and used in the
            output file names.

    Returns:
        None.
    """
    df, _, _ = get_data(currency=currency,
                        num_days_to_lookback=num_days_to_lookback,
                        resample=resample,
                        starting_when=datetime.now())
    df.index = pd.to_datetime(df.index)
    # MinMaxScaler works on 2-D (n_samples, n_features) input, so scale a
    # one-column frame and flatten the result back into a column.
    df["volume"] = MinMaxScaler().fit_transform(df[["volume"]]).ravel()

    first_date = pd.to_datetime(df.index[0])
    last_date = pd.to_datetime(df.index[-1])
    # NOTE(review): presumably the first Monday on/after first_date --
    # confirm against next_weekday's definition.
    starting = next_weekday(first_date, 0)
    filenames = []
    # Shared close-price bounds, passed to every plot call.
    min_max = [df["close"].min(), df["close"].max()]

    while True:
        ending = starting + timedelta(days=1)
        # Label-based slice; `.ix` was removed from pandas.
        df_subset = df.loc[starting:ending]
        print("[{}] Start {} - End {} - length({})".format(
            starting.strftime("%a"), starting, ending, len(df_subset)))
        filename = "time_volatility_analysis/daily_hour/{}-{}-{}-{}.jpg".format(
            currency, resample, starting.date(), starting.strftime("%a"))
        starting += timedelta(days=1)

        # Check for the end of the data *before* the skip below; otherwise an
        # empty trailing window would `continue` forever.  `break` (rather
        # than `return`) lets the GIF assembly after the loop actually run.
        if starting > last_date:
            break
        if len(df_subset) in (0, 1):
            print("SKipping since df length {}".format(len(df_subset)))
            continue

        plot_with_price(df_subset, save_to_file=filename, min_max=min_max)
        filenames.append(filename)

    if not filenames:
        # Nothing was rendered, so there is nothing to animate.
        return

    images = [imageio.imread(name) for name in filenames]
    video = "time_volatility_analysis/daily_hour/_ANIMATED_{}-{}.gif".format(
        currency, resample)
    imageio.mimsave(video, images)
def plot_AR(args, debug=False):
    """Fit an autoregressive model on 1-minute closes and plot a walk-forward forecast.

    The last 100 close prices are held out as the test set.  The model is
    fitted on the rest, then each held-out point is predicted from the fitted
    coefficients and the most recent ``k_ar`` observed values; the true
    observation is appended to the history after each step.  The test MSE is
    printed and the test series is plotted against the predictions.

    Args:
        args: Object exposing ``currency`` and ``num_days_to_lookback``; both
            are forwarded to ``get_data``.
        debug: Currently unused.

    Returns:
        None.
    """
    print("AR Analysis")
    # get_data is unpacked as a 3-tuple at its other call sites; only the
    # first element (the price frame) is needed here.
    midpoint, _, _ = get_data(currency=args.currency,
                              num_days_to_lookback=args.num_days_to_lookback,
                              resample="1Min")

    # Optional exploratory steps, kept disabled:
    # 1. print correlation matrix
    #    corr_df = pd.concat([midpoint["close"].shift(1), midpoint["close"]], axis=1)
    #    corr_df.columns = ['t-1', 't+1']
    #    print(corr_df.corr())
    # 2. plot ACF
    #    plot_acf(midpoint["close"], lags=1000)
    # 3. plot lag
    #    pd.plotting.lag_plot(midpoint["close"])

    # 4. prediction
    num_predictions = 100
    X = midpoint["close"].values
    split = len(X) - num_predictions
    # The first sample is excluded from the training window.
    train, test = X[1:split], X[split:]

    # train autoregression
    start_time = datetime.now()
    model = AR(train)
    model_fit = model.fit()
    print("Train & Fit time: {}".format(datetime.now() - start_time))

    window = model_fit.k_ar
    coef = model_fit.params
    print('Lag: %s' % model_fit.k_ar)
    print('Coefficients: %s' % model_fit.params)

    # walk forward over time steps in test
    history = list(train[len(train) - window:])
    predictions = []
    for obs in test:
        lag = history[len(history) - window:]
        # coef[0] is used as the constant term; coef[d + 1] multiplies the
        # value observed d + 1 steps back.
        yhat = coef[0]
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        predictions.append(yhat)
        # Feed the true observation (not the forecast) into the next step.
        history.append(obs)

    print(predictions)
    error = mean_squared_error(test, predictions)
    print('Test MSE: {}'.format(error))

    # plot
    plt.plot(test)
    plt.plot(predictions, color='red')
    plt.show()
results.plot_diagnostics(figsize=(15, 12)) plt.show() pred = results.get_prediction(start=pd.to_datetime('2017-04-01'), dynamic=False) pred_ci = pred.conf_int() y_forecasted = pred.predicted_mean y_truth = y['2017-04-01':] # Compute the mean square error mse = ((y_forecasted - y_truth)**2).mean() print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 2))) if __name__ == '__main__': args = parse_args() logging.Formatter.converter = time.gmtime logging.basicConfig(level=logging.CRITICAL if (args.quiet) else logging.INFO, format='[%(levelname)s] %(asctime)s %(message)s', datefmt="%H:%M:%S") midpoint, _, _ = get_data(currency=args.currency, num_days_to_lookback=args.num_days_to_lookback, resample="1Min") print(midpoint.head()) analysis(midpoint["close"])
def _pivot_by_hour_and_day(df, values, aggfunc):
    """Aggregate column ``values`` into a table indexed by ``time`` with one column per ``day``."""
    table = pd.pivot_table(df, values=values, index=['time'],
                           columns=['day'], aggfunc=aggfunc)
    # Return value is ignored, as in the original call sites.
    # NOTE(review): presumably pads missing weekday columns in place -- confirm.
    fill_nonexistent_columns(table)
    return table


def get_volatility_stats(currency="EUR_USD", resample="1H",
                         num_days_to_lookback=365, dont_plot_and_return=False):
    """Build hour-of-day x weekday tables of volatility, price change, KF crossings and volume.

    The fetched frame is annotated with:

    * ``day`` / ``time`` -- weekday and hour taken from the index,
    * ``change`` -- difference between the next close and the current close,
    * ``volatility`` -- ``high - low``,
    * ``kf`` -- Kalman-filtered close,
    * ``kf_intersect`` -- 1 when the previous and next closes lie on opposite
      sides of (or on) the filtered value, otherwise 0.

    Each quantity is pivoted on ``time`` (rows) by ``day`` (columns):
    volatility and change with the mean, crossings and volume with the sum.

    Args:
        currency: Instrument identifier forwarded to ``get_data``.
        resample: Resampling rule forwarded to ``get_data``.
        num_days_to_lookback: Forwarded to ``get_data``.
        dont_plot_and_return: When true, return the four tables instead of
            saving heatmaps.

    Returns:
        ``(hl_volatility, price_change, mean_reversion, volume)`` when
        ``dont_plot_and_return`` is true; otherwise ``None`` after the four
        heatmaps have been passed to ``plot_and_save_heatmap``.
    """
    logging.info("Fetchin data for {} and resampling to {}".format(currency, resample))
    df, _, _ = get_data(currency=currency,
                        num_days_to_lookback=num_days_to_lookback,
                        resample=resample)
    df.index = pd.to_datetime(df.index)

    df["day"] = df.index.weekday
    df["time"] = df.index.hour
    df["change"] = df["close"].diff().shift(-1)
    df["volatility"] = df["high"] - df["low"]

    kf = KalmanFilter(transition_matrices=[1],
                      observation_matrices=[1],
                      initial_state_mean=0,
                      initial_state_covariance=1,
                      observation_covariance=1,
                      transition_covariance=.01)
    # Use the observed values of the price to get a rolling mean
    df["kf"], _ = kf.filter(df["close"].values)

    # Chop the first rows: the filter starts from a state mean of 0 and needs
    # a few observations to catch up.  Copy so the column assignments below
    # write to an independent frame rather than to a slice of the original.
    df = df.iloc[30:].copy()

    df["close_before"] = df["close"].shift(1)
    df["close_after"] = df["close"].shift(-1)
    df.dropna(inplace=True)

    # Vectorised form of the per-row test: the filtered value sits between
    # the previous and the next close, in either direction.
    before, level, after = df["close_before"], df["kf"], df["close_after"]
    crossed_up = (before < level) & (level <= after)
    crossed_down = (before > level) & (level >= after)
    df["kf_intersect"] = (crossed_up | crossed_down).astype(int)

    hl_volatility = _pivot_by_hour_and_day(df, "volatility", "mean")
    price_change = _pivot_by_hour_and_day(df, "change", "mean")
    mean_reversion = _pivot_by_hour_and_day(df, "kf_intersect", "sum")
    volume = _pivot_by_hour_and_day(df, "volume", "sum")

    if dont_plot_and_return:
        return hl_volatility, price_change, mean_reversion, volume

    # Compute the date stamp once so all four files share the same prefix.
    today = datetime.today().strftime('%Y-%m-%d')
    charts = (
        (hl_volatility, '{} HL Change', 'hlchange'),
        (price_change, '{} Price Change', 'pricechange'),
        (mean_reversion, '{} Mean Reversion', 'meanreversion'),
        (volume, '{} Volume', 'volume'),
    )
    for table, title, suffix in charts:
        plot_and_save_heatmap(
            table,
            title=title.format(currency),
            filename="{}/{}_{}_{}_{}.jpg".format(
                DIR, currency, today, num_days_to_lookback, suffix),
        )