def _take_action(self, action):
    """Trade one share according to the sign of the lag-1 autocorrelation.

    The ``action`` argument is overwritten: the decision is derived from the
    lag-1 autocorrelation of ``self.returns[:self.current_step]`` and the sign
    of the latest observation ``self.obs[-1]``.

    Side effects: stores the previous balance / share count in
    ``self.last_balance`` / ``self.last_shares_held`` and updates
    ``self.shares_held`` and ``self.balance``.
    """
    price_now = self.price
    self.last_balance = self.balance
    self.last_shares_held = self.shares_held

    trade_size = 1
    lag1_autocorr = acf(self.returns[:self.current_step], fft=True)[1]
    last_obs_positive = self.obs[-1] > 0

    if lag1_autocorr > 0:
        # Momentum regime: follow the sign of the last observation.
        action = 0 if last_obs_positive else 1
    else:
        # Mean-reversion regime: trade against the last observation.
        action = 1 if last_obs_positive else 0

    if action == 0:
        # Buy `trade_size` shares at the current price.
        self.shares_held += trade_size
        self.balance -= trade_size * price_now
    elif action == 1:
        # Sell `trade_size` shares at the current price.
        self.shares_held -= trade_size
        self.balance += trade_size * price_now
def forecast_accuracy(forecast, actual):
    """Print and return accuracy metrics of ``forecast`` against ``actual``.

    Args:
        forecast: Predicted values. Indexed as ``forecast[:, None]`` below,
            so a 1-D NumPy array is expected.
        actual: Observed values, same length as ``forecast``.

    Returns:
        dict with keys ``mape``, ``me``, ``mae``, ``mpe``, ``rmse``, ``acf1``,
        ``corr`` and ``minmax``. The same dict is printed.
    """
    error = forecast - actual

    mape = np.mean(np.abs(error) / np.abs(actual))  # mean absolute percentage error
    me = np.mean(error)  # mean error
    mae = np.mean(np.abs(error))  # mean absolute error
    mpe = np.mean(error / actual)  # mean percentage error
    rmse = np.mean(error**2)**.5  # root mean squared error
    corr = np.corrcoef(forecast, actual)[0, 1]  # forecast/actual correlation

    # Row-wise min and max of each (forecast, actual) pair.
    paired = np.hstack([forecast[:, None], actual[:, None]])
    mins = np.amin(paired, axis=1)
    maxs = np.amax(paired, axis=1)
    minmax = 1 - np.mean(mins / maxs)  # min-max error

    # Lag-1 autocorrelation of the forecast errors. Uses the function's own
    # arguments rather than module-level `fc` / `test` names.
    acf1 = acf(error)[1]

    metrics = {
        'mape': mape,
        'me': me,
        'mae': mae,
        'mpe': mpe,
        'rmse': rmse,
        'acf1': acf1,
        'corr': corr,
        'minmax': minmax
    }
    print(metrics)
    return metrics
def acf_plots(data):
    """Plot the autocorrelation function of every sample in a subplot grid.

    Args:
        data: Integer-indexable collection; each item is subscripted with
            ``"entrance_queue_timeseries"`` to obtain the series to analyse.

    Each subplot shows the ACF up to lag 100 together with the 95% band
    re-centred on zero (``confint - acf``), i.e. the band expected under no
    autocorrelation.
    """
    n_samples = len(data)
    # At least one row: fewer than three samples would otherwise give
    # n_rows == 0 and a division by zero below.
    n_rows = max(1, n_samples // 3)
    n_cols = -(-n_samples // n_rows)  # ceiling division

    plt.figure(figsize=(16, 8))
    plt.suptitle(
        'Autocorrelations and 95% confidence intervals of no autocorrelation',
        fontsize=12)

    for i in range(n_samples):
        plt.subplot(n_rows, n_cols, i + 1)
        acf_vals, confint = tsaplots.acf(data[i]["entrance_queue_timeseries"],
                                         nlags=100,
                                         alpha=0.05,
                                         fft=False)
        # Size the x-axis from the result so short series still plot.
        lag_axis = range(len(acf_vals))
        plt.plot(lag_axis, acf_vals, 'ob')
        plt.fill_between(lag_axis, (confint[:, 0] - acf_vals),
                         (confint[:, 1] - acf_vals),
                         color='b',
                         alpha=.1)
        plt.title("Sample %i" % (i + 1))
        plt.xlabel('Lag (1 = 10 time steps)')
        plt.ylabel('Autocorrelation')
        plt.ylim((-1.05, 1.05))

    plt.tight_layout()
    plt.subplots_adjust(top=0.90)
    plt.show()
def plot_correlogram(self, lags=10, title=None):
    """Show a 2x2 diagnostic figure for ``self.data``.

    Panels: the series annotated with the largest Q-test p-value and the ADF
    p-value, a probability plot annotated with summary moments, the ACF and
    the PACF.

    Args:
        lags: Number of lags for the Q-test and the ACF/PACF panels. ``None``
            selects ``min(10, int(len(data) / 5))``.
        title: Figure title.
    """

    # NOTE: without passing residuals this method cannot be used by the
    # optimal brute force finder.

    def moving_average(self, a: pd.array, n: int = 3):
        # Currently unused; kept for the FIXME below.
        ret = np.cumsum(a)
        ret[n:] = ret[n:] - ret[:-n]
        return ret[n - 1:] / n

    # NOTE: necessary due to inheritance of TimeSeries which uses 'Agg'
    matplotlib.use('TkAgg')

    x = self.data
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

    axes[0][0].plot(x.values)  # residuals
    # FIXME: overlay moving_average(x, n=21) once it is computed correctly.

    # q_stat expects the autocorrelations for lags 1..n, so lag 0 (always 1)
    # is dropped. The label says "Q-Stat" but the value is the largest p-value.
    q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
    stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)

    probplot(x, plot=axes[0][1])
    # moment() returns central moments, so its first entry is always 0;
    # take the mean from np.mean instead.
    # NOTE(review): "Skew" and "Kurtosis" below are the raw 3rd/4th central
    # moments, not standardized coefficients - confirm that this is intended.
    mean = np.mean(x)
    var, skew, kurtosis = moment(x, moment=[2, 3, 4])
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)

    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')

    fig.suptitle(title, fontsize=14)
    sns.despine()
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
    print('plotting')
    plt.show()
def acf_examine(data):
    """Return the lags whose autocorrelation magnitude is at least 0.5.

    The ACF plot of the data is also drawn via ``plot_acf``.

    Returns:
        Tuple ``(lags, values)`` of two parallel lists: the lag indices with
        ``abs(acf) >= 0.5`` and the corresponding autocorrelation values.
    """
    series = np.array(data)
    strong = [(lag, value) for lag, value in enumerate(acf(series))
              if abs(value) >= 0.5]
    strong_lags = [lag for lag, _ in strong]
    strong_values = [value for _, value in strong]
    plot_acf(series)
    return (strong_lags, strong_values)
def _next_observation(self):
    """Advance the return window by one step and build the observation.

    Drops the oldest entry of ``self.obs_ret``, appends a fresh value from
    ``self.new_return()``, records the previous price in ``self.last_price``
    and adds the new return to ``self.price``.

    Returns:
        1-D array: the return window followed by its autocorrelations at
        lags ``1..self.n_autocorr``.
    """
    window = self.obs_ret
    window.popleft()
    latest = self.new_return()
    window.append(latest)

    self.last_price = self.price
    self.price += latest

    window_values = np.array(window)
    autocorr = acf(window, fft=True)[1:self.n_autocorr + 1]
    return np.concatenate([window_values, autocorr])
def calc_acf_samples(samples: np.ndarray, burn_in: int, dim: int):
    """Autocorrelation function of a 1-D sample chain after burn-in.

    Args:
        samples: Sequence of samples, or ``None``.
        burn_in: Number of leading samples to discard.
        dim: Dimensionality of the samples; only ``1`` is supported.

    Returns:
        The ACF values of ``samples[burn_in:]``, or ``None`` when ``samples``
        is ``None``.

    Raises:
        Warning: when ``dim != 1`` or fewer than two samples are given.
    """
    if samples is None:
        # Return explicitly rather than falling through to an unbound name.
        return None

    if dim != 1 or len(samples) <= 1:
        # todo: acf for multi-dimensional input
        raise Warning('utils.calc_acf_samples not defined for multi-dimensional input')

    n_samples = len(samples)
    samples_wo_burn_in = samples[burn_in:n_samples]
    return acf(samples_wo_burn_in, fft=True, nlags=n_samples - burn_in)
def plot_correlogram(x, lags=None, title=None):
    """Draw a 2x2 diagnostic figure for the series ``x``.

    Panels: the series annotated with the largest Q-test p-value and the ADF
    p-value, a probability plot annotated with summary moments, the ACF and
    the PACF.

    Args:
        x: Series to analyse; must provide ``.plot(ax=...)``.
        lags: Number of lags for the Q-test and the ACF/PACF panels. ``None``
            selects ``min(10, int(len(x) / 5))``.
        title: Figure title.
    """
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

    x.plot(ax=axes[0][0])
    # q_stat expects the autocorrelations for lags 1..n, so lag 0 (always 1)
    # is dropped. The label says "Q-Stat" but the value is the largest p-value.
    q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
    stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)

    probplot(x, plot=axes[0][1])
    # moment() returns central moments, so its first entry is always 0;
    # take the mean from np.mean instead.
    # NOTE(review): "Skew" and "Kurtosis" below are the raw 3rd/4th central
    # moments, not standardized coefficients - confirm that this is intended.
    mean = np.mean(x)
    var, skew, kurtosis = moment(x, moment=[2, 3, 4])
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)

    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')

    fig.suptitle(title, fontsize=20)
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
def _next_observation(self): # Get the stock data points for the last self.n_lag days if self.observe_type == 'return': obs = self.df_inc.iloc[self.current_step - self.n_lag:self. current_step].values.flatten() elif self.observe_type == 'price': obs = self.df_inc.iloc[self.current_step - self.n_lag:self. current_step].values.flatten() elif self.observe_type == 'autocorr': ret = self.df_inc.iloc[self.current_step - self.n_lag:self. current_step].values.flatten() obs = np.concatenate([ acf(ret, fft=True)[1:self.n_autocorr + 1], ret[-1:-self.n_autocorr - 1:-1] ]) else: raise Exception('{} not an allowed observation type.'.format( self.observe_type)) return obs
#plt.plot(movingAverage)

# 1. Difference between the log series and its moving average
logAndMA = tsLog - movingAverage
logAndMA.dropna(inplace=True)
#rollingStatPlot(logAndMA)

# 2. First difference of the log series (d=1)
diff = tsLog - tsLog.shift()
diff.dropna(inplace=True)
#rollingStatPlot(diff)

# Find p and q from the ACF / PACF of the differenced series.
# acf/pacf are imported from their documented module, statsmodels.tsa.stattools;
# statsmodels.graphics.tsaplots only re-exports them incidentally.
from statsmodels.tsa.stattools import acf, pacf

acfLag = acf(diff, nlags=20)
pacfLag = pacf(diff, nlags=20, method='ols')

# Approximate 95% band for a white-noise series of this length.
confBound = 1.96 / np.sqrt(len(diff))

# Plot ACF:
plt.subplot(121)
plt.plot(acfLag)
for level in (0, -confBound, confBound):
    plt.axhline(y=level, linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Plot PACF:
plt.subplot(122)
plt.plot(pacfLag)
for level in (0, -confBound, confBound):
    plt.axhline(y=level, linestyle='--', color='gray')
# NOTE(review): the next assignment looks like the tail of a simulation loop
# whose header precedes this chunk - confirm its indentation against the file.
y3[t]=0.8*y3[t-1]+u[t]+0.6*u[t-1]
plt.plot(y3,'o-');

# 5.2.4 ARIMA model
np.random.seed(12)
n=100
y4=np.random.randn(n).cumsum()  # random walk
plt.plot(y4,'o-')
dy4=np.diff(y4)  # first difference of the random walk
plt.plot(dy4,'o-')
plt.plot(y4,'o-',dy4,'*-');plt.axhline(0);

# 5.3 ARMA model
## 5.3.1 Serial correlation tests
# acf/pacf are imported from their documented module, statsmodels.tsa.stattools;
# statsmodels.graphics.tsaplots only re-exports them incidentally.
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf
np.round(acf(y2),3)
plot_acf(y1);  # autocorrelation coefficients of the "MR(1)" [sic, presumably MA(1)] model


def ac_QP(Yt):
    """Return a table of autocorrelations with their Q statistics and p-values.

    Args:
        Yt: Series passed to ``sm.tsa.acf(..., qstat=True)``.

    Returns:
        DataFrame with columns ``AC`` (autocorrelations from lag 1 on),
        ``Q`` and ``Prob(>Q)``.
    """
    import statsmodels.api as sm
    r, q, p = sm.tsa.acf(Yt, qstat=True)
    # r[0] is the lag-0 term; q and p start at lag 1, so drop it to align.
    rqp = np.c_[r[1:], q, p]
    return pd.DataFrame(rqp, columns=["AC", "Q", "Prob(>Q)"])


ac_QP(y2)[:10]

from statsmodels.tsa.stattools import pacf
from statsmodels.graphics.tsaplots import plot_pacf
np.round(pacf(y1),3)
plot_pacf(y2);  # partial autocorrelation coefficients of the AR(1) model
# =============================================================================

######## Step 8 #########
# The new bitcoin dataset has been formed. From now on only this dataset is
# used: rows dated 2017-01-01 or later.
bitcoin = bitcoin[bitcoin['Date'] >= "2017-01-01"]
bitcoin['bprice'].plot()
# plot8 = plt.plot(bitcoin['Date'],bitcoin['bprice'])

######## Step 9 #########
# ACF and PACF describe how the series relates to its own lagged values.
# acf/pacf are imported from their documented module, statsmodels.tsa.stattools;
# statsmodels.graphics.tsaplots only re-exports them incidentally.
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

acf(bitcoin['dbprice'].dropna())
pacf(bitcoin['dbprice'].dropna())

# Plots for acf and pacf
plot_acf(bitcoin['dbprice'].dropna())
plot_pacf(bitcoin['dbprice'].dropna())

######## Step 10 #########
# NOTE(review): statsmodels.tsa.arima_model is the legacy ARIMA module; newer
# releases provide statsmodels.tsa.arima.model.ARIMA with a different API.
# Left unchanged because the code using ARIMA is outside this chunk.
from statsmodels.tsa.arima_model import ARIMA

x = bitcoin['doil']
# Regressor matrix with an intercept. `axis` is passed by keyword because
# pandas 2 no longer accepts it positionally. The first row is dropped here
# and from x / y below.
bigX = sm.add_constant(
    pd.concat((x, bitcoin['deuro'], bitcoin['dgold'], bitcoin['dsp']),
              axis=1))[1:]
x = x[1:]
y = bitcoin['dbprice'][1:]
def objective_func(self, H: float) -> float:
    """Distance between the empirical ACF and the model ACF for a given ``H``.

    Args:
        H: Parameter forwarded to ``self.autocorr_frac_noise_range``.

    Returns:
        Euclidean norm of the difference between the ACF of ``self.df_inc``
        (up to ``self.n_lags`` lags) and the fitted autocorrelations.
    """
    fitted = self.autocorr_frac_noise_range(H)
    empirical = acf(self.df_inc, nlags=self.n_lags)
    residual = empirical - fitted
    return np.linalg.norm(residual)
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# acf/pacf are imported from their documented module, statsmodels.tsa.stattools;
# statsmodels.graphics.tsaplots only re-exports them incidentally.
from statsmodels.tsa.stattools import acf, pacf

# sourcing df from Data Preparation
from Data_prep.Data_preparation import MSFTdf

# Number of lags shown in the bar charts (lag 0 included -> N_LAGS + 1 bars).
N_LAGS = 30

# exporting ACF and PACF
acf_plot = plot_acf(MSFTdf.IntChange)
# Request the lag count explicitly: the default depends on the sample size and
# may return fewer than 31 values, which would not match a fixed range(31).
acf_vals = acf(MSFTdf.IntChange, nlags=N_LAGS)
plt.bar(range(len(acf_vals)), acf_vals)
#plt.savefig('MSFT_ACF')

pacf_plot = plot_pacf(MSFTdf.IntChange)
pacf_vals = pacf(MSFTdf.IntChange, nlags=N_LAGS)
plt.bar(range(len(pacf_vals)), pacf_vals)
#plt.savefig('MSFT_PACF')

plt.show()
def main():
    """Run a trend test, autocorrelation check and distribution fit on data.txt.

    Reads a header-less CSV ``data.txt``, regresses the first column on the
    index 1..n, plots the fit, prints the slope p-value, prints/plots the
    autocorrelation of the series and finally searches for the best-fitting
    ``scipy.stats`` distribution.
    """

    def get_distribution(Dataset):
        """Fit candidate scipy.stats distributions and return the best one.

        Each name is fitted to ``Dataset`` and scored with a
        Kolmogorov-Smirnov test; names that cannot be fitted are skipped.

        Returns:
            Tuple ``(name, p_value, fitted_params)`` for the highest p-value.

        Raises:
            ValueError: from ``max`` when no candidate could be fitted.
        """
        # Candidate attribute names of scipy.stats. Many are not
        # distributions; those fail in the try-block below and are skipped.
        distribution = [
            '_binned_statistic', '_constants', '_continuous_distns',
            '_discrete_distns', '_distn_infrastructure', '_distr_params',
            '_multivariate', '_stats', '_stats_mstats_common',
            '_tukeylambda_stats', 'absolute_import', 'alpha', 'anderson',
            'anderson_ksamp', 'anglit', 'ansari', 'arcsine', 'argus',
            'bartlett', 'bayes_mvs', 'bernoulli', 'beta', 'betaprime',
            'binned_statistic', 'binned_statistic_2d', 'binned_statistic_dd',
            'binom', 'binom_test', 'boltzmann', 'boxcox', 'boxcox_llf',
            'boxcox_normmax', 'boxcox_normplot', 'bradford', 'burr', 'burr12',
            'cauchy', 'chi', 'chi2', 'chi2_contingency', 'chisquare',
            'circmean', 'circstd', 'circvar', 'combine_pvalues', 'contingency',
            'cosine', 'crystalball', 'cumfreq', 'describe', 'dgamma',
            'dirichlet', 'distributions', 'division', 'dlaplace', 'dweibull',
            'energy_distance', 'entropy', 'erlang', 'expon', 'exponnorm',
            'exponpow', 'exponweib', 'f', 'f_oneway', 'fatiguelife',
            'find_repeats', 'fisher_exact', 'fisk', 'fligner', 'foldcauchy',
            'foldnorm', 'friedmanchisquare', 'gamma', 'gausshyper',
            'gaussian_kde', 'genexpon', 'genextreme', 'gengamma',
            'genhalflogistic', 'genlogistic', 'gennorm', 'genpareto', 'geom',
            'gilbrat', 'gmean', 'gompertz', 'gumbel_l', 'gumbel_r',
            'halfcauchy', 'halfgennorm', 'halflogistic', 'halfnorm', 'hmean',
            'hypergeom', 'hypsecant', 'invgamma', 'invgauss', 'invweibull',
            'invwishart', 'iqr', 'itemfreq', 'jarque_bera', 'johnsonsb',
            'johnsonsu', 'kappa3', 'kappa4', 'ksone', 'kstat', 'kstatvar',
            'kstest', 'kstwobign', 'kurtosis', 'kurtosistest', 'laplace',
            'levene', 'levy', 'levy_l', 'levy_stable', 'linregress',
            'loggamma', 'logistic', 'loglaplace', 'lognorm', 'logser', 'lomax',
            'mannwhitneyu', 'matrix_normal', 'maxwell', 'mielke', 'mode',
            'moment', 'mood', 'morestats', 'moyal', 'mstats', 'mstats_basic',
            'mstats_extras', 'multinomial', 'multivariate_normal', 'mvn',
            'mvsdist', 'nakagami', 'nbinom', 'ncf', 'nct', 'ncx2', 'norm',
            'normaltest', 'norminvgauss', 'obrientransform', 'ortho_group',
            'pareto', 'pearson3', 'pearsonr', 'percentileofscore', 'planck',
            'pointbiserialr', 'poisson', 'power_divergence', 'powerlaw',
            'powerlognorm', 'powernorm', 'ppcc_max', 'ppcc_plot',
            'print_function', 'probplot', 'randint', 'random_correlation',
            'rankdata', 'ranksums', 'rayleigh', 'rdist', 'recipinvgauss',
            'reciprocal', 'relfreq', 'rice', 'rv_continuous', 'rv_discrete',
            'rv_histogram', 'scoreatpercentile', 'sem', 'semicircular',
            'shapiro', 'sigmaclip', 'skellam', 'skew', 'skewnorm', 'skewtest',
            'spearmanr', 'special_ortho_group', 'statlib', 'stats', 't',
            'test', 'theilslopes', 'tiecorrect', 'tmax', 'tmean', 'tmin',
            'trapz', 'triang', 'trim1', 'trim_mean', 'trimboth', 'truncexpon',
            'truncnorm', 'tsem', 'tstd', 'ttest_1samp', 'ttest_ind',
            'ttest_ind_from_stats', 'ttest_rel', 'tukeylambda', 'tvar',
            'uniform', 'unitary_group', 'variation', 'vonmises',
            'vonmises_line', 'wald', 'wasserstein_distance', 'weibull_max',
            'weibull_min', 'weightedtau', 'wilcoxon', 'wishart', 'wrapcauchy',
            'zipf', 'zmap', 'zscore'
        ]
        distResults = []
        params = {}
        print()
        for distName in distribution:
            try:
                dist = getattr(stats, distName)
                param = dist.fit(Dataset)
                params[distName] = param
                # Test the data that was actually fitted (the argument), not
                # the raw frame from the enclosing scope.
                D, p = stats.kstest(Dataset, distName, args=param)
                print("P valor para: " + distName + " = " + str(p))
                distResults.append((distName, p))
            except Exception:
                # Deliberate best-effort: non-distribution names and failed
                # fits are skipped silently.
                continue
        print()
        bestDist, bestP = (max(distResults, key=lambda item: item[1]))
        return bestDist, bestP, params[bestDist]

    # Load data set
    data = pd.read_csv("data.txt", header=None)
    y = pd.DataFrame(np.array(data))
    # Index 1..n sized from the data instead of a fixed 23 observations.
    x = pd.DataFrame(np.arange(1, len(y) + 1))

    slope, intercept, r_value, p_value, std_err = stats.linregress(x[0], y[0])
    plt.plot(x, y, 'o', label='original data')
    plt.plot(x, intercept + slope * x, 'r', label='fitted line')
    plt.legend()
    plt.show()

    print("p valor es:", p_value)
    if p_value > 0.05:
        # No significant trend: report the sample as identically distributed.
        print("La muestra es Identicamente distribuida: ID")

    # NOTE(review): the original indentation was lost; the statements below
    # are assumed to run unconditionally - confirm whether they belong inside
    # the `if` above.
    print(acf(y[0]))
    plot_acf(y[0])
    plt.show()
    get_distribution(y[0])
def check_autocorrelation(series: pd.Series, show_plot: bool = False):
    """Return the autocorrelation function of ``series``.

    Args:
        series: Series to analyse.
        show_plot: When true, ``autocorrelation_plot(series)`` is called
            before the ACF is computed.

    Returns:
        The result of ``acf(series)``.
    """
    if not show_plot:
        return acf(series)

    autocorrelation_plot(series)
    return acf(series)
# Note: the data is not seasonal, so a one-step difference (".shift(1)") is
# used; for seasonal data use ".shift(12)" (or the seasonal period) instead.
df.head()
df.plot()
adfuller_test(df["Close First Difference"].dropna())
print(df)

# acf/pacf are imported from their documented module, statsmodels.tsa.stattools;
# statsmodels.graphics.tsaplots only re-exports them incidentally.
from statsmodels.tsa.stattools import acf, pacf
import matplotlib.pyplot as plt
import numpy as np

# Drop the NaNs once and reuse the cleaned series below.
log_shift = df["df_log_shift"].dropna()
# Approximate 95% band for a white-noise series of this length.
conf_bound = 1.96 / np.sqrt(len(log_shift))

lag_acf = acf(log_shift, nlags=50)
plt.figure(figsize=(16, 7))
plt.plot(lag_acf, marker="o")
for level in (0, -conf_bound, conf_bound):
    plt.axhline(y=level, linestyle='--', color='gray')
plt.title('Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()

lag_pacf = pacf(log_shift, nlags=50, method='ols')
def createarima(self, dataconfig):
    """Fit a seasonal ARIMA model described by a YAML config and persist it.

    The config is read for the keys ``clean_data_address``, ``location``,
    ``frequency``, ``id`` and ``experimentname``. The data CSV must contain a
    ``y`` column.

    Steps: pick the seasonal period from ``frequency``, search orders with
    ``pm.auto_arima``, refit with ``SARIMAX``, compute metrics, write
    ``metrics.csv`` and ``plot.html`` (fit, ACF and PACF figures) and pickle
    the fitted model.

    Args:
        dataconfig: Path to the YAML configuration file.

    Returns:
        dict with keys ``Successful``, ``cleanDataPath``, ``metricsLocation``,
        ``pickleFolderPath``, ``pickleFilePath``, ``plotLocation`` and
        ``accuracy`` (entry 3 of the metrics row).
    """
    with open(dataconfig) as f:
        # NOTE(review): FullLoader can still build some Python objects; if the
        # config may come from an untrusted source, yaml.safe_load is safer.
        dataconfigfile = yaml.load(f, Loader=FullLoader)

    metrics = pd.DataFrame(columns=[
        'modelname', 'mean_absolute_error', 'mean_squared_error', 'r2_score',
        'mean_squared_log_error'
    ])
    data = pd.read_csv(dataconfigfile["clean_data_address"])
    choice = dataconfigfile['frequency']

    # Seasonal period per sampling frequency; unknown codes fall back to 12.
    diction = {
        "D": 7,
        "W": 52,
        "M": 12,
        "Q": 4,
        "Y": 2,
    }
    freq = diction.get(choice, 12)
    print("frequency", freq)

    with open("logs.log", "a+") as f:
        f.write("Frequency=" + str(freq) + "\n")
        f.write("Creating Arima models\n")
        f.write("Please wait trying different models...\n")
        f.write("Trained on several models\n")
        f.write("Selecting best model\n")

    # Order search, run inside a 15-second StepwiseContext.
    with StepwiseContext(max_dur=15):
        model = pm.auto_arima(data,
                              stepwise=True,
                              error_action='ignore',
                              seasonal=True,
                              m=freq,
                              trace=True)

    best_params = model.get_params(deep=False)
    order = best_params['order']
    seasonal = best_params['seasonal_order']
    print("order=", order)
    print("seasonal", seasonal)
    print("frequency", freq)

    modelfinal = SARIMAX(data, order=order, seasonal_order=seasonal).fit()

    # NOTE(review): predicting 1..len(data) and then re-labelling with
    # data.index looks shifted by one step relative to the observations -
    # confirm that this alignment is intended.
    start = 1
    end = len(data)
    compare = modelfinal.predict(start=start, end=end, typ='levels')
    compare.index = data.index

    metrics_new_row = met.calculate_metrics("arima", "Regression", data['y'],
                                            compare)
    metricsLocation = os.path.join(dataconfigfile["location"], "metrics.csv")
    metrics.loc[len(metrics.index)] = metrics_new_row
    metrics.to_csv(metricsLocation, index=True)
    r2score = metrics_new_row[3]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data.index, y=data.y, name="actual"))
    fig.add_trace(
        go.Scatter(x=compare.index, y=compare, name="predictions"))
    plotlocation = os.path.join(dataconfigfile['location'], "plot.html")

    acf_ = pd.DataFrame(acf(data['y']), columns=['data'])
    pacf_ = pd.DataFrame(pacf(data['y']), columns=['data'])
    fig2 = self.plot_graphs(acf_, "Auto correlative function")
    fig3 = self.plot_graphs(pacf_, "Partial-Auto correlative funtion")

    # Figures are appended to the HTML report.
    with open(plotlocation, 'a') as f:
        f.write(fig.to_html(include_plotlyjs='cdn', full_html=False))
        f.write(fig2.to_html(include_plotlyjs='cdn', full_html=False))
        f.write(fig3.to_html(include_plotlyjs='cdn', full_html=False))

    location = os.path.join(dataconfigfile["location"],
                            str(dataconfigfile["id"]) + "_model")
    # exist_ok so that re-running the same experiment id does not fail.
    os.makedirs(location, exist_ok=True)
    name = str(dataconfigfile["experimentname"]) + str(
        dataconfigfile["id"]) + "_model"
    pickleFilePath = os.path.join(location, name)
    with open(pickleFilePath, 'wb') as pkl:
        pickle.dump(modelfinal, pkl)

    return {
        "Successful": True,
        "cleanDataPath": dataconfigfile["clean_data_address"],
        "metricsLocation": metricsLocation,
        "pickleFolderPath": location,
        "pickleFilePath": pickleFilePath,
        "plotLocation": plotlocation,
        "accuracy": r2score
    }