Ejemplo n.º 1
0
    def _take_action(self, action):
        """Trade one share according to the sign of the lag-1 autocorrelation.

        The ``action`` argument is accepted for interface compatibility but
        is always overwritten by the rule below before it is used.

        Rule: with positive lag-1 autocorrelation of the returns seen so far
        the last observation is followed (momentum); otherwise it is faded
        (mean reversion). Balance and share count from before the trade are
        stored in ``last_balance`` / ``last_shares_held``.
        """
        price = self.price
        # Snapshot the pre-trade portfolio state.
        self.last_balance = self.balance
        self.last_shares_held = self.shares_held

        lot = 1  # fixed trade size, in shares

        lag1 = acf(self.returns[:self.current_step], fft=True)[1]
        trending = bool(lag1 > 0)
        last_up = bool(self.obs[-1] > 0)

        # momentum + up move      -> buy     momentum + down move      -> sell
        # mean-reversion + up move -> sell   mean-reversion + down move -> buy
        if trending == last_up:
            self.shares_held += lot
            self.balance -= lot * price
        else:
            self.shares_held -= lot
            self.balance += lot * price
Ejemplo n.º 2
0
def forecast_accuracy(forecast, actual):
    """Print and return a set of forecast-accuracy metrics.

    Args:
        forecast: 1-D array of predicted values. It is indexed with
            ``[:, None]``, so it must support NumPy-style indexing.
        actual: 1-D array of observed values, same length as ``forecast``.

    Returns:
        dict with the keys ``mape``, ``me``, ``mae``, ``mpe``, ``rmse``,
        ``acf1``, ``corr`` and ``minmax``. The same dict is printed.
    """
    error = forecast - actual

    mape = np.mean(np.abs(error) / np.abs(actual))  # mean absolute % error
    me = np.mean(error)  # mean error (bias)
    mae = np.mean(np.abs(error))  # mean absolute error
    mpe = np.mean(error / actual)  # mean percentage error
    rmse = np.mean(error**2)**.5  # root mean squared error
    corr = np.corrcoef(forecast, actual)[0, 1]  # forecast/actual correlation

    # Element-wise min/max of each (forecast, actual) pair.
    paired = np.hstack([forecast[:, None], actual[:, None]])
    mins = np.amin(paired, axis=1)
    maxs = np.amax(paired, axis=1)
    minmax = 1 - np.mean(mins / maxs)  # min-max error

    # Lag-1 autocorrelation of the forecast errors. The original referenced
    # the undefined names `fc` and `test` here, which raised NameError.
    acf1 = acf(error)[1]

    metrics = {
        'mape': mape,
        'me': me,
        'mae': mae,
        'mpe': mpe,
        'rmse': rmse,
        'acf1': acf1,
        'corr': corr,
        'minmax': minmax
    }
    print(metrics)
    return metrics
Ejemplo n.º 3
0
def acf_plots(data):
    """Plot the autocorrelation of every sample in a grid of subplots.

    Args:
        data: sized, integer-indexable collection; each element must provide
            an ``"entrance_queue_timeseries"`` entry holding the series.

    Each subplot shows the autocorrelation for lags 0..100 together with a
    shaded band obtained by subtracting the estimate from the confidence
    interval, i.e. the interval re-centred on zero.
    """
    n_lags = 100
    lag_axis = range(0, n_lags + 1)

    n_rows = int(len(data) / 3)
    if n_rows < 1:
        # Fewer than three samples: the original `< 0` test never fired and
        # the division below raised ZeroDivisionError.
        n_rows = 1
    n_cols = int(len(data) / n_rows)
    if n_rows * n_cols < len(data):
        n_cols += 1

    plt.figure(figsize=(16, 8))
    plt.suptitle(
        'Autocorrelations and 95% confidence intervals of no autocorrelation',
        fontsize=12)
    for i in range(len(data)):
        plt.subplot(n_rows, n_cols, i + 1)
        acf_vals, confint = tsaplots.acf(data[i]["entrance_queue_timeseries"],
                                         nlags=n_lags,
                                         alpha=0.05,
                                         fft=False)
        plt.plot(lag_axis, acf_vals, 'ob')
        # Shift the interval by the estimate so the band sits around zero.
        plt.fill_between(lag_axis, (confint[:, 0] - acf_vals),
                         (confint[:, 1] - acf_vals),
                         color='b',
                         alpha=.1)
        plt.title("Sample %i" % (i + 1))
        plt.xlabel('Lag (1 = 10 time steps)')
        plt.ylabel('Autocorrelation')
        plt.ylim((-1.05, 1.05))
        plt.tight_layout()
    plt.subplots_adjust(top=0.90)
    plt.show()
Ejemplo n.º 4
0
    def plot_correlogram(self, lags=10, title=None):
        """Show a 2x2 diagnostic figure for ``self.data``.

        Panels: the raw series (annotated with the largest Ljung-Box p-value
        and the ADF p-value), a probability plot (annotated with the first
        four moments), the ACF and the PACF.

        Args:
            lags: number of lags for the ACF/PACF panels and the Q test.
                ``None`` selects ``min(10, len(data) // 5)``.
            title: figure title.
        """
        # NOTE: without passing residuals this method cannot be used by the
        # optimal brute force finder.
        matplotlib.use(
            'TkAgg'
        )  # NOTE: necessary due to inheritance of TimeSeries which uses 'Agg'
        x = self.data
        lags = min(10, int(len(x) / 5)) if lags is None else lags
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))
        axes[0][0].plot(x.values)  # Residuals
        # TODO: overlay a moving average of the residuals on this panel.

        # q_stat expects the autocorrelations for lags >= 1. acf() also
        # returns lag 0 (always 1.0), so drop it before computing the test;
        # element [1] of q_stat's result holds the p-values.
        q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
        stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
        axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)
        probplot(x, plot=axes[0][1])
        mean, var, skew, kurtosis = moment(x, moment=[1, 2, 3, 4])
        s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
        axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)
        plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
        plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
        axes[1][0].set_xlabel('Lag')
        axes[1][1].set_xlabel('Lag')
        fig.suptitle(title, fontsize=14)
        sns.despine()
        fig.tight_layout()
        fig.subplots_adjust(top=.9)
        print('plotting')
        plt.show()
def acf_examine(data):
    """Report the lags whose autocorrelation is strong and plot the ACF.

    A lag counts as strong when the absolute autocorrelation is at least 0.5.

    Returns:
        Tuple ``(lags, values)``: the strong lag indices and the matching
        autocorrelation values, in increasing lag order.
    """
    series = np.array(data)
    coefficients = acf(series)

    strong = [(lag, value) for lag, value in enumerate(coefficients)
              if abs(value) >= 0.5]
    strong_lags = [lag for lag, _ in strong]
    strong_values = [value for _, value in strong]

    plot_acf(series)
    return (strong_lags, strong_values)
Ejemplo n.º 6
0
    def _next_observation(self):
        """Advance the return window by one step and build the observation.

        The oldest return is dropped from ``self.obs_ret``, a fresh one from
        ``self.new_return()`` is appended, and the price is moved by that
        return (the previous price is kept in ``self.last_price``).

        Returns:
            1-D array: the current window of returns followed by its
            autocorrelations for lags 1..``self.n_autocorr``.
        """
        # Slide the window: oldest return out, newest return in.
        self.obs_ret.popleft()
        latest = self.new_return()
        self.obs_ret.append(latest)

        self.last_price = self.price
        self.price += latest

        returns_part = np.array(self.obs_ret)
        # Lag 0 is skipped; only lags 1..n_autocorr are kept.
        autocorr_part = acf(self.obs_ret, fft=True)[1:self.n_autocorr + 1]
        return np.concatenate([returns_part, autocorr_part])
Ejemplo n.º 7
0
def calc_acf_samples(samples: np.ndarray, burn_in: int, dim: int):
    """Autocorrelation of a 1-D sample chain after discarding the burn-in.

    Args:
        samples: chain of samples, or ``None``.
        burn_in: number of leading samples to discard.
        dim: dimensionality of the samples; only ``1`` is supported.

    Returns:
        The autocorrelation values of ``samples[burn_in:]``, or ``None`` when
        ``samples`` is ``None`` (the original raised UnboundLocalError here).

    Raises:
        Warning: when ``dim != 1`` or ``samples`` has fewer than two entries.
    """
    if samples is None:
        return None

    if not (dim == 1 and len(samples) > 1):
        # todo: acf for multi-dimensional input
        raise Warning('utils.calc_acf_samples not defined for multi-dimensional input')

    n_samples = len(samples)
    samples_wo_burn_in = samples[burn_in:n_samples]
    return acf(samples_wo_burn_in, fft=True, nlags=n_samples - burn_in)
Ejemplo n.º 8
0
def plot_correlogram(x, lags=None, title=None):
    """Draw a 2x2 diagnostic figure for the series ``x``.

    Panels: the series itself (annotated with the largest Ljung-Box p-value
    and the ADF p-value), a probability plot (annotated with the first four
    moments), the ACF and the PACF.

    Args:
        x: series to diagnose; must provide ``.plot(ax=...)``.
        lags: number of lags for the ACF/PACF panels and the Q test.
            ``None`` selects ``min(10, len(x) // 5)``.
        title: figure title.
    """
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))
    x.plot(ax=axes[0][0])

    # q_stat expects the autocorrelations for lags >= 1. acf() also returns
    # lag 0 (always 1.0), so drop it before computing the test; element [1]
    # of q_stat's result holds the p-values.
    q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
    stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)
    probplot(x, plot=axes[0][1])
    mean, var, skew, kurtosis = moment(x, moment=[1, 2, 3, 4])
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)
    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')
    fig.suptitle(title, fontsize=20)
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
    def _next_observation(self):
        # Get the stock data points for the last self.n_lag days
        if self.observe_type == 'return':
            obs = self.df_inc.iloc[self.current_step - self.n_lag:self.
                                   current_step].values.flatten()
        elif self.observe_type == 'price':
            obs = self.df_inc.iloc[self.current_step - self.n_lag:self.
                                   current_step].values.flatten()
        elif self.observe_type == 'autocorr':
            ret = self.df_inc.iloc[self.current_step - self.n_lag:self.
                                   current_step].values.flatten()
            obs = np.concatenate([
                acf(ret, fft=True)[1:self.n_autocorr + 1],
                ret[-1:-self.n_autocorr - 1:-1]
            ])
        else:
            raise Exception('{} not an allowed observation type.'.format(
                self.observe_type))

        return obs
Ejemplo n.º 10
0
#plt.plot(movingAverage)

# 1. Subtract the moving average from the log series (tsLog and
#    movingAverage are defined earlier in the script) and drop the NaN rows.
logAndMA = tsLog - movingAverage
logAndMA.dropna(inplace=True)
#rollingStatPlot(logAndMA)

# 2. First difference of the log series (d=1); shift() leaves a NaN in the
#    first row, which is dropped.
diff = tsLog - tsLog.shift()
diff.dropna(inplace=True)
#rollingStatPlot(diff)

# Find p and q from the ACF and PACF of the differenced series.
from statsmodels.graphics.tsaplots import acf, pacf

acfLag = acf(diff, nlags=20)
pacfLag = pacf(diff, nlags=20, method='ols')

# Plot ACF (left panel) with a zero line and a +/-1.96/sqrt(N) reference band.
plt.subplot(121)
plt.plot(acfLag)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Plot PACF (right panel) with the same reference lines.
plt.subplot(122)
plt.plot(pacfLag)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
Ejemplo n.º 11
0
    y3[t]=0.8*y3[t-1]+u[t]+0.6*u[t-1]
# Plot the y3 series built by the loop above.
plt.plot(y3,'o-');

# 5.2.4 ARIMA model
# Simulate a random walk (cumulative sum of standard normal draws) and plot
# it next to its first difference.
np.random.seed(12)
n=100
y4=np.random.randn(n).cumsum()
plt.plot(y4,'o-')
dy4=np.diff(y4)
plt.plot(dy4,'o-')
plt.plot(y4,'o-',dy4,'*-');plt.axhline(0);

# 5.3 ARMA model
## 5.3.1 Correlation tests for the series
from statsmodels.graphics.tsaplots import acf,plot_acf
# Autocorrelations of y2, rounded to three decimals.
np.round(acf(y2),3)

plot_acf(y1); # autocorrelation coefficients of the MA(1) model

def ac_QP(Yt):
    """Tabulate autocorrelations with their Q statistics and p-values.

    Returns:
        DataFrame with the columns ``AC`` (autocorrelation, lag 0 excluded),
        ``Q`` and ``Prob(>Q)``, one row per lag.
    """
    import statsmodels.api as sm

    autocorr, q_stats, p_values = sm.tsa.acf(Yt, qstat=True)
    # The autocorrelations include lag 0; drop it so the three columns line up.
    table = np.column_stack((autocorr[1:], q_stats, p_values))
    return pd.DataFrame(table, columns=["AC", "Q", "Prob(>Q)"])
# First ten rows of the AC / Q / p-value table for y2.
ac_QP(y2)[:10]

from statsmodels.graphics.tsaplots import pacf,plot_pacf
# Partial autocorrelations of y1, rounded to three decimals.
np.round(pacf(y1),3)

plot_pacf(y2); # partial autocorrelation coefficients of the AR(1) model
Ejemplo n.º 12
0
# =============================================================================

######## Step 8 #########

# The new bitcoin dataset has been formed. From now on only the rows dated
# 2017-01-01 or later are used.
bitcoin = bitcoin[bitcoin['Date'] >= "2017-01-01"]
bitcoin['bprice'].plot()
# plot8 = plt.plot(bitcoin['Date'],bitcoin['bprice'])

######## Step 9 #########

# ACF and PACF of the 'dbprice' column, with the NaN rows dropped.
from statsmodels.graphics.tsaplots import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

acf(bitcoin['dbprice'].dropna())
pacf(bitcoin['dbprice'].dropna())

# Plots for acf and pacf
plot_acf(bitcoin['dbprice'].dropna())
plot_pacf(bitcoin['dbprice'].dropna())

######## Step 10 #########

from statsmodels.tsa.arima_model import ARIMA

# Regressor matrix (with a constant column) and target. The first row of
# each is dropped.
x = bitcoin['doil']
# `axis` must be passed by keyword: pandas 2.x rejects it positionally.
bigX = sm.add_constant(
    pd.concat((x, bitcoin['deuro'], bitcoin['dgold'], bitcoin['dsp']),
              axis=1))[1:]
x = x[1:]
y = bitcoin['dbprice'][1:]
Ejemplo n.º 13
0
 def objective_func(self, H: float) -> float:
     """Distance between the model and the empirical autocorrelation.

     Returns the Euclidean norm of the difference between the
     autocorrelation of ``self.df_inc`` (lags 0..``self.n_lags``) and the
     values produced by ``self.autocorr_frac_noise_range(H)``.
     """
     model_acf = self.autocorr_frac_noise_range(H)
     empirical_acf = acf(self.df_inc, nlags=self.n_lags)
     residual = empirical_acf - model_acf
     return np.linalg.norm(residual)
Ejemplo n.º 14
0
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf, acf, pacf

# sourcing df from Data Preparation
from Data_prep.Data_preparation import MSFTdf

# exporting ACF and PACF
# Lags 0..30 are requested explicitly: the default number of lags returned
# by acf()/pacf() depends on the statsmodels version and the sample size, so
# relying on it could give fewer than 31 values and make plt.bar fail on a
# shape mismatch.
acf_plot = plot_acf(MSFTdf.IntChange)
acf_vals = acf(MSFTdf.IntChange, nlags=30)
plt.bar(range(len(acf_vals)), acf_vals)
#plt.savefig('MSFT_ACF')

pacf_plot = plot_pacf(MSFTdf.IntChange)
pacf_vals = pacf(MSFTdf.IntChange, nlags=30)
plt.bar(range(len(pacf_vals)), pacf_vals)
#plt.savefig('MSFT_PACF')

plt.show()
def main():
    """Load ``data.txt``, test it for trend and autocorrelation, fit distributions.

    Steps: fit a linear regression of the data against the positions 1..23,
    plot it and print its p-value; print and plot the autocorrelation; then
    search ``scipy.stats`` for the best-fitting distribution.
    """

    def get_distribution(Dataset):
        """Fit every candidate in ``stats`` and pick the best by KS p-value.

        Returns:
            Tuple ``(name, p_value, fitted_params)`` of the candidate with
            the largest Kolmogorov-Smirnov p-value.

        Raises:
            ValueError: if no candidate could be fitted (``max`` of an empty
                list).
        """
        distribution = [
            '_binned_statistic', '_constants', '_continuous_distns',
            '_discrete_distns', '_distn_infrastructure', '_distr_params',
            '_multivariate', '_stats', '_stats_mstats_common',
            '_tukeylambda_stats', 'absolute_import', 'alpha', 'anderson',
            'anderson_ksamp', 'anglit', 'ansari', 'arcsine', 'argus',
            'bartlett', 'bayes_mvs', 'bernoulli', 'beta', 'betaprime',
            'binned_statistic', 'binned_statistic_2d', 'binned_statistic_dd',
            'binom', 'binom_test', 'boltzmann', 'boxcox', 'boxcox_llf',
            'boxcox_normmax', 'boxcox_normplot', 'bradford', 'burr', 'burr12',
            'cauchy', 'chi', 'chi2', 'chi2_contingency', 'chisquare',
            'circmean', 'circstd', 'circvar', 'combine_pvalues', 'contingency',
            'cosine', 'crystalball', 'cumfreq', 'describe', 'dgamma',
            'dirichlet', 'distributions', 'division', 'dlaplace', 'dweibull',
            'energy_distance', 'entropy', 'erlang', 'expon', 'exponnorm',
            'exponpow', 'exponweib', 'f', 'f_oneway', 'fatiguelife',
            'find_repeats', 'fisher_exact', 'fisk', 'fligner', 'foldcauchy',
            'foldnorm', 'friedmanchisquare', 'gamma', 'gausshyper',
            'gaussian_kde', 'genexpon', 'genextreme', 'gengamma',
            'genhalflogistic', 'genlogistic', 'gennorm', 'genpareto', 'geom',
            'gilbrat', 'gmean', 'gompertz', 'gumbel_l', 'gumbel_r',
            'halfcauchy', 'halfgennorm', 'halflogistic', 'halfnorm', 'hmean',
            'hypergeom', 'hypsecant', 'invgamma', 'invgauss', 'invweibull',
            'invwishart', 'iqr', 'itemfreq', 'jarque_bera', 'johnsonsb',
            'johnsonsu', 'kappa3', 'kappa4', 'ksone', 'kstat', 'kstatvar',
            'kstest', 'kstwobign', 'kurtosis', 'kurtosistest', 'laplace',
            'levene', 'levy', 'levy_l', 'levy_stable', 'linregress',
            'loggamma', 'logistic', 'loglaplace', 'lognorm', 'logser', 'lomax',
            'mannwhitneyu', 'matrix_normal', 'maxwell', 'mielke', 'mode',
            'moment', 'mood', 'morestats', 'moyal', 'mstats', 'mstats_basic',
            'mstats_extras', 'multinomial', 'multivariate_normal', 'mvn',
            'mvsdist', 'nakagami', 'nbinom', 'ncf', 'nct', 'ncx2', 'norm',
            'normaltest', 'norminvgauss', 'obrientransform', 'ortho_group',
            'pareto', 'pearson3', 'pearsonr', 'percentileofscore', 'planck',
            'pointbiserialr', 'poisson', 'power_divergence', 'powerlaw',
            'powerlognorm', 'powernorm', 'ppcc_max', 'ppcc_plot',
            'print_function', 'probplot', 'randint', 'random_correlation',
            'rankdata', 'ranksums', 'rayleigh', 'rdist', 'recipinvgauss',
            'reciprocal', 'relfreq', 'rice', 'rv_continuous', 'rv_discrete',
            'rv_histogram', 'scoreatpercentile', 'sem', 'semicircular',
            'shapiro', 'sigmaclip', 'skellam', 'skew', 'skewnorm', 'skewtest',
            'spearmanr', 'special_ortho_group', 'statlib', 'stats', 't',
            'test', 'theilslopes', 'tiecorrect', 'tmax', 'tmean', 'tmin',
            'trapz', 'triang', 'trim1', 'trim_mean', 'trimboth', 'truncexpon',
            'truncnorm', 'tsem', 'tstd', 'ttest_1samp', 'ttest_ind',
            'ttest_ind_from_stats', 'ttest_rel', 'tukeylambda', 'tvar',
            'uniform', 'unitary_group', 'variation', 'vonmises',
            'vonmises_line', 'wald', 'wasserstein_distance', 'weibull_max',
            'weibull_min', 'weightedtau', 'wilcoxon', 'wishart', 'wrapcauchy',
            'zipf', 'zmap', 'zscore'
        ]
        distResults = []
        params = {}
        print()
        for distName in distribution:
            # The list holds every public name of the stats module, not only
            # distributions. Entries that cannot be fitted or tested are
            # skipped on purpose (best-effort search).
            try:
                dist = getattr(stats, distName)
                param = dist.fit(Dataset)
                params[distName] = param
                # Test the data that was fitted. The original passed the
                # outer `data` frame here instead of the `Dataset` argument.
                D, p = stats.kstest(Dataset, distName, args=param)
                print("P valor para: " + distName + " = " + str(p))
                distResults.append((distName, p))
            except Exception:
                pass

        print()
        bestDist, bestP = (max(distResults, key=lambda item: item[1]))
        return bestDist, bestP, params[bestDist]

    #load data set
    data = pd.read_csv("data.txt", header=None)

    # Positions 1..23 serve as the regressor; the range is hard-coded.
    x = pd.DataFrame(np.array([x for x in range(1, 24)]))

    y = pd.DataFrame(np.array(data))

    slope, intercept, r_value, p_value, std_err = stats.linregress(x[0], y[0])
    plt.plot(x, y, 'o', label='original data')

    plt.plot(x, intercept + slope * x, 'r', label='fitted line')

    plt.legend()

    plt.show()

    print("p valor es:", p_value)

    # A regression p-value above 0.05 is reported as identically distributed.
    if (p_value > 0.05):
        print("La muestra es Identicamente distribuida: ID")

    print(acf(y[0]))
    plot_acf(y[0])
    plt.show()
    get_distribution(y[0])
Ejemplo n.º 16
0
def check_autocorrelation(series: pd.Series, show_plot: bool = False):
    """Return the autocorrelation of ``series``, optionally plotting it first.

    Args:
        series: the series to analyse.
        show_plot: when true, ``autocorrelation_plot(series)`` is called
            before the values are computed.

    Returns:
        The result of ``acf(series)``.
    """
    if show_plot:
        autocorrelation_plot(series)

    autocorrelations = acf(series)
    return autocorrelations
Ejemplo n.º 17
0
# Note: the data is not seasonal, so ".shift(1)" is used; for seasonal data
# use ".shift(12)" (or whatever the period is) instead.

df.head()

df.plot()

# Stationarity test on the first-differenced close series (NaN rows dropped).
adfuller_test(df["Close First Difference"].dropna())

print(df)

from statsmodels.graphics.tsaplots import acf, pacf
import matplotlib.pyplot as plt
import numpy as np

# Autocorrelation of the "df_log_shift" column for lags 0..50, plotted with
# a zero line and a +/-1.96/sqrt(N) reference band.
lag_acf = acf(df["df_log_shift"].dropna(), nlags=50)
plt.figure(figsize=(16, 7))
plt.plot(lag_acf, marker="o")
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()

# Partial autocorrelation of the same column, estimated by OLS.
lag_pacf = pacf(df["df_log_shift"].dropna(), nlags=50, method='ols')
Ejemplo n.º 18
0
    def createarima(self, dataconfig):
        """Fit a seasonal ARIMA model and write metrics, plots and a pickle.

        Args:
            dataconfig: path to a YAML file providing the keys
                ``clean_data_address``, ``location``, ``frequency``, ``id``
                and ``experimentname``.

        Steps: load the cleaned data; choose the seasonal period from the
        configured frequency; search the model orders with ``auto_arima``;
        refit with ``SARIMAX``; compute metrics on in-sample predictions;
        write ``metrics.csv`` and ``plot.html`` (actual vs. predicted, ACF,
        PACF); pickle the fitted model into a new ``<id>_model`` directory.

        Returns:
            dict with the keys ``Successful``, ``cleanDataPath``,
            ``metricsLocation``, ``pickleFolderPath``, ``pickleFilePath``,
            ``plotLocation`` and ``accuracy`` (the r2 score).

        Raises:
            FileExistsError: if the model directory already exists.
        """
        # NOTE(review): FullLoader can still build some Python objects;
        # consider yaml.safe_load if the config may come from an untrusted
        # source.
        with open(dataconfig) as f:
            dataconfigfile = yaml.load(f, Loader=FullLoader)
        metrics = pd.DataFrame(columns=[
            'modelname', 'mean_absolute_error', 'mean_squared_error',
            'r2_score', 'mean_squared_log_error'
        ])

        data = pd.read_csv(dataconfigfile["clean_data_address"])
        base_location = dataconfigfile["location"]
        choice = dataconfigfile['frequency']
        # Seasonal period per frequency code; unknown codes fall back to 12.
        diction = {
            "D": 7,
            "W": 52,
            "M": 12,
            "Q": 4,
            "Y": 2,
        }
        freq = diction.get(choice, 12)
        print("frequency", freq)
        with open("logs.log", "a+") as f:
            f.write("Frequency=" + str(freq) + "\n")
            f.write("Creating Arima models\n")
            f.write("Please wait trying different models...\n")
            f.write("Trained on several models\n")
            f.write("Selecting best model\n")
        # warnings.filterwarnings("ignore")
        # sys.stdout=open("logs.log","a+")
        with StepwiseContext(max_dur=15):
            model = pm.auto_arima(data,
                                  stepwise=True,
                                  error_action='ignore',
                                  seasonal=True,
                                  m=freq,
                                  trace=True)
        # sys.stdout.close()
        #metrics=met.calculate_metrics("fbprophet","Regression",testpred,testactual)
        order = model.get_params(deep=False)['order']
        seasonal = model.get_params(deep=False)['seasonal_order']
        print("order=", order)
        print("seasonal", seasonal)
        print("frequency", freq)
        modelfinal = SARIMAX(data, order=order, seasonal_order=seasonal).fit()

        # NOTE(review): predictions cover positions 1..len(data) and are then
        # relabelled with data.index (positions 0..len(data)-1). This looks
        # like a one-step shift between prediction and actual; confirm that
        # it is intended.
        start = 1
        end = len(data)
        compare = modelfinal.predict(start=start, end=end, typ='levels')

        compare.index = data.index

        metrics_new_row = met.calculate_metrics("arima", "Regression",
                                                data['y'], compare)
        metricsLocation = os.path.join(base_location, "metrics.csv")
        metrics.loc[len(metrics.index)] = metrics_new_row
        metrics.to_csv(metricsLocation, index=True)
        r2score = metrics_new_row[3]

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data.index, y=data.y, name="actual"))

        fig.add_trace(
            go.Scatter(x=compare.index, y=compare, name="predictions"))

        plotlocation = os.path.join(base_location, "plot.html")
        acf_ = acf(data['y'])
        acf_ = pd.DataFrame(acf_, columns=['data'])
        pacf_ = pacf(data['y'])
        pacf_ = pd.DataFrame(pacf_, columns=['data'])
        fig2 = self.plot_graphs(acf_, "Auto correlative function")
        fig3 = self.plot_graphs(pacf_, "Partial-Auto correlative funtion")
        # Opened in append mode: repeated runs add to the same HTML file.
        with open(plotlocation, 'a') as f:
            f.write(fig.to_html(include_plotlyjs='cdn', full_html=False))
            f.write(fig2.to_html(include_plotlyjs='cdn', full_html=False))
            f.write(fig3.to_html(include_plotlyjs='cdn', full_html=False))

        # modelfinal=auto_arima(data['y'], trace=True,suppress_warnings=True, seasonal=True)
        location = os.path.join(base_location,
                                str(dataconfigfile["id"]) + "_model")
        os.makedirs(location)
        name = str(dataconfigfile["experimentname"]) + str(
            dataconfigfile["id"]) + "_model"
        # modelfinal.save(name)
        pickleFilePath = os.path.join(location, name)
        with open(pickleFilePath, 'wb') as pkl:
            pickle.dump(modelfinal, pkl)

        # shutil.move(name,location)

        return {
            "Successful": True,
            "cleanDataPath": dataconfigfile["clean_data_address"],
            "metricsLocation": metricsLocation,
            "pickleFolderPath": location,
            "pickleFilePath": pickleFilePath,
            "plotLocation": plotlocation,
            "accuracy": r2score
        }