def test_plot_kwarg(self):
    """Check that ``plot`` accepts both ``matplotlib.pyplot`` and an Axes.

    Each check draws on its own figure and closes that figure by handle,
    so the second check never touches a figure that was already closed.
    """
    # Check with the matplotlib.pyplot module
    fig = plt.figure()
    fig.add_subplot(111)
    stats.boxcox_normplot(self.x, -20, 20, plot=plt)
    plt.close(fig)

    # Check that a Matplotlib Axes object is accepted.  A new figure is
    # created here because the one used above has been closed.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    stats.boxcox_normplot(self.x, -20, 20, plot=ax)
    plt.close(fig)
def test(samples, la=-20, lb=20):
    """Display the Box-Cox normality plot of ``samples``.

    The plot covers lambdas from ``la`` to ``lb``.  A red vertical line
    marks the value returned by ``boxcox_normmax(samples)``.  The figure is
    shown with ``plt.show()``; nothing is returned.
    """
    axes = plt.figure().add_subplot(111)
    boxcox_normplot(samples, la, lb, plot=axes)

    optimum = boxcox_normmax(samples)
    axes.axvline(optimum, color='r')

    plt.show()
def test_basic(self):
    """Compare ``boxcox_normplot`` output on ``self.x`` with stored values.

    Five lambdas evenly spaced over [-10, 10] are requested; both the
    returned lambdas and the correlation coefficients are checked.
    """
    n_points = 5
    expected_ppcc = [0.57783375, 0.83610988, 0.97524311, 0.99756057,
                     0.95843297]
    expected_lmbdas = np.linspace(-10, 10, num=n_points)

    lmbdas, ppcc = stats.boxcox_normplot(self.x, -10, 10, N=n_points)

    assert_allclose(lmbdas, expected_lmbdas)
    assert_allclose(ppcc, expected_ppcc)
def train_theta_boxcox(ts, seasonality, n):
    """Forecast ``n`` steps with a Theta model fitted on Box-Cox data.

    Steps, in order:

    1. If ``ts`` holds negative values, shift it by ``-ts.min() + 100``.
    2. Box-Cox transform the values, adjusting lambda in the negative case
       (see the inline comments).
    3. Fit ``Theta(theta=0, season_mode=SeasonalityMode.NONE)`` and predict
       ``n`` steps.
    4. Invert the transform, undo the shift, multiply by ``seasonality``
       and replace negative forecast values by zero.

    Parameters
    ----------
    ts
        Series to fit; must offer ``univariate_values()``, ``time_index()``
        and ``min()``.
    seasonality
        Series multiplied into the forecast; its time index is used as the
        time index of the forecast.
    n
        Number of steps passed to ``predict``.

    Returns
    -------
    The value produced by the final forecast expression (see the review
    note near the end of the function).

    NOTE(review): the two inner ``if`` statements are assumed to be nested
    under ``if lmbd < 0`` -- confirm against the upstream file.
    """
    model = Theta(theta=0, season_mode=SeasonalityMode.NONE)

    # Shift the series upward when it contains negative values (Box-Cox is
    # defined for positive data only).
    offset = 0
    if (ts.univariate_values() < 0).any():
        offset = -ts.min() + 100
        ts = ts + offset

    transformed, lmbd = boxcox(ts.univariate_values())
    if lmbd < 0:
        # Scan lambdas in [lmbd - 1, 0]; if the correlation at the lowest
        # lambda is ~0, switch to the lambda with the highest correlation.
        candidates, scores = boxcox_normplot(
            ts.univariate_values(), lmbd - 1, 0, N=100
        )
        if np.isclose(scores[0], 0):
            lmbd = candidates[np.argmax(scores)]
            transformed = boxcox(ts.univariate_values(), lmbd)
        # A transform that collapsed to a constant is replaced by lambda 0.
        if np.isclose(transformed, transformed[0]).all():
            lmbd = 0
            transformed = boxcox(ts.univariate_values(), lmbd)

    ts = TimeSeries.from_times_and_values(ts.time_index(), transformed)
    model.fit(ts)
    forecast = model.predict(n)

    # Back to the original scale, re-indexed on the seasonality series.
    restored = inv_boxcox(forecast.univariate_values(), lmbd)
    forecast = TimeSeries.from_times_and_values(
        seasonality.time_index(), restored
    )
    if offset > 0:
        forecast = forecast - offset
    forecast = forecast * seasonality

    if (forecast.univariate_values() < 0).any():
        negative_idx = seasonality.time_index()[forecast < 0]
        # NOTE(review): relies on ``update(..., inplace=True)`` returning
        # the series -- confirm against the TimeSeries API in use.
        forecast = forecast.update(
            negative_idx, np.zeros(len(negative_idx)), inplace=True
        )
    return forecast
def box_cox_normality_plot(series, lambda_min=-2, lambda_max=2, N=100, ax=None, show=True, save=False):
    """Draw a Box-Cox normality plot and report the best lambda.

    Parameters
    ----------
    series
        Data passed to ``stats.boxcox_normplot``.
    lambda_min, lambda_max
        Bounds of the lambda range that is scanned.
    N
        Number of lambdas evaluated within the range.
    ax
        Axes to draw on; a new figure and axes are created when ``None``.
    show, save
        Forwarded to ``show_and_save_plot`` together with the file name
        ``"box_cox_normality.png"``.

    Returns
    -------
    tuple
        ``(lambdas, corrs, max_corr_lambda, max_corr_value)``: the scanned
        lambdas, their correlation coefficients, the lambda with the
        highest coefficient, and that coefficient.
    """
    if ax is None:
        _, ax = plt.subplots()

    lambdas, corrs = stats.boxcox_normplot(
        series, lambda_min, lambda_max, plot=ax, N=N
    )
    best_value = corrs.max()
    best_lambda = lambdas[corrs.argmax()]

    show_and_save_plot(show=show, save=save, filename="box_cox_normality.png")
    return lambdas, corrs, best_lambda, best_value
def histogram_boxcox_plot(var_name, var_data):
    """Plot a histogram of ``var_data`` above its Box-Cox normality plot.

    Box-Cox is only defined for strictly positive data.  When the minimum
    of ``var_data`` is zero or negative, the data is shifted upward so that
    its minimum becomes 1 before the Box-Cox computations; strictly
    positive data is used unchanged.  The histogram always shows the
    original, unshifted data.

    Parameters
    ----------
    var_name : str
        Name used in the histogram title and x-axis label.
    var_data
        Data to inspect.  Must provide a ``hist(ax=..., rasterized=...)``
        method (presumably a pandas Series -- verify against callers).

    Returns
    -------
    The figure holding both subplots.  The red vertical line and the text
    label mark the lambda returned by ``stats.boxcox`` for the (possibly
    shifted) data.
    """
    # Shift so that the smallest value is 1 whenever the data is not
    # strictly positive.  (Adding a negative minimum, as a plain
    # ``x + min`` would, moves the data further away from the valid range.)
    values = np.asarray(var_data, dtype=float)
    lowest = values.min()
    offset = 1 - lowest if lowest <= 0 else 0
    shifted = values + offset

    # Histogram and boxcox plot for output
    fig = plt.figure(figsize=(8,11), dpi=300)
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)

    # histogram of original data
    ax1.set_title("Distribution of " + var_name)
    ax1.set_xlabel(var_name)
    var_data.hist(ax=ax1, rasterized=True)

    # boxcox norm plot of shifted data
    lmbdas, ppcc = stats.boxcox_normplot(shifted, -10, 10, plot=ax2)
    ax2.plot(lmbdas, ppcc, 'bo')

    # adding vertical line at the lambda selected by stats.boxcox
    _, maxlog = stats.boxcox(shifted)
    ax2.axvline(maxlog, color='r')
    ax2.text(maxlog + 0.1, 0, s=str(maxlog))

    return fig
def test_empty(self): assert_(stats.boxcox_normplot([], 0, 1).size == 0)
from scipy import stats
import matplotlib.pyplot as plt

# Draw a sample that is not normally distributed and show its Box-Cox
# normality plot for lambdas between -20 and 20.
x = stats.loggamma.rvs(5, size=500) + 5

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
prob = stats.boxcox_normplot(x, -20, 20, plot=ax)

# Mark the ``lmbda`` that ``stats.boxcox`` selects for ``x`` with a red
# vertical line on the same axes.
maxlog = stats.boxcox(x)[1]
ax.axvline(maxlog, color='r')

plt.show()
# Draw random data from a Tukey-Lambda distribution whose shape parameter
# is -0.7.
# NOTE(review): ``np`` is not imported in this snippet -- presumably it is
# provided by the surrounding context.
from scipy import stats
import matplotlib.pyplot as plt

np.random.seed(1234567)
x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000) + 1e4

# Show three diagnostics side by side: a probability plot, a Box-Cox
# normplot and a PPCC plot.  The red line on the PPCC plot sits at -0.7,
# the shape parameter used above, where the PPCC value is expected to peak.
fig = plt.figure(figsize=(12, 4))
ax1, ax2, ax3 = (fig.add_subplot(1, 3, position) for position in (1, 2, 3))

res = stats.probplot(x, plot=ax1)
res = stats.boxcox_normplot(x, -5, 5, plot=ax2)
res = stats.ppcc_plot(x, -5, 5, plot=ax3)
ax3.vlines(-0.7, 0, 1, colors='r', label='Expected shape value')

plt.show()
from scipy import stats
import matplotlib.pyplot as plt

# NOTE(review): ``np`` is not imported in this snippet -- presumably it is
# provided by the surrounding context.
np.random.seed(1234)  # fixed seed keeps the numbers below reproducible

# Sample some data, then estimate the optimal ``lmbda`` both with
# ``stats.boxcox`` and with ``stats.boxcox_normmax``:
x = stats.loggamma.rvs(5, size=30) + 5
y, lmax_mle = stats.boxcox(x)
lmax_pearsonr = stats.boxcox_normmax(x)

lmax_mle  # 7.177...
lmax_pearsonr  # 7.916...
stats.boxcox_normmax(x, method='all')  # array([ 7.91667384,  7.17718692])

# Box-Cox normality plot with both estimates marked: ``lmax_mle`` as a
# solid red line, ``lmax_pearsonr`` as a dashed green line.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
prob = stats.boxcox_normplot(x, -10, 10, plot=ax)
ax.axvline(lmax_mle, color='r')
ax.axvline(lmax_pearsonr, color='g', ls='--')

plt.show()
plt.show()

# inspect histogram
Newton_wind_temp['Daily_Rainfall(mm)'].hist(bins=30)
plt.title('Histogram of Dependent Variable')
plt.show()

# diagnose/inspect residual normality using qqplot.
# The figure is created BEFORE probplot so that the 8x8 size applies to the
# Q-Q plot itself; creating it afterwards would leave the Q-Q plot at the
# default size and open a second, empty figure.
plt.figure(figsize=(8,8))
stats.probplot(Newton_wind_temp['resid'], dist="norm", plot=plt)
plt.show()
#%%
# inspect sqrt histogram
np.sqrt(Newton_wind_temp['Daily_Rainfall(mm)']).hist(bins=30)
plt.title('Histogram of Dependent Variable')
plt.show()

# inspect log histogram (the +1 keeps zero values finite under the log)
np.log(Newton_wind_temp['Daily_Rainfall(mm)']+1).hist(bins=30)
plt.title('Histogram of Dependent Variable')
plt.show()

# inspect to examine for box cox transformation: normality plot of the
# data shifted by +1, scanning lambdas from 1 to 3
fig = plt.figure()
ax = fig.add_subplot(111)
prob = stats.boxcox_normplot(Newton_wind_temp['Daily_Rainfall(mm)']+1, 1, 3, plot=ax)
#%%