def get_distributions(my_folder_rooth):
    """Load skew-normal fit parameters for each joint and tabulate their CDFs.

    Parameters
    ----------
    my_folder_rooth : str
        Project root path (with trailing separator); the pickles are read
        from ``<root>project_utilities/``.

    Returns
    -------
    distributions : list
        One entry per joint, in pickle order.  Entries 0-3 are flat
        ``[cdf_values, x_grid]`` pairs; entry 4 holds three such pairs and
        entry 5 two, mirroring the layout of ``joints_distr.pkl``.
    mm : object
        Contents of ``max_min_joints.pkl`` (structure defined by the pickle
        producer — not inspected here).
    """
    # pickle.load(..., encoding="latin1") is the public equivalent of the
    # original private ``pkl._Unpickler`` dance (needed to read Python-2
    # era pickles containing 8-bit strings).
    with open(my_folder_rooth + "project_utilities/joints_distr.pkl", "rb") as f:
        joints_distr = pkl.load(f, encoding="latin1")
    with open(my_folder_rooth + "project_utilities/max_min_joints.pkl", "rb") as f:
        mm = pkl.load(f, encoding="latin1")

    n_distr_samples = 5000

    def _cdf_pair(spec):
        # spec = (skewnorm parameter tuple, lower bound, upper bound)
        params, lower, upper = spec[0], spec[1], spec[2]
        grid = np.linspace(lower, upper, n_distr_samples)
        return [skewnorm.cdf(grid, *params), grid]

    # Slices [:3] and [:2] keep the exact sub-entries the original code read.
    distributions = [
        _cdf_pair(joints_distr[0][0]),
        _cdf_pair(joints_distr[1][0]),
        _cdf_pair(joints_distr[2][0]),
        _cdf_pair(joints_distr[3][0]),
        [_cdf_pair(spec) for spec in joints_distr[4][:3]],
        [_cdf_pair(spec) for spec in joints_distr[5][:2]],
    ]
    return distributions, mm
def Skewnorm_CDF_func(x, a, mu, sig):
    """Skew-normal cumulative distribution function.

    Parameters
    ----------
    x : evaluation point(s)
    a : skewness parameter (reduces to a plain Gaussian when ``a == 0``)
    mu : location (mean of the underlying Gaussian)
    sig : scale (standard deviation of the underlying Gaussian)
    """
    frozen = skewnorm(a, loc=mu, scale=sig)
    return frozen.cdf(x)
def test_summary_max_symmetric_3(self):
    """max_symmetric at 95%: interval is centred on the mode and holds ~95% mass."""
    consumer = ChainConsumer()
    consumer.add_chain(self.data_skew)
    area = 0.95
    consumer.configure(statistics="max_symmetric", bins=1.0, summary_area=area)
    bounds = consumer.analysis.get_summary()['0']
    grid = np.linspace(0, 2, 1000)
    density = skewnorm.pdf(grid, 5, 1, 1.5)
    mode = grid[np.argmax(density)]
    # Mass actually enclosed between the reported lower/upper bounds.
    enclosed = skewnorm.cdf(bounds[2], 5, 1, 1.5) - skewnorm.cdf(bounds[0], 5, 1, 1.5)
    assert np.isclose(mode, bounds[1], atol=0.05)
    assert np.isclose(enclosed, area, atol=0.05)
    # Symmetric statistic: equal distance from mode to either bound.
    assert np.isclose(bounds[2] - bounds[1], bounds[1] - bounds[0])
def calculate_gradient(t_values, gradient_function, skew_norm_params, n_classes):
    """Evaluate ``gradient_function`` at skew-normal pdf and cdf values.

    The callable receives, in order, every pdf value (row-major over the
    (i, j) class pairs, each evaluated at threshold ``t_values[i]``) followed
    by every cdf value in the same order.  The result is returned as an
    ``np.ndarray``.
    """
    pdf_terms = []
    cdf_terms = []
    for i in range(n_classes):
        for j in range(n_classes):
            params = skew_norm_params[i][j]
            pdf_terms.append(skewnorm.pdf(t_values[i], *params))
            cdf_terms.append(skewnorm.cdf(t_values[i], *params))
    return np.array(gradient_function(*(pdf_terms + cdf_terms)))
def test_summary_max_central_3(self):
    """max_central at 95%: mode plus central-credible-interval bounds."""
    consumer = ChainConsumer()
    consumer.add_chain(self.data_skew)
    area = 0.95
    consumer.configure(statistics="max_central", bins=1.0, summary_area=area)
    summary = consumer.analysis.get_summary()['0']
    grid = np.linspace(-1, 5, 1000)
    density = skewnorm.pdf(grid, 5, 1, 1.5)
    cumulative = skewnorm.cdf(grid, 5, 1, 1.5)
    # Invert the CDF at the two central quantiles (2.5% and 97.5%).
    quantiles = [0.5 - 0.5 * area, 0.5 + 0.5 * area]
    bounds = interp1d(cumulative, grid)(quantiles)
    mode = grid[np.argmax(density)]
    assert np.isclose(mode, summary[1], atol=0.05)
    assert np.isclose(bounds[0], summary[0], atol=0.05)
    assert np.isclose(bounds[1], summary[2], atol=0.05)
def test_summary_max_shortest_3(self):
    """max_shortest at 95%: the narrowest interval containing the target mass."""
    consumer = ChainConsumer()
    consumer.add_chain(self.data_skew)
    area = 0.95
    consumer.configure(statistics="max_shortest", bins=1.0, summary_area=area)
    summary = consumer.analysis.get_summary()['0']
    grid = np.linspace(-1, 5, 1000)
    density = skewnorm.pdf(grid, 5, 1, 1.5)
    cumulative = skewnorm.cdf(grid, 5, 1, 1.5)
    # For every candidate lower bound, find the upper bound that encloses
    # `area` mass; points whose upper bound falls off the grid map to inf.
    uppers = interp1d(cumulative, grid, bounds_error=False,
                      fill_value=np.inf)(cumulative + area)
    shortest = np.argmin(uppers - grid)
    lower = grid[shortest]
    upper = uppers[shortest]
    mode = grid[np.argmax(density)]
    assert np.isclose(mode, summary[1], atol=0.05)
    assert np.isclose(lower, summary[0], atol=0.05)
    assert np.isclose(upper, summary[2], atol=0.05)
# Prepend the dummy frame so the week always starts at a known row, then
# re-number the index.
thisWeek = pd.concat([dummy, thisWeek.iloc[:]]).reset_index(drop=True)
# Predicted end-of-week tweet total: tweets seen so far (count - 1, the
# dummy row presumably excluded — TODO confirm) plus the historical mean of
# tweets remaining from this hour of the week.
lastPrediction = thisWeek['text'].count() - 1 + hourlyRates.loc[
    hourlyRates['hourNumber'] == thisWeek['hourNumber'][0],
    'meanRemainingTweets'].iloc[0]
# Historical spread and skewness of the remaining-tweet count for this hour.
stddev = hourlyRates.loc[hourlyRates['hourNumber'] == thisWeek['hourNumber'][0],
                         'stdRemainingTweets'].iloc[0]
skew = hourlyRates.loc[hourlyRates['hourNumber'] == thisWeek['hourNumber'][0],
                       'skewRemainingTweets'].iloc[0]
# Probability that the final total lands in each price bracket, modelling the
# total as skewnorm(skew, loc=lastPrediction, scale=stddev).
yesProbs = []
for b in range(0, len(priceData)):
    if b == 0:
        # Lowest bracket: P(total <= brackets[0]).
        yesProbs.append(skewnorm.cdf(brackets[b], skew, lastPrediction,
                                     stddev))
    elif b > 0 and b < priceData.index.max():
        # Interior bracket: 1 - P(below) - P(above) = P(brackets[b-1] .. brackets[b]).
        # NOTE(review): loop bound uses len(priceData) but this condition uses
        # priceData.index.max() — equivalent only for a default RangeIndex;
        # verify upstream.
        yesProbs.append(
            1 - skewnorm.cdf(brackets[b - 1], skew, lastPrediction, stddev) -
            skewnorm.sf(brackets[b], skew, lastPrediction, stddev))
    else:
        # Highest bracket: P(total > brackets[-1]) via the survival function.
        yesProbs.append(
            skewnorm.sf(brackets[b - 1], skew, lastPrediction, stddev))
priceData['probabilityYes'] = yesProbs
priceData['probabilityNo'] = 1 - priceData['probabilityYes']
#Plot data collection
# NOTE(review): plotdf aliases averagePace (no copy) — the two column
# assignments below also mutate averagePace.
plotdf = averagePace
plotdf['averagePlus'] = plotdf['meanTweets'] + plotdf['stdTweets']
plotdf['averageMinus'] = plotdf['meanTweets'] - plotdf['stdTweets']
# Keep only rows with both mass and year recorded, restricted to 1976-2009
# and non-zero mass (zero mass would break the log below).
df = df.dropna(subset=["mass", "year"])
df = df.loc[(df["year"] > 1975) & (df["year"] < 2010) & (df["mass"] != 0)]
# Fraction of falls we expect to have observed/recorded.
# NOTE(review): land_area, earth_total_area and not_populated_rate are
# defined elsewhere in the script — confirm their normalisation there.
observed_rate = land_area / earth_total_area * not_populated_rate
logmass = np.log(df["mass"])
# Exploratory plots: mass vs year, yearly histogram, pairwise scatter.
plt.plot(df["year"], logmass, "bo")
plt.show()
plt.hist(df["year"], bins=50)
plt.show()
pd.plotting.scatter_matrix(df[["mass", "year", "reclat", "reclong"]],
                           figsize=(7, 7))
plt.show()
# Fit a skew-normal to the log-mass distribution and overlay it.
ms = np.linspace(-5, 20, 100)
p_skewnorm = skewnorm.fit(logmass)
pdf_skewnorm = skewnorm.pdf(ms, *p_skewnorm)
plt.hist(logmass, bins=50, alpha=0.2, density=True)
plt.plot(ms, pdf_skewnorm, c="r")
plt.show()
# Log-mass of a "civilisation-ending" impactor: sphere of radius 500 m,
# density 1600 kg/m^3, converted to grams (×1000) to match the dataset units
# — TODO confirm the dataset's mass column is in grams.
mass_of_doom = np.log(
    (4 / 3) * np.pi * 500**3 * 1600 *
    1000)  # Just using a spherical approximation and some avg density
# Tail probability that a single observed meteorite exceeds that mass.
meteor_is_doom = 1 - skewnorm.cdf(mass_of_doom, *p_skewnorm)
# Expected events per millennium, scaled up by the unobserved fraction.
num_events = 1000 * df["year"].value_counts().mean() / observed_rate
print(meteor_is_doom * num_events)
# Compare scipy's normal/skew-normal (blue circles) against the custom
# implementations (red dots): rows are PDF, CDF, PPF; columns are normal
# (left) and skew-normal (right).
# BUGFIX: several legend labels and titles below were copy-pasted from the
# CDF row ("norm cdf" on pdf/ppf plots, "CDF Comparison" over a PDF plot,
# and "$alpha = -4$" for a curve drawn with a=4); they now name what is
# actually plotted.  Titles use raw strings so the LaTeX escapes are not
# treated as (invalid) string escapes — the rendered text is unchanged.
ax[0][0].plot(x, norm.pdf(x, loc=0, scale=1), 'bo', alpha=0.6,
              label='norm pdf (scipy)')
ax[0][0].plot(x, normal_pdf(x, mu=0, sigma=1), 'r.', alpha=0.6,
              label='norm pdf (custom)')
ax[0][0].legend(loc='best', frameon=False)
ax[0][0].set_title(r"Normal PDF Comparison ($\mu = 0$ $\sigma = 1$)")
ax[1][0].plot(x, norm.cdf(x), 'bo', lw=5, alpha=0.6, label='norm cdf (scipy)')
ax[1][0].plot(x, normal_cdf(x, mu=0, sigma=1), 'r.', lw=5, alpha=0.6,
              label='norm cdf (custom)')
ax[1][0].legend(loc='best', frameon=False)
ax[1][0].set_title(r"Normal CDF Comparison ($\mu = 0$ $\sigma = 1$)")
ax[2][0].plot(y, norm.ppf(y), 'bo', lw=5, alpha=0.6, label='norm ppf (scipy)')
ax[2][0].legend(loc='best', frameon=False)
ax[2][0].set_title(r"Normal PPF Comparison ($\mu = 0$ $\sigma = 1$)")
ax[0][1].plot(x, skewnorm.pdf(x, loc=0, scale=1, a=4), 'bo', lw=5, alpha=0.6,
              label='skewnorm pdf (scipy)')
ax[0][1].plot(x, skew_normal_pdf(x, epsilon=0, omega=1, alpha=4), 'r.', lw=5,
              alpha=0.6, label='skewnorm pdf (custom)')
ax[0][1].legend(loc='best', frameon=False)
ax[0][1].set_title(
    r"Skew-normal PDF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = 4$)")
ax[1][1].plot(x, skewnorm.cdf(x, loc=0, scale=1, a=-4), 'bo', lw=5, alpha=0.6,
              label='skewnorm cdf (scipy)')
ax[1][1].plot(x, skew_normal_cdf(x, epsilon=0, omega=1, alpha=-4), 'r.', lw=5,
              alpha=0.6, label='skewnorm cdf (custom)')
ax[1][1].legend(loc='best', frameon=False)
ax[1][1].set_title(
    r"Skew-normal CDF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = -4$)")
ax[2][1].plot(y, skewnorm.ppf(y, a=-4), 'bo', lw=5, alpha=0.6,
              label='skewnorm ppf (scipy)')
ax[2][1].legend(loc='best', frameon=False)
ax[2][1].set_title(r"Skew-normal PPF Comparison ($\alpha = -4$)")
plt.show()
# Evaluate the pdf between the 1st and 99th percentiles of skewnorm with
# shape parameter `a` (defined earlier in the script).
x = np.linspace(skewnorm.ppf(0.01, a), skewnorm.ppf(0.99, a), 100)
ax.plot(x, skewnorm.pdf(x, a), 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.
# Freeze the distribution and display the frozen ``pdf``:
rv = skewnorm(a)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
# Check accuracy of ``cdf`` and ``ppf``:
# NOTE(review): the boolean result is discarded — this is a REPL-style docs
# example; its value ("True") is only meaningful interactively.
vals = skewnorm.ppf([0.001, 0.5, 0.999], a)
np.allclose([0.001, 0.5, 0.999], skewnorm.cdf(vals, a))
# True
# Generate random numbers:
r = skewnorm.rvs(a, size=1000)
# And compare the histogram:
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def cdf(self, t):
    """Cumulative probability at ``t`` for this instance's skew-normal
    (shape ``alpha``, location ``zeta``, scale ``omega``)."""
    frozen = skewnorm(self.alpha, loc=self.zeta, scale=self.omega)
    return frozen.cdf(t)
def cdf(self, x):
    """Cumulative probability at ``x`` under the fitted skew-normal
    (``_a`` shape, ``_loc`` location, ``_scale`` scale)."""
    frozen = skewnorm(self._a, loc=self._loc, scale=self._scale)
    return frozen.cdf(x)
def inverse_transform(self, z, copy=None):
    """Map standard-normal scores ``z`` back to the fitted skew-normal scale.

    ``z`` is first converted to a probability rank under the standard normal
    (skewnorm with a=0), then inverted through this instance's fitted
    skew-normal.  ``copy`` is accepted for API compatibility and ignored.
    """
    rank = skewnorm.cdf(z, a=0, loc=0, scale=1)
    return skewnorm.ppf(rank, a=self._a, loc=self._loc, scale=self._scale)
def transform(self, x):
    """Map data ``x`` from the fitted skew-normal scale to standard-normal scores.

    ``x`` is converted to a probability rank under this instance's fitted
    skew-normal, then inverted through the standard normal (skewnorm with a=0).
    """
    rank = skewnorm.cdf(x, a=self._a, loc=self._loc, scale=self._scale)
    return skewnorm.ppf(rank, a=0, loc=0, scale=1)
def skew(x, e=0, w=1, a=0):
    # Hand-rolled skew-normal pdf: location e, scale w, shape a.
    # NOTE(review): relies on free functions pdf/cdf defined elsewhere in
    # the file — presumably standard-normal pdf/cdf; confirm there.
    t = (x-e) / w
    return 2 / w * pdf(t) * cdf(a*t)
    # You can of course use the scipy.stats.norm versions
    # return 2 * norm.pdf(t) * norm.cdf(a*t)
n = 2**12
e = 1.0 # location
w = 2.0 # scale
x = linspace(0,7,n)
#p = skew(x,e,w,9)
# Use scipy's skewnorm directly (shape 5, loc 1, scale 1.5) instead of the
# hand-rolled version above.
p = skewnorm.pdf(x, 5, loc=1, scale=1.5)
# Print the mode (grid point of maximum density).
print(x[p.argmax()])
c = skewnorm.cdf(x, 5, loc=1, scale=1.5)
#c = p.cumsum()
#c /= c.max()
# First grid indices past the 15.865%, 50% and 84.135% quantiles — i.e. the
# -1 sigma, median and +1 sigma points of the distribution.
b0 = np.where(c > 0.15865)[0][0]
bm = np.where(c > 0.5)[0][0]
b1 = np.where(c > 0.84135)[0][0]
x0 = x[b0]
x1 = x[b1]
s = 30  # marker size for the scatter/plot calls that follow (unseen here)
fig, ax = plt.subplots(figsize=(5,5), nrows=2, sharex=True)
def _fitFunction(x, *a): r"""!Function to fit to the recorded nstep.""" return skewnorm.cdf(x, *a)
def generic_dispersion(self, nd_dict, GH_dict=None):
    """Build quadrature nodes and weights for each dispersed parameter.

    For every parameter named in ``simulation_options["dispersion_parameters"]``
    the matching distribution from ``"dispersion_distributions"`` is
    discretised into ``"dispersion_bins"`` weighted bin midpoints, and the
    Cartesian product is taken across parameters.

    Parameters
    ----------
    nd_dict : dict
        Maps ``"<param>_<stat>"`` keys (e.g. ``"E0_mean"``, ``"E0_std"``,
        ``"k0_shape"``) to numeric values.
    GH_dict : dict, optional
        Gauss-Hermite quadrature data (``"nodes"``, ``"normal_weights"``);
        when supplied, normally distributed parameters use GH nodes instead
        of equal-width quantile binning.

    Returns
    -------
    tuple
        ``(sim_params, value_combinations, weight_combinations)`` where
        ``sim_params`` has "E0"/"k0" renamed to "E_0"/"k_0".
    """
    tail = 1e-4  # probability mass ignored in each tail when binning

    def _bin_distribution(n_bins, ppf, cdf):
        # Discretise a distribution into n_bins midpoints between the
        # `tail` and `1 - tail` quantiles.  weights[j] is the CDF mass
        # between consecutive cut points (weights[0] takes everything up to
        # the first cut, so the weights sum to 1 - tail).
        cuts = np.linspace(ppf(tail), ppf(1 - tail), n_bins)
        weights = np.zeros(n_bins)
        weights[0] = cdf(cuts[0])
        midpoints = np.zeros(n_bins)
        midpoints[0] = ppf(tail / 2)
        for j in range(1, n_bins):
            weights[j] = cdf(cuts[j]) - cdf(cuts[j - 1])
            midpoints[j] = (cuts[j - 1] + cuts[j]) / 2
        return midpoints, weights

    opts = self.simulation_options
    weight_arrays = []
    value_arrays = []
    for i in range(len(opts["dispersion_parameters"])):
        param = opts["dispersion_parameters"][i]
        dist = opts["dispersion_distributions"][i]
        n_bins = opts["dispersion_bins"][i]
        if dist == "uniform":
            # BUGFIX: the original passed the key strings themselves
            # (param + "_lower"/"_upper") to np.linspace; the bounds must be
            # looked up in nd_dict, as every other branch does.
            value_arrays.append(
                np.linspace(nd_dict[param + "_lower"],
                            nd_dict[param + "_upper"], n_bins))
            weight_arrays.append([1 / n_bins] * n_bins)
        elif dist == "normal":
            param_mean = nd_dict[param + "_mean"]
            param_std = nd_dict[param + "_std"]
            if type(GH_dict) is dict:
                # Gauss-Hermite: scale/shift the standard nodes to N(mean, std).
                value_arrays.append([
                    (param_std * math.sqrt(2) * node) + param_mean
                    for node in GH_dict["nodes"]
                ])
                weight_arrays.append(GH_dict["normal_weights"])
            else:
                vals, weights = _bin_distribution(
                    n_bins,
                    lambda q: norm.ppf(q, loc=param_mean, scale=param_std),
                    lambda v: norm.cdf(v, loc=param_mean, scale=param_std))
                value_arrays.append(vals)
                weight_arrays.append(weights)
        elif dist == "lognormal":
            param_loc = 0
            param_shape = nd_dict[param + "_shape"]
            param_scale = nd_dict[param + "_scale"]
            print("shape, scale", param_shape, param_scale)
            vals, weights = _bin_distribution(
                n_bins,
                lambda q: lognorm.ppf(q, param_shape, loc=param_loc,
                                      scale=param_scale),
                lambda v: lognorm.cdf(v, param_shape, loc=param_loc,
                                      scale=param_scale))
            value_arrays.append(vals)
            weight_arrays.append(weights)
        elif dist == "skewed_normal":
            param_mean = nd_dict[param + "_mean"]
            param_std = nd_dict[param + "_std"]
            param_skew = nd_dict[param + "_skew"]
            vals, weights = _bin_distribution(
                n_bins,
                lambda q: skewnorm.ppf(q, param_skew, loc=param_mean,
                                       scale=param_std),
                lambda v: skewnorm.cdf(v, param_skew, loc=param_mean,
                                       scale=param_std))
            value_arrays.append(vals)
            weight_arrays.append(weights)
        elif dist == "log_uniform":
            param_upper = nd_dict[param + "_logupper"]
            param_lower = nd_dict[param + "_loglower"]
            vals, weights = _bin_distribution(
                n_bins,
                lambda q: loguniform.ppf(q, param_lower, param_upper, loc=0,
                                         scale=1),
                lambda v: loguniform.cdf(v, param_lower, param_upper, loc=0,
                                         scale=1))
            value_arrays.append(vals)
            weight_arrays.append(weights)
    # (the original also computed an unused total_len = prod(bins); dropped)
    weight_combinations = list(itertools.product(*weight_arrays))
    value_combinations = list(itertools.product(*value_arrays))
    sim_params = copy.deepcopy(opts["dispersion_parameters"])
    for i in range(len(sim_params)):
        if sim_params[i] == "E0":
            sim_params[i] = "E_0"
        if sim_params[i] == "k0":
            sim_params[i] = "k_0"
    return sim_params, value_combinations, weight_combinations
def calculate_itr(t_values, itr_function, skew_norm_params, n_classes):
    """Evaluate ``itr_function`` on skew-normal CDF values.

    The callable receives one CDF value per (i, j) class pair in row-major
    order, each evaluated at the per-class threshold ``t_values[i]`` with the
    corresponding parameters from ``skew_norm_params``.
    """
    cdf_values = []
    for i in range(n_classes):
        for j in range(n_classes):
            cdf_values.append(skewnorm.cdf(t_values[i], *skew_norm_params[i][j]))
    return itr_function(*cdf_values)