Esempio n. 1
0
def get_distributions(my_folder_rooth):
    """Load the fitted joint distributions and tabulate their CDFs.

    Two pickle files are read from ``my_folder_rooth + "project_utilities/"``
    (the folder string is concatenated as-is, so it must already end with a
    path separator):

    * ``joints_distr.pkl`` -- indexed as ``joints_distr[k][m]``, where each
      entry is ``(skewnorm_args, lower, upper)``: the positional arguments
      for ``skewnorm.cdf`` followed by the bounds of the sampling grid.
    * ``max_min_joints.pkl`` -- returned untouched as ``mm``.

    Returns:
        ``(distributions, mm)``. ``distributions`` has six items. Items 0-3
        are ``[cdf, x]`` pairs built from ``joints_distr[k][0]``; item 4 is a
        list of three such pairs and item 5 a list of two, built from
        ``joints_distr[4][0..2]`` and ``joints_distr[5][0..1]``. Every ``x``
        is a 5000-point ``np.linspace`` grid.
    """
    n_distr_samples = 5000

    def _load(file_name):
        # SECURITY NOTE: unpickling can execute arbitrary code; these files
        # must come from a trusted source.
        # 'latin1' is presumably needed because the files were written by
        # Python 2 -- confirm before changing.
        with open(my_folder_rooth + "project_utilities/" + file_name,
                  "rb") as f:
            return pkl.load(f, encoding='latin1')

    def _tabulate(fit):
        # fit == (skewnorm_args, lower, upper)
        x = np.linspace(fit[1], fit[2], n_distr_samples)
        return [skewnorm.cdf(x, *fit[0]), x]

    joints_distr = _load("joints_distr.pkl")
    mm = _load("max_min_joints.pkl")

    # Joints 0-3 carry a single fit each and are stored as flat pairs.
    distributions = [_tabulate(joints_distr[k][0]) for k in range(4)]
    # Joints 4 and 5 carry three and two fits and are stored as nested lists.
    distributions.append([_tabulate(joints_distr[4][m]) for m in range(3)])
    distributions.append([_tabulate(joints_distr[5][m]) for m in range(2)])

    return distributions, mm
Esempio n. 2
0
def Skewnorm_CDF_func(x, a, mu, sig):
    """Evaluate the skew-normal cumulative distribution function at ``x``.

    a   -- skewness (shape) parameter; the curve is Gaussian when a == 0
    mu  -- forwarded to scipy as ``loc`` (the Gaussian mean when a == 0)
    sig -- forwarded to scipy as ``scale`` (the Gaussian std dev when a == 0)
    """
    location_scale = {"loc": mu, "scale": sig}
    return skewnorm.cdf(x, a, **location_scale)
Esempio n. 3
0
    def test_summary_max_symmetric_3(self):
        """Check the ``max_symmetric`` summary of ``self.data_skew`` at 95% area.

        The summary is compared against an analytic skew-normal with
        arguments ``(5, 1, 1.5)`` (presumably the distribution the chain was
        drawn from -- confirm in the fixture): the central value must sit at
        the PDF maximum, the interval must enclose the requested area, and
        both error bars must have equal length.
        """
        target_area = 0.95
        consumer = ChainConsumer()
        consumer.add_chain(self.data_skew)
        consumer.configure(statistics="max_symmetric",
                           bins=1.0,
                           summary_area=target_area)
        summary = consumer.analysis.get_summary()['0']
        lower, peak, upper = summary[0], summary[1], summary[2]

        reference = (5, 1, 1.5)
        grid = np.linspace(0, 2, 1000)
        density = skewnorm.pdf(grid, *reference)
        expected_peak = grid[density.argmax()]
        # Probability mass enclosed by the reported interval
        enclosed = (skewnorm.cdf(upper, *reference) -
                    skewnorm.cdf(lower, *reference))

        assert np.isclose(expected_peak, peak, atol=0.05)
        assert np.isclose(enclosed, target_area, atol=0.05)
        assert np.isclose(upper - peak, peak - lower)
def calculate_gradient(t_values, gradient_function, skew_norm_params,
                       n_classes):
    """Evaluate ``gradient_function`` on skew-normal PDF and CDF values.

    For every pair ``(i, j)`` with ``i, j`` in ``range(n_classes)`` (``i``
    varying slowest) the PDF and CDF of the skew-normal with arguments
    ``skew_norm_params[i][j]`` are evaluated at ``t_values[i]``.
    ``gradient_function`` receives all PDF values first, followed by all CDF
    values, as positional arguments; its result is returned as a numpy array.
    """
    pdf_terms = []
    cdf_terms = []
    for row in range(n_classes):
        for col in range(n_classes):
            params = skew_norm_params[row][col]
            pdf_terms.append(skewnorm.pdf(t_values[row], *params))
            cdf_terms.append(skewnorm.cdf(t_values[row], *params))
    return np.array(gradient_function(*(pdf_terms + cdf_terms)))
Esempio n. 5
0
    def test_summary_max_central_3(self):
        """Check the ``max_central`` summary of ``self.data_skew`` at 95% area.

        The central value must match the PDF maximum of a skew-normal with
        arguments ``(5, 1, 1.5)`` and the bounds must match the quantiles
        that leave equal tail mass on both sides, all within 0.05.
        """
        target_area = 0.95
        consumer = ChainConsumer()
        consumer.add_chain(self.data_skew)
        consumer.configure(statistics="max_central",
                           bins=1.0,
                           summary_area=target_area)
        summary = consumer.analysis.get_summary()['0']

        reference = (5, 1, 1.5)
        grid = np.linspace(-1, 5, 1000)
        density = skewnorm.pdf(grid, *reference)
        cumulative = skewnorm.cdf(grid, *reference)
        # Invert the tabulated CDF to obtain the equal-tailed bounds
        inverse_cdf = interp1d(cumulative, grid)
        half_area = 0.5 * target_area
        bounds = inverse_cdf([0.5 - half_area, 0.5 + half_area])
        expected_peak = grid[density.argmax()]

        assert np.isclose(expected_peak, summary[1], atol=0.05)
        assert np.isclose(bounds[0], summary[0], atol=0.05)
        assert np.isclose(bounds[1], summary[2], atol=0.05)
Esempio n. 6
0
    def test_summary_max_shortest_3(self):
        """Check the ``max_shortest`` summary of ``self.data_skew`` at 95% area.

        The expected interval is found on a grid: for every candidate lower
        bound the matching upper bound enclosing the requested area is looked
        up through the inverted CDF of a skew-normal with arguments
        ``(5, 1, 1.5)``, and the narrowest such interval is kept.
        """
        target_area = 0.95
        consumer = ChainConsumer()
        consumer.add_chain(self.data_skew)
        consumer.configure(statistics="max_shortest",
                           bins=1.0,
                           summary_area=target_area)
        summary = consumer.analysis.get_summary()['0']

        reference = (5, 1, 1.5)
        grid = np.linspace(-1, 5, 1000)
        density = skewnorm.pdf(grid, *reference)
        cumulative = skewnorm.cdf(grid, *reference)
        # Targets outside the tabulated CDF range map to +inf, so those
        # candidates can never be picked as the narrowest interval.
        inverse_cdf = interp1d(cumulative, grid, bounds_error=False,
                               fill_value=np.inf)
        upper_bounds = inverse_cdf(cumulative + target_area)
        narrowest = np.argmin(upper_bounds - grid)
        expected_lower = grid[narrowest]
        expected_upper = upper_bounds[narrowest]
        expected_peak = grid[density.argmax()]

        assert np.isclose(expected_peak, summary[1], atol=0.05)
        assert np.isclose(expected_lower, summary[0], atol=0.05)
        assert np.isclose(expected_upper, summary[2], atol=0.05)
Esempio n. 7
0
# Put the dummy row(s) in front of this week's data and renumber from zero
thisWeek = pd.concat([dummy, thisWeek.iloc[:]]).reset_index(drop=True)

# Select the hourlyRates rows for the hour of the first row of thisWeek
currentHour = hourlyRates['hourNumber'] == thisWeek['hourNumber'][0]

# Predicted total: rows counted so far (minus one) plus the mean remaining
lastPrediction = (thisWeek['text'].count() - 1 +
                  hourlyRates.loc[currentHour, 'meanRemainingTweets'].iloc[0])
stddev = hourlyRates.loc[currentHour, 'stdRemainingTweets'].iloc[0]
skew = hourlyRates.loc[currentHour, 'skewRemainingTweets'].iloc[0]

# Positional arguments (shape, loc, scale) shared by every skewnorm call below
distArgs = (skew, lastPrediction, stddev)

yesProbs = []
for b in range(len(priceData)):
    if b == 0:
        # First bracket: everything below its upper edge
        prob = skewnorm.cdf(brackets[b], *distArgs)
    elif b < priceData.index.max():
        # Middle bracket: mass between the previous edge and this one
        prob = (1 - skewnorm.cdf(brackets[b - 1], *distArgs) -
                skewnorm.sf(brackets[b], *distArgs))
    else:
        # Last bracket: everything above the previous edge
        prob = skewnorm.sf(brackets[b - 1], *distArgs)
    yesProbs.append(prob)

priceData['probabilityYes'] = yesProbs
priceData['probabilityNo'] = 1 - priceData['probabilityYes']

# Plot data collection
# plotdf is the same object as averagePace (no copy is made), so the two
# band columns below are added to averagePace as well.
plotdf = averagePace
plotdf['averagePlus'] = plotdf['meanTweets'] + plotdf['stdTweets']
plotdf['averageMinus'] = plotdf['meanTweets'] - plotdf['stdTweets']
# Keep only rows where both mass and year are known
df = df.dropna(subset=["mass", "year"])

# Years strictly between 1975 and 2010; zero masses are dropped because
# their logarithm would be -inf.
df = df.loc[(df["year"] > 1975) & (df["year"] < 2010) & (df["mass"] != 0)]

# Fraction of falls that end up recorded
observed_rate = land_area / earth_total_area * not_populated_rate
logmass = np.log(df["mass"])

plt.plot(df["year"], logmass, "bo")
plt.show()
plt.hist(df["year"], bins=50)
plt.show()

pd.plotting.scatter_matrix(df[["mass", "year", "reclat", "reclong"]],
                           figsize=(7, 7))
plt.show()

# Fit a skew-normal to log(mass) and overlay its PDF on the histogram
ms = np.linspace(-5, 20, 100)
p_skewnorm = skewnorm.fit(logmass)
pdf_skewnorm = skewnorm.pdf(ms, *p_skewnorm)
plt.hist(logmass, bins=50, alpha=0.2, density=True)
plt.plot(ms, pdf_skewnorm, c="r")
plt.show()

mass_of_doom = np.log(
    (4 / 3) * np.pi * 500**3 * 1600 *
    1000)  # Just using a spherical approximation and some avg density

# Upper-tail probability of a single event reaching mass_of_doom. The
# survival function is used instead of ``1 - cdf``: this far into the tail
# the cdf rounds to 1.0 and the subtraction would collapse to exactly 0.
meteor_is_doom = skewnorm.sf(mass_of_doom, *p_skewnorm)
# Expected number of events over 1000 years, corrected for unobserved falls
num_events = 1000 * df["year"].value_counts().mean() / observed_rate
print(meteor_is_doom * num_events)
Esempio n. 9
0
    # Left column: normal distribution, scipy versus the custom functions.
    # Titles are raw strings so the LaTeX backslashes are not parsed as
    # (invalid) Python escape sequences.
    ax[0][0].plot(x, norm.pdf(x, loc=0, scale=1), 'bo', alpha=0.6, label='norm pdf (scipy)')
    ax[0][0].plot(x, normal_pdf(x, mu=0, sigma=1), 'r.', alpha=0.6, label='norm pdf (custom)')
    ax[0][0].legend(loc='best', frameon=False)
    ax[0][0].set_title(r"Normal PDF Comparison ($\mu = 0$ $\sigma = 1$)")

    ax[1][0].plot(x, norm.cdf(x), 'bo', lw=5, alpha=0.6, label='norm cdf (scipy)')
    ax[1][0].plot(x, normal_cdf(x, mu=0, sigma=1), 'r.', lw=5, alpha=0.6, label='norm cdf (custom)')
    ax[1][0].legend(loc='best', frameon=False)
    ax[1][0].set_title(r"Normal CDF Comparison ($\mu = 0$ $\sigma = 1$)")

    # Only the scipy ppf is drawn; no custom counterpart is plotted here
    ax[2][0].plot(y, norm.ppf(y), 'bo', lw=5, alpha=0.6, label='norm ppf (scipy)')
    ax[2][0].legend(loc='best', frameon=False)
    ax[2][0].set_title(r"PPF Comparison ($\mu = 0$ $\sigma = 1$)")

    # Right column: skew-normal distribution. Note the PDF panel uses
    # shape +4 while the CDF and PPF panels use shape -4.
    ax[0][1].plot(x, skewnorm.pdf(x, loc=0, scale=1, a=4), 'bo', lw=5, alpha=0.6, label='skewnorm pdf (scipy)')
    ax[0][1].plot(x, skew_normal_pdf(x, epsilon=0, omega=1, alpha=4), 'r.', lw=5, alpha=0.6, label='skewnorm pdf (custom)')
    ax[0][1].legend(loc='best', frameon=False)
    ax[0][1].set_title(r"Skew-normal PDF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = 4$)")

    ax[1][1].plot(x, skewnorm.cdf(x, loc=0, scale=1, a=-4), 'bo', lw=5, alpha=0.6, label='skewnorm cdf (scipy)')
    ax[1][1].plot(x, skew_normal_cdf(x, epsilon=0, omega=1, alpha=-4), 'r.', lw=5, alpha=0.6, label='skewnorm cdf (custom)')
    ax[1][1].legend(loc='best', frameon=False)
    ax[1][1].set_title(r"Skew-normal CDF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = -4$)")

    ax[2][1].plot(y, skewnorm.ppf(y, a=-4), 'bo', lw=5, alpha=0.6, label='skewnorm ppf (scipy)')
    ax[2][1].legend(loc='best', frameon=False)
    ax[2][1].set_title(r"Skew-normal PPF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = -4$)")

    plt.show()
Esempio n. 10
0
# Plot the pdf on 100 points between the 1st and 99th percentiles
x_lo = skewnorm.ppf(0.01, a)
x_hi = skewnorm.ppf(0.99, a)
x = np.linspace(x_lo, x_hi, 100)
ax.plot(x, skewnorm.pdf(x, a), 'r-', lw=5, alpha=0.6, label='skewnorm pdf')

# Calling the distribution object with the shape (and optionally location
# and scale) parameters returns a "frozen" RV object that keeps them fixed.
# Display the pdf of the frozen distribution:
rv = skewnorm(a)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Round-trip check of ``ppf`` followed by ``cdf``. The result of the
# comparison is not stored; it evaluates to True in an interactive session.
probs = [0.001, 0.5, 0.999]
vals = skewnorm.ppf(probs, a)
np.allclose(probs, skewnorm.cdf(vals, a))

# Draw 1000 random variates ...
r = skewnorm.rvs(a, size=1000)

# ... and overlay their normalised histogram on the pdf curves
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
Esempio n. 11
0
 def cdf(self, t):
     """Return the skew-normal CDF at ``t``.

     ``self.alpha``, ``self.zeta`` and ``self.omega`` are passed to scipy
     positionally, i.e. as shape, ``loc`` and ``scale`` respectively.
     """
     shape, location, scale = self.alpha, self.zeta, self.omega
     return skewnorm.cdf(t, shape, location, scale)
Esempio n. 12
0
 def cdf(self, x):
     """Return the CDF at ``x`` of the skew-normal described by this object.

     Shape, location and scale are read from ``self._a``, ``self._loc`` and
     ``self._scale``.
     """
     return skewnorm.cdf(x, self._a, loc=self._loc, scale=self._scale)
Esempio n. 13
0
 def inverse_transform(self, z, copy=None):
     """Map ``z`` through the standard CDF, then the fitted skew-normal PPF.

     ``z`` is first converted to a probability with the skew-normal CDF at
     ``a=0, loc=0, scale=1``; that probability is then pushed through the
     PPF of the skew-normal with ``self._a``, ``self._loc`` and
     ``self._scale``. ``copy`` is accepted but not used.
     """
     probability = skewnorm.cdf(x=z, a=0, loc=0, scale=1)
     return skewnorm.ppf(q=probability,
                         a=self._a,
                         loc=self._loc,
                         scale=self._scale)
Esempio n. 14
0
 def transform(self, x):
     """Map ``x`` through the fitted skew-normal CDF, then the standard PPF.

     ``x`` is converted to a probability with the CDF of the skew-normal
     with ``self._a``, ``self._loc`` and ``self._scale``; that probability
     is then pushed through the skew-normal PPF at ``a=0, loc=0, scale=1``.
     """
     probability = skewnorm.cdf(x=x,
                                a=self._a,
                                loc=self._loc,
                                scale=self._scale)
     return skewnorm.ppf(q=probability, a=0, loc=0, scale=1)
Esempio n. 15
0
def skew(x,e=0,w=1,a=0):
    """Return ``2 / w * pdf(t) * cdf(a * t)`` with ``t = (x - e) / w``.

    ``e`` shifts and ``w`` rescales ``x`` before evaluation; ``a`` multiplies
    the standardised value inside ``cdf``. ``pdf`` and ``cdf`` are helpers
    defined elsewhere in the file.
    """
    standardized = (x - e) / w
    density = pdf(standardized)
    tilt = cdf(a * standardized)
    # The scipy.stats.norm versions can of course be used instead:
    # return 2 * norm.pdf(t) * norm.cdf(a*t)
    return 2 / w * density * tilt


# Grid size; e and w are only referenced by the commented-out skew() call
n = 2 ** 12
e = 1.0  # location
w = 2.0  # scale
x = linspace(0, 7, n)
# p = skew(x, e, w, 9)

# Skew-normal with shape 5, location 1 and scale 1.5 tabulated on the grid
p = skewnorm.pdf(x, 5, loc=1, scale=1.5)
print(x[p.argmax()])  # grid position of the PDF maximum
c = skewnorm.cdf(x, 5, loc=1, scale=1.5)
# c = p.cumsum()
# c /= c.max()

# First grid index at which the CDF exceeds each probability level
b0, bm, b1 = (np.where(c > level)[0][0]
              for level in (0.15865, 0.5, 0.84135))
x0, x1 = x[b0], x[b1]


s = 30

fig, ax = plt.subplots(nrows=2, sharex=True, figsize=(5, 5))
Esempio n. 16
0
def _fitFunction(x, *a):
    r"""!Skew-normal CDF used as the model fitted to the recorded nstep.

    All entries of ``a`` are forwarded positionally to ``skewnorm.cdf``
    after ``x``.
    """
    cdf_values = skewnorm.cdf(x, *a)
    return cdf_values
 def generic_dispersion(self, nd_dict, GH_dict=None):
     """Discretise every dispersed parameter into values and weights.

     The parameter names, distribution kinds and bin counts are read from
     ``self.simulation_options`` (keys ``"dispersion_parameters"``,
     ``"dispersion_distributions"`` and ``"dispersion_bins"``). The
     distribution settings of a parameter are looked up in ``nd_dict``
     under ``<name> + suffix``:

     * ``"uniform"``: ``_lower``, ``_upper`` -- equally spaced values with
       equal weights.
     * ``"normal"``: ``_mean``, ``_std`` -- if ``GH_dict`` is a dict, its
       ``"nodes"`` are rescaled by ``std * sqrt(2)`` and shifted by the mean,
       and its ``"normal_weights"`` are used as the weights; otherwise the
       distribution is binned as described below.
     * ``"lognormal"``: ``_shape``, ``_scale`` (location fixed at 0).
     * ``"skewed_normal"``: ``_mean``, ``_std``, ``_skew``.
     * ``"log_uniform"``: ``_loglower``, ``_logupper``.

     Binned distributions use equally spaced edges between the 1e-4 and
     1 - 1e-4 quantiles. Bin ``j > 0`` lies between edges ``j - 1`` and
     ``j``: its value is the midpoint and its weight the CDF difference.
     Bin 0 is the lower tail below the first edge: its weight is the CDF at
     that edge and its value the 0.5e-4 quantile.

     Returns:
         ``(sim_params, value_combinations, weight_combinations)``:
         the parameter names (with ``"E0"``/``"k0"`` renamed to
         ``"E_0"``/``"k_0"``) and the Cartesian products of the
         per-parameter values and weights, as lists of tuples.
     """
     options = self.simulation_options
     names = options["dispersion_parameters"]
     kinds = options["dispersion_distributions"]
     bins = options["dispersion_bins"]
     tail = 1e-4  # probability mass left outside each end of the edge grid

     def binned(dist, shape_args, n_bins, **loc_scale):
         # Return (midpoints, weights) of ``dist`` on ``n_bins`` bins.
         edges = np.linspace(dist.ppf(tail, *shape_args, **loc_scale),
                             dist.ppf(1 - tail, *shape_args, **loc_scale),
                             n_bins)
         cumulative = dist.cdf(edges, *shape_args, **loc_scale)
         weights = np.zeros(n_bins)
         midpoints = np.zeros(n_bins)
         # Bin 0 represents the tail below the first edge
         weights[0] = cumulative[0]
         midpoints[0] = dist.ppf(tail / 2, *shape_args, **loc_scale)
         weights[1:] = np.diff(cumulative)
         midpoints[1:] = (edges[:-1] + edges[1:]) / 2
         return midpoints, weights

     weight_arrays = []
     value_arrays = []
     for i, name in enumerate(names):
         kind = kinds[i]
         if kind == "uniform":
             # The bounds live in nd_dict like every other distribution
             # setting; the key strings themselves are not numeric bounds.
             values = np.linspace(nd_dict[name + "_lower"],
                                  nd_dict[name + "_upper"], bins[i])
             weights = [1 / bins[i]] * bins[i]
         elif kind == "normal":
             param_mean = nd_dict[name + "_mean"]
             param_std = nd_dict[name + "_std"]
             if isinstance(GH_dict, dict):
                 values = [(param_std * math.sqrt(2) * node) + param_mean
                           for node in GH_dict["nodes"]]
                 weights = GH_dict["normal_weights"]
             else:
                 values, weights = binned(norm, (), bins[i],
                                          loc=param_mean, scale=param_std)
         elif kind == "lognormal":
             param_shape = nd_dict[name + "_shape"]
             param_scale = nd_dict[name + "_scale"]
             # NOTE(review): diagnostic output kept from the original
             print("shape, scale", param_shape, param_scale)
             values, weights = binned(lognorm, (param_shape,), bins[i],
                                      loc=0, scale=param_scale)
         elif kind == "skewed_normal":
             param_mean = nd_dict[name + "_mean"]
             param_std = nd_dict[name + "_std"]
             param_skew = nd_dict[name + "_skew"]
             values, weights = binned(skewnorm, (param_skew,), bins[i],
                                      loc=param_mean, scale=param_std)
         elif kind == "log_uniform":
             param_upper = nd_dict[name + "_logupper"]
             param_lower = nd_dict[name + "_loglower"]
             values, weights = binned(loguniform,
                                      (param_lower, param_upper), bins[i],
                                      loc=0, scale=1)
         else:
             # NOTE(review): unrecognised kinds contribute no arrays, as in
             # the original; the combinations then have fewer columns than
             # sim_params has names.
             continue
         value_arrays.append(values)
         weight_arrays.append(weights)

     weight_combinations = list(itertools.product(*weight_arrays))
     value_combinations = list(itertools.product(*value_arrays))
     renamed = {"E0": "E_0", "k0": "k_0"}
     sim_params = [renamed.get(name, name) for name in names]
     return sim_params, value_combinations, weight_combinations
def calculate_itr(t_values, itr_function, skew_norm_params, n_classes):
    """Evaluate ``itr_function`` on a grid of skew-normal CDF values.

    For every pair ``(i, j)`` with ``i, j`` in ``range(n_classes)`` (``i``
    varying slowest) the CDF of the skew-normal with arguments
    ``skew_norm_params[i][j]`` is evaluated at ``t_values[i]``; the values
    are passed to ``itr_function`` as positional arguments and its result is
    returned unchanged.
    """
    cdf_terms = []
    for row in range(n_classes):
        for col in range(n_classes):
            cdf_terms.append(
                skewnorm.cdf(t_values[row], *skew_norm_params[row][col]))
    return itr_function(*cdf_terms)