예제 #1
0
def find_global_offset(im_list,
                       bbox=9,
                       splitstyle="hsplit",
                       fsize=10,
                       scale=0.5,
                       binning=1):
    """
    Estimate one x-offset and one y-offset shared by all images in ``im_list``.

    For every image the x and y offset samples returned by
    ``get_offset_distribution`` are passed through ``pinhole_filter`` and
    pooled. A ``scipy.stats.skewnorm`` distribution is then fitted to the
    pooled x samples and to the pooled y samples, and the fitted location
    (``loc``) parameter of each fit is returned.

    :param im_list: iterable of images; each one is handed to ``get_offset_distribution``
    :param bbox: forwarded to ``get_offset_distribution``. Default is 9.
    :param splitstyle: forwarded to ``get_offset_distribution``. Default is "hsplit".
    :param fsize: forwarded to ``get_offset_distribution``. Default is 10.
    :param scale: forwarded to ``pinhole_filter``. Default is 0.5.
    :param binning: forwarded to ``pinhole_filter``. Default is 1.

    :return: tuple ``(x_loc, y_loc)`` with the ``loc`` parameters of the two skew-normal fits.

    :Example:
        >>> from toolbox.alignment import find_global_offset
        >>> import toolbox.testdata as test
        >>> im = test.image_stack()
        >>> print(find_global_offset(im))
        (5.624042070667237, -2.651128775580636)
    """
    x_samples = []
    y_samples = []
    for image in im_list:
        x_offsets, y_offsets = get_offset_distribution(image, bbox, splitstyle, fsize)
        x_samples.extend(pinhole_filter(x_offsets, scale, binning))
        y_samples.extend(pinhole_filter(y_offsets, scale, binning))

    # skewnorm.fit returns (shape, loc, scale); only loc is reported.
    x_loc = skewnorm.fit(x_samples)[1]
    y_loc = skewnorm.fit(y_samples)[1]
    return x_loc, y_loc
def plot_skewed_data(data):
    """Plot a normalized histogram of ``data`` with its fitted skew-normal PDF.

    A ``scipy.stats.skewnorm`` distribution is fitted to ``data``; the fitted
    PDF, a 20-bin density histogram and vertical markers at the 1st, 50th and
    99th sample percentiles are drawn on a new figure, which is then shown.
    The 1% and 99% quantiles of the fitted distribution are printed.

    :param data: 1-D sequence of samples.
    :return: None
    """
    # Evaluate the PDF at the midpoints of 200 equally spaced edges over the data range.
    num_bins = 200
    edges = np.linspace(min(data), max(data), num_bins)
    bin_centers = 0.5 * (edges[1:] + edges[:-1])

    # get PDF from data by fitting it to skewnorm
    a, loc, scale = skewnorm.fit(data)
    pdf = skewnorm.pdf(bin_centers, a, loc, scale)

    # plot; `density` replaces the `normed` keyword that newer matplotlib no longer accepts
    hist_kwargs = dict(density=True, edgecolor='black', linewidth=1.2, alpha=0.5, bins=20, stacked=True)
    _, ax = plt.subplots(1, 1)
    ax.plot(bin_centers, pdf, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
    ax.hist(data, color='g', label="Histogram of samples (normalized)", **hist_kwargs)
    ax.axvline(x=np.percentile(data, 1), color='red', ls=':', lw=2, label='1 pc')
    # The 50th percentile is the median, not the mean.
    ax.axvline(x=np.percentile(data, 50), color='red', ls=':', lw=2, label='median')
    ax.axvline(x=np.percentile(data, 99), color='red', ls=':', lw=2, label='99 pc')
    # Pass loc and scale so the quantiles belong to the fitted distribution,
    # not to a standard skew-normal that only shares the shape parameter.
    print("%f :: %f" % (skewnorm.ppf(0.01, a, loc, scale),
                        skewnorm.ppf(0.99, a, loc, scale)))
    ax.legend(loc='best', frameon=True)
    plt.show()
예제 #3
0
def compare_hist_to_norm_ax(x, data, bins=25):
    """Draw a histogram of ``data`` with normal and skew-normal fits on ``ax[x]``.

    ``ax`` is not a parameter: it is looked up in the enclosing scope and
    indexed with ``x``.

    :param x: index selecting the target axes inside ``ax``.
    :param data: 1-D sequence of samples.
    :param bins: number of histogram bins. Default is 25.
    :return: None
    """
    axis = ax[x]

    mu, std = scipy.stats.norm.fit(data)
    sk = scipy.stats.skew(data)

    axis.hist(data,
              bins=bins,
              density=True,
              alpha=0.6,
              color='purple',
              label="Données")

    # Plot the PDFs over the x-range of the target axes (not of whichever
    # axes pyplot currently considers active).
    xmin, xmax = axis.get_xlim()
    X = np.linspace(xmin, xmax)

    axis.plot(X,
              scipy.stats.norm.pdf(X, mu, std),
              label="Normal Distribution")
    axis.plot(X,
              skewnorm.pdf(X, *skewnorm.fit(data)),
              color='black',
              label="Skewed Normal Distribution")

    title2 = "Moments mu: {}, sig: {}, sk: {}".format(round(mu, 4),
                                                      round(std, 4),
                                                      round(sk, 4))
    # Axes objects expose set_ylabel / set_title; ylabel() and title() are pyplot functions.
    axis.set_ylabel("Fréquence", rotation=90)
    axis.set_title(title2)
    axis.legend()
예제 #4
0
def fig4(name, func, eps):
    """Makes figure 4.

    Inside a fresh ``pm.Model`` context, ``fit_model(name, func)`` is called
    and the trace stored under ``name`` is loaded. For every deterministic
    variable whose name contains "Lambda" (sorted by name, at most five, one
    per subplot row) the posterior histogram is drawn, the bins inside the
    HPD interval are shaded, and a skew-normal fit is plotted together with
    a standard normal curve.

    Args:
        name (str): Descriptive name of the model; also the directory that
            receives ``fig4.png``.
        func: Passed unchanged to ``fit_model``.
        eps (bool): If exactly ``True``, the figure is additionally saved as
            ``manuscript/fig4.eps``.

    """
    with pm.Model() as model:
        fit_model(name, func)
        trace = pm.load_trace(name)
        lambda_names = sorted(
            d.name for d in model.deterministics if "Lambda" in d.name)

    set_fig_defaults()
    rcParams["figure.figsize"] = (3, 3 * 2)
    fig, axes = plt.subplots(5, 1, constrained_layout=True)

    for row, (param, ax) in enumerate(zip(lambda_names, axes)):
        samples = trace[param]

        heights, edges, _ = ax.hist(samples,
                                    bins=50,
                                    density=True,
                                    histtype="step",
                                    color="lightgray")
        ax.set_xlabel(param)
        if row == 0:
            # Only the top panel carries the y-axis label.
            ax.set_ylabel("Posterior density")

        hpd_lo, hpd_hi = pm.stats.hpd(samples)
        for height, left, right in zip(heights, edges, edges[1:]):
            # Work out which part of this bin (if any) lies inside the HPD interval.
            if left > hpd_lo:
                if right < hpd_hi:
                    span = [left, right]
                elif left < hpd_hi < right:
                    span = [left, hpd_hi]
                else:
                    continue
            elif left < hpd_lo < right:
                span = [hpd_lo, right]
            else:
                continue
            ax.fill_between(span, 0, [height, height], color="lightgray")

        # The x grid always includes zero so both curves can be compared there.
        grid = np.linspace(min([edges[0], 0]), max([0, edges[-1]]))
        theta = skewnorm.fit(samples)
        ax.plot(grid, skewnorm.pdf(grid, *theta), "k", label="Normal approx.")
        ax.plot(grid, norm.pdf(grid), "k--", label="Prior")
        ax.plot([0, 0], [skewnorm.pdf(0, *theta), norm.pdf(0)], "ko")

    fig.savefig(f"{name}/fig4.png")

    if eps is True:
        fig.savefig("manuscript/fig4.eps")
예제 #5
0
    def calc_risk_skewnorm(self, confidence=0.95):
        """Estimate VaR and CVaR of the portfolio from a skew-normal fit to its losses.

        Losses are the negated first column of ``self.returns.dot(self.allocation)``.
        VaR is the ``confidence`` quantile of the fitted distribution; CVaR is the
        integral of ``y * pdf(y)`` from VaR upwards divided by ``1 - confidence``.

        Returns:
            tuple: ``(losses, VaR, CVaR)``.
        """
        portfolio = self.returns.dot(self.allocation)
        losses = -portfolio.iloc[:, 0]

        shape, loc, scale = skewnorm.fit(losses)
        VaR = skewnorm.ppf(confidence, shape, loc, scale)

        def identity(y):
            return y

        # Unnormalized tail expectation: integral of y * pdf(y) over [VaR, inf).
        partial_mean = skewnorm.expect(identity,
                                       args=(shape, ),
                                       loc=loc,
                                       scale=scale,
                                       lb=VaR)
        tail_weight = 1 / (1 - confidence)
        CVaR = tail_weight * partial_mean

        return losses, VaR, CVaR
예제 #6
0
def plot_skew_norm_fit(ax,
                       data,
                       bins,
                       color='k',
                       linestyle='-',
                       linewidth=3,
                       label=None):
    """Fit a skew-normal distribution to ``data`` and draw its PDF on ``ax``.

    The PDF is evaluated at the positions given in ``bins`` and drawn with a
    single ``ax.plot`` call; the styling arguments are passed through as
    keyword arguments. Nothing is returned.
    """
    shape, location, spread = skewnorm.fit(data)
    density = skewnorm.pdf(bins, shape, loc=location, scale=spread)
    line_style = dict(color=color,
                      linestyle=linestyle,
                      linewidth=linewidth,
                      label=label)
    ax.plot(bins, density, **line_style)
예제 #7
0
def main():
	"""Compare distribution fits for the log of per-hour BTCUSDT trade counts.

	Rows of ``../Binance/BTCUSDT.csv`` are counted into one-hour buckets; the
	``Timestamp`` column is compared against epoch-millisecond boundaries that
	start one year before 2019-06-01. The mean bucket count is printed, buckets
	holding exactly one trade are dropped, and skew-normal, normal and
	exponentially modified Gaussian distributions are fitted to the log counts.
	The histogram and the three fitted PDFs are saved to
	``BTCUSDT log Trades`` and shown.
	"""
	symbol = 'BTCUSDT'
	hour_ms = 60 * 60 * 1000
	start = int(datetime.datetime.timestamp(datetime.datetime(2019, 6, 1))) * 1000 - 365 * 24 * hour_ms
	time_step = 1
	step_ms = time_step * hour_ms

	previous = start
	trades = []
	total_trades = 0
	with open('../Binance/' + symbol + '.csv', 'r') as csvfile:
		reader = csv.DictReader(csvfile)
		for line in reader:
			# Crossing a bucket boundary closes the current bucket.
			if int(line['Timestamp']) > previous + step_ms:
				previous += step_ms
				trades.append(total_trades)
				total_trades = 0
			total_trades += 1

	mean = np.mean(trades)
	print(mean)

	trades = [t for t in trades if t != 1]
	log_trades = np.log(trades)

	a, loc, scale = skewnorm.fit(log_trades)
	loc_n, scale_n = norm.fit(log_trades)
	k, loc_ne, scale_ne = exponnorm.fit(log_trades)

	plt.hist(log_trades, bins=100, density=True)
	x = np.linspace(6, 12, 100)
	plt.plot(x, skewnorm.pdf(x, a, loc=loc, scale=scale), label='skewnorm')
	plt.plot(x, norm.pdf(x, loc=loc_n, scale=scale_n), label='norm')
	# Use the exponnorm fit's own loc/scale; the normal fit's values belong to a different model.
	plt.plot(x, exponnorm.pdf(x, k, loc=loc_ne, scale=scale_ne), label='Exponentially modified Gaussian')

	plt.xlabel('Log Trades')
	plt.ylabel('Density')

	plt.legend()

	plt.savefig(symbol + ' log Trades')

	plt.show()
예제 #8
0
def calc_distribution(y, type='norm', lower=0.01, upper=99.99, points=100):
    """Return a fitted PDF curve for ``y`` sampled on an evenly spaced grid.

    The grid runs between the two values returned by
    ``get_percentiles(y, lower, upper)`` and has ``points`` samples.

    Args:
        y: samples to fit.
        type: ``'norm'`` for ``scipy.stats.norm`` or ``'skewed'`` for
            ``scipy.stats.skewnorm``.
        lower, upper: forwarded to ``get_percentiles``.
        points: number of grid samples.

    Returns:
        tuple: ``(X, Y)`` with the grid and the fitted PDF evaluated on it.

    Raises:
        AttributeError: if ``type`` is neither ``'norm'`` nor ``'skewed'``.
    """
    grid_start, grid_stop = get_percentiles(y, lower, upper)
    X = np.linspace(grid_start, grid_stop, points)

    if type == 'norm':
        family = norm
    elif type == 'skewed':
        family = skewnorm
    else:
        raise AttributeError("'type' not recognized.")

    fitted = family.fit(y)
    Y = family.pdf(X, *fitted)
    return X, Y
예제 #9
0
def table2(name, func, tex):
    """Makes table 2.

    Inside a fresh ``pm.Model`` context, ``fit_model(name, func)`` is called,
    the trace stored under ``name`` is loaded and summarized for every
    deterministic variable whose name contains "Lambda". Each variable is
    paired with an entry of the module-level ``interps`` sequence. For each
    pair a Bayes factor is computed as the standard normal density at zero
    divided by the density at zero of a skew-normal fit to the posterior
    samples.

    Args:
        name (str): Descriptive name of the model; also the directory that
            receives ``table2.tex``.
        func: Passed unchanged to ``fit_model``.
        tex (bool): If exactly ``True``, the table is additionally saved to
            ``manuscript/table2.tex``.

    """
    # Column order is fixed here rather than read back from the last row
    # dict, so an empty parameter list yields an empty table, not a NameError.
    columns = [
        "Variable",
        "Posterior mean (95% HPD)",
        "During roved-frequency trials ...",
        "BF",
        "Evidence",
    ]

    with pm.Model() as m:
        fit_model(name, func)
        trace = pm.load_trace(name)
        params = sorted([p.name for p in m.deterministics if "Lambda" in p.name])
        summary = pm.summary(trace, var_names=params)

    prior_at_zero = norm.pdf(0)  # loop-invariant
    rows = []
    for p, i in zip(params, interps):
        theta = skewnorm.fit(trace[p])
        bf = prior_at_zero / skewnorm.pdf(0, *theta)
        a, b, c = summary.loc[p, ["mean", "hpd_2.5", "hpd_97.5"]]

        rows.append({
            "Variable": p,
            "Posterior mean (95% HPD)": "%s (%s, %s)" % (
                latexify(a), latexify(b), latexify(c)),
            "During roved-frequency trials ...": i,
            "BF": latexify(bf),
            "Evidence": interpret(bf),
        })

    table = pd.DataFrame(rows, columns=columns)
    table.to_latex(f"{name}/table2.tex", escape=False, index=False)

    if tex is True:
        table.to_latex("manuscript/table2.tex", escape=False, index=False)
예제 #10
0
def fit_params_to_1d_data(logX):
    """
    Fit a skew-normal distribution to every 1-D sample set in ``logX``.

    Args
    ----
    logX:
        Array whose first two axes index module and phase resolution;
        ``logX[m, p]`` holds the log-capacity samples for that pair.

    Returns
    -------
    Array of shape ``(logX.shape[0], logX.shape[1], 3)`` holding the
    ``(shape, loc, scale)`` triple returned by ``skewnorm.fit`` for each pair.
    """
    n_modules, n_phases = logX.shape[0], logX.shape[1]
    fitted = np.zeros((n_modules, n_phases, 3))
    for cell in np.ndindex(n_modules, n_phases):
        fitted[cell] = skewnorm.fit(logX[cell])
    return fitted
예제 #11
0
def compare_hist_to_norm(data, bins=25):
    """Plot a density histogram of ``data`` with normal and skew-normal fits.

    A new 10x5 figure is created through pyplot. The title reports the fitted
    normal mean and standard deviation plus the sample skewness, each rounded
    to four decimals. The figure is not shown by this function.

    :param data: 1-D sequence of samples.
    :param bins: number of histogram bins. Default is 25.
    :return: None
    """
    plt.figure(figsize=(10, 5))

    # Summary statistics used for the normal curve and for the title.
    mu, std = scipy.stats.norm.fit(data)
    sk = scipy.stats.skew(data)
    rounded = [round(value, 4) for value in (mu, std, sk)]
    title2 = "Moments mu: {}, sig: {}, sk: {}".format(*rounded)

    hist_style = dict(bins=bins,
                      density=True,
                      alpha=0.6,
                      color='purple',
                      label="Données")
    plt.hist(data, **hist_style)

    # Evaluate both PDFs across the x-range chosen by the histogram.
    xmin, xmax = plt.xlim()
    X = np.linspace(xmin, xmax)
    normal_pdf = scipy.stats.norm.pdf(X, mu, std)
    skewed_pdf = skewnorm.pdf(X, *skewnorm.fit(data))

    plt.plot(X, normal_pdf, label="Normal Distribution")
    plt.plot(X, skewed_pdf, color='black', label="Skewed Normal Distribution")

    plt.ylabel("Fréquence", rotation=90)
    plt.title(title2)
    plt.legend()
예제 #12
0
def fit_skewnormal(signal, tag):
    """Fit a skew-normal distribution to ``signal["mean"]`` and report the result.

    The interval returned by ``skewnorm.interval(CONFIDENCE, ...)`` for the
    fitted distribution is computed. When the module-level ``PLOTTING`` flag
    is truthy, a histogram with the fitted PDF and the two interval bounds is
    saved under ``analysis/images/`` using ``slugify(tag)`` in the file name.

    Returns:
        dict: keys ``"distribution"``, ``"params"`` (a one-element list with
        ``a``, ``loc`` and ``std``) and ``"confidence"`` (a one-element list
        holding the interval).
    """
    samples = signal["mean"].values
    shape, loc, scale = skewnorm.fit(samples)
    confidence_interval = skewnorm.interval(CONFIDENCE, shape, loc=loc, scale=scale)

    if PLOTTING:
        # Histogram of the raw samples.
        plt.subplots()
        plt.hist(samples, bins=25, density=True, alpha=0.6, color="g")

        # Fitted PDF across the histogram's x-range.
        left, right = plt.xlim()
        grid = np.linspace(left, right, 100)
        plt.plot(grid, skewnorm.pdf(grid, shape, loc, scale), "k", linewidth=1)
        plt.title("Fit results skewnormal: a=%2f loc = %.2f,  std = %.2f" % (
            shape, loc, scale))

        # Mark both ends of the interval.
        for bound in (confidence_interval[0], confidence_interval[1]):
            plt.axvline(x=bound)

        plt.savefig(f"analysis/images/{slugify(tag)}_fit_skew_histogram.png",
                    format="png")

    fitted = {"a": shape, "loc": loc, "std": scale}
    return {
        "distribution": "skewnormal",
        "params": [fitted],
        "confidence": [confidence_interval],
    }
예제 #13
0
def _fit_skewnorm_cluster(samples):
    """Return ``(a, loc, scale)`` describing one cluster as a skew-normal.

    A single-sample cluster is not fitted: it gets ``TH_SKEWNESS`` as shape,
    the sample itself as location and ``MIN_SCALE`` as scale. Otherwise a
    free fit is made and, if its shape exceeds ``TH_SKEWNESS``, the fit is
    repeated with the shape fixed at ``TH_SKEWNESS``.
    """
    if samples.size == 1:
        return TH_SKEWNESS, samples.mean(), MIN_SCALE
    a, m, s = skewnorm.fit(samples)
    if a > TH_SKEWNESS:
        a, m, s = skewnorm.fit(samples, f0=TH_SKEWNESS)
    return a, m, s


def sep_two_skewed_normals(x, th_init):
    """Split 1-D samples into two skew-normal clusters and return the threshold.

    Starting from ``th_init``, the samples below and at/above the threshold
    are fitted separately. The threshold is then re-estimated by a binary
    search over the sorted samples for the point where the two fitted PDFs
    cross, and the clusters are refitted. This repeats until the size of the
    lower cluster stops changing or returns to a size already seen.

    Args:
        x: 1-D numpy array of samples.
        th_init: initial threshold.

    Returns:
        tuple: ``(th, (m0, m1, s0, s1, a0, a1))`` with the final threshold and
        the location, scale and shape of the lower (0) and upper (1) cluster.
        If no sample lies below ``th_init`` the function returns immediately
        with ``th_init`` and placeholder parameters.
    """
    x0 = x[x < th_init]
    x1 = x[x >= th_init]

    if x0.size == 0:
        return th_init, (x.min() - 1, x1.mean(), 0.01, x1.std(), 0, 0)

    # Each cluster is described by its own samples; previously a one-sample
    # upper cluster took its location from the lower cluster's mean.
    a1, m1, s1 = _fit_skewnorm_cluster(x1)
    a0, m0, s0 = _fit_skewnorm_cluster(x0)

    num_x0_last = x0.size
    num_change = 1
    x_sorted = sorted(x)
    nums_x0 = [num_x0_last]
    while num_change:
        # E, binary search for new th
        # NOTE(review): when x1 holds a single sample i1 starts at x.size, which
        # is out of range for x_sorted if the search never lowers it -- confirm.
        i0 = int(x0.size / 2)
        i1 = x.size - int(x1.size / 2)
        while i1 - i0 > 1:
            i = int((i0 + i1) / 2)
            p0 = skewnorm.pdf(x_sorted[i], a0, m0, s0) - skewnorm.pdf(x_sorted[i], a1, m1, s1)
            if p0 > 0:
                i0 = i
            else:
                i1 = i

        th = (x_sorted[i0] + x_sorted[i1]) / 2

        x0 = x[x < th]
        x1 = x[x >= th]

        # M
        if x0.size == 0:
            break
        a1, m1, s1 = _fit_skewnorm_cluster(x1)
        a0, m0, s0 = _fit_skewnorm_cluster(x0)

        # update; stop as soon as a lower-cluster size repeats
        num_change = x0.size - num_x0_last
        num_x0_last = x0.size
        if num_x0_last not in nums_x0:
            nums_x0.append(num_x0_last)
        else:
            break

    th = min(skewnorm.ppf(TH_SKEWNORM_PPF, a0, m0, s0), th)
    # extreme case that under very weak L1 constraint, negligible cluster is fitted with large sigma
    if s1 > 0.1 and s0 / s1 > 10:
        th = min(skewnorm.ppf(1e-4, a1, m1, s1), th)
    return th, (m0, m1, s0, s1, a0, a1)
# Script fragment: fits a skew-normal to log meteorite mass and scales a tail
# probability by an event count. `df`, `land_area`, `earth_total_area` and
# `not_populated_rate` are defined outside this fragment.

# Keep only rows that have both a mass and a year.
df = df.dropna(subset=["mass", "year"])

# Restrict to 1975 < year < 2010 and drop zero masses before taking the log.
df = df.loc[(df["year"] > 1975) & (df["year"] < 2010) & (df["mass"] != 0)]

observed_rate = land_area / earth_total_area * not_populated_rate
logmass = np.log(df["mass"])

# Exploratory plots: log mass against year, then a histogram of years.
plt.plot(df["year"], logmass, "bo")
plt.show()
plt.hist(df["year"], bins=50)
plt.show()

pd.plotting.scatter_matrix(df[["mass", "year", "reclat", "reclong"]],
                           figsize=(7, 7))
plt.show()

# Fit a skew-normal to log mass and overlay its PDF on the density histogram.
ms = np.linspace(-5, 20, 100)
p_skewnorm = skewnorm.fit(logmass)
pdf_skewnorm = skewnorm.pdf(ms, *p_skewnorm)
plt.hist(logmass, bins=50, alpha=0.2, density=True)
plt.plot(ms, pdf_skewnorm, c="r")
plt.show()

# Log of sphere volume (radius 500) times 1600 times 1000.
# NOTE(review): presumably metres, a density in kg/m^3 and a kg-to-g factor -- confirm units.
mass_of_doom = np.log(
    (4 / 3) * np.pi * 500**3 * 1600 *
    1000)  # Just using a spherical approximation and some avg density

# Upper-tail probability of the fitted distribution beyond that log mass.
meteor_is_doom = 1 - skewnorm.cdf(mass_of_doom, *p_skewnorm)
# 1000 times the mean number of rows per year, divided by observed_rate.
num_events = 1000 * df["year"].value_counts().mean() / observed_rate
print(meteor_is_doom * num_events)
예제 #15
0
        spw = input()
        print("Select baseline pair from " + str(uvp.get_blpairs()) +
              " by typing the index : ")
        i = input()
        key = (spw, blpairs[i], pol)
        power = np.abs(np.real(uvp.get_data(key)))

        data = power.T[min_delay_index[0][0]:max_delay_index[0]
                       [0]][:, time_min:time_max].flatten()

        fig_hist2, ax_hist2 = plt.subplots(2, figsize=(12, 8))

        x = np.linspace(min(data), max(data), 30)
        # We now fit a skewed probability distribution.
        ax_hist2[0].plot(x,
                         skewnorm.pdf(x, *skewnorm.fit(data)),
                         lw=3,
                         color='k',
                         label='skewnorm pdf')

        p4 = ax_hist2[0].hist(data,
                              bins=30,
                              density=True,
                              linewidth=2,
                              edgecolor='k')
        ax_hist2[0].set_ylabel("Probability", fontsize=14)
        ax_hist2[0].set_xlabel(r"$P(k)\ \rm [mK^2\ h^{-3}\ Mpc^3]$",
                               fontsize=14)

        # We plot the residuals between our histogram and our fit.
        ax_hist2[1].plot(x, p4[0] - skewnorm.pdf(x, *skewnorm.fit(data)))
예제 #16
0
        "../Data/Empirical/Emp_Phi_Final/nosp_main_phi.csv", sep=",", header=0)
    avg_nosp = gmean(exp_nosp[["Phi"]])

    mp = np.array(exp_mc[["Phi"]])[:, 0]
    nosp = np.array(exp_nosp[["Phi"]])[:, 0]
    print "Average Phi, secretory: ", avg_mc[0]
    print "Median Phi, secretory: ", np.median(mp)
    print "Variance Phi, secretory ", np.var(mp)
    print ""
    print "Average Phi, nonsecretory: ", avg_nosp[0]
    print "Median Phi, nonsecretory: ", np.median(nosp)
    print "Variance Phi, nonsecretory ", np.var(nosp)
total = 0.0

# Build frozen distributions for the `mp` and `nosp` samples; the family is
# selected by the first command-line argument. Any other value leaves
# dist_mp / dist_nosp undefined.
if sys.argv[1] == "skewnorm":
    mp_shape, mp_loc, mp_s, = skewnorm.fit(mp)
    nosp_shape, nosp_loc, nosp_s = skewnorm.fit(nosp)
    dist_mp = skewnorm(mp_shape, mp_loc, mp_s)
    dist_nosp = skewnorm(nosp_shape, nosp_loc, nosp_s)
elif sys.argv[1] == "norm":
    mp_loc, mp_s = norm.fit(mp)
    nosp_loc, nosp_s = norm.fit(nosp)
    dist_mp = norm(mp_loc, mp_s)
    dist_nosp = norm(nosp_loc, nosp_s)
elif sys.argv[1] == "lognorm":
    # floc=0 holds the location parameter at zero during the fit.
    mp_shape, mp_loc, mp_s, = lognorm.fit(mp, floc=0)
    nosp_shape, nosp_loc, nosp_s = lognorm.fit(nosp, floc=0)
    dist_mp = lognorm(mp_shape, mp_loc, mp_s)
    dist_nosp = lognorm(nosp_shape, nosp_loc, nosp_s)

index = 0
예제 #17
0
# Script fragment (Python 2 print statements). `x`, `y`, `s`, `a`, `loc`, `w`
# and `ax` are defined outside this fragment.

# Take the x position where the curve y is largest as the mode.
mode = x[np.argmax(y)]

# Shift both the curve grid and the samples so the mode sits at zero.
x = x - mode
s = s - mode

ax.plot(x, y, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
ax.axvline(x=0, color='y', label='mode')
ax.hist(s, density=True)
ax.set_xlabel('noise')
ax.set_ylabel('Number of samples/PDF')
ax.set_title('Skewnormal histogram, shape = %.1f, loc = %.1f, w = %.1f' %
             (a, loc, w))
# Text box, placed in axes coordinates, reporting the applied shift.
text = 'Shift all noise by mode = %.3f' % mode
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(0.05,
        0.8,
        text,
        transform=ax.transAxes,
        fontsize=12,
        verticalalignment='top',
        bbox=props)
ax.legend()
plt.show()

print 'mode: ', mode

# Refit a skew-normal to the shifted samples and print (shape, loc, scale).
model = skewnorm.fit(s)
print model

# Wait for the user to press Enter before continuing.
raw_input('press')
예제 #18
0
    #+++    x1[0]=xbar;   x1[1:]=x[nsum:]
    #+++    y1[0]=ytilde; y1[1:]=y[nsum:]
    #+++    ax2.plot(x1,y1,color='blue')

    ax2.plot(x, y, color='blue')
    ax2.plot([x[0], x[-1]], [1, 1], color='grey')

# Script fragment: `ax1`, `ax2`, `b5_img_ge`, `b5_img_lt`, `y_hist_ge`,
# `y_hist_lt` and `bkg` are defined outside this fragment.

# Horizontal reference line at a ratio of 1.
ax2.plot([x[0], x[-1]], [1, 1], color='grey')
ax2.set_ylabel('b5 m2 on/off ratio')

# Grid on which both fitted PDFs are evaluated.
x = np.linspace(2000, 21500, 100)

# mu_ge,std_ge = norm.fit(b5_img_ge)
# p_ge = norm.pdf(x,mu_ge,std_ge)

# Fit only the values above 15000, then rescale the PDF so its peak equals
# the histogram peak minus bkg, and draw it lifted by bkg.
a, loc, scale = skewnorm.fit(b5_img_ge[np.where(b5_img_ge > 15000)])
p_ge = skewnorm.pdf(x, a, loc, scale)
p_ge = p_ge / np.amax(p_ge)
p_ge = p_ge * (np.amax(y_hist_ge) - bkg)
ax1.plot(x, p_ge + bkg, linewidth=2, color='cyan')

# mu_lt,std_lt = norm.fit(b5_img_lt)
# p_lt = norm.pdf(x,mu_lt,std_lt)

# Same procedure for the second image, restricted to values strictly
# between 7500 and 15000.
a, loc, scale = skewnorm.fit(b5_img_lt[np.where((b5_img_lt > 7500)
                                                & (b5_img_lt < 15000))])
p_lt = skewnorm.pdf(x, a, loc, scale)
p_lt = p_lt / np.amax(p_lt)
p_lt = p_lt * (np.amax(y_hist_lt) - bkg)
ax1.plot(x, p_lt + bkg, linewidth=2, color='gold')
예제 #19
0
    #distLATarray = (distLATarray - np.mean(distLATarray, axis=0)) / np.std(distLATarray, axis=0)
    #distEarray = (distEarray - np.mean(distEarray, axis=0)) / np.std(distEarray, axis=0)

    # Distribution Test
    y = np.array(distNMRSE)
    y_centered = (y - np.mean(y)) / (np.max(y) - np.min(y))
    y = (y - np.mean(y)) / np.std(y)
    plt.figure()
    plt.plot(y, label='NRMSE')
    plt.title('NRMSE per k-fold validation iteration')
    plt.grid()
    x = np.linspace(np.min(y), np.max(y), iters)
    bins = np.linspace(np.min(y), np.max(y), iters + 1)
    hist, bin_edges = np.histogram(y, bins=bins, density=True)
    mean, var = norm.fit(y)  # Get first 2 moments of data
    smean, svar, sk = skewnorm.fit(y)  # Get first 3 moments of data
    #df, nc, cmean, cvar = ncx2.fit(y, 4) # Get 3 first moments of data
    gNorm = norm.pdf(x, mean, var)  # Center and scale a Gaussian function
    sNorm = skewnorm.pdf(x, smean, svar,
                         sk)  # Center and scale a Skewed Gaussian function
    #chiSq = ncx2.pdf(x, 4, 1) # Center and scale a Chi-Square function
    plt.figure()
    plt.plot(x, gNorm, 'r-', label='Norm PDF')
    plt.plot(x, sNorm, 'g-', label='Skewed Norm PDF')
    #plt.plot(x, chiSq,'m-', label='Chi-Square PDF')
    plt.bar(bin_edges[:-1],
            hist,
            width=(max(bin_edges) - min(bin_edges)) / iters)
    plt.title('NRMSE distribution')
    plt.xlim(np.min(x), np.max(x))
    plt.legend()
예제 #20
0
def calculate_skew_norm_params(train_features_given_class, n_classes):
    """Fit a skew-normal to every (class, feature) column of the training data.

    Args:
        train_features_given_class: array indexed as
            ``[class, sample, feature]``.
        n_classes: number of classes, i.e. how many entries of the first axis
            are fitted.

    Returns:
        numpy array of shape ``(n_classes, n_features, 3)`` holding the
        ``(shape, loc, scale)`` triple from ``skewnorm.fit`` for each pair.
    """
    # The feature loop is bounded by the size of the feature axis; it was
    # previously bounded by n_classes, which is only right when the two agree.
    n_features = train_features_given_class.shape[2]
    return np.array([[
        skewnorm.fit(train_features_given_class[class_i, :, feature_i])
        for feature_i in range(n_features)
    ] for class_i in range(n_classes)])
예제 #21
0
def extract_corner_pixel_info(data, cremove, acis_prefix, obsid, omode, fits):
    """
    extract and analyze acis corner pixel distribution

    For each of the four CCDs listed in the module-level ``ccds`` / ``ccd_id``
    sequences, the events are walked in ``expno`` windows of width ``binsize``
    (also module-level). In every window the values returned by
    ``flaten_the_data`` are summarized by their mean / standard deviation and
    by a skew-normal fit. Every ``kstop``-th window is also kept for the
    multi-panel histogram plot.

    input:  data        --- event data
            cremove     --- a list of pixel position to remove
            acis_prefix --- prefix of the output files
            obsid       --- obsid
            omode       --- data mode
            fits        --- fits file name
    output: <plot_dir>/Ind_Plots/<acis_prefix>_<ccd_id>_<tail>.png (e.g. acisf20783_I2_hist.png)
            <plot_dir>/Ind_Plots/<acis_prefix>_<tail>.png (e.g., acisf20783_cp.png)
            <data_dir>/<ccd_id>.dat
    """
    #
    #--- select data with grade 0, 2, 3, 4, or 6
    #--- NOTE(review): the masks below drop grades 1, 4 and 7, so grade 4 is
    #--- removed and grade 5 is kept, which disagrees with the line above --
    #--- confirm whether 5 was intended instead of 4.
    #
    mask = data['grade'] != 1
    data2 = data[mask]
    mask = data2['grade'] != 4
    data3 = data2[mask]
    mask = data3['grade'] != 7
    data4 = data3[mask]
    #
    #--- create lists of lists to save fitted results (one inner list per ccd)
    #
    nc_list = [[] for x in range(0, 4)]  #--- normal distribution center
    nw_list = [[] for x in range(0, 4)]  #--- normal distribution width
    sc_list = [[] for x in range(0, 4)]  #--- skewed normal distribution center
    sw_list = [[] for x in range(0, 4)]  #--- skewed normal distribution width
    sk_list = [[] for x in range(0, 4)]  #--- skewness
    m_list = [[] for x in range(0, 4)]  #--- mean expno of each window
    #
    #--- go through each ccd
    #
    for n in range(0, 4):
        ccd = ccds[n]
        cname = ccd_id[n]
        mask = data4['ccd_id'] == ccd
        data5 = data4[mask]

        #--- nothing recorded for this ccd: no plot or data file is produced
        if len(data5) < 1:
            continue
#
#--- divide the data into 16 sections: kstop is the number of windows per section
#
        minexpo = min(data5['expno'])
        maxexpo = max(data5['expno'])
        kstop = int((maxexpo - minexpo) / 16.0 / binsize) + 1

        k = 0  #--- bin skip counter: reset after each hist plotted
        m = 0  #--- bin counter
        j = minexpo
        #
        #--- create holders for data for later use (creating a matrix of histogram plots)
        #
        data_list = []
        mnp_list = []
        wnp_list = []
        msp_list = []
        wsp_list = []
        skp_list = []
        bp_list = []
        while j < maxexpo:
            k += 1
            m += 1

            #--- events with j <= expno < j + binsize
            mask = data5['expno'] >= j
            data6 = data5[mask]
            mask = data6['expno'] < (j + binsize)
            data7 = data6[mask]

            pdata = data7['phas']
            edata = data7['expno']

            #--- flaten_the_data returns the string 'NA' when there is nothing to use
            hlist = flaten_the_data(pdata, cremove)
            if hlist != 'NA':

                mu = numpy.mean(hlist)
                std = numpy.std(hlist)
                #--- skewnorm.fit returns (shape, loc, scale)
                [skew, smu, sstd] = skewnorm.fit(hlist)
                #
                #--- save data for the trending plots
                #
                nc_list[n].append(mu)
                nw_list[n].append(std)
                sc_list[n].append(smu)
                sw_list[n].append(sstd)
                sk_list[n].append(skew)
                m_list[n].append(numpy.mean(edata))
                #
                #---  save histogram data for bin: m if k reaches kstop
                #
                if k == kstop:
                    data_list.append(hlist)
                    mnp_list.append(mu)
                    wnp_list.append(std)
                    msp_list.append(smu)
                    wsp_list.append(sstd)
                    skp_list.append(skew)
                    bp_list.append(m)
                    #
                    #--- set k = 0 for the next round
                    #
                    k = 0

            j += binsize
#
#--- create histogram plots in a multipanel plot
#
        create_histogram_plot(data_list, mnp_list, wnp_list, msp_list, wsp_list,\
                              skp_list, bp_list, acis_prefix, ccd_id[n], omode)
        #
        #--- save data
        #--- NOTE(review): the time stamp is the mean over data4 (all ccds),
        #--- not over this ccd's data5 -- confirm this is intended.
        #
        stime = numpy.mean(data4['time'])
        save_data(stime, m_list[n], nc_list[n], nw_list[n],  sc_list[n], sw_list[n],\
                  sk_list[n], ccd_id[n], obsid, omode)
#
#--- now create trend plots: normal distribution and skewed normal distribution
#
    create_trend_plots(ccd_id, m_list, nc_list, fits, acis_prefix, omode)
    create_trend_plots(ccd_id, m_list, sc_list, fits, acis_prefix, omode, sk=1)
예제 #22
0
 def fit(self, x, y=None):
     """Fit a skew-normal distribution to the 1-D samples ``x``.

     The fitted shape, location and scale are stored on the instance as
     ``_a``, ``_loc`` and ``_scale``. ``y`` is accepted but not used.
     An ``AssertionError`` is raised when ``x`` is not one-dimensional.
     Returns ``self``.
     """
     samples = np.asarray(x)
     assert samples.ndim == 1
     fitted = skewnorm.fit(samples)
     self._a, self._loc, self._scale = fitted
     return self
예제 #23
0
    def plot_diagnostic(self, bins: int = 25) -> None:
        """Show a 2x2 diagnostic figure for the forecasting residuals.

        Top-left: density histogram of ``self.error_by_models`` (flattened)
        with normal and skew-normal fits. Top-right: observations against
        forecast error with the line given by ``__alpha_beta_coef``.
        Bottom-left: autocorrelation of the forecast error up to
        ``self.forecast_range`` lags. The probability plot is drawn through
        pyplot on whichever axes is current. The figure is shown before
        returning.

        :param bins: number of histogram bins. Default is 25.
        """
        # error
        residuals = self.error_by_models.flatten()
        # Standardized residuals; computed as before but not used below.
        standardized = (residuals - np.mean(residuals)) / np.std(residuals)

        n_windows = self.num_forecasting - self.forecast_range
        observed = np.array(
            [self.real_and_forcecast[i][0] for i in range(n_windows)]).flatten()
        forecast_error = np.array([
            self.real_and_forcecast[i][1] - self.real_and_forcecast[i][0]
            for i in range(n_windows)
        ]).flatten()

        fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
        fig.subplots_adjust(hspace=0.5)
        hist_ax = ax[0, 0]
        ols_ax = ax[0, 1]

        # Histogram
        mu, std = scipy.stats.norm.fit(residuals)
        sk = scipy.stats.skew(residuals)

        hist_ax.hist(residuals,
                     bins=bins,
                     density=True,
                     alpha=0.6,
                     color='purple',
                     label="Données")

        # Plot the PDFs over the histogram's x-range.
        xmin, xmax = hist_ax.get_xlim()
        X = np.linspace(xmin, xmax)

        hist_ax.plot(X,
                     scipy.stats.norm.pdf(X, mu, std),
                     label="Normal Distribution")
        hist_ax.plot(X,
                     skewnorm.pdf(X, *skewnorm.fit(residuals)),
                     color='black',
                     label="Skewed Normal Distribution")

        title2 = "Moments mu: {}, sig: {}, sk: {}".format(
            round(mu, 4), round(std, 4), round(sk, 4))
        hist_ax.set_ylabel("Fréquence", rotation=90)
        hist_ax.set_title(title2)
        hist_ax.legend()

        # OLS
        alpha, beta = self.__alpha_beta_coef(x=forecast_error, y=observed)

        ols_ax.scatter(y=observed, x=forecast_error)
        ols_ax.plot(forecast_error,
                    alpha + forecast_error * beta,
                    color="red")
        ols_ax.set_ylabel("Observations")
        ols_ax.set_xlabel("Forecast error")
        ols_ax.set_title("OLS Obs vs error")

        # autocorr
        plot_acf(forecast_error, lags=self.forecast_range, ax=ax[1, 0])

        # qqplot
        stats.probplot(residuals, plot=plt)

        plt.show()
# Plot a fitted skew-normal distribution for each distance bin.
# The fits come from scipy's skewnorm.fit; `size_nest_distances` is defined
# outside this fragment.
plt.figure(3)
plt.xlabel('distance bin')
plt.ylabel('sizes (m)')
# Subplot index. NOTE(review): n is not advanced in the visible part of the
# loop, so every bin would be drawn in subplot 1 -- confirm against the full file.
n = 1
x = np.arange(0, 200,
              0.1)  # grid on which each fitted PDF is evaluated
skewed_mean_distances = []
skewed_std_distances = []
skewed_variance_distances = []
skewed_kurtosis_distances = []
skewed_skew_distances = []
for bins in size_nest_distances:
    # Bins with 10 or fewer entries are skipped.
    if len(bins) > 10:
        a, loc, scale = skewnorm.fit(bins)  # fit this distance bin
        mean, var, skew, kurt = skewnorm.stats(a, loc, scale, moments='mvsk')
        skewed_mean_distances.append(mean)
        skewed_std_distances.append(skewnorm.std(
            a, loc, scale))  # standard deviation of the fitted distribution
        skewed_variance_distances.append(
            var)  # variance of the fitted distribution
        skewed_skew_distances.append(skew)  # skewness of the fitted distribution
        skewed_kurtosis_distances.append(
            kurt)  # kurtosis of the fitted distribution
        pdf = skewnorm.pdf(x, a, loc,
                           scale)  # fitted probability density on the grid
        # Draw the histogram of this bin together with its fitted PDF.
        # NOTE(review): `normed` is not accepted by recent matplotlib releases
        # (`density` replaces it) -- confirm the target matplotlib version.
        plt.subplot(1, len(size_nest_distances), n)
        plt.hist(bins, bins=50, normed=True)
        plt.plot(x, pdf)
예제 #25
0
    if task_type not in all_solved_sizes:
        all_solved_sizes[task_type] = solved_sizes
    else:
        all_solved_sizes[task_type] = np.concatenate(
            (all_solved_sizes[task_type], solved_sizes), 0)

# One figure per task type: density histogram of the solved sizes with a
# skew-normal fit and a Gaussian kernel density estimate drawn on top.
for task_type in all_solved_sizes.keys():
    plt.figure()
    n, bins, patches = plt.hist(all_solved_sizes[task_type],
                                50,
                                density=True,
                                facecolor='blue',
                                alpha=0.5)
    # Evaluation grid spanning the observed range.
    X = np.linspace(min(all_solved_sizes[task_type]),
                    max(all_solved_sizes[task_type]))
    plt.plot(X, skewnorm.pdf(X, *skewnorm.fit(all_solved_sizes[task_type])))
    # KDE with Silverman's bandwidth rule.
    kernel = gaussian_kde(all_solved_sizes[task_type], bw_method="silverman")
    plt.plot(X, kernel.pdf(X))
    plt.title(task_type)

plt.show()

# One scatter figure per task type: column 0 against column 1 of the
# solved locations.
for task_type in all_solved_loc.keys():
    plt.figure()
    plt.scatter(all_solved_loc[task_type][:, 0], all_solved_loc[task_type][:,
                                                                           1])
    plt.title(task_type)

plt.show()
def _skewnorm_mean_sigma(alpha, loc, scale):
    """Return (mean, sigma) of a skew-normal with the given fitted parameters.

    Sigma is reported as NaN when the variance rounds to 0 at two decimals,
    so the caller can still round and print it.
    """
    delta = alpha / np.sqrt(1 + alpha**2)
    mean = loc + scale * delta * np.sqrt(2 / np.pi)
    variance = scale**2 * (1 - 2 * (delta**2) / np.pi)
    if np.equal(0, np.round(variance, 2)):
        return mean, np.nan
    return mean, np.sqrt(variance)


def calculateAnglePerParticle(gap_in_cm):
    """Compute per-particle angles from detector hits and log summary stats.

    Reads ``./data/hits.csv`` (columns ``det, x, y, z, energy``) and pairs
    every detector-1 hit that is immediately followed by a detector-2 hit.
    Each pair's z and x displacement becomes an angle in degrees
    (``theta`` from z, ``phi`` from x) via ``arctan2(displacement,
    gap_in_cm)``. A normal and a skew-normal distribution are fitted to each
    angle sample, and one comma-separated line of results is appended to
    ``./data/results.txt``.

    :param gap_in_cm: second argument of ``arctan2`` for every pair;
        presumably the detector separation in the same length unit as the
        ``x``/``z`` columns -- TODO confirm.
    :raises ValueError: if the file has no hits, no hit on detector 2, or no
        detector-1 hit directly followed by a detector-2 hit.
    :return: None; the result is the line appended to the results file.
    """
    # Read in raw hit data.
    # NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 in favour
    # of ``on_bad_lines='skip'``; kept as-is so the pandas version this file
    # was written for keeps working -- confirm before upgrading pandas.
    detector_hits = pd.read_csv('./data/hits.csv',
                                names=["det", "x", "y", "z", "energy"],
                                dtype={
                                    "det": np.int8,
                                    "x": np.float64,
                                    "y": np.float64,
                                    "z": np.float64,
                                    "energy": np.float64
                                },
                                delimiter=',',
                                error_bad_lines=False,
                                engine='c')

    det = detector_hits['det'].values
    x = detector_hits['x'].values
    z = detector_hits['z'].values

    if det.size == 0:
        raise ValueError('No particles hits on either detector!')
    # ``2 in series`` would test the index labels, not the data, so the
    # detector ids themselves are compared here.
    if not (det == 2).any():
        raise ValueError('No particles hit detector 2!')

    # A valid pair is a detector-1 hit immediately followed by a detector-2
    # hit. Pairs cannot overlap (a row cannot be both 1 and 2), so checking
    # every adjacent couple at once finds exactly the sequential pairs.
    is_pair = (det[:-1] == 1) & (det[1:] == 2)
    deltaX = (x[1:] - x[:-1])[is_pair]
    deltaZ = (z[1:] - z[:-1])[is_pair]

    if deltaX.size == 0:
        raise ValueError('No detector 1 -> detector 2 hit pairs found!')

    # Find angles in degrees
    theta = np.rad2deg(np.arctan2(deltaZ, gap_in_cm))
    phi = np.rad2deg(np.arctan2(deltaX, gap_in_cm))

    # Fit a normal distribution to the data. Failures propagate instead of
    # being swallowed: the fitted values are required for the output line.
    mu_theta, std_theta = norm.fit(theta)
    mu_phi, std_phi = norm.fit(phi)

    # Fit skew normal distribution to data; each angle uses its own shape
    # parameter for the mean/sigma conversion.
    mean_t, sig_t = _skewnorm_mean_sigma(*skewnorm.fit(theta))
    mean_p, sig_p = _skewnorm_mean_sigma(*skewnorm.fit(phi))

    theta_actual, phi_actual, numberOfParticles = findSourceAngle()

    rounded_stats = [
        round(value, 4) for value in (
            np.mean(theta), np.std(theta),
            np.mean(phi), np.std(phi),
            np.median(theta), np.median(phi),
            mu_theta, std_theta,
            mu_phi, std_phi,
            mean_t, sig_t,
            mean_p, sig_p,
        )
    ]
    fields = [numberOfParticles, theta_actual, phi_actual] + rounded_stats

    with open('./data/results.txt', 'a') as f:
        f.write(','.join(str(field) for field in fields) + '\n')
# ##### The Anderson-Darling test statistic above (38.20) exceeds the 99% critical value of 1.088 by a large margin, indicating that the Normal distribution may be a poor choice to represent portfolio losses.

# In[ ]:

## Null Hypothesis - No Skewness

# In[375]:

# Run the skewness test on the loss data and report its result
skew_test_result = skewtest(losses)
print("Skewtest result: ", skew_test_result)

# In[376]:

# Estimate skew-normal parameters from the portfolio loss data
params = skewnorm.fit(losses)

# In[377]:

# 95% VaR is the 0.95 quantile of the skew-normal frozen at the fitted parameters
fitted_loss_dist = skewnorm(*params)
VaR_95 = fitted_loss_dist.ppf(0.95)
print("VaR_95 from skew-normal: ", VaR_95)

# Losses are not normally distributed, as the test statistic exceeds the critical value at the 99% confidence level.
# Losses can be skewed.
#
# Definition (wiki) - Anderson-Darling test
# In many cases (but not all), you can determine a p value for the Anderson-Darling statistic and use that value to help you
# determine whether the test is significant or not. Remember the p ("probability") value is the probability of getting a result
# that is more extreme if the null hypothesis is true. If the p value is low (e.g., <=0.05), you conclude that the data do
# not follow the normal distribution. Remember that you chose the significance level, even though many people just use 0.05.
예제 #28
0
파일: prob.py 프로젝트: gneville6/C-SAG
 def __create_pdf(val):
     """Fit a skew-normal to ``val`` and return it as a frozen distribution."""
     fitted_params = skewnorm.fit(val)  # (a, loc, scale)
     return skewnorm(*fitted_params)